/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2015 The Khronos Group Inc.
 * Copyright (c) 2015 Samsung Electronics Co., Ltd.
 * Copyright (c) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Vulkan ShaderExecutor
 *//*--------------------------------------------------------------------*/
25
26 #include "vktShaderExecutor.hpp"
27
28 #include "vkMemUtil.hpp"
29 #include "vkRef.hpp"
30 #include "vkPrograms.hpp"
31 #include "vkRefUtil.hpp"
32 #include "vkTypeUtil.hpp"
33 #include "vkQueryUtil.hpp"
34 #include "vkBuilderUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkObjUtil.hpp"
37 #include "vkBarrierUtil.hpp"
38
39 #include "gluShaderUtil.hpp"
40
41 #include "tcuVector.hpp"
42 #include "tcuTestLog.hpp"
43 #include "tcuTextureUtil.hpp"
44
45 #include "deUniquePtr.hpp"
46 #include "deStringUtil.hpp"
47 #include "deSharedPtr.hpp"
48 #include "deFloat16.h"
49
50 #include <map>
51 #include <sstream>
52 #include <iostream>
53
54 using std::vector;
55 using namespace vk;
56
57 namespace vkt
58 {
59 namespace shaderexecutor
60 {
61 namespace
62 {
63
// Default framebuffer dimensions for the fragment-output rendering path
// (one fragment is rendered per input value, so this caps values per draw).
enum
{
    DEFAULT_RENDER_WIDTH = 100,
    DEFAULT_RENDER_HEIGHT = 100,
};
69
70 // Common typedefs
71
// Shared-pointer aliases so RAII-wrapped Vulkan handles and allocations can be
// stored in std::vector members (Unique<> itself is non-copyable).
typedef de::SharedPtr<Unique<VkImage>> VkImageSp;
typedef de::SharedPtr<Unique<VkImageView>> VkImageViewSp;
typedef de::SharedPtr<Unique<VkBuffer>> VkBufferSp;
typedef de::SharedPtr<Allocation> AllocationSp;
76
77 static VkFormat getAttributeFormat(const glu::DataType dataType);
78
79 // Shader utilities
80
getDefaultClearColor(void)81 static VkClearValue getDefaultClearColor(void)
82 {
83 return makeClearValueColorF32(0.125f, 0.25f, 0.5f, 1.0f);
84 }
85
// Returns the source of a trivial GLSL 4.50 fragment shader that writes
// vec4(0.0) to color output location 0. Used when a fragment stage is needed
// only to complete the pipeline.
static std::string generateEmptyFragmentSource(void)
{
    std::string src;

    src += "#version 450\n"
           "layout(location=0) out highp vec4 o_color;\n";
    src += "void main (void)\n{\n";
    src += " o_color = vec4(0.0);\n";
    src += "}\n";

    return src;
}
99
packFloat16Bit(std::ostream & src,const std::vector<Symbol> & outputs)100 void packFloat16Bit(std::ostream &src, const std::vector<Symbol> &outputs)
101 {
102 for (vector<Symbol>::const_iterator symIter = outputs.begin(); symIter != outputs.end(); ++symIter)
103 {
104 if (glu::isDataTypeFloatType(symIter->varType.getBasicType()))
105 {
106 if (glu::isDataTypeVector(symIter->varType.getBasicType()))
107 {
108 for (int i = 0; i < glu::getDataTypeScalarSize(symIter->varType.getBasicType()); i++)
109 {
110 src << "\tpacked_" << symIter->name << "[" << i << "] = uintBitsToFloat(packFloat2x16(f16vec2("
111 << symIter->name << "[" << i << "], -1.0)));\n";
112 }
113 }
114 else if (glu::isDataTypeMatrix(symIter->varType.getBasicType()))
115 {
116 int maxRow = 0;
117 int maxCol = 0;
118 switch (symIter->varType.getBasicType())
119 {
120 case glu::TYPE_FLOAT_MAT2:
121 maxRow = maxCol = 2;
122 break;
123 case glu::TYPE_FLOAT_MAT2X3:
124 maxRow = 2;
125 maxCol = 3;
126 break;
127 case glu::TYPE_FLOAT_MAT2X4:
128 maxRow = 2;
129 maxCol = 4;
130 break;
131 case glu::TYPE_FLOAT_MAT3X2:
132 maxRow = 3;
133 maxCol = 2;
134 break;
135 case glu::TYPE_FLOAT_MAT3:
136 maxRow = maxCol = 3;
137 break;
138 case glu::TYPE_FLOAT_MAT3X4:
139 maxRow = 3;
140 maxCol = 4;
141 break;
142 case glu::TYPE_FLOAT_MAT4X2:
143 maxRow = 4;
144 maxCol = 2;
145 break;
146 case glu::TYPE_FLOAT_MAT4X3:
147 maxRow = 4;
148 maxCol = 3;
149 break;
150 case glu::TYPE_FLOAT_MAT4:
151 maxRow = maxCol = 4;
152 break;
153 default:
154 DE_ASSERT(false);
155 break;
156 }
157
158 for (int i = 0; i < maxRow; i++)
159 for (int j = 0; j < maxCol; j++)
160 {
161 src << "\tpacked_" << symIter->name << "[" << i << "][" << j
162 << "] = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name << "[" << i << "][" << j
163 << "], -1.0)));\n";
164 }
165 }
166 else
167 {
168 src << "\tpacked_" << symIter->name << " = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name
169 << ", -1.0)));\n";
170 }
171 }
172 }
173 }
174
generatePassthroughVertexShader(const ShaderSpec & shaderSpec,const char * inputPrefix,const char * outputPrefix)175 static std::string generatePassthroughVertexShader(const ShaderSpec &shaderSpec, const char *inputPrefix,
176 const char *outputPrefix)
177 {
178 std::ostringstream src;
179 int location = 0;
180
181 src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
182
183 if (!shaderSpec.globalDeclarations.empty())
184 src << shaderSpec.globalDeclarations << "\n";
185
186 src << "layout(location = " << location << ") in highp vec4 a_position;\n";
187
188 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
189 {
190 location++;
191 src << "layout(location = " << location << ") in " << glu::declare(input->varType, inputPrefix + input->name)
192 << ";\n"
193 << "layout(location = " << location - 1 << ") flat out "
194 << glu::declare(input->varType, outputPrefix + input->name) << ";\n";
195 }
196
197 src << "\nvoid main (void)\n{\n"
198 << " gl_Position = a_position;\n"
199 << " gl_PointSize = 1.0;\n";
200
201 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
202 src << "\t" << outputPrefix << input->name << " = " << inputPrefix << input->name << ";\n";
203
204 src << "}\n";
205
206 return src.str();
207 }
208
// Generates a vertex shader that hosts the tested operation itself: input
// attributes are fetched into plain locals, shaderSpec.source runs on them,
// and the results are written to flat output varyings. Booleans are emitted
// as ints (bools cannot be varyings); when shaderSpec.packFloat16Bit is set,
// float inputs/outputs are processed at float16 precision and re-packed via
// packFloat16Bit() before being assigned to the varyings.
static std::string generateVertexShader(const ShaderSpec &shaderSpec, const std::string &inputPrefix,
                                        const std::string &outputPrefix)
{
    DE_ASSERT(!inputPrefix.empty() && !outputPrefix.empty());

    std::ostringstream src;

    src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";

    if (!shaderSpec.globalDeclarations.empty())
        src << shaderSpec.globalDeclarations << "\n";

    src << "layout(location = 0) in highp vec4 a_position;\n";

    // User input attributes start at location 1 (location 0 is the position).
    int locationNumber = 1;
    for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end();
         ++input, ++locationNumber)
    {
        src << "layout(location = " << locationNumber << ") in "
            << glu::declare(input->varType, inputPrefix + input->name) << ";\n";
    }

    // Output varyings restart at location 0; bools are declared as highp ints.
    locationNumber = 0;
    for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
         ++output, ++locationNumber)
    {
        DE_ASSERT(output->varType.isBasicType());

        if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
        {
            const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
            const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
            const glu::VarType intType(intBaseType, glu::PRECISION_HIGHP);

            src << "layout(location = " << locationNumber << ") flat out "
                << glu::declare(intType, outputPrefix + output->name) << ";\n";
        }
        else
            src << "layout(location = " << locationNumber << ") flat out "
                << glu::declare(output->varType, outputPrefix + output->name) << ";\n";
    }

    src << "\n"
        << "void main (void)\n"
        << "{\n"
        << " gl_Position = a_position;\n"
        << " gl_PointSize = 1.0;\n";

    // Declare & fetch local input variables (converted to float16 types when packing).
    for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
    {
        if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(input->varType.getBasicType()))
        {
            const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(input->varType.getBasicType()));
            src << "\t" << tname << " " << input->name << " = " << tname << "(" << inputPrefix << input->name << ");\n";
        }
        else
            src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << ";\n";
    }

    // Declare local output variables (plus 32-bit "packed_" shadows when packing).
    for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
         ++output)
    {
        if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
        {
            const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(output->varType.getBasicType()));
            src << "\t" << tname << " " << output->name << ";\n";
            const char *tname2 = glu::getDataTypeName(output->varType.getBasicType());
            src << "\t" << tname2 << " "
                << "packed_" << output->name << ";\n";
        }
        else
            src << "\t" << glu::declare(output->varType, output->name) << ";\n";
    }

    // Operation - indented to correct level.
    {
        std::istringstream opSrc(shaderSpec.source);
        std::string line;

        while (std::getline(opSrc, line))
            src << "\t" << line << "\n";
    }

    // Re-encode float outputs into their packed_* shadows at float16 precision.
    if (shaderSpec.packFloat16Bit)
        packFloat16Bit(src, shaderSpec.outputs);

    // Assignments to outputs.
    for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
         ++output)
    {
        if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
        {
            src << "\t" << outputPrefix << output->name << " = packed_" << output->name << ";\n";
        }
        else
        {
            if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
            {
                // Bool varyings were declared as ints; cast on assignment.
                const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
                const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;

                src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "("
                    << output->name << ");\n";
            }
            else
                src << "\t" << outputPrefix << output->name << " = " << output->name << ";\n";
        }
    }

    src << "}\n";

    return src.str();
}
324
// Mapping of fragment shader output symbols to color attachment locations:
// multi-location outputs (e.g. matrices) contribute one entry per location.
struct FragmentOutputLayout
{
    std::vector<const Symbol *> locationSymbols; //! Symbols by location
    std::map<std::string, int> locationMap;      //! Map from symbol name to start location
};
330
generateFragShaderOutputDecl(std::ostream & src,const ShaderSpec & shaderSpec,bool useIntOutputs,const std::map<std::string,int> & outLocationMap,const std::string & outputPrefix)331 static void generateFragShaderOutputDecl(std::ostream &src, const ShaderSpec &shaderSpec, bool useIntOutputs,
332 const std::map<std::string, int> &outLocationMap,
333 const std::string &outputPrefix)
334 {
335 for (int outNdx = 0; outNdx < (int)shaderSpec.outputs.size(); ++outNdx)
336 {
337 const Symbol &output = shaderSpec.outputs[outNdx];
338 const int location = de::lookup(outLocationMap, output.name);
339 const std::string outVarName = outputPrefix + output.name;
340 glu::VariableDeclaration decl(output.varType, outVarName, glu::STORAGE_OUT, glu::INTERPOLATION_LAST,
341 glu::Layout(location));
342
343 TCU_CHECK_INTERNAL(output.varType.isBasicType());
344
345 if (useIntOutputs && glu::isDataTypeFloatOrVec(output.varType.getBasicType()))
346 {
347 const int vecSize = glu::getDataTypeScalarSize(output.varType.getBasicType());
348 const glu::DataType uintBasicType = vecSize > 1 ? glu::getDataTypeUintVec(vecSize) : glu::TYPE_UINT;
349 const glu::VarType uintType(uintBasicType, glu::PRECISION_HIGHP);
350
351 decl.varType = uintType;
352 src << decl << ";\n";
353 }
354 else if (glu::isDataTypeBoolOrBVec(output.varType.getBasicType()))
355 {
356 const int vecSize = glu::getDataTypeScalarSize(output.varType.getBasicType());
357 const glu::DataType intBasicType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
358 const glu::VarType intType(intBasicType, glu::PRECISION_HIGHP);
359
360 decl.varType = intType;
361 src << decl << ";\n";
362 }
363 else if (glu::isDataTypeMatrix(output.varType.getBasicType()))
364 {
365 const int vecSize = glu::getDataTypeMatrixNumRows(output.varType.getBasicType());
366 const int numVecs = glu::getDataTypeMatrixNumColumns(output.varType.getBasicType());
367 const glu::DataType uintBasicType = glu::getDataTypeUintVec(vecSize);
368 const glu::VarType uintType(uintBasicType, glu::PRECISION_HIGHP);
369
370 decl.varType = uintType;
371 for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
372 {
373 decl.name = outVarName + "_" + de::toString(vecNdx);
374 decl.layout.location = location + vecNdx;
375 src << decl << ";\n";
376 }
377 }
378 else
379 src << decl << ";\n";
380 }
381 }
382
// Emits the assignments from locally computed values (valuePrefix + name) to
// the fragment outputs declared by generateFragShaderOutputDecl: floats are
// bit-cast with floatBitsToUint when useIntOutputs is set, matrices are
// written one column per output variable, and bools are cast to ints. When
// isInput16Bit is set, packFloat16Bit() first re-encodes float outputs into
// their "packed_" shadows, which are then assigned instead of the raw values.
static void generateFragShaderOutAssign(std::ostream &src, const ShaderSpec &shaderSpec, bool useIntOutputs,
                                        const std::string &valuePrefix, const std::string &outputPrefix,
                                        const bool isInput16Bit = false)
{
    if (isInput16Bit)
        packFloat16Bit(src, shaderSpec.outputs);

    for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
         ++output)
    {
        const std::string packPrefix =
            (isInput16Bit && glu::isDataTypeFloatType(output->varType.getBasicType())) ? "packed_" : "";

        // NOTE(review): this branch hard-codes the "o_" prefix instead of using
        // outputPrefix, and skips packPrefix -- presumably callers always pass
        // "o_" and the int-output path never packs; confirm before changing.
        if (useIntOutputs && glu::isDataTypeFloatOrVec(output->varType.getBasicType()))
            src << " o_" << output->name << " = floatBitsToUint(" << valuePrefix << output->name << ");\n";
        else if (glu::isDataTypeMatrix(output->varType.getBasicType()))
        {
            const int numVecs = glu::getDataTypeMatrixNumColumns(output->varType.getBasicType());

            // One assignment per column, to the per-column outputs declared earlier.
            for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
                if (useIntOutputs)
                    src << "\t" << outputPrefix << output->name << "_" << vecNdx << " = floatBitsToUint(" << valuePrefix
                        << output->name << "[" << vecNdx << "]);\n";
                else
                    src << "\t" << outputPrefix << output->name << "_" << vecNdx << " = " << packPrefix << valuePrefix
                        << output->name << "[" << vecNdx << "];\n";
        }
        else if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
        {
            // Bool outputs were declared as ints; cast on assignment.
            const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
            const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;

            src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "("
                << valuePrefix << output->name << ");\n";
        }
        else
            src << "\t" << outputPrefix << output->name << " = " << packPrefix << valuePrefix << output->name << ";\n";
    }
}
422
generatePassthroughFragmentShader(const ShaderSpec & shaderSpec,bool useIntOutputs,const std::map<std::string,int> & outLocationMap,const std::string & inputPrefix,const std::string & outputPrefix)423 static std::string generatePassthroughFragmentShader(const ShaderSpec &shaderSpec, bool useIntOutputs,
424 const std::map<std::string, int> &outLocationMap,
425 const std::string &inputPrefix, const std::string &outputPrefix)
426 {
427 std::ostringstream src;
428
429 src << "#version 450\n";
430
431 if (!shaderSpec.globalDeclarations.empty())
432 src << shaderSpec.globalDeclarations << "\n";
433
434 int locationNumber = 0;
435 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
436 ++output, ++locationNumber)
437 {
438 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
439 {
440 const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
441 const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
442 const glu::VarType intType(intBaseType, glu::PRECISION_HIGHP);
443
444 src << "layout(location = " << locationNumber << ") flat in "
445 << glu::declare(intType, inputPrefix + output->name) << ";\n";
446 }
447 else
448 src << "layout(location = " << locationNumber << ") flat in "
449 << glu::declare(output->varType, inputPrefix + output->name) << ";\n";
450 }
451
452 generateFragShaderOutputDecl(src, shaderSpec, useIntOutputs, outLocationMap, outputPrefix);
453
454 src << "\nvoid main (void)\n{\n";
455
456 generateFragShaderOutAssign(src, shaderSpec, useIntOutputs, inputPrefix, outputPrefix);
457
458 src << "}\n";
459
460 return src.str();
461 }
462
// Generates a points-in/points-out geometry shader that hosts the tested
// operation: inputs arrive as single-element flat arrays, shaderSpec.source
// runs on locals, and results are forwarded as flat outputs before the one
// vertex is emitted. gl_PointSize is only forwarded when supported.
static std::string generateGeometryShader(const ShaderSpec &shaderSpec, const std::string &inputPrefix,
                                          const std::string &outputPrefix, const bool pointSizeSupported)
{
    DE_ASSERT(!inputPrefix.empty() && !outputPrefix.empty());

    std::ostringstream src;

    src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";

    // GLSL ES 3.1 requires the geometry shader extension.
    if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
        src << "#extension GL_EXT_geometry_shader : require\n";

    if (!shaderSpec.globalDeclarations.empty())
        src << shaderSpec.globalDeclarations << "\n";

    src << "layout(points) in;\n"
        << "layout(points, max_vertices = 1) out;\n";

    // Inputs are arrays (one element, since the primitive is a point).
    int locationNumber = 0;
    for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end();
         ++input, ++locationNumber)
        src << "layout(location = " << locationNumber << ") flat in "
            << glu::declare(input->varType, inputPrefix + input->name) << "[];\n";

    // Output varyings; bools are declared as highp ints.
    locationNumber = 0;
    for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
         ++output, ++locationNumber)
    {
        DE_ASSERT(output->varType.isBasicType());

        if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
        {
            const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
            const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
            const glu::VarType intType(intBaseType, glu::PRECISION_HIGHP);

            src << "layout(location = " << locationNumber << ") flat out "
                << glu::declare(intType, outputPrefix + output->name) << ";\n";
        }
        else
            src << "layout(location = " << locationNumber << ") flat out "
                << glu::declare(output->varType, outputPrefix + output->name) << ";\n";
    }

    src << "\n"
        << "void main (void)\n"
        << "{\n"
        << " gl_Position = gl_in[0].gl_Position;\n"
        << (pointSizeSupported ? " gl_PointSize = gl_in[0].gl_PointSize;\n\n" : "");

    // Fetch input variables
    for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
        src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << "[0];\n";

    // Declare local output variables.
    for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
         ++output)
        src << "\t" << glu::declare(output->varType, output->name) << ";\n";

    src << "\n";

    // Operation - indented to correct level.
    {
        std::istringstream opSrc(shaderSpec.source);
        std::string line;

        while (std::getline(opSrc, line))
            src << "\t" << line << "\n";
    }

    // Assignments to outputs.
    for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
         ++output)
    {
        if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
        {
            // Bool varyings were declared as ints; cast on assignment.
            const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
            const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;

            src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "("
                << output->name << ");\n";
        }
        else
            src << "\t" << outputPrefix << output->name << " = " << output->name << ";\n";
    }

    src << " EmitVertex();\n"
        << " EndPrimitive();\n"
        << "}\n";

    return src.str();
}
555
generateFragmentShader(const ShaderSpec & shaderSpec,bool useIntOutputs,const std::map<std::string,int> & outLocationMap,const std::string & inputPrefix,const std::string & outputPrefix)556 static std::string generateFragmentShader(const ShaderSpec &shaderSpec, bool useIntOutputs,
557 const std::map<std::string, int> &outLocationMap,
558 const std::string &inputPrefix, const std::string &outputPrefix)
559 {
560 std::ostringstream src;
561 src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
562 if (!shaderSpec.globalDeclarations.empty())
563 src << shaderSpec.globalDeclarations << "\n";
564
565 int locationNumber = 0;
566 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end();
567 ++input, ++locationNumber)
568 {
569 src << "layout(location = " << locationNumber << ") flat in "
570 << glu::declare(input->varType, inputPrefix + input->name) << ";\n";
571 }
572
573 generateFragShaderOutputDecl(src, shaderSpec, useIntOutputs, outLocationMap, outputPrefix);
574
575 src << "\nvoid main (void)\n{\n";
576
577 // Declare & fetch local input variables
578 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
579 {
580 if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(input->varType.getBasicType()))
581 {
582 const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(input->varType.getBasicType()));
583 src << "\t" << tname << " " << input->name << " = " << tname << "(" << inputPrefix << input->name << ");\n";
584 }
585 else
586 src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << ";\n";
587 }
588
589 // Declare output variables
590 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
591 ++output)
592 {
593 if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
594 {
595 const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(output->varType.getBasicType()));
596 src << "\t" << tname << " " << output->name << ";\n";
597 const char *tname2 = glu::getDataTypeName(output->varType.getBasicType());
598 src << "\t" << tname2 << " "
599 << "packed_" << output->name << ";\n";
600 }
601 else
602 src << "\t" << glu::declare(output->varType, output->name) << ";\n";
603 }
604
605 // Operation - indented to correct level.
606 {
607 std::istringstream opSrc(shaderSpec.source);
608 std::string line;
609
610 while (std::getline(opSrc, line))
611 src << "\t" << line << "\n";
612 }
613
614 generateFragShaderOutAssign(src, shaderSpec, useIntOutputs, "", outputPrefix, shaderSpec.packFloat16Bit);
615
616 src << "}\n";
617
618 return src.str();
619 }
620
621 // FragmentOutExecutor
622
// Base executor that runs shaders by drawing one point per input value and
// reading results back from color attachments. Subclasses supply the
// pipeline stages; this class manages the per-input vertex buffers.
class FragmentOutExecutor : public ShaderExecutor
{
public:
    FragmentOutExecutor(Context &context, glu::ShaderType shaderType, const ShaderSpec &shaderSpec,
                        VkDescriptorSetLayout extraResourcesLayout);
    virtual ~FragmentOutExecutor(void);

    // Runs the shader for numValues input tuples and writes results to 'outputs'.
    virtual void execute(int numValues, const void *const *inputs, void *const *outputs,
                         VkDescriptorSet extraResources);

protected:
    const glu::ShaderType m_shaderType;
    const FragmentOutputLayout m_outputLayout; // output symbol -> attachment locations

private:
    // Uploads all input values into per-attribute vertex buffers.
    void bindAttributes(int numValues, const void *const *inputs);

    // Creates one vertex buffer + binding/attribute description for a single attribute.
    void addAttribute(uint32_t bindingLocation, VkFormat format, uint32_t sizePerElement, uint32_t count,
                      const void *dataPtr);
    // reinit render data members
    virtual void clearRenderData(void);

    const VkDescriptorSetLayout m_extraResourcesLayout; // layout for caller-provided extra resources

    std::vector<VkVertexInputBindingDescription> m_vertexBindingDescriptions;
    std::vector<VkVertexInputAttributeDescription> m_vertexAttributeDescriptions;
    std::vector<VkBufferSp> m_vertexBuffers;
    std::vector<AllocationSp> m_vertexBufferAllocs;
};
652
computeFragmentOutputLayout(const std::vector<Symbol> & symbols)653 static FragmentOutputLayout computeFragmentOutputLayout(const std::vector<Symbol> &symbols)
654 {
655 FragmentOutputLayout ret;
656 int location = 0;
657
658 for (std::vector<Symbol>::const_iterator it = symbols.begin(); it != symbols.end(); ++it)
659 {
660 const int numLocations = glu::getDataTypeNumLocations(it->varType.getBasicType());
661
662 TCU_CHECK_INTERNAL(!de::contains(ret.locationMap, it->name));
663 de::insert(ret.locationMap, it->name, location);
664 location += numLocations;
665
666 for (int ndx = 0; ndx < numLocations; ++ndx)
667 ret.locationSymbols.push_back(&*it);
668 }
669
670 return ret;
671 }
672
FragmentOutExecutor(Context & context,glu::ShaderType shaderType,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)673 FragmentOutExecutor::FragmentOutExecutor(Context &context, glu::ShaderType shaderType, const ShaderSpec &shaderSpec,
674 VkDescriptorSetLayout extraResourcesLayout)
675 : ShaderExecutor(context, shaderSpec)
676 , m_shaderType(shaderType)
677 , m_outputLayout(computeFragmentOutputLayout(m_shaderSpec.outputs))
678 , m_extraResourcesLayout(extraResourcesLayout)
679 {
680 const VkPhysicalDevice physicalDevice = m_context.getPhysicalDevice();
681 const InstanceInterface &vki = m_context.getInstanceInterface();
682
683 // Input attributes
684 for (int inputNdx = 0; inputNdx < (int)m_shaderSpec.inputs.size(); inputNdx++)
685 {
686 const Symbol &symbol = m_shaderSpec.inputs[inputNdx];
687 const glu::DataType basicType = symbol.varType.getBasicType();
688 const VkFormat format = getAttributeFormat(basicType);
689 const VkFormatProperties formatProperties = getPhysicalDeviceFormatProperties(vki, physicalDevice, format);
690 if ((formatProperties.bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT) == 0)
691 TCU_THROW(NotSupportedError, "format not supported by device as vertex buffer attribute format");
692 }
693 }
694
// Empty: vertex buffers and allocations are held in smart-pointer members
// (VkBufferSp / AllocationSp) and release themselves.
FragmentOutExecutor::~FragmentOutExecutor(void)
{
}
698
computeVertexPositions(int numValues,const tcu::IVec2 & renderSize)699 static std::vector<tcu::Vec2> computeVertexPositions(int numValues, const tcu::IVec2 &renderSize)
700 {
701 std::vector<tcu::Vec2> positions(numValues);
702 for (int valNdx = 0; valNdx < numValues; valNdx++)
703 {
704 const int ix = valNdx % renderSize.x();
705 const int iy = valNdx / renderSize.x();
706 const float fx = -1.0f + 2.0f * ((float(ix) + 0.5f) / float(renderSize.x()));
707 const float fy = -1.0f + 2.0f * ((float(iy) + 0.5f) / float(renderSize.y()));
708
709 positions[valNdx] = tcu::Vec2(fx, fy);
710 }
711
712 return positions;
713 }
714
getRenderbufferFormatForOutput(const glu::VarType & outputType,bool useIntOutputs)715 static tcu::TextureFormat getRenderbufferFormatForOutput(const glu::VarType &outputType, bool useIntOutputs)
716 {
717 const tcu::TextureFormat::ChannelOrder channelOrderMap[] = {tcu::TextureFormat::R, tcu::TextureFormat::RG,
718 tcu::TextureFormat::RGBA, // No RGB variants available.
719 tcu::TextureFormat::RGBA};
720
721 const glu::DataType basicType = outputType.getBasicType();
722 const int numComps = glu::getDataTypeNumComponents(basicType);
723 tcu::TextureFormat::ChannelType channelType;
724
725 switch (glu::getDataTypeScalarType(basicType))
726 {
727 case glu::TYPE_UINT:
728 channelType = tcu::TextureFormat::UNSIGNED_INT32;
729 break;
730 case glu::TYPE_INT:
731 channelType = tcu::TextureFormat::SIGNED_INT32;
732 break;
733 case glu::TYPE_BOOL:
734 channelType = tcu::TextureFormat::SIGNED_INT32;
735 break;
736 case glu::TYPE_FLOAT:
737 channelType = useIntOutputs ? tcu::TextureFormat::UNSIGNED_INT32 : tcu::TextureFormat::FLOAT;
738 break;
739 case glu::TYPE_FLOAT16:
740 channelType = useIntOutputs ? tcu::TextureFormat::UNSIGNED_INT32 : tcu::TextureFormat::HALF_FLOAT;
741 break;
742 default:
743 throw tcu::InternalError("Invalid output type");
744 }
745
746 DE_ASSERT(de::inRange<int>(numComps, 1, DE_LENGTH_OF_ARRAY(channelOrderMap)));
747
748 return tcu::TextureFormat(channelOrderMap[numComps - 1], channelType);
749 }
750
getAttributeFormat(const glu::DataType dataType)751 static VkFormat getAttributeFormat(const glu::DataType dataType)
752 {
753 switch (dataType)
754 {
755 case glu::TYPE_FLOAT16:
756 return VK_FORMAT_R16_SFLOAT;
757 case glu::TYPE_FLOAT16_VEC2:
758 return VK_FORMAT_R16G16_SFLOAT;
759 case glu::TYPE_FLOAT16_VEC3:
760 return VK_FORMAT_R16G16B16_SFLOAT;
761 case glu::TYPE_FLOAT16_VEC4:
762 return VK_FORMAT_R16G16B16A16_SFLOAT;
763
764 case glu::TYPE_FLOAT:
765 return VK_FORMAT_R32_SFLOAT;
766 case glu::TYPE_FLOAT_VEC2:
767 return VK_FORMAT_R32G32_SFLOAT;
768 case glu::TYPE_FLOAT_VEC3:
769 return VK_FORMAT_R32G32B32_SFLOAT;
770 case glu::TYPE_FLOAT_VEC4:
771 return VK_FORMAT_R32G32B32A32_SFLOAT;
772
773 case glu::TYPE_INT:
774 return VK_FORMAT_R32_SINT;
775 case glu::TYPE_INT_VEC2:
776 return VK_FORMAT_R32G32_SINT;
777 case glu::TYPE_INT_VEC3:
778 return VK_FORMAT_R32G32B32_SINT;
779 case glu::TYPE_INT_VEC4:
780 return VK_FORMAT_R32G32B32A32_SINT;
781
782 case glu::TYPE_UINT:
783 return VK_FORMAT_R32_UINT;
784 case glu::TYPE_UINT_VEC2:
785 return VK_FORMAT_R32G32_UINT;
786 case glu::TYPE_UINT_VEC3:
787 return VK_FORMAT_R32G32B32_UINT;
788 case glu::TYPE_UINT_VEC4:
789 return VK_FORMAT_R32G32B32A32_UINT;
790
791 case glu::TYPE_FLOAT_MAT2:
792 return VK_FORMAT_R32G32_SFLOAT;
793 case glu::TYPE_FLOAT_MAT2X3:
794 return VK_FORMAT_R32G32B32_SFLOAT;
795 case glu::TYPE_FLOAT_MAT2X4:
796 return VK_FORMAT_R32G32B32A32_SFLOAT;
797 case glu::TYPE_FLOAT_MAT3X2:
798 return VK_FORMAT_R32G32_SFLOAT;
799 case glu::TYPE_FLOAT_MAT3:
800 return VK_FORMAT_R32G32B32_SFLOAT;
801 case glu::TYPE_FLOAT_MAT3X4:
802 return VK_FORMAT_R32G32B32A32_SFLOAT;
803 case glu::TYPE_FLOAT_MAT4X2:
804 return VK_FORMAT_R32G32_SFLOAT;
805 case glu::TYPE_FLOAT_MAT4X3:
806 return VK_FORMAT_R32G32B32_SFLOAT;
807 case glu::TYPE_FLOAT_MAT4:
808 return VK_FORMAT_R32G32B32A32_SFLOAT;
809 default:
810 DE_ASSERT(false);
811 return VK_FORMAT_UNDEFINED;
812 }
813 }
814
// Creates a host-visible vertex buffer holding 'count' elements of
// 'sizePerElement' bytes copied from dataPtr, and records the matching
// binding description (binding index = number of buffers so far, per-vertex
// rate, stride = sizePerElement) and attribute description (location =
// bindingLocation, offset 0). The buffer and its allocation are retained in
// member vectors for the executor's lifetime.
void FragmentOutExecutor::addAttribute(uint32_t bindingLocation, VkFormat format, uint32_t sizePerElement,
                                       uint32_t count, const void *dataPtr)
{
    // Portability requires stride to be multiply of minVertexInputBindingStrideAlignment
    // this value is usually 4 and current tests meet this requirement but
    // if this changes in future then this limit should be verified in checkSupport
#ifndef CTS_USES_VULKANSC
    if (m_context.isDeviceFunctionalitySupported("VK_KHR_portability_subset") &&
        ((sizePerElement % m_context.getPortabilitySubsetProperties().minVertexInputBindingStrideAlignment) != 0))
    {
        DE_FATAL("stride is not multiply of minVertexInputBindingStrideAlignment");
    }
#endif // CTS_USES_VULKANSC

    // Add binding specification
    const uint32_t binding = (uint32_t)m_vertexBindingDescriptions.size();
    const VkVertexInputBindingDescription bindingDescription = {binding, sizePerElement, VK_VERTEX_INPUT_RATE_VERTEX};

    m_vertexBindingDescriptions.push_back(bindingDescription);

    // Add location and format specification
    const VkVertexInputAttributeDescription attributeDescription = {
        bindingLocation, // uint32_t location;
        binding,         // uint32_t binding;
        format,          // VkFormat format;
        0u,              // uint32_t offsetInBytes;
    };

    m_vertexAttributeDescriptions.push_back(attributeDescription);

    // Upload data to buffer
    const VkDevice vkDevice = m_context.getDevice();
    const DeviceInterface &vk = m_context.getDeviceInterface();
    const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();

    const VkDeviceSize inputSize = sizePerElement * count;
    const VkBufferCreateInfo vertexBufferParams = {
        VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
        nullptr,                              // const void* pNext;
        0u,                                   // VkBufferCreateFlags flags;
        inputSize,                            // VkDeviceSize size;
        VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,    // VkBufferUsageFlags usage;
        VK_SHARING_MODE_EXCLUSIVE,            // VkSharingMode sharingMode;
        1u,                                   // uint32_t queueFamilyCount;
        &queueFamilyIndex                     // const uint32_t* pQueueFamilyIndices;
    };

    Move<VkBuffer> buffer = createBuffer(vk, vkDevice, &vertexBufferParams);
    de::MovePtr<Allocation> alloc = m_context.getDefaultAllocator().allocate(
        getBufferMemoryRequirements(vk, vkDevice, *buffer), MemoryRequirement::HostVisible);

    VK_CHECK(vk.bindBufferMemory(vkDevice, *buffer, alloc->getMemory(), alloc->getOffset()));

    // Copy the attribute data and flush so the device sees it (host-visible
    // memory is not necessarily coherent).
    deMemcpy(alloc->getHostPtr(), dataPtr, (size_t)inputSize);
    flushAlloc(vk, vkDevice, *alloc);

    // Keep the buffer and its backing allocation alive in member containers.
    m_vertexBuffers.push_back(de::SharedPtr<Unique<VkBuffer>>(new Unique<VkBuffer>(buffer)));
    m_vertexBufferAllocs.push_back(AllocationSp(alloc.release()));
}
874
bindAttributes(int numValues,const void * const * inputs)875 void FragmentOutExecutor::bindAttributes(int numValues, const void *const *inputs)
876 {
877 // Input attributes
878 for (int inputNdx = 0; inputNdx < (int)m_shaderSpec.inputs.size(); inputNdx++)
879 {
880 const Symbol &symbol = m_shaderSpec.inputs[inputNdx];
881 const void *ptr = inputs[inputNdx];
882 const glu::DataType basicType = symbol.varType.getBasicType();
883 const int vecSize = glu::getDataTypeScalarSize(basicType);
884 const VkFormat format = getAttributeFormat(basicType);
885 int elementSize = 0;
886 int numAttrsToAdd = 1;
887
888 if (glu::isDataTypeDoubleOrDVec(basicType))
889 elementSize = sizeof(double);
890 if (glu::isDataTypeFloatOrVec(basicType))
891 elementSize = sizeof(float);
892 else if (glu::isDataTypeFloat16OrVec(basicType))
893 elementSize = sizeof(uint16_t);
894 else if (glu::isDataTypeIntOrIVec(basicType))
895 elementSize = sizeof(int);
896 else if (glu::isDataTypeUintOrUVec(basicType))
897 elementSize = sizeof(uint32_t);
898 else if (glu::isDataTypeMatrix(basicType))
899 {
900 int numRows = glu::getDataTypeMatrixNumRows(basicType);
901 int numCols = glu::getDataTypeMatrixNumColumns(basicType);
902
903 elementSize = numRows * numCols * (int)sizeof(float);
904 numAttrsToAdd = numCols;
905 }
906 else
907 DE_ASSERT(false);
908
909 // add attributes, in case of matrix every column is binded as an attribute
910 for (int attrNdx = 0; attrNdx < numAttrsToAdd; attrNdx++)
911 {
912 addAttribute((uint32_t)m_vertexBindingDescriptions.size(), format, elementSize * vecSize, numValues, ptr);
913 }
914 }
915 }
916
clearRenderData(void)917 void FragmentOutExecutor::clearRenderData(void)
918 {
919 m_vertexBindingDescriptions.clear();
920 m_vertexAttributeDescriptions.clear();
921 m_vertexBuffers.clear();
922 m_vertexBufferAllocs.clear();
923 }
924
createEmptyDescriptorSetLayout(const DeviceInterface & vkd,VkDevice device)925 static Move<VkDescriptorSetLayout> createEmptyDescriptorSetLayout(const DeviceInterface &vkd, VkDevice device)
926 {
927 const VkDescriptorSetLayoutCreateInfo createInfo = {
928 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, (VkDescriptorSetLayoutCreateFlags)0, 0u, nullptr,
929 };
930 return createDescriptorSetLayout(vkd, device, &createInfo);
931 }
932
createEmptyDescriptorPool(const DeviceInterface & vkd,VkDevice device)933 static Move<VkDescriptorPool> createEmptyDescriptorPool(const DeviceInterface &vkd, VkDevice device)
934 {
935 const VkDescriptorPoolSize emptySize = {
936 VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
937 1u,
938 };
939 const VkDescriptorPoolCreateInfo createInfo = {
940 VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
941 nullptr,
942 (VkDescriptorPoolCreateFlags)VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
943 1u,
944 1u,
945 &emptySize};
946 return createDescriptorPool(vkd, device, &createInfo);
947 }
948
allocateSingleDescriptorSet(const DeviceInterface & vkd,VkDevice device,VkDescriptorPool pool,VkDescriptorSetLayout layout)949 static Move<VkDescriptorSet> allocateSingleDescriptorSet(const DeviceInterface &vkd, VkDevice device,
950 VkDescriptorPool pool, VkDescriptorSetLayout layout)
951 {
952 const VkDescriptorSetAllocateInfo allocInfo = {
953 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, nullptr, pool, 1u, &layout,
954 };
955 return allocateDescriptorSet(vkd, device, &allocInfo);
956 }
957
// Runs the shader under test by rendering one 1x1 point per input value into a
// set of color attachments (one attachment per output location) and copying the
// attachments back into the caller-provided output arrays.
//
// inputs/outputs are arrays of raw pointers, one entry per symbol in
// m_shaderSpec.inputs / m_shaderSpec.outputs. extraResources must be a valid
// descriptor set iff m_extraResourcesLayout is non-null (it is bound as set 1).
void FragmentOutExecutor::execute(int numValues, const void *const *inputs, void *const *outputs,
                                  VkDescriptorSet extraResources)
{
    const VkDevice vkDevice = m_context.getDevice();
    const DeviceInterface &vk = m_context.getDeviceInterface();
    const VkQueue queue = m_context.getUniversalQueue();
    const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
    Allocator &memAlloc = m_context.getDefaultAllocator();

    // Values are laid out on a grid at most 128 pixels wide, one pixel per value;
    // the last row may be partially used.
    const uint32_t renderSizeX = de::min(static_cast<uint32_t>(128), (uint32_t)numValues);
    const uint32_t renderSizeY =
        ((uint32_t)numValues / renderSizeX) + (((uint32_t)numValues % renderSizeX != 0) ? 1u : 0u);
    const tcu::UVec2 renderSize(renderSizeX, renderSizeY);
    std::vector<tcu::Vec2> positions;

    const bool useGeometryShader = m_shaderType == glu::SHADERTYPE_GEOMETRY;

    std::vector<VkImageSp> colorImages;
    std::vector<VkImageMemoryBarrier> colorImagePreRenderBarriers;
    std::vector<VkImageMemoryBarrier> colorImagePostRenderBarriers;
    std::vector<AllocationSp> colorImageAllocs;
    std::vector<VkAttachmentDescription> attachments;
    std::vector<VkClearValue> attachmentClearValues;
    std::vector<VkImageViewSp> colorImageViews;

    std::vector<VkPipelineColorBlendAttachmentState> colorBlendAttachmentStates;
    std::vector<VkAttachmentReference> colorAttachmentReferences;

    Move<VkRenderPass> renderPass;
    Move<VkFramebuffer> framebuffer;
    Move<VkPipelineLayout> pipelineLayout;
    Move<VkPipeline> graphicsPipeline;

    Move<VkShaderModule> vertexShaderModule;
    Move<VkShaderModule> geometryShaderModule;
    Move<VkShaderModule> fragmentShaderModule;

    Move<VkCommandPool> cmdPool;
    Move<VkCommandBuffer> cmdBuffer;

    // Set 0 is an empty placeholder so the extra-resources layout can occupy set 1.
    Unique<VkDescriptorSetLayout> emptyDescriptorSetLayout(createEmptyDescriptorSetLayout(vk, vkDevice));
    Unique<VkDescriptorPool> emptyDescriptorPool(createEmptyDescriptorPool(vk, vkDevice));
    Unique<VkDescriptorSet> emptyDescriptorSet(
        allocateSingleDescriptorSet(vk, vkDevice, *emptyDescriptorPool, *emptyDescriptorSetLayout));

    clearRenderData();

    // Compute positions - 1px points are used to drive fragment shading.
    positions = computeVertexPositions(numValues, renderSize.cast<int>());

    // Bind attributes: location 0 carries the point positions, the shader inputs follow.
    addAttribute(0u, VK_FORMAT_R32G32_SFLOAT, sizeof(tcu::Vec2), (uint32_t)positions.size(), &positions[0]);
    bindAttributes(numValues, inputs);

    // Create color images
    {
        const VkPipelineColorBlendAttachmentState colorBlendAttachmentState = {
            VK_FALSE,             // VkBool32 blendEnable;
            VK_BLEND_FACTOR_ONE,  // VkBlendFactor srcColorBlendFactor;
            VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstColorBlendFactor;
            VK_BLEND_OP_ADD,      // VkBlendOp blendOpColor;
            VK_BLEND_FACTOR_ONE,  // VkBlendFactor srcAlphaBlendFactor;
            VK_BLEND_FACTOR_ZERO, // VkBlendFactor destAlphaBlendFactor;
            VK_BLEND_OP_ADD,      // VkBlendOp blendOpAlpha;
            (VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT |
             VK_COLOR_COMPONENT_A_BIT) // VkColorComponentFlags colorWriteMask;
        };

        for (int outNdx = 0; outNdx < (int)m_outputLayout.locationSymbols.size(); ++outNdx)
        {
            // Pick a 4-component attachment format wide enough for the output's basic type.
            const bool isDouble = glu::isDataTypeDoubleOrDVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
            const bool isFloat = isDataTypeFloatOrVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
            const bool isFloat16b = glu::isDataTypeFloat16OrVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
            const bool isSigned = isDataTypeIntOrIVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
            const bool isBool = isDataTypeBoolOrBVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
            const VkFormat colorFormat =
                (isDouble ? VK_FORMAT_R64G64B64A64_SFLOAT :
                            (isFloat16b ? VK_FORMAT_R16G16B16A16_SFLOAT :
                                          (isFloat ? VK_FORMAT_R32G32B32A32_SFLOAT :
                                                     (isSigned || isBool ? VK_FORMAT_R32G32B32A32_SINT :
                                                                           VK_FORMAT_R32G32B32A32_UINT))));

            // Rendering to the chosen format is optional device support; skip when absent.
            {
                const VkFormatProperties formatProperties = getPhysicalDeviceFormatProperties(
                    m_context.getInstanceInterface(), m_context.getPhysicalDevice(), colorFormat);
                if ((formatProperties.optimalTilingFeatures & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT) == 0)
                    TCU_THROW(NotSupportedError, "Image format doesn't support COLOR_ATTACHMENT_BIT");
            }

            const VkImageCreateInfo colorImageParams = {
                VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
                nullptr,                             // const void* pNext;
                0u,                                  // VkImageCreateFlags flags;
                VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
                colorFormat,                         // VkFormat format;
                {renderSize.x(), renderSize.y(), 1u}, // VkExtent3D extent;
                1u,                                  // uint32_t mipLevels;
                1u,                                  // uint32_t arraySize;
                VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
                VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
                VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT, // VkImageUsageFlags usage;
                VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
                1u,                                  // uint32_t queueFamilyCount;
                &queueFamilyIndex,                   // const uint32_t* pQueueFamilyIndices;
                VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout initialLayout;
            };

            const VkAttachmentDescription colorAttachmentDescription = {
                0u,                                       // VkAttachmentDescriptorFlags flags;
                colorFormat,                              // VkFormat format;
                VK_SAMPLE_COUNT_1_BIT,                    // VkSampleCountFlagBits samples;
                VK_ATTACHMENT_LOAD_OP_CLEAR,              // VkAttachmentLoadOp loadOp;
                VK_ATTACHMENT_STORE_OP_STORE,             // VkAttachmentStoreOp storeOp;
                VK_ATTACHMENT_LOAD_OP_DONT_CARE,          // VkAttachmentLoadOp stencilLoadOp;
                VK_ATTACHMENT_STORE_OP_DONT_CARE,         // VkAttachmentStoreOp stencilStoreOp;
                VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // VkImageLayout initialLayout;
                VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // VkImageLayout finalLayout;
            };

            Move<VkImage> colorImage = createImage(vk, vkDevice, &colorImageParams);
            colorImages.push_back(de::SharedPtr<Unique<VkImage>>(new Unique<VkImage>(colorImage)));
            attachmentClearValues.push_back(getDefaultClearColor());

            // Allocate and bind color image memory
            {
                de::MovePtr<Allocation> colorImageAlloc = memAlloc.allocate(
                    getImageMemoryRequirements(vk, vkDevice, *((const VkImage *)colorImages.back().get())),
                    MemoryRequirement::Any);
                VK_CHECK(vk.bindImageMemory(vkDevice, colorImages.back().get()->get(), colorImageAlloc->getMemory(),
                                            colorImageAlloc->getOffset()));
                colorImageAllocs.push_back(de::SharedPtr<Allocation>(colorImageAlloc.release()));

                attachments.push_back(colorAttachmentDescription);
                colorBlendAttachmentStates.push_back(colorBlendAttachmentState);

                const VkAttachmentReference colorAttachmentReference = {
                    (uint32_t)(colorImages.size() - 1),      // uint32_t attachment;
                    VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL // VkImageLayout layout;
                };

                colorAttachmentReferences.push_back(colorAttachmentReference);
            }

            // Create color attachment view
            {
                const VkImageViewCreateInfo colorImageViewParams = {
                    VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // VkStructureType sType;
                    nullptr,                                  // const void* pNext;
                    0u,                                       // VkImageViewCreateFlags flags;
                    colorImages.back().get()->get(),          // VkImage image;
                    VK_IMAGE_VIEW_TYPE_2D,                    // VkImageViewType viewType;
                    colorFormat,                              // VkFormat format;
                    {
                        VK_COMPONENT_SWIZZLE_R, // VkComponentSwizzle r;
                        VK_COMPONENT_SWIZZLE_G, // VkComponentSwizzle g;
                        VK_COMPONENT_SWIZZLE_B, // VkComponentSwizzle b;
                        VK_COMPONENT_SWIZZLE_A  // VkComponentSwizzle a;
                    },                          // VkComponentMapping components;
                    {
                        VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask;
                        0u,                        // uint32_t baseMipLevel;
                        1u,                        // uint32_t mipLevels;
                        0u,                        // uint32_t baseArraySlice;
                        1u                         // uint32_t arraySize;
                    }                              // VkImageSubresourceRange subresourceRange;
                };

                Move<VkImageView> colorImageView = createImageView(vk, vkDevice, &colorImageViewParams);
                colorImageViews.push_back(de::SharedPtr<Unique<VkImageView>>(new Unique<VkImageView>(colorImageView)));

                // UNDEFINED -> COLOR_ATTACHMENT_OPTIMAL before rendering.
                const VkImageMemoryBarrier colorImagePreRenderBarrier = {
                    VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // sType
                    nullptr,                                // pNext
                    0u,                                     // srcAccessMask
                    (VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT), // dstAccessMask
                    VK_IMAGE_LAYOUT_UNDEFINED,                // oldLayout
                    VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // newLayout
                    VK_QUEUE_FAMILY_IGNORED,                  // srcQueueFamilyIndex
                    VK_QUEUE_FAMILY_IGNORED,                  // dstQueueFamilyIndex
                    colorImages.back().get()->get(),          // image
                    {
                        VK_IMAGE_ASPECT_COLOR_BIT, // aspectMask
                        0u,                        // baseMipLevel
                        1u,                        // levelCount
                        0u,                        // baseArrayLayer
                        1u,                        // layerCount
                    }                              // subresourceRange
                };
                colorImagePreRenderBarriers.push_back(colorImagePreRenderBarrier);

                // COLOR_ATTACHMENT_OPTIMAL -> TRANSFER_SRC_OPTIMAL for the readback copy.
                const VkImageMemoryBarrier colorImagePostRenderBarrier = {
                    VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // sType
                    nullptr,                                // pNext
                    (VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT), // srcAccessMask
                    VK_ACCESS_TRANSFER_READ_BIT,              // dstAccessMask
                    VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // oldLayout
                    VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,     // newLayout
                    VK_QUEUE_FAMILY_IGNORED,                  // srcQueueFamilyIndex
                    VK_QUEUE_FAMILY_IGNORED,                  // dstQueueFamilyIndex
                    colorImages.back().get()->get(),          // image
                    {
                        VK_IMAGE_ASPECT_COLOR_BIT, // aspectMask
                        0u,                        // baseMipLevel
                        1u,                        // levelCount
                        0u,                        // baseArrayLayer
                        1u,                        // layerCount
                    }                              // subresourceRange
                };
                colorImagePostRenderBarriers.push_back(colorImagePostRenderBarrier);
            }
        }
    }

    // Create render pass
    {
        const VkSubpassDescription subpassDescription = {
            0u,                              // VkSubpassDescriptionFlags flags;
            VK_PIPELINE_BIND_POINT_GRAPHICS, // VkPipelineBindPoint pipelineBindPoint;
            0u,                              // uint32_t inputCount;
            nullptr,                         // const VkAttachmentReference* pInputAttachments;
            (uint32_t)colorImages.size(),    // uint32_t colorCount;
            &colorAttachmentReferences[0],   // const VkAttachmentReference* colorAttachments;
            nullptr,                         // const VkAttachmentReference* resolveAttachments;
            nullptr,                         // VkAttachmentReference depthStencilAttachment;
            0u,                              // uint32_t preserveCount;
            nullptr                          // const VkAttachmentReference* pPreserveAttachments;
        };

        const VkRenderPassCreateInfo renderPassParams = {
            VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // VkStructureType sType;
            nullptr,                                   // const void* pNext;
            (VkRenderPassCreateFlags)0,                // VkRenderPassCreateFlags flags;
            (uint32_t)attachments.size(),              // uint32_t attachmentCount;
            &attachments[0],                           // const VkAttachmentDescription* pAttachments;
            1u,                                        // uint32_t subpassCount;
            &subpassDescription,                       // const VkSubpassDescription* pSubpasses;
            0u,                                        // uint32_t dependencyCount;
            nullptr                                    // const VkSubpassDependency* pDependencies;
        };

        renderPass = createRenderPass(vk, vkDevice, &renderPassParams);
    }

    // Create framebuffer
    {
        std::vector<VkImageView> views(colorImageViews.size());
        for (size_t i = 0; i < colorImageViews.size(); i++)
        {
            views[i] = colorImageViews[i].get()->get();
        }

        const VkFramebufferCreateInfo framebufferParams = {
            VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, // VkStructureType sType;
            nullptr,                                   // const void* pNext;
            0u,                                        // VkFramebufferCreateFlags flags;
            *renderPass,                               // VkRenderPass renderPass;
            (uint32_t)views.size(),                    // uint32_t attachmentCount;
            &views[0],                                 // const VkImageView* pAttachments;
            (uint32_t)renderSize.x(),                  // uint32_t width;
            (uint32_t)renderSize.y(),                  // uint32_t height;
            1u                                         // uint32_t layers;
        };

        framebuffer = createFramebuffer(vk, vkDevice, &framebufferParams);
    }

    // Create pipeline layout
    {
        // Set 0 = empty placeholder, set 1 = extra resources (when present).
        const VkDescriptorSetLayout setLayouts[] = {*emptyDescriptorSetLayout, m_extraResourcesLayout};
        const VkPipelineLayoutCreateInfo pipelineLayoutParams = {
            VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,        // VkStructureType sType;
            nullptr,                                              // const void* pNext;
            (VkPipelineLayoutCreateFlags)0,                       // VkPipelineLayoutCreateFlags flags;
            (m_extraResourcesLayout != VK_NULL_HANDLE ? 2u : 0u), // uint32_t descriptorSetCount;
            setLayouts,                                           // const VkDescriptorSetLayout* pSetLayouts;
            0u,                                                   // uint32_t pushConstantRangeCount;
            nullptr                                               // const VkPushConstantRange* pPushConstantRanges;
        };

        pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
    }

    // Create shaders
    {
        vertexShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("vert"), 0);
        fragmentShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("frag"), 0);

        if (useGeometryShader)
        {
            // The point-size-writing variant is used only when the feature is available.
            if (m_context.getDeviceFeatures().shaderTessellationAndGeometryPointSize)
                geometryShaderModule =
                    createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("geom_point_size"), 0);
            else
                geometryShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("geom"), 0);
        }
    }

    // Create pipeline
    {
        const VkPipelineVertexInputStateCreateInfo vertexInputStateParams = {
            VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
            nullptr,                                                   // const void* pNext;
            (VkPipelineVertexInputStateCreateFlags)0,                  // VkPipelineVertexInputStateCreateFlags flags;
            (uint32_t)m_vertexBindingDescriptions.size(),              // uint32_t bindingCount;
            &m_vertexBindingDescriptions[0], // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
            (uint32_t)m_vertexAttributeDescriptions.size(), // uint32_t attributeCount;
            &m_vertexAttributeDescriptions[0], // const VkVertexInputAttributeDescription* pvertexAttributeDescriptions;
        };

        const std::vector<VkViewport> viewports(1, makeViewport(renderSize));
        const std::vector<VkRect2D> scissors(1, makeRect2D(renderSize));

        const VkPipelineColorBlendStateCreateInfo colorBlendStateParams = {
            VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType sType;
            nullptr,                                                  // const void* pNext;
            (VkPipelineColorBlendStateCreateFlags)0,                  // VkPipelineColorBlendStateCreateFlags flags;
            VK_FALSE,                                                 // VkBool32 logicOpEnable;
            VK_LOGIC_OP_COPY,                                         // VkLogicOp logicOp;
            (uint32_t)colorBlendAttachmentStates.size(),              // uint32_t attachmentCount;
            &colorBlendAttachmentStates[0], // const VkPipelineColorBlendAttachmentState* pAttachments;
            {0.0f, 0.0f, 0.0f, 0.0f}        // float blendConst[4];
        };

        graphicsPipeline = makeGraphicsPipeline(
            vk,                  // const DeviceInterface& vk
            vkDevice,            // const VkDevice device
            *pipelineLayout,     // const VkPipelineLayout pipelineLayout
            *vertexShaderModule, // const VkShaderModule vertexShaderModule
            VK_NULL_HANDLE,      // const VkShaderModule tessellationControlShaderModule
            VK_NULL_HANDLE,      // const VkShaderModule tessellationEvalShaderModule
            useGeometryShader ? *geometryShaderModule :
                                VK_NULL_HANDLE, // const VkShaderModule geometryShaderModule
            *fragmentShaderModule,              // const VkShaderModule fragmentShaderModule
            *renderPass,                        // const VkRenderPass renderPass
            viewports,                          // const std::vector<VkViewport>& viewports
            scissors,                           // const std::vector<VkRect2D>& scissors
            VK_PRIMITIVE_TOPOLOGY_POINT_LIST,   // const VkPrimitiveTopology topology
            0u,                                 // const uint32_t subpass
            0u,                                 // const uint32_t patchControlPoints
            &vertexInputStateParams, // const VkPipelineVertexInputStateCreateInfo* vertexInputStateCreateInfo
            nullptr,                 // const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
            nullptr,                 // const VkPipelineMultisampleStateCreateInfo* multisampleStateCreateInfo
            nullptr,                 // const VkPipelineDepthStencilStateCreateInfo* depthStencilStateCreateInfo
            &colorBlendStateParams); // const VkPipelineColorBlendStateCreateInfo* colorBlendStateCreateInfo
    }

    // Create command pool
    cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);

    // Create command buffer
    {
        cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

        beginCommandBuffer(vk, *cmdBuffer);

        vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                              vk::VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0, 0, nullptr, 0,
                              nullptr, (uint32_t)colorImagePreRenderBarriers.size(),
                              colorImagePreRenderBarriers.empty() ? nullptr : &colorImagePreRenderBarriers[0]);
        beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, renderSize.x(), renderSize.y()),
                        (uint32_t)attachmentClearValues.size(), &attachmentClearValues[0]);

        vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *graphicsPipeline);

        if (m_extraResourcesLayout != VK_NULL_HANDLE)
        {
            DE_ASSERT(extraResources != VK_NULL_HANDLE);
            const VkDescriptorSet descriptorSets[] = {*emptyDescriptorSet, extraResources};
            vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u,
                                     DE_LENGTH_OF_ARRAY(descriptorSets), descriptorSets, 0u, nullptr);
        }
        else
            DE_ASSERT(extraResources == VK_NULL_HANDLE);

        const uint32_t numberOfVertexAttributes = (uint32_t)m_vertexBuffers.size();

        std::vector<VkDeviceSize> offsets(numberOfVertexAttributes, 0);

        std::vector<VkBuffer> buffers(numberOfVertexAttributes);
        for (size_t i = 0; i < numberOfVertexAttributes; i++)
        {
            buffers[i] = m_vertexBuffers[i].get()->get();
        }

        vk.cmdBindVertexBuffers(*cmdBuffer, 0, numberOfVertexAttributes, &buffers[0], &offsets[0]);
        vk.cmdDraw(*cmdBuffer, (uint32_t)positions.size(), 1u, 0u, 0u);

        endRenderPass(vk, *cmdBuffer);
        vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
                              vk::VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0, 0, nullptr, 0, nullptr,
                              (uint32_t)colorImagePostRenderBarriers.size(),
                              colorImagePostRenderBarriers.empty() ? nullptr : &colorImagePostRenderBarriers[0]);

        endCommandBuffer(vk, *cmdBuffer);
    }

    // Execute Draw
    submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());

    // Read back result and output
    {
        // Buffer sized for 4 x 32-bit components per pixel (widest readback case).
        const VkDeviceSize imageSizeBytes = (VkDeviceSize)(4 * sizeof(uint32_t) * renderSize.x() * renderSize.y());
        const VkBufferCreateInfo readImageBufferParams = {
            VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
            nullptr,                              // const void* pNext;
            0u,                                   // VkBufferCreateFlags flags;
            imageSizeBytes,                       // VkDeviceSize size;
            VK_BUFFER_USAGE_TRANSFER_DST_BIT,     // VkBufferUsageFlags usage;
            VK_SHARING_MODE_EXCLUSIVE,            // VkSharingMode sharingMode;
            1u,                                   // uint32_t queueFamilyCount;
            &queueFamilyIndex,                    // const uint32_t* pQueueFamilyIndices;
        };

        // constants for image copy
        Move<VkCommandPool> copyCmdPool =
            createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);

        const VkBufferImageCopy copyParams = {
            0u,                       // VkDeviceSize bufferOffset;
            (uint32_t)renderSize.x(), // uint32_t bufferRowLength;
            (uint32_t)renderSize.y(), // uint32_t bufferImageHeight;
            {
                VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspect aspect;
                0u,                        // uint32_t mipLevel;
                0u,                        // uint32_t arraySlice;
                1u,                        // uint32_t arraySize;
            },                             // VkImageSubresource imageSubresource;
            {0u, 0u, 0u},                  // VkOffset3D imageOffset;
            {renderSize.x(), renderSize.y(), 1u} // VkExtent3D imageExtent;
        };

        // Read back pixels.
        for (int outNdx = 0; outNdx < (int)m_shaderSpec.outputs.size(); ++outNdx)
        {
            const Symbol &output = m_shaderSpec.outputs[outNdx];
            const int outSize = output.varType.getScalarSize();
            const int outVecSize = glu::getDataTypeNumComponents(output.varType.getBasicType());
            const int outNumLocs = glu::getDataTypeNumLocations(output.varType.getBasicType());
            const int outLocation = de::lookup(m_outputLayout.locationMap, output.name);

            // Outputs spanning several locations (e.g. matrices) read one attachment per location.
            for (int locNdx = 0; locNdx < outNumLocs; ++locNdx)
            {
                tcu::TextureLevel tmpBuf;
                const tcu::TextureFormat format = getRenderbufferFormatForOutput(output.varType, false);
                const tcu::TextureFormat readFormat(tcu::TextureFormat::RGBA, format.type);
                const Unique<VkBuffer> readImageBuffer(createBuffer(vk, vkDevice, &readImageBufferParams));
                const de::UniquePtr<Allocation> readImageBufferMemory(memAlloc.allocate(
                    getBufferMemoryRequirements(vk, vkDevice, *readImageBuffer), MemoryRequirement::HostVisible));

                VK_CHECK(vk.bindBufferMemory(vkDevice, *readImageBuffer, readImageBufferMemory->getMemory(),
                                             readImageBufferMemory->getOffset()));

                // Copy image to buffer
                {

                    Move<VkCommandBuffer> copyCmdBuffer =
                        allocateCommandBuffer(vk, vkDevice, *copyCmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

                    beginCommandBuffer(vk, *copyCmdBuffer);
                    vk.cmdCopyImageToBuffer(*copyCmdBuffer, colorImages[outLocation + locNdx].get()->get(),
                                            VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *readImageBuffer, 1u, &copyParams);

                    // Insert a barrier so data written by the transfer is available to the host
                    {
                        const VkBufferMemoryBarrier barrier = {
                            VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType sType;
                            nullptr,                                 // const void* pNext;
                            VK_ACCESS_TRANSFER_WRITE_BIT,            // VkAccessFlags srcAccessMask;
                            VK_ACCESS_HOST_READ_BIT,                 // VkAccessFlags dstAccessMask;
                            VK_QUEUE_FAMILY_IGNORED,                 // uint32_t srcQueueFamilyIndex;
                            VK_QUEUE_FAMILY_IGNORED,                 // uint32_t dstQueueFamilyIndex;
                            *readImageBuffer,                        // VkBuffer buffer;
                            0,                                       // VkDeviceSize offset;
                            VK_WHOLE_SIZE,                           // VkDeviceSize size;
                        };

                        vk.cmdPipelineBarrier(*copyCmdBuffer, vk::VK_PIPELINE_STAGE_TRANSFER_BIT,
                                              vk::VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, nullptr, 1,
                                              &barrier, 0, nullptr);
                    }

                    endCommandBuffer(vk, *copyCmdBuffer);

                    submitCommandsAndWait(vk, vkDevice, queue, copyCmdBuffer.get());
                }

                invalidateAlloc(vk, vkDevice, *readImageBufferMemory);

                tmpBuf.setStorage(readFormat, renderSize.x(), renderSize.y());

                const tcu::TextureFormat resultFormat(tcu::TextureFormat::RGBA, format.type);
                const tcu::ConstPixelBufferAccess resultAccess(resultFormat, renderSize.x(), renderSize.y(), 1,
                                                               readImageBufferMemory->getHostPtr());

                tcu::copy(tmpBuf.getAccess(), resultAccess);

                // Scatter the pixels back into the per-symbol output arrays; each pixel
                // holds 4 components, of which outVecSize are meaningful.
                if (isOutput16Bit(static_cast<size_t>(outNdx)))
                {
                    uint16_t *dstPtrBase = static_cast<uint16_t *>(outputs[outNdx]);
                    if (outSize == 4 && outNumLocs == 1)
                        deMemcpy(dstPtrBase, tmpBuf.getAccess().getDataPtr(),
                                 numValues * outVecSize * sizeof(uint16_t));
                    else
                    {
                        for (int valNdx = 0; valNdx < numValues; valNdx++)
                        {
                            const uint16_t *srcPtr = (const uint16_t *)tmpBuf.getAccess().getDataPtr() + valNdx * 4;
                            uint16_t *dstPtr = &dstPtrBase[outSize * valNdx + outVecSize * locNdx];
                            deMemcpy(dstPtr, srcPtr, outVecSize * sizeof(uint16_t));
                        }
                    }
                }
                else
                {
                    uint32_t *dstPtrBase = static_cast<uint32_t *>(outputs[outNdx]);
                    if (outSize == 4 && outNumLocs == 1)
                        deMemcpy(dstPtrBase, tmpBuf.getAccess().getDataPtr(),
                                 numValues * outVecSize * sizeof(uint32_t));
                    else
                    {
                        for (int valNdx = 0; valNdx < numValues; valNdx++)
                        {
                            const uint32_t *srcPtr = (const uint32_t *)tmpBuf.getAccess().getDataPtr() + valNdx * 4;
                            uint32_t *dstPtr = &dstPtrBase[outSize * valNdx + outVecSize * locNdx];
                            deMemcpy(dstPtr, srcPtr, outVecSize * sizeof(uint32_t));
                        }
                    }
                }
            }
        }
    }
}
1490
1491 // VertexShaderExecutor
1492
// FragmentOutExecutor variant that runs the tested code in the vertex stage; a
// passthrough fragment shader writes the vertex outputs to color attachments.
class VertexShaderExecutor : public FragmentOutExecutor
{
public:
    VertexShaderExecutor(Context &context, const ShaderSpec &shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
    virtual ~VertexShaderExecutor(void);

    // Adds the "vert" and "frag" GLSL programs for this executor to dst.
    static void generateSources(const ShaderSpec &shaderSpec, SourceCollections &dst);
};
1501
// Delegates to FragmentOutExecutor with the vertex shader stage selected.
VertexShaderExecutor::VertexShaderExecutor(Context &context, const ShaderSpec &shaderSpec,
                                           VkDescriptorSetLayout extraResourcesLayout)
    : FragmentOutExecutor(context, glu::SHADERTYPE_VERTEX, shaderSpec, extraResourcesLayout)
{
}
1507
VertexShaderExecutor::~VertexShaderExecutor(void)
{
    // Nothing to do; resources are owned by the base class and RAII members.
}
1511
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)1512 void VertexShaderExecutor::generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection)
1513 {
1514 const FragmentOutputLayout outputLayout(computeFragmentOutputLayout(shaderSpec.outputs));
1515
1516 programCollection.glslSources.add("vert")
1517 << glu::VertexSource(generateVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1518 /* \todo [2015-09-11 hegedusd] set useIntOutputs parameter if needed. */
1519 programCollection.glslSources.add("frag") << glu::FragmentSource(generatePassthroughFragmentShader(
1520 shaderSpec, false, outputLayout.locationMap, "vtx_out_", "o_"))
1521 << shaderSpec.buildOptions;
1522 }
1523
1524 // GeometryShaderExecutor
1525
// FragmentOutExecutor variant that runs the tested code in the geometry stage.
// Construction throws NotSupportedError on devices without geometry shaders.
class GeometryShaderExecutor : public FragmentOutExecutor
{
public:
    GeometryShaderExecutor(Context &context, const ShaderSpec &shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
    virtual ~GeometryShaderExecutor(void);

    // Adds the vertex, geometry ("geom"/"geom_point_size") and fragment programs.
    static void generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection);
};
1534
GeometryShaderExecutor(Context & context,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)1535 GeometryShaderExecutor::GeometryShaderExecutor(Context &context, const ShaderSpec &shaderSpec,
1536 VkDescriptorSetLayout extraResourcesLayout)
1537 : FragmentOutExecutor(context, glu::SHADERTYPE_GEOMETRY, shaderSpec, extraResourcesLayout)
1538 {
1539 const VkPhysicalDeviceFeatures &features = context.getDeviceFeatures();
1540
1541 if (!features.geometryShader)
1542 TCU_THROW(NotSupportedError, "Geometry shader type not supported by device");
1543 }
1544
GeometryShaderExecutor::~GeometryShaderExecutor(void)
{
    // Nothing to do; resources are owned by the base class and RAII members.
}
1548
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)1549 void GeometryShaderExecutor::generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection)
1550 {
1551 const FragmentOutputLayout outputLayout(computeFragmentOutputLayout(shaderSpec.outputs));
1552
1553 programCollection.glslSources.add("vert")
1554 << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1555
1556 programCollection.glslSources.add("geom")
1557 << glu::GeometrySource(generateGeometryShader(shaderSpec, "vtx_out_", "geom_out_", false))
1558 << shaderSpec.buildOptions;
1559 programCollection.glslSources.add("geom_point_size")
1560 << glu::GeometrySource(generateGeometryShader(shaderSpec, "vtx_out_", "geom_out_", true))
1561 << shaderSpec.buildOptions;
1562
1563 /* \todo [2015-09-18 rsipka] set useIntOutputs parameter if needed. */
1564 programCollection.glslSources.add("frag") << glu::FragmentSource(generatePassthroughFragmentShader(
1565 shaderSpec, false, outputLayout.locationMap, "geom_out_", "o_"))
1566 << shaderSpec.buildOptions;
1567 }
1568
1569 // FragmentShaderExecutor
1570
// Executes shader fragments in the fragment shader stage; rendering and
// result readback are inherited from FragmentOutExecutor.
class FragmentShaderExecutor : public FragmentOutExecutor
{
public:
    FragmentShaderExecutor(Context &context, const ShaderSpec &shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
    virtual ~FragmentShaderExecutor(void);

    // Registers a passthrough vertex shader and the test fragment shader.
    static void generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection);
};
1579
// Constructs the executor; simply forwards to FragmentOutExecutor with the
// fragment shader stage (no additional device features are checked here).
FragmentShaderExecutor::FragmentShaderExecutor(Context &context, const ShaderSpec &shaderSpec,
                                               VkDescriptorSetLayout extraResourcesLayout)
    : FragmentOutExecutor(context, glu::SHADERTYPE_FRAGMENT, shaderSpec, extraResourcesLayout)
{
}
1585
// Destructor; no resources are owned beyond those of the base class.
FragmentShaderExecutor::~FragmentShaderExecutor(void)
{
}
1589
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)1590 void FragmentShaderExecutor::generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection)
1591 {
1592 const FragmentOutputLayout outputLayout(computeFragmentOutputLayout(shaderSpec.outputs));
1593
1594 programCollection.glslSources.add("vert")
1595 << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1596 /* \todo [2015-09-11 hegedusd] set useIntOutputs parameter if needed. */
1597 programCollection.glslSources.add("frag")
1598 << glu::FragmentSource(generateFragmentShader(shaderSpec, false, outputLayout.locationMap, "vtx_out_", "o_"))
1599 << shaderSpec.buildOptions;
1600 }
1601
1602 // Shared utilities for compute and tess executors
1603
getVecStd430ByteAlignment(glu::DataType type)1604 static uint32_t getVecStd430ByteAlignment(glu::DataType type)
1605 {
1606 uint32_t baseSize;
1607
1608 switch (glu::getDataTypeScalarType(type))
1609 {
1610 case glu::TYPE_FLOAT16:
1611 baseSize = 2u;
1612 break;
1613 case glu::TYPE_DOUBLE:
1614 baseSize = 8u;
1615 break;
1616 default:
1617 baseSize = 4u;
1618 break;
1619 }
1620
1621 switch (glu::getDataTypeScalarSize(type))
1622 {
1623 case 1:
1624 return baseSize;
1625 case 2:
1626 return baseSize * 2u;
1627 case 3: // fallthrough.
1628 case 4:
1629 return baseSize * 4u;
1630 default:
1631 DE_ASSERT(false);
1632 return 0u;
1633 }
1634 }
1635
// Base class for executors that exchange input/output values through std430
// storage buffers (compute and tessellation executors). It computes per-symbol
// buffer layouts, owns the input/output buffers and copies data host<->buffer.
class BufferIoExecutor : public ShaderExecutor
{
public:
    BufferIoExecutor(Context &context, const ShaderSpec &shaderSpec);
    virtual ~BufferIoExecutor(void);

protected:
    // Descriptor bindings used by the generated SSBO declarations.
    enum
    {
        INPUT_BUFFER_BINDING = 0,
        OUTPUT_BUFFER_BINDING = 1,
    };

    // Creates the input/output buffers for 'numValues' values and binds
    // host-visible memory to them.
    void initBuffers(int numValues);
    VkBuffer getInputBuffer(void) const
    {
        return *m_inputBuffer;
    }
    VkBuffer getOutputBuffer(void) const
    {
        return *m_outputBuffer;
    }
    // Per-value stride of the input buffer in bytes (0 when there are no inputs).
    uint32_t getInputStride(void) const
    {
        return getLayoutStride(m_inputLayout);
    }
    // Per-value stride of the output buffer in bytes (0 when there are no outputs).
    uint32_t getOutputStride(void) const
    {
        return getLayoutStride(m_outputLayout);
    }

    // Copies host-side values into the input buffer and flushes it; no-op when
    // there are no inputs.
    void uploadInputBuffer(const void *const *inputPtrs, int numValues, bool packFloat16Bit);
    // Invalidates the output buffer and copies the results back to host memory.
    void readOutputBuffer(void *const *outputPtrs, int numValues);

    // Emits GLSL declarations for the Inputs/Outputs structs and their SSBOs.
    static void declareBufferBlocks(std::ostream &src, const ShaderSpec &spec);
    // Emits GLSL that loads inputs, runs the operation source, stores outputs.
    static void generateExecBufferIo(std::ostream &src, const ShaderSpec &spec, const char *invocationNdxName);

protected:
    Move<VkBuffer> m_inputBuffer;
    Move<VkBuffer> m_outputBuffer;

private:
    // Byte-level placement of one symbol inside the per-value struct.
    struct VarLayout
    {
        uint32_t offset;       // Byte offset of the member within one value.
        uint32_t stride;       // Stride between consecutive values (same for all members).
        uint32_t matrixStride; // Stride between matrix columns; 0 for non-matrices.

        VarLayout(void) : offset(0), stride(0), matrixStride(0)
        {
        }
    };

    static void computeVarLayout(const std::vector<Symbol> &symbols, std::vector<VarLayout> *layout);
    static uint32_t getLayoutStride(const vector<VarLayout> &layout);

    static void copyToBuffer(const glu::VarType &varType, const VarLayout &layout, int numValues,
                             const void *srcBasePtr, void *dstBasePtr, bool packFloat16Bit);
    static void copyFromBuffer(const glu::VarType &varType, const VarLayout &layout, int numValues,
                               const void *srcBasePtr, void *dstBasePtr);

    de::MovePtr<Allocation> m_inputAlloc;
    de::MovePtr<Allocation> m_outputAlloc;

    vector<VarLayout> m_inputLayout;
    vector<VarLayout> m_outputLayout;
};
1703
// Precomputes the buffer layouts for the spec's input and output symbols.
BufferIoExecutor::BufferIoExecutor(Context &context, const ShaderSpec &shaderSpec) : ShaderExecutor(context, shaderSpec)
{
    computeVarLayout(m_shaderSpec.inputs, &m_inputLayout);
    computeVarLayout(m_shaderSpec.outputs, &m_outputLayout);
}
1709
// Destructor; buffers and allocations are released by their RAII wrappers.
BufferIoExecutor::~BufferIoExecutor(void)
{
}
1713
getLayoutStride(const vector<VarLayout> & layout)1714 inline uint32_t BufferIoExecutor::getLayoutStride(const vector<VarLayout> &layout)
1715 {
1716 return layout.empty() ? 0 : layout[0].stride;
1717 }
1718
computeVarLayout(const std::vector<Symbol> & symbols,std::vector<VarLayout> * layout)1719 void BufferIoExecutor::computeVarLayout(const std::vector<Symbol> &symbols, std::vector<VarLayout> *layout)
1720 {
1721 uint32_t maxAlignment = 0;
1722 uint32_t curOffset = 0;
1723
1724 DE_ASSERT(layout != nullptr);
1725 DE_ASSERT(layout->empty());
1726 layout->resize(symbols.size());
1727
1728 for (size_t varNdx = 0; varNdx < symbols.size(); varNdx++)
1729 {
1730 const Symbol &symbol = symbols[varNdx];
1731 const glu::DataType basicType = symbol.varType.getBasicType();
1732 VarLayout &layoutEntry = (*layout)[varNdx];
1733
1734 if (glu::isDataTypeScalarOrVector(basicType))
1735 {
1736 const uint32_t alignment = getVecStd430ByteAlignment(basicType);
1737 const uint32_t size =
1738 (uint32_t)glu::getDataTypeScalarSize(basicType) *
1739 (isDataTypeDoubleType(basicType) ?
1740 (int)(sizeof(uint64_t)) :
1741 (isDataTypeFloat16OrVec(basicType) ? (int)sizeof(uint16_t) : (int)sizeof(uint32_t)));
1742
1743 curOffset = (uint32_t)deAlign32((int)curOffset, (int)alignment);
1744 maxAlignment = de::max(maxAlignment, alignment);
1745
1746 layoutEntry.offset = curOffset;
1747 layoutEntry.matrixStride = 0;
1748
1749 curOffset += size;
1750 }
1751 else if (glu::isDataTypeMatrix(basicType))
1752 {
1753 const int numVecs = glu::getDataTypeMatrixNumColumns(basicType);
1754 const glu::DataType vecType =
1755 glu::getDataTypeVector(glu::getDataTypeScalarType(basicType), glu::getDataTypeMatrixNumRows(basicType));
1756 const uint32_t vecAlignment = getVecStd430ByteAlignment(vecType);
1757
1758 curOffset = (uint32_t)deAlign32((int)curOffset, (int)vecAlignment);
1759 maxAlignment = de::max(maxAlignment, vecAlignment);
1760
1761 layoutEntry.offset = curOffset;
1762 layoutEntry.matrixStride = vecAlignment;
1763
1764 curOffset += vecAlignment * numVecs;
1765 }
1766 else
1767 DE_ASSERT(false);
1768 }
1769
1770 {
1771 const uint32_t totalSize = (uint32_t)deAlign32(curOffset, maxAlignment);
1772
1773 for (vector<VarLayout>::iterator varIter = layout->begin(); varIter != layout->end(); ++varIter)
1774 varIter->stride = totalSize;
1775 }
1776 }
1777
declareBufferBlocks(std::ostream & src,const ShaderSpec & spec)1778 void BufferIoExecutor::declareBufferBlocks(std::ostream &src, const ShaderSpec &spec)
1779 {
1780 // Input struct
1781 if (!spec.inputs.empty())
1782 {
1783 glu::StructType inputStruct("Inputs");
1784 for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
1785 inputStruct.addMember(symIter->name.c_str(), symIter->varType);
1786 src << glu::declare(&inputStruct) << ";\n";
1787 }
1788
1789 // Output struct
1790 {
1791 glu::StructType outputStruct("Outputs");
1792 for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1793 outputStruct.addMember(symIter->name.c_str(), symIter->varType);
1794 src << glu::declare(&outputStruct) << ";\n";
1795 }
1796
1797 src << "\n";
1798
1799 if (!spec.inputs.empty())
1800 {
1801 src << "layout(set = 0, binding = " << int(INPUT_BUFFER_BINDING) << ", std430) buffer InBuffer\n"
1802 << "{\n"
1803 << " Inputs inputs[];\n"
1804 << "};\n";
1805 }
1806
1807 src << "layout(set = 0, binding = " << int(OUTPUT_BUFFER_BINDING) << ", std430) buffer OutBuffer\n"
1808 << "{\n"
1809 << " Outputs outputs[];\n"
1810 << "};\n"
1811 << "\n";
1812 }
1813
generateExecBufferIo(std::ostream & src,const ShaderSpec & spec,const char * invocationNdxName)1814 void BufferIoExecutor::generateExecBufferIo(std::ostream &src, const ShaderSpec &spec, const char *invocationNdxName)
1815 {
1816 std::string tname;
1817 for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
1818 {
1819 const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
1820 if (f16BitTest)
1821 {
1822 tname = glu::getDataTypeName(getDataTypeFloat16Scalars(symIter->varType.getBasicType()));
1823 }
1824 else
1825 {
1826 tname = glu::getDataTypeName(symIter->varType.getBasicType());
1827 }
1828 src << "\t" << tname << " " << symIter->name << " = " << tname << "(inputs[" << invocationNdxName << "]."
1829 << symIter->name << ");\n";
1830 }
1831
1832 for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1833 {
1834 const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
1835 if (f16BitTest)
1836 {
1837 tname = glu::getDataTypeName(getDataTypeFloat16Scalars(symIter->varType.getBasicType()));
1838 }
1839 else
1840 {
1841 tname = glu::getDataTypeName(symIter->varType.getBasicType());
1842 }
1843 src << "\t" << tname << " " << symIter->name << ";\n";
1844 if (f16BitTest)
1845 {
1846 const char *ttname = glu::getDataTypeName(symIter->varType.getBasicType());
1847 src << "\t" << ttname << " "
1848 << "packed_" << symIter->name << ";\n";
1849 }
1850 }
1851
1852 src << "\n";
1853
1854 {
1855 std::istringstream opSrc(spec.source);
1856 std::string line;
1857
1858 while (std::getline(opSrc, line))
1859 src << "\t" << line << "\n";
1860 }
1861
1862 if (spec.packFloat16Bit)
1863 packFloat16Bit(src, spec.outputs);
1864
1865 src << "\n";
1866 for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1867 {
1868 const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
1869 if (f16BitTest)
1870 src << "\toutputs[" << invocationNdxName << "]." << symIter->name << " = packed_" << symIter->name << ";\n";
1871 else
1872 src << "\toutputs[" << invocationNdxName << "]." << symIter->name << " = " << symIter->name << ";\n";
1873 }
1874 }
1875
// Copies tightly-packed host values of 'varType' into a buffer laid out per
// 'layout'. When 'packFloat16Bit' is set, each 32-bit float component is
// converted to float16 (round toward zero) and stored in the low 16 bits of
// its 32-bit slot, with the upper bits zeroed.
//
// \param varType        Type of the values; must be a basic (scalar/vector/matrix) type.
// \param layout         Buffer-side offset/stride information for this variable.
// \param numValues      Number of values to copy.
// \param srcBasePtr     Tightly-packed source data.
// \param dstBasePtr     Destination buffer base pointer.
// \param packFloat16Bit Convert float components to packed fp16 (see above).
void BufferIoExecutor::copyToBuffer(const glu::VarType &varType, const VarLayout &layout, int numValues,
                                    const void *srcBasePtr, void *dstBasePtr, bool packFloat16Bit)
{
    if (varType.isBasicType())
    {
        const glu::DataType basicType = varType.getBasicType();
        const bool isMatrix = glu::isDataTypeMatrix(basicType);
        const int scalarSize = glu::getDataTypeScalarSize(basicType);
        // Matrices are copied one column at a time; scalars/vectors as one "vec".
        const int numVecs = isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
        const int numComps = scalarSize / numVecs;
        // Size of one component in bytes, derived from the scalar type.
        const int size = (glu::isDataTypeDoubleType(basicType) ?
                              (int)sizeof(uint64_t) :
                              (glu::isDataTypeFloat16OrVec(basicType) ? (int)sizeof(uint16_t) : (int)sizeof(uint32_t)));

        for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
        {
            for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
            {
                // Source is tightly packed; destination honors the buffer layout.
                const int srcOffset = size * (elemNdx * scalarSize + vecNdx * numComps);
                const int dstOffset =
                    layout.offset + layout.stride * elemNdx + (isMatrix ? layout.matrixStride * vecNdx : 0);
                const uint8_t *srcPtr = (const uint8_t *)srcBasePtr + srcOffset;
                uint8_t *dstPtr = (uint8_t *)dstBasePtr + dstOffset;

                if (packFloat16Bit)
                {
                    // Convert the float values to 16 bit and store in the lower 16 bits of 32 bit ints.
                    for (int cmpNdx = 0; cmpNdx < numComps; ++cmpNdx)
                    {
                        // Second array element stays zero: it provides the zeroed
                        // upper half of the 32-bit slot copied below.
                        deFloat16 f16vals[2] = {};
                        f16vals[0] = deFloat32To16Round(((float *)srcPtr)[cmpNdx], DE_ROUNDINGMODE_TO_ZERO);
                        deMemcpy(dstPtr + cmpNdx * size, &f16vals[0], size);
                    }
                }
                else
                {
                    deMemcpy(dstPtr, srcPtr, size * numComps);
                }
            }
        }
    }
    else
        throw tcu::InternalError("Unsupported type");
}
1920
copyFromBuffer(const glu::VarType & varType,const VarLayout & layout,int numValues,const void * srcBasePtr,void * dstBasePtr)1921 void BufferIoExecutor::copyFromBuffer(const glu::VarType &varType, const VarLayout &layout, int numValues,
1922 const void *srcBasePtr, void *dstBasePtr)
1923 {
1924 if (varType.isBasicType())
1925 {
1926 const glu::DataType basicType = varType.getBasicType();
1927 const bool isMatrix = glu::isDataTypeMatrix(basicType);
1928 const int scalarSize = glu::getDataTypeScalarSize(basicType);
1929 const int numVecs = isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
1930 const int numComps = scalarSize / numVecs;
1931
1932 for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
1933 {
1934 for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
1935 {
1936 const int size =
1937 (glu::isDataTypeDoubleType(basicType) ?
1938 (int)sizeof(uint64_t) :
1939 (glu::isDataTypeFloat16OrVec(basicType) ? (int)sizeof(uint16_t) : (int)sizeof(uint32_t)));
1940 const int srcOffset =
1941 layout.offset + layout.stride * elemNdx + (isMatrix ? layout.matrixStride * vecNdx : 0);
1942 const int dstOffset = size * (elemNdx * scalarSize + vecNdx * numComps);
1943 const uint8_t *srcPtr = (const uint8_t *)srcBasePtr + srcOffset;
1944 uint8_t *dstPtr = (uint8_t *)dstBasePtr + dstOffset;
1945
1946 deMemcpy(dstPtr, srcPtr, size * numComps);
1947 }
1948 }
1949 }
1950 else
1951 throw tcu::InternalError("Unsupported type");
1952 }
1953
uploadInputBuffer(const void * const * inputPtrs,int numValues,bool packFloat16Bit)1954 void BufferIoExecutor::uploadInputBuffer(const void *const *inputPtrs, int numValues, bool packFloat16Bit)
1955 {
1956 const VkDevice vkDevice = m_context.getDevice();
1957 const DeviceInterface &vk = m_context.getDeviceInterface();
1958
1959 const uint32_t inputStride = getLayoutStride(m_inputLayout);
1960 const int inputBufferSize = inputStride * numValues;
1961
1962 if (inputBufferSize == 0)
1963 return; // No inputs
1964
1965 DE_ASSERT(m_shaderSpec.inputs.size() == m_inputLayout.size());
1966 for (size_t inputNdx = 0; inputNdx < m_shaderSpec.inputs.size(); ++inputNdx)
1967 {
1968 const glu::VarType &varType = m_shaderSpec.inputs[inputNdx].varType;
1969 const VarLayout &layout = m_inputLayout[inputNdx];
1970
1971 copyToBuffer(varType, layout, numValues, inputPtrs[inputNdx], m_inputAlloc->getHostPtr(), packFloat16Bit);
1972 }
1973
1974 flushAlloc(vk, vkDevice, *m_inputAlloc);
1975 }
1976
readOutputBuffer(void * const * outputPtrs,int numValues)1977 void BufferIoExecutor::readOutputBuffer(void *const *outputPtrs, int numValues)
1978 {
1979 const VkDevice vkDevice = m_context.getDevice();
1980 const DeviceInterface &vk = m_context.getDeviceInterface();
1981
1982 DE_ASSERT(numValues > 0); // At least some outputs are required.
1983
1984 invalidateAlloc(vk, vkDevice, *m_outputAlloc);
1985
1986 DE_ASSERT(m_shaderSpec.outputs.size() == m_outputLayout.size());
1987 for (size_t outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
1988 {
1989 const glu::VarType &varType = m_shaderSpec.outputs[outputNdx].varType;
1990 const VarLayout &layout = m_outputLayout[outputNdx];
1991
1992 copyFromBuffer(varType, layout, numValues, m_outputAlloc->getHostPtr(), outputPtrs[outputNdx]);
1993 }
1994 }
1995
initBuffers(int numValues)1996 void BufferIoExecutor::initBuffers(int numValues)
1997 {
1998 const uint32_t inputStride = getLayoutStride(m_inputLayout);
1999 const uint32_t outputStride = getLayoutStride(m_outputLayout);
2000 // Avoid creating zero-sized buffer/memory
2001 const size_t inputBufferSize = de::max(numValues * inputStride, 1u);
2002 const size_t outputBufferSize = numValues * outputStride;
2003
2004 // Upload data to buffer
2005 const VkDevice vkDevice = m_context.getDevice();
2006 const DeviceInterface &vk = m_context.getDeviceInterface();
2007 const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
2008 Allocator &memAlloc = m_context.getDefaultAllocator();
2009
2010 const VkBufferCreateInfo inputBufferParams = {
2011 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
2012 nullptr, // const void* pNext;
2013 0u, // VkBufferCreateFlags flags;
2014 inputBufferSize, // VkDeviceSize size;
2015 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, // VkBufferUsageFlags usage;
2016 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
2017 1u, // uint32_t queueFamilyCount;
2018 &queueFamilyIndex // const uint32_t* pQueueFamilyIndices;
2019 };
2020
2021 m_inputBuffer = createBuffer(vk, vkDevice, &inputBufferParams);
2022 m_inputAlloc =
2023 memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *m_inputBuffer), MemoryRequirement::HostVisible);
2024
2025 VK_CHECK(vk.bindBufferMemory(vkDevice, *m_inputBuffer, m_inputAlloc->getMemory(), m_inputAlloc->getOffset()));
2026
2027 const VkBufferCreateInfo outputBufferParams = {
2028 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
2029 nullptr, // const void* pNext;
2030 0u, // VkBufferCreateFlags flags;
2031 outputBufferSize, // VkDeviceSize size;
2032 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, // VkBufferUsageFlags usage;
2033 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
2034 1u, // uint32_t queueFamilyCount;
2035 &queueFamilyIndex // const uint32_t* pQueueFamilyIndices;
2036 };
2037
2038 m_outputBuffer = createBuffer(vk, vkDevice, &outputBufferParams);
2039 m_outputAlloc =
2040 memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *m_outputBuffer), MemoryRequirement::HostVisible);
2041
2042 VK_CHECK(vk.bindBufferMemory(vkDevice, *m_outputBuffer, m_outputAlloc->getMemory(), m_outputAlloc->getOffset()));
2043 }
2044
2045 // ComputeShaderExecutor
2046
// Executes shader fragments in a compute shader, with inputs and outputs
// passed through the SSBOs managed by BufferIoExecutor.
class ComputeShaderExecutor : public BufferIoExecutor
{
public:
    ComputeShaderExecutor(Context &context, const ShaderSpec &shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
    virtual ~ComputeShaderExecutor(void);

    static void generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection);

    // Runs the shader for 'numValues' values, reading from 'inputs' and
    // writing results to 'outputs'.
    virtual void execute(int numValues, const void *const *inputs, void *const *outputs,
                         VkDescriptorSet extraResources);

protected:
    static std::string generateComputeShader(const ShaderSpec &spec);

private:
    // Layout of the optional extra-resources descriptor set, kept for use at
    // execution time.
    const VkDescriptorSetLayout m_extraResourcesLayout;
};
2064
// Constructs the executor, storing the extra-resources set layout for later use.
ComputeShaderExecutor::ComputeShaderExecutor(Context &context, const ShaderSpec &shaderSpec,
                                             VkDescriptorSetLayout extraResourcesLayout)
    : BufferIoExecutor(context, shaderSpec)
    , m_extraResourcesLayout(extraResourcesLayout)
{
}
2071
// Destructor; no resources are owned beyond those of the base class.
ComputeShaderExecutor::~ComputeShaderExecutor(void)
{
}
2075
getTypeSpirv(const glu::DataType type,const bool packFloat16Bit=false)2076 std::string getTypeSpirv(const glu::DataType type, const bool packFloat16Bit = false)
2077 {
2078 switch (type)
2079 {
2080 case glu::TYPE_FLOAT16:
2081 return "%f16";
2082 case glu::TYPE_FLOAT16_VEC2:
2083 return "%v2f16";
2084 case glu::TYPE_FLOAT16_VEC3:
2085 return "%v3f16";
2086 case glu::TYPE_FLOAT16_VEC4:
2087 return "%v4f16";
2088 case glu::TYPE_FLOAT:
2089 return packFloat16Bit ? "%u32" : "%f32"; // f16 values will be bitcast from ui32.
2090 case glu::TYPE_FLOAT_VEC2:
2091 return packFloat16Bit ? "%v2u32" : "%v2f32"; // f16 values will be bitcast from ui32.
2092 case glu::TYPE_FLOAT_VEC3:
2093 return packFloat16Bit ? "%v3u32" : "%v3f32"; // f16 values will be bitcast from ui32.
2094 case glu::TYPE_FLOAT_VEC4:
2095 return packFloat16Bit ? "%v4u32" : "%v4f32"; // f16 values will be bitcast from ui32.
2096 case glu::TYPE_INT:
2097 return "%i32";
2098 case glu::TYPE_INT_VEC2:
2099 return "%v2i32";
2100 case glu::TYPE_INT_VEC3:
2101 return "%v3i32";
2102 case glu::TYPE_INT_VEC4:
2103 return "%v4i32";
2104 case glu::TYPE_DOUBLE:
2105 return "%f64";
2106 case glu::TYPE_DOUBLE_VEC2:
2107 return "%v2f64";
2108 case glu::TYPE_DOUBLE_VEC3:
2109 return "%v3f64";
2110 case glu::TYPE_DOUBLE_VEC4:
2111 return "%v4f64";
2112 default:
2113 DE_ASSERT(0);
2114 return "";
2115 }
2116 }
2117
// Emits a SPIR-V fragment that shifts the 32-bit int held in 'variableName'
// left by one bit (doubling the per-operation marker value).
//
// \param variableName Name of a SPIR-V variable holding an %i32 value.
// \param operationNdx Index used to make the generated result ids unique.
// \return SPIR-V assembly text.
//
// Note: parameter now taken by const reference to avoid an unnecessary
// std::string copy per call (clang-tidy performance-unnecessary-value-param);
// call sites are unaffected.
std::string moveBitOperation(const std::string &variableName, const int operationNdx)
{
    std::ostringstream src;
    src << "\n"
        << "%operation_move_" << operationNdx << " = OpLoad %i32 " << variableName << "\n"
        << "%move1_" << operationNdx << " = OpShiftLeftLogical %i32 %operation_move_" << operationNdx << " %c_i32_1\n"
        << "OpStore " << variableName << " %move1_" << operationNdx << "\n";
    return src.str();
}
2127
scalarComparison(const std::string operation,const int operationNdx,const glu::DataType type,const std::string & outputType,const int scalarSize)2128 std::string scalarComparison(const std::string operation, const int operationNdx, const glu::DataType type,
2129 const std::string &outputType, const int scalarSize)
2130 {
2131 std::ostringstream src;
2132 std::string boolType;
2133
2134 switch (type)
2135 {
2136 case glu::TYPE_FLOAT16:
2137 case glu::TYPE_FLOAT:
2138 case glu::TYPE_DOUBLE:
2139 src << "\n"
2140 << "%operation_result_" << operationNdx << " = " << operation << " %bool %in0_val %in1_val\n"
2141 << "OpSelectionMerge %IF_" << operationNdx << " None\n"
2142 << "OpBranchConditional %operation_result_" << operationNdx << " %label_IF_" << operationNdx << " %IF_"
2143 << operationNdx << "\n"
2144 << "%label_IF_" << operationNdx << " = OpLabel\n"
2145 << "%operation_val_" << operationNdx << " = OpLoad %i32 %operation\n"
2146 << "%out_val_" << operationNdx << " = OpLoad %i32 %out0\n"
2147 << "%add_if_" << operationNdx << " = OpIAdd %i32 %out_val_" << operationNdx << " %operation_val_"
2148 << operationNdx << "\n"
2149 << "OpStore %out0 %add_if_" << operationNdx << "\n"
2150 << "OpBranch %IF_" << operationNdx << "\n"
2151 << "%IF_" << operationNdx << " = OpLabel\n";
2152 return src.str();
2153 case glu::TYPE_FLOAT16_VEC2:
2154 case glu::TYPE_FLOAT_VEC2:
2155 case glu::TYPE_DOUBLE_VEC2:
2156 boolType = "%v2bool";
2157 break;
2158 case glu::TYPE_FLOAT16_VEC3:
2159 case glu::TYPE_FLOAT_VEC3:
2160 case glu::TYPE_DOUBLE_VEC3:
2161 boolType = "%v3bool";
2162 break;
2163 case glu::TYPE_FLOAT16_VEC4:
2164 case glu::TYPE_FLOAT_VEC4:
2165 case glu::TYPE_DOUBLE_VEC4:
2166 boolType = "%v4bool";
2167 break;
2168 default:
2169 DE_ASSERT(0);
2170 return "";
2171 }
2172
2173 src << "\n"
2174 << "%operation_result_" << operationNdx << " = " << operation << " " << boolType << " %in0_val %in1_val\n"
2175 << "%ivec_result_" << operationNdx << " = OpSelect " << outputType << " %operation_result_" << operationNdx
2176 << " %c_" << &outputType[1] << "_1 %c_" << &outputType[1] << "_0\n"
2177 << "%operation_val_" << operationNdx << " = OpLoad %i32 %operation\n";
2178
2179 src << "%operation_vec_" << operationNdx << " = OpCompositeConstruct " << outputType;
2180 for (int ndx = 0; ndx < scalarSize; ++ndx)
2181 src << " %operation_val_" << operationNdx;
2182 src << "\n";
2183
2184 src << "%toAdd" << operationNdx << " = OpIMul " << outputType << " %ivec_result_" << operationNdx
2185 << " %operation_vec_" << operationNdx << "\n"
2186 << "%out_val_" << operationNdx << " = OpLoad " << outputType << " %out0\n"
2187
2188 << "%add_if_" << operationNdx << " = OpIAdd " << outputType << " %out_val_" << operationNdx << " %toAdd"
2189 << operationNdx << "\n"
2190 << "OpStore %out0 %add_if_" << operationNdx << "\n";
2191
2192 return src.str();
2193 }
2194
generateSpirv(const ShaderSpec & spec,const bool are16Bit,const bool are64Bit,const bool isMediump)2195 std::string generateSpirv(const ShaderSpec &spec, const bool are16Bit, const bool are64Bit, const bool isMediump)
2196 {
2197 static const std::string COMPARE_OPERATIONS[] = {"OpFOrdEqual",
2198 "OpFOrdGreaterThan",
2199 "OpFOrdLessThan",
2200 "OpFOrdGreaterThanEqual",
2201 "OpFOrdLessThanEqual",
2202 "OpFUnordEqual",
2203 "OpFUnordGreaterThan",
2204 "OpFUnordLessThan",
2205 "OpFUnordGreaterThanEqual",
2206 "OpFUnordLessThanEqual"};
2207
2208 int moveBitNdx = 0;
2209 vector<std::string> inputTypes;
2210 vector<std::string> outputTypes;
2211 const std::string packType =
2212 spec.packFloat16Bit ? getTypeSpirv(getDataTypeFloat16Scalars(spec.inputs[0].varType.getBasicType())) : "";
2213
2214 vector<bool> floatResult;
2215 for (const auto &symbol : spec.outputs)
2216 floatResult.push_back(glu::isDataTypeFloatType(symbol.varType.getBasicType()));
2217
2218 const bool anyFloatResult = std::any_of(begin(floatResult), end(floatResult), [](bool b) { return b; });
2219
2220 vector<bool> packFloatRes;
2221 for (const auto &floatRes : floatResult)
2222 packFloatRes.push_back(floatRes && spec.packFloat16Bit);
2223
2224 const bool useF32Types = (!are16Bit && !are64Bit);
2225 const bool useF64Types = are64Bit;
2226 const bool useF16Types = (spec.packFloat16Bit || are16Bit);
2227
2228 for (const auto &symbol : spec.inputs)
2229 inputTypes.push_back(getTypeSpirv(symbol.varType.getBasicType(), spec.packFloat16Bit));
2230
2231 for (const auto &symbol : spec.outputs)
2232 outputTypes.push_back(getTypeSpirv(symbol.varType.getBasicType(), spec.packFloat16Bit));
2233
2234 DE_ASSERT(!inputTypes.empty());
2235 DE_ASSERT(!outputTypes.empty());
2236
2237 // Assert input and output types match the expected operations.
2238 switch (spec.spirvCase)
2239 {
2240 case SPIRV_CASETYPE_COMPARE:
2241 case SPIRV_CASETYPE_FREM:
2242 DE_ASSERT(inputTypes.size() == 2);
2243 DE_ASSERT(outputTypes.size() == 1);
2244 break;
2245 case SPIRV_CASETYPE_MODFSTRUCT:
2246 case SPIRV_CASETYPE_FREXPSTRUCT:
2247 DE_ASSERT(inputTypes.size() == 1);
2248 DE_ASSERT(outputTypes.size() == 2);
2249 break;
2250 default:
2251 DE_ASSERT(false);
2252 break;
2253 }
2254
2255 std::ostringstream src;
2256 src << "; SPIR-V\n"
2257 "; Version: 1.0\n"
2258 "; Generator: Khronos Glslang Reference Front End; 4\n"
2259 "; Bound: 114\n"
2260 "; Schema: 0\n"
2261 "OpCapability Shader\n";
2262
2263 if (useF16Types)
2264 src << "OpCapability Float16\n";
2265
2266 if (are16Bit)
2267 src << "OpCapability StorageBuffer16BitAccess\n"
2268 "OpCapability UniformAndStorageBuffer16BitAccess\n";
2269
2270 if (useF64Types)
2271 src << "OpCapability Float64\n";
2272
2273 if (are16Bit)
2274 src << "OpExtension \"SPV_KHR_16bit_storage\"\n";
2275
2276 src << "%glslstd450 = OpExtInstImport \"GLSL.std.450\"\n"
2277 "OpMemoryModel Logical GLSL450\n"
2278 "OpEntryPoint GLCompute %BP_main \"main\" %BP_id3uNum %BP_id3uID\n"
2279 "OpExecutionMode %BP_main LocalSize 1 1 1\n"
2280 "OpDecorate %BP_id3uNum BuiltIn NumWorkgroups\n"
2281 "OpDecorate %BP_id3uID BuiltIn WorkgroupId\n";
2282
2283 // Input offsets and stride.
2284 {
2285 int offset = 0;
2286 int ndx = 0;
2287 int largest = 0;
2288 for (const auto &symbol : spec.inputs)
2289 {
2290 const int scalarSize = symbol.varType.getScalarSize();
2291 const int memberSize =
2292 (scalarSize + ((scalarSize == 3) ? 1 : 0)) *
2293 (isDataTypeDoubleType(symbol.varType.getBasicType()) ?
2294 (int)sizeof(uint64_t) :
2295 (isDataTypeFloat16OrVec(symbol.varType.getBasicType()) ? (int)sizeof(uint16_t) :
2296 (int)sizeof(uint32_t)));
2297 const int extraMemberBytes = (offset % memberSize);
2298
2299 offset += ((extraMemberBytes == 0) ? 0 : (memberSize - extraMemberBytes));
2300 src << "OpMemberDecorate %SSB0_IN " << ndx << " Offset " << offset << "\n";
2301 ++ndx;
2302
2303 if (memberSize > largest)
2304 largest = memberSize;
2305
2306 offset += memberSize;
2307 }
2308 DE_ASSERT(largest > 0);
2309 const int extraBytes = (offset % largest);
2310 const int stride = offset + (extraBytes == 0 ? 0 : (largest - extraBytes));
2311 src << "OpDecorate %up_SSB0_IN ArrayStride " << stride << "\n";
2312 }
2313
2314 src << "OpMemberDecorate %ssboIN 0 Offset 0\n"
2315 "OpDecorate %ssboIN BufferBlock\n"
2316 "OpDecorate %ssbo_src DescriptorSet 0\n"
2317 "OpDecorate %ssbo_src Binding 0\n"
2318 "\n";
2319
2320 if (isMediump)
2321 {
2322 for (size_t i = 0; i < inputTypes.size(); ++i)
2323 {
2324 src << "OpMemberDecorate %SSB0_IN " << i
2325 << " RelaxedPrecision\n"
2326 "OpDecorate %in"
2327 << i
2328 << " RelaxedPrecision\n"
2329 "OpDecorate %src_val_0_"
2330 << i
2331 << " RelaxedPrecision\n"
2332 "OpDecorate %in"
2333 << i << "_val RelaxedPrecision\n";
2334 }
2335
2336 if (anyFloatResult)
2337 {
2338 switch (spec.spirvCase)
2339 {
2340 case SPIRV_CASETYPE_FREM:
2341 src << "OpDecorate %frem_result RelaxedPrecision\n";
2342 break;
2343 case SPIRV_CASETYPE_MODFSTRUCT:
2344 src << "OpDecorate %modfstruct_result RelaxedPrecision\n";
2345 break;
2346 case SPIRV_CASETYPE_FREXPSTRUCT:
2347 src << "OpDecorate %frexpstruct_result RelaxedPrecision\n";
2348 break;
2349 default:
2350 DE_ASSERT(false);
2351 break;
2352 }
2353
2354 for (size_t i = 0; i < outputTypes.size(); ++i)
2355 {
2356 src << "OpMemberDecorate %SSB0_OUT " << i << " RelaxedPrecision\n";
2357 src << "OpDecorate %out_val_final_" << i << " RelaxedPrecision\n";
2358 src << "OpDecorate %out" << i << " RelaxedPrecision\n";
2359 }
2360 }
2361 }
2362
2363 // Output offsets and stride.
2364 {
2365 int offset = 0;
2366 int ndx = 0;
2367 int largest = 0;
2368 for (const auto &symbol : spec.outputs)
2369 {
2370 const int scalarSize = symbol.varType.getScalarSize();
2371 const int memberSize =
2372 (scalarSize + ((scalarSize == 3) ? 1 : 0)) *
2373 (isDataTypeDoubleType(symbol.varType.getBasicType()) ?
2374 (int)sizeof(uint64_t) :
2375 (isDataTypeFloat16OrVec(symbol.varType.getBasicType()) ? (int)sizeof(uint16_t) :
2376 (int)sizeof(uint32_t)));
2377 const int extraMemberBytes = (offset % memberSize);
2378
2379 offset += ((extraMemberBytes == 0) ? 0 : (memberSize - extraMemberBytes));
2380 src << "OpMemberDecorate %SSB0_OUT " << ndx << " Offset " << offset << "\n";
2381 ++ndx;
2382
2383 if (memberSize > largest)
2384 largest = memberSize;
2385
2386 offset += memberSize;
2387 }
2388 DE_ASSERT(largest > 0);
2389 const int extraBytes = (offset % largest);
2390 const int stride = offset + ((extraBytes == 0) ? 0 : (largest - extraBytes));
2391 src << "OpDecorate %up_SSB0_OUT ArrayStride " << stride << "\n";
2392 }
2393
2394 src << "OpMemberDecorate %ssboOUT 0 Offset 0\n"
2395 "OpDecorate %ssboOUT BufferBlock\n"
2396 "OpDecorate %ssbo_dst DescriptorSet 0\n"
2397 "OpDecorate %ssbo_dst Binding 1\n"
2398 "\n"
2399 "%void = OpTypeVoid\n"
2400 "%bool = OpTypeBool\n"
2401 "%v2bool = OpTypeVector %bool 2\n"
2402 "%v3bool = OpTypeVector %bool 3\n"
2403 "%v4bool = OpTypeVector %bool 4\n"
2404 "%u32 = OpTypeInt 32 0\n";
2405
2406 if (useF32Types)
2407 src << "%f32 = OpTypeFloat 32\n"
2408 "%v2f32 = OpTypeVector %f32 2\n"
2409 "%v3f32 = OpTypeVector %f32 3\n"
2410 "%v4f32 = OpTypeVector %f32 4\n";
2411
2412 if (useF64Types)
2413 src << "%f64 = OpTypeFloat 64\n"
2414 "%v2f64 = OpTypeVector %f64 2\n"
2415 "%v3f64 = OpTypeVector %f64 3\n"
2416 "%v4f64 = OpTypeVector %f64 4\n";
2417
2418 if (useF16Types)
2419 src << "%f16 = OpTypeFloat 16\n"
2420 "%v2f16 = OpTypeVector %f16 2\n"
2421 "%v3f16 = OpTypeVector %f16 3\n"
2422 "%v4f16 = OpTypeVector %f16 4\n";
2423
2424 src << "%i32 = OpTypeInt 32 1\n"
2425 "%v2i32 = OpTypeVector %i32 2\n"
2426 "%v3i32 = OpTypeVector %i32 3\n"
2427 "%v4i32 = OpTypeVector %i32 4\n"
2428 "%v2u32 = OpTypeVector %u32 2\n"
2429 "%v3u32 = OpTypeVector %u32 3\n"
2430 "%v4u32 = OpTypeVector %u32 4\n"
2431 "\n"
2432 "%ip_u32 = OpTypePointer Input %u32\n"
2433 "%ip_v3u32 = OpTypePointer Input %v3u32\n"
2434 "%up_float = OpTypePointer Uniform "
2435 << inputTypes[0]
2436 << "\n"
2437 "\n"
2438 "%fp_operation = OpTypePointer Function %i32\n"
2439 "%voidf = OpTypeFunction %void\n"
2440 "%fp_u32 = OpTypePointer Function %u32\n"
2441 "%fp_it1 = OpTypePointer Function "
2442 << inputTypes[0] << "\n";
2443
2444 for (size_t i = 0; i < outputTypes.size(); ++i)
2445 {
2446 src << "%fp_out_" << i << " = OpTypePointer Function " << outputTypes[i] << "\n"
2447 << "%up_out_" << i << " = OpTypePointer Uniform " << outputTypes[i] << "\n";
2448 }
2449
2450 if (spec.packFloat16Bit)
2451 src << "%fp_f16 = OpTypePointer Function " << packType << "\n";
2452
2453 src << "%BP_id3uID = OpVariable %ip_v3u32 Input\n"
2454 "%BP_id3uNum = OpVariable %ip_v3u32 Input\n"
2455 "\n"
2456 "%c_u32_0 = OpConstant %u32 0\n"
2457 "%c_u32_1 = OpConstant %u32 1\n"
2458 "%c_u32_2 = OpConstant %u32 2\n"
2459 "%c_i32_0 = OpConstant %i32 0\n"
2460 "%c_i32_1 = OpConstant %i32 1\n"
2461 "\n";
2462
2463 if (useF32Types)
2464 src << "%c_f32_0 = OpConstant %f32 0\n"
2465 "%c_f32_1 = OpConstant %f32 1\n";
2466
2467 if (useF16Types)
2468 src << "%c_f16_0 = OpConstant %f16 0\n"
2469 "%c_f16_1 = OpConstant %f16 1\n"
2470 "%c_f16_minus1 = OpConstant %f16 -0x1p+0";
2471
2472 if (useF64Types)
2473 src << "%c_f64_0 = OpConstant %f64 0\n"
2474 "%c_f64_1 = OpConstant %f64 1\n";
2475
2476 src << "\n"
2477 "%c_v2i32_0 = OpConstantComposite %v2i32 %c_i32_0 %c_i32_0\n"
2478 "%c_v2i32_1 = OpConstantComposite %v2i32 %c_i32_1 %c_i32_1\n"
2479 "%c_v3i32_0 = OpConstantComposite %v3i32 %c_i32_0 %c_i32_0 %c_i32_0\n"
2480 "%c_v3i32_1 = OpConstantComposite %v3i32 %c_i32_1 %c_i32_1 %c_i32_1\n"
2481 "%c_v4i32_0 = OpConstantComposite %v4i32 %c_i32_0 %c_i32_0 %c_i32_0 %c_i32_0\n"
2482 "%c_v4i32_1 = OpConstantComposite %v4i32 %c_i32_1 %c_i32_1 %c_i32_1 %c_i32_1\n"
2483 "\n";
2484
2485 if (useF32Types)
2486 src << "%c_v2f32_0 = OpConstantComposite %v2f32 %c_f32_0 %c_f32_0\n"
2487 "%c_v2f32_1 = OpConstantComposite %v2f32 %c_f32_1 %c_f32_1\n"
2488 "%c_v3f32_0 = OpConstantComposite %v3f32 %c_f32_0 %c_f32_0 %c_f32_0\n"
2489 "%c_v3f32_1 = OpConstantComposite %v3f32 %c_f32_1 %c_f32_1 %c_f32_1\n"
2490 "%c_v4f32_0 = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_0 %c_f32_0\n"
2491 "%c_v4f32_1 = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n";
2492
2493 if (useF16Types)
2494 src << "%c_v2f16_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
2495 "%c_v2f16_1 = OpConstantComposite %v2f16 %c_f16_1 %c_f16_1\n"
2496 "%c_v3f16_0 = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0 %c_f16_0\n"
2497 "%c_v3f16_1 = OpConstantComposite %v3f16 %c_f16_1 %c_f16_1 %c_f16_1\n"
2498 "%c_v4f16_0 = OpConstantComposite %v4f16 %c_f16_0 %c_f16_0 %c_f16_0 %c_f16_0\n"
2499 "%c_v4f16_1 = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_1\n";
2500
2501 if (useF64Types)
2502 src << "%c_v2f64_0 = OpConstantComposite %v2f64 %c_f64_0 %c_f64_0\n"
2503 "%c_v2f64_1 = OpConstantComposite %v2f64 %c_f64_1 %c_f64_1\n"
2504 "%c_v3f64_0 = OpConstantComposite %v3f64 %c_f64_0 %c_f64_0 %c_f64_0\n"
2505 "%c_v3f64_1 = OpConstantComposite %v3f64 %c_f64_1 %c_f64_1 %c_f64_1\n"
2506 "%c_v4f64_0 = OpConstantComposite %v4f64 %c_f64_0 %c_f64_0 %c_f64_0 %c_f64_0\n"
2507 "%c_v4f64_1 = OpConstantComposite %v4f64 %c_f64_1 %c_f64_1 %c_f64_1 %c_f64_1\n"
2508 "\n";
2509
2510 // Input struct.
2511 {
2512 src << "%SSB0_IN = OpTypeStruct";
2513 for (const auto &t : inputTypes)
2514 src << " " << t;
2515 src << "\n";
2516 }
2517
2518 src << "%up_SSB0_IN = OpTypeRuntimeArray %SSB0_IN\n"
2519 "%ssboIN = OpTypeStruct %up_SSB0_IN\n"
2520 "%up_ssboIN = OpTypePointer Uniform %ssboIN\n"
2521 "%ssbo_src = OpVariable %up_ssboIN Uniform\n"
2522 "\n";
2523
2524 // Output struct.
2525 {
2526 src << "%SSB0_OUT = OpTypeStruct";
2527 for (const auto &t : outputTypes)
2528 src << " " << t;
2529 src << "\n";
2530 }
2531
2532 std::string modfStructMemberType;
2533 std::string frexpStructFirstMemberType;
2534 if (spec.spirvCase == SPIRV_CASETYPE_MODFSTRUCT)
2535 {
2536 modfStructMemberType = (packFloatRes[0] ? packType : outputTypes[0]);
2537 src << "%modfstruct_ret_t = OpTypeStruct " << modfStructMemberType << " " << modfStructMemberType << "\n";
2538 }
2539 else if (spec.spirvCase == SPIRV_CASETYPE_FREXPSTRUCT)
2540 {
2541 frexpStructFirstMemberType = (packFloatRes[0] ? packType : outputTypes[0]);
2542 src << "%frexpstruct_ret_t = OpTypeStruct " << frexpStructFirstMemberType << " " << outputTypes[1] << "\n";
2543 }
2544
2545 src << "%up_SSB0_OUT = OpTypeRuntimeArray %SSB0_OUT\n"
2546 "%ssboOUT = OpTypeStruct %up_SSB0_OUT\n"
2547 "%up_ssboOUT = OpTypePointer Uniform %ssboOUT\n"
2548 "%ssbo_dst = OpVariable %up_ssboOUT Uniform\n"
2549 "\n"
2550 "%BP_main = OpFunction %void None %voidf\n"
2551 "%BP_label = OpLabel\n"
2552 "%invocationNdx = OpVariable %fp_u32 Function\n";
2553
2554 // Note: here we are supposing all inputs have the same type.
2555 for (size_t i = 0; i < inputTypes.size(); ++i)
2556 src << "%in" << i << " = OpVariable " << (spec.packFloat16Bit ? "%fp_f16" : "%fp_it1") << " Function\n";
2557
2558 for (size_t i = 0; i < outputTypes.size(); ++i)
2559 src << "%out" << i << " = OpVariable "
2560 << (packFloatRes[i] ? std::string("%fp_f16") : std::string("%fp_out_") + de::toString(i)) << " Function\n";
2561
2562 src << "%operation = OpVariable %fp_operation Function\n"
2563 "%BP_id_0_ptr = OpAccessChain %ip_u32 %BP_id3uID %c_u32_0\n"
2564 "%BP_id_1_ptr = OpAccessChain %ip_u32 %BP_id3uID %c_u32_1\n"
2565 "%BP_id_2_ptr = OpAccessChain %ip_u32 %BP_id3uID %c_u32_2\n"
2566 "%BP_num_0_ptr = OpAccessChain %ip_u32 %BP_id3uNum %c_u32_0\n"
2567 "%BP_num_1_ptr = OpAccessChain %ip_u32 %BP_id3uNum %c_u32_1\n"
2568 "%BP_id_0_val = OpLoad %u32 %BP_id_0_ptr\n"
2569 "%BP_id_1_val = OpLoad %u32 %BP_id_1_ptr\n"
2570 "%BP_id_2_val = OpLoad %u32 %BP_id_2_ptr\n"
2571 "%BP_num_0_val = OpLoad %u32 %BP_num_0_ptr\n"
2572 "%BP_num_1_val = OpLoad %u32 %BP_num_1_ptr\n"
2573 "\n"
2574 "%mul_1 = OpIMul %u32 %BP_num_0_val %BP_num_1_val\n"
2575 "%mul_2 = OpIMul %u32 %mul_1 %BP_id_2_val\n"
2576 "%mul_3 = OpIMul %u32 %BP_num_0_val %BP_id_1_val\n"
2577 "%add_1 = OpIAdd %u32 %mul_2 %mul_3\n"
2578 "%add_2 = OpIAdd %u32 %add_1 %BP_id_0_val\n"
2579 "OpStore %invocationNdx %add_2\n"
2580 "%invocationNdx_val = OpLoad %u32 %invocationNdx\n";
2581
2582 // Load input values.
2583 for (size_t inputNdx = 0; inputNdx < inputTypes.size(); ++inputNdx)
2584 {
2585 src << "\n"
2586 << "%src_ptr_0_" << inputNdx << " = OpAccessChain %up_float %ssbo_src %c_i32_0 %invocationNdx_val %c_i32_"
2587 << inputNdx << "\n"
2588 << "%src_val_0_" << inputNdx << " = OpLoad " << inputTypes[inputNdx] << " %src_ptr_0_" << inputNdx << "\n";
2589
2590 if (spec.packFloat16Bit)
2591 {
2592 if (spec.inputs[inputNdx].varType.getScalarSize() > 1)
2593 {
2594 // Extract the val<inputNdx> u32 input channels into individual f16 values.
2595 for (int i = 0; i < spec.inputs[inputNdx].varType.getScalarSize(); ++i)
2596 {
2597 src << "%src_val_0_" << inputNdx << "_" << i << " = OpCompositeExtract %u32 %src_val_0_" << inputNdx
2598 << " " << i
2599 << "\n"
2600 "%val_v2f16_0_"
2601 << inputNdx << "_" << i << " = OpBitcast %v2f16 %src_val_0_" << inputNdx << "_" << i
2602 << "\n"
2603 "%val_f16_0_"
2604 << inputNdx << "_" << i << " = OpCompositeExtract %f16 %val_v2f16_0_" << inputNdx << "_" << i
2605 << " 0\n";
2606 }
2607
2608 // Construct the input vector.
2609 src << "%val_f16_0_" << inputNdx << " = OpCompositeConstruct " << packType;
2610 for (int i = 0; i < spec.inputs[inputNdx].varType.getScalarSize(); ++i)
2611 {
2612 src << " %val_f16_0_" << inputNdx << "_" << i;
2613 }
2614
2615 src << "\n";
2616 src << "OpStore %in" << inputNdx << " %val_f16_0_" << inputNdx << "\n";
2617 }
2618 else
2619 {
2620 src << "%val_v2f16_0_" << inputNdx << " = OpBitcast %v2f16 %src_val_0_" << inputNdx
2621 << "\n"
2622 "%val_f16_0_"
2623 << inputNdx << " = OpCompositeExtract %f16 %val_v2f16_0_" << inputNdx << " 0\n";
2624
2625 src << "OpStore %in" << inputNdx << " %val_f16_0_" << inputNdx << "\n";
2626 }
2627 }
2628 else
2629 src << "OpStore %in" << inputNdx << " %src_val_0_" << inputNdx << "\n";
2630
2631 src << "%in" << inputNdx << "_val = OpLoad " << (spec.packFloat16Bit ? packType : inputTypes[inputNdx])
2632 << " %in" << inputNdx << "\n";
2633 }
2634
2635 src << "\n"
2636 "OpStore %operation %c_i32_1\n";
2637
2638 // Fill output values with dummy data.
2639 for (size_t i = 0; i < outputTypes.size(); ++i)
2640 src << "OpStore %out" << i << " %c_" << (packFloatRes[i] ? &packType[1] : &outputTypes[i][1]) << "_0\n";
2641
2642 src << "\n";
2643
2644 // Run operation.
2645 switch (spec.spirvCase)
2646 {
2647 case SPIRV_CASETYPE_COMPARE:
2648 for (int operationNdx = 0; operationNdx < DE_LENGTH_OF_ARRAY(COMPARE_OPERATIONS); ++operationNdx)
2649 {
2650 src << scalarComparison(COMPARE_OPERATIONS[operationNdx], operationNdx,
2651 spec.inputs[0].varType.getBasicType(), outputTypes[0],
2652 spec.outputs[0].varType.getScalarSize());
2653 src << moveBitOperation("%operation", moveBitNdx);
2654 ++moveBitNdx;
2655 }
2656 break;
2657 case SPIRV_CASETYPE_FREM:
2658 src << "%frem_result = OpFRem " << (packFloatRes[0] ? packType : outputTypes[0]) << " %in0_val %in1_val\n"
2659 << "OpStore %out0 %frem_result\n";
2660 break;
2661 case SPIRV_CASETYPE_MODFSTRUCT:
2662 src << "%modfstruct_result = OpExtInst %modfstruct_ret_t %glslstd450 ModfStruct %in0_val\n"
2663 << "%modfstruct_result_0 = OpCompositeExtract " << modfStructMemberType << " %modfstruct_result 0\n"
2664 << "%modfstruct_result_1 = OpCompositeExtract " << modfStructMemberType << " %modfstruct_result 1\n"
2665 << "OpStore %out0 %modfstruct_result_0\n"
2666 << "OpStore %out1 %modfstruct_result_1\n";
2667 break;
2668 case SPIRV_CASETYPE_FREXPSTRUCT:
2669 src << "%frexpstruct_result = OpExtInst %frexpstruct_ret_t %glslstd450 FrexpStruct %in0_val\n"
2670 << "%frexpstruct_result_0 = OpCompositeExtract " << frexpStructFirstMemberType << " %frexpstruct_result 0\n"
2671 << "%frexpstruct_result_1 = OpCompositeExtract " << outputTypes[1] << " %frexpstruct_result 1\n"
2672 << "OpStore %out0 %frexpstruct_result_0\n"
2673 << "OpStore %out1 %frexpstruct_result_1\n";
2674 break;
2675 default:
2676 DE_ASSERT(false);
2677 break;
2678 }
2679
2680 for (size_t outputNdx = 0; outputNdx < outputTypes.size(); ++outputNdx)
2681 {
2682 src << "\n"
2683 "%out_val_final_"
2684 << outputNdx << " = OpLoad " << (packFloatRes[outputNdx] ? packType : outputTypes[outputNdx]) << " %out"
2685 << outputNdx
2686 << "\n"
2687 "%ssbo_dst_ptr_"
2688 << outputNdx << " = OpAccessChain %up_out_" << outputNdx << " %ssbo_dst %c_i32_0 %invocationNdx_val %c_i32_"
2689 << outputNdx << "\n";
2690
2691 if (packFloatRes[outputNdx])
2692 {
2693 if (spec.outputs[outputNdx].varType.getScalarSize() > 1)
2694 {
2695 for (int i = 0; i < spec.outputs[outputNdx].varType.getScalarSize(); ++i)
2696 {
2697 src << "%out_val_final_" << outputNdx << "_" << i << " = OpCompositeExtract %f16 %out_val_final_"
2698 << outputNdx << " " << i << "\n";
2699 src << "%out_composite_" << outputNdx << "_" << i
2700 << " = OpCompositeConstruct %v2f16 %out_val_final_" << outputNdx << "_" << i
2701 << " %c_f16_minus1\n";
2702 src << "%u32_val_" << outputNdx << "_" << i << " = OpBitcast %u32 %out_composite_" << outputNdx
2703 << "_" << i << "\n";
2704 }
2705
2706 src << "%u32_final_val_" << outputNdx << " = OpCompositeConstruct " << outputTypes[outputNdx];
2707 for (int i = 0; i < spec.outputs[outputNdx].varType.getScalarSize(); ++i)
2708 src << " %u32_val_" << outputNdx << "_" << i;
2709 src << "\n";
2710 src << "OpStore %ssbo_dst_ptr_" << outputNdx << " %u32_final_val_" << outputNdx << "\n";
2711 }
2712 else
2713 {
2714 src << "%out_composite_" << outputNdx << " = OpCompositeConstruct %v2f16 %out_val_final_" << outputNdx
2715 << " %c_f16_minus1\n"
2716 "%out_result_"
2717 << outputNdx << " = OpBitcast " << outputTypes[outputNdx] << " %out_composite_" << outputNdx
2718 << "\n"
2719 "OpStore %ssbo_dst_ptr_"
2720 << outputNdx << " %out_result_" << outputNdx << "\n";
2721 }
2722 }
2723 else
2724 {
2725 src << "OpStore %ssbo_dst_ptr_" << outputNdx << " %out_val_final_" << outputNdx << "\n";
2726 }
2727 }
2728
2729 src << "\n"
2730 "OpReturn\n"
2731 "OpFunctionEnd\n";
2732
2733 return src.str();
2734 }
2735
generateComputeShader(const ShaderSpec & spec)2736 std::string ComputeShaderExecutor::generateComputeShader(const ShaderSpec &spec)
2737 {
2738 if (spec.spirvCase != SPIRV_CASETYPE_NONE)
2739 {
2740 bool are16Bit = false;
2741 bool are64Bit = false;
2742 bool isMediump = false;
2743 for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
2744 {
2745 if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
2746 are16Bit = true;
2747
2748 if (glu::isDataTypeDoubleType(symIter->varType.getBasicType()))
2749 are64Bit = true;
2750
2751 if (symIter->varType.getPrecision() == glu::PRECISION_MEDIUMP)
2752 isMediump = true;
2753
2754 if (isMediump && are16Bit)
2755 break;
2756 }
2757
2758 return generateSpirv(spec, are16Bit, are64Bit, isMediump);
2759 }
2760 else
2761 {
2762 std::ostringstream src;
2763 src << glu::getGLSLVersionDeclaration(spec.glslVersion) << "\n";
2764
2765 if (!spec.globalDeclarations.empty())
2766 src << spec.globalDeclarations << "\n";
2767
2768 src << "layout(local_size_x = " << spec.localSizeX << ") in;\n"
2769 << "\n";
2770
2771 declareBufferBlocks(src, spec);
2772
2773 src << "void main (void)\n"
2774 << "{\n"
2775 << " uint invocationNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z\n"
2776 << " + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n";
2777
2778 generateExecBufferIo(src, spec, "invocationNdx");
2779
2780 src << "}\n";
2781
2782 return src.str();
2783 }
2784 }
2785
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)2786 void ComputeShaderExecutor::generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection)
2787 {
2788 if (shaderSpec.spirvCase != SPIRV_CASETYPE_NONE)
2789 programCollection.spirvAsmSources.add("compute")
2790 << SpirVAsmBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3)
2791 << generateComputeShader(shaderSpec);
2792 else
2793 programCollection.glslSources.add("compute")
2794 << glu::ComputeSource(generateComputeShader(shaderSpec)) << shaderSpec.buildOptions;
2795 }
2796
// Runs the compute shader over numValues values: uploads inputs, builds the
// pipeline and descriptor machinery, dispatches the work in chunks (one
// workgroup per value, limited per dispatch), and reads results back to
// 'outputs'. 'extraResources' must be a valid set iff an extra-resources
// layout was supplied at construction.
void ComputeShaderExecutor::execute(int numValues, const void *const *inputs, void *const *outputs,
                                    VkDescriptorSet extraResources)
{
    const VkDevice vkDevice = m_context.getDevice();
    const DeviceInterface &vk = m_context.getDeviceInterface();
    const VkQueue queue = m_context.getUniversalQueue();
    const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();

    DescriptorPoolBuilder descriptorPoolBuilder;
    DescriptorSetLayoutBuilder descriptorSetLayoutBuilder;

    Move<VkShaderModule> computeShaderModule;
    Move<VkPipeline> computePipeline;
    Move<VkPipelineLayout> pipelineLayout;
    Move<VkCommandPool> cmdPool;
    Move<VkDescriptorPool> descriptorPool;
    Move<VkDescriptorSetLayout> descriptorSetLayout;
    Move<VkDescriptorSet> descriptorSet;
    // Set 0 holds the I/O buffers; set 1 (optional) is the caller's extra resources.
    const uint32_t numDescriptorSets = (m_extraResourcesLayout != VK_NULL_HANDLE) ? 2u : 1u;

    DE_ASSERT((m_extraResourcesLayout != VK_NULL_HANDLE) == (extraResources != VK_NULL_HANDLE));

    initBuffers(numValues);

    // Setup input buffer & copy data
    // For spirv shaders using packed 16 bit float values as input, the floats are converted to 16 bit before
    // storing in the lower 16 bits of 32 bit integers in the uniform buffer and cast back to 16 bit floats in
    // the shader.
    uploadInputBuffer(inputs, numValues,
                      m_shaderSpec.packFloat16Bit && (m_shaderSpec.spirvCase != SPIRV_CASETYPE_NONE));

    // Create command pool
    cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);

    // Create command buffer

    // Two storage-buffer bindings in set 0: output buffer and input buffer.
    descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
    descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
    descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
    descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);

    descriptorSetLayout = descriptorSetLayoutBuilder.build(vk, vkDevice);
    descriptorPool = descriptorPoolBuilder.build(vk, vkDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

    const VkDescriptorSetAllocateInfo allocInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, nullptr,
                                                   *descriptorPool, 1u, &*descriptorSetLayout};

    descriptorSet = allocateDescriptorSet(vk, vkDevice, &allocInfo);

    // Create pipeline layout
    {
        // Second element is only consumed when numDescriptorSets == 2.
        const VkDescriptorSetLayout descriptorSetLayouts[] = {*descriptorSetLayout, m_extraResourcesLayout};
        const VkPipelineLayoutCreateInfo pipelineLayoutParams = {
            VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // VkStructureType sType;
            nullptr, // const void* pNext;
            (VkPipelineLayoutCreateFlags)0, // VkPipelineLayoutCreateFlags flags;
            numDescriptorSets, // uint32_t descriptorSetCount;
            descriptorSetLayouts, // const VkDescriptorSetLayout* pSetLayouts;
            0u, // uint32_t pushConstantRangeCount;
            nullptr // const VkPushConstantRange* pPushConstantRanges;
        };

        pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
    }

    // Create shaders
    {
        computeShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("compute"), 0);
    }

    // create pipeline
    {
        const VkPipelineShaderStageCreateInfo shaderStageParams[1] = {{
            VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
            nullptr, // const void* pNext;
            (VkPipelineShaderStageCreateFlags)0u, // VkPipelineShaderStageCreateFlags flags;
            VK_SHADER_STAGE_COMPUTE_BIT, // VkShaderStageFlagsBit stage;
            *computeShaderModule, // VkShaderModule shader;
            "main", // const char* pName;
            nullptr // const VkSpecializationInfo* pSpecializationInfo;
        }};

        const VkComputePipelineCreateInfo computePipelineParams = {
            VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType sType;
            nullptr, // const void* pNext;
            (VkPipelineCreateFlags)0, // VkPipelineCreateFlags flags;
            *shaderStageParams, // VkPipelineShaderStageCreateInfo cs;
            *pipelineLayout, // VkPipelineLayout layout;
            VK_NULL_HANDLE, // VkPipeline basePipelineHandle;
            0u, // int32_t basePipelineIndex;
        };

        computePipeline = createComputePipeline(vk, vkDevice, VK_NULL_HANDLE, &computePipelineParams);
    }

    // NOTE(review): the chunk size is capped by maxComputeWorkGroupSize[0] even
    // though one value maps to one *workgroup*; this is conservative relative to
    // maxComputeWorkGroupCount[0] — confirm intent before changing.
    const int maxValuesPerInvocation = m_context.getDeviceProperties().limits.maxComputeWorkGroupSize[0];
    int curOffset = 0;
    const uint32_t inputStride = getInputStride();
    const uint32_t outputStride = getOutputStride();

    // Process values in chunks; each iteration records, submits and waits for
    // one command buffer covering [curOffset, curOffset + numToExec).
    while (curOffset < numValues)
    {
        Move<VkCommandBuffer> cmdBuffer;
        const int numToExec = de::min(maxValuesPerInvocation, numValues - curOffset);

        // Update descriptors
        {
            DescriptorSetUpdateBuilder descriptorSetUpdateBuilder;

            // Point the bindings at this chunk's sub-range of the I/O buffers.
            const VkDescriptorBufferInfo outputDescriptorBufferInfo = {
                *m_outputBuffer, // VkBuffer buffer;
                curOffset * outputStride, // VkDeviceSize offset;
                numToExec * outputStride // VkDeviceSize range;
            };

            descriptorSetUpdateBuilder.writeSingle(
                *descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((uint32_t)OUTPUT_BUFFER_BINDING),
                VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputDescriptorBufferInfo);

            // A zero input stride means the shader takes no inputs.
            if (inputStride)
            {
                const VkDescriptorBufferInfo inputDescriptorBufferInfo = {
                    *m_inputBuffer, // VkBuffer buffer;
                    curOffset * inputStride, // VkDeviceSize offset;
                    numToExec * inputStride // VkDeviceSize range;
                };

                descriptorSetUpdateBuilder.writeSingle(
                    *descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((uint32_t)INPUT_BUFFER_BINDING),
                    VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputDescriptorBufferInfo);
            }

            descriptorSetUpdateBuilder.update(vk, vkDevice);
        }

        cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
        beginCommandBuffer(vk, *cmdBuffer);
        vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);

        {
            const VkDescriptorSet descriptorSets[] = {*descriptorSet, extraResources};
            vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, numDescriptorSets,
                                     descriptorSets, 0u, nullptr);
        }

        // One workgroup per value in this chunk.
        vk.cmdDispatch(*cmdBuffer, numToExec, 1, 1);

        // Insert a barrier so data written by the shader is available to the host
        {
            const VkBufferMemoryBarrier bufferBarrier = {
                VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType sType;
                nullptr, // const void* pNext;
                VK_ACCESS_SHADER_WRITE_BIT, // VkAccessFlags srcAccessMask;
                VK_ACCESS_HOST_READ_BIT, // VkAccessFlags dstAccessMask;
                VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex;
                VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex;
                *m_outputBuffer, // VkBuffer buffer;
                0, // VkDeviceSize offset;
                VK_WHOLE_SIZE, // VkDeviceSize size;
            };

            vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT,
                                  (VkDependencyFlags)0, 0, nullptr, 1, &bufferBarrier, 0, nullptr);
        }

        endCommandBuffer(vk, *cmdBuffer);

        curOffset += numToExec;

        // Execute
        submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
    }

    // Read back data
    readOutputBuffer(outputs, numValues);
}
2973
2974 #ifndef CTS_USES_VULKANSC
2975 // MeshTaskShaderExecutor
2976
// Executes shader expressions in a mesh-shading pipeline, reading inputs from
// and writing outputs to storage buffers via BufferIoExecutor. When a task
// shader is used, the buffer I/O happens in the task stage and the mesh stage
// is a stub (see generateMeshShader/generateTaskShader).
class MeshTaskShaderExecutor : public BufferIoExecutor
{
public:
    // extraResourcesLayout may be VK_NULL_HANDLE when no extra set is needed.
    MeshTaskShaderExecutor(Context &context, const ShaderSpec &shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
    virtual ~MeshTaskShaderExecutor(void);

    // Adds the "mesh" (and, if useTask, "task") GLSL sources to the collection.
    static void generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection, bool useTask);

    virtual void execute(int numValues, const void *const *inputs, void *const *outputs,
                         VkDescriptorSet extraResources);

protected:
    static std::string generateMeshShader(const ShaderSpec &spec, bool useTask);
    static std::string generateTaskShader(const ShaderSpec &spec);

private:
    // Non-owning handle to the caller's extra-resources set layout.
    const VkDescriptorSetLayout m_extraResourcesLayout;
};
2995
MeshTaskShaderExecutor(Context & context,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)2996 MeshTaskShaderExecutor::MeshTaskShaderExecutor(Context &context, const ShaderSpec &shaderSpec,
2997 VkDescriptorSetLayout extraResourcesLayout)
2998 : BufferIoExecutor(context, shaderSpec)
2999 , m_extraResourcesLayout(extraResourcesLayout)
3000 {
3001 }
3002
~MeshTaskShaderExecutor(void)3003 MeshTaskShaderExecutor::~MeshTaskShaderExecutor(void)
3004 {
3005 }
3006
generateMeshShader(const ShaderSpec & spec,bool useTask)3007 std::string MeshTaskShaderExecutor::generateMeshShader(const ShaderSpec &spec, bool useTask)
3008 {
3009 DE_ASSERT(spec.spirvCase == SPIRV_CASETYPE_NONE);
3010
3011 std::ostringstream src;
3012
3013 if (useTask)
3014 {
3015 src << glu::getGLSLVersionDeclaration(spec.glslVersion) << "\n"
3016 << "#extension GL_EXT_mesh_shader : enable\n"
3017 << "layout(local_size_x=1, local_size_y=1, local_size_z=1) in;\n"
3018 << "layout(points) out;\n"
3019 << "layout(max_vertices=1, max_primitives=1) out;\n"
3020 << "\n"
3021 << "void main (void)\n"
3022 << "{\n"
3023 << " SetMeshOutputsEXT(0u, 0u);\n"
3024 << "}\n";
3025 }
3026 else
3027 {
3028 src << glu::getGLSLVersionDeclaration(spec.glslVersion) << "\n"
3029 << "#extension GL_EXT_mesh_shader : enable\n";
3030
3031 if (!spec.globalDeclarations.empty())
3032 src << spec.globalDeclarations << "\n";
3033
3034 src << "layout(local_size_x = " << spec.localSizeX << ") in;\n"
3035 << "layout(points) out;\n"
3036 << "layout(max_vertices=1, max_primitives=1) out;\n"
3037 << "\n";
3038
3039 declareBufferBlocks(src, spec);
3040
3041 src << "void main (void)\n"
3042 << "{\n"
3043 << " uint invocationNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z\n"
3044 << " + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n";
3045
3046 generateExecBufferIo(src, spec, "invocationNdx");
3047
3048 src << " SetMeshOutputsEXT(0u, 0u);\n"
3049 << "}\n";
3050 }
3051
3052 return src.str();
3053 }
3054
generateTaskShader(const ShaderSpec & spec)3055 std::string MeshTaskShaderExecutor::generateTaskShader(const ShaderSpec &spec)
3056 {
3057 std::ostringstream src;
3058
3059 src << glu::getGLSLVersionDeclaration(spec.glslVersion) << "\n"
3060 << "#extension GL_EXT_mesh_shader : enable\n";
3061
3062 if (!spec.globalDeclarations.empty())
3063 src << spec.globalDeclarations << "\n";
3064
3065 src << "layout(local_size_x = " << spec.localSizeX << ") in;\n"
3066 << "\n";
3067
3068 declareBufferBlocks(src, spec);
3069
3070 src << "void main (void)\n"
3071 << "{\n"
3072 << " uint invocationNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z\n"
3073 << " + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n";
3074
3075 generateExecBufferIo(src, spec, "invocationNdx");
3076
3077 src << " EmitMeshTasksEXT(0u, 0u, 0u);\n"
3078 << "}\n";
3079
3080 return src.str();
3081 }
3082
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection,bool useTask)3083 void MeshTaskShaderExecutor::generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection,
3084 bool useTask)
3085 {
3086 DE_ASSERT(shaderSpec.spirvCase == SPIRV_CASETYPE_NONE);
3087 programCollection.glslSources.add("mesh")
3088 << glu::MeshSource(generateMeshShader(shaderSpec, useTask)) << shaderSpec.buildOptions;
3089 if (useTask)
3090 programCollection.glslSources.add("task")
3091 << glu::TaskSource(generateTaskShader(shaderSpec)) << shaderSpec.buildOptions;
3092 }
3093
execute(int numValues,const void * const * inputs,void * const * outputs,VkDescriptorSet extraResources)3094 void MeshTaskShaderExecutor::execute(int numValues, const void *const *inputs, void *const *outputs,
3095 VkDescriptorSet extraResources)
3096 {
3097 const auto vkDevice = m_context.getDevice();
3098 const auto &vk = m_context.getDeviceInterface();
3099 const auto queue = m_context.getUniversalQueue();
3100 const auto queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
3101 const auto bindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
3102 const auto &binaries = m_context.getBinaryCollection();
3103 const bool useTask = binaries.contains("task");
3104 const auto shaderStage = (useTask ? VK_SHADER_STAGE_TASK_BIT_EXT : VK_SHADER_STAGE_MESH_BIT_EXT);
3105 const auto pipelineStage =
3106 (useTask ? VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT : VK_PIPELINE_STAGE_MESH_SHADER_BIT_EXT);
3107
3108 DE_ASSERT((m_extraResourcesLayout != VK_NULL_HANDLE) == (extraResources != VK_NULL_HANDLE));
3109
3110 // Create input and output buffers.
3111 initBuffers(numValues);
3112
3113 // Setup input buffer & copy data
3114 // For spirv shaders using packed 16 bit float values as input, the floats are converted to 16 bit before
3115 // storing in the lower 16 bits of 32 bit integers in the uniform buffer and cast back to 16 bit floats in
3116 // the shader.
3117 uploadInputBuffer(inputs, numValues,
3118 m_shaderSpec.packFloat16Bit && (m_shaderSpec.spirvCase != SPIRV_CASETYPE_NONE));
3119
3120 // Create command pool
3121 const auto cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
3122
3123 // Descriptor pool, set layout and set.
3124 DescriptorPoolBuilder descriptorPoolBuilder;
3125 DescriptorSetLayoutBuilder descriptorSetLayoutBuilder;
3126
3127 descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, shaderStage);
3128 descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3129 descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, shaderStage);
3130 descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3131
3132 const auto descriptorSetLayout = descriptorSetLayoutBuilder.build(vk, vkDevice);
3133 const auto descriptorPool =
3134 descriptorPoolBuilder.build(vk, vkDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3135 const auto descriptorSet = makeDescriptorSet(vk, vkDevice, descriptorPool.get(), descriptorSetLayout.get());
3136
3137 // Create pipeline layout
3138 std::vector<VkDescriptorSetLayout> setLayouts;
3139 setLayouts.push_back(descriptorSetLayout.get());
3140 if (m_extraResourcesLayout != VK_NULL_HANDLE)
3141 setLayouts.push_back(m_extraResourcesLayout);
3142
3143 const auto pipelineLayout =
3144 makePipelineLayout(vk, vkDevice, static_cast<uint32_t>(setLayouts.size()), de::dataOrNull(setLayouts));
3145
3146 // Create shaders
3147 const auto meshShaderModule = createShaderModule(vk, vkDevice, binaries.get("mesh"));
3148 const auto taskShaderModule =
3149 (useTask ? createShaderModule(vk, vkDevice, binaries.get("task")) : Move<VkShaderModule>());
3150
3151 // Render pass and framebuffer.
3152 const auto fbExtent = makeExtent2D(1u, 1u);
3153 const auto renderPass = makeRenderPass(vk, vkDevice);
3154 const auto framebuffer =
3155 makeFramebuffer(vk, vkDevice, renderPass.get(), 0u, nullptr, fbExtent.width, fbExtent.height);
3156
3157 const std::vector<VkViewport> viewports(1u, makeViewport(fbExtent));
3158 const std::vector<VkRect2D> scissors(1u, makeRect2D(fbExtent));
3159
3160 // Create pipeline.
3161 const auto meshPipeline =
3162 makeGraphicsPipeline(vk, vkDevice, pipelineLayout.get(), taskShaderModule.get(), meshShaderModule.get(),
3163 VK_NULL_HANDLE, renderPass.get(), viewports, scissors);
3164
3165 const int maxValuesPerInvocation = m_context.getMeshShaderPropertiesEXT().maxMeshWorkGroupSize[0];
3166 const uint32_t inputStride = getInputStride();
3167 const uint32_t outputStride = getOutputStride();
3168 const auto outputBufferBinding =
3169 DescriptorSetUpdateBuilder::Location::binding(static_cast<uint32_t>(OUTPUT_BUFFER_BINDING));
3170 const auto inputBufferBinding =
3171 DescriptorSetUpdateBuilder::Location::binding(static_cast<uint32_t>(INPUT_BUFFER_BINDING));
3172 int curOffset = 0;
3173
3174 while (curOffset < numValues)
3175 {
3176 const auto remaining = numValues - curOffset;
3177 const auto numToExec = de::min(maxValuesPerInvocation, remaining);
3178
3179 // Update descriptors
3180 {
3181 DescriptorSetUpdateBuilder descriptorSetUpdateBuilder;
3182
3183 const auto outputDescriptorBufferInfo =
3184 makeDescriptorBufferInfo(m_outputBuffer.get(), curOffset * outputStride, numToExec * outputStride);
3185 descriptorSetUpdateBuilder.writeSingle(descriptorSet.get(), outputBufferBinding,
3186 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputDescriptorBufferInfo);
3187
3188 if (inputStride)
3189 {
3190 const auto inputDescriptorBufferInfo =
3191 makeDescriptorBufferInfo(m_inputBuffer.get(), curOffset * inputStride, numToExec * inputStride);
3192 descriptorSetUpdateBuilder.writeSingle(descriptorSet.get(), inputBufferBinding,
3193 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputDescriptorBufferInfo);
3194 }
3195
3196 descriptorSetUpdateBuilder.update(vk, vkDevice);
3197 }
3198
3199 std::vector<VkDescriptorSet> descriptorSets;
3200 descriptorSets.push_back(descriptorSet.get());
3201 if (extraResources != VK_NULL_HANDLE)
3202 descriptorSets.push_back(extraResources);
3203
3204 const auto bufferBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT,
3205 m_outputBuffer.get(), 0ull, VK_WHOLE_SIZE);
3206 const auto cmdBufferPtr = allocateCommandBuffer(vk, vkDevice, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3207 const auto cmdBuffer = cmdBufferPtr.get();
3208
3209 // Record command buffer, including pipeline barrier from output buffer to the host.
3210 beginCommandBuffer(vk, cmdBuffer);
3211 beginRenderPass(vk, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u));
3212 vk.cmdBindPipeline(cmdBuffer, bindPoint, meshPipeline.get());
3213 vk.cmdBindDescriptorSets(cmdBuffer, bindPoint, pipelineLayout.get(), 0u,
3214 static_cast<uint32_t>(descriptorSets.size()), de::dataOrNull(descriptorSets), 0u,
3215 nullptr);
3216 vk.cmdDrawMeshTasksEXT(cmdBuffer, numToExec, 1u, 1u);
3217 endRenderPass(vk, cmdBuffer);
3218 cmdPipelineBufferMemoryBarrier(vk, cmdBuffer, pipelineStage, VK_PIPELINE_STAGE_HOST_BIT, &bufferBarrier);
3219 endCommandBuffer(vk, cmdBuffer);
3220
3221 // Execute
3222 submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer);
3223
3224 curOffset += numToExec;
3225 }
3226
3227 // Read back data
3228 readOutputBuffer(outputs, numValues);
3229 }
3230 #endif // CTS_USES_VULKANSC
3231
3232 // Tessellation utils
3233
// Minimal vertex shader used by the tessellation executors. Vertex positions
// are derived from gl_VertexIndex only; the actual test data flows through
// storage buffers, so the positions themselves are irrelevant to the result.
static std::string generateVertexShaderForTess(void)
{
    static const char *const kVertexSrc = "#version 450\n"
                                          "void main (void)\n{\n"
                                          " gl_Position = vec4(gl_VertexIndex/2, gl_VertexIndex%2, 0.0, 1.0);\n"
                                          "}\n";

    return std::string(kVertexSrc);
}
3244
// Common base class for the tessellation-stage executors (control and
// evaluation). Builds on BufferIoExecutor's input/output storage buffers and
// provides renderTess(), which performs one offscreen draw so the
// tessellation shaders execute once per test value.
class TessellationExecutor : public BufferIoExecutor
{
public:
    // Throws NotSupportedError if the device lacks the tessellationShader feature.
    TessellationExecutor(Context &context, const ShaderSpec &shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
    virtual ~TessellationExecutor(void);

    // Draws vertexCount vertices as patches of patchControlPoints control
    // points into an offscreen framebuffer. extraResources must be non-null
    // exactly when the extraResourcesLayout given to the constructor was.
    void renderTess(uint32_t numValues, uint32_t vertexCount, uint32_t patchControlPoints,
                    VkDescriptorSet extraResources);

private:
    const VkDescriptorSetLayout m_extraResourcesLayout; // Optional second set layout; may be VK_NULL_HANDLE.
};
3257
// Checks for tessellation support up front so unsupported devices fail with a
// clean NotSupportedError instead of failing later at pipeline creation.
TessellationExecutor::TessellationExecutor(Context &context, const ShaderSpec &shaderSpec,
                                           VkDescriptorSetLayout extraResourcesLayout)
    : BufferIoExecutor(context, shaderSpec)
    , m_extraResourcesLayout(extraResourcesLayout)
{
    const VkPhysicalDeviceFeatures &features = context.getDeviceFeatures();

    if (!features.tessellationShader)
        TCU_THROW(NotSupportedError, "Tessellation shader is not supported by device");
}
3268
TessellationExecutor::~TessellationExecutor(void)
{
    // Nothing to do: all Vulkan objects are owned by RAII members/base classes.
}
3272
// Performs a single offscreen draw that makes the tessellation stages run
// once per test value. The test data travels through the storage buffers
// managed by BufferIoExecutor (m_inputBuffer / m_outputBuffer), bound at
// INPUT_BUFFER_BINDING / OUTPUT_BUFFER_BINDING — the color attachment output
// is never read back.
//
// numValues          number of input/output values processed by the shaders.
// vertexCount        total vertex count passed to vkCmdDraw.
// patchControlPoints control points per patch for the pipeline tessellation state.
// extraResources     optional second descriptor set; must be non-null exactly
//                    when m_extraResourcesLayout is non-null.
void TessellationExecutor::renderTess(uint32_t numValues, uint32_t vertexCount, uint32_t patchControlPoints,
                                      VkDescriptorSet extraResources)
{
    const size_t inputBufferSize = numValues * getInputStride();
    const VkDevice vkDevice = m_context.getDevice();
    const DeviceInterface &vk = m_context.getDeviceInterface();
    const VkQueue queue = m_context.getUniversalQueue();
    const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
    Allocator &memAlloc = m_context.getDefaultAllocator();

    const tcu::UVec2 renderSize(DEFAULT_RENDER_WIDTH, DEFAULT_RENDER_HEIGHT);

    Move<VkImage> colorImage;
    de::MovePtr<Allocation> colorImageAlloc;
    VkFormat colorFormat = VK_FORMAT_R8G8B8A8_UNORM;
    Move<VkImageView> colorImageView;

    Move<VkRenderPass> renderPass;
    Move<VkFramebuffer> framebuffer;
    Move<VkPipelineLayout> pipelineLayout;
    Move<VkPipeline> graphicsPipeline;

    Move<VkShaderModule> vertexShaderModule;
    Move<VkShaderModule> tessControlShaderModule;
    Move<VkShaderModule> tessEvalShaderModule;
    Move<VkShaderModule> fragmentShaderModule;

    Move<VkCommandPool> cmdPool;
    Move<VkCommandBuffer> cmdBuffer;

    Move<VkDescriptorPool> descriptorPool;
    Move<VkDescriptorSetLayout> descriptorSetLayout;
    Move<VkDescriptorSet> descriptorSet;
    // Bind the extra-resources set only when a layout for it was supplied.
    const uint32_t numDescriptorSets = (m_extraResourcesLayout != VK_NULL_HANDLE) ? 2u : 1u;

    // Layout and set must be provided (or omitted) together.
    DE_ASSERT((m_extraResourcesLayout != VK_NULL_HANDLE) == (extraResources != VK_NULL_HANDLE));

    // Create color image
    {
        const VkImageCreateInfo colorImageParams = {
            VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
            nullptr, // const void* pNext;
            0u, // VkImageCreateFlags flags;
            VK_IMAGE_TYPE_2D, // VkImageType imageType;
            colorFormat, // VkFormat format;
            {renderSize.x(), renderSize.y(), 1u}, // VkExtent3D extent;
            1u, // uint32_t mipLevels;
            1u, // uint32_t arraySize;
            VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
            VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
            VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT, // VkImageUsageFlags usage;
            VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
            1u, // uint32_t queueFamilyCount;
            &queueFamilyIndex, // const uint32_t* pQueueFamilyIndices;
            VK_IMAGE_LAYOUT_UNDEFINED // VkImageLayout initialLayout;
        };

        colorImage = createImage(vk, vkDevice, &colorImageParams);

        // Allocate and bind color image memory
        colorImageAlloc =
            memAlloc.allocate(getImageMemoryRequirements(vk, vkDevice, *colorImage), MemoryRequirement::Any);
        VK_CHECK(vk.bindImageMemory(vkDevice, *colorImage, colorImageAlloc->getMemory(), colorImageAlloc->getOffset()));
    }

    // Create color attachment view
    {
        const VkImageViewCreateInfo colorImageViewParams = {
            VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // VkStructureType sType;
            nullptr, // const void* pNext;
            0u, // VkImageViewCreateFlags flags;
            *colorImage, // VkImage image;
            VK_IMAGE_VIEW_TYPE_2D, // VkImageViewType viewType;
            colorFormat, // VkFormat format;
            {
                VK_COMPONENT_SWIZZLE_R, // VkComponentSwizzle r;
                VK_COMPONENT_SWIZZLE_G, // VkComponentSwizzle g;
                VK_COMPONENT_SWIZZLE_B, // VkComponentSwizzle b;
                VK_COMPONENT_SWIZZLE_A // VkComponentSwizzle a;
            }, // VkComponentsMapping components;
            {
                VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask;
                0u, // uint32_t baseMipLevel;
                1u, // uint32_t mipLevels;
                0u, // uint32_t baseArraylayer;
                1u // uint32_t layerCount;
            } // VkImageSubresourceRange subresourceRange;
        };

        colorImageView = createImageView(vk, vkDevice, &colorImageViewParams);
    }

    // Create render pass
    {
        const VkAttachmentDescription colorAttachmentDescription = {
            0u, // VkAttachmentDescriptorFlags flags;
            colorFormat, // VkFormat format;
            VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
            VK_ATTACHMENT_LOAD_OP_CLEAR, // VkAttachmentLoadOp loadOp;
            VK_ATTACHMENT_STORE_OP_STORE, // VkAttachmentStoreOp storeOp;
            VK_ATTACHMENT_LOAD_OP_DONT_CARE, // VkAttachmentLoadOp stencilLoadOp;
            VK_ATTACHMENT_STORE_OP_DONT_CARE, // VkAttachmentStoreOp stencilStoreOp;
            VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
            VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL // VkImageLayout finalLayout
        };

        const VkAttachmentDescription attachments[1] = {colorAttachmentDescription};

        const VkAttachmentReference colorAttachmentReference = {
            0u, // uint32_t attachment;
            VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL // VkImageLayout layout;
        };

        const VkSubpassDescription subpassDescription = {
            0u, // VkSubpassDescriptionFlags flags;
            VK_PIPELINE_BIND_POINT_GRAPHICS, // VkPipelineBindPoint pipelineBindPoint;
            0u, // uint32_t inputCount;
            nullptr, // const VkAttachmentReference* pInputAttachments;
            1u, // uint32_t colorCount;
            &colorAttachmentReference, // const VkAttachmentReference* pColorAttachments;
            nullptr, // const VkAttachmentReference* pResolveAttachments;
            nullptr, // VkAttachmentReference depthStencilAttachment;
            0u, // uint32_t preserveCount;
            nullptr // const VkAttachmentReference* pPreserveAttachments;
        };

        const VkRenderPassCreateInfo renderPassParams = {
            VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // VkStructureType sType;
            nullptr, // const void* pNext;
            0u, // VkRenderPassCreateFlags flags;
            1u, // uint32_t attachmentCount;
            attachments, // const VkAttachmentDescription* pAttachments;
            1u, // uint32_t subpassCount;
            &subpassDescription, // const VkSubpassDescription* pSubpasses;
            0u, // uint32_t dependencyCount;
            nullptr // const VkSubpassDependency* pDependencies;
        };

        renderPass = createRenderPass(vk, vkDevice, &renderPassParams);
    }

    // Create framebuffer
    {
        const VkFramebufferCreateInfo framebufferParams = {
            VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, // VkStructureType sType;
            nullptr, // const void* pNext;
            0u, // VkFramebufferCreateFlags flags;
            *renderPass, // VkRenderPass renderPass;
            1u, // uint32_t attachmentCount;
            &*colorImageView, // const VkAttachmentBindInfo* pAttachments;
            (uint32_t)renderSize.x(), // uint32_t width;
            (uint32_t)renderSize.y(), // uint32_t height;
            1u // uint32_t layers;
        };

        framebuffer = createFramebuffer(vk, vkDevice, &framebufferParams);
    }

    // Create descriptors
    {
        DescriptorPoolBuilder descriptorPoolBuilder;
        DescriptorSetLayoutBuilder descriptorSetLayoutBuilder;

        // Two storage buffer bindings: one for shader input, one for output.
        descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_ALL);
        descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
        descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_ALL);
        descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);

        descriptorSetLayout = descriptorSetLayoutBuilder.build(vk, vkDevice);
        descriptorPool =
            descriptorPoolBuilder.build(vk, vkDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

        const VkDescriptorSetAllocateInfo allocInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, nullptr,
                                                       *descriptorPool, 1u, &*descriptorSetLayout};

        descriptorSet = allocateDescriptorSet(vk, vkDevice, &allocInfo);
        // Update descriptors
        {
            DescriptorSetUpdateBuilder descriptorSetUpdateBuilder;
            const VkDescriptorBufferInfo outputDescriptorBufferInfo = {
                *m_outputBuffer, // VkBuffer buffer;
                0u, // VkDeviceSize offset;
                VK_WHOLE_SIZE // VkDeviceSize range;
            };

            descriptorSetUpdateBuilder.writeSingle(
                *descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((uint32_t)OUTPUT_BUFFER_BINDING),
                VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputDescriptorBufferInfo);

            VkDescriptorBufferInfo inputDescriptorBufferInfo = {
                VK_NULL_HANDLE, // VkBuffer buffer;
                0u, // VkDeviceSize offset;
                VK_WHOLE_SIZE // VkDeviceSize range;
            };

            // The input binding is only written when the shader actually has
            // inputs; otherwise no input buffer exists.
            if (inputBufferSize > 0)
            {
                inputDescriptorBufferInfo.buffer = *m_inputBuffer;

                descriptorSetUpdateBuilder.writeSingle(
                    *descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((uint32_t)INPUT_BUFFER_BINDING),
                    VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputDescriptorBufferInfo);
            }

            descriptorSetUpdateBuilder.update(vk, vkDevice);
        }
    }

    // Create pipeline layout
    {
        // Only the first numDescriptorSets entries are consumed, so a null
        // m_extraResourcesLayout in slot 1 is never read by the API.
        const VkDescriptorSetLayout descriptorSetLayouts[] = {*descriptorSetLayout, m_extraResourcesLayout};
        const VkPipelineLayoutCreateInfo pipelineLayoutParams = {
            VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // VkStructureType sType;
            nullptr, // const void* pNext;
            (VkPipelineLayoutCreateFlags)0, // VkPipelineLayoutCreateFlags flags;
            numDescriptorSets, // uint32_t descriptorSetCount;
            descriptorSetLayouts, // const VkDescriptorSetLayout* pSetLayouts;
            0u, // uint32_t pushConstantRangeCount;
            nullptr // const VkPushConstantRange* pPushConstantRanges;
        };

        pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
    }

    // Create shader modules
    {
        vertexShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("vert"), 0);
        tessControlShaderModule =
            createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("tess_control"), 0);
        tessEvalShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("tess_eval"), 0);
        fragmentShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("frag"), 0);
    }

    // Create pipeline
    {
        // No vertex attributes: positions are synthesized from gl_VertexIndex.
        const VkPipelineVertexInputStateCreateInfo vertexInputStateParams = {
            VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
            nullptr, // const void* pNext;
            (VkPipelineVertexInputStateCreateFlags)0, // VkPipelineVertexInputStateCreateFlags flags;
            0u, // uint32_t bindingCount;
            nullptr, // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
            0u, // uint32_t attributeCount;
            nullptr, // const VkVertexInputAttributeDescription* pvertexAttributeDescriptions;
        };

        const std::vector<VkViewport> viewports(1, makeViewport(renderSize));
        const std::vector<VkRect2D> scissors(1, makeRect2D(renderSize));

        graphicsPipeline = makeGraphicsPipeline(
            vk, // const DeviceInterface& vk
            vkDevice, // const VkDevice device
            *pipelineLayout, // const VkPipelineLayout pipelineLayout
            *vertexShaderModule, // const VkShaderModule vertexShaderModule
            *tessControlShaderModule, // const VkShaderModule tessellationControlShaderModule
            *tessEvalShaderModule, // const VkShaderModule tessellationEvalShaderModule
            VK_NULL_HANDLE, // const VkShaderModule geometryShaderModule
            *fragmentShaderModule, // const VkShaderModule fragmentShaderModule
            *renderPass, // const VkRenderPass renderPass
            viewports, // const std::vector<VkViewport>& viewports
            scissors, // const std::vector<VkRect2D>& scissors
            VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, // const VkPrimitiveTopology topology
            0u, // const uint32_t subpass
            patchControlPoints, // const uint32_t patchControlPoints
            &vertexInputStateParams); // const VkPipelineVertexInputStateCreateInfo* vertexInputStateCreateInfo
    }

    // Create command pool
    cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);

    // Create command buffer
    {
        const VkClearValue clearValue = getDefaultClearColor();

        cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

        beginCommandBuffer(vk, *cmdBuffer);

        beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, renderSize.x(), renderSize.y()),
                        clearValue);

        vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *graphicsPipeline);

        {
            // As with the pipeline layout, only numDescriptorSets entries are
            // bound, so a null extraResources in slot 1 is never consumed.
            const VkDescriptorSet descriptorSets[] = {*descriptorSet, extraResources};
            vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u,
                                     numDescriptorSets, descriptorSets, 0u, nullptr);
        }

        vk.cmdDraw(*cmdBuffer, vertexCount, 1, 0, 0);

        endRenderPass(vk, *cmdBuffer);

        // Insert a barrier so data written by the shader is available to the host
        {
            const VkBufferMemoryBarrier bufferBarrier = {
                VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType sType;
                nullptr, // const void* pNext;
                VK_ACCESS_SHADER_WRITE_BIT, // VkAccessFlags srcAccessMask;
                VK_ACCESS_HOST_READ_BIT, // VkAccessFlags dstAccessMask;
                VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex;
                VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex;
                *m_outputBuffer, // VkBuffer buffer;
                0, // VkDeviceSize offset;
                VK_WHOLE_SIZE, // VkDeviceSize size;
            };

            vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT,
                                  vk::VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, nullptr, 1, &bufferBarrier,
                                  0, nullptr);
        }

        endCommandBuffer(vk, *cmdBuffer);
    }

    // Execute Draw
    submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
}
3590
3591 // TessControlExecutor
3592
// Executor that evaluates the shader spec inside a tessellation control
// shader: one single-vertex patch is drawn per value, and gl_PrimitiveID
// selects the value to process.
class TessControlExecutor : public TessellationExecutor
{
public:
    TessControlExecutor(Context &context, const ShaderSpec &shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
    virtual ~TessControlExecutor(void);

    // Registers the vert/tess_control/tess_eval/frag programs this executor expects.
    static void generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection);

    // Runs the shader once per value; inputs/outputs are packed per the spec's symbols.
    virtual void execute(int numValues, const void *const *inputs, void *const *outputs,
                         VkDescriptorSet extraResources);

protected:
    // Builds the GLSL source that performs the buffer I/O in the control stage.
    static std::string generateTessControlShader(const ShaderSpec &shaderSpec);
};
3607
// Forwards to TessellationExecutor, which performs the tessellation feature check.
TessControlExecutor::TessControlExecutor(Context &context, const ShaderSpec &shaderSpec,
                                         VkDescriptorSetLayout extraResourcesLayout)
    : TessellationExecutor(context, shaderSpec, extraResourcesLayout)
{
}
3613
TessControlExecutor::~TessControlExecutor(void)
{
    // Nothing to do: cleanup handled by base classes.
}
3617
generateTessControlShader(const ShaderSpec & shaderSpec)3618 std::string TessControlExecutor::generateTessControlShader(const ShaderSpec &shaderSpec)
3619 {
3620 std::ostringstream src;
3621 src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
3622
3623 if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
3624 src << "#extension GL_EXT_tessellation_shader : require\n\n";
3625
3626 if (!shaderSpec.globalDeclarations.empty())
3627 src << shaderSpec.globalDeclarations << "\n";
3628
3629 src << "\nlayout(vertices = 1) out;\n\n";
3630
3631 declareBufferBlocks(src, shaderSpec);
3632
3633 src << "void main (void)\n{\n";
3634
3635 for (int ndx = 0; ndx < 2; ndx++)
3636 src << "\tgl_TessLevelInner[" << ndx << "] = 1.0;\n";
3637
3638 for (int ndx = 0; ndx < 4; ndx++)
3639 src << "\tgl_TessLevelOuter[" << ndx << "] = 1.0;\n";
3640
3641 src << "\n"
3642 << "\thighp uint invocationId = uint(gl_PrimitiveID);\n";
3643
3644 generateExecBufferIo(src, shaderSpec, "invocationId");
3645
3646 src << "}\n";
3647
3648 return src.str();
3649 }
3650
// No-op tessellation evaluation shader paired with TessControlExecutor: it
// only forwards a position derived from gl_TessCoord so the pipeline is valid.
static std::string generateEmptyTessEvalShader()
{
    return std::string("#version 450\n"
                       "#extension GL_EXT_tessellation_shader : require\n\n"
                       "layout(triangles, ccw) in;\n"
                       "\nvoid main (void)\n{\n"
                       "\tgl_Position = vec4(gl_TessCoord.xy, 0.0, 1.0);\n"
                       "}\n");
}
3666
// Registers the four GLSL programs for this executor: a minimal vertex
// shader, the value-processing tessellation control shader, a no-op
// tessellation evaluation shader and an empty fragment shader. The program
// names must match those loaded in TessellationExecutor::renderTess.
void TessControlExecutor::generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection)
{
    programCollection.glslSources.add("vert")
        << glu::VertexSource(generateVertexShaderForTess()) << shaderSpec.buildOptions;
    programCollection.glslSources.add("tess_control")
        << glu::TessellationControlSource(generateTessControlShader(shaderSpec)) << shaderSpec.buildOptions;
    programCollection.glslSources.add("tess_eval")
        << glu::TessellationEvaluationSource(generateEmptyTessEvalShader()) << shaderSpec.buildOptions;
    programCollection.glslSources.add("frag")
        << glu::FragmentSource(generateEmptyFragmentSource()) << shaderSpec.buildOptions;
}
3678
execute(int numValues,const void * const * inputs,void * const * outputs,VkDescriptorSet extraResources)3679 void TessControlExecutor::execute(int numValues, const void *const *inputs, void *const *outputs,
3680 VkDescriptorSet extraResources)
3681 {
3682 const uint32_t patchSize = 3;
3683
3684 initBuffers(numValues);
3685
3686 // Setup input buffer & copy data
3687 uploadInputBuffer(inputs, numValues, false);
3688
3689 renderTess(numValues, patchSize * numValues, patchSize, extraResources);
3690
3691 // Read back data
3692 readOutputBuffer(outputs, numValues);
3693 }
3694
3695 // TessEvaluationExecutor
3696
// Executor that evaluates the shader spec inside a tessellation evaluation
// shader: an isoline domain yields two invocations per patch, each mapped to
// one value via gl_PrimitiveID and gl_TessCoord.
class TessEvaluationExecutor : public TessellationExecutor
{
public:
    TessEvaluationExecutor(Context &context, const ShaderSpec &shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
    virtual ~TessEvaluationExecutor(void);

    // Registers the vert/tess_control/tess_eval/frag programs this executor expects.
    static void generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection);

    // Runs the shader once per value; inputs/outputs are packed per the spec's symbols.
    virtual void execute(int numValues, const void *const *inputs, void *const *outputs,
                         VkDescriptorSet extraResources);

protected:
    // Builds the GLSL source that performs the buffer I/O in the evaluation stage.
    static std::string generateTessEvalShader(const ShaderSpec &shaderSpec);
};
3711
// Forwards to TessellationExecutor, which performs the tessellation feature check.
TessEvaluationExecutor::TessEvaluationExecutor(Context &context, const ShaderSpec &shaderSpec,
                                               VkDescriptorSetLayout extraResourcesLayout)
    : TessellationExecutor(context, shaderSpec, extraResourcesLayout)
{
}
3717
TessEvaluationExecutor::~TessEvaluationExecutor(void)
{
    // Nothing to do: cleanup handled by base classes.
}
3721
// Pass-through tessellation control shader paired with TessEvaluationExecutor:
// emits single-vertex patches with all tessellation levels fixed at 1.
static std::string generatePassthroughTessControlShader(void)
{
    std::string src("#version 450\n"
                    "#extension GL_EXT_tessellation_shader : require\n\n"
                    "layout(vertices = 1) out;\n\n"
                    "void main (void)\n{\n");

    for (int level = 0; level < 2; ++level)
        src += "\tgl_TessLevelInner[" + std::to_string(level) + "] = 1.0;\n";

    for (int level = 0; level < 4; ++level)
        src += "\tgl_TessLevelOuter[" + std::to_string(level) + "] = 1.0;\n";

    src += "}\n";

    return src;
}
3743
generateTessEvalShader(const ShaderSpec & shaderSpec)3744 std::string TessEvaluationExecutor::generateTessEvalShader(const ShaderSpec &shaderSpec)
3745 {
3746 std::ostringstream src;
3747
3748 src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
3749
3750 if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
3751 src << "#extension GL_EXT_tessellation_shader : require\n\n";
3752
3753 if (!shaderSpec.globalDeclarations.empty())
3754 src << shaderSpec.globalDeclarations << "\n";
3755
3756 src << "\n";
3757
3758 src << "layout(isolines, equal_spacing) in;\n\n";
3759
3760 declareBufferBlocks(src, shaderSpec);
3761
3762 src << "void main (void)\n{\n"
3763 << "\tgl_Position = vec4(gl_TessCoord.x, 0.0, 0.0, 1.0);\n"
3764 << "\thighp uint invocationId = uint(gl_PrimitiveID)*2u + (gl_TessCoord.x > 0.5 ? 1u : 0u);\n";
3765
3766 generateExecBufferIo(src, shaderSpec, "invocationId");
3767
3768 src << "}\n";
3769
3770 return src.str();
3771 }
3772
// Registers the four GLSL programs for this executor: a minimal vertex
// shader, a pass-through tessellation control shader, the value-processing
// tessellation evaluation shader and an empty fragment shader. The program
// names must match those loaded in TessellationExecutor::renderTess.
void TessEvaluationExecutor::generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection)
{
    programCollection.glslSources.add("vert")
        << glu::VertexSource(generateVertexShaderForTess()) << shaderSpec.buildOptions;
    programCollection.glslSources.add("tess_control")
        << glu::TessellationControlSource(generatePassthroughTessControlShader()) << shaderSpec.buildOptions;
    programCollection.glslSources.add("tess_eval")
        << glu::TessellationEvaluationSource(generateTessEvalShader(shaderSpec)) << shaderSpec.buildOptions;
    programCollection.glslSources.add("frag")
        << glu::FragmentSource(generateEmptyFragmentSource()) << shaderSpec.buildOptions;
}
3784
execute(int numValues,const void * const * inputs,void * const * outputs,VkDescriptorSet extraResources)3785 void TessEvaluationExecutor::execute(int numValues, const void *const *inputs, void *const *outputs,
3786 VkDescriptorSet extraResources)
3787 {
3788 const int patchSize = 2;
3789 const int alignedValues = deAlign32(numValues, patchSize);
3790
3791 // Initialize buffers with aligned value count to make room for padding
3792 initBuffers(alignedValues);
3793
3794 // Setup input buffer & copy data
3795 uploadInputBuffer(inputs, numValues, false);
3796
3797 renderTess((uint32_t)alignedValues, (uint32_t)alignedValues, (uint32_t)patchSize, extraResources);
3798
3799 // Read back data
3800 readOutputBuffer(outputs, numValues);
3801 }
3802
3803 } // namespace
3804
3805 // ShaderExecutor
3806
ShaderExecutor::~ShaderExecutor(void)
{
    // Virtual destructor anchor; derived executors clean up via RAII members.
}
3810
areInputs16Bit(void) const3811 bool ShaderExecutor::areInputs16Bit(void) const
3812 {
3813 for (vector<Symbol>::const_iterator symIter = m_shaderSpec.inputs.begin(); symIter != m_shaderSpec.inputs.end();
3814 ++symIter)
3815 {
3816 if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
3817 return true;
3818 }
3819 return false;
3820 }
3821
areOutputs16Bit(void) const3822 bool ShaderExecutor::areOutputs16Bit(void) const
3823 {
3824 for (vector<Symbol>::const_iterator symIter = m_shaderSpec.outputs.begin(); symIter != m_shaderSpec.outputs.end();
3825 ++symIter)
3826 {
3827 if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
3828 return true;
3829 }
3830 return false;
3831 }
3832
isOutput16Bit(const size_t ndx) const3833 bool ShaderExecutor::isOutput16Bit(const size_t ndx) const
3834 {
3835 if (glu::isDataTypeFloat16OrVec(m_shaderSpec.outputs[ndx].varType.getBasicType()))
3836 return true;
3837 return false;
3838 }
3839
areInputs64Bit(void) const3840 bool ShaderExecutor::areInputs64Bit(void) const
3841 {
3842 for (vector<Symbol>::const_iterator symIter = m_shaderSpec.inputs.begin(); symIter != m_shaderSpec.inputs.end();
3843 ++symIter)
3844 {
3845 if (glu::isDataTypeDoubleType(symIter->varType.getBasicType()))
3846 return true;
3847 }
3848 return false;
3849 }
3850
areOutputs64Bit(void) const3851 bool ShaderExecutor::areOutputs64Bit(void) const
3852 {
3853 for (vector<Symbol>::const_iterator symIter = m_shaderSpec.outputs.begin(); symIter != m_shaderSpec.outputs.end();
3854 ++symIter)
3855 {
3856 if (glu::isDataTypeDoubleType(symIter->varType.getBasicType()))
3857 return true;
3858 }
3859 return false;
3860 }
3861
isOutput64Bit(const size_t ndx) const3862 bool ShaderExecutor::isOutput64Bit(const size_t ndx) const
3863 {
3864 if (glu::isDataTypeDoubleType(m_shaderSpec.outputs[ndx].varType.getBasicType()))
3865 return true;
3866 return false;
3867 }
3868
3869 // Utilities
3870
// Dispatches program-source generation to the executor class matching the
// requested shader stage. Mesh and task stages share MeshTaskShaderExecutor,
// differentiated by the useTask flag; both are compiled out for Vulkan SC.
// Throws InternalError for stages without an executor.
void generateSources(glu::ShaderType shaderType, const ShaderSpec &shaderSpec, vk::SourceCollections &dst)
{
    switch (shaderType)
    {
    case glu::SHADERTYPE_VERTEX:
        VertexShaderExecutor::generateSources(shaderSpec, dst);
        break;
    case glu::SHADERTYPE_TESSELLATION_CONTROL:
        TessControlExecutor::generateSources(shaderSpec, dst);
        break;
    case glu::SHADERTYPE_TESSELLATION_EVALUATION:
        TessEvaluationExecutor::generateSources(shaderSpec, dst);
        break;
    case glu::SHADERTYPE_GEOMETRY:
        GeometryShaderExecutor::generateSources(shaderSpec, dst);
        break;
    case glu::SHADERTYPE_FRAGMENT:
        FragmentShaderExecutor::generateSources(shaderSpec, dst);
        break;
    case glu::SHADERTYPE_COMPUTE:
        ComputeShaderExecutor::generateSources(shaderSpec, dst);
        break;
#ifndef CTS_USES_VULKANSC
    case glu::SHADERTYPE_MESH:
        MeshTaskShaderExecutor::generateSources(shaderSpec, dst, false /*useTask*/);
        break;
    case glu::SHADERTYPE_TASK:
        MeshTaskShaderExecutor::generateSources(shaderSpec, dst, true /*useTask*/);
        break;
#endif // CTS_USES_VULKANSC
    default:
        TCU_THROW(InternalError, "Unsupported shader type");
    }
}
3905
createExecutor(Context & context,glu::ShaderType shaderType,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)3906 ShaderExecutor *createExecutor(Context &context, glu::ShaderType shaderType, const ShaderSpec &shaderSpec,
3907 VkDescriptorSetLayout extraResourcesLayout)
3908 {
3909 switch (shaderType)
3910 {
3911 case glu::SHADERTYPE_VERTEX:
3912 return new VertexShaderExecutor(context, shaderSpec, extraResourcesLayout);
3913 case glu::SHADERTYPE_TESSELLATION_CONTROL:
3914 return new TessControlExecutor(context, shaderSpec, extraResourcesLayout);
3915 case glu::SHADERTYPE_TESSELLATION_EVALUATION:
3916 return new TessEvaluationExecutor(context, shaderSpec, extraResourcesLayout);
3917 case glu::SHADERTYPE_GEOMETRY:
3918 return new GeometryShaderExecutor(context, shaderSpec, extraResourcesLayout);
3919 case glu::SHADERTYPE_FRAGMENT:
3920 return new FragmentShaderExecutor(context, shaderSpec, extraResourcesLayout);
3921 case glu::SHADERTYPE_COMPUTE:
3922 return new ComputeShaderExecutor(context, shaderSpec, extraResourcesLayout);
3923 #ifndef CTS_USES_VULKANSC
3924 case glu::SHADERTYPE_MESH:
3925 return new MeshTaskShaderExecutor(context, shaderSpec, extraResourcesLayout);
3926 case glu::SHADERTYPE_TASK:
3927 return new MeshTaskShaderExecutor(context, shaderSpec, extraResourcesLayout);
3928 #endif // CTS_USES_VULKANSC
3929 default:
3930 TCU_THROW(InternalError, "Unsupported shader type");
3931 }
3932 }
3933
executorSupported(glu::ShaderType shaderType)3934 bool executorSupported(glu::ShaderType shaderType)
3935 {
3936 switch (shaderType)
3937 {
3938 case glu::SHADERTYPE_VERTEX:
3939 case glu::SHADERTYPE_TESSELLATION_CONTROL:
3940 case glu::SHADERTYPE_TESSELLATION_EVALUATION:
3941 case glu::SHADERTYPE_GEOMETRY:
3942 case glu::SHADERTYPE_FRAGMENT:
3943 case glu::SHADERTYPE_COMPUTE:
3944 case glu::SHADERTYPE_MESH:
3945 case glu::SHADERTYPE_TASK:
3946 return true;
3947 default:
3948 return false;
3949 }
3950 }
3951
// Throws NotSupportedError unless the device supports both the requested
// shader stage and the storage-buffer writes/atomics that the executors'
// buffer I/O relies on in that stage. Also rejects isoline tessellation on
// portability-subset implementations that lack it (needed by
// TessEvaluationExecutor's isoline-based shader).
void checkSupportShader(Context &context, const glu::ShaderType shaderType)
{
    // Stage support.
    switch (shaderType)
    {
    case glu::SHADERTYPE_TESSELLATION_CONTROL:
    case glu::SHADERTYPE_TESSELLATION_EVALUATION:
        context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_TESSELLATION_SHADER);
        break;

    case glu::SHADERTYPE_GEOMETRY:
        context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_GEOMETRY_SHADER);
        break;

    case glu::SHADERTYPE_TASK:
    case glu::SHADERTYPE_MESH:
    {
        context.requireDeviceFunctionality("VK_EXT_mesh_shader");

        // VK_EXT_mesh_shader makes task-shader support an optional feature,
        // so it needs an extra check beyond the extension itself.
        if (shaderType == glu::SHADERTYPE_TASK)
        {
#ifndef CTS_USES_VULKANSC
            const auto &features = context.getMeshShaderFeaturesEXT();
            if (!features.taskShader)
                TCU_THROW(NotSupportedError, "taskShader not supported");
#else // CTS_USES_VULKANSC
            TCU_THROW(NotSupportedError, "taskShader not supported");
#endif // CTS_USES_VULKANSC
        }
    }
    break;

    default:
        // Vertex, fragment and compute stages are always available.
        break;
    }

    // Stores and atomic operation support.
    switch (shaderType)
    {
    case glu::SHADERTYPE_VERTEX:
    case glu::SHADERTYPE_TESSELLATION_CONTROL:
    case glu::SHADERTYPE_TESSELLATION_EVALUATION:
    case glu::SHADERTYPE_GEOMETRY:
    case glu::SHADERTYPE_TASK:
    case glu::SHADERTYPE_MESH:
        context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_VERTEX_PIPELINE_STORES_AND_ATOMICS);
        break;
    case glu::SHADERTYPE_FRAGMENT:
        context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_FRAGMENT_STORES_AND_ATOMICS);
        break;
    case glu::SHADERTYPE_COMPUTE:
        // Compute always supports storage buffer writes; no feature needed.
        break;
    default:
        DE_FATAL("Unsupported shader type");
        break;
    }

#ifndef CTS_USES_VULKANSC
    if (shaderType == glu::SHADERTYPE_TESSELLATION_EVALUATION &&
        context.isDeviceFunctionalitySupported("VK_KHR_portability_subset") &&
        !context.getPortabilitySubsetFeatures().tessellationIsolines)
    {
        TCU_THROW(NotSupportedError,
                  "VK_KHR_portability_subset: Tessellation iso lines are not supported by this implementation");
    }
#endif // CTS_USES_VULKANSC
}
4019
4020 } // namespace shaderexecutor
4021 } // namespace vkt
4022