1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2015 The Khronos Group Inc.
6 * Copyright (c) 2015 Samsung Electronics Co., Ltd.
7 * Copyright (c) 2016 The Android Open Source Project
8 *
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
20 *
21 *//*!
22 * \file
23 * \brief Vulkan ShaderExecutor
24 *//*--------------------------------------------------------------------*/
25
26 #include "vktShaderExecutor.hpp"
27
28 #include "vkMemUtil.hpp"
29 #include "vkRef.hpp"
30 #include "vkPrograms.hpp"
31 #include "vkRefUtil.hpp"
32 #include "vkTypeUtil.hpp"
33 #include "vkQueryUtil.hpp"
34 #include "vkBuilderUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkObjUtil.hpp"
37
38 #include "gluShaderUtil.hpp"
39
40 #include "tcuVector.hpp"
41 #include "tcuTestLog.hpp"
42 #include "tcuTextureUtil.hpp"
43
44 #include "deUniquePtr.hpp"
45 #include "deStringUtil.hpp"
46 #include "deSharedPtr.hpp"
47 #include "deFloat16.h"
48
49 #include <map>
50 #include <sstream>
51 #include <iostream>
52
53 using std::vector;
54 using namespace vk;
55
56 namespace vkt
57 {
58 namespace shaderexecutor
59 {
60 namespace
61 {
62
// Render target dimensions. One point/fragment is produced per input value,
// so a single draw can evaluate up to WIDTH*HEIGHT values.
enum
{
	DEFAULT_RENDER_WIDTH	= 100,
	DEFAULT_RENDER_HEIGHT	= 100,
};
68
// Common typedefs

// Shared-pointer aliases for Vulkan handles and allocations that must stay
// alive for the duration of an execution (stored in std::vector members).
typedef de::SharedPtr<Unique<VkImage> >			VkImageSp;
typedef de::SharedPtr<Unique<VkImageView> >		VkImageViewSp;
typedef de::SharedPtr<Unique<VkBuffer> >		VkBufferSp;
typedef de::SharedPtr<Allocation>				AllocationSp;

// Maps a glu::DataType to the VkFormat used for its vertex attribute (defined below).
static VkFormat getAttributeFormat (const glu::DataType dataType);

// Shader utilities
79
getDefaultClearColor(void)80 static VkClearValue getDefaultClearColor (void)
81 {
82 return makeClearValueColorF32(0.125f, 0.25f, 0.5f, 1.0f);
83 }
84
// Builds a minimal GLSL 4.50 fragment shader that writes a constant zero
// color; used when the fragment stage only needs to exist, not compute.
static std::string generateEmptyFragmentSource (void)
{
	std::string source;

	source += "#version 450\n"
			  "layout(location=0) out highp vec4 o_color;\n";
	source += "void main (void)\n{\n";
	source += " o_color = vec4(0.0);\n";
	source += "}\n";

	return source;
}
98
packFloat16Bit(std::ostream & src,const std::vector<Symbol> & outputs)99 void packFloat16Bit (std::ostream& src, const std::vector<Symbol>& outputs)
100 {
101 for (vector<Symbol>::const_iterator symIter = outputs.begin(); symIter != outputs.end(); ++symIter)
102 {
103 if(glu::isDataTypeFloatType(symIter->varType.getBasicType()))
104 {
105 if(glu::isDataTypeVector(symIter->varType.getBasicType()))
106 {
107 for(int i = 0; i < glu::getDataTypeScalarSize(symIter->varType.getBasicType()); i++)
108 {
109 src << "\tpacked_" << symIter->name << "[" << i << "] = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name << "[" << i << "], -1.0)));\n";
110 }
111 }
112 else if (glu::isDataTypeMatrix(symIter->varType.getBasicType()))
113 {
114 int maxRow = 0;
115 int maxCol = 0;
116 switch (symIter->varType.getBasicType())
117 {
118 case glu::TYPE_FLOAT_MAT2:
119 maxRow = maxCol = 2;
120 break;
121 case glu::TYPE_FLOAT_MAT2X3:
122 maxRow = 2;
123 maxCol = 3;
124 break;
125 case glu::TYPE_FLOAT_MAT2X4:
126 maxRow = 2;
127 maxCol = 4;
128 break;
129 case glu::TYPE_FLOAT_MAT3X2:
130 maxRow = 3;
131 maxCol = 2;
132 break;
133 case glu::TYPE_FLOAT_MAT3:
134 maxRow = maxCol = 3;
135 break;
136 case glu::TYPE_FLOAT_MAT3X4:
137 maxRow = 3;
138 maxCol = 4;
139 break;
140 case glu::TYPE_FLOAT_MAT4X2:
141 maxRow = 4;
142 maxCol = 2;
143 break;
144 case glu::TYPE_FLOAT_MAT4X3:
145 maxRow = 4;
146 maxCol = 3;
147 break;
148 case glu::TYPE_FLOAT_MAT4:
149 maxRow = maxCol = 4;
150 break;
151 default:
152 DE_ASSERT(false);
153 break;
154 }
155
156 for(int i = 0; i < maxRow; i++)
157 for(int j = 0; j < maxCol; j++)
158 {
159 src << "\tpacked_" << symIter->name << "[" << i << "][" << j << "] = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name << "[" << i << "][" << j << "], -1.0)));\n";
160 }
161 }
162 else
163 {
164 src << "\tpacked_" << symIter->name << " = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name << ", -1.0)));\n";
165 }
166 }
167 }
168 }
169
generatePassthroughVertexShader(const ShaderSpec & shaderSpec,const char * inputPrefix,const char * outputPrefix)170 static std::string generatePassthroughVertexShader (const ShaderSpec& shaderSpec, const char* inputPrefix, const char* outputPrefix)
171 {
172 std::ostringstream src;
173 int location = 0;
174
175 src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
176
177 if (!shaderSpec.globalDeclarations.empty())
178 src << shaderSpec.globalDeclarations << "\n";
179
180 src << "layout(location = " << location << ") in highp vec4 a_position;\n";
181
182 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
183 {
184 location++;
185 src << "layout(location = "<< location << ") in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n"
186 << "layout(location = " << location - 1 << ") flat out " << glu::declare(input->varType, outputPrefix + input->name) << ";\n";
187 }
188
189 src << "\nvoid main (void)\n{\n"
190 << " gl_Position = a_position;\n"
191 << " gl_PointSize = 1.0;\n";
192
193 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
194 src << "\t" << outputPrefix << input->name << " = " << inputPrefix << input->name << ";\n";
195
196 src << "}\n";
197
198 return src.str();
199 }
200
// Builds the vertex shader for the vertex-stage executor: inputs are read
// from vertex attributes (attribute location 0 is reserved for a_position),
// the user operation from shaderSpec.source is executed, and the results are
// written to flat outputs for the passthrough fragment stage.
// When shaderSpec.packFloat16Bit is set, float inputs/outputs are evaluated
// as float16 working copies and results are re-packed into 32-bit "packed_"
// carriers (see packFloat16Bit). Bool outputs travel as int/ivec since bools
// cannot be shader interface variables.
static std::string generateVertexShader (const ShaderSpec& shaderSpec, const std::string& inputPrefix, const std::string& outputPrefix)
{
	DE_ASSERT(!inputPrefix.empty() && !outputPrefix.empty());

	std::ostringstream src;

	src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";

	if (!shaderSpec.globalDeclarations.empty())
		src << shaderSpec.globalDeclarations << "\n";

	src << "layout(location = 0) in highp vec4 a_position;\n";

	// User inputs start at attribute location 1 (0 is a_position).
	int locationNumber = 1;
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input, ++locationNumber)
	{
		src << "layout(location = " << locationNumber << ") in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n";
	}

	// Declare interface outputs; bool/bvec become equally sized int/ivec.
	locationNumber = 0;
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output, ++locationNumber)
	{
		DE_ASSERT(output->varType.isBasicType());

		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
		{
			const int				vecSize			= glu::getDataTypeScalarSize(output->varType.getBasicType());
			const glu::DataType		intBaseType		= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
			const glu::VarType		intType			(intBaseType, glu::PRECISION_HIGHP);

			src << "layout(location = " << locationNumber << ") flat out " << glu::declare(intType, outputPrefix + output->name) << ";\n";
		}
		else
			src << "layout(location = " << locationNumber << ") flat out " << glu::declare(output->varType, outputPrefix + output->name) << ";\n";
	}

	src << "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< " gl_Position = a_position;\n"
		<< " gl_PointSize = 1.0;\n";

	// Declare & fetch local input variables
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
	{
		// In 16-bit mode, float inputs are converted into float16 working copies.
		if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(input->varType.getBasicType()))
		{
			const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(input->varType.getBasicType()));
			src << "\t" << tname << " " << input->name << " = " << tname << "(" << inputPrefix << input->name << ");\n";
		}
		else
			src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << ";\n";
	}

	// Declare local output variables
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
	{
		if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
		{
			// A float16 result variable plus a 32-bit "packed_" carrier that
			// packFloat16Bit() fills and that is copied to the real output below.
			const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(output->varType.getBasicType()));
			src << "\t" << tname << " " << output->name << ";\n";
			const char* tname2 = glu::getDataTypeName(output->varType.getBasicType());
			src << "\t" << tname2 << " " << "packed_" << output->name << ";\n";
		}
		else
			src << "\t" << glu::declare(output->varType, output->name) << ";\n";
	}

	// Operation - indented to correct level.
	{
		std::istringstream	opSrc	(shaderSpec.source);
		std::string			line;

		while (std::getline(opSrc, line))
			src << "\t" << line << "\n";
	}

	if (shaderSpec.packFloat16Bit)
		packFloat16Bit(src, shaderSpec.outputs);

	// Assignments to outputs.
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
	{
		if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
		{
			// Forward the packed bit pattern, not the raw float16 value.
			src << "\t" << outputPrefix << output->name << " = packed_" << output->name << ";\n";
		}
		else
		{
			if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
			{
				// Cast bool results to the int type used for the interface.
				const int				vecSize			= glu::getDataTypeScalarSize(output->varType.getBasicType());
				const glu::DataType		intBaseType		= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;

				src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << output->name << ");\n";
			}
			else
				src << "\t" << outputPrefix << output->name << " = " << output->name << ";\n";
		}
	}

	src << "}\n";

	return src.str();
}
306
// Describes how the shader output symbols map onto render target locations
// (multi-location outputs such as matrices occupy several consecutive slots).
struct FragmentOutputLayout
{
	std::vector<const Symbol*>	locationSymbols;		//! Symbols by location (one entry per occupied location)
	std::map<std::string, int>	locationMap;			//! Map from symbol name to start location
};
312
// Declares the fragment shader outputs for the spec at the locations recorded
// in outLocationMap:
// - float outputs become uint outputs when useIntOutputs is set (values are
//   written as raw bit patterns via floatBitsToUint in the assignment step),
// - bool outputs are declared as equally sized int outputs,
// - matrix outputs are split into one uint vector output per column at
//   consecutive locations ("<name>_<colNdx>").
static void generateFragShaderOutputDecl (std::ostream& src, const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& outputPrefix)
{
	for (int outNdx = 0; outNdx < (int)shaderSpec.outputs.size(); ++outNdx)
	{
		const Symbol&				output		= shaderSpec.outputs[outNdx];
		const int					location	= de::lookup(outLocationMap, output.name);
		const std::string			outVarName	= outputPrefix + output.name;
		glu::VariableDeclaration	decl		(output.varType, outVarName, glu::STORAGE_OUT, glu::INTERPOLATION_LAST, glu::Layout(location));

		TCU_CHECK_INTERNAL(output.varType.isBasicType());

		if (useIntOutputs && glu::isDataTypeFloatOrVec(output.varType.getBasicType()))
		{
			// Float output rendered as raw 32-bit uint pattern.
			const int			vecSize			= glu::getDataTypeScalarSize(output.varType.getBasicType());
			const glu::DataType	uintBasicType	= vecSize > 1 ? glu::getDataTypeUintVec(vecSize) : glu::TYPE_UINT;
			const glu::VarType	uintType		(uintBasicType, glu::PRECISION_HIGHP);

			decl.varType = uintType;
			src << decl << ";\n";
		}
		else if (glu::isDataTypeBoolOrBVec(output.varType.getBasicType()))
		{
			// Bool output rendered as an equally sized int output.
			const int			vecSize			= glu::getDataTypeScalarSize(output.varType.getBasicType());
			const glu::DataType	intBasicType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
			const glu::VarType	intType			(intBasicType, glu::PRECISION_HIGHP);

			decl.varType = intType;
			src << decl << ";\n";
		}
		else if (glu::isDataTypeMatrix(output.varType.getBasicType()))
		{
			// One uint vector output per matrix column; note this branch is
			// taken regardless of useIntOutputs (columns are always declared
			// as uint vectors here).
			const int			vecSize			= glu::getDataTypeMatrixNumRows(output.varType.getBasicType());
			const int			numVecs			= glu::getDataTypeMatrixNumColumns(output.varType.getBasicType());
			const glu::DataType	uintBasicType	= glu::getDataTypeUintVec(vecSize);
			const glu::VarType	uintType		(uintBasicType, glu::PRECISION_HIGHP);

			decl.varType = uintType;
			for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
			{
				decl.name				= outVarName + "_" + de::toString(vecNdx);
				decl.layout.location	= location + vecNdx;
				src << decl << ";\n";
			}
		}
		else
			src << decl << ";\n";
	}
}
361
generateFragShaderOutAssign(std::ostream & src,const ShaderSpec & shaderSpec,bool useIntOutputs,const std::string & valuePrefix,const std::string & outputPrefix,const bool isInput16Bit=false)362 static void generateFragShaderOutAssign (std::ostream& src, const ShaderSpec& shaderSpec, bool useIntOutputs, const std::string& valuePrefix, const std::string& outputPrefix, const bool isInput16Bit = false)
363 {
364 if (isInput16Bit)
365 packFloat16Bit(src, shaderSpec.outputs);
366
367 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
368 {
369 const std::string packPrefix = (isInput16Bit && glu::isDataTypeFloatType(output->varType.getBasicType())) ? "packed_" : "";
370
371 if (useIntOutputs && glu::isDataTypeFloatOrVec(output->varType.getBasicType()))
372 src << " o_" << output->name << " = floatBitsToUint(" << valuePrefix << output->name << ");\n";
373 else if (glu::isDataTypeMatrix(output->varType.getBasicType()))
374 {
375 const int numVecs = glu::getDataTypeMatrixNumColumns(output->varType.getBasicType());
376
377 for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
378 if (useIntOutputs)
379 src << "\t" << outputPrefix << output->name << "_" << vecNdx << " = floatBitsToUint(" << valuePrefix << output->name << "[" << vecNdx << "]);\n";
380 else
381 src << "\t" << outputPrefix << output->name << "_" << vecNdx << " = " << packPrefix << valuePrefix << output->name << "[" << vecNdx << "];\n";
382 }
383 else if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
384 {
385 const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
386 const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
387
388 src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << valuePrefix << output->name << ");\n";
389 }
390 else
391 src << "\t" << outputPrefix << output->name << " = " << packPrefix << valuePrefix << output->name << ";\n";
392 }
393 }
394
// Builds a fragment shader that forwards values computed in an earlier stage
// (arriving as flat inputs named inputPrefix + symbol) to the fragment
// outputs declared via generateFragShaderOutputDecl(). Bool symbols travel
// as int/ivec since bools cannot be shader interface variables.
static std::string generatePassthroughFragmentShader (const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& inputPrefix, const std::string& outputPrefix)
{
	std::ostringstream src;

	src <<"#version 450\n";

	if (!shaderSpec.globalDeclarations.empty())
		src << shaderSpec.globalDeclarations << "\n";

	// One flat input per output symbol, matching the previous stage's outputs.
	int locationNumber = 0;
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output, ++locationNumber)
	{
		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
		{
			// Bools arrive from the previous stage as equally sized ints.
			const int				vecSize			= glu::getDataTypeScalarSize(output->varType.getBasicType());
			const glu::DataType		intBaseType		= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
			const glu::VarType		intType			(intBaseType, glu::PRECISION_HIGHP);

			src << "layout(location = " << locationNumber << ") flat in " << glu::declare(intType, inputPrefix + output->name) << ";\n";
		}
		else
			src << "layout(location = " << locationNumber << ") flat in " << glu::declare(output->varType, inputPrefix + output->name) << ";\n";
	}

	generateFragShaderOutputDecl(src, shaderSpec, useIntOutputs, outLocationMap, outputPrefix);

	src << "\nvoid main (void)\n{\n";

	generateFragShaderOutAssign(src, shaderSpec, useIntOutputs, inputPrefix, outputPrefix);

	src << "}\n";

	return src.str();
}
429
// Builds the geometry shader for the geometry-stage executor: consumes one
// input point per value, executes the user operation from shaderSpec.source
// and re-emits a single point whose flat outputs carry the results to the
// passthrough fragment stage. Bool outputs are converted to int/ivec since
// bools cannot be shader interface variables.
static std::string generateGeometryShader (const ShaderSpec& shaderSpec, const std::string& inputPrefix, const std::string& outputPrefix, const bool pointSizeSupported)
{
	DE_ASSERT(!inputPrefix.empty() && !outputPrefix.empty());

	std::ostringstream src;

	src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";

	// GLSL ES 3.1 needs the geometry shader extension enabled explicitly.
	if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
		src << "#extension GL_EXT_geometry_shader : require\n";

	if (!shaderSpec.globalDeclarations.empty())
		src << shaderSpec.globalDeclarations << "\n";

	src << "layout(points) in;\n"
		<< "layout(points, max_vertices = 1) out;\n";

	// Inputs arrive as single-element arrays (one point per input primitive).
	int locationNumber = 0;
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input, ++locationNumber)
		src << "layout(location = " << locationNumber << ") flat in " << glu::declare(input->varType, inputPrefix + input->name) << "[];\n";

	// Declare interface outputs; bool/bvec become equally sized int/ivec.
	locationNumber = 0;
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output, ++locationNumber)
	{
		DE_ASSERT(output->varType.isBasicType());

		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
		{
			const int				vecSize			= glu::getDataTypeScalarSize(output->varType.getBasicType());
			const glu::DataType		intBaseType		= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
			const glu::VarType		intType			(intBaseType, glu::PRECISION_HIGHP);

			src << "layout(location = " << locationNumber << ") flat out " << glu::declare(intType, outputPrefix + output->name) << ";\n";
		}
		else
			src << "layout(location = " << locationNumber << ") flat out " << glu::declare(output->varType, outputPrefix + output->name) << ";\n";
	}

	src << "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< " gl_Position = gl_in[0].gl_Position;\n"
		<< (pointSizeSupported ? " gl_PointSize = gl_in[0].gl_PointSize;\n\n" : "");

	// Fetch input variables
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
		src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << "[0];\n";

	// Declare local output variables.
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
		src << "\t" << glu::declare(output->varType, output->name) << ";\n";

	src << "\n";

	// Operation - indented to correct level.
	{
		std::istringstream	opSrc	(shaderSpec.source);
		std::string			line;

		while (std::getline(opSrc, line))
			src << "\t" << line << "\n";
	}

	// Assignments to outputs; bools are cast to their int counterparts.
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
	{
		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
		{
			const int				vecSize			= glu::getDataTypeScalarSize(output->varType.getBasicType());
			const glu::DataType		intBaseType		= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;

			src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << output->name << ");\n";
		}
		else
			src << "\t" << outputPrefix << output->name << " = " << output->name << ";\n";
	}

	src << " EmitVertex();\n"
		<< " EndPrimitive();\n"
		<< "}\n";

	return src.str();
}
513
// Builds the fragment shader for the fragment-stage executor: inputs arrive
// as flat interface variables, the user operation from shaderSpec.source is
// executed per fragment, and results are written to the outputs declared via
// generateFragShaderOutputDecl(). When shaderSpec.packFloat16Bit is set,
// float values are evaluated as float16 working copies and re-packed into
// 32-bit "packed_" carriers before assignment (see generateFragShaderOutAssign).
static std::string generateFragmentShader (const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& inputPrefix, const std::string& outputPrefix)
{
	std::ostringstream src;
	src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
	if (!shaderSpec.globalDeclarations.empty())
		src << shaderSpec.globalDeclarations << "\n";

	int locationNumber = 0;
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input, ++locationNumber)
	{
		src << "layout(location = " << locationNumber << ") flat in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n";
	}

	generateFragShaderOutputDecl(src, shaderSpec, useIntOutputs, outLocationMap, outputPrefix);

	src << "\nvoid main (void)\n{\n";

	// Declare & fetch local input variables
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
	{
		// In 16-bit mode, float inputs are converted into float16 working copies.
		if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(input->varType.getBasicType()))
		{
			const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(input->varType.getBasicType()));
			src << "\t" << tname << " " << input->name << " = " << tname << "(" << inputPrefix << input->name << ");\n";
		}
		else
			src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << ";\n";
	}

	// Declare output variables
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
	{
		if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
		{
			// A float16 result variable plus a 32-bit "packed_" carrier filled
			// by packFloat16Bit() during the output-assignment step.
			const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(output->varType.getBasicType()));
			src << "\t" << tname << " " << output->name << ";\n";
			const char* tname2 = glu::getDataTypeName(output->varType.getBasicType());
			src << "\t" << tname2 << " " << "packed_" << output->name << ";\n";
		}
		else
			src << "\t" << glu::declare(output->varType, output->name) << ";\n";
	}

	// Operation - indented to correct level.
	{
		std::istringstream	opSrc	(shaderSpec.source);
		std::string			line;

		while (std::getline(opSrc, line))
			src << "\t" << line << "\n";
	}

	generateFragShaderOutAssign(src, shaderSpec, useIntOutputs, "", outputPrefix, shaderSpec.packFloat16Bit);

	src << "}\n";

	return src.str();
}
572
573 // FragmentOutExecutor
574
// Base class for executors that evaluate the shader expression by rendering
// one point per input value and reading the results back from fragment
// outputs. Inputs are fed through vertex attributes; derived classes decide
// which pipeline stage runs the actual operation.
class FragmentOutExecutor : public ShaderExecutor
{
public:
								// Throws NotSupportedError if an input's attribute format is not
								// usable as a vertex buffer format on this device.
								FragmentOutExecutor		(Context& context, glu::ShaderType shaderType, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual						~FragmentOutExecutor	(void);

	// Evaluates the operation for numValues input tuples, writing results to outputs.
	virtual void				execute					(int numValues,
														 const void* const* inputs,
														 void* const* outputs,
														 VkDescriptorSet extraResources);

protected:
	const glu::ShaderType		m_shaderType;		// Stage that runs the operation.
	const FragmentOutputLayout	m_outputLayout;		// Output name -> render target location mapping.

private:
	void						bindAttributes			(int numValues,
														 const void* const* inputs);

	void						addAttribute			(deUint32 bindingLocation,
														 VkFormat format,
														 deUint32 sizePerElement,
														 deUint32 count,
														 const void* dataPtr);
	// reinit render data members
	virtual void				clearRenderData			(void);

	const VkDescriptorSetLayout	m_extraResourcesLayout;	// Layout of caller-provided extra resources (not owned).

	// Per-execution vertex input state; rebuilt by bindAttributes().
	std::vector<VkVertexInputBindingDescription>	m_vertexBindingDescriptions;
	std::vector<VkVertexInputAttributeDescription>	m_vertexAttributeDescriptions;
	std::vector<VkBufferSp>							m_vertexBuffers;
	std::vector<AllocationSp>						m_vertexBufferAllocs;
};
609
computeFragmentOutputLayout(const std::vector<Symbol> & symbols)610 static FragmentOutputLayout computeFragmentOutputLayout (const std::vector<Symbol>& symbols)
611 {
612 FragmentOutputLayout ret;
613 int location = 0;
614
615 for (std::vector<Symbol>::const_iterator it = symbols.begin(); it != symbols.end(); ++it)
616 {
617 const int numLocations = glu::getDataTypeNumLocations(it->varType.getBasicType());
618
619 TCU_CHECK_INTERNAL(!de::contains(ret.locationMap, it->name));
620 de::insert(ret.locationMap, it->name, location);
621 location += numLocations;
622
623 for (int ndx = 0; ndx < numLocations; ++ndx)
624 ret.locationSymbols.push_back(&*it);
625 }
626
627 return ret;
628 }
629
// Computes the output location layout up front and verifies that every
// input's mapped attribute format can be used as a vertex buffer format on
// this device; throws NotSupportedError otherwise so the test is skipped
// before any resources are created.
FragmentOutExecutor::FragmentOutExecutor (Context& context, glu::ShaderType shaderType, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: ShaderExecutor			(context, shaderSpec)
	, m_shaderType				(shaderType)
	, m_outputLayout			(computeFragmentOutputLayout(m_shaderSpec.outputs))
	, m_extraResourcesLayout	(extraResourcesLayout)
{
	const VkPhysicalDevice		physicalDevice	= m_context.getPhysicalDevice();
	const InstanceInterface&	vki				= m_context.getInstanceInterface();

	// Input attributes
	for (int inputNdx = 0; inputNdx < (int)m_shaderSpec.inputs.size(); inputNdx++)
	{
		const Symbol&				symbol				= m_shaderSpec.inputs[inputNdx];
		const glu::DataType			basicType			= symbol.varType.getBasicType();
		const VkFormat				format				= getAttributeFormat(basicType);
		const VkFormatProperties	formatProperties	= getPhysicalDeviceFormatProperties(vki, physicalDevice, format);
		// Every input is fed through a vertex attribute, so the format must
		// support vertex buffer usage.
		if ((formatProperties.bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT) == 0)
			TCU_THROW(NotSupportedError, "format not supported by device as vertex buffer attribute format");
	}
}
650
// All resources are held by RAII members; nothing to release explicitly.
FragmentOutExecutor::~FragmentOutExecutor (void)
{
}
654
computeVertexPositions(int numValues,const tcu::IVec2 & renderSize)655 static std::vector<tcu::Vec2> computeVertexPositions (int numValues, const tcu::IVec2& renderSize)
656 {
657 std::vector<tcu::Vec2> positions(numValues);
658 for (int valNdx = 0; valNdx < numValues; valNdx++)
659 {
660 const int ix = valNdx % renderSize.x();
661 const int iy = valNdx / renderSize.x();
662 const float fx = -1.0f + 2.0f*((float(ix) + 0.5f) / float(renderSize.x()));
663 const float fy = -1.0f + 2.0f*((float(iy) + 0.5f) / float(renderSize.y()));
664
665 positions[valNdx] = tcu::Vec2(fx, fy);
666 }
667
668 return positions;
669 }
670
getRenderbufferFormatForOutput(const glu::VarType & outputType,bool useIntOutputs)671 static tcu::TextureFormat getRenderbufferFormatForOutput (const glu::VarType& outputType, bool useIntOutputs)
672 {
673 const tcu::TextureFormat::ChannelOrder channelOrderMap[] =
674 {
675 tcu::TextureFormat::R,
676 tcu::TextureFormat::RG,
677 tcu::TextureFormat::RGBA, // No RGB variants available.
678 tcu::TextureFormat::RGBA
679 };
680
681 const glu::DataType basicType = outputType.getBasicType();
682 const int numComps = glu::getDataTypeNumComponents(basicType);
683 tcu::TextureFormat::ChannelType channelType;
684
685 switch (glu::getDataTypeScalarType(basicType))
686 {
687 case glu::TYPE_UINT: channelType = tcu::TextureFormat::UNSIGNED_INT32; break;
688 case glu::TYPE_INT: channelType = tcu::TextureFormat::SIGNED_INT32; break;
689 case glu::TYPE_BOOL: channelType = tcu::TextureFormat::SIGNED_INT32; break;
690 case glu::TYPE_FLOAT: channelType = useIntOutputs ? tcu::TextureFormat::UNSIGNED_INT32 : tcu::TextureFormat::FLOAT; break;
691 case glu::TYPE_FLOAT16: channelType = useIntOutputs ? tcu::TextureFormat::UNSIGNED_INT32 : tcu::TextureFormat::HALF_FLOAT; break;
692 default:
693 throw tcu::InternalError("Invalid output type");
694 }
695
696 DE_ASSERT(de::inRange<int>(numComps, 1, DE_LENGTH_OF_ARRAY(channelOrderMap)));
697
698 return tcu::TextureFormat(channelOrderMap[numComps-1], channelType);
699 }
700
// Maps a glu::DataType to the VkFormat used when feeding values of that type
// through a vertex attribute. Matrix types map to the format of a single
// column vector (each column is bound as a separate attribute).
// Asserts and returns VK_FORMAT_UNDEFINED for unsupported types.
static VkFormat getAttributeFormat (const glu::DataType dataType)
{
	switch (dataType)
	{
		case glu::TYPE_FLOAT16:			return VK_FORMAT_R16_SFLOAT;
		case glu::TYPE_FLOAT16_VEC2:	return VK_FORMAT_R16G16_SFLOAT;
		case glu::TYPE_FLOAT16_VEC3:	return VK_FORMAT_R16G16B16_SFLOAT;
		case glu::TYPE_FLOAT16_VEC4:	return VK_FORMAT_R16G16B16A16_SFLOAT;

		case glu::TYPE_FLOAT:			return VK_FORMAT_R32_SFLOAT;
		case glu::TYPE_FLOAT_VEC2:		return VK_FORMAT_R32G32_SFLOAT;
		case glu::TYPE_FLOAT_VEC3:		return VK_FORMAT_R32G32B32_SFLOAT;
		case glu::TYPE_FLOAT_VEC4:		return VK_FORMAT_R32G32B32A32_SFLOAT;

		case glu::TYPE_INT:				return VK_FORMAT_R32_SINT;
		case glu::TYPE_INT_VEC2:		return VK_FORMAT_R32G32_SINT;
		case glu::TYPE_INT_VEC3:		return VK_FORMAT_R32G32B32_SINT;
		case glu::TYPE_INT_VEC4:		return VK_FORMAT_R32G32B32A32_SINT;

		case glu::TYPE_UINT:			return VK_FORMAT_R32_UINT;
		case glu::TYPE_UINT_VEC2:		return VK_FORMAT_R32G32_UINT;
		case glu::TYPE_UINT_VEC3:		return VK_FORMAT_R32G32B32_UINT;
		case glu::TYPE_UINT_VEC4:		return VK_FORMAT_R32G32B32A32_UINT;

		// Matrices: format of one column (MATnXm has m rows per column).
		case glu::TYPE_FLOAT_MAT2:		return VK_FORMAT_R32G32_SFLOAT;
		case glu::TYPE_FLOAT_MAT2X3:	return VK_FORMAT_R32G32B32_SFLOAT;
		case glu::TYPE_FLOAT_MAT2X4:	return VK_FORMAT_R32G32B32A32_SFLOAT;
		case glu::TYPE_FLOAT_MAT3X2:	return VK_FORMAT_R32G32_SFLOAT;
		case glu::TYPE_FLOAT_MAT3:		return VK_FORMAT_R32G32B32_SFLOAT;
		case glu::TYPE_FLOAT_MAT3X4:	return VK_FORMAT_R32G32B32A32_SFLOAT;
		case glu::TYPE_FLOAT_MAT4X2:	return VK_FORMAT_R32G32_SFLOAT;
		case glu::TYPE_FLOAT_MAT4X3:	return VK_FORMAT_R32G32B32_SFLOAT;
		case glu::TYPE_FLOAT_MAT4:		return VK_FORMAT_R32G32B32A32_SFLOAT;
		default:
			DE_ASSERT(false);
			return VK_FORMAT_UNDEFINED;
	}
}
739
// Registers one vertex attribute at the given location: appends a new binding
// (stride = sizePerElement, per-vertex rate) and attribute description,
// allocates a host-visible vertex buffer of sizePerElement*count bytes,
// copies dataPtr into it and flushes the allocation. The buffer and its
// memory are retained in m_vertexBuffers / m_vertexBufferAllocs so they stay
// alive for the draw.
void FragmentOutExecutor::addAttribute (deUint32 bindingLocation, VkFormat format, deUint32 sizePerElement, deUint32 count, const void* dataPtr)
{
	// Portability requires stride to be a multiple of minVertexInputBindingStrideAlignment;
	// this value is usually 4 and current tests meet this requirement but
	// if this changes in future then this limit should be verified in checkSupport
	if (m_context.isDeviceFunctionalitySupported("VK_KHR_portability_subset") &&
		((sizePerElement % m_context.getPortabilitySubsetProperties().minVertexInputBindingStrideAlignment) != 0))
	{
		DE_FATAL("stride is not multiply of minVertexInputBindingStrideAlignment");
	}

	// Add binding specification
	const deUint32 binding = (deUint32)m_vertexBindingDescriptions.size();
	const VkVertexInputBindingDescription bindingDescription =
	{
		binding,
		sizePerElement,
		VK_VERTEX_INPUT_RATE_VERTEX
	};

	m_vertexBindingDescriptions.push_back(bindingDescription);

	// Add location and format specification
	const VkVertexInputAttributeDescription attributeDescription =
	{
		bindingLocation,			// deUint32	location;
		binding,					// deUint32	binding;
		format,						// VkFormat	format;
		0u,							// deUint32	offsetInBytes;
	};

	m_vertexAttributeDescriptions.push_back(attributeDescription);

	// Upload data to buffer
	const VkDevice				vkDevice			= m_context.getDevice();
	const DeviceInterface&		vk					= m_context.getDeviceInterface();
	const deUint32				queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();

	const VkDeviceSize inputSize = sizePerElement * count;
	const VkBufferCreateInfo vertexBufferParams =
	{
		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,		// VkStructureType		sType;
		DE_NULL,									// const void*			pNext;
		0u,											// VkBufferCreateFlags	flags;
		inputSize,									// VkDeviceSize			size;
		VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,			// VkBufferUsageFlags	usage;
		VK_SHARING_MODE_EXCLUSIVE,					// VkSharingMode		sharingMode;
		1u,											// deUint32				queueFamilyCount;
		&queueFamilyIndex							// const deUint32*		pQueueFamilyIndices;
	};

	Move<VkBuffer>			buffer	= createBuffer(vk, vkDevice, &vertexBufferParams);
	de::MovePtr<Allocation>	alloc	= m_context.getDefaultAllocator().allocate(getBufferMemoryRequirements(vk, vkDevice, *buffer), MemoryRequirement::HostVisible);

	VK_CHECK(vk.bindBufferMemory(vkDevice, *buffer, alloc->getMemory(), alloc->getOffset()));

	deMemcpy(alloc->getHostPtr(), dataPtr, (size_t)inputSize);
	// Make the host write visible to the device before the draw.
	flushAlloc(vk, vkDevice, *alloc);

	m_vertexBuffers.push_back(de::SharedPtr<Unique<VkBuffer> >(new Unique<VkBuffer>(buffer)));
	m_vertexBufferAllocs.push_back(AllocationSp(alloc.release()));
}
802
bindAttributes(int numValues,const void * const * inputs)803 void FragmentOutExecutor::bindAttributes (int numValues, const void* const* inputs)
804 {
805 // Input attributes
806 for (int inputNdx = 0; inputNdx < (int)m_shaderSpec.inputs.size(); inputNdx++)
807 {
808 const Symbol& symbol = m_shaderSpec.inputs[inputNdx];
809 const void* ptr = inputs[inputNdx];
810 const glu::DataType basicType = symbol.varType.getBasicType();
811 const int vecSize = glu::getDataTypeScalarSize(basicType);
812 const VkFormat format = getAttributeFormat(basicType);
813 int elementSize = 0;
814 int numAttrsToAdd = 1;
815
816 if (glu::isDataTypeDoubleOrDVec(basicType))
817 elementSize = sizeof(double);
818 if (glu::isDataTypeFloatOrVec(basicType))
819 elementSize = sizeof(float);
820 else if (glu::isDataTypeFloat16OrVec(basicType))
821 elementSize = sizeof(deUint16);
822 else if (glu::isDataTypeIntOrIVec(basicType))
823 elementSize = sizeof(int);
824 else if (glu::isDataTypeUintOrUVec(basicType))
825 elementSize = sizeof(deUint32);
826 else if (glu::isDataTypeMatrix(basicType))
827 {
828 int numRows = glu::getDataTypeMatrixNumRows(basicType);
829 int numCols = glu::getDataTypeMatrixNumColumns(basicType);
830
831 elementSize = numRows * numCols * (int)sizeof(float);
832 numAttrsToAdd = numCols;
833 }
834 else
835 DE_ASSERT(false);
836
837 // add attributes, in case of matrix every column is binded as an attribute
838 for (int attrNdx = 0; attrNdx < numAttrsToAdd; attrNdx++)
839 {
840 addAttribute((deUint32)m_vertexBindingDescriptions.size(), format, elementSize * vecSize, numValues, ptr);
841 }
842 }
843 }
844
// Releases all per-execution vertex input state (bindings, attributes,
// buffers and their backing memory) so that execute() can be called again
// with a fresh set of attributes.
void FragmentOutExecutor::clearRenderData (void)
{
	m_vertexBindingDescriptions.clear();
	m_vertexAttributeDescriptions.clear();
	m_vertexBuffers.clear();
	m_vertexBufferAllocs.clear();
}
852
createEmptyDescriptorSetLayout(const DeviceInterface & vkd,VkDevice device)853 static Move<VkDescriptorSetLayout> createEmptyDescriptorSetLayout (const DeviceInterface& vkd, VkDevice device)
854 {
855 const VkDescriptorSetLayoutCreateInfo createInfo =
856 {
857 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
858 DE_NULL,
859 (VkDescriptorSetLayoutCreateFlags)0,
860 0u,
861 DE_NULL,
862 };
863 return createDescriptorSetLayout(vkd, device, &createInfo);
864 }
865
createDummyDescriptorPool(const DeviceInterface & vkd,VkDevice device)866 static Move<VkDescriptorPool> createDummyDescriptorPool (const DeviceInterface& vkd, VkDevice device)
867 {
868 const VkDescriptorPoolSize dummySize =
869 {
870 VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
871 1u,
872 };
873 const VkDescriptorPoolCreateInfo createInfo =
874 {
875 VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
876 DE_NULL,
877 (VkDescriptorPoolCreateFlags)VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
878 1u,
879 1u,
880 &dummySize
881 };
882 return createDescriptorPool(vkd, device, &createInfo);
883 }
884
allocateSingleDescriptorSet(const DeviceInterface & vkd,VkDevice device,VkDescriptorPool pool,VkDescriptorSetLayout layout)885 static Move<VkDescriptorSet> allocateSingleDescriptorSet (const DeviceInterface& vkd, VkDevice device, VkDescriptorPool pool, VkDescriptorSetLayout layout)
886 {
887 const VkDescriptorSetAllocateInfo allocInfo =
888 {
889 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
890 DE_NULL,
891 pool,
892 1u,
893 &layout,
894 };
895 return allocateDescriptorSet(vkd, device, &allocInfo);
896 }
897
// Renders numValues single-pixel points through the pipeline described by
// m_shaderSpec (vertex + optional geometry + fragment stage); each fragment
// writes the spec's outputs to one color attachment per output location,
// which are then copied back and unpacked into the caller's output arrays.
//   numValues      - number of input/output elements (one pixel per value)
//   inputs         - one raw data pointer per input symbol (bound as vertex attributes)
//   outputs        - one destination pointer per output symbol
//   extraResources - descriptor set matching m_extraResourcesLayout (bound at
//                    set index 1) when that layout is non-null; must be 0 otherwise
void FragmentOutExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
{
	const VkDevice								vkDevice				= m_context.getDevice();
	const DeviceInterface&						vk						= m_context.getDeviceInterface();
	const VkQueue								queue					= m_context.getUniversalQueue();
	const deUint32								queueFamilyIndex		= m_context.getUniversalQueueFamilyIndex();
	Allocator&									memAlloc				= m_context.getDefaultAllocator();

	// Lay the values out on a grid at most 128 pixels wide, rounding the
	// height up so every value gets a pixel.
	const deUint32								renderSizeX				= de::min(static_cast<deUint32>(128), (deUint32)numValues);
	const deUint32								renderSizeY				= ((deUint32)numValues / renderSizeX) + (((deUint32)numValues % renderSizeX != 0) ? 1u : 0u);
	const tcu::UVec2							renderSize				(renderSizeX, renderSizeY);
	std::vector<tcu::Vec2>						positions;

	const bool									useGeometryShader		= m_shaderType == glu::SHADERTYPE_GEOMETRY;

	std::vector<VkImageSp>						colorImages;
	std::vector<VkImageMemoryBarrier>			colorImagePreRenderBarriers;
	std::vector<VkImageMemoryBarrier>			colorImagePostRenderBarriers;
	std::vector<AllocationSp>					colorImageAllocs;
	std::vector<VkAttachmentDescription>		attachments;
	std::vector<VkClearValue>					attachmentClearValues;
	std::vector<VkImageViewSp>					colorImageViews;

	std::vector<VkPipelineColorBlendAttachmentState>	colorBlendAttachmentStates;
	std::vector<VkAttachmentReference>					colorAttachmentReferences;

	Move<VkRenderPass>							renderPass;
	Move<VkFramebuffer>							framebuffer;
	Move<VkPipelineLayout>						pipelineLayout;
	Move<VkPipeline>							graphicsPipeline;

	Move<VkShaderModule>						vertexShaderModule;
	Move<VkShaderModule>						geometryShaderModule;
	Move<VkShaderModule>						fragmentShaderModule;

	Move<VkCommandPool>							cmdPool;
	Move<VkCommandBuffer>						cmdBuffer;

	// Set 0 is an always-empty placeholder so extra resources can sit in set 1.
	Unique<VkDescriptorSetLayout>				emptyDescriptorSetLayout	(createEmptyDescriptorSetLayout(vk, vkDevice));
	Unique<VkDescriptorPool>					dummyDescriptorPool			(createDummyDescriptorPool(vk, vkDevice));
	Unique<VkDescriptorSet>						emptyDescriptorSet			(allocateSingleDescriptorSet(vk, vkDevice, *dummyDescriptorPool, *emptyDescriptorSetLayout));

	clearRenderData();

	// Compute positions - 1px points are used to drive fragment shading.
	positions = computeVertexPositions(numValues, renderSize.cast<int>());

	// Bind attributes: location 0 carries the point positions, the rest the spec inputs.
	addAttribute(0u, VK_FORMAT_R32G32_SFLOAT, sizeof(tcu::Vec2), (deUint32)positions.size(), &positions[0]);
	bindAttributes(numValues, inputs);

	// Create color images - one attachment per output location.
	{
		const VkPipelineColorBlendAttachmentState colorBlendAttachmentState =
		{
			VK_FALSE,													// VkBool32		blendEnable;
			VK_BLEND_FACTOR_ONE,										// VkBlendFactor	srcColorBlendFactor;
			VK_BLEND_FACTOR_ZERO,										// VkBlendFactor	dstColorBlendFactor;
			VK_BLEND_OP_ADD,											// VkBlendOp		blendOpColor;
			VK_BLEND_FACTOR_ONE,										// VkBlendFactor	srcAlphaBlendFactor;
			VK_BLEND_FACTOR_ZERO,										// VkBlendFactor	destAlphaBlendFactor;
			VK_BLEND_OP_ADD,											// VkBlendOp		blendOpAlpha;
			(VK_COLOR_COMPONENT_R_BIT |
			 VK_COLOR_COMPONENT_G_BIT |
			 VK_COLOR_COMPONENT_B_BIT |
			 VK_COLOR_COMPONENT_A_BIT)									// VkColorComponentFlags	colorWriteMask;
		};

		for (int outNdx = 0; outNdx < (int)m_outputLayout.locationSymbols.size(); ++outNdx)
		{
			// Pick an attachment format wide enough to hold the output type exactly.
			const bool		isDouble	= glu::isDataTypeDoubleOrDVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
			const bool		isFloat		= isDataTypeFloatOrVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
			const bool		isFloat16b	= glu::isDataTypeFloat16OrVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
			const bool		isSigned	= isDataTypeIntOrIVec (m_shaderSpec.outputs[outNdx].varType.getBasicType());
			const bool		isBool		= isDataTypeBoolOrBVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
			const VkFormat	colorFormat	= (isDouble ? VK_FORMAT_R64G64B64A64_SFLOAT : (isFloat16b ? VK_FORMAT_R16G16B16A16_SFLOAT : (isFloat ? VK_FORMAT_R32G32B32A32_SFLOAT : (isSigned || isBool ? VK_FORMAT_R32G32B32A32_SINT : VK_FORMAT_R32G32B32A32_UINT))));

			{
				const VkFormatProperties formatProperties = getPhysicalDeviceFormatProperties(m_context.getInstanceInterface(), m_context.getPhysicalDevice(), colorFormat);
				if ((formatProperties.optimalTilingFeatures & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT) == 0)
					TCU_THROW(NotSupportedError, "Image format doesn't support COLOR_ATTACHMENT_BIT");
			}

			const VkImageCreateInfo	colorImageParams =
			{
				VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,									// VkStructureType	sType;
				DE_NULL,																// const void*		pNext;
				0u,																		// VkImageCreateFlags	flags;
				VK_IMAGE_TYPE_2D,														// VkImageType		imageType;
				colorFormat,															// VkFormat			format;
				{ renderSize.x(), renderSize.y(), 1u },									// VkExtent3D		extent;
				1u,																		// deUint32			mipLevels;
				1u,																		// deUint32			arraySize;
				VK_SAMPLE_COUNT_1_BIT,													// VkSampleCountFlagBits	samples;
				VK_IMAGE_TILING_OPTIMAL,												// VkImageTiling	tiling;
				VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT,	// VkImageUsageFlags	usage;
				VK_SHARING_MODE_EXCLUSIVE,												// VkSharingMode	sharingMode;
				1u,																		// deUint32			queueFamilyCount;
				&queueFamilyIndex,														// const deUint32*	pQueueFamilyIndices;
				VK_IMAGE_LAYOUT_UNDEFINED,												// VkImageLayout	initialLayout;
			};

			const VkAttachmentDescription colorAttachmentDescription =
			{
				0u,												// VkAttachmentDescriptorFlags	flags;
				colorFormat,									// VkFormat						format;
				VK_SAMPLE_COUNT_1_BIT,							// VkSampleCountFlagBits		samples;
				VK_ATTACHMENT_LOAD_OP_CLEAR,					// VkAttachmentLoadOp			loadOp;
				VK_ATTACHMENT_STORE_OP_STORE,					// VkAttachmentStoreOp			storeOp;
				VK_ATTACHMENT_LOAD_OP_DONT_CARE,				// VkAttachmentLoadOp			stencilLoadOp;
				VK_ATTACHMENT_STORE_OP_DONT_CARE,				// VkAttachmentStoreOp			stencilStoreOp;
				VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,		// VkImageLayout				initialLayout;
				VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,		// VkImageLayout				finalLayout;
			};

			Move<VkImage> colorImage = createImage(vk, vkDevice, &colorImageParams);
			colorImages.push_back(de::SharedPtr<Unique<VkImage> >(new Unique<VkImage>(colorImage)));
			attachmentClearValues.push_back(getDefaultClearColor());

			// Allocate and bind color image memory
			{
				de::MovePtr<Allocation> colorImageAlloc = memAlloc.allocate(getImageMemoryRequirements(vk, vkDevice, *((const VkImage*) colorImages.back().get())), MemoryRequirement::Any);
				VK_CHECK(vk.bindImageMemory(vkDevice, colorImages.back().get()->get(), colorImageAlloc->getMemory(), colorImageAlloc->getOffset()));
				colorImageAllocs.push_back(de::SharedPtr<Allocation>(colorImageAlloc.release()));

				attachments.push_back(colorAttachmentDescription);
				colorBlendAttachmentStates.push_back(colorBlendAttachmentState);

				const VkAttachmentReference colorAttachmentReference =
				{
					(deUint32) (colorImages.size() - 1),		// deUint32		attachment;
					VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL	// VkImageLayout	layout;
				};

				colorAttachmentReferences.push_back(colorAttachmentReference);
			}

			// Create color attachment view
			{
				const VkImageViewCreateInfo colorImageViewParams =
				{
					VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,		// VkStructureType			sType;
					DE_NULL,										// const void*				pNext;
					0u,												// VkImageViewCreateFlags	flags;
					colorImages.back().get()->get(),				// VkImage					image;
					VK_IMAGE_VIEW_TYPE_2D,							// VkImageViewType			viewType;
					colorFormat,									// VkFormat					format;
					{
						VK_COMPONENT_SWIZZLE_R,						// VkComponentSwizzle		r;
						VK_COMPONENT_SWIZZLE_G,						// VkComponentSwizzle		g;
						VK_COMPONENT_SWIZZLE_B,						// VkComponentSwizzle		b;
						VK_COMPONENT_SWIZZLE_A						// VkComponentSwizzle		a;
					},												// VkComponentMapping		components;
					{
						VK_IMAGE_ASPECT_COLOR_BIT,					// VkImageAspectFlags		aspectMask;
						0u,											// deUint32					baseMipLevel;
						1u,											// deUint32					mipLevels;
						0u,											// deUint32					baseArraySlice;
						1u											// deUint32					arraySize;
					}												// VkImageSubresourceRange	subresourceRange;
				};

				Move<VkImageView> colorImageView = createImageView(vk, vkDevice, &colorImageViewParams);
				colorImageViews.push_back(de::SharedPtr<Unique<VkImageView> >(new Unique<VkImageView>(colorImageView)));

				// UNDEFINED -> COLOR_ATTACHMENT_OPTIMAL before rendering.
				const VkImageMemoryBarrier colorImagePreRenderBarrier =
				{
					VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,			// sType
					DE_NULL,										// pNext
					0u,												// srcAccessMask
					(VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
					 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT),			// dstAccessMask
					VK_IMAGE_LAYOUT_UNDEFINED,						// oldLayout
					VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,		// newLayout
					VK_QUEUE_FAMILY_IGNORED,						// srcQueueFamilyIndex
					VK_QUEUE_FAMILY_IGNORED,						// dstQueueFamilyIndex
					colorImages.back().get()->get(),				// image
					{
						VK_IMAGE_ASPECT_COLOR_BIT,					// aspectMask
						0u,											// baseMipLevel
						1u,											// levelCount
						0u,											// baseArrayLayer
						1u,											// layerCount
					}												// subresourceRange
				};
				colorImagePreRenderBarriers.push_back(colorImagePreRenderBarrier);

				// COLOR_ATTACHMENT_OPTIMAL -> TRANSFER_SRC_OPTIMAL for readback.
				const VkImageMemoryBarrier colorImagePostRenderBarrier =
				{
					VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,			// sType
					DE_NULL,										// pNext
					(VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
					 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT),			// srcAccessMask
					VK_ACCESS_TRANSFER_READ_BIT,					// dstAccessMask
					VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,		// oldLayout
					VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,			// newLayout
					VK_QUEUE_FAMILY_IGNORED,						// srcQueueFamilyIndex
					VK_QUEUE_FAMILY_IGNORED,						// dstQueueFamilyIndex
					colorImages.back().get()->get(),				// image
					{
						VK_IMAGE_ASPECT_COLOR_BIT,					// aspectMask
						0u,											// baseMipLevel
						1u,											// levelCount
						0u,											// baseArrayLayer
						1u,											// layerCount
					}												// subresourceRange
				};
				colorImagePostRenderBarriers.push_back(colorImagePostRenderBarrier);
			}
		}
	}

	// Create render pass
	{
		const VkSubpassDescription subpassDescription =
		{
			0u,												// VkSubpassDescriptionFlags	flags;
			VK_PIPELINE_BIND_POINT_GRAPHICS,				// VkPipelineBindPoint			pipelineBindPoint;
			0u,												// deUint32						inputCount;
			DE_NULL,										// const VkAttachmentReference*	pInputAttachments;
			(deUint32)colorImages.size(),					// deUint32						colorCount;
			&colorAttachmentReferences[0],					// const VkAttachmentReference*	colorAttachments;
			DE_NULL,										// const VkAttachmentReference*	resolveAttachments;
			DE_NULL,										// VkAttachmentReference		depthStencilAttachment;
			0u,												// deUint32						preserveCount;
			DE_NULL											// const VkAttachmentReference*	pPreserveAttachments;
		};

		const VkRenderPassCreateInfo renderPassParams =
		{
			VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,		// VkStructureType					sType;
			DE_NULL,										// const void*						pNext;
			(VkRenderPassCreateFlags)0,						// VkRenderPassCreateFlags			flags;
			(deUint32)attachments.size(),					// deUint32							attachmentCount;
			&attachments[0],								// const VkAttachmentDescription*	pAttachments;
			1u,												// deUint32							subpassCount;
			&subpassDescription,							// const VkSubpassDescription*		pSubpasses;
			0u,												// deUint32							dependencyCount;
			DE_NULL											// const VkSubpassDependency*		pDependencies;
		};

		renderPass = createRenderPass(vk, vkDevice, &renderPassParams);
	}

	// Create framebuffer
	{
		std::vector<VkImageView> views(colorImageViews.size());
		for (size_t i = 0; i < colorImageViews.size(); i++)
		{
			views[i] = colorImageViews[i].get()->get();
		}

		const VkFramebufferCreateInfo framebufferParams =
		{
			VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,		// VkStructureType			sType;
			DE_NULL,										// const void*				pNext;
			0u,												// VkFramebufferCreateFlags	flags;
			*renderPass,									// VkRenderPass				renderPass;
			(deUint32)views.size(),							// deUint32					attachmentCount;
			&views[0],										// const VkImageView*		pAttachments;
			(deUint32)renderSize.x(),						// deUint32					width;
			(deUint32)renderSize.y(),						// deUint32					height;
			1u												// deUint32					layers;
		};

		framebuffer = createFramebuffer(vk, vkDevice, &framebufferParams);
	}

	// Create pipeline layout
	{
		const VkDescriptorSetLayout setLayouts[] =
		{
			*emptyDescriptorSetLayout,
			m_extraResourcesLayout
		};
		const VkPipelineLayoutCreateInfo pipelineLayoutParams =
		{
			VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,	// VkStructureType				sType;
			DE_NULL,										// const void*					pNext;
			(VkPipelineLayoutCreateFlags)0,					// VkPipelineLayoutCreateFlags	flags;
			// Both sets are used only when an extra-resources layout was supplied.
			(m_extraResourcesLayout != 0 ? 2u : 0u),		// deUint32						descriptorSetCount;
			setLayouts,										// const VkDescriptorSetLayout*	pSetLayouts;
			0u,												// deUint32						pushConstantRangeCount;
			DE_NULL											// const VkPushConstantRange*	pPushConstantRanges;
		};

		pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
	}

	// Create shaders
	{
		vertexShaderModule		= createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("vert"), 0);
		fragmentShaderModule	= createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("frag"), 0);

		if (useGeometryShader)
		{
			// Prefer the gl_PointSize-writing variant when the feature is available.
			if (m_context.getDeviceFeatures().shaderTessellationAndGeometryPointSize)
				geometryShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("geom_point_size"), 0);
			else
				geometryShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("geom"), 0);
		}
	}

	// Create pipeline
	{
		const VkPipelineVertexInputStateCreateInfo vertexInputStateParams =
		{
			VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,	// VkStructureType							sType;
			DE_NULL,													// const void*								pNext;
			(VkPipelineVertexInputStateCreateFlags)0,					// VkPipelineVertexInputStateCreateFlags	flags;
			(deUint32)m_vertexBindingDescriptions.size(),				// deUint32									bindingCount;
			&m_vertexBindingDescriptions[0],							// const VkVertexInputBindingDescription*	pVertexBindingDescriptions;
			(deUint32)m_vertexAttributeDescriptions.size(),				// deUint32									attributeCount;
			&m_vertexAttributeDescriptions[0],							// const VkVertexInputAttributeDescription*	pvertexAttributeDescriptions;
		};

		const std::vector<VkViewport>	viewports	(1, makeViewport(renderSize));
		const std::vector<VkRect2D>		scissors	(1, makeRect2D(renderSize));

		const VkPipelineColorBlendStateCreateInfo colorBlendStateParams =
		{
			VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,	// VkStructureType								sType;
			DE_NULL,													// const void*									pNext;
			(VkPipelineColorBlendStateCreateFlags)0,					// VkPipelineColorBlendStateCreateFlags			flags;
			VK_FALSE,													// VkBool32										logicOpEnable;
			VK_LOGIC_OP_COPY,											// VkLogicOp									logicOp;
			(deUint32)colorBlendAttachmentStates.size(),				// deUint32										attachmentCount;
			&colorBlendAttachmentStates[0],								// const VkPipelineColorBlendAttachmentState*	pAttachments;
			{ 0.0f, 0.0f, 0.0f, 0.0f }									// float										blendConst[4];
		};

		graphicsPipeline = makeGraphicsPipeline(vk,									// const DeviceInterface&                        vk
												vkDevice,							// const VkDevice                                device
												*pipelineLayout,					// const VkPipelineLayout                        pipelineLayout
												*vertexShaderModule,				// const VkShaderModule                          vertexShaderModule
												DE_NULL,							// const VkShaderModule                          tessellationControlShaderModule
												DE_NULL,							// const VkShaderModule                          tessellationEvalShaderModule
												useGeometryShader ? *geometryShaderModule : DE_NULL,	// const VkShaderModule      geometryShaderModule
												*fragmentShaderModule,				// const VkShaderModule                          fragmentShaderModule
												*renderPass,						// const VkRenderPass                            renderPass
												viewports,							// const std::vector<VkViewport>&                viewports
												scissors,							// const std::vector<VkRect2D>&                  scissors
												VK_PRIMITIVE_TOPOLOGY_POINT_LIST,	// const VkPrimitiveTopology                     topology
												0u,									// const deUint32                                subpass
												0u,									// const deUint32                                patchControlPoints
												&vertexInputStateParams,			// const VkPipelineVertexInputStateCreateInfo*   vertexInputStateCreateInfo
												DE_NULL,							// const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
												DE_NULL,							// const VkPipelineMultisampleStateCreateInfo*   multisampleStateCreateInfo
												DE_NULL,							// const VkPipelineDepthStencilStateCreateInfo*  depthStencilStateCreateInfo
												&colorBlendStateParams);			// const VkPipelineColorBlendStateCreateInfo*    colorBlendStateCreateInfo
	}

	// Create command pool
	cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);

	// Create command buffer
	{
		cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

		beginCommandBuffer(vk, *cmdBuffer);

		// Transition all color images to COLOR_ATTACHMENT_OPTIMAL before the render pass.
		vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, vk::VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0,
							  0, (const VkMemoryBarrier*)DE_NULL,
							  0, (const VkBufferMemoryBarrier*)DE_NULL,
							  (deUint32)colorImagePreRenderBarriers.size(), colorImagePreRenderBarriers.empty() ? DE_NULL : &colorImagePreRenderBarriers[0]);
		beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, renderSize.x(), renderSize.y()), (deUint32)attachmentClearValues.size(), &attachmentClearValues[0]);

		vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *graphicsPipeline);

		if (m_extraResourcesLayout != 0)
		{
			DE_ASSERT(extraResources != 0);
			const VkDescriptorSet descriptorSets[] = { *emptyDescriptorSet, extraResources };
			vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, DE_LENGTH_OF_ARRAY(descriptorSets), descriptorSets, 0u, DE_NULL);
		}
		else
			DE_ASSERT(extraResources == 0);

		const deUint32 numberOfVertexAttributes = (deUint32)m_vertexBuffers.size();

		std::vector<VkDeviceSize> offsets(numberOfVertexAttributes, 0);

		std::vector<VkBuffer> buffers(numberOfVertexAttributes);
		for (size_t i = 0; i < numberOfVertexAttributes; i++)
		{
			buffers[i] = m_vertexBuffers[i].get()->get();
		}

		vk.cmdBindVertexBuffers(*cmdBuffer, 0, numberOfVertexAttributes, &buffers[0], &offsets[0]);
		vk.cmdDraw(*cmdBuffer, (deUint32)positions.size(), 1u, 0u, 0u);

		endRenderPass(vk, *cmdBuffer);
		// Transition attachments to TRANSFER_SRC_OPTIMAL for the readback copies.
		vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, vk::VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0,
							  0, (const VkMemoryBarrier*)DE_NULL,
							  0, (const VkBufferMemoryBarrier*)DE_NULL,
							  (deUint32)colorImagePostRenderBarriers.size(), colorImagePostRenderBarriers.empty() ? DE_NULL : &colorImagePostRenderBarriers[0]);

		endCommandBuffer(vk, *cmdBuffer);
	}

	// Execute Draw
	submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());

	// Read back result and output
	{
		// Staging buffer is sized for the widest 32-bit-per-channel RGBA case.
		const VkDeviceSize imageSizeBytes = (VkDeviceSize)(4 * sizeof(deUint32) * renderSize.x() * renderSize.y());
		const VkBufferCreateInfo readImageBufferParams =
		{
			VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,		// VkStructureType		sType;
			DE_NULL,									// const void*			pNext;
			0u,											// VkBufferCreateFlags	flags;
			imageSizeBytes,								// VkDeviceSize			size;
			VK_BUFFER_USAGE_TRANSFER_DST_BIT,			// VkBufferUsageFlags	usage;
			VK_SHARING_MODE_EXCLUSIVE,					// VkSharingMode		sharingMode;
			1u,											// deUint32				queueFamilyCount;
			&queueFamilyIndex,							// const deUint32*		pQueueFamilyIndices;
		};

		// constants for image copy
		Move<VkCommandPool> copyCmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);

		const VkBufferImageCopy copyParams =
		{
			0u,											// VkDeviceSize			bufferOffset;
			(deUint32)renderSize.x(),					// deUint32				bufferRowLength;
			(deUint32)renderSize.y(),					// deUint32				bufferImageHeight;
			{
				VK_IMAGE_ASPECT_COLOR_BIT,				// VkImageAspect		aspect;
				0u,										// deUint32				mipLevel;
				0u,										// deUint32				arraySlice;
				1u,										// deUint32				arraySize;
			},											// VkImageSubresource	imageSubresource;
			{ 0u, 0u, 0u },								// VkOffset3D			imageOffset;
			{ renderSize.x(), renderSize.y(), 1u }		// VkExtent3D			imageExtent;
		};

		// Read back pixels.
		for (int outNdx = 0; outNdx < (int)m_shaderSpec.outputs.size(); ++outNdx)
		{
			const Symbol&	output		= m_shaderSpec.outputs[outNdx];
			const int		outSize		= output.varType.getScalarSize();
			const int		outVecSize	= glu::getDataTypeNumComponents(output.varType.getBasicType());
			const int		outNumLocs	= glu::getDataTypeNumLocations(output.varType.getBasicType());
			const int		outLocation	= de::lookup(m_outputLayout.locationMap, output.name);

			// One attachment copy per location the output occupies.
			for (int locNdx = 0; locNdx < outNumLocs; ++locNdx)
			{
				tcu::TextureLevel					tmpBuf;
				const tcu::TextureFormat			format = getRenderbufferFormatForOutput(output.varType, false);
				const tcu::TextureFormat			readFormat (tcu::TextureFormat::RGBA, format.type);
				const Unique<VkBuffer>				readImageBuffer(createBuffer(vk, vkDevice, &readImageBufferParams));
				const de::UniquePtr<Allocation>		readImageBufferMemory(memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *readImageBuffer), MemoryRequirement::HostVisible));

				VK_CHECK(vk.bindBufferMemory(vkDevice, *readImageBuffer, readImageBufferMemory->getMemory(), readImageBufferMemory->getOffset()));

				// Copy image to buffer
				{

					Move<VkCommandBuffer> copyCmdBuffer = allocateCommandBuffer(vk, vkDevice, *copyCmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

					beginCommandBuffer(vk, *copyCmdBuffer);
					vk.cmdCopyImageToBuffer(*copyCmdBuffer, colorImages[outLocation + locNdx].get()->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *readImageBuffer, 1u, &copyParams);

					// Insert a barrier so data written by the transfer is available to the host
					{
						const VkBufferMemoryBarrier barrier =
						{
							VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,	// VkStructureType	sType;
							DE_NULL,									// const void*		pNext;
							VK_ACCESS_TRANSFER_WRITE_BIT,				// VkAccessFlags	srcAccessMask;
							VK_ACCESS_HOST_READ_BIT,					// VkAccessFlags	dstAccessMask;
							VK_QUEUE_FAMILY_IGNORED,					// uint32_t			srcQueueFamilyIndex;
							VK_QUEUE_FAMILY_IGNORED,					// uint32_t			dstQueueFamilyIndex;
							*readImageBuffer,							// VkBuffer			buffer;
							0,											// VkDeviceSize		offset;
							VK_WHOLE_SIZE,								// VkDeviceSize		size;
						};

						vk.cmdPipelineBarrier(*copyCmdBuffer, vk::VK_PIPELINE_STAGE_TRANSFER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0,
											  0, (const VkMemoryBarrier*)DE_NULL,
											  1, &barrier,
											  0, (const VkImageMemoryBarrier*)DE_NULL);
					}

					endCommandBuffer(vk, *copyCmdBuffer);

					submitCommandsAndWait(vk, vkDevice, queue, copyCmdBuffer.get());
				}

				invalidateAlloc(vk, vkDevice, *readImageBufferMemory);

				tmpBuf.setStorage(readFormat, renderSize.x(), renderSize.y());

				const tcu::TextureFormat		resultFormat(tcu::TextureFormat::RGBA, format.type);
				const tcu::ConstPixelBufferAccess resultAccess(resultFormat, renderSize.x(), renderSize.y(), 1, readImageBufferMemory->getHostPtr());

				tcu::copy(tmpBuf.getAccess(), resultAccess);

				// Unpack RGBA pixels into the caller's tightly packed output array.
				// Pixels always carry 4 channels; only outVecSize of them are copied.
				if (isOutput16Bit(static_cast<size_t>(outNdx)))
				{
					deUint16* dstPtrBase = static_cast<deUint16*>(outputs[outNdx]);
					if (outSize == 4 && outNumLocs == 1)
						deMemcpy(dstPtrBase, tmpBuf.getAccess().getDataPtr(), numValues * outVecSize * sizeof(deUint16));
					else
					{
						for (int valNdx = 0; valNdx < numValues; valNdx++)
						{
							const deUint16* srcPtr = (const deUint16*)tmpBuf.getAccess().getDataPtr() + valNdx * 4;
							deUint16* dstPtr = &dstPtrBase[outSize * valNdx + outVecSize * locNdx];
							deMemcpy(dstPtr, srcPtr, outVecSize * sizeof(deUint16));
						}
					}
				}
				else
				{
					deUint32* dstPtrBase = static_cast<deUint32*>(outputs[outNdx]);
					if (outSize == 4 && outNumLocs == 1)
						deMemcpy(dstPtrBase, tmpBuf.getAccess().getDataPtr(), numValues * outVecSize * sizeof(deUint32));
					else
					{
						for (int valNdx = 0; valNdx < numValues; valNdx++)
						{
							const deUint32* srcPtr = (const deUint32*)tmpBuf.getAccess().getDataPtr() + valNdx * 4;
							deUint32* dstPtr = &dstPtrBase[outSize * valNdx + outVecSize * locNdx];
							deMemcpy(dstPtr, srcPtr, outVecSize * sizeof(deUint32));
						}
					}
				}
			}
		}
	}
}
1430
1431 // VertexShaderExecutor
1432
// Executes the shader spec in the vertex stage; outputs are passed through a
// generated fragment shader and captured from color attachments by the base
// FragmentOutExecutor machinery.
class VertexShaderExecutor : public FragmentOutExecutor
{
public:
						VertexShaderExecutor	(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual				~VertexShaderExecutor	(void);

	// Generates the "vert" and "frag" GLSL programs for this executor.
	static void			generateSources			(const ShaderSpec& shaderSpec, SourceCollections& dst);
};
1441
// Delegates to FragmentOutExecutor with the vertex shader type.
VertexShaderExecutor::VertexShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: FragmentOutExecutor(context, glu::SHADERTYPE_VERTEX, shaderSpec, extraResourcesLayout)
{
}
1446
// All resources are owned by the base class / RAII wrappers.
VertexShaderExecutor::~VertexShaderExecutor (void)
{
}
1450
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)1451 void VertexShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
1452 {
1453 const FragmentOutputLayout outputLayout (computeFragmentOutputLayout(shaderSpec.outputs));
1454
1455 programCollection.glslSources.add("vert") << glu::VertexSource(generateVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1456 /* \todo [2015-09-11 hegedusd] set useIntOutputs parameter if needed. */
1457 programCollection.glslSources.add("frag") << glu::FragmentSource(generatePassthroughFragmentShader(shaderSpec, false, outputLayout.locationMap, "vtx_out_", "o_")) << shaderSpec.buildOptions;
1458 }
1459
1460 // GeometryShaderExecutor
1461
// Executes the shader spec in the geometry stage; a pass-through vertex
// shader feeds it and a pass-through fragment shader captures its outputs.
class GeometryShaderExecutor : public FragmentOutExecutor
{
public:
						GeometryShaderExecutor	(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual				~GeometryShaderExecutor	(void);

	// Generates "vert", "geom", "geom_point_size" and "frag" GLSL programs.
	static void			generateSources			(const ShaderSpec& shaderSpec, SourceCollections& programCollection);

};
1471
// Delegates to FragmentOutExecutor with the geometry shader type; requires
// the geometryShader device feature.
GeometryShaderExecutor::GeometryShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: FragmentOutExecutor(context, glu::SHADERTYPE_GEOMETRY, shaderSpec, extraResourcesLayout)
{
	const VkPhysicalDeviceFeatures& features = context.getDeviceFeatures();

	if (!features.geometryShader)
		TCU_THROW(NotSupportedError, "Geometry shader type not supported by device");
}
1480
// All resources are owned by the base class / RAII wrappers.
GeometryShaderExecutor::~GeometryShaderExecutor (void)
{
}
1484
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)1485 void GeometryShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
1486 {
1487 const FragmentOutputLayout outputLayout (computeFragmentOutputLayout(shaderSpec.outputs));
1488
1489 programCollection.glslSources.add("vert") << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1490
1491 programCollection.glslSources.add("geom") << glu::GeometrySource(generateGeometryShader(shaderSpec, "vtx_out_", "geom_out_", false)) << shaderSpec.buildOptions;
1492 programCollection.glslSources.add("geom_point_size") << glu::GeometrySource(generateGeometryShader(shaderSpec, "vtx_out_", "geom_out_", true)) << shaderSpec.buildOptions;
1493
1494 /* \todo [2015-09-18 rsipka] set useIntOutputs parameter if needed. */
1495 programCollection.glslSources.add("frag") << glu::FragmentSource(generatePassthroughFragmentShader(shaderSpec, false, outputLayout.locationMap, "geom_out_", "o_")) << shaderSpec.buildOptions;
1496
1497 }
1498
1499 // FragmentShaderExecutor
1500
// Executor that runs the test operations directly in the fragment shader,
// fed by a passthrough vertex stage.
class FragmentShaderExecutor : public FragmentOutExecutor
{
public:
						FragmentShaderExecutor	(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual				~FragmentShaderExecutor	(void);

	// Registers the vert/frag GLSL sources used by this executor.
	static void			generateSources			(const ShaderSpec& shaderSpec, SourceCollections& programCollection);

};
1510
// Simple passthrough to the base class; no extra feature checks needed since
// fragment shaders are always supported.
FragmentShaderExecutor::FragmentShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: FragmentOutExecutor(context, glu::SHADERTYPE_FRAGMENT, shaderSpec, extraResourcesLayout)
{
}
1515
// No resources of its own; base class cleanup suffices.
FragmentShaderExecutor::~FragmentShaderExecutor (void)
{
}
1519
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)1520 void FragmentShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
1521 {
1522 const FragmentOutputLayout outputLayout (computeFragmentOutputLayout(shaderSpec.outputs));
1523
1524 programCollection.glslSources.add("vert") << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1525 /* \todo [2015-09-11 hegedusd] set useIntOutputs parameter if needed. */
1526 programCollection.glslSources.add("frag") << glu::FragmentSource(generateFragmentShader(shaderSpec, false, outputLayout.locationMap, "vtx_out_", "o_")) << shaderSpec.buildOptions;
1527 }
1528
1529 // Shared utilities for compute and tess executors
1530
getVecStd430ByteAlignment(glu::DataType type)1531 static deUint32 getVecStd430ByteAlignment (glu::DataType type)
1532 {
1533 deUint32 baseSize;
1534
1535 switch (glu::getDataTypeScalarType(type))
1536 {
1537 case glu::TYPE_FLOAT16: baseSize = 2u; break;
1538 case glu::TYPE_DOUBLE: baseSize = 8u; break;
1539 default: baseSize = 4u; break;
1540 }
1541
1542 switch (glu::getDataTypeScalarSize(type))
1543 {
1544 case 1: return baseSize;
1545 case 2: return baseSize * 2u;
1546 case 3: // fallthrough.
1547 case 4: return baseSize * 4u;
1548 default:
1549 DE_ASSERT(false);
1550 return 0u;
1551 }
1552 }
1553
// Base class for executors that pass inputs and outputs through storage
// buffers (compute and tessellation executors). Owns the input/output SSBOs
// and knows how to marshal values between tightly packed host arrays and the
// std430-style buffer layout.
class BufferIoExecutor : public ShaderExecutor
{
public:
							BufferIoExecutor	(Context& context, const ShaderSpec& shaderSpec);
	virtual					~BufferIoExecutor	(void);

protected:
	// Fixed descriptor bindings used by the generated shaders (set 0).
	enum
	{
		INPUT_BUFFER_BINDING	= 0,
		OUTPUT_BUFFER_BINDING	= 1,
	};

	// Creates host-visible input/output buffers sized for numValues entries.
	void					initBuffers			(int numValues);
	VkBuffer				getInputBuffer		(void) const { return *m_inputBuffer; }
	VkBuffer				getOutputBuffer		(void) const { return *m_outputBuffer; }
	// Per-invocation stride of the corresponding buffer layout, in bytes.
	deUint32				getInputStride		(void) const { return getLayoutStride(m_inputLayout); }
	deUint32				getOutputStride		(void) const { return getLayoutStride(m_outputLayout); }

	// Copies host values into the input buffer / reads results back out.
	void					uploadInputBuffer	(const void* const* inputPtrs, int numValues, bool packFloat16Bit);
	void					readOutputBuffer	(void* const* outputPtrs, int numValues);

	// GLSL source helpers: SSBO block declarations and per-invocation load/
	// execute/store code for the shader body.
	static void				declareBufferBlocks	(std::ostream& src, const ShaderSpec& spec);
	static void				generateExecBufferIo(std::ostream& src, const ShaderSpec& spec, const char* invocationNdxName);

protected:
	Move<VkBuffer>			m_inputBuffer;
	Move<VkBuffer>			m_outputBuffer;

private:
	// Buffer layout of a single symbol: byte offset within an element, the
	// common per-invocation stride, and column stride for matrices.
	struct VarLayout
	{
		deUint32	offset;
		deUint32	stride;
		deUint32	matrixStride;

		VarLayout (void) : offset(0), stride(0), matrixStride(0) {}
	};

	static void				computeVarLayout	(const std::vector<Symbol>& symbols, std::vector<VarLayout>* layout);
	static deUint32			getLayoutStride		(const vector<VarLayout>& layout);

	static void				copyToBuffer		(const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr, bool packFloat16Bit);
	static void				copyFromBuffer		(const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr);

	de::MovePtr<Allocation>	m_inputAlloc;
	de::MovePtr<Allocation>	m_outputAlloc;

	vector<VarLayout>		m_inputLayout;
	vector<VarLayout>		m_outputLayout;
};
1605
// Computes the buffer layouts for the shader's inputs and outputs up front;
// the actual buffers are created later by initBuffers().
BufferIoExecutor::BufferIoExecutor (Context& context, const ShaderSpec& shaderSpec)
	: ShaderExecutor(context, shaderSpec)
{
	computeVarLayout(m_shaderSpec.inputs, &m_inputLayout);
	computeVarLayout(m_shaderSpec.outputs, &m_outputLayout);
}
1612
// Buffers and allocations are released automatically by Move/MovePtr (RAII).
BufferIoExecutor::~BufferIoExecutor (void)
{
}
1616
getLayoutStride(const vector<VarLayout> & layout)1617 inline deUint32 BufferIoExecutor::getLayoutStride (const vector<VarLayout>& layout)
1618 {
1619 return layout.empty() ? 0 : layout[0].stride;
1620 }
1621
// Computes std430-style offsets/strides for the given symbols. Values are laid
// out as an array of per-invocation elements, so every entry receives the same
// stride: the aligned total size of one element.
void BufferIoExecutor::computeVarLayout (const std::vector<Symbol>& symbols, std::vector<VarLayout>* layout)
{
	deUint32 maxAlignment = 0;
	deUint32 curOffset = 0;

	DE_ASSERT(layout != DE_NULL);
	DE_ASSERT(layout->empty());
	layout->resize(symbols.size());

	for (size_t varNdx = 0; varNdx < symbols.size(); varNdx++)
	{
		const Symbol& symbol = symbols[varNdx];
		const glu::DataType basicType = symbol.varType.getBasicType();
		VarLayout& layoutEntry = (*layout)[varNdx];

		if (glu::isDataTypeScalarOrVector(basicType))
		{
			// std430: alignment is the (vec3-rounded-up) vector size, but the
			// stored size itself carries no vec3 padding.
			const deUint32 alignment = getVecStd430ByteAlignment(basicType);
			const deUint32 size = (deUint32)glu::getDataTypeScalarSize(basicType) * (isDataTypeDoubleType(basicType) ? (int)(sizeof(deUint64)) : (isDataTypeFloat16OrVec(basicType) ? (int)sizeof(deUint16) : (int)sizeof(deUint32)));

			curOffset = (deUint32)deAlign32((int)curOffset, (int)alignment);
			maxAlignment = de::max(maxAlignment, alignment);

			layoutEntry.offset = curOffset;
			layoutEntry.matrixStride = 0;

			curOffset += size;
		}
		else if (glu::isDataTypeMatrix(basicType))
		{
			// Matrices are stored as arrays of column vectors, each column
			// aligned to the column vector's std430 alignment.
			const int numVecs = glu::getDataTypeMatrixNumColumns(basicType);
			const glu::DataType vecType = glu::getDataTypeVector(glu::getDataTypeScalarType(basicType), glu::getDataTypeMatrixNumRows(basicType));
			const deUint32 vecAlignment = getVecStd430ByteAlignment(vecType);

			curOffset = (deUint32)deAlign32((int)curOffset, (int)vecAlignment);
			maxAlignment = de::max(maxAlignment, vecAlignment);

			layoutEntry.offset = curOffset;
			layoutEntry.matrixStride = vecAlignment;

			curOffset += vecAlignment*numVecs;
		}
		else
			DE_ASSERT(false); // Aggregates (arrays/structs) are not supported here.
	}

	{
		// Round the element size up to the largest member alignment and use it
		// as the common per-invocation stride for every entry.
		const deUint32 totalSize = (deUint32)deAlign32(curOffset, maxAlignment);

		for (vector<VarLayout>::iterator varIter = layout->begin(); varIter != layout->end(); ++varIter)
			varIter->stride = totalSize;
	}
}
1675
declareBufferBlocks(std::ostream & src,const ShaderSpec & spec)1676 void BufferIoExecutor::declareBufferBlocks (std::ostream& src, const ShaderSpec& spec)
1677 {
1678 // Input struct
1679 if (!spec.inputs.empty())
1680 {
1681 glu::StructType inputStruct("Inputs");
1682 for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
1683 inputStruct.addMember(symIter->name.c_str(), symIter->varType);
1684 src << glu::declare(&inputStruct) << ";\n";
1685 }
1686
1687 // Output struct
1688 {
1689 glu::StructType outputStruct("Outputs");
1690 for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1691 outputStruct.addMember(symIter->name.c_str(), symIter->varType);
1692 src << glu::declare(&outputStruct) << ";\n";
1693 }
1694
1695 src << "\n";
1696
1697 if (!spec.inputs.empty())
1698 {
1699 src << "layout(set = 0, binding = " << int(INPUT_BUFFER_BINDING) << ", std430) buffer InBuffer\n"
1700 << "{\n"
1701 << " Inputs inputs[];\n"
1702 << "};\n";
1703 }
1704
1705 src << "layout(set = 0, binding = " << int(OUTPUT_BUFFER_BINDING) << ", std430) buffer OutBuffer\n"
1706 << "{\n"
1707 << " Outputs outputs[];\n"
1708 << "};\n"
1709 << "\n";
1710 }
1711
generateExecBufferIo(std::ostream & src,const ShaderSpec & spec,const char * invocationNdxName)1712 void BufferIoExecutor::generateExecBufferIo (std::ostream& src, const ShaderSpec& spec, const char* invocationNdxName)
1713 {
1714 std::string tname;
1715 for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
1716 {
1717 const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
1718 if (f16BitTest)
1719 {
1720 tname = glu::getDataTypeName(getDataTypeFloat16Scalars(symIter->varType.getBasicType()));
1721 }
1722 else
1723 {
1724 tname = glu::getDataTypeName(symIter->varType.getBasicType());
1725 }
1726 src << "\t" << tname << " "<< symIter->name << " = " << tname << "(inputs[" << invocationNdxName << "]." << symIter->name << ");\n";
1727 }
1728
1729 for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1730 {
1731 const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
1732 if (f16BitTest)
1733 {
1734 tname = glu::getDataTypeName(getDataTypeFloat16Scalars(symIter->varType.getBasicType()));
1735 }
1736 else
1737 {
1738 tname = glu::getDataTypeName(symIter->varType.getBasicType());
1739 }
1740 src << "\t" << tname << " " << symIter->name << ";\n";
1741 if (f16BitTest)
1742 {
1743 const char* ttname = glu::getDataTypeName(symIter->varType.getBasicType());
1744 src << "\t" << ttname << " " << "packed_" << symIter->name << ";\n";
1745 }
1746 }
1747
1748 src << "\n";
1749
1750 {
1751 std::istringstream opSrc (spec.source);
1752 std::string line;
1753
1754 while (std::getline(opSrc, line))
1755 src << "\t" << line << "\n";
1756 }
1757
1758 if (spec.packFloat16Bit)
1759 packFloat16Bit (src, spec.outputs);
1760
1761 src << "\n";
1762 for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1763 {
1764 const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
1765 if(f16BitTest)
1766 src << "\toutputs[" << invocationNdxName << "]." << symIter->name << " = packed_" << symIter->name << ";\n";
1767 else
1768 src << "\toutputs[" << invocationNdxName << "]." << symIter->name << " = " << symIter->name << ";\n";
1769 }
1770 }
1771
// Copies numValues values from tightly packed host data (srcBasePtr) into the
// interleaved buffer layout described by 'layout' (dstBasePtr). With
// packFloat16Bit, each float component is converted to a 16-bit float
// (round-towards-zero) stored in the low 16 bits of its 32-bit slot.
void BufferIoExecutor::copyToBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr, bool packFloat16Bit)
{
	if (varType.isBasicType())
	{
		const glu::DataType basicType = varType.getBasicType();
		const bool isMatrix = glu::isDataTypeMatrix(basicType);
		const int scalarSize = glu::getDataTypeScalarSize(basicType);
		const int numVecs = isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1; // matrix columns are copied one at a time
		const int numComps = scalarSize / numVecs; // components per (column) vector
		const int size = (glu::isDataTypeDoubleType(basicType) ? (int)sizeof(deUint64) : (glu::isDataTypeFloat16OrVec(basicType) ? (int)sizeof(deUint16) : (int)sizeof(deUint32)));

		for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
		{
			for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
			{
				// Source is tightly packed; destination uses the layout's
				// offset/stride (plus matrixStride for matrix columns).
				const int srcOffset = size * (elemNdx * scalarSize + vecNdx * numComps);
				const int dstOffset = layout.offset + layout.stride * elemNdx + (isMatrix ? layout.matrixStride * vecNdx : 0);
				const deUint8* srcPtr = (const deUint8*)srcBasePtr + srcOffset;
				deUint8* dstPtr = (deUint8*)dstBasePtr + dstOffset;

				if (packFloat16Bit)
				{
					// Convert the float values to 16 bit and store in the lower 16 bits of 32 bit ints.
					// NOTE(review): the second array element stays zero so the
					// upper 16 bits are cleared; this byte-wise packing assumes
					// a little-endian host — confirm.
					for (int cmpNdx=0; cmpNdx < numComps; ++cmpNdx)
					{
						deFloat16 f16vals[2] = {};
						f16vals[0] = deFloat32To16Round(((float*)srcPtr)[cmpNdx], DE_ROUNDINGMODE_TO_ZERO);
						deMemcpy(dstPtr + cmpNdx * size, &f16vals[0], size);
					}
				}
				else
				{
					deMemcpy(dstPtr, srcPtr, size * numComps);
				}
			}
		}
	}
	else
		throw tcu::InternalError("Unsupported type");
}
1812
copyFromBuffer(const glu::VarType & varType,const VarLayout & layout,int numValues,const void * srcBasePtr,void * dstBasePtr)1813 void BufferIoExecutor::copyFromBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr)
1814 {
1815 if (varType.isBasicType())
1816 {
1817 const glu::DataType basicType = varType.getBasicType();
1818 const bool isMatrix = glu::isDataTypeMatrix(basicType);
1819 const int scalarSize = glu::getDataTypeScalarSize(basicType);
1820 const int numVecs = isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
1821 const int numComps = scalarSize / numVecs;
1822
1823 for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
1824 {
1825 for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
1826 {
1827 const int size = (glu::isDataTypeDoubleType(basicType) ? (int)sizeof(deUint64) : (glu::isDataTypeFloat16OrVec(basicType) ? (int)sizeof(deUint16) : (int)sizeof(deUint32)));
1828 const int srcOffset = layout.offset + layout.stride * elemNdx + (isMatrix ? layout.matrixStride * vecNdx : 0);
1829 const int dstOffset = size * (elemNdx * scalarSize + vecNdx * numComps);
1830 const deUint8* srcPtr = (const deUint8*)srcBasePtr + srcOffset;
1831 deUint8* dstPtr = (deUint8*)dstBasePtr + dstOffset;
1832
1833 deMemcpy(dstPtr, srcPtr, size * numComps);
1834 }
1835 }
1836 }
1837 else
1838 throw tcu::InternalError("Unsupported type");
1839 }
1840
uploadInputBuffer(const void * const * inputPtrs,int numValues,bool packFloat16Bit)1841 void BufferIoExecutor::uploadInputBuffer (const void* const* inputPtrs, int numValues, bool packFloat16Bit)
1842 {
1843 const VkDevice vkDevice = m_context.getDevice();
1844 const DeviceInterface& vk = m_context.getDeviceInterface();
1845
1846 const deUint32 inputStride = getLayoutStride(m_inputLayout);
1847 const int inputBufferSize = inputStride * numValues;
1848
1849 if (inputBufferSize == 0)
1850 return; // No inputs
1851
1852 DE_ASSERT(m_shaderSpec.inputs.size() == m_inputLayout.size());
1853 for (size_t inputNdx = 0; inputNdx < m_shaderSpec.inputs.size(); ++inputNdx)
1854 {
1855 const glu::VarType& varType = m_shaderSpec.inputs[inputNdx].varType;
1856 const VarLayout& layout = m_inputLayout[inputNdx];
1857
1858 copyToBuffer(varType, layout, numValues, inputPtrs[inputNdx], m_inputAlloc->getHostPtr(), packFloat16Bit);
1859 }
1860
1861 flushAlloc(vk, vkDevice, *m_inputAlloc);
1862 }
1863
readOutputBuffer(void * const * outputPtrs,int numValues)1864 void BufferIoExecutor::readOutputBuffer (void* const* outputPtrs, int numValues)
1865 {
1866 const VkDevice vkDevice = m_context.getDevice();
1867 const DeviceInterface& vk = m_context.getDeviceInterface();
1868
1869 DE_ASSERT(numValues > 0); // At least some outputs are required.
1870
1871 invalidateAlloc(vk, vkDevice, *m_outputAlloc);
1872
1873 DE_ASSERT(m_shaderSpec.outputs.size() == m_outputLayout.size());
1874 for (size_t outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
1875 {
1876 const glu::VarType& varType = m_shaderSpec.outputs[outputNdx].varType;
1877 const VarLayout& layout = m_outputLayout[outputNdx];
1878
1879 copyFromBuffer(varType, layout, numValues, m_outputAlloc->getHostPtr(), outputPtrs[outputNdx]);
1880 }
1881 }
1882
initBuffers(int numValues)1883 void BufferIoExecutor::initBuffers (int numValues)
1884 {
1885 const deUint32 inputStride = getLayoutStride(m_inputLayout);
1886 const deUint32 outputStride = getLayoutStride(m_outputLayout);
1887 // Avoid creating zero-sized buffer/memory
1888 const size_t inputBufferSize = de::max(numValues * inputStride, 1u);
1889 const size_t outputBufferSize = numValues * outputStride;
1890
1891 // Upload data to buffer
1892 const VkDevice vkDevice = m_context.getDevice();
1893 const DeviceInterface& vk = m_context.getDeviceInterface();
1894 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
1895 Allocator& memAlloc = m_context.getDefaultAllocator();
1896
1897 const VkBufferCreateInfo inputBufferParams =
1898 {
1899 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
1900 DE_NULL, // const void* pNext;
1901 0u, // VkBufferCreateFlags flags;
1902 inputBufferSize, // VkDeviceSize size;
1903 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, // VkBufferUsageFlags usage;
1904 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
1905 1u, // deUint32 queueFamilyCount;
1906 &queueFamilyIndex // const deUint32* pQueueFamilyIndices;
1907 };
1908
1909 m_inputBuffer = createBuffer(vk, vkDevice, &inputBufferParams);
1910 m_inputAlloc = memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *m_inputBuffer), MemoryRequirement::HostVisible);
1911
1912 VK_CHECK(vk.bindBufferMemory(vkDevice, *m_inputBuffer, m_inputAlloc->getMemory(), m_inputAlloc->getOffset()));
1913
1914 const VkBufferCreateInfo outputBufferParams =
1915 {
1916 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
1917 DE_NULL, // const void* pNext;
1918 0u, // VkBufferCreateFlags flags;
1919 outputBufferSize, // VkDeviceSize size;
1920 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, // VkBufferUsageFlags usage;
1921 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
1922 1u, // deUint32 queueFamilyCount;
1923 &queueFamilyIndex // const deUint32* pQueueFamilyIndices;
1924 };
1925
1926 m_outputBuffer = createBuffer(vk, vkDevice, &outputBufferParams);
1927 m_outputAlloc = memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *m_outputBuffer), MemoryRequirement::HostVisible);
1928
1929 VK_CHECK(vk.bindBufferMemory(vkDevice, *m_outputBuffer, m_outputAlloc->getMemory(), m_outputAlloc->getOffset()));
1930 }
1931
1932 // ComputeShaderExecutor
1933
// Executor that runs the test operations in a compute shader, using the
// BufferIoExecutor storage buffers for inputs and outputs.
class ComputeShaderExecutor : public BufferIoExecutor
{
public:
						ComputeShaderExecutor	(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual				~ComputeShaderExecutor	(void);

	static void			generateSources			(const ShaderSpec& shaderSpec, SourceCollections& programCollection);

	// Dispatches the compute work for numValues invocations.
	virtual void		execute					(int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources);

protected:
	static std::string	generateComputeShader	(const ShaderSpec& spec);

private:
	// Layout for extra resources (second descriptor set); not owned.
	const VkDescriptorSetLayout	m_extraResourcesLayout;
};
1950
// Stores the extra-resources layout; buffers are set up by the base class.
ComputeShaderExecutor::ComputeShaderExecutor(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: BufferIoExecutor			(context, shaderSpec)
	, m_extraResourcesLayout	(extraResourcesLayout)
{
}
1956
// No resources of its own; base class cleanup suffices.
ComputeShaderExecutor::~ComputeShaderExecutor (void)
{
}
1960
getTypeSpirv(const glu::DataType type,const bool packFloat16Bit=false)1961 std::string getTypeSpirv(const glu::DataType type, const bool packFloat16Bit = false)
1962 {
1963 switch(type)
1964 {
1965 case glu::TYPE_FLOAT16:
1966 return "%f16";
1967 case glu::TYPE_FLOAT16_VEC2:
1968 return "%v2f16";
1969 case glu::TYPE_FLOAT16_VEC3:
1970 return "%v3f16";
1971 case glu::TYPE_FLOAT16_VEC4:
1972 return "%v4f16";
1973 case glu::TYPE_FLOAT:
1974 return packFloat16Bit ? "%u32" : "%f32"; // f16 values will be bitcast from ui32.
1975 case glu::TYPE_FLOAT_VEC2:
1976 return packFloat16Bit ? "%v2u32" : "%v2f32"; // f16 values will be bitcast from ui32.
1977 case glu::TYPE_FLOAT_VEC3:
1978 return packFloat16Bit ? "%v3u32" : "%v3f32"; // f16 values will be bitcast from ui32.
1979 case glu::TYPE_FLOAT_VEC4:
1980 return packFloat16Bit ? "%v4u32" : "%v4f32"; // f16 values will be bitcast from ui32.
1981 case glu::TYPE_INT:
1982 return "%i32";
1983 case glu::TYPE_INT_VEC2:
1984 return "%v2i32";
1985 case glu::TYPE_INT_VEC3:
1986 return "%v3i32";
1987 case glu::TYPE_INT_VEC4:
1988 return "%v4i32";
1989 case glu::TYPE_DOUBLE:
1990 return "%f64";
1991 case glu::TYPE_DOUBLE_VEC2:
1992 return "%v2f64";
1993 case glu::TYPE_DOUBLE_VEC3:
1994 return "%v3f64";
1995 case glu::TYPE_DOUBLE_VEC4:
1996 return "%v4f64";
1997 default:
1998 DE_ASSERT(0);
1999 return "";
2000 }
2001 }
2002
// Emits SPIR-V assembly that shifts the i32 value held in the given variable
// one bit to the left, i.e. "*variableName <<= 1".
//
// \param variableName	SPIR-V id of an i32 pointer variable (with leading '%').
//						Passed by const reference to avoid a per-call string
//						copy (the original took it by value).
// \param operationNdx	Unique index used to keep generated result ids distinct.
// \return				SPIR-V snippet, starting and ending with a newline.
std::string moveBitOperation (const std::string& variableName, const int operationNdx)
{
	std::ostringstream src;
	src << "\n"
		<< "%operation_move_" << operationNdx << " = OpLoad %i32 " << variableName << "\n"
		<< "%move1_" << operationNdx << " = OpShiftLeftLogical %i32 %operation_move_"<< operationNdx <<" %c_i32_1\n"
		<< "OpStore " << variableName << " %move1_" << operationNdx << "\n";
	return src.str();
}
2012
scalarComparison(const std::string operation,const int operationNdx,const glu::DataType type,const std::string & outputType,const int scalarSize)2013 std::string scalarComparison(const std::string operation, const int operationNdx, const glu::DataType type, const std::string& outputType, const int scalarSize)
2014 {
2015 std::ostringstream src;
2016 std::string boolType;
2017
2018 switch (type)
2019 {
2020 case glu::TYPE_FLOAT16:
2021 case glu::TYPE_FLOAT:
2022 case glu::TYPE_DOUBLE:
2023 src << "\n"
2024 << "%operation_result_" << operationNdx << " = " << operation << " %bool %in0_val %in1_val\n"
2025 << "OpSelectionMerge %IF_" << operationNdx << " None\n"
2026 << "OpBranchConditional %operation_result_" << operationNdx << " %label_IF_" << operationNdx << " %IF_" << operationNdx << "\n"
2027 << "%label_IF_" << operationNdx << " = OpLabel\n"
2028 << "%operation_val_" << operationNdx << " = OpLoad %i32 %operation\n"
2029 << "%out_val_" << operationNdx << " = OpLoad %i32 %out0\n"
2030 << "%add_if_" << operationNdx << " = OpIAdd %i32 %out_val_" << operationNdx << " %operation_val_" << operationNdx << "\n"
2031 << "OpStore %out0 %add_if_" << operationNdx << "\n"
2032 << "OpBranch %IF_" << operationNdx << "\n"
2033 << "%IF_" << operationNdx << " = OpLabel\n";
2034 return src.str();
2035 case glu::TYPE_FLOAT16_VEC2:
2036 case glu::TYPE_FLOAT_VEC2:
2037 case glu::TYPE_DOUBLE_VEC2:
2038 boolType = "%v2bool";
2039 break;
2040 case glu::TYPE_FLOAT16_VEC3:
2041 case glu::TYPE_FLOAT_VEC3:
2042 case glu::TYPE_DOUBLE_VEC3:
2043 boolType = "%v3bool";
2044 break;
2045 case glu::TYPE_FLOAT16_VEC4:
2046 case glu::TYPE_FLOAT_VEC4:
2047 case glu::TYPE_DOUBLE_VEC4:
2048 boolType = "%v4bool";
2049 break;
2050 default:
2051 DE_ASSERT(0);
2052 return "";
2053 }
2054
2055 src << "\n"
2056 << "%operation_result_" << operationNdx << " = " << operation << " " << boolType << " %in0_val %in1_val\n"
2057 << "%ivec_result_" << operationNdx << " = OpSelect " << outputType << " %operation_result_" << operationNdx << " %c_" << &outputType[1] << "_1 %c_" << &outputType[1] << "_0\n"
2058 << "%operation_val_" << operationNdx << " = OpLoad %i32 %operation\n";
2059
2060 src << "%operation_vec_" << operationNdx << " = OpCompositeConstruct " << outputType;
2061 for(int ndx = 0; ndx < scalarSize; ++ndx)
2062 src << " %operation_val_" << operationNdx;
2063 src << "\n";
2064
2065 src << "%toAdd" << operationNdx << " = OpIMul "<< outputType << " %ivec_result_" << operationNdx << " %operation_vec_" << operationNdx <<"\n"
2066 << "%out_val_" << operationNdx << " = OpLoad "<< outputType << " %out0\n"
2067
2068 << "%add_if_" << operationNdx << " = OpIAdd " << outputType << " %out_val_" << operationNdx << " %toAdd" << operationNdx << "\n"
2069 << "OpStore %out0 %add_if_" << operationNdx << "\n";
2070
2071 return src.str();
2072 }
2073
generateSpirv(const ShaderSpec & spec,const bool are16Bit,const bool are64Bit,const bool isMediump)2074 std::string generateSpirv(const ShaderSpec& spec, const bool are16Bit, const bool are64Bit, const bool isMediump)
2075 {
2076 static const std::string COMPARE_OPERATIONS[] =
2077 {
2078 "OpFOrdEqual",
2079 "OpFOrdGreaterThan",
2080 "OpFOrdLessThan",
2081 "OpFOrdGreaterThanEqual",
2082 "OpFOrdLessThanEqual",
2083 "OpFUnordEqual",
2084 "OpFUnordGreaterThan",
2085 "OpFUnordLessThan",
2086 "OpFUnordGreaterThanEqual",
2087 "OpFUnordLessThanEqual"
2088 };
2089
2090 int moveBitNdx = 0;
2091 vector<std::string> inputTypes;
2092 vector<std::string> outputTypes;
2093 const std::string packType = spec.packFloat16Bit ? getTypeSpirv(getDataTypeFloat16Scalars(spec.inputs[0].varType.getBasicType())) : "";
2094
2095 vector<bool> floatResult;
2096 for (const auto& symbol : spec.outputs)
2097 floatResult.push_back(glu::isDataTypeFloatType(symbol.varType.getBasicType()));
2098
2099 const bool anyFloatResult = std::any_of(begin(floatResult), end(floatResult), [](bool b) { return b; });
2100
2101 vector<bool> packFloatRes;
2102 for (const auto& floatRes : floatResult)
2103 packFloatRes.push_back(floatRes && spec.packFloat16Bit);
2104
2105 const bool useF32Types = (!are16Bit && !are64Bit);
2106 const bool useF64Types = are64Bit;
2107 const bool useF16Types = (spec.packFloat16Bit || are16Bit);
2108
2109 for (const auto& symbol : spec.inputs)
2110 inputTypes.push_back(getTypeSpirv(symbol.varType.getBasicType(), spec.packFloat16Bit));
2111
2112 for (const auto& symbol : spec.outputs)
2113 outputTypes.push_back(getTypeSpirv(symbol.varType.getBasicType(), spec.packFloat16Bit));
2114
2115 DE_ASSERT(!inputTypes.empty());
2116 DE_ASSERT(!outputTypes.empty());
2117
2118 // Assert input and output types match the expected operations.
2119 switch (spec.spirvCase)
2120 {
2121 case SPIRV_CASETYPE_COMPARE:
2122 case SPIRV_CASETYPE_FREM:
2123 DE_ASSERT(inputTypes.size() == 2);
2124 DE_ASSERT(outputTypes.size() == 1);
2125 break;
2126 case SPIRV_CASETYPE_MODFSTRUCT:
2127 case SPIRV_CASETYPE_FREXPSTRUCT:
2128 DE_ASSERT(inputTypes.size() == 1);
2129 DE_ASSERT(outputTypes.size() == 2);
2130 break;
2131 default:
2132 DE_ASSERT(false);
2133 break;
2134 }
2135
2136 std::ostringstream src;
2137 src << "; SPIR-V\n"
2138 "; Version: 1.0\n"
2139 "; Generator: Khronos Glslang Reference Front End; 4\n"
2140 "; Bound: 114\n"
2141 "; Schema: 0\n"
2142 "OpCapability Shader\n";
2143
2144 if (useF16Types)
2145 src << "OpCapability Float16\n";
2146
2147 if (are16Bit)
2148 src << "OpCapability StorageBuffer16BitAccess\n"
2149 "OpCapability UniformAndStorageBuffer16BitAccess\n";
2150
2151 if (useF64Types)
2152 src << "OpCapability Float64\n";
2153
2154 if (are16Bit)
2155 src << "OpExtension \"SPV_KHR_16bit_storage\"\n";
2156
2157 src << "%glslstd450 = OpExtInstImport \"GLSL.std.450\"\n"
2158 "OpMemoryModel Logical GLSL450\n"
2159 "OpEntryPoint GLCompute %BP_main \"main\" %BP_id3uNum %BP_id3uID\n"
2160 "OpExecutionMode %BP_main LocalSize 1 1 1\n"
2161 "OpDecorate %BP_id3uNum BuiltIn NumWorkgroups\n"
2162 "OpDecorate %BP_id3uID BuiltIn WorkgroupId\n";
2163
2164 // Input offsets and stride.
2165 {
2166 int offset = 0;
2167 int ndx = 0;
2168 int largest = 0;
2169 for (const auto& symbol : spec.inputs)
2170 {
2171 const int scalarSize = symbol.varType.getScalarSize();
2172 const int memberSize = (scalarSize + ((scalarSize == 3) ? 1 : 0)) * (isDataTypeDoubleType(symbol.varType.getBasicType()) ? (int)sizeof(deUint64) : (isDataTypeFloat16OrVec(symbol.varType.getBasicType()) ? (int)sizeof(deUint16) : (int)sizeof(deUint32)));
2173 const int extraMemberBytes = (offset % memberSize);
2174
2175 offset += ((extraMemberBytes == 0) ? 0 : (memberSize - extraMemberBytes));
2176 src << "OpMemberDecorate %SSB0_IN "<< ndx <<" Offset " << offset << "\n";
2177 ++ndx;
2178
2179 if (memberSize > largest)
2180 largest = memberSize;
2181
2182 offset += memberSize;
2183 }
2184 DE_ASSERT(largest > 0);
2185 const int extraBytes = (offset % largest);
2186 const int stride = offset + (extraBytes == 0 ? 0 : (largest - extraBytes));
2187 src << "OpDecorate %up_SSB0_IN ArrayStride "<< stride << "\n";
2188 }
2189
2190 src << "OpMemberDecorate %ssboIN 0 Offset 0\n"
2191 "OpDecorate %ssboIN BufferBlock\n"
2192 "OpDecorate %ssbo_src DescriptorSet 0\n"
2193 "OpDecorate %ssbo_src Binding 0\n"
2194 "\n";
2195
2196 if (isMediump)
2197 {
2198 for (size_t i = 0; i < inputTypes.size(); ++i)
2199 {
2200 src <<
2201 "OpMemberDecorate %SSB0_IN " << i << " RelaxedPrecision\n"
2202 "OpDecorate %in" << i << " RelaxedPrecision\n"
2203 "OpDecorate %src_val_0_" << i << " RelaxedPrecision\n"
2204 "OpDecorate %in" << i << "_val RelaxedPrecision\n"
2205 ;
2206 }
2207
2208 if (anyFloatResult)
2209 {
2210 switch (spec.spirvCase)
2211 {
2212 case SPIRV_CASETYPE_FREM:
2213 src << "OpDecorate %frem_result RelaxedPrecision\n";
2214 break;
2215 case SPIRV_CASETYPE_MODFSTRUCT:
2216 src << "OpDecorate %modfstruct_result RelaxedPrecision\n";
2217 break;
2218 case SPIRV_CASETYPE_FREXPSTRUCT:
2219 src << "OpDecorate %frexpstruct_result RelaxedPrecision\n";
2220 break;
2221 default:
2222 DE_ASSERT(false);
2223 break;
2224 }
2225
2226 for (size_t i = 0; i < outputTypes.size(); ++i)
2227 {
2228 src << "OpMemberDecorate %SSB0_OUT " << i << " RelaxedPrecision\n";
2229 src << "OpDecorate %out_val_final_" << i << " RelaxedPrecision\n";
2230 src << "OpDecorate %out" << i << " RelaxedPrecision\n";
2231 }
2232 }
2233 }
2234
2235 // Output offsets and stride.
2236 {
2237 int offset = 0;
2238 int ndx = 0;
2239 int largest = 0;
2240 for (const auto& symbol : spec.outputs)
2241 {
2242 const int scalarSize = symbol.varType.getScalarSize();
2243 const int memberSize = (scalarSize + ((scalarSize == 3) ? 1 : 0)) * (isDataTypeDoubleType(symbol.varType.getBasicType()) ? (int)sizeof(deUint64) : (isDataTypeFloat16OrVec(symbol.varType.getBasicType()) ? (int)sizeof(deUint16) : (int)sizeof(deUint32)));
2244 const int extraMemberBytes = (offset % memberSize);
2245
2246 offset += ((extraMemberBytes == 0) ? 0 : (memberSize - extraMemberBytes));
2247 src << "OpMemberDecorate %SSB0_OUT " << ndx << " Offset " << offset << "\n";
2248 ++ndx;
2249
2250 if (memberSize > largest)
2251 largest = memberSize;
2252
2253 offset += memberSize;
2254 }
2255 DE_ASSERT(largest > 0);
2256 const int extraBytes = (offset % largest);
2257 const int stride = offset + ((extraBytes == 0) ? 0 : (largest - extraBytes));
2258 src << "OpDecorate %up_SSB0_OUT ArrayStride " << stride << "\n";
2259 }
2260
2261 src << "OpMemberDecorate %ssboOUT 0 Offset 0\n"
2262 "OpDecorate %ssboOUT BufferBlock\n"
2263 "OpDecorate %ssbo_dst DescriptorSet 0\n"
2264 "OpDecorate %ssbo_dst Binding 1\n"
2265 "\n"
2266 "%void = OpTypeVoid\n"
2267 "%bool = OpTypeBool\n"
2268 "%v2bool = OpTypeVector %bool 2\n"
2269 "%v3bool = OpTypeVector %bool 3\n"
2270 "%v4bool = OpTypeVector %bool 4\n"
2271 "%u32 = OpTypeInt 32 0\n";
2272
2273 if (useF32Types)
2274 src << "%f32 = OpTypeFloat 32\n"
2275 "%v2f32 = OpTypeVector %f32 2\n"
2276 "%v3f32 = OpTypeVector %f32 3\n"
2277 "%v4f32 = OpTypeVector %f32 4\n";
2278
2279 if (useF64Types)
2280 src << "%f64 = OpTypeFloat 64\n"
2281 "%v2f64 = OpTypeVector %f64 2\n"
2282 "%v3f64 = OpTypeVector %f64 3\n"
2283 "%v4f64 = OpTypeVector %f64 4\n";
2284
2285 if (useF16Types)
2286 src << "%f16 = OpTypeFloat 16\n"
2287 "%v2f16 = OpTypeVector %f16 2\n"
2288 "%v3f16 = OpTypeVector %f16 3\n"
2289 "%v4f16 = OpTypeVector %f16 4\n";
2290
2291 src << "%i32 = OpTypeInt 32 1\n"
2292 "%v2i32 = OpTypeVector %i32 2\n"
2293 "%v3i32 = OpTypeVector %i32 3\n"
2294 "%v4i32 = OpTypeVector %i32 4\n"
2295 "%v2u32 = OpTypeVector %u32 2\n"
2296 "%v3u32 = OpTypeVector %u32 3\n"
2297 "%v4u32 = OpTypeVector %u32 4\n"
2298 "\n"
2299 "%ip_u32 = OpTypePointer Input %u32\n"
2300 "%ip_v3u32 = OpTypePointer Input %v3u32\n"
2301 "%up_float = OpTypePointer Uniform " << inputTypes[0] << "\n"
2302 "\n"
2303 "%fp_operation = OpTypePointer Function %i32\n"
2304 "%voidf = OpTypeFunction %void\n"
2305 "%fp_u32 = OpTypePointer Function %u32\n"
2306 "%fp_it1 = OpTypePointer Function " << inputTypes[0] << "\n"
2307 ;
2308
2309 for (size_t i = 0; i < outputTypes.size(); ++i)
2310 {
2311 src << "%fp_out_" << i << " = OpTypePointer Function " << outputTypes[i] << "\n"
2312 << "%up_out_" << i << " = OpTypePointer Uniform " << outputTypes[i] << "\n";
2313 }
2314
2315 if (spec.packFloat16Bit)
2316 src << "%fp_f16 = OpTypePointer Function " << packType << "\n";
2317
2318 src << "%BP_id3uID = OpVariable %ip_v3u32 Input\n"
2319 "%BP_id3uNum = OpVariable %ip_v3u32 Input\n"
2320 "\n"
2321 "%c_u32_0 = OpConstant %u32 0\n"
2322 "%c_u32_1 = OpConstant %u32 1\n"
2323 "%c_u32_2 = OpConstant %u32 2\n"
2324 "%c_i32_0 = OpConstant %i32 0\n"
2325 "%c_i32_1 = OpConstant %i32 1\n"
2326 "\n";
2327
2328 if (useF32Types)
2329 src <<
2330 "%c_f32_0 = OpConstant %f32 0\n"
2331 "%c_f32_1 = OpConstant %f32 1\n"
2332 ;
2333
2334 if (useF16Types)
2335 src <<
2336 "%c_f16_0 = OpConstant %f16 0\n"
2337 "%c_f16_1 = OpConstant %f16 1\n"
2338 "%c_f16_minus1 = OpConstant %f16 -0x1p+0"
2339 ;
2340
2341 if (useF64Types)
2342 src <<
2343 "%c_f64_0 = OpConstant %f64 0\n"
2344 "%c_f64_1 = OpConstant %f64 1\n"
2345 ;
2346
2347 src << "\n"
2348 "%c_v2i32_0 = OpConstantComposite %v2i32 %c_i32_0 %c_i32_0\n"
2349 "%c_v2i32_1 = OpConstantComposite %v2i32 %c_i32_1 %c_i32_1\n"
2350 "%c_v3i32_0 = OpConstantComposite %v3i32 %c_i32_0 %c_i32_0 %c_i32_0\n"
2351 "%c_v3i32_1 = OpConstantComposite %v3i32 %c_i32_1 %c_i32_1 %c_i32_1\n"
2352 "%c_v4i32_0 = OpConstantComposite %v4i32 %c_i32_0 %c_i32_0 %c_i32_0 %c_i32_0\n"
2353 "%c_v4i32_1 = OpConstantComposite %v4i32 %c_i32_1 %c_i32_1 %c_i32_1 %c_i32_1\n"
2354 "\n";
2355
2356 if (useF32Types)
2357 src <<
2358 "%c_v2f32_0 = OpConstantComposite %v2f32 %c_f32_0 %c_f32_0\n"
2359 "%c_v2f32_1 = OpConstantComposite %v2f32 %c_f32_1 %c_f32_1\n"
2360 "%c_v3f32_0 = OpConstantComposite %v3f32 %c_f32_0 %c_f32_0 %c_f32_0\n"
2361 "%c_v3f32_1 = OpConstantComposite %v3f32 %c_f32_1 %c_f32_1 %c_f32_1\n"
2362 "%c_v4f32_0 = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_0 %c_f32_0\n"
2363 "%c_v4f32_1 = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
2364 ;
2365
2366 if (useF16Types)
2367 src <<
2368 "%c_v2f16_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
2369 "%c_v2f16_1 = OpConstantComposite %v2f16 %c_f16_1 %c_f16_1\n"
2370 "%c_v3f16_0 = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0 %c_f16_0\n"
2371 "%c_v3f16_1 = OpConstantComposite %v3f16 %c_f16_1 %c_f16_1 %c_f16_1\n"
2372 "%c_v4f16_0 = OpConstantComposite %v4f16 %c_f16_0 %c_f16_0 %c_f16_0 %c_f16_0\n"
2373 "%c_v4f16_1 = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_1\n"
2374 ;
2375
2376 if (useF64Types)
2377 src <<
2378 "%c_v2f64_0 = OpConstantComposite %v2f64 %c_f64_0 %c_f64_0\n"
2379 "%c_v2f64_1 = OpConstantComposite %v2f64 %c_f64_1 %c_f64_1\n"
2380 "%c_v3f64_0 = OpConstantComposite %v3f64 %c_f64_0 %c_f64_0 %c_f64_0\n"
2381 "%c_v3f64_1 = OpConstantComposite %v3f64 %c_f64_1 %c_f64_1 %c_f64_1\n"
2382 "%c_v4f64_0 = OpConstantComposite %v4f64 %c_f64_0 %c_f64_0 %c_f64_0 %c_f64_0\n"
2383 "%c_v4f64_1 = OpConstantComposite %v4f64 %c_f64_1 %c_f64_1 %c_f64_1 %c_f64_1\n"
2384 "\n";
2385
2386 // Input struct.
2387 {
2388 src << "%SSB0_IN = OpTypeStruct";
2389 for (const auto& t : inputTypes)
2390 src << " " << t;
2391 src << "\n";
2392 }
2393
2394 src <<
2395 "%up_SSB0_IN = OpTypeRuntimeArray %SSB0_IN\n"
2396 "%ssboIN = OpTypeStruct %up_SSB0_IN\n"
2397 "%up_ssboIN = OpTypePointer Uniform %ssboIN\n"
2398 "%ssbo_src = OpVariable %up_ssboIN Uniform\n"
2399 "\n";
2400
2401 // Output struct.
2402 {
2403 src << "%SSB0_OUT = OpTypeStruct";
2404 for (const auto& t : outputTypes)
2405 src << " " << t;
2406 src << "\n";
2407 }
2408
2409 std::string modfStructMemberType;
2410 std::string frexpStructFirstMemberType;
2411 if (spec.spirvCase == SPIRV_CASETYPE_MODFSTRUCT)
2412 {
2413 modfStructMemberType = (packFloatRes[0] ? packType : outputTypes[0]);
2414 src << "%modfstruct_ret_t = OpTypeStruct " << modfStructMemberType << " " << modfStructMemberType << "\n";
2415 }
2416 else if (spec.spirvCase == SPIRV_CASETYPE_FREXPSTRUCT)
2417 {
2418 frexpStructFirstMemberType = (packFloatRes[0] ? packType : outputTypes[0]);
2419 src << "%frexpstruct_ret_t = OpTypeStruct " << frexpStructFirstMemberType << " " << outputTypes[1] << "\n";
2420 }
2421
2422 src <<
2423 "%up_SSB0_OUT = OpTypeRuntimeArray %SSB0_OUT\n"
2424 "%ssboOUT = OpTypeStruct %up_SSB0_OUT\n"
2425 "%up_ssboOUT = OpTypePointer Uniform %ssboOUT\n"
2426 "%ssbo_dst = OpVariable %up_ssboOUT Uniform\n"
2427 "\n"
2428 "%BP_main = OpFunction %void None %voidf\n"
2429 "%BP_label = OpLabel\n"
2430 "%invocationNdx = OpVariable %fp_u32 Function\n";
2431
2432 // Note: here we are supposing all inputs have the same type.
2433 for (size_t i = 0; i < inputTypes.size(); ++i)
2434 src << "%in" << i << " = OpVariable " << (spec.packFloat16Bit ? "%fp_f16" : "%fp_it1") << " Function\n";
2435
2436 for (size_t i = 0; i < outputTypes.size(); ++i)
2437 src << "%out" << i << " = OpVariable " << (packFloatRes[i] ? std::string("%fp_f16") : std::string("%fp_out_") + de::toString(i)) << " Function\n";
2438
2439 src << "%operation = OpVariable %fp_operation Function\n"
2440 "%BP_id_0_ptr = OpAccessChain %ip_u32 %BP_id3uID %c_u32_0\n"
2441 "%BP_id_1_ptr = OpAccessChain %ip_u32 %BP_id3uID %c_u32_1\n"
2442 "%BP_id_2_ptr = OpAccessChain %ip_u32 %BP_id3uID %c_u32_2\n"
2443 "%BP_num_0_ptr = OpAccessChain %ip_u32 %BP_id3uNum %c_u32_0\n"
2444 "%BP_num_1_ptr = OpAccessChain %ip_u32 %BP_id3uNum %c_u32_1\n"
2445 "%BP_id_0_val = OpLoad %u32 %BP_id_0_ptr\n"
2446 "%BP_id_1_val = OpLoad %u32 %BP_id_1_ptr\n"
2447 "%BP_id_2_val = OpLoad %u32 %BP_id_2_ptr\n"
2448 "%BP_num_0_val = OpLoad %u32 %BP_num_0_ptr\n"
2449 "%BP_num_1_val = OpLoad %u32 %BP_num_1_ptr\n"
2450 "\n"
2451 "%mul_1 = OpIMul %u32 %BP_num_0_val %BP_num_1_val\n"
2452 "%mul_2 = OpIMul %u32 %mul_1 %BP_id_2_val\n"
2453 "%mul_3 = OpIMul %u32 %BP_num_0_val %BP_id_1_val\n"
2454 "%add_1 = OpIAdd %u32 %mul_2 %mul_3\n"
2455 "%add_2 = OpIAdd %u32 %add_1 %BP_id_0_val\n"
2456 "OpStore %invocationNdx %add_2\n"
2457 "%invocationNdx_val = OpLoad %u32 %invocationNdx\n";
2458
2459 // Load input values.
2460 for (size_t inputNdx = 0; inputNdx < inputTypes.size(); ++inputNdx)
2461 {
2462 src << "\n"
2463 << "%src_ptr_0_" << inputNdx << " = OpAccessChain %up_float %ssbo_src %c_i32_0 %invocationNdx_val %c_i32_" << inputNdx << "\n"
2464 << "%src_val_0_" << inputNdx << " = OpLoad " << inputTypes[inputNdx] << " %src_ptr_0_" << inputNdx << "\n";
2465
2466 if (spec.packFloat16Bit)
2467 {
2468 if (spec.inputs[inputNdx].varType.getScalarSize() > 1)
2469 {
2470 // Extract the val<inputNdx> u32 input channels into individual f16 values.
2471 for (int i = 0; i < spec.inputs[inputNdx].varType.getScalarSize(); ++i)
2472 {
2473 src << "%src_val_0_" << inputNdx << "_" << i << " = OpCompositeExtract %u32 %src_val_0_" << inputNdx << " " << i << "\n"
2474 "%val_v2f16_0_" << inputNdx << "_" << i << " = OpBitcast %v2f16 %src_val_0_" << inputNdx << "_" << i << "\n"
2475 "%val_f16_0_" << inputNdx << "_" << i << " = OpCompositeExtract %f16 %val_v2f16_0_" << inputNdx << "_" << i << " 0\n";
2476 }
2477
2478 // Construct the input vector.
2479 src << "%val_f16_0_" << inputNdx << " = OpCompositeConstruct " << packType;
2480 for (int i = 0; i < spec.inputs[inputNdx].varType.getScalarSize(); ++i)
2481 {
2482 src << " %val_f16_0_" << inputNdx << "_" << i;
2483 }
2484
2485 src << "\n";
2486 src << "OpStore %in" << inputNdx << " %val_f16_0_" << inputNdx << "\n";
2487 }
2488 else
2489 {
2490 src << "%val_v2f16_0_" << inputNdx << " = OpBitcast %v2f16 %src_val_0_" << inputNdx << "\n"
2491 "%val_f16_0_" << inputNdx << " = OpCompositeExtract %f16 %val_v2f16_0_" << inputNdx << " 0\n";
2492
2493 src << "OpStore %in" << inputNdx << " %val_f16_0_" << inputNdx << "\n";
2494 }
2495 }
2496 else
2497 src << "OpStore %in" << inputNdx << " %src_val_0_" << inputNdx << "\n";
2498
2499 src << "%in" << inputNdx << "_val = OpLoad " << (spec.packFloat16Bit ? packType : inputTypes[inputNdx]) << " %in" << inputNdx << "\n";
2500 }
2501
2502 src << "\n"
2503 "OpStore %operation %c_i32_1\n";
2504
2505 // Fill output values with dummy data.
2506 for (size_t i = 0; i < outputTypes.size(); ++i)
2507 src << "OpStore %out" << i << " %c_" << (packFloatRes[i] ? &packType[1] : &outputTypes[i][1]) << "_0\n";
2508
2509 src << "\n";
2510
2511 // Run operation.
2512 switch (spec.spirvCase)
2513 {
2514 case SPIRV_CASETYPE_COMPARE:
2515 for (int operationNdx = 0; operationNdx < DE_LENGTH_OF_ARRAY(COMPARE_OPERATIONS); ++operationNdx)
2516 {
2517 src << scalarComparison (COMPARE_OPERATIONS[operationNdx], operationNdx,
2518 spec.inputs[0].varType.getBasicType(),
2519 outputTypes[0],
2520 spec.outputs[0].varType.getScalarSize());
2521 src << moveBitOperation("%operation", moveBitNdx);
2522 ++moveBitNdx;
2523 }
2524 break;
2525 case SPIRV_CASETYPE_FREM:
2526 src << "%frem_result = OpFRem " << (packFloatRes[0] ? packType : outputTypes[0]) << " %in0_val %in1_val\n"
2527 << "OpStore %out0 %frem_result\n";
2528 break;
2529 case SPIRV_CASETYPE_MODFSTRUCT:
2530 src << "%modfstruct_result = OpExtInst %modfstruct_ret_t %glslstd450 ModfStruct %in0_val\n"
2531 << "%modfstruct_result_0 = OpCompositeExtract " << modfStructMemberType << " %modfstruct_result 0\n"
2532 << "%modfstruct_result_1 = OpCompositeExtract " << modfStructMemberType << " %modfstruct_result 1\n"
2533 << "OpStore %out0 %modfstruct_result_0\n"
2534 << "OpStore %out1 %modfstruct_result_1\n";
2535 break;
2536 case SPIRV_CASETYPE_FREXPSTRUCT:
2537 src << "%frexpstruct_result = OpExtInst %frexpstruct_ret_t %glslstd450 FrexpStruct %in0_val\n"
2538 << "%frexpstruct_result_0 = OpCompositeExtract " << frexpStructFirstMemberType << " %frexpstruct_result 0\n"
2539 << "%frexpstruct_result_1 = OpCompositeExtract " << outputTypes[1] << " %frexpstruct_result 1\n"
2540 << "OpStore %out0 %frexpstruct_result_0\n"
2541 << "OpStore %out1 %frexpstruct_result_1\n";
2542 break;
2543 default:
2544 DE_ASSERT(false);
2545 break;
2546 }
2547
2548 for (size_t outputNdx = 0; outputNdx < outputTypes.size(); ++outputNdx)
2549 {
2550 src << "\n"
2551 "%out_val_final_" << outputNdx << " = OpLoad " << (packFloatRes[outputNdx] ? packType : outputTypes[outputNdx]) << " %out" << outputNdx << "\n"
2552 "%ssbo_dst_ptr_" << outputNdx << " = OpAccessChain %up_out_" << outputNdx << " %ssbo_dst %c_i32_0 %invocationNdx_val %c_i32_" << outputNdx << "\n";
2553
2554 if (packFloatRes[outputNdx])
2555 {
2556 if (spec.outputs[outputNdx].varType.getScalarSize() > 1)
2557 {
2558 for (int i = 0; i < spec.outputs[outputNdx].varType.getScalarSize(); ++i)
2559 {
2560 src << "%out_val_final_" << outputNdx << "_" << i << " = OpCompositeExtract %f16 %out_val_final_" << outputNdx << " " << i << "\n";
2561 src << "%out_composite_" << outputNdx << "_" << i << " = OpCompositeConstruct %v2f16 %out_val_final_" << outputNdx << "_" << i << " %c_f16_minus1\n";
2562 src << "%u32_val_" << outputNdx << "_" << i << " = OpBitcast %u32 %out_composite_" << outputNdx << "_" << i << "\n";
2563 }
2564
2565 src << "%u32_final_val_" << outputNdx << " = OpCompositeConstruct " << outputTypes[outputNdx];
2566 for (int i = 0; i < spec.outputs[outputNdx].varType.getScalarSize(); ++i)
2567 src << " %u32_val_" << outputNdx << "_" << i;
2568 src << "\n";
2569 src << "OpStore %ssbo_dst_ptr_" << outputNdx << " %u32_final_val_" << outputNdx << "\n";
2570 }
2571 else
2572 {
2573 src <<
2574 "%out_composite_" << outputNdx << " = OpCompositeConstruct %v2f16 %out_val_final_" << outputNdx << " %c_f16_minus1\n"
2575 "%out_result_" << outputNdx << " = OpBitcast " << outputTypes[outputNdx] << " %out_composite_" << outputNdx << "\n"
2576 "OpStore %ssbo_dst_ptr_" << outputNdx << " %out_result_" << outputNdx << "\n";
2577 }
2578 }
2579 else
2580 {
2581 src << "OpStore %ssbo_dst_ptr_" << outputNdx << " %out_val_final_" << outputNdx << "\n";
2582 }
2583 }
2584
2585 src << "\n"
2586 "OpReturn\n"
2587 "OpFunctionEnd\n";
2588
2589 return src.str();
2590 }
2591
2592
generateComputeShader(const ShaderSpec & spec)2593 std::string ComputeShaderExecutor::generateComputeShader (const ShaderSpec& spec)
2594 {
2595 if (spec.spirvCase != SPIRV_CASETYPE_NONE)
2596 {
2597 bool are16Bit = false;
2598 bool are64Bit = false;
2599 bool isMediump = false;
2600 for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
2601 {
2602 if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
2603 are16Bit = true;
2604
2605 if (glu::isDataTypeDoubleType(symIter->varType.getBasicType()))
2606 are64Bit = true;
2607
2608 if (symIter->varType.getPrecision() == glu::PRECISION_MEDIUMP)
2609 isMediump = true;
2610
2611 if (isMediump && are16Bit)
2612 break;
2613 }
2614
2615 return generateSpirv(spec, are16Bit, are64Bit, isMediump);
2616 }
2617 else
2618 {
2619 std::ostringstream src;
2620 src << glu::getGLSLVersionDeclaration(spec.glslVersion) << "\n";
2621
2622 if (!spec.globalDeclarations.empty())
2623 src << spec.globalDeclarations << "\n";
2624
2625 src << "layout(local_size_x = " << spec.localSizeX << ") in;\n"
2626 << "\n";
2627
2628 declareBufferBlocks(src, spec);
2629
2630 src << "void main (void)\n"
2631 << "{\n"
2632 << " uint invocationNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z\n"
2633 << " + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n";
2634
2635 generateExecBufferIo(src, spec, "invocationNdx");
2636
2637 src << "}\n";
2638
2639 return src.str();
2640 }
2641 }
2642
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)2643 void ComputeShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
2644 {
2645 if (shaderSpec.spirvCase != SPIRV_CASETYPE_NONE)
2646 programCollection.spirvAsmSources.add("compute") << SpirVAsmBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3) << generateComputeShader(shaderSpec);
2647 else
2648 programCollection.glslSources.add("compute") << glu::ComputeSource(generateComputeShader(shaderSpec)) << shaderSpec.buildOptions;
2649 }
2650
// Runs the compute shader over 'numValues' values. Inputs are marshalled into
// an input SSBO, the work is dispatched (in batches when numValues exceeds the
// per-dispatch cap below) and results are read back from the output SSBO into
// 'outputs'. 'extraResources' is an optional second descriptor set supplied by
// the caller; it must be present exactly when m_extraResourcesLayout is set.
void ComputeShaderExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
{
	const VkDevice					vkDevice				= m_context.getDevice();
	const DeviceInterface&			vk						= m_context.getDeviceInterface();
	const VkQueue					queue					= m_context.getUniversalQueue();
	const deUint32					queueFamilyIndex		= m_context.getUniversalQueueFamilyIndex();

	DescriptorPoolBuilder			descriptorPoolBuilder;
	DescriptorSetLayoutBuilder		descriptorSetLayoutBuilder;

	Move<VkShaderModule>			computeShaderModule;
	Move<VkPipeline>				computePipeline;
	Move<VkPipelineLayout>			pipelineLayout;
	Move<VkCommandPool>				cmdPool;
	Move<VkDescriptorPool>			descriptorPool;
	Move<VkDescriptorSetLayout>		descriptorSetLayout;
	Move<VkDescriptorSet>			descriptorSet;
	// Set 0 holds the IO buffers; set 1 (optional) holds the caller's extra resources.
	const deUint32					numDescriptorSets		= (m_extraResourcesLayout != 0) ? 2u : 1u;

	// Extra resources layout and descriptor set must be provided (or omitted) together.
	DE_ASSERT((m_extraResourcesLayout != 0) == (extraResources != 0));

	// Allocate input/output SSBOs sized for numValues (BufferIoExecutor helper).
	initBuffers(numValues);

	// Setup input buffer & copy data
	// For spirv shaders using packed 16 bit float values as input, the floats are converted to 16 bit before
	// storing in the lower 16 bits of 32 bit integers in the uniform buffer and cast back to 16 bit floats in
	// the shader.
	uploadInputBuffer(inputs, numValues, m_shaderSpec.packFloat16Bit && (m_shaderSpec.spirvCase != SPIRV_CASETYPE_NONE));

	// Create command pool; transient because command buffers are recorded fresh per batch.
	cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);

	// Create descriptor set layout with two storage-buffer bindings (output and input).

	descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
	descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
	descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
	descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);

	descriptorSetLayout = descriptorSetLayoutBuilder.build(vk, vkDevice);
	descriptorPool = descriptorPoolBuilder.build(vk, vkDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

	const VkDescriptorSetAllocateInfo allocInfo =
	{
		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
		DE_NULL,
		*descriptorPool,
		1u,
		&*descriptorSetLayout
	};

	descriptorSet = allocateDescriptorSet(vk, vkDevice, &allocInfo);

	// Create pipeline layout
	{
		const VkDescriptorSetLayout descriptorSetLayouts[] =
		{
			*descriptorSetLayout,
			m_extraResourcesLayout
		};
		const VkPipelineLayoutCreateInfo pipelineLayoutParams =
		{
			VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,		// VkStructureType				sType;
			DE_NULL,											// const void*					pNext;
			(VkPipelineLayoutCreateFlags)0,						// VkPipelineLayoutCreateFlags	flags;
			numDescriptorSets,									// deUint32						setLayoutCount;
			descriptorSetLayouts,								// const VkDescriptorSetLayout*	pSetLayouts;
			0u,													// deUint32						pushConstantRangeCount;
			DE_NULL												// const VkPushConstantRange*	pPushConstantRanges;
		};

		pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
	}

	// Create shaders
	{
		computeShaderModule		= createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("compute"), 0);
	}

	// create pipeline
	{
		const VkPipelineShaderStageCreateInfo shaderStageParams[1] =
		{
			{
				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType						sType;
				DE_NULL,												// const void*							pNext;
				(VkPipelineShaderStageCreateFlags)0u,					// VkPipelineShaderStageCreateFlags		flags;
				VK_SHADER_STAGE_COMPUTE_BIT,							// VkShaderStageFlagBits				stage;
				*computeShaderModule,									// VkShaderModule						module;
				"main",													// const char*							pName;
				DE_NULL													// const VkSpecializationInfo*			pSpecializationInfo;
			}
		};

		const VkComputePipelineCreateInfo computePipelineParams =
		{
			VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,		// VkStructureType					sType;
			DE_NULL,											// const void*						pNext;
			(VkPipelineCreateFlags)0,							// VkPipelineCreateFlags			flags;
			*shaderStageParams,									// VkPipelineShaderStageCreateInfo	stage;
			*pipelineLayout,									// VkPipelineLayout					layout;
			0u,													// VkPipeline						basePipelineHandle;
			0u,													// int32_t							basePipelineIndex;
		};

		computePipeline = createComputePipeline(vk, vkDevice, DE_NULL, &computePipelineParams);
	}

	// Cap for values handled per dispatch; the generated shader uses gl_WorkGroupID
	// to index values, so one workgroup is dispatched per value.
	// NOTE(review): this reads limits.maxComputeWorkGroupSize[0] but is used to bound
	// the number of workgroups per dispatch; maxComputeWorkGroupCount[0] looks like
	// the intended limit — confirm.
	const int			maxValuesPerInvocation	= m_context.getDeviceProperties().limits.maxComputeWorkGroupSize[0];
	int					curOffset				= 0;
	const deUint32		inputStride				= getInputStride();
	const deUint32		outputStride			= getOutputStride();

	// Process the values in batches; each iteration rebinds the descriptors at the
	// current buffer offset, records a fresh command buffer and waits for completion
	// before the next iteration (so reusing the single descriptor set is safe).
	while (curOffset < numValues)
	{
		Move<VkCommandBuffer>		cmdBuffer;
		const int					numToExec	= de::min(maxValuesPerInvocation, numValues-curOffset);

		// Update descriptors to point at this batch's slice of the IO buffers.
		{
			DescriptorSetUpdateBuilder descriptorSetUpdateBuilder;

			const VkDescriptorBufferInfo outputDescriptorBufferInfo =
			{
				*m_outputBuffer,				// VkBuffer			buffer;
				curOffset * outputStride,		// VkDeviceSize		offset;
				numToExec * outputStride		// VkDeviceSize		range;
			};

			descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)OUTPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputDescriptorBufferInfo);

			// Shaders with no inputs have a zero input stride; skip the input binding then.
			if (inputStride)
			{
				const VkDescriptorBufferInfo inputDescriptorBufferInfo =
				{
					*m_inputBuffer,					// VkBuffer			buffer;
					curOffset * inputStride,		// VkDeviceSize		offset;
					numToExec * inputStride			// VkDeviceSize		range;
				};

				descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)INPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputDescriptorBufferInfo);
			}

			descriptorSetUpdateBuilder.update(vk, vkDevice);
		}

		cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
		beginCommandBuffer(vk, *cmdBuffer);
		vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);

		{
			// extraResources is a null handle when numDescriptorSets is 1, so only set 0 is bound.
			const VkDescriptorSet	descriptorSets[]	= { *descriptorSet, extraResources };
			vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, numDescriptorSets, descriptorSets, 0u, DE_NULL);
		}

		// One workgroup per value in this batch (see invocation index computation in the shader).
		vk.cmdDispatch(*cmdBuffer, numToExec, 1, 1);

		// Insert a barrier so data written by the shader is available to the host
		{
			const VkBufferMemoryBarrier bufferBarrier =
			{
				VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,	// VkStructureType	sType;
				DE_NULL,									// const void*		pNext;
				VK_ACCESS_SHADER_WRITE_BIT,					// VkAccessFlags	srcAccessMask;
				VK_ACCESS_HOST_READ_BIT,					// VkAccessFlags	dstAccessMask;
				VK_QUEUE_FAMILY_IGNORED,					// uint32_t			srcQueueFamilyIndex;
				VK_QUEUE_FAMILY_IGNORED,					// uint32_t			dstQueueFamilyIndex;
				*m_outputBuffer,							// VkBuffer			buffer;
				0,											// VkDeviceSize		offset;
				VK_WHOLE_SIZE,								// VkDeviceSize		size;
			};

			vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0,
								  0, (const VkMemoryBarrier*)DE_NULL,
								  1, &bufferBarrier,
								  0, (const VkImageMemoryBarrier*)DE_NULL);
		}

		endCommandBuffer(vk, *cmdBuffer);

		curOffset += numToExec;

		// Execute and block until the batch has finished.
		submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
	}

	// Read back data
	readOutputBuffer(outputs, numValues);
}
2840
2841 // Tessellation utils
2842
// Returns the source of the pass-through vertex shader used as the front end
// of the tessellation pipelines; it only produces a position from the vertex index.
static std::string generateVertexShaderForTess (void)
{
	static const char* const vertexSource =
		"#version 450\n"
		"void main (void)\n{\n"
		"	gl_Position = vec4(gl_VertexIndex/2, gl_VertexIndex%2, 0.0, 1.0);\n"
		"}\n";

	return std::string(vertexSource);
}
2853
// Shader executor that runs the test expressions in a tessellation stage.
// IO buffer management is inherited from BufferIoExecutor; renderTess does the
// actual drawing. The constructor throws NotSupportedError when the device
// lacks the tessellationShader feature.
class TessellationExecutor : public BufferIoExecutor
{
public:
						TessellationExecutor	(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual				~TessellationExecutor	(void);

	// Renders numValues values using a patch list with the given vertex and
	// patch-control-point counts; extraResources is the optional second
	// descriptor set (must match m_extraResourcesLayout being non-null).
	void				renderTess				(deUint32 numValues, deUint32 vertexCount, deUint32 patchControlPoints, VkDescriptorSet extraResources);

private:
	// Layout for the caller-provided extra resources set; null handle when unused.
	const VkDescriptorSetLayout	m_extraResourcesLayout;
};
2865
TessellationExecutor(Context & context,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)2866 TessellationExecutor::TessellationExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
2867 : BufferIoExecutor (context, shaderSpec)
2868 , m_extraResourcesLayout (extraResourcesLayout)
2869 {
2870 const VkPhysicalDeviceFeatures& features = context.getDeviceFeatures();
2871
2872 if (!features.tessellationShader)
2873 TCU_THROW(NotSupportedError, "Tessellation shader is not supported by device");
2874 }
2875
TessellationExecutor::~TessellationExecutor (void)
{
	// Intentionally empty: no explicit cleanup needed here; any owned resources
	// are released by the base class / their handle wrappers.
}
2879
renderTess(deUint32 numValues,deUint32 vertexCount,deUint32 patchControlPoints,VkDescriptorSet extraResources)2880 void TessellationExecutor::renderTess (deUint32 numValues, deUint32 vertexCount, deUint32 patchControlPoints, VkDescriptorSet extraResources)
2881 {
2882 const size_t inputBufferSize = numValues * getInputStride();
2883 const VkDevice vkDevice = m_context.getDevice();
2884 const DeviceInterface& vk = m_context.getDeviceInterface();
2885 const VkQueue queue = m_context.getUniversalQueue();
2886 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
2887 Allocator& memAlloc = m_context.getDefaultAllocator();
2888
2889 const tcu::UVec2 renderSize (DEFAULT_RENDER_WIDTH, DEFAULT_RENDER_HEIGHT);
2890
2891 Move<VkImage> colorImage;
2892 de::MovePtr<Allocation> colorImageAlloc;
2893 VkFormat colorFormat = VK_FORMAT_R8G8B8A8_UNORM;
2894 Move<VkImageView> colorImageView;
2895
2896 Move<VkRenderPass> renderPass;
2897 Move<VkFramebuffer> framebuffer;
2898 Move<VkPipelineLayout> pipelineLayout;
2899 Move<VkPipeline> graphicsPipeline;
2900
2901 Move<VkShaderModule> vertexShaderModule;
2902 Move<VkShaderModule> tessControlShaderModule;
2903 Move<VkShaderModule> tessEvalShaderModule;
2904 Move<VkShaderModule> fragmentShaderModule;
2905
2906 Move<VkCommandPool> cmdPool;
2907 Move<VkCommandBuffer> cmdBuffer;
2908
2909 Move<VkDescriptorPool> descriptorPool;
2910 Move<VkDescriptorSetLayout> descriptorSetLayout;
2911 Move<VkDescriptorSet> descriptorSet;
2912 const deUint32 numDescriptorSets = (m_extraResourcesLayout != 0) ? 2u : 1u;
2913
2914 DE_ASSERT((m_extraResourcesLayout != 0) == (extraResources != 0));
2915
2916 // Create color image
2917 {
2918 const VkImageCreateInfo colorImageParams =
2919 {
2920 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
2921 DE_NULL, // const void* pNext;
2922 0u, // VkImageCreateFlags flags;
2923 VK_IMAGE_TYPE_2D, // VkImageType imageType;
2924 colorFormat, // VkFormat format;
2925 { renderSize.x(), renderSize.y(), 1u }, // VkExtent3D extent;
2926 1u, // deUint32 mipLevels;
2927 1u, // deUint32 arraySize;
2928 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
2929 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
2930 VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT, // VkImageUsageFlags usage;
2931 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
2932 1u, // deUint32 queueFamilyCount;
2933 &queueFamilyIndex, // const deUint32* pQueueFamilyIndices;
2934 VK_IMAGE_LAYOUT_UNDEFINED // VkImageLayout initialLayout;
2935 };
2936
2937 colorImage = createImage(vk, vkDevice, &colorImageParams);
2938
2939 // Allocate and bind color image memory
2940 colorImageAlloc = memAlloc.allocate(getImageMemoryRequirements(vk, vkDevice, *colorImage), MemoryRequirement::Any);
2941 VK_CHECK(vk.bindImageMemory(vkDevice, *colorImage, colorImageAlloc->getMemory(), colorImageAlloc->getOffset()));
2942 }
2943
2944 // Create color attachment view
2945 {
2946 const VkImageViewCreateInfo colorImageViewParams =
2947 {
2948 VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // VkStructureType sType;
2949 DE_NULL, // const void* pNext;
2950 0u, // VkImageViewCreateFlags flags;
2951 *colorImage, // VkImage image;
2952 VK_IMAGE_VIEW_TYPE_2D, // VkImageViewType viewType;
2953 colorFormat, // VkFormat format;
2954 {
2955 VK_COMPONENT_SWIZZLE_R, // VkComponentSwizzle r;
2956 VK_COMPONENT_SWIZZLE_G, // VkComponentSwizzle g;
2957 VK_COMPONENT_SWIZZLE_B, // VkComponentSwizzle b;
2958 VK_COMPONENT_SWIZZLE_A // VkComponentSwizzle a;
2959 }, // VkComponentsMapping components;
2960 {
2961 VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask;
2962 0u, // deUint32 baseMipLevel;
2963 1u, // deUint32 mipLevels;
2964 0u, // deUint32 baseArraylayer;
2965 1u // deUint32 layerCount;
2966 } // VkImageSubresourceRange subresourceRange;
2967 };
2968
2969 colorImageView = createImageView(vk, vkDevice, &colorImageViewParams);
2970 }
2971
2972 // Create render pass
2973 {
2974 const VkAttachmentDescription colorAttachmentDescription =
2975 {
2976 0u, // VkAttachmentDescriptorFlags flags;
2977 colorFormat, // VkFormat format;
2978 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
2979 VK_ATTACHMENT_LOAD_OP_CLEAR, // VkAttachmentLoadOp loadOp;
2980 VK_ATTACHMENT_STORE_OP_STORE, // VkAttachmentStoreOp storeOp;
2981 VK_ATTACHMENT_LOAD_OP_DONT_CARE, // VkAttachmentLoadOp stencilLoadOp;
2982 VK_ATTACHMENT_STORE_OP_DONT_CARE, // VkAttachmentStoreOp stencilStoreOp;
2983 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
2984 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL // VkImageLayout finalLayout
2985 };
2986
2987 const VkAttachmentDescription attachments[1] =
2988 {
2989 colorAttachmentDescription
2990 };
2991
2992 const VkAttachmentReference colorAttachmentReference =
2993 {
2994 0u, // deUint32 attachment;
2995 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL // VkImageLayout layout;
2996 };
2997
2998 const VkSubpassDescription subpassDescription =
2999 {
3000 0u, // VkSubpassDescriptionFlags flags;
3001 VK_PIPELINE_BIND_POINT_GRAPHICS, // VkPipelineBindPoint pipelineBindPoint;
3002 0u, // deUint32 inputCount;
3003 DE_NULL, // const VkAttachmentReference* pInputAttachments;
3004 1u, // deUint32 colorCount;
3005 &colorAttachmentReference, // const VkAttachmentReference* pColorAttachments;
3006 DE_NULL, // const VkAttachmentReference* pResolveAttachments;
3007 DE_NULL, // VkAttachmentReference depthStencilAttachment;
3008 0u, // deUint32 preserveCount;
3009 DE_NULL // const VkAttachmentReference* pPreserveAttachments;
3010 };
3011
3012 const VkRenderPassCreateInfo renderPassParams =
3013 {
3014 VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // VkStructureType sType;
3015 DE_NULL, // const void* pNext;
3016 0u, // VkRenderPassCreateFlags flags;
3017 1u, // deUint32 attachmentCount;
3018 attachments, // const VkAttachmentDescription* pAttachments;
3019 1u, // deUint32 subpassCount;
3020 &subpassDescription, // const VkSubpassDescription* pSubpasses;
3021 0u, // deUint32 dependencyCount;
3022 DE_NULL // const VkSubpassDependency* pDependencies;
3023 };
3024
3025 renderPass = createRenderPass(vk, vkDevice, &renderPassParams);
3026 }
3027
3028 // Create framebuffer
3029 {
3030 const VkFramebufferCreateInfo framebufferParams =
3031 {
3032 VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, // VkStructureType sType;
3033 DE_NULL, // const void* pNext;
3034 0u, // VkFramebufferCreateFlags flags;
3035 *renderPass, // VkRenderPass renderPass;
3036 1u, // deUint32 attachmentCount;
3037 &*colorImageView, // const VkAttachmentBindInfo* pAttachments;
3038 (deUint32)renderSize.x(), // deUint32 width;
3039 (deUint32)renderSize.y(), // deUint32 height;
3040 1u // deUint32 layers;
3041 };
3042
3043 framebuffer = createFramebuffer(vk, vkDevice, &framebufferParams);
3044 }
3045
3046 // Create descriptors
3047 {
3048 DescriptorPoolBuilder descriptorPoolBuilder;
3049 DescriptorSetLayoutBuilder descriptorSetLayoutBuilder;
3050
3051 descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_ALL);
3052 descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3053 descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_ALL);
3054 descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3055
3056 descriptorSetLayout = descriptorSetLayoutBuilder.build(vk, vkDevice);
3057 descriptorPool = descriptorPoolBuilder.build(vk, vkDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3058
3059 const VkDescriptorSetAllocateInfo allocInfo =
3060 {
3061 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
3062 DE_NULL,
3063 *descriptorPool,
3064 1u,
3065 &*descriptorSetLayout
3066 };
3067
3068 descriptorSet = allocateDescriptorSet(vk, vkDevice, &allocInfo);
3069 // Update descriptors
3070 {
3071 DescriptorSetUpdateBuilder descriptorSetUpdateBuilder;
3072 const VkDescriptorBufferInfo outputDescriptorBufferInfo =
3073 {
3074 *m_outputBuffer, // VkBuffer buffer;
3075 0u, // VkDeviceSize offset;
3076 VK_WHOLE_SIZE // VkDeviceSize range;
3077 };
3078
3079 descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)OUTPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputDescriptorBufferInfo);
3080
3081 VkDescriptorBufferInfo inputDescriptorBufferInfo =
3082 {
3083 0, // VkBuffer buffer;
3084 0u, // VkDeviceSize offset;
3085 VK_WHOLE_SIZE // VkDeviceSize range;
3086 };
3087
3088 if (inputBufferSize > 0)
3089 {
3090 inputDescriptorBufferInfo.buffer = *m_inputBuffer;
3091
3092 descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)INPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputDescriptorBufferInfo);
3093 }
3094
3095 descriptorSetUpdateBuilder.update(vk, vkDevice);
3096 }
3097 }
3098
3099 // Create pipeline layout
3100 {
3101 const VkDescriptorSetLayout descriptorSetLayouts[] =
3102 {
3103 *descriptorSetLayout,
3104 m_extraResourcesLayout
3105 };
3106 const VkPipelineLayoutCreateInfo pipelineLayoutParams =
3107 {
3108 VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // VkStructureType sType;
3109 DE_NULL, // const void* pNext;
3110 (VkPipelineLayoutCreateFlags)0, // VkPipelineLayoutCreateFlags flags;
3111 numDescriptorSets, // deUint32 descriptorSetCount;
3112 descriptorSetLayouts, // const VkDescriptorSetLayout* pSetLayouts;
3113 0u, // deUint32 pushConstantRangeCount;
3114 DE_NULL // const VkPushConstantRange* pPushConstantRanges;
3115 };
3116
3117 pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
3118 }
3119
3120 // Create shader modules
3121 {
3122 vertexShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("vert"), 0);
3123 tessControlShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("tess_control"), 0);
3124 tessEvalShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("tess_eval"), 0);
3125 fragmentShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("frag"), 0);
3126 }
3127
3128 // Create pipeline
3129 {
3130 const VkPipelineVertexInputStateCreateInfo vertexInputStateParams =
3131 {
3132 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
3133 DE_NULL, // const void* pNext;
3134 (VkPipelineVertexInputStateCreateFlags)0, // VkPipelineVertexInputStateCreateFlags flags;
3135 0u, // deUint32 bindingCount;
3136 DE_NULL, // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
3137 0u, // deUint32 attributeCount;
3138 DE_NULL, // const VkVertexInputAttributeDescription* pvertexAttributeDescriptions;
3139 };
3140
3141 const std::vector<VkViewport> viewports (1, makeViewport(renderSize));
3142 const std::vector<VkRect2D> scissors (1, makeRect2D(renderSize));
3143
3144 graphicsPipeline = makeGraphicsPipeline(vk, // const DeviceInterface& vk
3145 vkDevice, // const VkDevice device
3146 *pipelineLayout, // const VkPipelineLayout pipelineLayout
3147 *vertexShaderModule, // const VkShaderModule vertexShaderModule
3148 *tessControlShaderModule, // const VkShaderModule tessellationControlShaderModule
3149 *tessEvalShaderModule, // const VkShaderModule tessellationEvalShaderModule
3150 DE_NULL, // const VkShaderModule geometryShaderModule
3151 *fragmentShaderModule, // const VkShaderModule fragmentShaderModule
3152 *renderPass, // const VkRenderPass renderPass
3153 viewports, // const std::vector<VkViewport>& viewports
3154 scissors, // const std::vector<VkRect2D>& scissors
3155 VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, // const VkPrimitiveTopology topology
3156 0u, // const deUint32 subpass
3157 patchControlPoints, // const deUint32 patchControlPoints
3158 &vertexInputStateParams); // const VkPipelineVertexInputStateCreateInfo* vertexInputStateCreateInfo
3159 }
3160
3161 // Create command pool
3162 cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
3163
3164 // Create command buffer
3165 {
3166 const VkClearValue clearValue = getDefaultClearColor();
3167
3168 cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3169
3170 beginCommandBuffer(vk, *cmdBuffer);
3171
3172 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, renderSize.x(), renderSize.y()), clearValue);
3173
3174 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *graphicsPipeline);
3175
3176 {
3177 const VkDescriptorSet descriptorSets[] = { *descriptorSet, extraResources };
3178 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, numDescriptorSets, descriptorSets, 0u, DE_NULL);
3179 }
3180
3181 vk.cmdDraw(*cmdBuffer, vertexCount, 1, 0, 0);
3182
3183 endRenderPass(vk, *cmdBuffer);
3184
3185 // Insert a barrier so data written by the shader is available to the host
3186 {
3187 const VkBufferMemoryBarrier bufferBarrier =
3188 {
3189 VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType sType;
3190 DE_NULL, // const void* pNext;
3191 VK_ACCESS_SHADER_WRITE_BIT, // VkAccessFlags srcAccessMask;
3192 VK_ACCESS_HOST_READ_BIT, // VkAccessFlags dstAccessMask;
3193 VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex;
3194 VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex;
3195 *m_outputBuffer, // VkBuffer buffer;
3196 0, // VkDeviceSize offset;
3197 VK_WHOLE_SIZE, // VkDeviceSize size;
3198 };
3199
3200 vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0,
3201 0, (const VkMemoryBarrier*)DE_NULL,
3202 1, &bufferBarrier,
3203 0, (const VkImageMemoryBarrier*)DE_NULL);
3204 }
3205
3206 endCommandBuffer(vk, *cmdBuffer);
3207 }
3208
3209 // Execute Draw
3210 submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
3211 }
3212
3213 // TessControlExecutor
3214
// Executor that evaluates the shader spec in a tessellation control shader.
// One patch is drawn per output value; the invocation index is derived from
// gl_PrimitiveID inside the generated shader.
class TessControlExecutor : public TessellationExecutor
{
public:
						TessControlExecutor			(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual				~TessControlExecutor		(void);

	// Registers vertex, tess control (the tested stage), tess eval and fragment sources.
	static void			generateSources				(const ShaderSpec& shaderSpec, SourceCollections& programCollection);

	// Uploads inputs, draws numValues patches and reads back the outputs.
	virtual void		execute						(int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources);

protected:
	// Builds the GLSL source for the tested tessellation control stage.
	static std::string	generateTessControlShader	(const ShaderSpec& shaderSpec);
};
3228
// Forwards construction to the common tessellation executor base; no extra state.
TessControlExecutor::TessControlExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: TessellationExecutor(context, shaderSpec, extraResourcesLayout)
{
}
3233
// All resources are owned by the base class / RAII handles; nothing to release here.
TessControlExecutor::~TessControlExecutor (void)
{
}
3237
generateTessControlShader(const ShaderSpec & shaderSpec)3238 std::string TessControlExecutor::generateTessControlShader (const ShaderSpec& shaderSpec)
3239 {
3240 std::ostringstream src;
3241 src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
3242
3243 if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
3244 src << "#extension GL_EXT_tessellation_shader : require\n\n";
3245
3246 if (!shaderSpec.globalDeclarations.empty())
3247 src << shaderSpec.globalDeclarations << "\n";
3248
3249 src << "\nlayout(vertices = 1) out;\n\n";
3250
3251 declareBufferBlocks(src, shaderSpec);
3252
3253 src << "void main (void)\n{\n";
3254
3255 for (int ndx = 0; ndx < 2; ndx++)
3256 src << "\tgl_TessLevelInner[" << ndx << "] = 1.0;\n";
3257
3258 for (int ndx = 0; ndx < 4; ndx++)
3259 src << "\tgl_TessLevelOuter[" << ndx << "] = 1.0;\n";
3260
3261 src << "\n"
3262 << "\thighp uint invocationId = uint(gl_PrimitiveID);\n";
3263
3264 generateExecBufferIo(src, shaderSpec, "invocationId");
3265
3266 src << "}\n";
3267
3268 return src.str();
3269 }
3270
// Minimal tessellation evaluation stage used alongside the tested control
// stage: it only places each vertex at its tessellation coordinate so the
// pipeline stays complete while all interesting work happens elsewhere.
static std::string generateEmptyTessEvalShader ()
{
	std::string src;

	src += "#version 450\n";
	src += "#extension GL_EXT_tessellation_shader : require\n\n";
	src += "layout(triangles, ccw) in;\n";
	src += "\nvoid main (void)\n{\n";
	src += "\tgl_Position = vec4(gl_TessCoord.xy, 0.0, 1.0);\n";
	src += "}\n";

	return src;
}
3286
// Registers the full pipeline for the tess-control test: a passthrough vertex
// shader, the tested tessellation control shader, an empty evaluation stage
// and an empty fragment shader. The "vert"/"tess_control"/"tess_eval"/"frag"
// keys must match the names looked up when the shader modules are created.
void TessControlExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
{
	programCollection.glslSources.add("vert") << glu::VertexSource(generateVertexShaderForTess()) << shaderSpec.buildOptions;
	programCollection.glslSources.add("tess_control") << glu::TessellationControlSource(generateTessControlShader(shaderSpec)) << shaderSpec.buildOptions;
	programCollection.glslSources.add("tess_eval") << glu::TessellationEvaluationSource(generateEmptyTessEvalShader()) << shaderSpec.buildOptions;
	programCollection.glslSources.add("frag") << glu::FragmentSource(generateEmptyFragmentSource()) << shaderSpec.buildOptions;
}
3294
execute(int numValues,const void * const * inputs,void * const * outputs,VkDescriptorSet extraResources)3295 void TessControlExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
3296 {
3297 const deUint32 patchSize = 3;
3298
3299 initBuffers(numValues);
3300
3301 // Setup input buffer & copy data
3302 uploadInputBuffer(inputs, numValues, false);
3303
3304 renderTess(numValues, patchSize * numValues, patchSize, extraResources);
3305
3306 // Read back data
3307 readOutputBuffer(outputs, numValues);
3308 }
3309
3310 // TessEvaluationExecutor
3311
// Executor that evaluates the shader spec in a tessellation evaluation shader.
// A passthrough control stage tessellates isolines so that each patch yields
// two evaluation invocations, each computing one output value.
class TessEvaluationExecutor : public TessellationExecutor
{
public:
						TessEvaluationExecutor	(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual				~TessEvaluationExecutor	(void);

	// Registers vertex, passthrough tess control, tess eval (the tested stage) and fragment sources.
	static void			generateSources			(const ShaderSpec& shaderSpec, SourceCollections& programCollection);

	// Uploads inputs, draws enough patches to cover numValues and reads back outputs.
	virtual void		execute					(int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources);

protected:
	// Builds the GLSL source for the tested tessellation evaluation stage.
	static std::string	generateTessEvalShader	(const ShaderSpec& shaderSpec);
};
3325
// Forwards construction to the common tessellation executor base; no extra state.
TessEvaluationExecutor::TessEvaluationExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: TessellationExecutor (context, shaderSpec, extraResourcesLayout)
{
}
3330
// All resources are owned by the base class / RAII handles; nothing to release here.
TessEvaluationExecutor::~TessEvaluationExecutor (void)
{
}
3334
// Control stage that does no real work: a single control point per patch and
// all tessellation levels pinned to 1.0, giving the evaluation stage a
// predictable number of invocations.
static std::string generatePassthroughTessControlShader (void)
{
	std::ostringstream src;

	src << "#version 450\n"
	    << "#extension GL_EXT_tessellation_shader : require\n\n"
	    << "layout(vertices = 1) out;\n\n"
	    << "void main (void)\n{\n";

	const int numInnerLevels = 2;
	const int numOuterLevels = 4;

	for (int levelNdx = 0; levelNdx < numInnerLevels; ++levelNdx)
		src << "\tgl_TessLevelInner[" << levelNdx << "] = 1.0;\n";

	for (int levelNdx = 0; levelNdx < numOuterLevels; ++levelNdx)
		src << "\tgl_TessLevelOuter[" << levelNdx << "] = 1.0;\n";

	src << "}\n";

	return src.str();
}
3356
generateTessEvalShader(const ShaderSpec & shaderSpec)3357 std::string TessEvaluationExecutor::generateTessEvalShader (const ShaderSpec& shaderSpec)
3358 {
3359 std::ostringstream src;
3360
3361 src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
3362
3363 if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
3364 src << "#extension GL_EXT_tessellation_shader : require\n\n";
3365
3366 if (!shaderSpec.globalDeclarations.empty())
3367 src << shaderSpec.globalDeclarations << "\n";
3368
3369 src << "\n";
3370
3371 src << "layout(isolines, equal_spacing) in;\n\n";
3372
3373 declareBufferBlocks(src, shaderSpec);
3374
3375 src << "void main (void)\n{\n"
3376 << "\tgl_Position = vec4(gl_TessCoord.x, 0.0, 0.0, 1.0);\n"
3377 << "\thighp uint invocationId = uint(gl_PrimitiveID)*2u + (gl_TessCoord.x > 0.5 ? 1u : 0u);\n";
3378
3379 generateExecBufferIo(src, shaderSpec, "invocationId");
3380
3381 src << "}\n";
3382
3383 return src.str();
3384 }
3385
// Registers the full pipeline for the tess-eval test: a passthrough vertex
// shader, a passthrough control stage, the tested tessellation evaluation
// shader and an empty fragment shader. The "vert"/"tess_control"/"tess_eval"/
// "frag" keys must match the names looked up when the shader modules are created.
void TessEvaluationExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
{
	programCollection.glslSources.add("vert") << glu::VertexSource(generateVertexShaderForTess()) << shaderSpec.buildOptions;
	programCollection.glslSources.add("tess_control") << glu::TessellationControlSource(generatePassthroughTessControlShader()) << shaderSpec.buildOptions;
	programCollection.glslSources.add("tess_eval") << glu::TessellationEvaluationSource(generateTessEvalShader(shaderSpec)) << shaderSpec.buildOptions;
	programCollection.glslSources.add("frag") << glu::FragmentSource(generateEmptyFragmentSource()) << shaderSpec.buildOptions;
}
3393
execute(int numValues,const void * const * inputs,void * const * outputs,VkDescriptorSet extraResources)3394 void TessEvaluationExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
3395 {
3396 const int patchSize = 2;
3397 const int alignedValues = deAlign32(numValues, patchSize);
3398
3399 // Initialize buffers with aligned value count to make room for padding
3400 initBuffers(alignedValues);
3401
3402 // Setup input buffer & copy data
3403 uploadInputBuffer(inputs, numValues, false);
3404
3405 renderTess((deUint32)alignedValues, (deUint32)alignedValues, (deUint32)patchSize, extraResources);
3406
3407 // Read back data
3408 readOutputBuffer(outputs, numValues);
3409 }
3410
3411 } // anonymous
3412
3413 // ShaderExecutor
3414
// Virtual destructor anchor for the executor hierarchy; subclasses own their
// Vulkan objects through RAII handles, so there is nothing to free here.
ShaderExecutor::~ShaderExecutor (void)
{
}
3418
areInputs16Bit(void) const3419 bool ShaderExecutor::areInputs16Bit (void) const
3420 {
3421 for (vector<Symbol>::const_iterator symIter = m_shaderSpec.inputs.begin(); symIter != m_shaderSpec.inputs.end(); ++symIter)
3422 {
3423 if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
3424 return true;
3425 }
3426 return false;
3427 }
3428
areOutputs16Bit(void) const3429 bool ShaderExecutor::areOutputs16Bit (void) const
3430 {
3431 for (vector<Symbol>::const_iterator symIter = m_shaderSpec.outputs.begin(); symIter != m_shaderSpec.outputs.end(); ++symIter)
3432 {
3433 if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
3434 return true;
3435 }
3436 return false;
3437 }
3438
isOutput16Bit(const size_t ndx) const3439 bool ShaderExecutor::isOutput16Bit (const size_t ndx) const
3440 {
3441 if (glu::isDataTypeFloat16OrVec(m_shaderSpec.outputs[ndx].varType.getBasicType()))
3442 return true;
3443 return false;
3444 }
3445
areInputs64Bit(void) const3446 bool ShaderExecutor::areInputs64Bit (void) const
3447 {
3448 for (vector<Symbol>::const_iterator symIter = m_shaderSpec.inputs.begin(); symIter != m_shaderSpec.inputs.end(); ++symIter)
3449 {
3450 if (glu::isDataTypeDoubleType(symIter->varType.getBasicType()))
3451 return true;
3452 }
3453 return false;
3454 }
3455
areOutputs64Bit(void) const3456 bool ShaderExecutor::areOutputs64Bit (void) const
3457 {
3458 for (vector<Symbol>::const_iterator symIter = m_shaderSpec.outputs.begin(); symIter != m_shaderSpec.outputs.end(); ++symIter)
3459 {
3460 if (glu::isDataTypeDoubleType(symIter->varType.getBasicType()))
3461 return true;
3462 }
3463 return false;
3464 }
3465
isOutput64Bit(const size_t ndx) const3466 bool ShaderExecutor::isOutput64Bit (const size_t ndx) const
3467 {
3468 if (glu::isDataTypeDoubleType(m_shaderSpec.outputs[ndx].varType.getBasicType()))
3469 return true;
3470 return false;
3471 }
3472
3473 // Utilities
3474
generateSources(glu::ShaderType shaderType,const ShaderSpec & shaderSpec,vk::SourceCollections & dst)3475 void generateSources (glu::ShaderType shaderType, const ShaderSpec& shaderSpec, vk::SourceCollections& dst)
3476 {
3477 switch (shaderType)
3478 {
3479 case glu::SHADERTYPE_VERTEX: VertexShaderExecutor::generateSources (shaderSpec, dst); break;
3480 case glu::SHADERTYPE_TESSELLATION_CONTROL: TessControlExecutor::generateSources (shaderSpec, dst); break;
3481 case glu::SHADERTYPE_TESSELLATION_EVALUATION: TessEvaluationExecutor::generateSources (shaderSpec, dst); break;
3482 case glu::SHADERTYPE_GEOMETRY: GeometryShaderExecutor::generateSources (shaderSpec, dst); break;
3483 case glu::SHADERTYPE_FRAGMENT: FragmentShaderExecutor::generateSources (shaderSpec, dst); break;
3484 case glu::SHADERTYPE_COMPUTE: ComputeShaderExecutor::generateSources (shaderSpec, dst); break;
3485 default:
3486 TCU_THROW(InternalError, "Unsupported shader type");
3487 }
3488 }
3489
createExecutor(Context & context,glu::ShaderType shaderType,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)3490 ShaderExecutor* createExecutor (Context& context, glu::ShaderType shaderType, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
3491 {
3492 switch (shaderType)
3493 {
3494 case glu::SHADERTYPE_VERTEX: return new VertexShaderExecutor (context, shaderSpec, extraResourcesLayout);
3495 case glu::SHADERTYPE_TESSELLATION_CONTROL: return new TessControlExecutor (context, shaderSpec, extraResourcesLayout);
3496 case glu::SHADERTYPE_TESSELLATION_EVALUATION: return new TessEvaluationExecutor (context, shaderSpec, extraResourcesLayout);
3497 case glu::SHADERTYPE_GEOMETRY: return new GeometryShaderExecutor (context, shaderSpec, extraResourcesLayout);
3498 case glu::SHADERTYPE_FRAGMENT: return new FragmentShaderExecutor (context, shaderSpec, extraResourcesLayout);
3499 case glu::SHADERTYPE_COMPUTE: return new ComputeShaderExecutor (context, shaderSpec, extraResourcesLayout);
3500 default:
3501 TCU_THROW(InternalError, "Unsupported shader type");
3502 }
3503 }
3504
executorSupported(glu::ShaderType shaderType)3505 bool executorSupported(glu::ShaderType shaderType)
3506 {
3507 switch (shaderType)
3508 {
3509 case glu::SHADERTYPE_VERTEX:
3510 case glu::SHADERTYPE_TESSELLATION_CONTROL:
3511 case glu::SHADERTYPE_TESSELLATION_EVALUATION:
3512 case glu::SHADERTYPE_GEOMETRY:
3513 case glu::SHADERTYPE_FRAGMENT:
3514 case glu::SHADERTYPE_COMPUTE:
3515 return true;
3516 default:
3517 return false;
3518 }
3519 }
3520
checkSupportShader(Context & context,const glu::ShaderType shaderType)3521 void checkSupportShader(Context& context, const glu::ShaderType shaderType)
3522 {
3523 if (shaderType == glu::SHADERTYPE_TESSELLATION_EVALUATION &&
3524 context.isDeviceFunctionalitySupported("VK_KHR_portability_subset") &&
3525 !context.getPortabilitySubsetFeatures().tessellationIsolines)
3526 {
3527 TCU_THROW(NotSupportedError, "VK_KHR_portability_subset: Tessellation iso lines are not supported by this implementation");
3528 }
3529 }
3530
3531
3532 } // shaderexecutor
3533 } // vkt
3534