1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2015 The Khronos Group Inc.
6 * Copyright (c) 2015 Samsung Electronics Co., Ltd.
7 * Copyright (c) 2016 The Android Open Source Project
8 *
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
20 *
21 *//*!
22 * \file
23 * \brief Vulkan ShaderExecutor
24 *//*--------------------------------------------------------------------*/
25
26 #include "vktShaderExecutor.hpp"
27
28 #include "vkMemUtil.hpp"
29 #include "vkRef.hpp"
30 #include "vkPrograms.hpp"
31 #include "vkRefUtil.hpp"
32 #include "vkTypeUtil.hpp"
33 #include "vkQueryUtil.hpp"
34 #include "vkBuilderUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkObjUtil.hpp"
37
38 #include "gluShaderUtil.hpp"
39
40 #include "tcuVector.hpp"
41 #include "tcuTestLog.hpp"
42 #include "tcuTextureUtil.hpp"
43
44 #include "deUniquePtr.hpp"
45 #include "deStringUtil.hpp"
46 #include "deSharedPtr.hpp"
47 #include "deFloat16.h"
48
49 #include <map>
50 #include <sstream>
51 #include <iostream>
52
53 using std::vector;
54 using namespace vk;
55
56 namespace vkt
57 {
58 namespace shaderexecutor
59 {
60 namespace
61 {
62
63 enum
64 {
65 DEFAULT_RENDER_WIDTH = 100,
66 DEFAULT_RENDER_HEIGHT = 100,
67 };
68
69 // Common typedefs
70
71 typedef de::SharedPtr<Unique<VkImage> > VkImageSp;
72 typedef de::SharedPtr<Unique<VkImageView> > VkImageViewSp;
73 typedef de::SharedPtr<Unique<VkBuffer> > VkBufferSp;
74 typedef de::SharedPtr<Allocation> AllocationSp;
75
76 static VkFormat getAttributeFormat(const glu::DataType dataType);
77
78 // Shader utilities
79
getDefaultClearColor(void)80 static VkClearValue getDefaultClearColor (void)
81 {
82 return makeClearValueColorF32(0.125f, 0.25f, 0.5f, 1.0f);
83 }
84
// Returns GLSL source for a minimal fragment shader that writes a constant
// zero color to its single output. Used when the stage under test is not the
// fragment stage but the pipeline still needs a fragment shader.
static std::string generateEmptyFragmentSource (void)
{
	std::string src;

	src += "#version 450\n";
	src += "layout(location=0) out highp vec4 o_color;\n";
	src += "void main (void)\n{\n";
	src += " o_color = vec4(0.0);\n";
	src += "}\n";

	return src;
}
98
packFloat16Bit(std::ostream & src,const std::vector<Symbol> & outputs)99 void packFloat16Bit (std::ostream& src, const std::vector<Symbol>& outputs)
100 {
101 for (vector<Symbol>::const_iterator symIter = outputs.begin(); symIter != outputs.end(); ++symIter)
102 {
103 if(glu::isDataTypeFloatType(symIter->varType.getBasicType()))
104 {
105 if(glu::isDataTypeVector(symIter->varType.getBasicType()))
106 {
107 for(int i = 0; i < glu::getDataTypeScalarSize(symIter->varType.getBasicType()); i++)
108 {
109 src << "\tpacked_" << symIter->name << "[" << i << "] = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name << "[" << i << "], -1.0)));\n";
110 }
111 }
112 else if (glu::isDataTypeMatrix(symIter->varType.getBasicType()))
113 {
114 int maxRow = 0;
115 int maxCol = 0;
116 switch (symIter->varType.getBasicType())
117 {
118 case glu::TYPE_FLOAT_MAT2:
119 maxRow = maxCol = 2;
120 break;
121 case glu::TYPE_FLOAT_MAT2X3:
122 maxRow = 2;
123 maxCol = 3;
124 break;
125 case glu::TYPE_FLOAT_MAT2X4:
126 maxRow = 2;
127 maxCol = 4;
128 break;
129 case glu::TYPE_FLOAT_MAT3X2:
130 maxRow = 3;
131 maxCol = 2;
132 break;
133 case glu::TYPE_FLOAT_MAT3:
134 maxRow = maxCol = 3;
135 break;
136 case glu::TYPE_FLOAT_MAT3X4:
137 maxRow = 3;
138 maxCol = 4;
139 break;
140 case glu::TYPE_FLOAT_MAT4X2:
141 maxRow = 4;
142 maxCol = 2;
143 break;
144 case glu::TYPE_FLOAT_MAT4X3:
145 maxRow = 4;
146 maxCol = 3;
147 break;
148 case glu::TYPE_FLOAT_MAT4:
149 maxRow = maxCol = 4;
150 break;
151 default:
152 DE_ASSERT(false);
153 break;
154 }
155
156 for(int i = 0; i < maxRow; i++)
157 for(int j = 0; j < maxCol; j++)
158 {
159 src << "\tpacked_" << symIter->name << "[" << i << "][" << j << "] = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name << "[" << i << "][" << j << "], -1.0)));\n";
160 }
161 }
162 else
163 {
164 src << "\tpacked_" << symIter->name << " = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name << ", -1.0)));\n";
165 }
166 }
167 }
168 }
169
generatePassthroughVertexShader(const ShaderSpec & shaderSpec,const char * inputPrefix,const char * outputPrefix)170 static std::string generatePassthroughVertexShader (const ShaderSpec& shaderSpec, const char* inputPrefix, const char* outputPrefix)
171 {
172 std::ostringstream src;
173 int location = 0;
174
175 src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
176
177 if (!shaderSpec.globalDeclarations.empty())
178 src << shaderSpec.globalDeclarations << "\n";
179
180 src << "layout(location = " << location << ") in highp vec4 a_position;\n";
181
182 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
183 {
184 location++;
185 src << "layout(location = "<< location << ") in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n"
186 << "layout(location = " << location - 1 << ") flat out " << glu::declare(input->varType, outputPrefix + input->name) << ";\n";
187 }
188
189 src << "\nvoid main (void)\n{\n"
190 << " gl_Position = a_position;\n"
191 << " gl_PointSize = 1.0;\n";
192
193 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
194 src << "\t" << outputPrefix << input->name << " = " << inputPrefix << input->name << ";\n";
195
196 src << "}\n";
197
198 return src.str();
199 }
200
// Generates the full GLSL vertex shader for the "vertex shader executor":
// inputs arrive as vertex attributes (location 0 is reserved for a_position),
// the operation from shaderSpec.source runs on local copies of the inputs,
// and the results are written to flat output varyings.
//
// Bool/bvec outputs are declared and written as highp int/ivec, since
// booleans are not valid shader interface types. When
// shaderSpec.packFloat16Bit is set, float inputs/outputs are mirrored in
// float16 locals and re-packed to 32-bit values (see packFloat16Bit) before
// being assigned to the varyings.
static std::string generateVertexShader (const ShaderSpec& shaderSpec, const std::string& inputPrefix, const std::string& outputPrefix)
{
	DE_ASSERT(!inputPrefix.empty() && !outputPrefix.empty());

	std::ostringstream src;

	src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";

	if (!shaderSpec.globalDeclarations.empty())
		src << shaderSpec.globalDeclarations << "\n";

	src << "layout(location = 0) in highp vec4 a_position;\n";

	// Input attributes start at location 1 (0 is taken by a_position).
	int locationNumber = 1;
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input, ++locationNumber)
	{
		src << "layout(location = " << locationNumber << ") in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n";
	}

	// Output varyings start at location 0.
	locationNumber = 0;
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output, ++locationNumber)
	{
		DE_ASSERT(output->varType.isBasicType());

		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
		{
			// Booleans are carried across the interface as highp ints.
			const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
			const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
			const glu::VarType intType (intBaseType, glu::PRECISION_HIGHP);

			src << "layout(location = " << locationNumber << ") flat out " << glu::declare(intType, outputPrefix + output->name) << ";\n";
		}
		else
			src << "layout(location = " << locationNumber << ") flat out " << glu::declare(output->varType, outputPrefix + output->name) << ";\n";
	}

	src << "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< "	gl_Position = a_position;\n"
		<< "	gl_PointSize = 1.0;\n";

	// Declare & fetch local input variables. In 16-bit mode float inputs are
	// converted into float16 locals of the equivalent shape.
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
	{
		if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(input->varType.getBasicType()))
		{
			const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(input->varType.getBasicType()));
			src << "\t" << tname << " " << input->name << " = " << tname << "(" << inputPrefix << input->name << ");\n";
		}
		else
			src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << ";\n";
	}

	// Declare local output variables. In 16-bit mode each float output also
	// gets a 32-bit "packed_" companion filled in by packFloat16Bit below.
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
	{
		if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
		{
			const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(output->varType.getBasicType()));
			src << "\t" << tname << " " << output->name << ";\n";
			const char* tname2 = glu::getDataTypeName(output->varType.getBasicType());
			src << "\t" << tname2 << " " << "packed_" << output->name << ";\n";
		}
		else
			src << "\t" << glu::declare(output->varType, output->name) << ";\n";
	}

	// Operation - indented to correct level.
	{
		std::istringstream opSrc (shaderSpec.source);
		std::string line;

		while (std::getline(opSrc, line))
			src << "\t" << line << "\n";
	}

	if (shaderSpec.packFloat16Bit)
		packFloat16Bit(src, shaderSpec.outputs);

	// Assignments to outputs (bools cast to int, 16-bit values via packed_*).
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
	{
		if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
		{
			src << "\t" << outputPrefix << output->name << " = packed_" << output->name << ";\n";
		}
		else
		{
			if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
			{
				const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
				const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;

				src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << output->name << ");\n";
			}
			else
				src << "\t" << outputPrefix << output->name << " = " << output->name << ";\n";
		}
	}

	src << "}\n";

	return src.str();
}
306
// Describes how fragment shader output symbols map onto color attachment
// locations (one symbol may span several consecutive locations, e.g. matrices).
struct FragmentOutputLayout
{
	std::vector<const Symbol*>	locationSymbols;	//! Symbols by location (one entry per occupied location)
	std::map<std::string, int>	locationMap;		//! Map from symbol name to start location
};
312
// Emits the fragment shader output declarations for all output symbols.
//
// Types that cannot be (or should not be) rendered directly are rewritten:
//  - float/vec with useIntOutputs: declared as highp uint/uvec (values will be
//    written with floatBitsToUint so they survive fixed-point attachments),
//  - bool/bvec: declared as highp int/ivec (bools are not interface types),
//  - matrices: declared as one uvec per column, at consecutive locations
//    starting from the symbol's assigned location.
// Locations come from outLocationMap (see computeFragmentOutputLayout).
static void generateFragShaderOutputDecl (std::ostream& src, const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& outputPrefix)
{
	for (int outNdx = 0; outNdx < (int)shaderSpec.outputs.size(); ++outNdx)
	{
		const Symbol& output = shaderSpec.outputs[outNdx];
		const int location = de::lookup(outLocationMap, output.name);
		const std::string outVarName = outputPrefix + output.name;
		glu::VariableDeclaration decl (output.varType, outVarName, glu::STORAGE_OUT, glu::INTERPOLATION_LAST, glu::Layout(location));

		TCU_CHECK_INTERNAL(output.varType.isBasicType());

		if (useIntOutputs && glu::isDataTypeFloatOrVec(output.varType.getBasicType()))
		{
			// Bit-preserving uint declaration for float outputs.
			const int vecSize = glu::getDataTypeScalarSize(output.varType.getBasicType());
			const glu::DataType uintBasicType = vecSize > 1 ? glu::getDataTypeUintVec(vecSize) : glu::TYPE_UINT;
			const glu::VarType uintType (uintBasicType, glu::PRECISION_HIGHP);

			decl.varType = uintType;
			src << decl << ";\n";
		}
		else if (glu::isDataTypeBoolOrBVec(output.varType.getBasicType()))
		{
			// Bools become ints on the interface.
			const int vecSize = glu::getDataTypeScalarSize(output.varType.getBasicType());
			const glu::DataType intBasicType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
			const glu::VarType intType (intBasicType, glu::PRECISION_HIGHP);

			decl.varType = intType;
			src << decl << ";\n";
		}
		else if (glu::isDataTypeMatrix(output.varType.getBasicType()))
		{
			// One uvec output per matrix column: "<name>_<col>" at
			// location + col.
			const int vecSize = glu::getDataTypeMatrixNumRows(output.varType.getBasicType());
			const int numVecs = glu::getDataTypeMatrixNumColumns(output.varType.getBasicType());
			const glu::DataType uintBasicType = glu::getDataTypeUintVec(vecSize);
			const glu::VarType uintType (uintBasicType, glu::PRECISION_HIGHP);

			decl.varType = uintType;
			for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
			{
				decl.name = outVarName + "_" + de::toString(vecNdx);
				decl.layout.location = location + vecNdx;
				src << decl << ";\n";
			}
		}
		else
			src << decl << ";\n";
	}
}
361
generateFragShaderOutAssign(std::ostream & src,const ShaderSpec & shaderSpec,bool useIntOutputs,const std::string & valuePrefix,const std::string & outputPrefix,const bool isInput16Bit=false)362 static void generateFragShaderOutAssign (std::ostream& src, const ShaderSpec& shaderSpec, bool useIntOutputs, const std::string& valuePrefix, const std::string& outputPrefix, const bool isInput16Bit = false)
363 {
364 if (isInput16Bit)
365 packFloat16Bit(src, shaderSpec.outputs);
366
367 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
368 {
369 const std::string packPrefix = (isInput16Bit && glu::isDataTypeFloatType(output->varType.getBasicType())) ? "packed_" : "";
370
371 if (useIntOutputs && glu::isDataTypeFloatOrVec(output->varType.getBasicType()))
372 src << " o_" << output->name << " = floatBitsToUint(" << valuePrefix << output->name << ");\n";
373 else if (glu::isDataTypeMatrix(output->varType.getBasicType()))
374 {
375 const int numVecs = glu::getDataTypeMatrixNumColumns(output->varType.getBasicType());
376
377 for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
378 if (useIntOutputs)
379 src << "\t" << outputPrefix << output->name << "_" << vecNdx << " = floatBitsToUint(" << valuePrefix << output->name << "[" << vecNdx << "]);\n";
380 else
381 src << "\t" << outputPrefix << output->name << "_" << vecNdx << " = " << packPrefix << valuePrefix << output->name << "[" << vecNdx << "];\n";
382 }
383 else if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
384 {
385 const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
386 const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
387
388 src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << valuePrefix << output->name << ");\n";
389 }
390 else
391 src << "\t" << outputPrefix << output->name << " = " << packPrefix << valuePrefix << output->name << ";\n";
392 }
393 }
394
// Generates a fragment shader that copies flat input varyings (produced by an
// earlier stage that ran the actual operation) straight to the fragment
// outputs. Bool/bvec symbols arrive as int/ivec varyings; output declarations
// and assignments are delegated to generateFragShaderOutputDecl /
// generateFragShaderOutAssign.
static std::string generatePassthroughFragmentShader (const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& inputPrefix, const std::string& outputPrefix)
{
	std::ostringstream src;

	src <<"#version 450\n";

	if (!shaderSpec.globalDeclarations.empty())
		src << shaderSpec.globalDeclarations << "\n";

	// Declare one flat input per *output* symbol: the previous stage already
	// computed the results and forwards them under inputPrefix.
	int locationNumber = 0;
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output, ++locationNumber)
	{
		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
		{
			// Bools travel the interface as highp ints.
			const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
			const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
			const glu::VarType intType (intBaseType, glu::PRECISION_HIGHP);

			src << "layout(location = " << locationNumber << ") flat in " << glu::declare(intType, inputPrefix + output->name) << ";\n";
		}
		else
			src << "layout(location = " << locationNumber << ") flat in " << glu::declare(output->varType, inputPrefix + output->name) << ";\n";
	}

	generateFragShaderOutputDecl(src, shaderSpec, useIntOutputs, outLocationMap, outputPrefix);

	src << "\nvoid main (void)\n{\n";

	generateFragShaderOutAssign(src, shaderSpec, useIntOutputs, inputPrefix, outputPrefix);

	src << "}\n";

	return src.str();
}
429
// Generates the GLSL geometry shader for the "geometry shader executor":
// a points-in / points-out shader that fetches inputs from flat varyings
// (array element [0]), runs shaderSpec.source, writes results to flat output
// varyings, and emits a single vertex. Bool/bvec outputs are carried as
// int/ivec. gl_PointSize is forwarded only when the implementation supports
// writing it from geometry shaders (pointSizeSupported).
static std::string generateGeometryShader (const ShaderSpec& shaderSpec, const std::string& inputPrefix, const std::string& outputPrefix, const bool pointSizeSupported)
{
	DE_ASSERT(!inputPrefix.empty() && !outputPrefix.empty());

	std::ostringstream src;

	src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";

	// GLSL ES 3.1 needs the extension enabled explicitly.
	if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
		src << "#extension GL_EXT_geometry_shader : require\n";

	if (!shaderSpec.globalDeclarations.empty())
		src << shaderSpec.globalDeclarations << "\n";

	src << "layout(points) in;\n"
		<< "layout(points, max_vertices = 1) out;\n";

	// Geometry inputs are arrays (one element per input vertex).
	int locationNumber = 0;
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input, ++locationNumber)
		src << "layout(location = " << locationNumber << ") flat in " << glu::declare(input->varType, inputPrefix + input->name) << "[];\n";

	locationNumber = 0;
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output, ++locationNumber)
	{
		DE_ASSERT(output->varType.isBasicType());

		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
		{
			// Bools are declared as highp ints on the interface.
			const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
			const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
			const glu::VarType intType (intBaseType, glu::PRECISION_HIGHP);

			src << "layout(location = " << locationNumber << ") flat out " << glu::declare(intType, outputPrefix + output->name) << ";\n";
		}
		else
			src << "layout(location = " << locationNumber << ") flat out " << glu::declare(output->varType, outputPrefix + output->name) << ";\n";
	}

	src << "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< "	gl_Position = gl_in[0].gl_Position;\n"
		<< (pointSizeSupported ? "	gl_PointSize = gl_in[0].gl_PointSize;\n\n" : "");

	// Fetch input variables from the single input vertex.
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
		src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << "[0];\n";

	// Declare local output variables.
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
		src << "\t" << glu::declare(output->varType, output->name) << ";\n";

	src << "\n";

	// Operation - indented to correct level.
	{
		std::istringstream opSrc (shaderSpec.source);
		std::string line;

		while (std::getline(opSrc, line))
			src << "\t" << line << "\n";
	}

	// Assignments to outputs (bools cast to their int interface type).
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
	{
		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
		{
			const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
			const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;

			src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << output->name << ");\n";
		}
		else
			src << "\t" << outputPrefix << output->name << " = " << output->name << ";\n";
	}

	src << "	EmitVertex();\n"
		<< "	EndPrimitive();\n"
		<< "}\n";

	return src.str();
}
513
// Generates the GLSL fragment shader for the "fragment shader executor":
// fetches inputs from flat varyings, runs shaderSpec.source on local copies
// and writes the results to the fragment outputs. Output declaration and
// assignment (including the useIntOutputs bit-cast and matrix-per-column
// handling) are delegated to generateFragShaderOutputDecl /
// generateFragShaderOutAssign. In packFloat16Bit mode float values are
// computed in float16 locals and re-packed on assignment.
static std::string generateFragmentShader (const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& inputPrefix, const std::string& outputPrefix)
{
	std::ostringstream src;
	src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
	if (!shaderSpec.globalDeclarations.empty())
		src << shaderSpec.globalDeclarations << "\n";

	int locationNumber = 0;
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input, ++locationNumber)
	{
		src << "layout(location = " << locationNumber << ") flat in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n";
	}

	generateFragShaderOutputDecl(src, shaderSpec, useIntOutputs, outLocationMap, outputPrefix);

	src << "\nvoid main (void)\n{\n";

	// Declare & fetch local input variables (converted to float16 shapes in
	// packFloat16Bit mode).
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
	{
		if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(input->varType.getBasicType()))
		{
			const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(input->varType.getBasicType()));
			src << "\t" << tname << " " << input->name << " = " << tname << "(" << inputPrefix << input->name << ");\n";
		}
		else
			src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << ";\n";
	}

	// Declare output variables; float16 outputs also get a 32-bit "packed_"
	// companion filled by packFloat16Bit (via generateFragShaderOutAssign).
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
	{
		if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
		{
			const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(output->varType.getBasicType()));
			src << "\t" << tname << " " << output->name << ";\n";
			const char* tname2 = glu::getDataTypeName(output->varType.getBasicType());
			src << "\t" << tname2 << " " << "packed_" << output->name << ";\n";
		}
		else
			src << "\t" << glu::declare(output->varType, output->name) << ";\n";
	}

	// Operation - indented to correct level.
	{
		std::istringstream opSrc (shaderSpec.source);
		std::string line;

		while (std::getline(opSrc, line))
			src << "\t" << line << "\n";
	}

	generateFragShaderOutAssign(src, shaderSpec, useIntOutputs, "", outputPrefix, shaderSpec.packFloat16Bit);

	src << "}\n";

	return src.str();
}
572
573 // FragmentOutExecutor
574
// Base class for executors that read results back from fragment shader
// outputs: inputs are uploaded as vertex attributes (one vertex/point per
// value), and outputs are rendered into framebuffer attachments.
class FragmentOutExecutor : public ShaderExecutor
{
public:
						FragmentOutExecutor		(Context& context, glu::ShaderType shaderType, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual				~FragmentOutExecutor	(void);

	// Runs the shader for numValues values; inputs/outputs are arrays of
	// pointers, one per symbol. extraResources is bound as an additional
	// descriptor set (layout given at construction).
	virtual void		execute					(int					numValues,
												 const void* const*		inputs,
												 void* const*			outputs,
												 VkDescriptorSet		extraResources);

protected:
	const glu::ShaderType			m_shaderType;
	const FragmentOutputLayout		m_outputLayout;		// symbol -> attachment location mapping

private:
	// Creates vertex buffers & binding/attribute descriptions for all inputs.
	void				bindAttributes			(int					numValues,
												 const void* const*		inputs);

	// Uploads one attribute's data into a new vertex buffer and records the
	// corresponding binding/attribute descriptions.
	void				addAttribute			(deUint32				bindingLocation,
												 VkFormat				format,
												 deUint32				sizePerElement,
												 deUint32				count,
												 const void*			dataPtr);
	// reinit render data members
	virtual void		clearRenderData			(void);

	const VkDescriptorSetLayout		m_extraResourcesLayout;		// layout for the caller-provided extra resources set (not owned)

	std::vector<VkVertexInputBindingDescription>	m_vertexBindingDescriptions;
	std::vector<VkVertexInputAttributeDescription>	m_vertexAttributeDescriptions;
	std::vector<VkBufferSp>							m_vertexBuffers;
	std::vector<AllocationSp>						m_vertexBufferAllocs;
};
609
computeFragmentOutputLayout(const std::vector<Symbol> & symbols)610 static FragmentOutputLayout computeFragmentOutputLayout (const std::vector<Symbol>& symbols)
611 {
612 FragmentOutputLayout ret;
613 int location = 0;
614
615 for (std::vector<Symbol>::const_iterator it = symbols.begin(); it != symbols.end(); ++it)
616 {
617 const int numLocations = glu::getDataTypeNumLocations(it->varType.getBasicType());
618
619 TCU_CHECK_INTERNAL(!de::contains(ret.locationMap, it->name));
620 de::insert(ret.locationMap, it->name, location);
621 location += numLocations;
622
623 for (int ndx = 0; ndx < numLocations; ++ndx)
624 ret.locationSymbols.push_back(&*it);
625 }
626
627 return ret;
628 }
629
// Precomputes the output-to-location layout and verifies that every input's
// attribute format is usable as a vertex buffer format on this device;
// throws NotSupportedError otherwise, before any resources are created.
FragmentOutExecutor::FragmentOutExecutor (Context& context, glu::ShaderType shaderType, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: ShaderExecutor			(context, shaderSpec)
	, m_shaderType				(shaderType)
	, m_outputLayout			(computeFragmentOutputLayout(m_shaderSpec.outputs))
	, m_extraResourcesLayout	(extraResourcesLayout)
{
	const VkPhysicalDevice		physicalDevice		= m_context.getPhysicalDevice();
	const InstanceInterface&	vki					= m_context.getInstanceInterface();

	// Input attributes: check format support up front.
	for (int inputNdx = 0; inputNdx < (int)m_shaderSpec.inputs.size(); inputNdx++)
	{
		const Symbol&				symbol				= m_shaderSpec.inputs[inputNdx];
		const glu::DataType			basicType			= symbol.varType.getBasicType();
		const VkFormat				format				= getAttributeFormat(basicType);
		const VkFormatProperties	formatProperties	= getPhysicalDeviceFormatProperties(vki, physicalDevice, format);
		if ((formatProperties.bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT) == 0)
			TCU_THROW(NotSupportedError, "format not supported by device as vertex buffer attribute format");
	}
}
650
// All Vulkan objects are held in RAII wrappers (Move/SharedPtr members), so
// no explicit cleanup is needed here.
FragmentOutExecutor::~FragmentOutExecutor (void)
{
}
654
computeVertexPositions(int numValues,const tcu::IVec2 & renderSize)655 static std::vector<tcu::Vec2> computeVertexPositions (int numValues, const tcu::IVec2& renderSize)
656 {
657 std::vector<tcu::Vec2> positions(numValues);
658 for (int valNdx = 0; valNdx < numValues; valNdx++)
659 {
660 const int ix = valNdx % renderSize.x();
661 const int iy = valNdx / renderSize.x();
662 const float fx = -1.0f + 2.0f*((float(ix) + 0.5f) / float(renderSize.x()));
663 const float fy = -1.0f + 2.0f*((float(iy) + 0.5f) / float(renderSize.y()));
664
665 positions[valNdx] = tcu::Vec2(fx, fy);
666 }
667
668 return positions;
669 }
670
// Selects the texture format used to read back an output symbol from the
// framebuffer. Channel count follows the output's component count (3-component
// formats are widened to RGBA); the channel type follows the scalar type, with
// floats stored as raw uint32 bits when useIntOutputs is set (matching the
// floatBitsToUint writes in the generated fragment shader).
static tcu::TextureFormat getRenderbufferFormatForOutput (const glu::VarType& outputType, bool useIntOutputs)
{
	// Indexed by numComps - 1; no 3-channel render formats are used.
	const tcu::TextureFormat::ChannelOrder channelOrderMap[] =
	{
		tcu::TextureFormat::R,
		tcu::TextureFormat::RG,
		tcu::TextureFormat::RGBA,	// No RGB variants available.
		tcu::TextureFormat::RGBA
	};

	const glu::DataType					basicType		= outputType.getBasicType();
	const int							numComps		= glu::getDataTypeNumComponents(basicType);
	tcu::TextureFormat::ChannelType		channelType;

	switch (glu::getDataTypeScalarType(basicType))
	{
		case glu::TYPE_UINT:	channelType = tcu::TextureFormat::UNSIGNED_INT32;												break;
		case glu::TYPE_INT:		channelType = tcu::TextureFormat::SIGNED_INT32;													break;
		case glu::TYPE_BOOL:	channelType = tcu::TextureFormat::SIGNED_INT32;													break;	// bools travel as ints
		case glu::TYPE_FLOAT:	channelType = useIntOutputs ? tcu::TextureFormat::UNSIGNED_INT32 : tcu::TextureFormat::FLOAT;	break;
		case glu::TYPE_FLOAT16:	channelType = useIntOutputs ? tcu::TextureFormat::UNSIGNED_INT32 : tcu::TextureFormat::HALF_FLOAT;	break;
		default:
			throw tcu::InternalError("Invalid output type");
	}

	DE_ASSERT(de::inRange<int>(numComps, 1, DE_LENGTH_OF_ARRAY(channelOrderMap)));

	return tcu::TextureFormat(channelOrderMap[numComps-1], channelType);
}
700
// Maps a glu data type to the VkFormat used for its vertex attribute.
// Matrices map to the format of a single column (they are bound as one
// attribute per column, see bindAttributes/addAttribute).
static VkFormat getAttributeFormat (const glu::DataType dataType)
{
	switch (dataType)
	{
		case glu::TYPE_FLOAT16:			return VK_FORMAT_R16_SFLOAT;
		case glu::TYPE_FLOAT16_VEC2:	return VK_FORMAT_R16G16_SFLOAT;
		case glu::TYPE_FLOAT16_VEC3:	return VK_FORMAT_R16G16B16_SFLOAT;
		case glu::TYPE_FLOAT16_VEC4:	return VK_FORMAT_R16G16B16A16_SFLOAT;

		case glu::TYPE_FLOAT:			return VK_FORMAT_R32_SFLOAT;
		case glu::TYPE_FLOAT_VEC2:		return VK_FORMAT_R32G32_SFLOAT;
		case glu::TYPE_FLOAT_VEC3:		return VK_FORMAT_R32G32B32_SFLOAT;
		case glu::TYPE_FLOAT_VEC4:		return VK_FORMAT_R32G32B32A32_SFLOAT;

		case glu::TYPE_INT:				return VK_FORMAT_R32_SINT;
		case glu::TYPE_INT_VEC2:		return VK_FORMAT_R32G32_SINT;
		case glu::TYPE_INT_VEC3:		return VK_FORMAT_R32G32B32_SINT;
		case glu::TYPE_INT_VEC4:		return VK_FORMAT_R32G32B32A32_SINT;

		case glu::TYPE_UINT:			return VK_FORMAT_R32_UINT;
		case glu::TYPE_UINT_VEC2:		return VK_FORMAT_R32G32_UINT;
		case glu::TYPE_UINT_VEC3:		return VK_FORMAT_R32G32B32_UINT;
		case glu::TYPE_UINT_VEC4:		return VK_FORMAT_R32G32B32A32_UINT;

		// Matrix formats describe one column vector (numRows components).
		case glu::TYPE_FLOAT_MAT2:		return VK_FORMAT_R32G32_SFLOAT;
		case glu::TYPE_FLOAT_MAT2X3:	return VK_FORMAT_R32G32B32_SFLOAT;
		case glu::TYPE_FLOAT_MAT2X4:	return VK_FORMAT_R32G32B32A32_SFLOAT;
		case glu::TYPE_FLOAT_MAT3X2:	return VK_FORMAT_R32G32_SFLOAT;
		case glu::TYPE_FLOAT_MAT3:		return VK_FORMAT_R32G32B32_SFLOAT;
		case glu::TYPE_FLOAT_MAT3X4:	return VK_FORMAT_R32G32B32A32_SFLOAT;
		case glu::TYPE_FLOAT_MAT4X2:	return VK_FORMAT_R32G32_SFLOAT;
		case glu::TYPE_FLOAT_MAT4X3:	return VK_FORMAT_R32G32B32_SFLOAT;
		case glu::TYPE_FLOAT_MAT4:		return VK_FORMAT_R32G32B32A32_SFLOAT;
		default:
			DE_ASSERT(false);
			return VK_FORMAT_UNDEFINED;
	}
}
739
// Records one vertex binding + attribute description pair (each attribute
// gets its own binding, numbered by insertion order) and uploads the
// attribute data into a new host-visible vertex buffer.
//
// bindingLocation  shader attribute location to bind.
// format           attribute format (see getAttributeFormat).
// sizePerElement   byte stride of one element.
// count            number of elements (values) to upload.
// dataPtr          source data, count * sizePerElement bytes.
void FragmentOutExecutor::addAttribute (deUint32 bindingLocation, VkFormat format, deUint32 sizePerElement, deUint32 count, const void* dataPtr)
{
	// Add binding specification; binding index == current number of bindings.
	const deUint32 binding = (deUint32)m_vertexBindingDescriptions.size();
	const VkVertexInputBindingDescription bindingDescription =
	{
		binding,
		sizePerElement,
		VK_VERTEX_INPUT_RATE_VERTEX
	};

	m_vertexBindingDescriptions.push_back(bindingDescription);

	// Add location and format specification
	const VkVertexInputAttributeDescription attributeDescription =
	{
		bindingLocation,			// deUint32	location;
		binding,					// deUint32	binding;
		format,						// VkFormat	format;
		0u,							// deUint32	offsetInBytes;
	};

	m_vertexAttributeDescriptions.push_back(attributeDescription);

	// Upload data to a dedicated host-visible vertex buffer.
	const VkDevice				vkDevice			= m_context.getDevice();
	const DeviceInterface&		vk					= m_context.getDeviceInterface();
	const deUint32				queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();

	const VkDeviceSize inputSize = sizePerElement * count;
	const VkBufferCreateInfo vertexBufferParams =
	{
		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,		// VkStructureType		sType;
		DE_NULL,									// const void*			pNext;
		0u,											// VkBufferCreateFlags	flags;
		inputSize,									// VkDeviceSize			size;
		VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,			// VkBufferUsageFlags	usage;
		VK_SHARING_MODE_EXCLUSIVE,					// VkSharingMode		sharingMode;
		1u,											// deUint32				queueFamilyCount;
		&queueFamilyIndex							// const deUint32*		pQueueFamilyIndices;
	};

	Move<VkBuffer> buffer = createBuffer(vk, vkDevice, &vertexBufferParams);
	de::MovePtr<Allocation> alloc = m_context.getDefaultAllocator().allocate(getBufferMemoryRequirements(vk, vkDevice, *buffer), MemoryRequirement::HostVisible);

	VK_CHECK(vk.bindBufferMemory(vkDevice, *buffer, alloc->getMemory(), alloc->getOffset()));

	deMemcpy(alloc->getHostPtr(), dataPtr, (size_t)inputSize);
	// Make the writes visible to the device before use.
	flushAlloc(vk, vkDevice, *alloc);

	// Keep buffer and allocation alive for the lifetime of the executor.
	m_vertexBuffers.push_back(de::SharedPtr<Unique<VkBuffer> >(new Unique<VkBuffer>(buffer)));
	m_vertexBufferAllocs.push_back(AllocationSp(alloc.release()));
}
793
bindAttributes(int numValues,const void * const * inputs)794 void FragmentOutExecutor::bindAttributes (int numValues, const void* const* inputs)
795 {
796 // Input attributes
797 for (int inputNdx = 0; inputNdx < (int)m_shaderSpec.inputs.size(); inputNdx++)
798 {
799 const Symbol& symbol = m_shaderSpec.inputs[inputNdx];
800 const void* ptr = inputs[inputNdx];
801 const glu::DataType basicType = symbol.varType.getBasicType();
802 const int vecSize = glu::getDataTypeScalarSize(basicType);
803 const VkFormat format = getAttributeFormat(basicType);
804 int elementSize = 0;
805 int numAttrsToAdd = 1;
806
807 if (glu::isDataTypeDoubleOrDVec(basicType))
808 elementSize = sizeof(double);
809 if (glu::isDataTypeFloatOrVec(basicType))
810 elementSize = sizeof(float);
811 else if (glu::isDataTypeFloat16OrVec(basicType))
812 elementSize = sizeof(deUint16);
813 else if (glu::isDataTypeIntOrIVec(basicType))
814 elementSize = sizeof(int);
815 else if (glu::isDataTypeUintOrUVec(basicType))
816 elementSize = sizeof(deUint32);
817 else if (glu::isDataTypeMatrix(basicType))
818 {
819 int numRows = glu::getDataTypeMatrixNumRows(basicType);
820 int numCols = glu::getDataTypeMatrixNumColumns(basicType);
821
822 elementSize = numRows * numCols * (int)sizeof(float);
823 numAttrsToAdd = numCols;
824 }
825 else
826 DE_ASSERT(false);
827
828 // add attributes, in case of matrix every column is binded as an attribute
829 for (int attrNdx = 0; attrNdx < numAttrsToAdd; attrNdx++)
830 {
831 addAttribute((deUint32)m_vertexBindingDescriptions.size(), format, elementSize * vecSize, numValues, ptr);
832 }
833 }
834 }
835
clearRenderData(void)836 void FragmentOutExecutor::clearRenderData (void)
837 {
838 m_vertexBindingDescriptions.clear();
839 m_vertexAttributeDescriptions.clear();
840 m_vertexBuffers.clear();
841 m_vertexBufferAllocs.clear();
842 }
843
createEmptyDescriptorSetLayout(const DeviceInterface & vkd,VkDevice device)844 static Move<VkDescriptorSetLayout> createEmptyDescriptorSetLayout (const DeviceInterface& vkd, VkDevice device)
845 {
846 const VkDescriptorSetLayoutCreateInfo createInfo =
847 {
848 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
849 DE_NULL,
850 (VkDescriptorSetLayoutCreateFlags)0,
851 0u,
852 DE_NULL,
853 };
854 return createDescriptorSetLayout(vkd, device, &createInfo);
855 }
856
createDummyDescriptorPool(const DeviceInterface & vkd,VkDevice device)857 static Move<VkDescriptorPool> createDummyDescriptorPool (const DeviceInterface& vkd, VkDevice device)
858 {
859 const VkDescriptorPoolSize dummySize =
860 {
861 VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
862 1u,
863 };
864 const VkDescriptorPoolCreateInfo createInfo =
865 {
866 VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
867 DE_NULL,
868 (VkDescriptorPoolCreateFlags)VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
869 1u,
870 1u,
871 &dummySize
872 };
873 return createDescriptorPool(vkd, device, &createInfo);
874 }
875
allocateSingleDescriptorSet(const DeviceInterface & vkd,VkDevice device,VkDescriptorPool pool,VkDescriptorSetLayout layout)876 static Move<VkDescriptorSet> allocateSingleDescriptorSet (const DeviceInterface& vkd, VkDevice device, VkDescriptorPool pool, VkDescriptorSetLayout layout)
877 {
878 const VkDescriptorSetAllocateInfo allocInfo =
879 {
880 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
881 DE_NULL,
882 pool,
883 1u,
884 &layout,
885 };
886 return allocateDescriptorSet(vkd, device, &allocInfo);
887 }
888
execute(int numValues,const void * const * inputs,void * const * outputs,VkDescriptorSet extraResources)889 void FragmentOutExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
890 {
891 const VkDevice vkDevice = m_context.getDevice();
892 const DeviceInterface& vk = m_context.getDeviceInterface();
893 const VkQueue queue = m_context.getUniversalQueue();
894 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
895 Allocator& memAlloc = m_context.getDefaultAllocator();
896
897 const deUint32 renderSizeX = de::min(static_cast<deUint32>(128), (deUint32)numValues);
898 const deUint32 renderSizeY = ((deUint32)numValues / renderSizeX) + (((deUint32)numValues % renderSizeX != 0) ? 1u : 0u);
899 const tcu::UVec2 renderSize (renderSizeX, renderSizeY);
900 std::vector<tcu::Vec2> positions;
901
902 const bool useGeometryShader = m_shaderType == glu::SHADERTYPE_GEOMETRY;
903
904 std::vector<VkImageSp> colorImages;
905 std::vector<VkImageMemoryBarrier> colorImagePreRenderBarriers;
906 std::vector<VkImageMemoryBarrier> colorImagePostRenderBarriers;
907 std::vector<AllocationSp> colorImageAllocs;
908 std::vector<VkAttachmentDescription> attachments;
909 std::vector<VkClearValue> attachmentClearValues;
910 std::vector<VkImageViewSp> colorImageViews;
911
912 std::vector<VkPipelineColorBlendAttachmentState> colorBlendAttachmentStates;
913 std::vector<VkAttachmentReference> colorAttachmentReferences;
914
915 Move<VkRenderPass> renderPass;
916 Move<VkFramebuffer> framebuffer;
917 Move<VkPipelineLayout> pipelineLayout;
918 Move<VkPipeline> graphicsPipeline;
919
920 Move<VkShaderModule> vertexShaderModule;
921 Move<VkShaderModule> geometryShaderModule;
922 Move<VkShaderModule> fragmentShaderModule;
923
924 Move<VkCommandPool> cmdPool;
925 Move<VkCommandBuffer> cmdBuffer;
926
927 Unique<VkDescriptorSetLayout> emptyDescriptorSetLayout (createEmptyDescriptorSetLayout(vk, vkDevice));
928 Unique<VkDescriptorPool> dummyDescriptorPool (createDummyDescriptorPool(vk, vkDevice));
929 Unique<VkDescriptorSet> emptyDescriptorSet (allocateSingleDescriptorSet(vk, vkDevice, *dummyDescriptorPool, *emptyDescriptorSetLayout));
930
931 clearRenderData();
932
933 // Compute positions - 1px points are used to drive fragment shading.
934 positions = computeVertexPositions(numValues, renderSize.cast<int>());
935
936 // Bind attributes
937 addAttribute(0u, VK_FORMAT_R32G32_SFLOAT, sizeof(tcu::Vec2), (deUint32)positions.size(), &positions[0]);
938 bindAttributes(numValues, inputs);
939
940 // Create color images
941 {
942 const VkPipelineColorBlendAttachmentState colorBlendAttachmentState =
943 {
944 VK_FALSE, // VkBool32 blendEnable;
945 VK_BLEND_FACTOR_ONE, // VkBlendFactor srcColorBlendFactor;
946 VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstColorBlendFactor;
947 VK_BLEND_OP_ADD, // VkBlendOp blendOpColor;
948 VK_BLEND_FACTOR_ONE, // VkBlendFactor srcAlphaBlendFactor;
949 VK_BLEND_FACTOR_ZERO, // VkBlendFactor destAlphaBlendFactor;
950 VK_BLEND_OP_ADD, // VkBlendOp blendOpAlpha;
951 (VK_COLOR_COMPONENT_R_BIT |
952 VK_COLOR_COMPONENT_G_BIT |
953 VK_COLOR_COMPONENT_B_BIT |
954 VK_COLOR_COMPONENT_A_BIT) // VkColorComponentFlags colorWriteMask;
955 };
956
957 for (int outNdx = 0; outNdx < (int)m_outputLayout.locationSymbols.size(); ++outNdx)
958 {
959 const bool isDouble = glu::isDataTypeDoubleOrDVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
960 const bool isFloat = isDataTypeFloatOrVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
961 const bool isFloat16b = glu::isDataTypeFloat16OrVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
962 const bool isSigned = isDataTypeIntOrIVec (m_shaderSpec.outputs[outNdx].varType.getBasicType());
963 const bool isBool = isDataTypeBoolOrBVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
964 const VkFormat colorFormat = (isDouble ? VK_FORMAT_R64G64B64A64_SFLOAT : (isFloat16b ? VK_FORMAT_R16G16B16A16_SFLOAT : (isFloat ? VK_FORMAT_R32G32B32A32_SFLOAT : (isSigned || isBool ? VK_FORMAT_R32G32B32A32_SINT : VK_FORMAT_R32G32B32A32_UINT))));
965
966 {
967 const VkFormatProperties formatProperties = getPhysicalDeviceFormatProperties(m_context.getInstanceInterface(), m_context.getPhysicalDevice(), colorFormat);
968 if ((formatProperties.optimalTilingFeatures & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT) == 0)
969 TCU_THROW(NotSupportedError, "Image format doesn't support COLOR_ATTACHMENT_BIT");
970 }
971
972 const VkImageCreateInfo colorImageParams =
973 {
974 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
975 DE_NULL, // const void* pNext;
976 0u, // VkImageCreateFlags flags;
977 VK_IMAGE_TYPE_2D, // VkImageType imageType;
978 colorFormat, // VkFormat format;
979 { renderSize.x(), renderSize.y(), 1u }, // VkExtent3D extent;
980 1u, // deUint32 mipLevels;
981 1u, // deUint32 arraySize;
982 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
983 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
984 VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT, // VkImageUsageFlags usage;
985 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
986 1u, // deUint32 queueFamilyCount;
987 &queueFamilyIndex, // const deUint32* pQueueFamilyIndices;
988 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
989 };
990
991 const VkAttachmentDescription colorAttachmentDescription =
992 {
993 0u, // VkAttachmentDescriptorFlags flags;
994 colorFormat, // VkFormat format;
995 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
996 VK_ATTACHMENT_LOAD_OP_CLEAR, // VkAttachmentLoadOp loadOp;
997 VK_ATTACHMENT_STORE_OP_STORE, // VkAttachmentStoreOp storeOp;
998 VK_ATTACHMENT_LOAD_OP_DONT_CARE, // VkAttachmentLoadOp stencilLoadOp;
999 VK_ATTACHMENT_STORE_OP_DONT_CARE, // VkAttachmentStoreOp stencilStoreOp;
1000 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // VkImageLayout initialLayout;
1001 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // VkImageLayout finalLayout;
1002 };
1003
1004 Move<VkImage> colorImage = createImage(vk, vkDevice, &colorImageParams);
1005 colorImages.push_back(de::SharedPtr<Unique<VkImage> >(new Unique<VkImage>(colorImage)));
1006 attachmentClearValues.push_back(getDefaultClearColor());
1007
1008 // Allocate and bind color image memory
1009 {
1010 de::MovePtr<Allocation> colorImageAlloc = memAlloc.allocate(getImageMemoryRequirements(vk, vkDevice, *((const VkImage*) colorImages.back().get())), MemoryRequirement::Any);
1011 VK_CHECK(vk.bindImageMemory(vkDevice, colorImages.back().get()->get(), colorImageAlloc->getMemory(), colorImageAlloc->getOffset()));
1012 colorImageAllocs.push_back(de::SharedPtr<Allocation>(colorImageAlloc.release()));
1013
1014 attachments.push_back(colorAttachmentDescription);
1015 colorBlendAttachmentStates.push_back(colorBlendAttachmentState);
1016
1017 const VkAttachmentReference colorAttachmentReference =
1018 {
1019 (deUint32) (colorImages.size() - 1), // deUint32 attachment;
1020 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL // VkImageLayout layout;
1021 };
1022
1023 colorAttachmentReferences.push_back(colorAttachmentReference);
1024 }
1025
1026 // Create color attachment view
1027 {
1028 const VkImageViewCreateInfo colorImageViewParams =
1029 {
1030 VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // VkStructureType sType;
1031 DE_NULL, // const void* pNext;
1032 0u, // VkImageViewCreateFlags flags;
1033 colorImages.back().get()->get(), // VkImage image;
1034 VK_IMAGE_VIEW_TYPE_2D, // VkImageViewType viewType;
1035 colorFormat, // VkFormat format;
1036 {
1037 VK_COMPONENT_SWIZZLE_R, // VkComponentSwizzle r;
1038 VK_COMPONENT_SWIZZLE_G, // VkComponentSwizzle g;
1039 VK_COMPONENT_SWIZZLE_B, // VkComponentSwizzle b;
1040 VK_COMPONENT_SWIZZLE_A // VkComponentSwizzle a;
1041 }, // VkComponentMapping components;
1042 {
1043 VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask;
1044 0u, // deUint32 baseMipLevel;
1045 1u, // deUint32 mipLevels;
1046 0u, // deUint32 baseArraySlice;
1047 1u // deUint32 arraySize;
1048 } // VkImageSubresourceRange subresourceRange;
1049 };
1050
1051 Move<VkImageView> colorImageView = createImageView(vk, vkDevice, &colorImageViewParams);
1052 colorImageViews.push_back(de::SharedPtr<Unique<VkImageView> >(new Unique<VkImageView>(colorImageView)));
1053
1054 const VkImageMemoryBarrier colorImagePreRenderBarrier =
1055 {
1056 VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // sType
1057 DE_NULL, // pNext
1058 0u, // srcAccessMask
1059 (VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
1060 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT), // dstAccessMask
1061 VK_IMAGE_LAYOUT_UNDEFINED, // oldLayout
1062 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // newLayout
1063 VK_QUEUE_FAMILY_IGNORED, // srcQueueFamilyIndex
1064 VK_QUEUE_FAMILY_IGNORED, // dstQueueFamilyIndex
1065 colorImages.back().get()->get(), // image
1066 {
1067 VK_IMAGE_ASPECT_COLOR_BIT, // aspectMask
1068 0u, // baseMipLevel
1069 1u, // levelCount
1070 0u, // baseArrayLayer
1071 1u, // layerCount
1072 } // subresourceRange
1073 };
1074 colorImagePreRenderBarriers.push_back(colorImagePreRenderBarrier);
1075
1076 const VkImageMemoryBarrier colorImagePostRenderBarrier =
1077 {
1078 VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // sType
1079 DE_NULL, // pNext
1080 (VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
1081 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT), // srcAccessMask
1082 VK_ACCESS_TRANSFER_READ_BIT, // dstAccessMask
1083 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // oldLayout
1084 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // newLayout
1085 VK_QUEUE_FAMILY_IGNORED, // srcQueueFamilyIndex
1086 VK_QUEUE_FAMILY_IGNORED, // dstQueueFamilyIndex
1087 colorImages.back().get()->get(), // image
1088 {
1089 VK_IMAGE_ASPECT_COLOR_BIT, // aspectMask
1090 0u, // baseMipLevel
1091 1u, // levelCount
1092 0u, // baseArrayLayer
1093 1u, // layerCount
1094 } // subresourceRange
1095 };
1096 colorImagePostRenderBarriers.push_back(colorImagePostRenderBarrier);
1097 }
1098 }
1099 }
1100
1101 // Create render pass
1102 {
1103 const VkSubpassDescription subpassDescription =
1104 {
1105 0u, // VkSubpassDescriptionFlags flags;
1106 VK_PIPELINE_BIND_POINT_GRAPHICS, // VkPipelineBindPoint pipelineBindPoint;
1107 0u, // deUint32 inputCount;
1108 DE_NULL, // const VkAttachmentReference* pInputAttachments;
1109 (deUint32)colorImages.size(), // deUint32 colorCount;
1110 &colorAttachmentReferences[0], // const VkAttachmentReference* colorAttachments;
1111 DE_NULL, // const VkAttachmentReference* resolveAttachments;
1112 DE_NULL, // VkAttachmentReference depthStencilAttachment;
1113 0u, // deUint32 preserveCount;
1114 DE_NULL // const VkAttachmentReference* pPreserveAttachments;
1115 };
1116
1117 const VkRenderPassCreateInfo renderPassParams =
1118 {
1119 VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // VkStructureType sType;
1120 DE_NULL, // const void* pNext;
1121 (VkRenderPassCreateFlags)0, // VkRenderPassCreateFlags flags;
1122 (deUint32)attachments.size(), // deUint32 attachmentCount;
1123 &attachments[0], // const VkAttachmentDescription* pAttachments;
1124 1u, // deUint32 subpassCount;
1125 &subpassDescription, // const VkSubpassDescription* pSubpasses;
1126 0u, // deUint32 dependencyCount;
1127 DE_NULL // const VkSubpassDependency* pDependencies;
1128 };
1129
1130 renderPass = createRenderPass(vk, vkDevice, &renderPassParams);
1131 }
1132
1133 // Create framebuffer
1134 {
1135 std::vector<VkImageView> views(colorImageViews.size());
1136 for (size_t i = 0; i < colorImageViews.size(); i++)
1137 {
1138 views[i] = colorImageViews[i].get()->get();
1139 }
1140
1141 const VkFramebufferCreateInfo framebufferParams =
1142 {
1143 VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, // VkStructureType sType;
1144 DE_NULL, // const void* pNext;
1145 0u, // VkFramebufferCreateFlags flags;
1146 *renderPass, // VkRenderPass renderPass;
1147 (deUint32)views.size(), // deUint32 attachmentCount;
1148 &views[0], // const VkImageView* pAttachments;
1149 (deUint32)renderSize.x(), // deUint32 width;
1150 (deUint32)renderSize.y(), // deUint32 height;
1151 1u // deUint32 layers;
1152 };
1153
1154 framebuffer = createFramebuffer(vk, vkDevice, &framebufferParams);
1155 }
1156
1157 // Create pipeline layout
1158 {
1159 const VkDescriptorSetLayout setLayouts[] =
1160 {
1161 *emptyDescriptorSetLayout,
1162 m_extraResourcesLayout
1163 };
1164 const VkPipelineLayoutCreateInfo pipelineLayoutParams =
1165 {
1166 VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // VkStructureType sType;
1167 DE_NULL, // const void* pNext;
1168 (VkPipelineLayoutCreateFlags)0, // VkPipelineLayoutCreateFlags flags;
1169 (m_extraResourcesLayout != 0 ? 2u : 0u), // deUint32 descriptorSetCount;
1170 setLayouts, // const VkDescriptorSetLayout* pSetLayouts;
1171 0u, // deUint32 pushConstantRangeCount;
1172 DE_NULL // const VkPushConstantRange* pPushConstantRanges;
1173 };
1174
1175 pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
1176 }
1177
1178 // Create shaders
1179 {
1180 vertexShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("vert"), 0);
1181 fragmentShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("frag"), 0);
1182
1183 if (useGeometryShader)
1184 {
1185 if (m_context.getDeviceFeatures().shaderTessellationAndGeometryPointSize)
1186 geometryShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("geom_point_size"), 0);
1187 else
1188 geometryShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("geom"), 0);
1189 }
1190 }
1191
1192 // Create pipeline
1193 {
1194 const VkPipelineVertexInputStateCreateInfo vertexInputStateParams =
1195 {
1196 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
1197 DE_NULL, // const void* pNext;
1198 (VkPipelineVertexInputStateCreateFlags)0, // VkPipelineVertexInputStateCreateFlags flags;
1199 (deUint32)m_vertexBindingDescriptions.size(), // deUint32 bindingCount;
1200 &m_vertexBindingDescriptions[0], // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
1201 (deUint32)m_vertexAttributeDescriptions.size(), // deUint32 attributeCount;
1202 &m_vertexAttributeDescriptions[0], // const VkVertexInputAttributeDescription* pvertexAttributeDescriptions;
1203 };
1204
1205 const std::vector<VkViewport> viewports (1, makeViewport(renderSize));
1206 const std::vector<VkRect2D> scissors (1, makeRect2D(renderSize));
1207
1208 const VkPipelineColorBlendStateCreateInfo colorBlendStateParams =
1209 {
1210 VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType sType;
1211 DE_NULL, // const void* pNext;
1212 (VkPipelineColorBlendStateCreateFlags)0, // VkPipelineColorBlendStateCreateFlags flags;
1213 VK_FALSE, // VkBool32 logicOpEnable;
1214 VK_LOGIC_OP_COPY, // VkLogicOp logicOp;
1215 (deUint32)colorBlendAttachmentStates.size(), // deUint32 attachmentCount;
1216 &colorBlendAttachmentStates[0], // const VkPipelineColorBlendAttachmentState* pAttachments;
1217 { 0.0f, 0.0f, 0.0f, 0.0f } // float blendConst[4];
1218 };
1219
1220 graphicsPipeline = makeGraphicsPipeline(vk, // const DeviceInterface& vk
1221 vkDevice, // const VkDevice device
1222 *pipelineLayout, // const VkPipelineLayout pipelineLayout
1223 *vertexShaderModule, // const VkShaderModule vertexShaderModule
1224 DE_NULL, // const VkShaderModule tessellationControlShaderModule
1225 DE_NULL, // const VkShaderModule tessellationEvalShaderModule
1226 useGeometryShader ? *geometryShaderModule : DE_NULL, // const VkShaderModule geometryShaderModule
1227 *fragmentShaderModule, // const VkShaderModule fragmentShaderModule
1228 *renderPass, // const VkRenderPass renderPass
1229 viewports, // const std::vector<VkViewport>& viewports
1230 scissors, // const std::vector<VkRect2D>& scissors
1231 VK_PRIMITIVE_TOPOLOGY_POINT_LIST, // const VkPrimitiveTopology topology
1232 0u, // const deUint32 subpass
1233 0u, // const deUint32 patchControlPoints
1234 &vertexInputStateParams, // const VkPipelineVertexInputStateCreateInfo* vertexInputStateCreateInfo
1235 DE_NULL, // const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
1236 DE_NULL, // const VkPipelineMultisampleStateCreateInfo* multisampleStateCreateInfo
1237 DE_NULL, // const VkPipelineDepthStencilStateCreateInfo* depthStencilStateCreateInfo
1238 &colorBlendStateParams); // const VkPipelineColorBlendStateCreateInfo* colorBlendStateCreateInfo
1239 }
1240
1241 // Create command pool
1242 cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
1243
1244 // Create command buffer
1245 {
1246 cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1247
1248 beginCommandBuffer(vk, *cmdBuffer);
1249
1250 vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, vk::VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0,
1251 0, (const VkMemoryBarrier*)DE_NULL,
1252 0, (const VkBufferMemoryBarrier*)DE_NULL,
1253 (deUint32)colorImagePreRenderBarriers.size(), colorImagePreRenderBarriers.empty() ? DE_NULL : &colorImagePreRenderBarriers[0]);
1254 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, renderSize.x(), renderSize.y()), (deUint32)attachmentClearValues.size(), &attachmentClearValues[0]);
1255
1256 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *graphicsPipeline);
1257
1258 if (m_extraResourcesLayout != 0)
1259 {
1260 DE_ASSERT(extraResources != 0);
1261 const VkDescriptorSet descriptorSets[] = { *emptyDescriptorSet, extraResources };
1262 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, DE_LENGTH_OF_ARRAY(descriptorSets), descriptorSets, 0u, DE_NULL);
1263 }
1264 else
1265 DE_ASSERT(extraResources == 0);
1266
1267 const deUint32 numberOfVertexAttributes = (deUint32)m_vertexBuffers.size();
1268
1269 std::vector<VkDeviceSize> offsets(numberOfVertexAttributes, 0);
1270
1271 std::vector<VkBuffer> buffers(numberOfVertexAttributes);
1272 for (size_t i = 0; i < numberOfVertexAttributes; i++)
1273 {
1274 buffers[i] = m_vertexBuffers[i].get()->get();
1275 }
1276
1277 vk.cmdBindVertexBuffers(*cmdBuffer, 0, numberOfVertexAttributes, &buffers[0], &offsets[0]);
1278 vk.cmdDraw(*cmdBuffer, (deUint32)positions.size(), 1u, 0u, 0u);
1279
1280 endRenderPass(vk, *cmdBuffer);
1281 vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, vk::VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0,
1282 0, (const VkMemoryBarrier*)DE_NULL,
1283 0, (const VkBufferMemoryBarrier*)DE_NULL,
1284 (deUint32)colorImagePostRenderBarriers.size(), colorImagePostRenderBarriers.empty() ? DE_NULL : &colorImagePostRenderBarriers[0]);
1285
1286 endCommandBuffer(vk, *cmdBuffer);
1287 }
1288
1289 // Execute Draw
1290 submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
1291
1292 // Read back result and output
1293 {
1294 const VkDeviceSize imageSizeBytes = (VkDeviceSize)(4 * sizeof(deUint32) * renderSize.x() * renderSize.y());
1295 const VkBufferCreateInfo readImageBufferParams =
1296 {
1297 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
1298 DE_NULL, // const void* pNext;
1299 0u, // VkBufferCreateFlags flags;
1300 imageSizeBytes, // VkDeviceSize size;
1301 VK_BUFFER_USAGE_TRANSFER_DST_BIT, // VkBufferUsageFlags usage;
1302 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
1303 1u, // deUint32 queueFamilyCount;
1304 &queueFamilyIndex, // const deUint32* pQueueFamilyIndices;
1305 };
1306
1307 // constants for image copy
1308 Move<VkCommandPool> copyCmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
1309
1310 const VkBufferImageCopy copyParams =
1311 {
1312 0u, // VkDeviceSize bufferOffset;
1313 (deUint32)renderSize.x(), // deUint32 bufferRowLength;
1314 (deUint32)renderSize.y(), // deUint32 bufferImageHeight;
1315 {
1316 VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspect aspect;
1317 0u, // deUint32 mipLevel;
1318 0u, // deUint32 arraySlice;
1319 1u, // deUint32 arraySize;
1320 }, // VkImageSubresource imageSubresource;
1321 { 0u, 0u, 0u }, // VkOffset3D imageOffset;
1322 { renderSize.x(), renderSize.y(), 1u } // VkExtent3D imageExtent;
1323 };
1324
1325 // Read back pixels.
1326 for (int outNdx = 0; outNdx < (int)m_shaderSpec.outputs.size(); ++outNdx)
1327 {
1328 const Symbol& output = m_shaderSpec.outputs[outNdx];
1329 const int outSize = output.varType.getScalarSize();
1330 const int outVecSize = glu::getDataTypeNumComponents(output.varType.getBasicType());
1331 const int outNumLocs = glu::getDataTypeNumLocations(output.varType.getBasicType());
1332 const int outLocation = de::lookup(m_outputLayout.locationMap, output.name);
1333
1334 for (int locNdx = 0; locNdx < outNumLocs; ++locNdx)
1335 {
1336 tcu::TextureLevel tmpBuf;
1337 const tcu::TextureFormat format = getRenderbufferFormatForOutput(output.varType, false);
1338 const tcu::TextureFormat readFormat (tcu::TextureFormat::RGBA, format.type);
1339 const Unique<VkBuffer> readImageBuffer(createBuffer(vk, vkDevice, &readImageBufferParams));
1340 const de::UniquePtr<Allocation> readImageBufferMemory(memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *readImageBuffer), MemoryRequirement::HostVisible));
1341
1342 VK_CHECK(vk.bindBufferMemory(vkDevice, *readImageBuffer, readImageBufferMemory->getMemory(), readImageBufferMemory->getOffset()));
1343
1344 // Copy image to buffer
1345 {
1346
1347 Move<VkCommandBuffer> copyCmdBuffer = allocateCommandBuffer(vk, vkDevice, *copyCmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1348
1349 beginCommandBuffer(vk, *copyCmdBuffer);
1350 vk.cmdCopyImageToBuffer(*copyCmdBuffer, colorImages[outLocation + locNdx].get()->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *readImageBuffer, 1u, ©Params);
1351
1352 // Insert a barrier so data written by the transfer is available to the host
1353 {
1354 const VkBufferMemoryBarrier barrier =
1355 {
1356 VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType sType;
1357 DE_NULL, // const void* pNext;
1358 VK_ACCESS_TRANSFER_WRITE_BIT, // VkAccessFlags srcAccessMask;
1359 VK_ACCESS_HOST_READ_BIT, // VkAccessFlags dstAccessMask;
1360 VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex;
1361 VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex;
1362 *readImageBuffer, // VkBuffer buffer;
1363 0, // VkDeviceSize offset;
1364 VK_WHOLE_SIZE, // VkDeviceSize size;
1365 };
1366
1367 vk.cmdPipelineBarrier(*copyCmdBuffer, vk::VK_PIPELINE_STAGE_TRANSFER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0,
1368 0, (const VkMemoryBarrier*)DE_NULL,
1369 1, &barrier,
1370 0, (const VkImageMemoryBarrier*)DE_NULL);
1371 }
1372
1373 endCommandBuffer(vk, *copyCmdBuffer);
1374
1375 submitCommandsAndWait(vk, vkDevice, queue, copyCmdBuffer.get());
1376 }
1377
1378 invalidateAlloc(vk, vkDevice, *readImageBufferMemory);
1379
1380 tmpBuf.setStorage(readFormat, renderSize.x(), renderSize.y());
1381
1382 const tcu::TextureFormat resultFormat(tcu::TextureFormat::RGBA, format.type);
1383 const tcu::ConstPixelBufferAccess resultAccess(resultFormat, renderSize.x(), renderSize.y(), 1, readImageBufferMemory->getHostPtr());
1384
1385 tcu::copy(tmpBuf.getAccess(), resultAccess);
1386
1387 if (isOutput16Bit(static_cast<size_t>(outNdx)))
1388 {
1389 deUint16* dstPtrBase = static_cast<deUint16*>(outputs[outNdx]);
1390 if (outSize == 4 && outNumLocs == 1)
1391 deMemcpy(dstPtrBase, tmpBuf.getAccess().getDataPtr(), numValues * outVecSize * sizeof(deUint16));
1392 else
1393 {
1394 for (int valNdx = 0; valNdx < numValues; valNdx++)
1395 {
1396 const deUint16* srcPtr = (const deUint16*)tmpBuf.getAccess().getDataPtr() + valNdx * 4;
1397 deUint16* dstPtr = &dstPtrBase[outSize * valNdx + outVecSize * locNdx];
1398 deMemcpy(dstPtr, srcPtr, outVecSize * sizeof(deUint16));
1399 }
1400 }
1401 }
1402 else
1403 {
1404 deUint32* dstPtrBase = static_cast<deUint32*>(outputs[outNdx]);
1405 if (outSize == 4 && outNumLocs == 1)
1406 deMemcpy(dstPtrBase, tmpBuf.getAccess().getDataPtr(), numValues * outVecSize * sizeof(deUint32));
1407 else
1408 {
1409 for (int valNdx = 0; valNdx < numValues; valNdx++)
1410 {
1411 const deUint32* srcPtr = (const deUint32*)tmpBuf.getAccess().getDataPtr() + valNdx * 4;
1412 deUint32* dstPtr = &dstPtrBase[outSize * valNdx + outVecSize * locNdx];
1413 deMemcpy(dstPtr, srcPtr, outVecSize * sizeof(deUint32));
1414 }
1415 }
1416 }
1417 }
1418 }
1419 }
1420 }
1421
1422 // VertexShaderExecutor
1423
// Runs the user-supplied shader code in the vertex stage; outputs are carried
// through a passthrough fragment shader into the color attachments.
class VertexShaderExecutor : public FragmentOutExecutor
{
public:
	VertexShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual ~VertexShaderExecutor (void);

	// Registers "vert" and "frag" GLSL sources for this shader spec.
	static void generateSources (const ShaderSpec& shaderSpec, SourceCollections& dst);
};
1432
VertexShaderExecutor(Context & context,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)1433 VertexShaderExecutor::VertexShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
1434 : FragmentOutExecutor(context, glu::SHADERTYPE_VERTEX, shaderSpec, extraResourcesLayout)
1435 {
1436 }
1437
~VertexShaderExecutor(void)1438 VertexShaderExecutor::~VertexShaderExecutor (void)
1439 {
1440 }
1441
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)1442 void VertexShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
1443 {
1444 const FragmentOutputLayout outputLayout (computeFragmentOutputLayout(shaderSpec.outputs));
1445
1446 programCollection.glslSources.add("vert") << glu::VertexSource(generateVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1447 /* \todo [2015-09-11 hegedusd] set useIntOutputs parameter if needed. */
1448 programCollection.glslSources.add("frag") << glu::FragmentSource(generatePassthroughFragmentShader(shaderSpec, false, outputLayout.locationMap, "vtx_out_", "o_")) << shaderSpec.buildOptions;
1449 }
1450
1451 // GeometryShaderExecutor
1452
// Runs the user-supplied shader code in the geometry stage, fed by a
// passthrough vertex shader; outputs are captured via the fragment stage.
class GeometryShaderExecutor : public FragmentOutExecutor
{
public:
	GeometryShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual ~GeometryShaderExecutor (void);

	// Registers "vert", "geom", "geom_point_size" and "frag" GLSL sources.
	static void generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection);

};
1462
GeometryShaderExecutor(Context & context,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)1463 GeometryShaderExecutor::GeometryShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
1464 : FragmentOutExecutor(context, glu::SHADERTYPE_GEOMETRY, shaderSpec, extraResourcesLayout)
1465 {
1466 const VkPhysicalDeviceFeatures& features = context.getDeviceFeatures();
1467
1468 if (!features.geometryShader)
1469 TCU_THROW(NotSupportedError, "Geometry shader type not supported by device");
1470 }
1471
~GeometryShaderExecutor(void)1472 GeometryShaderExecutor::~GeometryShaderExecutor (void)
1473 {
1474 }
1475
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)1476 void GeometryShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
1477 {
1478 const FragmentOutputLayout outputLayout (computeFragmentOutputLayout(shaderSpec.outputs));
1479
1480 programCollection.glslSources.add("vert") << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1481
1482 programCollection.glslSources.add("geom") << glu::GeometrySource(generateGeometryShader(shaderSpec, "vtx_out_", "geom_out_", false)) << shaderSpec.buildOptions;
1483 programCollection.glslSources.add("geom_point_size") << glu::GeometrySource(generateGeometryShader(shaderSpec, "vtx_out_", "geom_out_", true)) << shaderSpec.buildOptions;
1484
1485 /* \todo [2015-09-18 rsipka] set useIntOutputs parameter if needed. */
1486 programCollection.glslSources.add("frag") << glu::FragmentSource(generatePassthroughFragmentShader(shaderSpec, false, outputLayout.locationMap, "geom_out_", "o_")) << shaderSpec.buildOptions;
1487
1488 }
1489
1490 // FragmentShaderExecutor
1491
// Runs the shader expression in the fragment stage; a passthrough vertex
// shader feeds the spec inputs through "vtx_out_" varyings.
class FragmentShaderExecutor : public FragmentOutExecutor
{
public:
	FragmentShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual ~FragmentShaderExecutor (void);

	// Adds the "vert" and "frag" GLSL sources for this spec to programCollection.
	static void generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection);

};
1501
// Unlike the geometry executor, no device feature check is needed here:
// the constructor only forwards to the fragment-output base class.
FragmentShaderExecutor::FragmentShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: FragmentOutExecutor(context, glu::SHADERTYPE_FRAGMENT, shaderSpec, extraResourcesLayout)
{
}
1506
// Trivial destructor; RAII members in the base classes release all resources.
FragmentShaderExecutor::~FragmentShaderExecutor (void)
{
}
1510
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)1511 void FragmentShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
1512 {
1513 const FragmentOutputLayout outputLayout (computeFragmentOutputLayout(shaderSpec.outputs));
1514
1515 programCollection.glslSources.add("vert") << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1516 /* \todo [2015-09-11 hegedusd] set useIntOutputs parameter if needed. */
1517 programCollection.glslSources.add("frag") << glu::FragmentSource(generateFragmentShader(shaderSpec, false, outputLayout.locationMap, "vtx_out_", "o_")) << shaderSpec.buildOptions;
1518 }
1519
1520 // Shared utilities for compute and tess executors
1521
getVecStd430ByteAlignment(glu::DataType type)1522 static deUint32 getVecStd430ByteAlignment (glu::DataType type)
1523 {
1524 deUint32 baseSize;
1525
1526 switch (glu::getDataTypeScalarType(type))
1527 {
1528 case glu::TYPE_FLOAT16: baseSize = 2u; break;
1529 case glu::TYPE_DOUBLE: baseSize = 8u; break;
1530 default: baseSize = 4u; break;
1531 }
1532
1533 switch (glu::getDataTypeScalarSize(type))
1534 {
1535 case 1: return baseSize;
1536 case 2: return baseSize * 2u;;
1537 case 3: // fallthrough.
1538 case 4: return baseSize * 4u;
1539 default:
1540 DE_ASSERT(false);
1541 return 0u;
1542 }
1543 }
1544
// Base class for executors that pass input/output values through storage
// buffers (compute and tessellation stages) rather than vertex attributes.
class BufferIoExecutor : public ShaderExecutor
{
public:
	BufferIoExecutor (Context& context, const ShaderSpec& shaderSpec);
	virtual ~BufferIoExecutor (void);

protected:
	// Fixed descriptor bindings used by the generated shader SSBO blocks.
	enum
	{
		INPUT_BUFFER_BINDING = 0,
		OUTPUT_BUFFER_BINDING = 1,
	};

	// Creates and binds host-visible input/output storage buffers sized for numValues.
	void initBuffers (int numValues);
	VkBuffer getInputBuffer (void) const { return *m_inputBuffer; }
	VkBuffer getOutputBuffer (void) const { return *m_outputBuffer; }
	// Per-invocation stride of the input/output buffer, in bytes.
	deUint32 getInputStride (void) const { return getLayoutStride(m_inputLayout); }
	deUint32 getOutputStride (void) const { return getLayoutStride(m_outputLayout); }

	// Copies caller-provided values into the mapped input buffer and flushes it.
	void uploadInputBuffer (const void* const* inputPtrs, int numValues, bool packFloat16Bit);
	// Invalidates the mapped output buffer and copies results back to outputPtrs.
	void readOutputBuffer (void* const* outputPtrs, int numValues);

	// Emits the GLSL "Inputs"/"Outputs" struct and SSBO block declarations.
	static void declareBufferBlocks (std::ostream& src, const ShaderSpec& spec);
	// Emits the per-invocation load / execute / store GLSL body.
	static void generateExecBufferIo(std::ostream& src, const ShaderSpec& spec, const char* invocationNdxName);

protected:
	Move<VkBuffer> m_inputBuffer;
	Move<VkBuffer> m_outputBuffer;

private:
	// Placement of one symbol within the buffer; stride is common to all entries.
	struct VarLayout
	{
		deUint32 offset;
		deUint32 stride;
		deUint32 matrixStride;	// 0 for non-matrix types

		VarLayout (void) : offset(0), stride(0), matrixStride(0) {}
	};

	static void computeVarLayout (const std::vector<Symbol>& symbols, std::vector<VarLayout>* layout);
	static deUint32 getLayoutStride (const vector<VarLayout>& layout);

	static void copyToBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr, bool packFloat16Bit);
	static void copyFromBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr);

	de::MovePtr<Allocation> m_inputAlloc;
	de::MovePtr<Allocation> m_outputAlloc;

	vector<VarLayout> m_inputLayout;
	vector<VarLayout> m_outputLayout;
};
1596
// Precomputes buffer layouts for inputs and outputs; the buffers themselves
// are created later by initBuffers() once the value count is known.
BufferIoExecutor::BufferIoExecutor (Context& context, const ShaderSpec& shaderSpec)
	: ShaderExecutor(context, shaderSpec)
{
	computeVarLayout(m_shaderSpec.inputs, &m_inputLayout);
	computeVarLayout(m_shaderSpec.outputs, &m_outputLayout);
}
1603
// Trivial destructor; Move<VkBuffer> and MovePtr<Allocation> members clean up.
BufferIoExecutor::~BufferIoExecutor (void)
{
}
1607
getLayoutStride(const vector<VarLayout> & layout)1608 inline deUint32 BufferIoExecutor::getLayoutStride (const vector<VarLayout>& layout)
1609 {
1610 return layout.empty() ? 0 : layout[0].stride;
1611 }
1612
// Computes buffer placement (offset/stride/matrixStride) for each symbol
// using std430-style rules: scalars/vectors align per
// getVecStd430ByteAlignment, matrices are laid out column-by-column with
// column-vector alignment. On return every entry's stride equals the total
// aligned size of one value set, so invocation i's data lives at
// offset + stride * i.
void BufferIoExecutor::computeVarLayout (const std::vector<Symbol>& symbols, std::vector<VarLayout>* layout)
{
	deUint32 maxAlignment = 0;
	deUint32 curOffset = 0;

	DE_ASSERT(layout != DE_NULL);
	DE_ASSERT(layout->empty());
	layout->resize(symbols.size());

	for (size_t varNdx = 0; varNdx < symbols.size(); varNdx++)
	{
		const Symbol& symbol = symbols[varNdx];
		const glu::DataType basicType = symbol.varType.getBasicType();
		VarLayout& layoutEntry = (*layout)[varNdx];

		if (glu::isDataTypeScalarOrVector(basicType))
		{
			// Size = scalar count * component size (8 bytes for doubles,
			// 2 for f16 types, 4 otherwise).
			const deUint32 alignment = getVecStd430ByteAlignment(basicType);
			const deUint32 size = (deUint32)glu::getDataTypeScalarSize(basicType) * (isDataTypeDoubleType(basicType) ? (int)(sizeof(deUint64)) : (isDataTypeFloat16OrVec(basicType) ? (int)sizeof(deUint16) : (int)sizeof(deUint32)));

			curOffset = (deUint32)deAlign32((int)curOffset, (int)alignment);
			maxAlignment = de::max(maxAlignment, alignment);

			layoutEntry.offset = curOffset;
			layoutEntry.matrixStride = 0;

			curOffset += size;
		}
		else if (glu::isDataTypeMatrix(basicType))
		{
			// Matrices occupy numColumns column vectors, each aligned (and
			// strided) like a stand-alone vector of the same component count.
			const int numVecs = glu::getDataTypeMatrixNumColumns(basicType);
			const glu::DataType vecType = glu::getDataTypeVector(glu::getDataTypeScalarType(basicType), glu::getDataTypeMatrixNumRows(basicType));
			const deUint32 vecAlignment = getVecStd430ByteAlignment(vecType);

			curOffset = (deUint32)deAlign32((int)curOffset, (int)vecAlignment);
			maxAlignment = de::max(maxAlignment, vecAlignment);

			layoutEntry.offset = curOffset;
			layoutEntry.matrixStride = vecAlignment;

			curOffset += vecAlignment*numVecs;
		}
		else
			DE_ASSERT(false); // Arrays and structs are not supported by buffer IO executors.
	}

	{
		// Round the total up to the largest member alignment and use it as
		// the common per-invocation stride for every variable.
		const deUint32 totalSize = (deUint32)deAlign32(curOffset, maxAlignment);

		for (vector<VarLayout>::iterator varIter = layout->begin(); varIter != layout->end(); ++varIter)
			varIter->stride = totalSize;
	}
}
1666
declareBufferBlocks(std::ostream & src,const ShaderSpec & spec)1667 void BufferIoExecutor::declareBufferBlocks (std::ostream& src, const ShaderSpec& spec)
1668 {
1669 // Input struct
1670 if (!spec.inputs.empty())
1671 {
1672 glu::StructType inputStruct("Inputs");
1673 for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
1674 inputStruct.addMember(symIter->name.c_str(), symIter->varType);
1675 src << glu::declare(&inputStruct) << ";\n";
1676 }
1677
1678 // Output struct
1679 {
1680 glu::StructType outputStruct("Outputs");
1681 for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1682 outputStruct.addMember(symIter->name.c_str(), symIter->varType);
1683 src << glu::declare(&outputStruct) << ";\n";
1684 }
1685
1686 src << "\n";
1687
1688 if (!spec.inputs.empty())
1689 {
1690 src << "layout(set = 0, binding = " << int(INPUT_BUFFER_BINDING) << ", std430) buffer InBuffer\n"
1691 << "{\n"
1692 << " Inputs inputs[];\n"
1693 << "};\n";
1694 }
1695
1696 src << "layout(set = 0, binding = " << int(OUTPUT_BUFFER_BINDING) << ", std430) buffer OutBuffer\n"
1697 << "{\n"
1698 << " Outputs outputs[];\n"
1699 << "};\n"
1700 << "\n";
1701 }
1702
generateExecBufferIo(std::ostream & src,const ShaderSpec & spec,const char * invocationNdxName)1703 void BufferIoExecutor::generateExecBufferIo (std::ostream& src, const ShaderSpec& spec, const char* invocationNdxName)
1704 {
1705 std::string tname;
1706 for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
1707 {
1708 const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
1709 if (f16BitTest)
1710 {
1711 tname = glu::getDataTypeName(getDataTypeFloat16Scalars(symIter->varType.getBasicType()));
1712 }
1713 else
1714 {
1715 tname = glu::getDataTypeName(symIter->varType.getBasicType());
1716 }
1717 src << "\t" << tname << " "<< symIter->name << " = " << tname << "(inputs[" << invocationNdxName << "]." << symIter->name << ");\n";
1718 }
1719
1720 for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1721 {
1722 const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
1723 if (f16BitTest)
1724 {
1725 tname = glu::getDataTypeName(getDataTypeFloat16Scalars(symIter->varType.getBasicType()));
1726 }
1727 else
1728 {
1729 tname = glu::getDataTypeName(symIter->varType.getBasicType());
1730 }
1731 src << "\t" << tname << " " << symIter->name << ";\n";
1732 if (f16BitTest)
1733 {
1734 const char* ttname = glu::getDataTypeName(symIter->varType.getBasicType());
1735 src << "\t" << ttname << " " << "packed_" << symIter->name << ";\n";
1736 }
1737 }
1738
1739 src << "\n";
1740
1741 {
1742 std::istringstream opSrc (spec.source);
1743 std::string line;
1744
1745 while (std::getline(opSrc, line))
1746 src << "\t" << line << "\n";
1747 }
1748
1749 if (spec.packFloat16Bit)
1750 packFloat16Bit (src, spec.outputs);
1751
1752 src << "\n";
1753 for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1754 {
1755 const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
1756 if(f16BitTest)
1757 src << "\toutputs[" << invocationNdxName << "]." << symIter->name << " = packed_" << symIter->name << ";\n";
1758 else
1759 src << "\toutputs[" << invocationNdxName << "]." << symIter->name << " = " << symIter->name << ";\n";
1760 }
1761 }
1762
// Copies one variable's values for all invocations from a tightly packed
// source array into the std430-laid-out input buffer at dstBasePtr.
// When packFloat16Bit is set, 32-bit float inputs are converted to 16-bit
// floats stored in the low half of each 32-bit buffer slot.
void BufferIoExecutor::copyToBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr, bool packFloat16Bit)
{
	if (varType.isBasicType())
	{
		const glu::DataType basicType = varType.getBasicType();
		const bool isMatrix = glu::isDataTypeMatrix(basicType);
		const int scalarSize = glu::getDataTypeScalarSize(basicType);
		const int numVecs = isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
		// Components per column (equals scalarSize for non-matrix types).
		const int numComps = scalarSize / numVecs;
		// Component size: 8 bytes for doubles, 2 for f16 types, 4 otherwise.
		const int size = (glu::isDataTypeDoubleType(basicType) ? (int)sizeof(deUint64) : (glu::isDataTypeFloat16OrVec(basicType) ? (int)sizeof(deUint16) : (int)sizeof(deUint32)));

		for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
		{
			for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
			{
				// Source is tightly packed; destination honors the buffer
				// layout (offset/stride, plus matrixStride per column).
				const int srcOffset = size * (elemNdx * scalarSize + vecNdx * numComps);
				const int dstOffset = layout.offset + layout.stride * elemNdx + (isMatrix ? layout.matrixStride * vecNdx : 0);
				const deUint8* srcPtr = (const deUint8*)srcBasePtr + srcOffset;
				deUint8* dstPtr = (deUint8*)dstBasePtr + dstOffset;

				if (packFloat16Bit)
				{
					// Convert the float values to 16 bit and store in the lower 16 bits of 32 bit ints.
					// NOTE(review): copying 'size' (4) bytes from {f16, 0} puts
					// the f16 in the low 16 bits only on little-endian hosts —
					// presumably intended; confirm for big-endian targets.
					for (int cmpNdx=0; cmpNdx < numComps; ++cmpNdx)
					{
						deFloat16 f16vals[2] = {};
						f16vals[0] = deFloat32To16Round(((float*)srcPtr)[cmpNdx], DE_ROUNDINGMODE_TO_ZERO);
						deMemcpy(dstPtr + cmpNdx * size, &f16vals[0], size);
					}
				}
				else
				{
					deMemcpy(dstPtr, srcPtr, size * numComps);
				}
			}
		}
	}
	else
		throw tcu::InternalError("Unsupported type");
}
1803
copyFromBuffer(const glu::VarType & varType,const VarLayout & layout,int numValues,const void * srcBasePtr,void * dstBasePtr)1804 void BufferIoExecutor::copyFromBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr)
1805 {
1806 if (varType.isBasicType())
1807 {
1808 const glu::DataType basicType = varType.getBasicType();
1809 const bool isMatrix = glu::isDataTypeMatrix(basicType);
1810 const int scalarSize = glu::getDataTypeScalarSize(basicType);
1811 const int numVecs = isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
1812 const int numComps = scalarSize / numVecs;
1813
1814 for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
1815 {
1816 for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
1817 {
1818 const int size = (glu::isDataTypeDoubleType(basicType) ? (int)sizeof(deUint64) : (glu::isDataTypeFloat16OrVec(basicType) ? (int)sizeof(deUint16) : (int)sizeof(deUint32)));
1819 const int srcOffset = layout.offset + layout.stride * elemNdx + (isMatrix ? layout.matrixStride * vecNdx : 0);
1820 const int dstOffset = size * (elemNdx * scalarSize + vecNdx * numComps);
1821 const deUint8* srcPtr = (const deUint8*)srcBasePtr + srcOffset;
1822 deUint8* dstPtr = (deUint8*)dstBasePtr + dstOffset;
1823
1824 deMemcpy(dstPtr, srcPtr, size * numComps);
1825 }
1826 }
1827 }
1828 else
1829 throw tcu::InternalError("Unsupported type");
1830 }
1831
uploadInputBuffer(const void * const * inputPtrs,int numValues,bool packFloat16Bit)1832 void BufferIoExecutor::uploadInputBuffer (const void* const* inputPtrs, int numValues, bool packFloat16Bit)
1833 {
1834 const VkDevice vkDevice = m_context.getDevice();
1835 const DeviceInterface& vk = m_context.getDeviceInterface();
1836
1837 const deUint32 inputStride = getLayoutStride(m_inputLayout);
1838 const int inputBufferSize = inputStride * numValues;
1839
1840 if (inputBufferSize == 0)
1841 return; // No inputs
1842
1843 DE_ASSERT(m_shaderSpec.inputs.size() == m_inputLayout.size());
1844 for (size_t inputNdx = 0; inputNdx < m_shaderSpec.inputs.size(); ++inputNdx)
1845 {
1846 const glu::VarType& varType = m_shaderSpec.inputs[inputNdx].varType;
1847 const VarLayout& layout = m_inputLayout[inputNdx];
1848
1849 copyToBuffer(varType, layout, numValues, inputPtrs[inputNdx], m_inputAlloc->getHostPtr(), packFloat16Bit);
1850 }
1851
1852 flushAlloc(vk, vkDevice, *m_inputAlloc);
1853 }
1854
readOutputBuffer(void * const * outputPtrs,int numValues)1855 void BufferIoExecutor::readOutputBuffer (void* const* outputPtrs, int numValues)
1856 {
1857 const VkDevice vkDevice = m_context.getDevice();
1858 const DeviceInterface& vk = m_context.getDeviceInterface();
1859
1860 DE_ASSERT(numValues > 0); // At least some outputs are required.
1861
1862 invalidateAlloc(vk, vkDevice, *m_outputAlloc);
1863
1864 DE_ASSERT(m_shaderSpec.outputs.size() == m_outputLayout.size());
1865 for (size_t outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
1866 {
1867 const glu::VarType& varType = m_shaderSpec.outputs[outputNdx].varType;
1868 const VarLayout& layout = m_outputLayout[outputNdx];
1869
1870 copyFromBuffer(varType, layout, numValues, m_outputAlloc->getHostPtr(), outputPtrs[outputNdx]);
1871 }
1872 }
1873
initBuffers(int numValues)1874 void BufferIoExecutor::initBuffers (int numValues)
1875 {
1876 const deUint32 inputStride = getLayoutStride(m_inputLayout);
1877 const deUint32 outputStride = getLayoutStride(m_outputLayout);
1878 // Avoid creating zero-sized buffer/memory
1879 const size_t inputBufferSize = de::max(numValues * inputStride, 1u);
1880 const size_t outputBufferSize = numValues * outputStride;
1881
1882 // Upload data to buffer
1883 const VkDevice vkDevice = m_context.getDevice();
1884 const DeviceInterface& vk = m_context.getDeviceInterface();
1885 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
1886 Allocator& memAlloc = m_context.getDefaultAllocator();
1887
1888 const VkBufferCreateInfo inputBufferParams =
1889 {
1890 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
1891 DE_NULL, // const void* pNext;
1892 0u, // VkBufferCreateFlags flags;
1893 inputBufferSize, // VkDeviceSize size;
1894 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, // VkBufferUsageFlags usage;
1895 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
1896 1u, // deUint32 queueFamilyCount;
1897 &queueFamilyIndex // const deUint32* pQueueFamilyIndices;
1898 };
1899
1900 m_inputBuffer = createBuffer(vk, vkDevice, &inputBufferParams);
1901 m_inputAlloc = memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *m_inputBuffer), MemoryRequirement::HostVisible);
1902
1903 VK_CHECK(vk.bindBufferMemory(vkDevice, *m_inputBuffer, m_inputAlloc->getMemory(), m_inputAlloc->getOffset()));
1904
1905 const VkBufferCreateInfo outputBufferParams =
1906 {
1907 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
1908 DE_NULL, // const void* pNext;
1909 0u, // VkBufferCreateFlags flags;
1910 outputBufferSize, // VkDeviceSize size;
1911 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, // VkBufferUsageFlags usage;
1912 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
1913 1u, // deUint32 queueFamilyCount;
1914 &queueFamilyIndex // const deUint32* pQueueFamilyIndices;
1915 };
1916
1917 m_outputBuffer = createBuffer(vk, vkDevice, &outputBufferParams);
1918 m_outputAlloc = memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *m_outputBuffer), MemoryRequirement::HostVisible);
1919
1920 VK_CHECK(vk.bindBufferMemory(vkDevice, *m_outputBuffer, m_outputAlloc->getMemory(), m_outputAlloc->getOffset()));
1921 }
1922
1923 // ComputeShaderExecutor
1924
// Executes the shader spec in a compute shader, reading and writing values
// through the BufferIoExecutor storage buffers (one invocation per value).
class ComputeShaderExecutor : public BufferIoExecutor
{
public:
	ComputeShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual ~ComputeShaderExecutor (void);

	// Adds the "compute" GLSL source for this spec to programCollection.
	static void generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection);

	virtual void execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources);

protected:
	static std::string generateComputeShader (const ShaderSpec& spec);

private:
	// Layout of the additional caller-provided descriptor set.
	const VkDescriptorSetLayout m_extraResourcesLayout;
};
1941
// Stores the extra-resources set layout; buffer setup happens lazily in
// initBuffers() when execute() knows the value count.
ComputeShaderExecutor::ComputeShaderExecutor(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: BufferIoExecutor (context, shaderSpec)
	, m_extraResourcesLayout (extraResourcesLayout)
{
}
1947
// Trivial destructor; base classes own all Vulkan objects via RAII wrappers.
ComputeShaderExecutor::~ComputeShaderExecutor (void)
{
}
1951
getTypeSpirv(const glu::DataType type,const bool packFloat16Bit=false)1952 std::string getTypeSpirv(const glu::DataType type, const bool packFloat16Bit = false)
1953 {
1954 switch(type)
1955 {
1956 case glu::TYPE_FLOAT16:
1957 return "%f16";
1958 case glu::TYPE_FLOAT16_VEC2:
1959 return "%v2f16";
1960 case glu::TYPE_FLOAT16_VEC3:
1961 return "%v3f16";
1962 case glu::TYPE_FLOAT16_VEC4:
1963 return "%v4f16";
1964 case glu::TYPE_FLOAT:
1965 return packFloat16Bit ? "%u32" : "%f32"; // f16 values will be bitcast from ui32.
1966 case glu::TYPE_FLOAT_VEC2:
1967 return packFloat16Bit ? "%v2u32" : "%v2f32"; // f16 values will be bitcast from ui32.
1968 case glu::TYPE_FLOAT_VEC3:
1969 return packFloat16Bit ? "%v3u32" : "%v3f32"; // f16 values will be bitcast from ui32.
1970 case glu::TYPE_FLOAT_VEC4:
1971 return packFloat16Bit ? "%v4u32" : "%v4f32"; // f16 values will be bitcast from ui32.
1972 case glu::TYPE_INT:
1973 return "%i32";
1974 case glu::TYPE_INT_VEC2:
1975 return "%v2i32";
1976 case glu::TYPE_INT_VEC3:
1977 return "%v3i32";
1978 case glu::TYPE_INT_VEC4:
1979 return "%v4i32";
1980 case glu::TYPE_DOUBLE:
1981 return "%f64";
1982 case glu::TYPE_DOUBLE_VEC2:
1983 return "%v2f64";
1984 case glu::TYPE_DOUBLE_VEC3:
1985 return "%v3f64";
1986 case glu::TYPE_DOUBLE_VEC4:
1987 return "%v4f64";
1988 default:
1989 DE_ASSERT(0);
1990 return "";
1991 break;
1992 }
1993 }
1994
// Emits SPIR-V that shifts the i32 stored in 'variableName' left by one bit,
// moving the current operation bit to the next position. operationNdx is
// appended to the generated ids to keep them unique across calls.
// The name is taken by const reference (was by value) to avoid a copy per call.
std::string moveBitOperation (const std::string& variableName, const int operationNdx)
{
	std::ostringstream src;
	src << "\n"
		<< "%operation_move_" << operationNdx << " = OpLoad %i32 " << variableName << "\n"
		<< "%move1_" << operationNdx << " = OpShiftLeftLogical %i32 %operation_move_"<< operationNdx <<" %c_i32_1\n"
		<< "OpStore " << variableName << " %move1_" << operationNdx << "\n";
	return src.str();
}
2004
scalarComparison(const std::string operation,const int operationNdx,const glu::DataType type,const std::string & outputType,const int scalarSize)2005 std::string scalarComparison(const std::string operation, const int operationNdx, const glu::DataType type, const std::string& outputType, const int scalarSize)
2006 {
2007 std::ostringstream src;
2008 std::string boolType;
2009
2010 switch (type)
2011 {
2012 case glu::TYPE_FLOAT16:
2013 case glu::TYPE_FLOAT:
2014 case glu::TYPE_DOUBLE:
2015 src << "\n"
2016 << "%operation_result_" << operationNdx << " = " << operation << " %bool %in0_val %in1_val\n"
2017 << "OpSelectionMerge %IF_" << operationNdx << " None\n"
2018 << "OpBranchConditional %operation_result_" << operationNdx << " %label_IF_" << operationNdx << " %IF_" << operationNdx << "\n"
2019 << "%label_IF_" << operationNdx << " = OpLabel\n"
2020 << "%operation_val_" << operationNdx << " = OpLoad %i32 %operation\n"
2021 << "%out_val_" << operationNdx << " = OpLoad %i32 %out0\n"
2022 << "%add_if_" << operationNdx << " = OpIAdd %i32 %out_val_" << operationNdx << " %operation_val_" << operationNdx << "\n"
2023 << "OpStore %out0 %add_if_" << operationNdx << "\n"
2024 << "OpBranch %IF_" << operationNdx << "\n"
2025 << "%IF_" << operationNdx << " = OpLabel\n";
2026 return src.str();
2027 case glu::TYPE_FLOAT16_VEC2:
2028 case glu::TYPE_FLOAT_VEC2:
2029 case glu::TYPE_DOUBLE_VEC2:
2030 boolType = "%v2bool";
2031 break;
2032 case glu::TYPE_FLOAT16_VEC3:
2033 case glu::TYPE_FLOAT_VEC3:
2034 case glu::TYPE_DOUBLE_VEC3:
2035 boolType = "%v3bool";
2036 break;
2037 case glu::TYPE_FLOAT16_VEC4:
2038 case glu::TYPE_FLOAT_VEC4:
2039 case glu::TYPE_DOUBLE_VEC4:
2040 boolType = "%v4bool";
2041 break;
2042 default:
2043 DE_ASSERT(0);
2044 return "";
2045 break;
2046 }
2047
2048 src << "\n"
2049 << "%operation_result_" << operationNdx << " = " << operation << " " << boolType << " %in0_val %in1_val\n"
2050 << "%ivec_result_" << operationNdx << " = OpSelect " << outputType << " %operation_result_" << operationNdx << " %c_" << &outputType[1] << "_1 %c_" << &outputType[1] << "_0\n"
2051 << "%operation_val_" << operationNdx << " = OpLoad %i32 %operation\n";
2052
2053 src << "%operation_vec_" << operationNdx << " = OpCompositeConstruct " << outputType;
2054 for(int ndx = 0; ndx < scalarSize; ++ndx)
2055 src << " %operation_val_" << operationNdx;
2056 src << "\n";
2057
2058 src << "%toAdd" << operationNdx << " = OpIMul "<< outputType << " %ivec_result_" << operationNdx << " %operation_vec_" << operationNdx <<"\n"
2059 << "%out_val_" << operationNdx << " = OpLoad "<< outputType << " %out0\n"
2060
2061 << "%add_if_" << operationNdx << " = OpIAdd " << outputType << " %out_val_" << operationNdx << " %toAdd" << operationNdx << "\n"
2062 << "OpStore %out0 %add_if_" << operationNdx << "\n";
2063
2064 return src.str();
2065 }
2066
generateSpirv(const ShaderSpec & spec,const bool are16Bit,const bool are64Bit,const bool isMediump)2067 std::string generateSpirv(const ShaderSpec& spec, const bool are16Bit, const bool are64Bit, const bool isMediump)
2068 {
2069 static const std::string COMPARE_OPERATIONS[] =
2070 {
2071 "OpFOrdEqual",
2072 "OpFOrdGreaterThan",
2073 "OpFOrdLessThan",
2074 "OpFOrdGreaterThanEqual",
2075 "OpFOrdLessThanEqual",
2076 "OpFUnordEqual",
2077 "OpFUnordGreaterThan",
2078 "OpFUnordLessThan",
2079 "OpFUnordGreaterThanEqual",
2080 "OpFUnordLessThanEqual"
2081 };
2082
2083 int moveBitNdx = 0;
2084 vector<std::string> inputTypes;
2085 vector<std::string> outputTypes;
2086 const std::string packType = spec.packFloat16Bit ? getTypeSpirv(getDataTypeFloat16Scalars(spec.inputs[0].varType.getBasicType())) : "";
2087
2088 vector<bool> floatResult;
2089 for (const auto& symbol : spec.outputs)
2090 floatResult.push_back(glu::isDataTypeFloatType(symbol.varType.getBasicType()));
2091
2092 const bool anyFloatResult = std::any_of(begin(floatResult), end(floatResult), [](bool b) { return b; });
2093
2094 vector<bool> packFloatRes;
2095 for (const auto& floatRes : floatResult)
2096 packFloatRes.push_back(floatRes && spec.packFloat16Bit);
2097
2098 const bool useF32Types = (!are16Bit && !are64Bit);
2099 const bool useF64Types = are64Bit;
2100 const bool useF16Types = (spec.packFloat16Bit || are16Bit);
2101
2102 for (const auto& symbol : spec.inputs)
2103 inputTypes.push_back(getTypeSpirv(symbol.varType.getBasicType(), spec.packFloat16Bit));
2104
2105 for (const auto& symbol : spec.outputs)
2106 outputTypes.push_back(getTypeSpirv(symbol.varType.getBasicType(), spec.packFloat16Bit));
2107
2108 DE_ASSERT(!inputTypes.empty());
2109 DE_ASSERT(!outputTypes.empty());
2110
2111 // Assert input and output types match the expected operations.
2112 switch (spec.spirvCase)
2113 {
2114 case SPIRV_CASETYPE_COMPARE:
2115 case SPIRV_CASETYPE_FREM:
2116 DE_ASSERT(inputTypes.size() == 2);
2117 DE_ASSERT(outputTypes.size() == 1);
2118 break;
2119 case SPIRV_CASETYPE_MODFSTRUCT:
2120 case SPIRV_CASETYPE_FREXPSTRUCT:
2121 DE_ASSERT(inputTypes.size() == 1);
2122 DE_ASSERT(outputTypes.size() == 2);
2123 break;
2124 default:
2125 DE_ASSERT(false);
2126 break;
2127 };
2128
2129 std::ostringstream src;
2130 src << "; SPIR-V\n"
2131 "; Version: 1.0\n"
2132 "; Generator: Khronos Glslang Reference Front End; 4\n"
2133 "; Bound: 114\n"
2134 "; Schema: 0\n"
2135 "OpCapability Shader\n";
2136
2137 if (useF16Types)
2138 src << "OpCapability Float16\n";
2139
2140 if (are16Bit)
2141 src << "OpCapability StorageBuffer16BitAccess\n"
2142 "OpCapability UniformAndStorageBuffer16BitAccess\n";
2143
2144 if (useF64Types)
2145 src << "OpCapability Float64\n";
2146
2147 if (are16Bit)
2148 src << "OpExtension \"SPV_KHR_16bit_storage\"\n";
2149
2150 src << "%glslstd450 = OpExtInstImport \"GLSL.std.450\"\n"
2151 "OpMemoryModel Logical GLSL450\n"
2152 "OpEntryPoint GLCompute %BP_main \"main\" %BP_id3uNum %BP_id3uID\n"
2153 "OpExecutionMode %BP_main LocalSize 1 1 1\n"
2154 "OpDecorate %BP_id3uNum BuiltIn NumWorkgroups\n"
2155 "OpDecorate %BP_id3uID BuiltIn WorkgroupId\n";
2156
2157 // Input offsets and stride.
2158 {
2159 int offset = 0;
2160 int ndx = 0;
2161 int largest = 0;
2162 for (const auto& symbol : spec.inputs)
2163 {
2164 const int scalarSize = symbol.varType.getScalarSize();
2165 const int memberSize = (scalarSize + ((scalarSize == 3) ? 1 : 0)) * (isDataTypeDoubleType(symbol.varType.getBasicType()) ? (int)sizeof(deUint64) : (isDataTypeFloat16OrVec(symbol.varType.getBasicType()) ? (int)sizeof(deUint16) : (int)sizeof(deUint32)));
2166 const int extraMemberBytes = (offset % memberSize);
2167
2168 offset += ((extraMemberBytes == 0) ? 0 : (memberSize - extraMemberBytes));
2169 src << "OpMemberDecorate %SSB0_IN "<< ndx <<" Offset " << offset << "\n";
2170 ++ndx;
2171
2172 if (memberSize > largest)
2173 largest = memberSize;
2174
2175 offset += memberSize;
2176 }
2177 DE_ASSERT(largest > 0);
2178 const int extraBytes = (offset % largest);
2179 const int stride = offset + (extraBytes == 0 ? 0 : (largest - extraBytes));
2180 src << "OpDecorate %up_SSB0_IN ArrayStride "<< stride << "\n";
2181 }
2182
2183 src << "OpMemberDecorate %ssboIN 0 Offset 0\n"
2184 "OpDecorate %ssboIN BufferBlock\n"
2185 "OpDecorate %ssbo_src DescriptorSet 0\n"
2186 "OpDecorate %ssbo_src Binding 0\n"
2187 "\n";
2188
2189 if (isMediump)
2190 {
2191 for (size_t i = 0; i < inputTypes.size(); ++i)
2192 {
2193 src <<
2194 "OpMemberDecorate %SSB0_IN " << i << " RelaxedPrecision\n"
2195 "OpDecorate %in" << i << " RelaxedPrecision\n"
2196 "OpDecorate %src_val_0_" << i << " RelaxedPrecision\n"
2197 "OpDecorate %in" << i << "_val RelaxedPrecision\n"
2198 ;
2199 }
2200
2201 if (anyFloatResult)
2202 {
2203 switch (spec.spirvCase)
2204 {
2205 case SPIRV_CASETYPE_FREM:
2206 src << "OpDecorate %frem_result RelaxedPrecision\n";
2207 break;
2208 case SPIRV_CASETYPE_MODFSTRUCT:
2209 src << "OpDecorate %modfstruct_result RelaxedPrecision\n";
2210 break;
2211 case SPIRV_CASETYPE_FREXPSTRUCT:
2212 src << "OpDecorate %frexpstruct_result RelaxedPrecision\n";
2213 break;
2214 default:
2215 DE_ASSERT(false);
2216 break;
2217 }
2218
2219 for (size_t i = 0; i < outputTypes.size(); ++i)
2220 {
2221 src << "OpMemberDecorate %SSB0_OUT " << i << " RelaxedPrecision\n";
2222 src << "OpDecorate %out_val_final_" << i << " RelaxedPrecision\n";
2223 src << "OpDecorate %out" << i << " RelaxedPrecision\n";
2224 }
2225 }
2226 }
2227
2228 // Output offsets and stride.
2229 {
2230 int offset = 0;
2231 int ndx = 0;
2232 int largest = 0;
2233 for (const auto& symbol : spec.outputs)
2234 {
2235 const int scalarSize = symbol.varType.getScalarSize();
2236 const int memberSize = (scalarSize + ((scalarSize == 3) ? 1 : 0)) * (isDataTypeDoubleType(symbol.varType.getBasicType()) ? (int)sizeof(deUint64) : (isDataTypeFloat16OrVec(symbol.varType.getBasicType()) ? (int)sizeof(deUint16) : (int)sizeof(deUint32)));
2237 const int extraMemberBytes = (offset % memberSize);
2238
2239 offset += ((extraMemberBytes == 0) ? 0 : (memberSize - extraMemberBytes));
2240 src << "OpMemberDecorate %SSB0_OUT " << ndx << " Offset " << offset << "\n";
2241 ++ndx;
2242
2243 if (memberSize > largest)
2244 largest = memberSize;
2245
2246 offset += memberSize;
2247 }
2248 DE_ASSERT(largest > 0);
2249 const int extraBytes = (offset % largest);
2250 const int stride = offset + ((extraBytes == 0) ? 0 : (largest - extraBytes));
2251 src << "OpDecorate %up_SSB0_OUT ArrayStride " << stride << "\n";
2252 }
2253
2254 src << "OpMemberDecorate %ssboOUT 0 Offset 0\n"
2255 "OpDecorate %ssboOUT BufferBlock\n"
2256 "OpDecorate %ssbo_dst DescriptorSet 0\n"
2257 "OpDecorate %ssbo_dst Binding 1\n"
2258 "\n"
2259 "%void = OpTypeVoid\n"
2260 "%bool = OpTypeBool\n"
2261 "%v2bool = OpTypeVector %bool 2\n"
2262 "%v3bool = OpTypeVector %bool 3\n"
2263 "%v4bool = OpTypeVector %bool 4\n"
2264 "%u32 = OpTypeInt 32 0\n";
2265
2266 if (useF32Types)
2267 src << "%f32 = OpTypeFloat 32\n"
2268 "%v2f32 = OpTypeVector %f32 2\n"
2269 "%v3f32 = OpTypeVector %f32 3\n"
2270 "%v4f32 = OpTypeVector %f32 4\n";
2271
2272 if (useF64Types)
2273 src << "%f64 = OpTypeFloat 64\n"
2274 "%v2f64 = OpTypeVector %f64 2\n"
2275 "%v3f64 = OpTypeVector %f64 3\n"
2276 "%v4f64 = OpTypeVector %f64 4\n";
2277
2278 if (useF16Types)
2279 src << "%f16 = OpTypeFloat 16\n"
2280 "%v2f16 = OpTypeVector %f16 2\n"
2281 "%v3f16 = OpTypeVector %f16 3\n"
2282 "%v4f16 = OpTypeVector %f16 4\n";
2283
2284 src << "%i32 = OpTypeInt 32 1\n"
2285 "%v2i32 = OpTypeVector %i32 2\n"
2286 "%v3i32 = OpTypeVector %i32 3\n"
2287 "%v4i32 = OpTypeVector %i32 4\n"
2288 "%v2u32 = OpTypeVector %u32 2\n"
2289 "%v3u32 = OpTypeVector %u32 3\n"
2290 "%v4u32 = OpTypeVector %u32 4\n"
2291 "\n"
2292 "%ip_u32 = OpTypePointer Input %u32\n"
2293 "%ip_v3u32 = OpTypePointer Input %v3u32\n"
2294 "%up_float = OpTypePointer Uniform " << inputTypes[0] << "\n"
2295 "\n"
2296 "%fp_operation = OpTypePointer Function %i32\n"
2297 "%voidf = OpTypeFunction %void\n"
2298 "%fp_u32 = OpTypePointer Function %u32\n"
2299 "%fp_it1 = OpTypePointer Function " << inputTypes[0] << "\n"
2300 ;
2301
2302 for (size_t i = 0; i < outputTypes.size(); ++i)
2303 {
2304 src << "%fp_out_" << i << " = OpTypePointer Function " << outputTypes[i] << "\n"
2305 << "%up_out_" << i << " = OpTypePointer Uniform " << outputTypes[i] << "\n";
2306 }
2307
2308 if (spec.packFloat16Bit)
2309 src << "%fp_f16 = OpTypePointer Function " << packType << "\n";
2310
2311 src << "%BP_id3uID = OpVariable %ip_v3u32 Input\n"
2312 "%BP_id3uNum = OpVariable %ip_v3u32 Input\n"
2313 "\n"
2314 "%c_u32_0 = OpConstant %u32 0\n"
2315 "%c_u32_1 = OpConstant %u32 1\n"
2316 "%c_u32_2 = OpConstant %u32 2\n"
2317 "%c_i32_0 = OpConstant %i32 0\n"
2318 "%c_i32_1 = OpConstant %i32 1\n"
2319 "\n";
2320
2321 if (useF32Types)
2322 src <<
2323 "%c_f32_0 = OpConstant %f32 0\n"
2324 "%c_f32_1 = OpConstant %f32 1\n"
2325 ;
2326
2327 if (useF16Types)
2328 src <<
2329 "%c_f16_0 = OpConstant %f16 0\n"
2330 "%c_f16_1 = OpConstant %f16 1\n"
2331 "%c_f16_minus1 = OpConstant %f16 -0x1p+0"
2332 ;
2333
2334 if (useF64Types)
2335 src <<
2336 "%c_f64_0 = OpConstant %f64 0\n"
2337 "%c_f64_1 = OpConstant %f64 1\n"
2338 ;
2339
2340 src << "\n"
2341 "%c_v2i32_0 = OpConstantComposite %v2i32 %c_i32_0 %c_i32_0\n"
2342 "%c_v2i32_1 = OpConstantComposite %v2i32 %c_i32_1 %c_i32_1\n"
2343 "%c_v3i32_0 = OpConstantComposite %v3i32 %c_i32_0 %c_i32_0 %c_i32_0\n"
2344 "%c_v3i32_1 = OpConstantComposite %v3i32 %c_i32_1 %c_i32_1 %c_i32_1\n"
2345 "%c_v4i32_0 = OpConstantComposite %v4i32 %c_i32_0 %c_i32_0 %c_i32_0 %c_i32_0\n"
2346 "%c_v4i32_1 = OpConstantComposite %v4i32 %c_i32_1 %c_i32_1 %c_i32_1 %c_i32_1\n"
2347 "\n";
2348
2349 if (useF32Types)
2350 src <<
2351 "%c_v2f32_0 = OpConstantComposite %v2f32 %c_f32_0 %c_f32_0\n"
2352 "%c_v2f32_1 = OpConstantComposite %v2f32 %c_f32_1 %c_f32_1\n"
2353 "%c_v3f32_0 = OpConstantComposite %v3f32 %c_f32_0 %c_f32_0 %c_f32_0\n"
2354 "%c_v3f32_1 = OpConstantComposite %v3f32 %c_f32_1 %c_f32_1 %c_f32_1\n"
2355 "%c_v4f32_0 = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_0 %c_f32_0\n"
2356 "%c_v4f32_1 = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
2357 ;
2358
2359 if (useF16Types)
2360 src <<
2361 "%c_v2f16_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
2362 "%c_v2f16_1 = OpConstantComposite %v2f16 %c_f16_1 %c_f16_1\n"
2363 "%c_v3f16_0 = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0 %c_f16_0\n"
2364 "%c_v3f16_1 = OpConstantComposite %v3f16 %c_f16_1 %c_f16_1 %c_f16_1\n"
2365 "%c_v4f16_0 = OpConstantComposite %v4f16 %c_f16_0 %c_f16_0 %c_f16_0 %c_f16_0\n"
2366 "%c_v4f16_1 = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_1\n"
2367 ;
2368
2369 if (useF64Types)
2370 src <<
2371 "%c_v2f64_0 = OpConstantComposite %v2f64 %c_f64_0 %c_f64_0\n"
2372 "%c_v2f64_1 = OpConstantComposite %v2f64 %c_f64_1 %c_f64_1\n"
2373 "%c_v3f64_0 = OpConstantComposite %v3f64 %c_f64_0 %c_f64_0 %c_f64_0\n"
2374 "%c_v3f64_1 = OpConstantComposite %v3f64 %c_f64_1 %c_f64_1 %c_f64_1\n"
2375 "%c_v4f64_0 = OpConstantComposite %v4f64 %c_f64_0 %c_f64_0 %c_f64_0 %c_f64_0\n"
2376 "%c_v4f64_1 = OpConstantComposite %v4f64 %c_f64_1 %c_f64_1 %c_f64_1 %c_f64_1\n"
2377 "\n";
2378
2379 // Input struct.
2380 {
2381 src << "%SSB0_IN = OpTypeStruct";
2382 for (const auto& t : inputTypes)
2383 src << " " << t;
2384 src << "\n";
2385 }
2386
2387 src <<
2388 "%up_SSB0_IN = OpTypeRuntimeArray %SSB0_IN\n"
2389 "%ssboIN = OpTypeStruct %up_SSB0_IN\n"
2390 "%up_ssboIN = OpTypePointer Uniform %ssboIN\n"
2391 "%ssbo_src = OpVariable %up_ssboIN Uniform\n"
2392 "\n";
2393
2394 // Output struct.
2395 {
2396 src << "%SSB0_OUT = OpTypeStruct";
2397 for (const auto& t : outputTypes)
2398 src << " " << t;
2399 src << "\n";
2400 }
2401
2402 std::string modfStructMemberType;
2403 std::string frexpStructFirstMemberType;
2404 if (spec.spirvCase == SPIRV_CASETYPE_MODFSTRUCT)
2405 {
2406 modfStructMemberType = (packFloatRes[0] ? packType : outputTypes[0]);
2407 src << "%modfstruct_ret_t = OpTypeStruct " << modfStructMemberType << " " << modfStructMemberType << "\n";
2408 }
2409 else if (spec.spirvCase == SPIRV_CASETYPE_FREXPSTRUCT)
2410 {
2411 frexpStructFirstMemberType = (packFloatRes[0] ? packType : outputTypes[0]);
2412 src << "%frexpstruct_ret_t = OpTypeStruct " << frexpStructFirstMemberType << " " << outputTypes[1] << "\n";
2413 }
2414
2415 src <<
2416 "%up_SSB0_OUT = OpTypeRuntimeArray %SSB0_OUT\n"
2417 "%ssboOUT = OpTypeStruct %up_SSB0_OUT\n"
2418 "%up_ssboOUT = OpTypePointer Uniform %ssboOUT\n"
2419 "%ssbo_dst = OpVariable %up_ssboOUT Uniform\n"
2420 "\n"
2421 "%BP_main = OpFunction %void None %voidf\n"
2422 "%BP_label = OpLabel\n"
2423 "%invocationNdx = OpVariable %fp_u32 Function\n";
2424
2425 // Note: here we are supposing all inputs have the same type.
2426 for (size_t i = 0; i < inputTypes.size(); ++i)
2427 src << "%in" << i << " = OpVariable " << (spec.packFloat16Bit ? "%fp_f16" : "%fp_it1") << " Function\n";
2428
2429 for (size_t i = 0; i < outputTypes.size(); ++i)
2430 src << "%out" << i << " = OpVariable " << (packFloatRes[i] ? std::string("%fp_f16") : std::string("%fp_out_") + de::toString(i)) << " Function\n";
2431
2432 src << "%operation = OpVariable %fp_operation Function\n"
2433 "%BP_id_0_ptr = OpAccessChain %ip_u32 %BP_id3uID %c_u32_0\n"
2434 "%BP_id_1_ptr = OpAccessChain %ip_u32 %BP_id3uID %c_u32_1\n"
2435 "%BP_id_2_ptr = OpAccessChain %ip_u32 %BP_id3uID %c_u32_2\n"
2436 "%BP_num_0_ptr = OpAccessChain %ip_u32 %BP_id3uNum %c_u32_0\n"
2437 "%BP_num_1_ptr = OpAccessChain %ip_u32 %BP_id3uNum %c_u32_1\n"
2438 "%BP_id_0_val = OpLoad %u32 %BP_id_0_ptr\n"
2439 "%BP_id_1_val = OpLoad %u32 %BP_id_1_ptr\n"
2440 "%BP_id_2_val = OpLoad %u32 %BP_id_2_ptr\n"
2441 "%BP_num_0_val = OpLoad %u32 %BP_num_0_ptr\n"
2442 "%BP_num_1_val = OpLoad %u32 %BP_num_1_ptr\n"
2443 "\n"
2444 "%mul_1 = OpIMul %u32 %BP_num_0_val %BP_num_1_val\n"
2445 "%mul_2 = OpIMul %u32 %mul_1 %BP_id_2_val\n"
2446 "%mul_3 = OpIMul %u32 %BP_num_0_val %BP_id_1_val\n"
2447 "%add_1 = OpIAdd %u32 %mul_2 %mul_3\n"
2448 "%add_2 = OpIAdd %u32 %add_1 %BP_id_0_val\n"
2449 "OpStore %invocationNdx %add_2\n"
2450 "%invocationNdx_val = OpLoad %u32 %invocationNdx\n";
2451
2452 // Load input values.
2453 for (size_t inputNdx = 0; inputNdx < inputTypes.size(); ++inputNdx)
2454 {
2455 src << "\n"
2456 << "%src_ptr_0_" << inputNdx << " = OpAccessChain %up_float %ssbo_src %c_i32_0 %invocationNdx_val %c_i32_" << inputNdx << "\n"
2457 << "%src_val_0_" << inputNdx << " = OpLoad " << inputTypes[inputNdx] << " %src_ptr_0_" << inputNdx << "\n";
2458
2459 if (spec.packFloat16Bit)
2460 {
2461 if (spec.inputs[inputNdx].varType.getScalarSize() > 1)
2462 {
2463 // Extract the val<inputNdx> u32 input channels into individual f16 values.
2464 for (int i = 0; i < spec.inputs[inputNdx].varType.getScalarSize(); ++i)
2465 {
2466 src << "%src_val_0_" << inputNdx << "_" << i << " = OpCompositeExtract %u32 %src_val_0_" << inputNdx << " " << i << "\n"
2467 "%val_v2f16_0_" << inputNdx << "_" << i << " = OpBitcast %v2f16 %src_val_0_" << inputNdx << "_" << i << "\n"
2468 "%val_f16_0_" << inputNdx << "_" << i << " = OpCompositeExtract %f16 %val_v2f16_0_" << inputNdx << "_" << i << " 0\n";
2469 }
2470
2471 // Construct the input vector.
2472 src << "%val_f16_0_" << inputNdx << " = OpCompositeConstruct " << packType;
2473 for (int i = 0; i < spec.inputs[inputNdx].varType.getScalarSize(); ++i)
2474 {
2475 src << " %val_f16_0_" << inputNdx << "_" << i;
2476 }
2477
2478 src << "\n";
2479 src << "OpStore %in" << inputNdx << " %val_f16_0_" << inputNdx << "\n";
2480 }
2481 else
2482 {
2483 src << "%val_v2f16_0_" << inputNdx << " = OpBitcast %v2f16 %src_val_0_" << inputNdx << "\n"
2484 "%val_f16_0_" << inputNdx << " = OpCompositeExtract %f16 %val_v2f16_0_" << inputNdx << " 0\n";
2485
2486 src << "OpStore %in" << inputNdx << " %val_f16_0_" << inputNdx << "\n";
2487 }
2488 }
2489 else
2490 src << "OpStore %in" << inputNdx << " %src_val_0_" << inputNdx << "\n";
2491
2492 src << "%in" << inputNdx << "_val = OpLoad " << (spec.packFloat16Bit ? packType : inputTypes[inputNdx]) << " %in" << inputNdx << "\n";
2493 }
2494
2495 src << "\n"
2496 "OpStore %operation %c_i32_1\n";
2497
2498 // Fill output values with dummy data.
2499 for (size_t i = 0; i < outputTypes.size(); ++i)
2500 src << "OpStore %out" << i << " %c_" << (packFloatRes[i] ? &packType[1] : &outputTypes[i][1]) << "_0\n";
2501
2502 src << "\n";
2503
2504 // Run operation.
2505 switch (spec.spirvCase)
2506 {
2507 case SPIRV_CASETYPE_COMPARE:
2508 for (int operationNdx = 0; operationNdx < DE_LENGTH_OF_ARRAY(COMPARE_OPERATIONS); ++operationNdx)
2509 {
2510 src << scalarComparison (COMPARE_OPERATIONS[operationNdx], operationNdx,
2511 spec.inputs[0].varType.getBasicType(),
2512 outputTypes[0],
2513 spec.outputs[0].varType.getScalarSize());
2514 src << moveBitOperation("%operation", moveBitNdx);
2515 ++moveBitNdx;
2516 }
2517 break;
2518 case SPIRV_CASETYPE_FREM:
2519 src << "%frem_result = OpFRem " << (packFloatRes[0] ? packType : outputTypes[0]) << " %in0_val %in1_val\n"
2520 << "OpStore %out0 %frem_result\n";
2521 break;
2522 case SPIRV_CASETYPE_MODFSTRUCT:
2523 src << "%modfstruct_result = OpExtInst %modfstruct_ret_t %glslstd450 ModfStruct %in0_val\n"
2524 << "%modfstruct_result_0 = OpCompositeExtract " << modfStructMemberType << " %modfstruct_result 0\n"
2525 << "%modfstruct_result_1 = OpCompositeExtract " << modfStructMemberType << " %modfstruct_result 1\n"
2526 << "OpStore %out0 %modfstruct_result_0\n"
2527 << "OpStore %out1 %modfstruct_result_1\n";
2528 break;
2529 case SPIRV_CASETYPE_FREXPSTRUCT:
2530 src << "%frexpstruct_result = OpExtInst %frexpstruct_ret_t %glslstd450 FrexpStruct %in0_val\n"
2531 << "%frexpstruct_result_0 = OpCompositeExtract " << frexpStructFirstMemberType << " %frexpstruct_result 0\n"
2532 << "%frexpstruct_result_1 = OpCompositeExtract " << outputTypes[1] << " %frexpstruct_result 1\n"
2533 << "OpStore %out0 %frexpstruct_result_0\n"
2534 << "OpStore %out1 %frexpstruct_result_1\n";
2535 break;
2536 default:
2537 DE_ASSERT(false);
2538 break;
2539 }
2540
2541 for (size_t outputNdx = 0; outputNdx < outputTypes.size(); ++outputNdx)
2542 {
2543 src << "\n"
2544 "%out_val_final_" << outputNdx << " = OpLoad " << (packFloatRes[outputNdx] ? packType : outputTypes[outputNdx]) << " %out" << outputNdx << "\n"
2545 "%ssbo_dst_ptr_" << outputNdx << " = OpAccessChain %up_out_" << outputNdx << " %ssbo_dst %c_i32_0 %invocationNdx_val %c_i32_" << outputNdx << "\n";
2546
2547 if (packFloatRes[outputNdx])
2548 {
2549 if (spec.outputs[outputNdx].varType.getScalarSize() > 1)
2550 {
2551 for (int i = 0; i < spec.outputs[outputNdx].varType.getScalarSize(); ++i)
2552 {
2553 src << "%out_val_final_" << outputNdx << "_" << i << " = OpCompositeExtract %f16 %out_val_final_" << outputNdx << " " << i << "\n";
2554 src << "%out_composite_" << outputNdx << "_" << i << " = OpCompositeConstruct %v2f16 %out_val_final_" << outputNdx << "_" << i << " %c_f16_minus1\n";
2555 src << "%u32_val_" << outputNdx << "_" << i << " = OpBitcast %u32 %out_composite_" << outputNdx << "_" << i << "\n";
2556 }
2557
2558 src << "%u32_final_val_" << outputNdx << " = OpCompositeConstruct " << outputTypes[outputNdx];
2559 for (int i = 0; i < spec.outputs[outputNdx].varType.getScalarSize(); ++i)
2560 src << " %u32_val_" << outputNdx << "_" << i;
2561 src << "\n";
2562 src << "OpStore %ssbo_dst_ptr_" << outputNdx << " %u32_final_val_" << outputNdx << "\n";
2563 }
2564 else
2565 {
2566 src <<
2567 "%out_composite_" << outputNdx << " = OpCompositeConstruct %v2f16 %out_val_final_" << outputNdx << " %c_f16_minus1\n"
2568 "%out_result_" << outputNdx << " = OpBitcast " << outputTypes[outputNdx] << " %out_composite_" << outputNdx << "\n"
2569 "OpStore %ssbo_dst_ptr_" << outputNdx << " %out_result_" << outputNdx << "\n";
2570 }
2571 }
2572 else
2573 {
2574 src << "OpStore %ssbo_dst_ptr_" << outputNdx << " %out_val_final_" << outputNdx << "\n";
2575 }
2576 }
2577
2578 src << "\n"
2579 "OpReturn\n"
2580 "OpFunctionEnd\n";
2581
2582 return src.str();
2583 }
2584
2585
generateComputeShader(const ShaderSpec & spec)2586 std::string ComputeShaderExecutor::generateComputeShader (const ShaderSpec& spec)
2587 {
2588 if (spec.spirvCase != SPIRV_CASETYPE_NONE)
2589 {
2590 bool are16Bit = false;
2591 bool are64Bit = false;
2592 bool isMediump = false;
2593 for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
2594 {
2595 if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
2596 are16Bit = true;
2597
2598 if (glu::isDataTypeDoubleType(symIter->varType.getBasicType()))
2599 are64Bit = true;
2600
2601 if (symIter->varType.getPrecision() == glu::PRECISION_MEDIUMP)
2602 isMediump = true;
2603
2604 if (isMediump && are16Bit)
2605 break;
2606 }
2607
2608 return generateSpirv(spec, are16Bit, are64Bit, isMediump);
2609 }
2610 else
2611 {
2612 std::ostringstream src;
2613 src << glu::getGLSLVersionDeclaration(spec.glslVersion) << "\n";
2614
2615 if (!spec.globalDeclarations.empty())
2616 src << spec.globalDeclarations << "\n";
2617
2618 src << "layout(local_size_x = " << spec.localSizeX << ") in;\n"
2619 << "\n";
2620
2621 declareBufferBlocks(src, spec);
2622
2623 src << "void main (void)\n"
2624 << "{\n"
2625 << " uint invocationNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z\n"
2626 << " + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n";
2627
2628 generateExecBufferIo(src, spec, "invocationNdx");
2629
2630 src << "}\n";
2631
2632 return src.str();
2633 }
2634 }
2635
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)2636 void ComputeShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
2637 {
2638 if (shaderSpec.spirvCase != SPIRV_CASETYPE_NONE)
2639 programCollection.spirvAsmSources.add("compute") << SpirVAsmBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3) << generateComputeShader(shaderSpec);
2640 else
2641 programCollection.glslSources.add("compute") << glu::ComputeSource(generateComputeShader(shaderSpec)) << shaderSpec.buildOptions;
2642 }
2643
// Runs the compute shader over numValues input records: uploads the inputs,
// builds a one-set descriptor layout plus compute pipeline, then dispatches
// the work in batches bounded by maxComputeWorkGroupSize[0] and finally reads
// the results back into 'outputs'. 'extraResources' is an optional second
// descriptor set (present iff m_extraResourcesLayout is non-null).
void ComputeShaderExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
{
	const VkDevice					vkDevice				= m_context.getDevice();
	const DeviceInterface&			vk						= m_context.getDeviceInterface();
	const VkQueue					queue					= m_context.getUniversalQueue();
	const deUint32					queueFamilyIndex		= m_context.getUniversalQueueFamilyIndex();

	DescriptorPoolBuilder			descriptorPoolBuilder;
	DescriptorSetLayoutBuilder		descriptorSetLayoutBuilder;

	Move<VkShaderModule>			computeShaderModule;
	Move<VkPipeline>				computePipeline;
	Move<VkPipelineLayout>			pipelineLayout;
	Move<VkCommandPool>				cmdPool;
	Move<VkDescriptorPool>			descriptorPool;
	Move<VkDescriptorSetLayout>		descriptorSetLayout;
	Move<VkDescriptorSet>			descriptorSet;
	// Bind a second set only when the caller supplied extra resources.
	const deUint32					numDescriptorSets		= (m_extraResourcesLayout != 0) ? 2u : 1u;

	DE_ASSERT((m_extraResourcesLayout != 0) == (extraResources != 0));

	initBuffers(numValues);

	// Setup input buffer & copy data
	// For spirv shaders using packed 16 bit float values as input, the floats are converted to 16 bit before
	// storing in the lower 16 bits of 32 bit integers in the uniform buffer and cast back to 16 bit floats in
	// the shader.
	uploadInputBuffer(inputs, numValues, m_shaderSpec.packFloat16Bit && (m_shaderSpec.spirvCase != SPIRV_CASETYPE_NONE));

	// Create command pool (transient: command buffers are recorded once per batch)
	cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);

	// Declare two storage-buffer bindings (output and input SSBOs)

	descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
	descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
	descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
	descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);

	descriptorSetLayout = descriptorSetLayoutBuilder.build(vk, vkDevice);
	descriptorPool = descriptorPoolBuilder.build(vk, vkDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

	const VkDescriptorSetAllocateInfo allocInfo =
	{
		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
		DE_NULL,
		*descriptorPool,
		1u,
		&*descriptorSetLayout
	};

	descriptorSet = allocateDescriptorSet(vk, vkDevice, &allocInfo);

	// Create pipeline layout
	{
		const VkDescriptorSetLayout descriptorSetLayouts[] =
		{
			*descriptorSetLayout,
			m_extraResourcesLayout
		};
		const VkPipelineLayoutCreateInfo pipelineLayoutParams =
		{
			VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,		// VkStructureType				sType;
			DE_NULL,											// const void*					pNext;
			(VkPipelineLayoutCreateFlags)0,						// VkPipelineLayoutCreateFlags	flags;
			numDescriptorSets,									// deUint32						setLayoutCount;
			descriptorSetLayouts,								// const VkDescriptorSetLayout*	pSetLayouts;
			0u,													// deUint32						pushConstantRangeCount;
			DE_NULL												// const VkPushConstantRange*	pPushConstantRanges;
		};

		pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
	}

	// Create shaders
	{
		computeShaderModule		= createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("compute"), 0);
	}

	// create pipeline
	{
		const VkPipelineShaderStageCreateInfo shaderStageParams[1] =
		{
			{
				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType						sType;
				DE_NULL,												// const void*							pNext;
				(VkPipelineShaderStageCreateFlags)0u,					// VkPipelineShaderStageCreateFlags		flags;
				VK_SHADER_STAGE_COMPUTE_BIT,							// VkShaderStageFlagBits				stage;
				*computeShaderModule,									// VkShaderModule						shader;
				"main",													// const char*							pName;
				DE_NULL													// const VkSpecializationInfo*			pSpecializationInfo;
			}
		};

		const VkComputePipelineCreateInfo computePipelineParams =
		{
			VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,				// VkStructureType					sType;
			DE_NULL,													// const void*						pNext;
			(VkPipelineCreateFlags)0,									// VkPipelineCreateFlags			flags;
			*shaderStageParams,											// VkPipelineShaderStageCreateInfo	cs;
			*pipelineLayout,											// VkPipelineLayout					layout;
			0u,															// VkPipeline						basePipelineHandle;
			0u,															// int32_t							basePipelineIndex;
		};

		computePipeline = createComputePipeline(vk, vkDevice, DE_NULL, &computePipelineParams);
	}

	// One value per workgroup along X, so a single dispatch can cover at most
	// maxComputeWorkGroupSize[0] values; larger workloads are split below.
	// NOTE(review): this uses maxComputeWorkGroupSize[0] as the batch bound
	// rather than maxComputeWorkGroupCount[0] — confirm intent upstream.
	const int maxValuesPerInvocation	= m_context.getDeviceProperties().limits.maxComputeWorkGroupSize[0];
	int					curOffset		= 0;
	const deUint32		inputStride		= getInputStride();
	const deUint32		outputStride	= getOutputStride();

	while (curOffset < numValues)
	{
		Move<VkCommandBuffer>		cmdBuffer;
		const int					numToExec	= de::min(maxValuesPerInvocation, numValues-curOffset);

		// Update descriptors: rebind the buffer ranges for this batch only.
		{
			DescriptorSetUpdateBuilder descriptorSetUpdateBuilder;

			const VkDescriptorBufferInfo outputDescriptorBufferInfo =
			{
				*m_outputBuffer,				// VkBuffer			buffer;
				curOffset * outputStride,		// VkDeviceSize		offset;
				numToExec * outputStride		// VkDeviceSize		range;
			};

			descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)OUTPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputDescriptorBufferInfo);

			// A shader with no inputs has zero input stride; skip the binding then.
			if (inputStride)
			{
				const VkDescriptorBufferInfo inputDescriptorBufferInfo =
				{
					*m_inputBuffer,					// VkBuffer			buffer;
					curOffset * inputStride,		// VkDeviceSize		offset;
					numToExec * inputStride			// VkDeviceSize		range;
				};

				descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)INPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputDescriptorBufferInfo);
			}

			descriptorSetUpdateBuilder.update(vk, vkDevice);
		}

		cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
		beginCommandBuffer(vk, *cmdBuffer);
		vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);

		{
			const VkDescriptorSet descriptorSets[] = { *descriptorSet, extraResources };
			vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, numDescriptorSets, descriptorSets, 0u, DE_NULL);
		}

		vk.cmdDispatch(*cmdBuffer, numToExec, 1, 1);

		// Insert a barrier so data written by the shader is available to the host
		{
			const VkBufferMemoryBarrier bufferBarrier =
			{
				VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,	// VkStructureType	sType;
				DE_NULL,									// const void*		pNext;
				VK_ACCESS_SHADER_WRITE_BIT,					// VkAccessFlags	srcAccessMask;
				VK_ACCESS_HOST_READ_BIT,					// VkAccessFlags	dstAccessMask;
				VK_QUEUE_FAMILY_IGNORED,					// uint32_t			srcQueueFamilyIndex;
				VK_QUEUE_FAMILY_IGNORED,					// uint32_t			dstQueueFamilyIndex;
				*m_outputBuffer,							// VkBuffer			buffer;
				0,											// VkDeviceSize		offset;
				VK_WHOLE_SIZE,								// VkDeviceSize		size;
			};

			vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0,
								  0, (const VkMemoryBarrier*)DE_NULL,
								  1, &bufferBarrier,
								  0, (const VkImageMemoryBarrier*)DE_NULL);
		}

		endCommandBuffer(vk, *cmdBuffer);

		curOffset += numToExec;

		// Execute: synchronous wait keeps the descriptor rebinding per batch safe.
		submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
	}

	// Read back data
	readOutputBuffer(outputs, numValues);
}
2833
2834 // Tessellation utils
2835
// Trivial vertex shader for the tessellation executors: derives a position
// straight from gl_VertexIndex so no vertex input bindings are required.
static std::string generateVertexShaderForTess (void)
{
	std::string src;

	src += "#version 450\n";
	src += "void main (void)\n{\n";
	src += "	gl_Position = vec4(gl_VertexIndex/2, gl_VertexIndex%2, 0.0, 1.0);\n";
	src += "}\n";

	return src;
}
2846
// Executes shader expressions in a tessellation stage; buffer-based I/O is
// inherited from BufferIoExecutor. The constructor throws NotSupportedError
// on devices without tessellation support.
class TessellationExecutor : public BufferIoExecutor
{
public:
						TessellationExecutor		(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual				~TessellationExecutor		(void);

	// Renders vertexCount vertices with the given patch size; results for
	// numValues invocations end up in the output buffer. extraResources is an
	// optional second descriptor set matching m_extraResourcesLayout.
	void				renderTess					(deUint32 numValues, deUint32 vertexCount, deUint32 patchControlPoints, VkDescriptorSet extraResources);

private:
	// Layout of the optional extra-resources descriptor set (0 if unused).
	const VkDescriptorSetLayout	m_extraResourcesLayout;
};
2858
TessellationExecutor(Context & context,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)2859 TessellationExecutor::TessellationExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
2860 : BufferIoExecutor (context, shaderSpec)
2861 , m_extraResourcesLayout (extraResourcesLayout)
2862 {
2863 const VkPhysicalDeviceFeatures& features = context.getDeviceFeatures();
2864
2865 if (!features.tessellationShader)
2866 TCU_THROW(NotSupportedError, "Tessellation shader is not supported by device");
2867 }
2868
TessellationExecutor::~TessellationExecutor (void)
{
	// Nothing to do: all Vulkan objects are owned by Move<> wrappers and the base class.
}
2872
renderTess(deUint32 numValues,deUint32 vertexCount,deUint32 patchControlPoints,VkDescriptorSet extraResources)2873 void TessellationExecutor::renderTess (deUint32 numValues, deUint32 vertexCount, deUint32 patchControlPoints, VkDescriptorSet extraResources)
2874 {
2875 const size_t inputBufferSize = numValues * getInputStride();
2876 const VkDevice vkDevice = m_context.getDevice();
2877 const DeviceInterface& vk = m_context.getDeviceInterface();
2878 const VkQueue queue = m_context.getUniversalQueue();
2879 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
2880 Allocator& memAlloc = m_context.getDefaultAllocator();
2881
2882 const tcu::UVec2 renderSize (DEFAULT_RENDER_WIDTH, DEFAULT_RENDER_HEIGHT);
2883
2884 Move<VkImage> colorImage;
2885 de::MovePtr<Allocation> colorImageAlloc;
2886 VkFormat colorFormat = VK_FORMAT_R8G8B8A8_UNORM;
2887 Move<VkImageView> colorImageView;
2888
2889 Move<VkRenderPass> renderPass;
2890 Move<VkFramebuffer> framebuffer;
2891 Move<VkPipelineLayout> pipelineLayout;
2892 Move<VkPipeline> graphicsPipeline;
2893
2894 Move<VkShaderModule> vertexShaderModule;
2895 Move<VkShaderModule> tessControlShaderModule;
2896 Move<VkShaderModule> tessEvalShaderModule;
2897 Move<VkShaderModule> fragmentShaderModule;
2898
2899 Move<VkCommandPool> cmdPool;
2900 Move<VkCommandBuffer> cmdBuffer;
2901
2902 Move<VkDescriptorPool> descriptorPool;
2903 Move<VkDescriptorSetLayout> descriptorSetLayout;
2904 Move<VkDescriptorSet> descriptorSet;
2905 const deUint32 numDescriptorSets = (m_extraResourcesLayout != 0) ? 2u : 1u;
2906
2907 DE_ASSERT((m_extraResourcesLayout != 0) == (extraResources != 0));
2908
2909 // Create color image
2910 {
2911 const VkImageCreateInfo colorImageParams =
2912 {
2913 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
2914 DE_NULL, // const void* pNext;
2915 0u, // VkImageCreateFlags flags;
2916 VK_IMAGE_TYPE_2D, // VkImageType imageType;
2917 colorFormat, // VkFormat format;
2918 { renderSize.x(), renderSize.y(), 1u }, // VkExtent3D extent;
2919 1u, // deUint32 mipLevels;
2920 1u, // deUint32 arraySize;
2921 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
2922 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
2923 VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT, // VkImageUsageFlags usage;
2924 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
2925 1u, // deUint32 queueFamilyCount;
2926 &queueFamilyIndex, // const deUint32* pQueueFamilyIndices;
2927 VK_IMAGE_LAYOUT_UNDEFINED // VkImageLayout initialLayout;
2928 };
2929
2930 colorImage = createImage(vk, vkDevice, &colorImageParams);
2931
2932 // Allocate and bind color image memory
2933 colorImageAlloc = memAlloc.allocate(getImageMemoryRequirements(vk, vkDevice, *colorImage), MemoryRequirement::Any);
2934 VK_CHECK(vk.bindImageMemory(vkDevice, *colorImage, colorImageAlloc->getMemory(), colorImageAlloc->getOffset()));
2935 }
2936
2937 // Create color attachment view
2938 {
2939 const VkImageViewCreateInfo colorImageViewParams =
2940 {
2941 VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // VkStructureType sType;
2942 DE_NULL, // const void* pNext;
2943 0u, // VkImageViewCreateFlags flags;
2944 *colorImage, // VkImage image;
2945 VK_IMAGE_VIEW_TYPE_2D, // VkImageViewType viewType;
2946 colorFormat, // VkFormat format;
2947 {
2948 VK_COMPONENT_SWIZZLE_R, // VkComponentSwizzle r;
2949 VK_COMPONENT_SWIZZLE_G, // VkComponentSwizzle g;
2950 VK_COMPONENT_SWIZZLE_B, // VkComponentSwizzle b;
2951 VK_COMPONENT_SWIZZLE_A // VkComponentSwizzle a;
2952 }, // VkComponentsMapping components;
2953 {
2954 VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask;
2955 0u, // deUint32 baseMipLevel;
2956 1u, // deUint32 mipLevels;
2957 0u, // deUint32 baseArraylayer;
2958 1u // deUint32 layerCount;
2959 } // VkImageSubresourceRange subresourceRange;
2960 };
2961
2962 colorImageView = createImageView(vk, vkDevice, &colorImageViewParams);
2963 }
2964
2965 // Create render pass
2966 {
2967 const VkAttachmentDescription colorAttachmentDescription =
2968 {
2969 0u, // VkAttachmentDescriptorFlags flags;
2970 colorFormat, // VkFormat format;
2971 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
2972 VK_ATTACHMENT_LOAD_OP_CLEAR, // VkAttachmentLoadOp loadOp;
2973 VK_ATTACHMENT_STORE_OP_STORE, // VkAttachmentStoreOp storeOp;
2974 VK_ATTACHMENT_LOAD_OP_DONT_CARE, // VkAttachmentLoadOp stencilLoadOp;
2975 VK_ATTACHMENT_STORE_OP_DONT_CARE, // VkAttachmentStoreOp stencilStoreOp;
2976 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
2977 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL // VkImageLayout finalLayout
2978 };
2979
2980 const VkAttachmentDescription attachments[1] =
2981 {
2982 colorAttachmentDescription
2983 };
2984
2985 const VkAttachmentReference colorAttachmentReference =
2986 {
2987 0u, // deUint32 attachment;
2988 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL // VkImageLayout layout;
2989 };
2990
2991 const VkSubpassDescription subpassDescription =
2992 {
2993 0u, // VkSubpassDescriptionFlags flags;
2994 VK_PIPELINE_BIND_POINT_GRAPHICS, // VkPipelineBindPoint pipelineBindPoint;
2995 0u, // deUint32 inputCount;
2996 DE_NULL, // const VkAttachmentReference* pInputAttachments;
2997 1u, // deUint32 colorCount;
2998 &colorAttachmentReference, // const VkAttachmentReference* pColorAttachments;
2999 DE_NULL, // const VkAttachmentReference* pResolveAttachments;
3000 DE_NULL, // VkAttachmentReference depthStencilAttachment;
3001 0u, // deUint32 preserveCount;
3002 DE_NULL // const VkAttachmentReference* pPreserveAttachments;
3003 };
3004
3005 const VkRenderPassCreateInfo renderPassParams =
3006 {
3007 VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // VkStructureType sType;
3008 DE_NULL, // const void* pNext;
3009 0u, // VkRenderPassCreateFlags flags;
3010 1u, // deUint32 attachmentCount;
3011 attachments, // const VkAttachmentDescription* pAttachments;
3012 1u, // deUint32 subpassCount;
3013 &subpassDescription, // const VkSubpassDescription* pSubpasses;
3014 0u, // deUint32 dependencyCount;
3015 DE_NULL // const VkSubpassDependency* pDependencies;
3016 };
3017
3018 renderPass = createRenderPass(vk, vkDevice, &renderPassParams);
3019 }
3020
3021 // Create framebuffer
3022 {
3023 const VkFramebufferCreateInfo framebufferParams =
3024 {
3025 VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, // VkStructureType sType;
3026 DE_NULL, // const void* pNext;
3027 0u, // VkFramebufferCreateFlags flags;
3028 *renderPass, // VkRenderPass renderPass;
3029 1u, // deUint32 attachmentCount;
3030 &*colorImageView, // const VkAttachmentBindInfo* pAttachments;
3031 (deUint32)renderSize.x(), // deUint32 width;
3032 (deUint32)renderSize.y(), // deUint32 height;
3033 1u // deUint32 layers;
3034 };
3035
3036 framebuffer = createFramebuffer(vk, vkDevice, &framebufferParams);
3037 }
3038
3039 // Create descriptors
3040 {
3041 DescriptorPoolBuilder descriptorPoolBuilder;
3042 DescriptorSetLayoutBuilder descriptorSetLayoutBuilder;
3043
3044 descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_ALL);
3045 descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3046 descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_ALL);
3047 descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3048
3049 descriptorSetLayout = descriptorSetLayoutBuilder.build(vk, vkDevice);
3050 descriptorPool = descriptorPoolBuilder.build(vk, vkDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3051
3052 const VkDescriptorSetAllocateInfo allocInfo =
3053 {
3054 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
3055 DE_NULL,
3056 *descriptorPool,
3057 1u,
3058 &*descriptorSetLayout
3059 };
3060
3061 descriptorSet = allocateDescriptorSet(vk, vkDevice, &allocInfo);
3062 // Update descriptors
3063 {
3064 DescriptorSetUpdateBuilder descriptorSetUpdateBuilder;
3065 const VkDescriptorBufferInfo outputDescriptorBufferInfo =
3066 {
3067 *m_outputBuffer, // VkBuffer buffer;
3068 0u, // VkDeviceSize offset;
3069 VK_WHOLE_SIZE // VkDeviceSize range;
3070 };
3071
3072 descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)OUTPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputDescriptorBufferInfo);
3073
3074 VkDescriptorBufferInfo inputDescriptorBufferInfo =
3075 {
3076 0, // VkBuffer buffer;
3077 0u, // VkDeviceSize offset;
3078 VK_WHOLE_SIZE // VkDeviceSize range;
3079 };
3080
3081 if (inputBufferSize > 0)
3082 {
3083 inputDescriptorBufferInfo.buffer = *m_inputBuffer;
3084
3085 descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)INPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputDescriptorBufferInfo);
3086 }
3087
3088 descriptorSetUpdateBuilder.update(vk, vkDevice);
3089 }
3090 }
3091
3092 // Create pipeline layout
3093 {
3094 const VkDescriptorSetLayout descriptorSetLayouts[] =
3095 {
3096 *descriptorSetLayout,
3097 m_extraResourcesLayout
3098 };
3099 const VkPipelineLayoutCreateInfo pipelineLayoutParams =
3100 {
3101 VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // VkStructureType sType;
3102 DE_NULL, // const void* pNext;
3103 (VkPipelineLayoutCreateFlags)0, // VkPipelineLayoutCreateFlags flags;
3104 numDescriptorSets, // deUint32 descriptorSetCount;
3105 descriptorSetLayouts, // const VkDescriptorSetLayout* pSetLayouts;
3106 0u, // deUint32 pushConstantRangeCount;
3107 DE_NULL // const VkPushConstantRange* pPushConstantRanges;
3108 };
3109
3110 pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
3111 }
3112
3113 // Create shader modules
3114 {
3115 vertexShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("vert"), 0);
3116 tessControlShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("tess_control"), 0);
3117 tessEvalShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("tess_eval"), 0);
3118 fragmentShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("frag"), 0);
3119 }
3120
3121 // Create pipeline
3122 {
3123 const VkPipelineVertexInputStateCreateInfo vertexInputStateParams =
3124 {
3125 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
3126 DE_NULL, // const void* pNext;
3127 (VkPipelineVertexInputStateCreateFlags)0, // VkPipelineVertexInputStateCreateFlags flags;
3128 0u, // deUint32 bindingCount;
3129 DE_NULL, // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
3130 0u, // deUint32 attributeCount;
3131 DE_NULL, // const VkVertexInputAttributeDescription* pvertexAttributeDescriptions;
3132 };
3133
3134 const std::vector<VkViewport> viewports (1, makeViewport(renderSize));
3135 const std::vector<VkRect2D> scissors (1, makeRect2D(renderSize));
3136
3137 graphicsPipeline = makeGraphicsPipeline(vk, // const DeviceInterface& vk
3138 vkDevice, // const VkDevice device
3139 *pipelineLayout, // const VkPipelineLayout pipelineLayout
3140 *vertexShaderModule, // const VkShaderModule vertexShaderModule
3141 *tessControlShaderModule, // const VkShaderModule tessellationControlShaderModule
3142 *tessEvalShaderModule, // const VkShaderModule tessellationEvalShaderModule
3143 DE_NULL, // const VkShaderModule geometryShaderModule
3144 *fragmentShaderModule, // const VkShaderModule fragmentShaderModule
3145 *renderPass, // const VkRenderPass renderPass
3146 viewports, // const std::vector<VkViewport>& viewports
3147 scissors, // const std::vector<VkRect2D>& scissors
3148 VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, // const VkPrimitiveTopology topology
3149 0u, // const deUint32 subpass
3150 patchControlPoints, // const deUint32 patchControlPoints
3151 &vertexInputStateParams); // const VkPipelineVertexInputStateCreateInfo* vertexInputStateCreateInfo
3152 }
3153
3154 // Create command pool
3155 cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
3156
3157 // Create command buffer
3158 {
3159 const VkClearValue clearValue = getDefaultClearColor();
3160
3161 cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3162
3163 beginCommandBuffer(vk, *cmdBuffer);
3164
3165 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, renderSize.x(), renderSize.y()), clearValue);
3166
3167 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *graphicsPipeline);
3168
3169 {
3170 const VkDescriptorSet descriptorSets[] = { *descriptorSet, extraResources };
3171 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, numDescriptorSets, descriptorSets, 0u, DE_NULL);
3172 }
3173
3174 vk.cmdDraw(*cmdBuffer, vertexCount, 1, 0, 0);
3175
3176 endRenderPass(vk, *cmdBuffer);
3177
3178 // Insert a barrier so data written by the shader is available to the host
3179 {
3180 const VkBufferMemoryBarrier bufferBarrier =
3181 {
3182 VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType sType;
3183 DE_NULL, // const void* pNext;
3184 VK_ACCESS_SHADER_WRITE_BIT, // VkAccessFlags srcAccessMask;
3185 VK_ACCESS_HOST_READ_BIT, // VkAccessFlags dstAccessMask;
3186 VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex;
3187 VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex;
3188 *m_outputBuffer, // VkBuffer buffer;
3189 0, // VkDeviceSize offset;
3190 VK_WHOLE_SIZE, // VkDeviceSize size;
3191 };
3192
3193 vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0,
3194 0, (const VkMemoryBarrier*)DE_NULL,
3195 1, &bufferBarrier,
3196 0, (const VkImageMemoryBarrier*)DE_NULL);
3197 }
3198
3199 endCommandBuffer(vk, *cmdBuffer);
3200 }
3201
3202 // Execute Draw
3203 submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
3204 }
3205
3206 // TessControlExecutor
3207
// Executes the tested ShaderSpec code inside a tessellation control shader.
// Vertex, tessellation evaluation and fragment stages are pass-through;
// results are exchanged through the storage buffers managed by TessellationExecutor.
class TessControlExecutor : public TessellationExecutor
{
public:
						TessControlExecutor			(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual				~TessControlExecutor		(void);

	//! Adds the GLSL sources for all four pipeline stages to programCollection.
	static void			generateSources				(const ShaderSpec& shaderSpec, SourceCollections& programCollection);

	//! Runs the shader once per value and reads results back into outputs.
	virtual void		execute						(int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources);

protected:
	//! Builds the tessellation control shader embedding the ShaderSpec body.
	static std::string	generateTessControlShader	(const ShaderSpec& shaderSpec);
};
3221
// Forwards everything to TessellationExecutor; this class adds no state of its own.
TessControlExecutor::TessControlExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: TessellationExecutor(context, shaderSpec, extraResourcesLayout)
{
}
3226
// Nothing to release here; base class owns all Vulkan resources.
TessControlExecutor::~TessControlExecutor (void)
{
}
3230
generateTessControlShader(const ShaderSpec & shaderSpec)3231 std::string TessControlExecutor::generateTessControlShader (const ShaderSpec& shaderSpec)
3232 {
3233 std::ostringstream src;
3234 src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
3235
3236 if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
3237 src << "#extension GL_EXT_tessellation_shader : require\n\n";
3238
3239 if (!shaderSpec.globalDeclarations.empty())
3240 src << shaderSpec.globalDeclarations << "\n";
3241
3242 src << "\nlayout(vertices = 1) out;\n\n";
3243
3244 declareBufferBlocks(src, shaderSpec);
3245
3246 src << "void main (void)\n{\n";
3247
3248 for (int ndx = 0; ndx < 2; ndx++)
3249 src << "\tgl_TessLevelInner[" << ndx << "] = 1.0;\n";
3250
3251 for (int ndx = 0; ndx < 4; ndx++)
3252 src << "\tgl_TessLevelOuter[" << ndx << "] = 1.0;\n";
3253
3254 src << "\n"
3255 << "\thighp uint invocationId = uint(gl_PrimitiveID);\n";
3256
3257 generateExecBufferIo(src, shaderSpec, "invocationId");
3258
3259 src << "}\n";
3260
3261 return src.str();
3262 }
3263
// Returns a minimal pass-through tessellation evaluation shader: the position
// is taken straight from the tessellation coordinate and no user code runs
// in this stage.
static std::string generateEmptyTessEvalShader ()
{
	std::string src;

	src += "#version 450\n";
	src += "#extension GL_EXT_tessellation_shader : require\n\n";
	src += "layout(triangles, ccw) in;\n";
	src += "\nvoid main (void)\n{\n";
	src += "\tgl_Position = vec4(gl_TessCoord.xy, 0.0, 1.0);\n";
	src += "}\n";

	return src;
}
3279
// Registers all four pipeline stages; the tested code lives in the
// tessellation control shader, the other stages are pass-through stubs.
void TessControlExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
{
	programCollection.glslSources.add("vert") << glu::VertexSource(generateVertexShaderForTess()) << shaderSpec.buildOptions;
	programCollection.glslSources.add("tess_control") << glu::TessellationControlSource(generateTessControlShader(shaderSpec)) << shaderSpec.buildOptions;
	programCollection.glslSources.add("tess_eval") << glu::TessellationEvaluationSource(generateEmptyTessEvalShader()) << shaderSpec.buildOptions;
	programCollection.glslSources.add("frag") << glu::FragmentSource(generateEmptyFragmentSource()) << shaderSpec.buildOptions;
}
3287
// Runs the tess control shader for numValues invocations: uploads inputs,
// draws numValues patches of three vertices each, then reads results back.
void TessControlExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
{
	// Three vertices per patch; each patch computes one value.
	const deUint32	patchSize	= 3;

	initBuffers(numValues);

	// Setup input buffer & copy data
	uploadInputBuffer(inputs, numValues, false);

	renderTess(numValues, patchSize * numValues, patchSize, extraResources);

	// Read back data
	readOutputBuffer(outputs, numValues);
}
3302
3303 // TessEvaluationExecutor
3304
// Executes the tested ShaderSpec code inside a tessellation evaluation shader.
// A fixed pass-through control stage feeds it; isoline tessellation yields
// two invocations per patch (see generateTessEvalShader / execute).
class TessEvaluationExecutor : public TessellationExecutor
{
public:
						TessEvaluationExecutor	(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual				~TessEvaluationExecutor	(void);

	//! Adds the GLSL sources for all four pipeline stages to programCollection.
	static void			generateSources			(const ShaderSpec& shaderSpec, SourceCollections& programCollection);

	//! Runs the shader once per value and reads results back into outputs.
	virtual void		execute					(int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources);

protected:
	//! Builds the tessellation evaluation shader embedding the ShaderSpec body.
	static std::string	generateTessEvalShader	(const ShaderSpec& shaderSpec);
};
3318
// Forwards everything to TessellationExecutor; this class adds no state of its own.
TessEvaluationExecutor::TessEvaluationExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: TessellationExecutor (context, shaderSpec, extraResourcesLayout)
{
}
3323
// Nothing to release here; base class owns all Vulkan resources.
TessEvaluationExecutor::~TessEvaluationExecutor (void)
{
}
3327
// Returns a fixed tessellation control shader that only sets every inner and
// outer tessellation level to 1.0; the code under test runs in the
// evaluation stage instead.
static std::string generatePassthroughTessControlShader (void)
{
	std::ostringstream src;

	src << "#version 450\n"
		   "#extension GL_EXT_tessellation_shader : require\n\n"
		   "layout(vertices = 1) out;\n\n"
		   "void main (void)\n{\n";

	for (int innerNdx = 0; innerNdx < 2; ++innerNdx)
		src << "\tgl_TessLevelInner[" << innerNdx << "] = 1.0;\n";

	for (int outerNdx = 0; outerNdx < 4; ++outerNdx)
		src << "\tgl_TessLevelOuter[" << outerNdx << "] = 1.0;\n";

	src << "}\n";

	return src.str();
}
3349
generateTessEvalShader(const ShaderSpec & shaderSpec)3350 std::string TessEvaluationExecutor::generateTessEvalShader (const ShaderSpec& shaderSpec)
3351 {
3352 std::ostringstream src;
3353
3354 src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
3355
3356 if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
3357 src << "#extension GL_EXT_tessellation_shader : require\n\n";
3358
3359 if (!shaderSpec.globalDeclarations.empty())
3360 src << shaderSpec.globalDeclarations << "\n";
3361
3362 src << "\n";
3363
3364 src << "layout(isolines, equal_spacing) in;\n\n";
3365
3366 declareBufferBlocks(src, shaderSpec);
3367
3368 src << "void main (void)\n{\n"
3369 << "\tgl_Position = vec4(gl_TessCoord.x, 0.0, 0.0, 1.0);\n"
3370 << "\thighp uint invocationId = uint(gl_PrimitiveID)*2u + (gl_TessCoord.x > 0.5 ? 1u : 0u);\n";
3371
3372 generateExecBufferIo(src, shaderSpec, "invocationId");
3373
3374 src << "}\n";
3375
3376 return src.str();
3377 }
3378
// Registers all four pipeline stages; the tested code lives in the
// tessellation evaluation shader, the other stages are pass-through stubs.
void TessEvaluationExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
{
	programCollection.glslSources.add("vert") << glu::VertexSource(generateVertexShaderForTess()) << shaderSpec.buildOptions;
	programCollection.glslSources.add("tess_control") << glu::TessellationControlSource(generatePassthroughTessControlShader()) << shaderSpec.buildOptions;
	programCollection.glslSources.add("tess_eval") << glu::TessellationEvaluationSource(generateTessEvalShader(shaderSpec)) << shaderSpec.buildOptions;
	programCollection.glslSources.add("frag") << glu::FragmentSource(generateEmptyFragmentSource()) << shaderSpec.buildOptions;
}
3386
// Runs the tess eval shader for numValues invocations. Each isoline patch
// yields two invocations, so the buffers are padded to an even value count;
// only the first numValues results are read back.
void TessEvaluationExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
{
	const int	patchSize		= 2;
	const int	alignedValues	= deAlign32(numValues, patchSize);

	// Initialize buffers with aligned value count to make room for padding
	initBuffers(alignedValues);

	// Setup input buffer & copy data
	uploadInputBuffer(inputs, numValues, false);

	renderTess((deUint32)alignedValues, (deUint32)alignedValues, (deUint32)patchSize, extraResources);

	// Read back data
	readOutputBuffer(outputs, numValues);
}
3403
3404 } // anonymous
3405
3406 // ShaderExecutor
3407
// Virtual destructor anchor for the executor hierarchy; no resources to free.
ShaderExecutor::~ShaderExecutor (void)
{
}
3411
areInputs16Bit(void) const3412 bool ShaderExecutor::areInputs16Bit (void) const
3413 {
3414 for (vector<Symbol>::const_iterator symIter = m_shaderSpec.inputs.begin(); symIter != m_shaderSpec.inputs.end(); ++symIter)
3415 {
3416 if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
3417 return true;
3418 }
3419 return false;
3420 }
3421
areOutputs16Bit(void) const3422 bool ShaderExecutor::areOutputs16Bit (void) const
3423 {
3424 for (vector<Symbol>::const_iterator symIter = m_shaderSpec.outputs.begin(); symIter != m_shaderSpec.outputs.end(); ++symIter)
3425 {
3426 if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
3427 return true;
3428 }
3429 return false;
3430 }
3431
isOutput16Bit(const size_t ndx) const3432 bool ShaderExecutor::isOutput16Bit (const size_t ndx) const
3433 {
3434 if (glu::isDataTypeFloat16OrVec(m_shaderSpec.outputs[ndx].varType.getBasicType()))
3435 return true;
3436 return false;
3437 }
3438
areInputs64Bit(void) const3439 bool ShaderExecutor::areInputs64Bit (void) const
3440 {
3441 for (vector<Symbol>::const_iterator symIter = m_shaderSpec.inputs.begin(); symIter != m_shaderSpec.inputs.end(); ++symIter)
3442 {
3443 if (glu::isDataTypeDoubleType(symIter->varType.getBasicType()))
3444 return true;
3445 }
3446 return false;
3447 }
3448
areOutputs64Bit(void) const3449 bool ShaderExecutor::areOutputs64Bit (void) const
3450 {
3451 for (vector<Symbol>::const_iterator symIter = m_shaderSpec.outputs.begin(); symIter != m_shaderSpec.outputs.end(); ++symIter)
3452 {
3453 if (glu::isDataTypeDoubleType(symIter->varType.getBasicType()))
3454 return true;
3455 }
3456 return false;
3457 }
3458
isOutput64Bit(const size_t ndx) const3459 bool ShaderExecutor::isOutput64Bit (const size_t ndx) const
3460 {
3461 if (glu::isDataTypeDoubleType(m_shaderSpec.outputs[ndx].varType.getBasicType()))
3462 return true;
3463 return false;
3464 }
3465
3466 // Utilities
3467
// Dispatches source generation to the executor class matching shaderType.
// Throws InternalError for stages without an executor implementation.
void generateSources (glu::ShaderType shaderType, const ShaderSpec& shaderSpec, vk::SourceCollections& dst)
{
	switch (shaderType)
	{
		case glu::SHADERTYPE_VERTEX:					VertexShaderExecutor::generateSources	(shaderSpec, dst);	break;
		case glu::SHADERTYPE_TESSELLATION_CONTROL:		TessControlExecutor::generateSources	(shaderSpec, dst);	break;
		case glu::SHADERTYPE_TESSELLATION_EVALUATION:	TessEvaluationExecutor::generateSources	(shaderSpec, dst);	break;
		case glu::SHADERTYPE_GEOMETRY:					GeometryShaderExecutor::generateSources	(shaderSpec, dst);	break;
		case glu::SHADERTYPE_FRAGMENT:					FragmentShaderExecutor::generateSources	(shaderSpec, dst);	break;
		case glu::SHADERTYPE_COMPUTE:					ComputeShaderExecutor::generateSources	(shaderSpec, dst);	break;
		default:
			TCU_THROW(InternalError, "Unsupported shader type");
	}
}
3482
// Factory: returns a newly allocated executor for shaderType (caller owns it).
// Throws InternalError for stages without an executor implementation.
ShaderExecutor* createExecutor (Context& context, glu::ShaderType shaderType, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
{
	switch (shaderType)
	{
		case glu::SHADERTYPE_VERTEX:					return new VertexShaderExecutor		(context, shaderSpec, extraResourcesLayout);
		case glu::SHADERTYPE_TESSELLATION_CONTROL:		return new TessControlExecutor		(context, shaderSpec, extraResourcesLayout);
		case glu::SHADERTYPE_TESSELLATION_EVALUATION:	return new TessEvaluationExecutor	(context, shaderSpec, extraResourcesLayout);
		case glu::SHADERTYPE_GEOMETRY:					return new GeometryShaderExecutor	(context, shaderSpec, extraResourcesLayout);
		case glu::SHADERTYPE_FRAGMENT:					return new FragmentShaderExecutor	(context, shaderSpec, extraResourcesLayout);
		case glu::SHADERTYPE_COMPUTE:					return new ComputeShaderExecutor	(context, shaderSpec, extraResourcesLayout);
		default:
			TCU_THROW(InternalError, "Unsupported shader type");
	}
}
3497
executorSupported(glu::ShaderType shaderType)3498 bool executorSupported(glu::ShaderType shaderType)
3499 {
3500 switch (shaderType)
3501 {
3502 case glu::SHADERTYPE_VERTEX:
3503 case glu::SHADERTYPE_TESSELLATION_CONTROL:
3504 case glu::SHADERTYPE_TESSELLATION_EVALUATION:
3505 case glu::SHADERTYPE_GEOMETRY:
3506 case glu::SHADERTYPE_FRAGMENT:
3507 case glu::SHADERTYPE_COMPUTE:
3508 return true;
3509 default:
3510 return false;
3511 }
3512 }
3513
// Throws NotSupportedError when a tess eval test cannot run: the isoline
// executor requires tessellationIsolines, which VK_KHR_portability_subset
// implementations may not support.
void checkSupportShader(Context& context, const glu::ShaderType shaderType)
{
	if (shaderType == glu::SHADERTYPE_TESSELLATION_EVALUATION &&
		context.isDeviceFunctionalitySupported("VK_KHR_portability_subset") &&
		!context.getPortabilitySubsetFeatures().tessellationIsolines)
	{
		TCU_THROW(NotSupportedError, "VK_KHR_portability_subset: Tessellation iso lines are not supported by this implementation");
	}
}
3523
3524
3525 } // shaderexecutor
3526 } // vkt
3527