1 /*------------------------------------------------------------------------
2 * OpenGL Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2017-2019 The Khronos Group Inc.
6 * Copyright (c) 2017 Codeplay Software Ltd.
7 * Copyright (c) 2019 NVIDIA Corporation.
8 *
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
20 *
21 */ /*!
22 * \file
23 * \brief Subgroups Tests
24 */ /*--------------------------------------------------------------------*/
25
26 #include "glcSubgroupsClusteredTests.hpp"
27 #include "glcSubgroupsTestsUtils.hpp"
28
29 #include <string>
30 #include <vector>
31
32 using namespace tcu;
33 using namespace std;
34
35 namespace glc
36 {
37 namespace subgroups
38 {
39 namespace
40 {
// Clustered subgroup operations exercised by this file.
//
// The values are contiguous and zero-based: the test generator iterates over
// [0, OPTYPE_CLUSTERED_LAST) and passes the raw integer to the helpers below.
enum OpType
{
	OPTYPE_CLUSTERED_ADD	= 0,
	OPTYPE_CLUSTERED_MUL	= 1,
	OPTYPE_CLUSTERED_MIN	= 2,
	OPTYPE_CLUSTERED_MAX	= 3,
	OPTYPE_CLUSTERED_AND	= 4,
	OPTYPE_CLUSTERED_OR		= 5,
	OPTYPE_CLUSTERED_XOR	= 6,
	OPTYPE_CLUSTERED_LAST	= 7		// Number of operations; not an operation itself.
};
52
checkVertexPipelineStages(std::vector<const void * > datas,deUint32 width,deUint32)53 static bool checkVertexPipelineStages(std::vector<const void*> datas,
54 deUint32 width, deUint32)
55 {
56 return glc::subgroups::check(datas, width, 1);
57 }
58
checkComputeStage(std::vector<const void * > datas,const deUint32 numWorkgroups[3],const deUint32 localSize[3],deUint32)59 static bool checkComputeStage(std::vector<const void*> datas,
60 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
61 deUint32)
62 {
63 return glc::subgroups::checkCompute(datas, numWorkgroups, localSize, 1);
64 }
65
getOpTypeName(int opType)66 std::string getOpTypeName(int opType)
67 {
68 switch (opType)
69 {
70 default:
71 DE_FATAL("Unsupported op type");
72 return "";
73 case OPTYPE_CLUSTERED_ADD:
74 return "subgroupClusteredAdd";
75 case OPTYPE_CLUSTERED_MUL:
76 return "subgroupClusteredMul";
77 case OPTYPE_CLUSTERED_MIN:
78 return "subgroupClusteredMin";
79 case OPTYPE_CLUSTERED_MAX:
80 return "subgroupClusteredMax";
81 case OPTYPE_CLUSTERED_AND:
82 return "subgroupClusteredAnd";
83 case OPTYPE_CLUSTERED_OR:
84 return "subgroupClusteredOr";
85 case OPTYPE_CLUSTERED_XOR:
86 return "subgroupClusteredXor";
87 }
88 }
89
getOpTypeOperation(int opType,Format format,std::string lhs,std::string rhs)90 std::string getOpTypeOperation(int opType, Format format, std::string lhs, std::string rhs)
91 {
92 switch (opType)
93 {
94 default:
95 DE_FATAL("Unsupported op type");
96 return "";
97 case OPTYPE_CLUSTERED_ADD:
98 return lhs + " + " + rhs;
99 case OPTYPE_CLUSTERED_MUL:
100 return lhs + " * " + rhs;
101 case OPTYPE_CLUSTERED_MIN:
102 switch (format)
103 {
104 default:
105 return "min(" + lhs + ", " + rhs + ")";
106 case FORMAT_R32_SFLOAT:
107 case FORMAT_R64_SFLOAT:
108 return "(isnan(" + lhs + ") ? " + rhs + " : (isnan(" + rhs + ") ? " + lhs + " : min(" + lhs + ", " + rhs + ")))";
109 case FORMAT_R32G32_SFLOAT:
110 case FORMAT_R32G32B32_SFLOAT:
111 case FORMAT_R32G32B32A32_SFLOAT:
112 case FORMAT_R64G64_SFLOAT:
113 case FORMAT_R64G64B64_SFLOAT:
114 case FORMAT_R64G64B64A64_SFLOAT:
115 return "mix(mix(min(" + lhs + ", " + rhs + "), " + lhs + ", isnan(" + rhs + ")), " + rhs + ", isnan(" + lhs + "))";
116 }
117 case OPTYPE_CLUSTERED_MAX:
118 switch (format)
119 {
120 default:
121 return "max(" + lhs + ", " + rhs + ")";
122 case FORMAT_R32_SFLOAT:
123 case FORMAT_R64_SFLOAT:
124 return "(isnan(" + lhs + ") ? " + rhs + " : (isnan(" + rhs + ") ? " + lhs + " : max(" + lhs + ", " + rhs + ")))";
125 case FORMAT_R32G32_SFLOAT:
126 case FORMAT_R32G32B32_SFLOAT:
127 case FORMAT_R32G32B32A32_SFLOAT:
128 case FORMAT_R64G64_SFLOAT:
129 case FORMAT_R64G64B64_SFLOAT:
130 case FORMAT_R64G64B64A64_SFLOAT:
131 return "mix(mix(max(" + lhs + ", " + rhs + "), " + lhs + ", isnan(" + rhs + ")), " + rhs + ", isnan(" + lhs + "))";
132 }
133 case OPTYPE_CLUSTERED_AND:
134 switch (format)
135 {
136 default:
137 return lhs + " & " + rhs;
138 case FORMAT_R32_BOOL:
139 return lhs + " && " + rhs;
140 case FORMAT_R32G32_BOOL:
141 return "bvec2(" + lhs + ".x && " + rhs + ".x, " + lhs + ".y && " + rhs + ".y)";
142 case FORMAT_R32G32B32_BOOL:
143 return "bvec3(" + lhs + ".x && " + rhs + ".x, " + lhs + ".y && " + rhs + ".y, " + lhs + ".z && " + rhs + ".z)";
144 case FORMAT_R32G32B32A32_BOOL:
145 return "bvec4(" + lhs + ".x && " + rhs + ".x, " + lhs + ".y && " + rhs + ".y, " + lhs + ".z && " + rhs + ".z, " + lhs + ".w && " + rhs + ".w)";
146 }
147 case OPTYPE_CLUSTERED_OR:
148 switch (format)
149 {
150 default:
151 return lhs + " | " + rhs;
152 case FORMAT_R32_BOOL:
153 return lhs + " || " + rhs;
154 case FORMAT_R32G32_BOOL:
155 return "bvec2(" + lhs + ".x || " + rhs + ".x, " + lhs + ".y || " + rhs + ".y)";
156 case FORMAT_R32G32B32_BOOL:
157 return "bvec3(" + lhs + ".x || " + rhs + ".x, " + lhs + ".y || " + rhs + ".y, " + lhs + ".z || " + rhs + ".z)";
158 case FORMAT_R32G32B32A32_BOOL:
159 return "bvec4(" + lhs + ".x || " + rhs + ".x, " + lhs + ".y || " + rhs + ".y, " + lhs + ".z || " + rhs + ".z, " + lhs + ".w || " + rhs + ".w)";
160 }
161 case OPTYPE_CLUSTERED_XOR:
162 switch (format)
163 {
164 default:
165 return lhs + " ^ " + rhs;
166 case FORMAT_R32_BOOL:
167 return lhs + " ^^ " + rhs;
168 case FORMAT_R32G32_BOOL:
169 return "bvec2(" + lhs + ".x ^^ " + rhs + ".x, " + lhs + ".y ^^ " + rhs + ".y)";
170 case FORMAT_R32G32B32_BOOL:
171 return "bvec3(" + lhs + ".x ^^ " + rhs + ".x, " + lhs + ".y ^^ " + rhs + ".y, " + lhs + ".z ^^ " + rhs + ".z)";
172 case FORMAT_R32G32B32A32_BOOL:
173 return "bvec4(" + lhs + ".x ^^ " + rhs + ".x, " + lhs + ".y ^^ " + rhs + ".y, " + lhs + ".z ^^ " + rhs + ".z, " + lhs + ".w ^^ " + rhs + ".w)";
174 }
175 }
176 }
177
getIdentity(int opType,Format format)178 std::string getIdentity(int opType, Format format)
179 {
180 bool isFloat = false;
181 bool isInt = false;
182 bool isUnsigned = false;
183
184 switch (format)
185 {
186 default:
187 DE_FATAL("Unhandled format!");
188 break;
189 case FORMAT_R32_SINT:
190 case FORMAT_R32G32_SINT:
191 case FORMAT_R32G32B32_SINT:
192 case FORMAT_R32G32B32A32_SINT:
193 isInt = true;
194 break;
195 case FORMAT_R32_UINT:
196 case FORMAT_R32G32_UINT:
197 case FORMAT_R32G32B32_UINT:
198 case FORMAT_R32G32B32A32_UINT:
199 isUnsigned = true;
200 break;
201 case FORMAT_R32_SFLOAT:
202 case FORMAT_R32G32_SFLOAT:
203 case FORMAT_R32G32B32_SFLOAT:
204 case FORMAT_R32G32B32A32_SFLOAT:
205 case FORMAT_R64_SFLOAT:
206 case FORMAT_R64G64_SFLOAT:
207 case FORMAT_R64G64B64_SFLOAT:
208 case FORMAT_R64G64B64A64_SFLOAT:
209 isFloat = true;
210 break;
211 case FORMAT_R32_BOOL:
212 case FORMAT_R32G32_BOOL:
213 case FORMAT_R32G32B32_BOOL:
214 case FORMAT_R32G32B32A32_BOOL:
215 break; // bool types are not anything
216 }
217
218 switch (opType)
219 {
220 default:
221 DE_FATAL("Unsupported op type");
222 return "";
223 case OPTYPE_CLUSTERED_ADD:
224 return subgroups::getFormatNameForGLSL(format) + "(0)";
225 case OPTYPE_CLUSTERED_MUL:
226 return subgroups::getFormatNameForGLSL(format) + "(1)";
227 case OPTYPE_CLUSTERED_MIN:
228 if (isFloat)
229 {
230 return subgroups::getFormatNameForGLSL(format) + "(intBitsToFloat(0x7f800000))";
231 }
232 else if (isInt)
233 {
234 return subgroups::getFormatNameForGLSL(format) + "(0x7fffffff)";
235 }
236 else if (isUnsigned)
237 {
238 return subgroups::getFormatNameForGLSL(format) + "(0xffffffffu)";
239 }
240 else
241 {
242 DE_FATAL("Unhandled case");
243 return "";
244 }
245 case OPTYPE_CLUSTERED_MAX:
246 if (isFloat)
247 {
248 return subgroups::getFormatNameForGLSL(format) + "(intBitsToFloat(0xff800000))";
249 }
250 else if (isInt)
251 {
252 return subgroups::getFormatNameForGLSL(format) + "(0x80000000)";
253 }
254 else if (isUnsigned)
255 {
256 return subgroups::getFormatNameForGLSL(format) + "(0u)";
257 }
258 else
259 {
260 DE_FATAL("Unhandled case");
261 return "";
262 }
263 case OPTYPE_CLUSTERED_AND:
264 return subgroups::getFormatNameForGLSL(format) + "(~0)";
265 case OPTYPE_CLUSTERED_OR:
266 return subgroups::getFormatNameForGLSL(format) + "(0)";
267 case OPTYPE_CLUSTERED_XOR:
268 return subgroups::getFormatNameForGLSL(format) + "(0)";
269 }
270 }
271
getCompare(int opType,Format format,std::string lhs,std::string rhs)272 std::string getCompare(int opType, Format format, std::string lhs, std::string rhs)
273 {
274 std::string formatName = subgroups::getFormatNameForGLSL(format);
275 switch (format)
276 {
277 default:
278 return "all(equal(" + lhs + ", " + rhs + "))";
279 case FORMAT_R32_BOOL:
280 case FORMAT_R32_UINT:
281 case FORMAT_R32_SINT:
282 return "(" + lhs + " == " + rhs + ")";
283 case FORMAT_R32_SFLOAT:
284 case FORMAT_R64_SFLOAT:
285 switch (opType)
286 {
287 default:
288 return "(abs(" + lhs + " - " + rhs + ") < 0.00001)";
289 case OPTYPE_CLUSTERED_MIN:
290 case OPTYPE_CLUSTERED_MAX:
291 return "(" + lhs + " == " + rhs + ")";
292 }
293 case FORMAT_R32G32_SFLOAT:
294 case FORMAT_R32G32B32_SFLOAT:
295 case FORMAT_R32G32B32A32_SFLOAT:
296 case FORMAT_R64G64_SFLOAT:
297 case FORMAT_R64G64B64_SFLOAT:
298 case FORMAT_R64G64B64A64_SFLOAT:
299 switch (opType)
300 {
301 default:
302 return "all(lessThan(abs(" + lhs + " - " + rhs + "), " + formatName + "(0.00001)))";
303 case OPTYPE_CLUSTERED_MIN:
304 case OPTYPE_CLUSTERED_MAX:
305 return "all(equal(" + lhs + ", " + rhs + "))";
306 }
307 }
308 }
309
// Parameters of a single generated test case.
//
// NOTE: createSubgroupsClusteredTests() fills this struct with positional
// aggregate initialization ({opType, shaderStage, format}), so the member
// order must stay as it is.
struct CaseDefinition
{
	// OpType value selecting the clustered operation under test.
	int opType;
	// Stage the case targets: a single stage bit, SHADER_STAGE_COMPUTE_BIT or
	// SHADER_STAGE_ALL_GRAPHICS.
	ShaderStageFlags shaderStage;
	// Element type of the input data and of the operation result.
	Format format;
};
316
getBodySource(CaseDefinition caseDef)317 std::string getBodySource(CaseDefinition caseDef)
318 {
319 std::ostringstream bdy;
320 bdy << " bool tempResult = true;\n";
321
322 for (deUint32 i = 1; i <= subgroups::maxSupportedSubgroupSize(); i *= 2)
323 {
324 bdy << " {\n"
325 << " const uint clusterSize = " << i << "u;\n"
326 << " if (clusterSize <= gl_SubgroupSize)\n"
327 << " {\n"
328 << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " op = "
329 << getOpTypeName(caseDef.opType) + "(data[gl_SubgroupInvocationID], clusterSize);\n"
330 << " for (uint clusterOffset = 0u; clusterOffset < gl_SubgroupSize; clusterOffset += clusterSize)\n"
331 << " {\n"
332 << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " ref = "
333 << getIdentity(caseDef.opType, caseDef.format) << ";\n"
334 << " for (uint index = clusterOffset; index < (clusterOffset + clusterSize); index++)\n"
335 << " {\n"
336 << " if (subgroupBallotBitExtract(mask, index))\n"
337 << " {\n"
338 << " ref = " << getOpTypeOperation(caseDef.opType, caseDef.format, "ref", "data[index]") << ";\n"
339 << " }\n"
340 << " }\n"
341 << " if ((clusterOffset <= gl_SubgroupInvocationID) && (gl_SubgroupInvocationID < (clusterOffset + clusterSize)))\n"
342 << " {\n"
343 << " if (!" << getCompare(caseDef.opType, caseDef.format, "ref", "op") << ")\n"
344 << " {\n"
345 << " tempResult = false;\n"
346 << " }\n"
347 << " }\n"
348 << " }\n"
349 << " }\n"
350 << " }\n";
351 }
352 return bdy.str();
353 }
354
initFrameBufferPrograms(SourceCollections & programCollection,CaseDefinition caseDef)355 void initFrameBufferPrograms (SourceCollections& programCollection, CaseDefinition caseDef)
356 {
357 subgroups::setFragmentShaderFrameBuffer(programCollection);
358
359 if (SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
360 subgroups::setVertexShaderFrameBuffer(programCollection);
361
362 std::string bdy = getBodySource(caseDef);
363
364 if (SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
365 {
366 std::ostringstream vertexSrc;
367 vertexSrc << "${VERSION_DECL}\n"
368 << "#extension GL_KHR_shader_subgroup_clustered: enable\n"
369 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
370 << "layout(location = 0) in highp vec4 in_position;\n"
371 << "layout(location = 0) out float out_color;\n"
372 << "layout(binding = 0, std140) uniform Buffer0\n"
373 << "{\n"
374 << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
375 << "};\n"
376 << "\n"
377 << "void main (void)\n"
378 << "{\n"
379 << " uvec4 mask = subgroupBallot(true);\n"
380 << bdy
381 << " out_color = float(tempResult ? 1 : 0);\n"
382 << " gl_Position = in_position;\n"
383 << " gl_PointSize = 1.0f;\n"
384 << "}\n";
385 programCollection.add("vert") << glu::VertexSource(vertexSrc.str());
386 }
387 else if (SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
388 {
389 std::ostringstream geometry;
390
391 geometry << "${VERSION_DECL}\n"
392 << "#extension GL_KHR_shader_subgroup_clustered: enable\n"
393 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
394 << "layout(points) in;\n"
395 << "layout(points, max_vertices = 1) out;\n"
396 << "layout(location = 0) out float out_color;\n"
397 << "layout(binding = 0, std140) uniform Buffer0\n"
398 << "{\n"
399 << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
400 << "};\n"
401 << "\n"
402 << "void main (void)\n"
403 << "{\n"
404 << " uvec4 mask = subgroupBallot(true);\n"
405 << bdy
406 << " out_color = tempResult ? 1.0 : 0.0;\n"
407 << " gl_Position = gl_in[0].gl_Position;\n"
408 << " EmitVertex();\n"
409 << " EndPrimitive();\n"
410 << "}\n";
411
412 programCollection.add("geometry") << glu::GeometrySource(geometry.str());
413 }
414 else if (SHADER_STAGE_TESS_CONTROL_BIT == caseDef.shaderStage)
415 {
416 std::ostringstream controlSource;
417
418 controlSource << "${VERSION_DECL}\n"
419 << "#extension GL_KHR_shader_subgroup_clustered: enable\n"
420 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
421 << "layout(vertices = 2) out;\n"
422 << "layout(location = 0) out float out_color[];\n"
423 << "layout(binding = 0, std140) uniform Buffer0\n"
424 << "{\n"
425 << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
426 << "};\n"
427 << "\n"
428 << "void main (void)\n"
429 << "{\n"
430 << " if (gl_InvocationID == 0)\n"
431 <<" {\n"
432 << " gl_TessLevelOuter[0] = 1.0f;\n"
433 << " gl_TessLevelOuter[1] = 1.0f;\n"
434 << " }\n"
435 << " uvec4 mask = subgroupBallot(true);\n"
436 << bdy
437 << " out_color[gl_InvocationID] = tempResult ? 1.0 : 0.0;\n"
438 << " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
439 << "}\n";
440
441 programCollection.add("tesc") << glu::TessellationControlSource(controlSource.str());
442 subgroups::setTesEvalShaderFrameBuffer(programCollection);
443 }
444 else if (SHADER_STAGE_TESS_EVALUATION_BIT == caseDef.shaderStage)
445 {
446 std::ostringstream evaluationSource;
447
448 evaluationSource << "${VERSION_DECL}\n"
449 << "#extension GL_KHR_shader_subgroup_clustered: enable\n"
450 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
451 << "layout(isolines, equal_spacing, ccw ) in;\n"
452 << "layout(location = 0) out float out_color;\n"
453 << "layout(binding = 0, std140) uniform Buffer0\n"
454 << "{\n"
455 << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
456 << "};\n"
457 << "\n"
458 << "void main (void)\n"
459 << "{\n"
460 << " uvec4 mask = subgroupBallot(true);\n"
461 << bdy
462 << " out_color = tempResult ? 1.0 : 0.0;\n"
463 << " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
464 << "}\n";
465
466 subgroups::setTesCtrlShaderFrameBuffer(programCollection);
467 programCollection.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str());
468 }
469 else
470 {
471 DE_FATAL("Unsupported shader stage");
472 }
473 }
474
initPrograms(SourceCollections & programCollection,CaseDefinition caseDef)475 void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
476 {
477 std::string bdy = getBodySource(caseDef);
478
479 if (SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
480 {
481 std::ostringstream src;
482
483 src << "${VERSION_DECL}\n"
484 << "#extension GL_KHR_shader_subgroup_clustered: enable\n"
485 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
486 << "layout (${LOCAL_SIZE_X}, ${LOCAL_SIZE_Y}, ${LOCAL_SIZE_Z}) in;\n"
487 << "layout(binding = 0, std430) buffer Buffer0\n"
488 << "{\n"
489 << " uint result[];\n"
490 << "};\n"
491 << "layout(binding = 1, std430) buffer Buffer1\n"
492 << "{\n"
493 << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[];\n"
494 << "};\n"
495 << "\n"
496 << "void main (void)\n"
497 << "{\n"
498 << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
499 << " highp uint offset = globalSize.x * ((globalSize.y * "
500 "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
501 "gl_GlobalInvocationID.x;\n"
502 << " uvec4 mask = subgroupBallot(true);\n"
503 << bdy
504 << " result[offset] = tempResult ? 1u : 0u;\n"
505 << "}\n";
506
507 programCollection.add("comp") << glu::ComputeSource(src.str());
508 }
509 else
510 {
511 {
512 const string vertex =
513 "${VERSION_DECL}\n"
514 "#extension GL_KHR_shader_subgroup_clustered: enable\n"
515 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
516 "layout(binding = 0, std430) buffer Buffer0\n"
517 "{\n"
518 " uint result[];\n"
519 "} b0;\n"
520 "layout(binding = 4, std430) readonly buffer Buffer4\n"
521 "{\n"
522 " " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
523 "};\n"
524 "\n"
525 "void main (void)\n"
526 "{\n"
527 " uvec4 mask = subgroupBallot(true);\n"
528 + bdy +
529 " b0.result[gl_VertexID] = tempResult ? 1u : 0u;\n"
530 " float pixelSize = 2.0f/1024.0f;\n"
531 " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
532 " gl_Position = vec4(float(gl_VertexID) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
533 "}\n";
534
535 programCollection.add("vert") << glu::VertexSource(vertex);
536 }
537
538 {
539 const string tesc =
540 "${VERSION_DECL}\n"
541 "#extension GL_KHR_shader_subgroup_clustered: enable\n"
542 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
543 "layout(vertices=1) out;\n"
544 "layout(binding = 1, std430) buffer Buffer1\n"
545 "{\n"
546 " uint result[];\n"
547 "} b1;\n"
548 "layout(binding = 4, std430) readonly buffer Buffer4\n"
549 "{\n"
550 " " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
551 "};\n"
552 "\n"
553 "void main (void)\n"
554 "{\n"
555 " uvec4 mask = subgroupBallot(true);\n"
556 + bdy +
557 " b1.result[gl_PrimitiveID] = tempResult ? 1u : 0u;\n"
558 " if (gl_InvocationID == 0)\n"
559 " {\n"
560 " gl_TessLevelOuter[0] = 1.0f;\n"
561 " gl_TessLevelOuter[1] = 1.0f;\n"
562 " }\n"
563 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
564 "}\n";
565
566 programCollection.add("tesc") << glu::TessellationControlSource(tesc);
567 }
568
569 {
570 const string tese =
571 "${VERSION_DECL}\n"
572 "#extension GL_KHR_shader_subgroup_clustered: enable\n"
573 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
574 "layout(isolines) in;\n"
575 "layout(binding = 2, std430) buffer Buffer2\n"
576 "{\n"
577 " uint result[];\n"
578 "} b2;\n"
579 "layout(binding = 4, std430) readonly buffer Buffer4\n"
580 "{\n"
581 " " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
582 "};\n"
583 "\n"
584 "void main (void)\n"
585 "{\n"
586 " uvec4 mask = subgroupBallot(true);\n"
587 + bdy +
588 " b2.result[gl_PrimitiveID * 2 + int(gl_TessCoord.x + 0.5)] = tempResult ? 1u : 0u;\n"
589 " float pixelSize = 2.0f/1024.0f;\n"
590 " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
591 "}\n";
592 programCollection.add("tese") << glu::TessellationEvaluationSource(tese);
593 }
594
595 {
596 const string geometry =
597 // version string added by addGeometryShadersFromTemplate
598 "#extension GL_KHR_shader_subgroup_clustered: enable\n"
599 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
600 "layout(${TOPOLOGY}) in;\n"
601 "layout(points, max_vertices = 1) out;\n"
602 "layout(binding = 3, std430) buffer Buffer3\n"
603 "{\n"
604 " uint result[];\n"
605 "} b3;\n"
606 "layout(binding = 4, std430) readonly buffer Buffer4\n"
607 "{\n"
608 " " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
609 "};\n"
610 "\n"
611 "void main (void)\n"
612 "{\n"
613 " uvec4 mask = subgroupBallot(true);\n"
614 + bdy +
615 " b3.result[gl_PrimitiveIDIn] = tempResult ? 1u : 0u;\n"
616 " gl_Position = gl_in[0].gl_Position;\n"
617 " EmitVertex();\n"
618 " EndPrimitive();\n"
619 "}\n";
620 subgroups::addGeometryShadersFromTemplate(geometry, programCollection);
621 }
622
623 {
624 const string fragment =
625 "${VERSION_DECL}\n"
626 "#extension GL_KHR_shader_subgroup_clustered: enable\n"
627 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
628 "precision highp int;\n"
629 "precision highp float;\n"
630 "layout(location = 0) out uint result;\n"
631 "layout(binding = 4, std430) readonly buffer Buffer4\n"
632 "{\n"
633 " " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
634 "};\n"
635 "void main (void)\n"
636 "{\n"
637 " uvec4 mask = subgroupBallot(true);\n"
638 + bdy +
639 " result = tempResult ? 1u : 0u;\n"
640 "}\n";
641 programCollection.add("fragment") << glu::FragmentSource(fragment);
642 }
643
644 subgroups::addNoSubgroupShader(programCollection);
645 }
646 }
647
supportedCheck(Context & context,CaseDefinition caseDef)648 void supportedCheck (Context& context, CaseDefinition caseDef)
649 {
650 if (!subgroups::isSubgroupSupported(context))
651 TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
652
653 if (!subgroups::isSubgroupFeatureSupportedForDevice(context, SUBGROUP_FEATURE_CLUSTERED_BIT))
654 TCU_THROW(NotSupportedError, "Device does not support subgroup clustered operations");
655
656 if (subgroups::isDoubleFormat(caseDef.format) &&
657 !subgroups::isDoubleSupportedForDevice(context))
658 {
659 TCU_THROW(NotSupportedError, "Device does not support subgroup double operations");
660 }
661 }
662
noSSBOtest(Context & context,const CaseDefinition caseDef)663 tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
664 {
665 if (!subgroups::areSubgroupOperationsSupportedForStage(
666 context, caseDef.shaderStage))
667 {
668 if (subgroups::areSubgroupOperationsRequiredForStage(
669 caseDef.shaderStage))
670 {
671 return tcu::TestStatus::fail(
672 "Shader stage " +
673 subgroups::getShaderStageName(caseDef.shaderStage) +
674 " is required to support subgroup operations!");
675 }
676 else
677 {
678 TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
679 }
680 }
681
682 subgroups::SSBOData inputData;
683 inputData.format = caseDef.format;
684 inputData.layout = subgroups::SSBOData::LayoutStd140;
685 inputData.numElements = subgroups::maxSupportedSubgroupSize();
686 inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
687 inputData.binding = 0u;
688
689 if (SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
690 return subgroups::makeVertexFrameBufferTest(context, FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
691 else if (SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
692 return subgroups::makeGeometryFrameBufferTest(context, FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
693 else if (SHADER_STAGE_TESS_CONTROL_BIT == caseDef.shaderStage)
694 return subgroups::makeTessellationEvaluationFrameBufferTest(context, FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, SHADER_STAGE_TESS_CONTROL_BIT);
695 else if (SHADER_STAGE_TESS_EVALUATION_BIT == caseDef.shaderStage)
696 return subgroups::makeTessellationEvaluationFrameBufferTest(context, FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, SHADER_STAGE_TESS_EVALUATION_BIT);
697 else
698 TCU_THROW(InternalError, "Unhandled shader stage");
699 }
700
test(Context & context,const CaseDefinition caseDef)701 tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
702 {
703 if (SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
704 {
705 if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
706 {
707 return tcu::TestStatus::fail(
708 "Shader stage " +
709 subgroups::getShaderStageName(caseDef.shaderStage) +
710 " is required to support subgroup operations!");
711 }
712 subgroups::SSBOData inputData;
713 inputData.format = caseDef.format;
714 inputData.layout = subgroups::SSBOData::LayoutStd430;
715 inputData.numElements = subgroups::maxSupportedSubgroupSize();
716 inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
717 inputData.binding = 1u;
718
719 return subgroups::makeComputeTest(context, FORMAT_R32_UINT, &inputData, 1, checkComputeStage);
720 }
721 else
722 {
723 int supportedStages = context.getDeqpContext().getContextInfo().getInt(GL_SUBGROUP_SUPPORTED_STAGES_KHR);
724
725 ShaderStageFlags stages = (ShaderStageFlags)(caseDef.shaderStage & supportedStages);
726
727 if (SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
728 {
729 if ( (stages & SHADER_STAGE_FRAGMENT_BIT) == 0)
730 TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
731 else
732 stages = SHADER_STAGE_FRAGMENT_BIT;
733 }
734
735 if ((ShaderStageFlags)0u == stages)
736 TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
737
738 subgroups::SSBOData inputData;
739 inputData.format = caseDef.format;
740 inputData.layout = subgroups::SSBOData::LayoutStd430;
741 inputData.numElements = subgroups::maxSupportedSubgroupSize();
742 inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
743 inputData.binding = 4u;
744 inputData.stages = stages;
745
746 return subgroups::allStages(context, FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, stages);
747 }
748 }
749 }
750
createSubgroupsClusteredTests(deqp::Context & testCtx)751 deqp::TestCaseGroup* createSubgroupsClusteredTests(deqp::Context& testCtx)
752 {
753 de::MovePtr<deqp::TestCaseGroup> graphicGroup(new deqp::TestCaseGroup(
754 testCtx, "graphics", "Subgroup clustered category tests: graphics"));
755 de::MovePtr<deqp::TestCaseGroup> computeGroup(new deqp::TestCaseGroup(
756 testCtx, "compute", "Subgroup clustered category tests: compute"));
757 de::MovePtr<deqp::TestCaseGroup> framebufferGroup(new deqp::TestCaseGroup(
758 testCtx, "framebuffer", "Subgroup clustered category tests: framebuffer"));
759
760 const ShaderStageFlags stages[] =
761 {
762 SHADER_STAGE_VERTEX_BIT,
763 SHADER_STAGE_TESS_EVALUATION_BIT,
764 SHADER_STAGE_TESS_CONTROL_BIT,
765 SHADER_STAGE_GEOMETRY_BIT
766 };
767
768 const Format formats[] =
769 {
770 FORMAT_R32_SINT, FORMAT_R32G32_SINT, FORMAT_R32G32B32_SINT,
771 FORMAT_R32G32B32A32_SINT, FORMAT_R32_UINT, FORMAT_R32G32_UINT,
772 FORMAT_R32G32B32_UINT, FORMAT_R32G32B32A32_UINT,
773 FORMAT_R32_SFLOAT, FORMAT_R32G32_SFLOAT,
774 FORMAT_R32G32B32_SFLOAT, FORMAT_R32G32B32A32_SFLOAT,
775 FORMAT_R64_SFLOAT, FORMAT_R64G64_SFLOAT,
776 FORMAT_R64G64B64_SFLOAT, FORMAT_R64G64B64A64_SFLOAT,
777 FORMAT_R32_BOOL, FORMAT_R32G32_BOOL,
778 FORMAT_R32G32B32_BOOL, FORMAT_R32G32B32A32_BOOL,
779 };
780
781 for (int formatIndex = 0; formatIndex < DE_LENGTH_OF_ARRAY(formats); ++formatIndex)
782 {
783 const Format format = formats[formatIndex];
784
785 for (int opTypeIndex = 0; opTypeIndex < OPTYPE_CLUSTERED_LAST; ++opTypeIndex)
786 {
787 bool isBool = false;
788 bool isFloat = false;
789
790 switch (format)
791 {
792 default:
793 break;
794 case FORMAT_R32_SFLOAT:
795 case FORMAT_R32G32_SFLOAT:
796 case FORMAT_R32G32B32_SFLOAT:
797 case FORMAT_R32G32B32A32_SFLOAT:
798 case FORMAT_R64_SFLOAT:
799 case FORMAT_R64G64_SFLOAT:
800 case FORMAT_R64G64B64_SFLOAT:
801 case FORMAT_R64G64B64A64_SFLOAT:
802 isFloat = true;
803 break;
804 case FORMAT_R32_BOOL:
805 case FORMAT_R32G32_BOOL:
806 case FORMAT_R32G32B32_BOOL:
807 case FORMAT_R32G32B32A32_BOOL:
808 isBool = true;
809 break;
810 }
811
812 bool isBitwiseOp = false;
813
814 switch (opTypeIndex)
815 {
816 default:
817 break;
818 case OPTYPE_CLUSTERED_AND:
819 case OPTYPE_CLUSTERED_OR:
820 case OPTYPE_CLUSTERED_XOR:
821 isBitwiseOp = true;
822 break;
823 }
824
825 if (isFloat && isBitwiseOp)
826 {
827 // Skip float with bitwise category.
828 continue;
829 }
830
831 if (isBool && !isBitwiseOp)
832 {
833 // Skip bool when its not the bitwise category.
834 continue;
835 }
836
837 const std::string name = de::toLower(getOpTypeName(opTypeIndex))
838 +"_" + subgroups::getFormatNameForGLSL(format);
839
840 {
841 const CaseDefinition caseDef = {opTypeIndex, SHADER_STAGE_COMPUTE_BIT, format};
842 SubgroupFactory<CaseDefinition>::addFunctionCaseWithPrograms(computeGroup.get(), name, "", supportedCheck, initPrograms, test, caseDef);
843 }
844
845 {
846 const CaseDefinition caseDef = {opTypeIndex, SHADER_STAGE_ALL_GRAPHICS, format};
847 SubgroupFactory<CaseDefinition>::addFunctionCaseWithPrograms(graphicGroup.get(), name,
848 "", supportedCheck, initPrograms, test, caseDef);
849 }
850
851 for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
852 {
853 const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], format};
854 SubgroupFactory<CaseDefinition>::addFunctionCaseWithPrograms(framebufferGroup.get(), name +"_" + getShaderStageName(caseDef.shaderStage), "",
855 supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
856 }
857 }
858 }
859 de::MovePtr<deqp::TestCaseGroup> group(new deqp::TestCaseGroup(
860 testCtx, "clustered", "Subgroup clustered category tests"));
861
862 group->addChild(graphicGroup.release());
863 group->addChild(computeGroup.release());
864 group->addChild(framebufferGroup.release());
865
866 return group.release();
867 }
868
869 } // subgroups
870 } // glc
871