1 /*------------------------------------------------------------------------
2 * OpenGL Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2017-2019 The Khronos Group Inc.
6 * Copyright (c) 2017 Codeplay Software Ltd.
7 * Copyright (c) 2019 NVIDIA Corporation.
8 *
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
20 *
21 */ /*!
22 * \file
23 * \brief Subgroups Tests
24 */ /*--------------------------------------------------------------------*/
25
26 #include "glcSubgroupsShapeTests.hpp"
27 #include "glcSubgroupsTestsUtils.hpp"
28
29 #include <string>
30 #include <vector>
31
32 using namespace tcu;
33 using namespace std;
34
35 namespace glc
36 {
37 namespace subgroups
38 {
39 namespace
40 {
checkVertexPipelineStages(std::vector<const void * > datas,uint32_t width,uint32_t)41 static bool checkVertexPipelineStages(std::vector<const void *> datas, uint32_t width, uint32_t)
42 {
43 return glc::subgroups::check(datas, width, 1);
44 }
45
checkComputeStage(std::vector<const void * > datas,const uint32_t numWorkgroups[3],const uint32_t localSize[3],uint32_t)46 static bool checkComputeStage(std::vector<const void *> datas, const uint32_t numWorkgroups[3],
47 const uint32_t localSize[3], uint32_t)
48 {
49 return glc::subgroups::checkCompute(datas, numWorkgroups, localSize, 1);
50 }
51
// Subgroup operation family exercised by a shape test case.
// The numeric values are used as loop indices when the cases are registered,
// so they must stay contiguous and start at zero.
enum OpType
{
    OPTYPE_CLUSTERED = 0, // clustered operations
    OPTYPE_QUAD      = 1, // quad operations
    OPTYPE_LAST      = 2  // number of operation types; not a valid operation itself
};
58
getOpTypeName(int opType)59 std::string getOpTypeName(int opType)
60 {
61 switch (opType)
62 {
63 default:
64 DE_FATAL("Unsupported op type");
65 return "";
66 case OPTYPE_CLUSTERED:
67 return "clustered";
68 case OPTYPE_QUAD:
69 return "quad";
70 }
71 }
72
73 struct CaseDefinition
74 {
75 int opType;
76 ShaderStageFlags shaderStage;
77 };
78
initFrameBufferPrograms(SourceCollections & programCollection,CaseDefinition caseDef)79 void initFrameBufferPrograms(SourceCollections &programCollection, CaseDefinition caseDef)
80 {
81 std::ostringstream bdy;
82 std::string extension = (OPTYPE_CLUSTERED == caseDef.opType) ?
83 "#extension GL_KHR_shader_subgroup_clustered: enable\n" :
84 "#extension GL_KHR_shader_subgroup_quad: enable\n";
85
86 subgroups::setFragmentShaderFrameBuffer(programCollection);
87
88 if (SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
89 subgroups::setVertexShaderFrameBuffer(programCollection);
90
91 extension += "#extension GL_KHR_shader_subgroup_ballot: enable\n";
92
93 bdy << " uint tempResult = 0x1u;\n"
94 << " uvec4 mask = subgroupBallot(true);\n";
95
96 if (OPTYPE_CLUSTERED == caseDef.opType)
97 {
98 for (uint32_t i = 1; i <= subgroups::maxSupportedSubgroupSize(); i *= 2)
99 {
100 bdy << " if (gl_SubgroupSize >= " << i << "u)\n"
101 << " {\n"
102 << " uvec4 contribution = uvec4(0);\n"
103 << " uint modID = gl_SubgroupInvocationID % 32u;\n"
104 << " switch (gl_SubgroupInvocationID / 32u)\n"
105 << " {\n"
106 << " case 0u: contribution.x = 1u << modID; break;\n"
107 << " case 1u: contribution.y = 1u << modID; break;\n"
108 << " case 2u: contribution.z = 1u << modID; break;\n"
109 << " case 3u: contribution.w = 1u << modID; break;\n"
110 << " }\n"
111 << " uvec4 result = subgroupClusteredOr(contribution, " << i << "u);\n"
112 << " uint rootID = gl_SubgroupInvocationID & ~(" << i - 1 << "u);\n"
113 << " for (uint i = 0u; i < " << i << "u; i++)\n"
114 << " {\n"
115 << " uint nextID = rootID + i;\n"
116 << " if (subgroupBallotBitExtract(mask, nextID) ^^ subgroupBallotBitExtract(result, nextID))\n"
117 << " {\n"
118 << " tempResult = 0u;\n"
119 << " }\n"
120 << " }\n"
121 << " }\n";
122 }
123 }
124 else
125 {
126 bdy << " uint cluster[4] =\n"
127 << " uint[](\n"
128 << " subgroupQuadBroadcast(gl_SubgroupInvocationID, 0u),\n"
129 << " subgroupQuadBroadcast(gl_SubgroupInvocationID, 1u),\n"
130 << " subgroupQuadBroadcast(gl_SubgroupInvocationID, 2u),\n"
131 << " subgroupQuadBroadcast(gl_SubgroupInvocationID, 3u)\n"
132 << " );\n"
133 << " uint rootID = gl_SubgroupInvocationID & ~0x3u;\n"
134 << " for (uint i = 0u; i < 4u; i++)\n"
135 << " {\n"
136 << " uint nextID = rootID + i;\n"
137 << " if (subgroupBallotBitExtract(mask, nextID) && (cluster[i] != nextID))\n"
138 << " {\n"
139 << " tempResult = mask.x;\n"
140 << " }\n"
141 << " }\n";
142 }
143
144 if (SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
145 {
146 std::ostringstream vertexSrc;
147 vertexSrc << "${VERSION_DECL}\n"
148 << extension << "layout(location = 0) in highp vec4 in_position;\n"
149 << "layout(location = 0) out float result;\n"
150 << "\n"
151 << "void main (void)\n"
152 << "{\n"
153 << bdy.str() << " result = float(tempResult);\n"
154 << " gl_Position = in_position;\n"
155 << " gl_PointSize = 1.0f;\n"
156 << "}\n";
157 programCollection.add("vert") << glu::VertexSource(vertexSrc.str());
158 }
159 else if (SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
160 {
161 std::ostringstream geometry;
162
163 geometry << "${VERSION_DECL}\n"
164 << extension << "layout(points) in;\n"
165 << "layout(points, max_vertices = 1) out;\n"
166 << "layout(location = 0) out float out_color;\n"
167 << "\n"
168 << "void main (void)\n"
169 << "{\n"
170 << bdy.str() << " out_color = float(tempResult);\n"
171 << " gl_Position = gl_in[0].gl_Position;\n"
172 << " EmitVertex();\n"
173 << " EndPrimitive();\n"
174 << "}\n";
175
176 programCollection.add("geometry") << glu::GeometrySource(geometry.str());
177 }
178 else if (SHADER_STAGE_TESS_CONTROL_BIT == caseDef.shaderStage)
179 {
180 std::ostringstream controlSource;
181
182 controlSource << "${VERSION_DECL}\n"
183 << extension << "layout(vertices = 2) out;\n"
184 << "layout(location = 0) out float out_color[];\n"
185 << "\n"
186 << "void main (void)\n"
187 << "{\n"
188 << " if (gl_InvocationID == 0)\n"
189 << " {\n"
190 << " gl_TessLevelOuter[0] = 1.0f;\n"
191 << " gl_TessLevelOuter[1] = 1.0f;\n"
192 << " }\n"
193 << bdy.str() << " out_color[gl_InvocationID] = float(tempResult);\n"
194 << " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
195 << "}\n";
196
197 programCollection.add("tesc") << glu::TessellationControlSource(controlSource.str());
198 subgroups::setTesEvalShaderFrameBuffer(programCollection);
199 }
200 else if (SHADER_STAGE_TESS_EVALUATION_BIT == caseDef.shaderStage)
201 {
202 std::ostringstream evaluationSource;
203
204 evaluationSource << "${VERSION_DECL}\n"
205 << extension << "layout(isolines, equal_spacing, ccw) in;\n"
206 << "layout(location = 0) out float out_color;\n"
207 << "void main (void)\n"
208 << "{\n"
209 << bdy.str() << " out_color = float(tempResult);\n"
210 << " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
211 << "}\n";
212
213 subgroups::setTesCtrlShaderFrameBuffer(programCollection);
214 programCollection.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str());
215 }
216 else
217 {
218 DE_FATAL("Unsupported shader stage");
219 }
220 }
221
initPrograms(SourceCollections & programCollection,CaseDefinition caseDef)222 void initPrograms(SourceCollections &programCollection, CaseDefinition caseDef)
223 {
224 std::string extension = (OPTYPE_CLUSTERED == caseDef.opType) ?
225 "#extension GL_KHR_shader_subgroup_clustered: enable\n" :
226 "#extension GL_KHR_shader_subgroup_quad: enable\n";
227
228 extension += "#extension GL_KHR_shader_subgroup_ballot: enable\n";
229
230 std::ostringstream bdy;
231
232 bdy << " uint tempResult = 0x1u;\n"
233 << " uvec4 mask = subgroupBallot(true);\n";
234
235 if (OPTYPE_CLUSTERED == caseDef.opType)
236 {
237 for (uint32_t i = 1; i <= subgroups::maxSupportedSubgroupSize(); i *= 2)
238 {
239 bdy << " if (gl_SubgroupSize >= " << i << "u)\n"
240 << " {\n"
241 << " uvec4 contribution = uvec4(0);\n"
242 << " uint modID = gl_SubgroupInvocationID % 32u;\n"
243 << " switch (gl_SubgroupInvocationID / 32u)\n"
244 << " {\n"
245 << " case 0u: contribution.x = 1u << modID; break;\n"
246 << " case 1u: contribution.y = 1u << modID; break;\n"
247 << " case 2u: contribution.z = 1u << modID; break;\n"
248 << " case 3u: contribution.w = 1u << modID; break;\n"
249 << " }\n"
250 << " uvec4 result = subgroupClusteredOr(contribution, " << i << "u);\n"
251 << " uint rootID = gl_SubgroupInvocationID & ~(" << i - 1 << "u);\n"
252 << " for (uint i = 0u; i < " << i << "u; i++)\n"
253 << " {\n"
254 << " uint nextID = rootID + i;\n"
255 << " if (subgroupBallotBitExtract(mask, nextID) ^^ subgroupBallotBitExtract(result, nextID))\n"
256 << " {\n"
257 << " tempResult = 0u;\n"
258 << " }\n"
259 << " }\n"
260 << " }\n";
261 }
262 }
263 else
264 {
265 bdy << " uint cluster[4] =\n"
266 << " uint[](\n"
267 << " subgroupQuadBroadcast(gl_SubgroupInvocationID, 0u),\n"
268 << " subgroupQuadBroadcast(gl_SubgroupInvocationID, 1u),\n"
269 << " subgroupQuadBroadcast(gl_SubgroupInvocationID, 2u),\n"
270 << " subgroupQuadBroadcast(gl_SubgroupInvocationID, 3u)\n"
271 << " );\n"
272 << " uint rootID = gl_SubgroupInvocationID & ~0x3u;\n"
273 << " for (uint i = 0u; i < 4u; i++)\n"
274 << " {\n"
275 << " uint nextID = rootID + i;\n"
276 << " if (subgroupBallotBitExtract(mask, nextID) && (cluster[i] != nextID))\n"
277 << " {\n"
278 << " tempResult = mask.x;\n"
279 << " }\n"
280 << " }\n";
281 }
282
283 if (SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
284 {
285 std::ostringstream src;
286
287 src << "${VERSION_DECL}\n"
288 << extension << "layout (${LOCAL_SIZE_X}, ${LOCAL_SIZE_Y}, ${LOCAL_SIZE_Z}) in;\n"
289 << "layout(binding = 0, std430) buffer Buffer0\n"
290 << "{\n"
291 << " uint result[];\n"
292 << "};\n"
293 << "\n"
294 << "void main (void)\n"
295 << "{\n"
296 << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
297 << " highp uint offset = globalSize.x * ((globalSize.y * "
298 "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
299 "gl_GlobalInvocationID.x;\n"
300 << bdy.str() << " result[offset] = tempResult;\n"
301 << "}\n";
302
303 programCollection.add("comp") << glu::ComputeSource(src.str());
304 }
305 else
306 {
307 {
308 const string vertex =
309 "${VERSION_DECL}\n" + extension +
310 "layout(binding = 0, std430) buffer Buffer0\n"
311 "{\n"
312 " uint result[];\n"
313 "} b0;\n"
314 "\n"
315 "void main (void)\n"
316 "{\n" +
317 bdy.str() +
318 " b0.result[gl_VertexID] = tempResult;\n"
319 " float pixelSize = 2.0f/1024.0f;\n"
320 " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
321 " gl_Position = vec4(float(gl_VertexID) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
322 "}\n";
323
324 programCollection.add("vert") << glu::VertexSource(vertex);
325 }
326
327 {
328 const string tesc = "${VERSION_DECL}\n" + extension +
329 "layout(vertices=1) out;\n"
330 "layout(binding = 1, std430) buffer Buffer1\n"
331 "{\n"
332 " uint result[];\n"
333 "} b1;\n"
334 "\n"
335 "void main (void)\n"
336 "{\n" +
337 bdy.str() +
338 " b1.result[gl_PrimitiveID] = 1u;\n"
339 " if (gl_InvocationID == 0)\n"
340 " {\n"
341 " gl_TessLevelOuter[0] = 1.0f;\n"
342 " gl_TessLevelOuter[1] = 1.0f;\n"
343 " }\n"
344 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
345 "}\n";
346
347 programCollection.add("tesc") << glu::TessellationControlSource(tesc);
348 }
349
350 {
351 const string tese = "${VERSION_DECL}\n" + extension +
352 "layout(isolines) in;\n"
353 "layout(binding = 2, std430) buffer Buffer2\n"
354 "{\n"
355 " uint result[];\n"
356 "} b2;\n"
357 "\n"
358 "void main (void)\n"
359 "{\n" +
360 bdy.str() +
361 " b2.result[gl_PrimitiveID * 2 + int(gl_TessCoord.x + 0.5)] = 1u;\n"
362 " float pixelSize = 2.0f/1024.0f;\n"
363 " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
364 "}\n";
365
366 programCollection.add("tese") << glu::TessellationEvaluationSource(tese);
367 }
368
369 {
370 const string geometry =
371 // version added by addGeometryShadersFromTemplate
372 extension +
373 "layout(${TOPOLOGY}) in;\n"
374 "layout(points, max_vertices = 1) out;\n"
375 "layout(binding = 3, std430) buffer Buffer3\n"
376 "{\n"
377 " uint result[];\n"
378 "} b3;\n"
379 "\n"
380 "void main (void)\n"
381 "{\n" +
382 bdy.str() +
383 " b3.result[gl_PrimitiveIDIn] = tempResult;\n"
384 " gl_Position = gl_in[0].gl_Position;\n"
385 " EmitVertex();\n"
386 " EndPrimitive();\n"
387 "}\n";
388
389 subgroups::addGeometryShadersFromTemplate(geometry, programCollection);
390 }
391
392 {
393 const string fragment = "${VERSION_DECL}\n" + extension +
394 "precision highp int;\n"
395 "layout(location = 0) out uint result;\n"
396 "void main (void)\n"
397 "{\n" +
398 bdy.str() +
399 " result = tempResult;\n"
400 "}\n";
401
402 programCollection.add("fragment") << glu::FragmentSource(fragment);
403 }
404 subgroups::addNoSubgroupShader(programCollection);
405 }
406 }
407
supportedCheck(Context & context,CaseDefinition caseDef)408 void supportedCheck(Context &context, CaseDefinition caseDef)
409 {
410 if (!subgroups::isSubgroupSupported(context))
411 TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
412
413 if (!subgroups::isSubgroupFeatureSupportedForDevice(context, SUBGROUP_FEATURE_BALLOT_BIT))
414 {
415 TCU_THROW(NotSupportedError, "Device does not support subgroup ballot operations");
416 }
417
418 if (OPTYPE_CLUSTERED == caseDef.opType)
419 {
420 if (!subgroups::isSubgroupFeatureSupportedForDevice(context, SUBGROUP_FEATURE_CLUSTERED_BIT))
421 {
422 TCU_THROW(NotSupportedError, "Subgroup shape tests require that clustered operations are supported!");
423 }
424 }
425
426 if (OPTYPE_QUAD == caseDef.opType)
427 {
428 if (!subgroups::isSubgroupFeatureSupportedForDevice(context, SUBGROUP_FEATURE_QUAD_BIT))
429 {
430 TCU_THROW(NotSupportedError, "Subgroup shape tests require that quad operations are supported!");
431 }
432 }
433 }
434
noSSBOtest(Context & context,const CaseDefinition caseDef)435 tcu::TestStatus noSSBOtest(Context &context, const CaseDefinition caseDef)
436 {
437 if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
438 {
439 if (subgroups::areSubgroupOperationsRequiredForStage(caseDef.shaderStage))
440 {
441 return tcu::TestStatus::fail("Shader stage " + subgroups::getShaderStageName(caseDef.shaderStage) +
442 " is required to support subgroup operations!");
443 }
444 else
445 {
446 TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
447 }
448 }
449
450 if (SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
451 return subgroups::makeVertexFrameBufferTest(context, FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages);
452 else if (SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
453 return subgroups::makeGeometryFrameBufferTest(context, FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages);
454 else if (SHADER_STAGE_TESS_CONTROL_BIT == caseDef.shaderStage)
455 return subgroups::makeTessellationEvaluationFrameBufferTest(
456 context, FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages, SHADER_STAGE_TESS_CONTROL_BIT);
457 else if (SHADER_STAGE_TESS_EVALUATION_BIT == caseDef.shaderStage)
458 return subgroups::makeTessellationEvaluationFrameBufferTest(
459 context, FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages, SHADER_STAGE_TESS_EVALUATION_BIT);
460 else
461 TCU_THROW(InternalError, "Unhandled shader stage");
462 }
463
test(Context & context,const CaseDefinition caseDef)464 tcu::TestStatus test(Context &context, const CaseDefinition caseDef)
465 {
466 if (!subgroups::isSubgroupFeatureSupportedForDevice(context, SUBGROUP_FEATURE_BASIC_BIT))
467 {
468 return tcu::TestStatus::fail("Subgroup feature " +
469 subgroups::getSubgroupFeatureName(SUBGROUP_FEATURE_BASIC_BIT) +
470 " is a required capability!");
471 }
472
473 if (SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
474 {
475 if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
476 {
477 return tcu::TestStatus::fail("Shader stage " + subgroups::getShaderStageName(caseDef.shaderStage) +
478 " is required to support subgroup operations!");
479 }
480 return subgroups::makeComputeTest(context, FORMAT_R32_UINT, DE_NULL, 0, checkComputeStage);
481 }
482 else
483 {
484 int supportedStages = context.getDeqpContext().getContextInfo().getInt(GL_SUBGROUP_SUPPORTED_STAGES_KHR);
485
486 ShaderStageFlags stages = (ShaderStageFlags)(caseDef.shaderStage & supportedStages);
487
488 if (SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
489 {
490 if ((stages & SHADER_STAGE_FRAGMENT_BIT) == 0)
491 TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
492 else
493 stages = SHADER_STAGE_FRAGMENT_BIT;
494 }
495
496 if ((ShaderStageFlags)0u == stages)
497 TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
498
499 return subgroups::allStages(context, FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages, stages);
500 }
501 }
502 } // namespace
503
createSubgroupsShapeTests(deqp::Context & testCtx)504 deqp::TestCaseGroup *createSubgroupsShapeTests(deqp::Context &testCtx)
505 {
506 de::MovePtr<deqp::TestCaseGroup> graphicGroup(
507 new deqp::TestCaseGroup(testCtx, "graphics", "Subgroup shape category tests: graphics"));
508 de::MovePtr<deqp::TestCaseGroup> computeGroup(
509 new deqp::TestCaseGroup(testCtx, "compute", "Subgroup shape category tests: compute"));
510 de::MovePtr<deqp::TestCaseGroup> framebufferGroup(
511 new deqp::TestCaseGroup(testCtx, "framebuffer", "Subgroup shape category tests: framebuffer"));
512
513 const ShaderStageFlags stages[] = {
514 SHADER_STAGE_VERTEX_BIT,
515 SHADER_STAGE_TESS_EVALUATION_BIT,
516 SHADER_STAGE_TESS_CONTROL_BIT,
517 SHADER_STAGE_GEOMETRY_BIT,
518 };
519
520 for (int opTypeIndex = 0; opTypeIndex < OPTYPE_LAST; ++opTypeIndex)
521 {
522 const std::string op = de::toLower(getOpTypeName(opTypeIndex));
523
524 {
525 const CaseDefinition caseDef = {opTypeIndex, SHADER_STAGE_COMPUTE_BIT};
526 SubgroupFactory<CaseDefinition>::addFunctionCaseWithPrograms(computeGroup.get(), op, "", supportedCheck,
527 initPrograms, test, caseDef);
528 }
529
530 {
531 const CaseDefinition caseDef = {opTypeIndex, SHADER_STAGE_ALL_GRAPHICS};
532 SubgroupFactory<CaseDefinition>::addFunctionCaseWithPrograms(graphicGroup.get(), op, "", supportedCheck,
533 initPrograms, test, caseDef);
534 }
535
536 for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
537 {
538 const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex]};
539 SubgroupFactory<CaseDefinition>::addFunctionCaseWithPrograms(
540 framebufferGroup.get(), op + "_" + getShaderStageName(caseDef.shaderStage), "", supportedCheck,
541 initFrameBufferPrograms, noSSBOtest, caseDef);
542 }
543 }
544
545 de::MovePtr<deqp::TestCaseGroup> group(new deqp::TestCaseGroup(testCtx, "shape", "Subgroup shape category tests"));
546
547 group->addChild(graphicGroup.release());
548 group->addChild(computeGroup.release());
549 group->addChild(framebufferGroup.release());
550
551 return group.release();
552 }
553
554 } // namespace subgroups
555 } // namespace glc
556