• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2019 The Khronos Group Inc.
6  * Copyright (c) 2019 Valve Corporation.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  */ /*!
21  * \file
22  * \brief VK_EXT_subgroup_size_control Tests
23  */ /*--------------------------------------------------------------------*/
24 
25 #include "vktSubgroupsSizeControlTests.hpp"
26 #include "vktSubgroupsTestsUtils.hpp"
27 #include "vktTestCaseUtil.hpp"
28 #include "tcuTestLog.hpp"
29 
30 #include <string>
31 #include <vector>
32 #include <algorithm>
33 
34 using namespace tcu;
35 using namespace std;
36 using namespace vk;
37 using namespace vkt;
38 
39 namespace
40 {
41 
// Selects which subgroup size a test case asks for; consumed by
// getRequiredSubgroupSizeFromMode().
enum RequiredSubgroupSizeMode
{
    // No explicit requirement: resolved to subgroups::getSubgroupSize().
    REQUIRED_SUBGROUP_SIZE_NONE = 0,
    // Resolved to the minSubgroupSize property (value 1).
    REQUIRED_SUBGROUP_SIZE_MIN,
    // Resolved to the maxSubgroupSize property (value 2).
    REQUIRED_SUBGROUP_SIZE_MAX,
};
48 
49 struct CaseDefinition
50 {
51     uint32_t pipelineShaderStageCreateFlags;
52     VkShaderStageFlags shaderStage;
53     bool requiresBallot;
54     uint32_t requiredSubgroupSizeMode;
55     de::SharedPtr<bool> geometryPointSizeSupported;
56     SpirvVersion spirvVersion;
57 
hasFullSubgroupsFlag__anon4227effc0111::CaseDefinition58     bool hasFullSubgroupsFlag(void) const
59     {
60         return ((pipelineShaderStageCreateFlags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT) !=
61                 0u);
62     }
63 
shaderUsesFullSubgroups__anon4227effc0111::CaseDefinition64     bool shaderUsesFullSubgroups(void) const
65     {
66         return (hasFullSubgroupsFlag() || (spirvVersion >= SPIRV_VERSION_1_6));
67     }
68 };
69 
// Parameters used to generate variants of the test groups.
// NOTE(review): none of these fields is referenced in the visible part of the
// file; the descriptions below are inferred from the names -- confirm against
// the group-creation code.
struct TestParams
{
    bool useSpirv16;   // presumably selects SPIR-V 1.6 shader builds
    bool flagsEnabled; // presumably toggles use of pipeline shader stage create flags
    string postfix;    // presumably appended to generated test/group names
};
76 
// Opaque payload handed to the result-checking callbacks (checkVertexPipelineStages,
// checkFragmentPipelineStages, checkCompute, checkComputeRequireFull) through their
// `const void *internalData` parameter. Member order matters: instances are created
// with positional aggregate initialization.
struct internalDataStruct
{
    const Context *context;              // Source of the subgroup size control properties and the test log.
    struct CaseDefinition caseDef;       // Case being verified; consulted by checkComputeRequireFull().
    const uint32_t requiredSubgroupSize; // Subgroup size the checkers compare gl_SubgroupSize against.
    const bool
        isRequiredSubgroupSize; // Indicates if the test uses VkPipelineShaderStageRequiredSubgroupSizeCreateInfo.
};
85 
// Normalizes a boolean; the result is always exactly the input value.
inline bool makeDeBool(bool value)
{
    if (value)
        return true;

    return false;
}
90 
getLocalSizes(const uint32_t maxWorkGroupSize[3],uint32_t maxWorkGroupInvocations,uint32_t numWorkGroupInvocations)91 UVec3 getLocalSizes(const uint32_t maxWorkGroupSize[3], uint32_t maxWorkGroupInvocations,
92                     uint32_t numWorkGroupInvocations)
93 {
94     DE_ASSERT(numWorkGroupInvocations <= maxWorkGroupInvocations);
95     DE_UNREF(maxWorkGroupInvocations); // For release builds.
96 
97     const uint32_t localSizeX = de::gcd(numWorkGroupInvocations, maxWorkGroupSize[0]);
98     const uint32_t localSizeY = de::gcd(std::max(numWorkGroupInvocations / localSizeX, 1u), maxWorkGroupSize[1]);
99     const uint32_t localSizeZ = std::max(numWorkGroupInvocations / (localSizeX * localSizeY), 1u);
100 
101     return UVec3(localSizeX, localSizeY, localSizeZ);
102 }
103 
getRequiredSubgroupSizeFromMode(Context & context,const CaseDefinition & caseDef,const VkPhysicalDeviceSubgroupSizeControlProperties & subgroupSizeControlProperties)104 uint32_t getRequiredSubgroupSizeFromMode(
105     Context &context, const CaseDefinition &caseDef,
106 #ifndef CTS_USES_VULKANSC
107     const VkPhysicalDeviceSubgroupSizeControlProperties &subgroupSizeControlProperties)
108 #else
109     const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT &subgroupSizeControlProperties)
110 #endif // CTS_USES_VULKANSC
111 {
112     switch (caseDef.requiredSubgroupSizeMode)
113     {
114     case REQUIRED_SUBGROUP_SIZE_MAX:
115         return subgroupSizeControlProperties.maxSubgroupSize;
116     case REQUIRED_SUBGROUP_SIZE_MIN:
117         return subgroupSizeControlProperties.minSubgroupSize;
118     case REQUIRED_SUBGROUP_SIZE_NONE:
119         return subgroups::getSubgroupSize(context);
120     default:
121         TCU_THROW(NotSupportedError, "Unsupported Subgroup size");
122     }
123 }
124 
checkVertexPipelineStages(const void * internalData,vector<const void * > datas,uint32_t width,uint32_t)125 static bool checkVertexPipelineStages(const void *internalData, vector<const void *> datas, uint32_t width, uint32_t)
126 {
127     const struct internalDataStruct *checkInternalData =
128         reinterpret_cast<const struct internalDataStruct *>(internalData);
129     const Context *context                    = checkInternalData->context;
130     const auto &subgroupSizeControlProperties = context->getSubgroupSizeControlProperties();
131 
132     TestLog &log         = context->getTestContext().getLog();
133     const uint32_t *data = reinterpret_cast<const uint32_t *>(datas[0]);
134 
135     for (uint32_t i = 0; i < width; i++)
136     {
137         if (data[i] > subgroupSizeControlProperties.maxSubgroupSize ||
138             data[i] < subgroupSizeControlProperties.minSubgroupSize)
139         {
140             log << TestLog::Message << "gl_SubgroupSize (" << data[i] << ") value is outside limits ("
141                 << subgroupSizeControlProperties.minSubgroupSize << ", "
142                 << subgroupSizeControlProperties.maxSubgroupSize << ")" << TestLog::EndMessage;
143 
144             return false;
145         }
146 
147         if (checkInternalData->isRequiredSubgroupSize && data[i] != checkInternalData->requiredSubgroupSize)
148         {
149             log << TestLog::Message << "gl_SubgroupSize (" << data[i]
150                 << ") is not equal to the required subgroup size value (" << checkInternalData->requiredSubgroupSize
151                 << ")" << TestLog::EndMessage;
152 
153             return false;
154         }
155     }
156 
157     return true;
158 }
159 
checkFragmentPipelineStages(const void * internalData,vector<const void * > datas,uint32_t width,uint32_t height,uint32_t)160 static bool checkFragmentPipelineStages(const void *internalData, vector<const void *> datas, uint32_t width,
161                                         uint32_t height, uint32_t)
162 {
163     const struct internalDataStruct *checkInternalData =
164         reinterpret_cast<const struct internalDataStruct *>(internalData);
165     const Context *context                    = checkInternalData->context;
166     const auto &subgroupSizeControlProperties = context->getSubgroupSizeControlProperties();
167     TestLog &log                              = context->getTestContext().getLog();
168     const uint32_t *data                      = reinterpret_cast<const uint32_t *>(datas[0]);
169 
170     for (uint32_t x = 0u; x < width; ++x)
171     {
172         for (uint32_t y = 0u; y < height; ++y)
173         {
174             const uint32_t ndx = (x * height + y);
175 
176             if (data[ndx] > subgroupSizeControlProperties.maxSubgroupSize ||
177                 data[ndx] < subgroupSizeControlProperties.minSubgroupSize)
178             {
179                 log << TestLog::Message << "gl_SubgroupSize (" << data[ndx] << ") value is outside limits ("
180                     << subgroupSizeControlProperties.minSubgroupSize << ", "
181                     << subgroupSizeControlProperties.maxSubgroupSize << ")" << TestLog::EndMessage;
182 
183                 return false;
184             }
185 
186             if (checkInternalData->isRequiredSubgroupSize && data[ndx] != checkInternalData->requiredSubgroupSize)
187             {
188                 log << TestLog::Message << "gl_SubgroupSize (" << data[ndx]
189                     << ") is not equal to the required subgroup size value (" << checkInternalData->requiredSubgroupSize
190                     << ")" << TestLog::EndMessage;
191 
192                 return false;
193             }
194         }
195     }
196     return true;
197 }
198 
checkCompute(const void * internalData,vector<const void * > datas,const uint32_t numWorkgroups[3],const uint32_t localSize[3],uint32_t)199 static bool checkCompute(const void *internalData, vector<const void *> datas, const uint32_t numWorkgroups[3],
200                          const uint32_t localSize[3], uint32_t)
201 {
202     const struct internalDataStruct *checkInternalData =
203         reinterpret_cast<const struct internalDataStruct *>(internalData);
204     const Context *context                    = checkInternalData->context;
205     const auto &subgroupSizeControlProperties = context->getSubgroupSizeControlProperties();
206     TestLog &log                              = context->getTestContext().getLog();
207     const uint32_t globalSizeX                = numWorkgroups[0] * localSize[0];
208     const uint32_t globalSizeY                = numWorkgroups[1] * localSize[1];
209     const uint32_t globalSizeZ                = numWorkgroups[2] * localSize[2];
210     const uint32_t width                      = globalSizeX * globalSizeY * globalSizeZ;
211     const uint32_t *data                      = reinterpret_cast<const uint32_t *>(datas[0]);
212 
213     for (uint32_t i = 0; i < width; i++)
214     {
215         if (data[i] > subgroupSizeControlProperties.maxSubgroupSize ||
216             data[i] < subgroupSizeControlProperties.minSubgroupSize)
217         {
218             log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
219                 << "gl_SubgroupSize (" << data[i] << ") value is outside limits ("
220                 << subgroupSizeControlProperties.minSubgroupSize << ", "
221                 << subgroupSizeControlProperties.maxSubgroupSize << ")" << TestLog::EndMessage;
222 
223             return false;
224         }
225 
226         if (checkInternalData->isRequiredSubgroupSize && data[i] != checkInternalData->requiredSubgroupSize)
227         {
228             log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
229                 << "gl_SubgroupSize (" << data[i] << ") is not equal to the required subgroup size value ("
230                 << checkInternalData->requiredSubgroupSize << ")" << TestLog::EndMessage;
231 
232             return false;
233         }
234     }
235 
236     return true;
237 }
238 
checkComputeRequireFull(const void * internalData,vector<const void * > datas,const uint32_t numWorkgroups[3],const uint32_t localSize[3],uint32_t)239 static bool checkComputeRequireFull(const void *internalData, vector<const void *> datas,
240                                     const uint32_t numWorkgroups[3], const uint32_t localSize[3], uint32_t)
241 {
242     const struct internalDataStruct *checkInternalData =
243         reinterpret_cast<const struct internalDataStruct *>(internalData);
244     const Context *context                    = checkInternalData->context;
245     const auto &subgroupSizeControlProperties = context->getSubgroupSizeControlProperties();
246     TestLog &log                              = context->getTestContext().getLog();
247     const uint32_t globalSizeX                = numWorkgroups[0] * localSize[0];
248     const uint32_t globalSizeY                = numWorkgroups[1] * localSize[1];
249     const uint32_t globalSizeZ                = numWorkgroups[2] * localSize[2];
250     const uint32_t width                      = globalSizeX * globalSizeY * globalSizeZ;
251     const UVec4 *data                         = reinterpret_cast<const UVec4 *>(datas[0]);
252     const uint32_t numSubgroups =
253         (localSize[0] * localSize[1] * localSize[2]) / checkInternalData->requiredSubgroupSize;
254     const bool exactSubgroupSize =
255         (checkInternalData->caseDef.shaderUsesFullSubgroups() && checkInternalData->isRequiredSubgroupSize);
256 
257     for (uint32_t i = 0; i < width; i++)
258     {
259         if (data[i].x() > subgroupSizeControlProperties.maxSubgroupSize ||
260             data[i].x() < subgroupSizeControlProperties.minSubgroupSize)
261         {
262             log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
263                 << "gl_SubgroupSize value ( " << data[i].x() << ") is outside limits ["
264                 << subgroupSizeControlProperties.minSubgroupSize << ", "
265                 << subgroupSizeControlProperties.maxSubgroupSize << "]" << TestLog::EndMessage;
266             return false;
267         }
268 
269         if (data[i].x() != data[i].y())
270         {
271             log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
272                 << "gl_SubgroupSize ( " << data[i].x() << ") does not match the active number of subgroup invocations ("
273                 << data[i].y() << ")" << TestLog::EndMessage;
274             return false;
275         }
276 
277         if (exactSubgroupSize && data[i].x() != checkInternalData->requiredSubgroupSize)
278         {
279             log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
280                 << "expected subgroupSize (" << checkInternalData->requiredSubgroupSize
281                 << ") doesn't match gl_SubgroupSize ( " << data[i].x() << ")" << TestLog::EndMessage;
282             return false;
283         }
284 
285         if (exactSubgroupSize && data[i].z() != numSubgroups)
286         {
287             log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
288                 << "expected number of subgroups dispatched (" << numSubgroups << ") doesn't match gl_NumSubgroups ("
289                 << data[i].z() << ")" << TestLog::EndMessage;
290             return false;
291         }
292     }
293 
294     return true;
295 }
296 
// Builds the GLSL programs for the framebuffer ("no SSBO") variants.
//
// Exactly one stage, caseDef.shaderStage, reads gl_SubgroupSize and forwards it
// as its output; the remaining stages of the pipeline are supplied by the
// subgroups::set*ShaderFrameBuffer() helpers. All sources are built with the
// SPIR-V version requested by the case.
void initFrameBufferPrograms(SourceCollections &programCollection, CaseDefinition caseDef)
{
    const ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, caseDef.spirvVersion, 0u);

    // The fragment case provides its own fragment shader further down.
    if (VK_SHADER_STAGE_FRAGMENT_BIT != caseDef.shaderStage)
        subgroups::setFragmentShaderFrameBuffer(programCollection);

    // The vertex and fragment cases provide their own vertex shader further down.
    if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage && VK_SHADER_STAGE_FRAGMENT_BIT != caseDef.shaderStage)
        subgroups::setVertexShaderFrameBuffer(programCollection);

    // Statement shared by every stage: capture the subgroup size.
    string bdyStr = "uint tempResult = gl_SubgroupSize;\n";

    if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
    {
        ostringstream vertex;

        vertex << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
               << "#extension GL_KHR_shader_subgroup_basic: enable\n"
               << "layout(location = 0) in highp vec4 in_position;\n"
               << "layout(location = 0) out float out_color;\n"
               << "\n"
               << "void main (void)\n"
               << "{\n"
               << bdyStr << "  out_color = float(tempResult);\n"
               << "  gl_Position = in_position;\n"
               << "  gl_PointSize = 1.0f;\n"
               << "}\n";

        programCollection.glslSources.add("vert") << glu::VertexSource(vertex.str()) << buildOptions;
    }
    else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
    {
        ostringstream geometry;

        // Note: the gl_PointSize line below has no trailing "\n", so it shares a
        // source line with EmitVertex() in the generated GLSL.
        geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
                 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
                 << "layout(points) in;\n"
                 << "layout(points, max_vertices = 1) out;\n"
                 << "layout(location = 0) out float out_color;\n"
                 << "void main (void)\n"
                 << "{\n"
                 << bdyStr << "  out_color = float(tempResult);\n"
                 << "  gl_Position = gl_in[0].gl_Position;\n"
                 << "  gl_PointSize = 1.0f;"
                 << "  EmitVertex();\n"
                 << "  EndPrimitive();\n"
                 << "}\n";

        programCollection.glslSources.add("geometry") << glu::GeometrySource(geometry.str()) << buildOptions;
    }
    else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
    {
        ostringstream controlSource;

        controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
                      << "#extension GL_KHR_shader_subgroup_basic: enable\n"
                      << "layout(vertices = 2) out;\n"
                      << "layout(location = 0) out float out_color[];\n"
                      << "\n"
                      << "void main (void)\n"
                      << "{\n"
                      << "  if (gl_InvocationID == 0)\n"
                      << "  {\n"
                      << "    gl_TessLevelOuter[0] = 1.0f;\n"
                      << "    gl_TessLevelOuter[1] = 1.0f;\n"
                      << "  }\n"
                      << bdyStr << "  out_color[gl_InvocationID ] = float(tempResult);\n"
                      << "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
                      << "}\n";

        programCollection.glslSources.add("tesc")
            << glu::TessellationControlSource(controlSource.str()) << buildOptions;
        // The matching evaluation shader comes from the shared helper.
        subgroups::setTesEvalShaderFrameBuffer(programCollection);
    }
    else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
    {
        ostringstream evaluationSource;
        evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
                         << "#extension GL_KHR_shader_subgroup_basic: enable\n"
                         << "layout(isolines, equal_spacing, ccw ) in;\n"
                         << "layout(location = 0) out float out_color;\n"
                         << "void main (void)\n"
                         << "{\n"
                         << bdyStr << "  out_color  = float(tempResult);\n"
                         << "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
                         << "}\n";

        // The matching control shader comes from the shared helper.
        subgroups::setTesCtrlShaderFrameBuffer(programCollection);
        programCollection.glslSources.add("tese")
            << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
    }
    else if (VK_SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
    {
        // Vertex shader without inputs: positions are derived from gl_VertexIndex.
        const string vertex = string(glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)) +
                              "\n"
                              "void main (void)\n"
                              "{\n"
                              "  vec2 uv = vec2(float(gl_VertexIndex & 1), float((gl_VertexIndex >> 1) & 1));\n"
                              "  gl_Position = vec4(uv * 4.0f -2.0f, 0.0f, 1.0f);\n"
                              "  gl_PointSize = 1.0f;\n"
                              "}\n";
        programCollection.glslSources.add("vert") << glu::VertexSource(vertex) << buildOptions;

        ostringstream fragmentSource;

        // Unlike the other stages, the fragment shader writes the size as a uint.
        fragmentSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
                       << "precision highp int;\n"
                       << "#extension GL_KHR_shader_subgroup_basic: enable\n"
                       << "layout(location = 0) out uint out_color;\n"
                       << "void main()\n"
                       << "{\n"
                       << bdyStr << "     out_color = tempResult;\n"
                       << "}\n";

        programCollection.glslSources.add("fragment") << glu::FragmentSource(fragmentSource.str()) << buildOptions;
    }
    else
    {
        DE_FATAL("Unsupported shader stage");
    }
}
418 
getExtHeader(const CaseDefinition &)419 string getExtHeader(const CaseDefinition &)
420 {
421     return "#extension GL_KHR_shader_subgroup_basic: enable\n";
422 }
423 
getPerStageHeadDeclarations(const CaseDefinition & caseDef)424 vector<string> getPerStageHeadDeclarations(const CaseDefinition &caseDef)
425 {
426     const uint32_t stageCount = subgroups::getStagesCount(caseDef.shaderStage);
427     const bool fragment       = (caseDef.shaderStage & VK_SHADER_STAGE_FRAGMENT_BIT) != 0;
428     vector<string> result(stageCount, string());
429 
430     if (fragment)
431         result.reserve(result.size() + 1);
432 
433     for (size_t i = 0; i < result.size(); ++i)
434     {
435         result[i] = "layout(set = 0, binding = " + de::toString(i) +
436                     ", std430) buffer Buffer1\n"
437                     "{\n"
438                     "  uint result[];\n"
439                     "};\n";
440     }
441 
442     if (fragment)
443     {
444         const string fragPart = "layout(location = 0) out uint result;\n";
445 
446         result.push_back(fragPart);
447     }
448 
449     return result;
450 }
451 
getTestSource(const CaseDefinition &)452 string getTestSource(const CaseDefinition &)
453 {
454     return "  uint tempResult = gl_SubgroupSize;\n"
455            "  tempRes = tempResult;\n";
456 }
457 
initPrograms(SourceCollections & programCollection,CaseDefinition caseDef)458 void initPrograms(SourceCollections &programCollection, CaseDefinition caseDef)
459 {
460     ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, caseDef.spirvVersion, 0u,
461                                     (caseDef.spirvVersion == vk::SPIRV_VERSION_1_4));
462     const string extHeader                = getExtHeader(caseDef);
463     const string testSrc                  = getTestSource(caseDef);
464     const vector<string> headDeclarations = getPerStageHeadDeclarations(caseDef);
465 
466     subgroups::initStdPrograms(programCollection, buildOptions, caseDef.shaderStage, VK_FORMAT_R32_UINT,
467                                *caseDef.geometryPointSizeSupported, extHeader, testSrc, "", headDeclarations);
468 }
469 
// Builds the compute shader used by the "require full subgroups" tests.
//
// Only VK_SHADER_STAGE_COMPUTE_BIT is accepted (anything else hits DE_FATAL).
// The local size comes from specialization constants 0, 1 and 2. Each
// invocation writes one uvec4 into the result buffer at its linearized global
// invocation index:
//   .x = gl_SubgroupSize
//   .y = number of active invocations, counted with a subgroup ballot
//   .z = gl_NumSubgroups
// The .w component is not written by the shader.
void initProgramsRequireFull(SourceCollections &programCollection, CaseDefinition caseDef)
{
    if (VK_SHADER_STAGE_COMPUTE_BIT != caseDef.shaderStage)
        DE_FATAL("Unsupported shader stage");

    ostringstream src;

    src << "#version 450\n"
        << "#extension GL_KHR_shader_subgroup_basic: enable\n"
        << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
        << "layout (local_size_x_id = 0, local_size_y_id = 1, "
           "local_size_z_id = 2) in;\n"
        << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
        << "{\n"
        << "  uvec4 result[];\n"
        << "};\n"
        << "\n"
        << "void main (void)\n"
        << "{\n"
        << "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
        << "  highp uint offset = globalSize.x * ((globalSize.y * "
           "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
           "gl_GlobalInvocationID.x;\n"
        << "   result[offset].x = gl_SubgroupSize;\n" // save the subgroup size value
        << "   uint numActive = subgroupBallotBitCount(subgroupBallot(true));\n"
        << "   result[offset].y = numActive;\n"       // save the number of active subgroup invocations
        << "   result[offset].z = gl_NumSubgroups;\n" // save the number of subgroups dispatched.
        << "}\n";

    // Built with the SPIR-V version requested by the case.
    programCollection.glslSources.add("comp")
        << glu::ComputeSource(src.str())
        << ShaderBuildOptions(programCollection.usedVulkanVersion, caseDef.spirvVersion, 0u);
}
503 
supportedCheck(Context & context)504 void supportedCheck(Context &context)
505 {
506     if (!subgroups::isSubgroupSupported(context))
507         TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
508 
509     context.requireDeviceFunctionality("VK_EXT_subgroup_size_control");
510 }
511 
supportedCheckFeatures(Context & context,CaseDefinition caseDef)512 void supportedCheckFeatures(Context &context, CaseDefinition caseDef)
513 {
514     supportedCheck(context);
515 
516     if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
517     {
518         TCU_THROW(NotSupportedError, "Shader stage is required to support subgroup operations!");
519     }
520 
521     if (caseDef.shaderStage == VK_SHADER_STAGE_ALL_GRAPHICS)
522     {
523         const VkPhysicalDeviceFeatures &features = context.getDeviceFeatures();
524 
525         if (!features.tessellationShader || !features.geometryShader)
526             TCU_THROW(NotSupportedError, "Device does not support tessellation or geometry shaders");
527     }
528 
529     if (caseDef.requiresBallot &&
530         !subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_BALLOT_BIT))
531     {
532         TCU_THROW(NotSupportedError, "Device does not support subgroup ballot operations");
533     }
534 
535     if (caseDef.requiredSubgroupSizeMode != REQUIRED_SUBGROUP_SIZE_NONE ||
536         caseDef.pipelineShaderStageCreateFlags == VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT)
537     {
538         if (context.getSubgroupSizeControlFeatures().subgroupSizeControl == false)
539             TCU_THROW(NotSupportedError, "Device does not support varying subgroup sizes nor required subgroup size");
540 
541         if (caseDef.requiredSubgroupSizeMode != REQUIRED_SUBGROUP_SIZE_NONE)
542         {
543             const auto &subgroupSizeControlProperties = context.getSubgroupSizeControlProperties();
544             if ((subgroupSizeControlProperties.requiredSubgroupSizeStages & caseDef.shaderStage) != caseDef.shaderStage)
545                 TCU_THROW(NotSupportedError,
546                           "Device does not support setting required subgroup size for the stages selected");
547         }
548     }
549 
550     if (caseDef.hasFullSubgroupsFlag())
551     {
552         if (context.getSubgroupSizeControlFeatures().computeFullSubgroups == false)
553             TCU_THROW(NotSupportedError, "Device does not support full subgroups in compute shaders");
554     }
555 
556     *caseDef.geometryPointSizeSupported = subgroups::isTessellationAndGeometryPointSizeSupported(context);
557 
558 #ifndef CTS_USES_VULKANSC
559     if (isAllRayTracingStages(caseDef.shaderStage))
560     {
561         context.requireDeviceFunctionality("VK_KHR_ray_tracing_pipeline");
562     }
563     else if (isAllMeshShadingStages(caseDef.shaderStage))
564     {
565         context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_VERTEX_PIPELINE_STORES_AND_ATOMICS);
566         context.requireDeviceFunctionality("VK_EXT_mesh_shader");
567 
568         if ((caseDef.shaderStage & VK_SHADER_STAGE_TASK_BIT_EXT) != 0u)
569         {
570             const auto &features = context.getMeshShaderFeaturesEXT();
571             if (!features.taskShader)
572                 TCU_THROW(NotSupportedError, "Task shaders not supported");
573         }
574     }
575 #endif // CTS_USES_VULKANSC
576 
577     if (caseDef.spirvVersion > vk::getMaxSpirvVersionForVulkan(context.getUsedApiVersion()))
578         TCU_THROW(NotSupportedError, "Shader requires SPIR-V version higher than available");
579 }
580 
supportedCheckFeaturesShader(Context & context,CaseDefinition caseDef)581 void supportedCheckFeaturesShader(Context &context, CaseDefinition caseDef)
582 {
583     supportedCheckFeatures(context, caseDef);
584 
585     subgroups::supportedCheckShader(context, caseDef.shaderStage);
586 }
587 
noSSBOtest(Context & context,const CaseDefinition caseDef)588 TestStatus noSSBOtest(Context &context, const CaseDefinition caseDef)
589 {
590     const VkFormat format                        = VK_FORMAT_R32_UINT;
591     const uint32_t &flags                        = caseDef.pipelineShaderStageCreateFlags;
592     const struct internalDataStruct internalData = {
593         &context,
594         caseDef,
595         0u,
596         false,
597     };
598 
599     switch (caseDef.shaderStage)
600     {
601     case VK_SHADER_STAGE_VERTEX_BIT:
602         return subgroups::makeVertexFrameBufferTestRequiredSubgroupSize(context, format, nullptr, 0, &internalData,
603                                                                         checkVertexPipelineStages, flags, 0u);
604     case VK_SHADER_STAGE_GEOMETRY_BIT:
605         return subgroups::makeGeometryFrameBufferTestRequiredSubgroupSize(context, format, nullptr, 0, &internalData,
606                                                                           checkVertexPipelineStages, flags, 0u);
607     case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
608         return subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(
609             context, format, nullptr, 0, &internalData, checkVertexPipelineStages, caseDef.shaderStage, flags, 0u);
610     case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
611         return subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(
612             context, format, nullptr, 0, &internalData, checkVertexPipelineStages, caseDef.shaderStage, flags, 0u);
613     case VK_SHADER_STAGE_FRAGMENT_BIT:
614         return subgroups::makeFragmentFrameBufferTestRequiredSubgroupSize(context, format, nullptr, 0, &internalData,
615                                                                           checkFragmentPipelineStages, flags, 0u);
616     default:
617         TCU_THROW(InternalError, "Unhandled shader stage");
618     }
619 }
620 
test(Context & context,const CaseDefinition caseDef)621 TestStatus test(Context &context, const CaseDefinition caseDef)
622 {
623     if (isAllComputeStages(caseDef.shaderStage))
624     {
625         const uint32_t numWorkgroups[3]      = {1, 1, 1};
626         const uint32_t subgroupSize          = subgroups::getSubgroupSize(context);
627         const auto &physicalDeviceProperties = context.getDeviceProperties();
628         const auto &maxWorkGroupSize         = physicalDeviceProperties.limits.maxComputeWorkGroupSize;
629         const auto &maxInvocations           = physicalDeviceProperties.limits.maxComputeWorkGroupInvocations;
630         // Calculate the local workgroup sizes to exercise the maximum supported by the driver
631         const UVec3 localSize                = getLocalSizes(maxWorkGroupSize, maxInvocations, maxInvocations);
632         const uint32_t localSizesToTestCount = 16;
633         const uint32_t localSizesToTest[localSizesToTestCount][3] = {
634             {1, 1, 1},
635             {32, 4, 1},
636             {32, 1, 4},
637             {1, 32, 4},
638             {1, 4, 32},
639             {4, 1, 32},
640             {4, 32, 1},
641             {subgroupSize, 1, 1},
642             {1, subgroupSize, 1},
643             {1, 1, subgroupSize},
644             {3, 5, 7},
645             {128, 1, 1},
646             {1, 128, 1},
647             {1, 1, 64},
648             {localSize.x(), localSize.y(), localSize.z()},
649             {1, 1, 1} // Isn't used, just here to make double buffering checks easier
650         };
651         const struct internalDataStruct internalData = {
652             &context,
653             caseDef,
654             subgroupSize,
655             false,
656         };
657 
658         return subgroups::makeComputeTestRequiredSubgroupSize(
659             context, VK_FORMAT_R32_UINT, nullptr, 0, &internalData, checkCompute,
660             caseDef.pipelineShaderStageCreateFlags, numWorkgroups, makeDeBool(internalData.isRequiredSubgroupSize),
661             subgroupSize, localSizesToTest, localSizesToTestCount);
662     }
663 #ifndef CTS_USES_VULKANSC
664     else if (isAllMeshShadingStages(caseDef.shaderStage))
665     {
666         const bool isMesh = ((caseDef.shaderStage & VK_SHADER_STAGE_MESH_BIT_EXT) != 0u);
667         const bool isTask = ((caseDef.shaderStage & VK_SHADER_STAGE_TASK_BIT_EXT) != 0u);
668 
669         DE_ASSERT(isMesh != isTask);
670         DE_UNREF(isTask); // For release builds.
671 
672         const uint32_t numWorkgroups[3] = {1, 1, 1};
673         const uint32_t subgroupSize     = subgroups::getSubgroupSize(context);
674         const auto &meshProperties      = context.getMeshShaderPropertiesEXT();
675         const auto &maxWorkGroupSize =
676             (isMesh ? meshProperties.maxMeshWorkGroupSize : meshProperties.maxTaskWorkGroupSize);
677         const auto &maxInvocations =
678             (isMesh ? meshProperties.maxMeshWorkGroupInvocations : meshProperties.maxTaskWorkGroupInvocations);
679         // Calculate the local workgroup sizes to exercise the maximum supported by the driver
680         const UVec3 localSize                = getLocalSizes(maxWorkGroupSize, maxInvocations, maxInvocations);
681         const uint32_t localSizesToTestCount = 16;
682         const uint32_t localSizesToTest[localSizesToTestCount][3] = {
683             {1, 1, 1},
684             {32, 4, 1},
685             {32, 1, 4},
686             {1, 32, 4},
687             {1, 4, 32},
688             {4, 1, 32},
689             {4, 32, 1},
690             {subgroupSize, 1, 1},
691             {1, subgroupSize, 1},
692             {1, 1, subgroupSize},
693             {3, 5, 7},
694             {128, 1, 1},
695             {1, 128, 1},
696             {1, 1, 64},
697             {localSize.x(), localSize.y(), localSize.z()},
698             {1, 1, 1} // Isn't used, just here to make double buffering checks easier
699         };
700         const struct internalDataStruct internalData = {
701             &context,
702             caseDef,
703             subgroupSize,
704             false,
705         };
706 
707         return subgroups::makeMeshTestRequiredSubgroupSize(
708             context, VK_FORMAT_R32_UINT, nullptr, 0, &internalData, checkCompute,
709             caseDef.pipelineShaderStageCreateFlags, numWorkgroups, makeDeBool(internalData.isRequiredSubgroupSize),
710             subgroupSize, localSizesToTest, localSizesToTestCount);
711     }
712 #endif // CTS_USES_VULKANSC
713     else if (isAllGraphicsStages(caseDef.shaderStage))
714     {
715         const VkShaderStageFlags stages = subgroups::getPossibleGraphicsSubgroupStages(context, caseDef.shaderStage);
716         struct internalDataStruct internalData = {
717             &context,
718             caseDef,
719             0u,
720             false,
721         };
722 
723         return subgroups::allStagesRequiredSubgroupSize(
724             context, VK_FORMAT_R32_UINT, nullptr, 0, &internalData, checkVertexPipelineStages, stages,
725             caseDef.pipelineShaderStageCreateFlags, caseDef.pipelineShaderStageCreateFlags,
726             caseDef.pipelineShaderStageCreateFlags, caseDef.pipelineShaderStageCreateFlags,
727             caseDef.pipelineShaderStageCreateFlags, nullptr);
728     }
729 #ifndef CTS_USES_VULKANSC
730     else if (isAllRayTracingStages(caseDef.shaderStage))
731     {
732         const VkShaderStageFlags stages = subgroups::getPossibleRayTracingSubgroupStages(context, caseDef.shaderStage);
733         const vector<uint32_t> flags(6, caseDef.pipelineShaderStageCreateFlags);
734         const struct internalDataStruct internalData = {
735             &context,
736             caseDef,
737             0u,
738             false,
739         };
740 
741         return subgroups::allRayTracingStagesRequiredSubgroupSize(context, VK_FORMAT_R32_UINT, nullptr, 0,
742                                                                   &internalData, checkVertexPipelineStages, stages,
743                                                                   flags.data(), nullptr);
744     }
745 #endif // CTS_USES_VULKANSC
746     else
747         TCU_THROW(InternalError, "Unknown stage or invalid stage set");
748 }
749 
testRequireFullSubgroups(Context & context,const CaseDefinition caseDef)750 TestStatus testRequireFullSubgroups(Context &context, const CaseDefinition caseDef)
751 {
752     DE_ASSERT(VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage);
753     DE_ASSERT(caseDef.requiredSubgroupSizeMode == REQUIRED_SUBGROUP_SIZE_NONE);
754 
755     const uint32_t numWorkgroups[3]                            = {1, 1, 1};
756     const auto &subgroupSizeControlProperties                  = context.getSubgroupSizeControlProperties();
757     const VkPhysicalDeviceProperties &physicalDeviceProperties = context.getDeviceProperties();
758     // Calculate the local workgroup sizes to exercise the maximum supported by the driver
759     const auto &maxWorkGroupSize = physicalDeviceProperties.limits.maxComputeWorkGroupSize;
760     const auto &maxInvocations   = physicalDeviceProperties.limits.maxComputeWorkGroupInvocations;
761     const UVec3 localSize        = getLocalSizes(maxWorkGroupSize, maxInvocations, maxInvocations);
762     const uint32_t subgroupSize  = subgroups::getSubgroupSize(context);
763     // For full subgroups and allow varying subgroup size, localsize X must be a multiple of maxSubgroupSize.
764     // We set local size X for this test to the maximum, regardless if allow varying subgroup size is enabled or not.
765     const uint32_t localSizesToTestCount                      = 7;
766     const uint32_t localSizesToTest[localSizesToTestCount][3] = {
767         {subgroupSizeControlProperties.maxSubgroupSize, 1, 1},
768         {subgroupSizeControlProperties.maxSubgroupSize, 4, 1},
769         {subgroupSizeControlProperties.maxSubgroupSize, 1, 4},
770         {subgroupSizeControlProperties.maxSubgroupSize * 2, 1, 2},
771         {subgroupSizeControlProperties.maxSubgroupSize * 4, 1, 1},
772         {localSize.x(), localSize.y(), localSize.z()},
773         {1, 1, 1} // Isn't used, just here to make double buffering checks easier
774     };
775     const struct internalDataStruct internalData = {
776         &context,
777         caseDef,
778         subgroupSize,
779         false,
780     };
781 
782     DE_ASSERT(caseDef.requiredSubgroupSizeMode == REQUIRED_SUBGROUP_SIZE_NONE);
783 
784     return subgroups::makeComputeTestRequiredSubgroupSize(
785         context, VK_FORMAT_R32G32B32A32_UINT, nullptr, 0, &internalData, checkComputeRequireFull,
786         caseDef.pipelineShaderStageCreateFlags, numWorkgroups, makeDeBool(internalData.isRequiredSubgroupSize),
787         subgroupSize, localSizesToTest, localSizesToTestCount);
788 }
789 
testRequireSubgroupSize(Context & context,const CaseDefinition caseDef)790 TestStatus testRequireSubgroupSize(Context &context, const CaseDefinition caseDef)
791 {
792     if (isAllComputeStages(caseDef.shaderStage))
793     {
794         const uint32_t numWorkgroups[3]                            = {1, 1, 1};
795         const auto &subgroupSizeControlProperties                  = context.getSubgroupSizeControlProperties();
796         const VkPhysicalDeviceProperties &physicalDeviceProperties = context.getDeviceProperties();
797         const uint32_t requiredSubgroupSize =
798             getRequiredSubgroupSizeFromMode(context, caseDef, subgroupSizeControlProperties);
799         const uint64_t maxSubgroupLimitSize =
800             (uint64_t)requiredSubgroupSize * subgroupSizeControlProperties.maxComputeWorkgroupSubgroups;
801         const uint32_t maxTotalLocalSize = (uint32_t)min<uint64_t>(
802             maxSubgroupLimitSize, physicalDeviceProperties.limits.maxComputeWorkGroupInvocations);
803         const auto &maxWorkGroupSize          = physicalDeviceProperties.limits.maxComputeWorkGroupSize;
804         const auto &maxInvocations            = physicalDeviceProperties.limits.maxComputeWorkGroupInvocations;
805         const UVec3 localSize                 = getLocalSizes(maxWorkGroupSize, maxInvocations, maxTotalLocalSize);
806         const bool shaderUsesFullSubgroups    = caseDef.shaderUsesFullSubgroups();
807         const uint32_t localSizesToTest[5][3] = {
808             {localSize.x(), localSize.y(), localSize.z()},
809             {requiredSubgroupSize, 1, 1},
810             {1, requiredSubgroupSize, 1},
811             {1, 1, requiredSubgroupSize},
812             {1, 1, 1} // Isn't used, just here to make double buffering checks easier
813         };
814 
815         // If the shader uses full subgroups, use only the first two entries so the local size in X is a multiple of the requested
816         // subgroup size, as required by the spec.
817         uint32_t localSizesToTestCount = 5;
818         if (shaderUsesFullSubgroups)
819             localSizesToTestCount = 3;
820 
821         const internalDataStruct internalData = {
822             &context,             //  const Context* context;
823             caseDef,              //  struct CaseDefinition caseDef;
824             requiredSubgroupSize, //  uint32_t requiredSubgroupSize;
825             true,                 // bool isRequiredSubgroupSize;
826         };
827 
828         // Depending on the flag and SPIR-V version we need to run one verification function or another.
829         const auto checkFunction = (shaderUsesFullSubgroups ? checkComputeRequireFull : checkCompute);
830 
831         return subgroups::makeComputeTestRequiredSubgroupSize(
832             context, VK_FORMAT_R32G32B32A32_UINT, nullptr, 0, &internalData, checkFunction,
833             caseDef.pipelineShaderStageCreateFlags, numWorkgroups, makeDeBool(internalData.isRequiredSubgroupSize),
834             requiredSubgroupSize, localSizesToTest, localSizesToTestCount);
835     }
836 #ifndef CTS_USES_VULKANSC
837     else if (isAllMeshShadingStages(caseDef.shaderStage))
838     {
839         const auto isMesh = ((caseDef.shaderStage & VK_SHADER_STAGE_MESH_BIT_EXT) != 0u);
840         const auto isTask = ((caseDef.shaderStage & VK_SHADER_STAGE_TASK_BIT_EXT) != 0u);
841 
842         DE_ASSERT(isMesh != isTask);
843         DE_UNREF(isTask); // For release builds.
844 
845         const uint32_t numWorkgroups[3]           = {1, 1, 1};
846         const auto &subgroupSizeControlProperties = context.getSubgroupSizeControlProperties();
847         const auto &meshProperties                = context.getMeshShaderPropertiesEXT();
848         const uint32_t requiredSubgroupSize =
849             getRequiredSubgroupSizeFromMode(context, caseDef, subgroupSizeControlProperties);
850         const auto &maxWorkGroupSize =
851             (isMesh ? meshProperties.maxMeshWorkGroupSize : meshProperties.maxTaskWorkGroupSize);
852         const auto &maxInvocations =
853             (isMesh ? meshProperties.maxMeshWorkGroupInvocations : meshProperties.maxTaskWorkGroupInvocations);
854         const UVec3 localSize                 = getLocalSizes(maxWorkGroupSize, maxInvocations, maxInvocations);
855         const bool shaderUsesFullSubgroups    = caseDef.shaderUsesFullSubgroups();
856         const uint32_t localSizesToTest[5][3] = {
857             {requiredSubgroupSize, 1, 1},
858             {1, requiredSubgroupSize, 1},
859             {1, 1, requiredSubgroupSize},
860             {localSize.x(), localSize.y(), localSize.z()},
861             {1, 1, 1} // Isn't used, just here to make double buffering checks easier
862         };
863 
864         // If the shader uses full subgroups, use only the first two entries so the local size in X is a multiple of the requested
865         // subgroup size, as required by the spec.
866         uint32_t localSizesToTestCount = 5;
867         if (shaderUsesFullSubgroups)
868             localSizesToTestCount = 3;
869 
870         const internalDataStruct internalData = {
871             &context,             //  const Context* context;
872             caseDef,              //  struct CaseDefinition caseDef;
873             requiredSubgroupSize, //  uint32_t requiredSubgroupSize;
874             true,                 //  bool isRequiredSubgroupSize;
875         };
876 
877         // Depending on the flag and SPIR-V version we need to run one verification function or another.
878         const auto checkFunction = (shaderUsesFullSubgroups ? checkComputeRequireFull : checkCompute);
879 
880         return subgroups::makeMeshTestRequiredSubgroupSize(
881             context, VK_FORMAT_R32G32B32A32_UINT, nullptr, 0, &internalData, checkFunction,
882             caseDef.pipelineShaderStageCreateFlags, numWorkgroups, makeDeBool(internalData.isRequiredSubgroupSize),
883             requiredSubgroupSize, localSizesToTest, localSizesToTestCount);
884     }
885 #endif // CTS_USES_VULKANSC
886     else if (isAllGraphicsStages(caseDef.shaderStage))
887     {
888         const VkShaderStageFlags stages = subgroups::getPossibleGraphicsSubgroupStages(context, caseDef.shaderStage);
889         const auto &subgroupSizeControlProperties = context.getSubgroupSizeControlProperties();
890         const uint32_t requiredSubgroupSize =
891             getRequiredSubgroupSizeFromMode(context, caseDef, subgroupSizeControlProperties);
892         const uint32_t requiredSubgroupSizes[5] = {requiredSubgroupSize, requiredSubgroupSize, requiredSubgroupSize,
893                                                    requiredSubgroupSize, requiredSubgroupSize};
894         const internalDataStruct internalData   = {
895             &context,             //  const Context* context;
896             caseDef,              //  struct CaseDefinition caseDef;
897             requiredSubgroupSize, //  uint32_t requiredSubgroupSize;
898             true,                 //  bool isRequiredSubgroupSize;
899         };
900 
901         return subgroups::allStagesRequiredSubgroupSize(
902             context, VK_FORMAT_R32_UINT, nullptr, 0, &internalData, checkVertexPipelineStages, stages,
903             caseDef.pipelineShaderStageCreateFlags, caseDef.pipelineShaderStageCreateFlags,
904             caseDef.pipelineShaderStageCreateFlags, caseDef.pipelineShaderStageCreateFlags,
905             caseDef.pipelineShaderStageCreateFlags, requiredSubgroupSizes);
906     }
907 #ifndef CTS_USES_VULKANSC
908     else if (isAllRayTracingStages(caseDef.shaderStage))
909     {
910         const VkShaderStageFlags stages = subgroups::getPossibleRayTracingSubgroupStages(context, caseDef.shaderStage);
911         const VkPhysicalDeviceSubgroupSizeControlProperties &subgroupSizeControlProperties =
912             context.getSubgroupSizeControlProperties();
913         const uint32_t requiredSubgroupSize =
914             getRequiredSubgroupSizeFromMode(context, caseDef, subgroupSizeControlProperties);
915         const vector<uint32_t> flags(6, caseDef.pipelineShaderStageCreateFlags);
916         const vector<uint32_t> requiredSubgroupSizes(6, requiredSubgroupSize);
917         const struct internalDataStruct internalData = {
918             &context,             //  const Context* context;
919             caseDef,              //  struct CaseDefinition caseDef;
920             requiredSubgroupSize, //  uint32_t requiredSubgroupSize;
921             true,                 //  bool isRequiredSubgroupSize;
922         };
923 
924         return subgroups::allRayTracingStagesRequiredSubgroupSize(context, VK_FORMAT_R32_UINT, nullptr, 0,
925                                                                   &internalData, checkVertexPipelineStages, stages,
926                                                                   flags.data(), requiredSubgroupSizes.data());
927     }
928 #endif // CTS_USES_VULKANSC
929     else
930         TCU_THROW(InternalError, "Unknown stage or invalid stage set");
931 }
932 
noSSBOtestRequireSubgroupSize(Context & context,const CaseDefinition caseDef)933 TestStatus noSSBOtestRequireSubgroupSize(Context &context, const CaseDefinition caseDef)
934 {
935     const auto &subgroupSizeControlProperties = context.getSubgroupSizeControlProperties();
936     const uint32_t requiredSubgroupSize =
937         getRequiredSubgroupSizeFromMode(context, caseDef, subgroupSizeControlProperties);
938     const VkFormat format                  = VK_FORMAT_R32_UINT;
939     const uint32_t &flags                  = caseDef.pipelineShaderStageCreateFlags;
940     const uint32_t &size                   = requiredSubgroupSize;
941     struct internalDataStruct internalData = {
942         &context,
943         caseDef,
944         requiredSubgroupSize,
945         true,
946     };
947 
948     switch (caseDef.shaderStage)
949     {
950     case VK_SHADER_STAGE_VERTEX_BIT:
951         return subgroups::makeVertexFrameBufferTestRequiredSubgroupSize(context, format, nullptr, 0, &internalData,
952                                                                         checkVertexPipelineStages, flags, size);
953     case VK_SHADER_STAGE_GEOMETRY_BIT:
954         return subgroups::makeGeometryFrameBufferTestRequiredSubgroupSize(context, format, nullptr, 0, &internalData,
955                                                                           checkVertexPipelineStages, flags, size);
956     case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
957         return subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(
958             context, format, nullptr, 0, &internalData, checkVertexPipelineStages, caseDef.shaderStage, flags, size);
959     case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
960         return subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(
961             context, format, nullptr, 0, &internalData, checkVertexPipelineStages, caseDef.shaderStage, flags, size);
962     case VK_SHADER_STAGE_FRAGMENT_BIT:
963         return subgroups::makeFragmentFrameBufferTestRequiredSubgroupSize(context, format, nullptr, 0, &internalData,
964                                                                           checkFragmentPipelineStages, flags, size);
965     default:
966         TCU_THROW(InternalError, "Unhandled shader stage");
967     }
968 }
969 
testSanitySubgroupSizeProperties(Context & context)970 TestStatus testSanitySubgroupSizeProperties(Context &context)
971 {
972 #ifndef CTS_USES_VULKANSC
973     VkPhysicalDeviceSubgroupSizeControlProperties subgroupSizeControlProperties;
974     subgroupSizeControlProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES;
975 #else
976     VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroupSizeControlProperties;
977     subgroupSizeControlProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT;
978 #endif // CTS_USES_VULKANSC
979 
980     subgroupSizeControlProperties.pNext = nullptr;
981 
982     VkPhysicalDeviceSubgroupProperties subgroupProperties;
983     subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
984     subgroupProperties.pNext = &subgroupSizeControlProperties;
985 
986     VkPhysicalDeviceProperties2 properties;
987     properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
988     properties.pNext = &subgroupProperties;
989 
990     context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
991 
992     if (subgroupProperties.subgroupSize > subgroupSizeControlProperties.maxSubgroupSize ||
993         subgroupProperties.subgroupSize < subgroupSizeControlProperties.minSubgroupSize)
994     {
995         ostringstream error;
996         error << "subgroupSize (" << subgroupProperties.subgroupSize << ") is not between maxSubgroupSize (";
997         error << subgroupSizeControlProperties.maxSubgroupSize << ") and minSubgroupSize (";
998         error << subgroupSizeControlProperties.minSubgroupSize << ")";
999 
1000         return TestStatus::fail(error.str().c_str());
1001     }
1002 
1003     return TestStatus::pass("OK");
1004 }
1005 } // namespace
1006 
1007 namespace vkt
1008 {
1009 namespace subgroups
1010 {
createSubgroupsSizeControlTests(TestContext & testCtx)1011 TestCaseGroup *createSubgroupsSizeControlTests(TestContext &testCtx)
1012 {
1013     de::MovePtr<TestCaseGroup> group(new TestCaseGroup(testCtx, "size_control"));
1014     de::MovePtr<TestCaseGroup> framebufferGroup(new TestCaseGroup(testCtx, "framebuffer"));
1015     de::MovePtr<TestCaseGroup> computeGroup(new TestCaseGroup(testCtx, "compute"));
1016     de::MovePtr<TestCaseGroup> graphicsGroup(new TestCaseGroup(testCtx, "graphics"));
1017 #ifndef CTS_USES_VULKANSC
1018     de::MovePtr<TestCaseGroup> raytracingGroup(new TestCaseGroup(testCtx, "ray_tracing"));
1019     de::MovePtr<TestCaseGroup> meshGroup(new TestCaseGroup(testCtx, "mesh"));
1020 #endif // CTS_USES_VULKANSC
1021     de::MovePtr<TestCaseGroup> genericGroup(new TestCaseGroup(testCtx, "generic"));
1022     const VkShaderStageFlags fbStages[] = {
1023         VK_SHADER_STAGE_VERTEX_BIT,
1024         VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
1025         VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
1026         VK_SHADER_STAGE_GEOMETRY_BIT,
1027         VK_SHADER_STAGE_FRAGMENT_BIT,
1028     };
1029 #ifndef CTS_USES_VULKANSC
1030     const VkShaderStageFlags meshStages[] = {
1031         VK_SHADER_STAGE_MESH_BIT_EXT,
1032         VK_SHADER_STAGE_TASK_BIT_EXT,
1033     };
1034 #endif // CTS_USES_VULKANSC
1035 
1036     // Test sanity of the subgroup size properties.
1037     {
1038         addFunctionCase(genericGroup.get(), "subgroup_size_properties", supportedCheck,
1039                         testSanitySubgroupSizeProperties);
1040     }
1041 
1042     const TestParams testParams[] = {{false, true, ""}, {true, false, "_spirv16"}, {true, true, "_flags_spirv16"}};
1043 
1044     for (const auto &params : testParams)
1045     {
1046         // Allow varying subgroup cases.
1047         const uint32_t flagsVary         = VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT;
1048         const CaseDefinition caseDefVary = {params.flagsEnabled ? flagsVary : 0u,
1049                                             VK_SHADER_STAGE_COMPUTE_BIT,
1050                                             false,
1051                                             REQUIRED_SUBGROUP_SIZE_NONE,
1052                                             de::SharedPtr<bool>(new bool),
1053                                             params.useSpirv16 ? SPIRV_VERSION_1_6 : SPIRV_VERSION_1_3};
1054 
1055         addFunctionCaseWithPrograms(computeGroup.get(), "allow_varying_subgroup_size" + params.postfix,
1056                                     supportedCheckFeatures, initPrograms, test, caseDefVary);
1057         addFunctionCaseWithPrograms(graphicsGroup.get(), "allow_varying_subgroup_size" + params.postfix,
1058                                     supportedCheckFeaturesShader, initPrograms, test, caseDefVary);
1059 
1060         for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(fbStages); ++stageIndex)
1061         {
1062             const CaseDefinition caseDefStage = {params.flagsEnabled ? flagsVary : 0u,
1063                                                  fbStages[stageIndex],
1064                                                  false,
1065                                                  REQUIRED_SUBGROUP_SIZE_NONE,
1066                                                  de::SharedPtr<bool>(new bool),
1067                                                  params.useSpirv16 ? SPIRV_VERSION_1_6 : SPIRV_VERSION_1_3};
1068 
1069             string name =
1070                 getShaderStageName(caseDefStage.shaderStage) + "_allow_varying_subgroup_size" + params.postfix;
1071             addFunctionCaseWithPrograms(framebufferGroup.get(), name, supportedCheckFeaturesShader,
1072                                         initFrameBufferPrograms, noSSBOtest, caseDefStage);
1073         }
1074 
1075 #ifndef CTS_USES_VULKANSC
1076         for (const auto &stage : meshStages)
1077         {
1078             const CaseDefinition caseDefMesh = {(params.flagsEnabled ? flagsVary : 0u),
1079                                                 stage,
1080                                                 false,
1081                                                 REQUIRED_SUBGROUP_SIZE_NONE,
1082                                                 de::SharedPtr<bool>(new bool),
1083                                                 (params.useSpirv16 ? SPIRV_VERSION_1_6 : SPIRV_VERSION_1_4)};
1084             const std::string name = getShaderStageName(stage) + "_allow_varying_subgroup_size" + params.postfix;
1085             addFunctionCaseWithPrograms(meshGroup.get(), name, supportedCheckFeatures, initPrograms, test, caseDefMesh);
1086         }
1087 #endif // CTS_USES_VULKANSC
1088 
1089         // Require full subgroups together with allow varying subgroup (only compute shaders).
1090         const uint32_t flagsFullVary = VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT |
1091                                        VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT;
1092         const CaseDefinition caseDefFullVary = {params.flagsEnabled ? flagsFullVary : 0u,
1093                                                 VK_SHADER_STAGE_COMPUTE_BIT,
1094                                                 true,
1095                                                 REQUIRED_SUBGROUP_SIZE_NONE,
1096                                                 de::SharedPtr<bool>(new bool),
1097                                                 params.useSpirv16 ? SPIRV_VERSION_1_6 : SPIRV_VERSION_1_3};
1098         addFunctionCaseWithPrograms(
1099             computeGroup.get(), "require_full_subgroups_allow_varying_subgroup_size" + params.postfix,
1100             supportedCheckFeatures, initProgramsRequireFull, testRequireFullSubgroups, caseDefFullVary);
1101 
1102         // Require full subgroups cases (only compute shaders).
1103         const uint32_t flagsFull         = VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT;
1104         const CaseDefinition caseDefFull = {params.flagsEnabled ? flagsFull : 0u,
1105                                             VK_SHADER_STAGE_COMPUTE_BIT,
1106                                             true,
1107                                             REQUIRED_SUBGROUP_SIZE_NONE,
1108                                             de::SharedPtr<bool>(new bool),
1109                                             params.useSpirv16 ? SPIRV_VERSION_1_6 : SPIRV_VERSION_1_3};
1110         addFunctionCaseWithPrograms(computeGroup.get(), "require_full_subgroups" + params.postfix,
1111                                     supportedCheckFeatures, initProgramsRequireFull, testRequireFullSubgroups,
1112                                     caseDefFull);
1113 
1114         // Tests to check setting a required subgroup size value, together with require full subgroups (only compute shaders).
1115         const CaseDefinition caseDefMaxFull = {params.flagsEnabled ? flagsFull : 0u,
1116                                                VK_SHADER_STAGE_COMPUTE_BIT,
1117                                                true,
1118                                                REQUIRED_SUBGROUP_SIZE_MAX,
1119                                                de::SharedPtr<bool>(new bool),
1120                                                params.useSpirv16 ? SPIRV_VERSION_1_6 : SPIRV_VERSION_1_3};
1121         addFunctionCaseWithPrograms(
1122             computeGroup.get(), "required_subgroup_size_max_require_full_subgroups" + params.postfix,
1123             supportedCheckFeatures, initProgramsRequireFull, testRequireSubgroupSize, caseDefMaxFull);
1124 
1125         const CaseDefinition caseDefMinFull = {params.flagsEnabled ? flagsFull : 0u,
1126                                                VK_SHADER_STAGE_COMPUTE_BIT,
1127                                                true,
1128                                                REQUIRED_SUBGROUP_SIZE_MIN,
1129                                                de::SharedPtr<bool>(new bool),
1130                                                params.useSpirv16 ? SPIRV_VERSION_1_6 : SPIRV_VERSION_1_3};
1131         addFunctionCaseWithPrograms(
1132             computeGroup.get(), "required_subgroup_size_min_require_full_subgroups" + params.postfix,
1133             supportedCheckFeatures, initProgramsRequireFull, testRequireSubgroupSize, caseDefMinFull);
1134 
1135         // Ray tracing cases with allow varying subgroup.
1136 #ifndef CTS_USES_VULKANSC
1137         const uint32_t flagsRayTracing            = VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT;
1138         const CaseDefinition caseDefAllRaytracing = {params.flagsEnabled ? flagsRayTracing : 0u,
1139                                                      SHADER_STAGE_ALL_RAY_TRACING,
1140                                                      false,
1141                                                      REQUIRED_SUBGROUP_SIZE_NONE,
1142                                                      de::SharedPtr<bool>(new bool),
1143                                                      params.useSpirv16 ? SPIRV_VERSION_1_6 : SPIRV_VERSION_1_4};
1144         addFunctionCaseWithPrograms(raytracingGroup.get(), "allow_varying_subgroup_size" + params.postfix,
1145                                     supportedCheckFeaturesShader, initPrograms, test, caseDefAllRaytracing);
1146 #endif // CTS_USES_VULKANSC
1147     }
1148 
1149     // Tests to check setting a required subgroup size value.
1150     {
1151         const CaseDefinition caseDefAllGraphicsMax = {0u,
1152                                                       VK_SHADER_STAGE_ALL_GRAPHICS,
1153                                                       false,
1154                                                       REQUIRED_SUBGROUP_SIZE_MAX,
1155                                                       de::SharedPtr<bool>(new bool),
1156                                                       SPIRV_VERSION_1_3};
1157         addFunctionCaseWithPrograms(graphicsGroup.get(), "required_subgroup_size_max", supportedCheckFeaturesShader,
1158                                     initPrograms, testRequireSubgroupSize, caseDefAllGraphicsMax);
1159         const CaseDefinition caseDefComputeMax = {0u,
1160                                                   VK_SHADER_STAGE_COMPUTE_BIT,
1161                                                   false,
1162                                                   REQUIRED_SUBGROUP_SIZE_MAX,
1163                                                   de::SharedPtr<bool>(new bool),
1164                                                   SPIRV_VERSION_1_3};
1165         addFunctionCaseWithPrograms(computeGroup.get(), "required_subgroup_size_max", supportedCheckFeatures,
1166                                     initPrograms, testRequireSubgroupSize, caseDefComputeMax);
1167 #ifndef CTS_USES_VULKANSC
1168         const CaseDefinition caseDefAllRaytracingMax = {0u,
1169                                                         SHADER_STAGE_ALL_RAY_TRACING,
1170                                                         false,
1171                                                         REQUIRED_SUBGROUP_SIZE_MAX,
1172                                                         de::SharedPtr<bool>(new bool),
1173                                                         SPIRV_VERSION_1_4};
1174         addFunctionCaseWithPrograms(raytracingGroup.get(), "required_subgroup_size_max", supportedCheckFeaturesShader,
1175                                     initPrograms, testRequireSubgroupSize, caseDefAllRaytracingMax);
1176 #endif // CTS_USES_VULKANSC
1177 
1178         const CaseDefinition caseDefAllGraphicsMin = {0u,
1179                                                       VK_SHADER_STAGE_ALL_GRAPHICS,
1180                                                       false,
1181                                                       REQUIRED_SUBGROUP_SIZE_MIN,
1182                                                       de::SharedPtr<bool>(new bool),
1183                                                       SPIRV_VERSION_1_3};
1184         addFunctionCaseWithPrograms(graphicsGroup.get(), "required_subgroup_size_min", supportedCheckFeaturesShader,
1185                                     initPrograms, testRequireSubgroupSize, caseDefAllGraphicsMin);
1186         const CaseDefinition caseDefComputeMin = {0u,
1187                                                   VK_SHADER_STAGE_COMPUTE_BIT,
1188                                                   false,
1189                                                   REQUIRED_SUBGROUP_SIZE_MIN,
1190                                                   de::SharedPtr<bool>(new bool),
1191                                                   SPIRV_VERSION_1_3};
1192         addFunctionCaseWithPrograms(computeGroup.get(), "required_subgroup_size_min", supportedCheckFeatures,
1193                                     initPrograms, testRequireSubgroupSize, caseDefComputeMin);
1194 #ifndef CTS_USES_VULKANSC
1195         const CaseDefinition caseDefAllRaytracingMin = {0u,
1196                                                         SHADER_STAGE_ALL_RAY_TRACING,
1197                                                         false,
1198                                                         REQUIRED_SUBGROUP_SIZE_MIN,
1199                                                         de::SharedPtr<bool>(new bool),
1200                                                         SPIRV_VERSION_1_4};
1201         addFunctionCaseWithPrograms(raytracingGroup.get(), "required_subgroup_size_min", supportedCheckFeaturesShader,
1202                                     initPrograms, testRequireSubgroupSize, caseDefAllRaytracingMin);
1203 #endif // CTS_USES_VULKANSC
1204         for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(fbStages); ++stageIndex)
1205         {
1206             const CaseDefinition caseDefStageMax = {0u,
1207                                                     fbStages[stageIndex],
1208                                                     false,
1209                                                     REQUIRED_SUBGROUP_SIZE_MAX,
1210                                                     de::SharedPtr<bool>(new bool),
1211                                                     SPIRV_VERSION_1_3};
1212             addFunctionCaseWithPrograms(
1213                 framebufferGroup.get(), getShaderStageName(caseDefStageMax.shaderStage) + "_required_subgroup_size_max",
1214                 supportedCheckFeaturesShader, initFrameBufferPrograms, noSSBOtestRequireSubgroupSize, caseDefStageMax);
1215             const CaseDefinition caseDefStageMin = {0u,
1216                                                     fbStages[stageIndex],
1217                                                     false,
1218                                                     REQUIRED_SUBGROUP_SIZE_MIN,
1219                                                     de::SharedPtr<bool>(new bool),
1220                                                     SPIRV_VERSION_1_3};
1221             addFunctionCaseWithPrograms(
1222                 framebufferGroup.get(), getShaderStageName(caseDefStageMin.shaderStage) + "_required_subgroup_size_min",
1223                 supportedCheckFeaturesShader, initFrameBufferPrograms, noSSBOtestRequireSubgroupSize, caseDefStageMin);
1224         }
1225 
1226 #ifndef CTS_USES_VULKANSC
1227         for (const auto &stage : meshStages)
1228         {
1229             const auto stageName = getShaderStageName(stage);
1230 
1231             const CaseDefinition caseDefMeshMax = {
1232                 0u, stage, false, REQUIRED_SUBGROUP_SIZE_MAX, de::SharedPtr<bool>(new bool), SPIRV_VERSION_1_4};
1233             addFunctionCaseWithPrograms(meshGroup.get(), "required_subgroup_size_max_" + stageName,
1234                                         supportedCheckFeatures, initPrograms, testRequireSubgroupSize, caseDefMeshMax);
1235             const CaseDefinition caseDefMeshMin = {
1236                 0u, stage, false, REQUIRED_SUBGROUP_SIZE_MIN, de::SharedPtr<bool>(new bool), SPIRV_VERSION_1_4};
1237             addFunctionCaseWithPrograms(meshGroup.get(), "required_subgroup_size_min_" + stageName,
1238                                         supportedCheckFeatures, initPrograms, testRequireSubgroupSize, caseDefMeshMin);
1239         }
1240 #endif // CTS_USES_VULKANSC
1241     }
1242 
1243     group->addChild(genericGroup.release());
1244     group->addChild(graphicsGroup.release());
1245     group->addChild(computeGroup.release());
1246     group->addChild(framebufferGroup.release());
1247 #ifndef CTS_USES_VULKANSC
1248     group->addChild(raytracingGroup.release());
1249     group->addChild(meshGroup.release());
1250 #endif // CTS_USES_VULKANSC
1251 
1252     return group.release();
1253 }
1254 
1255 } // namespace subgroups
1256 } // namespace vkt
1257