1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2019 The Khronos Group Inc.
6 * Copyright (c) 2019 Valve Corporation.
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 */ /*!
21 * \file
22 * \brief VK_EXT_subgroup_size_control Tests
23 */ /*--------------------------------------------------------------------*/
24
25 #include "vktSubgroupsSizeControlTests.hpp"
26 #include "vktSubgroupsTestsUtils.hpp"
27 #include "vktTestCaseUtil.hpp"
28 #include "tcuTestLog.hpp"
29
30 #include <string>
31 #include <vector>
32 #include <algorithm>
33
34 using namespace tcu;
35 using namespace std;
36 using namespace vk;
37 using namespace vkt;
38
39 namespace
40 {
41
42 enum RequiredSubgroupSizeMode
43 {
44 REQUIRED_SUBGROUP_SIZE_NONE = 0,
45 REQUIRED_SUBGROUP_SIZE_MIN = 1,
46 REQUIRED_SUBGROUP_SIZE_MAX = 2,
47 };
48
49 struct CaseDefinition
50 {
51 deUint32 pipelineShaderStageCreateFlags;
52 VkShaderStageFlags shaderStage;
53 deBool requiresBallot;
54 deUint32 requiredSubgroupSizeMode;
55 de::SharedPtr<bool> geometryPointSizeSupported;
56 SpirvVersion spirvVersion;
57
58 	bool hasFullSubgroupsFlag (void) const
59 {
60 return ((pipelineShaderStageCreateFlags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT) != 0u);
61 }
62
63 	bool shaderUsesFullSubgroups (void) const
64 {
65 return (hasFullSubgroupsFlag() || (spirvVersion >= SPIRV_VERSION_1_6));
66 }
67 };
68
69 struct TestParams
70 {
71 deBool useSpirv16;
72 deBool flagsEnabled;
73 string postfix;
74 };
75
76 struct internalDataStruct
77 {
78 const Context* context;
79 struct CaseDefinition caseDef;
80 const deUint32 requiredSubgroupSize;
81 const bool isRequiredSubgroupSize; // Indicates if the test uses VkPipelineShaderStageRequiredSubgroupSizeCreateInfo.
82 };
83
84 inline deBool makeDeBool (bool value)
85 {
86 return (value ? DE_TRUE : DE_FALSE);
87 }
88
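// Decompose numWorkGroupInvocations into a 3D local size: X and Y are chosen as greatest common
// divisors with the per-dimension limits, and Z takes whatever factor remains.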
89 UVec3 getLocalSizes (const uint32_t maxWorkGroupSize[3],
90 uint32_t maxWorkGroupInvocations,
91 uint32_t numWorkGroupInvocations)
92 {
93 DE_ASSERT(numWorkGroupInvocations <= maxWorkGroupInvocations);
94 DE_UNREF(maxWorkGroupInvocations); // For release builds.
95
96 const deUint32 localSizeX = de::gcd(numWorkGroupInvocations, maxWorkGroupSize[0]);
97 const deUint32 localSizeY = de::gcd(std::max(numWorkGroupInvocations / localSizeX, 1u), maxWorkGroupSize[1]);
98 const deUint32 localSizeZ = std::max(numWorkGroupInvocations / (localSizeX * localSizeY), 1u);
99
100 return UVec3(localSizeX, localSizeY, localSizeZ);
101 }
102
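// Map the case's required-subgroup-size mode to a concrete value: the device's minimum or maximum
// subgroup size, or the default subgroup size when no specific size is requested.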
103 deUint32 getRequiredSubgroupSizeFromMode (Context& context,
104 const CaseDefinition& caseDef,
105 #ifndef CTS_USES_VULKANSC
106 const VkPhysicalDeviceSubgroupSizeControlProperties& subgroupSizeControlProperties)
107 #else
108 const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& subgroupSizeControlProperties)
109 #endif // CTS_USES_VULKANSC
110 {
111 switch (caseDef.requiredSubgroupSizeMode)
112 {
113 case REQUIRED_SUBGROUP_SIZE_MAX: return subgroupSizeControlProperties.maxSubgroupSize;
114 case REQUIRED_SUBGROUP_SIZE_MIN: return subgroupSizeControlProperties.minSubgroupSize;
115 case REQUIRED_SUBGROUP_SIZE_NONE: return subgroups::getSubgroupSize(context);
116 default: TCU_THROW(NotSupportedError, "Unsupported Subgroup size");
117 }
118 }
119
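// Verification callback for vertex-pipeline results: every reported gl_SubgroupSize must lie within
// [minSubgroupSize, maxSubgroupSize] and, if a required subgroup size was set, match it exactly.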
120 static bool checkVertexPipelineStages (const void* internalData,
121 vector<const void*> datas,
122 deUint32 width,
123 deUint32)
124 {
125 const struct internalDataStruct* checkInternalData = reinterpret_cast<const struct internalDataStruct *>(internalData);
126 const Context* context = checkInternalData->context;
127 #ifndef CTS_USES_VULKANSC
128 const VkPhysicalDeviceSubgroupSizeControlProperties& subgroupSizeControlProperties = context->getSubgroupSizeControlProperties();
129 #else
130 const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& subgroupSizeControlProperties = context->getSubgroupSizeControlPropertiesEXT();
131 #endif // CTS_USES_VULKANSC
132
133 TestLog& log = context->getTestContext().getLog();
134 const deUint32* data = reinterpret_cast<const deUint32*>(datas[0]);
135
136 for (deUint32 i = 0; i < width; i++)
137 {
138 if (data[i] > subgroupSizeControlProperties.maxSubgroupSize ||
139 data[i] < subgroupSizeControlProperties.minSubgroupSize)
140 {
141 log << TestLog::Message << "gl_SubgroupSize (" << data[i] << ") value is outside limits (" << subgroupSizeControlProperties.minSubgroupSize << ", " << subgroupSizeControlProperties.maxSubgroupSize << ")" << TestLog::EndMessage;
142
143 return DE_FALSE;
144 }
145
146 if (checkInternalData->isRequiredSubgroupSize && data[i] != checkInternalData->requiredSubgroupSize)
147 {
148 log << TestLog::Message << "gl_SubgroupSize (" << data[i] << ") is not equal to the required subgroup size value (" << checkInternalData->requiredSubgroupSize << ")" << TestLog::EndMessage;
149
150 return DE_FALSE;
151 }
152 }
153
154 return DE_TRUE;
155 }
156
157 static bool checkFragmentPipelineStages (const void* internalData,
158 vector<const void*> datas,
159 deUint32 width,
160 deUint32 height,
161 deUint32)
162 {
163 const struct internalDataStruct* checkInternalData = reinterpret_cast<const struct internalDataStruct *>(internalData);
164 const Context* context = checkInternalData->context;
165 #ifndef CTS_USES_VULKANSC
166 const VkPhysicalDeviceSubgroupSizeControlProperties& subgroupSizeControlProperties = context->getSubgroupSizeControlProperties();
167 #else
168 const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& subgroupSizeControlProperties = context->getSubgroupSizeControlPropertiesEXT();
169 #endif // CTS_USES_VULKANSC
170 TestLog& log = context->getTestContext().getLog();
171 const deUint32* data = reinterpret_cast<const deUint32*>(datas[0]);
172
173 for (deUint32 x = 0u; x < width; ++x)
174 {
175 for (deUint32 y = 0u; y < height; ++y)
176 {
177 const deUint32 ndx = (x * height + y);
178
179 if (data[ndx] > subgroupSizeControlProperties.maxSubgroupSize ||
180 data[ndx] < subgroupSizeControlProperties.minSubgroupSize)
181 {
182 log << TestLog::Message << "gl_SubgroupSize (" << data[ndx] << ") value is outside limits (" << subgroupSizeControlProperties.minSubgroupSize << ", " << subgroupSizeControlProperties.maxSubgroupSize << ")" << TestLog::EndMessage;
183
184 return DE_FALSE;
185 }
186
187 if (checkInternalData->isRequiredSubgroupSize && data[ndx] != checkInternalData->requiredSubgroupSize)
188 {
189 log << TestLog::Message << "gl_SubgroupSize (" << data[ndx] << ") is not equal to the required subgroup size value (" << checkInternalData->requiredSubgroupSize << ")" << TestLog::EndMessage;
190
191 return DE_FALSE;
192 }
193 }
194 }
195 return true;
196 }
197
198 static bool checkCompute (const void* internalData,
199 vector<const void*> datas,
200 const deUint32 numWorkgroups[3],
201 const deUint32 localSize[3],
202 deUint32)
203 {
204 const struct internalDataStruct* checkInternalData = reinterpret_cast<const struct internalDataStruct *>(internalData);
205 const Context* context = checkInternalData->context;
206 #ifndef CTS_USES_VULKANSC
207 const VkPhysicalDeviceSubgroupSizeControlProperties& subgroupSizeControlProperties = context->getSubgroupSizeControlProperties();
208 #else
209 const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& subgroupSizeControlProperties = context->getSubgroupSizeControlPropertiesEXT();
210 #endif // CTS_USES_VULKANSC
211 TestLog& log = context->getTestContext().getLog();
212 const deUint32 globalSizeX = numWorkgroups[0] * localSize[0];
213 const deUint32 globalSizeY = numWorkgroups[1] * localSize[1];
214 const deUint32 globalSizeZ = numWorkgroups[2] * localSize[2];
215 const deUint32 width = globalSizeX * globalSizeY * globalSizeZ;
216 const deUint32* data = reinterpret_cast<const deUint32*>(datas[0]);
217
218 for (deUint32 i = 0; i < width; i++)
219 {
220 if (data[i] > subgroupSizeControlProperties.maxSubgroupSize ||
221 data[i] < subgroupSizeControlProperties.minSubgroupSize)
222 {
223 log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
224 << "gl_SubgroupSize (" << data[i] << ") value is outside limits (" << subgroupSizeControlProperties.minSubgroupSize << ", " << subgroupSizeControlProperties.maxSubgroupSize << ")" << TestLog::EndMessage;
225
226 return DE_FALSE;
227 }
228
229 if (checkInternalData->isRequiredSubgroupSize && data[i] != checkInternalData->requiredSubgroupSize)
230 {
231 log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
232 << "gl_SubgroupSize (" << data[i] << ") is not equal to the required subgroup size value (" << checkInternalData->requiredSubgroupSize << ")" << TestLog::EndMessage;
233
234 return DE_FALSE;
235 }
236 }
237
238 return DE_TRUE;
239 }
240
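// Verification callback for the full-subgroups compute cases: in addition to the limit checks,
// gl_SubgroupSize must equal the ballot-reported number of active invocations, and when an exact
// subgroup size is requested both gl_SubgroupSize and gl_NumSubgroups must match the expected values.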
241 static bool checkComputeRequireFull (const void* internalData,
242 vector<const void*> datas,
243 const deUint32 numWorkgroups[3],
244 const deUint32 localSize[3],
245 deUint32)
246 {
247 const struct internalDataStruct* checkInternalData = reinterpret_cast<const struct internalDataStruct *>(internalData);
248 const Context* context = checkInternalData->context;
249 #ifndef CTS_USES_VULKANSC
250 const VkPhysicalDeviceSubgroupSizeControlProperties& subgroupSizeControlProperties = context->getSubgroupSizeControlProperties();
251 #else
252 const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& subgroupSizeControlProperties = context->getSubgroupSizeControlPropertiesEXT();
253 #endif // CTS_USES_VULKANSC
254 TestLog& log = context->getTestContext().getLog();
255 const deUint32 globalSizeX = numWorkgroups[0] * localSize[0];
256 const deUint32 globalSizeY = numWorkgroups[1] * localSize[1];
257 const deUint32 globalSizeZ = numWorkgroups[2] * localSize[2];
258 const deUint32 width = globalSizeX * globalSizeY * globalSizeZ;
259 const UVec4* data = reinterpret_cast<const UVec4*>(datas[0]);
260 const deUint32 numSubgroups = (localSize[0] * localSize[1] * localSize[2]) / checkInternalData->requiredSubgroupSize;
261 const bool exactSubgroupSize = (checkInternalData->caseDef.shaderUsesFullSubgroups() && checkInternalData->isRequiredSubgroupSize);
262
263 for (deUint32 i = 0; i < width; i++)
264 {
265 if (data[i].x() > subgroupSizeControlProperties.maxSubgroupSize ||
266 data[i].x() < subgroupSizeControlProperties.minSubgroupSize)
267 {
268 log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
269 << "gl_SubgroupSize value ( " << data[i].x() << ") is outside limits [" << subgroupSizeControlProperties.minSubgroupSize << ", " << subgroupSizeControlProperties.maxSubgroupSize << "]" << TestLog::EndMessage;
270 return DE_FALSE;
271 }
272
273 if (data[i].x() != data[i].y())
274 {
275 log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
276 << "gl_SubgroupSize ( " << data[i].x() << ") does not match the active number of subgroup invocations (" << data[i].y() << ")" << TestLog::EndMessage;
277 return DE_FALSE;
278 }
279
280 if (exactSubgroupSize && data[i].x() != checkInternalData->requiredSubgroupSize)
281 {
282 log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
283 << "expected subgroupSize (" << checkInternalData->requiredSubgroupSize << ") doesn't match gl_SubgroupSize ( " << data[i].x() << ")" << TestLog::EndMessage;
284 return DE_FALSE;
285 }
286
287 if (exactSubgroupSize && data[i].z() != numSubgroups)
288 {
289 log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
290 << "expected number of subgroups dispatched (" << numSubgroups << ") doesn't match gl_NumSubgroups (" << data[i].z() << ")" << TestLog::EndMessage;
291 return DE_FALSE;
292 }
293 }
294
295 return DE_TRUE;
296 }
297
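// Build the framebuffer-based programs: the tested stage writes gl_SubgroupSize to its output and
// the remaining pipeline stages are filled in with pass-through shaders.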
298 void initFrameBufferPrograms (SourceCollections& programCollection, CaseDefinition caseDef)
299 {
300 const ShaderBuildOptions buildOptions (programCollection.usedVulkanVersion, caseDef.spirvVersion, 0u);
301
302 if (VK_SHADER_STAGE_FRAGMENT_BIT != caseDef.shaderStage)
303 subgroups::setFragmentShaderFrameBuffer(programCollection);
304
305 if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage && VK_SHADER_STAGE_FRAGMENT_BIT != caseDef.shaderStage)
306 subgroups::setVertexShaderFrameBuffer(programCollection);
307
308 string bdyStr = "uint tempResult = gl_SubgroupSize;\n";
309
310 if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
311 {
312 ostringstream vertex;
313
314 vertex << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
315 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
316 << "layout(location = 0) in highp vec4 in_position;\n"
317 << "layout(location = 0) out float out_color;\n"
318 << "\n"
319 << "void main (void)\n"
320 << "{\n"
321 << bdyStr
322 << " out_color = float(tempResult);\n"
323 << " gl_Position = in_position;\n"
324 << " gl_PointSize = 1.0f;\n"
325 << "}\n";
326
327 programCollection.glslSources.add("vert") << glu::VertexSource(vertex.str()) << buildOptions;
328 }
329 else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
330 {
331 ostringstream geometry;
332
333 geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
334 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
335 << "layout(points) in;\n"
336 << "layout(points, max_vertices = 1) out;\n"
337 << "layout(location = 0) out float out_color;\n"
338 << "void main (void)\n"
339 << "{\n"
340 << bdyStr
341 << " out_color = float(tempResult);\n"
342 << " gl_Position = gl_in[0].gl_Position;\n"
343 << " gl_PointSize = 1.0f;"
344 << " EmitVertex();\n"
345 << " EndPrimitive();\n"
346 << "}\n";
347
348 programCollection.glslSources.add("geometry") << glu::GeometrySource(geometry.str()) << buildOptions;
349 }
350 else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
351 {
352 ostringstream controlSource;
353
354 controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
355 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
356 << "layout(vertices = 2) out;\n"
357 << "layout(location = 0) out float out_color[];\n"
358 << "\n"
359 << "void main (void)\n"
360 << "{\n"
361 << " if (gl_InvocationID == 0)\n"
362 << " {\n"
363 << " gl_TessLevelOuter[0] = 1.0f;\n"
364 << " gl_TessLevelOuter[1] = 1.0f;\n"
365 << " }\n"
366 << bdyStr
367 << " out_color[gl_InvocationID ] = float(tempResult);\n"
368 << " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
369 << "}\n";
370
371 programCollection.glslSources.add("tesc") << glu::TessellationControlSource(controlSource.str()) << buildOptions;
372 subgroups::setTesEvalShaderFrameBuffer(programCollection);
373 }
374 else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
375 {
376 ostringstream evaluationSource;
377 evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
378 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
379 << "layout(isolines, equal_spacing, ccw ) in;\n"
380 << "layout(location = 0) out float out_color;\n"
381 << "void main (void)\n"
382 << "{\n"
383 << bdyStr
384 << " out_color = float(tempResult);\n"
385 << " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
386 << "}\n";
387
388 subgroups::setTesCtrlShaderFrameBuffer(programCollection);
389 programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
390 }
391 else if (VK_SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
392 {
393 const string vertex = string(glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)) + "\n"
394 "void main (void)\n"
395 "{\n"
396 " vec2 uv = vec2(float(gl_VertexIndex & 1), float((gl_VertexIndex >> 1) & 1));\n"
397 " gl_Position = vec4(uv * 4.0f -2.0f, 0.0f, 1.0f);\n"
398 " gl_PointSize = 1.0f;\n"
399 "}\n";
400 programCollection.glslSources.add("vert") << glu::VertexSource(vertex) << buildOptions;
401
402 ostringstream fragmentSource;
403
404 fragmentSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
405 << "precision highp int;\n"
406 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
407 << "layout(location = 0) out uint out_color;\n"
408 << "void main()\n"
409 << "{\n"
410 << bdyStr
411 << " out_color = tempResult;\n"
412 << "}\n";
413
414 programCollection.glslSources.add("fragment") << glu::FragmentSource(fragmentSource.str()) << buildOptions;
415 }
416 else
417 {
418 DE_FATAL("Unsupported shader stage");
419 }
420 }
421
422 string getExtHeader (const CaseDefinition&)
423 {
424 return "#extension GL_KHR_shader_subgroup_basic: enable\n";
425 }
426
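// Per-stage interface declarations: a result SSBO binding for each counted stage, plus a color
// output declaration appended when the fragment stage is part of the test.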
427 vector<string> getPerStageHeadDeclarations (const CaseDefinition& caseDef)
428 {
429 const deUint32 stageCount = subgroups::getStagesCount(caseDef.shaderStage);
430 const bool fragment = (caseDef.shaderStage & VK_SHADER_STAGE_FRAGMENT_BIT) != 0;
431 vector<string> result (stageCount, string());
432
433 if (fragment)
434 result.reserve(result.size() + 1);
435
436 for (size_t i = 0; i < result.size(); ++i)
437 {
438 result[i] =
439 "layout(set = 0, binding = " + de::toString(i) + ", std430) buffer Buffer1\n"
440 "{\n"
441 " uint result[];\n"
442 "};\n";
443 }
444
445 if (fragment)
446 {
447 const string fragPart =
448 "layout(location = 0) out uint result;\n";
449
450 result.push_back(fragPart);
451 }
452
453 return result;
454 }
455
456 string getTestSource (const CaseDefinition&)
457 {
458 return
459 " uint tempResult = gl_SubgroupSize;\n"
460 " tempRes = tempResult;\n";
461 }
462
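// Standard SSBO-based program generation: each tested stage writes gl_SubgroupSize as its result.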
463 void initPrograms (SourceCollections& programCollection, CaseDefinition caseDef)
464 {
465 ShaderBuildOptions buildOptions (programCollection.usedVulkanVersion, caseDef.spirvVersion, 0u, (caseDef.spirvVersion == vk::SPIRV_VERSION_1_4));
466 const string extHeader = getExtHeader(caseDef);
467 const string testSrc = getTestSource(caseDef);
468 const vector<string> headDeclarations = getPerStageHeadDeclarations(caseDef);
469
470 subgroups::initStdPrograms(programCollection, buildOptions, caseDef.shaderStage, VK_FORMAT_R32_UINT, *caseDef.geometryPointSizeSupported, extHeader, testSrc, "", headDeclarations);
471 }
472
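// Compute-only program for the full-subgroups cases: every invocation stores gl_SubgroupSize, the
// ballot-based count of active invocations and gl_NumSubgroups into a uvec4 result buffer.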
473 void initProgramsRequireFull (SourceCollections& programCollection, CaseDefinition caseDef)
474 {
475 if (VK_SHADER_STAGE_COMPUTE_BIT != caseDef.shaderStage)
476 DE_FATAL("Unsupported shader stage");
477
478 ostringstream src;
479
480 src << "#version 450\n"
481 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
482 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
483 << "layout (local_size_x_id = 0, local_size_y_id = 1, "
484 "local_size_z_id = 2) in;\n"
485 << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
486 << "{\n"
487 << " uvec4 result[];\n"
488 << "};\n"
489 << "\n"
490 << "void main (void)\n"
491 << "{\n"
492 << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
493 << " highp uint offset = globalSize.x * ((globalSize.y * "
494 "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
495 "gl_GlobalInvocationID.x;\n"
496 << " result[offset].x = gl_SubgroupSize;\n" // save the subgroup size value
497 << " uint numActive = subgroupBallotBitCount(subgroupBallot(true));\n"
498 << " result[offset].y = numActive;\n" // save the number of active subgroup invocations
499 << " result[offset].z = gl_NumSubgroups;\n" // save the number of subgroups dispatched.
500 << "}\n";
501
502 programCollection.glslSources.add("comp") << glu::ComputeSource(src.str()) << ShaderBuildOptions(programCollection.usedVulkanVersion, caseDef.spirvVersion, 0u);
503 }
504
505 void supportedCheck (Context& context)
506 {
507 if (!subgroups::isSubgroupSupported(context))
508 TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
509
510 context.requireDeviceFunctionality("VK_EXT_subgroup_size_control");
511 }
512
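// Per-case support checks: subgroup support for the tested stages, ballot support when required,
// the subgroupSizeControl/computeFullSubgroups features, requiredSubgroupSizeStages coverage, and
// the SPIR-V version needed by the case.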
513 void supportedCheckFeatures (Context& context, CaseDefinition caseDef)
514 {
515 supportedCheck(context);
516
517 if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
518 {
519 TCU_THROW(NotSupportedError, "Shader stage is required to support subgroup operations!");
520 }
521
522 if (caseDef.shaderStage == VK_SHADER_STAGE_ALL_GRAPHICS)
523 {
524 const VkPhysicalDeviceFeatures& features = context.getDeviceFeatures();
525
526 if (!features.tessellationShader || !features.geometryShader)
527 TCU_THROW(NotSupportedError, "Device does not support tessellation or geometry shaders");
528 }
529
530 if (caseDef.requiresBallot && !subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_BALLOT_BIT))
531 {
532 TCU_THROW(NotSupportedError, "Device does not support subgroup ballot operations");
533 }
534
535 if (caseDef.requiredSubgroupSizeMode != REQUIRED_SUBGROUP_SIZE_NONE ||
536 caseDef.pipelineShaderStageCreateFlags == VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT)
537 {
538 #ifndef CTS_USES_VULKANSC
539 const VkPhysicalDeviceSubgroupSizeControlFeatures& subgroupSizeControlFeatures = context.getSubgroupSizeControlFeatures();
540 #else
541 const VkPhysicalDeviceSubgroupSizeControlFeaturesEXT& subgroupSizeControlFeatures = context.getSubgroupSizeControlFeaturesEXT();
542 #endif // CTS_USES_VULKANSC
543
544 if (subgroupSizeControlFeatures.subgroupSizeControl == DE_FALSE)
545 TCU_THROW(NotSupportedError, "Device does not support varying subgroup sizes nor required subgroup size");
546
547 if (caseDef.requiredSubgroupSizeMode != REQUIRED_SUBGROUP_SIZE_NONE)
548 {
549 #ifndef CTS_USES_VULKANSC
550 const VkPhysicalDeviceSubgroupSizeControlProperties& subgroupSizeControlProperties = context.getSubgroupSizeControlProperties();
551 #else
552 const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& subgroupSizeControlProperties = context.getSubgroupSizeControlPropertiesEXT();
553 #endif // CTS_USES_VULKANSC
554
555 if ((subgroupSizeControlProperties.requiredSubgroupSizeStages & caseDef.shaderStage) != caseDef.shaderStage)
556 TCU_THROW(NotSupportedError, "Device does not support setting required subgroup size for the stages selected");
557 }
558 }
559
560 if (caseDef.hasFullSubgroupsFlag())
561 {
562 #ifndef CTS_USES_VULKANSC
563 const VkPhysicalDeviceSubgroupSizeControlFeatures& subgroupSizeControlFeatures = context.getSubgroupSizeControlFeatures();
564 #else
565 const VkPhysicalDeviceSubgroupSizeControlFeaturesEXT& subgroupSizeControlFeatures = context.getSubgroupSizeControlFeaturesEXT();
566 #endif // CTS_USES_VULKANSC
567
568 if (subgroupSizeControlFeatures.computeFullSubgroups == DE_FALSE)
569 TCU_THROW(NotSupportedError, "Device does not support full subgroups in compute shaders");
570 }
571
572 *caseDef.geometryPointSizeSupported = subgroups::isTessellationAndGeometryPointSizeSupported(context);
573
574 #ifndef CTS_USES_VULKANSC
575 if (isAllRayTracingStages(caseDef.shaderStage))
576 {
577 context.requireDeviceFunctionality("VK_KHR_ray_tracing_pipeline");
578 }
579 else if (isAllMeshShadingStages(caseDef.shaderStage))
580 {
581 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_VERTEX_PIPELINE_STORES_AND_ATOMICS);
582 context.requireDeviceFunctionality("VK_EXT_mesh_shader");
583
584 if ((caseDef.shaderStage & VK_SHADER_STAGE_TASK_BIT_EXT) != 0u)
585 {
586 const auto& features = context.getMeshShaderFeaturesEXT();
587 if (!features.taskShader)
588 TCU_THROW(NotSupportedError, "Task shaders not supported");
589 }
590 }
591 #endif // CTS_USES_VULKANSC
592
593 if (caseDef.spirvVersion > vk::getMaxSpirvVersionForVulkan(context.getUsedApiVersion()))
594 TCU_THROW(NotSupportedError, "Shader requires SPIR-V version higher than available");
595 }
596
597 void supportedCheckFeaturesShader (Context& context, CaseDefinition caseDef)
598 {
599 supportedCheckFeatures(context, caseDef);
600
601 subgroups::supportedCheckShader(context, caseDef.shaderStage);
602 }
603
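// Framebuffer variant without a required subgroup size: run the frame-buffer helper matching the
// tested stage with a zero required size.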
604 TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
605 {
606 const VkFormat format = VK_FORMAT_R32_UINT;
607 const deUint32& flags = caseDef.pipelineShaderStageCreateFlags;
608 const struct internalDataStruct internalData =
609 {
610 &context,
611 caseDef,
612 0u,
613 false,
614 };
615
616 switch (caseDef.shaderStage)
617 {
618 case VK_SHADER_STAGE_VERTEX_BIT: return subgroups::makeVertexFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData, checkVertexPipelineStages, flags, 0u);
619 case VK_SHADER_STAGE_GEOMETRY_BIT: return subgroups::makeGeometryFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData, checkVertexPipelineStages, flags, 0u);
620 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: return subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData, checkVertexPipelineStages, caseDef.shaderStage, flags, 0u);
621 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: return subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData, checkVertexPipelineStages, caseDef.shaderStage, flags, 0u);
622 case VK_SHADER_STAGE_FRAGMENT_BIT: return subgroups::makeFragmentFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData, checkFragmentPipelineStages, flags, 0u);
623 default: TCU_THROW(InternalError, "Unhandled shader stage");
624 }
625 }
626
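// Main entry for the allow-varying-subgroup-size cases: compute and mesh stages sweep a list of
// local workgroup sizes, while graphics and ray tracing stages run the generic all-stages helpers.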
627 TestStatus test (Context& context, const CaseDefinition caseDef)
628 {
629 if (isAllComputeStages(caseDef.shaderStage))
630 {
631 const deUint32 numWorkgroups[3] = {1, 1, 1};
632 const deUint32 subgroupSize = subgroups::getSubgroupSize(context);
633 const auto& physicalDeviceProperties = context.getDeviceProperties();
634 const auto& maxWorkGroupSize = physicalDeviceProperties.limits.maxComputeWorkGroupSize;
635 const auto& maxInvocations = physicalDeviceProperties.limits.maxComputeWorkGroupInvocations;
636 // Calculate the local workgroup sizes to exercise the maximum supported by the driver
637 const UVec3 localSize = getLocalSizes(maxWorkGroupSize, maxInvocations, maxInvocations);
638 const deUint32 localSizesToTestCount = 16;
639 const deUint32 localSizesToTest[localSizesToTestCount][3] =
640 {
641 {1, 1, 1},
642 {32, 4, 1},
643 {32, 1, 4},
644 {1, 32, 4},
645 {1, 4, 32},
646 {4, 1, 32},
647 {4, 32, 1},
648 {subgroupSize, 1, 1},
649 {1, subgroupSize, 1},
650 {1, 1, subgroupSize},
651 {3, 5, 7},
652 {128, 1, 1},
653 {1, 128, 1},
654 {1, 1, 64},
655 {localSize.x(), localSize.y(), localSize.z()},
656 {1, 1, 1} // Isn't used, just here to make double buffering checks easier
657 };
658 const struct internalDataStruct internalData =
659 {
660 &context,
661 caseDef,
662 subgroupSize,
663 false,
664 };
665
666 return subgroups::makeComputeTestRequiredSubgroupSize(context,
667 VK_FORMAT_R32_UINT,
668 DE_NULL,
669 0,
670 &internalData,
671 checkCompute,
672 caseDef.pipelineShaderStageCreateFlags,
673 numWorkgroups,
674 makeDeBool(internalData.isRequiredSubgroupSize),
675 subgroupSize,
676 localSizesToTest,
677 localSizesToTestCount);
678 }
679 #ifndef CTS_USES_VULKANSC
680 else if (isAllMeshShadingStages(caseDef.shaderStage))
681 {
682 const bool isMesh = ((caseDef.shaderStage & VK_SHADER_STAGE_MESH_BIT_EXT) != 0u);
683 const bool isTask = ((caseDef.shaderStage & VK_SHADER_STAGE_TASK_BIT_EXT) != 0u);
684
685 DE_ASSERT(isMesh != isTask);
686 DE_UNREF(isTask); // For release builds.
687
688 const deUint32 numWorkgroups[3] = {1, 1, 1};
689 const deUint32 subgroupSize = subgroups::getSubgroupSize(context);
690 const auto& meshProperties = context.getMeshShaderPropertiesEXT();
691 const auto& maxWorkGroupSize = (isMesh ? meshProperties.maxMeshWorkGroupSize : meshProperties.maxTaskWorkGroupSize);
692 const auto& maxInvocations = (isMesh ? meshProperties.maxMeshWorkGroupInvocations : meshProperties.maxTaskWorkGroupInvocations);
693 // Calculate the local workgroup sizes to exercise the maximum supported by the driver
694 const UVec3 localSize = getLocalSizes(maxWorkGroupSize, maxInvocations, maxInvocations);
695 const deUint32 localSizesToTestCount = 16;
696 const deUint32 localSizesToTest[localSizesToTestCount][3] =
697 {
698 {1, 1, 1},
699 {32, 4, 1},
700 {32, 1, 4},
701 {1, 32, 4},
702 {1, 4, 32},
703 {4, 1, 32},
704 {4, 32, 1},
705 {subgroupSize, 1, 1},
706 {1, subgroupSize, 1},
707 {1, 1, subgroupSize},
708 {3, 5, 7},
709 {128, 1, 1},
710 {1, 128, 1},
711 {1, 1, 64},
712 {localSize.x(), localSize.y(), localSize.z()},
713 {1, 1, 1} // Isn't used, just here to make double buffering checks easier
714 };
715 const struct internalDataStruct internalData =
716 {
717 &context,
718 caseDef,
719 subgroupSize,
720 false,
721 };
722
723 return subgroups::makeMeshTestRequiredSubgroupSize(context,
724 VK_FORMAT_R32_UINT,
725 nullptr,
726 0,
727 &internalData,
728 checkCompute,
729 caseDef.pipelineShaderStageCreateFlags,
730 numWorkgroups,
731 makeDeBool(internalData.isRequiredSubgroupSize),
732 subgroupSize,
733 localSizesToTest,
734 localSizesToTestCount);
735 }
736 #endif // CTS_USES_VULKANSC
737 else if (isAllGraphicsStages(caseDef.shaderStage))
738 {
739 const VkShaderStageFlags stages = subgroups::getPossibleGraphicsSubgroupStages(context, caseDef.shaderStage);
740 struct internalDataStruct internalData =
741 {
742 &context,
743 caseDef,
744 0u,
745 false,
746 };
747
748 return subgroups::allStagesRequiredSubgroupSize(context,
749 VK_FORMAT_R32_UINT,
750 nullptr,
751 0,
752 &internalData,
753 checkVertexPipelineStages,
754 stages,
755 caseDef.pipelineShaderStageCreateFlags,
756 caseDef.pipelineShaderStageCreateFlags,
757 caseDef.pipelineShaderStageCreateFlags,
758 caseDef.pipelineShaderStageCreateFlags,
759 caseDef.pipelineShaderStageCreateFlags,
760 nullptr);
761 }
762 #ifndef CTS_USES_VULKANSC
763 else if (isAllRayTracingStages(caseDef.shaderStage))
764 {
765 const VkShaderStageFlags stages = subgroups::getPossibleRayTracingSubgroupStages(context, caseDef.shaderStage);
766 const vector<deUint32> flags (6, caseDef.pipelineShaderStageCreateFlags);
767 const struct internalDataStruct internalData =
768 {
769 &context,
770 caseDef,
771 0u,
772 false,
773 };
774
775 return subgroups::allRayTracingStagesRequiredSubgroupSize(context,
776 VK_FORMAT_R32_UINT,
777 nullptr,
778 0,
779 &internalData,
780 checkVertexPipelineStages,
781 stages,
782 flags.data(),
783 nullptr);
784 }
785 #endif // CTS_USES_VULKANSC
786 else
787 TCU_THROW(InternalError, "Unknown stage or invalid stage set");
788 }
789
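// Compute cases using the REQUIRE_FULL_SUBGROUPS flag (no explicit required size): verify that
// gl_SubgroupSize matches the number of active invocations for several workgroup sizes.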
790 TestStatus testRequireFullSubgroups (Context& context, const CaseDefinition caseDef)
791 {
792 DE_ASSERT(VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage);
793 DE_ASSERT(caseDef.requiredSubgroupSizeMode == REQUIRED_SUBGROUP_SIZE_NONE);
794
795 const deUint32 numWorkgroups[3] = {1, 1, 1};
796 #ifndef CTS_USES_VULKANSC
797 const VkPhysicalDeviceSubgroupSizeControlProperties& subgroupSizeControlProperties = context.getSubgroupSizeControlProperties();
798 #else
799 const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& subgroupSizeControlProperties = context.getSubgroupSizeControlPropertiesEXT();
800 #endif // CTS_USES_VULKANSC
801 const VkPhysicalDeviceProperties& physicalDeviceProperties = context.getDeviceProperties();
802 // Calculate the local workgroup sizes to exercise the maximum supported by the driver
803 const auto& maxWorkGroupSize = physicalDeviceProperties.limits.maxComputeWorkGroupSize;
804 const auto& maxInvocations = physicalDeviceProperties.limits.maxComputeWorkGroupInvocations;
805 const UVec3 localSize = getLocalSizes(maxWorkGroupSize, maxInvocations, maxInvocations);
806 const deUint32 subgroupSize = subgroups::getSubgroupSize(context);
807 	// For full subgroups combined with allow varying subgroup size, local size X must be a multiple of maxSubgroupSize.
808 	// We set local size X for this test to the maximum, regardless of whether allow varying subgroup size is enabled.
809 const deUint32 localSizesToTestCount = 7;
810 const deUint32 localSizesToTest[localSizesToTestCount][3] =
811 {
812 {subgroupSizeControlProperties.maxSubgroupSize, 1, 1},
813 {subgroupSizeControlProperties.maxSubgroupSize, 4, 1},
814 {subgroupSizeControlProperties.maxSubgroupSize, 1, 4},
815 {subgroupSizeControlProperties.maxSubgroupSize * 2, 1, 2},
816 {subgroupSizeControlProperties.maxSubgroupSize * 4, 1, 1},
817 {localSize.x(), localSize.y(), localSize.z()},
818 {1, 1, 1} // Isn't used, just here to make double buffering checks easier
819 };
820 const struct internalDataStruct internalData =
821 {
822 &context,
823 caseDef,
824 subgroupSize,
825 false,
826 };
827
828 DE_ASSERT(caseDef.requiredSubgroupSizeMode == REQUIRED_SUBGROUP_SIZE_NONE);
829
830 return subgroups::makeComputeTestRequiredSubgroupSize(context,
831 VK_FORMAT_R32G32B32A32_UINT,
832 nullptr,
833 0,
834 &internalData,
835 checkComputeRequireFull,
836 caseDef.pipelineShaderStageCreateFlags,
837 numWorkgroups,
838 makeDeBool(internalData.isRequiredSubgroupSize),
839 subgroupSize,
840 localSizesToTest,
841 localSizesToTestCount);
842 }
843
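// Cases that set a required subgroup size via VkPipelineShaderStageRequiredSubgroupSizeCreateInfo:
// the size comes from the device's minimum or maximum limit and every invocation must report exactly
// that gl_SubgroupSize.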
844 TestStatus testRequireSubgroupSize (Context& context, const CaseDefinition caseDef)
845 {
846 if (isAllComputeStages(caseDef.shaderStage))
847 {
848 const deUint32 numWorkgroups[3] = {1, 1, 1};
849 #ifndef CTS_USES_VULKANSC
850 const VkPhysicalDeviceSubgroupSizeControlProperties& subgroupSizeControlProperties = context.getSubgroupSizeControlProperties();
851 #else
852 const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& subgroupSizeControlProperties = context.getSubgroupSizeControlPropertiesEXT();
853 #endif // CTS_USES_VULKANSC
854 const VkPhysicalDeviceProperties& physicalDeviceProperties = context.getDeviceProperties();
855 const deUint32 requiredSubgroupSize = getRequiredSubgroupSizeFromMode(context, caseDef, subgroupSizeControlProperties);
856 const deUint64 maxSubgroupLimitSize = (deUint64)requiredSubgroupSize * subgroupSizeControlProperties.maxComputeWorkgroupSubgroups;
857 const deUint32 maxTotalLocalSize = (deUint32)min<deUint64>(maxSubgroupLimitSize, physicalDeviceProperties.limits.maxComputeWorkGroupInvocations);
858 const auto& maxWorkGroupSize = physicalDeviceProperties.limits.maxComputeWorkGroupSize;
859 const auto& maxInvocations = physicalDeviceProperties.limits.maxComputeWorkGroupInvocations;
860 const UVec3 localSize = getLocalSizes(maxWorkGroupSize, maxInvocations, maxTotalLocalSize);
861 const bool shaderUsesFullSubgroups = caseDef.shaderUsesFullSubgroups();
862 const deUint32 localSizesToTest[5][3] =
863 {
864 {localSize.x(), localSize.y(), localSize.z()},
865 {requiredSubgroupSize, 1, 1},
866 {1, requiredSubgroupSize, 1},
867 {1, 1, requiredSubgroupSize},
868 {1, 1, 1} // Isn't used, just here to make double buffering checks easier
869 };
870
871 // If the shader uses full subgroups, use only the first two entries so the local size in X is a multiple of the requested
872 // subgroup size, as required by the spec.
873 deUint32 localSizesToTestCount = 5;
874 if (shaderUsesFullSubgroups)
875 localSizesToTestCount = 3;
876
877 const internalDataStruct internalData =
878 {
879 &context, // const Context* context;
880 caseDef, // struct CaseDefinition caseDef;
881 requiredSubgroupSize, // deUint32 requiredSubgroupSize;
882 true, // bool isRequiredSubgroupSize;
883 };
884
885 // Depending on the flag and SPIR-V version we need to run one verification function or another.
886 const auto checkFunction = (shaderUsesFullSubgroups ? checkComputeRequireFull : checkCompute);
887
888 return subgroups::makeComputeTestRequiredSubgroupSize(context,
889 VK_FORMAT_R32G32B32A32_UINT,
890 nullptr,
891 0,
892 &internalData,
893 checkFunction,
894 caseDef.pipelineShaderStageCreateFlags,
895 numWorkgroups,
896 makeDeBool(internalData.isRequiredSubgroupSize),
897 requiredSubgroupSize,
898 localSizesToTest,
899 localSizesToTestCount);
900 }
901 #ifndef CTS_USES_VULKANSC
902 else if (isAllMeshShadingStages(caseDef.shaderStage))
903 {
904 const auto isMesh = ((caseDef.shaderStage & VK_SHADER_STAGE_MESH_BIT_EXT) != 0u);
905 const auto isTask = ((caseDef.shaderStage & VK_SHADER_STAGE_TASK_BIT_EXT) != 0u);
906
907 DE_ASSERT(isMesh != isTask);
908 DE_UNREF(isTask); // For release builds.
909
910 const uint32_t numWorkgroups[3] = {1, 1, 1};
911 const auto& subgroupSizeControlProperties = context.getSubgroupSizeControlProperties();
912 const auto& meshProperties = context.getMeshShaderPropertiesEXT();
913 const uint32_t requiredSubgroupSize = getRequiredSubgroupSizeFromMode(context, caseDef, subgroupSizeControlProperties);
914 const auto& maxWorkGroupSize = (isMesh ? meshProperties.maxMeshWorkGroupSize : meshProperties.maxTaskWorkGroupSize);
915 const auto& maxInvocations = (isMesh ? meshProperties.maxMeshWorkGroupInvocations : meshProperties.maxTaskWorkGroupInvocations);
916 const UVec3 localSize = getLocalSizes(maxWorkGroupSize, maxInvocations, maxInvocations);
917 const bool shaderUsesFullSubgroups = caseDef.shaderUsesFullSubgroups();
918 const uint32_t localSizesToTest[5][3] =
919 {
920 {requiredSubgroupSize, 1, 1},
921 {1, requiredSubgroupSize, 1},
922 {1, 1, requiredSubgroupSize},
923 {localSize.x(), localSize.y(), localSize.z()},
924 {1, 1, 1} // Isn't used, just here to make double buffering checks easier
925 };
926
927 // If the shader uses full subgroups, use only the first two entries so the local size in X is a multiple of the requested
928 // subgroup size, as required by the spec.
929 deUint32 localSizesToTestCount = 5;
930 if (shaderUsesFullSubgroups)
931 localSizesToTestCount = 3;
932
933 const internalDataStruct internalData =
934 {
935 &context, // const Context* context;
936 caseDef, // struct CaseDefinition caseDef;
937 requiredSubgroupSize, // uint32_t requiredSubgroupSize;
938 true, // bool isRequiredSubgroupSize;
939 };
940
941 // Depending on the flag and SPIR-V version we need to run one verification function or another.
942 const auto checkFunction = (shaderUsesFullSubgroups ? checkComputeRequireFull : checkCompute);
943
944 return subgroups::makeMeshTestRequiredSubgroupSize(context,
945 VK_FORMAT_R32G32B32A32_UINT,
946 nullptr,
947 0,
948 &internalData,
949 checkFunction,
950 caseDef.pipelineShaderStageCreateFlags,
951 numWorkgroups,
952 makeDeBool(internalData.isRequiredSubgroupSize),
953 requiredSubgroupSize,
954 localSizesToTest,
955 localSizesToTestCount);
956 }
957 #endif // CTS_USES_VULKANSC
958 else if (isAllGraphicsStages(caseDef.shaderStage))
959 {
960 const VkShaderStageFlags stages = subgroups::getPossibleGraphicsSubgroupStages(context, caseDef.shaderStage);
961 #ifndef CTS_USES_VULKANSC
962 const VkPhysicalDeviceSubgroupSizeControlProperties& subgroupSizeControlProperties = context.getSubgroupSizeControlProperties();
963 #else
964 const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& subgroupSizeControlProperties = context.getSubgroupSizeControlPropertiesEXT();
965 #endif // CTS_USES_VULKANSC
966 const deUint32 requiredSubgroupSize = getRequiredSubgroupSizeFromMode(context, caseDef, subgroupSizeControlProperties);
967 const deUint32 requiredSubgroupSizes[5] = { requiredSubgroupSize, requiredSubgroupSize, requiredSubgroupSize, requiredSubgroupSize, requiredSubgroupSize};
968 const internalDataStruct internalData =
969 {
970 &context, // const Context* context;
971 caseDef, // struct CaseDefinition caseDef;
972 requiredSubgroupSize, // deUint32 requiredSubgroupSize;
973 true, // bool isRequiredSubgroupSize;
974 };
975
976 return subgroups::allStagesRequiredSubgroupSize(context,
977 VK_FORMAT_R32_UINT,
978 DE_NULL,
979 0,
980 &internalData,
981 checkVertexPipelineStages,
982 stages,
983 caseDef.pipelineShaderStageCreateFlags,
984 caseDef.pipelineShaderStageCreateFlags,
985 caseDef.pipelineShaderStageCreateFlags,
986 caseDef.pipelineShaderStageCreateFlags,
987 caseDef.pipelineShaderStageCreateFlags,
988 requiredSubgroupSizes);
989 }
990 #ifndef CTS_USES_VULKANSC
991 else if (isAllRayTracingStages(caseDef.shaderStage))
992 {
993 const VkShaderStageFlags stages = subgroups::getPossibleRayTracingSubgroupStages(context, caseDef.shaderStage);
994 const VkPhysicalDeviceSubgroupSizeControlProperties& subgroupSizeControlProperties = context.getSubgroupSizeControlProperties();
995 const deUint32 requiredSubgroupSize = getRequiredSubgroupSizeFromMode(context, caseDef, subgroupSizeControlProperties);
996 const vector<deUint32> flags (6, caseDef.pipelineShaderStageCreateFlags);
997 const vector<deUint32> requiredSubgroupSizes (6, requiredSubgroupSize);
998 const struct internalDataStruct internalData =
999 {
1000 &context, // const Context* context;
1001 caseDef, // struct CaseDefinition caseDef;
1002 requiredSubgroupSize, // deUint32 requiredSubgroupSize;
1003 true, // bool isRequiredSubgroupSize;
1004 };
1005
1006 return subgroups::allRayTracingStagesRequiredSubgroupSize(context,
1007 VK_FORMAT_R32_UINT,
1008 DE_NULL,
1009 0,
1010 &internalData,
1011 checkVertexPipelineStages,
1012 stages,
1013 flags.data(),
1014 requiredSubgroupSizes.data());
1015 }
1016 #endif // CTS_USES_VULKANSC
1017 else
1018 TCU_THROW(InternalError, "Unknown stage or invalid stage set");
1019 }
1020
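// Framebuffer variant of the required-subgroup-size cases for the individual graphics stages.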
1021 TestStatus noSSBOtestRequireSubgroupSize (Context& context, const CaseDefinition caseDef)
1022 {
1023 #ifndef CTS_USES_VULKANSC
1024 const VkPhysicalDeviceSubgroupSizeControlProperties& subgroupSizeControlProperties = context.getSubgroupSizeControlProperties();
1025 #else
1026 const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& subgroupSizeControlProperties = context.getSubgroupSizeControlPropertiesEXT();
1027 #endif // CTS_USES_VULKANSC
1028 const deUint32 requiredSubgroupSize = getRequiredSubgroupSizeFromMode(context, caseDef, subgroupSizeControlProperties);
1029 const VkFormat format = VK_FORMAT_R32_UINT;
1030 const deUint32& flags = caseDef.pipelineShaderStageCreateFlags;
1031 const deUint32& size = requiredSubgroupSize;
1032 struct internalDataStruct internalData =
1033 {
1034 &context,
1035 caseDef,
1036 requiredSubgroupSize,
1037 true,
1038 };
1039
1040 switch (caseDef.shaderStage)
1041 {
1042 case VK_SHADER_STAGE_VERTEX_BIT: return subgroups::makeVertexFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData, checkVertexPipelineStages, flags, size);
1043 case VK_SHADER_STAGE_GEOMETRY_BIT: return subgroups::makeGeometryFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData, checkVertexPipelineStages, flags, size);
1044 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: return subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData, checkVertexPipelineStages, caseDef.shaderStage, flags, size);
1045 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: return subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData, checkVertexPipelineStages, caseDef.shaderStage, flags, size);
1046 case VK_SHADER_STAGE_FRAGMENT_BIT: return subgroups::makeFragmentFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData, checkFragmentPipelineStages, flags, size);
1047 default: TCU_THROW(InternalError, "Unhandled shader stage");
1048 }
1049 }
1050
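// Property sanity check: the core subgroupSize reported in VkPhysicalDeviceSubgroupProperties must
// fall within the [minSubgroupSize, maxSubgroupSize] range reported by the subgroup size control
// properties.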
1051 TestStatus testSanitySubgroupSizeProperties (Context& context)
1052 {
1053 #ifndef CTS_USES_VULKANSC
1054 VkPhysicalDeviceSubgroupSizeControlProperties subgroupSizeControlProperties;
1055 subgroupSizeControlProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES;
1056 #else
1057 VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroupSizeControlProperties;
1058 subgroupSizeControlProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT;
1059 #endif // CTS_USES_VULKANSC
1060
1061 subgroupSizeControlProperties.pNext = DE_NULL;
1062
1063 VkPhysicalDeviceSubgroupProperties subgroupProperties;
1064 subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
1065 subgroupProperties.pNext = &subgroupSizeControlProperties;
1066
1067 VkPhysicalDeviceProperties2 properties;
1068 properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
1069 properties.pNext = &subgroupProperties;
1070
1071 context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
1072
1073 if (subgroupProperties.subgroupSize > subgroupSizeControlProperties.maxSubgroupSize ||
1074 subgroupProperties.subgroupSize < subgroupSizeControlProperties.minSubgroupSize)
1075 {
1076 ostringstream error;
1077 error << "subgroupSize (" << subgroupProperties.subgroupSize << ") is not between maxSubgroupSize (";
1078 error << subgroupSizeControlProperties.maxSubgroupSize << ") and minSubgroupSize (";
1079 error << subgroupSizeControlProperties.minSubgroupSize << ")";
1080
1081 return TestStatus::fail(error.str().c_str());
1082 }
1083
1084 return TestStatus::pass("OK");
1085 }
1086 }
1087
1088 namespace vkt
1089 {
1090 namespace subgroups
1091 {
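// Register the size_control group: generic property checks plus compute, graphics, framebuffer,
// ray tracing and mesh variants combining the create flags, required sizes and SPIR-V versions.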
1092 TestCaseGroup* createSubgroupsSizeControlTests (TestContext& testCtx)
1093 {
1094 de::MovePtr<TestCaseGroup> group (new TestCaseGroup(testCtx, "size_control", "VK_EXT_subgroup_size_control tests"));
1095 de::MovePtr<TestCaseGroup> framebufferGroup (new TestCaseGroup(testCtx, "framebuffer", "Subgroup size control category tests: framebuffer"));
1096 de::MovePtr<TestCaseGroup> computeGroup (new TestCaseGroup(testCtx, "compute", "Subgroup size control category tests: compute"));
1097 de::MovePtr<TestCaseGroup> graphicsGroup (new TestCaseGroup(testCtx, "graphics", "Subgroup size control category tests: graphics"));
1098 #ifndef CTS_USES_VULKANSC
1099 de::MovePtr<TestCaseGroup> raytracingGroup (new TestCaseGroup(testCtx, "ray_tracing", "Subgroup size control category tests: ray tracing"));
1100 de::MovePtr<TestCaseGroup> meshGroup (new TestCaseGroup(testCtx, "mesh", "Subgroup size control category tests: mesh shading"));
1101 #endif // CTS_USES_VULKANSC
1102 de::MovePtr<TestCaseGroup> genericGroup (new TestCaseGroup(testCtx, "generic", "Subgroup size control category tests: generic"));
1103 const VkShaderStageFlags fbStages[] =
1104 {
1105 VK_SHADER_STAGE_VERTEX_BIT,
1106 VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
1107 VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
1108 VK_SHADER_STAGE_GEOMETRY_BIT,
1109 VK_SHADER_STAGE_FRAGMENT_BIT,
1110 };
1111 #ifndef CTS_USES_VULKANSC
1112 const VkShaderStageFlags meshStages[] =
1113 {
1114 VK_SHADER_STAGE_MESH_BIT_EXT,
1115 VK_SHADER_STAGE_TASK_BIT_EXT,
1116 };
1117 #endif // CTS_USES_VULKANSC
1118
1119 // Test sanity of the subgroup size properties.
1120 {
1121 addFunctionCase(genericGroup.get(), "subgroup_size_properties", supportedCheck, testSanitySubgroupSizeProperties);
1122 }
1123
1124 const TestParams testParams[] = {{false, true, ""}, {true, false, "_spirv16"}, {true, true, "_flags_spirv16"}};
1125
1126 for (const auto& params : testParams)
1127 {
1128 // Allow varying subgroup cases.
1129 const deUint32 flagsVary = VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT;
1130 const CaseDefinition caseDefVary = {params.flagsEnabled ? flagsVary : 0u, VK_SHADER_STAGE_COMPUTE_BIT, DE_FALSE, REQUIRED_SUBGROUP_SIZE_NONE,
1131 de::SharedPtr<bool>(new bool), params.useSpirv16 ? SPIRV_VERSION_1_6 : SPIRV_VERSION_1_3};
1132
1133 addFunctionCaseWithPrograms(computeGroup.get(), "allow_varying_subgroup_size" + params.postfix, supportedCheckFeatures,
1134 initPrograms, test, caseDefVary);
1135 addFunctionCaseWithPrograms(graphicsGroup.get(), "allow_varying_subgroup_size" + params.postfix,
1136 supportedCheckFeaturesShader, initPrograms, test, caseDefVary);
1137
1138 for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(fbStages); ++stageIndex)
1139 {
1140 const CaseDefinition caseDefStage = {params.flagsEnabled ? flagsVary : 0u, fbStages[stageIndex], DE_FALSE, REQUIRED_SUBGROUP_SIZE_NONE,
1141 de::SharedPtr<bool>(new bool), params.useSpirv16 ? SPIRV_VERSION_1_6 : SPIRV_VERSION_1_3};
1142
1143 string name = getShaderStageName(caseDefStage.shaderStage) + "_allow_varying_subgroup_size" + params.postfix;
1144 addFunctionCaseWithPrograms(framebufferGroup.get(), name, supportedCheckFeaturesShader, initFrameBufferPrograms,
1145 noSSBOtest, caseDefStage);
1146 }
1147
1148 #ifndef CTS_USES_VULKANSC
1149 for (const auto& stage : meshStages)
1150 {
1151 const CaseDefinition caseDefMesh = {(params.flagsEnabled ? flagsVary : 0u), stage, DE_FALSE, REQUIRED_SUBGROUP_SIZE_NONE,
1152 de::SharedPtr<bool>(new bool), (params.useSpirv16 ? SPIRV_VERSION_1_6 : SPIRV_VERSION_1_4)};
1153 const std::string name = getShaderStageName(stage) + "_allow_varying_subgroup_size" + params.postfix;
1154 addFunctionCaseWithPrograms(meshGroup.get(), name, supportedCheckFeatures, initPrograms, test, caseDefMesh);
1155 }
1156 #endif // CTS_USES_VULKANSC
1157
1158 // Require full subgroups together with allow varying subgroup (only compute shaders).
1159 const deUint32 flagsFullVary = VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT
1160 | VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT;
1161 const CaseDefinition caseDefFullVary = {params.flagsEnabled ? flagsFullVary : 0u, VK_SHADER_STAGE_COMPUTE_BIT, DE_TRUE, REQUIRED_SUBGROUP_SIZE_NONE, de::SharedPtr<bool>(new bool),
1162 params.useSpirv16 ? SPIRV_VERSION_1_6 : SPIRV_VERSION_1_3};
1163 addFunctionCaseWithPrograms(computeGroup.get(), "require_full_subgroups_allow_varying_subgroup_size" + params.postfix,
1164 supportedCheckFeatures, initProgramsRequireFull, testRequireFullSubgroups, caseDefFullVary);
1165
1166 // Require full subgroups cases (only compute shaders).
1167 const deUint32 flagsFull = VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT;
1168 const CaseDefinition caseDefFull = {params.flagsEnabled ? flagsFull : 0u, VK_SHADER_STAGE_COMPUTE_BIT, DE_TRUE, REQUIRED_SUBGROUP_SIZE_NONE, de::SharedPtr<bool>(new bool),
1169 params.useSpirv16 ? SPIRV_VERSION_1_6 : SPIRV_VERSION_1_3};
1170 addFunctionCaseWithPrograms(computeGroup.get(), "require_full_subgroups" + params.postfix, supportedCheckFeatures, initProgramsRequireFull,
1171 testRequireFullSubgroups, caseDefFull);
1172
1173 // Tests to check setting a required subgroup size value, together with require full subgroups (only compute shaders).
1174 const CaseDefinition caseDefMaxFull = {params.flagsEnabled ? flagsFull : 0u, VK_SHADER_STAGE_COMPUTE_BIT, DE_TRUE, REQUIRED_SUBGROUP_SIZE_MAX, de::SharedPtr<bool>(new bool),
1175 params.useSpirv16 ? SPIRV_VERSION_1_6 : SPIRV_VERSION_1_3};
1176 addFunctionCaseWithPrograms(computeGroup.get(), "required_subgroup_size_max_require_full_subgroups" + params.postfix, supportedCheckFeatures,
1177 initProgramsRequireFull, testRequireSubgroupSize, caseDefMaxFull);
1178
1179 const CaseDefinition caseDefMinFull = {params.flagsEnabled ? flagsFull : 0u, VK_SHADER_STAGE_COMPUTE_BIT, DE_TRUE, REQUIRED_SUBGROUP_SIZE_MIN, de::SharedPtr<bool>(new bool),
1180 params.useSpirv16 ? SPIRV_VERSION_1_6 : SPIRV_VERSION_1_3};
1181 addFunctionCaseWithPrograms(computeGroup.get(), "required_subgroup_size_min_require_full_subgroups" + params.postfix, supportedCheckFeatures,
1182 initProgramsRequireFull, testRequireSubgroupSize, caseDefMinFull);
1183
1184 // Ray tracing cases with allow varying subgroup.
1185 #ifndef CTS_USES_VULKANSC
1186 const deUint32 flagsRayTracing = VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT;
1187 const CaseDefinition caseDefAllRaytracing = {params.flagsEnabled ? flagsRayTracing : 0u, SHADER_STAGE_ALL_RAY_TRACING, DE_FALSE, REQUIRED_SUBGROUP_SIZE_NONE,
1188 de::SharedPtr<bool>(new bool), params.useSpirv16 ? SPIRV_VERSION_1_6 : SPIRV_VERSION_1_4};
1189 addFunctionCaseWithPrograms(raytracingGroup.get(), "allow_varying_subgroup_size" + params.postfix, supportedCheckFeaturesShader,
1190 initPrograms, test, caseDefAllRaytracing);
1191 #endif // CTS_USES_VULKANSC
1192 }
1193
1194 // Tests to check setting a required subgroup size value.
1195 {
1196 const CaseDefinition caseDefAllGraphicsMax = {0u, VK_SHADER_STAGE_ALL_GRAPHICS, DE_FALSE, REQUIRED_SUBGROUP_SIZE_MAX, de::SharedPtr<bool>(new bool), SPIRV_VERSION_1_3};
1197 addFunctionCaseWithPrograms(graphicsGroup.get(), "required_subgroup_size_max", supportedCheckFeaturesShader, initPrograms, testRequireSubgroupSize, caseDefAllGraphicsMax);
1198 const CaseDefinition caseDefComputeMax = {0u, VK_SHADER_STAGE_COMPUTE_BIT, DE_FALSE, REQUIRED_SUBGROUP_SIZE_MAX, de::SharedPtr<bool>(new bool), SPIRV_VERSION_1_3};
1199 addFunctionCaseWithPrograms(computeGroup.get(), "required_subgroup_size_max", supportedCheckFeatures, initPrograms, testRequireSubgroupSize, caseDefComputeMax);
1200 #ifndef CTS_USES_VULKANSC
1201 const CaseDefinition caseDefAllRaytracingMax = {0u, SHADER_STAGE_ALL_RAY_TRACING, DE_FALSE, REQUIRED_SUBGROUP_SIZE_MAX, de::SharedPtr<bool>(new bool), SPIRV_VERSION_1_4};
1202 addFunctionCaseWithPrograms(raytracingGroup.get(), "required_subgroup_size_max", supportedCheckFeaturesShader, initPrograms, testRequireSubgroupSize, caseDefAllRaytracingMax);
1203 #endif // CTS_USES_VULKANSC
1204
1205 const CaseDefinition caseDefAllGraphicsMin = {0u, VK_SHADER_STAGE_ALL_GRAPHICS, DE_FALSE, REQUIRED_SUBGROUP_SIZE_MIN, de::SharedPtr<bool>(new bool), SPIRV_VERSION_1_3};
1206 addFunctionCaseWithPrograms(graphicsGroup.get(), "required_subgroup_size_min", supportedCheckFeaturesShader, initPrograms, testRequireSubgroupSize, caseDefAllGraphicsMin);
1207 const CaseDefinition caseDefComputeMin = {0u, VK_SHADER_STAGE_COMPUTE_BIT, DE_FALSE, REQUIRED_SUBGROUP_SIZE_MIN, de::SharedPtr<bool>(new bool), SPIRV_VERSION_1_3};
1208 addFunctionCaseWithPrograms(computeGroup.get(), "required_subgroup_size_min", supportedCheckFeatures, initPrograms, testRequireSubgroupSize, caseDefComputeMin);
1209 #ifndef CTS_USES_VULKANSC
1210 const CaseDefinition caseDefAllRaytracingMin = {0u, SHADER_STAGE_ALL_RAY_TRACING, DE_FALSE, REQUIRED_SUBGROUP_SIZE_MIN, de::SharedPtr<bool>(new bool), SPIRV_VERSION_1_4};
1211 addFunctionCaseWithPrograms(raytracingGroup.get(), "required_subgroup_size_min", supportedCheckFeaturesShader, initPrograms, testRequireSubgroupSize, caseDefAllRaytracingMin);
1212 #endif // CTS_USES_VULKANSC
1213 for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(fbStages); ++stageIndex)
1214 {
1215 const CaseDefinition caseDefStageMax = {0u, fbStages[stageIndex], DE_FALSE, REQUIRED_SUBGROUP_SIZE_MAX, de::SharedPtr<bool>(new bool), SPIRV_VERSION_1_3};
1216 addFunctionCaseWithPrograms(framebufferGroup.get(), getShaderStageName(caseDefStageMax.shaderStage) + "_required_subgroup_size_max", supportedCheckFeaturesShader, initFrameBufferPrograms, noSSBOtestRequireSubgroupSize, caseDefStageMax);
1217 const CaseDefinition caseDefStageMin = {0u, fbStages[stageIndex], DE_FALSE, REQUIRED_SUBGROUP_SIZE_MIN, de::SharedPtr<bool>(new bool), SPIRV_VERSION_1_3};
1218 addFunctionCaseWithPrograms(framebufferGroup.get(), getShaderStageName(caseDefStageMin.shaderStage) + "_required_subgroup_size_min", supportedCheckFeaturesShader, initFrameBufferPrograms, noSSBOtestRequireSubgroupSize, caseDefStageMin);
1219 }
1220
1221 #ifndef CTS_USES_VULKANSC
1222 for (const auto& stage : meshStages)
1223 {
1224 const auto stageName = getShaderStageName(stage);
1225
1226 const CaseDefinition caseDefMeshMax = {0u, stage, DE_FALSE, REQUIRED_SUBGROUP_SIZE_MAX, de::SharedPtr<bool>(new bool), SPIRV_VERSION_1_4};
1227 addFunctionCaseWithPrograms(meshGroup.get(), "required_subgroup_size_max_" + stageName, supportedCheckFeatures, initPrograms, testRequireSubgroupSize, caseDefMeshMax);
1228 const CaseDefinition caseDefMeshMin = {0u, stage, DE_FALSE, REQUIRED_SUBGROUP_SIZE_MIN, de::SharedPtr<bool>(new bool), SPIRV_VERSION_1_4};
1229 addFunctionCaseWithPrograms(meshGroup.get(), "required_subgroup_size_min_" + stageName, supportedCheckFeatures, initPrograms, testRequireSubgroupSize, caseDefMeshMin);
1230 }
1231 #endif // CTS_USES_VULKANSC
1232 }
1233
1234 group->addChild(genericGroup.release());
1235 group->addChild(graphicsGroup.release());
1236 group->addChild(computeGroup.release());
1237 group->addChild(framebufferGroup.release());
1238 #ifndef CTS_USES_VULKANSC
1239 group->addChild(raytracingGroup.release());
1240 group->addChild(meshGroup.release());
1241 #endif // CTS_USES_VULKANSC
1242
1243 return group.release();
1244 }
1245
1246 } // subgroups
1247 } // vkt
1248