• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2019 The Khronos Group Inc.
6  * Copyright (c) 2019 Valve Corporation.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  */ /*!
21  * \file
22  * \brief VK_EXT_subgroup_size_control Tests
23  */ /*--------------------------------------------------------------------*/
24 
25 #include "vktSubgroupsSizeControlTests.hpp"
26 #include "vktSubgroupsTestsUtils.hpp"
27 #include "vktTestCaseUtil.hpp"
28 #include "tcuTestLog.hpp"
29 
30 #include <string>
31 #include <vector>
32 
33 using namespace tcu;
34 using namespace std;
35 using namespace vk;
36 using namespace vkt;
37 
38 namespace
39 {
40 
// Selects which subgroup size, if any, a test case asks for.
// getRequiredSubgroupSizeFromMode() translates a mode into an actual size.
enum RequiredSubgroupSizeMode
{
	REQUIRED_SUBGROUP_SIZE_NONE = 0,	// resolved through subgroups::getSubgroupSize()
	REQUIRED_SUBGROUP_SIZE_MIN,			// resolved to minSubgroupSize
	REQUIRED_SUBGROUP_SIZE_MAX,			// resolved to maxSubgroupSize
};
47 
48 struct CaseDefinition
49 {
50 	deUint32			pipelineShaderStageCreateFlags;
51 	VkShaderStageFlags	shaderStage;
52 	deBool				requiresBallot;
53 	deUint32			requiredSubgroupSizeMode;
54 	de::SharedPtr<bool>	geometryPointSizeSupported;
55 };
56 
57 struct internalDataStruct
58 {
59 	const Context*			context;
60 	struct CaseDefinition	caseDef;
61 	deUint32				requiredSubgroupSize;
62 };
63 
// Find the greatest common divisor of a and b.
//
// Iterative Euclidean algorithm. Zero operands are well defined here:
// gcd(n, 0) == gcd(0, n) == n and gcd(0, 0) == 0. The earlier recursive form
// computed "greater % lesser" with lesser == 0 whenever a was zero, which is a
// division by zero.
deUint32 gcd (deUint32 a, deUint32 b)
{
	while (b != 0u)
	{
		const deUint32 remainder = a % b;

		a = b;
		b = remainder;
	}

	return a;
}
79 
getLocalSizes(const VkPhysicalDeviceProperties & physicalDeviceProperties,deUint32 numWorkGroupInvocations)80 UVec3	getLocalSizes (const VkPhysicalDeviceProperties&	physicalDeviceProperties,
81 					   deUint32								numWorkGroupInvocations)
82 {
83 	DE_ASSERT(numWorkGroupInvocations <= physicalDeviceProperties.limits.maxComputeWorkGroupInvocations);
84 	const deUint32 localSizeX = gcd(numWorkGroupInvocations, physicalDeviceProperties.limits.maxComputeWorkGroupSize[0]);
85 	const deUint32 localSizeY = gcd(deMax32(numWorkGroupInvocations / localSizeX, 1u), physicalDeviceProperties.limits.maxComputeWorkGroupSize[1]);
86 	const deUint32 localSizeZ = deMax32(numWorkGroupInvocations / (localSizeX * localSizeY), 1u);
87 
88 	return UVec3(localSizeX, localSizeY, localSizeZ);
89 }
90 
getRequiredSubgroupSizeFromMode(Context & context,const CaseDefinition & caseDef,const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT & subgroupSizeControlProperties)91 deUint32 getRequiredSubgroupSizeFromMode (Context&													context,
92 										  const CaseDefinition&										caseDef,
93 										  const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT&	subgroupSizeControlProperties)
94 {
95 	switch (caseDef.requiredSubgroupSizeMode)
96 	{
97 		case REQUIRED_SUBGROUP_SIZE_MAX:	return subgroupSizeControlProperties.maxSubgroupSize;
98 		case REQUIRED_SUBGROUP_SIZE_MIN:	return subgroupSizeControlProperties.minSubgroupSize;
99 		case REQUIRED_SUBGROUP_SIZE_NONE:	return subgroups::getSubgroupSize(context);
100 		default:							TCU_THROW(NotSupportedError, "Unsupported Subgroup size");
101 	}
102 }
103 
checkVertexPipelineStages(const void * internalData,vector<const void * > datas,deUint32 width,deUint32)104 static bool checkVertexPipelineStages (const void*			internalData,
105 									   vector<const void*>	datas,
106 									   deUint32				width,
107 									   deUint32)
108 {
109 	const struct internalDataStruct*						checkInternalData				= reinterpret_cast<const struct internalDataStruct *>(internalData);
110 	const Context*											context							= checkInternalData->context;
111 	const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT&	subgroupSizeControlProperties	= context->getSubgroupSizeControlPropertiesEXT();
112 	TestLog&												log								= context->getTestContext().getLog();
113 	const deUint32*											data							= reinterpret_cast<const deUint32*>(datas[0]);
114 
115 	for (deUint32 i = 0; i < width; i++)
116 	{
117 		if (data[i] > subgroupSizeControlProperties.maxSubgroupSize ||
118 			data[i] < subgroupSizeControlProperties.minSubgroupSize)
119 		{
120 			log << TestLog::Message << "gl_SubgroupSize (" << data[i] << ") value is outside limits (" << subgroupSizeControlProperties.minSubgroupSize << ", " << subgroupSizeControlProperties.maxSubgroupSize << ")" << TestLog::EndMessage;
121 
122 			return DE_FALSE;
123 		}
124 
125 		if (checkInternalData->caseDef.requiredSubgroupSizeMode != REQUIRED_SUBGROUP_SIZE_NONE && data[i] != checkInternalData->requiredSubgroupSize)
126 		{
127 			log << TestLog::Message << "gl_SubgroupSize (" << data[i] << ") is not equal to the required subgroup size value (" << checkInternalData->requiredSubgroupSize << ")" << TestLog::EndMessage;
128 
129 			return DE_FALSE;
130 		}
131 	}
132 
133 	return DE_TRUE;
134 }
135 
checkFragmentPipelineStages(const void * internalData,vector<const void * > datas,deUint32 width,deUint32 height,deUint32)136 static bool checkFragmentPipelineStages (const void*			internalData,
137 										 vector<const void*>	datas,
138 										 deUint32				width,
139 										 deUint32				height,
140 										 deUint32)
141 {
142 	const struct internalDataStruct*						checkInternalData				= reinterpret_cast<const struct internalDataStruct *>(internalData);
143 	const Context*											context							= checkInternalData->context;
144 	const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT&	subgroupSizeControlProperties	= context->getSubgroupSizeControlPropertiesEXT();
145 	TestLog&												log								= context->getTestContext().getLog();
146 	const deUint32*											data							= reinterpret_cast<const deUint32*>(datas[0]);
147 
148 	for (deUint32 x = 0u; x < width; ++x)
149 	{
150 		for (deUint32 y = 0u; y < height; ++y)
151 		{
152 			const deUint32 ndx = (x * height + y);
153 
154 			if (data[ndx] > subgroupSizeControlProperties.maxSubgroupSize ||
155 				data[ndx] < subgroupSizeControlProperties.minSubgroupSize)
156 			{
157 				log << TestLog::Message << "gl_SubgroupSize (" << data[ndx] << ") value is outside limits (" << subgroupSizeControlProperties.minSubgroupSize << ", " << subgroupSizeControlProperties.maxSubgroupSize << ")" << TestLog::EndMessage;
158 
159 				return DE_FALSE;
160 			}
161 
162 			if (checkInternalData->caseDef.requiredSubgroupSizeMode != REQUIRED_SUBGROUP_SIZE_NONE &&
163 				data[ndx] != checkInternalData->requiredSubgroupSize)
164 			{
165 				log << TestLog::Message << "gl_SubgroupSize (" << data[ndx] << ") is not equal to the required subgroup size value (" << checkInternalData->requiredSubgroupSize << ")" << TestLog::EndMessage;
166 
167 				return DE_FALSE;
168 			}
169 		}
170 	}
171 	return true;
172 }
173 
checkCompute(const void * internalData,vector<const void * > datas,const deUint32 numWorkgroups[3],const deUint32 localSize[3],deUint32)174 static bool checkCompute (const void*			internalData,
175 						  vector<const void*>	datas,
176 						  const deUint32		numWorkgroups[3],
177 						  const deUint32		localSize[3],
178 						  deUint32)
179 {
180 	const struct internalDataStruct*						checkInternalData				= reinterpret_cast<const struct internalDataStruct *>(internalData);
181 	const Context*											context							= checkInternalData->context;
182 	const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT&	subgroupSizeControlProperties	= context->getSubgroupSizeControlPropertiesEXT();
183 	TestLog&												log								= context->getTestContext().getLog();
184 	const deUint32											globalSizeX						= numWorkgroups[0] * localSize[0];
185 	const deUint32											globalSizeY						= numWorkgroups[1] * localSize[1];
186 	const deUint32											globalSizeZ						= numWorkgroups[2] * localSize[2];
187 	const deUint32											width							= globalSizeX * globalSizeY * globalSizeZ;
188 	const deUint32*											data							= reinterpret_cast<const deUint32*>(datas[0]);
189 
190 	for (deUint32 i = 0; i < width; i++)
191 	{
192 		if (data[i] > subgroupSizeControlProperties.maxSubgroupSize ||
193 			data[i] < subgroupSizeControlProperties.minSubgroupSize)
194 		{
195 			log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
196 				<< "gl_SubgroupSize (" << data[i] << ") value is outside limits (" << subgroupSizeControlProperties.minSubgroupSize << ", " << subgroupSizeControlProperties.maxSubgroupSize << ")" << TestLog::EndMessage;
197 
198 			return DE_FALSE;
199 		}
200 
201 		if (checkInternalData->caseDef.requiredSubgroupSizeMode != REQUIRED_SUBGROUP_SIZE_NONE &&
202 			data[i] != checkInternalData->requiredSubgroupSize)
203 		{
204 			log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
205 				<< "gl_SubgroupSize (" << data[i] << ") is not equal to the required subgroup size value (" << checkInternalData->requiredSubgroupSize << ")" << TestLog::EndMessage;
206 
207 			return DE_FALSE;
208 		}
209 	}
210 
211 	return DE_TRUE;
212 }
213 
checkComputeRequireFull(const void * internalData,vector<const void * > datas,const deUint32 numWorkgroups[3],const deUint32 localSize[3],deUint32)214 static bool checkComputeRequireFull (const void*			internalData,
215 									 vector<const void*>	datas,
216 									 const deUint32			numWorkgroups[3],
217 									 const deUint32			localSize[3],
218 									 deUint32)
219 {
220 	const struct internalDataStruct*						checkInternalData				= reinterpret_cast<const struct internalDataStruct *>(internalData);
221 	const Context*											context							= checkInternalData->context;
222 	const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT&	subgroupSizeControlProperties	= context->getSubgroupSizeControlPropertiesEXT();
223 	TestLog&												log								= context->getTestContext().getLog();
224 	const deUint32											globalSizeX						= numWorkgroups[0] * localSize[0];
225 	const deUint32											globalSizeY						= numWorkgroups[1] * localSize[1];
226 	const deUint32											globalSizeZ						= numWorkgroups[2] * localSize[2];
227 	const deUint32											width							= globalSizeX * globalSizeY * globalSizeZ;
228 	const UVec4*											data							= reinterpret_cast<const UVec4*>(datas[0]);
229 	const deUint32											numSubgroups					= (localSize[0] * localSize[1] * localSize[2]) / checkInternalData->requiredSubgroupSize;
230 
231 	for (deUint32 i = 0; i < width; i++)
232 	{
233 		if (data[i].x() > subgroupSizeControlProperties.maxSubgroupSize ||
234 			data[i].x() < subgroupSizeControlProperties.minSubgroupSize)
235 		{
236 			log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
237 				<< "gl_SubgroupSize value ( " << data[i].x() << ") is outside limits [" << subgroupSizeControlProperties.minSubgroupSize << ", " << subgroupSizeControlProperties.maxSubgroupSize << "]" << TestLog::EndMessage;
238 			return DE_FALSE;
239 		}
240 
241 		if (data[i].x() != data[i].y())
242 		{
243 			log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
244 				<< "gl_SubgroupSize ( " << data[i].x() << ") does not match the active number of subgroup invocations (" << data[i].y() << ")" << TestLog::EndMessage;
245 			return DE_FALSE;
246 		}
247 
248 		if (checkInternalData->caseDef.pipelineShaderStageCreateFlags == VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT &&
249 			data[i].x() != checkInternalData->requiredSubgroupSize)
250 		{
251 			log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
252 				<< "expected subgroupSize (" << checkInternalData->requiredSubgroupSize << ") doesn't match gl_SubgroupSize ( " << data[i].x() << ")" << TestLog::EndMessage;
253 			return DE_FALSE;
254 		}
255 
256 		if (checkInternalData->caseDef.pipelineShaderStageCreateFlags == VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT && data[i].z() != numSubgroups)
257 		{
258 			log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
259 				<< "expected number of subgroups dispatched (" << numSubgroups << ") doesn't match gl_NumSubgroups (" << data[i].z() << ")";
260 			return DE_FALSE;
261 		}
262 	}
263 
264 	return DE_TRUE;
265 }
266 
initFrameBufferPrograms(SourceCollections & programCollection,CaseDefinition caseDef)267 void initFrameBufferPrograms (SourceCollections& programCollection, CaseDefinition caseDef)
268 {
269 	const ShaderBuildOptions	buildOptions	(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3, 0u);
270 
271 	if (VK_SHADER_STAGE_FRAGMENT_BIT != caseDef.shaderStage)
272 		subgroups::setFragmentShaderFrameBuffer(programCollection);
273 
274 	if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage && VK_SHADER_STAGE_FRAGMENT_BIT != caseDef.shaderStage)
275 		subgroups::setVertexShaderFrameBuffer(programCollection);
276 
277 	string bdyStr = "uint tempResult = gl_SubgroupSize;\n";
278 
279 	if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
280 	{
281 		ostringstream vertex;
282 
283 		vertex << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
284 			<< "#extension GL_KHR_shader_subgroup_basic: enable\n"
285 			<< "layout(location = 0) in highp vec4 in_position;\n"
286 			<< "layout(location = 0) out float out_color;\n"
287 			<< "\n"
288 			<< "void main (void)\n"
289 			<< "{\n"
290 			<< bdyStr
291 			<< "  out_color = float(tempResult);\n"
292 			<< "  gl_Position = in_position;\n"
293 			<< "  gl_PointSize = 1.0f;\n"
294 			<< "}\n";
295 
296 		programCollection.glslSources.add("vert") << glu::VertexSource(vertex.str()) << buildOptions;
297 	}
298 	else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
299 	{
300 		ostringstream geometry;
301 
302 		geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
303 			<< "#extension GL_KHR_shader_subgroup_basic: enable\n"
304 			<< "layout(points) in;\n"
305 			<< "layout(points, max_vertices = 1) out;\n"
306 			<< "layout(location = 0) out float out_color;\n"
307 			<< "void main (void)\n"
308 			<< "{\n"
309 			<< bdyStr
310 			<< "  out_color = float(tempResult);\n"
311 			<< "  gl_Position = gl_in[0].gl_Position;\n"
312 			<< "  gl_PointSize = 1.0f;"
313 			<< "  EmitVertex();\n"
314 			<< "  EndPrimitive();\n"
315 			<< "}\n";
316 
317 		programCollection.glslSources.add("geometry") << glu::GeometrySource(geometry.str()) << buildOptions;
318 	}
319 	else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
320 	{
321 		ostringstream controlSource;
322 
323 		controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
324 			<< "#extension GL_KHR_shader_subgroup_basic: enable\n"
325 			<< "layout(vertices = 2) out;\n"
326 			<< "layout(location = 0) out float out_color[];\n"
327 			<< "\n"
328 			<< "void main (void)\n"
329 			<< "{\n"
330 			<< "  if (gl_InvocationID == 0)\n"
331 			<< "  {\n"
332 			<< "    gl_TessLevelOuter[0] = 1.0f;\n"
333 			<< "    gl_TessLevelOuter[1] = 1.0f;\n"
334 			<< "  }\n"
335 			<< bdyStr
336 			<< "  out_color[gl_InvocationID ] = float(tempResult);\n"
337 			<< "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
338 			<< "}\n";
339 
340 		programCollection.glslSources.add("tesc") << glu::TessellationControlSource(controlSource.str()) << buildOptions;
341 		subgroups::setTesEvalShaderFrameBuffer(programCollection);
342 	}
343 	else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
344 	{
345 		ostringstream evaluationSource;
346 		evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
347 			<< "#extension GL_KHR_shader_subgroup_basic: enable\n"
348 			<< "layout(isolines, equal_spacing, ccw ) in;\n"
349 			<< "layout(location = 0) out float out_color;\n"
350 			<< "void main (void)\n"
351 			<< "{\n"
352 			<< bdyStr
353 			<< "  out_color  = float(tempResult);\n"
354 			<< "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
355 			<< "}\n";
356 
357 		subgroups::setTesCtrlShaderFrameBuffer(programCollection);
358 		programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
359 	}
360 	else if (VK_SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
361 	{
362 		const string vertex	= string(glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)) + "\n"
363 			"void main (void)\n"
364 			"{\n"
365 			"  vec2 uv = vec2(float(gl_VertexIndex & 1), float((gl_VertexIndex >> 1) & 1));\n"
366 			"  gl_Position = vec4(uv * 4.0f -2.0f, 0.0f, 1.0f);\n"
367 			"  gl_PointSize = 1.0f;\n"
368 			"}\n";
369 		programCollection.glslSources.add("vert") << glu::VertexSource(vertex) << buildOptions;
370 
371 		ostringstream fragmentSource;
372 
373 		fragmentSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
374 					   << "precision highp int;\n"
375 						<< "#extension GL_KHR_shader_subgroup_basic: enable\n"
376 					   << "layout(location = 0) out uint out_color;\n"
377 					   << "void main()\n"
378 					   << "{\n"
379 					   << bdyStr
380 					   << "	 out_color = tempResult;\n"
381 					   << "}\n";
382 
383 		programCollection.glslSources.add("fragment") << glu::FragmentSource(fragmentSource.str()) << buildOptions;
384 	}
385 	else
386 	{
387 		DE_FATAL("Unsupported shader stage");
388 	}
389 }
390 
getExtHeader(const CaseDefinition &)391 string getExtHeader (const CaseDefinition&)
392 {
393 	return "#extension GL_KHR_shader_subgroup_basic: enable\n";
394 }
395 
getPerStageHeadDeclarations(const CaseDefinition & caseDef)396 vector<string> getPerStageHeadDeclarations (const CaseDefinition& caseDef)
397 {
398 	const deUint32	stageCount	= subgroups::getStagesCount(caseDef.shaderStage);
399 	const bool		fragment	= (caseDef.shaderStage & VK_SHADER_STAGE_FRAGMENT_BIT) != 0;
400 	vector<string>	result		(stageCount, string());
401 
402 	if (fragment)
403 		result.reserve(result.size() + 1);
404 
405 	for (size_t i = 0; i < result.size(); ++i)
406 	{
407 		result[i] =
408 			"layout(set = 0, binding = " + de::toString(i) + ", std430) buffer Buffer1\n"
409 			"{\n"
410 			"  uint result[];\n"
411 			"};\n";
412 	}
413 
414 	if (fragment)
415 	{
416 		const string	fragPart	=
417 			"layout(location = 0) out uint result;\n";
418 
419 		result.push_back(fragPart);
420 	}
421 
422 	return result;
423 }
424 
getTestSource(const CaseDefinition &)425 string getTestSource (const CaseDefinition&)
426 {
427 	return
428 		"  uint tempResult = gl_SubgroupSize;\n"
429 		"  tempRes = tempResult;\n";
430 }
431 
initPrograms(SourceCollections & programCollection,CaseDefinition caseDef)432 void initPrograms (SourceCollections& programCollection, CaseDefinition caseDef)
433 {
434 	const SpirvVersion			spirvVersion		= isAllRayTracingStages(caseDef.shaderStage) ? SPIRV_VERSION_1_4 : SPIRV_VERSION_1_3;
435 	const ShaderBuildOptions	buildOptions		(programCollection.usedVulkanVersion, spirvVersion, 0u);
436 	const string				extHeader			= getExtHeader(caseDef);
437 	const string				testSrc				= getTestSource(caseDef);
438 	const vector<string>		headDeclarations	= getPerStageHeadDeclarations(caseDef);
439 
440 	subgroups::initStdPrograms(programCollection, buildOptions, caseDef.shaderStage, VK_FORMAT_R32_UINT, *caseDef.geometryPointSizeSupported, extHeader, testSrc, "", headDeclarations);
441 }
442 
initProgramsRequireFull(SourceCollections & programCollection,CaseDefinition caseDef)443 void initProgramsRequireFull (SourceCollections& programCollection, CaseDefinition caseDef)
444 {
445 	if (VK_SHADER_STAGE_COMPUTE_BIT != caseDef.shaderStage)
446 		DE_FATAL("Unsupported shader stage");
447 
448 	ostringstream src;
449 
450 	src << "#version 450\n"
451 		<< "#extension GL_KHR_shader_subgroup_basic: enable\n"
452 		<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
453 		<< "layout (local_size_x_id = 0, local_size_y_id = 1, "
454 		"local_size_z_id = 2) in;\n"
455 		<< "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
456 		<< "{\n"
457 		<< "  uvec4 result[];\n"
458 		<< "};\n"
459 		<< "\n"
460 		<< "void main (void)\n"
461 		<< "{\n"
462 		<< "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
463 		<< "  highp uint offset = globalSize.x * ((globalSize.y * "
464 		"gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
465 		"gl_GlobalInvocationID.x;\n"
466 		<< "   result[offset].x = gl_SubgroupSize;" // save the subgroup size value
467 		<< "   uint numActive = subgroupBallotBitCount(subgroupBallot(true));\n"
468 		<< "   result[offset].y = numActive;\n" // save the number of active subgroup invocations
469 		<< "   result[offset].z = gl_NumSubgroups;" // save the number of subgroups dispatched.
470 		<< "}\n";
471 
472 	programCollection.glslSources.add("comp") << glu::ComputeSource(src.str()) << ShaderBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3, 0u);
473 }
474 
supportedCheck(Context & context)475 void supportedCheck (Context& context)
476 {
477 	if (!subgroups::isSubgroupSupported(context))
478 		TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
479 
480 	context.requireDeviceFunctionality("VK_EXT_subgroup_size_control");
481 }
482 
supportedCheckFeatures(Context & context,CaseDefinition caseDef)483 void supportedCheckFeatures (Context& context, CaseDefinition caseDef)
484 {
485 	supportedCheck(context);
486 
487 	if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
488 	{
489 		TCU_THROW(NotSupportedError, "Shader stage is required to support subgroup operations!");
490 	}
491 
492 	if (caseDef.shaderStage == VK_SHADER_STAGE_ALL_GRAPHICS)
493 	{
494 		const VkPhysicalDeviceFeatures&		features	= context.getDeviceFeatures();
495 
496 		if (!features.tessellationShader || !features.geometryShader)
497 			TCU_THROW(NotSupportedError, "Device does not support tessellation or geometry shaders");
498 	}
499 
500 	if (caseDef.requiresBallot && !subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_BALLOT_BIT))
501 	{
502 		TCU_THROW(NotSupportedError, "Device does not support subgroup ballot operations");
503 	}
504 
505 	if (caseDef.requiredSubgroupSizeMode != REQUIRED_SUBGROUP_SIZE_NONE ||
506 		caseDef.pipelineShaderStageCreateFlags == VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT)
507 	{
508 		const VkPhysicalDeviceSubgroupSizeControlFeaturesEXT&	subgroupSizeControlFeatures	= context.getSubgroupSizeControlFeaturesEXT();
509 
510 		if (subgroupSizeControlFeatures.subgroupSizeControl == DE_FALSE)
511 			TCU_THROW(NotSupportedError, "Device does not support varying subgroup sizes nor required subgroup size");
512 
513 		if (caseDef.requiredSubgroupSizeMode != REQUIRED_SUBGROUP_SIZE_NONE)
514 		{
515 			const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT&	subgroupSizeControlProperties	= context.getSubgroupSizeControlPropertiesEXT();
516 
517 			if ((subgroupSizeControlProperties.requiredSubgroupSizeStages & caseDef.shaderStage) != caseDef.shaderStage)
518 				TCU_THROW(NotSupportedError, "Device does not support setting required subgroup size for the stages selected");
519 		}
520 	}
521 
522 	if (caseDef.pipelineShaderStageCreateFlags == VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT)
523 	{
524 		const VkPhysicalDeviceSubgroupSizeControlFeaturesEXT&	subgroupSizeControlFeatures	= context.getSubgroupSizeControlFeaturesEXT();
525 
526 		if (subgroupSizeControlFeatures.computeFullSubgroups == DE_FALSE)
527 			TCU_THROW(NotSupportedError, "Device does not support full subgroups in compute shaders");
528 	}
529 
530 	*caseDef.geometryPointSizeSupported = subgroups::isTessellationAndGeometryPointSizeSupported(context);
531 
532 	if (isAllRayTracingStages(caseDef.shaderStage))
533 	{
534 		context.requireDeviceFunctionality("VK_KHR_ray_tracing_pipeline");
535 	}
536 }
537 
supportedCheckFeaturesShader(Context & context,CaseDefinition caseDef)538 void supportedCheckFeaturesShader (Context& context, CaseDefinition caseDef)
539 {
540 	supportedCheckFeatures(context, caseDef);
541 
542 	subgroups::supportedCheckShader(context, caseDef.shaderStage);
543 }
544 
noSSBOtest(Context & context,const CaseDefinition caseDef)545 TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
546 {
547 	const VkFormat					format			= VK_FORMAT_R32_UINT;
548 	const deUint32&					flags			= caseDef.pipelineShaderStageCreateFlags;
549 	const struct internalDataStruct	internalData	=
550 	{
551 		&context,
552 		caseDef,
553 		0u,
554 	};
555 
556 	switch (caseDef.shaderStage)
557 	{
558 		case VK_SHADER_STAGE_VERTEX_BIT:					return subgroups::makeVertexFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData, checkVertexPipelineStages, flags, 0u);
559 		case VK_SHADER_STAGE_GEOMETRY_BIT:					return subgroups::makeGeometryFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData, checkVertexPipelineStages, flags, 0u);
560 		case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:		return subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData, checkVertexPipelineStages, caseDef.shaderStage, flags, 0u);
561 		case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:	return subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData, checkVertexPipelineStages, caseDef.shaderStage, flags, 0u);
562 		case VK_SHADER_STAGE_FRAGMENT_BIT:					return subgroups::makeFragmentFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData, checkFragmentPipelineStages, flags, 0u);
563 		default:											TCU_THROW(InternalError, "Unhandled shader stage");
564 	}
565 }
566 
test(Context & context,const CaseDefinition caseDef)567 TestStatus test (Context& context, const CaseDefinition caseDef)
568 {
569 	if (isAllComputeStages(caseDef.shaderStage))
570 	{
571 		const deUint32						numWorkgroups[3]							= {1, 1, 1};
572 		const deUint32						subgroupSize								= subgroups::getSubgroupSize(context);
573 		const VkPhysicalDeviceProperties	physicalDeviceProperties					= context.getDeviceProperties();
574 		// Calculate the local workgroup sizes to exercise the maximum supported by the driver
575 		const UVec3							localSize									= getLocalSizes(physicalDeviceProperties, physicalDeviceProperties.limits.maxComputeWorkGroupInvocations);
576 		const deUint32						localSizesToTestCount						= 16;
577 		const deUint32						localSizesToTest[localSizesToTestCount][3]	=
578 		{
579 			{1, 1, 1},
580 			{32, 4, 1},
581 			{32, 1, 4},
582 			{1, 32, 4},
583 			{1, 4, 32},
584 			{4, 1, 32},
585 			{4, 32, 1},
586 			{subgroupSize, 1, 1},
587 			{1, subgroupSize, 1},
588 			{1, 1, subgroupSize},
589 			{3, 5, 7},
590 			{128, 1, 1},
591 			{1, 128, 1},
592 			{1, 1, 64},
593 			{localSize.x(), localSize.y(), localSize.z()},
594 			{1, 1, 1} // Isn't used, just here to make double buffering checks easier
595 		};
596 		const struct internalDataStruct		internalData								=
597 		{
598 			&context,
599 			caseDef,
600 			subgroupSize,
601 		};
602 
603 		return subgroups::makeComputeTestRequiredSubgroupSize(context,
604 															  VK_FORMAT_R32_UINT,
605 															  DE_NULL,
606 															  0,
607 															  &internalData,
608 															  checkCompute,
609 															  caseDef.pipelineShaderStageCreateFlags,
610 															  numWorkgroups,
611 															  DE_FALSE,
612 															  subgroupSize,
613 															  localSizesToTest,
614 															  localSizesToTestCount);
615 	}
616 	else if (isAllGraphicsStages(caseDef.shaderStage))
617 	{
618 		const VkShaderStageFlags	stages			= subgroups::getPossibleGraphicsSubgroupStages(context, caseDef.shaderStage);
619 		struct internalDataStruct	internalData	=
620 		{
621 			&context,
622 			caseDef,
623 			0u,
624 		};
625 
626 		return subgroups::allStagesRequiredSubgroupSize(context,
627 														VK_FORMAT_R32_UINT,
628 														DE_NULL,
629 														0,
630 														&internalData,
631 														checkVertexPipelineStages,
632 														stages,
633 														caseDef.pipelineShaderStageCreateFlags,
634 														caseDef.pipelineShaderStageCreateFlags,
635 														caseDef.pipelineShaderStageCreateFlags,
636 														caseDef.pipelineShaderStageCreateFlags,
637 														caseDef.pipelineShaderStageCreateFlags,
638 														DE_NULL);
639 	}
640 	else if (isAllRayTracingStages(caseDef.shaderStage))
641 	{
642 		const VkShaderStageFlags		stages			= subgroups::getPossibleRayTracingSubgroupStages(context, caseDef.shaderStage);
643 		const vector<deUint32>			flags			(6, caseDef.pipelineShaderStageCreateFlags);
644 		const struct internalDataStruct	internalData	=
645 		{
646 			&context,
647 			caseDef,
648 			0u,
649 		};
650 
651 		return subgroups::allRayTracingStagesRequiredSubgroupSize(context,
652 																  VK_FORMAT_R32_UINT,
653 																  DE_NULL,
654 																  0,
655 																  &internalData,
656 																  checkVertexPipelineStages,
657 																  stages,
658 																  flags.data(),
659 																  DE_NULL);
660 	}
661 	else
662 		TCU_THROW(InternalError, "Unknown stage or invalid stage set");
663 }
664 
testRequireFullSubgroups(Context & context,const CaseDefinition caseDef)665 TestStatus testRequireFullSubgroups (Context& context, const CaseDefinition caseDef)
666 {
667 	DE_ASSERT(VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage);
668 	DE_ASSERT(caseDef.requiredSubgroupSizeMode == REQUIRED_SUBGROUP_SIZE_NONE);
669 
670 	const deUint32												numWorkgroups[3]							= {1, 1, 1};
671 	const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT&		subgroupSizeControlProperties				= context.getSubgroupSizeControlPropertiesEXT();
672 	const VkPhysicalDeviceProperties&							physicalDeviceProperties					= context.getDeviceProperties();
673 	// Calculate the local workgroup sizes to exercise the maximum supported by the driver
674 	const UVec3													localSize									= getLocalSizes(physicalDeviceProperties, physicalDeviceProperties.limits.maxComputeWorkGroupInvocations);
675 	const deUint32												subgroupSize								= subgroups::getSubgroupSize(context);
676 	// For full subgroups and allow varying subgroup size, localsize X must be a multiple of maxSubgroupSize.
677 	// We set local size X for this test to the maximum, regardless if allow varying subgroup size is enabled or not.
678 	const deUint32												localSizesToTestCount						= 7;
679 	const deUint32												localSizesToTest[localSizesToTestCount][3]	=
680 	{
681 		{subgroupSizeControlProperties.maxSubgroupSize, 1, 1},
682 		{subgroupSizeControlProperties.maxSubgroupSize, 4, 1},
683 		{subgroupSizeControlProperties.maxSubgroupSize, 1, 4},
684 		{subgroupSizeControlProperties.maxSubgroupSize * 2, 1, 2},
685 		{subgroupSizeControlProperties.maxSubgroupSize * 4, 1, 1},
686 		{localSize.x(), localSize.y(), localSize.z()},
687 		{1, 1, 1} // Isn't used, just here to make double buffering checks easier
688 	};
689 	const struct internalDataStruct								internalData								=
690 	{
691 		&context,
692 		caseDef,
693 		subgroupSize,
694 	};
695 
696 	return subgroups::makeComputeTestRequiredSubgroupSize(context,
697 														  VK_FORMAT_R32G32B32A32_UINT,
698 														  DE_NULL,
699 														  0,
700 														  &internalData,
701 														  checkComputeRequireFull,
702 														  caseDef.pipelineShaderStageCreateFlags,
703 														  numWorkgroups,
704 														  DE_FALSE,
705 														  subgroupSize,
706 														  localSizesToTest,
707 														  localSizesToTestCount);
708 }
709 
testRequireSubgroupSize(Context & context,const CaseDefinition caseDef)710 TestStatus testRequireSubgroupSize (Context& context, const CaseDefinition caseDef)
711 {
712 	if (isAllComputeStages(caseDef.shaderStage))
713 	{
714 		const deUint32											numWorkgroups[3]							= {1, 1, 1};
715 		const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT&	subgroupSizeControlProperties				= context.getSubgroupSizeControlPropertiesEXT();
716 		const VkPhysicalDeviceProperties&						physicalDeviceProperties					= context.getDeviceProperties();
717 		const deUint32											requiredSubgroupSize						= getRequiredSubgroupSizeFromMode(context, caseDef, subgroupSizeControlProperties);
718 		const deUint64											maxSubgroupLimitSize						= (deUint64)requiredSubgroupSize * subgroupSizeControlProperties.maxComputeWorkgroupSubgroups;
719 		const deUint32											maxTotalLocalSize							= (deUint32)min<deUint64>(maxSubgroupLimitSize, physicalDeviceProperties.limits.maxComputeWorkGroupInvocations);
720 		const UVec3												localSize									= getLocalSizes(physicalDeviceProperties, maxTotalLocalSize);
721 		const deUint32											localSizesToTestCount						= 5;
722 		const deUint32											localSizesToTest[localSizesToTestCount][3]	=
723 		{
724 			{requiredSubgroupSize, 1, 1},
725 			{1, requiredSubgroupSize, 1},
726 			{1, 1, requiredSubgroupSize},
727 			{localSize.x(), localSize.y(), localSize.z()},
728 			{1, 1, 1} // Isn't used, just here to make double buffering checks easier
729 		};
730 		struct internalDataStruct								internalData								=
731 		{
732 			&context,				//  const Context*			context;
733 			caseDef,				//  struct CaseDefinition	caseDef;
734 			requiredSubgroupSize,	//  deUint32				requiredSubgroupSize;
735 		};
736 
737 		// Depending on the flag we need to run one verification function or another.
738 		return subgroups::makeComputeTestRequiredSubgroupSize(context,
739 															  VK_FORMAT_R32G32B32A32_UINT,
740 															  DE_NULL,
741 															  0,
742 															  &internalData,
743 															  caseDef.pipelineShaderStageCreateFlags == VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT ? checkComputeRequireFull : checkCompute,
744 															  caseDef.pipelineShaderStageCreateFlags,
745 															  numWorkgroups,
746 															  DE_TRUE,
747 															  requiredSubgroupSize,
748 															  localSizesToTest,
749 															  localSizesToTestCount);
750 	}
751 	else if (isAllGraphicsStages(caseDef.shaderStage))
752 	{
753 		const VkShaderStageFlags								stages							= subgroups::getPossibleGraphicsSubgroupStages(context, caseDef.shaderStage);
754 		const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT&	subgroupSizeControlProperties	= context.getSubgroupSizeControlPropertiesEXT();
755 		const deUint32											requiredSubgroupSize			= getRequiredSubgroupSizeFromMode(context, caseDef, subgroupSizeControlProperties);
756 		const deUint32											requiredSubgroupSizes[5]		= { requiredSubgroupSize, requiredSubgroupSize, requiredSubgroupSize, requiredSubgroupSize, requiredSubgroupSize};
757 		const struct internalDataStruct							internalData					=
758 		{
759 			&context,				//  const Context*			context;
760 			caseDef,				//  struct CaseDefinition	caseDef;
761 			requiredSubgroupSize,	//  deUint32				requiredSubgroupSize;
762 		};
763 
764 		return subgroups::allStagesRequiredSubgroupSize(context,
765 														VK_FORMAT_R32_UINT,
766 														DE_NULL,
767 														0,
768 														&internalData,
769 														checkVertexPipelineStages,
770 														stages,
771 														caseDef.pipelineShaderStageCreateFlags,
772 														caseDef.pipelineShaderStageCreateFlags,
773 														caseDef.pipelineShaderStageCreateFlags,
774 														caseDef.pipelineShaderStageCreateFlags,
775 														caseDef.pipelineShaderStageCreateFlags,
776 														requiredSubgroupSizes);
777 	}
778 	else if (isAllRayTracingStages(caseDef.shaderStage))
779 	{
780 		const VkShaderStageFlags								stages							= subgroups::getPossibleRayTracingSubgroupStages(context, caseDef.shaderStage);
781 		const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT&	subgroupSizeControlProperties	= context.getSubgroupSizeControlPropertiesEXT();
782 		const deUint32											requiredSubgroupSize			= getRequiredSubgroupSizeFromMode(context, caseDef, subgroupSizeControlProperties);
783 		const vector<deUint32>									flags							(6, caseDef.pipelineShaderStageCreateFlags);
784 		const vector<deUint32>									requiredSubgroupSizes			(6, requiredSubgroupSize);
785 		const struct internalDataStruct							internalData					=
786 		{
787 			&context,				//  const Context*			context;
788 			caseDef,				//  struct CaseDefinition	caseDef;
789 			requiredSubgroupSize,	//  deUint32				requiredSubgroupSize;
790 		};
791 
792 		return subgroups::allRayTracingStagesRequiredSubgroupSize(context,
793 																  VK_FORMAT_R32_UINT,
794 																  DE_NULL,
795 																  0,
796 																  &internalData,
797 																  checkVertexPipelineStages,
798 																  stages,
799 																  flags.data(),
800 																  requiredSubgroupSizes.data());
801 	}
802 	else
803 		TCU_THROW(InternalError, "Unknown stage or invalid stage set");
804 }
805 
noSSBOtestRequireSubgroupSize(Context & context,const CaseDefinition caseDef)806 TestStatus noSSBOtestRequireSubgroupSize (Context& context, const CaseDefinition caseDef)
807 {
808 	const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& subgroupSizeControlProperties	= context.getSubgroupSizeControlPropertiesEXT();
809 	const deUint32											requiredSubgroupSize			= getRequiredSubgroupSizeFromMode(context, caseDef, subgroupSizeControlProperties);
810 	const VkFormat											format							= VK_FORMAT_R32_UINT;
811 	const deUint32&											flags							= caseDef.pipelineShaderStageCreateFlags;
812 	const deUint32&											size							= requiredSubgroupSize;
813 	struct internalDataStruct								internalData					=
814 	{
815 		&context,
816 		caseDef,
817 		requiredSubgroupSize,
818 	};
819 
820 	switch (caseDef.shaderStage)
821 	{
822 		case VK_SHADER_STAGE_VERTEX_BIT:					return subgroups::makeVertexFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData, checkVertexPipelineStages, flags, size);
823 		case VK_SHADER_STAGE_GEOMETRY_BIT:					return subgroups::makeGeometryFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData, checkVertexPipelineStages, flags, size);
824 		case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:		return subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData, checkVertexPipelineStages, caseDef.shaderStage, flags, size);
825 		case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:	return subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData, checkVertexPipelineStages, caseDef.shaderStage, flags, size);
826 		case VK_SHADER_STAGE_FRAGMENT_BIT:					return subgroups::makeFragmentFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData, checkFragmentPipelineStages, flags, size);
827 		default:											TCU_THROW(InternalError, "Unhandled shader stage");
828 	}
829 }
830 
testSanitySubgroupSizeProperties(Context & context)831 TestStatus testSanitySubgroupSizeProperties (Context& context)
832 {
833 	VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroupSizeControlProperties;
834 	subgroupSizeControlProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT;
835 	subgroupSizeControlProperties.pNext = DE_NULL;
836 
837 	VkPhysicalDeviceSubgroupProperties subgroupProperties;
838 	subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
839 	subgroupProperties.pNext = &subgroupSizeControlProperties;
840 
841 	VkPhysicalDeviceProperties2 properties;
842 	properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
843 	properties.pNext = &subgroupProperties;
844 
845 	context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
846 
847 	if (subgroupProperties.subgroupSize > subgroupSizeControlProperties.maxSubgroupSize ||
848 		subgroupProperties.subgroupSize < subgroupSizeControlProperties.minSubgroupSize)
849 	{
850 		ostringstream error;
851 		error << "subgroupSize (" << subgroupProperties.subgroupSize << ") is not between maxSubgroupSize (";
852 		error << subgroupSizeControlProperties.maxSubgroupSize << ") and minSubgroupSize (";
853 		error << subgroupSizeControlProperties.minSubgroupSize << ")";
854 
855 		return TestStatus::fail(error.str().c_str());
856 	}
857 
858 	return TestStatus::pass("OK");
859 }
860 }
861 
862 namespace vkt
863 {
864 namespace subgroups
865 {
createSubgroupsSizeControlTests(TestContext & testCtx)866 TestCaseGroup* createSubgroupsSizeControlTests (TestContext& testCtx)
867 {
868 	de::MovePtr<TestCaseGroup>	group				(new TestCaseGroup(testCtx, "size_control", "VK_EXT_subgroup_size_control tests"));
869 	de::MovePtr<TestCaseGroup>	framebufferGroup	(new TestCaseGroup(testCtx, "framebuffer", "Subgroup size control category tests: framebuffer"));
870 	de::MovePtr<TestCaseGroup>	computeGroup		(new TestCaseGroup(testCtx, "compute", "Subgroup size control category tests: compute"));
871 	de::MovePtr<TestCaseGroup>	graphicsGroup		(new TestCaseGroup(testCtx, "graphics", "Subgroup size control category tests: graphics"));
872 	de::MovePtr<TestCaseGroup>	raytracingGroup		(new TestCaseGroup(testCtx, "ray_tracing", "Subgroup size control category tests: ray tracing"));
873 	de::MovePtr<TestCaseGroup>	genericGroup		(new TestCaseGroup(testCtx, "generic", "Subgroup size control category tests: generic"));
874 	const VkShaderStageFlags	stages[]			=
875 	{
876 		VK_SHADER_STAGE_VERTEX_BIT,
877 		VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
878 		VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
879 		VK_SHADER_STAGE_GEOMETRY_BIT,
880 		VK_SHADER_STAGE_FRAGMENT_BIT,
881 	};
882 
883 	// Test sanity of the subgroup size properties.
884 	{
885 		addFunctionCase(genericGroup.get(), "subgroup_size_properties", "", supportedCheck, testSanitySubgroupSizeProperties);
886 	}
887 
888 	// Allow varying subgroup case.
889 	{
890 		const CaseDefinition caseDefCompute = {VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT, VK_SHADER_STAGE_COMPUTE_BIT, DE_FALSE, REQUIRED_SUBGROUP_SIZE_NONE, de::SharedPtr<bool>(new bool)};
891 		addFunctionCaseWithPrograms(computeGroup.get(), "allow_varying_subgroup_size", "", supportedCheckFeatures, initPrograms, test, caseDefCompute);
892 		const CaseDefinition caseDefAllGraphics = {VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT, VK_SHADER_STAGE_ALL_GRAPHICS, DE_FALSE, REQUIRED_SUBGROUP_SIZE_NONE, de::SharedPtr<bool>(new bool)};
893 		addFunctionCaseWithPrograms(graphicsGroup.get(), "allow_varying_subgroup_size", "", supportedCheckFeaturesShader, initPrograms, test, caseDefAllGraphics);
894 		const CaseDefinition caseDefAllRaytracing = {VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT, SHADER_STAGE_ALL_RAY_TRACING, DE_FALSE, REQUIRED_SUBGROUP_SIZE_NONE, de::SharedPtr<bool>(new bool)};
895 		addFunctionCaseWithPrograms(raytracingGroup.get(), "allow_varying_subgroup_size", "", supportedCheckFeaturesShader, initPrograms, test, caseDefAllRaytracing);
896 
897 		for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
898 		{
899 			const CaseDefinition caseDefStage = {VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT, stages[stageIndex], DE_FALSE, REQUIRED_SUBGROUP_SIZE_NONE, de::SharedPtr<bool>(new bool)};
900 			addFunctionCaseWithPrograms(framebufferGroup.get(),  getShaderStageName(caseDefStage.shaderStage) + "_allow_varying_subgroup_size", "", supportedCheckFeaturesShader, initFrameBufferPrograms, noSSBOtest, caseDefStage);
901 		}
902 	}
903 
904 	// Require full subgroups case (only compute shaders).
905 	{
906 		const CaseDefinition caseDef = {VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT, VK_SHADER_STAGE_COMPUTE_BIT, DE_TRUE, REQUIRED_SUBGROUP_SIZE_NONE, de::SharedPtr<bool>(new bool)};
907 		addFunctionCaseWithPrograms(computeGroup.get(), "require_full_subgroups", "", supportedCheckFeatures, initProgramsRequireFull, testRequireFullSubgroups, caseDef);
908 	}
909 
910 	// Require full subgroups together with allow varying subgroup (only compute shaders).
911 	{
912 		deUint32 flags = VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT | VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT;
913 		const CaseDefinition caseDef = {flags, VK_SHADER_STAGE_COMPUTE_BIT, DE_TRUE, REQUIRED_SUBGROUP_SIZE_NONE, de::SharedPtr<bool>(new bool)};
914 		addFunctionCaseWithPrograms(computeGroup.get(), "require_full_subgroups_allow_varying_subgroup_size", "", supportedCheckFeatures, initProgramsRequireFull, testRequireFullSubgroups, caseDef);
915 	}
916 
917 	// Tests to check setting a required subgroup size value.
918 	{
919 		const CaseDefinition caseDefAllGraphicsMax = {0u, VK_SHADER_STAGE_ALL_GRAPHICS, DE_FALSE, REQUIRED_SUBGROUP_SIZE_MAX, de::SharedPtr<bool>(new bool)};
920 		addFunctionCaseWithPrograms(graphicsGroup.get(), "required_subgroup_size_max", "", supportedCheckFeaturesShader, initPrograms, testRequireSubgroupSize, caseDefAllGraphicsMax);
921 		const CaseDefinition caseDefComputeMax = {0u, VK_SHADER_STAGE_COMPUTE_BIT, DE_FALSE, REQUIRED_SUBGROUP_SIZE_MAX, de::SharedPtr<bool>(new bool)};
922 		addFunctionCaseWithPrograms(computeGroup.get(), "required_subgroup_size_max", "", supportedCheckFeatures, initPrograms, testRequireSubgroupSize, caseDefComputeMax);
923 		const CaseDefinition caseDefAllRaytracingMax = {0u, SHADER_STAGE_ALL_RAY_TRACING, DE_FALSE, REQUIRED_SUBGROUP_SIZE_MAX, de::SharedPtr<bool>(new bool)};
924 		addFunctionCaseWithPrograms(raytracingGroup.get(), "required_subgroup_size_max", "", supportedCheckFeaturesShader, initPrograms, testRequireSubgroupSize, caseDefAllRaytracingMax);
925 
926 		const CaseDefinition caseDefAllGraphicsMin = {0u, VK_SHADER_STAGE_ALL_GRAPHICS, DE_FALSE, REQUIRED_SUBGROUP_SIZE_MIN, de::SharedPtr<bool>(new bool)};
927 		addFunctionCaseWithPrograms(graphicsGroup.get(), "required_subgroup_size_min", "", supportedCheckFeaturesShader, initPrograms, testRequireSubgroupSize, caseDefAllGraphicsMin);
928 		const CaseDefinition caseDefComputeMin = {0u, VK_SHADER_STAGE_COMPUTE_BIT, DE_FALSE, REQUIRED_SUBGROUP_SIZE_MIN, de::SharedPtr<bool>(new bool)};
929 		addFunctionCaseWithPrograms(computeGroup.get(), "required_subgroup_size_min", "", supportedCheckFeatures, initPrograms, testRequireSubgroupSize, caseDefComputeMin);
930 		const CaseDefinition caseDefAllRaytracingMin = {0u, SHADER_STAGE_ALL_RAY_TRACING, DE_FALSE, REQUIRED_SUBGROUP_SIZE_MIN, de::SharedPtr<bool>(new bool)};
931 		addFunctionCaseWithPrograms(raytracingGroup.get(), "required_subgroup_size_min", "", supportedCheckFeaturesShader, initPrograms, testRequireSubgroupSize, caseDefAllRaytracingMin);
932 		for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
933 		{
934 			const CaseDefinition caseDefStageMax = {0u, stages[stageIndex], DE_FALSE, REQUIRED_SUBGROUP_SIZE_MAX, de::SharedPtr<bool>(new bool)};
935 			addFunctionCaseWithPrograms(framebufferGroup.get(),  getShaderStageName(caseDefStageMax.shaderStage) + "_required_subgroup_size_max", "", supportedCheckFeaturesShader, initFrameBufferPrograms, noSSBOtestRequireSubgroupSize, caseDefStageMax);
936 			const CaseDefinition caseDefStageMin = {0u, stages[stageIndex], DE_FALSE, REQUIRED_SUBGROUP_SIZE_MIN, de::SharedPtr<bool>(new bool)};
937 			addFunctionCaseWithPrograms(framebufferGroup.get(),  getShaderStageName(caseDefStageMin.shaderStage) + "_required_subgroup_size_min", "", supportedCheckFeaturesShader, initFrameBufferPrograms, noSSBOtestRequireSubgroupSize, caseDefStageMin);
938 		}
939 	}
940 
941 	// Tests to check setting a required subgroup size value, together with require full subgroups (only compute shaders).
942 	{
943 		deUint32 flags = VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT;
944 		const CaseDefinition caseDefMax = {flags, VK_SHADER_STAGE_COMPUTE_BIT, DE_TRUE, REQUIRED_SUBGROUP_SIZE_MAX, de::SharedPtr<bool>(new bool)};
945 		addFunctionCaseWithPrograms(computeGroup.get(), "required_subgroup_size_max_require_full_subgroups", "", supportedCheckFeatures, initProgramsRequireFull, testRequireSubgroupSize, caseDefMax);
946 		const CaseDefinition caseDefMin = {flags, VK_SHADER_STAGE_COMPUTE_BIT, DE_TRUE, REQUIRED_SUBGROUP_SIZE_MIN, de::SharedPtr<bool>(new bool)};
947 		addFunctionCaseWithPrograms(computeGroup.get(), "required_subgroup_size_min_require_full_subgroups", "", supportedCheckFeatures, initProgramsRequireFull, testRequireSubgroupSize, caseDefMin);
948 	}
949 
950 	group->addChild(genericGroup.release());
951 	group->addChild(graphicsGroup.release());
952 	group->addChild(computeGroup.release());
953 	group->addChild(framebufferGroup.release());
954 	group->addChild(raytracingGroup.release());
955 
956 	return group.release();
957 }
958 
959 } // subgroups
960 } // vkt
961