• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2019 The Khronos Group Inc.
6  * Copyright (c) 2019 Google Inc.
7  * Copyright (c) 2017 Codeplay Software Ltd.
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  */ /*!
22  * \file
23  * \brief Subgroups Tests
24  */ /*--------------------------------------------------------------------*/
25 
26 #include "vktSubgroupsBasicTests.hpp"
27 #include "vktSubgroupsTestsUtils.hpp"
28 
29 #include <string>
30 #include <vector>
31 
32 using namespace tcu;
33 using namespace std;
34 using namespace vk;
35 using namespace vkt;
36 
37 namespace
38 {
// Subgroup operations covered by this file. The trailing comment on each
// entry is the GLSL built-in name that getOpTypeName() reports for it.
enum OpType
{
	OPTYPE_ELECT							= 0,	// subgroupElect
	OPTYPE_SUBGROUP_BARRIER					= 1,	// subgroupBarrier
	OPTYPE_SUBGROUP_MEMORY_BARRIER			= 2,	// subgroupMemoryBarrier
	OPTYPE_SUBGROUP_MEMORY_BARRIER_BUFFER	= 3,	// subgroupMemoryBarrierBuffer
	OPTYPE_SUBGROUP_MEMORY_BARRIER_SHARED	= 4,	// subgroupMemoryBarrierShared
	OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE	= 5,	// subgroupMemoryBarrierImage
	OPTYPE_LAST								= 6		// one past the last operation; getOpTypeName() rejects it
};
49 
50 struct CaseDefinition
51 {
52 	OpType				opType;
53 	VkShaderStageFlags	shaderStage;
54 	de::SharedPtr<bool>	geometryPointSizeSupported;
55 	deBool				requiredSubgroupSize;
56 };
57 
58 static const deUint32		ELECTED_VALUE		= 42u;
59 static const deUint32		UNELECTED_VALUE		= 13u;
60 static const VkDeviceSize	SHADER_BUFFER_SIZE	= 4096ull; // min(maxUniformBufferRange, maxImageDimension1D)
61 
_checkFragmentSubgroupBarriersNoSSBO(vector<const void * > datas,deUint32 width,deUint32 height,bool withImage)62 static bool _checkFragmentSubgroupBarriersNoSSBO (vector<const void*>	datas,
63 												  deUint32				width,
64 												  deUint32				height,
65 												  bool					withImage)
66 {
67 	const float* const	resultData	= reinterpret_cast<const float*>(datas[0]);
68 
69 	for (deUint32 x = 0u; x < width; ++x)
70 	{
71 		for (deUint32 y = 0u; y < height; ++y)
72 		{
73 			const deUint32 ndx = (x * height + y) * 4u;
74 
75 			if (!withImage && 0.0f == resultData[ndx])
76 			{
77 				return false;
78 			}
79 			else if (1.0f == resultData[ndx +2])
80 			{
81 				if(resultData[ndx] != resultData[ndx +1])
82 				{
83 					return false;
84 				}
85 			}
86 			else if (resultData[ndx] != resultData[ndx +3])
87 			{
88 				return false;
89 			}
90 		}
91 	}
92 
93 	return true;
94 }
95 
checkFragmentSubgroupBarriersNoSSBO(const void * internalData,vector<const void * > datas,deUint32 width,deUint32 height,deUint32)96 static bool checkFragmentSubgroupBarriersNoSSBO (const void				*internalData,
97 												 vector<const void*>	datas,
98 												 deUint32				width,
99 												 deUint32				height,
100 												 deUint32)
101 {
102 	DE_UNREF(internalData);
103 
104 	return _checkFragmentSubgroupBarriersNoSSBO(datas, width, height, false);
105 }
106 
checkFragmentSubgroupBarriersWithImageNoSSBO(const void * internalData,vector<const void * > datas,deUint32 width,deUint32 height,deUint32)107 static bool checkFragmentSubgroupBarriersWithImageNoSSBO (const void*			internalData,
108 														  vector<const void*>	datas,
109 														  deUint32				width,
110 														  deUint32				height,
111 														  deUint32)
112 {
113 	DE_UNREF(internalData);
114 
115 	return _checkFragmentSubgroupBarriersNoSSBO(datas, width, height, true);
116 }
117 
checkVertexPipelineStagesSubgroupElectNoSSBO(const void * internalData,vector<const void * > datas,deUint32 width,deUint32)118 static bool checkVertexPipelineStagesSubgroupElectNoSSBO (const void*			internalData,
119 														  vector<const void*>	datas,
120 														  deUint32				width,
121 														  deUint32)
122 {
123 	DE_UNREF(internalData);
124 
125 	const float* const	resultData			= reinterpret_cast<const float*>(datas[0]);
126 	float				poisonValuesFound	= 0.0f;
127 	float				numSubgroupsUsed	= 0.0f;
128 
129 	for (deUint32 x = 0; x < width; ++x)
130 	{
131 		deUint32 val = static_cast<deUint32>(resultData[x * 2]);
132 		numSubgroupsUsed += resultData[x * 2 + 1];
133 
134 		switch (val)
135 		{
136 			default:
137 				// some garbage value was found!
138 				return false;
139 			case UNELECTED_VALUE:
140 				break;
141 			case ELECTED_VALUE:
142 				poisonValuesFound += 1.0f;
143 				break;
144 		}
145 	}
146 
147 	return numSubgroupsUsed == poisonValuesFound;
148 }
149 
checkVertexPipelineStagesSubgroupElect(const void * internalData,vector<const void * > datas,deUint32 width,deUint32,bool multipleCallsPossible)150 static bool checkVertexPipelineStagesSubgroupElect (const void*				internalData,
151 													vector<const void*>		datas,
152 													deUint32				width,
153 													deUint32,
154 													bool					multipleCallsPossible)
155 {
156 	DE_UNREF(internalData);
157 
158 	const deUint32* const	resultData			= reinterpret_cast<const deUint32*>(datas[0]);
159 	deUint32				poisonValuesFound	= 0;
160 
161 	for (deUint32 x = 0; x < width; ++x)
162 	{
163 		deUint32 val = resultData[x];
164 
165 		switch (val)
166 		{
167 			default:
168 				// some garbage value was found!
169 				return false;
170 			case UNELECTED_VALUE:
171 				break;
172 			case ELECTED_VALUE:
173 				poisonValuesFound++;
174 				break;
175 		}
176 	}
177 
178 	// we used an atomicly incremented counter to note how many subgroups we used for the vertex shader
179 	const deUint32	numSubgroupsUsed	= *reinterpret_cast<const deUint32*>(datas[1]);
180 
181 	return (multipleCallsPossible ? (numSubgroupsUsed >= poisonValuesFound) : (numSubgroupsUsed == poisonValuesFound));
182 }
183 
checkVertexPipelineStagesSubgroupBarriers(const void * internalData,vector<const void * > datas,deUint32 width,deUint32)184 static bool checkVertexPipelineStagesSubgroupBarriers (const void*				internalData,
185 													   vector<const void*>		datas,
186 													   deUint32					width,
187 													   deUint32)
188 {
189 	DE_UNREF(internalData);
190 
191 	const deUint32* const resultData = reinterpret_cast<const deUint32*>(datas[0]);
192 
193 	// We used this SSBO to generate our unique value!
194 	const deUint32 ref = *reinterpret_cast<const deUint32*>(datas[3]);
195 
196 	for (deUint32 x = 0; x < width; ++x)
197 	{
198 		deUint32 val = resultData[x];
199 
200 		if (val != ref)
201 			return false;
202 	}
203 
204 	return true;
205 }
206 
_checkVertexPipelineStagesSubgroupBarriersNoSSBO(vector<const void * > datas,deUint32 width,bool withImage)207 static bool _checkVertexPipelineStagesSubgroupBarriersNoSSBO (vector<const void*>	datas,
208 															  deUint32				width,
209 															  bool					withImage)
210 {
211 	const float* const	resultData	= reinterpret_cast<const float*>(datas[0]);
212 
213 	for (deUint32 x = 0u; x < width; ++x)
214 	{
215 		const deUint32 ndx = x*4u;
216 		if (!withImage && 0.0f == resultData[ndx])
217 		{
218 			return false;
219 		}
220 		else if (1.0f == resultData[ndx +2])
221 		{
222 			if(resultData[ndx] != resultData[ndx +1])
223 				return false;
224 		}
225 		else if (resultData[ndx] != resultData[ndx +3])
226 		{
227 			return false;
228 		}
229 	}
230 
231 	return true;
232 }
233 
checkVertexPipelineStagesSubgroupBarriersNoSSBO(const void * internalData,vector<const void * > datas,deUint32 width,deUint32)234 static bool checkVertexPipelineStagesSubgroupBarriersNoSSBO (const void*			internalData,
235 															 vector<const void*>	datas,
236 															 deUint32				width,
237 															 deUint32)
238 {
239 	DE_UNREF(internalData);
240 
241 	return _checkVertexPipelineStagesSubgroupBarriersNoSSBO(datas, width, false);
242 }
243 
checkVertexPipelineStagesSubgroupBarriersWithImageNoSSBO(const void * internalData,vector<const void * > datas,deUint32 width,deUint32)244 static bool checkVertexPipelineStagesSubgroupBarriersWithImageNoSSBO (const void*			internalData,
245 																	  vector<const void*>	datas,
246 																	  deUint32				width,
247 																	  deUint32)
248 {
249 	DE_UNREF(internalData);
250 
251 	return _checkVertexPipelineStagesSubgroupBarriersNoSSBO(datas, width, true);
252 }
253 
_checkTessellationEvaluationSubgroupBarriersNoSSBO(vector<const void * > datas,deUint32 width,deUint32,bool withImage)254 static bool _checkTessellationEvaluationSubgroupBarriersNoSSBO (vector<const void*>	datas,
255 																deUint32			width,
256 																deUint32,
257 																bool				withImage)
258 {
259 	const float* const	resultData	= reinterpret_cast<const float*>(datas[0]);
260 
261 	for (deUint32 x = 0u; x < width; ++x)
262 	{
263 		const deUint32 ndx = x*4u;
264 
265 		if (!withImage && 0.0f == resultData[ndx])
266 		{
267 			return false;
268 		}
269 		else if (0.0f == resultData[ndx +2] && resultData[ndx] != resultData[ndx +3])
270 		{
271 			return false;
272 		}
273 	}
274 
275 	return true;
276 }
277 
checkTessellationEvaluationSubgroupBarriersWithImageNoSSBO(const void * internalData,vector<const void * > datas,deUint32 width,deUint32 height)278 static bool checkTessellationEvaluationSubgroupBarriersWithImageNoSSBO (const void*			internalData,
279 																		vector<const void*>	datas,
280 																		deUint32			width,
281 																		deUint32			height)
282 {
283 	DE_UNREF(internalData);
284 
285 	return _checkTessellationEvaluationSubgroupBarriersNoSSBO(datas, width, height, true);
286 }
287 
checkTessellationEvaluationSubgroupBarriersNoSSBO(const void * internalData,vector<const void * > datas,deUint32 width,deUint32 height)288 static bool checkTessellationEvaluationSubgroupBarriersNoSSBO (const void*				internalData,
289 															   vector<const void*>		datas,
290 															   deUint32					width,
291 															   deUint32					height)
292 {
293 	DE_UNREF(internalData);
294 
295 	return _checkTessellationEvaluationSubgroupBarriersNoSSBO (datas, width, height, false);
296 }
297 
checkComputeSubgroupElect(const void * internalData,vector<const void * > datas,const deUint32 numWorkgroups[3],const deUint32 localSize[3],deUint32)298 static bool checkComputeSubgroupElect (const void*				internalData,
299 									   vector<const void*>		datas,
300 									   const deUint32			numWorkgroups[3],
301 									   const deUint32			localSize[3],
302 									   deUint32)
303 {
304 	DE_UNREF(internalData);
305 
306 	return subgroups::checkCompute(datas, numWorkgroups, localSize, 1);
307 }
308 
checkComputeSubgroupBarriers(const void * internalData,vector<const void * > datas,const deUint32 numWorkgroups[3],const deUint32 localSize[3],deUint32)309 static bool checkComputeSubgroupBarriers (const void*			internalData,
310 										  vector<const void*>	datas,
311 										  const deUint32		numWorkgroups[3],
312 										  const deUint32		localSize[3],
313 										  deUint32)
314 {
315 	DE_UNREF(internalData);
316 
317 	// We used this SSBO to generate our unique value!
318 	const deUint32 ref = *reinterpret_cast<const deUint32*>(datas[2]);
319 
320 	return subgroups::checkCompute(datas, numWorkgroups, localSize, ref);
321 }
322 
getOpTypeName(OpType opType)323 string getOpTypeName (OpType opType)
324 {
325 	switch (opType)
326 	{
327 		case OPTYPE_ELECT:							return "subgroupElect";
328 		case OPTYPE_SUBGROUP_BARRIER:				return "subgroupBarrier";
329 		case OPTYPE_SUBGROUP_MEMORY_BARRIER:		return "subgroupMemoryBarrier";
330 		case OPTYPE_SUBGROUP_MEMORY_BARRIER_BUFFER:	return "subgroupMemoryBarrierBuffer";
331 		case OPTYPE_SUBGROUP_MEMORY_BARRIER_SHARED:	return "subgroupMemoryBarrierShared";
332 		case OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE:	return "subgroupMemoryBarrierImage";
333 		default:									TCU_THROW(InternalError, "Unsupported op type");
334 	}
335 }
336 
initFrameBufferPrograms(SourceCollections & programCollection,CaseDefinition caseDef)337 void initFrameBufferPrograms (SourceCollections& programCollection, CaseDefinition caseDef)
338 {
339 	const ShaderBuildOptions	buildOptions	(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3, 0u);
340 	const SpirVAsmBuildOptions	buildOptionsSpr	(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3);
341 
342 	if (VK_SHADER_STAGE_FRAGMENT_BIT != caseDef.shaderStage)
343 	{
344 		/*
345 			"layout(location = 0) in vec4 in_color;\n"
346 			"layout(location = 0) out vec4 out_color;\n"
347 			"void main()\n"
348 			{\n"
349 			"	out_color = in_color;\n"
350 			"}\n";
351 		*/
352 		const string fragment =
353 			"; SPIR-V\n"
354 			"; Version: 1.3\n"
355 			"; Generator: Khronos Glslang Reference Front End; 2\n"
356 			"; Bound: 13\n"
357 			"; Schema: 0\n"
358 			"OpCapability Shader\n"
359 			"%1 = OpExtInstImport \"GLSL.std.450\"\n"
360 			"OpMemoryModel Logical GLSL450\n"
361 			"OpEntryPoint Fragment %4 \"main\" %9 %11\n"
362 			"OpExecutionMode %4 OriginUpperLeft\n"
363 			"OpDecorate %9 Location 0\n"
364 			"OpDecorate %11 Location 0\n"
365 			"%2 = OpTypeVoid\n"
366 			"%3 = OpTypeFunction %2\n"
367 			"%6 = OpTypeFloat 32\n"
368 			"%7 = OpTypeVector %6 4\n"
369 			"%8 = OpTypePointer Output %7\n"
370 			"%9 = OpVariable %8 Output\n"
371 			"%10 = OpTypePointer Input %7\n"
372 			"%11 = OpVariable %10 Input\n"
373 			"%4 = OpFunction %2 None %3\n"
374 			"%5 = OpLabel\n"
375 			"%12 = OpLoad %7 %11\n"
376 			"OpStore %9 %12\n"
377 			"OpReturn\n"
378 			"OpFunctionEnd\n";
379 
380 		programCollection.spirvAsmSources.add("fragment") << fragment;
381 	}
382 	if (VK_SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
383 	{
384 		/*
385 			"#version 450\n"
386 			"void main (void)\n"
387 			"{\n"
388 			"  vec2 uv = vec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2);\n"
389 			"  gl_Position = vec4(uv * 2.0f + -1.0f, 0.0f, 1.0f);\n"
390 			"  gl_PointSize = 1.0f;\n"
391 			"}\n";
392 		*/
393 		const string vertex =
394 			"; SPIR-V\n"
395 			"; Version: 1.3\n"
396 			"; Generator: Khronos Glslang Reference Front End; 2\n"
397 			"; Bound: 44\n"
398 			"; Schema: 0\n"
399 			"OpCapability Shader\n"
400 			"%1 = OpExtInstImport \"GLSL.std.450\"\n"
401 			"OpMemoryModel Logical GLSL450\n"
402 			"OpEntryPoint Vertex %4 \"main\" %12 %29\n"
403 			"OpDecorate %12 BuiltIn VertexIndex\n"
404 			"OpMemberDecorate %27 0 BuiltIn Position\n"
405 			"OpMemberDecorate %27 1 BuiltIn PointSize\n"
406 			"OpMemberDecorate %27 2 BuiltIn ClipDistance\n"
407 			"OpMemberDecorate %27 3 BuiltIn CullDistance\n"
408 			"OpDecorate %27 Block\n"
409 			"%2 = OpTypeVoid\n"
410 			"%3 = OpTypeFunction %2\n"
411 			"%6 = OpTypeFloat 32\n"
412 			"%7 = OpTypeVector %6 2\n"
413 			"%8 = OpTypePointer Function %7\n"
414 			"%10 = OpTypeInt 32 1\n"
415 			"%11 = OpTypePointer Input %10\n"
416 			"%12 = OpVariable %11 Input\n"
417 			"%14 = OpConstant %10 1\n"
418 			"%16 = OpConstant %10 2\n"
419 			"%23 = OpTypeVector %6 4\n"
420 			"%24 = OpTypeInt 32 0\n"
421 			"%25 = OpConstant %24 1\n"
422 			"%26 = OpTypeArray %6 %25\n"
423 			"%27 = OpTypeStruct %23 %6 %26 %26\n"
424 			"%28 = OpTypePointer Output %27\n"
425 			"%29 = OpVariable %28 Output\n"
426 			"%30 = OpConstant %10 0\n"
427 			"%32 = OpConstant %6 2\n"
428 			"%34 = OpConstant %6 -1\n"
429 			"%37 = OpConstant %6 0\n"
430 			"%38 = OpConstant %6 1\n"
431 			"%42 = OpTypePointer Output %23\n"
432 			"%44 = OpTypePointer Output %6\n"
433 			"%4 = OpFunction %2 None %3\n"
434 			"%5 = OpLabel\n"
435 			"%9 = OpVariable %8 Function\n"
436 			"%13 = OpLoad %10 %12\n"
437 			"%15 = OpShiftLeftLogical %10 %13 %14\n"
438 			"%17 = OpBitwiseAnd %10 %15 %16\n"
439 			"%18 = OpConvertSToF %6 %17\n"
440 			"%19 = OpLoad %10 %12\n"
441 			"%20 = OpBitwiseAnd %10 %19 %16\n"
442 			"%21 = OpConvertSToF %6 %20\n"
443 			"%22 = OpCompositeConstruct %7 %18 %21\n"
444 			"OpStore %9 %22\n"
445 			"%31 = OpLoad %7 %9\n"
446 			"%33 = OpVectorTimesScalar %7 %31 %32\n"
447 			"%35 = OpCompositeConstruct %7 %34 %34\n"
448 			"%36 = OpFAdd %7 %33 %35\n"
449 			"%39 = OpCompositeExtract %6 %36 0\n"
450 			"%40 = OpCompositeExtract %6 %36 1\n"
451 			"%41 = OpCompositeConstruct %23 %39 %40 %37 %38\n"
452 			"%43 = OpAccessChain %42 %29 %30\n"
453 			"OpStore %43 %41\n"
454 			"%45 = OpAccessChain %44 %29 %14\n"
455 			"OpStore %45 %38\n"
456 			"OpReturn\n"
457 			"OpFunctionEnd\n";
458 
459 		programCollection.spirvAsmSources.add("vert") << vertex;
460 	}
461 	else if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
462 	{
463 		subgroups::setVertexShaderFrameBuffer(programCollection);
464 	}
465 
466 	if (OPTYPE_ELECT == caseDef.opType)
467 	{
468 		ostringstream electedValue ;
469 		ostringstream unelectedValue;
470 
471 		electedValue << ELECTED_VALUE;
472 		unelectedValue << UNELECTED_VALUE;
473 
474 		if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
475 		{
476 			/*
477 				"#extension GL_KHR_shader_subgroup_basic: enable\n"
478 				"layout(location = 0) out vec4 out_color;\n"
479 				"layout(location = 0) in highp vec4 in_position;\n"
480 				"\n"
481 				"void main (void)\n"
482 				"{\n"
483 				"  if (subgroupElect())\n"
484 				"  {\n"
485 				"    out_color.r = " << ELECTED_VALUE << ";\n"
486 				"    out_color.g = 1.0f;\n"
487 				"  }\n"
488 				"  else\n"
489 				"  {\n"
490 				"    out_color.r = " << UNELECTED_VALUE << ";\n"
491 				"    out_color.g = 0.0f;\n"
492 				"  }\n"
493 				"  gl_Position = in_position;\n"
494 				"  gl_PointSize = 1.0f;\n"
495 				"}\n";
496 			*/
497 			const string vertex =
498 				"; SPIR-V\n"
499 				"; Version: 1.3\n"
500 				"; Generator: Khronos Glslang Reference Front End; 2\n"
501 				"; Bound: 38\n"
502 				"; Schema: 0\n"
503 				"OpCapability Shader\n"
504 				"OpCapability GroupNonUniform\n"
505 				"%1 = OpExtInstImport \"GLSL.std.450\"\n"
506 				"OpMemoryModel Logical GLSL450\n"
507 				"OpEntryPoint Vertex %4 \"main\" %15 %31 %35\n"
508 				"OpDecorate %15 Location 0\n"
509 				"OpMemberDecorate %29 0 BuiltIn Position\n"
510 				"OpMemberDecorate %29 1 BuiltIn PointSize\n"
511 				"OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
512 				"OpMemberDecorate %29 3 BuiltIn CullDistance\n"
513 				"OpDecorate %29 Block\n"
514 				"OpDecorate %35 Location 0\n"
515 				"%2 = OpTypeVoid\n"
516 				"%3 = OpTypeFunction %2\n"
517 				"%6 = OpTypeBool\n"
518 				"%7 = OpTypeInt 32 0\n"
519 				"%8 = OpConstant %7 3\n"
520 				"%12 = OpTypeFloat 32\n"
521 				"%13 = OpTypeVector %12 4\n"
522 				"%14 = OpTypePointer Output %13\n"
523 				"%15 = OpVariable %14 Output\n"
524 				"%16 = OpConstant %12 " + electedValue.str() + "\n"
525 				"%17 = OpConstant %7 0\n"
526 				"%18 = OpTypePointer Output %12\n"
527 				"%20 = OpConstant %12 1\n"
528 				"%21 = OpConstant %7 1\n"
529 				"%24 = OpConstant %12 " + unelectedValue.str() + "\n"
530 				"%26 = OpConstant %12 0\n"
531 				"%28 = OpTypeArray %12 %21\n"
532 				"%29 = OpTypeStruct %13 %12 %28 %28\n"
533 				"%30 = OpTypePointer Output %29\n"
534 				"%31 = OpVariable %30 Output\n"
535 				"%32 = OpTypeInt 32 1\n"
536 				"%33 = OpConstant %32 0\n"
537 				"%34 = OpTypePointer Input %13\n"
538 				"%35 = OpVariable %34 Input\n"
539 				"%38 = OpConstant %32 1\n"
540 				"%4 = OpFunction %2 None %3\n"
541 				"%5 = OpLabel\n"
542 				"%9 = OpGroupNonUniformElect %6 %8\n"
543 				"OpSelectionMerge %11 None\n"
544 				"OpBranchConditional %9 %10 %23\n"
545 				"%10 = OpLabel\n"
546 				"%19 = OpAccessChain %18 %15 %17\n"
547 				"OpStore %19 %16\n"
548 				"%22 = OpAccessChain %18 %15 %21\n"
549 				"OpStore %22 %20\n"
550 				"OpBranch %11\n"
551 				"%23 = OpLabel\n"
552 				"%25 = OpAccessChain %18 %15 %17\n"
553 				"OpStore %25 %24\n"
554 				"%27 = OpAccessChain %18 %15 %21\n"
555 				"OpStore %27 %26\n"
556 				"OpBranch %11\n"
557 				"%11 = OpLabel\n"
558 				"%36 = OpLoad %13 %35\n"
559 				"%37 = OpAccessChain %14 %31 %33\n"
560 				"OpStore %37 %36\n"
561 				"%39 = OpAccessChain %18 %31 %38\n"
562 				"OpStore %39 %20\n"
563 				"OpReturn\n"
564 				"OpFunctionEnd\n";
565 
566 			programCollection.spirvAsmSources.add("vert") << vertex << buildOptionsSpr;
567 		}
568 		else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
569 		{
570 			/*
571 				"#version 450\n"
572 				"#extension GL_KHR_shader_subgroup_basic: enable\n"
573 				"layout(points) in;\n"
574 				"layout(points, max_vertices = 1) out;\n"
575 				"layout(location = 0) out vec4 out_color;\n"
576 				"void main (void)\n"
577 				"{\n"
578 				"  if (subgroupElect())\n"
579 				"  {\n"
580 				"    out_color.r = " << ELECTED_VALUE << ";\n"
581 				"    out_color.g = 1.0f;\n"
582 				"  }\n"
583 				"  else\n"
584 				"  {\n"
585 				"    out_color.r = " << UNELECTED_VALUE << ";\n"
586 				"    out_color.g = 0.0f;\n"
587 				"  }\n"
588 				"  gl_Position = gl_in[0].gl_Position;\n"
589 				"  gl_PointSize = gl_in[0].gl_PointSize;\n"
590 				"  EmitVertex();\n"
591 				"  EndPrimitive();\n"
592 				"}\n";
593 			*/
594 			ostringstream geometry;
595 
596 			geometry
597 				<< "; SPIR-V\n"
598 				<< "; Version: 1.3\n"
599 				<< "; Generator: Khronos Glslang Reference Front End; 2\n"
600 				<< "; Bound: 42\n"
601 				<< "; Schema: 0\n"
602 				<< "OpCapability Geometry\n"
603 				<< (*caseDef.geometryPointSizeSupported ?
604 					"OpCapability GeometryPointSize\n" : "")
605 				<< "OpCapability GroupNonUniform\n"
606 				<< "%1 = OpExtInstImport \"GLSL.std.450\"\n"
607 				<< "OpMemoryModel Logical GLSL450\n"
608 				<< "OpEntryPoint Geometry %4 \"main\" %15 %31 %37\n"
609 				<< "OpExecutionMode %4 InputPoints\n"
610 				<< "OpExecutionMode %4 Invocations 1\n"
611 				<< "OpExecutionMode %4 OutputPoints\n"
612 				<< "OpExecutionMode %4 OutputVertices 1\n"
613 				<< "OpDecorate %15 Location 0\n"
614 				<< "OpMemberDecorate %29 0 BuiltIn Position\n"
615 				<< "OpMemberDecorate %29 1 BuiltIn PointSize\n"
616 				<< "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
617 				<< "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
618 				<< "OpDecorate %29 Block\n"
619 				<< "OpMemberDecorate %34 0 BuiltIn Position\n"
620 				<< "OpMemberDecorate %34 1 BuiltIn PointSize\n"
621 				<< "OpMemberDecorate %34 2 BuiltIn ClipDistance\n"
622 				<< "OpMemberDecorate %34 3 BuiltIn CullDistance\n"
623 				<< "OpDecorate %34 Block\n"
624 				<< "%2 = OpTypeVoid\n"
625 				<< "%3 = OpTypeFunction %2\n"
626 				<< "%6 = OpTypeBool\n"
627 				<< "%7 = OpTypeInt 32 0\n"
628 				<< "%8 = OpConstant %7 3\n"
629 				<< "%12 = OpTypeFloat 32\n"
630 				<< "%13 = OpTypeVector %12 4\n"
631 				<< "%14 = OpTypePointer Output %13\n"
632 				<< "%15 = OpVariable %14 Output\n"
633 				<< "%16 = OpConstant %12 " << electedValue.str() << "\n"
634 				<< "%17 = OpConstant %7 0\n"
635 				<< "%18 = OpTypePointer Output %12\n"
636 				<< "%20 = OpConstant %12 1\n"
637 				<< "%21 = OpConstant %7 1\n"
638 				<< "%24 = OpConstant %12 " << unelectedValue.str() << "\n"
639 				<< "%26 = OpConstant %12 0\n"
640 				<< "%28 = OpTypeArray %12 %21\n"
641 				<< "%29 = OpTypeStruct %13 %12 %28 %28\n"
642 				<< "%30 = OpTypePointer Output %29\n"
643 				<< "%31 = OpVariable %30 Output\n"
644 				<< "%32 = OpTypeInt 32 1\n"
645 				<< "%33 = OpConstant %32 0\n"
646 				<< "%34 = OpTypeStruct %13 %12 %28 %28\n"
647 				<< "%35 = OpTypeArray %34 %21\n"
648 				<< "%36 = OpTypePointer Input %35\n"
649 				<< "%37 = OpVariable %36 Input\n"
650 				<< "%38 = OpTypePointer Input %13\n"
651 				<< (*caseDef.geometryPointSizeSupported ?
652 					"%42 = OpConstant %32 1\n"
653 					"%43 = OpTypePointer Input %12\n"
654 					"%44 = OpTypePointer Output %12\n" : "")
655 				<< "%4 = OpFunction %2 None %3\n"
656 				<< "%5 = OpLabel\n"
657 				<< "%9 = OpGroupNonUniformElect %6 %8\n"
658 				<< "OpSelectionMerge %11 None\n"
659 				<< "OpBranchConditional %9 %10 %23\n"
660 				<< "%10 = OpLabel\n"
661 				<< "%19 = OpAccessChain %18 %15 %17\n"
662 				<< "OpStore %19 %16\n"
663 				<< "%22 = OpAccessChain %18 %15 %21\n"
664 				<< "OpStore %22 %20\n"
665 				<< "OpBranch %11\n"
666 				<< "%23 = OpLabel\n"
667 				<< "%25 = OpAccessChain %18 %15 %17\n"
668 				<< "OpStore %25 %24\n"
669 				<< "%27 = OpAccessChain %18 %15 %21\n"
670 				<< "OpStore %27 %26\n"
671 				<< "OpBranch %11\n"
672 				<< "%11 = OpLabel\n"
673 				<< "%39 = OpAccessChain %38 %37 %33 %33\n"
674 				<< "%40 = OpLoad %13 %39\n"
675 				<< "%41 = OpAccessChain %14 %31 %33\n"
676 				<< "OpStore %41 %40\n"
677 				<< (*caseDef.geometryPointSizeSupported ?
678 					"%45 = OpAccessChain %43 %37 %33 %42\n"
679 					"%46 = OpLoad %12 %45\n"
680 					"%47 = OpAccessChain %44 %31 %42\n"
681 					"OpStore %47 %46\n" : "" )
682 				<< "OpEmitVertex\n"
683 				<< "OpEndPrimitive\n"
684 				<< "OpReturn\n"
685 				<< "OpFunctionEnd\n";
686 
687 			programCollection.spirvAsmSources.add("geometry") << geometry.str() << buildOptionsSpr;
688 		}
689 		else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
690 		{
691 			/*
692 				<< "#extension GL_KHR_shader_subgroup_basic: enable\n"
693 				<< "#extension GL_EXT_tessellation_shader : require\n"
694 				<< "layout(vertices = 2) out;\n"
695 				<< "void main (void)\n"
696 				<< "{\n"
697 				<< "  if (gl_InvocationID == 0)\n"
698 				<< "  {\n"
699 				<< "    gl_TessLevelOuter[0] = 1.0f;\n"
700 				<< "    gl_TessLevelOuter[1] = 1.0f;\n"
701 				<< "  }\n"
702 				<< "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
703 				<< "}\n";
704 			*/
705 			const string controlSource =
706 				"; SPIR-V\n"
707 				"; Version: 1.3\n"
708 				"; Generator: Khronos Glslang Reference Front End; 2\n"
709 				"; Bound: 46\n"
710 				"; Schema: 0\n"
711 				"OpCapability Tessellation\n"
712 				"%1 = OpExtInstImport \"GLSL.std.450\"\n"
713 				"OpMemoryModel Logical GLSL450\n"
714 				"OpEntryPoint TessellationControl %4 \"main\" %8 %20 %33 %39\n"
715 				"OpExecutionMode %4 OutputVertices 2\n"
716 				"OpDecorate %8 BuiltIn InvocationId\n"
717 				"OpDecorate %20 Patch\n"
718 				"OpDecorate %20 BuiltIn TessLevelOuter\n"
719 				"OpMemberDecorate %29 0 BuiltIn Position\n"
720 				"OpMemberDecorate %29 1 BuiltIn PointSize\n"
721 				"OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
722 				"OpMemberDecorate %29 3 BuiltIn CullDistance\n"
723 				"OpDecorate %29 Block\n"
724 				"OpMemberDecorate %35 0 BuiltIn Position\n"
725 				"OpMemberDecorate %35 1 BuiltIn PointSize\n"
726 				"OpMemberDecorate %35 2 BuiltIn ClipDistance\n"
727 				"OpMemberDecorate %35 3 BuiltIn CullDistance\n"
728 				"OpDecorate %35 Block\n"
729 				"%2 = OpTypeVoid\n"
730 				"%3 = OpTypeFunction %2\n"
731 				"%6 = OpTypeInt 32 1\n"
732 				"%7 = OpTypePointer Input %6\n"
733 				"%8 = OpVariable %7 Input\n"
734 				"%10 = OpConstant %6 0\n"
735 				"%11 = OpTypeBool\n"
736 				"%15 = OpTypeFloat 32\n"
737 				"%16 = OpTypeInt 32 0\n"
738 				"%17 = OpConstant %16 4\n"
739 				"%18 = OpTypeArray %15 %17\n"
740 				"%19 = OpTypePointer Output %18\n"
741 				"%20 = OpVariable %19 Output\n"
742 				"%21 = OpConstant %15 1\n"
743 				"%22 = OpTypePointer Output %15\n"
744 				"%24 = OpConstant %6 1\n"
745 				"%26 = OpTypeVector %15 4\n"
746 				"%27 = OpConstant %16 1\n"
747 				"%28 = OpTypeArray %15 %27\n"
748 				"%29 = OpTypeStruct %26 %15 %28 %28\n"
749 				"%30 = OpConstant %16 2\n"
750 				"%31 = OpTypeArray %29 %30\n"
751 				"%32 = OpTypePointer Output %31\n"
752 				"%33 = OpVariable %32 Output\n"
753 				"%35 = OpTypeStruct %26 %15 %28 %28\n"
754 				"%36 = OpConstant %16 32\n"
755 				"%37 = OpTypeArray %35 %36\n"
756 				"%38 = OpTypePointer Input %37\n"
757 				"%39 = OpVariable %38 Input\n"
758 				"%41 = OpTypePointer Input %26\n"
759 				"%44 = OpTypePointer Output %26\n"
760 				"%4 = OpFunction %2 None %3\n"
761 				"%5 = OpLabel\n"
762 				"%9 = OpLoad %6 %8\n"
763 				"%12 = OpIEqual %11 %9 %10\n"
764 				"OpSelectionMerge %14 None\n"
765 				"OpBranchConditional %12 %13 %14\n"
766 				"%13 = OpLabel\n"
767 				"%23 = OpAccessChain %22 %20 %10\n"
768 				"OpStore %23 %21\n"
769 				"%25 = OpAccessChain %22 %20 %24\n"
770 				"OpStore %25 %21\n"
771 				"OpBranch %14\n"
772 				"%14 = OpLabel\n"
773 				"%34 = OpLoad %6 %8\n"
774 				"%40 = OpLoad %6 %8\n"
775 				"%42 = OpAccessChain %41 %39 %40 %10\n"
776 				"%43 = OpLoad %26 %42\n"
777 				"%45 = OpAccessChain %44 %33 %34 %10\n"
778 				"OpStore %45 %43\n"
779 				"OpReturn\n"
780 				"OpFunctionEnd\n";
781 
782 			programCollection.spirvAsmSources.add("tesc") << controlSource << buildOptionsSpr;
783 
784 			/*
785 				"#extension GL_KHR_shader_subgroup_basic: enable\n"
786 				"#extension GL_EXT_tessellation_shader : require\n"
787 				"layout(isolines, equal_spacing, ccw ) in;\n"
788 				"layout(location = 0) out vec4 out_color;\n"
789 				"\n"
790 				"void main (void)\n"
791 				"{\n"
792 				"  if (subgroupElect())\n"
793 				"  {\n"
794 				"    out_color.r = " << 2 * ELECTED_VALUE - UNELECTED_VALUE << ";\n"
795 				"    out_color.g = 2.0f;\n"
796 				"  }\n"
797 				"  else\n"
798 				"  {\n"
799 				"    out_color.r = " << UNELECTED_VALUE << ";\n"
800 				"    out_color.g = 0.0f;\n"
801 				"  }\n"
802 				"  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
803 				"}\n";
804 			*/
805 
806 			const string evaluationSource =
807 				"; SPIR-V\n"
808 				"; Version: 1.3\n"
809 				"; Generator: Khronos Glslang Reference Front End; 2\n"
810 				"; Bound: 54\n"
811 				"; Schema: 0\n"
812 				"OpCapability Tessellation\n"
813 				"OpCapability GroupNonUniform\n"
814 				"%1 = OpExtInstImport \"GLSL.std.450\"\n"
815 				"OpMemoryModel Logical GLSL450\n"
816 				"OpEntryPoint TessellationEvaluation %4 \"main\" %15 %31 %38 %47\n"
817 				"OpExecutionMode %4 Isolines\n"
818 				"OpExecutionMode %4 SpacingEqual\n"
819 				"OpExecutionMode %4 VertexOrderCcw\n"
820 				"OpDecorate %15 Location 0\n"
821 				"OpMemberDecorate %29 0 BuiltIn Position\n"
822 				"OpMemberDecorate %29 1 BuiltIn PointSize\n"
823 				"OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
824 				"OpMemberDecorate %29 3 BuiltIn CullDistance\n"
825 				"OpDecorate %29 Block\n"
826 				"OpMemberDecorate %34 0 BuiltIn Position\n"
827 				"OpMemberDecorate %34 1 BuiltIn PointSize\n"
828 				"OpMemberDecorate %34 2 BuiltIn ClipDistance\n"
829 				"OpMemberDecorate %34 3 BuiltIn CullDistance\n"
830 				"OpDecorate %34 Block\n"
831 				"OpDecorate %47 BuiltIn TessCoord\n"
832 				"%2 = OpTypeVoid\n"
833 				"%3 = OpTypeFunction %2\n"
834 				"%6 = OpTypeBool\n"
835 				"%7 = OpTypeInt 32 0\n"
836 				"%8 = OpConstant %7 3\n"
837 				"%12 = OpTypeFloat 32\n"
838 				"%13 = OpTypeVector %12 4\n"
839 				"%14 = OpTypePointer Output %13\n"
840 				"%15 = OpVariable %14 Output\n"
841 				"%16 = OpConstant %12 71\n"//electedValue
842 				"%17 = OpConstant %7 0\n"
843 				"%18 = OpTypePointer Output %12\n"
844 				"%20 = OpConstant %12 2\n"
845 				"%21 = OpConstant %7 1\n"
846 				"%24 = OpConstant %12 " + unelectedValue.str() + "\n"
847 				"%26 = OpConstant %12 0\n"
848 				"%28 = OpTypeArray %12 %21\n"
849 				"%29 = OpTypeStruct %13 %12 %28 %28\n"
850 				"%30 = OpTypePointer Output %29\n"
851 				"%31 = OpVariable %30 Output\n"
852 				"%32 = OpTypeInt 32 1\n"
853 				"%33 = OpConstant %32 0\n"
854 				"%34 = OpTypeStruct %13 %12 %28 %28\n"
855 				"%35 = OpConstant %7 32\n"
856 				"%36 = OpTypeArray %34 %35\n"
857 				"%37 = OpTypePointer Input %36\n"
858 				"%38 = OpVariable %37 Input\n"
859 				"%39 = OpTypePointer Input %13\n"
860 				"%42 = OpConstant %32 1\n"
861 				"%45 = OpTypeVector %12 3\n"
862 				"%46 = OpTypePointer Input %45\n"
863 				"%47 = OpVariable %46 Input\n"
864 				"%48 = OpTypePointer Input %12\n"
865 				"%4 = OpFunction %2 None %3\n"
866 				"%5 = OpLabel\n"
867 				"%9 = OpGroupNonUniformElect %6 %8\n"
868 				"OpSelectionMerge %11 None\n"
869 				"OpBranchConditional %9 %10 %23\n"
870 				"%10 = OpLabel\n"
871 				"%19 = OpAccessChain %18 %15 %17\n"
872 				"OpStore %19 %16\n"
873 				"%22 = OpAccessChain %18 %15 %21\n"
874 				"OpStore %22 %20\n"
875 				"OpBranch %11\n"
876 				"%23 = OpLabel\n"
877 				"%25 = OpAccessChain %18 %15 %17\n"
878 				"OpStore %25 %24\n"
879 				"%27 = OpAccessChain %18 %15 %21\n"
880 				"OpStore %27 %26\n"
881 				"OpBranch %11\n"
882 				"%11 = OpLabel\n"
883 				"%40 = OpAccessChain %39 %38 %33 %33\n"
884 				"%41 = OpLoad %13 %40\n"
885 				"%43 = OpAccessChain %39 %38 %42 %33\n"
886 				"%44 = OpLoad %13 %43\n"
887 				"%49 = OpAccessChain %48 %47 %17\n"
888 				"%50 = OpLoad %12 %49\n"
889 				"%51 = OpCompositeConstruct %13 %50 %50 %50 %50\n"
890 				"%52 = OpExtInst %13 %1 FMix %41 %44 %51\n"
891 				"%53 = OpAccessChain %14 %31 %33\n"
892 				"OpStore %53 %52\n"
893 				"OpReturn\n"
894 				"OpFunctionEnd\n";
895 
896 			programCollection.spirvAsmSources.add("tese") << evaluationSource << buildOptionsSpr;
897 		}
898 		else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
899 		{
900 			/*
901 				"#extension GL_KHR_shader_subgroup_basic: enable\n"
902 				"#extension GL_EXT_tessellation_shader : require\n"
903 				"layout(vertices = 2) out;\n"
904 				"layout(location = 0) out vec4 out_color[];\n"
905 				"void main (void)\n"
906 				"{\n"
907 				"  if (gl_InvocationID == 0)\n"
908 				"  {\n"
909 				"    gl_TessLevelOuter[0] = 1.0f;\n"
910 				"    gl_TessLevelOuter[1] = 1.0f;\n"
911 				"  }\n"
912 				"  if (subgroupElect())\n"
913 				"  {\n"
914 				"    out_color[gl_InvocationID].r = " << ELECTED_VALUE << ";\n"
915 				"    out_color[gl_InvocationID].g = 1.0f;\n"
916 				"  }\n"
917 				"  else\n"
918 				"  {\n"
919 				"    out_color[gl_InvocationID].r = " << UNELECTED_VALUE << ";\n"
920 				"    out_color[gl_InvocationID].g = 0.0f;\n"
921 				"  }\n"
922 				"  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
923 				"}\n";
924 			*/
925 			const string  controlSource =
926 				"; SPIR-V\n"
927 				"; Version: 1.3\n"
928 				"; Generator: Khronos Glslang Reference Front End; 2\n"
929 				"; Bound: 66\n"
930 				"; Schema: 0\n"
931 				"OpCapability Tessellation\n"
932 				"OpCapability GroupNonUniform\n"
933 				"%1 = OpExtInstImport \"GLSL.std.450\"\n"
934 				"OpMemoryModel Logical GLSL450\n"
935 				"OpEntryPoint TessellationControl %4 \"main\" %8 %20 %34 %53 %59\n"
936 				"OpExecutionMode %4 OutputVertices 2\n"
937 				"OpDecorate %8 BuiltIn InvocationId\n"
938 				"OpDecorate %20 Patch\n"
939 				"OpDecorate %20 BuiltIn TessLevelOuter\n"
940 				"OpDecorate %34 Location 0\n"
941 				"OpMemberDecorate %50 0 BuiltIn Position\n"
942 				"OpMemberDecorate %50 1 BuiltIn PointSize\n"
943 				"OpMemberDecorate %50 2 BuiltIn ClipDistance\n"
944 				"OpMemberDecorate %50 3 BuiltIn CullDistance\n"
945 				"OpDecorate %50 Block\n"
946 				"OpMemberDecorate %55 0 BuiltIn Position\n"
947 				"OpMemberDecorate %55 1 BuiltIn PointSize\n"
948 				"OpMemberDecorate %55 2 BuiltIn ClipDistance\n"
949 				"OpMemberDecorate %55 3 BuiltIn CullDistance\n"
950 				"OpDecorate %55 Block\n"
951 				"%2 = OpTypeVoid\n"
952 				"%3 = OpTypeFunction %2\n"
953 				"%6 = OpTypeInt 32 1\n"
954 				"%7 = OpTypePointer Input %6\n"
955 				"%8 = OpVariable %7 Input\n"
956 				"%10 = OpConstant %6 0\n"
957 				"%11 = OpTypeBool\n"
958 				"%15 = OpTypeFloat 32\n"
959 				"%16 = OpTypeInt 32 0\n"
960 				"%17 = OpConstant %16 4\n"
961 				"%18 = OpTypeArray %15 %17\n"
962 				"%19 = OpTypePointer Output %18\n"
963 				"%20 = OpVariable %19 Output\n"
964 				"%21 = OpConstant %15 1\n"
965 				"%22 = OpTypePointer Output %15\n"
966 				"%24 = OpConstant %6 1\n"
967 				"%26 = OpConstant %16 3\n"
968 				"%30 = OpTypeVector %15 4\n"
969 				"%31 = OpConstant %16 2\n"
970 				"%32 = OpTypeArray %30 %31\n"
971 				"%33 = OpTypePointer Output %32\n"
972 				"%34 = OpVariable %33 Output\n"
973 				"%36 = OpConstant %15 " + electedValue.str() + "\n"
974 				"%37 = OpConstant %16 0\n"
975 				"%40 = OpConstant %16 1\n"
976 				"%44 = OpConstant %15 " + unelectedValue.str() + "\n"
977 				"%47 = OpConstant %15 0\n"
978 				"%49 = OpTypeArray %15 %40\n"
979 				"%50 = OpTypeStruct %30 %15 %49 %49\n"
980 				"%51 = OpTypeArray %50 %31\n"
981 				"%52 = OpTypePointer Output %51\n"
982 				"%53 = OpVariable %52 Output\n"
983 				"%55 = OpTypeStruct %30 %15 %49 %49\n"
984 				"%56 = OpConstant %16 32\n"
985 				"%57 = OpTypeArray %55 %56\n"
986 				"%58 = OpTypePointer Input %57\n"
987 				"%59 = OpVariable %58 Input\n"
988 				"%61 = OpTypePointer Input %30\n"
989 				"%64 = OpTypePointer Output %30\n"
990 				"%4 = OpFunction %2 None %3\n"
991 				"%5 = OpLabel\n"
992 				"%9 = OpLoad %6 %8\n"
993 				"%12 = OpIEqual %11 %9 %10\n"
994 				"OpSelectionMerge %14 None\n"
995 				"OpBranchConditional %12 %13 %14\n"
996 				"%13 = OpLabel\n"
997 				"%23 = OpAccessChain %22 %20 %10\n"
998 				"OpStore %23 %21\n"
999 				"%25 = OpAccessChain %22 %20 %24\n"
1000 				"OpStore %25 %21\n"
1001 				"OpBranch %14\n"
1002 				"%14 = OpLabel\n"
1003 				"%27 = OpGroupNonUniformElect %11 %26\n"
1004 				"OpSelectionMerge %29 None\n"
1005 				"OpBranchConditional %27 %28 %42\n"
1006 				"%28 = OpLabel\n"
1007 				"%35 = OpLoad %6 %8\n"
1008 				"%38 = OpAccessChain %22 %34 %35 %37\n"
1009 				"OpStore %38 %36\n"
1010 				"%39 = OpLoad %6 %8\n"
1011 				"%41 = OpAccessChain %22 %34 %39 %40\n"
1012 				"OpStore %41 %21\n"
1013 				"OpBranch %29\n"
1014 				"%42 = OpLabel\n"
1015 				"%43 = OpLoad %6 %8\n"
1016 				"%45 = OpAccessChain %22 %34 %43 %37\n"
1017 				"OpStore %45 %44\n"
1018 				"%46 = OpLoad %6 %8\n"
1019 				"%48 = OpAccessChain %22 %34 %46 %40\n"
1020 				"OpStore %48 %47\n"
1021 				"OpBranch %29\n"
1022 				"%29 = OpLabel\n"
1023 				"%54 = OpLoad %6 %8\n"
1024 				"%60 = OpLoad %6 %8\n"
1025 				"%62 = OpAccessChain %61 %59 %60 %10\n"
1026 				"%63 = OpLoad %30 %62\n"
1027 				"%65 = OpAccessChain %64 %53 %54 %10\n"
1028 				"OpStore %65 %63\n"
1029 				"OpReturn\n"
1030 				"OpFunctionEnd\n";
1031 
1032 			programCollection.spirvAsmSources.add("tesc") << controlSource << buildOptionsSpr;
1033 
1034 			/*
1035 				"#extension GL_KHR_shader_subgroup_ballot: enable\n"
1036 				"#extension GL_EXT_tessellation_shader : require\n"
1037 				"layout(isolines, equal_spacing, ccw ) in;\n"
1038 				"layout(location = 0) in vec4 in_color[];\n"
1039 				"layout(location = 0) out vec4 out_color;\n"
1040 				"\n"
1041 				"void main (void)\n"
1042 				"{\n"
1043 				"  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
1044 				"  out_color = in_color[0];\n"
1045 				"}\n";
1046 			*/
1047 
1048 			const string evaluationSource =
1049 				"; SPIR-V\n"
1050 				"; Version: 1.3\n"
1051 				"; Generator: Khronos Glslang Reference Front End; 2\n"
1052 				"; Bound: 44\n"
1053 				"; Schema: 0\n"
1054 				"OpCapability Tessellation\n"
1055 				"%1 = OpExtInstImport \"GLSL.std.450\"\n"
1056 				"OpMemoryModel Logical GLSL450\n"
1057 				"OpEntryPoint TessellationEvaluation %4 \"main\" %13 %20 %29 %38 %41\n"
1058 				"OpExecutionMode %4 Isolines\n"
1059 				"OpExecutionMode %4 SpacingEqual\n"
1060 				"OpExecutionMode %4 VertexOrderCcw\n"
1061 				"OpMemberDecorate %11 0 BuiltIn Position\n"
1062 				"OpMemberDecorate %11 1 BuiltIn PointSize\n"
1063 				"OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
1064 				"OpMemberDecorate %11 3 BuiltIn CullDistance\n"
1065 				"OpDecorate %11 Block\n"
1066 				"OpMemberDecorate %16 0 BuiltIn Position\n"
1067 				"OpMemberDecorate %16 1 BuiltIn PointSize\n"
1068 				"OpMemberDecorate %16 2 BuiltIn ClipDistance\n"
1069 				"OpMemberDecorate %16 3 BuiltIn CullDistance\n"
1070 				"OpDecorate %16 Block\n"
1071 				"OpDecorate %29 BuiltIn TessCoord\n"
1072 				"OpDecorate %38 Location 0\n"
1073 				"OpDecorate %41 Location 0\n"
1074 				"%2 = OpTypeVoid\n"
1075 				"%3 = OpTypeFunction %2\n"
1076 				"%6 = OpTypeFloat 32\n"
1077 				"%7 = OpTypeVector %6 4\n"
1078 				"%8 = OpTypeInt 32 0\n"
1079 				"%9 = OpConstant %8 1\n"
1080 				"%10 = OpTypeArray %6 %9\n"
1081 				"%11 = OpTypeStruct %7 %6 %10 %10\n"
1082 				"%12 = OpTypePointer Output %11\n"
1083 				"%13 = OpVariable %12 Output\n"
1084 				"%14 = OpTypeInt 32 1\n"
1085 				"%15 = OpConstant %14 0\n"
1086 				"%16 = OpTypeStruct %7 %6 %10 %10\n"
1087 				"%17 = OpConstant %8 32\n"
1088 				"%18 = OpTypeArray %16 %17\n"
1089 				"%19 = OpTypePointer Input %18\n"
1090 				"%20 = OpVariable %19 Input\n"
1091 				"%21 = OpTypePointer Input %7\n"
1092 				"%24 = OpConstant %14 1\n"
1093 				"%27 = OpTypeVector %6 3\n"
1094 				"%28 = OpTypePointer Input %27\n"
1095 				"%29 = OpVariable %28 Input\n"
1096 				"%30 = OpConstant %8 0\n"
1097 				"%31 = OpTypePointer Input %6\n"
1098 				"%36 = OpTypePointer Output %7\n"
1099 				"%38 = OpVariable %36 Output\n"
1100 				"%39 = OpTypeArray %7 %17\n"
1101 				"%40 = OpTypePointer Input %39\n"
1102 				"%41 = OpVariable %40 Input\n"
1103 				"%4 = OpFunction %2 None %3\n"
1104 				"%5 = OpLabel\n"
1105 				"%22 = OpAccessChain %21 %20 %15 %15\n"
1106 				"%23 = OpLoad %7 %22\n"
1107 				"%25 = OpAccessChain %21 %20 %24 %15\n"
1108 				"%26 = OpLoad %7 %25\n"
1109 				"%32 = OpAccessChain %31 %29 %30\n"
1110 				"%33 = OpLoad %6 %32\n"
1111 				"%34 = OpCompositeConstruct %7 %33 %33 %33 %33\n"
1112 				"%35 = OpExtInst %7 %1 FMix %23 %26 %34\n"
1113 				"%37 = OpAccessChain %36 %13 %15\n"
1114 				"OpStore %37 %35\n"
1115 				"%42 = OpAccessChain %21 %41 %15\n"
1116 				"%43 = OpLoad %7 %42\n"
1117 				"OpStore %38 %43\n"
1118 				"OpReturn\n"
1119 				"OpFunctionEnd\n";
1120 
1121 			programCollection.spirvAsmSources.add("tese") << evaluationSource << buildOptionsSpr;
1122 		}
1123 		else
1124 			TCU_THROW(InternalError, "Unsupported shader stage");
1125 	}
1126 	else
1127 	{
1128 		const string	color = (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage) ? "out_color[gl_InvocationID].b = 1.0f;\n" : "out_color.b = 1.0f;\n";
1129 		ostringstream	bdy;
1130 
1131 		switch (caseDef.opType)
1132 		{
1133 			case OPTYPE_SUBGROUP_BARRIER:
1134 			case OPTYPE_SUBGROUP_MEMORY_BARRIER:
1135 			case OPTYPE_SUBGROUP_MEMORY_BARRIER_BUFFER:
1136 			{
1137 				bdy << " tempResult2 = tempBuffer[id];\n"
1138 					<< "  if (subgroupElect())\n"
1139 					<< "  {\n"
1140 					<< "    tempResult = value;\n"
1141 					<< "    " << color
1142 					<< "  }\n"
1143 					 << "  else\n"
1144 					<< "  {\n"
1145 					<< "    tempResult = tempBuffer[id];\n"
1146 					<< "  }\n"
1147 					<< "  " << getOpTypeName(caseDef.opType) << "();\n";
1148 				break;
1149 			}
1150 
1151 			case OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE:
1152 				bdy <<"tempResult2 = imageLoad(tempImage, ivec2(id, 0)).x;\n"
1153 					<< "  if (subgroupElect())\n"
1154 					<< "  {\n"
1155 					<< "    tempResult = value;\n"
1156 					<< "     " << color
1157 					<< "  }\n"
1158 					<< "  else\n"
1159 					<< "  {\n"
1160 					<< "    tempResult = imageLoad(tempImage, ivec2(id, 0)).x;\n"
1161 					<< "  }\n"
1162 					<< "  subgroupMemoryBarrierImage();\n";
1163 				break;
1164 
1165 			default:
1166 				TCU_THROW(InternalError, "Unhandled op type");
1167 		}
1168 
1169 		if (VK_SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
1170 		{
1171 			ostringstream	fragment;
1172 
1173 			fragment	<< glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1174 				<< "#extension GL_KHR_shader_subgroup_basic: enable\n"
1175 				<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
1176 				<< "layout(location = 0) out vec4 out_color;\n"
1177 				<< "\n"
1178 				<< "layout(set = 0, binding = 0) uniform Buffer1\n"
1179 				<< "{\n"
1180 				<< "  uint tempBuffer["<<SHADER_BUFFER_SIZE/4ull<<"];\n"
1181 				<< "};\n"
1182 				<< "\n"
1183 				<< "layout(set = 0, binding = 1) uniform Buffer2\n"
1184 				<< "{\n"
1185 				<< "  uint value;\n"
1186 				<< "};\n"
1187 				<< (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(set = 0, binding = 2, r32ui) readonly uniform highp uimage2D tempImage;\n" : "\n")
1188 				<< "void main (void)\n"
1189 				<< "{\n"
1190 				<< "  if (gl_HelperInvocation) return;\n"
1191 				<< "  uint id = 0;\n"
1192 				<< "  if (subgroupElect())\n"
1193 				<< "  {\n"
1194 				<< "    id = uint(gl_FragCoord.x);\n"
1195 				<< "  }\n"
1196 				<< "  id = subgroupBroadcastFirst(id);\n"
1197 				<< "  uint localId = id;\n"
1198 				<< "  uint tempResult = 0u;\n"
1199 				<< "  uint tempResult2 = 0u;\n"
1200 				<< "  out_color.b = 0.0f;\n"
1201 				<< bdy.str()
1202 				<< "  out_color.r = float(tempResult);\n"
1203 				<< "  out_color.g = float(value);\n"
1204 				<< "  out_color.a = float(tempResult2);\n"
1205 				<< "}\n";
1206 
1207 			programCollection.glslSources.add("fragment") << glu::FragmentSource(fragment.str()) << buildOptions;
1208 		}
1209 		else if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
1210 		{
1211 			ostringstream	vertex;
1212 
1213 			vertex	<< glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1214 				<< "#extension GL_KHR_shader_subgroup_basic: enable\n"
1215 				<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
1216 				<<"\n"
1217 				<< "layout(location = 0) out vec4 out_color;\n"
1218 				<< "layout(location = 0) in highp vec4 in_position;\n"
1219 				<< "\n"
1220 				<< "layout(set = 0, binding = 0) uniform Buffer1\n"
1221 				<< "{\n"
1222 				<< "  uint tempBuffer["<<SHADER_BUFFER_SIZE/4ull<<"];\n"
1223 				<< "};\n"
1224 				<< "\n"
1225 				<< "layout(set = 0, binding = 1) uniform Buffer2\n"
1226 				<< "{\n"
1227 				<< "  uint value;\n"
1228 				<< "};\n"
1229 				<< (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(set = 0, binding = 2, r32ui) readonly uniform highp uimage2D tempImage;\n" : "\n")
1230 				<< "void main (void)\n"
1231 				<< "{\n"
1232 				<< "  uint id = 0;\n"
1233 				<< "  if (subgroupElect())\n"
1234 				<< "  {\n"
1235 				<< "    id = gl_VertexIndex;\n"
1236 				<< "  }\n"
1237 				<< "  id = subgroupBroadcastFirst(id);\n"
1238 				<< "  uint tempResult = 0u;\n"
1239 				<< "  uint tempResult2 = 0u;\n"
1240 				<< "  out_color.b = 0.0f;\n"
1241 				<< bdy.str()
1242 				<< "  out_color.r = float(tempResult);\n"
1243 				<< "  out_color.g = float(value);\n"
1244 				<< "  out_color.a = float(tempResult2);\n"
1245 				<< "  gl_Position = in_position;\n"
1246 				<< "  gl_PointSize = 1.0f;\n"
1247 				<< "}\n";
1248 
1249 			programCollection.glslSources.add("vert") << glu::VertexSource(vertex.str()) << buildOptions;
1250 		}
1251 		else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
1252 		{
1253 			ostringstream geometry;
1254 
1255 			geometry << "#version 450\n"
1256 					<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
1257 					<< "#extension GL_KHR_shader_subgroup_basic: enable\n"
1258 					<< "layout(points) in;\n"
1259 					<< "layout(points, max_vertices = 1) out;\n"
1260 					<< "layout(location = 0) out vec4 out_color;\n"
1261 					<< "layout(set = 0, binding = 0) uniform Buffer1\n"
1262 					<< "{\n"
1263 					<< "  uint tempBuffer["<<SHADER_BUFFER_SIZE/4ull<<"];\n"
1264 					<< "};\n"
1265 					<< "\n"
1266 					<< "layout(set = 0, binding = 1) uniform Buffer2\n"
1267 					<< "{\n"
1268 					<< "  uint value;\n"
1269 					<< "};\n"
1270 					<< (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(set = 0, binding = 2, r32ui) readonly uniform highp uimage2D tempImage;\n" : "\n")
1271 					<< "void main (void)\n"
1272 					<< "{\n"
1273 					<< "  uint id = 0;\n"
1274 					<< "  if (subgroupElect())\n"
1275 					<< "  {\n"
1276 					<< "    id = gl_InvocationID;\n"
1277 					<< "  }\n"
1278 					<< "  id = subgroupBroadcastFirst(id);\n"
1279 					<< "  uint tempResult = 0u;\n"
1280 					<< "  uint tempResult2 = 0u;\n"
1281 					<< "  out_color.b = 0.0f;\n"
1282 					<< bdy.str()
1283 					<< "  out_color.r = float(tempResult);\n"
1284 					<< "  out_color.g = float(value);\n"
1285 					<< "  out_color.a = float(tempResult2);\n"
1286 					<< "  gl_Position = gl_in[0].gl_Position;\n"
1287 					<< (*caseDef.geometryPointSizeSupported ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "" )
1288 					<< "  EmitVertex();\n"
1289 					<< "  EndPrimitive();\n"
1290 					<< "}\n";
1291 
1292 			programCollection.glslSources.add("geometry") << glu::GeometrySource(geometry.str()) << buildOptions;
1293 		}
1294 		else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
1295 		{
1296 			ostringstream controlSource;
1297 			ostringstream evaluationSource;
1298 
1299 			controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1300 				<< "#extension GL_EXT_tessellation_shader : require\n"
1301 				<< "layout(vertices = 2) out;\n"
1302 				<< "void main (void)\n"
1303 				<< "{\n"
1304 				<< "  if (gl_InvocationID == 0)\n"
1305 				<<"  {\n"
1306 				<< "    gl_TessLevelOuter[0] = 1.0f;\n"
1307 				<< "    gl_TessLevelOuter[1] = 1.0f;\n"
1308 				<< "  }\n"
1309 				<< "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1310 				<< (*caseDef.geometryPointSizeSupported ? "  gl_out[gl_InvocationID].gl_PointSize = gl_in[gl_InvocationID].gl_PointSize;\n" : "" )
1311 				<< "}\n";
1312 
1313 			evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1314 				<< "#extension GL_KHR_shader_subgroup_basic: enable\n"
1315 				<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
1316 				<< "#extension GL_EXT_tessellation_shader : require\n"
1317 				<< "layout(isolines, equal_spacing, ccw ) in;\n"
1318 				<< "layout(location = 0) out vec4 out_color;\n"
1319 				<< "layout(set = 0, binding = 0) uniform Buffer1\n"
1320 				<< "{\n"
1321 				<< "  uint tempBuffer["<<SHADER_BUFFER_SIZE/4ull<<"];\n"
1322 				<< "};\n"
1323 				<< "\n"
1324 				<< "layout(set = 0, binding = 1) uniform Buffer2\n"
1325 				<< "{\n"
1326 				<< "  uint value;\n"
1327 				<< "};\n"
1328 				<< (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(set = 0, binding = 2, r32ui) readonly uniform highp uimage2D tempImage;\n" : "\n")
1329 				<< "void main (void)\n"
1330 				<< "{\n"
1331 				<< "  uint id = 0;\n"
1332 				<< "  if (subgroupElect())\n"
1333 				<< "  {\n"
1334 				<< "    id = gl_PrimitiveID;\n"
1335 				<< "  }\n"
1336 				<< "  id = subgroupBroadcastFirst(id);\n"
1337 				<< "  uint tempResult = 0u;\n"
1338 				<< "  uint tempResult2 = 0u;\n"
1339 				<< "  out_color.b = 0.0f;\n"
1340 				<< bdy.str()
1341 				<< "  out_color.r = float(tempResult);\n"
1342 				<< "  out_color.g = float(value);\n"
1343 				<< "  out_color.a = float(tempResult2);\n"
1344 				<< "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
1345 				<< (*caseDef.geometryPointSizeSupported ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "" )
1346 				<< "}\n";
1347 
1348 			programCollection.glslSources.add("tesc") << glu::TessellationControlSource(controlSource.str()) << buildOptions;
1349 			programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
1350 		}
1351 		else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
1352 		{
1353 			ostringstream controlSource;
1354 			ostringstream evaluationSource;
1355 
1356 			controlSource  << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1357 				<< "#extension GL_KHR_shader_subgroup_basic: enable\n"
1358 				<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
1359 				<< "#extension GL_EXT_tessellation_shader : require\n"
1360 				<< "layout(vertices = 2) out;\n"
1361 				<< "layout(location = 0) out vec4 out_color[];\n"
1362 				<< "layout(set = 0, binding = 0) uniform Buffer1\n"
1363 				<< "{\n"
1364 				<< "  uint tempBuffer["<<SHADER_BUFFER_SIZE/4ull<<"];\n"
1365 				<< "};\n"
1366 				<< "\n"
1367 				<< "layout(set = 0, binding = 1) uniform Buffer2\n"
1368 				<< "{\n"
1369 				<< "  uint value;\n"
1370 				<< "};\n"
1371 				<< (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(set = 0, binding = 2, r32ui) readonly uniform highp uimage2D tempImage;\n" : "\n")
1372 				<< "void main (void)\n"
1373 				<< "{\n"
1374 				<< "  uint id = 0;\n"
1375 				<< "  if (gl_InvocationID == 0)\n"
1376 				<<"  {\n"
1377 				<< "    gl_TessLevelOuter[0] = 1.0f;\n"
1378 				<< "    gl_TessLevelOuter[1] = 1.0f;\n"
1379 				<< "  }\n"
1380 				<< "  if (subgroupElect())\n"
1381 				<< "  {\n"
1382 				<< "    id = gl_InvocationID;\n"
1383 				<< "  }\n"
1384 				<< "  id = subgroupBroadcastFirst(id);\n"
1385 				<< "  uint tempResult = 0u;\n"
1386 				<< "  uint tempResult2 = 0u;\n"
1387 				<< "  out_color[gl_InvocationID].b = 0.0f;\n"
1388 				<< bdy.str()
1389 				<< "  out_color[gl_InvocationID].r = float(tempResult);\n"
1390 				<< "  out_color[gl_InvocationID].g = float(value);\n"
1391 				<< "  out_color[gl_InvocationID].a = float(tempResult2);\n"
1392 				<< "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1393 				<< (*caseDef.geometryPointSizeSupported ? "  gl_out[gl_InvocationID].gl_PointSize = gl_in[gl_InvocationID].gl_PointSize;\n" : "" )
1394 				<< "}\n";
1395 
1396 			evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1397 				<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
1398 				<< "#extension GL_EXT_tessellation_shader : require\n"
1399 				<< "layout(isolines, equal_spacing, ccw ) in;\n"
1400 				<< "layout(location = 0) in vec4 in_color[];\n"
1401 				<< "layout(location = 0) out vec4 out_color;\n"
1402 				<< "\n"
1403 				<< "void main (void)\n"
1404 				<< "{\n"
1405 				<< "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
1406 				<< (*caseDef.geometryPointSizeSupported ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "" )
1407 				<< "  out_color = in_color[0];\n"
1408 				<< "}\n";
1409 
1410 			programCollection.glslSources.add("tesc") << glu::TessellationControlSource(controlSource.str()) << buildOptions;
1411 			programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
1412 		}
1413 		else
1414 			TCU_THROW(InternalError, "Unsupported shader stage");
1415 	}
1416 }
1417 
getPerStageHeadDeclarations(const CaseDefinition & caseDef)1418 vector<string> getPerStageHeadDeclarations (const CaseDefinition& caseDef)
1419 {
1420 	const deUint32	stageCount	= subgroups::getStagesCount(caseDef.shaderStage);
1421 	const bool		fragment	= (caseDef.shaderStage & VK_SHADER_STAGE_FRAGMENT_BIT) != 0;
1422 	vector<string>	result		(stageCount, string());
1423 
1424 	if (fragment)
1425 		result.resize(result.size() + 1);
1426 
1427 	if (caseDef.opType == OPTYPE_ELECT)
1428 	{
1429 		for (size_t i = 0; i < result.size(); ++i)
1430 		{
1431 			const bool		frag		= (i == stageCount);
1432 			const size_t	binding1	= i;
1433 			const size_t	binding2	= stageCount + i;
1434 
1435 			if (frag)
1436 			{
1437 				result[i] += "layout(location = 0) out uint result;\n";
1438 			}
1439 			else
1440 			{
1441 				result[i] +=
1442 					"layout(set = 0, binding = " + de::toString(binding1) + ", std430) buffer Buffer1\n"
1443 					"{\n"
1444 					"  uint result[];\n"
1445 					"};\n";
1446 			}
1447 
1448 			result[i] +=
1449 				"layout(set = 0, binding = " + de::toString(binding2) + ", std430) buffer Buffer2\n"
1450 				"{\n"
1451 				"  uint numSubgroupsExecuted;\n"
1452 				"};\n";
1453 		}
1454 	}
1455 	else
1456 	{
1457 		for (size_t i = 0; i < result.size(); ++i)
1458 		{
1459 			const bool		frag		= (i == stageCount);
1460 			const size_t	binding1	= i;
1461 			const size_t	binding2	= stageCount + 4 * i;
1462 			const size_t	binding3	= stageCount + 4 * i + 1;
1463 			const size_t	binding4	= stageCount + 4 * i + 2;
1464 			const size_t	binding5	= stageCount + 4 * i + 3;
1465 
1466 			if (frag)
1467 			{
1468 				result[i] = "layout(location = 0) out uint result;\n";
1469 			}
1470 			else
1471 			{
1472 				result[i] +=
1473 					"layout(set = 0, binding = " + de::toString(binding1) + ", std430) buffer Buffer1\n"
1474 					"{\n"
1475 					"  uint result[];\n"
1476 					"};\n";
1477 			}
1478 
1479 			result[i] +=
1480 				"layout(set = 0, binding = " + de::toString(binding2) + ", std430) buffer Buffer2\n"
1481 				"{\n"
1482 				"  uint tempBuffer[];\n"
1483 				"};\n"
1484 				"layout(set = 0, binding = " + de::toString(binding3) + ", std430) buffer Buffer3\n"
1485 				"{\n"
1486 				"  uint subgroupID;\n"
1487 				"};\n"
1488 				"layout(set = 0, binding = " + de::toString(binding4) + ", std430) buffer Buffer4\n"
1489 				"{\n"
1490 				"  uint value;\n"
1491 				"};\n"
1492 				"layout(set = 0, binding = " + de::toString(binding5) + ", r32ui) uniform uimage2D tempImage;\n";
1493 		}
1494 	}
1495 
1496 	return result;
1497 }
1498 
getTestString(const CaseDefinition & caseDef)1499 string getTestString (const CaseDefinition& caseDef)
1500 {
1501 	stringstream	body;
1502 
1503 	if (caseDef.opType != OPTYPE_ELECT && (isAllGraphicsStages(caseDef.shaderStage) || isAllRayTracingStages(caseDef.shaderStage)))
1504 	{
1505 		body << "  uint id = 0;\n"
1506 				"  if (subgroupElect())\n"
1507 				"  {\n"
1508 				"    id = atomicAdd(subgroupID, 1);\n"
1509 				"  }\n"
1510 				"  id = subgroupBroadcastFirst(id);\n"
1511 				"  uint localId = id;\n"
1512 				"  uint tempResult = 0;\n";
1513 	}
1514 
1515 	switch (caseDef.opType)
1516 	{
1517 		case OPTYPE_ELECT:
1518 			if (isAllComputeStages(caseDef.shaderStage))
1519 			{
1520 				body << "  uint value = " << UNELECTED_VALUE << ";\n"
1521 						"  if (subgroupElect())\n"
1522 						"  {\n"
1523 						"    value = " << ELECTED_VALUE << ";\n"
1524 						"  }\n"
1525 						"  uvec4 bits = bitCount(sharedMemoryBallot(value == " << ELECTED_VALUE << "));\n"
1526 						"  tempRes = bits.x + bits.y + bits.z + bits.w;\n";
1527 			}
1528 			else
1529 			{
1530 				body << "  if (subgroupElect())\n"
1531 						"  {\n"
1532 						"    tempRes = " << ELECTED_VALUE << ";\n"
1533 						"    atomicAdd(numSubgroupsExecuted, 1);\n"
1534 						"  }\n"
1535 						"  else\n"
1536 						"  {\n"
1537 						"    tempRes = " << UNELECTED_VALUE << ";\n"
1538 						"  }\n";
1539 			}
1540 			break;
1541 
1542 		case OPTYPE_SUBGROUP_BARRIER:
1543 		case OPTYPE_SUBGROUP_MEMORY_BARRIER:
1544 		case OPTYPE_SUBGROUP_MEMORY_BARRIER_BUFFER:
1545 			body << "  if (subgroupElect())\n"
1546 					"  {\n"
1547 					"    tempBuffer[id] = value;\n"
1548 					"  }\n"
1549 					"  " << getOpTypeName(caseDef.opType) << "();\n"
1550 					"  tempResult = tempBuffer[id];\n";
1551 			break;
1552 
1553 		case OPTYPE_SUBGROUP_MEMORY_BARRIER_SHARED:
1554 			body <<  "  if (subgroupElect())\n"
1555 					"  {\n"
1556 					"    tempShared[localId] = value;\n"
1557 					"  }\n"
1558 					"  subgroupMemoryBarrierShared();\n"
1559 					"  tempResult = tempShared[localId];\n";
1560 			break;
1561 
1562 		case OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE:
1563 			body << "  if (subgroupElect())\n"
1564 					"  {\n"
1565 					"    imageStore(tempImage, ivec2(id, 0), ivec4(value));\n"
1566 					"  }\n"
1567 					"  subgroupMemoryBarrierImage();\n"
1568 					"  tempResult = imageLoad(tempImage, ivec2(id, 0)).x;\n";
1569 			break;
1570 
1571 		default:
1572 			TCU_THROW(InternalError, "Unhandled op type!");
1573 	}
1574 
1575 	if (caseDef.opType != OPTYPE_ELECT && (isAllGraphicsStages(caseDef.shaderStage) || isAllRayTracingStages(caseDef.shaderStage)))
1576 	{
1577 		body << "  tempRes = tempResult;\n";
1578 	}
1579 
1580 	return body.str();
1581 }
1582 
getExtHeader(const CaseDefinition & caseDef)1583 string getExtHeader (const CaseDefinition& caseDef)
1584 {
1585 	const string	extensions	= (caseDef.opType == OPTYPE_ELECT)
1586 								? "#extension GL_KHR_shader_subgroup_basic: enable\n"
1587 								: "#extension GL_KHR_shader_subgroup_basic: enable\n"
1588 								  "#extension GL_KHR_shader_subgroup_ballot: enable\n";
1589 	return extensions;
1590 }
1591 
initComputePrograms(SourceCollections & programCollection,CaseDefinition & caseDef,const string & extensions,const string & testSrc,const ShaderBuildOptions & buildOptions)1592 void initComputePrograms (SourceCollections&			programCollection,
1593 						  CaseDefinition&				caseDef,
1594 						  const string&					extensions,
1595 						  const string&					testSrc,
1596 						  const ShaderBuildOptions&		buildOptions)
1597 {
1598 	if (OPTYPE_ELECT == caseDef.opType)
1599 	{
1600 		ostringstream src;
1601 
1602 		src << "#version 450\n"
1603 			<< extensions
1604 			<< "layout (local_size_x_id = 0, local_size_y_id = 1, "
1605 			"local_size_z_id = 2) in;\n"
1606 			<< "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
1607 			<< "{\n"
1608 			<< "  uint result[];\n"
1609 			<< "};\n"
1610 			<< "\n"
1611 			<< subgroups::getSharedMemoryBallotHelper()
1612 			<< "void main (void)\n"
1613 			<< "{\n"
1614 			<< "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1615 			<< "  highp uint offset = globalSize.x * ((globalSize.y * "
1616 			"gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1617 			"gl_GlobalInvocationID.x;\n"
1618 			<< "  uint value = " << UNELECTED_VALUE << ";\n"
1619 			<< "  if (subgroupElect())\n"
1620 			<< "  {\n"
1621 			<< "    value = " << ELECTED_VALUE << ";\n"
1622 			<< "  }\n"
1623 			<< "  uvec4 bits = bitCount(sharedMemoryBallot(value == " << ELECTED_VALUE << "));\n"
1624 			<< "  result[offset] = bits.x + bits.y + bits.z + bits.w;\n"
1625 			<< "}\n";
1626 
1627 		programCollection.glslSources.add("comp") << glu::ComputeSource(src.str()) << buildOptions;
1628 	}
1629 	else
1630 	{
1631 		ostringstream src;
1632 
1633 		src << "#version 450\n"
1634 			<< "#extension GL_KHR_shader_subgroup_basic: enable\n"
1635 			<< "layout (local_size_x_id = 0, local_size_y_id = 1, "
1636 			"local_size_z_id = 2) in;\n"
1637 			<< "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
1638 			<< "{\n"
1639 			<< "  uint result[];\n"
1640 			<< "};\n"
1641 			<< "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
1642 			<< "{\n"
1643 			<< "  uint tempBuffer[];\n"
1644 			<< "};\n"
1645 			<< "layout(set = 0, binding = 2, std430) buffer Buffer3\n"
1646 			<< "{\n"
1647 			<< "  uint value;\n"
1648 			<< "};\n"
1649 			<< "layout(set = 0, binding = 3, r32ui) uniform uimage2D tempImage;\n"
1650 			<< "shared uint tempShared[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
1651 			<< "\n"
1652 			<< "void main (void)\n"
1653 			<< "{\n"
1654 			<< "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1655 			<< "  highp uint offset = globalSize.x * ((globalSize.y * "
1656 			"gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1657 			"gl_GlobalInvocationID.x;\n"
1658 			<< "  uint localId = gl_SubgroupID;\n"
1659 			<< "  uint id = globalSize.x * ((globalSize.y * "
1660 			"gl_WorkGroupID.z) + gl_WorkGroupID.y) + "
1661 			"gl_WorkGroupID.x + localId;\n"
1662 			<< "  uint tempResult = 0;\n"
1663 			<< testSrc
1664 			<< "  result[offset] = tempResult;\n"
1665 			<< "}\n";
1666 
1667 		programCollection.glslSources.add("comp") << glu::ComputeSource(src.str()) << buildOptions;
1668 	}
1669 }
1670 
initPrograms(SourceCollections & programCollection,CaseDefinition caseDef)1671 void initPrograms (SourceCollections& programCollection, CaseDefinition caseDef)
1672 {
1673 	const SpirvVersion			spirvVersion		= isAllRayTracingStages(caseDef.shaderStage) ? SPIRV_VERSION_1_4 : SPIRV_VERSION_1_3;
1674 	const ShaderBuildOptions	buildOptions		(programCollection.usedVulkanVersion, spirvVersion, 0u);
1675 	const string				extHeader			= getExtHeader(caseDef);
1676 	const string				testSrc				= getTestString(caseDef);
1677 	const vector<string>		headDeclarations	= getPerStageHeadDeclarations(caseDef);
1678 	const bool					pointSizeSupport	= *caseDef.geometryPointSizeSupported;
1679 
1680 	if (isAllComputeStages(caseDef.shaderStage))
1681 		initComputePrograms(programCollection, caseDef, extHeader, testSrc, buildOptions);
1682 	else
1683 		subgroups::initStdPrograms(programCollection, buildOptions, caseDef.shaderStage, VK_FORMAT_R32_UINT, pointSizeSupport, extHeader, testSrc, "", headDeclarations, true);
1684 }
1685 
supportedCheck(Context & context,CaseDefinition caseDef)1686 void supportedCheck (Context& context, CaseDefinition caseDef)
1687 {
1688 	if (!subgroups::isSubgroupSupported(context))
1689 		TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
1690 
1691 	if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_BASIC_BIT))
1692 		TCU_FAIL("supportedOperations will have the VK_SUBGROUP_FEATURE_BASIC_BIT bit set if any of the physical device's queues support VK_QUEUE_GRAPHICS_BIT or VK_QUEUE_COMPUTE_BIT.");
1693 
1694 	if (caseDef.requiredSubgroupSize)
1695 	{
1696 		context.requireDeviceFunctionality("VK_EXT_subgroup_size_control");
1697 
1698 		const VkPhysicalDeviceSubgroupSizeControlFeaturesEXT&	subgroupSizeControlFeatures		= context.getSubgroupSizeControlFeaturesEXT();
1699 		const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT&	subgroupSizeControlProperties	= context.getSubgroupSizeControlPropertiesEXT();
1700 
1701 		if (subgroupSizeControlFeatures.subgroupSizeControl == DE_FALSE)
1702 			TCU_THROW(NotSupportedError, "Device does not support varying subgroup sizes nor required subgroup size");
1703 
1704 		if (subgroupSizeControlFeatures.computeFullSubgroups == DE_FALSE)
1705 			TCU_THROW(NotSupportedError, "Device does not support full subgroups in compute shaders");
1706 
1707 		if ((subgroupSizeControlProperties.requiredSubgroupSizeStages & caseDef.shaderStage) != caseDef.shaderStage)
1708 			TCU_THROW(NotSupportedError, "Required subgroup size is not supported for shader stage");
1709 	}
1710 
1711 	*caseDef.geometryPointSizeSupported = subgroups::isTessellationAndGeometryPointSizeSupported(context);
1712 
1713 	subgroups::supportedCheckShader(context, caseDef.shaderStage);
1714 
1715 	if (OPTYPE_ELECT != caseDef.opType && VK_SHADER_STAGE_COMPUTE_BIT != caseDef.shaderStage)
1716 	{
1717 		if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_BALLOT_BIT))
1718 		{
1719 			TCU_THROW(NotSupportedError, "Subgroup basic operation non-compute stage test required that ballot operations are supported!");
1720 		}
1721 	}
1722 
1723 	if (isAllRayTracingStages(caseDef.shaderStage))
1724 	{
1725 		context.requireDeviceFunctionality("VK_KHR_ray_tracing_pipeline");
1726 	}
1727 }
1728 
noSSBOtest(Context & context,const CaseDefinition caseDef)1729 TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
1730 {
1731 	const deUint32				inputDatasCount	= OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? 3u : 2u;
1732 	vector<subgroups::SSBOData>	inputDatas		(inputDatasCount);
1733 
1734 	inputDatas[0].format = VK_FORMAT_R32_UINT;
1735 	inputDatas[0].layout = subgroups::SSBOData::LayoutStd140;
1736 	inputDatas[0].numElements = SHADER_BUFFER_SIZE/4ull;
1737 	inputDatas[0].initializeType = subgroups::SSBOData::InitializeNonZero;
1738 
1739 	inputDatas[1].format = VK_FORMAT_R32_UINT;
1740 	inputDatas[1].layout = subgroups::SSBOData::LayoutStd140;
1741 	inputDatas[1].numElements = 1ull;
1742 	inputDatas[1].initializeType = subgroups::SSBOData::InitializeNonZero;
1743 
1744 	if(OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType )
1745 	{
1746 		inputDatas[2].format = VK_FORMAT_R32_UINT;
1747 		inputDatas[2].layout = subgroups::SSBOData::LayoutPacked;
1748 		inputDatas[2].numElements = SHADER_BUFFER_SIZE;
1749 		inputDatas[2].initializeType = subgroups::SSBOData::InitializeNone;
1750 		inputDatas[2].isImage = true;
1751 	}
1752 
1753 	if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
1754 	{
1755 		if (OPTYPE_ELECT == caseDef.opType)
1756 			return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32G32_SFLOAT, DE_NULL, 0u, DE_NULL, checkVertexPipelineStagesSubgroupElectNoSSBO);
1757 		else
1758 			return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32G32B32A32_SFLOAT, &inputDatas[0], inputDatasCount, DE_NULL,
1759 				(OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType) ?
1760 					checkVertexPipelineStagesSubgroupBarriersWithImageNoSSBO :
1761 					checkVertexPipelineStagesSubgroupBarriersNoSSBO
1762 			);
1763 	}
1764 	else if (VK_SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
1765 	{
1766 		return subgroups::makeFragmentFrameBufferTest(context, VK_FORMAT_R32G32B32A32_SFLOAT, &inputDatas[0], inputDatasCount, DE_NULL,
1767 			(OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType) ?
1768 				checkFragmentSubgroupBarriersWithImageNoSSBO :
1769 				checkFragmentSubgroupBarriersNoSSBO
1770 		);
1771 	}
1772 	else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
1773 	{
1774 		if (OPTYPE_ELECT == caseDef.opType)
1775 			return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32G32_SFLOAT, DE_NULL, 0u, DE_NULL, checkVertexPipelineStagesSubgroupElectNoSSBO);
1776 		else
1777 			return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32G32B32A32_SFLOAT, &inputDatas[0], inputDatasCount, DE_NULL,
1778 				(OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType) ?
1779 					checkVertexPipelineStagesSubgroupBarriersWithImageNoSSBO :
1780 					checkVertexPipelineStagesSubgroupBarriersNoSSBO
1781 			);
1782 	}
1783 
1784 	if (OPTYPE_ELECT == caseDef.opType)
1785 		return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32G32_SFLOAT, DE_NULL, 0u, DE_NULL, checkVertexPipelineStagesSubgroupElectNoSSBO, caseDef.shaderStage);
1786 
1787 	return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32G32B32A32_SFLOAT, &inputDatas[0], inputDatasCount, DE_NULL,
1788 		(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage) ?
1789 			((OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType) ?
1790 				checkVertexPipelineStagesSubgroupBarriersWithImageNoSSBO :
1791 				checkVertexPipelineStagesSubgroupBarriersNoSSBO) :
1792 			((OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType) ?
1793 				checkTessellationEvaluationSubgroupBarriersWithImageNoSSBO :
1794 				checkTessellationEvaluationSubgroupBarriersNoSSBO),
1795 		caseDef.shaderStage);
1796 }
1797 
test(Context & context,const CaseDefinition caseDef)1798 TestStatus test (Context& context, const CaseDefinition caseDef)
1799 {
1800 	if (isAllComputeStages(caseDef.shaderStage))
1801 	{
1802 		const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT&	subgroupSizeControlProperties	= context.getSubgroupSizeControlPropertiesEXT();
1803 		TestLog&												log								= context.getTestContext().getLog();
1804 
1805 		if (OPTYPE_ELECT == caseDef.opType)
1806 		{
1807 			if (caseDef.requiredSubgroupSize == DE_FALSE)
1808 				return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL, checkComputeSubgroupElect);
1809 
1810 			log << TestLog::Message << "Testing required subgroup size range [" <<  subgroupSizeControlProperties.minSubgroupSize << ", "
1811 				<< subgroupSizeControlProperties.maxSubgroupSize << "]" << TestLog::EndMessage;
1812 
1813 			// According to the spec, requiredSubgroupSize must be a power-of-two integer.
1814 			for (deUint32 size = subgroupSizeControlProperties.minSubgroupSize; size <= subgroupSizeControlProperties.maxSubgroupSize; size *= 2)
1815 				{
1816 					TestStatus result = subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0u, DE_NULL, checkComputeSubgroupElect,
1817 																	size, VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT);
1818 					if (result.getCode() != QP_TEST_RESULT_PASS)
1819 						{
1820 							log << TestLog::Message << "subgroupSize " << size << " failed" << TestLog::EndMessage;
1821 							return result;
1822 						}
1823 				}
1824 
1825 			return TestStatus::pass("OK");
1826 		}
1827 		else
1828 		{
1829 			const deUint32				inputDatasCount					= 3;
1830 			const subgroups::SSBOData	inputDatas[inputDatasCount]		=
1831 			{
1832 				{
1833 					subgroups::SSBOData::InitializeNone,	//  InputDataInitializeType		initializeType;
1834 					subgroups::SSBOData::LayoutStd430,		//  InputDataLayoutType			layout;
1835 					VK_FORMAT_R32_UINT,						//  vk::VkFormat				format;
1836 					SHADER_BUFFER_SIZE,						//  vk::VkDeviceSize			numElements;
1837 				},
1838 				{
1839 					subgroups::SSBOData::InitializeNonZero,	//  InputDataInitializeType		initializeType;
1840 					subgroups::SSBOData::LayoutStd430,		//  InputDataLayoutType			layout;
1841 					VK_FORMAT_R32_UINT,						//  vk::VkFormat				format;
1842 					1,										//  vk::VkDeviceSize			numElements;
1843 				},
1844 				{
1845 					subgroups::SSBOData::InitializeNone,	//  InputDataInitializeType		initializeType;
1846 					subgroups::SSBOData::LayoutPacked,		//  InputDataLayoutType			layout;
1847 					VK_FORMAT_R32_UINT,						//  vk::VkFormat				format;
1848 					SHADER_BUFFER_SIZE,						//  vk::VkDeviceSize			numElements;
1849 					true,									//  bool						isImage;
1850 				},
1851 			};
1852 
1853 			if (caseDef.requiredSubgroupSize == DE_FALSE)
1854 				return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, inputDatas, inputDatasCount, DE_NULL, checkComputeSubgroupBarriers);
1855 
1856 			log << TestLog::Message << "Testing required subgroup size range [" <<  subgroupSizeControlProperties.minSubgroupSize << ", "
1857 				<< subgroupSizeControlProperties.maxSubgroupSize << "]" << TestLog::EndMessage;
1858 
1859 			// According to the spec, requiredSubgroupSize must be a power-of-two integer.
1860 			for (deUint32 size = subgroupSizeControlProperties.minSubgroupSize; size <= subgroupSizeControlProperties.maxSubgroupSize; size *= 2)
1861 			{
1862 				TestStatus result = subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, inputDatas, inputDatasCount, DE_NULL, checkComputeSubgroupBarriers,
1863 																	size, VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT);
1864 				if (result.getCode() != QP_TEST_RESULT_PASS)
1865 				{
1866 					log << TestLog::Message << "subgroupSize " << size << " failed" << TestLog::EndMessage;
1867 					return result;
1868 				}
1869 			}
1870 
1871 			return TestStatus::pass("OK");
1872 		}
1873 	}
1874 	else if (isAllGraphicsStages(caseDef.shaderStage))
1875 	{
1876 		if (!subgroups::isFragmentSSBOSupportedForDevice(context))
1877 		{
1878 			TCU_THROW(NotSupportedError, "Subgroup basic operation require that the fragment stage be able to write to SSBOs!");
1879 		}
1880 
1881 		const VkShaderStageFlags	stages	= subgroups::getPossibleGraphicsSubgroupStages(context, caseDef.shaderStage);
1882 		const VkShaderStageFlags	stagesBits[] =
1883 		{
1884 			VK_SHADER_STAGE_VERTEX_BIT,
1885 			VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
1886 			VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
1887 			VK_SHADER_STAGE_GEOMETRY_BIT,
1888 			VK_SHADER_STAGE_FRAGMENT_BIT,
1889 		};
1890 
1891 		if (OPTYPE_ELECT == caseDef.opType)
1892 		{
1893 			const deUint32		inputCount				= DE_LENGTH_OF_ARRAY(stagesBits);
1894 			subgroups::SSBOData	inputData[inputCount];
1895 
1896 			for (deUint32 ndx = 0; ndx < DE_LENGTH_OF_ARRAY(stagesBits); ++ndx)
1897 			{
1898 				inputData[ndx]	=
1899 				{
1900 					subgroups::SSBOData::InitializeZero,	//  InputDataInitializeType		initializeType;
1901 					subgroups::SSBOData::LayoutStd430,		//  InputDataLayoutType			layout;
1902 					VK_FORMAT_R32_UINT,						//  vk::VkFormat				format;
1903 					1,										//  vk::VkDeviceSize			numElements;
1904 					false,									//  bool						isImage;
1905 					4 + ndx,								//  deUint32					binding;
1906 					stagesBits[ndx],						//  vk::VkShaderStageFlags		stages;
1907 				};
1908 			}
1909 
1910 			return subgroups::allStages(context, VK_FORMAT_R32_UINT, inputData, inputCount, DE_NULL, checkVertexPipelineStagesSubgroupElect, stages);
1911 		}
1912 		else
1913 		{
1914 			const deUint32		inputDatasCount					= DE_LENGTH_OF_ARRAY(stagesBits) * 4u;
1915 			subgroups::SSBOData	inputDatas[inputDatasCount];
1916 
1917 			for (int ndx = 0; ndx < DE_LENGTH_OF_ARRAY(stagesBits); ++ndx)
1918 			{
1919 				const deUint32 index = ndx*4;
1920 
1921 				inputDatas[index].format				= VK_FORMAT_R32_UINT;
1922 				inputDatas[index].layout				= subgroups::SSBOData::LayoutStd430;
1923 				inputDatas[index].numElements			= SHADER_BUFFER_SIZE;
1924 				inputDatas[index].initializeType		= subgroups::SSBOData::InitializeNonZero;
1925 				inputDatas[index].binding				= index + 4u;
1926 				inputDatas[index].stages				= stagesBits[ndx];
1927 
1928 				inputDatas[index + 1].format			= VK_FORMAT_R32_UINT;
1929 				inputDatas[index + 1].layout			= subgroups::SSBOData::LayoutStd430;
1930 				inputDatas[index + 1].numElements		= 1;
1931 				inputDatas[index + 1].initializeType	= subgroups::SSBOData::InitializeZero;
1932 				inputDatas[index + 1].binding			= index + 5u;
1933 				inputDatas[index + 1].stages			= stagesBits[ndx];
1934 
1935 				inputDatas[index + 2].format			= VK_FORMAT_R32_UINT;
1936 				inputDatas[index + 2].layout			= subgroups::SSBOData::LayoutStd430;
1937 				inputDatas[index + 2].numElements		= 1;
1938 				inputDatas[index + 2].initializeType	= subgroups::SSBOData::InitializeNonZero;
1939 				inputDatas[index + 2].binding			= index + 6u;
1940 				inputDatas[index + 2].stages			= stagesBits[ndx];
1941 
1942 				inputDatas[index + 3].format			= VK_FORMAT_R32_UINT;
1943 				inputDatas[index + 3].layout			= subgroups::SSBOData::LayoutStd430;
1944 				inputDatas[index + 3].numElements		= SHADER_BUFFER_SIZE;
1945 				inputDatas[index + 3].initializeType	= subgroups::SSBOData::InitializeNone;
1946 				inputDatas[index + 3].isImage			= true;
1947 				inputDatas[index + 3].binding			= index + 7u;
1948 				inputDatas[index + 3].stages			= stagesBits[ndx];
1949 			}
1950 
1951 			return subgroups::allStages(context, VK_FORMAT_R32_UINT, inputDatas, inputDatasCount, DE_NULL, checkVertexPipelineStagesSubgroupBarriers, stages);
1952 		}
1953 	}
1954 	else if (isAllRayTracingStages(caseDef.shaderStage))
1955 	{
1956 		const VkShaderStageFlags	stages			= subgroups::getPossibleRayTracingSubgroupStages(context, caseDef.shaderStage);
1957 		const VkShaderStageFlags	stagesBits[]	=
1958 		{
1959 			VK_SHADER_STAGE_RAYGEN_BIT_KHR,
1960 			VK_SHADER_STAGE_ANY_HIT_BIT_KHR,
1961 			VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,
1962 			VK_SHADER_STAGE_MISS_BIT_KHR,
1963 			VK_SHADER_STAGE_INTERSECTION_BIT_KHR,
1964 			VK_SHADER_STAGE_CALLABLE_BIT_KHR,
1965 		};
1966 		const deUint32				stagesCount		= DE_LENGTH_OF_ARRAY(stagesBits);
1967 
1968 		if (OPTYPE_ELECT == caseDef.opType)
1969 		{
1970 			const deUint32		inputDataCount				= stagesCount;
1971 			subgroups::SSBOData	inputData[inputDataCount];
1972 
1973 			for (deUint32 ndx = 0; ndx < inputDataCount; ++ndx)
1974 			{
1975 				inputData[ndx].format			= VK_FORMAT_R32_UINT;
1976 				inputData[ndx].layout			= subgroups::SSBOData::LayoutStd430;
1977 				inputData[ndx].numElements		= 1;
1978 				inputData[ndx].initializeType	= subgroups::SSBOData::InitializeZero;
1979 				inputData[ndx].binding			= stagesCount + ndx;
1980 				inputData[ndx].stages			= stagesBits[ndx];
1981 			}
1982 
1983 			return subgroups::allRayTracingStages(context, VK_FORMAT_R32_UINT, inputData, inputDataCount, DE_NULL, checkVertexPipelineStagesSubgroupElect, stages);
1984 		}
1985 		else
1986 		{
1987 			const deUint32		datasPerStage				= 4u;
1988 			const deUint32		inputDatasCount				= datasPerStage * stagesCount;
1989 			subgroups::SSBOData	inputDatas[inputDatasCount];
1990 
1991 			for (deUint32 ndx = 0; ndx < stagesCount; ++ndx)
1992 			{
1993 				const deUint32 index = datasPerStage * ndx;
1994 
1995 				for (deUint32 perStageNdx = 0; perStageNdx < datasPerStage; ++perStageNdx)
1996 				{
1997 					inputDatas[index + perStageNdx].format	= VK_FORMAT_R32_UINT;
1998 					inputDatas[index + perStageNdx].layout	= subgroups::SSBOData::LayoutStd430;
1999 					inputDatas[index + perStageNdx].stages	= stagesBits[ndx];
2000 					inputDatas[index + perStageNdx].isImage	= false;
2001 				}
2002 
2003 				inputDatas[index + 0].numElements		= SHADER_BUFFER_SIZE;
2004 				inputDatas[index + 0].initializeType	= subgroups::SSBOData::InitializeNonZero;
2005 				inputDatas[index + 0].binding			= index + stagesCount;
2006 
2007 				inputDatas[index + 1].numElements		= 1;
2008 				inputDatas[index + 1].initializeType	= subgroups::SSBOData::InitializeZero;
2009 				inputDatas[index + 1].binding			= index + stagesCount + 1u;
2010 
2011 				inputDatas[index + 2].numElements		= 1;
2012 				inputDatas[index + 2].initializeType	= subgroups::SSBOData::InitializeNonZero;
2013 				inputDatas[index + 2].binding			= index + stagesCount + 2u;
2014 
2015 				inputDatas[index + 3].numElements		= SHADER_BUFFER_SIZE;
2016 				inputDatas[index + 3].initializeType	= subgroups::SSBOData::InitializeNone;
2017 				inputDatas[index + 3].isImage			= true;
2018 				inputDatas[index + 3].binding			= index + stagesCount + 3u;
2019 			}
2020 
2021 			return subgroups::allRayTracingStages(context, VK_FORMAT_R32_UINT, inputDatas, inputDatasCount, DE_NULL, checkVertexPipelineStagesSubgroupBarriers, stages);
2022 		}
2023 	}
2024 	else
2025 		TCU_THROW(InternalError, "Unknown stage or invalid stage set");
2026 }
2027 }
2028 
2029 namespace vkt
2030 {
2031 namespace subgroups
2032 {
createSubgroupsBasicTests(TestContext & testCtx)2033 TestCaseGroup* createSubgroupsBasicTests (TestContext& testCtx)
2034 {
2035 	de::MovePtr<TestCaseGroup>	group				(new TestCaseGroup(testCtx, "basic", "Subgroup basic category tests"));
2036 	de::MovePtr<TestCaseGroup>	graphicGroup		(new TestCaseGroup(testCtx, "graphics", "Subgroup basic category tests: graphics"));
2037 	de::MovePtr<TestCaseGroup>	computeGroup		(new TestCaseGroup(testCtx, "compute", "Subgroup basic category tests: compute"));
2038 	de::MovePtr<TestCaseGroup>	framebufferGroup	(new TestCaseGroup(testCtx, "framebuffer", "Subgroup basic category tests: framebuffer"));
2039 	de::MovePtr<TestCaseGroup>	raytracingGroup		(new TestCaseGroup(testCtx, "ray_tracing", "Subgroup basic category tests: ray tracing"));
2040 	const VkShaderStageFlags	stages[]			=
2041 	{
2042 		VK_SHADER_STAGE_FRAGMENT_BIT,
2043 		VK_SHADER_STAGE_VERTEX_BIT,
2044 		VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
2045 		VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
2046 		VK_SHADER_STAGE_GEOMETRY_BIT,
2047 	};
2048 	const deBool				boolValues[]		=
2049 	{
2050 		DE_FALSE,
2051 		DE_TRUE
2052 	};
2053 
2054 	for (int opTypeIndex = 0; opTypeIndex < OPTYPE_LAST; ++opTypeIndex)
2055 	{
2056 		const OpType	opType	= static_cast<OpType>(opTypeIndex);
2057 		const string	op		= de::toLower(getOpTypeName(opType));
2058 
2059 		for (size_t groupSizeNdx = 0; groupSizeNdx < DE_LENGTH_OF_ARRAY(boolValues); ++groupSizeNdx)
2060 		{
2061 			const deBool			requiredSubgroupSize	= boolValues[groupSizeNdx];
2062 			const string			testNameSuffix			= requiredSubgroupSize ? "_requiredsubgroupsize" : "";
2063 			const CaseDefinition	caseDef					=
2064 			{
2065 				opType,							//  OpType				opType;
2066 				VK_SHADER_STAGE_COMPUTE_BIT,	//  VkShaderStageFlags	shaderStage;
2067 				de::SharedPtr<bool>(new bool),	//  de::SharedPtr<bool>	geometryPointSizeSupported;
2068 				requiredSubgroupSize			//  deBool				requiredSubgroupSize;
2069 			};
2070 			const string			testName				= op + testNameSuffix;
2071 
2072 			addFunctionCaseWithPrograms(computeGroup.get(), testName, "", supportedCheck, initPrograms, test, caseDef);
2073 		}
2074 
2075 		if (OPTYPE_SUBGROUP_MEMORY_BARRIER_SHARED == opType)
2076 		{
2077 			// Shared isn't available in non compute shaders.
2078 			continue;
2079 		}
2080 
2081 		{
2082 			const CaseDefinition caseDef =
2083 			{
2084 				opType,							//  OpType				opType;
2085 				VK_SHADER_STAGE_ALL_GRAPHICS,	//  VkShaderStageFlags	shaderStage;
2086 				de::SharedPtr<bool>(new bool),	//  de::SharedPtr<bool>	geometryPointSizeSupported;
2087 				DE_FALSE						//  deBool				requiredSubgroupSize;
2088 			};
2089 
2090 			addFunctionCaseWithPrograms(graphicGroup.get(), op, "", supportedCheck, initPrograms, test, caseDef);
2091 		}
2092 
2093 		{
2094 			const CaseDefinition caseDef =
2095 			{
2096 				opType,							//  OpType				opType;
2097 				SHADER_STAGE_ALL_RAY_TRACING,	//  VkShaderStageFlags	shaderStage;
2098 				de::SharedPtr<bool>(new bool),	//  de::SharedPtr<bool>	geometryPointSizeSupported;
2099 				DE_FALSE						//  deBool				requiredSubgroupSize;
2100 			};
2101 
2102 			addFunctionCaseWithPrograms(raytracingGroup.get(), op, "", supportedCheck, initPrograms, test, caseDef);
2103 		}
2104 
2105 		for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
2106 		{
2107 			if (OPTYPE_ELECT == opType && stages[stageIndex] == VK_SHADER_STAGE_FRAGMENT_BIT)
2108 				continue;		// This is not tested. I don't know why.
2109 
2110 			const CaseDefinition	caseDef		=
2111 			{
2112 				opType,							//  OpType				opType;
2113 				stages[stageIndex],				//  VkShaderStageFlags	shaderStage;
2114 				de::SharedPtr<bool>(new bool),	//  de::SharedPtr<bool>	geometryPointSizeSupported;
2115 				DE_FALSE						//  deBool				requiredSubgroupSize;
2116 			};
2117 			const string			testName	= op + "_" + getShaderStageName(caseDef.shaderStage);
2118 
2119 			addFunctionCaseWithPrograms(framebufferGroup.get(), testName, "", supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
2120 		}
2121 	}
2122 
2123 	group->addChild(graphicGroup.release());
2124 	group->addChild(computeGroup.release());
2125 	group->addChild(framebufferGroup.release());
2126 	group->addChild(raytracingGroup.release());
2127 
2128 	return group.release();
2129 }
2130 
2131 } // subgroups
2132 } // vkt
2133