• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2019 The Khronos Group Inc.
6  * Copyright (c) 2019 Google Inc.
7  * Copyright (c) 2017 Codeplay Software Ltd.
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  */ /*!
22  * \file
23  * \brief Subgroups Tests Utils
24  */ /*--------------------------------------------------------------------*/
25 
26 #include "vktSubgroupsTestsUtils.hpp"
27 #include "vkRayTracingUtil.hpp"
28 #include "deFloat16.h"
29 #include "deRandom.hpp"
30 #include "tcuCommandLine.hpp"
31 #include "tcuStringTemplate.hpp"
32 #include "vkBarrierUtil.hpp"
33 #include "vkImageUtil.hpp"
34 #include "vkTypeUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkObjUtil.hpp"
37 
38 using namespace tcu;
39 using namespace std;
40 using namespace vk;
41 using namespace vkt;
42 
43 namespace
44 {
45 
//! Selector between two pipeline flavours.
//! NOTE(review): presumably distinguishes classic compute from mesh shading; usage is not visible in this part of the file.
enum class ComputeLike
{
	COMPUTE	= 0,
	MESH
};
47 
getMaxWidth()48 deUint32 getMaxWidth ()
49 {
50 	return 1024u;
51 }
52 
//! Returns the width that follows \p width in the test sequence.
//!
//! Widths below 128 (the max subgroup size) advance by one so that every
//! value up to 128 is tested; from 128 onwards the width doubles, visiting
//! only powers of two to reduce testing time.
deUint32 getNextWidth (const deUint32 width)
{
	const bool	stepByOne	= (width < 128);

	return stepByOne ? (width + 1) : (width * 2);
}
66 
getFormatSizeInBytes(const VkFormat format)67 deUint32 getFormatSizeInBytes (const VkFormat format)
68 {
69 	switch (format)
70 	{
71 		default:
72 			DE_FATAL("Unhandled format!");
73 			return 0;
74 		case VK_FORMAT_R8_SINT:
75 		case VK_FORMAT_R8_UINT:
76 			return static_cast<deUint32>(sizeof(deInt8));
77 		case VK_FORMAT_R8G8_SINT:
78 		case VK_FORMAT_R8G8_UINT:
79 			return static_cast<deUint32>(sizeof(deInt8) * 2);
80 		case VK_FORMAT_R8G8B8_SINT:
81 		case VK_FORMAT_R8G8B8_UINT:
82 		case VK_FORMAT_R8G8B8A8_SINT:
83 		case VK_FORMAT_R8G8B8A8_UINT:
84 			return static_cast<deUint32>(sizeof(deInt8) * 4);
85 		case VK_FORMAT_R16_SINT:
86 		case VK_FORMAT_R16_UINT:
87 		case VK_FORMAT_R16_SFLOAT:
88 			return static_cast<deUint32>(sizeof(deInt16));
89 		case VK_FORMAT_R16G16_SINT:
90 		case VK_FORMAT_R16G16_UINT:
91 		case VK_FORMAT_R16G16_SFLOAT:
92 			return static_cast<deUint32>(sizeof(deInt16) * 2);
93 		case VK_FORMAT_R16G16B16_UINT:
94 		case VK_FORMAT_R16G16B16_SINT:
95 		case VK_FORMAT_R16G16B16_SFLOAT:
96 		case VK_FORMAT_R16G16B16A16_SINT:
97 		case VK_FORMAT_R16G16B16A16_UINT:
98 		case VK_FORMAT_R16G16B16A16_SFLOAT:
99 			return static_cast<deUint32>(sizeof(deInt16) * 4);
100 		case VK_FORMAT_R32_SINT:
101 		case VK_FORMAT_R32_UINT:
102 		case VK_FORMAT_R32_SFLOAT:
103 			return static_cast<deUint32>(sizeof(deInt32));
104 		case VK_FORMAT_R32G32_SINT:
105 		case VK_FORMAT_R32G32_UINT:
106 		case VK_FORMAT_R32G32_SFLOAT:
107 			return static_cast<deUint32>(sizeof(deInt32) * 2);
108 		case VK_FORMAT_R32G32B32_SINT:
109 		case VK_FORMAT_R32G32B32_UINT:
110 		case VK_FORMAT_R32G32B32_SFLOAT:
111 		case VK_FORMAT_R32G32B32A32_SINT:
112 		case VK_FORMAT_R32G32B32A32_UINT:
113 		case VK_FORMAT_R32G32B32A32_SFLOAT:
114 			return static_cast<deUint32>(sizeof(deInt32) * 4);
115 		case VK_FORMAT_R64_SINT:
116 		case VK_FORMAT_R64_UINT:
117 		case VK_FORMAT_R64_SFLOAT:
118 			return static_cast<deUint32>(sizeof(deInt64));
119 		case VK_FORMAT_R64G64_SINT:
120 		case VK_FORMAT_R64G64_UINT:
121 		case VK_FORMAT_R64G64_SFLOAT:
122 			return static_cast<deUint32>(sizeof(deInt64) * 2);
123 		case VK_FORMAT_R64G64B64_SINT:
124 		case VK_FORMAT_R64G64B64_UINT:
125 		case VK_FORMAT_R64G64B64_SFLOAT:
126 		case VK_FORMAT_R64G64B64A64_SINT:
127 		case VK_FORMAT_R64G64B64A64_UINT:
128 		case VK_FORMAT_R64G64B64A64_SFLOAT:
129 			return static_cast<deUint32>(sizeof(deInt64) * 4);
130 		// The below formats are used to represent bool and bvec* types. These
131 		// types are passed to the shader as int and ivec* types, before the
132 		// calculations are done as booleans. We need a distinct type here so
133 		// that the shader generators can switch on it and generate the correct
134 		// shader source for testing.
135 		case VK_FORMAT_R8_USCALED:
136 			return static_cast<deUint32>(sizeof(deInt32));
137 		case VK_FORMAT_R8G8_USCALED:
138 			return static_cast<deUint32>(sizeof(deInt32) * 2);
139 		case VK_FORMAT_R8G8B8_USCALED:
140 		case VK_FORMAT_R8G8B8A8_USCALED:
141 			return static_cast<deUint32>(sizeof(deInt32) * 4);
142 	}
143 }
144 
getElementSizeInBytes(const VkFormat format,const subgroups::SSBOData::InputDataLayoutType layout)145 deUint32 getElementSizeInBytes (const VkFormat									format,
146 								const subgroups::SSBOData::InputDataLayoutType	layout)
147 {
148 	const deUint32 bytes = getFormatSizeInBytes(format);
149 
150 	if (layout == subgroups::SSBOData::LayoutStd140)
151 		return bytes < 16 ? 16 : bytes;
152 	else
153 		return bytes;
154 }
155 
makeRenderPass(Context & context,VkFormat format)156 Move<VkRenderPass> makeRenderPass (Context& context, VkFormat format)
157 {
158 	const VkAttachmentReference		colorReference			=
159 	{
160 		0,
161 		VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
162 	};
163 	const VkSubpassDescription		subpassDescription		=
164 	{
165 		0u,									//  VkSubpassDescriptionFlags		flags;
166 		VK_PIPELINE_BIND_POINT_GRAPHICS,	//  VkPipelineBindPoint				pipelineBindPoint;
167 		0,									//  deUint32						inputAttachmentCount;
168 		DE_NULL,							//  const VkAttachmentReference*	pInputAttachments;
169 		1,									//  deUint32						colorAttachmentCount;
170 		&colorReference,					//  const VkAttachmentReference*	pColorAttachments;
171 		DE_NULL,							//  const VkAttachmentReference*	pResolveAttachments;
172 		DE_NULL,							//  const VkAttachmentReference*	pDepthStencilAttachment;
173 		0,									//  deUint32						preserveAttachmentCount;
174 		DE_NULL								//  const deUint32*					pPreserveAttachments;
175 	};
176 	const VkSubpassDependency		subpassDependencies[2]	=
177 	{
178 		{
179 			VK_SUBPASS_EXTERNAL,															//  deUint32				srcSubpass;
180 			0u,																				//  deUint32				dstSubpass;
181 			VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,											//  VkPipelineStageFlags	srcStageMask;
182 			VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,									//  VkPipelineStageFlags	dstStageMask;
183 			VK_ACCESS_MEMORY_READ_BIT,														//  VkAccessFlags			srcAccessMask;
184 			VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,		//  VkAccessFlags			dstAccessMask;
185 			VK_DEPENDENCY_BY_REGION_BIT														//  VkDependencyFlags		dependencyFlags;
186 		},
187 		{
188 			0u,																				//  deUint32				srcSubpass;
189 			VK_SUBPASS_EXTERNAL,															//  deUint32				dstSubpass;
190 			VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,									//  VkPipelineStageFlags	srcStageMask;
191 			VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,											//  VkPipelineStageFlags	dstStageMask;
192 			VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,		//  VkAccessFlags			srcAccessMask;
193 			VK_ACCESS_MEMORY_READ_BIT,														//  VkAccessFlags			dstAccessMask;
194 			VK_DEPENDENCY_BY_REGION_BIT														//  VkDependencyFlags		dependencyFlags;
195 		},
196 	};
197 	const VkAttachmentDescription	attachmentDescription	=
198 	{
199 		0u,											//  VkAttachmentDescriptionFlags	flags;
200 		format,										//  VkFormat						format;
201 		VK_SAMPLE_COUNT_1_BIT,						//  VkSampleCountFlagBits			samples;
202 		VK_ATTACHMENT_LOAD_OP_CLEAR,				//  VkAttachmentLoadOp				loadOp;
203 		VK_ATTACHMENT_STORE_OP_STORE,				//  VkAttachmentStoreOp				storeOp;
204 		VK_ATTACHMENT_LOAD_OP_DONT_CARE,			//  VkAttachmentLoadOp				stencilLoadOp;
205 		VK_ATTACHMENT_STORE_OP_DONT_CARE,			//  VkAttachmentStoreOp				stencilStoreOp;
206 		VK_IMAGE_LAYOUT_UNDEFINED,					//  VkImageLayout					initialLayout;
207 		VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL		//  VkImageLayout					finalLayout;
208 	};
209 	const VkRenderPassCreateInfo	renderPassCreateInfo =
210 	{
211 		VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,	//  VkStructureType					sType;
212 		DE_NULL,									//  const void*						pNext;
213 		0u,											//  VkRenderPassCreateFlags			flags;
214 		1,											//  deUint32						attachmentCount;
215 		&attachmentDescription,						//  const VkAttachmentDescription*	pAttachments;
216 		1,											//  deUint32						subpassCount;
217 		&subpassDescription,						//  const VkSubpassDescription*		pSubpasses;
218 		2,											//  deUint32						dependencyCount;
219 		subpassDependencies							//  const VkSubpassDependency*		pDependencies;
220 	};
221 
222 	return createRenderPass(context.getDeviceInterface(), context.getDevice(), &renderPassCreateInfo);
223 }
224 
makeGraphicsPipeline(const DeviceInterface & vk,const VkDevice device,const VkPipelineLayout pipelineLayout,const VkShaderModule vertexShaderModule,const VkShaderModule tessellationControlShaderModule,const VkShaderModule tessellationEvalShaderModule,const VkShaderModule geometryShaderModule,const VkShaderModule fragmentShaderModule,const VkRenderPass renderPass,const std::vector<VkViewport> & viewports,const std::vector<VkRect2D> & scissors,const VkPrimitiveTopology topology,const deUint32 subpass,const deUint32 patchControlPoints,const VkPipelineVertexInputStateCreateInfo * vertexInputStateCreateInfo,const VkPipelineRasterizationStateCreateInfo * rasterizationStateCreateInfo,const VkPipelineMultisampleStateCreateInfo * multisampleStateCreateInfo,const VkPipelineDepthStencilStateCreateInfo * depthStencilStateCreateInfo,const VkPipelineColorBlendStateCreateInfo * colorBlendStateCreateInfo,const VkPipelineDynamicStateCreateInfo * dynamicStateCreateInfo,const deUint32 vertexShaderStageCreateFlags,const deUint32 tessellationControlShaderStageCreateFlags,const deUint32 tessellationEvalShaderStageCreateFlags,const deUint32 geometryShaderStageCreateFlags,const deUint32 fragmentShaderStageCreateFlags,const deUint32 requiredSubgroupSize[5])225 Move<VkPipeline> makeGraphicsPipeline (const DeviceInterface&							vk,
226 									   const VkDevice									device,
227 									   const VkPipelineLayout							pipelineLayout,
228 									   const VkShaderModule								vertexShaderModule,
229 									   const VkShaderModule								tessellationControlShaderModule,
230 									   const VkShaderModule								tessellationEvalShaderModule,
231 									   const VkShaderModule								geometryShaderModule,
232 									   const VkShaderModule								fragmentShaderModule,
233 									   const VkRenderPass								renderPass,
234 									   const std::vector<VkViewport>&					viewports,
235 									   const std::vector<VkRect2D>&						scissors,
236 									   const VkPrimitiveTopology						topology,
237 									   const deUint32									subpass,
238 									   const deUint32									patchControlPoints,
239 									   const VkPipelineVertexInputStateCreateInfo*		vertexInputStateCreateInfo,
240 									   const VkPipelineRasterizationStateCreateInfo*	rasterizationStateCreateInfo,
241 									   const VkPipelineMultisampleStateCreateInfo*		multisampleStateCreateInfo,
242 									   const VkPipelineDepthStencilStateCreateInfo*		depthStencilStateCreateInfo,
243 									   const VkPipelineColorBlendStateCreateInfo*		colorBlendStateCreateInfo,
244 									   const VkPipelineDynamicStateCreateInfo*			dynamicStateCreateInfo,
245 									   const deUint32									vertexShaderStageCreateFlags,
246 									   const deUint32									tessellationControlShaderStageCreateFlags,
247 									   const deUint32									tessellationEvalShaderStageCreateFlags,
248 									   const deUint32									geometryShaderStageCreateFlags,
249 									   const deUint32									fragmentShaderStageCreateFlags,
250 									   const deUint32									requiredSubgroupSize[5])
251 {
252 	const VkBool32									disableRasterization				= (fragmentShaderModule == DE_NULL);
253 	const bool										hasTessellation						= (tessellationControlShaderModule != DE_NULL || tessellationEvalShaderModule != DE_NULL);
254 
255 	VkPipelineShaderStageCreateInfo					stageCreateInfo						=
256 	{
257 		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType                     sType
258 		DE_NULL,												// const void*                         pNext
259 		0u,														// VkPipelineShaderStageCreateFlags    flags
260 		VK_SHADER_STAGE_VERTEX_BIT,								// VkShaderStageFlagBits               stage
261 		DE_NULL,												// VkShaderModule                      module
262 		"main",													// const char*                         pName
263 		DE_NULL													// const VkSpecializationInfo*         pSpecializationInfo
264 	};
265 
266 	std::vector<VkPipelineShaderStageCreateInfo>	pipelineShaderStageParams;
267 
268 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT requiredSubgroupSizeCreateInfo[5] =
269 	{
270 		{
271 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
272 			DE_NULL,
273 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[0] : 0u,
274 		},
275 		{
276 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
277 			DE_NULL,
278 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[1] : 0u,
279 		},
280 		{
281 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
282 			DE_NULL,
283 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[2] : 0u,
284 		},
285 		{
286 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
287 			DE_NULL,
288 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[3] : 0u,
289 		},
290 		{
291 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
292 			DE_NULL,
293 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[4] : 0u,
294 		},
295 	};
296 
297 	{
298 		stageCreateInfo.pNext	= (requiredSubgroupSizeCreateInfo[0].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[0] : DE_NULL;
299 		stageCreateInfo.flags	= vertexShaderStageCreateFlags;
300 		stageCreateInfo.stage	= VK_SHADER_STAGE_VERTEX_BIT;
301 		stageCreateInfo.module	= vertexShaderModule;
302 		pipelineShaderStageParams.push_back(stageCreateInfo);
303 	}
304 
305 	if (tessellationControlShaderModule != DE_NULL)
306 	{
307 		stageCreateInfo.pNext	= (requiredSubgroupSizeCreateInfo[1].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[1] : DE_NULL;
308 		stageCreateInfo.flags	= tessellationControlShaderStageCreateFlags;
309 		stageCreateInfo.stage	= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
310 		stageCreateInfo.module	= tessellationControlShaderModule;
311 		pipelineShaderStageParams.push_back(stageCreateInfo);
312 	}
313 
314 	if (tessellationEvalShaderModule != DE_NULL)
315 	{
316 		stageCreateInfo.pNext	= (requiredSubgroupSize != DE_NULL && requiredSubgroupSizeCreateInfo[2].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[2] : DE_NULL;
317 		stageCreateInfo.flags	= tessellationEvalShaderStageCreateFlags;
318 		stageCreateInfo.stage	= VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
319 		stageCreateInfo.module	= tessellationEvalShaderModule;
320 		pipelineShaderStageParams.push_back(stageCreateInfo);
321 	}
322 
323 	if (geometryShaderModule != DE_NULL)
324 	{
325 		stageCreateInfo.pNext	= (requiredSubgroupSizeCreateInfo[3].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[3] : DE_NULL;
326 		stageCreateInfo.flags	= geometryShaderStageCreateFlags;
327 		stageCreateInfo.stage	= VK_SHADER_STAGE_GEOMETRY_BIT;
328 		stageCreateInfo.module	= geometryShaderModule;
329 		pipelineShaderStageParams.push_back(stageCreateInfo);
330 	}
331 
332 	if (fragmentShaderModule != DE_NULL)
333 	{
334 		stageCreateInfo.pNext	= (requiredSubgroupSizeCreateInfo[4].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[4] : DE_NULL;
335 		stageCreateInfo.flags	= fragmentShaderStageCreateFlags;
336 		stageCreateInfo.stage	= VK_SHADER_STAGE_FRAGMENT_BIT;
337 		stageCreateInfo.module	= fragmentShaderModule;
338 		pipelineShaderStageParams.push_back(stageCreateInfo);
339 	}
340 
341 	const VkVertexInputBindingDescription			vertexInputBindingDescription		=
342 	{
343 		0u,								// deUint32             binding
344 		sizeof(tcu::Vec4),				// deUint32             stride
345 		VK_VERTEX_INPUT_RATE_VERTEX,	// VkVertexInputRate    inputRate
346 	};
347 
348 	const VkVertexInputAttributeDescription			vertexInputAttributeDescription		=
349 	{
350 		0u,								// deUint32    location
351 		0u,								// deUint32    binding
352 		VK_FORMAT_R32G32B32A32_SFLOAT,	// VkFormat    format
353 		0u								// deUint32    offset
354 	};
355 
356 	const VkPipelineVertexInputStateCreateInfo		vertexInputStateCreateInfoDefault	=
357 	{
358 		VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,	// VkStructureType                             sType
359 		DE_NULL,													// const void*                                 pNext
360 		(VkPipelineVertexInputStateCreateFlags)0,					// VkPipelineVertexInputStateCreateFlags       flags
361 		1u,															// deUint32                                    vertexBindingDescriptionCount
362 		&vertexInputBindingDescription,								// const VkVertexInputBindingDescription*      pVertexBindingDescriptions
363 		1u,															// deUint32                                    vertexAttributeDescriptionCount
364 		&vertexInputAttributeDescription							// const VkVertexInputAttributeDescription*    pVertexAttributeDescriptions
365 	};
366 
367 	const VkPipelineInputAssemblyStateCreateInfo	inputAssemblyStateCreateInfo		=
368 	{
369 		VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,	// VkStructureType                            sType
370 		DE_NULL,														// const void*                                pNext
371 		0u,																// VkPipelineInputAssemblyStateCreateFlags    flags
372 		topology,														// VkPrimitiveTopology                        topology
373 		VK_FALSE														// VkBool32                                   primitiveRestartEnable
374 	};
375 
376 	const VkPipelineTessellationStateCreateInfo		tessStateCreateInfo					=
377 	{
378 		VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,	// VkStructureType                           sType
379 		DE_NULL,													// const void*                               pNext
380 		0u,															// VkPipelineTessellationStateCreateFlags    flags
381 		patchControlPoints											// deUint32                                  patchControlPoints
382 	};
383 
384 	const VkPipelineViewportStateCreateInfo			viewportStateCreateInfo				=
385 	{
386 		VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,	// VkStructureType                             sType
387 		DE_NULL,												// const void*                                 pNext
388 		(VkPipelineViewportStateCreateFlags)0,					// VkPipelineViewportStateCreateFlags          flags
389 		viewports.empty() ? 1u : (deUint32)viewports.size(),	// deUint32                                    viewportCount
390 		viewports.empty() ? DE_NULL : &viewports[0],			// const VkViewport*                           pViewports
391 		viewports.empty() ? 1u : (deUint32)scissors.size(),		// deUint32                                    scissorCount
392 		scissors.empty() ? DE_NULL : &scissors[0]				// const VkRect2D*                             pScissors
393 	};
394 
395 	const VkPipelineRasterizationStateCreateInfo	rasterizationStateCreateInfoDefault	=
396 	{
397 		VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,	// VkStructureType                            sType
398 		DE_NULL,													// const void*                                pNext
399 		0u,															// VkPipelineRasterizationStateCreateFlags    flags
400 		VK_FALSE,													// VkBool32                                   depthClampEnable
401 		disableRasterization,										// VkBool32                                   rasterizerDiscardEnable
402 		VK_POLYGON_MODE_FILL,										// VkPolygonMode                              polygonMode
403 		VK_CULL_MODE_NONE,											// VkCullModeFlags                            cullMode
404 		VK_FRONT_FACE_COUNTER_CLOCKWISE,							// VkFrontFace                                frontFace
405 		VK_FALSE,													// VkBool32                                   depthBiasEnable
406 		0.0f,														// float                                      depthBiasConstantFactor
407 		0.0f,														// float                                      depthBiasClamp
408 		0.0f,														// float                                      depthBiasSlopeFactor
409 		1.0f														// float                                      lineWidth
410 	};
411 
412 	const VkPipelineMultisampleStateCreateInfo		multisampleStateCreateInfoDefault	=
413 	{
414 		VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,	// VkStructureType                          sType
415 		DE_NULL,													// const void*                              pNext
416 		0u,															// VkPipelineMultisampleStateCreateFlags    flags
417 		VK_SAMPLE_COUNT_1_BIT,										// VkSampleCountFlagBits                    rasterizationSamples
418 		VK_FALSE,													// VkBool32                                 sampleShadingEnable
419 		1.0f,														// float                                    minSampleShading
420 		DE_NULL,													// const VkSampleMask*                      pSampleMask
421 		VK_FALSE,													// VkBool32                                 alphaToCoverageEnable
422 		VK_FALSE													// VkBool32                                 alphaToOneEnable
423 	};
424 
425 	const VkStencilOpState							stencilOpState						=
426 	{
427 		VK_STENCIL_OP_KEEP,		// VkStencilOp    failOp
428 		VK_STENCIL_OP_KEEP,		// VkStencilOp    passOp
429 		VK_STENCIL_OP_KEEP,		// VkStencilOp    depthFailOp
430 		VK_COMPARE_OP_NEVER,	// VkCompareOp    compareOp
431 		0,						// deUint32       compareMask
432 		0,						// deUint32       writeMask
433 		0						// deUint32       reference
434 	};
435 
436 	const VkPipelineDepthStencilStateCreateInfo		depthStencilStateCreateInfoDefault	=
437 	{
438 		VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,	// VkStructureType                          sType
439 		DE_NULL,													// const void*                              pNext
440 		0u,															// VkPipelineDepthStencilStateCreateFlags   flags
441 		VK_FALSE,													// VkBool32                                 depthTestEnable
442 		VK_FALSE,													// VkBool32                                 depthWriteEnable
443 		VK_COMPARE_OP_LESS_OR_EQUAL,								// VkCompareOp                              depthCompareOp
444 		VK_FALSE,													// VkBool32                                 depthBoundsTestEnable
445 		VK_FALSE,													// VkBool32                                 stencilTestEnable
446 		stencilOpState,												// VkStencilOpState                         front
447 		stencilOpState,												// VkStencilOpState                         back
448 		0.0f,														// float                                    minDepthBounds
449 		1.0f,														// float                                    maxDepthBounds
450 	};
451 
452 	const VkPipelineColorBlendAttachmentState		colorBlendAttachmentState			=
453 	{
454 		VK_FALSE,					// VkBool32                 blendEnable
455 		VK_BLEND_FACTOR_ZERO,		// VkBlendFactor            srcColorBlendFactor
456 		VK_BLEND_FACTOR_ZERO,		// VkBlendFactor            dstColorBlendFactor
457 		VK_BLEND_OP_ADD,			// VkBlendOp                colorBlendOp
458 		VK_BLEND_FACTOR_ZERO,		// VkBlendFactor            srcAlphaBlendFactor
459 		VK_BLEND_FACTOR_ZERO,		// VkBlendFactor            dstAlphaBlendFactor
460 		VK_BLEND_OP_ADD,			// VkBlendOp                alphaBlendOp
461 		VK_COLOR_COMPONENT_R_BIT	// VkColorComponentFlags    colorWriteMask
462 		| VK_COLOR_COMPONENT_G_BIT
463 		| VK_COLOR_COMPONENT_B_BIT
464 		| VK_COLOR_COMPONENT_A_BIT
465 	};
466 
467 	const VkPipelineColorBlendStateCreateInfo		colorBlendStateCreateInfoDefault	=
468 	{
469 		VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,	// VkStructureType                               sType
470 		DE_NULL,													// const void*                                   pNext
471 		0u,															// VkPipelineColorBlendStateCreateFlags          flags
472 		VK_FALSE,													// VkBool32                                      logicOpEnable
473 		VK_LOGIC_OP_CLEAR,											// VkLogicOp                                     logicOp
474 		1u,															// deUint32                                      attachmentCount
475 		&colorBlendAttachmentState,									// const VkPipelineColorBlendAttachmentState*    pAttachments
476 		{ 0.0f, 0.0f, 0.0f, 0.0f }									// float                                         blendConstants[4]
477 	};
478 
479 	std::vector<VkDynamicState>						dynamicStates;
480 
481 	if (viewports.empty())
482 		dynamicStates.push_back(VK_DYNAMIC_STATE_VIEWPORT);
483 	if (scissors.empty())
484 		dynamicStates.push_back(VK_DYNAMIC_STATE_SCISSOR);
485 
486 	const VkPipelineDynamicStateCreateInfo			dynamicStateCreateInfoDefault		=
487 	{
488 		VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,	// VkStructureType                      sType
489 		DE_NULL,												// const void*                          pNext
490 		0u,														// VkPipelineDynamicStateCreateFlags    flags
491 		(deUint32)dynamicStates.size(),							// deUint32                             dynamicStateCount
492 		dynamicStates.empty() ? DE_NULL : &dynamicStates[0]		// const VkDynamicState*                pDynamicStates
493 	};
494 
495 	const VkPipelineDynamicStateCreateInfo*			dynamicStateCreateInfoDefaultPtr	= dynamicStates.empty() ? DE_NULL : &dynamicStateCreateInfoDefault;
496 
497 	const VkGraphicsPipelineCreateInfo				pipelineCreateInfo					=
498 	{
499 		VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,														// VkStructureType                                  sType
500 		DE_NULL,																								// const void*                                      pNext
501 		0u,																										// VkPipelineCreateFlags                            flags
502 		(deUint32)pipelineShaderStageParams.size(),																// deUint32                                         stageCount
503 		&pipelineShaderStageParams[0],																			// const VkPipelineShaderStageCreateInfo*           pStages
504 		vertexInputStateCreateInfo ? vertexInputStateCreateInfo : &vertexInputStateCreateInfoDefault,			// const VkPipelineVertexInputStateCreateInfo*      pVertexInputState
505 		&inputAssemblyStateCreateInfo,																			// const VkPipelineInputAssemblyStateCreateInfo*    pInputAssemblyState
506 		hasTessellation ? &tessStateCreateInfo : DE_NULL,														// const VkPipelineTessellationStateCreateInfo*     pTessellationState
507 		&viewportStateCreateInfo,																				// const VkPipelineViewportStateCreateInfo*         pViewportState
508 		rasterizationStateCreateInfo ? rasterizationStateCreateInfo : &rasterizationStateCreateInfoDefault,		// const VkPipelineRasterizationStateCreateInfo*    pRasterizationState
509 		multisampleStateCreateInfo ? multisampleStateCreateInfo: &multisampleStateCreateInfoDefault,			// const VkPipelineMultisampleStateCreateInfo*      pMultisampleState
510 		depthStencilStateCreateInfo ? depthStencilStateCreateInfo : &depthStencilStateCreateInfoDefault,		// const VkPipelineDepthStencilStateCreateInfo*     pDepthStencilState
511 		colorBlendStateCreateInfo ? colorBlendStateCreateInfo : &colorBlendStateCreateInfoDefault,				// const VkPipelineColorBlendStateCreateInfo*       pColorBlendState
512 		dynamicStateCreateInfo ? dynamicStateCreateInfo : dynamicStateCreateInfoDefaultPtr,						// const VkPipelineDynamicStateCreateInfo*          pDynamicState
513 		pipelineLayout,																							// VkPipelineLayout                                 layout
514 		renderPass,																								// VkRenderPass                                     renderPass
515 		subpass,																								// deUint32                                         subpass
516 		DE_NULL,																								// VkPipeline                                       basePipelineHandle
517 		0																										// deInt32                                          basePipelineIndex;
518 	};
519 
520 	return createGraphicsPipeline(vk, device, DE_NULL, &pipelineCreateInfo);
521 }
522 
makeGraphicsPipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderStageFlags stages,const VkShaderModule vertexShaderModule,const VkShaderModule fragmentShaderModule,const VkShaderModule geometryShaderModule,const VkShaderModule tessellationControlModule,const VkShaderModule tessellationEvaluationModule,const VkRenderPass renderPass,const VkPrimitiveTopology topology=VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,const VkVertexInputBindingDescription * vertexInputBindingDescription=DE_NULL,const VkVertexInputAttributeDescription * vertexInputAttributeDescriptions=DE_NULL,const bool frameBufferTests=false,const vk::VkFormat attachmentFormat=VK_FORMAT_R32G32B32A32_SFLOAT,const deUint32 vertexShaderStageCreateFlags=0u,const deUint32 tessellationControlShaderStageCreateFlags=0u,const deUint32 tessellationEvalShaderStageCreateFlags=0u,const deUint32 geometryShaderStageCreateFlags=0u,const deUint32 fragmentShaderStageCreateFlags=0u,const deUint32 requiredSubgroupSize[5]=DE_NULL)523 Move<VkPipeline> makeGraphicsPipeline (Context&									context,
524 									   const VkPipelineLayout					pipelineLayout,
525 									   const VkShaderStageFlags					stages,
526 									   const VkShaderModule						vertexShaderModule,
527 									   const VkShaderModule						fragmentShaderModule,
528 									   const VkShaderModule						geometryShaderModule,
529 									   const VkShaderModule						tessellationControlModule,
530 									   const VkShaderModule						tessellationEvaluationModule,
531 									   const VkRenderPass						renderPass,
532 									   const VkPrimitiveTopology				topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
533 									   const VkVertexInputBindingDescription*	vertexInputBindingDescription = DE_NULL,
534 									   const VkVertexInputAttributeDescription*	vertexInputAttributeDescriptions = DE_NULL,
535 									   const bool								frameBufferTests = false,
536 									   const vk::VkFormat						attachmentFormat = VK_FORMAT_R32G32B32A32_SFLOAT,
537 									   const deUint32							vertexShaderStageCreateFlags = 0u,
538 									   const deUint32							tessellationControlShaderStageCreateFlags = 0u,
539 									   const deUint32							tessellationEvalShaderStageCreateFlags = 0u,
540 									   const deUint32							geometryShaderStageCreateFlags = 0u,
541 									   const deUint32							fragmentShaderStageCreateFlags = 0u,
542 									   const deUint32							requiredSubgroupSize[5] = DE_NULL)
543 {
544 	const std::vector<VkViewport>				noViewports;
545 	const std::vector<VkRect2D>					noScissors;
546 	const VkPipelineVertexInputStateCreateInfo	vertexInputStateCreateInfo	=
547 	{
548 		VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,	// VkStructureType								sType;
549 		DE_NULL,													// const void*									pNext;
550 		0u,															// VkPipelineVertexInputStateCreateFlags		flags;
551 		vertexInputBindingDescription == DE_NULL ? 0u : 1u,			// deUint32										vertexBindingDescriptionCount;
552 		vertexInputBindingDescription,								// const VkVertexInputBindingDescription*		pVertexBindingDescriptions;
553 		vertexInputAttributeDescriptions == DE_NULL ? 0u : 1u,		// deUint32										vertexAttributeDescriptionCount;
554 		vertexInputAttributeDescriptions,							// const VkVertexInputAttributeDescription*		pVertexAttributeDescriptions;
555 	};
556 	const deUint32								numChannels					= getNumUsedChannels(mapVkFormat(attachmentFormat).order);
557 	const VkColorComponentFlags					colorComponent				= numChannels == 1 ? VK_COLOR_COMPONENT_R_BIT :
558 																			  numChannels == 2 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT :
559 																			  numChannels == 3 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT :
560 																			  VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
561 	const VkPipelineColorBlendAttachmentState	colorBlendAttachmentState	=
562 	{
563 		VK_FALSE,				//  VkBool32				blendEnable;
564 		VK_BLEND_FACTOR_ZERO,	//  VkBlendFactor			srcColorBlendFactor;
565 		VK_BLEND_FACTOR_ZERO,	//  VkBlendFactor			dstColorBlendFactor;
566 		VK_BLEND_OP_ADD,		//  VkBlendOp				colorBlendOp;
567 		VK_BLEND_FACTOR_ZERO,	//  VkBlendFactor			srcAlphaBlendFactor;
568 		VK_BLEND_FACTOR_ZERO,	//  VkBlendFactor			dstAlphaBlendFactor;
569 		VK_BLEND_OP_ADD,		//  VkBlendOp				alphaBlendOp;
570 		colorComponent			//  VkColorComponentFlags	colorWriteMask;
571 	};
572 	const VkPipelineColorBlendStateCreateInfo	colorBlendStateCreateInfo	=
573 	{
574 		VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,	//  VkStructureType								sType;
575 		DE_NULL,													//  const void*									pNext;
576 		0u,															//  VkPipelineColorBlendStateCreateFlags		flags;
577 		VK_FALSE,													//  VkBool32									logicOpEnable;
578 		VK_LOGIC_OP_CLEAR,											//  VkLogicOp									logicOp;
579 		1,															//  deUint32									attachmentCount;
580 		&colorBlendAttachmentState,									//  const VkPipelineColorBlendAttachmentState*	pAttachments;
581 		{ 0.0f, 0.0f, 0.0f, 0.0f }									//  float										blendConstants[4];
582 	};
583 	const deUint32								patchControlPoints			= (VK_SHADER_STAGE_FRAGMENT_BIT & stages && frameBufferTests) ? 2u : 1u;
584 
585 	return makeGraphicsPipeline(context.getDeviceInterface(),	// const DeviceInterface&                        vk
586 								context.getDevice(),			// const VkDevice                                device
587 								pipelineLayout,					// const VkPipelineLayout                        pipelineLayout
588 								vertexShaderModule,				// const VkShaderModule                          vertexShaderModule
589 								tessellationControlModule,		// const VkShaderModule                          tessellationControlShaderModule
590 								tessellationEvaluationModule,	// const VkShaderModule                          tessellationEvalShaderModule
591 								geometryShaderModule,			// const VkShaderModule                          geometryShaderModule
592 								fragmentShaderModule,			// const VkShaderModule                          fragmentShaderModule
593 								renderPass,						// const VkRenderPass                            renderPass
594 								noViewports,					// const std::vector<VkViewport>&                viewports
595 								noScissors,						// const std::vector<VkRect2D>&                  scissors
596 								topology,						// const VkPrimitiveTopology                     topology
597 								0u,								// const deUint32                                subpass
598 								patchControlPoints,				// const deUint32                                patchControlPoints
599 								&vertexInputStateCreateInfo,	// const VkPipelineVertexInputStateCreateInfo*   vertexInputStateCreateInfo
600 								DE_NULL,						// const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
601 								DE_NULL,						// const VkPipelineMultisampleStateCreateInfo*   multisampleStateCreateInfo
602 								DE_NULL,						// const VkPipelineDepthStencilStateCreateInfo*  depthStencilStateCreateInfo
603 								&colorBlendStateCreateInfo,		// const VkPipelineColorBlendStateCreateInfo*    colorBlendStateCreateInfo
604 								DE_NULL,						// const VkPipelineDynamicStateCreateInfo*
605 								vertexShaderStageCreateFlags,	// const deUint32								 vertexShaderStageCreateFlags,
606 								tessellationControlShaderStageCreateFlags,	// const deUint32					 tessellationControlShaderStageCreateFlags
607 								tessellationEvalShaderStageCreateFlags,		// const deUint32					 tessellationEvalShaderStageCreateFlags
608 								geometryShaderStageCreateFlags,	// const deUint32								 geometryShaderStageCreateFlags
609 								fragmentShaderStageCreateFlags,	// const deUint32								 fragmentShaderStageCreateFlags
610 								requiredSubgroupSize);			// const deUint32								 requiredSubgroupSize[5]
611 }
612 
makeCommandBuffer(Context & context,const VkCommandPool commandPool)613 Move<VkCommandBuffer> makeCommandBuffer (Context& context, const VkCommandPool commandPool)
614 {
615 	const VkCommandBufferAllocateInfo bufferAllocateParams =
616 	{
617 		VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,	// VkStructureType		sType;
618 		DE_NULL,										// const void*			pNext;
619 		commandPool,									// VkCommandPool		commandPool;
620 		VK_COMMAND_BUFFER_LEVEL_PRIMARY,				// VkCommandBufferLevel	level;
621 		1u,												// deUint32				bufferCount;
622 	};
623 	return allocateCommandBuffer(context.getDeviceInterface(),
624 								 context.getDevice(), &bufferAllocateParams);
625 }
626 
627 struct Buffer;
628 struct Image;
629 
630 struct BufferOrImage
631 {
isImage__anonc13380c00111::BufferOrImage632 	bool isImage() const
633 	{
634 		return m_isImage;
635 	}
636 
getAsBuffer__anonc13380c00111::BufferOrImage637 	Buffer* getAsBuffer()
638 	{
639 		if (m_isImage) DE_FATAL("Trying to get a buffer as an image!");
640 		return reinterpret_cast<Buffer* >(this);
641 	}
642 
getAsImage__anonc13380c00111::BufferOrImage643 	Image* getAsImage()
644 	{
645 		if (!m_isImage) DE_FATAL("Trying to get an image as a buffer!");
646 		return reinterpret_cast<Image*>(this);
647 	}
648 
getType__anonc13380c00111::BufferOrImage649 	virtual VkDescriptorType getType() const
650 	{
651 		if (m_isImage)
652 		{
653 			return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
654 		}
655 		else
656 		{
657 			return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
658 		}
659 	}
660 
getAllocation__anonc13380c00111::BufferOrImage661 	Allocation& getAllocation() const
662 	{
663 		return *m_allocation;
664 	}
665 
~BufferOrImage__anonc13380c00111::BufferOrImage666 	virtual ~BufferOrImage() {}
667 
668 protected:
BufferOrImage__anonc13380c00111::BufferOrImage669 	explicit BufferOrImage(bool image) : m_isImage(image) {}
670 
671 	bool m_isImage;
672 	de::details::MovePtr<Allocation> m_allocation;
673 };
674 
675 struct Buffer : public BufferOrImage
676 {
Buffer__anonc13380c00111::Buffer677 	explicit Buffer (Context& context, VkDeviceSize sizeInBytes, VkBufferUsageFlags usage)
678 		: BufferOrImage		(false)
679 		, m_sizeInBytes		(sizeInBytes)
680 		, m_usage			(usage)
681 	{
682 		const DeviceInterface&			vkd					= context.getDeviceInterface();
683 		const VkDevice					device				= context.getDevice();
684 
685 		const vk::VkBufferCreateInfo	bufferCreateInfo	=
686 		{
687 			VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
688 			DE_NULL,
689 			0u,
690 			m_sizeInBytes,
691 			m_usage,
692 			VK_SHARING_MODE_EXCLUSIVE,
693 			0u,
694 			DE_NULL,
695 		};
696 		m_buffer		= createBuffer(vkd, device, &bufferCreateInfo);
697 
698 		VkMemoryRequirements			req					= getBufferMemoryRequirements(vkd, device, *m_buffer);
699 
700 		m_allocation	= context.getDefaultAllocator().allocate(req, MemoryRequirement::HostVisible);
701 		VK_CHECK(vkd.bindBufferMemory(device, *m_buffer, m_allocation->getMemory(), m_allocation->getOffset()));
702 	}
703 
getType__anonc13380c00111::Buffer704 	virtual VkDescriptorType getType() const
705 	{
706 		if (VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT == m_usage)
707 		{
708 			return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
709 		}
710 		return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
711 	}
712 
getBuffer__anonc13380c00111::Buffer713 	VkBuffer getBuffer () const
714 	{
715 		return *m_buffer;
716 	}
717 
getBufferPtr__anonc13380c00111::Buffer718 	const VkBuffer* getBufferPtr () const
719 	{
720 		return &(*m_buffer);
721 	}
722 
getSize__anonc13380c00111::Buffer723 	VkDeviceSize getSize () const
724 	{
725 		return m_sizeInBytes;
726 	}
727 
728 private:
729 	Move<VkBuffer>				m_buffer;
730 	VkDeviceSize				m_sizeInBytes;
731 	const VkBufferUsageFlags	m_usage;
732 };
733 
734 struct Image : public BufferOrImage
735 {
Image__anonc13380c00111::Image736 	explicit Image (Context& context, deUint32 width, deUint32 height, VkFormat format, VkImageUsageFlags usage = VK_IMAGE_USAGE_STORAGE_BIT)
737 		: BufferOrImage(true)
738 	{
739 		const DeviceInterface&			vk					= context.getDeviceInterface();
740 		const VkDevice					device				= context.getDevice();
741 		const deUint32					queueFamilyIndex	= context.getUniversalQueueFamilyIndex();
742 
743 		const VkImageCreateInfo			imageCreateInfo		=
744 		{
745 			VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//  VkStructureType			sType;
746 			DE_NULL,								//  const void*				pNext;
747 			0,										//  VkImageCreateFlags		flags;
748 			VK_IMAGE_TYPE_2D,						//  VkImageType				imageType;
749 			format,									//  VkFormat				format;
750 			{width, height, 1},						//  VkExtent3D				extent;
751 			1,										//  deUint32				mipLevels;
752 			1,										//  deUint32				arrayLayers;
753 			VK_SAMPLE_COUNT_1_BIT,					//  VkSampleCountFlagBits	samples;
754 			VK_IMAGE_TILING_OPTIMAL,				//  VkImageTiling			tiling;
755 			usage,									//  VkImageUsageFlags		usage;
756 			VK_SHARING_MODE_EXCLUSIVE,				//  VkSharingMode			sharingMode;
757 			0u,										//  deUint32				queueFamilyIndexCount;
758 			DE_NULL,								//  const deUint32*			pQueueFamilyIndices;
759 			VK_IMAGE_LAYOUT_UNDEFINED				//  VkImageLayout			initialLayout;
760 		};
761 
762 		const VkComponentMapping		componentMapping	=
763 		{
764 			VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
765 			VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY
766 		};
767 
768 		const VkImageSubresourceRange	subresourceRange	=
769 		{
770 			VK_IMAGE_ASPECT_COLOR_BIT,	//VkImageAspectFlags	aspectMask
771 			0u,							//deUint32				baseMipLevel
772 			1u,							//deUint32				levelCount
773 			0u,							//deUint32				baseArrayLayer
774 			1u							//deUint32				layerCount
775 		};
776 
777 		const VkSamplerCreateInfo		samplerCreateInfo	=
778 		{
779 			VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,		//  VkStructureType			sType;
780 			DE_NULL,									//  const void*				pNext;
781 			0u,											//  VkSamplerCreateFlags	flags;
782 			VK_FILTER_NEAREST,							//  VkFilter				magFilter;
783 			VK_FILTER_NEAREST,							//  VkFilter				minFilter;
784 			VK_SAMPLER_MIPMAP_MODE_NEAREST,				//  VkSamplerMipmapMode		mipmapMode;
785 			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,		//  VkSamplerAddressMode	addressModeU;
786 			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,		//  VkSamplerAddressMode	addressModeV;
787 			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,		//  VkSamplerAddressMode	addressModeW;
788 			0.0f,										//  float					mipLodBias;
789 			VK_FALSE,									//  VkBool32				anisotropyEnable;
790 			1.0f,										//  float					maxAnisotropy;
791 			DE_FALSE,									//  VkBool32				compareEnable;
792 			VK_COMPARE_OP_ALWAYS,						//  VkCompareOp				compareOp;
793 			0.0f,										//  float					minLod;
794 			0.0f,										//  float					maxLod;
795 			VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,	//  VkBorderColor			borderColor;
796 			VK_FALSE,									//  VkBool32				unnormalizedCoordinates;
797 		};
798 
799 		m_image			= createImage(vk, device, &imageCreateInfo);
800 
801 		VkMemoryRequirements			req					= getImageMemoryRequirements(vk, device, *m_image);
802 
803 		req.size		*= 2;
804 		m_allocation	= context.getDefaultAllocator().allocate(req, MemoryRequirement::Any);
805 
806 		VK_CHECK(vk.bindImageMemory(device, *m_image, m_allocation->getMemory(), m_allocation->getOffset()));
807 
808 		const VkImageViewCreateInfo		imageViewCreateInfo	=
809 		{
810 			VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,	//  VkStructureType			sType;
811 			DE_NULL,									//  const void*				pNext;
812 			0,											//  VkImageViewCreateFlags	flags;
813 			*m_image,									//  VkImage					image;
814 			VK_IMAGE_VIEW_TYPE_2D,						//  VkImageViewType			viewType;
815 			imageCreateInfo.format,						//  VkFormat				format;
816 			componentMapping,							//  VkComponentMapping		components;
817 			subresourceRange							//  VkImageSubresourceRange	subresourceRange;
818 		};
819 
820 		m_imageView		= createImageView(vk, device, &imageViewCreateInfo);
821 		m_sampler		= createSampler(vk, device, &samplerCreateInfo);
822 
823 		// Transition input image layouts
824 		{
825 			const Unique<VkCommandPool>		cmdPool			(makeCommandPool(vk, device, queueFamilyIndex));
826 			const Unique<VkCommandBuffer>	cmdBuffer		(makeCommandBuffer(context, *cmdPool));
827 
828 			beginCommandBuffer(vk, *cmdBuffer);
829 
830 			const VkImageMemoryBarrier		imageBarrier	= makeImageMemoryBarrier((VkAccessFlags)0u, VK_ACCESS_TRANSFER_WRITE_BIT,
831 																	VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, *m_image, subresourceRange);
832 
833 			vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
834 				(VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &imageBarrier);
835 
836 			endCommandBuffer(vk, *cmdBuffer);
837 			submitCommandsAndWait(vk, device, context.getUniversalQueue(), *cmdBuffer);
838 		}
839 	}
840 
getImage__anonc13380c00111::Image841 	VkImage getImage () const
842 	{
843 		return *m_image;
844 	}
845 
getImageView__anonc13380c00111::Image846 	VkImageView getImageView () const
847 	{
848 		return *m_imageView;
849 	}
850 
getSampler__anonc13380c00111::Image851 	VkSampler getSampler () const
852 	{
853 		return *m_sampler;
854 	}
855 
856 private:
857 	Move<VkImage>		m_image;
858 	Move<VkImageView>	m_imageView;
859 	Move<VkSampler>		m_sampler;
860 };
861 }
862 
getStagesCount(const VkShaderStageFlags shaderStages)863 deUint32 vkt::subgroups::getStagesCount (const VkShaderStageFlags shaderStages)
864 {
865 	const deUint32	stageCount	= isAllGraphicsStages(shaderStages)    ? 4
866 								: isAllComputeStages(shaderStages)     ? 1
867 #ifndef CTS_USES_VULKANSC
868 								: isAllRayTracingStages(shaderStages)  ? 6
869 								: isAllMeshShadingStages(shaderStages) ? 1
870 #endif // CTS_USES_VULKANSC
871 								: 0;
872 
873 	DE_ASSERT(stageCount != 0);
874 
875 	return stageCount;
876 }
877 
getSharedMemoryBallotHelper()878 std::string vkt::subgroups::getSharedMemoryBallotHelper ()
879 {
880 	return	"shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
881 			"uvec4 sharedMemoryBallot(bool vote)\n"
882 			"{\n"
883 			"  uint groupOffset = gl_SubgroupID;\n"
884 			"  // One invocation in the group 0's the whole group's data\n"
885 			"  if (subgroupElect())\n"
886 			"  {\n"
887 			"    superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
888 			"  }\n"
889 			"  subgroupMemoryBarrierShared();\n"
890 			"  if (vote)\n"
891 			"  {\n"
892 			"    const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
893 			"    const highp uint bitToSet = 1u << invocationId;\n"
894 			"    switch (gl_SubgroupInvocationID / 32)\n"
895 			"    {\n"
896 			"    case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
897 			"    case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
898 			"    case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
899 			"    case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
900 			"    }\n"
901 			"  }\n"
902 			"  subgroupMemoryBarrierShared();\n"
903 			"  return superSecretComputeShaderHelper[groupOffset];\n"
904 			"}\n";
905 }
906 
getSharedMemoryBallotHelperARB()907 std::string vkt::subgroups::getSharedMemoryBallotHelperARB ()
908 {
909 	return	"shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
910 			"uint64_t sharedMemoryBallot(bool vote)\n"
911 			"{\n"
912 			"  uint groupOffset = gl_SubgroupID;\n"
913 			"  // One invocation in the group 0's the whole group's data\n"
914 			"  if (subgroupElect())\n"
915 			"  {\n"
916 			"    superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
917 			"  }\n"
918 			"  subgroupMemoryBarrierShared();\n"
919 			"  if (vote)\n"
920 			"  {\n"
921 			"    const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
922 			"    const highp uint bitToSet = 1u << invocationId;\n"
923 			"    switch (gl_SubgroupInvocationID / 32)\n"
924 			"    {\n"
925 			"    case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
926 			"    case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
927 			"    case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
928 			"    case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
929 			"    }\n"
930 			"  }\n"
931 			"  subgroupMemoryBarrierShared();\n"
932 			"  return packUint2x32(superSecretComputeShaderHelper[groupOffset].xy);\n"
933 			"}\n";
934 }
935 
getSubgroupSize(Context & context)936 deUint32 vkt::subgroups::getSubgroupSize (Context& context)
937 {
938 	return context.getSubgroupProperties().subgroupSize;
939 }
940 
maxSupportedSubgroupSize()941 deUint32 vkt::subgroups::maxSupportedSubgroupSize ()
942 {
943 	return 128u;
944 }
945 
getShaderStageName(VkShaderStageFlags stage)946 std::string vkt::subgroups::getShaderStageName (VkShaderStageFlags stage)
947 {
948 	switch (stage)
949 	{
950 		case VK_SHADER_STAGE_COMPUTE_BIT:					return "compute";
951 		case VK_SHADER_STAGE_FRAGMENT_BIT:					return "fragment";
952 		case VK_SHADER_STAGE_VERTEX_BIT:					return "vertex";
953 		case VK_SHADER_STAGE_GEOMETRY_BIT:					return "geometry";
954 		case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:		return "tess_control";
955 		case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:	return "tess_eval";
956 #ifndef CTS_USES_VULKANSC
957 		case VK_SHADER_STAGE_RAYGEN_BIT_KHR:				return "rgen";
958 		case VK_SHADER_STAGE_ANY_HIT_BIT_KHR:				return "ahit";
959 		case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:			return "chit";
960 		case VK_SHADER_STAGE_MISS_BIT_KHR:					return "miss";
961 		case VK_SHADER_STAGE_INTERSECTION_BIT_KHR:			return "sect";
962 		case VK_SHADER_STAGE_CALLABLE_BIT_KHR:				return "call";
963 		case VK_SHADER_STAGE_MESH_BIT_EXT:					return "mesh";
964 		case VK_SHADER_STAGE_TASK_BIT_EXT:					return "task";
965 #endif // CTS_USES_VULKANSC
966 		default:											TCU_THROW(InternalError, "Unhandled stage");
967 	}
968 }
969 
getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit)970 std::string vkt::subgroups::getSubgroupFeatureName (vk::VkSubgroupFeatureFlagBits bit)
971 {
972 	switch (bit)
973 	{
974 		case VK_SUBGROUP_FEATURE_BASIC_BIT:				return "VK_SUBGROUP_FEATURE_BASIC_BIT";
975 		case VK_SUBGROUP_FEATURE_VOTE_BIT:				return "VK_SUBGROUP_FEATURE_VOTE_BIT";
976 		case VK_SUBGROUP_FEATURE_ARITHMETIC_BIT:		return "VK_SUBGROUP_FEATURE_ARITHMETIC_BIT";
977 		case VK_SUBGROUP_FEATURE_BALLOT_BIT:			return "VK_SUBGROUP_FEATURE_BALLOT_BIT";
978 		case VK_SUBGROUP_FEATURE_SHUFFLE_BIT:			return "VK_SUBGROUP_FEATURE_SHUFFLE_BIT";
979 		case VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT:	return "VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT";
980 		case VK_SUBGROUP_FEATURE_CLUSTERED_BIT:			return "VK_SUBGROUP_FEATURE_CLUSTERED_BIT";
981 		case VK_SUBGROUP_FEATURE_QUAD_BIT:				return "VK_SUBGROUP_FEATURE_QUAD_BIT";
982 		default:										TCU_THROW(InternalError, "Unknown subgroup feature category");
983 	}
984 }
985 
addNoSubgroupShader(SourceCollections & programCollection)986 void vkt::subgroups::addNoSubgroupShader (SourceCollections& programCollection)
987 {
988 	{
989 	/*
990 		"#version 450\n"
991 		"void main (void)\n"
992 		"{\n"
993 		"  float pixelSize = 2.0f/1024.0f;\n"
994 		"   float pixelPosition = pixelSize/2.0f - 1.0f;\n"
995 		"  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
996 		"  gl_PointSize = 1.0f;\n"
997 		"}\n"
998 	*/
999 		const std::string vertNoSubgroup =
1000 			"; SPIR-V\n"
1001 			"; Version: 1.3\n"
1002 			"; Generator: Khronos Glslang Reference Front End; 1\n"
1003 			"; Bound: 37\n"
1004 			"; Schema: 0\n"
1005 			"OpCapability Shader\n"
1006 			"%1 = OpExtInstImport \"GLSL.std.450\"\n"
1007 			"OpMemoryModel Logical GLSL450\n"
1008 			"OpEntryPoint Vertex %4 \"main\" %22 %26\n"
1009 			"OpMemberDecorate %20 0 BuiltIn Position\n"
1010 			"OpMemberDecorate %20 1 BuiltIn PointSize\n"
1011 			"OpMemberDecorate %20 2 BuiltIn ClipDistance\n"
1012 			"OpMemberDecorate %20 3 BuiltIn CullDistance\n"
1013 			"OpDecorate %20 Block\n"
1014 			"OpDecorate %26 BuiltIn VertexIndex\n"
1015 			"%2 = OpTypeVoid\n"
1016 			"%3 = OpTypeFunction %2\n"
1017 			"%6 = OpTypeFloat 32\n"
1018 			"%7 = OpTypePointer Function %6\n"
1019 			"%9 = OpConstant %6 0.00195313\n"
1020 			"%12 = OpConstant %6 2\n"
1021 			"%14 = OpConstant %6 1\n"
1022 			"%16 = OpTypeVector %6 4\n"
1023 			"%17 = OpTypeInt 32 0\n"
1024 			"%18 = OpConstant %17 1\n"
1025 			"%19 = OpTypeArray %6 %18\n"
1026 			"%20 = OpTypeStruct %16 %6 %19 %19\n"
1027 			"%21 = OpTypePointer Output %20\n"
1028 			"%22 = OpVariable %21 Output\n"
1029 			"%23 = OpTypeInt 32 1\n"
1030 			"%24 = OpConstant %23 0\n"
1031 			"%25 = OpTypePointer Input %23\n"
1032 			"%26 = OpVariable %25 Input\n"
1033 			"%33 = OpConstant %6 0\n"
1034 			"%35 = OpTypePointer Output %16\n"
1035 			"%37 = OpConstant %23 1\n"
1036 			"%38 = OpTypePointer Output %6\n"
1037 			"%4 = OpFunction %2 None %3\n"
1038 			"%5 = OpLabel\n"
1039 			"%8 = OpVariable %7 Function\n"
1040 			"%10 = OpVariable %7 Function\n"
1041 			"OpStore %8 %9\n"
1042 			"%11 = OpLoad %6 %8\n"
1043 			"%13 = OpFDiv %6 %11 %12\n"
1044 			"%15 = OpFSub %6 %13 %14\n"
1045 			"OpStore %10 %15\n"
1046 			"%27 = OpLoad %23 %26\n"
1047 			"%28 = OpConvertSToF %6 %27\n"
1048 			"%29 = OpLoad %6 %8\n"
1049 			"%30 = OpFMul %6 %28 %29\n"
1050 			"%31 = OpLoad %6 %10\n"
1051 			"%32 = OpFAdd %6 %30 %31\n"
1052 			"%34 = OpCompositeConstruct %16 %32 %33 %33 %14\n"
1053 			"%36 = OpAccessChain %35 %22 %24\n"
1054 			"OpStore %36 %34\n"
1055 			"%39 = OpAccessChain %38 %22 %37\n"
1056 			"OpStore %39 %14\n"
1057 			"OpReturn\n"
1058 			"OpFunctionEnd\n";
1059 		programCollection.spirvAsmSources.add("vert_noSubgroup") << vertNoSubgroup;
1060 	}
1061 
1062 	{
1063 	/*
1064 		"#version 450\n"
1065 		"layout(vertices=1) out;\n"
1066 		"\n"
1067 		"void main (void)\n"
1068 		"{\n"
1069 		"  if (gl_InvocationID == 0)\n"
1070 		"  {\n"
1071 		"    gl_TessLevelOuter[0] = 1.0f;\n"
1072 		"    gl_TessLevelOuter[1] = 1.0f;\n"
1073 		"  }\n"
1074 		"  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1075 		"}\n"
1076 	*/
1077 		const std::string tescNoSubgroup =
1078 			"; SPIR-V\n"
1079 			"; Version: 1.3\n"
1080 			"; Generator: Khronos Glslang Reference Front End; 1\n"
1081 			"; Bound: 45\n"
1082 			"; Schema: 0\n"
1083 			"OpCapability Tessellation\n"
1084 			"%1 = OpExtInstImport \"GLSL.std.450\"\n"
1085 			"OpMemoryModel Logical GLSL450\n"
1086 			"OpEntryPoint TessellationControl %4 \"main\" %8 %20 %32 %38\n"
1087 			"OpExecutionMode %4 OutputVertices 1\n"
1088 			"OpDecorate %8 BuiltIn InvocationId\n"
1089 			"OpDecorate %20 Patch\n"
1090 			"OpDecorate %20 BuiltIn TessLevelOuter\n"
1091 			"OpMemberDecorate %29 0 BuiltIn Position\n"
1092 			"OpMemberDecorate %29 1 BuiltIn PointSize\n"
1093 			"OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
1094 			"OpMemberDecorate %29 3 BuiltIn CullDistance\n"
1095 			"OpDecorate %29 Block\n"
1096 			"OpMemberDecorate %34 0 BuiltIn Position\n"
1097 			"OpMemberDecorate %34 1 BuiltIn PointSize\n"
1098 			"OpMemberDecorate %34 2 BuiltIn ClipDistance\n"
1099 			"OpMemberDecorate %34 3 BuiltIn CullDistance\n"
1100 			"OpDecorate %34 Block\n"
1101 			"%2 = OpTypeVoid\n"
1102 			"%3 = OpTypeFunction %2\n"
1103 			"%6 = OpTypeInt 32 1\n"
1104 			"%7 = OpTypePointer Input %6\n"
1105 			"%8 = OpVariable %7 Input\n"
1106 			"%10 = OpConstant %6 0\n"
1107 			"%11 = OpTypeBool\n"
1108 			"%15 = OpTypeFloat 32\n"
1109 			"%16 = OpTypeInt 32 0\n"
1110 			"%17 = OpConstant %16 4\n"
1111 			"%18 = OpTypeArray %15 %17\n"
1112 			"%19 = OpTypePointer Output %18\n"
1113 			"%20 = OpVariable %19 Output\n"
1114 			"%21 = OpConstant %15 1\n"
1115 			"%22 = OpTypePointer Output %15\n"
1116 			"%24 = OpConstant %6 1\n"
1117 			"%26 = OpTypeVector %15 4\n"
1118 			"%27 = OpConstant %16 1\n"
1119 			"%28 = OpTypeArray %15 %27\n"
1120 			"%29 = OpTypeStruct %26 %15 %28 %28\n"
1121 			"%30 = OpTypeArray %29 %27\n"
1122 			"%31 = OpTypePointer Output %30\n"
1123 			"%32 = OpVariable %31 Output\n"
1124 			"%34 = OpTypeStruct %26 %15 %28 %28\n"
1125 			"%35 = OpConstant %16 32\n"
1126 			"%36 = OpTypeArray %34 %35\n"
1127 			"%37 = OpTypePointer Input %36\n"
1128 			"%38 = OpVariable %37 Input\n"
1129 			"%40 = OpTypePointer Input %26\n"
1130 			"%43 = OpTypePointer Output %26\n"
1131 			"%4 = OpFunction %2 None %3\n"
1132 			"%5 = OpLabel\n"
1133 			"%9 = OpLoad %6 %8\n"
1134 			"%12 = OpIEqual %11 %9 %10\n"
1135 			"OpSelectionMerge %14 None\n"
1136 			"OpBranchConditional %12 %13 %14\n"
1137 			"%13 = OpLabel\n"
1138 			"%23 = OpAccessChain %22 %20 %10\n"
1139 			"OpStore %23 %21\n"
1140 			"%25 = OpAccessChain %22 %20 %24\n"
1141 			"OpStore %25 %21\n"
1142 			"OpBranch %14\n"
1143 			"%14 = OpLabel\n"
1144 			"%33 = OpLoad %6 %8\n"
1145 			"%39 = OpLoad %6 %8\n"
1146 			"%41 = OpAccessChain %40 %38 %39 %10\n"
1147 			"%42 = OpLoad %26 %41\n"
1148 			"%44 = OpAccessChain %43 %32 %33 %10\n"
1149 			"OpStore %44 %42\n"
1150 			"OpReturn\n"
1151 			"OpFunctionEnd\n";
1152 		programCollection.spirvAsmSources.add("tesc_noSubgroup") << tescNoSubgroup;
1153 	}
1154 
1155 	{
1156 	/*
1157 		"#version 450\n"
1158 		"layout(isolines) in;\n"
1159 		"\n"
1160 		"void main (void)\n"
1161 		"{\n"
1162 		"  float pixelSize = 2.0f/1024.0f;\n"
1163 		"  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
1164 		"}\n";
1165 	*/
1166 		const std::string teseNoSubgroup =
1167 			"; SPIR-V\n"
1168 			"; Version: 1.3\n"
1169 			"; Generator: Khronos Glslang Reference Front End; 2\n"
1170 			"; Bound: 42\n"
1171 			"; Schema: 0\n"
1172 			"OpCapability Tessellation\n"
1173 			"%1 = OpExtInstImport \"GLSL.std.450\"\n"
1174 			"OpMemoryModel Logical GLSL450\n"
1175 			"OpEntryPoint TessellationEvaluation %4 \"main\" %16 %23 %29\n"
1176 			"OpExecutionMode %4 Isolines\n"
1177 			"OpExecutionMode %4 SpacingEqual\n"
1178 			"OpExecutionMode %4 VertexOrderCcw\n"
1179 			"OpMemberDecorate %14 0 BuiltIn Position\n"
1180 			"OpMemberDecorate %14 1 BuiltIn PointSize\n"
1181 			"OpMemberDecorate %14 2 BuiltIn ClipDistance\n"
1182 			"OpMemberDecorate %14 3 BuiltIn CullDistance\n"
1183 			"OpDecorate %14 Block\n"
1184 			"OpMemberDecorate %19 0 BuiltIn Position\n"
1185 			"OpMemberDecorate %19 1 BuiltIn PointSize\n"
1186 			"OpMemberDecorate %19 2 BuiltIn ClipDistance\n"
1187 			"OpMemberDecorate %19 3 BuiltIn CullDistance\n"
1188 			"OpDecorate %19 Block\n"
1189 			"OpDecorate %29 BuiltIn TessCoord\n"
1190 			"%2 = OpTypeVoid\n"
1191 			"%3 = OpTypeFunction %2\n"
1192 			"%6 = OpTypeFloat 32\n"
1193 			"%7 = OpTypePointer Function %6\n"
1194 			"%9 = OpConstant %6 0.00195313\n"
1195 			"%10 = OpTypeVector %6 4\n"
1196 			"%11 = OpTypeInt 32 0\n"
1197 			"%12 = OpConstant %11 1\n"
1198 			"%13 = OpTypeArray %6 %12\n"
1199 			"%14 = OpTypeStruct %10 %6 %13 %13\n"
1200 			"%15 = OpTypePointer Output %14\n"
1201 			"%16 = OpVariable %15 Output\n"
1202 			"%17 = OpTypeInt 32 1\n"
1203 			"%18 = OpConstant %17 0\n"
1204 			"%19 = OpTypeStruct %10 %6 %13 %13\n"
1205 			"%20 = OpConstant %11 32\n"
1206 			"%21 = OpTypeArray %19 %20\n"
1207 			"%22 = OpTypePointer Input %21\n"
1208 			"%23 = OpVariable %22 Input\n"
1209 			"%24 = OpTypePointer Input %10\n"
1210 			"%27 = OpTypeVector %6 3\n"
1211 			"%28 = OpTypePointer Input %27\n"
1212 			"%29 = OpVariable %28 Input\n"
1213 			"%30 = OpConstant %11 0\n"
1214 			"%31 = OpTypePointer Input %6\n"
1215 			"%36 = OpConstant %6 2\n"
1216 			"%40 = OpTypePointer Output %10\n"
1217 			"%4 = OpFunction %2 None %3\n"
1218 			"%5 = OpLabel\n"
1219 			"%8 = OpVariable %7 Function\n"
1220 			"OpStore %8 %9\n"
1221 			"%25 = OpAccessChain %24 %23 %18 %18\n"
1222 			"%26 = OpLoad %10 %25\n"
1223 			"%32 = OpAccessChain %31 %29 %30\n"
1224 			"%33 = OpLoad %6 %32\n"
1225 			"%34 = OpLoad %6 %8\n"
1226 			"%35 = OpFMul %6 %33 %34\n"
1227 			"%37 = OpFDiv %6 %35 %36\n"
1228 			"%38 = OpCompositeConstruct %10 %37 %37 %37 %37\n"
1229 			"%39 = OpFAdd %10 %26 %38\n"
1230 			"%41 = OpAccessChain %40 %16 %18\n"
1231 			"OpStore %41 %39\n"
1232 			"OpReturn\n"
1233 			"OpFunctionEnd\n";
1234 		programCollection.spirvAsmSources.add("tese_noSubgroup") << teseNoSubgroup;
1235 	}
1236 
1237 }
1238 
getFramebufferBufferDeclarations(const VkFormat & format,const std::vector<std::string> & declarations,const deUint32 stage)1239 static std::string getFramebufferBufferDeclarations (const VkFormat&					format,
1240 													 const std::vector<std::string>&	declarations,
1241 													 const deUint32						stage)
1242 {
1243 	if (declarations.empty())
1244 	{
1245 		const std::string	name	= (stage == 0) ? "result" : "out_color";
1246 		const std::string	suffix	= (stage == 2) ? "[]" : "";
1247 		const std::string	result	=
1248 			"layout(location = 0) out float " + name + suffix + ";\n"
1249 			"layout(set = 0, binding = 0) uniform Buffer1\n"
1250 			"{\n"
1251 			"  " + de::toString(subgroups::getFormatNameForGLSL(format)) + " data[" + de::toString(subgroups::maxSupportedSubgroupSize()) + "];\n"
1252 			"};\n";
1253 
1254 		return result;
1255 	}
1256 	else
1257 	{
1258 		return declarations[stage];
1259 	}
1260 }
1261 
initStdFrameBufferPrograms(SourceCollections & programCollection,const vk::ShaderBuildOptions & buildOptions,VkShaderStageFlags shaderStage,VkFormat format,bool gsPointSize,const std::string & extHeader,const std::string & testSrc,const std::string & helperStr,const std::vector<std::string> & declarations)1262 void vkt::subgroups::initStdFrameBufferPrograms (SourceCollections&					programCollection,
1263 												 const vk::ShaderBuildOptions&		buildOptions,
1264 												 VkShaderStageFlags					shaderStage,
1265 												 VkFormat							format,
1266 												 bool								gsPointSize,
1267 												 const std::string&					extHeader,
1268 												 const std::string&					testSrc,
1269 												 const std::string&					helperStr,
1270 												 const std::vector<std::string>&	declarations)
1271 {
1272 	subgroups::setFragmentShaderFrameBuffer(programCollection);
1273 
1274 	if (shaderStage != VK_SHADER_STAGE_VERTEX_BIT)
1275 		subgroups::setVertexShaderFrameBuffer(programCollection);
1276 
1277 	if (shaderStage == VK_SHADER_STAGE_VERTEX_BIT)
1278 	{
1279 		std::ostringstream vertex;
1280 
1281 		vertex << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1282 			<< extHeader
1283 			<< "layout(location = 0) in highp vec4 in_position;\n"
1284 			<< getFramebufferBufferDeclarations(format, declarations, 0)
1285 			<< "\n"
1286 			<< helperStr
1287 			<< "void main (void)\n"
1288 			<< "{\n"
1289 			<< "  uint tempRes;\n"
1290 			<< testSrc
1291 			<< "  result = float(tempRes);\n"
1292 			<< "  gl_Position = in_position;\n"
1293 			<< "  gl_PointSize = 1.0f;\n"
1294 			<< "}\n";
1295 
1296 		programCollection.glslSources.add("vert") << glu::VertexSource(vertex.str()) << buildOptions;
1297 	}
1298 	else if (shaderStage == VK_SHADER_STAGE_GEOMETRY_BIT)
1299 	{
1300 		std::ostringstream geometry;
1301 
1302 		geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1303 			<< extHeader
1304 			<< "layout(points) in;\n"
1305 			<< "layout(points, max_vertices = 1) out;\n"
1306 			<< getFramebufferBufferDeclarations(format, declarations, 1)
1307 			<< "\n"
1308 			<< helperStr
1309 			<< "void main (void)\n"
1310 			<< "{\n"
1311 			<< "  uint tempRes;\n"
1312 			<< testSrc
1313 			<< "  out_color = float(tempRes);\n"
1314 			<< "  gl_Position = gl_in[0].gl_Position;\n"
1315 			<< (gsPointSize ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "")
1316 			<< "  EmitVertex();\n"
1317 			<< "  EndPrimitive();\n"
1318 			<< "}\n";
1319 
1320 		programCollection.glslSources.add("geometry") << glu::GeometrySource(geometry.str()) << buildOptions;
1321 	}
1322 	else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
1323 	{
1324 		std::ostringstream controlSource;
1325 
1326 		controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1327 			<< extHeader
1328 			<< "layout(vertices = 2) out;\n"
1329 			<< getFramebufferBufferDeclarations(format, declarations, 2)
1330 			<< "\n"
1331 			<< helperStr
1332 			<< "void main (void)\n"
1333 			<< "{\n"
1334 			<< "  if (gl_InvocationID == 0)\n"
1335 			<< "  {\n"
1336 			<< "    gl_TessLevelOuter[0] = 1.0f;\n"
1337 			<< "    gl_TessLevelOuter[1] = 1.0f;\n"
1338 			<< "  }\n"
1339 			<< "  uint tempRes;\n"
1340 			<< testSrc
1341 			<< "  out_color[gl_InvocationID] = float(tempRes);\n"
1342 			<< "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1343 			<< (gsPointSize ? "  gl_out[gl_InvocationID].gl_PointSize = gl_in[gl_InvocationID].gl_PointSize;\n" : "")
1344 			<< "}\n";
1345 
1346 		programCollection.glslSources.add("tesc") << glu::TessellationControlSource(controlSource.str()) << buildOptions;
1347 		subgroups::setTesEvalShaderFrameBuffer(programCollection);
1348 	}
1349 	else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
1350 	{
1351 		ostringstream evaluationSource;
1352 
1353 		evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1354 			<< extHeader
1355 			<< "layout(isolines, equal_spacing, ccw ) in;\n"
1356 			<< getFramebufferBufferDeclarations(format, declarations, 3)
1357 			<< "\n"
1358 			<< helperStr
1359 			<< "void main (void)\n"
1360 			<< "{\n"
1361 			<< "  uint tempRes;\n"
1362 			<< testSrc
1363 			<< "  out_color = float(tempRes);\n"
1364 			<< "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
1365 			<< (gsPointSize ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "")
1366 			<< "}\n";
1367 
1368 		subgroups::setTesCtrlShaderFrameBuffer(programCollection);
1369 		programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
1370 	}
1371 	else
1372 	{
1373 		DE_FATAL("Unsupported shader stage");
1374 	}
1375 }
1376 
getBufferDeclarations(vk::VkShaderStageFlags shaderStage,const std::string & formatName,const std::vector<std::string> & declarations,const deUint32 stage)1377 static std::string getBufferDeclarations (vk::VkShaderStageFlags			shaderStage,
1378 										  const std::string&				formatName,
1379 										  const std::vector<std::string>&	declarations,
1380 										  const deUint32					stage)
1381 {
1382 	if (declarations.empty())
1383 	{
1384 		const deUint32	stageCount	= vkt::subgroups::getStagesCount(shaderStage);
1385 		const deUint32	binding0	= stage;
1386 		const deUint32	binding1	= stageCount;
1387 		const bool		fragment	= (shaderStage & VK_SHADER_STAGE_FRAGMENT_BIT) && (stage == stageCount);
1388 		const string	buffer1		= fragment
1389 									? "layout(location = 0) out uint result;\n"
1390 									: "layout(set = 0, binding = " + de::toString(binding0) + ", std430) buffer Buffer1\n"
1391 									  "{\n"
1392 									  "  uint result[];\n"
1393 									  "};\n";
1394 		//todo boza I suppose it can be "layout(set = 0, binding = " + de::toString(binding1) + ", std430) readonly buffer Buffer2\n"
1395 		const string	buffer2		= "layout(set = 0, binding = " + de::toString(binding1) + ", std430)" + (stageCount == 1 ? "" : " readonly") + " buffer Buffer" + (fragment ? "1" : "2") + "\n"
1396 									  "{\n"
1397 									  "  " + formatName + " data[];\n"
1398 									  "};\n";
1399 
1400 		return buffer1 + buffer2;
1401 	}
1402 	else
1403 	{
1404 		return declarations[stage];
1405 	}
1406 }
1407 
initStdPrograms(vk::SourceCollections & programCollection,const vk::ShaderBuildOptions & buildOptions,vk::VkShaderStageFlags shaderStage,vk::VkFormat format,bool gsPointSize,const std::string & extHeader,const std::string & testSrc,const std::string & helperStr,const std::vector<std::string> & declarations,const bool avoidHelperInvocations,const std::string & tempRes)1408 void vkt::subgroups::initStdPrograms (vk::SourceCollections&			programCollection,
1409 									  const vk::ShaderBuildOptions&		buildOptions,
1410 									  vk::VkShaderStageFlags			shaderStage,
1411 									  vk::VkFormat						format,
1412 									  bool								gsPointSize,
1413 									  const std::string&				extHeader,
1414 									  const std::string&				testSrc,
1415 									  const std::string&				helperStr,
1416 									  const std::vector<std::string>&	declarations,
1417 									  const bool						avoidHelperInvocations,
1418 									  const std::string&				tempRes)
1419 {
1420 	const std::string	formatName	= subgroups::getFormatNameForGLSL(format);
1421 
1422 	if (isAllComputeStages(shaderStage))
1423 	{
1424 		std::ostringstream	src;
1425 
1426 		src << "#version 450\n"
1427 			<< extHeader
1428 			<< "layout (local_size_x_id = 0, local_size_y_id = 1, "
1429 			"local_size_z_id = 2) in;\n"
1430 			<< getBufferDeclarations(shaderStage, formatName, declarations, 0)
1431 			<< "\n"
1432 			<< helperStr
1433 			<< "void main (void)\n"
1434 			<< "{\n"
1435 			<< "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1436 			<< "  highp uint offset = globalSize.x * ((globalSize.y * "
1437 			"gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1438 			"gl_GlobalInvocationID.x;\n"
1439 			<< tempRes
1440 			<< testSrc
1441 			<< "  result[offset] = tempRes;\n"
1442 			<< "}\n";
1443 
1444 		programCollection.glslSources.add("comp") << glu::ComputeSource(src.str()) << buildOptions;
1445 	}
1446 #ifndef CTS_USES_VULKANSC
1447 	else if (isAllMeshShadingStages(shaderStage))
1448 	{
1449 		const bool testMesh = ((shaderStage & VK_SHADER_STAGE_MESH_BIT_EXT) != 0u);
1450 		const bool testTask = ((shaderStage & VK_SHADER_STAGE_TASK_BIT_EXT) != 0u);
1451 
1452 		if (testMesh)
1453 		{
1454 			std::ostringstream mesh;
1455 
1456 			mesh
1457 				<< "#version 450\n"
1458 				<< "#extension GL_EXT_mesh_shader : enable\n"
1459 				//<< "#extension GL_NV_mesh_shader : enable\n"
1460 				<< extHeader
1461 				<< "layout (local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n"
1462 				<< "layout (points) out;\n"
1463 				<< "layout (max_vertices = 1, max_primitives = 1) out;\n"
1464 				<< getBufferDeclarations(shaderStage, formatName, declarations, 0)
1465 				<< "\n"
1466 				<< helperStr
1467 				<< "void main (void)\n"
1468 				<< "{\n"
1469 				<< "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1470 				//<< "  uvec3 globalSize = uvec3(0, 0, 0)/*gl_NumWorkGroups*/ * gl_WorkGroupSize;\n"
1471 				<< "  highp uint offset = globalSize.x * ((globalSize.y * "
1472 				"gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1473 				"gl_GlobalInvocationID.x;\n"
1474 				<< tempRes
1475 				<< testSrc
1476 				<< "  result[offset] = tempRes;\n"
1477 				<< "  SetMeshOutputsEXT(0u, 0u);\n"
1478 				//<< "  gl_PrimitiveCountNV = 0;\n"
1479 				<< "}\n";
1480 
1481 			programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
1482 		}
1483 		else
1484 		{
1485 			const std::string meshShaderNoSubgroups =
1486 				"#version 450\n"
1487 				"#extension GL_EXT_mesh_shader : enable\n"
1488 				"\n"
1489 				"layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
1490 				"layout (points) out;\n"
1491 				"layout (max_vertices = 1, max_primitives = 1) out;\n"
1492 				"\n"
1493 				"void main (void)\n"
1494 				"{\n"
1495 				"  SetMeshOutputsEXT(0u, 0u);\n"
1496 				"}\n"
1497 				;
1498 			programCollection.glslSources.add("mesh") << glu::MeshSource(meshShaderNoSubgroups) << buildOptions;
1499 		}
1500 
1501 		if (testTask)
1502 		{
1503 			const tcu::UVec3	emitSize = (testMesh ? tcu::UVec3(1u, 1u, 1u) : tcu::UVec3(0u, 0u, 0u));
1504 			std::ostringstream	task;
1505 
1506 			task
1507 				<< "#version 450\n"
1508 				<< "#extension GL_EXT_mesh_shader : enable\n"
1509 				//<< "#extension GL_NV_mesh_shader : enable\n"
1510 				<< extHeader
1511 				<< "layout (local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n"
1512 				<< getBufferDeclarations(shaderStage, formatName, declarations, 0)
1513 				<< "\n"
1514 				<< helperStr
1515 				<< "void main (void)\n"
1516 				<< "{\n"
1517 				<< "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1518 				//<< "  uvec3 globalSize = uvec3(0, 0, 0)/*gl_NumWorkGroups*/ * gl_WorkGroupSize;\n"
1519 				<< "  highp uint offset = globalSize.x * ((globalSize.y * "
1520 				"gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1521 				"gl_GlobalInvocationID.x;\n"
1522 				<< tempRes
1523 				<< testSrc
1524 				<< "  result[offset] = tempRes;\n"
1525 				<< "  EmitMeshTasksEXT(" << emitSize.x() << ", " << emitSize.y() << ", " << emitSize.z() << ");\n"
1526 				//<< "  gl_TaskCountNV = " << emitSize.x() << ";\n"
1527 				<< "}\n";
1528 
1529 			programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
1530 		}
1531 	}
1532 #endif // CTS_USES_VULKANSC
1533 	else if (isAllGraphicsStages(shaderStage))
1534 	{
1535 		const string vertex =
1536 			"#version 450\n"
1537 			+ extHeader
1538 			+ getBufferDeclarations(shaderStage, formatName, declarations, 0) +
1539 			"\n"
1540 			+ helperStr +
1541 			"void main (void)\n"
1542 			"{\n"
1543 			"  uint tempRes;\n"
1544 			+ testSrc +
1545 			"  result[gl_VertexIndex] = tempRes;\n"
1546 			"  float pixelSize = 2.0f/1024.0f;\n"
1547 			"  float pixelPosition = pixelSize/2.0f - 1.0f;\n"
1548 			"  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
1549 			"  gl_PointSize = 1.0f;\n"
1550 			"}\n";
1551 
1552 		const string tesc =
1553 			"#version 450\n"
1554 			+ extHeader +
1555 			"layout(vertices=1) out;\n"
1556 			+ getBufferDeclarations(shaderStage, formatName, declarations, 1) +
1557 			"\n"
1558 			+ helperStr +
1559 			"void main (void)\n"
1560 			"{\n"
1561 			+ tempRes
1562 			+ testSrc +
1563 			"  result[gl_PrimitiveID] = tempRes;\n"
1564 			"  if (gl_InvocationID == 0)\n"
1565 			"  {\n"
1566 			"    gl_TessLevelOuter[0] = 1.0f;\n"
1567 			"    gl_TessLevelOuter[1] = 1.0f;\n"
1568 			"  }\n"
1569 			"  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1570 			+ (gsPointSize ? "  gl_out[gl_InvocationID].gl_PointSize = gl_in[gl_InvocationID].gl_PointSize;\n" : "") +
1571 			"}\n";
1572 
1573 		const string tese =
1574 			"#version 450\n"
1575 			+ extHeader +
1576 			"layout(isolines) in;\n"
1577 			+ getBufferDeclarations(shaderStage, formatName, declarations, 2) +
1578 			"\n"
1579 			+ helperStr +
1580 			"void main (void)\n"
1581 			"{\n"
1582 			+ tempRes
1583 			+ testSrc +
1584 			"  result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempRes;\n"
1585 			"  float pixelSize = 2.0f/1024.0f;\n"
1586 			"  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
1587 			+ (gsPointSize ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "") +
1588 			"}\n";
1589 
1590 		const string geometry =
1591 			"#version 450\n"
1592 			+ extHeader +
1593 			"layout(${TOPOLOGY}) in;\n"
1594 			"layout(points, max_vertices = 1) out;\n"
1595 			+ getBufferDeclarations(shaderStage, formatName, declarations, 3) +
1596 			"\n"
1597 			+ helperStr +
1598 			"void main (void)\n"
1599 			"{\n"
1600 			+ tempRes
1601 			+ testSrc +
1602 			"  result[gl_PrimitiveIDIn] = tempRes;\n"
1603 			"  gl_Position = gl_in[0].gl_Position;\n"
1604 			+ (gsPointSize ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "") +
1605 			"  EmitVertex();\n"
1606 			"  EndPrimitive();\n"
1607 			"}\n";
1608 
1609 		const string fragment =
1610 			"#version 450\n"
1611 			+ extHeader
1612 			+ getBufferDeclarations(shaderStage, formatName, declarations, 4)
1613 			+ helperStr +
1614 			"void main (void)\n"
1615 			"{\n"
1616 			+ (avoidHelperInvocations ? "  if (gl_HelperInvocation) return;\n" : "")
1617 			+ tempRes
1618 			+ testSrc +
1619 			"  result = tempRes;\n"
1620 			"}\n";
1621 
1622 		subgroups::addNoSubgroupShader(programCollection);
1623 
1624 		programCollection.glslSources.add("vert") << glu::VertexSource(vertex) << buildOptions;
1625 		programCollection.glslSources.add("tesc") << glu::TessellationControlSource(tesc) << buildOptions;
1626 		programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(tese) << buildOptions;
1627 		subgroups::addGeometryShadersFromTemplate(geometry, buildOptions, programCollection.glslSources);
1628 		programCollection.glslSources.add("fragment") << glu::FragmentSource(fragment)<< buildOptions;
1629 	}
1630 #ifndef CTS_USES_VULKANSC
1631 	else if (isAllRayTracingStages(shaderStage))
1632 	{
1633 		const std::string	rgenShader	=
1634 			"#version 460 core\n"
1635 			"#extension GL_EXT_ray_tracing: require\n"
1636 			+ extHeader +
1637 			"layout(location = 0) rayPayloadEXT uvec4 payload;\n"
1638 			"layout(location = 0) callableDataEXT uvec4 callData;"
1639 			"layout(set = 1, binding = 0) uniform accelerationStructureEXT topLevelAS;\n"
1640 			+ getBufferDeclarations(shaderStage, formatName, declarations, 0) +
1641 			"\n"
1642 			+ helperStr +
1643 			"void main()\n"
1644 			"{\n"
1645 			+ tempRes
1646 			+ testSrc +
1647 			"  uint  rayFlags   = 0;\n"
1648 			"  uint  cullMask   = 0xFF;\n"
1649 			"  float tmin       = 0.0;\n"
1650 			"  float tmax       = 9.0;\n"
1651 			"  vec3  origin     = vec3((float(gl_LaunchIDEXT.x) + 0.5f) / float(gl_LaunchSizeEXT.x), (float(gl_LaunchIDEXT.y) + 0.5f) / float(gl_LaunchSizeEXT.y), 0.0);\n"
1652 			"  vec3  directHit  = vec3(0.0, 0.0, -1.0);\n"
1653 			"  vec3  directMiss = vec3(0.0, 0.0, +1.0);\n"
1654 			"\n"
1655 			"  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directHit, tmax, 0);\n"
1656 			"  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directMiss, tmax, 0);\n"
1657 			"  executeCallableEXT(0, 0);"
1658 			"  result[gl_LaunchIDEXT.x] = tempRes;\n"
1659 			"}\n";
1660 		const std::string	ahitShader	=
1661 			"#version 460 core\n"
1662 			"#extension GL_EXT_ray_tracing: require\n"
1663 			+ extHeader +
1664 			"hitAttributeEXT vec3 attribs;\n"
1665 			"layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
1666 			+ getBufferDeclarations(shaderStage, formatName, declarations, 1) +
1667 			"\n"
1668 			+ helperStr +
1669 			"void main()\n"
1670 			"{\n"
1671 			+ tempRes
1672 			+ testSrc +
1673 			"  result[gl_LaunchIDEXT.x] = tempRes;\n"
1674 			"}\n";
1675 		const std::string	chitShader	=
1676 			"#version 460 core\n"
1677 			"#extension GL_EXT_ray_tracing: require\n"
1678 			+ extHeader +
1679 			"hitAttributeEXT vec3 attribs;\n"
1680 			"layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
1681 			+ getBufferDeclarations(shaderStage, formatName, declarations, 2) +
1682 			"\n"
1683 			+ helperStr +
1684 			"void main()\n"
1685 			"{\n"
1686 			+ tempRes
1687 			+ testSrc +
1688 			"  result[gl_LaunchIDEXT.x] = tempRes;\n"
1689 			"}\n";
1690 		const std::string	missShader	=
1691 			"#version 460 core\n"
1692 			"#extension GL_EXT_ray_tracing: require\n"
1693 			+ extHeader +
1694 			"layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
1695 			+ getBufferDeclarations(shaderStage, formatName, declarations, 3) +
1696 			"\n"
1697 			+ helperStr +
1698 			"void main()\n"
1699 			"{\n"
1700 			+ tempRes
1701 			+ testSrc +
1702 			"  result[gl_LaunchIDEXT.x] = tempRes;\n"
1703 			"}\n";
1704 		const std::string	sectShader	=
1705 			"#version 460 core\n"
1706 			"#extension GL_EXT_ray_tracing: require\n"
1707 			+ extHeader +
1708 			"hitAttributeEXT vec3 hitAttribute;\n"
1709 			+ getBufferDeclarations(shaderStage, formatName, declarations, 4) +
1710 			"\n"
1711 			+ helperStr +
1712 			"void main()\n"
1713 			"{\n"
1714 			+ tempRes
1715 			+ testSrc +
1716 			"  reportIntersectionEXT(0.75f, 0x7Eu);\n"
1717 			"  result[gl_LaunchIDEXT.x] = tempRes;\n"
1718 			"}\n";
1719 		const std::string	callShader	=
1720 			"#version 460 core\n"
1721 			"#extension GL_EXT_ray_tracing: require\n"
1722 			+ extHeader +
1723 			"layout(location = 0) callableDataInEXT float callData;\n"
1724 			+ getBufferDeclarations(shaderStage, formatName, declarations, 5) +
1725 			"\n"
1726 			+ helperStr +
1727 			"void main()\n"
1728 			"{\n"
1729 			+ tempRes
1730 			+ testSrc +
1731 			"  result[gl_LaunchIDEXT.x] = tempRes;\n"
1732 			"}\n";
1733 
1734 		programCollection.glslSources.add("rgen") << glu::RaygenSource		(rgenShader) << buildOptions;
1735 		programCollection.glslSources.add("ahit") << glu::AnyHitSource		(ahitShader) << buildOptions;
1736 		programCollection.glslSources.add("chit") << glu::ClosestHitSource	(chitShader) << buildOptions;
1737 		programCollection.glslSources.add("miss") << glu::MissSource		(missShader) << buildOptions;
1738 		programCollection.glslSources.add("sect") << glu::IntersectionSource(sectShader) << buildOptions;
1739 		programCollection.glslSources.add("call") << glu::CallableSource	(callShader) << buildOptions;
1740 
1741 		subgroups::addRayTracingNoSubgroupShader(programCollection);
1742 	}
1743 #endif // CTS_USES_VULKANSC
1744 	else
1745 		TCU_THROW(InternalError, "Unknown stage or invalid stage set");
1746 
1747 }
1748 
isSubgroupSupported(Context & context)1749 bool vkt::subgroups::isSubgroupSupported (Context& context)
1750 {
1751 	return context.contextSupports(vk::ApiVersion(0, 1, 1, 0));
1752 }
1753 
areSubgroupOperationsSupportedForStage(Context & context,const VkShaderStageFlags stage)1754 bool vkt::subgroups::areSubgroupOperationsSupportedForStage (Context& context, const VkShaderStageFlags stage)
1755 {
1756 	return (stage & (context.getSubgroupProperties().supportedStages)) ? true : false;
1757 }
1758 
isSubgroupFeatureSupportedForDevice(Context & context,VkSubgroupFeatureFlagBits bit)1759 bool vkt::subgroups::isSubgroupFeatureSupportedForDevice (Context& context, VkSubgroupFeatureFlagBits bit)
1760 {
1761 	return (bit & (context.getSubgroupProperties().supportedOperations)) ? true : false;
1762 }
1763 
areQuadOperationsSupportedForStages(Context & context,const VkShaderStageFlags stages)1764 bool vkt::subgroups::areQuadOperationsSupportedForStages (Context& context, const VkShaderStageFlags stages)
1765 {
1766 	// Check general quad feature support first.
1767 	if (!isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_QUAD_BIT))
1768 		return false;
1769 
1770 	if (context.getSubgroupProperties().quadOperationsInAllStages == VK_TRUE)
1771 		return true; // No problem, any stage works.
1772 
1773 	// Only frag and compute are supported.
1774 	const VkShaderStageFlags fragCompute = (VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT);
1775 	const VkShaderStageFlags otherStages = ~fragCompute;
1776 	return ((stages & otherStages) == 0u);
1777 }
1778 
isFragmentSSBOSupportedForDevice(Context & context)1779 bool vkt::subgroups::isFragmentSSBOSupportedForDevice (Context& context)
1780 {
1781 	return context.getDeviceFeatures().fragmentStoresAndAtomics ? true : false;
1782 }
1783 
isVertexSSBOSupportedForDevice(Context & context)1784 bool vkt::subgroups::isVertexSSBOSupportedForDevice (Context& context)
1785 {
1786 	return context.getDeviceFeatures().vertexPipelineStoresAndAtomics ? true : false;
1787 }
1788 
isInt64SupportedForDevice(Context & context)1789 bool vkt::subgroups::isInt64SupportedForDevice (Context& context)
1790 {
1791 	return context.getDeviceFeatures().shaderInt64 ? true : false;
1792 }
1793 
isTessellationAndGeometryPointSizeSupported(Context & context)1794 bool vkt::subgroups::isTessellationAndGeometryPointSizeSupported (Context& context)
1795 {
1796 	return context.getDeviceFeatures().shaderTessellationAndGeometryPointSize ? true : false;
1797 }
1798 
is16BitUBOStorageSupported(Context & context)1799 bool vkt::subgroups::is16BitUBOStorageSupported (Context& context)
1800 {
1801 	return context.get16BitStorageFeatures().uniformAndStorageBuffer16BitAccess ? true : false;
1802 }
1803 
is8BitUBOStorageSupported(Context & context)1804 bool vkt::subgroups::is8BitUBOStorageSupported (Context& context)
1805 {
1806 	return context.get8BitStorageFeatures().uniformAndStorageBuffer8BitAccess ? true : false;
1807 }
1808 
isFormatSupportedForDevice(Context & context,vk::VkFormat format)1809 bool vkt::subgroups::isFormatSupportedForDevice (Context& context, vk::VkFormat format)
1810 {
1811 	const VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures&	subgroupExtendedTypesFeatures	= context.getShaderSubgroupExtendedTypesFeatures();
1812 	const VkPhysicalDeviceShaderFloat16Int8Features&			float16Int8Features				= context.getShaderFloat16Int8Features();
1813 	const VkPhysicalDevice16BitStorageFeatures&					storage16bit					= context.get16BitStorageFeatures();
1814 	const VkPhysicalDevice8BitStorageFeatures&					storage8bit						= context.get8BitStorageFeatures();
1815 	const VkPhysicalDeviceFeatures&								features						= context.getDeviceFeatures();
1816 	bool														shaderFloat64					= features.shaderFloat64 ? true : false;
1817 	bool														shaderInt16						= features.shaderInt16 ? true : false;
1818 	bool														shaderInt64						= features.shaderInt64 ? true : false;
1819 	bool														shaderSubgroupExtendedTypes		= false;
1820 	bool														shaderFloat16					= false;
1821 	bool														shaderInt8						= false;
1822 	bool														storageBuffer16BitAccess		= false;
1823 	bool														storageBuffer8BitAccess			= false;
1824 
1825 	if (context.isDeviceFunctionalitySupported("VK_KHR_shader_subgroup_extended_types") &&
1826 		context.isDeviceFunctionalitySupported("VK_KHR_shader_float16_int8"))
1827 	{
1828 		shaderSubgroupExtendedTypes	= subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes ? true : false;
1829 		shaderFloat16				= float16Int8Features.shaderFloat16 ? true : false;
1830 		shaderInt8					= float16Int8Features.shaderInt8 ? true : false;
1831 
1832 		if ( context.isDeviceFunctionalitySupported("VK_KHR_16bit_storage") )
1833 			storageBuffer16BitAccess = storage16bit.storageBuffer16BitAccess ? true : false;
1834 
1835 		if (context.isDeviceFunctionalitySupported("VK_KHR_8bit_storage"))
1836 			storageBuffer8BitAccess = storage8bit.storageBuffer8BitAccess ? true : false;
1837 	}
1838 
1839 	switch (format)
1840 	{
1841 		default:
1842 			return true;
1843 		case VK_FORMAT_R16_SFLOAT:
1844 		case VK_FORMAT_R16G16_SFLOAT:
1845 		case VK_FORMAT_R16G16B16_SFLOAT:
1846 		case VK_FORMAT_R16G16B16A16_SFLOAT:
1847 			return shaderSubgroupExtendedTypes && shaderFloat16 && storageBuffer16BitAccess;
1848 		case VK_FORMAT_R64_SFLOAT:
1849 		case VK_FORMAT_R64G64_SFLOAT:
1850 		case VK_FORMAT_R64G64B64_SFLOAT:
1851 		case VK_FORMAT_R64G64B64A64_SFLOAT:
1852 			return shaderFloat64;
1853 		case VK_FORMAT_R8_SINT:
1854 		case VK_FORMAT_R8G8_SINT:
1855 		case VK_FORMAT_R8G8B8_SINT:
1856 		case VK_FORMAT_R8G8B8A8_SINT:
1857 		case VK_FORMAT_R8_UINT:
1858 		case VK_FORMAT_R8G8_UINT:
1859 		case VK_FORMAT_R8G8B8_UINT:
1860 		case VK_FORMAT_R8G8B8A8_UINT:
1861 			return shaderSubgroupExtendedTypes && shaderInt8 && storageBuffer8BitAccess;
1862 		case VK_FORMAT_R16_SINT:
1863 		case VK_FORMAT_R16G16_SINT:
1864 		case VK_FORMAT_R16G16B16_SINT:
1865 		case VK_FORMAT_R16G16B16A16_SINT:
1866 		case VK_FORMAT_R16_UINT:
1867 		case VK_FORMAT_R16G16_UINT:
1868 		case VK_FORMAT_R16G16B16_UINT:
1869 		case VK_FORMAT_R16G16B16A16_UINT:
1870 			return shaderSubgroupExtendedTypes && shaderInt16 && storageBuffer16BitAccess;
1871 		case VK_FORMAT_R64_SINT:
1872 		case VK_FORMAT_R64G64_SINT:
1873 		case VK_FORMAT_R64G64B64_SINT:
1874 		case VK_FORMAT_R64G64B64A64_SINT:
1875 		case VK_FORMAT_R64_UINT:
1876 		case VK_FORMAT_R64G64_UINT:
1877 		case VK_FORMAT_R64G64B64_UINT:
1878 		case VK_FORMAT_R64G64B64A64_UINT:
1879 			return shaderSubgroupExtendedTypes && shaderInt64;
1880 	}
1881 }
1882 
isSubgroupBroadcastDynamicIdSupported(Context & context)1883 bool vkt::subgroups::isSubgroupBroadcastDynamicIdSupported (Context& context)
1884 {
1885 	return context.contextSupports(vk::ApiVersion(0, 1, 2, 0)) &&
1886 		vk::getPhysicalDeviceVulkan12Features(context.getInstanceInterface(), context.getPhysicalDevice()).subgroupBroadcastDynamicId;
1887 }
1888 
getFormatNameForGLSL(VkFormat format)1889 std::string vkt::subgroups::getFormatNameForGLSL (VkFormat format)
1890 {
1891 	switch (format)
1892 	{
1893 		case VK_FORMAT_R8_SINT:				return "int8_t";
1894 		case VK_FORMAT_R8G8_SINT:			return "i8vec2";
1895 		case VK_FORMAT_R8G8B8_SINT:			return "i8vec3";
1896 		case VK_FORMAT_R8G8B8A8_SINT:		return "i8vec4";
1897 		case VK_FORMAT_R8_UINT:				return "uint8_t";
1898 		case VK_FORMAT_R8G8_UINT:			return "u8vec2";
1899 		case VK_FORMAT_R8G8B8_UINT:			return "u8vec3";
1900 		case VK_FORMAT_R8G8B8A8_UINT:		return "u8vec4";
1901 		case VK_FORMAT_R16_SINT:			return "int16_t";
1902 		case VK_FORMAT_R16G16_SINT:			return "i16vec2";
1903 		case VK_FORMAT_R16G16B16_SINT:		return "i16vec3";
1904 		case VK_FORMAT_R16G16B16A16_SINT:	return "i16vec4";
1905 		case VK_FORMAT_R16_UINT:			return "uint16_t";
1906 		case VK_FORMAT_R16G16_UINT:			return "u16vec2";
1907 		case VK_FORMAT_R16G16B16_UINT:		return "u16vec3";
1908 		case VK_FORMAT_R16G16B16A16_UINT:	return "u16vec4";
1909 		case VK_FORMAT_R32_SINT:			return "int";
1910 		case VK_FORMAT_R32G32_SINT:			return "ivec2";
1911 		case VK_FORMAT_R32G32B32_SINT:		return "ivec3";
1912 		case VK_FORMAT_R32G32B32A32_SINT:	return "ivec4";
1913 		case VK_FORMAT_R32_UINT:			return "uint";
1914 		case VK_FORMAT_R32G32_UINT:			return "uvec2";
1915 		case VK_FORMAT_R32G32B32_UINT:		return "uvec3";
1916 		case VK_FORMAT_R32G32B32A32_UINT:	return "uvec4";
1917 		case VK_FORMAT_R64_SINT:			return "int64_t";
1918 		case VK_FORMAT_R64G64_SINT:			return "i64vec2";
1919 		case VK_FORMAT_R64G64B64_SINT:		return "i64vec3";
1920 		case VK_FORMAT_R64G64B64A64_SINT:	return "i64vec4";
1921 		case VK_FORMAT_R64_UINT:			return "uint64_t";
1922 		case VK_FORMAT_R64G64_UINT:			return "u64vec2";
1923 		case VK_FORMAT_R64G64B64_UINT:		return "u64vec3";
1924 		case VK_FORMAT_R64G64B64A64_UINT:	return "u64vec4";
1925 		case VK_FORMAT_R16_SFLOAT:			return "float16_t";
1926 		case VK_FORMAT_R16G16_SFLOAT:		return "f16vec2";
1927 		case VK_FORMAT_R16G16B16_SFLOAT:	return "f16vec3";
1928 		case VK_FORMAT_R16G16B16A16_SFLOAT:	return "f16vec4";
1929 		case VK_FORMAT_R32_SFLOAT:			return "float";
1930 		case VK_FORMAT_R32G32_SFLOAT:		return "vec2";
1931 		case VK_FORMAT_R32G32B32_SFLOAT:	return "vec3";
1932 		case VK_FORMAT_R32G32B32A32_SFLOAT:	return "vec4";
1933 		case VK_FORMAT_R64_SFLOAT:			return "double";
1934 		case VK_FORMAT_R64G64_SFLOAT:		return "dvec2";
1935 		case VK_FORMAT_R64G64B64_SFLOAT:	return "dvec3";
1936 		case VK_FORMAT_R64G64B64A64_SFLOAT:	return "dvec4";
1937 		case VK_FORMAT_R8_USCALED:			return "bool";
1938 		case VK_FORMAT_R8G8_USCALED:		return "bvec2";
1939 		case VK_FORMAT_R8G8B8_USCALED:		return "bvec3";
1940 		case VK_FORMAT_R8G8B8A8_USCALED:	return "bvec4";
1941 		default:							TCU_THROW(InternalError, "Unhandled format");
1942 	}
1943 }
1944 
getAdditionalExtensionForFormat(vk::VkFormat format)1945 std::string vkt::subgroups::getAdditionalExtensionForFormat (vk::VkFormat format)
1946 {
1947 	switch (format)
1948 	{
1949 		default:
1950 			return "";
1951 		case VK_FORMAT_R8_SINT:
1952 		case VK_FORMAT_R8G8_SINT:
1953 		case VK_FORMAT_R8G8B8_SINT:
1954 		case VK_FORMAT_R8G8B8A8_SINT:
1955 		case VK_FORMAT_R8_UINT:
1956 		case VK_FORMAT_R8G8_UINT:
1957 		case VK_FORMAT_R8G8B8_UINT:
1958 		case VK_FORMAT_R8G8B8A8_UINT:
1959 			return "#extension GL_EXT_shader_subgroup_extended_types_int8 : enable\n";
1960 		case VK_FORMAT_R16_SINT:
1961 		case VK_FORMAT_R16G16_SINT:
1962 		case VK_FORMAT_R16G16B16_SINT:
1963 		case VK_FORMAT_R16G16B16A16_SINT:
1964 		case VK_FORMAT_R16_UINT:
1965 		case VK_FORMAT_R16G16_UINT:
1966 		case VK_FORMAT_R16G16B16_UINT:
1967 		case VK_FORMAT_R16G16B16A16_UINT:
1968 			return "#extension GL_EXT_shader_subgroup_extended_types_int16 : enable\n";
1969 		case VK_FORMAT_R64_SINT:
1970 		case VK_FORMAT_R64G64_SINT:
1971 		case VK_FORMAT_R64G64B64_SINT:
1972 		case VK_FORMAT_R64G64B64A64_SINT:
1973 		case VK_FORMAT_R64_UINT:
1974 		case VK_FORMAT_R64G64_UINT:
1975 		case VK_FORMAT_R64G64B64_UINT:
1976 		case VK_FORMAT_R64G64B64A64_UINT:
1977 			return "#extension GL_EXT_shader_subgroup_extended_types_int64 : enable\n";
1978 		case VK_FORMAT_R16_SFLOAT:
1979 		case VK_FORMAT_R16G16_SFLOAT:
1980 		case VK_FORMAT_R16G16B16_SFLOAT:
1981 		case VK_FORMAT_R16G16B16A16_SFLOAT:
1982 			return "#extension GL_EXT_shader_subgroup_extended_types_float16 : enable\n";
1983 	}
1984 }
1985 
getAllFormats()1986 const std::vector<vk::VkFormat> vkt::subgroups::getAllFormats ()
1987 {
1988 	std::vector<VkFormat> formats;
1989 
1990 	formats.push_back(VK_FORMAT_R8_SINT);
1991 	formats.push_back(VK_FORMAT_R8G8_SINT);
1992 	formats.push_back(VK_FORMAT_R8G8B8_SINT);
1993 	formats.push_back(VK_FORMAT_R8G8B8A8_SINT);
1994 	formats.push_back(VK_FORMAT_R8_UINT);
1995 	formats.push_back(VK_FORMAT_R8G8_UINT);
1996 	formats.push_back(VK_FORMAT_R8G8B8_UINT);
1997 	formats.push_back(VK_FORMAT_R8G8B8A8_UINT);
1998 	formats.push_back(VK_FORMAT_R16_SINT);
1999 	formats.push_back(VK_FORMAT_R16G16_SINT);
2000 	formats.push_back(VK_FORMAT_R16G16B16_SINT);
2001 	formats.push_back(VK_FORMAT_R16G16B16A16_SINT);
2002 	formats.push_back(VK_FORMAT_R16_UINT);
2003 	formats.push_back(VK_FORMAT_R16G16_UINT);
2004 	formats.push_back(VK_FORMAT_R16G16B16_UINT);
2005 	formats.push_back(VK_FORMAT_R16G16B16A16_UINT);
2006 	formats.push_back(VK_FORMAT_R32_SINT);
2007 	formats.push_back(VK_FORMAT_R32G32_SINT);
2008 	formats.push_back(VK_FORMAT_R32G32B32_SINT);
2009 	formats.push_back(VK_FORMAT_R32G32B32A32_SINT);
2010 	formats.push_back(VK_FORMAT_R32_UINT);
2011 	formats.push_back(VK_FORMAT_R32G32_UINT);
2012 	formats.push_back(VK_FORMAT_R32G32B32_UINT);
2013 	formats.push_back(VK_FORMAT_R32G32B32A32_UINT);
2014 	formats.push_back(VK_FORMAT_R64_SINT);
2015 	formats.push_back(VK_FORMAT_R64G64_SINT);
2016 	formats.push_back(VK_FORMAT_R64G64B64_SINT);
2017 	formats.push_back(VK_FORMAT_R64G64B64A64_SINT);
2018 	formats.push_back(VK_FORMAT_R64_UINT);
2019 	formats.push_back(VK_FORMAT_R64G64_UINT);
2020 	formats.push_back(VK_FORMAT_R64G64B64_UINT);
2021 	formats.push_back(VK_FORMAT_R64G64B64A64_UINT);
2022 	formats.push_back(VK_FORMAT_R16_SFLOAT);
2023 	formats.push_back(VK_FORMAT_R16G16_SFLOAT);
2024 	formats.push_back(VK_FORMAT_R16G16B16_SFLOAT);
2025 	formats.push_back(VK_FORMAT_R16G16B16A16_SFLOAT);
2026 	formats.push_back(VK_FORMAT_R32_SFLOAT);
2027 	formats.push_back(VK_FORMAT_R32G32_SFLOAT);
2028 	formats.push_back(VK_FORMAT_R32G32B32_SFLOAT);
2029 	formats.push_back(VK_FORMAT_R32G32B32A32_SFLOAT);
2030 	formats.push_back(VK_FORMAT_R64_SFLOAT);
2031 	formats.push_back(VK_FORMAT_R64G64_SFLOAT);
2032 	formats.push_back(VK_FORMAT_R64G64B64_SFLOAT);
2033 	formats.push_back(VK_FORMAT_R64G64B64A64_SFLOAT);
2034 	formats.push_back(VK_FORMAT_R8_USCALED);
2035 	formats.push_back(VK_FORMAT_R8G8_USCALED);
2036 	formats.push_back(VK_FORMAT_R8G8B8_USCALED);
2037 	formats.push_back(VK_FORMAT_R8G8B8A8_USCALED);
2038 
2039 	return formats;
2040 }
2041 
isFormatSigned(VkFormat format)2042 bool vkt::subgroups::isFormatSigned (VkFormat format)
2043 {
2044 	switch (format)
2045 	{
2046 		default:
2047 			return false;
2048 		case VK_FORMAT_R8_SINT:
2049 		case VK_FORMAT_R8G8_SINT:
2050 		case VK_FORMAT_R8G8B8_SINT:
2051 		case VK_FORMAT_R8G8B8A8_SINT:
2052 		case VK_FORMAT_R16_SINT:
2053 		case VK_FORMAT_R16G16_SINT:
2054 		case VK_FORMAT_R16G16B16_SINT:
2055 		case VK_FORMAT_R16G16B16A16_SINT:
2056 		case VK_FORMAT_R32_SINT:
2057 		case VK_FORMAT_R32G32_SINT:
2058 		case VK_FORMAT_R32G32B32_SINT:
2059 		case VK_FORMAT_R32G32B32A32_SINT:
2060 		case VK_FORMAT_R64_SINT:
2061 		case VK_FORMAT_R64G64_SINT:
2062 		case VK_FORMAT_R64G64B64_SINT:
2063 		case VK_FORMAT_R64G64B64A64_SINT:
2064 			return true;
2065 	}
2066 }
2067 
isFormatUnsigned(VkFormat format)2068 bool vkt::subgroups::isFormatUnsigned (VkFormat format)
2069 {
2070 	switch (format)
2071 	{
2072 		default:
2073 			return false;
2074 		case VK_FORMAT_R8_UINT:
2075 		case VK_FORMAT_R8G8_UINT:
2076 		case VK_FORMAT_R8G8B8_UINT:
2077 		case VK_FORMAT_R8G8B8A8_UINT:
2078 		case VK_FORMAT_R16_UINT:
2079 		case VK_FORMAT_R16G16_UINT:
2080 		case VK_FORMAT_R16G16B16_UINT:
2081 		case VK_FORMAT_R16G16B16A16_UINT:
2082 		case VK_FORMAT_R32_UINT:
2083 		case VK_FORMAT_R32G32_UINT:
2084 		case VK_FORMAT_R32G32B32_UINT:
2085 		case VK_FORMAT_R32G32B32A32_UINT:
2086 		case VK_FORMAT_R64_UINT:
2087 		case VK_FORMAT_R64G64_UINT:
2088 		case VK_FORMAT_R64G64B64_UINT:
2089 		case VK_FORMAT_R64G64B64A64_UINT:
2090 			return true;
2091 	}
2092 }
2093 
isFormatFloat(VkFormat format)2094 bool vkt::subgroups::isFormatFloat (VkFormat format)
2095 {
2096 	switch (format)
2097 	{
2098 		default:
2099 			return false;
2100 		case VK_FORMAT_R16_SFLOAT:
2101 		case VK_FORMAT_R16G16_SFLOAT:
2102 		case VK_FORMAT_R16G16B16_SFLOAT:
2103 		case VK_FORMAT_R16G16B16A16_SFLOAT:
2104 		case VK_FORMAT_R32_SFLOAT:
2105 		case VK_FORMAT_R32G32_SFLOAT:
2106 		case VK_FORMAT_R32G32B32_SFLOAT:
2107 		case VK_FORMAT_R32G32B32A32_SFLOAT:
2108 		case VK_FORMAT_R64_SFLOAT:
2109 		case VK_FORMAT_R64G64_SFLOAT:
2110 		case VK_FORMAT_R64G64B64_SFLOAT:
2111 		case VK_FORMAT_R64G64B64A64_SFLOAT:
2112 			return true;
2113 	}
2114 }
2115 
isFormatBool(VkFormat format)2116 bool vkt::subgroups::isFormatBool (VkFormat format)
2117 {
2118 	switch (format)
2119 	{
2120 		default:
2121 			return false;
2122 		case VK_FORMAT_R8_USCALED:
2123 		case VK_FORMAT_R8G8_USCALED:
2124 		case VK_FORMAT_R8G8B8_USCALED:
2125 		case VK_FORMAT_R8G8B8A8_USCALED:
2126 			return true;
2127 	}
2128 }
2129 
isFormat8bitTy(VkFormat format)2130 bool vkt::subgroups::isFormat8bitTy (VkFormat format)
2131 {
2132 	switch (format)
2133 	{
2134 	default:
2135 		return false;
2136 	case VK_FORMAT_R8_SINT:
2137 	case VK_FORMAT_R8G8_SINT:
2138 	case VK_FORMAT_R8G8B8_SINT:
2139 	case VK_FORMAT_R8G8B8A8_SINT:
2140 	case VK_FORMAT_R8_UINT:
2141 	case VK_FORMAT_R8G8_UINT:
2142 	case VK_FORMAT_R8G8B8_UINT:
2143 	case VK_FORMAT_R8G8B8A8_UINT:
2144 		return true;
2145 	}
2146 }
2147 
isFormat16BitTy(VkFormat format)2148 bool vkt::subgroups::isFormat16BitTy (VkFormat format)
2149 {
2150 	switch (format)
2151 	{
2152 	default:
2153 		return false;
2154 	case VK_FORMAT_R16_SFLOAT:
2155 	case VK_FORMAT_R16G16_SFLOAT:
2156 	case VK_FORMAT_R16G16B16_SFLOAT:
2157 	case VK_FORMAT_R16G16B16A16_SFLOAT:
2158 	case VK_FORMAT_R16_SINT:
2159 	case VK_FORMAT_R16G16_SINT:
2160 	case VK_FORMAT_R16G16B16_SINT:
2161 	case VK_FORMAT_R16G16B16A16_SINT:
2162 	case VK_FORMAT_R16_UINT:
2163 	case VK_FORMAT_R16G16_UINT:
2164 	case VK_FORMAT_R16G16B16_UINT:
2165 	case VK_FORMAT_R16G16B16A16_UINT:
2166 		return true;
2167 	}
2168 }
2169 
setVertexShaderFrameBuffer(SourceCollections & programCollection)2170 void vkt::subgroups::setVertexShaderFrameBuffer (SourceCollections& programCollection)
2171 {
2172 	/*
2173 		"layout(location = 0) in highp vec4 in_position;\n"
2174 		"void main (void)\n"
2175 		"{\n"
2176 		"  gl_Position = in_position;\n"
2177 		"  gl_PointSize = 1.0f;\n"
2178 		"}\n";
2179 	*/
2180 	programCollection.spirvAsmSources.add("vert") <<
2181 		"; SPIR-V\n"
2182 		"; Version: 1.3\n"
2183 		"; Generator: Khronos Glslang Reference Front End; 7\n"
2184 		"; Bound: 25\n"
2185 		"; Schema: 0\n"
2186 		"OpCapability Shader\n"
2187 		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
2188 		"OpMemoryModel Logical GLSL450\n"
2189 		"OpEntryPoint Vertex %4 \"main\" %13 %17\n"
2190 		"OpMemberDecorate %11 0 BuiltIn Position\n"
2191 		"OpMemberDecorate %11 1 BuiltIn PointSize\n"
2192 		"OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
2193 		"OpMemberDecorate %11 3 BuiltIn CullDistance\n"
2194 		"OpDecorate %11 Block\n"
2195 		"OpDecorate %17 Location 0\n"
2196 		"%2 = OpTypeVoid\n"
2197 		"%3 = OpTypeFunction %2\n"
2198 		"%6 = OpTypeFloat 32\n"
2199 		"%7 = OpTypeVector %6 4\n"
2200 		"%8 = OpTypeInt 32 0\n"
2201 		"%9 = OpConstant %8 1\n"
2202 		"%10 = OpTypeArray %6 %9\n"
2203 		"%11 = OpTypeStruct %7 %6 %10 %10\n"
2204 		"%12 = OpTypePointer Output %11\n"
2205 		"%13 = OpVariable %12 Output\n"
2206 		"%14 = OpTypeInt 32 1\n"
2207 		"%15 = OpConstant %14 0\n"
2208 		"%16 = OpTypePointer Input %7\n"
2209 		"%17 = OpVariable %16 Input\n"
2210 		"%19 = OpTypePointer Output %7\n"
2211 		"%21 = OpConstant %14 1\n"
2212 		"%22 = OpConstant %6 1\n"
2213 		"%23 = OpTypePointer Output %6\n"
2214 		"%4 = OpFunction %2 None %3\n"
2215 		"%5 = OpLabel\n"
2216 		"%18 = OpLoad %7 %17\n"
2217 		"%20 = OpAccessChain %19 %13 %15\n"
2218 		"OpStore %20 %18\n"
2219 		"%24 = OpAccessChain %23 %13 %21\n"
2220 		"OpStore %24 %22\n"
2221 		"OpReturn\n"
2222 		"OpFunctionEnd\n";
2223 }
2224 
setFragmentShaderFrameBuffer(vk::SourceCollections & programCollection)2225 void vkt::subgroups::setFragmentShaderFrameBuffer (vk::SourceCollections& programCollection)
2226 {
2227 	/*
2228 		"layout(location = 0) in float in_color;\n"
2229 		"layout(location = 0) out uint out_color;\n"
2230 		"void main()\n"
2231 		{\n"
2232 		"	out_color = uint(in_color);\n"
2233 		"}\n";
2234 	*/
2235 	programCollection.spirvAsmSources.add("fragment") <<
2236 		"; SPIR-V\n"
2237 		"; Version: 1.3\n"
2238 		"; Generator: Khronos Glslang Reference Front End; 2\n"
2239 		"; Bound: 14\n"
2240 		"; Schema: 0\n"
2241 		"OpCapability Shader\n"
2242 		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
2243 		"OpMemoryModel Logical GLSL450\n"
2244 		"OpEntryPoint Fragment %4 \"main\" %8 %11\n"
2245 		"OpExecutionMode %4 OriginUpperLeft\n"
2246 		"OpDecorate %8 Location 0\n"
2247 		"OpDecorate %11 Location 0\n"
2248 		"%2 = OpTypeVoid\n"
2249 		"%3 = OpTypeFunction %2\n"
2250 		"%6 = OpTypeInt 32 0\n"
2251 		"%7 = OpTypePointer Output %6\n"
2252 		"%8 = OpVariable %7 Output\n"
2253 		"%9 = OpTypeFloat 32\n"
2254 		"%10 = OpTypePointer Input %9\n"
2255 		"%11 = OpVariable %10 Input\n"
2256 		"%4 = OpFunction %2 None %3\n"
2257 		"%5 = OpLabel\n"
2258 		"%12 = OpLoad %9 %11\n"
2259 		"%13 = OpConvertFToU %6 %12\n"
2260 		"OpStore %8 %13\n"
2261 		"OpReturn\n"
2262 		"OpFunctionEnd\n";
2263 }
2264 
setTesCtrlShaderFrameBuffer(vk::SourceCollections & programCollection)2265 void vkt::subgroups::setTesCtrlShaderFrameBuffer (vk::SourceCollections& programCollection)
2266 {
2267 	/*
2268 		"#extension GL_KHR_shader_subgroup_basic: enable\n"
2269 		"#extension GL_EXT_tessellation_shader : require\n"
2270 		"layout(vertices = 2) out;\n"
2271 		"void main (void)\n"
2272 		"{\n"
2273 		"  if (gl_InvocationID == 0)\n"
2274 		"  {\n"
2275 		"    gl_TessLevelOuter[0] = 1.0f;\n"
2276 		"    gl_TessLevelOuter[1] = 1.0f;\n"
2277 		"  }\n"
2278 		"  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
2279 		"}\n";
2280 	*/
2281 	programCollection.spirvAsmSources.add("tesc") <<
2282 		"; SPIR-V\n"
2283 		"; Version: 1.3\n"
2284 		"; Generator: Khronos Glslang Reference Front End; 2\n"
2285 		"; Bound: 46\n"
2286 		"; Schema: 0\n"
2287 		"OpCapability Tessellation\n"
2288 		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
2289 		"OpMemoryModel Logical GLSL450\n"
2290 		"OpEntryPoint TessellationControl %4 \"main\" %8 %20 %33 %39\n"
2291 		"OpExecutionMode %4 OutputVertices 2\n"
2292 		"OpDecorate %8 BuiltIn InvocationId\n"
2293 		"OpDecorate %20 Patch\n"
2294 		"OpDecorate %20 BuiltIn TessLevelOuter\n"
2295 		"OpMemberDecorate %29 0 BuiltIn Position\n"
2296 		"OpMemberDecorate %29 1 BuiltIn PointSize\n"
2297 		"OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
2298 		"OpMemberDecorate %29 3 BuiltIn CullDistance\n"
2299 		"OpDecorate %29 Block\n"
2300 		"OpMemberDecorate %35 0 BuiltIn Position\n"
2301 		"OpMemberDecorate %35 1 BuiltIn PointSize\n"
2302 		"OpMemberDecorate %35 2 BuiltIn ClipDistance\n"
2303 		"OpMemberDecorate %35 3 BuiltIn CullDistance\n"
2304 		"OpDecorate %35 Block\n"
2305 		"%2 = OpTypeVoid\n"
2306 		"%3 = OpTypeFunction %2\n"
2307 		"%6 = OpTypeInt 32 1\n"
2308 		"%7 = OpTypePointer Input %6\n"
2309 		"%8 = OpVariable %7 Input\n"
2310 		"%10 = OpConstant %6 0\n"
2311 		"%11 = OpTypeBool\n"
2312 		"%15 = OpTypeFloat 32\n"
2313 		"%16 = OpTypeInt 32 0\n"
2314 		"%17 = OpConstant %16 4\n"
2315 		"%18 = OpTypeArray %15 %17\n"
2316 		"%19 = OpTypePointer Output %18\n"
2317 		"%20 = OpVariable %19 Output\n"
2318 		"%21 = OpConstant %15 1\n"
2319 		"%22 = OpTypePointer Output %15\n"
2320 		"%24 = OpConstant %6 1\n"
2321 		"%26 = OpTypeVector %15 4\n"
2322 		"%27 = OpConstant %16 1\n"
2323 		"%28 = OpTypeArray %15 %27\n"
2324 		"%29 = OpTypeStruct %26 %15 %28 %28\n"
2325 		"%30 = OpConstant %16 2\n"
2326 		"%31 = OpTypeArray %29 %30\n"
2327 		"%32 = OpTypePointer Output %31\n"
2328 		"%33 = OpVariable %32 Output\n"
2329 		"%35 = OpTypeStruct %26 %15 %28 %28\n"
2330 		"%36 = OpConstant %16 32\n"
2331 		"%37 = OpTypeArray %35 %36\n"
2332 		"%38 = OpTypePointer Input %37\n"
2333 		"%39 = OpVariable %38 Input\n"
2334 		"%41 = OpTypePointer Input %26\n"
2335 		"%44 = OpTypePointer Output %26\n"
2336 		"%4 = OpFunction %2 None %3\n"
2337 		"%5 = OpLabel\n"
2338 		"%9 = OpLoad %6 %8\n"
2339 		"%12 = OpIEqual %11 %9 %10\n"
2340 		"OpSelectionMerge %14 None\n"
2341 		"OpBranchConditional %12 %13 %14\n"
2342 		"%13 = OpLabel\n"
2343 		"%23 = OpAccessChain %22 %20 %10\n"
2344 		"OpStore %23 %21\n"
2345 		"%25 = OpAccessChain %22 %20 %24\n"
2346 		"OpStore %25 %21\n"
2347 		"OpBranch %14\n"
2348 		"%14 = OpLabel\n"
2349 		"%34 = OpLoad %6 %8\n"
2350 		"%40 = OpLoad %6 %8\n"
2351 		"%42 = OpAccessChain %41 %39 %40 %10\n"
2352 		"%43 = OpLoad %26 %42\n"
2353 		"%45 = OpAccessChain %44 %33 %34 %10\n"
2354 		"OpStore %45 %43\n"
2355 		"OpReturn\n"
2356 		"OpFunctionEnd\n";
2357 }
2358 
setTesEvalShaderFrameBuffer(vk::SourceCollections & programCollection)2359 void vkt::subgroups::setTesEvalShaderFrameBuffer (vk::SourceCollections& programCollection)
2360 {
2361 	/*
2362 		"#extension GL_KHR_shader_subgroup_ballot: enable\n"
2363 		"#extension GL_EXT_tessellation_shader : require\n"
2364 		"layout(isolines, equal_spacing, ccw ) in;\n"
2365 		"layout(location = 0) in float in_color[];\n"
2366 		"layout(location = 0) out float out_color;\n"
2367 		"\n"
2368 		"void main (void)\n"
2369 		"{\n"
2370 		"  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
2371 		"  out_color = in_color[0];\n"
2372 		"}\n";
2373 	*/
2374 	programCollection.spirvAsmSources.add("tese") <<
2375 		"; SPIR-V\n"
2376 		"; Version: 1.3\n"
2377 		"; Generator: Khronos Glslang Reference Front End; 2\n"
2378 		"; Bound: 45\n"
2379 		"; Schema: 0\n"
2380 		"OpCapability Tessellation\n"
2381 		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
2382 		"OpMemoryModel Logical GLSL450\n"
2383 		"OpEntryPoint TessellationEvaluation %4 \"main\" %13 %20 %29 %39 %42\n"
2384 		"OpExecutionMode %4 Isolines\n"
2385 		"OpExecutionMode %4 SpacingEqual\n"
2386 		"OpExecutionMode %4 VertexOrderCcw\n"
2387 		"OpMemberDecorate %11 0 BuiltIn Position\n"
2388 		"OpMemberDecorate %11 1 BuiltIn PointSize\n"
2389 		"OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
2390 		"OpMemberDecorate %11 3 BuiltIn CullDistance\n"
2391 		"OpDecorate %11 Block\n"
2392 		"OpMemberDecorate %16 0 BuiltIn Position\n"
2393 		"OpMemberDecorate %16 1 BuiltIn PointSize\n"
2394 		"OpMemberDecorate %16 2 BuiltIn ClipDistance\n"
2395 		"OpMemberDecorate %16 3 BuiltIn CullDistance\n"
2396 		"OpDecorate %16 Block\n"
2397 		"OpDecorate %29 BuiltIn TessCoord\n"
2398 		"OpDecorate %39 Location 0\n"
2399 		"OpDecorate %42 Location 0\n"
2400 		"%2 = OpTypeVoid\n"
2401 		"%3 = OpTypeFunction %2\n"
2402 		"%6 = OpTypeFloat 32\n"
2403 		"%7 = OpTypeVector %6 4\n"
2404 		"%8 = OpTypeInt 32 0\n"
2405 		"%9 = OpConstant %8 1\n"
2406 		"%10 = OpTypeArray %6 %9\n"
2407 		"%11 = OpTypeStruct %7 %6 %10 %10\n"
2408 		"%12 = OpTypePointer Output %11\n"
2409 		"%13 = OpVariable %12 Output\n"
2410 		"%14 = OpTypeInt 32 1\n"
2411 		"%15 = OpConstant %14 0\n"
2412 		"%16 = OpTypeStruct %7 %6 %10 %10\n"
2413 		"%17 = OpConstant %8 32\n"
2414 		"%18 = OpTypeArray %16 %17\n"
2415 		"%19 = OpTypePointer Input %18\n"
2416 		"%20 = OpVariable %19 Input\n"
2417 		"%21 = OpTypePointer Input %7\n"
2418 		"%24 = OpConstant %14 1\n"
2419 		"%27 = OpTypeVector %6 3\n"
2420 		"%28 = OpTypePointer Input %27\n"
2421 		"%29 = OpVariable %28 Input\n"
2422 		"%30 = OpConstant %8 0\n"
2423 		"%31 = OpTypePointer Input %6\n"
2424 		"%36 = OpTypePointer Output %7\n"
2425 		"%38 = OpTypePointer Output %6\n"
2426 		"%39 = OpVariable %38 Output\n"
2427 		"%40 = OpTypeArray %6 %17\n"
2428 		"%41 = OpTypePointer Input %40\n"
2429 		"%42 = OpVariable %41 Input\n"
2430 		"%4 = OpFunction %2 None %3\n"
2431 		"%5 = OpLabel\n"
2432 		"%22 = OpAccessChain %21 %20 %15 %15\n"
2433 		"%23 = OpLoad %7 %22\n"
2434 		"%25 = OpAccessChain %21 %20 %24 %15\n"
2435 		"%26 = OpLoad %7 %25\n"
2436 		"%32 = OpAccessChain %31 %29 %30\n"
2437 		"%33 = OpLoad %6 %32\n"
2438 		"%34 = OpCompositeConstruct %7 %33 %33 %33 %33\n"
2439 		"%35 = OpExtInst %7 %1 FMix %23 %26 %34\n"
2440 		"%37 = OpAccessChain %36 %13 %15\n"
2441 		"OpStore %37 %35\n"
2442 		"%43 = OpAccessChain %31 %42 %15\n"
2443 		"%44 = OpLoad %6 %43\n"
2444 		"OpStore %39 %44\n"
2445 		"OpReturn\n"
2446 		"OpFunctionEnd\n";
2447 }
2448 
addGeometryShadersFromTemplate(const std::string & glslTemplate,const vk::ShaderBuildOptions & options,vk::GlslSourceCollection & collection)2449 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& glslTemplate, const vk::ShaderBuildOptions& options,  vk::GlslSourceCollection& collection)
2450 {
2451 	tcu::StringTemplate geometryTemplate(glslTemplate);
2452 
2453 	map<string, string>		linesParams;
2454 	linesParams.insert(pair<string, string>("TOPOLOGY", "lines"));
2455 
2456 	map<string, string>		pointsParams;
2457 	pointsParams.insert(pair<string, string>("TOPOLOGY", "points"));
2458 
2459 	collection.add("geometry_lines")	<< glu::GeometrySource(geometryTemplate.specialize(linesParams))	<< options;
2460 	collection.add("geometry_points")	<< glu::GeometrySource(geometryTemplate.specialize(pointsParams))	<< options;
2461 }
2462 
addGeometryShadersFromTemplate(const std::string & spirvTemplate,const vk::SpirVAsmBuildOptions & options,vk::SpirVAsmCollection & collection)2463 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& spirvTemplate, const vk::SpirVAsmBuildOptions& options, vk::SpirVAsmCollection& collection)
2464 {
2465 	tcu::StringTemplate geometryTemplate(spirvTemplate);
2466 
2467 	map<string, string>		linesParams;
2468 	linesParams.insert(pair<string, string>("TOPOLOGY", "InputLines"));
2469 
2470 	map<string, string>		pointsParams;
2471 	pointsParams.insert(pair<string, string>("TOPOLOGY", "InputPoints"));
2472 
2473 	collection.add("geometry_lines")	<< geometryTemplate.specialize(linesParams)		<< options;
2474 	collection.add("geometry_points")	<< geometryTemplate.specialize(pointsParams)	<< options;
2475 }
2476 
initializeMemory(Context & context,const Allocation & alloc,const subgroups::SSBOData & data)2477 void initializeMemory (Context& context, const Allocation& alloc, const subgroups::SSBOData& data)
2478 {
2479 	const vk::VkFormat format = data.format;
2480 	const vk::VkDeviceSize size = data.numElements *
2481 		(data.isImage() ? getFormatSizeInBytes(format) : getElementSizeInBytes(format, data.layout));
2482 	if (subgroups::SSBOData::InitializeNonZero == data.initializeType)
2483 	{
2484 		de::Random rnd(context.getTestContext().getCommandLine().getBaseSeed());
2485 
2486 		switch (format)
2487 		{
2488 			default:
2489 				DE_FATAL("Illegal buffer format");
2490 				break;
2491 			case VK_FORMAT_R8_SINT:
2492 			case VK_FORMAT_R8G8_SINT:
2493 			case VK_FORMAT_R8G8B8_SINT:
2494 			case VK_FORMAT_R8G8B8A8_SINT:
2495 			case VK_FORMAT_R8_UINT:
2496 			case VK_FORMAT_R8G8_UINT:
2497 			case VK_FORMAT_R8G8B8_UINT:
2498 			case VK_FORMAT_R8G8B8A8_UINT:
2499 			{
2500 				deUint8* ptr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
2501 
2502 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint8)); k++)
2503 				{
2504 					ptr[k] = rnd.getUint8();
2505 				}
2506 			}
2507 			break;
2508 			case VK_FORMAT_R16_SINT:
2509 			case VK_FORMAT_R16G16_SINT:
2510 			case VK_FORMAT_R16G16B16_SINT:
2511 			case VK_FORMAT_R16G16B16A16_SINT:
2512 			case VK_FORMAT_R16_UINT:
2513 			case VK_FORMAT_R16G16_UINT:
2514 			case VK_FORMAT_R16G16B16_UINT:
2515 			case VK_FORMAT_R16G16B16A16_UINT:
2516 			{
2517 				deUint16* ptr = reinterpret_cast<deUint16*>(alloc.getHostPtr());
2518 
2519 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint16)); k++)
2520 				{
2521 					ptr[k] = rnd.getUint16();
2522 				}
2523 			}
2524 			break;
2525 			case VK_FORMAT_R8_USCALED:
2526 			case VK_FORMAT_R8G8_USCALED:
2527 			case VK_FORMAT_R8G8B8_USCALED:
2528 			case VK_FORMAT_R8G8B8A8_USCALED:
2529 			{
2530 				deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2531 
2532 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
2533 				{
2534 					deUint32 r = rnd.getUint32();
2535 					ptr[k] = (r & 1) ? r : 0;
2536 				}
2537 			}
2538 			break;
2539 			case VK_FORMAT_R32_SINT:
2540 			case VK_FORMAT_R32G32_SINT:
2541 			case VK_FORMAT_R32G32B32_SINT:
2542 			case VK_FORMAT_R32G32B32A32_SINT:
2543 			case VK_FORMAT_R32_UINT:
2544 			case VK_FORMAT_R32G32_UINT:
2545 			case VK_FORMAT_R32G32B32_UINT:
2546 			case VK_FORMAT_R32G32B32A32_UINT:
2547 			{
2548 				deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2549 
2550 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
2551 				{
2552 					ptr[k] = rnd.getUint32();
2553 				}
2554 			}
2555 			break;
2556 			case VK_FORMAT_R64_SINT:
2557 			case VK_FORMAT_R64G64_SINT:
2558 			case VK_FORMAT_R64G64B64_SINT:
2559 			case VK_FORMAT_R64G64B64A64_SINT:
2560 			case VK_FORMAT_R64_UINT:
2561 			case VK_FORMAT_R64G64_UINT:
2562 			case VK_FORMAT_R64G64B64_UINT:
2563 			case VK_FORMAT_R64G64B64A64_UINT:
2564 			{
2565 				deUint64* ptr = reinterpret_cast<deUint64*>(alloc.getHostPtr());
2566 
2567 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint64)); k++)
2568 				{
2569 					ptr[k] = rnd.getUint64();
2570 				}
2571 			}
2572 			break;
2573 			case VK_FORMAT_R16_SFLOAT:
2574 			case VK_FORMAT_R16G16_SFLOAT:
2575 			case VK_FORMAT_R16G16B16_SFLOAT:
2576 			case VK_FORMAT_R16G16B16A16_SFLOAT:
2577 			{
2578 				deFloat16* ptr = reinterpret_cast<deFloat16*>(alloc.getHostPtr());
2579 
2580 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deFloat16)); k++)
2581 				{
2582 					ptr[k] = deFloat32To16(rnd.getFloat());
2583 				}
2584 			}
2585 			break;
2586 			case VK_FORMAT_R32_SFLOAT:
2587 			case VK_FORMAT_R32G32_SFLOAT:
2588 			case VK_FORMAT_R32G32B32_SFLOAT:
2589 			case VK_FORMAT_R32G32B32A32_SFLOAT:
2590 			{
2591 				float* ptr = reinterpret_cast<float*>(alloc.getHostPtr());
2592 
2593 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(float)); k++)
2594 				{
2595 					ptr[k] = rnd.getFloat();
2596 				}
2597 			}
2598 			break;
2599 			case VK_FORMAT_R64_SFLOAT:
2600 			case VK_FORMAT_R64G64_SFLOAT:
2601 			case VK_FORMAT_R64G64B64_SFLOAT:
2602 			case VK_FORMAT_R64G64B64A64_SFLOAT:
2603 			{
2604 				double* ptr = reinterpret_cast<double*>(alloc.getHostPtr());
2605 
2606 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(double)); k++)
2607 				{
2608 					ptr[k] = rnd.getDouble();
2609 				}
2610 			}
2611 			break;
2612 		}
2613 	}
2614 	else if (subgroups::SSBOData::InitializeZero == data.initializeType)
2615 	{
2616 		deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2617 
2618 		for (vk::VkDeviceSize k = 0; k < size / 4; k++)
2619 		{
2620 			ptr[k] = 0;
2621 		}
2622 	}
2623 
2624 	if (subgroups::SSBOData::InitializeNone != data.initializeType)
2625 	{
2626 		flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
2627 	}
2628 }
2629 
getResultBinding(const VkShaderStageFlagBits shaderStage)2630 deUint32 getResultBinding (const VkShaderStageFlagBits shaderStage)
2631 {
2632 	switch(shaderStage)
2633 	{
2634 		case VK_SHADER_STAGE_VERTEX_BIT:
2635 			return 0u;
2636 		case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
2637 			return 1u;
2638 		case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
2639 			return 2u;
2640 		case VK_SHADER_STAGE_GEOMETRY_BIT:
2641 			return 3u;
2642 		default:
2643 			DE_ASSERT(0);
2644 			return -1;
2645 	}
2646 	DE_ASSERT(0);
2647 	return -1;
2648 }
2649 
makeTessellationEvaluationFrameBufferTest(Context & context,VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,subgroups::CheckResult checkResult,const VkShaderStageFlags shaderStage)2650 tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTest (Context&					context,
2651 																		   VkFormat					format,
2652 																		   const SSBOData*			extraData,
2653 																		   deUint32					extraDataCount,
2654 																		   const void*				internalData,
2655 																		   subgroups::CheckResult	checkResult,
2656 																		   const VkShaderStageFlags	shaderStage)
2657 {
2658 	return makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, shaderStage, 0u, 0u);
2659 }
2660 
makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,subgroups::CheckResult checkResult,const VkShaderStageFlags shaderStage,const deUint32 tessShaderStageCreateFlags,const deUint32 requiredSubgroupSize)2661 tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize (Context&					context,
2662 																							   VkFormat					format,
2663 																							   const SSBOData*			extraData,
2664 																							   deUint32					extraDataCount,
2665 																							   const void*				internalData,
2666 																							   subgroups::CheckResult	checkResult,
2667 																							   const VkShaderStageFlags	shaderStage,
2668 																							   const deUint32			tessShaderStageCreateFlags,
2669 																							   const deUint32			requiredSubgroupSize)
2670 {
2671 	const DeviceInterface&					vk						= context.getDeviceInterface();
2672 	const VkDevice							device					= context.getDevice();
2673 	const deUint32							maxWidth				= getMaxWidth();
2674 	vector<de::SharedPtr<BufferOrImage> >	inputBuffers			(extraDataCount);
2675 	DescriptorSetLayoutBuilder				layoutBuilder;
2676 	DescriptorPoolBuilder					poolBuilder;
2677 	DescriptorSetUpdateBuilder				updateBuilder;
2678 	Move <VkDescriptorPool>					descriptorPool;
2679 	Move <VkDescriptorSet>					descriptorSet;
2680 	const Unique<VkShaderModule>			vertexShaderModule		(createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
2681 	const Unique<VkShaderModule>			teCtrlShaderModule		(createShaderModule(vk, device, context.getBinaryCollection().get("tesc"), 0u));
2682 	const Unique<VkShaderModule>			teEvalShaderModule		(createShaderModule(vk, device, context.getBinaryCollection().get("tese"), 0u));
2683 	const Unique<VkShaderModule>			fragmentShaderModule	(createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
2684 	const Unique<VkRenderPass>				renderPass				(makeRenderPass(context, format));
2685 	const VkVertexInputBindingDescription	vertexInputBinding		=
2686 	{
2687 		0u,											//  deUint32			binding;
2688 		static_cast<deUint32>(sizeof(tcu::Vec4)),	//  deUint32			stride;
2689 		VK_VERTEX_INPUT_RATE_VERTEX					//  VkVertexInputRate	inputRate;
2690 	};
2691 	const VkVertexInputAttributeDescription	vertexInputAttribute	=
2692 	{
2693 		0u,									//  deUint32	location;
2694 		0u,									//  deUint32	binding;
2695 		VK_FORMAT_R32G32B32A32_SFLOAT,		//  VkFormat	format;
2696 		0u									//  deUint32	offset;
2697 	};
2698 
2699 	for (deUint32 i = 0u; i < extraDataCount; i++)
2700 	{
2701 		if (extraData[i].isImage())
2702 		{
2703 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
2704 		}
2705 		else
2706 		{
2707 			DE_ASSERT(extraData[i].isUBO());
2708 			vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
2709 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2710 		}
2711 		const Allocation& alloc = inputBuffers[i]->getAllocation();
2712 		initializeMemory(context, alloc, extraData[i]);
2713 	}
2714 
2715 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2716 		layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, shaderStage, DE_NULL);
2717 
2718 	const Unique<VkDescriptorSetLayout>		descriptorSetLayout		(layoutBuilder.build(vk, device));
2719 
2720 	const Unique<VkPipelineLayout>			pipelineLayout			(makePipelineLayout(vk, device, *descriptorSetLayout));
2721 
2722 	const deUint32 requiredSubgroupSizes[5] = {0u,
2723 											   ((shaderStage & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? requiredSubgroupSize : 0u),
2724 											   ((shaderStage & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? requiredSubgroupSize : 0u),
2725 											   0u,
2726 											   0u};
2727 
2728 	const Unique<VkPipeline>				pipeline				(makeGraphicsPipeline(context, *pipelineLayout,
2729 																						  VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT |
2730 																						  VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
2731 																						  *vertexShaderModule, *fragmentShaderModule, DE_NULL, *teCtrlShaderModule, *teEvalShaderModule,
2732 																						  *renderPass, VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, &vertexInputBinding, &vertexInputAttribute, true, format,
2733 																						  0u, ((shaderStage & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? tessShaderStageCreateFlags : 0u),
2734 																						  ((shaderStage & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? tessShaderStageCreateFlags : 0u),
2735 																						  0u, 0u, requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
2736 
2737 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2738 		poolBuilder.addType(inputBuffers[ndx]->getType());
2739 
2740 	if (extraDataCount > 0)
2741 	{
2742 		descriptorPool = poolBuilder.build(vk, device,
2743 							VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2744 		descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
2745 	}
2746 
2747 	for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
2748 	{
2749 		if (inputBuffers[buffersNdx]->isImage())
2750 		{
2751 			VkDescriptorImageInfo info =
2752 				makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
2753 										inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2754 
2755 			updateBuilder.writeSingle(*descriptorSet,
2756 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2757 										inputBuffers[buffersNdx]->getType(), &info);
2758 		}
2759 		else
2760 		{
2761 			VkDescriptorBufferInfo info =
2762 				makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
2763 										0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
2764 
2765 			updateBuilder.writeSingle(*descriptorSet,
2766 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2767 										inputBuffers[buffersNdx]->getType(), &info);
2768 		}
2769 	}
2770 
2771 	updateBuilder.update(vk, device);
2772 
2773 	const VkQueue							queue					= context.getUniversalQueue();
2774 	const deUint32							queueFamilyIndex		= context.getUniversalQueueFamilyIndex();
2775 	const Unique<VkCommandPool>				cmdPool					(makeCommandPool(vk, device, queueFamilyIndex));
2776 	const deUint32							subgroupSize			= getSubgroupSize(context);
2777 	const Unique<VkCommandBuffer>			cmdBuffer				(makeCommandBuffer(context, *cmdPool));
2778 	const vk::VkDeviceSize					vertexBufferSize		= 2ull * maxWidth * sizeof(tcu::Vec4);
2779 	Buffer									vertexBuffer			(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
2780 	unsigned								totalIterations			= 0u;
2781 	unsigned								failedIterations		= 0u;
2782 	Image									discardableImage		(context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2783 
2784 	{
2785 		const Allocation&		alloc				= vertexBuffer.getAllocation();
2786 		std::vector<tcu::Vec4>	data				(2u * maxWidth, Vec4(1.0f, 0.0f, 1.0f, 1.0f));
2787 		const float				pixelSize			= 2.0f / static_cast<float>(maxWidth);
2788 		float					leftHandPosition	= -1.0f;
2789 
2790 		for(deUint32 ndx = 0u; ndx < data.size(); ndx+=2u)
2791 		{
2792 			data[ndx][0] = leftHandPosition;
2793 			leftHandPosition += pixelSize;
2794 			data[ndx+1][0] = leftHandPosition;
2795 		}
2796 
2797 		deMemcpy(alloc.getHostPtr(), &data[0], data.size() * sizeof(tcu::Vec4));
2798 		flushAlloc(vk, device, alloc);
2799 	}
2800 
2801 	const Unique<VkFramebuffer>	framebuffer			(makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
2802 	const VkViewport			viewport			= makeViewport(maxWidth, 1u);
2803 	const VkRect2D				scissor				= makeRect2D(maxWidth, 1u);
2804 	const vk::VkDeviceSize		imageResultSize		= tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
2805 	Buffer						imageBufferResult	(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2806 	const VkDeviceSize			vertexBufferOffset	= 0u;
2807 
2808 	for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
2809 	{
2810 		totalIterations++;
2811 
2812 		beginCommandBuffer(vk, *cmdBuffer);
2813 		{
2814 
2815 			vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
2816 			vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
2817 
2818 			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
2819 
2820 			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
2821 
2822 			if (extraDataCount > 0)
2823 			{
2824 				vk.cmdBindDescriptorSets(*cmdBuffer,
2825 					VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2826 					&descriptorSet.get(), 0u, DE_NULL);
2827 			}
2828 
2829 			vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
2830 			vk.cmdDraw(*cmdBuffer, 2 * width, 1, 0, 0);
2831 
2832 			endRenderPass(vk, *cmdBuffer);
2833 
2834 			copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2835 			endCommandBuffer(vk, *cmdBuffer);
2836 
2837 			submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2838 		}
2839 		context.resetCommandPoolForVKSC(device, *cmdPool);
2840 
2841 		{
2842 			const Allocation& allocResult = imageBufferResult.getAllocation();
2843 			invalidateAlloc(vk, device, allocResult);
2844 
2845 			std::vector<const void*> datas;
2846 			datas.push_back(allocResult.getHostPtr());
2847 			if (!checkResult(internalData, datas, width/2u, subgroupSize))
2848 				failedIterations++;
2849 		}
2850 	}
2851 
2852 	if (0 < failedIterations)
2853 	{
2854 		unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
2855 
2856 		context.getTestContext().getLog()
2857 				<< TestLog::Message << valuesPassed << " / "
2858 				<< totalIterations << " values passed" << TestLog::EndMessage;
2859 		return tcu::TestStatus::fail("Failed!");
2860 	}
2861 
2862 	return tcu::TestStatus::pass("OK");
2863 }
2864 
check(std::vector<const void * > datas,deUint32 width,deUint32 ref)2865 bool vkt::subgroups::check (std::vector<const void*> datas, deUint32 width, deUint32 ref)
2866 {
2867 	const deUint32* data = reinterpret_cast<const deUint32*>(datas[0]);
2868 
2869 	for (deUint32 n = 0; n < width; ++n)
2870 	{
2871 		if (data[n] != ref)
2872 		{
2873 			return false;
2874 		}
2875 	}
2876 
2877 	return true;
2878 }
2879 
checkComputeOrMesh(std::vector<const void * > datas,const deUint32 numWorkgroups[3],const deUint32 localSize[3],deUint32 ref)2880 bool vkt::subgroups::checkComputeOrMesh (std::vector<const void*>	datas,
2881 										 const deUint32				numWorkgroups[3],
2882 										 const deUint32				localSize[3],
2883 										 deUint32					ref)
2884 {
2885 	const deUint32 globalSizeX = numWorkgroups[0] * localSize[0];
2886 	const deUint32 globalSizeY = numWorkgroups[1] * localSize[1];
2887 	const deUint32 globalSizeZ = numWorkgroups[2] * localSize[2];
2888 
2889 	return check(datas, globalSizeX * globalSizeY * globalSizeZ, ref);
2890 }
2891 
makeGeometryFrameBufferTest(Context & context,VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,subgroups::CheckResult checkResult)2892 tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTest (Context&				context,
2893 															 VkFormat				format,
2894 															 const SSBOData*		extraData,
2895 															 deUint32				extraDataCount,
2896 															 const void*			internalData,
2897 															 subgroups::CheckResult	checkResult)
2898 {
2899 	return makeGeometryFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, 0u, 0u);
2900 }
2901 
makeGeometryFrameBufferTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,subgroups::CheckResult checkResult,const deUint32 geometryShaderStageCreateFlags,const deUint32 requiredSubgroupSize)2902 tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTestRequiredSubgroupSize (Context&					context,
2903 																				 VkFormat					format,
2904 																				 const SSBOData*			extraData,
2905 																				 deUint32					extraDataCount,
2906 																				 const void*				internalData,
2907 																				 subgroups::CheckResult		checkResult,
2908 																				 const deUint32				geometryShaderStageCreateFlags,
2909 																				 const deUint32				requiredSubgroupSize)
2910 {
2911 	const DeviceInterface&					vk						= context.getDeviceInterface();
2912 	const VkDevice							device					= context.getDevice();
2913 	const deUint32							maxWidth				= getMaxWidth();
2914 	vector<de::SharedPtr<BufferOrImage> >	inputBuffers			(extraDataCount);
2915 	DescriptorSetLayoutBuilder				layoutBuilder;
2916 	DescriptorPoolBuilder					poolBuilder;
2917 	DescriptorSetUpdateBuilder				updateBuilder;
2918 	Move <VkDescriptorPool>					descriptorPool;
2919 	Move <VkDescriptorSet>					descriptorSet;
2920 	const Unique<VkShaderModule>			vertexShaderModule		(createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
2921 	const Unique<VkShaderModule>			geometryShaderModule	(createShaderModule(vk, device, context.getBinaryCollection().get("geometry"), 0u));
2922 	const Unique<VkShaderModule>			fragmentShaderModule	(createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
2923 	const Unique<VkRenderPass>				renderPass				(makeRenderPass(context, format));
2924 	const VkVertexInputBindingDescription	vertexInputBinding		=
2925 	{
2926 		0u,											//  deUint32			binding;
2927 		static_cast<deUint32>(sizeof(tcu::Vec4)),	//  deUint32			stride;
2928 		VK_VERTEX_INPUT_RATE_VERTEX					//  VkVertexInputRate	inputRate;
2929 	};
2930 	const VkVertexInputAttributeDescription	vertexInputAttribute	=
2931 	{
2932 		0u,									//  deUint32	location;
2933 		0u,									//  deUint32	binding;
2934 		VK_FORMAT_R32G32B32A32_SFLOAT,		//  VkFormat	format;
2935 		0u									//  deUint32	offset;
2936 	};
2937 
2938 	for (deUint32 i = 0u; i < extraDataCount; i++)
2939 	{
2940 		if (extraData[i].isImage())
2941 		{
2942 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
2943 		}
2944 		else
2945 		{
2946 			DE_ASSERT(extraData[i].isUBO());
2947 			vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
2948 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2949 		}
2950 		const Allocation& alloc = inputBuffers[i]->getAllocation();
2951 		initializeMemory(context, alloc, extraData[i]);
2952 	}
2953 
2954 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2955 		layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_GEOMETRY_BIT, DE_NULL);
2956 
2957 	const Unique<VkDescriptorSetLayout>		descriptorSetLayout		(layoutBuilder.build(vk, device));
2958 
2959 	const Unique<VkPipelineLayout>			pipelineLayout			(makePipelineLayout(vk, device, *descriptorSetLayout));
2960 
2961 	const deUint32 requiredSubgroupSizes[5] = {0u, 0u, 0u, requiredSubgroupSize, 0u};
2962 
2963 	const Unique<VkPipeline>				pipeline				(makeGraphicsPipeline(context, *pipelineLayout,
2964 																						  VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_GEOMETRY_BIT,
2965 																						  *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, DE_NULL, DE_NULL,
2966 																						  *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST, &vertexInputBinding, &vertexInputAttribute, true, format,
2967 																						  0u, 0u, 0u, geometryShaderStageCreateFlags, 0u,
2968 																						  requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
2969 
2970 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2971 		poolBuilder.addType(inputBuffers[ndx]->getType());
2972 
2973 	if (extraDataCount > 0)
2974 	{
2975 		descriptorPool = poolBuilder.build(vk, device,
2976 							VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2977 		descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
2978 	}
2979 
2980 	for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
2981 	{
2982 		if (inputBuffers[buffersNdx]->isImage())
2983 		{
2984 			VkDescriptorImageInfo info =
2985 				makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
2986 										inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2987 
2988 			updateBuilder.writeSingle(*descriptorSet,
2989 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2990 										inputBuffers[buffersNdx]->getType(), &info);
2991 		}
2992 		else
2993 		{
2994 			VkDescriptorBufferInfo info =
2995 				makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
2996 										0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
2997 
2998 			updateBuilder.writeSingle(*descriptorSet,
2999 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
3000 										inputBuffers[buffersNdx]->getType(), &info);
3001 		}
3002 	}
3003 
3004 	updateBuilder.update(vk, device);
3005 
3006 	const VkQueue							queue					= context.getUniversalQueue();
3007 	const deUint32							queueFamilyIndex		= context.getUniversalQueueFamilyIndex();
3008 	const Unique<VkCommandPool>				cmdPool					(makeCommandPool(vk, device, queueFamilyIndex));
3009 	const deUint32							subgroupSize			= getSubgroupSize(context);
3010 	const Unique<VkCommandBuffer>			cmdBuffer				(makeCommandBuffer(context, *cmdPool));
3011 	const vk::VkDeviceSize					vertexBufferSize		= maxWidth * sizeof(tcu::Vec4);
3012 	Buffer									vertexBuffer			(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
3013 	unsigned								totalIterations			= 0u;
3014 	unsigned								failedIterations		= 0u;
3015 	Image									discardableImage		(context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3016 
3017 	{
3018 		const Allocation&		alloc				= vertexBuffer.getAllocation();
3019 		std::vector<tcu::Vec4>	data				(maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
3020 		const float				pixelSize			= 2.0f / static_cast<float>(maxWidth);
3021 		float					leftHandPosition	= -1.0f;
3022 
3023 		for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
3024 		{
3025 			data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
3026 			leftHandPosition += pixelSize;
3027 		}
3028 
3029 		deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
3030 		flushAlloc(vk, device, alloc);
3031 	}
3032 
3033 	const Unique<VkFramebuffer>	framebuffer			(makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
3034 	const VkViewport			viewport			= makeViewport(maxWidth, 1u);
3035 	const VkRect2D				scissor				= makeRect2D(maxWidth, 1u);
3036 	const vk::VkDeviceSize		imageResultSize		= tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
3037 	Buffer						imageBufferResult	(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3038 	const VkDeviceSize			vertexBufferOffset	= 0u;
3039 
3040 	for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
3041 	{
3042 		totalIterations++;
3043 
3044 		for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
3045 		{
3046 			const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3047 			initializeMemory(context, alloc, extraData[ndx]);
3048 		}
3049 
3050 		beginCommandBuffer(vk, *cmdBuffer);
3051 		{
3052 			vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3053 
3054 			vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3055 
3056 			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
3057 
3058 			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3059 
3060 			if (extraDataCount > 0)
3061 			{
3062 				vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3063 					&descriptorSet.get(), 0u, DE_NULL);
3064 			}
3065 
3066 			vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
3067 
3068 			vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
3069 
3070 			endRenderPass(vk, *cmdBuffer);
3071 
3072 			copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3073 
3074 			endCommandBuffer(vk, *cmdBuffer);
3075 
3076 			submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3077 		}
3078 		context.resetCommandPoolForVKSC(device, *cmdPool);
3079 
3080 		{
3081 			const Allocation& allocResult = imageBufferResult.getAllocation();
3082 			invalidateAlloc(vk, device, allocResult);
3083 
3084 			std::vector<const void*> datas;
3085 			datas.push_back(allocResult.getHostPtr());
3086 			if (!checkResult(internalData, datas, width, subgroupSize))
3087 				failedIterations++;
3088 		}
3089 	}
3090 
3091 	if (0 < failedIterations)
3092 	{
3093 		unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3094 
3095 		context.getTestContext().getLog()
3096 				<< TestLog::Message << valuesPassed << " / "
3097 				<< totalIterations << " values passed" << TestLog::EndMessage;
3098 
3099 		return tcu::TestStatus::fail("Failed!");
3100 	}
3101 
3102 	return tcu::TestStatus::pass("OK");
3103 }
3104 
getPossibleGraphicsSubgroupStages(Context & context,const vk::VkShaderStageFlags testedStages)3105 vk::VkShaderStageFlags vkt::subgroups::getPossibleGraphicsSubgroupStages (Context& context, const vk::VkShaderStageFlags testedStages)
3106 {
3107 	const VkPhysicalDeviceSubgroupProperties&	subgroupProperties	= context.getSubgroupProperties();
3108 	VkShaderStageFlags							stages				= testedStages & subgroupProperties.supportedStages;
3109 
3110 	DE_ASSERT(isAllGraphicsStages(testedStages));
3111 
3112 	if (VK_SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
3113 	{
3114 		if ((stages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
3115 			TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
3116 		else
3117 			stages = VK_SHADER_STAGE_FRAGMENT_BIT;
3118 	}
3119 
3120 	if (static_cast<VkShaderStageFlags>(0u) == stages)
3121 		TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
3122 
3123 	return stages;
3124 }
3125 
allStages(Context & context,vk::VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,const VerificationFunctor & checkResult,const vk::VkShaderStageFlags shaderStage)3126 tcu::TestStatus vkt::subgroups::allStages (Context&						context,
3127 										   vk::VkFormat					format,
3128 										   const SSBOData*				extraData,
3129 										   deUint32						extraDataCount,
3130 										   const void*					internalData,
3131 										   const VerificationFunctor&	checkResult,
3132 										   const vk::VkShaderStageFlags	shaderStage)
3133 {
3134 	return vkt::subgroups::allStagesRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, shaderStage,
3135 														 0u, 0u, 0u, 0u, 0u, DE_NULL);
3136 }
3137 
allStagesRequiredSubgroupSize(Context & context,vk::VkFormat format,const SSBOData * extraDatas,deUint32 extraDatasCount,const void * internalData,const VerificationFunctor & checkResult,const vk::VkShaderStageFlags shaderStageTested,const deUint32 vertexShaderStageCreateFlags,const deUint32 tessellationControlShaderStageCreateFlags,const deUint32 tessellationEvalShaderStageCreateFlags,const deUint32 geometryShaderStageCreateFlags,const deUint32 fragmentShaderStageCreateFlags,const deUint32 requiredSubgroupSize[5])3138 tcu::TestStatus vkt::subgroups::allStagesRequiredSubgroupSize (Context&						context,
3139 															   vk::VkFormat					format,
3140 															   const SSBOData*				extraDatas,
3141 															   deUint32						extraDatasCount,
3142 															   const void*					internalData,
3143 															   const VerificationFunctor&	checkResult,
3144 															   const vk::VkShaderStageFlags	shaderStageTested,
3145 															   const deUint32				vertexShaderStageCreateFlags,
3146 															   const deUint32				tessellationControlShaderStageCreateFlags,
3147 															   const deUint32				tessellationEvalShaderStageCreateFlags,
3148 															   const deUint32				geometryShaderStageCreateFlags,
3149 															   const deUint32				fragmentShaderStageCreateFlags,
3150 															   const deUint32				requiredSubgroupSize[5])
3151 {
3152 	const DeviceInterface&			vk					= context.getDeviceInterface();
3153 	const VkDevice					device				= context.getDevice();
3154 	const deUint32					maxWidth			= getMaxWidth();
3155 	vector<VkShaderStageFlagBits>	stagesVector;
3156 	VkShaderStageFlags				shaderStageRequired	= (VkShaderStageFlags)0ull;
3157 
3158 	Move<VkShaderModule>			vertexShaderModule;
3159 	Move<VkShaderModule>			teCtrlShaderModule;
3160 	Move<VkShaderModule>			teEvalShaderModule;
3161 	Move<VkShaderModule>			geometryShaderModule;
3162 	Move<VkShaderModule>			fragmentShaderModule;
3163 
3164 	if (shaderStageTested & VK_SHADER_STAGE_VERTEX_BIT)
3165 	{
3166 		stagesVector.push_back(VK_SHADER_STAGE_VERTEX_BIT);
3167 	}
3168 	if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
3169 	{
3170 		stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
3171 		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
3172 		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
3173 	}
3174 	if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
3175 	{
3176 		stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
3177 		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
3178 		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
3179 	}
3180 	if (shaderStageTested & VK_SHADER_STAGE_GEOMETRY_BIT)
3181 	{
3182 		stagesVector.push_back(VK_SHADER_STAGE_GEOMETRY_BIT);
3183 		const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
3184 		shaderStageRequired |=  (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
3185 	}
3186 	if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
3187 	{
3188 		const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
3189 		shaderStageRequired |=  (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
3190 	}
3191 
3192 	const deUint32	stagesCount	= static_cast<deUint32>(stagesVector.size());
3193 	const string	vert		= (shaderStageRequired & VK_SHADER_STAGE_VERTEX_BIT)					? "vert_noSubgroup"		: "vert";
3194 	const string	tesc		= (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)		? "tesc_noSubgroup"		: "tesc";
3195 	const string	tese		= (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)	? "tese_noSubgroup"		: "tese";
3196 
3197 	shaderStageRequired = shaderStageTested | shaderStageRequired;
3198 
3199 	vertexShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(vert), 0u);
3200 	if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
3201 	{
3202 		teCtrlShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tesc), 0u);
3203 		teEvalShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tese), 0u);
3204 	}
3205 	if (shaderStageRequired & VK_SHADER_STAGE_GEOMETRY_BIT)
3206 	{
3207 		if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
3208 		{
3209 			// tessellation shaders output line primitives
3210 			geometryShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("geometry_lines"), 0u);
3211 		}
3212 		else
3213 		{
3214 			// otherwise points are processed by geometry shader
3215 			geometryShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("geometry_points"), 0u);
3216 		}
3217 	}
3218 	if (shaderStageRequired & VK_SHADER_STAGE_FRAGMENT_BIT)
3219 		fragmentShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u);
3220 
3221 	std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(stagesCount + extraDatasCount);
3222 
3223 	DescriptorSetLayoutBuilder layoutBuilder;
3224 
3225 	// The implicit result SSBO we use to store our outputs from the shader
3226 	for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
3227 	{
3228 		const VkDeviceSize shaderSize = (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? maxWidth * 2 : maxWidth;
3229 		const VkDeviceSize size = getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize;
3230 		inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT));
3231 
3232 		layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1, stagesVector[ndx], getResultBinding(stagesVector[ndx]), DE_NULL);
3233 	}
3234 
3235 	for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
3236 	{
3237 		const deUint32 datasNdx = ndx - stagesCount;
3238 		if (extraDatas[datasNdx].isImage())
3239 		{
3240 			inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
3241 		}
3242 		else
3243 		{
3244 			const auto usage	= (extraDatas[datasNdx].isUBO() ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT : VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
3245 			const auto size		= getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) * extraDatas[datasNdx].numElements;
3246 			inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, usage));
3247 		}
3248 
3249 		const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3250 		initializeMemory(context, alloc, extraDatas[datasNdx]);
3251 
3252 		layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1,
3253 								extraDatas[datasNdx].stages, extraDatas[datasNdx].binding, DE_NULL);
3254 	}
3255 
3256 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
3257 
3258 	const Unique<VkPipelineLayout> pipelineLayout(
3259 		makePipelineLayout(vk, device, *descriptorSetLayout));
3260 
3261 	const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
3262 	const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
3263 														   shaderStageRequired,
3264 														   *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, *teCtrlShaderModule, *teEvalShaderModule,
3265 														   *renderPass,
3266 														   (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
3267 														   DE_NULL, DE_NULL, false, VK_FORMAT_R32G32B32A32_SFLOAT,
3268 														   vertexShaderStageCreateFlags, tessellationControlShaderStageCreateFlags, tessellationEvalShaderStageCreateFlags,
3269 														   geometryShaderStageCreateFlags, fragmentShaderStageCreateFlags, requiredSubgroupSize));
3270 
3271 	Move <VkDescriptorPool>	descriptorPool;
3272 	Move <VkDescriptorSet>	descriptorSet;
3273 
3274 	if (inputBuffers.size() > 0)
3275 	{
3276 		DescriptorPoolBuilder poolBuilder;
3277 
3278 		for (deUint32 ndx = 0u; ndx < static_cast<deUint32>(inputBuffers.size()); ndx++)
3279 		{
3280 			poolBuilder.addType(inputBuffers[ndx]->getType());
3281 		}
3282 
3283 		descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3284 
3285 		// Create descriptor set
3286 		descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3287 
3288 		DescriptorSetUpdateBuilder updateBuilder;
3289 
3290 		for (deUint32 ndx = 0u; ndx < stagesCount + extraDatasCount; ndx++)
3291 		{
3292 			deUint32 binding;
3293 			if (ndx < stagesCount) binding = getResultBinding(stagesVector[ndx]);
3294 			else binding = extraDatas[ndx -stagesCount].binding;
3295 
3296 			if (inputBuffers[ndx]->isImage())
3297 			{
3298 				VkDescriptorImageInfo info =
3299 					makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
3300 											inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3301 
3302 				updateBuilder.writeSingle(	*descriptorSet,
3303 											DescriptorSetUpdateBuilder::Location::binding(binding),
3304 											inputBuffers[ndx]->getType(), &info);
3305 			}
3306 			else
3307 			{
3308 				VkDescriptorBufferInfo info =
3309 					makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(),
3310 							0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
3311 
3312 				updateBuilder.writeSingle(	*descriptorSet,
3313 													DescriptorSetUpdateBuilder::Location::binding(binding),
3314 													inputBuffers[ndx]->getType(), &info);
3315 			}
3316 		}
3317 
3318 		updateBuilder.update(vk, device);
3319 	}
3320 
3321 	{
3322 		const VkQueue					queue					= context.getUniversalQueue();
3323 		const deUint32					queueFamilyIndex		= context.getUniversalQueueFamilyIndex();
3324 		const Unique<VkCommandPool>		cmdPool					(makeCommandPool(vk, device, queueFamilyIndex));
3325 		const deUint32					subgroupSize			= getSubgroupSize(context);
3326 		const Unique<VkCommandBuffer>	cmdBuffer				(makeCommandBuffer(context, *cmdPool));
3327 		unsigned						totalIterations			= 0u;
3328 		unsigned						failedIterations		= 0u;
3329 		Image							resultImage				(context, maxWidth, 1, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3330 		const Unique<VkFramebuffer>		framebuffer				(makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), maxWidth, 1u));
3331 		const VkViewport				viewport				= makeViewport(maxWidth, 1u);
3332 		const VkRect2D					scissor					= makeRect2D(maxWidth, 1u);
3333 		const vk::VkDeviceSize			imageResultSize			= tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
3334 		Buffer							imageBufferResult		(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3335 		const VkImageSubresourceRange	subresourceRange		=
3336 		{
3337 			VK_IMAGE_ASPECT_COLOR_BIT,											//VkImageAspectFlags	aspectMask
3338 			0u,																	//deUint32				baseMipLevel
3339 			1u,																	//deUint32				levelCount
3340 			0u,																	//deUint32				baseArrayLayer
3341 			1u																	//deUint32				layerCount
3342 		};
3343 
3344 		const VkImageMemoryBarrier		colorAttachmentBarrier	= makeImageMemoryBarrier(
3345 			(VkAccessFlags)0u, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
3346 			VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
3347 			resultImage.getImage(), subresourceRange);
3348 
3349 		for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
3350 		{
3351 			for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
3352 			{
3353 				// re-init the data
3354 				const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3355 				initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
3356 			}
3357 
3358 			totalIterations++;
3359 
3360 			beginCommandBuffer(vk, *cmdBuffer);
3361 
3362 			vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &colorAttachmentBarrier);
3363 
3364 			vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3365 
3366 			vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3367 
3368 			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
3369 
3370 			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3371 
3372 			if (stagesCount + extraDatasCount > 0)
3373 				vk.cmdBindDescriptorSets(*cmdBuffer,
3374 						VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3375 						&descriptorSet.get(), 0u, DE_NULL);
3376 
3377 			vk.cmdDraw(*cmdBuffer, width, 1, 0, 0);
3378 
3379 			endRenderPass(vk, *cmdBuffer);
3380 
3381 			copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(width, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3382 
3383 			endCommandBuffer(vk, *cmdBuffer);
3384 
3385 			submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3386 
3387 			for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
3388 			{
3389 				std::vector<const void*> datas;
3390 				if (!inputBuffers[ndx]->isImage())
3391 				{
3392 					const Allocation& resultAlloc = inputBuffers[ndx]->getAllocation();
3393 					invalidateAlloc(vk, device, resultAlloc);
3394 					// we always have our result data first
3395 					datas.push_back(resultAlloc.getHostPtr());
3396 				}
3397 
3398 				for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
3399 				{
3400 					const deUint32 datasNdx = index - stagesCount;
3401 					if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
3402 					{
3403 						const Allocation& resultAlloc = inputBuffers[index]->getAllocation();
3404 						invalidateAlloc(vk, device, resultAlloc);
3405 						// we always have our result data first
3406 						datas.push_back(resultAlloc.getHostPtr());
3407 					}
3408 				}
3409 
3410 				// Any stage in the vertex pipeline may be called multiple times per vertex, so we may need >= non-strict comparisons.
3411 				const bool		multiCall	= (	stagesVector[ndx] == VK_SHADER_STAGE_VERTEX_BIT						||
3412 												stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT		||
3413 												stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT	||
3414 												stagesVector[ndx] == VK_SHADER_STAGE_GEOMETRY_BIT					);
3415 				const deUint32	usedWidth	= ((stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? width * 2 : width);
3416 
3417 				if (!checkResult(internalData, datas, usedWidth, subgroupSize, multiCall))
3418 					failedIterations++;
3419 			}
3420 			if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
3421 			{
3422 				std::vector<const void*> datas;
3423 				const Allocation& resultAlloc = imageBufferResult.getAllocation();
3424 				invalidateAlloc(vk, device, resultAlloc);
3425 
3426 				// we always have our result data first
3427 				datas.push_back(resultAlloc.getHostPtr());
3428 
3429 				for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
3430 				{
3431 					const deUint32 datasNdx = index - stagesCount;
3432 					if (VK_SHADER_STAGE_FRAGMENT_BIT & extraDatas[datasNdx].stages && (!inputBuffers[index]->isImage()))
3433 					{
3434 						const Allocation& alloc = inputBuffers[index]->getAllocation();
3435 						invalidateAlloc(vk, device, alloc);
3436 						// we always have our result data first
3437 						datas.push_back(alloc.getHostPtr());
3438 					}
3439 				}
3440 
3441 				if (!checkResult(internalData, datas, width, subgroupSize, false))
3442 					failedIterations++;
3443 			}
3444 
3445 			context.resetCommandPoolForVKSC(device, *cmdPool);
3446 		}
3447 
3448 		if (0 < failedIterations)
3449 		{
3450 			unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3451 
3452 			context.getTestContext().getLog()
3453 				<< TestLog::Message << valuesPassed << " / "
3454 				<< totalIterations << " values passed" << TestLog::EndMessage;
3455 
3456 			return tcu::TestStatus::fail("Failed!");
3457 		}
3458 	}
3459 
3460 	return tcu::TestStatus::pass("OK");
3461 }
3462 
makeVertexFrameBufferTest(Context & context,vk::VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,subgroups::CheckResult checkResult)3463 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTest (Context&					context,
3464 														   vk::VkFormat				format,
3465 														   const SSBOData*			extraData,
3466 														   deUint32					extraDataCount,
3467 														   const void*				internalData,
3468 														   subgroups::CheckResult	checkResult)
3469 {
3470 	return makeVertexFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, 0u, 0u);
3471 }
3472 
makeVertexFrameBufferTestRequiredSubgroupSize(Context & context,vk::VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,subgroups::CheckResult checkResult,const deUint32 vertexShaderStageCreateFlags,const deUint32 requiredSubgroupSize)3473 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTestRequiredSubgroupSize (Context&					context,
3474 																			   vk::VkFormat				format,
3475 																			   const SSBOData*			extraData,
3476 																			   deUint32					extraDataCount,
3477 																			   const void*				internalData,
3478 																			   subgroups::CheckResult	checkResult,
3479 																			   const deUint32			vertexShaderStageCreateFlags,
3480 																			   const deUint32			requiredSubgroupSize)
3481 {
3482 	const DeviceInterface&					vk						= context.getDeviceInterface();
3483 	const VkDevice							device					= context.getDevice();
3484 	const VkQueue							queue					= context.getUniversalQueue();
3485 	const deUint32							maxWidth				= getMaxWidth();
3486 	const deUint32							queueFamilyIndex		= context.getUniversalQueueFamilyIndex();
3487 	vector<de::SharedPtr<BufferOrImage> >	inputBuffers			(extraDataCount);
3488 	DescriptorSetLayoutBuilder				layoutBuilder;
3489 	const Unique<VkShaderModule>			vertexShaderModule		(createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
3490 	const Unique<VkShaderModule>			fragmentShaderModule	(createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
3491 	const Unique<VkRenderPass>				renderPass				(makeRenderPass(context, format));
3492 	const VkVertexInputBindingDescription	vertexInputBinding		=
3493 	{
3494 		0u,											// binding;
3495 		static_cast<deUint32>(sizeof(tcu::Vec4)),	// stride;
3496 		VK_VERTEX_INPUT_RATE_VERTEX					// inputRate
3497 	};
3498 	const VkVertexInputAttributeDescription	vertexInputAttribute	=
3499 	{
3500 		0u,
3501 		0u,
3502 		VK_FORMAT_R32G32B32A32_SFLOAT,
3503 		0u
3504 	};
3505 
3506 	for (deUint32 i = 0u; i < extraDataCount; i++)
3507 	{
3508 		if (extraData[i].isImage())
3509 		{
3510 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
3511 		}
3512 		else
3513 		{
3514 			DE_ASSERT(extraData[i].isUBO());
3515 			vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
3516 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
3517 		}
3518 		const Allocation& alloc = inputBuffers[i]->getAllocation();
3519 		initializeMemory(context, alloc, extraData[i]);
3520 	}
3521 
3522 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
3523 		layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_VERTEX_BIT, DE_NULL);
3524 
3525 	const Unique<VkDescriptorSetLayout>		descriptorSetLayout		(layoutBuilder.build(vk, device));
3526 
3527 	const Unique<VkPipelineLayout>			pipelineLayout			(makePipelineLayout(vk, device, *descriptorSetLayout));
3528 
3529 	const deUint32 requiredSubgroupSizes[5] = {requiredSubgroupSize, 0u, 0u, 0u, 0u};
3530 	const Unique<VkPipeline>				pipeline				(makeGraphicsPipeline(context, *pipelineLayout,
3531 																						  VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
3532 																						  *vertexShaderModule, *fragmentShaderModule,
3533 																						  DE_NULL, DE_NULL, DE_NULL,
3534 																						  *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
3535 																						  &vertexInputBinding, &vertexInputAttribute, true, format,
3536 																						  vertexShaderStageCreateFlags, 0u, 0u, 0u, 0u,
3537 																						  requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
3538 	DescriptorPoolBuilder					poolBuilder;
3539 	DescriptorSetUpdateBuilder				updateBuilder;
3540 
3541 
3542 	for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
3543 		poolBuilder.addType(inputBuffers[ndx]->getType());
3544 
3545 	Move <VkDescriptorPool>					descriptorPool;
3546 	Move <VkDescriptorSet>					descriptorSet;
3547 
3548 	if (extraDataCount > 0)
3549 	{
3550 		descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3551 		descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3552 	}
3553 
3554 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
3555 	{
3556 		const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3557 		initializeMemory(context, alloc, extraData[ndx]);
3558 	}
3559 
3560 	for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
3561 	{
3562 		if (inputBuffers[buffersNdx]->isImage())
3563 		{
3564 			VkDescriptorImageInfo info =
3565 				makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
3566 										inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3567 
3568 			updateBuilder.writeSingle(*descriptorSet,
3569 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
3570 										inputBuffers[buffersNdx]->getType(), &info);
3571 		}
3572 		else
3573 		{
3574 			VkDescriptorBufferInfo info =
3575 				makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
3576 										0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
3577 
3578 			updateBuilder.writeSingle(*descriptorSet,
3579 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
3580 										inputBuffers[buffersNdx]->getType(), &info);
3581 		}
3582 	}
3583 	updateBuilder.update(vk, device);
3584 
3585 	const Unique<VkCommandPool>				cmdPool					(makeCommandPool(vk, device, queueFamilyIndex));
3586 
3587 	const deUint32							subgroupSize			= getSubgroupSize(context);
3588 
3589 	const Unique<VkCommandBuffer>			cmdBuffer				(makeCommandBuffer(context, *cmdPool));
3590 
3591 	const vk::VkDeviceSize					vertexBufferSize		= maxWidth * sizeof(tcu::Vec4);
3592 	Buffer									vertexBuffer			(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
3593 
3594 	unsigned								totalIterations			= 0u;
3595 	unsigned								failedIterations		= 0u;
3596 
3597 	Image									discardableImage		(context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3598 
3599 	{
3600 		const Allocation&		alloc				= vertexBuffer.getAllocation();
3601 		std::vector<tcu::Vec4>	data				(maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
3602 		const float				pixelSize			= 2.0f / static_cast<float>(maxWidth);
3603 		float					leftHandPosition	= -1.0f;
3604 
3605 		for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
3606 		{
3607 			data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
3608 			leftHandPosition += pixelSize;
3609 		}
3610 
3611 		deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
3612 		flushAlloc(vk, device, alloc);
3613 	}
3614 
3615 	const Unique<VkFramebuffer>	framebuffer			(makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
3616 	const VkViewport			viewport			= makeViewport(maxWidth, 1u);
3617 	const VkRect2D				scissor				= makeRect2D(maxWidth, 1u);
3618 	const vk::VkDeviceSize		imageResultSize		= tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
3619 	Buffer						imageBufferResult	(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3620 	const VkDeviceSize			vertexBufferOffset	= 0u;
3621 
3622 	for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
3623 	{
3624 		totalIterations++;
3625 
3626 		for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
3627 		{
3628 			const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3629 			initializeMemory(context, alloc, extraData[ndx]);
3630 		}
3631 
3632 		beginCommandBuffer(vk, *cmdBuffer);
3633 		{
3634 			vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3635 
3636 			vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3637 
3638 			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
3639 
3640 			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3641 
3642 			if (extraDataCount > 0)
3643 			{
3644 				vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3645 					&descriptorSet.get(), 0u, DE_NULL);
3646 			}
3647 
3648 			vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
3649 
3650 			vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
3651 
3652 			endRenderPass(vk, *cmdBuffer);
3653 
3654 			copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3655 
3656 			endCommandBuffer(vk, *cmdBuffer);
3657 
3658 			submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3659 		}
3660 		context.resetCommandPoolForVKSC(device, *cmdPool);
3661 
3662 		{
3663 			const Allocation& allocResult = imageBufferResult.getAllocation();
3664 			invalidateAlloc(vk, device, allocResult);
3665 
3666 			std::vector<const void*> datas;
3667 			datas.push_back(allocResult.getHostPtr());
3668 			if (!checkResult(internalData, datas, width, subgroupSize))
3669 				failedIterations++;
3670 		}
3671 	}
3672 
3673 	if (0 < failedIterations)
3674 	{
3675 		unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3676 
3677 		context.getTestContext().getLog()
3678 			<< TestLog::Message << valuesPassed << " / "
3679 			<< totalIterations << " values passed" << TestLog::EndMessage;
3680 
3681 		return tcu::TestStatus::fail("Failed!");
3682 	}
3683 
3684 	return tcu::TestStatus::pass("OK");
3685 }
3686 
makeFragmentFrameBufferTest(Context & context,VkFormat format,const SSBOData * extraDatas,deUint32 extraDatasCount,const void * internalData,CheckResultFragment checkResult)3687 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTest (Context&				context,
3688 															 VkFormat				format,
3689 															 const SSBOData*		extraDatas,
3690 															 deUint32				extraDatasCount,
3691 															 const void*			internalData,
3692 															 CheckResultFragment	checkResult)
3693 {
3694 	return makeFragmentFrameBufferTestRequiredSubgroupSize(context, format, extraDatas, extraDatasCount, internalData, checkResult, 0u, 0u);
3695 }
3696 
makeFragmentFrameBufferTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * extraDatas,deUint32 extraDatasCount,const void * internalData,CheckResultFragment checkResult,const deUint32 fragmentShaderStageCreateFlags,const deUint32 requiredSubgroupSize)3697 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTestRequiredSubgroupSize (Context&				context,
3698 																				 VkFormat				format,
3699 																				 const SSBOData*		extraDatas,
3700 																				 deUint32				extraDatasCount,
3701 																				 const void*			internalData,
3702 																				 CheckResultFragment	checkResult,
3703 																				 const deUint32			fragmentShaderStageCreateFlags,
3704 																				 const deUint32			requiredSubgroupSize)
3705 {
3706 	const DeviceInterface&						vk						= context.getDeviceInterface();
3707 	const VkDevice								device					= context.getDevice();
3708 	const VkQueue								queue					= context.getUniversalQueue();
3709 	const deUint32								queueFamilyIndex		= context.getUniversalQueueFamilyIndex();
3710 	const Unique<VkShaderModule>				vertexShaderModule		(createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
3711 	const Unique<VkShaderModule>				fragmentShaderModule	(createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
3712 	std::vector< de::SharedPtr<BufferOrImage> > inputBuffers			(extraDatasCount);
3713 
3714 	for (deUint32 i = 0; i < extraDatasCount; i++)
3715 	{
3716 		if (extraDatas[i].isImage())
3717 		{
3718 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[i].numElements), 1, extraDatas[i].format));
3719 		}
3720 		else
3721 		{
3722 			DE_ASSERT(extraDatas[i].isUBO());
3723 
3724 			const vk::VkDeviceSize	size	= getElementSizeInBytes(extraDatas[i].format, extraDatas[i].layout) * extraDatas[i].numElements;
3725 
3726 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
3727 		}
3728 
3729 		const Allocation& alloc = inputBuffers[i]->getAllocation();
3730 
3731 		initializeMemory(context, alloc, extraDatas[i]);
3732 	}
3733 
3734 	DescriptorSetLayoutBuilder layoutBuilder;
3735 
3736 	for (deUint32 i = 0; i < extraDatasCount; i++)
3737 	{
3738 		layoutBuilder.addBinding(inputBuffers[i]->getType(), 1, VK_SHADER_STAGE_FRAGMENT_BIT, DE_NULL);
3739 	}
3740 
3741 	const Unique<VkDescriptorSetLayout>	descriptorSetLayout(layoutBuilder.build(vk, device));
3742 	const Unique<VkPipelineLayout>		pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
3743 	const Unique<VkRenderPass>			renderPass(makeRenderPass(context, format));
3744 	const deUint32						requiredSubgroupSizes[5] = {0u, 0u, 0u, 0u, requiredSubgroupSize};
3745 	const Unique<VkPipeline>			pipeline(makeGraphicsPipeline(context,
3746 																	  *pipelineLayout,
3747 																	  VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
3748 																	  *vertexShaderModule,
3749 																	  *fragmentShaderModule,
3750 																	  DE_NULL,
3751 																	  DE_NULL,
3752 																	  DE_NULL,
3753 																	  *renderPass,
3754 																	  VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
3755 																	  DE_NULL,
3756 																	  DE_NULL,
3757 																	  true,
3758 																	  VK_FORMAT_R32G32B32A32_SFLOAT,
3759 																	  0u,
3760 																	  0u,
3761 																	  0u,
3762 																	  0u,
3763 																	  fragmentShaderStageCreateFlags,
3764 																	  requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
3765 	DescriptorPoolBuilder				poolBuilder;
3766 
3767 	// To stop validation complaining, always add at least one type to pool.
3768 	poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3769 	for (deUint32 i = 0; i < extraDatasCount; i++)
3770 	{
3771 		poolBuilder.addType(inputBuffers[i]->getType());
3772 	}
3773 
3774 	Move<VkDescriptorPool> descriptorPool;
3775 	// Create descriptor set
3776 	Move<VkDescriptorSet> descriptorSet;
3777 
3778 	if (extraDatasCount > 0)
3779 	{
3780 		descriptorPool	= poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3781 
3782 		descriptorSet	= makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3783 	}
3784 
3785 	DescriptorSetUpdateBuilder updateBuilder;
3786 
3787 	for (deUint32 i = 0; i < extraDatasCount; i++)
3788 	{
3789 		if (inputBuffers[i]->isImage())
3790 		{
3791 			const VkDescriptorImageInfo info = makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(), inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3792 
3793 			updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i), inputBuffers[i]->getType(), &info);
3794 		}
3795 		else
3796 		{
3797 			const VkDescriptorBufferInfo	info	= makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, inputBuffers[i]->getAsBuffer()->getSize());
3798 
3799 			updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i), inputBuffers[i]->getType(), &info);
3800 		}
3801 	}
3802 
3803 	if (extraDatasCount > 0)
3804 		updateBuilder.update(vk, device);
3805 
3806 	const Unique<VkCommandPool>		cmdPool				(makeCommandPool(vk, device, queueFamilyIndex));
3807 	const deUint32					subgroupSize		= getSubgroupSize(context);
3808 	const Unique<VkCommandBuffer>	cmdBuffer			(makeCommandBuffer(context, *cmdPool));
3809 	unsigned						totalIterations		= 0;
3810 	unsigned						failedIterations	= 0;
3811 
3812 	for (deUint32 width = 8; width <= subgroupSize; width *= 2)
3813 	{
3814 		for (deUint32 height = 8; height <= subgroupSize; height *= 2)
3815 		{
3816 			totalIterations++;
3817 
3818 			// re-init the data
3819 			for (deUint32 i = 0; i < extraDatasCount; i++)
3820 			{
3821 				const Allocation& alloc = inputBuffers[i]->getAllocation();
3822 
3823 				initializeMemory(context, alloc, extraDatas[i]);
3824 			}
3825 
3826 			const VkDeviceSize			formatSize				= getFormatSizeInBytes(format);
3827 			const VkDeviceSize			resultImageSizeInBytes	= width * height * formatSize;
3828 			Image						resultImage				(context, width, height, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3829 			Buffer						resultBuffer			(context, resultImageSizeInBytes, VK_IMAGE_USAGE_TRANSFER_DST_BIT);
3830 			const Unique<VkFramebuffer>	framebuffer				(makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), width, height));
3831 			VkViewport					viewport				= makeViewport(width, height);
3832 			VkRect2D					scissor					= {{0, 0}, {width, height}};
3833 
3834 			beginCommandBuffer(vk, *cmdBuffer);
3835 
3836 			vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3837 
3838 			vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3839 
3840 			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, width, height), tcu::Vec4(0.0f));
3841 
3842 			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3843 
3844 			if (extraDatasCount > 0)
3845 				vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
3846 
3847 			vk.cmdDraw(*cmdBuffer, 4, 1, 0, 0);
3848 
3849 			endRenderPass(vk, *cmdBuffer);
3850 
3851 			copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), resultBuffer.getBuffer(), tcu::IVec2(width, height), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3852 
3853 			endCommandBuffer(vk, *cmdBuffer);
3854 
3855 			submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3856 
3857 			std::vector<const void*> datas;
3858 			{
3859 				const Allocation& resultAlloc = resultBuffer.getAllocation();
3860 				invalidateAlloc(vk, device, resultAlloc);
3861 
3862 				// we always have our result data first
3863 				datas.push_back(resultAlloc.getHostPtr());
3864 			}
3865 
3866 			if (!checkResult(internalData, datas, width, height, subgroupSize))
3867 			{
3868 				failedIterations++;
3869 			}
3870 
3871 			context.resetCommandPoolForVKSC(device, *cmdPool);
3872 		}
3873 	}
3874 
3875 	if (0 < failedIterations)
3876 	{
3877 		unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3878 
3879 		context.getTestContext().getLog()
3880 			<< TestLog::Message << valuesPassed << " / "
3881 			<< totalIterations << " values passed" << TestLog::EndMessage;
3882 
3883 		return tcu::TestStatus::fail("Failed!");
3884 	}
3885 
3886 	return tcu::TestStatus::pass("OK");
3887 }
3888 
makeComputePipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderModule shaderModule,const deUint32 pipelineShaderStageFlags,const deUint32 pipelineCreateFlags,VkPipeline basePipelineHandle,deUint32 localSizeX,deUint32 localSizeY,deUint32 localSizeZ,deUint32 requiredSubgroupSize)3889 Move<VkPipeline> makeComputePipeline (Context&					context,
3890 									  const VkPipelineLayout	pipelineLayout,
3891 									  const VkShaderModule		shaderModule,
3892 									  const deUint32			pipelineShaderStageFlags,
3893 									  const deUint32			pipelineCreateFlags,
3894 									  VkPipeline				basePipelineHandle,
3895 									  deUint32					localSizeX,
3896 									  deUint32					localSizeY,
3897 									  deUint32					localSizeZ,
3898 									  deUint32					requiredSubgroupSize)
3899 {
3900 	const deUint32														localSize[3]				= {localSizeX, localSizeY, localSizeZ};
3901 	const vk::VkSpecializationMapEntry									entries[3]					=
3902 	{
3903 		{0, sizeof(deUint32) * 0, sizeof(deUint32)},
3904 		{1, sizeof(deUint32) * 1, sizeof(deUint32)},
3905 		{2, static_cast<deUint32>(sizeof(deUint32) * 2), sizeof(deUint32)},
3906 	};
3907 	const vk::VkSpecializationInfo										info						=
3908 	{
3909 		/* mapEntryCount = */ 3,
3910 		/* pMapEntries   = */ entries,
3911 		/* dataSize      = */ sizeof(localSize),
3912 		/* pData         = */ localSize
3913 	};
3914 	const vk::VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT	subgroupSizeCreateInfo		=
3915 	{
3916 		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,	// VkStructureType    sType;
3917 		DE_NULL,																		// void*              pNext;
3918 		requiredSubgroupSize															// uint32_t           requiredSubgroupSize;
3919 	};
3920 	const vk::VkPipelineShaderStageCreateInfo							pipelineShaderStageParams	=
3921 	{
3922 		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,				// VkStructureType					sType;
3923 		(requiredSubgroupSize != 0u ? &subgroupSizeCreateInfo : DE_NULL),	// const void*						pNext;
3924 		pipelineShaderStageFlags,											// VkPipelineShaderStageCreateFlags	flags;
3925 		VK_SHADER_STAGE_COMPUTE_BIT,										// VkShaderStageFlagBits			stage;
3926 		shaderModule,														// VkShaderModule					module;
3927 		"main",																// const char*						pName;
3928 		&info,																// const VkSpecializationInfo*		pSpecializationInfo;
3929 	};
3930 	const vk::VkComputePipelineCreateInfo								pipelineCreateInfo			=
3931 	{
3932 		VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,	// VkStructureType	sType;
3933 		DE_NULL,										// const void*						pNext;
3934 		pipelineCreateFlags,							// VkPipelineCreateFlags			flags;
3935 		pipelineShaderStageParams,						// VkPipelineShaderStageCreateInfo	stage;
3936 		pipelineLayout,									// VkPipelineLayout					layout;
3937 		basePipelineHandle,								// VkPipeline						basePipelineHandle;
3938 		-1,												// deInt32							basePipelineIndex;
3939 	};
3940 
3941 	return createComputePipeline(context.getDeviceInterface(), context.getDevice(), DE_NULL, &pipelineCreateInfo);
3942 }
3943 
3944 #ifndef CTS_USES_VULKANSC
makeMeshPipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderModule taskModule,const VkShaderModule meshModule,const deUint32 pipelineShaderStageFlags,const deUint32 pipelineCreateFlags,VkPipeline basePipelineHandle,deUint32 localSizeX,deUint32 localSizeY,deUint32 localSizeZ,deUint32 requiredSubgroupSize,const VkRenderPass renderPass)3945 Move<VkPipeline> makeMeshPipeline (Context&					context,
3946 								   const VkPipelineLayout	pipelineLayout,
3947 								   const VkShaderModule		taskModule,
3948 								   const VkShaderModule		meshModule,
3949 								   const deUint32			pipelineShaderStageFlags,
3950 								   const deUint32			pipelineCreateFlags,
3951 								   VkPipeline				basePipelineHandle,
3952 								   deUint32					localSizeX,
3953 								   deUint32					localSizeY,
3954 								   deUint32					localSizeZ,
3955 								   deUint32					requiredSubgroupSize,
3956 								   const VkRenderPass		renderPass)
3957 {
3958 	const deUint32														localSize[3]				= {localSizeX, localSizeY, localSizeZ};
3959 	const vk::VkSpecializationMapEntry									entries[3]					=
3960 	{
3961 		{0, sizeof(deUint32) * 0, sizeof(deUint32)},
3962 		{1, sizeof(deUint32) * 1, sizeof(deUint32)},
3963 		{2, static_cast<deUint32>(sizeof(deUint32) * 2), sizeof(deUint32)},
3964 	};
3965 	const vk::VkSpecializationInfo										info						=
3966 	{
3967 		/* mapEntryCount = */ 3,
3968 		/* pMapEntries   = */ entries,
3969 		/* dataSize      = */ sizeof(localSize),
3970 		/* pData         = */ localSize
3971 	};
3972 	const vk::VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT	subgroupSizeCreateInfo		=
3973 	{
3974 		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,	// VkStructureType    sType;
3975 		DE_NULL,																		// void*              pNext;
3976 		requiredSubgroupSize															// uint32_t           requiredSubgroupSize;
3977 	};
3978 
3979 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT*		pSubgroupSizeCreateInfo		= ((requiredSubgroupSize != 0u) ? &subgroupSizeCreateInfo : nullptr);
3980 
3981 	std::vector<VkPipelineShaderStageCreateInfo>						shaderStageParams;
3982 	vk::VkPipelineShaderStageCreateInfo									pipelineShaderStageParams	=
3983 	{
3984 		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType					sType;
3985 		nullptr,												// const void*						pNext;
3986 		pipelineShaderStageFlags,								// VkPipelineShaderStageCreateFlags	flags;
3987 		VK_SHADER_STAGE_FLAG_BITS_MAX_ENUM,						// VkShaderStageFlagBits			stage;
3988 		DE_NULL,												// VkShaderModule					module;
3989 		"main",													// const char*						pName;
3990 		&info,													// const VkSpecializationInfo*		pSpecializationInfo;
3991 	};
3992 
3993 	if (taskModule != DE_NULL)
3994 	{
3995 		pipelineShaderStageParams.module	= taskModule;
3996 		pipelineShaderStageParams.pNext		= pSubgroupSizeCreateInfo;
3997 		pipelineShaderStageParams.stage		= VK_SHADER_STAGE_TASK_BIT_EXT;
3998 		shaderStageParams.push_back(pipelineShaderStageParams);
3999 	}
4000 
4001 	if (meshModule != DE_NULL)
4002 	{
4003 		pipelineShaderStageParams.module	= meshModule;
4004 		pipelineShaderStageParams.pNext		= ((taskModule == DE_NULL) ? pSubgroupSizeCreateInfo : nullptr);
4005 		pipelineShaderStageParams.stage		= VK_SHADER_STAGE_MESH_BIT_EXT;
4006 		shaderStageParams.push_back(pipelineShaderStageParams);
4007 	}
4008 
4009 	const std::vector<VkViewport>	viewports	(1u, makeViewport(1u, 1u));
4010 	const std::vector<VkRect2D>		scissors	(1u, makeRect2D(1u, 1u));
4011 
4012 	return makeGraphicsPipeline(context.getDeviceInterface(), context.getDevice(), basePipelineHandle, pipelineLayout, pipelineCreateFlags, shaderStageParams, renderPass, viewports, scissors);
4013 }
4014 #endif // CTS_USES_VULKANSC
4015 
makeComputeOrMeshTestRequiredSubgroupSize(ComputeLike testType,Context & context,VkFormat format,const vkt::subgroups::SSBOData * inputs,deUint32 inputsCount,const void * internalData,vkt::subgroups::CheckResultCompute checkResult,const deUint32 pipelineShaderStageCreateFlags,const deUint32 numWorkgroups[3],const deBool isRequiredSubgroupSize,const deUint32 subgroupSize,const deUint32 localSizesToTest[][3],const deUint32 localSizesToTestCount)4016 tcu::TestStatus makeComputeOrMeshTestRequiredSubgroupSize (ComputeLike							testType,
4017 														   Context&								context,
4018 														   VkFormat								format,
4019 														   const vkt::subgroups::SSBOData*		inputs,
4020 														   deUint32								inputsCount,
4021 														   const void*							internalData,
4022 														   vkt::subgroups::CheckResultCompute	checkResult,
4023 														   const deUint32						pipelineShaderStageCreateFlags,
4024 														   const deUint32						numWorkgroups[3],
4025 														   const deBool							isRequiredSubgroupSize,
4026 														   const deUint32						subgroupSize,
4027 														   const deUint32						localSizesToTest[][3],
4028 														   const deUint32						localSizesToTestCount)
4029 {
4030 	const DeviceInterface&									vk								= context.getDeviceInterface();
4031 	const VkDevice											device							= context.getDevice();
4032 	const VkQueue											queue							= context.getUniversalQueue();
4033 	const deUint32											queueFamilyIndex				= context.getUniversalQueueFamilyIndex();
4034 #ifndef CTS_USES_VULKANSC
4035 	const VkPhysicalDeviceSubgroupSizeControlProperties&	subgroupSizeControlProperties	= context.getSubgroupSizeControlProperties();
4036 #else
4037 	const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT&	subgroupSizeControlProperties	= context.getSubgroupSizeControlPropertiesEXT();
4038 #endif // CTS_USES_VULKANSC
4039 	const VkDeviceSize										elementSize						= getFormatSizeInBytes(format);
4040 	const VkDeviceSize										maxSubgroupSize					= isRequiredSubgroupSize
4041 																							? deMax32(subgroupSizeControlProperties.maxSubgroupSize, vkt::subgroups::maxSupportedSubgroupSize())
4042 																							: vkt::subgroups::maxSupportedSubgroupSize();
4043 	const VkDeviceSize										resultBufferSize				= maxSubgroupSize * maxSubgroupSize * maxSubgroupSize;
4044 	const VkDeviceSize										resultBufferSizeInBytes			= resultBufferSize * elementSize;
4045 	Buffer													resultBuffer					(context, resultBufferSizeInBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
4046 	std::vector< de::SharedPtr<BufferOrImage> >				inputBuffers					(inputsCount);
4047 	const auto												shaderStageFlags				= ((testType == ComputeLike::COMPUTE)
4048 																								? VK_SHADER_STAGE_COMPUTE_BIT
4049 #ifndef CTS_USES_VULKANSC
4050 																								: (VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_TASK_BIT_EXT));
4051 #else
4052 																								: 0);
4053 #endif // CTS_USES_VULKANSC
4054 	const auto												pipelineBindPoint				= ((testType == ComputeLike::COMPUTE)
4055 																								? VK_PIPELINE_BIND_POINT_COMPUTE
4056 																								: VK_PIPELINE_BIND_POINT_GRAPHICS);
4057 	const auto												pipelineStage					= ((testType == ComputeLike::COMPUTE)
4058 																								? VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT
4059 #ifndef CTS_USES_VULKANSC
4060 																								: (VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT | VK_PIPELINE_STAGE_MESH_SHADER_BIT_EXT));
4061 #else
4062 																								: 0);
4063 #endif // CTS_USES_VULKANSC
4064 	const auto												renderArea						= makeRect2D(1u, 1u);
4065 
4066 	std::vector<tcu::UVec3>									usedLocalSizes;
4067 	for (deUint32 i = 0; i < localSizesToTestCount; ++i)
4068 	{
4069 		usedLocalSizes.push_back(tcu::UVec3(localSizesToTest[i][0], localSizesToTest[i][1], localSizesToTest[i][2]));
4070 	}
4071 
4072 	for (deUint32 i = 0; i < inputsCount; i++)
4073 	{
4074 		if (inputs[i].isImage())
4075 		{
4076 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(inputs[i].numElements), 1, inputs[i].format));
4077 		}
4078 		else
4079 		{
4080 			const auto usage	= (inputs[i].isUBO() ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT : VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
4081 			const auto size		= getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
4082 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, usage));
4083 		}
4084 
4085 		const Allocation& alloc = inputBuffers[i]->getAllocation();
4086 
4087 		initializeMemory(context, alloc, inputs[i]);
4088 	}
4089 
4090 	DescriptorSetLayoutBuilder layoutBuilder;
4091 	layoutBuilder.addBinding(
4092 		resultBuffer.getType(), 1, shaderStageFlags, DE_NULL);
4093 
4094 	for (deUint32 i = 0; i < inputsCount; i++)
4095 	{
4096 		layoutBuilder.addBinding(
4097 			inputBuffers[i]->getType(), 1, shaderStageFlags, DE_NULL);
4098 	}
4099 
4100 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
4101 		layoutBuilder.build(vk, device));
4102 
4103 	Move<VkShaderModule>	compShader;
4104 	Move<VkShaderModule>	meshShader;
4105 	Move<VkShaderModule>	taskShader;
4106 	const auto&				binaries	= context.getBinaryCollection();
4107 
4108 	if (testType == ComputeLike::COMPUTE)
4109 	{
4110 		compShader = createShaderModule(vk, device, binaries.get("comp"));
4111 	}
4112 	else if (testType == ComputeLike::MESH)
4113 	{
4114 		meshShader = createShaderModule(vk, device, binaries.get("mesh"));
4115 		if (binaries.contains("task"))
4116 			taskShader = createShaderModule(vk, device, binaries.get("task"));
4117 	}
4118 	else
4119 	{
4120 		DE_ASSERT(false);
4121 	}
4122 
4123 	const Unique<VkPipelineLayout> pipelineLayout(
4124 		makePipelineLayout(vk, device, *descriptorSetLayout));
4125 
4126 	DescriptorPoolBuilder poolBuilder;
4127 
4128 	poolBuilder.addType(resultBuffer.getType());
4129 
4130 	for (deUint32 i = 0; i < inputsCount; i++)
4131 	{
4132 		poolBuilder.addType(inputBuffers[i]->getType());
4133 	}
4134 
4135 	const Unique<VkDescriptorPool>	descriptorPool			(poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
4136 	const Unique<VkDescriptorSet>	descriptorSet			(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
4137 	const VkDescriptorBufferInfo	resultDescriptorInfo =	makeDescriptorBufferInfo(resultBuffer.getBuffer(), 0ull, resultBufferSizeInBytes);
4138 	DescriptorSetUpdateBuilder		updateBuilder;
4139 
4140 	updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
4141 
4142 	for (deUint32 i = 0; i < inputsCount; i++)
4143 	{
4144 		if (inputBuffers[i]->isImage())
4145 		{
4146 			const VkDescriptorImageInfo		info	= makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(), inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
4147 
4148 			updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i + 1), inputBuffers[i]->getType(), &info);
4149 		}
4150 		else
4151 		{
4152 			vk::VkDeviceSize		size	= getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
4153 			VkDescriptorBufferInfo	info	= makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, size);
4154 
4155 			updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i + 1), inputBuffers[i]->getType(), &info);
4156 		}
4157 	}
4158 
4159 	updateBuilder.update(vk, device);
4160 
4161 	const Unique<VkCommandPool>						cmdPool				(makeCommandPool(vk, device, queueFamilyIndex));
4162 	unsigned										totalIterations		= 0;
4163 	unsigned										failedIterations	= 0;
4164 	const Unique<VkCommandBuffer>					cmdBuffer			(makeCommandBuffer(context, *cmdPool));
4165 	std::vector<de::SharedPtr<Move<VkPipeline>>>	pipelines			(localSizesToTestCount);
4166 	const auto										reqSubgroupSize		= (isRequiredSubgroupSize ? subgroupSize : 0u);
4167 	Move<VkRenderPass>								renderPass;
4168 	Move<VkFramebuffer>								framebuffer;
4169 
4170 	if (testType == ComputeLike::MESH)
4171 	{
4172 		renderPass	= makeRenderPass(vk, device);
4173 		framebuffer	= makeFramebuffer(vk, device, renderPass.get(), 0u, nullptr, renderArea.extent.width, renderArea.extent.height);
4174 	}
4175 
4176 	context.getTestContext().touchWatchdog();
4177 	{
4178 		if (testType == ComputeLike::COMPUTE)
4179 		{
4180 			pipelines[0] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeComputePipeline(context,
4181 																									*pipelineLayout,
4182 																									*compShader,
4183 																									pipelineShaderStageCreateFlags,
4184 #ifndef CTS_USES_VULKANSC
4185 																									VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT,
4186 #else
4187 																									0u,
4188 #endif // CTS_USES_VULKANSC
4189 																									(VkPipeline) DE_NULL,
4190 																									usedLocalSizes[0][0],
4191 																									usedLocalSizes[0][1],
4192 																									usedLocalSizes[0][2],
4193 																									reqSubgroupSize)));
4194 		}
4195 #ifndef CTS_USES_VULKANSC
4196 		else if (testType == ComputeLike::MESH)
4197 		{
4198 			pipelines[0] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeMeshPipeline(context,
4199 																								 pipelineLayout.get(),
4200 																								 taskShader.get(),
4201 																								 meshShader.get(),
4202 																								 pipelineShaderStageCreateFlags,
4203 																								 VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT,
4204 																								 DE_NULL,
4205 																								 usedLocalSizes[0][0],
4206 																								 usedLocalSizes[0][1],
4207 																								 usedLocalSizes[0][2],
4208 																								 reqSubgroupSize,
4209 																								 renderPass.get())));
4210 		}
4211 #endif // CTS_USES_VULKANSC
4212 		else
4213 		{
4214 			DE_ASSERT(false);
4215 		}
4216 	}
4217 	context.getTestContext().touchWatchdog();
4218 
4219 	for (deUint32 index = 1; index < (localSizesToTestCount - 1); index++)
4220 	{
4221 		const deUint32 nextX = usedLocalSizes[index][0];
4222 		const deUint32 nextY = usedLocalSizes[index][1];
4223 		const deUint32 nextZ = usedLocalSizes[index][2];
4224 
4225 		context.getTestContext().touchWatchdog();
4226 		{
4227 			if (testType == ComputeLike::COMPUTE)
4228 			{
4229 				pipelines[index] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeComputePipeline(context,
4230 																											*pipelineLayout,
4231 																											*compShader,
4232 																											pipelineShaderStageCreateFlags,
4233 #ifndef CTS_USES_VULKANSC
4234 																											VK_PIPELINE_CREATE_DERIVATIVE_BIT,
4235 #else
4236 																											0u,
4237 #endif // CTS_USES_VULKANSC
4238 																											**pipelines[0],
4239 																											nextX,
4240 																											nextY,
4241 																											nextZ,
4242 																											reqSubgroupSize)));
4243 			}
4244 #ifndef CTS_USES_VULKANSC
4245 			else if (testType == ComputeLike::MESH)
4246 			{
4247 				pipelines[index] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeMeshPipeline(context,
4248 																										 pipelineLayout.get(),
4249 																										 taskShader.get(),
4250 																										 meshShader.get(),
4251 																										 pipelineShaderStageCreateFlags,
4252 																										 VK_PIPELINE_CREATE_DERIVATIVE_BIT,
4253 																										 pipelines[0].get()->get(),
4254 																										 nextX,
4255 																										 nextY,
4256 																										 nextZ,
4257 																										 reqSubgroupSize,
4258 																										 renderPass.get())));
4259 			}
4260 #endif // CTS_USES_VULKANSC
4261 			else
4262 			{
4263 				DE_ASSERT(false);
4264 			}
4265 		}
4266 		context.getTestContext().touchWatchdog();
4267 	}
4268 
4269 	for (deUint32 index = 0; index < (localSizesToTestCount - 1); index++)
4270 	{
4271 		// we are running one test
4272 		totalIterations++;
4273 
4274 		beginCommandBuffer(vk, *cmdBuffer);
4275 		{
4276 			if (testType == ComputeLike::MESH)
4277 				beginRenderPass(vk, *cmdBuffer, renderPass.get(), framebuffer.get(), renderArea);
4278 
4279 			vk.cmdBindPipeline(*cmdBuffer, pipelineBindPoint, **pipelines[index]);
4280 
4281 			vk.cmdBindDescriptorSets(*cmdBuffer, pipelineBindPoint, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
4282 
4283 			if (testType == ComputeLike::COMPUTE)
4284 				vk.cmdDispatch(*cmdBuffer, numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
4285 #ifndef CTS_USES_VULKANSC
4286 			else if (testType == ComputeLike::MESH)
4287 				vk.cmdDrawMeshTasksEXT(*cmdBuffer, numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
4288 				//vk.cmdDrawMeshTasksNV(*cmdBuffer, numWorkgroups[0], 0);
4289 #endif // CTS_USES_VULKANSC
4290 			else
4291 				DE_ASSERT(false);
4292 
4293 			if (testType == ComputeLike::MESH)
4294 				endRenderPass(vk, *cmdBuffer);
4295 		}
4296 
4297 		// Make shader writes available.
4298 		const auto postShaderBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
4299 		vk.cmdPipelineBarrier(*cmdBuffer, pipelineStage, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u, &postShaderBarrier, 0u, nullptr, 0u, nullptr);
4300 
4301 		endCommandBuffer(vk, *cmdBuffer);
4302 
4303 		submitCommandsAndWait(vk, device, queue, *cmdBuffer);
4304 
4305 		std::vector<const void*> datas;
4306 
4307 		{
4308 			const Allocation& resultAlloc = resultBuffer.getAllocation();
4309 			invalidateAlloc(vk, device, resultAlloc);
4310 
4311 			// we always have our result data first
4312 			datas.push_back(resultAlloc.getHostPtr());
4313 		}
4314 
4315 		for (deUint32 i = 0; i < inputsCount; i++)
4316 		{
4317 			if (!inputBuffers[i]->isImage())
4318 			{
4319 				const Allocation& resultAlloc = inputBuffers[i]->getAllocation();
4320 				invalidateAlloc(vk, device, resultAlloc);
4321 
4322 				// we always have our result data first
4323 				datas.push_back(resultAlloc.getHostPtr());
4324 			}
4325 		}
4326 
4327 		if (!checkResult(internalData, datas, numWorkgroups, usedLocalSizes[index].getPtr(), subgroupSize))
4328 		{
4329 			failedIterations++;
4330 		}
4331 		else
4332 		{
4333 			failedIterations = failedIterations + 0;
4334 		}
4335 
4336 		context.resetCommandPoolForVKSC(device, *cmdPool);
4337 	}
4338 
4339 	if (0 < failedIterations)
4340 	{
4341 		unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
4342 
4343 		context.getTestContext().getLog()
4344 			<< TestLog::Message << valuesPassed << " / "
4345 			<< totalIterations << " values passed" << TestLog::EndMessage;
4346 
4347 		return tcu::TestStatus::fail("Failed!");
4348 	}
4349 
4350 	return tcu::TestStatus::pass("OK");
4351 }
4352 
makeComputeTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * inputs,deUint32 inputsCount,const void * internalData,CheckResultCompute checkResult,const deUint32 pipelineShaderStageCreateFlags,const deUint32 numWorkgroups[3],const deBool isRequiredSubgroupSize,const deUint32 subgroupSize,const deUint32 localSizesToTest[][3],const deUint32 localSizesToTestCount)4353 tcu::TestStatus vkt::subgroups::makeComputeTestRequiredSubgroupSize (Context&			context,
4354 																	 VkFormat			format,
4355 																	 const SSBOData*	inputs,
4356 																	 deUint32			inputsCount,
4357 																	 const void*		internalData,
4358 																	 CheckResultCompute	checkResult,
4359 																	 const deUint32		pipelineShaderStageCreateFlags,
4360 																	 const deUint32		numWorkgroups[3],
4361 																	 const deBool		isRequiredSubgroupSize,
4362 																	 const deUint32		subgroupSize,
4363 																	 const deUint32		localSizesToTest[][3],
4364 																	 const deUint32		localSizesToTestCount)
4365 {
4366 	return makeComputeOrMeshTestRequiredSubgroupSize(
4367 		ComputeLike::COMPUTE,
4368 		context,
4369 		format,
4370 		inputs,
4371 		inputsCount,
4372 		internalData,
4373 		checkResult,
4374 		pipelineShaderStageCreateFlags,
4375 		numWorkgroups,
4376 		isRequiredSubgroupSize,
4377 		subgroupSize,
4378 		localSizesToTest,
4379 		localSizesToTestCount);
4380 }
4381 
makeMeshTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * inputs,deUint32 inputsCount,const void * internalData,CheckResultCompute checkResult,const deUint32 pipelineShaderStageCreateFlags,const deUint32 numWorkgroups[3],const deBool isRequiredSubgroupSize,const deUint32 subgroupSize,const deUint32 localSizesToTest[][3],const deUint32 localSizesToTestCount)4382 tcu::TestStatus vkt::subgroups::makeMeshTestRequiredSubgroupSize (Context&				context,
4383 																  VkFormat				format,
4384 																  const SSBOData*		inputs,
4385 																  deUint32				inputsCount,
4386 																  const void*			internalData,
4387 																  CheckResultCompute	checkResult,
4388 																  const deUint32		pipelineShaderStageCreateFlags,
4389 																  const deUint32		numWorkgroups[3],
4390 																  const deBool			isRequiredSubgroupSize,
4391 																  const deUint32		subgroupSize,
4392 																  const deUint32		localSizesToTest[][3],
4393 																  const deUint32		localSizesToTestCount)
4394 {
4395 	return makeComputeOrMeshTestRequiredSubgroupSize(
4396 		ComputeLike::MESH,
4397 		context,
4398 		format,
4399 		inputs,
4400 		inputsCount,
4401 		internalData,
4402 		checkResult,
4403 		pipelineShaderStageCreateFlags,
4404 		numWorkgroups,
4405 		isRequiredSubgroupSize,
4406 		subgroupSize,
4407 		localSizesToTest,
4408 		localSizesToTestCount);
4409 }
4410 
makeComputeOrMeshTest(ComputeLike testType,Context & context,VkFormat format,const vkt::subgroups::SSBOData * inputs,deUint32 inputsCount,const void * internalData,vkt::subgroups::CheckResultCompute checkResult,deUint32 requiredSubgroupSize,const deUint32 pipelineShaderStageCreateFlags)4411 tcu::TestStatus makeComputeOrMeshTest (ComputeLike							testType,
4412 									   Context&								context,
4413 									   VkFormat								format,
4414 									   const vkt::subgroups::SSBOData*		inputs,
4415 									   deUint32								inputsCount,
4416 									   const void*							internalData,
4417 									   vkt::subgroups::CheckResultCompute	checkResult,
4418 									   deUint32								requiredSubgroupSize,
4419 									   const deUint32						pipelineShaderStageCreateFlags)
4420 {
4421 	const uint32_t	numWorkgroups[3]		= {4, 2, 2};
4422 	const bool		isRequiredSubgroupSize	= (requiredSubgroupSize != 0u);
4423 	const uint32_t	subgroupSize			= (isRequiredSubgroupSize ? requiredSubgroupSize : vkt::subgroups::getSubgroupSize(context));
4424 
4425 	const deUint32 localSizesToTestCount = 8;
4426 	deUint32 localSizesToTest[localSizesToTestCount][3] =
4427 	{
4428 		{1, 1, 1},
4429 		{subgroupSize, 1, 1},
4430 		{1, subgroupSize, 1},
4431 		{1, 1, subgroupSize},
4432 		{32, 4, 1},
4433 		{1, 4, 32},
4434 		{3, 5, 7},
4435 		{1, 1, 1} // Isn't used, just here to make double buffering checks easier
4436 	};
4437 
4438 	if (testType == ComputeLike::COMPUTE)
4439 		return makeComputeTestRequiredSubgroupSize(context, format, inputs, inputsCount, internalData, checkResult, pipelineShaderStageCreateFlags,
4440 												   numWorkgroups, isRequiredSubgroupSize, subgroupSize, localSizesToTest, localSizesToTestCount);
4441 	else
4442 		return makeMeshTestRequiredSubgroupSize(context, format, inputs, inputsCount, internalData, checkResult, pipelineShaderStageCreateFlags,
4443 												numWorkgroups, isRequiredSubgroupSize, subgroupSize, localSizesToTest, localSizesToTestCount);
4444 }
4445 
makeComputeTest(Context & context,VkFormat format,const SSBOData * inputs,deUint32 inputsCount,const void * internalData,CheckResultCompute checkResult,deUint32 requiredSubgroupSize,const deUint32 pipelineShaderStageCreateFlags)4446 tcu::TestStatus vkt::subgroups::makeComputeTest (Context&				context,
4447 												 VkFormat				format,
4448 												 const SSBOData*		inputs,
4449 												 deUint32				inputsCount,
4450 												 const void*			internalData,
4451 												 CheckResultCompute		checkResult,
4452 												 deUint32				requiredSubgroupSize,
4453 												 const deUint32			pipelineShaderStageCreateFlags)
4454 {
4455 	return makeComputeOrMeshTest(ComputeLike::COMPUTE, context, format, inputs, inputsCount, internalData, checkResult, requiredSubgroupSize, pipelineShaderStageCreateFlags);
4456 }
4457 
makeMeshTest(Context & context,VkFormat format,const SSBOData * inputs,deUint32 inputsCount,const void * internalData,CheckResultCompute checkResult,deUint32 requiredSubgroupSize,const deUint32 pipelineShaderStageCreateFlags)4458 tcu::TestStatus vkt::subgroups::makeMeshTest (Context&				context,
4459 											  VkFormat				format,
4460 											  const SSBOData*		inputs,
4461 											  deUint32				inputsCount,
4462 											  const void*			internalData,
4463 											  CheckResultCompute	checkResult,
4464 											  deUint32				requiredSubgroupSize,
4465 											  const deUint32		pipelineShaderStageCreateFlags)
4466 {
4467 	return makeComputeOrMeshTest(ComputeLike::MESH, context, format, inputs, inputsCount, internalData, checkResult, requiredSubgroupSize, pipelineShaderStageCreateFlags);
4468 }
4469 
checkShaderStageSetValidity(const VkShaderStageFlags shaderStages)4470 static inline void checkShaderStageSetValidity (const VkShaderStageFlags shaderStages)
4471 {
4472 	if (shaderStages == 0)
4473 		TCU_THROW(InternalError, "Shader stage is not specified");
4474 
4475 	// It can actually be only 1 or 0.
4476 	const deUint32 exclusivePipelinesCount	= (isAllComputeStages(shaderStages) ? 1 : 0)
4477 											+ (isAllGraphicsStages(shaderStages) ? 1 : 0)
4478 #ifndef CTS_USES_VULKANSC
4479 											+ (isAllRayTracingStages(shaderStages) ? 1 : 0)
4480 											+ (isAllMeshShadingStages(shaderStages) ? 1 : 0)
4481 #endif // CTS_USES_VULKANSC
4482 											;
4483 
4484 	if (exclusivePipelinesCount != 1)
4485 		TCU_THROW(InternalError, "Mix of shaders from different pipelines is detected");
4486 }
4487 
supportedCheckShader(Context & context,const VkShaderStageFlags shaderStages)4488 void vkt::subgroups::supportedCheckShader (Context& context, const VkShaderStageFlags shaderStages)
4489 {
4490 	checkShaderStageSetValidity(shaderStages);
4491 
4492 	if ((shaderStages & VK_SHADER_STAGE_GEOMETRY_BIT) != 0)
4493 		context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_GEOMETRY_SHADER);
4494 
4495 	if ((context.getSubgroupProperties().supportedStages & shaderStages) == 0)
4496 	{
4497 		if (isAllComputeStages(shaderStages))
4498 			TCU_FAIL("Compute shader is required to support subgroup operations");
4499 		else
4500 			TCU_THROW(NotSupportedError, "Subgroup support is not available for test shader stage(s)");
4501 	}
4502 
4503 #ifndef CTS_USES_VULKANSC
4504 	if ((VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) & shaderStages &&
4505 		context.isDeviceFunctionalitySupported("VK_KHR_portability_subset") &&
4506 		!context.getPortabilitySubsetFeatures().tessellationIsolines)
4507 	{
4508 		TCU_THROW(NotSupportedError, "VK_KHR_portability_subset: Tessellation iso lines are not supported by this implementation");
4509 	}
4510 #endif // CTS_USES_VULKANSC
4511 }
4512 
4513 
4514 namespace vkt
4515 {
4516 namespace subgroups
4517 {
4518 typedef std::vector< de::SharedPtr<BufferOrImage> > vectorBufferOrImage;
4519 
// Shader group indices. FIRST_GROUP aliases RAYGEN_GROUP (both 0); GROUP_COUNT is the
// number of distinct groups.
enum ShaderGroups
{
	FIRST_GROUP		= 0,
	RAYGEN_GROUP	= 0,
	MISS_GROUP		= 1,
	HIT_GROUP		= 2,
	CALL_GROUP		= 3,
	GROUP_COUNT		= 4
};
4529 
getAllRayTracingFormats()4530 const std::vector<vk::VkFormat> getAllRayTracingFormats()
4531 {
4532 	std::vector<VkFormat> formats;
4533 
4534 	formats.push_back(VK_FORMAT_R8G8B8_SINT);
4535 	formats.push_back(VK_FORMAT_R8_UINT);
4536 	formats.push_back(VK_FORMAT_R8G8B8A8_UINT);
4537 	formats.push_back(VK_FORMAT_R16G16B16_SINT);
4538 	formats.push_back(VK_FORMAT_R16_UINT);
4539 	formats.push_back(VK_FORMAT_R16G16B16A16_UINT);
4540 	formats.push_back(VK_FORMAT_R32G32B32_SINT);
4541 	formats.push_back(VK_FORMAT_R32_UINT);
4542 	formats.push_back(VK_FORMAT_R32G32B32A32_UINT);
4543 	formats.push_back(VK_FORMAT_R64G64B64_SINT);
4544 	formats.push_back(VK_FORMAT_R64_UINT);
4545 	formats.push_back(VK_FORMAT_R64G64B64A64_UINT);
4546 	formats.push_back(VK_FORMAT_R16G16B16A16_SFLOAT);
4547 	formats.push_back(VK_FORMAT_R32_SFLOAT);
4548 	formats.push_back(VK_FORMAT_R32G32B32A32_SFLOAT);
4549 	formats.push_back(VK_FORMAT_R64_SFLOAT);
4550 	formats.push_back(VK_FORMAT_R64G64B64_SFLOAT);
4551 	formats.push_back(VK_FORMAT_R64G64B64A64_SFLOAT);
4552 	formats.push_back(VK_FORMAT_R8_USCALED);
4553 	formats.push_back(VK_FORMAT_R8G8_USCALED);
4554 	formats.push_back(VK_FORMAT_R8G8B8_USCALED);
4555 	formats.push_back(VK_FORMAT_R8G8B8A8_USCALED);
4556 
4557 	return formats;
4558 }
4559 
addRayTracingNoSubgroupShader(SourceCollections & programCollection)4560 void addRayTracingNoSubgroupShader (SourceCollections& programCollection)
4561 {
4562 	const vk::ShaderBuildOptions	buildOptions	(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
4563 
4564 	const std::string rgenShaderNoSubgroups =
4565 		"#version 460 core\n"
4566 		"#extension GL_EXT_ray_tracing: require\n"
4567 		"layout(location = 0) rayPayloadEXT uvec4 payload;\n"
4568 		"layout(location = 0) callableDataEXT uvec4 callData;"
4569 		"layout(set = 1, binding = 0) uniform accelerationStructureEXT topLevelAS;\n"
4570 		"\n"
4571 		"void main()\n"
4572 		"{\n"
4573 		"  uint  rayFlags   = 0;\n"
4574 		"  uint  cullMask   = 0xFF;\n"
4575 		"  float tmin       = 0.0;\n"
4576 		"  float tmax       = 9.0;\n"
4577 		"  vec3  origin     = vec3((float(gl_LaunchIDEXT.x) + 0.5f) / float(gl_LaunchSizeEXT.x), (float(gl_LaunchIDEXT.y) + 0.5f) / float(gl_LaunchSizeEXT.y), 0.0);\n"
4578 		"  vec3  directHit  = vec3(0.0, 0.0, -1.0);\n"
4579 		"  vec3  directMiss = vec3(0.0, 0.0, +1.0);\n"
4580 		"\n"
4581 		"  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directHit, tmax, 0);\n"
4582 		"  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directMiss, tmax, 0);\n"
4583 		"  executeCallableEXT(0, 0);"
4584 		"}\n";
4585 	const std::string hitShaderNoSubgroups =
4586 		"#version 460 core\n"
4587 		"#extension GL_EXT_ray_tracing: require\n"
4588 		"hitAttributeEXT vec3 attribs;\n"
4589 		"layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
4590 		"\n"
4591 		"void main()\n"
4592 		"{\n"
4593 		"}\n";
4594 	const std::string missShaderNoSubgroups =
4595 		"#version 460 core\n"
4596 		"#extension GL_EXT_ray_tracing: require\n"
4597 		"layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
4598 		"\n"
4599 		"void main()\n"
4600 		"{\n"
4601 		"}\n";
4602 	const std::string sectShaderNoSubgroups =
4603 		"#version 460 core\n"
4604 		"#extension GL_EXT_ray_tracing: require\n"
4605 		"hitAttributeEXT vec3 hitAttribute;\n"
4606 		"\n"
4607 		"void main()\n"
4608 		"{\n"
4609 		"  reportIntersectionEXT(0.75f, 0x7Eu);\n"
4610 		"}\n";
4611 	const std::string callShaderNoSubgroups =
4612 		"#version 460 core\n"
4613 		"#extension GL_EXT_ray_tracing: require\n"
4614 		"layout(location = 0) callableDataInEXT float callData;\n"
4615 		"\n"
4616 		"void main()\n"
4617 		"{\n"
4618 		"}\n";
4619 
4620 	programCollection.glslSources.add("rgen_noSubgroup") << glu::RaygenSource		(rgenShaderNoSubgroups) << buildOptions;
4621 	programCollection.glslSources.add("ahit_noSubgroup") << glu::AnyHitSource		(hitShaderNoSubgroups)  << buildOptions;
4622 	programCollection.glslSources.add("chit_noSubgroup") << glu::ClosestHitSource	(hitShaderNoSubgroups)  << buildOptions;
4623 	programCollection.glslSources.add("miss_noSubgroup") << glu::MissSource			(missShaderNoSubgroups) << buildOptions;
4624 	programCollection.glslSources.add("sect_noSubgroup") << glu::IntersectionSource	(sectShaderNoSubgroups) << buildOptions;
4625 	programCollection.glslSources.add("call_noSubgroup") << glu::CallableSource		(callShaderNoSubgroups) << buildOptions;
4626 }
4627 
4628 #ifndef CTS_USES_VULKANSC
4629 
enumerateRayTracingShaderStages(const VkShaderStageFlags shaderStage)4630 static vector<VkShaderStageFlagBits> enumerateRayTracingShaderStages (const VkShaderStageFlags	shaderStage)
4631 {
4632 	vector<VkShaderStageFlagBits>	result;
4633 	const VkShaderStageFlagBits		shaderStageFlags[]	=
4634 	{
4635 		VK_SHADER_STAGE_RAYGEN_BIT_KHR,
4636 		VK_SHADER_STAGE_ANY_HIT_BIT_KHR,
4637 		VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,
4638 		VK_SHADER_STAGE_MISS_BIT_KHR,
4639 		VK_SHADER_STAGE_INTERSECTION_BIT_KHR,
4640 		VK_SHADER_STAGE_CALLABLE_BIT_KHR,
4641 	};
4642 
4643 	for (auto shaderStageFlag: shaderStageFlags)
4644 	{
4645 		if (0 != (shaderStage & shaderStageFlag))
4646 			result.push_back(shaderStageFlag);
4647 	}
4648 
4649 	return result;
4650 }
4651 
getRayTracingResultBinding(const VkShaderStageFlagBits shaderStage)4652 static deUint32 getRayTracingResultBinding (const VkShaderStageFlagBits shaderStage)
4653 {
4654 	const VkShaderStageFlags	shaderStageFlags[]	=
4655 	{
4656 		VK_SHADER_STAGE_RAYGEN_BIT_KHR,
4657 		VK_SHADER_STAGE_ANY_HIT_BIT_KHR,
4658 		VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,
4659 		VK_SHADER_STAGE_MISS_BIT_KHR,
4660 		VK_SHADER_STAGE_INTERSECTION_BIT_KHR,
4661 		VK_SHADER_STAGE_CALLABLE_BIT_KHR,
4662 	};
4663 
4664 	for (deUint32 shaderStageNdx = 0; shaderStageNdx < DE_LENGTH_OF_ARRAY(shaderStageFlags); ++shaderStageNdx)
4665 	{
4666 		if (0 != (shaderStage & shaderStageFlags[shaderStageNdx]))
4667 		{
4668 			DE_ASSERT(0 == (shaderStage & (~shaderStageFlags[shaderStageNdx])));
4669 
4670 			return shaderStageNdx;
4671 		}
4672 	}
4673 
4674 	TCU_THROW(InternalError, "Non-raytracing stage specified or no stage at all");
4675 }
4676 
makeRayTracingInputBuffers(Context & context,VkFormat format,const SSBOData * extraDatas,deUint32 extraDatasCount,const vector<VkShaderStageFlagBits> & stagesVector)4677 static vectorBufferOrImage makeRayTracingInputBuffers (Context&								context,
4678 													   VkFormat								format,
4679 													   const SSBOData*						extraDatas,
4680 													   deUint32								extraDatasCount,
4681 													   const vector<VkShaderStageFlagBits>&	stagesVector)
4682 {
4683 	const size_t		stagesCount		= stagesVector.size();
4684 	const VkDeviceSize	shaderSize		= getMaxWidth();
4685 	const VkDeviceSize	inputBufferSize	= getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize;
4686 	vectorBufferOrImage	inputBuffers	(stagesCount + extraDatasCount);
4687 
4688 	// The implicit result SSBO we use to store our outputs from the shader
4689 	for (size_t stageNdx = 0u; stageNdx < stagesCount; ++stageNdx)
4690 		inputBuffers[stageNdx]	= de::SharedPtr<BufferOrImage>(new Buffer(context, inputBufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT));
4691 
4692 	for (size_t stageNdx = stagesCount; stageNdx < stagesCount + extraDatasCount; ++stageNdx)
4693 	{
4694 		const size_t	datasNdx	= stageNdx - stagesCount;
4695 
4696 		if (extraDatas[datasNdx].isImage())
4697 		{
4698 			inputBuffers[stageNdx] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
4699 		}
4700 		else
4701 		{
4702 			const auto usage	= (extraDatas[datasNdx].isUBO() ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT : VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
4703 			const auto size		= getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) * extraDatas[datasNdx].numElements;
4704 			inputBuffers[stageNdx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, usage));
4705 		}
4706 
4707 		initializeMemory(context, inputBuffers[stageNdx]->getAllocation(), extraDatas[datasNdx]);
4708 	}
4709 
4710 	return inputBuffers;
4711 }
4712 
makeRayTracingDescriptorSetLayout(Context & context,const SSBOData * extraDatas,deUint32 extraDatasCount,const vector<VkShaderStageFlagBits> & stagesVector,const vectorBufferOrImage & inputBuffers)4713 static Move<VkDescriptorSetLayout> makeRayTracingDescriptorSetLayout (Context&								context,
4714 																	  const SSBOData*						extraDatas,
4715 																	  deUint32								extraDatasCount,
4716 																	  const vector<VkShaderStageFlagBits>&	stagesVector,
4717 																	  const vectorBufferOrImage&			inputBuffers)
4718 {
4719 	const DeviceInterface&		vkd				= context.getDeviceInterface();
4720 	const VkDevice				device			= context.getDevice();
4721 	const size_t				stagesCount		= stagesVector.size();
4722 	DescriptorSetLayoutBuilder	layoutBuilder;
4723 
4724 	// The implicit result SSBO we use to store our outputs from the shader
4725 	for (size_t stageNdx = 0u; stageNdx < stagesVector.size(); ++stageNdx)
4726 	{
4727 		const deUint32	stageBinding	= getRayTracingResultBinding(stagesVector[stageNdx]);
4728 
4729 		layoutBuilder.addIndexedBinding(inputBuffers[stageNdx]->getType(), 1, stagesVector[stageNdx], stageBinding, DE_NULL);
4730 	}
4731 
4732 	for (size_t stageNdx = stagesCount; stageNdx < stagesCount + extraDatasCount; ++stageNdx)
4733 	{
4734 		const size_t datasNdx = stageNdx - stagesCount;
4735 
4736 		layoutBuilder.addIndexedBinding(inputBuffers[stageNdx]->getType(), 1, extraDatas[datasNdx].stages, extraDatas[datasNdx].binding, DE_NULL);
4737 	}
4738 
4739 	return layoutBuilder.build(vkd, device);
4740 }
4741 
makeRayTracingDescriptorSetLayoutAS(Context & context)4742 static Move<VkDescriptorSetLayout> makeRayTracingDescriptorSetLayoutAS (Context&	context)
4743 {
4744 	const DeviceInterface&		vkd				= context.getDeviceInterface();
4745 	const VkDevice				device			= context.getDevice();
4746 	DescriptorSetLayoutBuilder	layoutBuilder;
4747 
4748 	layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, VK_SHADER_STAGE_RAYGEN_BIT_KHR);
4749 
4750 	return layoutBuilder.build(vkd, device);
4751 }
4752 
makeRayTracingDescriptorPool(Context & context,const vectorBufferOrImage & inputBuffers)4753 static Move<VkDescriptorPool> makeRayTracingDescriptorPool (Context&						context,
4754 															const vectorBufferOrImage&		inputBuffers)
4755 {
4756 	const DeviceInterface&	vkd					= context.getDeviceInterface();
4757 	const VkDevice			device				= context.getDevice();
4758 	const deUint32			maxDescriptorSets	= 2u;
4759 	DescriptorPoolBuilder	poolBuilder;
4760 	Move<VkDescriptorPool>	result;
4761 
4762 	if (inputBuffers.size() > 0)
4763 	{
4764 		for (size_t ndx = 0u; ndx < inputBuffers.size(); ndx++)
4765 			poolBuilder.addType(inputBuffers[ndx]->getType());
4766 	}
4767 
4768 	poolBuilder.addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR);
4769 
4770 	result = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, maxDescriptorSets);
4771 
4772 	return result;
4773 }
4774 
makeRayTracingDescriptorSet(Context & context,VkDescriptorPool descriptorPool,VkDescriptorSetLayout descriptorSetLayout,const SSBOData * extraDatas,deUint32 extraDatasCount,const vector<VkShaderStageFlagBits> & stagesVector,const vectorBufferOrImage & inputBuffers)4775 static Move<VkDescriptorSet> makeRayTracingDescriptorSet (Context&								context,
4776 														  VkDescriptorPool						descriptorPool,
4777 														  VkDescriptorSetLayout					descriptorSetLayout,
4778 														  const SSBOData*						extraDatas,
4779 														  deUint32								extraDatasCount,
4780 														  const vector<VkShaderStageFlagBits>&	stagesVector,
4781 														  const vectorBufferOrImage&			inputBuffers)
4782 {
4783 	const DeviceInterface&	vkd				= context.getDeviceInterface();
4784 	const VkDevice			device			= context.getDevice();
4785 	const size_t			stagesCount		= stagesVector.size();
4786 	Move<VkDescriptorSet>	descriptorSet;
4787 
4788 	if (inputBuffers.size() > 0)
4789 	{
4790 		DescriptorSetUpdateBuilder updateBuilder;
4791 
4792 		// Create descriptor set
4793 		descriptorSet = makeDescriptorSet(vkd, device, descriptorPool, descriptorSetLayout);
4794 
4795 		for (size_t ndx = 0u; ndx < stagesCount + extraDatasCount; ndx++)
4796 		{
4797 			const deUint32	binding	= (ndx < stagesCount)
4798 									? getRayTracingResultBinding(stagesVector[ndx])
4799 									: extraDatas[ndx - stagesCount].binding;
4800 
4801 			if (inputBuffers[ndx]->isImage())
4802 			{
4803 				const VkDescriptorImageInfo		info	= makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(), inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
4804 
4805 				updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(binding), inputBuffers[ndx]->getType(), &info);
4806 			}
4807 			else
4808 			{
4809 				const VkDescriptorBufferInfo	info	= makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(), 0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
4810 
4811 				updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(binding), inputBuffers[ndx]->getType(), &info);
4812 			}
4813 		}
4814 
4815 		updateBuilder.update(vkd, device);
4816 	}
4817 
4818 	return descriptorSet;
4819 }
4820 
makeRayTracingDescriptorSetAS(Context & context,VkDescriptorPool descriptorPool,VkDescriptorSetLayout descriptorSetLayout,de::MovePtr<TopLevelAccelerationStructure> & topLevelAccelerationStructure)4821 static Move<VkDescriptorSet> makeRayTracingDescriptorSetAS (Context&									context,
4822 															VkDescriptorPool							descriptorPool,
4823 															VkDescriptorSetLayout						descriptorSetLayout,
4824 															de::MovePtr<TopLevelAccelerationStructure>&	topLevelAccelerationStructure)
4825 {
4826 	const DeviceInterface&								vkd										= context.getDeviceInterface();
4827 	const VkDevice										device									= context.getDevice();
4828 	const TopLevelAccelerationStructure*				topLevelAccelerationStructurePtr		= topLevelAccelerationStructure.get();
4829 	const VkWriteDescriptorSetAccelerationStructureKHR	accelerationStructureWriteDescriptorSet	=
4830 	{
4831 		VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,	//  VkStructureType						sType;
4832 		DE_NULL,															//  const void*							pNext;
4833 		1u,																	//  deUint32							accelerationStructureCount;
4834 		topLevelAccelerationStructurePtr->getPtr(),							//  const VkAccelerationStructureKHR*	pAccelerationStructures;
4835 	};
4836 	Move<VkDescriptorSet>								descriptorSet = makeDescriptorSet(vkd, device, descriptorPool, descriptorSetLayout);
4837 
4838 	DescriptorSetUpdateBuilder()
4839 		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &accelerationStructureWriteDescriptorSet)
4840 		.update(vkd, device);
4841 
4842 	return descriptorSet;
4843 }
4844 
makeRayTracingPipelineLayout(Context & context,const VkDescriptorSetLayout descriptorSetLayout0,const VkDescriptorSetLayout descriptorSetLayout1)4845 static Move<VkPipelineLayout> makeRayTracingPipelineLayout (Context&					context,
4846 															const VkDescriptorSetLayout	descriptorSetLayout0,
4847 															const VkDescriptorSetLayout	descriptorSetLayout1)
4848 {
4849 	const DeviceInterface&						vkd							= context.getDeviceInterface();
4850 	const VkDevice								device						= context.getDevice();
4851 	const std::vector<VkDescriptorSetLayout>	descriptorSetLayouts		{ descriptorSetLayout0, descriptorSetLayout1 };
4852 	const deUint32								descriptorSetLayoutsSize	= static_cast<deUint32>(descriptorSetLayouts.size());
4853 
4854 	return makePipelineLayout(vkd, device, descriptorSetLayoutsSize, descriptorSetLayouts.data());
4855 }
4856 
createTopAccelerationStructure(Context & context,de::SharedPtr<BottomLevelAccelerationStructure> bottomLevelAccelerationStructure)4857 static de::MovePtr<TopLevelAccelerationStructure> createTopAccelerationStructure (Context&											context,
4858 																				  de::SharedPtr<BottomLevelAccelerationStructure>	bottomLevelAccelerationStructure)
4859 {
4860 	const DeviceInterface&						vkd			= context.getDeviceInterface();
4861 	const VkDevice								device		= context.getDevice();
4862 	Allocator&									allocator	= context.getDefaultAllocator();
4863 	de::MovePtr<TopLevelAccelerationStructure>	result		= makeTopLevelAccelerationStructure();
4864 
4865 	result->setInstanceCount(1);
4866 	result->addInstance(bottomLevelAccelerationStructure);
4867 	result->create(vkd, device, allocator);
4868 
4869 	return result;
4870 }
4871 
createBottomAccelerationStructure(Context & context)4872 static de::SharedPtr<BottomLevelAccelerationStructure> createBottomAccelerationStructure (Context&	context)
4873 {
4874 	const DeviceInterface&							vkd				= context.getDeviceInterface();
4875 	const VkDevice									device			= context.getDevice();
4876 	Allocator&										allocator		= context.getDefaultAllocator();
4877 	de::MovePtr<BottomLevelAccelerationStructure>	result			= makeBottomLevelAccelerationStructure();
4878 	const std::vector<tcu::Vec3>					geometryData	{ tcu::Vec3(-1.0f, -1.0f, -2.0f), tcu::Vec3(+1.0f, +1.0f, -1.0f) };
4879 
4880 	result->setGeometryCount(1u);
4881 	result->addGeometry(geometryData, false);
4882 	result->create(vkd, device, allocator, 0u);
4883 
4884 	return de::SharedPtr<BottomLevelAccelerationStructure>(result.release());
4885 }
4886 
makeRayTracingPipeline(Context & context,const VkShaderStageFlags shaderStageTested,const VkPipelineLayout pipelineLayout,const deUint32 shaderStageCreateFlags[6],const deUint32 requiredSubgroupSize[6],Move<VkPipeline> & pipelineOut)4887 static de::MovePtr<RayTracingPipeline> makeRayTracingPipeline (Context&					context,
4888 															   const VkShaderStageFlags	shaderStageTested,
4889 															   const VkPipelineLayout	pipelineLayout,
4890 															   const deUint32			shaderStageCreateFlags[6],
4891 															   const deUint32			requiredSubgroupSize[6],
4892 															   Move<VkPipeline>&		pipelineOut)
4893 {
4894 	const DeviceInterface&											vkd									= context.getDeviceInterface();
4895 	const VkDevice													device								= context.getDevice();
4896 	BinaryCollection&												collection							= context.getBinaryCollection();
4897 	const char*														shaderRgenName						= (0 != (shaderStageTested & VK_SHADER_STAGE_RAYGEN_BIT_KHR))			? "rgen" : "rgen_noSubgroup";
4898 	const char*														shaderAhitName						= (0 != (shaderStageTested & VK_SHADER_STAGE_ANY_HIT_BIT_KHR))			? "ahit" : "ahit_noSubgroup";
4899 	const char*														shaderChitName						= (0 != (shaderStageTested & VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR))		? "chit" : "chit_noSubgroup";
4900 	const char*														shaderMissName						= (0 != (shaderStageTested & VK_SHADER_STAGE_MISS_BIT_KHR))				? "miss" : "miss_noSubgroup";
4901 	const char*														shaderSectName						= (0 != (shaderStageTested & VK_SHADER_STAGE_INTERSECTION_BIT_KHR))		? "sect" : "sect_noSubgroup";
4902 	const char*														shaderCallName						= (0 != (shaderStageTested & VK_SHADER_STAGE_CALLABLE_BIT_KHR))			? "call" : "call_noSubgroup";
4903 	const VkShaderModuleCreateFlags									noShaderModuleCreateFlags			= static_cast<VkShaderModuleCreateFlags>(0);
4904 	Move<VkShaderModule>											rgenShaderModule					= createShaderModule(vkd, device, collection.get(shaderRgenName), noShaderModuleCreateFlags);
4905 	Move<VkShaderModule>											ahitShaderModule					= createShaderModule(vkd, device, collection.get(shaderAhitName), noShaderModuleCreateFlags);
4906 	Move<VkShaderModule>											chitShaderModule					= createShaderModule(vkd, device, collection.get(shaderChitName), noShaderModuleCreateFlags);
4907 	Move<VkShaderModule>											missShaderModule					= createShaderModule(vkd, device, collection.get(shaderMissName), noShaderModuleCreateFlags);
4908 	Move<VkShaderModule>											sectShaderModule					= createShaderModule(vkd, device, collection.get(shaderSectName), noShaderModuleCreateFlags);
4909 	Move<VkShaderModule>											callShaderModule					= createShaderModule(vkd, device, collection.get(shaderCallName), noShaderModuleCreateFlags);
4910 	const VkPipelineShaderStageCreateFlags							noPipelineShaderStageCreateFlags	= static_cast<VkPipelineShaderStageCreateFlags>(0);
4911 	const VkPipelineShaderStageCreateFlags							rgenPipelineShaderStageCreateFlags	= (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[0];
4912 	const VkPipelineShaderStageCreateFlags							ahitPipelineShaderStageCreateFlags	= (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[1];
4913 	const VkPipelineShaderStageCreateFlags							chitPipelineShaderStageCreateFlags	= (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[2];
4914 	const VkPipelineShaderStageCreateFlags							missPipelineShaderStageCreateFlags	= (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[3];
4915 	const VkPipelineShaderStageCreateFlags							sectPipelineShaderStageCreateFlags	= (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[4];
4916 	const VkPipelineShaderStageCreateFlags							callPipelineShaderStageCreateFlags	= (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[5];
4917 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT	requiredSubgroupSizeCreateInfo[6]	=
4918 	{
4919 		{
4920 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4921 			DE_NULL,
4922 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[0] : 0u,
4923 		},
4924 		{
4925 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4926 			DE_NULL,
4927 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[1] : 0u,
4928 		},
4929 		{
4930 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4931 			DE_NULL,
4932 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[2] : 0u,
4933 		},
4934 		{
4935 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4936 			DE_NULL,
4937 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[3] : 0u,
4938 		},
4939 		{
4940 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4941 			DE_NULL,
4942 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[4] : 0u,
4943 		},
4944 		{
4945 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4946 			DE_NULL,
4947 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[5] : 0u,
4948 		},
4949 	};
4950 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT*	rgenRequiredSubgroupSizeCreateInfo	= (requiredSubgroupSizeCreateInfo[0].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[0];
4951 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT*	ahitRequiredSubgroupSizeCreateInfo	= (requiredSubgroupSizeCreateInfo[1].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[1];
4952 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT*	chitRequiredSubgroupSizeCreateInfo	= (requiredSubgroupSizeCreateInfo[2].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[2];
4953 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT*	missRequiredSubgroupSizeCreateInfo	= (requiredSubgroupSizeCreateInfo[3].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[3];
4954 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT*	sectRequiredSubgroupSizeCreateInfo	= (requiredSubgroupSizeCreateInfo[4].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[4];
4955 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT*	callRequiredSubgroupSizeCreateInfo	= (requiredSubgroupSizeCreateInfo[5].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[5];
4956 	de::MovePtr<RayTracingPipeline>									rayTracingPipeline					= de::newMovePtr<RayTracingPipeline>();
4957 
4958 	rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR		, rgenShaderModule, RAYGEN_GROUP,	DE_NULL, rgenPipelineShaderStageCreateFlags, rgenRequiredSubgroupSizeCreateInfo);
4959 	rayTracingPipeline->addShader(VK_SHADER_STAGE_ANY_HIT_BIT_KHR		, ahitShaderModule, HIT_GROUP,		DE_NULL, ahitPipelineShaderStageCreateFlags, ahitRequiredSubgroupSizeCreateInfo);
4960 	rayTracingPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR	, chitShaderModule, HIT_GROUP,		DE_NULL, chitPipelineShaderStageCreateFlags, chitRequiredSubgroupSizeCreateInfo);
4961 	rayTracingPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR			, missShaderModule, MISS_GROUP,		DE_NULL, missPipelineShaderStageCreateFlags, missRequiredSubgroupSizeCreateInfo);
4962 	rayTracingPipeline->addShader(VK_SHADER_STAGE_INTERSECTION_BIT_KHR	, sectShaderModule, HIT_GROUP,		DE_NULL, sectPipelineShaderStageCreateFlags, sectRequiredSubgroupSizeCreateInfo);
4963 	rayTracingPipeline->addShader(VK_SHADER_STAGE_CALLABLE_BIT_KHR		, callShaderModule, CALL_GROUP,		DE_NULL, callPipelineShaderStageCreateFlags, callRequiredSubgroupSizeCreateInfo);
4964 
4965 	// Must execute createPipeline here, due to pNext pointers in calls to addShader are local
4966 	pipelineOut	= rayTracingPipeline->createPipeline(vkd, device, pipelineLayout);
4967 
4968 	return rayTracingPipeline;
4969 }
4970 
getPossibleRayTracingSubgroupStages(Context & context,const VkShaderStageFlags testedStages)4971 VkShaderStageFlags getPossibleRayTracingSubgroupStages (Context& context, const VkShaderStageFlags testedStages)
4972 {
4973 	const VkPhysicalDeviceSubgroupProperties&	subgroupProperties	= context.getSubgroupProperties();
4974 	const VkShaderStageFlags					stages				= testedStages & subgroupProperties.supportedStages;
4975 
4976 	DE_ASSERT(isAllRayTracingStages(testedStages));
4977 
4978 	return stages;
4979 }
4980 
allRayTracingStages(Context & context,VkFormat format,const SSBOData * extraDatas,deUint32 extraDataCount,const void * internalData,const VerificationFunctor & checkResult,const VkShaderStageFlags shaderStage)4981 tcu::TestStatus allRayTracingStages (Context&						context,
4982 									 VkFormat						format,
4983 									 const SSBOData*				extraDatas,
4984 									 deUint32						extraDataCount,
4985 									 const void*					internalData,
4986 									 const VerificationFunctor&		checkResult,
4987 									 const VkShaderStageFlags		shaderStage)
4988 {
4989 	return vkt::subgroups::allRayTracingStagesRequiredSubgroupSize(context,
4990 																   format,
4991 																   extraDatas,
4992 																   extraDataCount,
4993 																   internalData,
4994 																   checkResult,
4995 																   shaderStage,
4996 																   DE_NULL,
4997 																   DE_NULL);
4998 }
4999 
allRayTracingStagesRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * extraDatas,deUint32 extraDatasCount,const void * internalData,const VerificationFunctor & checkResult,const VkShaderStageFlags shaderStageTested,const deUint32 shaderStageCreateFlags[6],const deUint32 requiredSubgroupSize[6])5000 tcu::TestStatus allRayTracingStagesRequiredSubgroupSize (Context&					context,
5001 														 VkFormat					format,
5002 														 const SSBOData*			extraDatas,
5003 														 deUint32					extraDatasCount,
5004 														 const void*				internalData,
5005 														 const VerificationFunctor&	checkResult,
5006 														 const VkShaderStageFlags	shaderStageTested,
5007 														 const deUint32				shaderStageCreateFlags[6],
5008 														 const deUint32				requiredSubgroupSize[6])
5009 {
5010 	const DeviceInterface&							vkd									= context.getDeviceInterface();
5011 	const VkDevice									device								= context.getDevice();
5012 	const VkQueue									queue								= context.getUniversalQueue();
5013 	const deUint32									queueFamilyIndex					= context.getUniversalQueueFamilyIndex();
5014 	Allocator&										allocator							= context.getDefaultAllocator();
5015 	const deUint32									subgroupSize						= getSubgroupSize(context);
5016 	const deUint32									maxWidth							= getMaxWidth();
5017 	const vector<VkShaderStageFlagBits>				stagesVector						= enumerateRayTracingShaderStages(shaderStageTested);
5018 	const deUint32									stagesCount							= static_cast<deUint32>(stagesVector.size());
5019 	de::SharedPtr<BottomLevelAccelerationStructure>	bottomLevelAccelerationStructure	= createBottomAccelerationStructure(context);
5020 	de::MovePtr<TopLevelAccelerationStructure>		topLevelAccelerationStructure		= createTopAccelerationStructure(context, bottomLevelAccelerationStructure);
5021 	vectorBufferOrImage								inputBuffers						= makeRayTracingInputBuffers(context, format, extraDatas, extraDatasCount, stagesVector);
5022 	const Move<VkDescriptorSetLayout>				descriptorSetLayout					= makeRayTracingDescriptorSetLayout(context, extraDatas, extraDatasCount, stagesVector, inputBuffers);
5023 	const Move<VkDescriptorSetLayout>				descriptorSetLayoutAS				= makeRayTracingDescriptorSetLayoutAS(context);
5024 	const Move<VkPipelineLayout>					pipelineLayout						= makeRayTracingPipelineLayout(context, *descriptorSetLayout, *descriptorSetLayoutAS);
5025 	Move<VkPipeline>								pipeline							= Move<VkPipeline>();
5026 	const de::MovePtr<RayTracingPipeline>			rayTracingPipeline					= makeRayTracingPipeline(context, shaderStageTested, *pipelineLayout, shaderStageCreateFlags, requiredSubgroupSize, pipeline);
5027 	const deUint32									shaderGroupHandleSize				= context.getRayTracingPipelineProperties().shaderGroupHandleSize;
5028 	const deUint32									shaderGroupBaseAlignment			= context.getRayTracingPipelineProperties().shaderGroupBaseAlignment;
5029 	de::MovePtr<BufferWithMemory>					rgenShaderBindingTable				= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, RAYGEN_GROUP, 1u);
5030 	de::MovePtr<BufferWithMemory>					missShaderBindingTable				= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, MISS_GROUP,   1u);
5031 	de::MovePtr<BufferWithMemory>					hitsShaderBindingTable				= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, HIT_GROUP,    1u);
5032 	de::MovePtr<BufferWithMemory>					callShaderBindingTable				= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, CALL_GROUP,   1u);
5033 	const VkStridedDeviceAddressRegionKHR			rgenShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, rgenShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
5034 	const VkStridedDeviceAddressRegionKHR			missShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, missShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
5035 	const VkStridedDeviceAddressRegionKHR			hitsShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, hitsShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
5036 	const VkStridedDeviceAddressRegionKHR			callShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, callShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
5037 	const Move<VkDescriptorPool>					descriptorPool						= makeRayTracingDescriptorPool(context, inputBuffers);
5038 	const Move<VkDescriptorSet>						descriptorSet						= makeRayTracingDescriptorSet(context, *descriptorPool, *descriptorSetLayout, extraDatas, extraDatasCount, stagesVector, inputBuffers);
5039 	const Move<VkDescriptorSet>						descriptorSetAS						= makeRayTracingDescriptorSetAS(context, *descriptorPool, *descriptorSetLayoutAS, topLevelAccelerationStructure);
5040 	const Move<VkCommandPool>						cmdPool								= makeCommandPool(vkd, device, queueFamilyIndex);
5041 	const Move<VkCommandBuffer>						cmdBuffer							= makeCommandBuffer(context, *cmdPool);
5042 	deUint32										passIterations						= 0u;
5043 	deUint32										failIterations						= 0u;
5044 
5045 	DE_ASSERT(shaderStageTested != 0);
5046 
5047 	for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
5048 	{
5049 
5050 		for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
5051 		{
5052 			// re-init the data
5053 			const Allocation& alloc = inputBuffers[ndx]->getAllocation();
5054 
5055 			initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
5056 		}
5057 
5058 		beginCommandBuffer(vkd, *cmdBuffer);
5059 		{
5060 			vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);
5061 
5062 			bottomLevelAccelerationStructure->build(vkd, device, *cmdBuffer);
5063 			topLevelAccelerationStructure->build(vkd, device, *cmdBuffer);
5064 
5065 			vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 1u, 1u, &descriptorSetAS.get(), 0u, DE_NULL);
5066 
5067 			if (stagesCount + extraDatasCount > 0)
5068 				vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
5069 
5070 			cmdTraceRays(vkd,
5071 				*cmdBuffer,
5072 				&rgenShaderBindingTableRegion,
5073 				&missShaderBindingTableRegion,
5074 				&hitsShaderBindingTableRegion,
5075 				&callShaderBindingTableRegion,
5076 				width, 1, 1);
5077 
5078 			const VkMemoryBarrier	postTraceMemoryBarrier	= makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
5079 			cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, VK_PIPELINE_STAGE_HOST_BIT, &postTraceMemoryBarrier);
5080 		}
5081 		endCommandBuffer(vkd, *cmdBuffer);
5082 
5083 		submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
5084 
5085 		for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
5086 		{
5087 			std::vector<const void*> datas;
5088 
5089 			if (!inputBuffers[ndx]->isImage())
5090 			{
5091 				const Allocation& resultAlloc = inputBuffers[ndx]->getAllocation();
5092 
5093 				invalidateAlloc(vkd, device, resultAlloc);
5094 
5095 				// we always have our result data first
5096 				datas.push_back(resultAlloc.getHostPtr());
5097 			}
5098 
5099 			for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
5100 			{
5101 				const deUint32 datasNdx = index - stagesCount;
5102 
5103 				if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
5104 				{
5105 					const Allocation& resultAlloc = inputBuffers[index]->getAllocation();
5106 
5107 					invalidateAlloc(vkd, device, resultAlloc);
5108 
5109 					// we always have our result data first
5110 					datas.push_back(resultAlloc.getHostPtr());
5111 				}
5112 			}
5113 
5114 			if (!checkResult(internalData, datas, width, subgroupSize, false))
5115 				failIterations++;
5116 			else
5117 				passIterations++;
5118 		}
5119 
5120 		context.resetCommandPoolForVKSC(device, *cmdPool);
5121 	}
5122 
5123 	if (failIterations > 0 || passIterations == 0)
5124 		return tcu::TestStatus::fail("Failed " + de::toString(failIterations) + " out of " + de::toString(failIterations + passIterations) + " iterations.");
5125 	else
5126 		return tcu::TestStatus::pass("OK");
5127 }
5128 #endif // CTS_USES_VULKANSC
5129 
5130 } // namespace subgroups
5131 } // namespace vkt
5132