• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2019 The Khronos Group Inc.
6  * Copyright (c) 2019 Google Inc.
7  * Copyright (c) 2017 Codeplay Software Ltd.
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  */ /*!
22  * \file
23  * \brief Subgroups Tests Utils
24  */ /*--------------------------------------------------------------------*/
25 
26 #include "vktSubgroupsTestsUtils.hpp"
27 #include "vkRayTracingUtil.hpp"
28 #include "deFloat16.h"
29 #include "deRandom.hpp"
30 #include "tcuCommandLine.hpp"
31 #include "tcuStringTemplate.hpp"
32 #include "vkBarrierUtil.hpp"
33 #include "vkImageUtil.hpp"
34 #include "vkTypeUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkObjUtil.hpp"
37 
38 using namespace tcu;
39 using namespace std;
40 using namespace vk;
41 using namespace vkt;
42 
43 namespace
44 {
45 
// Distinguishes the two pipeline flavours named here: plain compute and mesh.
// (How the value is consumed is outside this part of the file.)
enum class ComputeLike
{
	COMPUTE = 0,
	MESH
};
47 
getMaxWidth()48 deUint32 getMaxWidth ()
49 {
50 	return 1024u;
51 }
52 
// Returns the width to test after `width`.
//
// Widths below 128 (the max subgroup size) advance one at a time so that every
// value up to 128 is covered; from 128 onwards the width doubles, visiting only
// powers of two to reduce testing time.
deUint32 getNextWidth (const deUint32 width)
{
	const bool stepByOne = (width < 128);

	return stepByOne ? (width + 1) : (width * 2);
}
66 
getFormatSizeInBytes(const VkFormat format)67 deUint32 getFormatSizeInBytes (const VkFormat format)
68 {
69 	switch (format)
70 	{
71 		default:
72 			DE_FATAL("Unhandled format!");
73 			return 0;
74 		case VK_FORMAT_R8_SINT:
75 		case VK_FORMAT_R8_UINT:
76 			return static_cast<deUint32>(sizeof(deInt8));
77 		case VK_FORMAT_R8G8_SINT:
78 		case VK_FORMAT_R8G8_UINT:
79 			return static_cast<deUint32>(sizeof(deInt8) * 2);
80 		case VK_FORMAT_R8G8B8_SINT:
81 		case VK_FORMAT_R8G8B8_UINT:
82 		case VK_FORMAT_R8G8B8A8_SINT:
83 		case VK_FORMAT_R8G8B8A8_UINT:
84 			return static_cast<deUint32>(sizeof(deInt8) * 4);
85 		case VK_FORMAT_R16_SINT:
86 		case VK_FORMAT_R16_UINT:
87 		case VK_FORMAT_R16_SFLOAT:
88 			return static_cast<deUint32>(sizeof(deInt16));
89 		case VK_FORMAT_R16G16_SINT:
90 		case VK_FORMAT_R16G16_UINT:
91 		case VK_FORMAT_R16G16_SFLOAT:
92 			return static_cast<deUint32>(sizeof(deInt16) * 2);
93 		case VK_FORMAT_R16G16B16_UINT:
94 		case VK_FORMAT_R16G16B16_SINT:
95 		case VK_FORMAT_R16G16B16_SFLOAT:
96 		case VK_FORMAT_R16G16B16A16_SINT:
97 		case VK_FORMAT_R16G16B16A16_UINT:
98 		case VK_FORMAT_R16G16B16A16_SFLOAT:
99 			return static_cast<deUint32>(sizeof(deInt16) * 4);
100 		case VK_FORMAT_R32_SINT:
101 		case VK_FORMAT_R32_UINT:
102 		case VK_FORMAT_R32_SFLOAT:
103 			return static_cast<deUint32>(sizeof(deInt32));
104 		case VK_FORMAT_R32G32_SINT:
105 		case VK_FORMAT_R32G32_UINT:
106 		case VK_FORMAT_R32G32_SFLOAT:
107 			return static_cast<deUint32>(sizeof(deInt32) * 2);
108 		case VK_FORMAT_R32G32B32_SINT:
109 		case VK_FORMAT_R32G32B32_UINT:
110 		case VK_FORMAT_R32G32B32_SFLOAT:
111 		case VK_FORMAT_R32G32B32A32_SINT:
112 		case VK_FORMAT_R32G32B32A32_UINT:
113 		case VK_FORMAT_R32G32B32A32_SFLOAT:
114 			return static_cast<deUint32>(sizeof(deInt32) * 4);
115 		case VK_FORMAT_R64_SINT:
116 		case VK_FORMAT_R64_UINT:
117 		case VK_FORMAT_R64_SFLOAT:
118 			return static_cast<deUint32>(sizeof(deInt64));
119 		case VK_FORMAT_R64G64_SINT:
120 		case VK_FORMAT_R64G64_UINT:
121 		case VK_FORMAT_R64G64_SFLOAT:
122 			return static_cast<deUint32>(sizeof(deInt64) * 2);
123 		case VK_FORMAT_R64G64B64_SINT:
124 		case VK_FORMAT_R64G64B64_UINT:
125 		case VK_FORMAT_R64G64B64_SFLOAT:
126 		case VK_FORMAT_R64G64B64A64_SINT:
127 		case VK_FORMAT_R64G64B64A64_UINT:
128 		case VK_FORMAT_R64G64B64A64_SFLOAT:
129 			return static_cast<deUint32>(sizeof(deInt64) * 4);
130 		// The below formats are used to represent bool and bvec* types. These
131 		// types are passed to the shader as int and ivec* types, before the
132 		// calculations are done as booleans. We need a distinct type here so
133 		// that the shader generators can switch on it and generate the correct
134 		// shader source for testing.
135 		case VK_FORMAT_R8_USCALED:
136 			return static_cast<deUint32>(sizeof(deInt32));
137 		case VK_FORMAT_R8G8_USCALED:
138 			return static_cast<deUint32>(sizeof(deInt32) * 2);
139 		case VK_FORMAT_R8G8B8_USCALED:
140 		case VK_FORMAT_R8G8B8A8_USCALED:
141 			return static_cast<deUint32>(sizeof(deInt32) * 4);
142 	}
143 }
144 
getElementSizeInBytes(const VkFormat format,const subgroups::SSBOData::InputDataLayoutType layout)145 deUint32 getElementSizeInBytes (const VkFormat									format,
146 								const subgroups::SSBOData::InputDataLayoutType	layout)
147 {
148 	const deUint32 bytes = getFormatSizeInBytes(format);
149 
150 	if (layout == subgroups::SSBOData::LayoutStd140)
151 		return bytes < 16 ? 16 : bytes;
152 	else
153 		return bytes;
154 }
155 
makeRenderPass(Context & context,VkFormat format)156 Move<VkRenderPass> makeRenderPass (Context& context, VkFormat format)
157 {
158 	const VkAttachmentReference		colorReference			=
159 	{
160 		0,
161 		VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
162 	};
163 	const VkSubpassDescription		subpassDescription		=
164 	{
165 		0u,									//  VkSubpassDescriptionFlags		flags;
166 		VK_PIPELINE_BIND_POINT_GRAPHICS,	//  VkPipelineBindPoint				pipelineBindPoint;
167 		0,									//  deUint32						inputAttachmentCount;
168 		DE_NULL,							//  const VkAttachmentReference*	pInputAttachments;
169 		1,									//  deUint32						colorAttachmentCount;
170 		&colorReference,					//  const VkAttachmentReference*	pColorAttachments;
171 		DE_NULL,							//  const VkAttachmentReference*	pResolveAttachments;
172 		DE_NULL,							//  const VkAttachmentReference*	pDepthStencilAttachment;
173 		0,									//  deUint32						preserveAttachmentCount;
174 		DE_NULL								//  const deUint32*					pPreserveAttachments;
175 	};
176 	const VkSubpassDependency		subpassDependencies[2]	=
177 	{
178 		{
179 			VK_SUBPASS_EXTERNAL,															//  deUint32				srcSubpass;
180 			0u,																				//  deUint32				dstSubpass;
181 			VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,											//  VkPipelineStageFlags	srcStageMask;
182 			VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,									//  VkPipelineStageFlags	dstStageMask;
183 			VK_ACCESS_MEMORY_READ_BIT,														//  VkAccessFlags			srcAccessMask;
184 			VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,		//  VkAccessFlags			dstAccessMask;
185 			VK_DEPENDENCY_BY_REGION_BIT														//  VkDependencyFlags		dependencyFlags;
186 		},
187 		{
188 			0u,																				//  deUint32				srcSubpass;
189 			VK_SUBPASS_EXTERNAL,															//  deUint32				dstSubpass;
190 			VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,									//  VkPipelineStageFlags	srcStageMask;
191 			VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,											//  VkPipelineStageFlags	dstStageMask;
192 			VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,		//  VkAccessFlags			srcAccessMask;
193 			VK_ACCESS_MEMORY_READ_BIT,														//  VkAccessFlags			dstAccessMask;
194 			VK_DEPENDENCY_BY_REGION_BIT														//  VkDependencyFlags		dependencyFlags;
195 		},
196 	};
197 	const VkAttachmentDescription	attachmentDescription	=
198 	{
199 		0u,											//  VkAttachmentDescriptionFlags	flags;
200 		format,										//  VkFormat						format;
201 		VK_SAMPLE_COUNT_1_BIT,						//  VkSampleCountFlagBits			samples;
202 		VK_ATTACHMENT_LOAD_OP_CLEAR,				//  VkAttachmentLoadOp				loadOp;
203 		VK_ATTACHMENT_STORE_OP_STORE,				//  VkAttachmentStoreOp				storeOp;
204 		VK_ATTACHMENT_LOAD_OP_DONT_CARE,			//  VkAttachmentLoadOp				stencilLoadOp;
205 		VK_ATTACHMENT_STORE_OP_DONT_CARE,			//  VkAttachmentStoreOp				stencilStoreOp;
206 		VK_IMAGE_LAYOUT_UNDEFINED,					//  VkImageLayout					initialLayout;
207 		VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL		//  VkImageLayout					finalLayout;
208 	};
209 	const VkRenderPassCreateInfo	renderPassCreateInfo =
210 	{
211 		VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,	//  VkStructureType					sType;
212 		DE_NULL,									//  const void*						pNext;
213 		0u,											//  VkRenderPassCreateFlags			flags;
214 		1,											//  deUint32						attachmentCount;
215 		&attachmentDescription,						//  const VkAttachmentDescription*	pAttachments;
216 		1,											//  deUint32						subpassCount;
217 		&subpassDescription,						//  const VkSubpassDescription*		pSubpasses;
218 		2,											//  deUint32						dependencyCount;
219 		subpassDependencies							//  const VkSubpassDependency*		pDependencies;
220 	};
221 
222 	return createRenderPass(context.getDeviceInterface(), context.getDevice(), &renderPassCreateInfo);
223 }
224 
makeGraphicsPipeline(const DeviceInterface & vk,const VkDevice device,const VkPipelineLayout pipelineLayout,const VkShaderModule vertexShaderModule,const VkShaderModule tessellationControlShaderModule,const VkShaderModule tessellationEvalShaderModule,const VkShaderModule geometryShaderModule,const VkShaderModule fragmentShaderModule,const VkRenderPass renderPass,const std::vector<VkViewport> & viewports,const std::vector<VkRect2D> & scissors,const VkPrimitiveTopology topology,const deUint32 subpass,const deUint32 patchControlPoints,const VkPipelineVertexInputStateCreateInfo * vertexInputStateCreateInfo,const VkPipelineRasterizationStateCreateInfo * rasterizationStateCreateInfo,const VkPipelineMultisampleStateCreateInfo * multisampleStateCreateInfo,const VkPipelineDepthStencilStateCreateInfo * depthStencilStateCreateInfo,const VkPipelineColorBlendStateCreateInfo * colorBlendStateCreateInfo,const VkPipelineDynamicStateCreateInfo * dynamicStateCreateInfo,const deUint32 vertexShaderStageCreateFlags,const deUint32 tessellationControlShaderStageCreateFlags,const deUint32 tessellationEvalShaderStageCreateFlags,const deUint32 geometryShaderStageCreateFlags,const deUint32 fragmentShaderStageCreateFlags,const deUint32 requiredSubgroupSize[5])225 Move<VkPipeline> makeGraphicsPipeline (const DeviceInterface&							vk,
226 									   const VkDevice									device,
227 									   const VkPipelineLayout							pipelineLayout,
228 									   const VkShaderModule								vertexShaderModule,
229 									   const VkShaderModule								tessellationControlShaderModule,
230 									   const VkShaderModule								tessellationEvalShaderModule,
231 									   const VkShaderModule								geometryShaderModule,
232 									   const VkShaderModule								fragmentShaderModule,
233 									   const VkRenderPass								renderPass,
234 									   const std::vector<VkViewport>&					viewports,
235 									   const std::vector<VkRect2D>&						scissors,
236 									   const VkPrimitiveTopology						topology,
237 									   const deUint32									subpass,
238 									   const deUint32									patchControlPoints,
239 									   const VkPipelineVertexInputStateCreateInfo*		vertexInputStateCreateInfo,
240 									   const VkPipelineRasterizationStateCreateInfo*	rasterizationStateCreateInfo,
241 									   const VkPipelineMultisampleStateCreateInfo*		multisampleStateCreateInfo,
242 									   const VkPipelineDepthStencilStateCreateInfo*		depthStencilStateCreateInfo,
243 									   const VkPipelineColorBlendStateCreateInfo*		colorBlendStateCreateInfo,
244 									   const VkPipelineDynamicStateCreateInfo*			dynamicStateCreateInfo,
245 									   const deUint32									vertexShaderStageCreateFlags,
246 									   const deUint32									tessellationControlShaderStageCreateFlags,
247 									   const deUint32									tessellationEvalShaderStageCreateFlags,
248 									   const deUint32									geometryShaderStageCreateFlags,
249 									   const deUint32									fragmentShaderStageCreateFlags,
250 									   const deUint32									requiredSubgroupSize[5])
251 {
252 	const VkBool32									disableRasterization				= (fragmentShaderModule == DE_NULL);
253 	const bool										hasTessellation						= (tessellationControlShaderModule != DE_NULL || tessellationEvalShaderModule != DE_NULL);
254 
255 	VkPipelineShaderStageCreateInfo					stageCreateInfo						=
256 	{
257 		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType                     sType
258 		DE_NULL,												// const void*                         pNext
259 		0u,														// VkPipelineShaderStageCreateFlags    flags
260 		VK_SHADER_STAGE_VERTEX_BIT,								// VkShaderStageFlagBits               stage
261 		DE_NULL,												// VkShaderModule                      module
262 		"main",													// const char*                         pName
263 		DE_NULL													// const VkSpecializationInfo*         pSpecializationInfo
264 	};
265 
266 	std::vector<VkPipelineShaderStageCreateInfo>	pipelineShaderStageParams;
267 
268 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT requiredSubgroupSizeCreateInfo[5] =
269 	{
270 		{
271 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
272 			DE_NULL,
273 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[0] : 0u,
274 		},
275 		{
276 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
277 			DE_NULL,
278 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[1] : 0u,
279 		},
280 		{
281 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
282 			DE_NULL,
283 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[2] : 0u,
284 		},
285 		{
286 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
287 			DE_NULL,
288 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[3] : 0u,
289 		},
290 		{
291 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
292 			DE_NULL,
293 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[4] : 0u,
294 		},
295 	};
296 
297 	{
298 		stageCreateInfo.pNext	= (requiredSubgroupSizeCreateInfo[0].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[0] : DE_NULL;
299 		stageCreateInfo.flags	= vertexShaderStageCreateFlags;
300 		stageCreateInfo.stage	= VK_SHADER_STAGE_VERTEX_BIT;
301 		stageCreateInfo.module	= vertexShaderModule;
302 		pipelineShaderStageParams.push_back(stageCreateInfo);
303 	}
304 
305 	if (tessellationControlShaderModule != DE_NULL)
306 	{
307 		stageCreateInfo.pNext	= (requiredSubgroupSizeCreateInfo[1].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[1] : DE_NULL;
308 		stageCreateInfo.flags	= tessellationControlShaderStageCreateFlags;
309 		stageCreateInfo.stage	= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
310 		stageCreateInfo.module	= tessellationControlShaderModule;
311 		pipelineShaderStageParams.push_back(stageCreateInfo);
312 	}
313 
314 	if (tessellationEvalShaderModule != DE_NULL)
315 	{
316 		stageCreateInfo.pNext	= (requiredSubgroupSize != DE_NULL && requiredSubgroupSizeCreateInfo[2].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[2] : DE_NULL;
317 		stageCreateInfo.flags	= tessellationEvalShaderStageCreateFlags;
318 		stageCreateInfo.stage	= VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
319 		stageCreateInfo.module	= tessellationEvalShaderModule;
320 		pipelineShaderStageParams.push_back(stageCreateInfo);
321 	}
322 
323 	if (geometryShaderModule != DE_NULL)
324 	{
325 		stageCreateInfo.pNext	= (requiredSubgroupSizeCreateInfo[3].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[3] : DE_NULL;
326 		stageCreateInfo.flags	= geometryShaderStageCreateFlags;
327 		stageCreateInfo.stage	= VK_SHADER_STAGE_GEOMETRY_BIT;
328 		stageCreateInfo.module	= geometryShaderModule;
329 		pipelineShaderStageParams.push_back(stageCreateInfo);
330 	}
331 
332 	if (fragmentShaderModule != DE_NULL)
333 	{
334 		stageCreateInfo.pNext	= (requiredSubgroupSizeCreateInfo[4].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[4] : DE_NULL;
335 		stageCreateInfo.flags	= fragmentShaderStageCreateFlags;
336 		stageCreateInfo.stage	= VK_SHADER_STAGE_FRAGMENT_BIT;
337 		stageCreateInfo.module	= fragmentShaderModule;
338 		pipelineShaderStageParams.push_back(stageCreateInfo);
339 	}
340 
341 	const VkVertexInputBindingDescription			vertexInputBindingDescription		=
342 	{
343 		0u,								// deUint32             binding
344 		sizeof(tcu::Vec4),				// deUint32             stride
345 		VK_VERTEX_INPUT_RATE_VERTEX,	// VkVertexInputRate    inputRate
346 	};
347 
348 	const VkVertexInputAttributeDescription			vertexInputAttributeDescription		=
349 	{
350 		0u,								// deUint32    location
351 		0u,								// deUint32    binding
352 		VK_FORMAT_R32G32B32A32_SFLOAT,	// VkFormat    format
353 		0u								// deUint32    offset
354 	};
355 
356 	const VkPipelineVertexInputStateCreateInfo		vertexInputStateCreateInfoDefault	=
357 	{
358 		VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,	// VkStructureType                             sType
359 		DE_NULL,													// const void*                                 pNext
360 		(VkPipelineVertexInputStateCreateFlags)0,					// VkPipelineVertexInputStateCreateFlags       flags
361 		1u,															// deUint32                                    vertexBindingDescriptionCount
362 		&vertexInputBindingDescription,								// const VkVertexInputBindingDescription*      pVertexBindingDescriptions
363 		1u,															// deUint32                                    vertexAttributeDescriptionCount
364 		&vertexInputAttributeDescription							// const VkVertexInputAttributeDescription*    pVertexAttributeDescriptions
365 	};
366 
367 	const VkPipelineInputAssemblyStateCreateInfo	inputAssemblyStateCreateInfo		=
368 	{
369 		VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,	// VkStructureType                            sType
370 		DE_NULL,														// const void*                                pNext
371 		0u,																// VkPipelineInputAssemblyStateCreateFlags    flags
372 		topology,														// VkPrimitiveTopology                        topology
373 		VK_FALSE														// VkBool32                                   primitiveRestartEnable
374 	};
375 
376 	const VkPipelineTessellationStateCreateInfo		tessStateCreateInfo					=
377 	{
378 		VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,	// VkStructureType                           sType
379 		DE_NULL,													// const void*                               pNext
380 		0u,															// VkPipelineTessellationStateCreateFlags    flags
381 		patchControlPoints											// deUint32                                  patchControlPoints
382 	};
383 
384 	const VkPipelineViewportStateCreateInfo			viewportStateCreateInfo				=
385 	{
386 		VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,	// VkStructureType                             sType
387 		DE_NULL,												// const void*                                 pNext
388 		(VkPipelineViewportStateCreateFlags)0,					// VkPipelineViewportStateCreateFlags          flags
389 		viewports.empty() ? 1u : (deUint32)viewports.size(),	// deUint32                                    viewportCount
390 		viewports.empty() ? DE_NULL : &viewports[0],			// const VkViewport*                           pViewports
391 		viewports.empty() ? 1u : (deUint32)scissors.size(),		// deUint32                                    scissorCount
392 		scissors.empty() ? DE_NULL : &scissors[0]				// const VkRect2D*                             pScissors
393 	};
394 
395 	const VkPipelineRasterizationStateCreateInfo	rasterizationStateCreateInfoDefault	=
396 	{
397 		VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,	// VkStructureType                            sType
398 		DE_NULL,													// const void*                                pNext
399 		0u,															// VkPipelineRasterizationStateCreateFlags    flags
400 		VK_FALSE,													// VkBool32                                   depthClampEnable
401 		disableRasterization,										// VkBool32                                   rasterizerDiscardEnable
402 		VK_POLYGON_MODE_FILL,										// VkPolygonMode                              polygonMode
403 		VK_CULL_MODE_NONE,											// VkCullModeFlags                            cullMode
404 		VK_FRONT_FACE_COUNTER_CLOCKWISE,							// VkFrontFace                                frontFace
405 		VK_FALSE,													// VkBool32                                   depthBiasEnable
406 		0.0f,														// float                                      depthBiasConstantFactor
407 		0.0f,														// float                                      depthBiasClamp
408 		0.0f,														// float                                      depthBiasSlopeFactor
409 		1.0f														// float                                      lineWidth
410 	};
411 
412 	const VkPipelineMultisampleStateCreateInfo		multisampleStateCreateInfoDefault	=
413 	{
414 		VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,	// VkStructureType                          sType
415 		DE_NULL,													// const void*                              pNext
416 		0u,															// VkPipelineMultisampleStateCreateFlags    flags
417 		VK_SAMPLE_COUNT_1_BIT,										// VkSampleCountFlagBits                    rasterizationSamples
418 		VK_FALSE,													// VkBool32                                 sampleShadingEnable
419 		1.0f,														// float                                    minSampleShading
420 		DE_NULL,													// const VkSampleMask*                      pSampleMask
421 		VK_FALSE,													// VkBool32                                 alphaToCoverageEnable
422 		VK_FALSE													// VkBool32                                 alphaToOneEnable
423 	};
424 
425 	const VkStencilOpState							stencilOpState						=
426 	{
427 		VK_STENCIL_OP_KEEP,		// VkStencilOp    failOp
428 		VK_STENCIL_OP_KEEP,		// VkStencilOp    passOp
429 		VK_STENCIL_OP_KEEP,		// VkStencilOp    depthFailOp
430 		VK_COMPARE_OP_NEVER,	// VkCompareOp    compareOp
431 		0,						// deUint32       compareMask
432 		0,						// deUint32       writeMask
433 		0						// deUint32       reference
434 	};
435 
436 	const VkPipelineDepthStencilStateCreateInfo		depthStencilStateCreateInfoDefault	=
437 	{
438 		VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,	// VkStructureType                          sType
439 		DE_NULL,													// const void*                              pNext
440 		0u,															// VkPipelineDepthStencilStateCreateFlags   flags
441 		VK_FALSE,													// VkBool32                                 depthTestEnable
442 		VK_FALSE,													// VkBool32                                 depthWriteEnable
443 		VK_COMPARE_OP_LESS_OR_EQUAL,								// VkCompareOp                              depthCompareOp
444 		VK_FALSE,													// VkBool32                                 depthBoundsTestEnable
445 		VK_FALSE,													// VkBool32                                 stencilTestEnable
446 		stencilOpState,												// VkStencilOpState                         front
447 		stencilOpState,												// VkStencilOpState                         back
448 		0.0f,														// float                                    minDepthBounds
449 		1.0f,														// float                                    maxDepthBounds
450 	};
451 
452 	const VkPipelineColorBlendAttachmentState		colorBlendAttachmentState			=
453 	{
454 		VK_FALSE,					// VkBool32                 blendEnable
455 		VK_BLEND_FACTOR_ZERO,		// VkBlendFactor            srcColorBlendFactor
456 		VK_BLEND_FACTOR_ZERO,		// VkBlendFactor            dstColorBlendFactor
457 		VK_BLEND_OP_ADD,			// VkBlendOp                colorBlendOp
458 		VK_BLEND_FACTOR_ZERO,		// VkBlendFactor            srcAlphaBlendFactor
459 		VK_BLEND_FACTOR_ZERO,		// VkBlendFactor            dstAlphaBlendFactor
460 		VK_BLEND_OP_ADD,			// VkBlendOp                alphaBlendOp
461 		VK_COLOR_COMPONENT_R_BIT	// VkColorComponentFlags    colorWriteMask
462 		| VK_COLOR_COMPONENT_G_BIT
463 		| VK_COLOR_COMPONENT_B_BIT
464 		| VK_COLOR_COMPONENT_A_BIT
465 	};
466 
467 	const VkPipelineColorBlendStateCreateInfo		colorBlendStateCreateInfoDefault	=
468 	{
469 		VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,	// VkStructureType                               sType
470 		DE_NULL,													// const void*                                   pNext
471 		0u,															// VkPipelineColorBlendStateCreateFlags          flags
472 		VK_FALSE,													// VkBool32                                      logicOpEnable
473 		VK_LOGIC_OP_CLEAR,											// VkLogicOp                                     logicOp
474 		1u,															// deUint32                                      attachmentCount
475 		&colorBlendAttachmentState,									// const VkPipelineColorBlendAttachmentState*    pAttachments
476 		{ 0.0f, 0.0f, 0.0f, 0.0f }									// float                                         blendConstants[4]
477 	};
478 
479 	std::vector<VkDynamicState>						dynamicStates;
480 
481 	if (viewports.empty())
482 		dynamicStates.push_back(VK_DYNAMIC_STATE_VIEWPORT);
483 	if (scissors.empty())
484 		dynamicStates.push_back(VK_DYNAMIC_STATE_SCISSOR);
485 
486 	const VkPipelineDynamicStateCreateInfo			dynamicStateCreateInfoDefault		=
487 	{
488 		VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,	// VkStructureType                      sType
489 		DE_NULL,												// const void*                          pNext
490 		0u,														// VkPipelineDynamicStateCreateFlags    flags
491 		(deUint32)dynamicStates.size(),							// deUint32                             dynamicStateCount
492 		dynamicStates.empty() ? DE_NULL : &dynamicStates[0]		// const VkDynamicState*                pDynamicStates
493 	};
494 
495 	const VkPipelineDynamicStateCreateInfo*			dynamicStateCreateInfoDefaultPtr	= dynamicStates.empty() ? DE_NULL : &dynamicStateCreateInfoDefault;
496 
497 	const VkGraphicsPipelineCreateInfo				pipelineCreateInfo					=
498 	{
499 		VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,														// VkStructureType                                  sType
500 		DE_NULL,																								// const void*                                      pNext
501 		0u,																										// VkPipelineCreateFlags                            flags
502 		(deUint32)pipelineShaderStageParams.size(),																// deUint32                                         stageCount
503 		&pipelineShaderStageParams[0],																			// const VkPipelineShaderStageCreateInfo*           pStages
504 		vertexInputStateCreateInfo ? vertexInputStateCreateInfo : &vertexInputStateCreateInfoDefault,			// const VkPipelineVertexInputStateCreateInfo*      pVertexInputState
505 		&inputAssemblyStateCreateInfo,																			// const VkPipelineInputAssemblyStateCreateInfo*    pInputAssemblyState
506 		hasTessellation ? &tessStateCreateInfo : DE_NULL,														// const VkPipelineTessellationStateCreateInfo*     pTessellationState
507 		&viewportStateCreateInfo,																				// const VkPipelineViewportStateCreateInfo*         pViewportState
508 		rasterizationStateCreateInfo ? rasterizationStateCreateInfo : &rasterizationStateCreateInfoDefault,		// const VkPipelineRasterizationStateCreateInfo*    pRasterizationState
509 		multisampleStateCreateInfo ? multisampleStateCreateInfo: &multisampleStateCreateInfoDefault,			// const VkPipelineMultisampleStateCreateInfo*      pMultisampleState
510 		depthStencilStateCreateInfo ? depthStencilStateCreateInfo : &depthStencilStateCreateInfoDefault,		// const VkPipelineDepthStencilStateCreateInfo*     pDepthStencilState
511 		colorBlendStateCreateInfo ? colorBlendStateCreateInfo : &colorBlendStateCreateInfoDefault,				// const VkPipelineColorBlendStateCreateInfo*       pColorBlendState
512 		dynamicStateCreateInfo ? dynamicStateCreateInfo : dynamicStateCreateInfoDefaultPtr,						// const VkPipelineDynamicStateCreateInfo*          pDynamicState
513 		pipelineLayout,																							// VkPipelineLayout                                 layout
514 		renderPass,																								// VkRenderPass                                     renderPass
515 		subpass,																								// deUint32                                         subpass
516 		DE_NULL,																								// VkPipeline                                       basePipelineHandle
517 		0																										// deInt32                                          basePipelineIndex;
518 	};
519 
520 	return createGraphicsPipeline(vk, device, DE_NULL, &pipelineCreateInfo);
521 }
522 
makeGraphicsPipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderStageFlags stages,const VkShaderModule vertexShaderModule,const VkShaderModule fragmentShaderModule,const VkShaderModule geometryShaderModule,const VkShaderModule tessellationControlModule,const VkShaderModule tessellationEvaluationModule,const VkRenderPass renderPass,const VkPrimitiveTopology topology=VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,const VkVertexInputBindingDescription * vertexInputBindingDescription=DE_NULL,const VkVertexInputAttributeDescription * vertexInputAttributeDescriptions=DE_NULL,const bool frameBufferTests=false,const vk::VkFormat attachmentFormat=VK_FORMAT_R32G32B32A32_SFLOAT,const deUint32 vertexShaderStageCreateFlags=0u,const deUint32 tessellationControlShaderStageCreateFlags=0u,const deUint32 tessellationEvalShaderStageCreateFlags=0u,const deUint32 geometryShaderStageCreateFlags=0u,const deUint32 fragmentShaderStageCreateFlags=0u,const deUint32 requiredSubgroupSize[5]=DE_NULL)523 Move<VkPipeline> makeGraphicsPipeline (Context&									context,
524 									   const VkPipelineLayout					pipelineLayout,
525 									   const VkShaderStageFlags					stages,
526 									   const VkShaderModule						vertexShaderModule,
527 									   const VkShaderModule						fragmentShaderModule,
528 									   const VkShaderModule						geometryShaderModule,
529 									   const VkShaderModule						tessellationControlModule,
530 									   const VkShaderModule						tessellationEvaluationModule,
531 									   const VkRenderPass						renderPass,
532 									   const VkPrimitiveTopology				topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
533 									   const VkVertexInputBindingDescription*	vertexInputBindingDescription = DE_NULL,
534 									   const VkVertexInputAttributeDescription*	vertexInputAttributeDescriptions = DE_NULL,
535 									   const bool								frameBufferTests = false,
536 									   const vk::VkFormat						attachmentFormat = VK_FORMAT_R32G32B32A32_SFLOAT,
537 									   const deUint32							vertexShaderStageCreateFlags = 0u,
538 									   const deUint32							tessellationControlShaderStageCreateFlags = 0u,
539 									   const deUint32							tessellationEvalShaderStageCreateFlags = 0u,
540 									   const deUint32							geometryShaderStageCreateFlags = 0u,
541 									   const deUint32							fragmentShaderStageCreateFlags = 0u,
542 									   const deUint32							requiredSubgroupSize[5] = DE_NULL)
543 {
544 	const std::vector<VkViewport>				noViewports;
545 	const std::vector<VkRect2D>					noScissors;
546 	const VkPipelineVertexInputStateCreateInfo	vertexInputStateCreateInfo	=
547 	{
548 		VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,	// VkStructureType								sType;
549 		DE_NULL,													// const void*									pNext;
550 		0u,															// VkPipelineVertexInputStateCreateFlags		flags;
551 		vertexInputBindingDescription == DE_NULL ? 0u : 1u,			// deUint32										vertexBindingDescriptionCount;
552 		vertexInputBindingDescription,								// const VkVertexInputBindingDescription*		pVertexBindingDescriptions;
553 		vertexInputAttributeDescriptions == DE_NULL ? 0u : 1u,		// deUint32										vertexAttributeDescriptionCount;
554 		vertexInputAttributeDescriptions,							// const VkVertexInputAttributeDescription*		pVertexAttributeDescriptions;
555 	};
556 	const deUint32								numChannels					= getNumUsedChannels(mapVkFormat(attachmentFormat).order);
557 	const VkColorComponentFlags					colorComponent				= numChannels == 1 ? VK_COLOR_COMPONENT_R_BIT :
558 																			  numChannels == 2 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT :
559 																			  numChannels == 3 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT :
560 																			  VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
561 	const VkPipelineColorBlendAttachmentState	colorBlendAttachmentState	=
562 	{
563 		VK_FALSE,				//  VkBool32				blendEnable;
564 		VK_BLEND_FACTOR_ZERO,	//  VkBlendFactor			srcColorBlendFactor;
565 		VK_BLEND_FACTOR_ZERO,	//  VkBlendFactor			dstColorBlendFactor;
566 		VK_BLEND_OP_ADD,		//  VkBlendOp				colorBlendOp;
567 		VK_BLEND_FACTOR_ZERO,	//  VkBlendFactor			srcAlphaBlendFactor;
568 		VK_BLEND_FACTOR_ZERO,	//  VkBlendFactor			dstAlphaBlendFactor;
569 		VK_BLEND_OP_ADD,		//  VkBlendOp				alphaBlendOp;
570 		colorComponent			//  VkColorComponentFlags	colorWriteMask;
571 	};
572 	const VkPipelineColorBlendStateCreateInfo	colorBlendStateCreateInfo	=
573 	{
574 		VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,	//  VkStructureType								sType;
575 		DE_NULL,													//  const void*									pNext;
576 		0u,															//  VkPipelineColorBlendStateCreateFlags		flags;
577 		VK_FALSE,													//  VkBool32									logicOpEnable;
578 		VK_LOGIC_OP_CLEAR,											//  VkLogicOp									logicOp;
579 		1,															//  deUint32									attachmentCount;
580 		&colorBlendAttachmentState,									//  const VkPipelineColorBlendAttachmentState*	pAttachments;
581 		{ 0.0f, 0.0f, 0.0f, 0.0f }									//  float										blendConstants[4];
582 	};
583 	const deUint32								patchControlPoints			= (VK_SHADER_STAGE_FRAGMENT_BIT & stages && frameBufferTests) ? 2u : 1u;
584 
585 	return makeGraphicsPipeline(context.getDeviceInterface(),	// const DeviceInterface&                        vk
586 								context.getDevice(),			// const VkDevice                                device
587 								pipelineLayout,					// const VkPipelineLayout                        pipelineLayout
588 								vertexShaderModule,				// const VkShaderModule                          vertexShaderModule
589 								tessellationControlModule,		// const VkShaderModule                          tessellationControlShaderModule
590 								tessellationEvaluationModule,	// const VkShaderModule                          tessellationEvalShaderModule
591 								geometryShaderModule,			// const VkShaderModule                          geometryShaderModule
592 								fragmentShaderModule,			// const VkShaderModule                          fragmentShaderModule
593 								renderPass,						// const VkRenderPass                            renderPass
594 								noViewports,					// const std::vector<VkViewport>&                viewports
595 								noScissors,						// const std::vector<VkRect2D>&                  scissors
596 								topology,						// const VkPrimitiveTopology                     topology
597 								0u,								// const deUint32                                subpass
598 								patchControlPoints,				// const deUint32                                patchControlPoints
599 								&vertexInputStateCreateInfo,	// const VkPipelineVertexInputStateCreateInfo*   vertexInputStateCreateInfo
600 								DE_NULL,						// const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
601 								DE_NULL,						// const VkPipelineMultisampleStateCreateInfo*   multisampleStateCreateInfo
602 								DE_NULL,						// const VkPipelineDepthStencilStateCreateInfo*  depthStencilStateCreateInfo
603 								&colorBlendStateCreateInfo,		// const VkPipelineColorBlendStateCreateInfo*    colorBlendStateCreateInfo
604 								DE_NULL,						// const VkPipelineDynamicStateCreateInfo*
605 								vertexShaderStageCreateFlags,	// const deUint32								 vertexShaderStageCreateFlags,
606 								tessellationControlShaderStageCreateFlags,	// const deUint32					 tessellationControlShaderStageCreateFlags
607 								tessellationEvalShaderStageCreateFlags,		// const deUint32					 tessellationEvalShaderStageCreateFlags
608 								geometryShaderStageCreateFlags,	// const deUint32								 geometryShaderStageCreateFlags
609 								fragmentShaderStageCreateFlags,	// const deUint32								 fragmentShaderStageCreateFlags
610 								requiredSubgroupSize);			// const deUint32								 requiredSubgroupSize[5]
611 }
612 
makeCommandBuffer(Context & context,const VkCommandPool commandPool)613 Move<VkCommandBuffer> makeCommandBuffer (Context& context, const VkCommandPool commandPool)
614 {
615 	const VkCommandBufferAllocateInfo bufferAllocateParams =
616 	{
617 		VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,	// VkStructureType		sType;
618 		DE_NULL,										// const void*			pNext;
619 		commandPool,									// VkCommandPool		commandPool;
620 		VK_COMMAND_BUFFER_LEVEL_PRIMARY,				// VkCommandBufferLevel	level;
621 		1u,												// deUint32				bufferCount;
622 	};
623 	return allocateCommandBuffer(context.getDeviceInterface(),
624 								 context.getDevice(), &bufferAllocateParams);
625 }
626 
627 struct Buffer;
628 struct Image;
629 
630 struct BufferOrImage
631 {
isImage__anond12606dc0111::BufferOrImage632 	bool isImage() const
633 	{
634 		return m_isImage;
635 	}
636 
getAsBuffer__anond12606dc0111::BufferOrImage637 	Buffer* getAsBuffer()
638 	{
639 		if (m_isImage) DE_FATAL("Trying to get a buffer as an image!");
640 		return reinterpret_cast<Buffer* >(this);
641 	}
642 
getAsImage__anond12606dc0111::BufferOrImage643 	Image* getAsImage()
644 	{
645 		if (!m_isImage) DE_FATAL("Trying to get an image as a buffer!");
646 		return reinterpret_cast<Image*>(this);
647 	}
648 
getType__anond12606dc0111::BufferOrImage649 	virtual VkDescriptorType getType() const
650 	{
651 		if (m_isImage)
652 		{
653 			return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
654 		}
655 		else
656 		{
657 			return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
658 		}
659 	}
660 
getAllocation__anond12606dc0111::BufferOrImage661 	Allocation& getAllocation() const
662 	{
663 		return *m_allocation;
664 	}
665 
~BufferOrImage__anond12606dc0111::BufferOrImage666 	virtual ~BufferOrImage() {}
667 
668 protected:
BufferOrImage__anond12606dc0111::BufferOrImage669 	explicit BufferOrImage(bool image) : m_isImage(image) {}
670 
671 	bool m_isImage;
672 	de::details::MovePtr<Allocation> m_allocation;
673 };
674 
675 struct Buffer : public BufferOrImage
676 {
Buffer__anond12606dc0111::Buffer677 	explicit Buffer (Context& context, VkDeviceSize sizeInBytes, VkBufferUsageFlags usage)
678 		: BufferOrImage		(false)
679 		, m_sizeInBytes		(sizeInBytes)
680 		, m_usage			(usage)
681 	{
682 		const DeviceInterface&			vkd					= context.getDeviceInterface();
683 		const VkDevice					device				= context.getDevice();
684 
685 		const vk::VkBufferCreateInfo	bufferCreateInfo	=
686 		{
687 			VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
688 			DE_NULL,
689 			0u,
690 			m_sizeInBytes,
691 			m_usage,
692 			VK_SHARING_MODE_EXCLUSIVE,
693 			0u,
694 			DE_NULL,
695 		};
696 		m_buffer		= createBuffer(vkd, device, &bufferCreateInfo);
697 
698 		VkMemoryRequirements			req					= getBufferMemoryRequirements(vkd, device, *m_buffer);
699 
700 		m_allocation	= context.getDefaultAllocator().allocate(req, MemoryRequirement::HostVisible);
701 		VK_CHECK(vkd.bindBufferMemory(device, *m_buffer, m_allocation->getMemory(), m_allocation->getOffset()));
702 	}
703 
getType__anond12606dc0111::Buffer704 	virtual VkDescriptorType getType() const
705 	{
706 		if (VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT == m_usage)
707 		{
708 			return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
709 		}
710 		return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
711 	}
712 
getBuffer__anond12606dc0111::Buffer713 	VkBuffer getBuffer () const
714 	{
715 		return *m_buffer;
716 	}
717 
getBufferPtr__anond12606dc0111::Buffer718 	const VkBuffer* getBufferPtr () const
719 	{
720 		return &(*m_buffer);
721 	}
722 
getSize__anond12606dc0111::Buffer723 	VkDeviceSize getSize () const
724 	{
725 		return m_sizeInBytes;
726 	}
727 
728 private:
729 	Move<VkBuffer>				m_buffer;
730 	VkDeviceSize				m_sizeInBytes;
731 	const VkBufferUsageFlags	m_usage;
732 };
733 
734 struct Image : public BufferOrImage
735 {
Image__anond12606dc0111::Image736 	explicit Image (Context& context, deUint32 width, deUint32 height, VkFormat format, VkImageUsageFlags usage = VK_IMAGE_USAGE_STORAGE_BIT)
737 		: BufferOrImage(true)
738 	{
739 		const DeviceInterface&			vk					= context.getDeviceInterface();
740 		const VkDevice					device				= context.getDevice();
741 		const deUint32					queueFamilyIndex	= context.getUniversalQueueFamilyIndex();
742 
743 		const VkImageCreateInfo			imageCreateInfo		=
744 		{
745 			VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//  VkStructureType			sType;
746 			DE_NULL,								//  const void*				pNext;
747 			0,										//  VkImageCreateFlags		flags;
748 			VK_IMAGE_TYPE_2D,						//  VkImageType				imageType;
749 			format,									//  VkFormat				format;
750 			{width, height, 1},						//  VkExtent3D				extent;
751 			1,										//  deUint32				mipLevels;
752 			1,										//  deUint32				arrayLayers;
753 			VK_SAMPLE_COUNT_1_BIT,					//  VkSampleCountFlagBits	samples;
754 			VK_IMAGE_TILING_OPTIMAL,				//  VkImageTiling			tiling;
755 			usage,									//  VkImageUsageFlags		usage;
756 			VK_SHARING_MODE_EXCLUSIVE,				//  VkSharingMode			sharingMode;
757 			0u,										//  deUint32				queueFamilyIndexCount;
758 			DE_NULL,								//  const deUint32*			pQueueFamilyIndices;
759 			VK_IMAGE_LAYOUT_UNDEFINED				//  VkImageLayout			initialLayout;
760 		};
761 
762 		const VkComponentMapping		componentMapping	=
763 		{
764 			VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
765 			VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY
766 		};
767 
768 		const VkImageSubresourceRange	subresourceRange	=
769 		{
770 			VK_IMAGE_ASPECT_COLOR_BIT,	//VkImageAspectFlags	aspectMask
771 			0u,							//deUint32				baseMipLevel
772 			1u,							//deUint32				levelCount
773 			0u,							//deUint32				baseArrayLayer
774 			1u							//deUint32				layerCount
775 		};
776 
777 		const VkSamplerCreateInfo		samplerCreateInfo	=
778 		{
779 			VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,		//  VkStructureType			sType;
780 			DE_NULL,									//  const void*				pNext;
781 			0u,											//  VkSamplerCreateFlags	flags;
782 			VK_FILTER_NEAREST,							//  VkFilter				magFilter;
783 			VK_FILTER_NEAREST,							//  VkFilter				minFilter;
784 			VK_SAMPLER_MIPMAP_MODE_NEAREST,				//  VkSamplerMipmapMode		mipmapMode;
785 			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,		//  VkSamplerAddressMode	addressModeU;
786 			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,		//  VkSamplerAddressMode	addressModeV;
787 			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,		//  VkSamplerAddressMode	addressModeW;
788 			0.0f,										//  float					mipLodBias;
789 			VK_FALSE,									//  VkBool32				anisotropyEnable;
790 			1.0f,										//  float					maxAnisotropy;
791 			DE_FALSE,									//  VkBool32				compareEnable;
792 			VK_COMPARE_OP_ALWAYS,						//  VkCompareOp				compareOp;
793 			0.0f,										//  float					minLod;
794 			0.0f,										//  float					maxLod;
795 			VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,	//  VkBorderColor			borderColor;
796 			VK_FALSE,									//  VkBool32				unnormalizedCoordinates;
797 		};
798 
799 		m_image			= createImage(vk, device, &imageCreateInfo);
800 
801 		VkMemoryRequirements			req					= getImageMemoryRequirements(vk, device, *m_image);
802 
803 		req.size		*= 2;
804 		m_allocation	= context.getDefaultAllocator().allocate(req, MemoryRequirement::Any);
805 
806 		VK_CHECK(vk.bindImageMemory(device, *m_image, m_allocation->getMemory(), m_allocation->getOffset()));
807 
808 		const VkImageViewCreateInfo		imageViewCreateInfo	=
809 		{
810 			VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,	//  VkStructureType			sType;
811 			DE_NULL,									//  const void*				pNext;
812 			0,											//  VkImageViewCreateFlags	flags;
813 			*m_image,									//  VkImage					image;
814 			VK_IMAGE_VIEW_TYPE_2D,						//  VkImageViewType			viewType;
815 			imageCreateInfo.format,						//  VkFormat				format;
816 			componentMapping,							//  VkComponentMapping		components;
817 			subresourceRange							//  VkImageSubresourceRange	subresourceRange;
818 		};
819 
820 		m_imageView		= createImageView(vk, device, &imageViewCreateInfo);
821 		m_sampler		= createSampler(vk, device, &samplerCreateInfo);
822 
823 		// Transition input image layouts
824 		{
825 			const Unique<VkCommandPool>		cmdPool			(makeCommandPool(vk, device, queueFamilyIndex));
826 			const Unique<VkCommandBuffer>	cmdBuffer		(makeCommandBuffer(context, *cmdPool));
827 
828 			beginCommandBuffer(vk, *cmdBuffer);
829 
830 			const VkImageMemoryBarrier		imageBarrier	= makeImageMemoryBarrier((VkAccessFlags)0u, VK_ACCESS_TRANSFER_WRITE_BIT,
831 																	VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, *m_image, subresourceRange);
832 
833 			vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
834 				(VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &imageBarrier);
835 
836 			endCommandBuffer(vk, *cmdBuffer);
837 			submitCommandsAndWait(vk, device, context.getUniversalQueue(), *cmdBuffer);
838 		}
839 	}
840 
getImage__anond12606dc0111::Image841 	VkImage getImage () const
842 	{
843 		return *m_image;
844 	}
845 
getImageView__anond12606dc0111::Image846 	VkImageView getImageView () const
847 	{
848 		return *m_imageView;
849 	}
850 
getSampler__anond12606dc0111::Image851 	VkSampler getSampler () const
852 	{
853 		return *m_sampler;
854 	}
855 
856 private:
857 	Move<VkImage>		m_image;
858 	Move<VkImageView>	m_imageView;
859 	Move<VkSampler>		m_sampler;
860 };
861 }
862 
getStagesCount(const VkShaderStageFlags shaderStages)863 deUint32 vkt::subgroups::getStagesCount (const VkShaderStageFlags shaderStages)
864 {
865 	const deUint32	stageCount	= isAllGraphicsStages(shaderStages)    ? 4
866 								: isAllComputeStages(shaderStages)     ? 1
867 #ifndef CTS_USES_VULKANSC
868 								: isAllRayTracingStages(shaderStages)  ? 6
869 								: isAllMeshShadingStages(shaderStages) ? 1
870 #endif // CTS_USES_VULKANSC
871 								: 0;
872 
873 	DE_ASSERT(stageCount != 0);
874 
875 	return stageCount;
876 }
877 
getSharedMemoryBallotHelper()878 std::string vkt::subgroups::getSharedMemoryBallotHelper ()
879 {
880 	return	"shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
881 			"uvec4 sharedMemoryBallot(bool vote)\n"
882 			"{\n"
883 			"  uint groupOffset = gl_SubgroupID;\n"
884 			"  // One invocation in the group 0's the whole group's data\n"
885 			"  if (subgroupElect())\n"
886 			"  {\n"
887 			"    superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
888 			"  }\n"
889 			"  subgroupMemoryBarrierShared();\n"
890 			"  if (vote)\n"
891 			"  {\n"
892 			"    const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
893 			"    const highp uint bitToSet = 1u << invocationId;\n"
894 			"    switch (gl_SubgroupInvocationID / 32)\n"
895 			"    {\n"
896 			"    case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
897 			"    case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
898 			"    case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
899 			"    case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
900 			"    }\n"
901 			"  }\n"
902 			"  subgroupMemoryBarrierShared();\n"
903 			"  return superSecretComputeShaderHelper[groupOffset];\n"
904 			"}\n";
905 }
906 
getSharedMemoryBallotHelperARB()907 std::string vkt::subgroups::getSharedMemoryBallotHelperARB ()
908 {
909 	return	"shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
910 			"uint64_t sharedMemoryBallot(bool vote)\n"
911 			"{\n"
912 			"  uint groupOffset = gl_SubgroupID;\n"
913 			"  // One invocation in the group 0's the whole group's data\n"
914 			"  if (subgroupElect())\n"
915 			"  {\n"
916 			"    superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
917 			"  }\n"
918 			"  subgroupMemoryBarrierShared();\n"
919 			"  if (vote)\n"
920 			"  {\n"
921 			"    const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
922 			"    const highp uint bitToSet = 1u << invocationId;\n"
923 			"    switch (gl_SubgroupInvocationID / 32)\n"
924 			"    {\n"
925 			"    case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
926 			"    case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
927 			"    case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
928 			"    case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
929 			"    }\n"
930 			"  }\n"
931 			"  subgroupMemoryBarrierShared();\n"
932 			"  return packUint2x32(superSecretComputeShaderHelper[groupOffset].xy);\n"
933 			"}\n";
934 }
935 
getSubgroupSize(Context & context)936 deUint32 vkt::subgroups::getSubgroupSize (Context& context)
937 {
938 	return context.getSubgroupProperties().subgroupSize;
939 }
940 
maxSupportedSubgroupSize()941 deUint32 vkt::subgroups::maxSupportedSubgroupSize ()
942 {
943 	return 128u;
944 }
945 
getShaderStageName(VkShaderStageFlags stage)946 std::string vkt::subgroups::getShaderStageName (VkShaderStageFlags stage)
947 {
948 	switch (stage)
949 	{
950 		case VK_SHADER_STAGE_COMPUTE_BIT:					return "compute";
951 		case VK_SHADER_STAGE_FRAGMENT_BIT:					return "fragment";
952 		case VK_SHADER_STAGE_VERTEX_BIT:					return "vertex";
953 		case VK_SHADER_STAGE_GEOMETRY_BIT:					return "geometry";
954 		case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:		return "tess_control";
955 		case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:	return "tess_eval";
956 #ifndef CTS_USES_VULKANSC
957 		case VK_SHADER_STAGE_RAYGEN_BIT_KHR:				return "rgen";
958 		case VK_SHADER_STAGE_ANY_HIT_BIT_KHR:				return "ahit";
959 		case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:			return "chit";
960 		case VK_SHADER_STAGE_MISS_BIT_KHR:					return "miss";
961 		case VK_SHADER_STAGE_INTERSECTION_BIT_KHR:			return "sect";
962 		case VK_SHADER_STAGE_CALLABLE_BIT_KHR:				return "call";
963 		case VK_SHADER_STAGE_MESH_BIT_EXT:					return "mesh";
964 		case VK_SHADER_STAGE_TASK_BIT_EXT:					return "task";
965 #endif // CTS_USES_VULKANSC
966 		default:											TCU_THROW(InternalError, "Unhandled stage");
967 	}
968 }
969 
getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit)970 std::string vkt::subgroups::getSubgroupFeatureName (vk::VkSubgroupFeatureFlagBits bit)
971 {
972 	switch (bit)
973 	{
974 		case VK_SUBGROUP_FEATURE_BASIC_BIT:				return "VK_SUBGROUP_FEATURE_BASIC_BIT";
975 		case VK_SUBGROUP_FEATURE_VOTE_BIT:				return "VK_SUBGROUP_FEATURE_VOTE_BIT";
976 		case VK_SUBGROUP_FEATURE_ARITHMETIC_BIT:		return "VK_SUBGROUP_FEATURE_ARITHMETIC_BIT";
977 		case VK_SUBGROUP_FEATURE_BALLOT_BIT:			return "VK_SUBGROUP_FEATURE_BALLOT_BIT";
978 		case VK_SUBGROUP_FEATURE_SHUFFLE_BIT:			return "VK_SUBGROUP_FEATURE_SHUFFLE_BIT";
979 		case VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT:	return "VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT";
980 		case VK_SUBGROUP_FEATURE_CLUSTERED_BIT:			return "VK_SUBGROUP_FEATURE_CLUSTERED_BIT";
981 		case VK_SUBGROUP_FEATURE_QUAD_BIT:				return "VK_SUBGROUP_FEATURE_QUAD_BIT";
982 		default:										TCU_THROW(InternalError, "Unknown subgroup feature category");
983 	}
984 }
985 
addNoSubgroupShader(SourceCollections & programCollection)986 void vkt::subgroups::addNoSubgroupShader (SourceCollections& programCollection)
987 {
988 	{
989 	/*
990 		"#version 450\n"
991 		"void main (void)\n"
992 		"{\n"
993 		"  float pixelSize = 2.0f/1024.0f;\n"
994 		"   float pixelPosition = pixelSize/2.0f - 1.0f;\n"
995 		"  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
996 		"  gl_PointSize = 1.0f;\n"
997 		"}\n"
998 	*/
999 		const std::string vertNoSubgroup =
1000 			"; SPIR-V\n"
1001 			"; Version: 1.3\n"
1002 			"; Generator: Khronos Glslang Reference Front End; 1\n"
1003 			"; Bound: 37\n"
1004 			"; Schema: 0\n"
1005 			"OpCapability Shader\n"
1006 			"%1 = OpExtInstImport \"GLSL.std.450\"\n"
1007 			"OpMemoryModel Logical GLSL450\n"
1008 			"OpEntryPoint Vertex %4 \"main\" %22 %26\n"
1009 			"OpMemberDecorate %20 0 BuiltIn Position\n"
1010 			"OpMemberDecorate %20 1 BuiltIn PointSize\n"
1011 			"OpMemberDecorate %20 2 BuiltIn ClipDistance\n"
1012 			"OpMemberDecorate %20 3 BuiltIn CullDistance\n"
1013 			"OpDecorate %20 Block\n"
1014 			"OpDecorate %26 BuiltIn VertexIndex\n"
1015 			"%2 = OpTypeVoid\n"
1016 			"%3 = OpTypeFunction %2\n"
1017 			"%6 = OpTypeFloat 32\n"
1018 			"%7 = OpTypePointer Function %6\n"
1019 			"%9 = OpConstant %6 0.00195313\n"
1020 			"%12 = OpConstant %6 2\n"
1021 			"%14 = OpConstant %6 1\n"
1022 			"%16 = OpTypeVector %6 4\n"
1023 			"%17 = OpTypeInt 32 0\n"
1024 			"%18 = OpConstant %17 1\n"
1025 			"%19 = OpTypeArray %6 %18\n"
1026 			"%20 = OpTypeStruct %16 %6 %19 %19\n"
1027 			"%21 = OpTypePointer Output %20\n"
1028 			"%22 = OpVariable %21 Output\n"
1029 			"%23 = OpTypeInt 32 1\n"
1030 			"%24 = OpConstant %23 0\n"
1031 			"%25 = OpTypePointer Input %23\n"
1032 			"%26 = OpVariable %25 Input\n"
1033 			"%33 = OpConstant %6 0\n"
1034 			"%35 = OpTypePointer Output %16\n"
1035 			"%37 = OpConstant %23 1\n"
1036 			"%38 = OpTypePointer Output %6\n"
1037 			"%4 = OpFunction %2 None %3\n"
1038 			"%5 = OpLabel\n"
1039 			"%8 = OpVariable %7 Function\n"
1040 			"%10 = OpVariable %7 Function\n"
1041 			"OpStore %8 %9\n"
1042 			"%11 = OpLoad %6 %8\n"
1043 			"%13 = OpFDiv %6 %11 %12\n"
1044 			"%15 = OpFSub %6 %13 %14\n"
1045 			"OpStore %10 %15\n"
1046 			"%27 = OpLoad %23 %26\n"
1047 			"%28 = OpConvertSToF %6 %27\n"
1048 			"%29 = OpLoad %6 %8\n"
1049 			"%30 = OpFMul %6 %28 %29\n"
1050 			"%31 = OpLoad %6 %10\n"
1051 			"%32 = OpFAdd %6 %30 %31\n"
1052 			"%34 = OpCompositeConstruct %16 %32 %33 %33 %14\n"
1053 			"%36 = OpAccessChain %35 %22 %24\n"
1054 			"OpStore %36 %34\n"
1055 			"%39 = OpAccessChain %38 %22 %37\n"
1056 			"OpStore %39 %14\n"
1057 			"OpReturn\n"
1058 			"OpFunctionEnd\n";
1059 		programCollection.spirvAsmSources.add("vert_noSubgroup") << vertNoSubgroup;
1060 	}
1061 
1062 	{
1063 	/*
1064 		"#version 450\n"
1065 		"layout(vertices=1) out;\n"
1066 		"\n"
1067 		"void main (void)\n"
1068 		"{\n"
1069 		"  if (gl_InvocationID == 0)\n"
1070 		"  {\n"
1071 		"    gl_TessLevelOuter[0] = 1.0f;\n"
1072 		"    gl_TessLevelOuter[1] = 1.0f;\n"
1073 		"  }\n"
1074 		"  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1075 		"}\n"
1076 	*/
1077 		const std::string tescNoSubgroup =
1078 			"; SPIR-V\n"
1079 			"; Version: 1.3\n"
1080 			"; Generator: Khronos Glslang Reference Front End; 1\n"
1081 			"; Bound: 45\n"
1082 			"; Schema: 0\n"
1083 			"OpCapability Tessellation\n"
1084 			"%1 = OpExtInstImport \"GLSL.std.450\"\n"
1085 			"OpMemoryModel Logical GLSL450\n"
1086 			"OpEntryPoint TessellationControl %4 \"main\" %8 %20 %32 %38\n"
1087 			"OpExecutionMode %4 OutputVertices 1\n"
1088 			"OpDecorate %8 BuiltIn InvocationId\n"
1089 			"OpDecorate %20 Patch\n"
1090 			"OpDecorate %20 BuiltIn TessLevelOuter\n"
1091 			"OpMemberDecorate %29 0 BuiltIn Position\n"
1092 			"OpMemberDecorate %29 1 BuiltIn PointSize\n"
1093 			"OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
1094 			"OpMemberDecorate %29 3 BuiltIn CullDistance\n"
1095 			"OpDecorate %29 Block\n"
1096 			"OpMemberDecorate %34 0 BuiltIn Position\n"
1097 			"OpMemberDecorate %34 1 BuiltIn PointSize\n"
1098 			"OpMemberDecorate %34 2 BuiltIn ClipDistance\n"
1099 			"OpMemberDecorate %34 3 BuiltIn CullDistance\n"
1100 			"OpDecorate %34 Block\n"
1101 			"%2 = OpTypeVoid\n"
1102 			"%3 = OpTypeFunction %2\n"
1103 			"%6 = OpTypeInt 32 1\n"
1104 			"%7 = OpTypePointer Input %6\n"
1105 			"%8 = OpVariable %7 Input\n"
1106 			"%10 = OpConstant %6 0\n"
1107 			"%11 = OpTypeBool\n"
1108 			"%15 = OpTypeFloat 32\n"
1109 			"%16 = OpTypeInt 32 0\n"
1110 			"%17 = OpConstant %16 4\n"
1111 			"%18 = OpTypeArray %15 %17\n"
1112 			"%19 = OpTypePointer Output %18\n"
1113 			"%20 = OpVariable %19 Output\n"
1114 			"%21 = OpConstant %15 1\n"
1115 			"%22 = OpTypePointer Output %15\n"
1116 			"%24 = OpConstant %6 1\n"
1117 			"%26 = OpTypeVector %15 4\n"
1118 			"%27 = OpConstant %16 1\n"
1119 			"%28 = OpTypeArray %15 %27\n"
1120 			"%29 = OpTypeStruct %26 %15 %28 %28\n"
1121 			"%30 = OpTypeArray %29 %27\n"
1122 			"%31 = OpTypePointer Output %30\n"
1123 			"%32 = OpVariable %31 Output\n"
1124 			"%34 = OpTypeStruct %26 %15 %28 %28\n"
1125 			"%35 = OpConstant %16 32\n"
1126 			"%36 = OpTypeArray %34 %35\n"
1127 			"%37 = OpTypePointer Input %36\n"
1128 			"%38 = OpVariable %37 Input\n"
1129 			"%40 = OpTypePointer Input %26\n"
1130 			"%43 = OpTypePointer Output %26\n"
1131 			"%4 = OpFunction %2 None %3\n"
1132 			"%5 = OpLabel\n"
1133 			"%9 = OpLoad %6 %8\n"
1134 			"%12 = OpIEqual %11 %9 %10\n"
1135 			"OpSelectionMerge %14 None\n"
1136 			"OpBranchConditional %12 %13 %14\n"
1137 			"%13 = OpLabel\n"
1138 			"%23 = OpAccessChain %22 %20 %10\n"
1139 			"OpStore %23 %21\n"
1140 			"%25 = OpAccessChain %22 %20 %24\n"
1141 			"OpStore %25 %21\n"
1142 			"OpBranch %14\n"
1143 			"%14 = OpLabel\n"
1144 			"%33 = OpLoad %6 %8\n"
1145 			"%39 = OpLoad %6 %8\n"
1146 			"%41 = OpAccessChain %40 %38 %39 %10\n"
1147 			"%42 = OpLoad %26 %41\n"
1148 			"%44 = OpAccessChain %43 %32 %33 %10\n"
1149 			"OpStore %44 %42\n"
1150 			"OpReturn\n"
1151 			"OpFunctionEnd\n";
1152 		programCollection.spirvAsmSources.add("tesc_noSubgroup") << tescNoSubgroup;
1153 	}
1154 
1155 	{
1156 	/*
1157 		"#version 450\n"
1158 		"layout(isolines) in;\n"
1159 		"\n"
1160 		"void main (void)\n"
1161 		"{\n"
1162 		"  float pixelSize = 2.0f/1024.0f;\n"
1163 		"  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
1164 		"}\n";
1165 	*/
1166 		const std::string teseNoSubgroup =
1167 			"; SPIR-V\n"
1168 			"; Version: 1.3\n"
1169 			"; Generator: Khronos Glslang Reference Front End; 2\n"
1170 			"; Bound: 42\n"
1171 			"; Schema: 0\n"
1172 			"OpCapability Tessellation\n"
1173 			"%1 = OpExtInstImport \"GLSL.std.450\"\n"
1174 			"OpMemoryModel Logical GLSL450\n"
1175 			"OpEntryPoint TessellationEvaluation %4 \"main\" %16 %23 %29\n"
1176 			"OpExecutionMode %4 Isolines\n"
1177 			"OpExecutionMode %4 SpacingEqual\n"
1178 			"OpExecutionMode %4 VertexOrderCcw\n"
1179 			"OpMemberDecorate %14 0 BuiltIn Position\n"
1180 			"OpMemberDecorate %14 1 BuiltIn PointSize\n"
1181 			"OpMemberDecorate %14 2 BuiltIn ClipDistance\n"
1182 			"OpMemberDecorate %14 3 BuiltIn CullDistance\n"
1183 			"OpDecorate %14 Block\n"
1184 			"OpMemberDecorate %19 0 BuiltIn Position\n"
1185 			"OpMemberDecorate %19 1 BuiltIn PointSize\n"
1186 			"OpMemberDecorate %19 2 BuiltIn ClipDistance\n"
1187 			"OpMemberDecorate %19 3 BuiltIn CullDistance\n"
1188 			"OpDecorate %19 Block\n"
1189 			"OpDecorate %29 BuiltIn TessCoord\n"
1190 			"%2 = OpTypeVoid\n"
1191 			"%3 = OpTypeFunction %2\n"
1192 			"%6 = OpTypeFloat 32\n"
1193 			"%7 = OpTypePointer Function %6\n"
1194 			"%9 = OpConstant %6 0.00195313\n"
1195 			"%10 = OpTypeVector %6 4\n"
1196 			"%11 = OpTypeInt 32 0\n"
1197 			"%12 = OpConstant %11 1\n"
1198 			"%13 = OpTypeArray %6 %12\n"
1199 			"%14 = OpTypeStruct %10 %6 %13 %13\n"
1200 			"%15 = OpTypePointer Output %14\n"
1201 			"%16 = OpVariable %15 Output\n"
1202 			"%17 = OpTypeInt 32 1\n"
1203 			"%18 = OpConstant %17 0\n"
1204 			"%19 = OpTypeStruct %10 %6 %13 %13\n"
1205 			"%20 = OpConstant %11 32\n"
1206 			"%21 = OpTypeArray %19 %20\n"
1207 			"%22 = OpTypePointer Input %21\n"
1208 			"%23 = OpVariable %22 Input\n"
1209 			"%24 = OpTypePointer Input %10\n"
1210 			"%27 = OpTypeVector %6 3\n"
1211 			"%28 = OpTypePointer Input %27\n"
1212 			"%29 = OpVariable %28 Input\n"
1213 			"%30 = OpConstant %11 0\n"
1214 			"%31 = OpTypePointer Input %6\n"
1215 			"%36 = OpConstant %6 2\n"
1216 			"%40 = OpTypePointer Output %10\n"
1217 			"%4 = OpFunction %2 None %3\n"
1218 			"%5 = OpLabel\n"
1219 			"%8 = OpVariable %7 Function\n"
1220 			"OpStore %8 %9\n"
1221 			"%25 = OpAccessChain %24 %23 %18 %18\n"
1222 			"%26 = OpLoad %10 %25\n"
1223 			"%32 = OpAccessChain %31 %29 %30\n"
1224 			"%33 = OpLoad %6 %32\n"
1225 			"%34 = OpLoad %6 %8\n"
1226 			"%35 = OpFMul %6 %33 %34\n"
1227 			"%37 = OpFDiv %6 %35 %36\n"
1228 			"%38 = OpCompositeConstruct %10 %37 %37 %37 %37\n"
1229 			"%39 = OpFAdd %10 %26 %38\n"
1230 			"%41 = OpAccessChain %40 %16 %18\n"
1231 			"OpStore %41 %39\n"
1232 			"OpReturn\n"
1233 			"OpFunctionEnd\n";
1234 		programCollection.spirvAsmSources.add("tese_noSubgroup") << teseNoSubgroup;
1235 	}
1236 
1237 }
1238 
getFramebufferBufferDeclarations(const VkFormat & format,const std::vector<std::string> & declarations,const deUint32 stage)1239 static std::string getFramebufferBufferDeclarations (const VkFormat&					format,
1240 													 const std::vector<std::string>&	declarations,
1241 													 const deUint32						stage)
1242 {
1243 	if (declarations.empty())
1244 	{
1245 		const std::string	name	= (stage == 0) ? "result" : "out_color";
1246 		const std::string	suffix	= (stage == 2) ? "[]" : "";
1247 		const std::string	result	=
1248 			"layout(location = 0) out float " + name + suffix + ";\n"
1249 			"layout(set = 0, binding = 0) uniform Buffer1\n"
1250 			"{\n"
1251 			"  " + de::toString(subgroups::getFormatNameForGLSL(format)) + " data[" + de::toString(subgroups::maxSupportedSubgroupSize()) + "];\n"
1252 			"};\n";
1253 
1254 		return result;
1255 	}
1256 	else
1257 	{
1258 		return declarations[stage];
1259 	}
1260 }
1261 
initStdFrameBufferPrograms(SourceCollections & programCollection,const vk::ShaderBuildOptions & buildOptions,VkShaderStageFlags shaderStage,VkFormat format,bool gsPointSize,const std::string & extHeader,const std::string & testSrc,const std::string & helperStr,const std::vector<std::string> & declarations)1262 void vkt::subgroups::initStdFrameBufferPrograms (SourceCollections&					programCollection,
1263 												 const vk::ShaderBuildOptions&		buildOptions,
1264 												 VkShaderStageFlags					shaderStage,
1265 												 VkFormat							format,
1266 												 bool								gsPointSize,
1267 												 const std::string&					extHeader,
1268 												 const std::string&					testSrc,
1269 												 const std::string&					helperStr,
1270 												 const std::vector<std::string>&	declarations)
1271 {
1272 	subgroups::setFragmentShaderFrameBuffer(programCollection);
1273 
1274 	if (shaderStage != VK_SHADER_STAGE_VERTEX_BIT)
1275 		subgroups::setVertexShaderFrameBuffer(programCollection);
1276 
1277 	if (shaderStage == VK_SHADER_STAGE_VERTEX_BIT)
1278 	{
1279 		std::ostringstream vertex;
1280 
1281 		vertex << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1282 			<< extHeader
1283 			<< "layout(location = 0) in highp vec4 in_position;\n"
1284 			<< getFramebufferBufferDeclarations(format, declarations, 0)
1285 			<< "\n"
1286 			<< helperStr
1287 			<< "void main (void)\n"
1288 			<< "{\n"
1289 			<< "  uint tempRes;\n"
1290 			<< testSrc
1291 			<< "  result = float(tempRes);\n"
1292 			<< "  gl_Position = in_position;\n"
1293 			<< "  gl_PointSize = 1.0f;\n"
1294 			<< "}\n";
1295 
1296 		programCollection.glslSources.add("vert") << glu::VertexSource(vertex.str()) << buildOptions;
1297 	}
1298 	else if (shaderStage == VK_SHADER_STAGE_GEOMETRY_BIT)
1299 	{
1300 		std::ostringstream geometry;
1301 
1302 		geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1303 			<< extHeader
1304 			<< "layout(points) in;\n"
1305 			<< "layout(points, max_vertices = 1) out;\n"
1306 			<< getFramebufferBufferDeclarations(format, declarations, 1)
1307 			<< "\n"
1308 			<< helperStr
1309 			<< "void main (void)\n"
1310 			<< "{\n"
1311 			<< "  uint tempRes;\n"
1312 			<< testSrc
1313 			<< "  out_color = float(tempRes);\n"
1314 			<< "  gl_Position = gl_in[0].gl_Position;\n"
1315 			<< (gsPointSize ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "")
1316 			<< "  EmitVertex();\n"
1317 			<< "  EndPrimitive();\n"
1318 			<< "}\n";
1319 
1320 		programCollection.glslSources.add("geometry") << glu::GeometrySource(geometry.str()) << buildOptions;
1321 	}
1322 	else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
1323 	{
1324 		std::ostringstream controlSource;
1325 
1326 		controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1327 			<< extHeader
1328 			<< "layout(vertices = 2) out;\n"
1329 			<< getFramebufferBufferDeclarations(format, declarations, 2)
1330 			<< "\n"
1331 			<< helperStr
1332 			<< "void main (void)\n"
1333 			<< "{\n"
1334 			<< "  if (gl_InvocationID == 0)\n"
1335 			<< "  {\n"
1336 			<< "    gl_TessLevelOuter[0] = 1.0f;\n"
1337 			<< "    gl_TessLevelOuter[1] = 1.0f;\n"
1338 			<< "  }\n"
1339 			<< "  uint tempRes;\n"
1340 			<< testSrc
1341 			<< "  out_color[gl_InvocationID] = float(tempRes);\n"
1342 			<< "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1343 			<< (gsPointSize ? "  gl_out[gl_InvocationID].gl_PointSize = gl_in[gl_InvocationID].gl_PointSize;\n" : "")
1344 			<< "}\n";
1345 
1346 		programCollection.glslSources.add("tesc") << glu::TessellationControlSource(controlSource.str()) << buildOptions;
1347 		subgroups::setTesEvalShaderFrameBuffer(programCollection);
1348 	}
1349 	else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
1350 	{
1351 		ostringstream evaluationSource;
1352 
1353 		evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1354 			<< extHeader
1355 			<< "layout(isolines, equal_spacing, ccw ) in;\n"
1356 			<< getFramebufferBufferDeclarations(format, declarations, 3)
1357 			<< "\n"
1358 			<< helperStr
1359 			<< "void main (void)\n"
1360 			<< "{\n"
1361 			<< "  uint tempRes;\n"
1362 			<< testSrc
1363 			<< "  out_color = float(tempRes);\n"
1364 			<< "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
1365 			<< (gsPointSize ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "")
1366 			<< "}\n";
1367 
1368 		subgroups::setTesCtrlShaderFrameBuffer(programCollection);
1369 		programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
1370 	}
1371 	else
1372 	{
1373 		DE_FATAL("Unsupported shader stage");
1374 	}
1375 }
1376 
getBufferDeclarations(vk::VkShaderStageFlags shaderStage,const std::string & formatName,const std::vector<std::string> & declarations,const deUint32 stage)1377 static std::string getBufferDeclarations (vk::VkShaderStageFlags			shaderStage,
1378 										  const std::string&				formatName,
1379 										  const std::vector<std::string>&	declarations,
1380 										  const deUint32					stage)
1381 {
1382 	if (declarations.empty())
1383 	{
1384 		const deUint32	stageCount	= vkt::subgroups::getStagesCount(shaderStage);
1385 		const deUint32	binding0	= stage;
1386 		const deUint32	binding1	= stageCount;
1387 		const bool		fragment	= (shaderStage & VK_SHADER_STAGE_FRAGMENT_BIT) && (stage == stageCount);
1388 		const string	buffer1		= fragment
1389 									? "layout(location = 0) out uint result;\n"
1390 									: "layout(set = 0, binding = " + de::toString(binding0) + ", std430) buffer Buffer1\n"
1391 									  "{\n"
1392 									  "  uint result[];\n"
1393 									  "};\n";
1394 		//todo boza I suppose it can be "layout(set = 0, binding = " + de::toString(binding1) + ", std430) readonly buffer Buffer2\n"
1395 		const string	buffer2		= "layout(set = 0, binding = " + de::toString(binding1) + ", std430)" + (stageCount == 1 ? "" : " readonly") + " buffer Buffer" + (fragment ? "1" : "2") + "\n"
1396 									  "{\n"
1397 									  "  " + formatName + " data[];\n"
1398 									  "};\n";
1399 
1400 		return buffer1 + buffer2;
1401 	}
1402 	else
1403 	{
1404 		return declarations[stage];
1405 	}
1406 }
1407 
initStdPrograms(vk::SourceCollections & programCollection,const vk::ShaderBuildOptions & buildOptions,vk::VkShaderStageFlags shaderStage,vk::VkFormat format,bool gsPointSize,const std::string & extHeader,const std::string & testSrc,const std::string & helperStr,const std::vector<std::string> & declarations,const bool avoidHelperInvocations,const std::string & tempRes)1408 void vkt::subgroups::initStdPrograms (vk::SourceCollections&			programCollection,
1409 									  const vk::ShaderBuildOptions&		buildOptions,
1410 									  vk::VkShaderStageFlags			shaderStage,
1411 									  vk::VkFormat						format,
1412 									  bool								gsPointSize,
1413 									  const std::string&				extHeader,
1414 									  const std::string&				testSrc,
1415 									  const std::string&				helperStr,
1416 									  const std::vector<std::string>&	declarations,
1417 									  const bool						avoidHelperInvocations,
1418 									  const std::string&				tempRes)
1419 {
1420 	const std::string	formatName	= subgroups::getFormatNameForGLSL(format);
1421 
1422 	if (isAllComputeStages(shaderStage))
1423 	{
1424 		std::ostringstream	src;
1425 
1426 		src << "#version 450\n"
1427 			<< extHeader
1428 			<< "layout (local_size_x_id = 0, local_size_y_id = 1, "
1429 			"local_size_z_id = 2) in;\n"
1430 			<< getBufferDeclarations(shaderStage, formatName, declarations, 0)
1431 			<< "\n"
1432 			<< helperStr
1433 			<< "void main (void)\n"
1434 			<< "{\n"
1435 			<< "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1436 			<< "  highp uint offset = globalSize.x * ((globalSize.y * "
1437 			"gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1438 			"gl_GlobalInvocationID.x;\n"
1439 			<< tempRes
1440 			<< testSrc
1441 			<< "  result[offset] = tempRes;\n"
1442 			<< "}\n";
1443 
1444 		programCollection.glslSources.add("comp") << glu::ComputeSource(src.str()) << buildOptions;
1445 	}
1446 #ifndef CTS_USES_VULKANSC
1447 	else if (isAllMeshShadingStages(shaderStage))
1448 	{
1449 		const bool testMesh = ((shaderStage & VK_SHADER_STAGE_MESH_BIT_EXT) != 0u);
1450 		const bool testTask = ((shaderStage & VK_SHADER_STAGE_TASK_BIT_EXT) != 0u);
1451 
1452 		if (testMesh)
1453 		{
1454 			std::ostringstream mesh;
1455 
1456 			mesh
1457 				<< "#version 450\n"
1458 				<< "#extension GL_EXT_mesh_shader : enable\n"
1459 				<< extHeader
1460 				<< "layout (local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n"
1461 				<< "layout (points) out;\n"
1462 				<< "layout (max_vertices = 1, max_primitives = 1) out;\n"
1463 				<< getBufferDeclarations(shaderStage, formatName, declarations, 0)
1464 				<< "\n"
1465 				<< helperStr
1466 				<< "void main (void)\n"
1467 				<< "{\n"
1468 				<< "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1469 				<< "  highp uint offset = globalSize.x * ((globalSize.y * "
1470 				"gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1471 				"gl_GlobalInvocationID.x;\n"
1472 				<< tempRes
1473 				<< testSrc
1474 				<< "  result[offset] = tempRes;\n"
1475 				<< "  SetMeshOutputsEXT(0u, 0u);\n"
1476 				<< "}\n";
1477 
1478 			programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
1479 		}
1480 		else
1481 		{
1482 			const std::string meshShaderNoSubgroups =
1483 				"#version 450\n"
1484 				"#extension GL_EXT_mesh_shader : enable\n"
1485 				"\n"
1486 				"layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
1487 				"layout (points) out;\n"
1488 				"layout (max_vertices = 1, max_primitives = 1) out;\n"
1489 				"\n"
1490 				"void main (void)\n"
1491 				"{\n"
1492 				"  SetMeshOutputsEXT(0u, 0u);\n"
1493 				"}\n"
1494 				;
1495 			programCollection.glslSources.add("mesh") << glu::MeshSource(meshShaderNoSubgroups) << buildOptions;
1496 		}
1497 
1498 		if (testTask)
1499 		{
1500 			const tcu::UVec3	emitSize = (testMesh ? tcu::UVec3(1u, 1u, 1u) : tcu::UVec3(0u, 0u, 0u));
1501 			std::ostringstream	task;
1502 
1503 			task
1504 				<< "#version 450\n"
1505 				<< "#extension GL_EXT_mesh_shader : enable\n"
1506 				//<< "#extension GL_NV_mesh_shader : enable\n"
1507 				<< extHeader
1508 				<< "layout (local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n"
1509 				<< getBufferDeclarations(shaderStage, formatName, declarations, 0)
1510 				<< "\n"
1511 				<< helperStr
1512 				<< "void main (void)\n"
1513 				<< "{\n"
1514 				<< "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1515 				//<< "  uvec3 globalSize = uvec3(0, 0, 0)/*gl_NumWorkGroups*/ * gl_WorkGroupSize;\n"
1516 				<< "  highp uint offset = globalSize.x * ((globalSize.y * "
1517 				"gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1518 				"gl_GlobalInvocationID.x;\n"
1519 				<< tempRes
1520 				<< testSrc
1521 				<< "  result[offset] = tempRes;\n"
1522 				<< "  EmitMeshTasksEXT(" << emitSize.x() << ", " << emitSize.y() << ", " << emitSize.z() << ");\n"
1523 				//<< "  gl_TaskCountNV = " << emitSize.x() << ";\n"
1524 				<< "}\n";
1525 
1526 			programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
1527 		}
1528 	}
1529 #endif // CTS_USES_VULKANSC
1530 	else if (isAllGraphicsStages(shaderStage))
1531 	{
1532 		const string vertex =
1533 			"#version 450\n"
1534 			+ extHeader
1535 			+ getBufferDeclarations(shaderStage, formatName, declarations, 0) +
1536 			"\n"
1537 			+ helperStr +
1538 			"void main (void)\n"
1539 			"{\n"
1540 			"  uint tempRes;\n"
1541 			+ testSrc +
1542 			"  result[gl_VertexIndex] = tempRes;\n"
1543 			"  float pixelSize = 2.0f/1024.0f;\n"
1544 			"  float pixelPosition = pixelSize/2.0f - 1.0f;\n"
1545 			"  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
1546 			"  gl_PointSize = 1.0f;\n"
1547 			"}\n";
1548 
1549 		const string tesc =
1550 			"#version 450\n"
1551 			+ extHeader +
1552 			"layout(vertices=1) out;\n"
1553 			+ getBufferDeclarations(shaderStage, formatName, declarations, 1) +
1554 			"\n"
1555 			+ helperStr +
1556 			"void main (void)\n"
1557 			"{\n"
1558 			+ tempRes
1559 			+ testSrc +
1560 			"  result[gl_PrimitiveID] = tempRes;\n"
1561 			"  if (gl_InvocationID == 0)\n"
1562 			"  {\n"
1563 			"    gl_TessLevelOuter[0] = 1.0f;\n"
1564 			"    gl_TessLevelOuter[1] = 1.0f;\n"
1565 			"  }\n"
1566 			"  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1567 			+ (gsPointSize ? "  gl_out[gl_InvocationID].gl_PointSize = gl_in[gl_InvocationID].gl_PointSize;\n" : "") +
1568 			"}\n";
1569 
1570 		const string tese =
1571 			"#version 450\n"
1572 			+ extHeader +
1573 			"layout(isolines) in;\n"
1574 			+ getBufferDeclarations(shaderStage, formatName, declarations, 2) +
1575 			"\n"
1576 			+ helperStr +
1577 			"void main (void)\n"
1578 			"{\n"
1579 			+ tempRes
1580 			+ testSrc +
1581 			"  result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempRes;\n"
1582 			"  float pixelSize = 2.0f/1024.0f;\n"
1583 			"  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
1584 			+ (gsPointSize ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "") +
1585 			"}\n";
1586 
1587 		const string geometry =
1588 			"#version 450\n"
1589 			+ extHeader +
1590 			"layout(${TOPOLOGY}) in;\n"
1591 			"layout(points, max_vertices = 1) out;\n"
1592 			+ getBufferDeclarations(shaderStage, formatName, declarations, 3) +
1593 			"\n"
1594 			+ helperStr +
1595 			"void main (void)\n"
1596 			"{\n"
1597 			+ tempRes
1598 			+ testSrc +
1599 			"  result[gl_PrimitiveIDIn] = tempRes;\n"
1600 			"  gl_Position = gl_in[0].gl_Position;\n"
1601 			+ (gsPointSize ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "") +
1602 			"  EmitVertex();\n"
1603 			"  EndPrimitive();\n"
1604 			"}\n";
1605 
1606 		const string fragment =
1607 			"#version 450\n"
1608 			+ extHeader
1609 			+ getBufferDeclarations(shaderStage, formatName, declarations, 4)
1610 			+ helperStr +
1611 			"void main (void)\n"
1612 			"{\n"
1613 			+ (avoidHelperInvocations ? "  if (gl_HelperInvocation) return;\n" : "")
1614 			+ tempRes
1615 			+ testSrc +
1616 			"  result = tempRes;\n"
1617 			"}\n";
1618 
1619 		subgroups::addNoSubgroupShader(programCollection);
1620 
1621 		programCollection.glslSources.add("vert") << glu::VertexSource(vertex) << buildOptions;
1622 		programCollection.glslSources.add("tesc") << glu::TessellationControlSource(tesc) << buildOptions;
1623 		programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(tese) << buildOptions;
1624 		subgroups::addGeometryShadersFromTemplate(geometry, buildOptions, programCollection.glslSources);
1625 		programCollection.glslSources.add("fragment") << glu::FragmentSource(fragment)<< buildOptions;
1626 	}
1627 #ifndef CTS_USES_VULKANSC
1628 	else if (isAllRayTracingStages(shaderStage))
1629 	{
1630 		const std::string	rgenShader	=
1631 			"#version 460 core\n"
1632 			"#extension GL_EXT_ray_tracing: require\n"
1633 			+ extHeader +
1634 			"layout(location = 0) rayPayloadEXT uvec4 payload;\n"
1635 			"layout(location = 0) callableDataEXT uvec4 callData;"
1636 			"layout(set = 1, binding = 0) uniform accelerationStructureEXT topLevelAS;\n"
1637 			+ getBufferDeclarations(shaderStage, formatName, declarations, 0) +
1638 			"\n"
1639 			+ helperStr +
1640 			"void main()\n"
1641 			"{\n"
1642 			+ tempRes
1643 			+ testSrc +
1644 			"  uint  rayFlags   = 0;\n"
1645 			"  uint  cullMask   = 0xFF;\n"
1646 			"  float tmin       = 0.0;\n"
1647 			"  float tmax       = 9.0;\n"
1648 			"  vec3  origin     = vec3((float(gl_LaunchIDEXT.x) + 0.5f) / float(gl_LaunchSizeEXT.x), (float(gl_LaunchIDEXT.y) + 0.5f) / float(gl_LaunchSizeEXT.y), 0.0);\n"
1649 			"  vec3  directHit  = vec3(0.0, 0.0, -1.0);\n"
1650 			"  vec3  directMiss = vec3(0.0, 0.0, +1.0);\n"
1651 			"\n"
1652 			"  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directHit, tmax, 0);\n"
1653 			"  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directMiss, tmax, 0);\n"
1654 			"  executeCallableEXT(0, 0);"
1655 			"  result[gl_LaunchIDEXT.x] = tempRes;\n"
1656 			"}\n";
1657 		const std::string	ahitShader	=
1658 			"#version 460 core\n"
1659 			"#extension GL_EXT_ray_tracing: require\n"
1660 			+ extHeader +
1661 			"hitAttributeEXT vec3 attribs;\n"
1662 			"layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
1663 			+ getBufferDeclarations(shaderStage, formatName, declarations, 1) +
1664 			"\n"
1665 			+ helperStr +
1666 			"void main()\n"
1667 			"{\n"
1668 			+ tempRes
1669 			+ testSrc +
1670 			"  result[gl_LaunchIDEXT.x] = tempRes;\n"
1671 			"}\n";
1672 		const std::string	chitShader	=
1673 			"#version 460 core\n"
1674 			"#extension GL_EXT_ray_tracing: require\n"
1675 			+ extHeader +
1676 			"hitAttributeEXT vec3 attribs;\n"
1677 			"layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
1678 			+ getBufferDeclarations(shaderStage, formatName, declarations, 2) +
1679 			"\n"
1680 			+ helperStr +
1681 			"void main()\n"
1682 			"{\n"
1683 			+ tempRes
1684 			+ testSrc +
1685 			"  result[gl_LaunchIDEXT.x] = tempRes;\n"
1686 			"}\n";
1687 		const std::string	missShader	=
1688 			"#version 460 core\n"
1689 			"#extension GL_EXT_ray_tracing: require\n"
1690 			+ extHeader +
1691 			"layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
1692 			+ getBufferDeclarations(shaderStage, formatName, declarations, 3) +
1693 			"\n"
1694 			+ helperStr +
1695 			"void main()\n"
1696 			"{\n"
1697 			+ tempRes
1698 			+ testSrc +
1699 			"  result[gl_LaunchIDEXT.x] = tempRes;\n"
1700 			"}\n";
1701 		const std::string	sectShader	=
1702 			"#version 460 core\n"
1703 			"#extension GL_EXT_ray_tracing: require\n"
1704 			+ extHeader +
1705 			"hitAttributeEXT vec3 hitAttribute;\n"
1706 			+ getBufferDeclarations(shaderStage, formatName, declarations, 4) +
1707 			"\n"
1708 			+ helperStr +
1709 			"void main()\n"
1710 			"{\n"
1711 			+ tempRes
1712 			+ testSrc +
1713 			"  reportIntersectionEXT(0.75f, 0x7Eu);\n"
1714 			"  result[gl_LaunchIDEXT.x] = tempRes;\n"
1715 			"}\n";
1716 		const std::string	callShader	=
1717 			"#version 460 core\n"
1718 			"#extension GL_EXT_ray_tracing: require\n"
1719 			+ extHeader +
1720 			"layout(location = 0) callableDataInEXT float callData;\n"
1721 			+ getBufferDeclarations(shaderStage, formatName, declarations, 5) +
1722 			"\n"
1723 			+ helperStr +
1724 			"void main()\n"
1725 			"{\n"
1726 			+ tempRes
1727 			+ testSrc +
1728 			"  result[gl_LaunchIDEXT.x] = tempRes;\n"
1729 			"}\n";
1730 
1731 		programCollection.glslSources.add("rgen") << glu::RaygenSource		(rgenShader) << buildOptions;
1732 		programCollection.glslSources.add("ahit") << glu::AnyHitSource		(ahitShader) << buildOptions;
1733 		programCollection.glslSources.add("chit") << glu::ClosestHitSource	(chitShader) << buildOptions;
1734 		programCollection.glslSources.add("miss") << glu::MissSource		(missShader) << buildOptions;
1735 		programCollection.glslSources.add("sect") << glu::IntersectionSource(sectShader) << buildOptions;
1736 		programCollection.glslSources.add("call") << glu::CallableSource	(callShader) << buildOptions;
1737 
1738 		subgroups::addRayTracingNoSubgroupShader(programCollection);
1739 	}
1740 #endif // CTS_USES_VULKANSC
1741 	else
1742 		TCU_THROW(InternalError, "Unknown stage or invalid stage set");
1743 
1744 }
1745 
isSubgroupSupported(Context & context)1746 bool vkt::subgroups::isSubgroupSupported (Context& context)
1747 {
1748 	return context.contextSupports(vk::ApiVersion(0, 1, 1, 0));
1749 }
1750 
areSubgroupOperationsSupportedForStage(Context & context,const VkShaderStageFlags stage)1751 bool vkt::subgroups::areSubgroupOperationsSupportedForStage (Context& context, const VkShaderStageFlags stage)
1752 {
1753 	return (stage & (context.getSubgroupProperties().supportedStages)) ? true : false;
1754 }
1755 
isSubgroupFeatureSupportedForDevice(Context & context,VkSubgroupFeatureFlagBits bit)1756 bool vkt::subgroups::isSubgroupFeatureSupportedForDevice (Context& context, VkSubgroupFeatureFlagBits bit)
1757 {
1758 	return (bit & (context.getSubgroupProperties().supportedOperations)) ? true : false;
1759 }
1760 
areQuadOperationsSupportedForStages(Context & context,const VkShaderStageFlags stages)1761 bool vkt::subgroups::areQuadOperationsSupportedForStages (Context& context, const VkShaderStageFlags stages)
1762 {
1763 	// Check general quad feature support first.
1764 	if (!isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_QUAD_BIT))
1765 		return false;
1766 
1767 	if (context.getSubgroupProperties().quadOperationsInAllStages == VK_TRUE)
1768 		return true; // No problem, any stage works.
1769 
1770 	// Only frag and compute are supported.
1771 	const VkShaderStageFlags fragCompute = (VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT);
1772 	const VkShaderStageFlags otherStages = ~fragCompute;
1773 	return ((stages & otherStages) == 0u);
1774 }
1775 
isFragmentSSBOSupportedForDevice(Context & context)1776 bool vkt::subgroups::isFragmentSSBOSupportedForDevice (Context& context)
1777 {
1778 	return context.getDeviceFeatures().fragmentStoresAndAtomics ? true : false;
1779 }
1780 
isVertexSSBOSupportedForDevice(Context & context)1781 bool vkt::subgroups::isVertexSSBOSupportedForDevice (Context& context)
1782 {
1783 	return context.getDeviceFeatures().vertexPipelineStoresAndAtomics ? true : false;
1784 }
1785 
isInt64SupportedForDevice(Context & context)1786 bool vkt::subgroups::isInt64SupportedForDevice (Context& context)
1787 {
1788 	return context.getDeviceFeatures().shaderInt64 ? true : false;
1789 }
1790 
isTessellationAndGeometryPointSizeSupported(Context & context)1791 bool vkt::subgroups::isTessellationAndGeometryPointSizeSupported (Context& context)
1792 {
1793 	return context.getDeviceFeatures().shaderTessellationAndGeometryPointSize ? true : false;
1794 }
1795 
is16BitUBOStorageSupported(Context & context)1796 bool vkt::subgroups::is16BitUBOStorageSupported (Context& context)
1797 {
1798 	return context.get16BitStorageFeatures().uniformAndStorageBuffer16BitAccess ? true : false;
1799 }
1800 
is8BitUBOStorageSupported(Context & context)1801 bool vkt::subgroups::is8BitUBOStorageSupported (Context& context)
1802 {
1803 	return context.get8BitStorageFeatures().uniformAndStorageBuffer8BitAccess ? true : false;
1804 }
1805 
isFormatSupportedForDevice(Context & context,vk::VkFormat format)1806 bool vkt::subgroups::isFormatSupportedForDevice (Context& context, vk::VkFormat format)
1807 {
1808 	const VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures&	subgroupExtendedTypesFeatures	= context.getShaderSubgroupExtendedTypesFeatures();
1809 	const VkPhysicalDeviceShaderFloat16Int8Features&			float16Int8Features				= context.getShaderFloat16Int8Features();
1810 	const VkPhysicalDevice16BitStorageFeatures&					storage16bit					= context.get16BitStorageFeatures();
1811 	const VkPhysicalDevice8BitStorageFeatures&					storage8bit						= context.get8BitStorageFeatures();
1812 	const VkPhysicalDeviceFeatures&								features						= context.getDeviceFeatures();
1813 	bool														shaderFloat64					= features.shaderFloat64 ? true : false;
1814 	bool														shaderInt16						= features.shaderInt16 ? true : false;
1815 	bool														shaderInt64						= features.shaderInt64 ? true : false;
1816 	bool														shaderSubgroupExtendedTypes		= false;
1817 	bool														shaderFloat16					= false;
1818 	bool														shaderInt8						= false;
1819 	bool														storageBuffer16BitAccess		= false;
1820 	bool														storageBuffer8BitAccess			= false;
1821 
1822 	if (context.isDeviceFunctionalitySupported("VK_KHR_shader_subgroup_extended_types") &&
1823 		context.isDeviceFunctionalitySupported("VK_KHR_shader_float16_int8"))
1824 	{
1825 		shaderSubgroupExtendedTypes	= subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes ? true : false;
1826 		shaderFloat16				= float16Int8Features.shaderFloat16 ? true : false;
1827 		shaderInt8					= float16Int8Features.shaderInt8 ? true : false;
1828 
1829 		if ( context.isDeviceFunctionalitySupported("VK_KHR_16bit_storage") )
1830 			storageBuffer16BitAccess = storage16bit.storageBuffer16BitAccess ? true : false;
1831 
1832 		if (context.isDeviceFunctionalitySupported("VK_KHR_8bit_storage"))
1833 			storageBuffer8BitAccess = storage8bit.storageBuffer8BitAccess ? true : false;
1834 	}
1835 
1836 	switch (format)
1837 	{
1838 		default:
1839 			return true;
1840 		case VK_FORMAT_R16_SFLOAT:
1841 		case VK_FORMAT_R16G16_SFLOAT:
1842 		case VK_FORMAT_R16G16B16_SFLOAT:
1843 		case VK_FORMAT_R16G16B16A16_SFLOAT:
1844 			return shaderSubgroupExtendedTypes && shaderFloat16 && storageBuffer16BitAccess;
1845 		case VK_FORMAT_R64_SFLOAT:
1846 		case VK_FORMAT_R64G64_SFLOAT:
1847 		case VK_FORMAT_R64G64B64_SFLOAT:
1848 		case VK_FORMAT_R64G64B64A64_SFLOAT:
1849 			return shaderFloat64;
1850 		case VK_FORMAT_R8_SINT:
1851 		case VK_FORMAT_R8G8_SINT:
1852 		case VK_FORMAT_R8G8B8_SINT:
1853 		case VK_FORMAT_R8G8B8A8_SINT:
1854 		case VK_FORMAT_R8_UINT:
1855 		case VK_FORMAT_R8G8_UINT:
1856 		case VK_FORMAT_R8G8B8_UINT:
1857 		case VK_FORMAT_R8G8B8A8_UINT:
1858 			return shaderSubgroupExtendedTypes && shaderInt8 && storageBuffer8BitAccess;
1859 		case VK_FORMAT_R16_SINT:
1860 		case VK_FORMAT_R16G16_SINT:
1861 		case VK_FORMAT_R16G16B16_SINT:
1862 		case VK_FORMAT_R16G16B16A16_SINT:
1863 		case VK_FORMAT_R16_UINT:
1864 		case VK_FORMAT_R16G16_UINT:
1865 		case VK_FORMAT_R16G16B16_UINT:
1866 		case VK_FORMAT_R16G16B16A16_UINT:
1867 			return shaderSubgroupExtendedTypes && shaderInt16 && storageBuffer16BitAccess;
1868 		case VK_FORMAT_R64_SINT:
1869 		case VK_FORMAT_R64G64_SINT:
1870 		case VK_FORMAT_R64G64B64_SINT:
1871 		case VK_FORMAT_R64G64B64A64_SINT:
1872 		case VK_FORMAT_R64_UINT:
1873 		case VK_FORMAT_R64G64_UINT:
1874 		case VK_FORMAT_R64G64B64_UINT:
1875 		case VK_FORMAT_R64G64B64A64_UINT:
1876 			return shaderSubgroupExtendedTypes && shaderInt64;
1877 	}
1878 }
1879 
isSubgroupBroadcastDynamicIdSupported(Context & context)1880 bool vkt::subgroups::isSubgroupBroadcastDynamicIdSupported (Context& context)
1881 {
1882 	return context.contextSupports(vk::ApiVersion(0, 1, 2, 0)) &&
1883 		vk::getPhysicalDeviceVulkan12Features(context.getInstanceInterface(), context.getPhysicalDevice()).subgroupBroadcastDynamicId;
1884 }
1885 
getFormatNameForGLSL(VkFormat format)1886 std::string vkt::subgroups::getFormatNameForGLSL (VkFormat format)
1887 {
1888 	switch (format)
1889 	{
1890 		case VK_FORMAT_R8_SINT:				return "int8_t";
1891 		case VK_FORMAT_R8G8_SINT:			return "i8vec2";
1892 		case VK_FORMAT_R8G8B8_SINT:			return "i8vec3";
1893 		case VK_FORMAT_R8G8B8A8_SINT:		return "i8vec4";
1894 		case VK_FORMAT_R8_UINT:				return "uint8_t";
1895 		case VK_FORMAT_R8G8_UINT:			return "u8vec2";
1896 		case VK_FORMAT_R8G8B8_UINT:			return "u8vec3";
1897 		case VK_FORMAT_R8G8B8A8_UINT:		return "u8vec4";
1898 		case VK_FORMAT_R16_SINT:			return "int16_t";
1899 		case VK_FORMAT_R16G16_SINT:			return "i16vec2";
1900 		case VK_FORMAT_R16G16B16_SINT:		return "i16vec3";
1901 		case VK_FORMAT_R16G16B16A16_SINT:	return "i16vec4";
1902 		case VK_FORMAT_R16_UINT:			return "uint16_t";
1903 		case VK_FORMAT_R16G16_UINT:			return "u16vec2";
1904 		case VK_FORMAT_R16G16B16_UINT:		return "u16vec3";
1905 		case VK_FORMAT_R16G16B16A16_UINT:	return "u16vec4";
1906 		case VK_FORMAT_R32_SINT:			return "int";
1907 		case VK_FORMAT_R32G32_SINT:			return "ivec2";
1908 		case VK_FORMAT_R32G32B32_SINT:		return "ivec3";
1909 		case VK_FORMAT_R32G32B32A32_SINT:	return "ivec4";
1910 		case VK_FORMAT_R32_UINT:			return "uint";
1911 		case VK_FORMAT_R32G32_UINT:			return "uvec2";
1912 		case VK_FORMAT_R32G32B32_UINT:		return "uvec3";
1913 		case VK_FORMAT_R32G32B32A32_UINT:	return "uvec4";
1914 		case VK_FORMAT_R64_SINT:			return "int64_t";
1915 		case VK_FORMAT_R64G64_SINT:			return "i64vec2";
1916 		case VK_FORMAT_R64G64B64_SINT:		return "i64vec3";
1917 		case VK_FORMAT_R64G64B64A64_SINT:	return "i64vec4";
1918 		case VK_FORMAT_R64_UINT:			return "uint64_t";
1919 		case VK_FORMAT_R64G64_UINT:			return "u64vec2";
1920 		case VK_FORMAT_R64G64B64_UINT:		return "u64vec3";
1921 		case VK_FORMAT_R64G64B64A64_UINT:	return "u64vec4";
1922 		case VK_FORMAT_R16_SFLOAT:			return "float16_t";
1923 		case VK_FORMAT_R16G16_SFLOAT:		return "f16vec2";
1924 		case VK_FORMAT_R16G16B16_SFLOAT:	return "f16vec3";
1925 		case VK_FORMAT_R16G16B16A16_SFLOAT:	return "f16vec4";
1926 		case VK_FORMAT_R32_SFLOAT:			return "float";
1927 		case VK_FORMAT_R32G32_SFLOAT:		return "vec2";
1928 		case VK_FORMAT_R32G32B32_SFLOAT:	return "vec3";
1929 		case VK_FORMAT_R32G32B32A32_SFLOAT:	return "vec4";
1930 		case VK_FORMAT_R64_SFLOAT:			return "double";
1931 		case VK_FORMAT_R64G64_SFLOAT:		return "dvec2";
1932 		case VK_FORMAT_R64G64B64_SFLOAT:	return "dvec3";
1933 		case VK_FORMAT_R64G64B64A64_SFLOAT:	return "dvec4";
1934 		case VK_FORMAT_R8_USCALED:			return "bool";
1935 		case VK_FORMAT_R8G8_USCALED:		return "bvec2";
1936 		case VK_FORMAT_R8G8B8_USCALED:		return "bvec3";
1937 		case VK_FORMAT_R8G8B8A8_USCALED:	return "bvec4";
1938 		default:							TCU_THROW(InternalError, "Unhandled format");
1939 	}
1940 }
1941 
getAdditionalExtensionForFormat(vk::VkFormat format)1942 std::string vkt::subgroups::getAdditionalExtensionForFormat (vk::VkFormat format)
1943 {
1944 	switch (format)
1945 	{
1946 		default:
1947 			return "";
1948 		case VK_FORMAT_R8_SINT:
1949 		case VK_FORMAT_R8G8_SINT:
1950 		case VK_FORMAT_R8G8B8_SINT:
1951 		case VK_FORMAT_R8G8B8A8_SINT:
1952 		case VK_FORMAT_R8_UINT:
1953 		case VK_FORMAT_R8G8_UINT:
1954 		case VK_FORMAT_R8G8B8_UINT:
1955 		case VK_FORMAT_R8G8B8A8_UINT:
1956 			return "#extension GL_EXT_shader_subgroup_extended_types_int8 : enable\n";
1957 		case VK_FORMAT_R16_SINT:
1958 		case VK_FORMAT_R16G16_SINT:
1959 		case VK_FORMAT_R16G16B16_SINT:
1960 		case VK_FORMAT_R16G16B16A16_SINT:
1961 		case VK_FORMAT_R16_UINT:
1962 		case VK_FORMAT_R16G16_UINT:
1963 		case VK_FORMAT_R16G16B16_UINT:
1964 		case VK_FORMAT_R16G16B16A16_UINT:
1965 			return "#extension GL_EXT_shader_subgroup_extended_types_int16 : enable\n";
1966 		case VK_FORMAT_R64_SINT:
1967 		case VK_FORMAT_R64G64_SINT:
1968 		case VK_FORMAT_R64G64B64_SINT:
1969 		case VK_FORMAT_R64G64B64A64_SINT:
1970 		case VK_FORMAT_R64_UINT:
1971 		case VK_FORMAT_R64G64_UINT:
1972 		case VK_FORMAT_R64G64B64_UINT:
1973 		case VK_FORMAT_R64G64B64A64_UINT:
1974 			return "#extension GL_EXT_shader_subgroup_extended_types_int64 : enable\n";
1975 		case VK_FORMAT_R16_SFLOAT:
1976 		case VK_FORMAT_R16G16_SFLOAT:
1977 		case VK_FORMAT_R16G16B16_SFLOAT:
1978 		case VK_FORMAT_R16G16B16A16_SFLOAT:
1979 			return "#extension GL_EXT_shader_subgroup_extended_types_float16 : enable\n";
1980 	}
1981 }
1982 
getAllFormats()1983 const std::vector<vk::VkFormat> vkt::subgroups::getAllFormats ()
1984 {
1985 	std::vector<VkFormat> formats;
1986 
1987 	formats.push_back(VK_FORMAT_R8_SINT);
1988 	formats.push_back(VK_FORMAT_R8G8_SINT);
1989 	formats.push_back(VK_FORMAT_R8G8B8_SINT);
1990 	formats.push_back(VK_FORMAT_R8G8B8A8_SINT);
1991 	formats.push_back(VK_FORMAT_R8_UINT);
1992 	formats.push_back(VK_FORMAT_R8G8_UINT);
1993 	formats.push_back(VK_FORMAT_R8G8B8_UINT);
1994 	formats.push_back(VK_FORMAT_R8G8B8A8_UINT);
1995 	formats.push_back(VK_FORMAT_R16_SINT);
1996 	formats.push_back(VK_FORMAT_R16G16_SINT);
1997 	formats.push_back(VK_FORMAT_R16G16B16_SINT);
1998 	formats.push_back(VK_FORMAT_R16G16B16A16_SINT);
1999 	formats.push_back(VK_FORMAT_R16_UINT);
2000 	formats.push_back(VK_FORMAT_R16G16_UINT);
2001 	formats.push_back(VK_FORMAT_R16G16B16_UINT);
2002 	formats.push_back(VK_FORMAT_R16G16B16A16_UINT);
2003 	formats.push_back(VK_FORMAT_R32_SINT);
2004 	formats.push_back(VK_FORMAT_R32G32_SINT);
2005 	formats.push_back(VK_FORMAT_R32G32B32_SINT);
2006 	formats.push_back(VK_FORMAT_R32G32B32A32_SINT);
2007 	formats.push_back(VK_FORMAT_R32_UINT);
2008 	formats.push_back(VK_FORMAT_R32G32_UINT);
2009 	formats.push_back(VK_FORMAT_R32G32B32_UINT);
2010 	formats.push_back(VK_FORMAT_R32G32B32A32_UINT);
2011 	formats.push_back(VK_FORMAT_R64_SINT);
2012 	formats.push_back(VK_FORMAT_R64G64_SINT);
2013 	formats.push_back(VK_FORMAT_R64G64B64_SINT);
2014 	formats.push_back(VK_FORMAT_R64G64B64A64_SINT);
2015 	formats.push_back(VK_FORMAT_R64_UINT);
2016 	formats.push_back(VK_FORMAT_R64G64_UINT);
2017 	formats.push_back(VK_FORMAT_R64G64B64_UINT);
2018 	formats.push_back(VK_FORMAT_R64G64B64A64_UINT);
2019 	formats.push_back(VK_FORMAT_R16_SFLOAT);
2020 	formats.push_back(VK_FORMAT_R16G16_SFLOAT);
2021 	formats.push_back(VK_FORMAT_R16G16B16_SFLOAT);
2022 	formats.push_back(VK_FORMAT_R16G16B16A16_SFLOAT);
2023 	formats.push_back(VK_FORMAT_R32_SFLOAT);
2024 	formats.push_back(VK_FORMAT_R32G32_SFLOAT);
2025 	formats.push_back(VK_FORMAT_R32G32B32_SFLOAT);
2026 	formats.push_back(VK_FORMAT_R32G32B32A32_SFLOAT);
2027 	formats.push_back(VK_FORMAT_R64_SFLOAT);
2028 	formats.push_back(VK_FORMAT_R64G64_SFLOAT);
2029 	formats.push_back(VK_FORMAT_R64G64B64_SFLOAT);
2030 	formats.push_back(VK_FORMAT_R64G64B64A64_SFLOAT);
2031 	formats.push_back(VK_FORMAT_R8_USCALED);
2032 	formats.push_back(VK_FORMAT_R8G8_USCALED);
2033 	formats.push_back(VK_FORMAT_R8G8B8_USCALED);
2034 	formats.push_back(VK_FORMAT_R8G8B8A8_USCALED);
2035 
2036 	return formats;
2037 }
2038 
isFormatSigned(VkFormat format)2039 bool vkt::subgroups::isFormatSigned (VkFormat format)
2040 {
2041 	switch (format)
2042 	{
2043 		default:
2044 			return false;
2045 		case VK_FORMAT_R8_SINT:
2046 		case VK_FORMAT_R8G8_SINT:
2047 		case VK_FORMAT_R8G8B8_SINT:
2048 		case VK_FORMAT_R8G8B8A8_SINT:
2049 		case VK_FORMAT_R16_SINT:
2050 		case VK_FORMAT_R16G16_SINT:
2051 		case VK_FORMAT_R16G16B16_SINT:
2052 		case VK_FORMAT_R16G16B16A16_SINT:
2053 		case VK_FORMAT_R32_SINT:
2054 		case VK_FORMAT_R32G32_SINT:
2055 		case VK_FORMAT_R32G32B32_SINT:
2056 		case VK_FORMAT_R32G32B32A32_SINT:
2057 		case VK_FORMAT_R64_SINT:
2058 		case VK_FORMAT_R64G64_SINT:
2059 		case VK_FORMAT_R64G64B64_SINT:
2060 		case VK_FORMAT_R64G64B64A64_SINT:
2061 			return true;
2062 	}
2063 }
2064 
isFormatUnsigned(VkFormat format)2065 bool vkt::subgroups::isFormatUnsigned (VkFormat format)
2066 {
2067 	switch (format)
2068 	{
2069 		default:
2070 			return false;
2071 		case VK_FORMAT_R8_UINT:
2072 		case VK_FORMAT_R8G8_UINT:
2073 		case VK_FORMAT_R8G8B8_UINT:
2074 		case VK_FORMAT_R8G8B8A8_UINT:
2075 		case VK_FORMAT_R16_UINT:
2076 		case VK_FORMAT_R16G16_UINT:
2077 		case VK_FORMAT_R16G16B16_UINT:
2078 		case VK_FORMAT_R16G16B16A16_UINT:
2079 		case VK_FORMAT_R32_UINT:
2080 		case VK_FORMAT_R32G32_UINT:
2081 		case VK_FORMAT_R32G32B32_UINT:
2082 		case VK_FORMAT_R32G32B32A32_UINT:
2083 		case VK_FORMAT_R64_UINT:
2084 		case VK_FORMAT_R64G64_UINT:
2085 		case VK_FORMAT_R64G64B64_UINT:
2086 		case VK_FORMAT_R64G64B64A64_UINT:
2087 			return true;
2088 	}
2089 }
2090 
isFormatFloat(VkFormat format)2091 bool vkt::subgroups::isFormatFloat (VkFormat format)
2092 {
2093 	switch (format)
2094 	{
2095 		default:
2096 			return false;
2097 		case VK_FORMAT_R16_SFLOAT:
2098 		case VK_FORMAT_R16G16_SFLOAT:
2099 		case VK_FORMAT_R16G16B16_SFLOAT:
2100 		case VK_FORMAT_R16G16B16A16_SFLOAT:
2101 		case VK_FORMAT_R32_SFLOAT:
2102 		case VK_FORMAT_R32G32_SFLOAT:
2103 		case VK_FORMAT_R32G32B32_SFLOAT:
2104 		case VK_FORMAT_R32G32B32A32_SFLOAT:
2105 		case VK_FORMAT_R64_SFLOAT:
2106 		case VK_FORMAT_R64G64_SFLOAT:
2107 		case VK_FORMAT_R64G64B64_SFLOAT:
2108 		case VK_FORMAT_R64G64B64A64_SFLOAT:
2109 			return true;
2110 	}
2111 }
2112 
isFormatBool(VkFormat format)2113 bool vkt::subgroups::isFormatBool (VkFormat format)
2114 {
2115 	switch (format)
2116 	{
2117 		default:
2118 			return false;
2119 		case VK_FORMAT_R8_USCALED:
2120 		case VK_FORMAT_R8G8_USCALED:
2121 		case VK_FORMAT_R8G8B8_USCALED:
2122 		case VK_FORMAT_R8G8B8A8_USCALED:
2123 			return true;
2124 	}
2125 }
2126 
isFormat8bitTy(VkFormat format)2127 bool vkt::subgroups::isFormat8bitTy (VkFormat format)
2128 {
2129 	switch (format)
2130 	{
2131 	default:
2132 		return false;
2133 	case VK_FORMAT_R8_SINT:
2134 	case VK_FORMAT_R8G8_SINT:
2135 	case VK_FORMAT_R8G8B8_SINT:
2136 	case VK_FORMAT_R8G8B8A8_SINT:
2137 	case VK_FORMAT_R8_UINT:
2138 	case VK_FORMAT_R8G8_UINT:
2139 	case VK_FORMAT_R8G8B8_UINT:
2140 	case VK_FORMAT_R8G8B8A8_UINT:
2141 		return true;
2142 	}
2143 }
2144 
isFormat16BitTy(VkFormat format)2145 bool vkt::subgroups::isFormat16BitTy (VkFormat format)
2146 {
2147 	switch (format)
2148 	{
2149 	default:
2150 		return false;
2151 	case VK_FORMAT_R16_SFLOAT:
2152 	case VK_FORMAT_R16G16_SFLOAT:
2153 	case VK_FORMAT_R16G16B16_SFLOAT:
2154 	case VK_FORMAT_R16G16B16A16_SFLOAT:
2155 	case VK_FORMAT_R16_SINT:
2156 	case VK_FORMAT_R16G16_SINT:
2157 	case VK_FORMAT_R16G16B16_SINT:
2158 	case VK_FORMAT_R16G16B16A16_SINT:
2159 	case VK_FORMAT_R16_UINT:
2160 	case VK_FORMAT_R16G16_UINT:
2161 	case VK_FORMAT_R16G16B16_UINT:
2162 	case VK_FORMAT_R16G16B16A16_UINT:
2163 		return true;
2164 	}
2165 }
2166 
setVertexShaderFrameBuffer(SourceCollections & programCollection)2167 void vkt::subgroups::setVertexShaderFrameBuffer (SourceCollections& programCollection)
2168 {
2169 	/*
2170 		"layout(location = 0) in highp vec4 in_position;\n"
2171 		"void main (void)\n"
2172 		"{\n"
2173 		"  gl_Position = in_position;\n"
2174 		"  gl_PointSize = 1.0f;\n"
2175 		"}\n";
2176 	*/
2177 	programCollection.spirvAsmSources.add("vert") <<
2178 		"; SPIR-V\n"
2179 		"; Version: 1.3\n"
2180 		"; Generator: Khronos Glslang Reference Front End; 7\n"
2181 		"; Bound: 25\n"
2182 		"; Schema: 0\n"
2183 		"OpCapability Shader\n"
2184 		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
2185 		"OpMemoryModel Logical GLSL450\n"
2186 		"OpEntryPoint Vertex %4 \"main\" %13 %17\n"
2187 		"OpMemberDecorate %11 0 BuiltIn Position\n"
2188 		"OpMemberDecorate %11 1 BuiltIn PointSize\n"
2189 		"OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
2190 		"OpMemberDecorate %11 3 BuiltIn CullDistance\n"
2191 		"OpDecorate %11 Block\n"
2192 		"OpDecorate %17 Location 0\n"
2193 		"%2 = OpTypeVoid\n"
2194 		"%3 = OpTypeFunction %2\n"
2195 		"%6 = OpTypeFloat 32\n"
2196 		"%7 = OpTypeVector %6 4\n"
2197 		"%8 = OpTypeInt 32 0\n"
2198 		"%9 = OpConstant %8 1\n"
2199 		"%10 = OpTypeArray %6 %9\n"
2200 		"%11 = OpTypeStruct %7 %6 %10 %10\n"
2201 		"%12 = OpTypePointer Output %11\n"
2202 		"%13 = OpVariable %12 Output\n"
2203 		"%14 = OpTypeInt 32 1\n"
2204 		"%15 = OpConstant %14 0\n"
2205 		"%16 = OpTypePointer Input %7\n"
2206 		"%17 = OpVariable %16 Input\n"
2207 		"%19 = OpTypePointer Output %7\n"
2208 		"%21 = OpConstant %14 1\n"
2209 		"%22 = OpConstant %6 1\n"
2210 		"%23 = OpTypePointer Output %6\n"
2211 		"%4 = OpFunction %2 None %3\n"
2212 		"%5 = OpLabel\n"
2213 		"%18 = OpLoad %7 %17\n"
2214 		"%20 = OpAccessChain %19 %13 %15\n"
2215 		"OpStore %20 %18\n"
2216 		"%24 = OpAccessChain %23 %13 %21\n"
2217 		"OpStore %24 %22\n"
2218 		"OpReturn\n"
2219 		"OpFunctionEnd\n";
2220 }
2221 
setFragmentShaderFrameBuffer(vk::SourceCollections & programCollection)2222 void vkt::subgroups::setFragmentShaderFrameBuffer (vk::SourceCollections& programCollection)
2223 {
2224 	/*
2225 		"layout(location = 0) in float in_color;\n"
2226 		"layout(location = 0) out uint out_color;\n"
2227 		"void main()\n"
2228 		{\n"
2229 		"	out_color = uint(in_color);\n"
2230 		"}\n";
2231 	*/
2232 	programCollection.spirvAsmSources.add("fragment") <<
2233 		"; SPIR-V\n"
2234 		"; Version: 1.3\n"
2235 		"; Generator: Khronos Glslang Reference Front End; 2\n"
2236 		"; Bound: 14\n"
2237 		"; Schema: 0\n"
2238 		"OpCapability Shader\n"
2239 		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
2240 		"OpMemoryModel Logical GLSL450\n"
2241 		"OpEntryPoint Fragment %4 \"main\" %8 %11\n"
2242 		"OpExecutionMode %4 OriginUpperLeft\n"
2243 		"OpDecorate %8 Location 0\n"
2244 		"OpDecorate %11 Location 0\n"
2245 		"%2 = OpTypeVoid\n"
2246 		"%3 = OpTypeFunction %2\n"
2247 		"%6 = OpTypeInt 32 0\n"
2248 		"%7 = OpTypePointer Output %6\n"
2249 		"%8 = OpVariable %7 Output\n"
2250 		"%9 = OpTypeFloat 32\n"
2251 		"%10 = OpTypePointer Input %9\n"
2252 		"%11 = OpVariable %10 Input\n"
2253 		"%4 = OpFunction %2 None %3\n"
2254 		"%5 = OpLabel\n"
2255 		"%12 = OpLoad %9 %11\n"
2256 		"%13 = OpConvertFToU %6 %12\n"
2257 		"OpStore %8 %13\n"
2258 		"OpReturn\n"
2259 		"OpFunctionEnd\n";
2260 }
2261 
setTesCtrlShaderFrameBuffer(vk::SourceCollections & programCollection)2262 void vkt::subgroups::setTesCtrlShaderFrameBuffer (vk::SourceCollections& programCollection)
2263 {
2264 	/*
2265 		"#extension GL_KHR_shader_subgroup_basic: enable\n"
2266 		"#extension GL_EXT_tessellation_shader : require\n"
2267 		"layout(vertices = 2) out;\n"
2268 		"void main (void)\n"
2269 		"{\n"
2270 		"  if (gl_InvocationID == 0)\n"
2271 		"  {\n"
2272 		"    gl_TessLevelOuter[0] = 1.0f;\n"
2273 		"    gl_TessLevelOuter[1] = 1.0f;\n"
2274 		"  }\n"
2275 		"  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
2276 		"}\n";
2277 	*/
2278 	programCollection.spirvAsmSources.add("tesc") <<
2279 		"; SPIR-V\n"
2280 		"; Version: 1.3\n"
2281 		"; Generator: Khronos Glslang Reference Front End; 2\n"
2282 		"; Bound: 46\n"
2283 		"; Schema: 0\n"
2284 		"OpCapability Tessellation\n"
2285 		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
2286 		"OpMemoryModel Logical GLSL450\n"
2287 		"OpEntryPoint TessellationControl %4 \"main\" %8 %20 %33 %39\n"
2288 		"OpExecutionMode %4 OutputVertices 2\n"
2289 		"OpDecorate %8 BuiltIn InvocationId\n"
2290 		"OpDecorate %20 Patch\n"
2291 		"OpDecorate %20 BuiltIn TessLevelOuter\n"
2292 		"OpMemberDecorate %29 0 BuiltIn Position\n"
2293 		"OpMemberDecorate %29 1 BuiltIn PointSize\n"
2294 		"OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
2295 		"OpMemberDecorate %29 3 BuiltIn CullDistance\n"
2296 		"OpDecorate %29 Block\n"
2297 		"OpMemberDecorate %35 0 BuiltIn Position\n"
2298 		"OpMemberDecorate %35 1 BuiltIn PointSize\n"
2299 		"OpMemberDecorate %35 2 BuiltIn ClipDistance\n"
2300 		"OpMemberDecorate %35 3 BuiltIn CullDistance\n"
2301 		"OpDecorate %35 Block\n"
2302 		"%2 = OpTypeVoid\n"
2303 		"%3 = OpTypeFunction %2\n"
2304 		"%6 = OpTypeInt 32 1\n"
2305 		"%7 = OpTypePointer Input %6\n"
2306 		"%8 = OpVariable %7 Input\n"
2307 		"%10 = OpConstant %6 0\n"
2308 		"%11 = OpTypeBool\n"
2309 		"%15 = OpTypeFloat 32\n"
2310 		"%16 = OpTypeInt 32 0\n"
2311 		"%17 = OpConstant %16 4\n"
2312 		"%18 = OpTypeArray %15 %17\n"
2313 		"%19 = OpTypePointer Output %18\n"
2314 		"%20 = OpVariable %19 Output\n"
2315 		"%21 = OpConstant %15 1\n"
2316 		"%22 = OpTypePointer Output %15\n"
2317 		"%24 = OpConstant %6 1\n"
2318 		"%26 = OpTypeVector %15 4\n"
2319 		"%27 = OpConstant %16 1\n"
2320 		"%28 = OpTypeArray %15 %27\n"
2321 		"%29 = OpTypeStruct %26 %15 %28 %28\n"
2322 		"%30 = OpConstant %16 2\n"
2323 		"%31 = OpTypeArray %29 %30\n"
2324 		"%32 = OpTypePointer Output %31\n"
2325 		"%33 = OpVariable %32 Output\n"
2326 		"%35 = OpTypeStruct %26 %15 %28 %28\n"
2327 		"%36 = OpConstant %16 32\n"
2328 		"%37 = OpTypeArray %35 %36\n"
2329 		"%38 = OpTypePointer Input %37\n"
2330 		"%39 = OpVariable %38 Input\n"
2331 		"%41 = OpTypePointer Input %26\n"
2332 		"%44 = OpTypePointer Output %26\n"
2333 		"%4 = OpFunction %2 None %3\n"
2334 		"%5 = OpLabel\n"
2335 		"%9 = OpLoad %6 %8\n"
2336 		"%12 = OpIEqual %11 %9 %10\n"
2337 		"OpSelectionMerge %14 None\n"
2338 		"OpBranchConditional %12 %13 %14\n"
2339 		"%13 = OpLabel\n"
2340 		"%23 = OpAccessChain %22 %20 %10\n"
2341 		"OpStore %23 %21\n"
2342 		"%25 = OpAccessChain %22 %20 %24\n"
2343 		"OpStore %25 %21\n"
2344 		"OpBranch %14\n"
2345 		"%14 = OpLabel\n"
2346 		"%34 = OpLoad %6 %8\n"
2347 		"%40 = OpLoad %6 %8\n"
2348 		"%42 = OpAccessChain %41 %39 %40 %10\n"
2349 		"%43 = OpLoad %26 %42\n"
2350 		"%45 = OpAccessChain %44 %33 %34 %10\n"
2351 		"OpStore %45 %43\n"
2352 		"OpReturn\n"
2353 		"OpFunctionEnd\n";
2354 }
2355 
setTesEvalShaderFrameBuffer(vk::SourceCollections & programCollection)2356 void vkt::subgroups::setTesEvalShaderFrameBuffer (vk::SourceCollections& programCollection)
2357 {
2358 	/*
2359 		"#extension GL_KHR_shader_subgroup_ballot: enable\n"
2360 		"#extension GL_EXT_tessellation_shader : require\n"
2361 		"layout(isolines, equal_spacing, ccw ) in;\n"
2362 		"layout(location = 0) in float in_color[];\n"
2363 		"layout(location = 0) out float out_color;\n"
2364 		"\n"
2365 		"void main (void)\n"
2366 		"{\n"
2367 		"  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
2368 		"  out_color = in_color[0];\n"
2369 		"}\n";
2370 	*/
2371 	programCollection.spirvAsmSources.add("tese") <<
2372 		"; SPIR-V\n"
2373 		"; Version: 1.3\n"
2374 		"; Generator: Khronos Glslang Reference Front End; 2\n"
2375 		"; Bound: 45\n"
2376 		"; Schema: 0\n"
2377 		"OpCapability Tessellation\n"
2378 		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
2379 		"OpMemoryModel Logical GLSL450\n"
2380 		"OpEntryPoint TessellationEvaluation %4 \"main\" %13 %20 %29 %39 %42\n"
2381 		"OpExecutionMode %4 Isolines\n"
2382 		"OpExecutionMode %4 SpacingEqual\n"
2383 		"OpExecutionMode %4 VertexOrderCcw\n"
2384 		"OpMemberDecorate %11 0 BuiltIn Position\n"
2385 		"OpMemberDecorate %11 1 BuiltIn PointSize\n"
2386 		"OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
2387 		"OpMemberDecorate %11 3 BuiltIn CullDistance\n"
2388 		"OpDecorate %11 Block\n"
2389 		"OpMemberDecorate %16 0 BuiltIn Position\n"
2390 		"OpMemberDecorate %16 1 BuiltIn PointSize\n"
2391 		"OpMemberDecorate %16 2 BuiltIn ClipDistance\n"
2392 		"OpMemberDecorate %16 3 BuiltIn CullDistance\n"
2393 		"OpDecorate %16 Block\n"
2394 		"OpDecorate %29 BuiltIn TessCoord\n"
2395 		"OpDecorate %39 Location 0\n"
2396 		"OpDecorate %42 Location 0\n"
2397 		"%2 = OpTypeVoid\n"
2398 		"%3 = OpTypeFunction %2\n"
2399 		"%6 = OpTypeFloat 32\n"
2400 		"%7 = OpTypeVector %6 4\n"
2401 		"%8 = OpTypeInt 32 0\n"
2402 		"%9 = OpConstant %8 1\n"
2403 		"%10 = OpTypeArray %6 %9\n"
2404 		"%11 = OpTypeStruct %7 %6 %10 %10\n"
2405 		"%12 = OpTypePointer Output %11\n"
2406 		"%13 = OpVariable %12 Output\n"
2407 		"%14 = OpTypeInt 32 1\n"
2408 		"%15 = OpConstant %14 0\n"
2409 		"%16 = OpTypeStruct %7 %6 %10 %10\n"
2410 		"%17 = OpConstant %8 32\n"
2411 		"%18 = OpTypeArray %16 %17\n"
2412 		"%19 = OpTypePointer Input %18\n"
2413 		"%20 = OpVariable %19 Input\n"
2414 		"%21 = OpTypePointer Input %7\n"
2415 		"%24 = OpConstant %14 1\n"
2416 		"%27 = OpTypeVector %6 3\n"
2417 		"%28 = OpTypePointer Input %27\n"
2418 		"%29 = OpVariable %28 Input\n"
2419 		"%30 = OpConstant %8 0\n"
2420 		"%31 = OpTypePointer Input %6\n"
2421 		"%36 = OpTypePointer Output %7\n"
2422 		"%38 = OpTypePointer Output %6\n"
2423 		"%39 = OpVariable %38 Output\n"
2424 		"%40 = OpTypeArray %6 %17\n"
2425 		"%41 = OpTypePointer Input %40\n"
2426 		"%42 = OpVariable %41 Input\n"
2427 		"%4 = OpFunction %2 None %3\n"
2428 		"%5 = OpLabel\n"
2429 		"%22 = OpAccessChain %21 %20 %15 %15\n"
2430 		"%23 = OpLoad %7 %22\n"
2431 		"%25 = OpAccessChain %21 %20 %24 %15\n"
2432 		"%26 = OpLoad %7 %25\n"
2433 		"%32 = OpAccessChain %31 %29 %30\n"
2434 		"%33 = OpLoad %6 %32\n"
2435 		"%34 = OpCompositeConstruct %7 %33 %33 %33 %33\n"
2436 		"%35 = OpExtInst %7 %1 FMix %23 %26 %34\n"
2437 		"%37 = OpAccessChain %36 %13 %15\n"
2438 		"OpStore %37 %35\n"
2439 		"%43 = OpAccessChain %31 %42 %15\n"
2440 		"%44 = OpLoad %6 %43\n"
2441 		"OpStore %39 %44\n"
2442 		"OpReturn\n"
2443 		"OpFunctionEnd\n";
2444 }
2445 
addGeometryShadersFromTemplate(const std::string & glslTemplate,const vk::ShaderBuildOptions & options,vk::GlslSourceCollection & collection)2446 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& glslTemplate, const vk::ShaderBuildOptions& options,  vk::GlslSourceCollection& collection)
2447 {
2448 	tcu::StringTemplate geometryTemplate(glslTemplate);
2449 
2450 	map<string, string>		linesParams;
2451 	linesParams.insert(pair<string, string>("TOPOLOGY", "lines"));
2452 
2453 	map<string, string>		pointsParams;
2454 	pointsParams.insert(pair<string, string>("TOPOLOGY", "points"));
2455 
2456 	collection.add("geometry_lines")	<< glu::GeometrySource(geometryTemplate.specialize(linesParams))	<< options;
2457 	collection.add("geometry_points")	<< glu::GeometrySource(geometryTemplate.specialize(pointsParams))	<< options;
2458 }
2459 
addGeometryShadersFromTemplate(const std::string & spirvTemplate,const vk::SpirVAsmBuildOptions & options,vk::SpirVAsmCollection & collection)2460 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& spirvTemplate, const vk::SpirVAsmBuildOptions& options, vk::SpirVAsmCollection& collection)
2461 {
2462 	tcu::StringTemplate geometryTemplate(spirvTemplate);
2463 
2464 	map<string, string>		linesParams;
2465 	linesParams.insert(pair<string, string>("TOPOLOGY", "InputLines"));
2466 
2467 	map<string, string>		pointsParams;
2468 	pointsParams.insert(pair<string, string>("TOPOLOGY", "InputPoints"));
2469 
2470 	collection.add("geometry_lines")	<< geometryTemplate.specialize(linesParams)		<< options;
2471 	collection.add("geometry_points")	<< geometryTemplate.specialize(pointsParams)	<< options;
2472 }
2473 
initializeMemory(Context & context,const Allocation & alloc,const subgroups::SSBOData & data)2474 void initializeMemory (Context& context, const Allocation& alloc, const subgroups::SSBOData& data)
2475 {
2476 	const vk::VkFormat format = data.format;
2477 	const vk::VkDeviceSize size = data.numElements *
2478 		(data.isImage() ? getFormatSizeInBytes(format) : getElementSizeInBytes(format, data.layout));
2479 	if (subgroups::SSBOData::InitializeNonZero == data.initializeType)
2480 	{
2481 		de::Random rnd(context.getTestContext().getCommandLine().getBaseSeed());
2482 
2483 		switch (format)
2484 		{
2485 			default:
2486 				DE_FATAL("Illegal buffer format");
2487 				break;
2488 			case VK_FORMAT_R8_SINT:
2489 			case VK_FORMAT_R8G8_SINT:
2490 			case VK_FORMAT_R8G8B8_SINT:
2491 			case VK_FORMAT_R8G8B8A8_SINT:
2492 			case VK_FORMAT_R8_UINT:
2493 			case VK_FORMAT_R8G8_UINT:
2494 			case VK_FORMAT_R8G8B8_UINT:
2495 			case VK_FORMAT_R8G8B8A8_UINT:
2496 			{
2497 				deUint8* ptr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
2498 
2499 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint8)); k++)
2500 				{
2501 					ptr[k] = rnd.getUint8();
2502 				}
2503 			}
2504 			break;
2505 			case VK_FORMAT_R16_SINT:
2506 			case VK_FORMAT_R16G16_SINT:
2507 			case VK_FORMAT_R16G16B16_SINT:
2508 			case VK_FORMAT_R16G16B16A16_SINT:
2509 			case VK_FORMAT_R16_UINT:
2510 			case VK_FORMAT_R16G16_UINT:
2511 			case VK_FORMAT_R16G16B16_UINT:
2512 			case VK_FORMAT_R16G16B16A16_UINT:
2513 			{
2514 				deUint16* ptr = reinterpret_cast<deUint16*>(alloc.getHostPtr());
2515 
2516 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint16)); k++)
2517 				{
2518 					ptr[k] = rnd.getUint16();
2519 				}
2520 			}
2521 			break;
2522 			case VK_FORMAT_R8_USCALED:
2523 			case VK_FORMAT_R8G8_USCALED:
2524 			case VK_FORMAT_R8G8B8_USCALED:
2525 			case VK_FORMAT_R8G8B8A8_USCALED:
2526 			{
2527 				deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2528 
2529 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
2530 				{
2531 					deUint32 r = rnd.getUint32();
2532 					ptr[k] = (r & 1) ? r : 0;
2533 				}
2534 			}
2535 			break;
2536 			case VK_FORMAT_R32_SINT:
2537 			case VK_FORMAT_R32G32_SINT:
2538 			case VK_FORMAT_R32G32B32_SINT:
2539 			case VK_FORMAT_R32G32B32A32_SINT:
2540 			case VK_FORMAT_R32_UINT:
2541 			case VK_FORMAT_R32G32_UINT:
2542 			case VK_FORMAT_R32G32B32_UINT:
2543 			case VK_FORMAT_R32G32B32A32_UINT:
2544 			{
2545 				deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2546 
2547 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
2548 				{
2549 					ptr[k] = rnd.getUint32();
2550 				}
2551 			}
2552 			break;
2553 			case VK_FORMAT_R64_SINT:
2554 			case VK_FORMAT_R64G64_SINT:
2555 			case VK_FORMAT_R64G64B64_SINT:
2556 			case VK_FORMAT_R64G64B64A64_SINT:
2557 			case VK_FORMAT_R64_UINT:
2558 			case VK_FORMAT_R64G64_UINT:
2559 			case VK_FORMAT_R64G64B64_UINT:
2560 			case VK_FORMAT_R64G64B64A64_UINT:
2561 			{
2562 				deUint64* ptr = reinterpret_cast<deUint64*>(alloc.getHostPtr());
2563 
2564 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint64)); k++)
2565 				{
2566 					ptr[k] = rnd.getUint64();
2567 				}
2568 			}
2569 			break;
2570 			case VK_FORMAT_R16_SFLOAT:
2571 			case VK_FORMAT_R16G16_SFLOAT:
2572 			case VK_FORMAT_R16G16B16_SFLOAT:
2573 			case VK_FORMAT_R16G16B16A16_SFLOAT:
2574 			{
2575 				deFloat16* ptr = reinterpret_cast<deFloat16*>(alloc.getHostPtr());
2576 
2577 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deFloat16)); k++)
2578 				{
2579 					ptr[k] = deFloat32To16(rnd.getFloat());
2580 				}
2581 			}
2582 			break;
2583 			case VK_FORMAT_R32_SFLOAT:
2584 			case VK_FORMAT_R32G32_SFLOAT:
2585 			case VK_FORMAT_R32G32B32_SFLOAT:
2586 			case VK_FORMAT_R32G32B32A32_SFLOAT:
2587 			{
2588 				float* ptr = reinterpret_cast<float*>(alloc.getHostPtr());
2589 
2590 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(float)); k++)
2591 				{
2592 					ptr[k] = rnd.getFloat();
2593 				}
2594 			}
2595 			break;
2596 			case VK_FORMAT_R64_SFLOAT:
2597 			case VK_FORMAT_R64G64_SFLOAT:
2598 			case VK_FORMAT_R64G64B64_SFLOAT:
2599 			case VK_FORMAT_R64G64B64A64_SFLOAT:
2600 			{
2601 				double* ptr = reinterpret_cast<double*>(alloc.getHostPtr());
2602 
2603 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(double)); k++)
2604 				{
2605 					ptr[k] = rnd.getDouble();
2606 				}
2607 			}
2608 			break;
2609 		}
2610 	}
2611 	else if (subgroups::SSBOData::InitializeZero == data.initializeType)
2612 	{
2613 		deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2614 
2615 		for (vk::VkDeviceSize k = 0; k < size / 4; k++)
2616 		{
2617 			ptr[k] = 0;
2618 		}
2619 	}
2620 
2621 	if (subgroups::SSBOData::InitializeNone != data.initializeType)
2622 	{
2623 		flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
2624 	}
2625 }
2626 
getResultBinding(const VkShaderStageFlagBits shaderStage)2627 deUint32 getResultBinding (const VkShaderStageFlagBits shaderStage)
2628 {
2629 	switch(shaderStage)
2630 	{
2631 		case VK_SHADER_STAGE_VERTEX_BIT:
2632 			return 0u;
2633 		case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
2634 			return 1u;
2635 		case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
2636 			return 2u;
2637 		case VK_SHADER_STAGE_GEOMETRY_BIT:
2638 			return 3u;
2639 		default:
2640 			DE_ASSERT(0);
2641 			return -1;
2642 	}
2643 	DE_ASSERT(0);
2644 	return -1;
2645 }
2646 
makeTessellationEvaluationFrameBufferTest(Context & context,VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,subgroups::CheckResult checkResult,const VkShaderStageFlags shaderStage)2647 tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTest (Context&					context,
2648 																		   VkFormat					format,
2649 																		   const SSBOData*			extraData,
2650 																		   deUint32					extraDataCount,
2651 																		   const void*				internalData,
2652 																		   subgroups::CheckResult	checkResult,
2653 																		   const VkShaderStageFlags	shaderStage)
2654 {
2655 	return makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, shaderStage, 0u, 0u);
2656 }
2657 
makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,subgroups::CheckResult checkResult,const VkShaderStageFlags shaderStage,const deUint32 tessShaderStageCreateFlags,const deUint32 requiredSubgroupSize)2658 tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize (Context&					context,
2659 																							   VkFormat					format,
2660 																							   const SSBOData*			extraData,
2661 																							   deUint32					extraDataCount,
2662 																							   const void*				internalData,
2663 																							   subgroups::CheckResult	checkResult,
2664 																							   const VkShaderStageFlags	shaderStage,
2665 																							   const deUint32			tessShaderStageCreateFlags,
2666 																							   const deUint32			requiredSubgroupSize)
2667 {
2668 	const DeviceInterface&					vk						= context.getDeviceInterface();
2669 	const VkDevice							device					= context.getDevice();
2670 	const deUint32							maxWidth				= getMaxWidth();
2671 	vector<de::SharedPtr<BufferOrImage> >	inputBuffers			(extraDataCount);
2672 	DescriptorSetLayoutBuilder				layoutBuilder;
2673 	DescriptorPoolBuilder					poolBuilder;
2674 	DescriptorSetUpdateBuilder				updateBuilder;
2675 	Move <VkDescriptorPool>					descriptorPool;
2676 	Move <VkDescriptorSet>					descriptorSet;
2677 	const Unique<VkShaderModule>			vertexShaderModule		(createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
2678 	const Unique<VkShaderModule>			teCtrlShaderModule		(createShaderModule(vk, device, context.getBinaryCollection().get("tesc"), 0u));
2679 	const Unique<VkShaderModule>			teEvalShaderModule		(createShaderModule(vk, device, context.getBinaryCollection().get("tese"), 0u));
2680 	const Unique<VkShaderModule>			fragmentShaderModule	(createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
2681 	const Unique<VkRenderPass>				renderPass				(makeRenderPass(context, format));
2682 	const VkVertexInputBindingDescription	vertexInputBinding		=
2683 	{
2684 		0u,											//  deUint32			binding;
2685 		static_cast<deUint32>(sizeof(tcu::Vec4)),	//  deUint32			stride;
2686 		VK_VERTEX_INPUT_RATE_VERTEX					//  VkVertexInputRate	inputRate;
2687 	};
2688 	const VkVertexInputAttributeDescription	vertexInputAttribute	=
2689 	{
2690 		0u,									//  deUint32	location;
2691 		0u,									//  deUint32	binding;
2692 		VK_FORMAT_R32G32B32A32_SFLOAT,		//  VkFormat	format;
2693 		0u									//  deUint32	offset;
2694 	};
2695 
2696 	for (deUint32 i = 0u; i < extraDataCount; i++)
2697 	{
2698 		if (extraData[i].isImage())
2699 		{
2700 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
2701 		}
2702 		else
2703 		{
2704 			DE_ASSERT(extraData[i].isUBO());
2705 			vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
2706 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2707 		}
2708 		const Allocation& alloc = inputBuffers[i]->getAllocation();
2709 		initializeMemory(context, alloc, extraData[i]);
2710 	}
2711 
2712 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2713 		layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, shaderStage, DE_NULL);
2714 
2715 	const Unique<VkDescriptorSetLayout>		descriptorSetLayout		(layoutBuilder.build(vk, device));
2716 
2717 	const Unique<VkPipelineLayout>			pipelineLayout			(makePipelineLayout(vk, device, *descriptorSetLayout));
2718 
2719 	const deUint32 requiredSubgroupSizes[5] = {0u,
2720 											   ((shaderStage & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? requiredSubgroupSize : 0u),
2721 											   ((shaderStage & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? requiredSubgroupSize : 0u),
2722 											   0u,
2723 											   0u};
2724 
2725 	const Unique<VkPipeline>				pipeline				(makeGraphicsPipeline(context, *pipelineLayout,
2726 																						  VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT |
2727 																						  VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
2728 																						  *vertexShaderModule, *fragmentShaderModule, DE_NULL, *teCtrlShaderModule, *teEvalShaderModule,
2729 																						  *renderPass, VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, &vertexInputBinding, &vertexInputAttribute, true, format,
2730 																						  0u, ((shaderStage & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? tessShaderStageCreateFlags : 0u),
2731 																						  ((shaderStage & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? tessShaderStageCreateFlags : 0u),
2732 																						  0u, 0u, requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
2733 
2734 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2735 		poolBuilder.addType(inputBuffers[ndx]->getType());
2736 
2737 	if (extraDataCount > 0)
2738 	{
2739 		descriptorPool = poolBuilder.build(vk, device,
2740 							VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2741 		descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
2742 	}
2743 
2744 	for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
2745 	{
2746 		if (inputBuffers[buffersNdx]->isImage())
2747 		{
2748 			VkDescriptorImageInfo info =
2749 				makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
2750 										inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2751 
2752 			updateBuilder.writeSingle(*descriptorSet,
2753 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2754 										inputBuffers[buffersNdx]->getType(), &info);
2755 		}
2756 		else
2757 		{
2758 			VkDescriptorBufferInfo info =
2759 				makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
2760 										0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
2761 
2762 			updateBuilder.writeSingle(*descriptorSet,
2763 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2764 										inputBuffers[buffersNdx]->getType(), &info);
2765 		}
2766 	}
2767 
2768 	updateBuilder.update(vk, device);
2769 
2770 	const VkQueue							queue					= context.getUniversalQueue();
2771 	const deUint32							queueFamilyIndex		= context.getUniversalQueueFamilyIndex();
2772 	const Unique<VkCommandPool>				cmdPool					(makeCommandPool(vk, device, queueFamilyIndex));
2773 	const deUint32							subgroupSize			= getSubgroupSize(context);
2774 	const Unique<VkCommandBuffer>			cmdBuffer				(makeCommandBuffer(context, *cmdPool));
2775 	const vk::VkDeviceSize					vertexBufferSize		= 2ull * maxWidth * sizeof(tcu::Vec4);
2776 	Buffer									vertexBuffer			(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
2777 	unsigned								totalIterations			= 0u;
2778 	unsigned								failedIterations		= 0u;
2779 	Image									discardableImage		(context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2780 
2781 	{
2782 		const Allocation&		alloc				= vertexBuffer.getAllocation();
2783 		std::vector<tcu::Vec4>	data				(2u * maxWidth, Vec4(1.0f, 0.0f, 1.0f, 1.0f));
2784 		const float				pixelSize			= 2.0f / static_cast<float>(maxWidth);
2785 		float					leftHandPosition	= -1.0f;
2786 
2787 		for(deUint32 ndx = 0u; ndx < data.size(); ndx+=2u)
2788 		{
2789 			data[ndx][0] = leftHandPosition;
2790 			leftHandPosition += pixelSize;
2791 			data[ndx+1][0] = leftHandPosition;
2792 		}
2793 
2794 		deMemcpy(alloc.getHostPtr(), &data[0], data.size() * sizeof(tcu::Vec4));
2795 		flushAlloc(vk, device, alloc);
2796 	}
2797 
2798 	const Unique<VkFramebuffer>	framebuffer			(makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
2799 	const VkViewport			viewport			= makeViewport(maxWidth, 1u);
2800 	const VkRect2D				scissor				= makeRect2D(maxWidth, 1u);
2801 	const vk::VkDeviceSize		imageResultSize		= tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
2802 	Buffer						imageBufferResult	(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2803 	const VkDeviceSize			vertexBufferOffset	= 0u;
2804 
2805 	for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
2806 	{
2807 		totalIterations++;
2808 
2809 		beginCommandBuffer(vk, *cmdBuffer);
2810 		{
2811 
2812 			vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
2813 			vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
2814 
2815 			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
2816 
2817 			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
2818 
2819 			if (extraDataCount > 0)
2820 			{
2821 				vk.cmdBindDescriptorSets(*cmdBuffer,
2822 					VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2823 					&descriptorSet.get(), 0u, DE_NULL);
2824 			}
2825 
2826 			vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
2827 			vk.cmdDraw(*cmdBuffer, 2 * width, 1, 0, 0);
2828 
2829 			endRenderPass(vk, *cmdBuffer);
2830 
2831 			copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2832 			endCommandBuffer(vk, *cmdBuffer);
2833 
2834 			submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2835 		}
2836 		context.resetCommandPoolForVKSC(device, *cmdPool);
2837 
2838 		{
2839 			const Allocation& allocResult = imageBufferResult.getAllocation();
2840 			invalidateAlloc(vk, device, allocResult);
2841 
2842 			std::vector<const void*> datas;
2843 			datas.push_back(allocResult.getHostPtr());
2844 			if (!checkResult(internalData, datas, width/2u, subgroupSize))
2845 				failedIterations++;
2846 		}
2847 	}
2848 
2849 	if (0 < failedIterations)
2850 	{
2851 		unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
2852 
2853 		context.getTestContext().getLog()
2854 				<< TestLog::Message << valuesPassed << " / "
2855 				<< totalIterations << " values passed" << TestLog::EndMessage;
2856 		return tcu::TestStatus::fail("Failed!");
2857 	}
2858 
2859 	return tcu::TestStatus::pass("OK");
2860 }
2861 
check(std::vector<const void * > datas,deUint32 width,deUint32 ref)2862 bool vkt::subgroups::check (std::vector<const void*> datas, deUint32 width, deUint32 ref)
2863 {
2864 	const deUint32* data = reinterpret_cast<const deUint32*>(datas[0]);
2865 
2866 	for (deUint32 n = 0; n < width; ++n)
2867 	{
2868 		if (data[n] != ref)
2869 		{
2870 			return false;
2871 		}
2872 	}
2873 
2874 	return true;
2875 }
2876 
checkComputeOrMesh(std::vector<const void * > datas,const deUint32 numWorkgroups[3],const deUint32 localSize[3],deUint32 ref)2877 bool vkt::subgroups::checkComputeOrMesh (std::vector<const void*>	datas,
2878 										 const deUint32				numWorkgroups[3],
2879 										 const deUint32				localSize[3],
2880 										 deUint32					ref)
2881 {
2882 	const deUint32 globalSizeX = numWorkgroups[0] * localSize[0];
2883 	const deUint32 globalSizeY = numWorkgroups[1] * localSize[1];
2884 	const deUint32 globalSizeZ = numWorkgroups[2] * localSize[2];
2885 
2886 	return check(datas, globalSizeX * globalSizeY * globalSizeZ, ref);
2887 }
2888 
makeGeometryFrameBufferTest(Context & context,VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,subgroups::CheckResult checkResult)2889 tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTest (Context&				context,
2890 															 VkFormat				format,
2891 															 const SSBOData*		extraData,
2892 															 deUint32				extraDataCount,
2893 															 const void*			internalData,
2894 															 subgroups::CheckResult	checkResult)
2895 {
2896 	return makeGeometryFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, 0u, 0u);
2897 }
2898 
makeGeometryFrameBufferTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,subgroups::CheckResult checkResult,const deUint32 geometryShaderStageCreateFlags,const deUint32 requiredSubgroupSize)2899 tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTestRequiredSubgroupSize (Context&					context,
2900 																				 VkFormat					format,
2901 																				 const SSBOData*			extraData,
2902 																				 deUint32					extraDataCount,
2903 																				 const void*				internalData,
2904 																				 subgroups::CheckResult		checkResult,
2905 																				 const deUint32				geometryShaderStageCreateFlags,
2906 																				 const deUint32				requiredSubgroupSize)
2907 {
2908 	const DeviceInterface&					vk						= context.getDeviceInterface();
2909 	const VkDevice							device					= context.getDevice();
2910 	const deUint32							maxWidth				= getMaxWidth();
2911 	vector<de::SharedPtr<BufferOrImage> >	inputBuffers			(extraDataCount);
2912 	DescriptorSetLayoutBuilder				layoutBuilder;
2913 	DescriptorPoolBuilder					poolBuilder;
2914 	DescriptorSetUpdateBuilder				updateBuilder;
2915 	Move <VkDescriptorPool>					descriptorPool;
2916 	Move <VkDescriptorSet>					descriptorSet;
2917 	const Unique<VkShaderModule>			vertexShaderModule		(createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
2918 	const Unique<VkShaderModule>			geometryShaderModule	(createShaderModule(vk, device, context.getBinaryCollection().get("geometry"), 0u));
2919 	const Unique<VkShaderModule>			fragmentShaderModule	(createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
2920 	const Unique<VkRenderPass>				renderPass				(makeRenderPass(context, format));
2921 	const VkVertexInputBindingDescription	vertexInputBinding		=
2922 	{
2923 		0u,											//  deUint32			binding;
2924 		static_cast<deUint32>(sizeof(tcu::Vec4)),	//  deUint32			stride;
2925 		VK_VERTEX_INPUT_RATE_VERTEX					//  VkVertexInputRate	inputRate;
2926 	};
2927 	const VkVertexInputAttributeDescription	vertexInputAttribute	=
2928 	{
2929 		0u,									//  deUint32	location;
2930 		0u,									//  deUint32	binding;
2931 		VK_FORMAT_R32G32B32A32_SFLOAT,		//  VkFormat	format;
2932 		0u									//  deUint32	offset;
2933 	};
2934 
2935 	for (deUint32 i = 0u; i < extraDataCount; i++)
2936 	{
2937 		if (extraData[i].isImage())
2938 		{
2939 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
2940 		}
2941 		else
2942 		{
2943 			DE_ASSERT(extraData[i].isUBO());
2944 			vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
2945 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2946 		}
2947 		const Allocation& alloc = inputBuffers[i]->getAllocation();
2948 		initializeMemory(context, alloc, extraData[i]);
2949 	}
2950 
2951 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2952 		layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_GEOMETRY_BIT, DE_NULL);
2953 
2954 	const Unique<VkDescriptorSetLayout>		descriptorSetLayout		(layoutBuilder.build(vk, device));
2955 
2956 	const Unique<VkPipelineLayout>			pipelineLayout			(makePipelineLayout(vk, device, *descriptorSetLayout));
2957 
2958 	const deUint32 requiredSubgroupSizes[5] = {0u, 0u, 0u, requiredSubgroupSize, 0u};
2959 
2960 	const Unique<VkPipeline>				pipeline				(makeGraphicsPipeline(context, *pipelineLayout,
2961 																						  VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_GEOMETRY_BIT,
2962 																						  *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, DE_NULL, DE_NULL,
2963 																						  *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST, &vertexInputBinding, &vertexInputAttribute, true, format,
2964 																						  0u, 0u, 0u, geometryShaderStageCreateFlags, 0u,
2965 																						  requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
2966 
2967 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2968 		poolBuilder.addType(inputBuffers[ndx]->getType());
2969 
2970 	if (extraDataCount > 0)
2971 	{
2972 		descriptorPool = poolBuilder.build(vk, device,
2973 							VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2974 		descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
2975 	}
2976 
2977 	for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
2978 	{
2979 		if (inputBuffers[buffersNdx]->isImage())
2980 		{
2981 			VkDescriptorImageInfo info =
2982 				makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
2983 										inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2984 
2985 			updateBuilder.writeSingle(*descriptorSet,
2986 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2987 										inputBuffers[buffersNdx]->getType(), &info);
2988 		}
2989 		else
2990 		{
2991 			VkDescriptorBufferInfo info =
2992 				makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
2993 										0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
2994 
2995 			updateBuilder.writeSingle(*descriptorSet,
2996 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2997 										inputBuffers[buffersNdx]->getType(), &info);
2998 		}
2999 	}
3000 
3001 	updateBuilder.update(vk, device);
3002 
3003 	const VkQueue							queue					= context.getUniversalQueue();
3004 	const deUint32							queueFamilyIndex		= context.getUniversalQueueFamilyIndex();
3005 	const Unique<VkCommandPool>				cmdPool					(makeCommandPool(vk, device, queueFamilyIndex));
3006 	const deUint32							subgroupSize			= getSubgroupSize(context);
3007 	const Unique<VkCommandBuffer>			cmdBuffer				(makeCommandBuffer(context, *cmdPool));
3008 	const vk::VkDeviceSize					vertexBufferSize		= maxWidth * sizeof(tcu::Vec4);
3009 	Buffer									vertexBuffer			(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
3010 	unsigned								totalIterations			= 0u;
3011 	unsigned								failedIterations		= 0u;
3012 	Image									discardableImage		(context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3013 
3014 	{
3015 		const Allocation&		alloc				= vertexBuffer.getAllocation();
3016 		std::vector<tcu::Vec4>	data				(maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
3017 		const float				pixelSize			= 2.0f / static_cast<float>(maxWidth);
3018 		float					leftHandPosition	= -1.0f;
3019 
3020 		for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
3021 		{
3022 			data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
3023 			leftHandPosition += pixelSize;
3024 		}
3025 
3026 		deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
3027 		flushAlloc(vk, device, alloc);
3028 	}
3029 
3030 	const Unique<VkFramebuffer>	framebuffer			(makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
3031 	const VkViewport			viewport			= makeViewport(maxWidth, 1u);
3032 	const VkRect2D				scissor				= makeRect2D(maxWidth, 1u);
3033 	const vk::VkDeviceSize		imageResultSize		= tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
3034 	Buffer						imageBufferResult	(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3035 	const VkDeviceSize			vertexBufferOffset	= 0u;
3036 
3037 	for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
3038 	{
3039 		totalIterations++;
3040 
3041 		for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
3042 		{
3043 			const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3044 			initializeMemory(context, alloc, extraData[ndx]);
3045 		}
3046 
3047 		beginCommandBuffer(vk, *cmdBuffer);
3048 		{
3049 			vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3050 
3051 			vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3052 
3053 			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
3054 
3055 			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3056 
3057 			if (extraDataCount > 0)
3058 			{
3059 				vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3060 					&descriptorSet.get(), 0u, DE_NULL);
3061 			}
3062 
3063 			vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
3064 
3065 			vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
3066 
3067 			endRenderPass(vk, *cmdBuffer);
3068 
3069 			copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3070 
3071 			endCommandBuffer(vk, *cmdBuffer);
3072 
3073 			submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3074 		}
3075 		context.resetCommandPoolForVKSC(device, *cmdPool);
3076 
3077 		{
3078 			const Allocation& allocResult = imageBufferResult.getAllocation();
3079 			invalidateAlloc(vk, device, allocResult);
3080 
3081 			std::vector<const void*> datas;
3082 			datas.push_back(allocResult.getHostPtr());
3083 			if (!checkResult(internalData, datas, width, subgroupSize))
3084 				failedIterations++;
3085 		}
3086 	}
3087 
3088 	if (0 < failedIterations)
3089 	{
3090 		unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3091 
3092 		context.getTestContext().getLog()
3093 				<< TestLog::Message << valuesPassed << " / "
3094 				<< totalIterations << " values passed" << TestLog::EndMessage;
3095 
3096 		return tcu::TestStatus::fail("Failed!");
3097 	}
3098 
3099 	return tcu::TestStatus::pass("OK");
3100 }
3101 
getPossibleGraphicsSubgroupStages(Context & context,const vk::VkShaderStageFlags testedStages)3102 vk::VkShaderStageFlags vkt::subgroups::getPossibleGraphicsSubgroupStages (Context& context, const vk::VkShaderStageFlags testedStages)
3103 {
3104 	const VkPhysicalDeviceSubgroupProperties&	subgroupProperties	= context.getSubgroupProperties();
3105 	VkShaderStageFlags							stages				= testedStages & subgroupProperties.supportedStages;
3106 
3107 	DE_ASSERT(isAllGraphicsStages(testedStages));
3108 
3109 	if (VK_SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
3110 	{
3111 		if ((stages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
3112 			TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
3113 		else
3114 			stages = VK_SHADER_STAGE_FRAGMENT_BIT;
3115 	}
3116 
3117 	if (static_cast<VkShaderStageFlags>(0u) == stages)
3118 		TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
3119 
3120 	return stages;
3121 }
3122 
allStages(Context & context,vk::VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,const VerificationFunctor & checkResult,const vk::VkShaderStageFlags shaderStage)3123 tcu::TestStatus vkt::subgroups::allStages (Context&						context,
3124 										   vk::VkFormat					format,
3125 										   const SSBOData*				extraData,
3126 										   deUint32						extraDataCount,
3127 										   const void*					internalData,
3128 										   const VerificationFunctor&	checkResult,
3129 										   const vk::VkShaderStageFlags	shaderStage)
3130 {
3131 	return vkt::subgroups::allStagesRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, shaderStage,
3132 														 0u, 0u, 0u, 0u, 0u, DE_NULL);
3133 }
3134 
allStagesRequiredSubgroupSize(Context & context,vk::VkFormat format,const SSBOData * extraDatas,deUint32 extraDatasCount,const void * internalData,const VerificationFunctor & checkResult,const vk::VkShaderStageFlags shaderStageTested,const deUint32 vertexShaderStageCreateFlags,const deUint32 tessellationControlShaderStageCreateFlags,const deUint32 tessellationEvalShaderStageCreateFlags,const deUint32 geometryShaderStageCreateFlags,const deUint32 fragmentShaderStageCreateFlags,const deUint32 requiredSubgroupSize[5])3135 tcu::TestStatus vkt::subgroups::allStagesRequiredSubgroupSize (Context&						context,
3136 															   vk::VkFormat					format,
3137 															   const SSBOData*				extraDatas,
3138 															   deUint32						extraDatasCount,
3139 															   const void*					internalData,
3140 															   const VerificationFunctor&	checkResult,
3141 															   const vk::VkShaderStageFlags	shaderStageTested,
3142 															   const deUint32				vertexShaderStageCreateFlags,
3143 															   const deUint32				tessellationControlShaderStageCreateFlags,
3144 															   const deUint32				tessellationEvalShaderStageCreateFlags,
3145 															   const deUint32				geometryShaderStageCreateFlags,
3146 															   const deUint32				fragmentShaderStageCreateFlags,
3147 															   const deUint32				requiredSubgroupSize[5])
3148 {
3149 	const DeviceInterface&			vk					= context.getDeviceInterface();
3150 	const VkDevice					device				= context.getDevice();
3151 	const deUint32					maxWidth			= getMaxWidth();
3152 	vector<VkShaderStageFlagBits>	stagesVector;
3153 	VkShaderStageFlags				shaderStageRequired	= (VkShaderStageFlags)0ull;
3154 
3155 	Move<VkShaderModule>			vertexShaderModule;
3156 	Move<VkShaderModule>			teCtrlShaderModule;
3157 	Move<VkShaderModule>			teEvalShaderModule;
3158 	Move<VkShaderModule>			geometryShaderModule;
3159 	Move<VkShaderModule>			fragmentShaderModule;
3160 
3161 	if (shaderStageTested & VK_SHADER_STAGE_VERTEX_BIT)
3162 	{
3163 		stagesVector.push_back(VK_SHADER_STAGE_VERTEX_BIT);
3164 	}
3165 	if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
3166 	{
3167 		stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
3168 		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
3169 		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
3170 	}
3171 	if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
3172 	{
3173 		stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
3174 		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
3175 		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
3176 	}
3177 	if (shaderStageTested & VK_SHADER_STAGE_GEOMETRY_BIT)
3178 	{
3179 		stagesVector.push_back(VK_SHADER_STAGE_GEOMETRY_BIT);
3180 		const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
3181 		shaderStageRequired |=  (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
3182 	}
3183 	if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
3184 	{
3185 		const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
3186 		shaderStageRequired |=  (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
3187 	}
3188 
3189 	const deUint32	stagesCount	= static_cast<deUint32>(stagesVector.size());
3190 	const string	vert		= (shaderStageRequired & VK_SHADER_STAGE_VERTEX_BIT)					? "vert_noSubgroup"		: "vert";
3191 	const string	tesc		= (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)		? "tesc_noSubgroup"		: "tesc";
3192 	const string	tese		= (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)	? "tese_noSubgroup"		: "tese";
3193 
3194 	shaderStageRequired = shaderStageTested | shaderStageRequired;
3195 
3196 	vertexShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(vert), 0u);
3197 	if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
3198 	{
3199 		teCtrlShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tesc), 0u);
3200 		teEvalShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tese), 0u);
3201 	}
3202 	if (shaderStageRequired & VK_SHADER_STAGE_GEOMETRY_BIT)
3203 	{
3204 		if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
3205 		{
3206 			// tessellation shaders output line primitives
3207 			geometryShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("geometry_lines"), 0u);
3208 		}
3209 		else
3210 		{
3211 			// otherwise points are processed by geometry shader
3212 			geometryShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("geometry_points"), 0u);
3213 		}
3214 	}
3215 	if (shaderStageRequired & VK_SHADER_STAGE_FRAGMENT_BIT)
3216 		fragmentShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u);
3217 
3218 	std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(stagesCount + extraDatasCount);
3219 
3220 	DescriptorSetLayoutBuilder layoutBuilder;
3221 
3222 	// The implicit result SSBO we use to store our outputs from the shader
3223 	for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
3224 	{
3225 		const VkDeviceSize shaderSize = (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? maxWidth * 2 : maxWidth;
3226 		const VkDeviceSize size = getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize;
3227 		inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT));
3228 
3229 		layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1, stagesVector[ndx], getResultBinding(stagesVector[ndx]), DE_NULL);
3230 	}
3231 
3232 	for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
3233 	{
3234 		const deUint32 datasNdx = ndx - stagesCount;
3235 		if (extraDatas[datasNdx].isImage())
3236 		{
3237 			inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
3238 		}
3239 		else
3240 		{
3241 			const auto usage	= (extraDatas[datasNdx].isUBO() ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT : VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
3242 			const auto size		= getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) * extraDatas[datasNdx].numElements;
3243 			inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, usage));
3244 		}
3245 
3246 		const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3247 		initializeMemory(context, alloc, extraDatas[datasNdx]);
3248 
3249 		layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1,
3250 								extraDatas[datasNdx].stages, extraDatas[datasNdx].binding, DE_NULL);
3251 	}
3252 
3253 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
3254 
3255 	const Unique<VkPipelineLayout> pipelineLayout(
3256 		makePipelineLayout(vk, device, *descriptorSetLayout));
3257 
3258 	const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
3259 	const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
3260 														   shaderStageRequired,
3261 														   *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, *teCtrlShaderModule, *teEvalShaderModule,
3262 														   *renderPass,
3263 														   (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
3264 														   DE_NULL, DE_NULL, false, VK_FORMAT_R32G32B32A32_SFLOAT,
3265 														   vertexShaderStageCreateFlags, tessellationControlShaderStageCreateFlags, tessellationEvalShaderStageCreateFlags,
3266 														   geometryShaderStageCreateFlags, fragmentShaderStageCreateFlags, requiredSubgroupSize));
3267 
3268 	Move <VkDescriptorPool>	descriptorPool;
3269 	Move <VkDescriptorSet>	descriptorSet;
3270 
3271 	if (inputBuffers.size() > 0)
3272 	{
3273 		DescriptorPoolBuilder poolBuilder;
3274 
3275 		for (deUint32 ndx = 0u; ndx < static_cast<deUint32>(inputBuffers.size()); ndx++)
3276 		{
3277 			poolBuilder.addType(inputBuffers[ndx]->getType());
3278 		}
3279 
3280 		descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3281 
3282 		// Create descriptor set
3283 		descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3284 
3285 		DescriptorSetUpdateBuilder updateBuilder;
3286 
3287 		for (deUint32 ndx = 0u; ndx < stagesCount + extraDatasCount; ndx++)
3288 		{
3289 			deUint32 binding;
3290 			if (ndx < stagesCount) binding = getResultBinding(stagesVector[ndx]);
3291 			else binding = extraDatas[ndx -stagesCount].binding;
3292 
3293 			if (inputBuffers[ndx]->isImage())
3294 			{
3295 				VkDescriptorImageInfo info =
3296 					makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
3297 											inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3298 
3299 				updateBuilder.writeSingle(	*descriptorSet,
3300 											DescriptorSetUpdateBuilder::Location::binding(binding),
3301 											inputBuffers[ndx]->getType(), &info);
3302 			}
3303 			else
3304 			{
3305 				VkDescriptorBufferInfo info =
3306 					makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(),
3307 							0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
3308 
3309 				updateBuilder.writeSingle(	*descriptorSet,
3310 													DescriptorSetUpdateBuilder::Location::binding(binding),
3311 													inputBuffers[ndx]->getType(), &info);
3312 			}
3313 		}
3314 
3315 		updateBuilder.update(vk, device);
3316 	}
3317 
3318 	{
3319 		const VkQueue					queue					= context.getUniversalQueue();
3320 		const deUint32					queueFamilyIndex		= context.getUniversalQueueFamilyIndex();
3321 		const Unique<VkCommandPool>		cmdPool					(makeCommandPool(vk, device, queueFamilyIndex));
3322 		const deUint32					subgroupSize			= getSubgroupSize(context);
3323 		const Unique<VkCommandBuffer>	cmdBuffer				(makeCommandBuffer(context, *cmdPool));
3324 		unsigned						totalIterations			= 0u;
3325 		unsigned						failedIterations		= 0u;
3326 		Image							resultImage				(context, maxWidth, 1, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3327 		const Unique<VkFramebuffer>		framebuffer				(makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), maxWidth, 1u));
3328 		const VkViewport				viewport				= makeViewport(maxWidth, 1u);
3329 		const VkRect2D					scissor					= makeRect2D(maxWidth, 1u);
3330 		const vk::VkDeviceSize			imageResultSize			= tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
3331 		Buffer							imageBufferResult		(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3332 		const VkImageSubresourceRange	subresourceRange		=
3333 		{
3334 			VK_IMAGE_ASPECT_COLOR_BIT,											//VkImageAspectFlags	aspectMask
3335 			0u,																	//deUint32				baseMipLevel
3336 			1u,																	//deUint32				levelCount
3337 			0u,																	//deUint32				baseArrayLayer
3338 			1u																	//deUint32				layerCount
3339 		};
3340 
3341 		const VkImageMemoryBarrier		colorAttachmentBarrier	= makeImageMemoryBarrier(
3342 			(VkAccessFlags)0u, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
3343 			VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
3344 			resultImage.getImage(), subresourceRange);
3345 
3346 		for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
3347 		{
3348 			for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
3349 			{
3350 				// re-init the data
3351 				const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3352 				initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
3353 			}
3354 
3355 			totalIterations++;
3356 
3357 			beginCommandBuffer(vk, *cmdBuffer);
3358 
3359 			vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &colorAttachmentBarrier);
3360 
3361 			vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3362 
3363 			vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3364 
3365 			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
3366 
3367 			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3368 
3369 			if (stagesCount + extraDatasCount > 0)
3370 				vk.cmdBindDescriptorSets(*cmdBuffer,
3371 						VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3372 						&descriptorSet.get(), 0u, DE_NULL);
3373 
3374 			vk.cmdDraw(*cmdBuffer, width, 1, 0, 0);
3375 
3376 			endRenderPass(vk, *cmdBuffer);
3377 
3378 			copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(width, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3379 
3380 			endCommandBuffer(vk, *cmdBuffer);
3381 
3382 			submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3383 
3384 			for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
3385 			{
3386 				std::vector<const void*> datas;
3387 				if (!inputBuffers[ndx]->isImage())
3388 				{
3389 					const Allocation& resultAlloc = inputBuffers[ndx]->getAllocation();
3390 					invalidateAlloc(vk, device, resultAlloc);
3391 					// we always have our result data first
3392 					datas.push_back(resultAlloc.getHostPtr());
3393 				}
3394 
3395 				for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
3396 				{
3397 					const deUint32 datasNdx = index - stagesCount;
3398 					if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
3399 					{
3400 						const Allocation& resultAlloc = inputBuffers[index]->getAllocation();
3401 						invalidateAlloc(vk, device, resultAlloc);
3402 						// we always have our result data first
3403 						datas.push_back(resultAlloc.getHostPtr());
3404 					}
3405 				}
3406 
3407 				// Any stage in the vertex pipeline may be called multiple times per vertex, so we may need >= non-strict comparisons.
3408 				const bool		multiCall	= (	stagesVector[ndx] == VK_SHADER_STAGE_VERTEX_BIT						||
3409 												stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT		||
3410 												stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT	||
3411 												stagesVector[ndx] == VK_SHADER_STAGE_GEOMETRY_BIT					);
3412 				const deUint32	usedWidth	= ((stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? width * 2 : width);
3413 
3414 				if (!checkResult(internalData, datas, usedWidth, subgroupSize, multiCall))
3415 					failedIterations++;
3416 			}
3417 			if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
3418 			{
3419 				std::vector<const void*> datas;
3420 				const Allocation& resultAlloc = imageBufferResult.getAllocation();
3421 				invalidateAlloc(vk, device, resultAlloc);
3422 
3423 				// we always have our result data first
3424 				datas.push_back(resultAlloc.getHostPtr());
3425 
3426 				for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
3427 				{
3428 					const deUint32 datasNdx = index - stagesCount;
3429 					if (VK_SHADER_STAGE_FRAGMENT_BIT & extraDatas[datasNdx].stages && (!inputBuffers[index]->isImage()))
3430 					{
3431 						const Allocation& alloc = inputBuffers[index]->getAllocation();
3432 						invalidateAlloc(vk, device, alloc);
3433 						// we always have our result data first
3434 						datas.push_back(alloc.getHostPtr());
3435 					}
3436 				}
3437 
3438 				if (!checkResult(internalData, datas, width, subgroupSize, false))
3439 					failedIterations++;
3440 			}
3441 
3442 			context.resetCommandPoolForVKSC(device, *cmdPool);
3443 		}
3444 
3445 		if (0 < failedIterations)
3446 		{
3447 			unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3448 
3449 			context.getTestContext().getLog()
3450 				<< TestLog::Message << valuesPassed << " / "
3451 				<< totalIterations << " values passed" << TestLog::EndMessage;
3452 
3453 			return tcu::TestStatus::fail("Failed!");
3454 		}
3455 	}
3456 
3457 	return tcu::TestStatus::pass("OK");
3458 }
3459 
makeVertexFrameBufferTest(Context & context,vk::VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,subgroups::CheckResult checkResult)3460 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTest (Context&					context,
3461 														   vk::VkFormat				format,
3462 														   const SSBOData*			extraData,
3463 														   deUint32					extraDataCount,
3464 														   const void*				internalData,
3465 														   subgroups::CheckResult	checkResult)
3466 {
3467 	return makeVertexFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, 0u, 0u);
3468 }
3469 
makeVertexFrameBufferTestRequiredSubgroupSize(Context & context,vk::VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,subgroups::CheckResult checkResult,const deUint32 vertexShaderStageCreateFlags,const deUint32 requiredSubgroupSize)3470 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTestRequiredSubgroupSize (Context&					context,
3471 																			   vk::VkFormat				format,
3472 																			   const SSBOData*			extraData,
3473 																			   deUint32					extraDataCount,
3474 																			   const void*				internalData,
3475 																			   subgroups::CheckResult	checkResult,
3476 																			   const deUint32			vertexShaderStageCreateFlags,
3477 																			   const deUint32			requiredSubgroupSize)
3478 {
3479 	const DeviceInterface&					vk						= context.getDeviceInterface();
3480 	const VkDevice							device					= context.getDevice();
3481 	const VkQueue							queue					= context.getUniversalQueue();
3482 	const deUint32							maxWidth				= getMaxWidth();
3483 	const deUint32							queueFamilyIndex		= context.getUniversalQueueFamilyIndex();
3484 	vector<de::SharedPtr<BufferOrImage> >	inputBuffers			(extraDataCount);
3485 	DescriptorSetLayoutBuilder				layoutBuilder;
3486 	const Unique<VkShaderModule>			vertexShaderModule		(createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
3487 	const Unique<VkShaderModule>			fragmentShaderModule	(createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
3488 	const Unique<VkRenderPass>				renderPass				(makeRenderPass(context, format));
3489 	const VkVertexInputBindingDescription	vertexInputBinding		=
3490 	{
3491 		0u,											// binding;
3492 		static_cast<deUint32>(sizeof(tcu::Vec4)),	// stride;
3493 		VK_VERTEX_INPUT_RATE_VERTEX					// inputRate
3494 	};
3495 	const VkVertexInputAttributeDescription	vertexInputAttribute	=
3496 	{
3497 		0u,
3498 		0u,
3499 		VK_FORMAT_R32G32B32A32_SFLOAT,
3500 		0u
3501 	};
3502 
3503 	for (deUint32 i = 0u; i < extraDataCount; i++)
3504 	{
3505 		if (extraData[i].isImage())
3506 		{
3507 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
3508 		}
3509 		else
3510 		{
3511 			DE_ASSERT(extraData[i].isUBO());
3512 			vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
3513 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
3514 		}
3515 		const Allocation& alloc = inputBuffers[i]->getAllocation();
3516 		initializeMemory(context, alloc, extraData[i]);
3517 	}
3518 
3519 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
3520 		layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_VERTEX_BIT, DE_NULL);
3521 
3522 	const Unique<VkDescriptorSetLayout>		descriptorSetLayout		(layoutBuilder.build(vk, device));
3523 
3524 	const Unique<VkPipelineLayout>			pipelineLayout			(makePipelineLayout(vk, device, *descriptorSetLayout));
3525 
3526 	const deUint32 requiredSubgroupSizes[5] = {requiredSubgroupSize, 0u, 0u, 0u, 0u};
3527 	const Unique<VkPipeline>				pipeline				(makeGraphicsPipeline(context, *pipelineLayout,
3528 																						  VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
3529 																						  *vertexShaderModule, *fragmentShaderModule,
3530 																						  DE_NULL, DE_NULL, DE_NULL,
3531 																						  *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
3532 																						  &vertexInputBinding, &vertexInputAttribute, true, format,
3533 																						  vertexShaderStageCreateFlags, 0u, 0u, 0u, 0u,
3534 																						  requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
3535 	DescriptorPoolBuilder					poolBuilder;
3536 	DescriptorSetUpdateBuilder				updateBuilder;
3537 
3538 
3539 	for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
3540 		poolBuilder.addType(inputBuffers[ndx]->getType());
3541 
3542 	Move <VkDescriptorPool>					descriptorPool;
3543 	Move <VkDescriptorSet>					descriptorSet;
3544 
3545 	if (extraDataCount > 0)
3546 	{
3547 		descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3548 		descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3549 	}
3550 
3551 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
3552 	{
3553 		const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3554 		initializeMemory(context, alloc, extraData[ndx]);
3555 	}
3556 
3557 	for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
3558 	{
3559 		if (inputBuffers[buffersNdx]->isImage())
3560 		{
3561 			VkDescriptorImageInfo info =
3562 				makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
3563 										inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3564 
3565 			updateBuilder.writeSingle(*descriptorSet,
3566 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
3567 										inputBuffers[buffersNdx]->getType(), &info);
3568 		}
3569 		else
3570 		{
3571 			VkDescriptorBufferInfo info =
3572 				makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
3573 										0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
3574 
3575 			updateBuilder.writeSingle(*descriptorSet,
3576 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
3577 										inputBuffers[buffersNdx]->getType(), &info);
3578 		}
3579 	}
3580 	updateBuilder.update(vk, device);
3581 
3582 	const Unique<VkCommandPool>				cmdPool					(makeCommandPool(vk, device, queueFamilyIndex));
3583 
3584 	const deUint32							subgroupSize			= getSubgroupSize(context);
3585 
3586 	const Unique<VkCommandBuffer>			cmdBuffer				(makeCommandBuffer(context, *cmdPool));
3587 
3588 	const vk::VkDeviceSize					vertexBufferSize		= maxWidth * sizeof(tcu::Vec4);
3589 	Buffer									vertexBuffer			(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
3590 
3591 	unsigned								totalIterations			= 0u;
3592 	unsigned								failedIterations		= 0u;
3593 
3594 	Image									discardableImage		(context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3595 
3596 	{
3597 		const Allocation&		alloc				= vertexBuffer.getAllocation();
3598 		std::vector<tcu::Vec4>	data				(maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
3599 		const float				pixelSize			= 2.0f / static_cast<float>(maxWidth);
3600 		float					leftHandPosition	= -1.0f;
3601 
3602 		for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
3603 		{
3604 			data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
3605 			leftHandPosition += pixelSize;
3606 		}
3607 
3608 		deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
3609 		flushAlloc(vk, device, alloc);
3610 	}
3611 
3612 	const Unique<VkFramebuffer>	framebuffer			(makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
3613 	const VkViewport			viewport			= makeViewport(maxWidth, 1u);
3614 	const VkRect2D				scissor				= makeRect2D(maxWidth, 1u);
3615 	const vk::VkDeviceSize		imageResultSize		= tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
3616 	Buffer						imageBufferResult	(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3617 	const VkDeviceSize			vertexBufferOffset	= 0u;
3618 
3619 	for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
3620 	{
3621 		totalIterations++;
3622 
3623 		for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
3624 		{
3625 			const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3626 			initializeMemory(context, alloc, extraData[ndx]);
3627 		}
3628 
3629 		beginCommandBuffer(vk, *cmdBuffer);
3630 		{
3631 			vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3632 
3633 			vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3634 
3635 			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
3636 
3637 			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3638 
3639 			if (extraDataCount > 0)
3640 			{
3641 				vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3642 					&descriptorSet.get(), 0u, DE_NULL);
3643 			}
3644 
3645 			vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
3646 
3647 			vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
3648 
3649 			endRenderPass(vk, *cmdBuffer);
3650 
3651 			copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3652 
3653 			endCommandBuffer(vk, *cmdBuffer);
3654 
3655 			submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3656 		}
3657 		context.resetCommandPoolForVKSC(device, *cmdPool);
3658 
3659 		{
3660 			const Allocation& allocResult = imageBufferResult.getAllocation();
3661 			invalidateAlloc(vk, device, allocResult);
3662 
3663 			std::vector<const void*> datas;
3664 			datas.push_back(allocResult.getHostPtr());
3665 			if (!checkResult(internalData, datas, width, subgroupSize))
3666 				failedIterations++;
3667 		}
3668 	}
3669 
3670 	if (0 < failedIterations)
3671 	{
3672 		unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3673 
3674 		context.getTestContext().getLog()
3675 			<< TestLog::Message << valuesPassed << " / "
3676 			<< totalIterations << " values passed" << TestLog::EndMessage;
3677 
3678 		return tcu::TestStatus::fail("Failed!");
3679 	}
3680 
3681 	return tcu::TestStatus::pass("OK");
3682 }
3683 
makeFragmentFrameBufferTest(Context & context,VkFormat format,const SSBOData * extraDatas,deUint32 extraDatasCount,const void * internalData,CheckResultFragment checkResult)3684 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTest (Context&				context,
3685 															 VkFormat				format,
3686 															 const SSBOData*		extraDatas,
3687 															 deUint32				extraDatasCount,
3688 															 const void*			internalData,
3689 															 CheckResultFragment	checkResult)
3690 {
3691 	return makeFragmentFrameBufferTestRequiredSubgroupSize(context, format, extraDatas, extraDatasCount, internalData, checkResult, 0u, 0u);
3692 }
3693 
makeFragmentFrameBufferTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * extraDatas,deUint32 extraDatasCount,const void * internalData,CheckResultFragment checkResult,const deUint32 fragmentShaderStageCreateFlags,const deUint32 requiredSubgroupSize)3694 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTestRequiredSubgroupSize (Context&				context,
3695 																				 VkFormat				format,
3696 																				 const SSBOData*		extraDatas,
3697 																				 deUint32				extraDatasCount,
3698 																				 const void*			internalData,
3699 																				 CheckResultFragment	checkResult,
3700 																				 const deUint32			fragmentShaderStageCreateFlags,
3701 																				 const deUint32			requiredSubgroupSize)
3702 {
3703 	const DeviceInterface&						vk						= context.getDeviceInterface();
3704 	const VkDevice								device					= context.getDevice();
3705 	const VkQueue								queue					= context.getUniversalQueue();
3706 	const deUint32								queueFamilyIndex		= context.getUniversalQueueFamilyIndex();
3707 	const Unique<VkShaderModule>				vertexShaderModule		(createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
3708 	const Unique<VkShaderModule>				fragmentShaderModule	(createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
3709 	std::vector< de::SharedPtr<BufferOrImage> > inputBuffers			(extraDatasCount);
3710 
3711 	for (deUint32 i = 0; i < extraDatasCount; i++)
3712 	{
3713 		if (extraDatas[i].isImage())
3714 		{
3715 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[i].numElements), 1, extraDatas[i].format));
3716 		}
3717 		else
3718 		{
3719 			DE_ASSERT(extraDatas[i].isUBO());
3720 
3721 			const vk::VkDeviceSize	size	= getElementSizeInBytes(extraDatas[i].format, extraDatas[i].layout) * extraDatas[i].numElements;
3722 
3723 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
3724 		}
3725 
3726 		const Allocation& alloc = inputBuffers[i]->getAllocation();
3727 
3728 		initializeMemory(context, alloc, extraDatas[i]);
3729 	}
3730 
3731 	DescriptorSetLayoutBuilder layoutBuilder;
3732 
3733 	for (deUint32 i = 0; i < extraDatasCount; i++)
3734 	{
3735 		layoutBuilder.addBinding(inputBuffers[i]->getType(), 1, VK_SHADER_STAGE_FRAGMENT_BIT, DE_NULL);
3736 	}
3737 
3738 	const Unique<VkDescriptorSetLayout>	descriptorSetLayout(layoutBuilder.build(vk, device));
3739 	const Unique<VkPipelineLayout>		pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
3740 	const Unique<VkRenderPass>			renderPass(makeRenderPass(context, format));
3741 	const deUint32						requiredSubgroupSizes[5] = {0u, 0u, 0u, 0u, requiredSubgroupSize};
3742 	const Unique<VkPipeline>			pipeline(makeGraphicsPipeline(context,
3743 																	  *pipelineLayout,
3744 																	  VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
3745 																	  *vertexShaderModule,
3746 																	  *fragmentShaderModule,
3747 																	  DE_NULL,
3748 																	  DE_NULL,
3749 																	  DE_NULL,
3750 																	  *renderPass,
3751 																	  VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
3752 																	  DE_NULL,
3753 																	  DE_NULL,
3754 																	  true,
3755 																	  VK_FORMAT_R32G32B32A32_SFLOAT,
3756 																	  0u,
3757 																	  0u,
3758 																	  0u,
3759 																	  0u,
3760 																	  fragmentShaderStageCreateFlags,
3761 																	  requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
3762 	DescriptorPoolBuilder				poolBuilder;
3763 
3764 	// To stop validation complaining, always add at least one type to pool.
3765 	poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3766 	for (deUint32 i = 0; i < extraDatasCount; i++)
3767 	{
3768 		poolBuilder.addType(inputBuffers[i]->getType());
3769 	}
3770 
3771 	Move<VkDescriptorPool> descriptorPool;
3772 	// Create descriptor set
3773 	Move<VkDescriptorSet> descriptorSet;
3774 
3775 	if (extraDatasCount > 0)
3776 	{
3777 		descriptorPool	= poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3778 
3779 		descriptorSet	= makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3780 	}
3781 
3782 	DescriptorSetUpdateBuilder updateBuilder;
3783 
3784 	for (deUint32 i = 0; i < extraDatasCount; i++)
3785 	{
3786 		if (inputBuffers[i]->isImage())
3787 		{
3788 			const VkDescriptorImageInfo info = makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(), inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3789 
3790 			updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i), inputBuffers[i]->getType(), &info);
3791 		}
3792 		else
3793 		{
3794 			const VkDescriptorBufferInfo	info	= makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, inputBuffers[i]->getAsBuffer()->getSize());
3795 
3796 			updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i), inputBuffers[i]->getType(), &info);
3797 		}
3798 	}
3799 
3800 	if (extraDatasCount > 0)
3801 		updateBuilder.update(vk, device);
3802 
3803 	const Unique<VkCommandPool>		cmdPool				(makeCommandPool(vk, device, queueFamilyIndex));
3804 	const deUint32					subgroupSize		= getSubgroupSize(context);
3805 	const Unique<VkCommandBuffer>	cmdBuffer			(makeCommandBuffer(context, *cmdPool));
3806 	unsigned						totalIterations		= 0;
3807 	unsigned						failedIterations	= 0;
3808 
3809 	for (deUint32 width = 8; width <= subgroupSize; width *= 2)
3810 	{
3811 		for (deUint32 height = 8; height <= subgroupSize; height *= 2)
3812 		{
3813 			totalIterations++;
3814 
3815 			// re-init the data
3816 			for (deUint32 i = 0; i < extraDatasCount; i++)
3817 			{
3818 				const Allocation& alloc = inputBuffers[i]->getAllocation();
3819 
3820 				initializeMemory(context, alloc, extraDatas[i]);
3821 			}
3822 
3823 			const VkDeviceSize			formatSize				= getFormatSizeInBytes(format);
3824 			const VkDeviceSize			resultImageSizeInBytes	= width * height * formatSize;
3825 			Image						resultImage				(context, width, height, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3826 			Buffer						resultBuffer			(context, resultImageSizeInBytes, VK_IMAGE_USAGE_TRANSFER_DST_BIT);
3827 			const Unique<VkFramebuffer>	framebuffer				(makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), width, height));
3828 			VkViewport					viewport				= makeViewport(width, height);
3829 			VkRect2D					scissor					= {{0, 0}, {width, height}};
3830 
3831 			beginCommandBuffer(vk, *cmdBuffer);
3832 
3833 			vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3834 
3835 			vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3836 
3837 			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, width, height), tcu::Vec4(0.0f));
3838 
3839 			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3840 
3841 			if (extraDatasCount > 0)
3842 				vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
3843 
3844 			vk.cmdDraw(*cmdBuffer, 4, 1, 0, 0);
3845 
3846 			endRenderPass(vk, *cmdBuffer);
3847 
3848 			copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), resultBuffer.getBuffer(), tcu::IVec2(width, height), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3849 
3850 			endCommandBuffer(vk, *cmdBuffer);
3851 
3852 			submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3853 
3854 			std::vector<const void*> datas;
3855 			{
3856 				const Allocation& resultAlloc = resultBuffer.getAllocation();
3857 				invalidateAlloc(vk, device, resultAlloc);
3858 
3859 				// we always have our result data first
3860 				datas.push_back(resultAlloc.getHostPtr());
3861 			}
3862 
3863 			if (!checkResult(internalData, datas, width, height, subgroupSize))
3864 			{
3865 				failedIterations++;
3866 			}
3867 
3868 			context.resetCommandPoolForVKSC(device, *cmdPool);
3869 		}
3870 	}
3871 
3872 	if (0 < failedIterations)
3873 	{
3874 		unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3875 
3876 		context.getTestContext().getLog()
3877 			<< TestLog::Message << valuesPassed << " / "
3878 			<< totalIterations << " values passed" << TestLog::EndMessage;
3879 
3880 		return tcu::TestStatus::fail("Failed!");
3881 	}
3882 
3883 	return tcu::TestStatus::pass("OK");
3884 }
3885 
makeComputePipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderModule shaderModule,const deUint32 pipelineShaderStageFlags,const deUint32 pipelineCreateFlags,VkPipeline basePipelineHandle,deUint32 localSizeX,deUint32 localSizeY,deUint32 localSizeZ,deUint32 requiredSubgroupSize)3886 Move<VkPipeline> makeComputePipeline (Context&					context,
3887 									  const VkPipelineLayout	pipelineLayout,
3888 									  const VkShaderModule		shaderModule,
3889 									  const deUint32			pipelineShaderStageFlags,
3890 									  const deUint32			pipelineCreateFlags,
3891 									  VkPipeline				basePipelineHandle,
3892 									  deUint32					localSizeX,
3893 									  deUint32					localSizeY,
3894 									  deUint32					localSizeZ,
3895 									  deUint32					requiredSubgroupSize)
3896 {
3897 	const deUint32														localSize[3]				= {localSizeX, localSizeY, localSizeZ};
3898 	const vk::VkSpecializationMapEntry									entries[3]					=
3899 	{
3900 		{0, sizeof(deUint32) * 0, sizeof(deUint32)},
3901 		{1, sizeof(deUint32) * 1, sizeof(deUint32)},
3902 		{2, static_cast<deUint32>(sizeof(deUint32) * 2), sizeof(deUint32)},
3903 	};
3904 	const vk::VkSpecializationInfo										info						=
3905 	{
3906 		/* mapEntryCount = */ 3,
3907 		/* pMapEntries   = */ entries,
3908 		/* dataSize      = */ sizeof(localSize),
3909 		/* pData         = */ localSize
3910 	};
3911 	const vk::VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT	subgroupSizeCreateInfo		=
3912 	{
3913 		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,	// VkStructureType    sType;
3914 		DE_NULL,																		// void*              pNext;
3915 		requiredSubgroupSize															// uint32_t           requiredSubgroupSize;
3916 	};
3917 	const vk::VkPipelineShaderStageCreateInfo							pipelineShaderStageParams	=
3918 	{
3919 		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,				// VkStructureType					sType;
3920 		(requiredSubgroupSize != 0u ? &subgroupSizeCreateInfo : DE_NULL),	// const void*						pNext;
3921 		pipelineShaderStageFlags,											// VkPipelineShaderStageCreateFlags	flags;
3922 		VK_SHADER_STAGE_COMPUTE_BIT,										// VkShaderStageFlagBits			stage;
3923 		shaderModule,														// VkShaderModule					module;
3924 		"main",																// const char*						pName;
3925 		&info,																// const VkSpecializationInfo*		pSpecializationInfo;
3926 	};
3927 	const vk::VkComputePipelineCreateInfo								pipelineCreateInfo			=
3928 	{
3929 		VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,	// VkStructureType	sType;
3930 		DE_NULL,										// const void*						pNext;
3931 		pipelineCreateFlags,							// VkPipelineCreateFlags			flags;
3932 		pipelineShaderStageParams,						// VkPipelineShaderStageCreateInfo	stage;
3933 		pipelineLayout,									// VkPipelineLayout					layout;
3934 #ifndef CTS_USES_VULKANSC
3935 		basePipelineHandle,								// VkPipeline						basePipelineHandle;
3936 		-1,												// deInt32							basePipelineIndex;
3937 #else
3938 		DE_NULL,										// VkPipeline						basePipelineHandle;
3939 		0,												// deInt32							basePipelineIndex;
3940 #endif // CTS_USES_VULKANSC
3941 	};
3942 	static_cast<void>(basePipelineHandle);
3943 
3944 	return createComputePipeline(context.getDeviceInterface(), context.getDevice(), DE_NULL, &pipelineCreateInfo);
3945 }
3946 
3947 #ifndef CTS_USES_VULKANSC
makeMeshPipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderModule taskModule,const VkShaderModule meshModule,const deUint32 pipelineShaderStageFlags,const deUint32 pipelineCreateFlags,VkPipeline basePipelineHandle,deUint32 localSizeX,deUint32 localSizeY,deUint32 localSizeZ,deUint32 requiredSubgroupSize,const VkRenderPass renderPass)3948 Move<VkPipeline> makeMeshPipeline (Context&					context,
3949 								   const VkPipelineLayout	pipelineLayout,
3950 								   const VkShaderModule		taskModule,
3951 								   const VkShaderModule		meshModule,
3952 								   const deUint32			pipelineShaderStageFlags,
3953 								   const deUint32			pipelineCreateFlags,
3954 								   VkPipeline				basePipelineHandle,
3955 								   deUint32					localSizeX,
3956 								   deUint32					localSizeY,
3957 								   deUint32					localSizeZ,
3958 								   deUint32					requiredSubgroupSize,
3959 								   const VkRenderPass		renderPass)
3960 {
3961 	const deUint32														localSize[3]				= {localSizeX, localSizeY, localSizeZ};
3962 	const vk::VkSpecializationMapEntry									entries[3]					=
3963 	{
3964 		{0, sizeof(deUint32) * 0, sizeof(deUint32)},
3965 		{1, sizeof(deUint32) * 1, sizeof(deUint32)},
3966 		{2, static_cast<deUint32>(sizeof(deUint32) * 2), sizeof(deUint32)},
3967 	};
3968 	const vk::VkSpecializationInfo										info						=
3969 	{
3970 		/* mapEntryCount = */ 3,
3971 		/* pMapEntries   = */ entries,
3972 		/* dataSize      = */ sizeof(localSize),
3973 		/* pData         = */ localSize
3974 	};
3975 	const vk::VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT	subgroupSizeCreateInfo		=
3976 	{
3977 		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,	// VkStructureType    sType;
3978 		DE_NULL,																		// void*              pNext;
3979 		requiredSubgroupSize															// uint32_t           requiredSubgroupSize;
3980 	};
3981 
3982 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT*		pSubgroupSizeCreateInfo		= ((requiredSubgroupSize != 0u) ? &subgroupSizeCreateInfo : nullptr);
3983 
3984 	std::vector<VkPipelineShaderStageCreateInfo>						shaderStageParams;
3985 	vk::VkPipelineShaderStageCreateInfo									pipelineShaderStageParams	=
3986 	{
3987 		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType					sType;
3988 		nullptr,												// const void*						pNext;
3989 		pipelineShaderStageFlags,								// VkPipelineShaderStageCreateFlags	flags;
3990 		VK_SHADER_STAGE_FLAG_BITS_MAX_ENUM,						// VkShaderStageFlagBits			stage;
3991 		DE_NULL,												// VkShaderModule					module;
3992 		"main",													// const char*						pName;
3993 		&info,													// const VkSpecializationInfo*		pSpecializationInfo;
3994 	};
3995 
3996 	if (taskModule != DE_NULL)
3997 	{
3998 		pipelineShaderStageParams.module	= taskModule;
3999 		pipelineShaderStageParams.pNext		= pSubgroupSizeCreateInfo;
4000 		pipelineShaderStageParams.stage		= VK_SHADER_STAGE_TASK_BIT_EXT;
4001 		shaderStageParams.push_back(pipelineShaderStageParams);
4002 	}
4003 
4004 	if (meshModule != DE_NULL)
4005 	{
4006 		pipelineShaderStageParams.module	= meshModule;
4007 		pipelineShaderStageParams.pNext		= ((taskModule == DE_NULL) ? pSubgroupSizeCreateInfo : nullptr);
4008 		pipelineShaderStageParams.stage		= VK_SHADER_STAGE_MESH_BIT_EXT;
4009 		shaderStageParams.push_back(pipelineShaderStageParams);
4010 	}
4011 
4012 	const std::vector<VkViewport>	viewports	(1u, makeViewport(1u, 1u));
4013 	const std::vector<VkRect2D>		scissors	(1u, makeRect2D(1u, 1u));
4014 
4015 	return makeGraphicsPipeline(context.getDeviceInterface(), context.getDevice(), basePipelineHandle, pipelineLayout, pipelineCreateFlags, shaderStageParams, renderPass, viewports, scissors);
4016 }
4017 #endif // CTS_USES_VULKANSC
4018 
makeComputeOrMeshTestRequiredSubgroupSize(ComputeLike testType,Context & context,VkFormat format,const vkt::subgroups::SSBOData * inputs,deUint32 inputsCount,const void * internalData,vkt::subgroups::CheckResultCompute checkResult,const deUint32 pipelineShaderStageCreateFlags,const deUint32 numWorkgroups[3],const deBool isRequiredSubgroupSize,const deUint32 subgroupSize,const deUint32 localSizesToTest[][3],const deUint32 localSizesToTestCount)4019 tcu::TestStatus makeComputeOrMeshTestRequiredSubgroupSize (ComputeLike							testType,
4020 														   Context&								context,
4021 														   VkFormat								format,
4022 														   const vkt::subgroups::SSBOData*		inputs,
4023 														   deUint32								inputsCount,
4024 														   const void*							internalData,
4025 														   vkt::subgroups::CheckResultCompute	checkResult,
4026 														   const deUint32						pipelineShaderStageCreateFlags,
4027 														   const deUint32						numWorkgroups[3],
4028 														   const deBool							isRequiredSubgroupSize,
4029 														   const deUint32						subgroupSize,
4030 														   const deUint32						localSizesToTest[][3],
4031 														   const deUint32						localSizesToTestCount)
4032 {
4033 	const DeviceInterface&									vk								= context.getDeviceInterface();
4034 	const VkDevice											device							= context.getDevice();
4035 	const VkQueue											queue							= context.getUniversalQueue();
4036 	const deUint32											queueFamilyIndex				= context.getUniversalQueueFamilyIndex();
4037 #ifndef CTS_USES_VULKANSC
4038 	const VkPhysicalDeviceSubgroupSizeControlProperties&	subgroupSizeControlProperties	= context.getSubgroupSizeControlProperties();
4039 #else
4040 	const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT&	subgroupSizeControlProperties	= context.getSubgroupSizeControlPropertiesEXT();
4041 #endif // CTS_USES_VULKANSC
4042 	const VkDeviceSize										elementSize						= getFormatSizeInBytes(format);
4043 	const VkDeviceSize										maxSubgroupSize					= isRequiredSubgroupSize
4044 																							? deMax32(subgroupSizeControlProperties.maxSubgroupSize, vkt::subgroups::maxSupportedSubgroupSize())
4045 																							: vkt::subgroups::maxSupportedSubgroupSize();
4046 	const VkDeviceSize										resultBufferSize				= maxSubgroupSize * maxSubgroupSize * maxSubgroupSize;
4047 	const VkDeviceSize										resultBufferSizeInBytes			= resultBufferSize * elementSize;
4048 	Buffer													resultBuffer					(context, resultBufferSizeInBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
4049 	std::vector< de::SharedPtr<BufferOrImage> >				inputBuffers					(inputsCount);
4050 	const auto												shaderStageFlags				= ((testType == ComputeLike::COMPUTE)
4051 																								? VK_SHADER_STAGE_COMPUTE_BIT
4052 #ifndef CTS_USES_VULKANSC
4053 																								: (VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_TASK_BIT_EXT));
4054 #else
4055 																								: 0);
4056 #endif // CTS_USES_VULKANSC
4057 	const auto												pipelineBindPoint				= ((testType == ComputeLike::COMPUTE)
4058 																								? VK_PIPELINE_BIND_POINT_COMPUTE
4059 																								: VK_PIPELINE_BIND_POINT_GRAPHICS);
4060 	const auto												pipelineStage					= ((testType == ComputeLike::COMPUTE)
4061 																								? VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT
4062 #ifndef CTS_USES_VULKANSC
4063 																								: (VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT | VK_PIPELINE_STAGE_MESH_SHADER_BIT_EXT));
4064 #else
4065 																								: 0);
4066 #endif // CTS_USES_VULKANSC
4067 	const auto												renderArea						= makeRect2D(1u, 1u);
4068 
4069 	std::vector<tcu::UVec3>									usedLocalSizes;
4070 	for (deUint32 i = 0; i < localSizesToTestCount; ++i)
4071 	{
4072 		usedLocalSizes.push_back(tcu::UVec3(localSizesToTest[i][0], localSizesToTest[i][1], localSizesToTest[i][2]));
4073 	}
4074 
4075 	for (deUint32 i = 0; i < inputsCount; i++)
4076 	{
4077 		if (inputs[i].isImage())
4078 		{
4079 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(inputs[i].numElements), 1, inputs[i].format));
4080 		}
4081 		else
4082 		{
4083 			const auto usage	= (inputs[i].isUBO() ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT : VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
4084 			const auto size		= getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
4085 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, usage));
4086 		}
4087 
4088 		const Allocation& alloc = inputBuffers[i]->getAllocation();
4089 
4090 		initializeMemory(context, alloc, inputs[i]);
4091 	}
4092 
4093 	DescriptorSetLayoutBuilder layoutBuilder;
4094 	layoutBuilder.addBinding(
4095 		resultBuffer.getType(), 1, shaderStageFlags, DE_NULL);
4096 
4097 	for (deUint32 i = 0; i < inputsCount; i++)
4098 	{
4099 		layoutBuilder.addBinding(
4100 			inputBuffers[i]->getType(), 1, shaderStageFlags, DE_NULL);
4101 	}
4102 
4103 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
4104 		layoutBuilder.build(vk, device));
4105 
4106 	Move<VkShaderModule>	compShader;
4107 	Move<VkShaderModule>	meshShader;
4108 	Move<VkShaderModule>	taskShader;
4109 	const auto&				binaries	= context.getBinaryCollection();
4110 
4111 	if (testType == ComputeLike::COMPUTE)
4112 	{
4113 		compShader = createShaderModule(vk, device, binaries.get("comp"));
4114 	}
4115 	else if (testType == ComputeLike::MESH)
4116 	{
4117 		meshShader = createShaderModule(vk, device, binaries.get("mesh"));
4118 		if (binaries.contains("task"))
4119 			taskShader = createShaderModule(vk, device, binaries.get("task"));
4120 	}
4121 	else
4122 	{
4123 		DE_ASSERT(false);
4124 	}
4125 
4126 	const Unique<VkPipelineLayout> pipelineLayout(
4127 		makePipelineLayout(vk, device, *descriptorSetLayout));
4128 
4129 	DescriptorPoolBuilder poolBuilder;
4130 
4131 	poolBuilder.addType(resultBuffer.getType());
4132 
4133 	for (deUint32 i = 0; i < inputsCount; i++)
4134 	{
4135 		poolBuilder.addType(inputBuffers[i]->getType());
4136 	}
4137 
4138 	const Unique<VkDescriptorPool>	descriptorPool			(poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
4139 	const Unique<VkDescriptorSet>	descriptorSet			(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
4140 	const VkDescriptorBufferInfo	resultDescriptorInfo =	makeDescriptorBufferInfo(resultBuffer.getBuffer(), 0ull, resultBufferSizeInBytes);
4141 	DescriptorSetUpdateBuilder		updateBuilder;
4142 
4143 	updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
4144 
4145 	for (deUint32 i = 0; i < inputsCount; i++)
4146 	{
4147 		if (inputBuffers[i]->isImage())
4148 		{
4149 			const VkDescriptorImageInfo		info	= makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(), inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
4150 
4151 			updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i + 1), inputBuffers[i]->getType(), &info);
4152 		}
4153 		else
4154 		{
4155 			vk::VkDeviceSize		size	= getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
4156 			VkDescriptorBufferInfo	info	= makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, size);
4157 
4158 			updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i + 1), inputBuffers[i]->getType(), &info);
4159 		}
4160 	}
4161 
4162 	updateBuilder.update(vk, device);
4163 
4164 	const Unique<VkCommandPool>						cmdPool				(makeCommandPool(vk, device, queueFamilyIndex));
4165 	unsigned										totalIterations		= 0;
4166 	unsigned										failedIterations	= 0;
4167 	const Unique<VkCommandBuffer>					cmdBuffer			(makeCommandBuffer(context, *cmdPool));
4168 	std::vector<de::SharedPtr<Move<VkPipeline>>>	pipelines			(localSizesToTestCount);
4169 	const auto										reqSubgroupSize		= (isRequiredSubgroupSize ? subgroupSize : 0u);
4170 	Move<VkRenderPass>								renderPass;
4171 	Move<VkFramebuffer>								framebuffer;
4172 
4173 	if (testType == ComputeLike::MESH)
4174 	{
4175 		renderPass	= makeRenderPass(vk, device);
4176 		framebuffer	= makeFramebuffer(vk, device, renderPass.get(), 0u, nullptr, renderArea.extent.width, renderArea.extent.height);
4177 	}
4178 
4179 	context.getTestContext().touchWatchdog();
4180 	{
4181 		if (testType == ComputeLike::COMPUTE)
4182 		{
4183 			pipelines[0] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeComputePipeline(context,
4184 																									*pipelineLayout,
4185 																									*compShader,
4186 																									pipelineShaderStageCreateFlags,
4187 #ifndef CTS_USES_VULKANSC
4188 																									VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT,
4189 #else
4190 																									0u,
4191 #endif // CTS_USES_VULKANSC
4192 																									(VkPipeline) DE_NULL,
4193 																									usedLocalSizes[0][0],
4194 																									usedLocalSizes[0][1],
4195 																									usedLocalSizes[0][2],
4196 																									reqSubgroupSize)));
4197 		}
4198 #ifndef CTS_USES_VULKANSC
4199 		else if (testType == ComputeLike::MESH)
4200 		{
4201 			pipelines[0] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeMeshPipeline(context,
4202 																								 pipelineLayout.get(),
4203 																								 taskShader.get(),
4204 																								 meshShader.get(),
4205 																								 pipelineShaderStageCreateFlags,
4206 																								 VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT,
4207 																								 DE_NULL,
4208 																								 usedLocalSizes[0][0],
4209 																								 usedLocalSizes[0][1],
4210 																								 usedLocalSizes[0][2],
4211 																								 reqSubgroupSize,
4212 																								 renderPass.get())));
4213 		}
4214 #endif // CTS_USES_VULKANSC
4215 		else
4216 		{
4217 			DE_ASSERT(false);
4218 		}
4219 	}
4220 	context.getTestContext().touchWatchdog();
4221 
4222 	for (deUint32 index = 1; index < (localSizesToTestCount - 1); index++)
4223 	{
4224 		const deUint32 nextX = usedLocalSizes[index][0];
4225 		const deUint32 nextY = usedLocalSizes[index][1];
4226 		const deUint32 nextZ = usedLocalSizes[index][2];
4227 
4228 		context.getTestContext().touchWatchdog();
4229 		{
4230 			if (testType == ComputeLike::COMPUTE)
4231 			{
4232 				pipelines[index] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeComputePipeline(context,
4233 																											*pipelineLayout,
4234 																											*compShader,
4235 																											pipelineShaderStageCreateFlags,
4236 #ifndef CTS_USES_VULKANSC
4237 																											VK_PIPELINE_CREATE_DERIVATIVE_BIT,
4238 #else
4239 																											0u,
4240 #endif // CTS_USES_VULKANSC
4241 																											**pipelines[0],
4242 																											nextX,
4243 																											nextY,
4244 																											nextZ,
4245 																											reqSubgroupSize)));
4246 			}
4247 #ifndef CTS_USES_VULKANSC
4248 			else if (testType == ComputeLike::MESH)
4249 			{
4250 				pipelines[index] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeMeshPipeline(context,
4251 																										 pipelineLayout.get(),
4252 																										 taskShader.get(),
4253 																										 meshShader.get(),
4254 																										 pipelineShaderStageCreateFlags,
4255 																										 VK_PIPELINE_CREATE_DERIVATIVE_BIT,
4256 																										 pipelines[0].get()->get(),
4257 																										 nextX,
4258 																										 nextY,
4259 																										 nextZ,
4260 																										 reqSubgroupSize,
4261 																										 renderPass.get())));
4262 			}
4263 #endif // CTS_USES_VULKANSC
4264 			else
4265 			{
4266 				DE_ASSERT(false);
4267 			}
4268 		}
4269 		context.getTestContext().touchWatchdog();
4270 	}
4271 
4272 	for (deUint32 index = 0; index < (localSizesToTestCount - 1); index++)
4273 	{
4274 		// we are running one test
4275 		totalIterations++;
4276 
4277 		beginCommandBuffer(vk, *cmdBuffer);
4278 		{
4279 			if (testType == ComputeLike::MESH)
4280 				beginRenderPass(vk, *cmdBuffer, renderPass.get(), framebuffer.get(), renderArea);
4281 
4282 			vk.cmdBindPipeline(*cmdBuffer, pipelineBindPoint, **pipelines[index]);
4283 
4284 			vk.cmdBindDescriptorSets(*cmdBuffer, pipelineBindPoint, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
4285 
4286 			if (testType == ComputeLike::COMPUTE)
4287 				vk.cmdDispatch(*cmdBuffer, numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
4288 #ifndef CTS_USES_VULKANSC
4289 			else if (testType == ComputeLike::MESH)
4290 				vk.cmdDrawMeshTasksEXT(*cmdBuffer, numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
4291 				//vk.cmdDrawMeshTasksNV(*cmdBuffer, numWorkgroups[0], 0);
4292 #endif // CTS_USES_VULKANSC
4293 			else
4294 				DE_ASSERT(false);
4295 
4296 			if (testType == ComputeLike::MESH)
4297 				endRenderPass(vk, *cmdBuffer);
4298 		}
4299 
4300 		// Make shader writes available.
4301 		const auto postShaderBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
4302 		vk.cmdPipelineBarrier(*cmdBuffer, pipelineStage, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u, &postShaderBarrier, 0u, nullptr, 0u, nullptr);
4303 
4304 		endCommandBuffer(vk, *cmdBuffer);
4305 
4306 		submitCommandsAndWait(vk, device, queue, *cmdBuffer);
4307 
4308 		std::vector<const void*> datas;
4309 
4310 		{
4311 			const Allocation& resultAlloc = resultBuffer.getAllocation();
4312 			invalidateAlloc(vk, device, resultAlloc);
4313 
4314 			// we always have our result data first
4315 			datas.push_back(resultAlloc.getHostPtr());
4316 		}
4317 
4318 		for (deUint32 i = 0; i < inputsCount; i++)
4319 		{
4320 			if (!inputBuffers[i]->isImage())
4321 			{
4322 				const Allocation& resultAlloc = inputBuffers[i]->getAllocation();
4323 				invalidateAlloc(vk, device, resultAlloc);
4324 
4325 				// we always have our result data first
4326 				datas.push_back(resultAlloc.getHostPtr());
4327 			}
4328 		}
4329 
4330 		if (!checkResult(internalData, datas, numWorkgroups, usedLocalSizes[index].getPtr(), subgroupSize))
4331 		{
4332 			failedIterations++;
4333 		}
4334 
4335 		context.resetCommandPoolForVKSC(device, *cmdPool);
4336 	}
4337 
4338 	if (0 < failedIterations)
4339 	{
4340 		unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
4341 
4342 		context.getTestContext().getLog()
4343 			<< TestLog::Message << valuesPassed << " / "
4344 			<< totalIterations << " values passed" << TestLog::EndMessage;
4345 
4346 		return tcu::TestStatus::fail("Failed!");
4347 	}
4348 
4349 	return tcu::TestStatus::pass("OK");
4350 }
4351 
makeComputeTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * inputs,deUint32 inputsCount,const void * internalData,CheckResultCompute checkResult,const deUint32 pipelineShaderStageCreateFlags,const deUint32 numWorkgroups[3],const deBool isRequiredSubgroupSize,const deUint32 subgroupSize,const deUint32 localSizesToTest[][3],const deUint32 localSizesToTestCount)4352 tcu::TestStatus vkt::subgroups::makeComputeTestRequiredSubgroupSize (Context&			context,
4353 																	 VkFormat			format,
4354 																	 const SSBOData*	inputs,
4355 																	 deUint32			inputsCount,
4356 																	 const void*		internalData,
4357 																	 CheckResultCompute	checkResult,
4358 																	 const deUint32		pipelineShaderStageCreateFlags,
4359 																	 const deUint32		numWorkgroups[3],
4360 																	 const deBool		isRequiredSubgroupSize,
4361 																	 const deUint32		subgroupSize,
4362 																	 const deUint32		localSizesToTest[][3],
4363 																	 const deUint32		localSizesToTestCount)
4364 {
4365 	return makeComputeOrMeshTestRequiredSubgroupSize(
4366 		ComputeLike::COMPUTE,
4367 		context,
4368 		format,
4369 		inputs,
4370 		inputsCount,
4371 		internalData,
4372 		checkResult,
4373 		pipelineShaderStageCreateFlags,
4374 		numWorkgroups,
4375 		isRequiredSubgroupSize,
4376 		subgroupSize,
4377 		localSizesToTest,
4378 		localSizesToTestCount);
4379 }
4380 
makeMeshTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * inputs,deUint32 inputsCount,const void * internalData,CheckResultCompute checkResult,const deUint32 pipelineShaderStageCreateFlags,const deUint32 numWorkgroups[3],const deBool isRequiredSubgroupSize,const deUint32 subgroupSize,const deUint32 localSizesToTest[][3],const deUint32 localSizesToTestCount)4381 tcu::TestStatus vkt::subgroups::makeMeshTestRequiredSubgroupSize (Context&				context,
4382 																  VkFormat				format,
4383 																  const SSBOData*		inputs,
4384 																  deUint32				inputsCount,
4385 																  const void*			internalData,
4386 																  CheckResultCompute	checkResult,
4387 																  const deUint32		pipelineShaderStageCreateFlags,
4388 																  const deUint32		numWorkgroups[3],
4389 																  const deBool			isRequiredSubgroupSize,
4390 																  const deUint32		subgroupSize,
4391 																  const deUint32		localSizesToTest[][3],
4392 																  const deUint32		localSizesToTestCount)
4393 {
4394 	return makeComputeOrMeshTestRequiredSubgroupSize(
4395 		ComputeLike::MESH,
4396 		context,
4397 		format,
4398 		inputs,
4399 		inputsCount,
4400 		internalData,
4401 		checkResult,
4402 		pipelineShaderStageCreateFlags,
4403 		numWorkgroups,
4404 		isRequiredSubgroupSize,
4405 		subgroupSize,
4406 		localSizesToTest,
4407 		localSizesToTestCount);
4408 }
4409 
makeComputeOrMeshTest(ComputeLike testType,Context & context,VkFormat format,const vkt::subgroups::SSBOData * inputs,deUint32 inputsCount,const void * internalData,vkt::subgroups::CheckResultCompute checkResult,deUint32 requiredSubgroupSize,const deUint32 pipelineShaderStageCreateFlags)4410 tcu::TestStatus makeComputeOrMeshTest (ComputeLike							testType,
4411 									   Context&								context,
4412 									   VkFormat								format,
4413 									   const vkt::subgroups::SSBOData*		inputs,
4414 									   deUint32								inputsCount,
4415 									   const void*							internalData,
4416 									   vkt::subgroups::CheckResultCompute	checkResult,
4417 									   deUint32								requiredSubgroupSize,
4418 									   const deUint32						pipelineShaderStageCreateFlags)
4419 {
4420 	const uint32_t	numWorkgroups[3]		= {4, 2, 2};
4421 	const bool		isRequiredSubgroupSize	= (requiredSubgroupSize != 0u);
4422 	const uint32_t	subgroupSize			= (isRequiredSubgroupSize ? requiredSubgroupSize : vkt::subgroups::getSubgroupSize(context));
4423 
4424 	const deUint32 localSizesToTestCount = 8;
4425 	deUint32 localSizesToTest[localSizesToTestCount][3] =
4426 	{
4427 		{1, 1, 1},
4428 		{subgroupSize, 1, 1},
4429 		{1, subgroupSize, 1},
4430 		{1, 1, subgroupSize},
4431 		{32, 4, 1},
4432 		{1, 4, 32},
4433 		{3, 5, 7},
4434 		{1, 1, 1} // Isn't used, just here to make double buffering checks easier
4435 	};
4436 
4437 	if (testType == ComputeLike::COMPUTE)
4438 		return makeComputeTestRequiredSubgroupSize(context, format, inputs, inputsCount, internalData, checkResult, pipelineShaderStageCreateFlags,
4439 												   numWorkgroups, isRequiredSubgroupSize, subgroupSize, localSizesToTest, localSizesToTestCount);
4440 	else
4441 		return makeMeshTestRequiredSubgroupSize(context, format, inputs, inputsCount, internalData, checkResult, pipelineShaderStageCreateFlags,
4442 												numWorkgroups, isRequiredSubgroupSize, subgroupSize, localSizesToTest, localSizesToTestCount);
4443 }
4444 
makeComputeTest(Context & context,VkFormat format,const SSBOData * inputs,deUint32 inputsCount,const void * internalData,CheckResultCompute checkResult,deUint32 requiredSubgroupSize,const deUint32 pipelineShaderStageCreateFlags)4445 tcu::TestStatus vkt::subgroups::makeComputeTest (Context&				context,
4446 												 VkFormat				format,
4447 												 const SSBOData*		inputs,
4448 												 deUint32				inputsCount,
4449 												 const void*			internalData,
4450 												 CheckResultCompute		checkResult,
4451 												 deUint32				requiredSubgroupSize,
4452 												 const deUint32			pipelineShaderStageCreateFlags)
4453 {
4454 	return makeComputeOrMeshTest(ComputeLike::COMPUTE, context, format, inputs, inputsCount, internalData, checkResult, requiredSubgroupSize, pipelineShaderStageCreateFlags);
4455 }
4456 
makeMeshTest(Context & context,VkFormat format,const SSBOData * inputs,deUint32 inputsCount,const void * internalData,CheckResultCompute checkResult,deUint32 requiredSubgroupSize,const deUint32 pipelineShaderStageCreateFlags)4457 tcu::TestStatus vkt::subgroups::makeMeshTest (Context&				context,
4458 											  VkFormat				format,
4459 											  const SSBOData*		inputs,
4460 											  deUint32				inputsCount,
4461 											  const void*			internalData,
4462 											  CheckResultCompute	checkResult,
4463 											  deUint32				requiredSubgroupSize,
4464 											  const deUint32		pipelineShaderStageCreateFlags)
4465 {
4466 	return makeComputeOrMeshTest(ComputeLike::MESH, context, format, inputs, inputsCount, internalData, checkResult, requiredSubgroupSize, pipelineShaderStageCreateFlags);
4467 }
4468 
checkShaderStageSetValidity(const VkShaderStageFlags shaderStages)4469 static inline void checkShaderStageSetValidity (const VkShaderStageFlags shaderStages)
4470 {
4471 	if (shaderStages == 0)
4472 		TCU_THROW(InternalError, "Shader stage is not specified");
4473 
4474 	// It can actually be only 1 or 0.
4475 	const deUint32 exclusivePipelinesCount	= (isAllComputeStages(shaderStages) ? 1 : 0)
4476 											+ (isAllGraphicsStages(shaderStages) ? 1 : 0)
4477 #ifndef CTS_USES_VULKANSC
4478 											+ (isAllRayTracingStages(shaderStages) ? 1 : 0)
4479 											+ (isAllMeshShadingStages(shaderStages) ? 1 : 0)
4480 #endif // CTS_USES_VULKANSC
4481 											;
4482 
4483 	if (exclusivePipelinesCount != 1)
4484 		TCU_THROW(InternalError, "Mix of shaders from different pipelines is detected");
4485 }
4486 
supportedCheckShader(Context & context,const VkShaderStageFlags shaderStages)4487 void vkt::subgroups::supportedCheckShader (Context& context, const VkShaderStageFlags shaderStages)
4488 {
4489 	checkShaderStageSetValidity(shaderStages);
4490 
4491 	if ((context.getSubgroupProperties().supportedStages & shaderStages) == 0)
4492 	{
4493 		if (isAllComputeStages(shaderStages))
4494 			TCU_FAIL("Compute shader is required to support subgroup operations");
4495 		else
4496 			TCU_THROW(NotSupportedError, "Subgroup support is not available for test shader stage(s)");
4497 	}
4498 
4499 #ifndef CTS_USES_VULKANSC
4500 	if ((VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) & shaderStages &&
4501 		context.isDeviceFunctionalitySupported("VK_KHR_portability_subset") &&
4502 		!context.getPortabilitySubsetFeatures().tessellationIsolines)
4503 	{
4504 		TCU_THROW(NotSupportedError, "VK_KHR_portability_subset: Tessellation iso lines are not supported by this implementation");
4505 	}
4506 #endif // CTS_USES_VULKANSC
4507 }
4508 
4509 
4510 namespace vkt
4511 {
4512 namespace subgroups
4513 {
4514 typedef std::vector< de::SharedPtr<BufferOrImage> > vectorBufferOrImage;
4515 
// Indices of the shader groups, in declaration order.
// NOTE(review): presumably these index the groups of the ray tracing pipeline built further down in
// this file - confirm against the code that consumes them.
enum ShaderGroups
{
	FIRST_GROUP		= 0,			// Lowest valid group index.
	RAYGEN_GROUP	= FIRST_GROUP,	// Shares index 0 with FIRST_GROUP.
	MISS_GROUP		= 1,
	HIT_GROUP		= 2,
	CALL_GROUP		= 3,
	GROUP_COUNT		= 4				// Number of groups; keep last.
};
4525 
getAllRayTracingFormats()4526 const std::vector<vk::VkFormat> getAllRayTracingFormats()
4527 {
4528 	std::vector<VkFormat> formats;
4529 
4530 	formats.push_back(VK_FORMAT_R8G8B8_SINT);
4531 	formats.push_back(VK_FORMAT_R8_UINT);
4532 	formats.push_back(VK_FORMAT_R8G8B8A8_UINT);
4533 	formats.push_back(VK_FORMAT_R16G16B16_SINT);
4534 	formats.push_back(VK_FORMAT_R16_UINT);
4535 	formats.push_back(VK_FORMAT_R16G16B16A16_UINT);
4536 	formats.push_back(VK_FORMAT_R32G32B32_SINT);
4537 	formats.push_back(VK_FORMAT_R32_UINT);
4538 	formats.push_back(VK_FORMAT_R32G32B32A32_UINT);
4539 	formats.push_back(VK_FORMAT_R64G64B64_SINT);
4540 	formats.push_back(VK_FORMAT_R64_UINT);
4541 	formats.push_back(VK_FORMAT_R64G64B64A64_UINT);
4542 	formats.push_back(VK_FORMAT_R16G16B16A16_SFLOAT);
4543 	formats.push_back(VK_FORMAT_R32_SFLOAT);
4544 	formats.push_back(VK_FORMAT_R32G32B32A32_SFLOAT);
4545 	formats.push_back(VK_FORMAT_R64_SFLOAT);
4546 	formats.push_back(VK_FORMAT_R64G64B64_SFLOAT);
4547 	formats.push_back(VK_FORMAT_R64G64B64A64_SFLOAT);
4548 	formats.push_back(VK_FORMAT_R8_USCALED);
4549 	formats.push_back(VK_FORMAT_R8G8_USCALED);
4550 	formats.push_back(VK_FORMAT_R8G8B8_USCALED);
4551 	formats.push_back(VK_FORMAT_R8G8B8A8_USCALED);
4552 
4553 	return formats;
4554 }
4555 
addRayTracingNoSubgroupShader(SourceCollections & programCollection)4556 void addRayTracingNoSubgroupShader (SourceCollections& programCollection)
4557 {
4558 	const vk::ShaderBuildOptions	buildOptions	(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
4559 
4560 	const std::string rgenShaderNoSubgroups =
4561 		"#version 460 core\n"
4562 		"#extension GL_EXT_ray_tracing: require\n"
4563 		"layout(location = 0) rayPayloadEXT uvec4 payload;\n"
4564 		"layout(location = 0) callableDataEXT uvec4 callData;"
4565 		"layout(set = 1, binding = 0) uniform accelerationStructureEXT topLevelAS;\n"
4566 		"\n"
4567 		"void main()\n"
4568 		"{\n"
4569 		"  uint  rayFlags   = 0;\n"
4570 		"  uint  cullMask   = 0xFF;\n"
4571 		"  float tmin       = 0.0;\n"
4572 		"  float tmax       = 9.0;\n"
4573 		"  vec3  origin     = vec3((float(gl_LaunchIDEXT.x) + 0.5f) / float(gl_LaunchSizeEXT.x), (float(gl_LaunchIDEXT.y) + 0.5f) / float(gl_LaunchSizeEXT.y), 0.0);\n"
4574 		"  vec3  directHit  = vec3(0.0, 0.0, -1.0);\n"
4575 		"  vec3  directMiss = vec3(0.0, 0.0, +1.0);\n"
4576 		"\n"
4577 		"  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directHit, tmax, 0);\n"
4578 		"  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directMiss, tmax, 0);\n"
4579 		"  executeCallableEXT(0, 0);"
4580 		"}\n";
4581 	const std::string hitShaderNoSubgroups =
4582 		"#version 460 core\n"
4583 		"#extension GL_EXT_ray_tracing: require\n"
4584 		"hitAttributeEXT vec3 attribs;\n"
4585 		"layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
4586 		"\n"
4587 		"void main()\n"
4588 		"{\n"
4589 		"}\n";
4590 	const std::string missShaderNoSubgroups =
4591 		"#version 460 core\n"
4592 		"#extension GL_EXT_ray_tracing: require\n"
4593 		"layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
4594 		"\n"
4595 		"void main()\n"
4596 		"{\n"
4597 		"}\n";
4598 	const std::string sectShaderNoSubgroups =
4599 		"#version 460 core\n"
4600 		"#extension GL_EXT_ray_tracing: require\n"
4601 		"hitAttributeEXT vec3 hitAttribute;\n"
4602 		"\n"
4603 		"void main()\n"
4604 		"{\n"
4605 		"  reportIntersectionEXT(0.75f, 0x7Eu);\n"
4606 		"}\n";
4607 	const std::string callShaderNoSubgroups =
4608 		"#version 460 core\n"
4609 		"#extension GL_EXT_ray_tracing: require\n"
4610 		"layout(location = 0) callableDataInEXT float callData;\n"
4611 		"\n"
4612 		"void main()\n"
4613 		"{\n"
4614 		"}\n";
4615 
4616 	programCollection.glslSources.add("rgen_noSubgroup") << glu::RaygenSource		(rgenShaderNoSubgroups) << buildOptions;
4617 	programCollection.glslSources.add("ahit_noSubgroup") << glu::AnyHitSource		(hitShaderNoSubgroups)  << buildOptions;
4618 	programCollection.glslSources.add("chit_noSubgroup") << glu::ClosestHitSource	(hitShaderNoSubgroups)  << buildOptions;
4619 	programCollection.glslSources.add("miss_noSubgroup") << glu::MissSource			(missShaderNoSubgroups) << buildOptions;
4620 	programCollection.glslSources.add("sect_noSubgroup") << glu::IntersectionSource	(sectShaderNoSubgroups) << buildOptions;
4621 	programCollection.glslSources.add("call_noSubgroup") << glu::CallableSource		(callShaderNoSubgroups) << buildOptions;
4622 }
4623 
4624 #ifndef CTS_USES_VULKANSC
4625 
enumerateRayTracingShaderStages(const VkShaderStageFlags shaderStage)4626 static vector<VkShaderStageFlagBits> enumerateRayTracingShaderStages (const VkShaderStageFlags	shaderStage)
4627 {
4628 	vector<VkShaderStageFlagBits>	result;
4629 	const VkShaderStageFlagBits		shaderStageFlags[]	=
4630 	{
4631 		VK_SHADER_STAGE_RAYGEN_BIT_KHR,
4632 		VK_SHADER_STAGE_ANY_HIT_BIT_KHR,
4633 		VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,
4634 		VK_SHADER_STAGE_MISS_BIT_KHR,
4635 		VK_SHADER_STAGE_INTERSECTION_BIT_KHR,
4636 		VK_SHADER_STAGE_CALLABLE_BIT_KHR,
4637 	};
4638 
4639 	for (auto shaderStageFlag: shaderStageFlags)
4640 	{
4641 		if (0 != (shaderStage & shaderStageFlag))
4642 			result.push_back(shaderStageFlag);
4643 	}
4644 
4645 	return result;
4646 }
4647 
getRayTracingResultBinding(const VkShaderStageFlagBits shaderStage)4648 static deUint32 getRayTracingResultBinding (const VkShaderStageFlagBits shaderStage)
4649 {
4650 	const VkShaderStageFlags	shaderStageFlags[]	=
4651 	{
4652 		VK_SHADER_STAGE_RAYGEN_BIT_KHR,
4653 		VK_SHADER_STAGE_ANY_HIT_BIT_KHR,
4654 		VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,
4655 		VK_SHADER_STAGE_MISS_BIT_KHR,
4656 		VK_SHADER_STAGE_INTERSECTION_BIT_KHR,
4657 		VK_SHADER_STAGE_CALLABLE_BIT_KHR,
4658 	};
4659 
4660 	for (deUint32 shaderStageNdx = 0; shaderStageNdx < DE_LENGTH_OF_ARRAY(shaderStageFlags); ++shaderStageNdx)
4661 	{
4662 		if (0 != (shaderStage & shaderStageFlags[shaderStageNdx]))
4663 		{
4664 			DE_ASSERT(0 == (shaderStage & (~shaderStageFlags[shaderStageNdx])));
4665 
4666 			return shaderStageNdx;
4667 		}
4668 	}
4669 
4670 	TCU_THROW(InternalError, "Non-raytracing stage specified or no stage at all");
4671 }
4672 
makeRayTracingInputBuffers(Context & context,VkFormat format,const SSBOData * extraDatas,deUint32 extraDatasCount,const vector<VkShaderStageFlagBits> & stagesVector)4673 static vectorBufferOrImage makeRayTracingInputBuffers (Context&								context,
4674 													   VkFormat								format,
4675 													   const SSBOData*						extraDatas,
4676 													   deUint32								extraDatasCount,
4677 													   const vector<VkShaderStageFlagBits>&	stagesVector)
4678 {
4679 	const size_t		stagesCount		= stagesVector.size();
4680 	const VkDeviceSize	shaderSize		= getMaxWidth();
4681 	const VkDeviceSize	inputBufferSize	= getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize;
4682 	vectorBufferOrImage	inputBuffers	(stagesCount + extraDatasCount);
4683 
4684 	// The implicit result SSBO we use to store our outputs from the shader
4685 	for (size_t stageNdx = 0u; stageNdx < stagesCount; ++stageNdx)
4686 		inputBuffers[stageNdx]	= de::SharedPtr<BufferOrImage>(new Buffer(context, inputBufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT));
4687 
4688 	for (size_t stageNdx = stagesCount; stageNdx < stagesCount + extraDatasCount; ++stageNdx)
4689 	{
4690 		const size_t	datasNdx	= stageNdx - stagesCount;
4691 
4692 		if (extraDatas[datasNdx].isImage())
4693 		{
4694 			inputBuffers[stageNdx] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
4695 		}
4696 		else
4697 		{
4698 			const auto usage	= (extraDatas[datasNdx].isUBO() ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT : VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
4699 			const auto size		= getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) * extraDatas[datasNdx].numElements;
4700 			inputBuffers[stageNdx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, usage));
4701 		}
4702 
4703 		initializeMemory(context, inputBuffers[stageNdx]->getAllocation(), extraDatas[datasNdx]);
4704 	}
4705 
4706 	return inputBuffers;
4707 }
4708 
makeRayTracingDescriptorSetLayout(Context & context,const SSBOData * extraDatas,deUint32 extraDatasCount,const vector<VkShaderStageFlagBits> & stagesVector,const vectorBufferOrImage & inputBuffers)4709 static Move<VkDescriptorSetLayout> makeRayTracingDescriptorSetLayout (Context&								context,
4710 																	  const SSBOData*						extraDatas,
4711 																	  deUint32								extraDatasCount,
4712 																	  const vector<VkShaderStageFlagBits>&	stagesVector,
4713 																	  const vectorBufferOrImage&			inputBuffers)
4714 {
4715 	const DeviceInterface&		vkd				= context.getDeviceInterface();
4716 	const VkDevice				device			= context.getDevice();
4717 	const size_t				stagesCount		= stagesVector.size();
4718 	DescriptorSetLayoutBuilder	layoutBuilder;
4719 
4720 	// The implicit result SSBO we use to store our outputs from the shader
4721 	for (size_t stageNdx = 0u; stageNdx < stagesVector.size(); ++stageNdx)
4722 	{
4723 		const deUint32	stageBinding	= getRayTracingResultBinding(stagesVector[stageNdx]);
4724 
4725 		layoutBuilder.addIndexedBinding(inputBuffers[stageNdx]->getType(), 1, stagesVector[stageNdx], stageBinding, DE_NULL);
4726 	}
4727 
4728 	for (size_t stageNdx = stagesCount; stageNdx < stagesCount + extraDatasCount; ++stageNdx)
4729 	{
4730 		const size_t datasNdx = stageNdx - stagesCount;
4731 
4732 		layoutBuilder.addIndexedBinding(inputBuffers[stageNdx]->getType(), 1, extraDatas[datasNdx].stages, extraDatas[datasNdx].binding, DE_NULL);
4733 	}
4734 
4735 	return layoutBuilder.build(vkd, device);
4736 }
4737 
makeRayTracingDescriptorSetLayoutAS(Context & context)4738 static Move<VkDescriptorSetLayout> makeRayTracingDescriptorSetLayoutAS (Context&	context)
4739 {
4740 	const DeviceInterface&		vkd				= context.getDeviceInterface();
4741 	const VkDevice				device			= context.getDevice();
4742 	DescriptorSetLayoutBuilder	layoutBuilder;
4743 
4744 	layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, VK_SHADER_STAGE_RAYGEN_BIT_KHR);
4745 
4746 	return layoutBuilder.build(vkd, device);
4747 }
4748 
makeRayTracingDescriptorPool(Context & context,const vectorBufferOrImage & inputBuffers)4749 static Move<VkDescriptorPool> makeRayTracingDescriptorPool (Context&						context,
4750 															const vectorBufferOrImage&		inputBuffers)
4751 {
4752 	const DeviceInterface&	vkd					= context.getDeviceInterface();
4753 	const VkDevice			device				= context.getDevice();
4754 	const deUint32			maxDescriptorSets	= 2u;
4755 	DescriptorPoolBuilder	poolBuilder;
4756 	Move<VkDescriptorPool>	result;
4757 
4758 	if (inputBuffers.size() > 0)
4759 	{
4760 		for (size_t ndx = 0u; ndx < inputBuffers.size(); ndx++)
4761 			poolBuilder.addType(inputBuffers[ndx]->getType());
4762 	}
4763 
4764 	poolBuilder.addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR);
4765 
4766 	result = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, maxDescriptorSets);
4767 
4768 	return result;
4769 }
4770 
makeRayTracingDescriptorSet(Context & context,VkDescriptorPool descriptorPool,VkDescriptorSetLayout descriptorSetLayout,const SSBOData * extraDatas,deUint32 extraDatasCount,const vector<VkShaderStageFlagBits> & stagesVector,const vectorBufferOrImage & inputBuffers)4771 static Move<VkDescriptorSet> makeRayTracingDescriptorSet (Context&								context,
4772 														  VkDescriptorPool						descriptorPool,
4773 														  VkDescriptorSetLayout					descriptorSetLayout,
4774 														  const SSBOData*						extraDatas,
4775 														  deUint32								extraDatasCount,
4776 														  const vector<VkShaderStageFlagBits>&	stagesVector,
4777 														  const vectorBufferOrImage&			inputBuffers)
4778 {
4779 	const DeviceInterface&	vkd				= context.getDeviceInterface();
4780 	const VkDevice			device			= context.getDevice();
4781 	const size_t			stagesCount		= stagesVector.size();
4782 	Move<VkDescriptorSet>	descriptorSet;
4783 
4784 	if (inputBuffers.size() > 0)
4785 	{
4786 		DescriptorSetUpdateBuilder updateBuilder;
4787 
4788 		// Create descriptor set
4789 		descriptorSet = makeDescriptorSet(vkd, device, descriptorPool, descriptorSetLayout);
4790 
4791 		for (size_t ndx = 0u; ndx < stagesCount + extraDatasCount; ndx++)
4792 		{
4793 			const deUint32	binding	= (ndx < stagesCount)
4794 									? getRayTracingResultBinding(stagesVector[ndx])
4795 									: extraDatas[ndx - stagesCount].binding;
4796 
4797 			if (inputBuffers[ndx]->isImage())
4798 			{
4799 				const VkDescriptorImageInfo		info	= makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(), inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
4800 
4801 				updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(binding), inputBuffers[ndx]->getType(), &info);
4802 			}
4803 			else
4804 			{
4805 				const VkDescriptorBufferInfo	info	= makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(), 0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
4806 
4807 				updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(binding), inputBuffers[ndx]->getType(), &info);
4808 			}
4809 		}
4810 
4811 		updateBuilder.update(vkd, device);
4812 	}
4813 
4814 	return descriptorSet;
4815 }
4816 
makeRayTracingDescriptorSetAS(Context & context,VkDescriptorPool descriptorPool,VkDescriptorSetLayout descriptorSetLayout,de::MovePtr<TopLevelAccelerationStructure> & topLevelAccelerationStructure)4817 static Move<VkDescriptorSet> makeRayTracingDescriptorSetAS (Context&									context,
4818 															VkDescriptorPool							descriptorPool,
4819 															VkDescriptorSetLayout						descriptorSetLayout,
4820 															de::MovePtr<TopLevelAccelerationStructure>&	topLevelAccelerationStructure)
4821 {
4822 	const DeviceInterface&								vkd										= context.getDeviceInterface();
4823 	const VkDevice										device									= context.getDevice();
4824 	const TopLevelAccelerationStructure*				topLevelAccelerationStructurePtr		= topLevelAccelerationStructure.get();
4825 	const VkWriteDescriptorSetAccelerationStructureKHR	accelerationStructureWriteDescriptorSet	=
4826 	{
4827 		VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,	//  VkStructureType						sType;
4828 		DE_NULL,															//  const void*							pNext;
4829 		1u,																	//  deUint32							accelerationStructureCount;
4830 		topLevelAccelerationStructurePtr->getPtr(),							//  const VkAccelerationStructureKHR*	pAccelerationStructures;
4831 	};
4832 	Move<VkDescriptorSet>								descriptorSet = makeDescriptorSet(vkd, device, descriptorPool, descriptorSetLayout);
4833 
4834 	DescriptorSetUpdateBuilder()
4835 		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &accelerationStructureWriteDescriptorSet)
4836 		.update(vkd, device);
4837 
4838 	return descriptorSet;
4839 }
4840 
makeRayTracingPipelineLayout(Context & context,const VkDescriptorSetLayout descriptorSetLayout0,const VkDescriptorSetLayout descriptorSetLayout1)4841 static Move<VkPipelineLayout> makeRayTracingPipelineLayout (Context&					context,
4842 															const VkDescriptorSetLayout	descriptorSetLayout0,
4843 															const VkDescriptorSetLayout	descriptorSetLayout1)
4844 {
4845 	const DeviceInterface&						vkd							= context.getDeviceInterface();
4846 	const VkDevice								device						= context.getDevice();
4847 	const std::vector<VkDescriptorSetLayout>	descriptorSetLayouts		{ descriptorSetLayout0, descriptorSetLayout1 };
4848 	const deUint32								descriptorSetLayoutsSize	= static_cast<deUint32>(descriptorSetLayouts.size());
4849 
4850 	return makePipelineLayout(vkd, device, descriptorSetLayoutsSize, descriptorSetLayouts.data());
4851 }
4852 
createTopAccelerationStructure(Context & context,de::SharedPtr<BottomLevelAccelerationStructure> bottomLevelAccelerationStructure)4853 static de::MovePtr<TopLevelAccelerationStructure> createTopAccelerationStructure (Context&											context,
4854 																				  de::SharedPtr<BottomLevelAccelerationStructure>	bottomLevelAccelerationStructure)
4855 {
4856 	const DeviceInterface&						vkd			= context.getDeviceInterface();
4857 	const VkDevice								device		= context.getDevice();
4858 	Allocator&									allocator	= context.getDefaultAllocator();
4859 	de::MovePtr<TopLevelAccelerationStructure>	result		= makeTopLevelAccelerationStructure();
4860 
4861 	result->setInstanceCount(1);
4862 	result->addInstance(bottomLevelAccelerationStructure);
4863 	result->create(vkd, device, allocator);
4864 
4865 	return result;
4866 }
4867 
createBottomAccelerationStructure(Context & context)4868 static de::SharedPtr<BottomLevelAccelerationStructure> createBottomAccelerationStructure (Context&	context)
4869 {
4870 	const DeviceInterface&							vkd				= context.getDeviceInterface();
4871 	const VkDevice									device			= context.getDevice();
4872 	Allocator&										allocator		= context.getDefaultAllocator();
4873 	de::MovePtr<BottomLevelAccelerationStructure>	result			= makeBottomLevelAccelerationStructure();
4874 	const std::vector<tcu::Vec3>					geometryData	{ tcu::Vec3(-1.0f, -1.0f, -2.0f), tcu::Vec3(+1.0f, +1.0f, -1.0f) };
4875 
4876 	result->setGeometryCount(1u);
4877 	result->addGeometry(geometryData, false);
4878 	result->create(vkd, device, allocator, 0u);
4879 
4880 	return de::SharedPtr<BottomLevelAccelerationStructure>(result.release());
4881 }
4882 
makeRayTracingPipeline(Context & context,const VkShaderStageFlags shaderStageTested,const VkPipelineLayout pipelineLayout,const deUint32 shaderStageCreateFlags[6],const deUint32 requiredSubgroupSize[6],Move<VkPipeline> & pipelineOut)4883 static de::MovePtr<RayTracingPipeline> makeRayTracingPipeline (Context&					context,
4884 															   const VkShaderStageFlags	shaderStageTested,
4885 															   const VkPipelineLayout	pipelineLayout,
4886 															   const deUint32			shaderStageCreateFlags[6],
4887 															   const deUint32			requiredSubgroupSize[6],
4888 															   Move<VkPipeline>&		pipelineOut)
4889 {
4890 	const DeviceInterface&											vkd									= context.getDeviceInterface();
4891 	const VkDevice													device								= context.getDevice();
4892 	BinaryCollection&												collection							= context.getBinaryCollection();
4893 	const char*														shaderRgenName						= (0 != (shaderStageTested & VK_SHADER_STAGE_RAYGEN_BIT_KHR))			? "rgen" : "rgen_noSubgroup";
4894 	const char*														shaderAhitName						= (0 != (shaderStageTested & VK_SHADER_STAGE_ANY_HIT_BIT_KHR))			? "ahit" : "ahit_noSubgroup";
4895 	const char*														shaderChitName						= (0 != (shaderStageTested & VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR))		? "chit" : "chit_noSubgroup";
4896 	const char*														shaderMissName						= (0 != (shaderStageTested & VK_SHADER_STAGE_MISS_BIT_KHR))				? "miss" : "miss_noSubgroup";
4897 	const char*														shaderSectName						= (0 != (shaderStageTested & VK_SHADER_STAGE_INTERSECTION_BIT_KHR))		? "sect" : "sect_noSubgroup";
4898 	const char*														shaderCallName						= (0 != (shaderStageTested & VK_SHADER_STAGE_CALLABLE_BIT_KHR))			? "call" : "call_noSubgroup";
4899 	const VkShaderModuleCreateFlags									noShaderModuleCreateFlags			= static_cast<VkShaderModuleCreateFlags>(0);
4900 	Move<VkShaderModule>											rgenShaderModule					= createShaderModule(vkd, device, collection.get(shaderRgenName), noShaderModuleCreateFlags);
4901 	Move<VkShaderModule>											ahitShaderModule					= createShaderModule(vkd, device, collection.get(shaderAhitName), noShaderModuleCreateFlags);
4902 	Move<VkShaderModule>											chitShaderModule					= createShaderModule(vkd, device, collection.get(shaderChitName), noShaderModuleCreateFlags);
4903 	Move<VkShaderModule>											missShaderModule					= createShaderModule(vkd, device, collection.get(shaderMissName), noShaderModuleCreateFlags);
4904 	Move<VkShaderModule>											sectShaderModule					= createShaderModule(vkd, device, collection.get(shaderSectName), noShaderModuleCreateFlags);
4905 	Move<VkShaderModule>											callShaderModule					= createShaderModule(vkd, device, collection.get(shaderCallName), noShaderModuleCreateFlags);
4906 	const VkPipelineShaderStageCreateFlags							noPipelineShaderStageCreateFlags	= static_cast<VkPipelineShaderStageCreateFlags>(0);
4907 	const VkPipelineShaderStageCreateFlags							rgenPipelineShaderStageCreateFlags	= (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[0];
4908 	const VkPipelineShaderStageCreateFlags							ahitPipelineShaderStageCreateFlags	= (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[1];
4909 	const VkPipelineShaderStageCreateFlags							chitPipelineShaderStageCreateFlags	= (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[2];
4910 	const VkPipelineShaderStageCreateFlags							missPipelineShaderStageCreateFlags	= (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[3];
4911 	const VkPipelineShaderStageCreateFlags							sectPipelineShaderStageCreateFlags	= (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[4];
4912 	const VkPipelineShaderStageCreateFlags							callPipelineShaderStageCreateFlags	= (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[5];
4913 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT	requiredSubgroupSizeCreateInfo[6]	=
4914 	{
4915 		{
4916 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4917 			DE_NULL,
4918 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[0] : 0u,
4919 		},
4920 		{
4921 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4922 			DE_NULL,
4923 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[1] : 0u,
4924 		},
4925 		{
4926 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4927 			DE_NULL,
4928 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[2] : 0u,
4929 		},
4930 		{
4931 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4932 			DE_NULL,
4933 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[3] : 0u,
4934 		},
4935 		{
4936 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4937 			DE_NULL,
4938 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[4] : 0u,
4939 		},
4940 		{
4941 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4942 			DE_NULL,
4943 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[5] : 0u,
4944 		},
4945 	};
4946 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT*	rgenRequiredSubgroupSizeCreateInfo	= (requiredSubgroupSizeCreateInfo[0].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[0];
4947 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT*	ahitRequiredSubgroupSizeCreateInfo	= (requiredSubgroupSizeCreateInfo[1].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[1];
4948 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT*	chitRequiredSubgroupSizeCreateInfo	= (requiredSubgroupSizeCreateInfo[2].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[2];
4949 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT*	missRequiredSubgroupSizeCreateInfo	= (requiredSubgroupSizeCreateInfo[3].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[3];
4950 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT*	sectRequiredSubgroupSizeCreateInfo	= (requiredSubgroupSizeCreateInfo[4].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[4];
4951 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT*	callRequiredSubgroupSizeCreateInfo	= (requiredSubgroupSizeCreateInfo[5].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[5];
4952 	de::MovePtr<RayTracingPipeline>									rayTracingPipeline					= de::newMovePtr<RayTracingPipeline>();
4953 
4954 	rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR		, rgenShaderModule, RAYGEN_GROUP,	DE_NULL, rgenPipelineShaderStageCreateFlags, rgenRequiredSubgroupSizeCreateInfo);
4955 	rayTracingPipeline->addShader(VK_SHADER_STAGE_ANY_HIT_BIT_KHR		, ahitShaderModule, HIT_GROUP,		DE_NULL, ahitPipelineShaderStageCreateFlags, ahitRequiredSubgroupSizeCreateInfo);
4956 	rayTracingPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR	, chitShaderModule, HIT_GROUP,		DE_NULL, chitPipelineShaderStageCreateFlags, chitRequiredSubgroupSizeCreateInfo);
4957 	rayTracingPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR			, missShaderModule, MISS_GROUP,		DE_NULL, missPipelineShaderStageCreateFlags, missRequiredSubgroupSizeCreateInfo);
4958 	rayTracingPipeline->addShader(VK_SHADER_STAGE_INTERSECTION_BIT_KHR	, sectShaderModule, HIT_GROUP,		DE_NULL, sectPipelineShaderStageCreateFlags, sectRequiredSubgroupSizeCreateInfo);
4959 	rayTracingPipeline->addShader(VK_SHADER_STAGE_CALLABLE_BIT_KHR		, callShaderModule, CALL_GROUP,		DE_NULL, callPipelineShaderStageCreateFlags, callRequiredSubgroupSizeCreateInfo);
4960 
4961 	// Must execute createPipeline here, due to pNext pointers in calls to addShader are local
4962 	pipelineOut	= rayTracingPipeline->createPipeline(vkd, device, pipelineLayout);
4963 
4964 	return rayTracingPipeline;
4965 }
4966 
getPossibleRayTracingSubgroupStages(Context & context,const VkShaderStageFlags testedStages)4967 VkShaderStageFlags getPossibleRayTracingSubgroupStages (Context& context, const VkShaderStageFlags testedStages)
4968 {
4969 	const VkPhysicalDeviceSubgroupProperties&	subgroupProperties	= context.getSubgroupProperties();
4970 	const VkShaderStageFlags					stages				= testedStages & subgroupProperties.supportedStages;
4971 
4972 	DE_ASSERT(isAllRayTracingStages(testedStages));
4973 
4974 	return stages;
4975 }
4976 
allRayTracingStages(Context & context,VkFormat format,const SSBOData * extraDatas,deUint32 extraDataCount,const void * internalData,const VerificationFunctor & checkResult,const VkShaderStageFlags shaderStage)4977 tcu::TestStatus allRayTracingStages (Context&						context,
4978 									 VkFormat						format,
4979 									 const SSBOData*				extraDatas,
4980 									 deUint32						extraDataCount,
4981 									 const void*					internalData,
4982 									 const VerificationFunctor&		checkResult,
4983 									 const VkShaderStageFlags		shaderStage)
4984 {
4985 	return vkt::subgroups::allRayTracingStagesRequiredSubgroupSize(context,
4986 																   format,
4987 																   extraDatas,
4988 																   extraDataCount,
4989 																   internalData,
4990 																   checkResult,
4991 																   shaderStage,
4992 																   DE_NULL,
4993 																   DE_NULL);
4994 }
4995 
allRayTracingStagesRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * extraDatas,deUint32 extraDatasCount,const void * internalData,const VerificationFunctor & checkResult,const VkShaderStageFlags shaderStageTested,const deUint32 shaderStageCreateFlags[6],const deUint32 requiredSubgroupSize[6])4996 tcu::TestStatus allRayTracingStagesRequiredSubgroupSize (Context&					context,
4997 														 VkFormat					format,
4998 														 const SSBOData*			extraDatas,
4999 														 deUint32					extraDatasCount,
5000 														 const void*				internalData,
5001 														 const VerificationFunctor&	checkResult,
5002 														 const VkShaderStageFlags	shaderStageTested,
5003 														 const deUint32				shaderStageCreateFlags[6],
5004 														 const deUint32				requiredSubgroupSize[6])
5005 {
5006 	const DeviceInterface&							vkd									= context.getDeviceInterface();
5007 	const VkDevice									device								= context.getDevice();
5008 	const VkQueue									queue								= context.getUniversalQueue();
5009 	const deUint32									queueFamilyIndex					= context.getUniversalQueueFamilyIndex();
5010 	Allocator&										allocator							= context.getDefaultAllocator();
5011 	const deUint32									subgroupSize						= getSubgroupSize(context);
5012 	const deUint32									maxWidth							= getMaxWidth();
5013 	const vector<VkShaderStageFlagBits>				stagesVector						= enumerateRayTracingShaderStages(shaderStageTested);
5014 	const deUint32									stagesCount							= static_cast<deUint32>(stagesVector.size());
5015 	de::SharedPtr<BottomLevelAccelerationStructure>	bottomLevelAccelerationStructure	= createBottomAccelerationStructure(context);
5016 	de::MovePtr<TopLevelAccelerationStructure>		topLevelAccelerationStructure		= createTopAccelerationStructure(context, bottomLevelAccelerationStructure);
5017 	vectorBufferOrImage								inputBuffers						= makeRayTracingInputBuffers(context, format, extraDatas, extraDatasCount, stagesVector);
5018 	const Move<VkDescriptorSetLayout>				descriptorSetLayout					= makeRayTracingDescriptorSetLayout(context, extraDatas, extraDatasCount, stagesVector, inputBuffers);
5019 	const Move<VkDescriptorSetLayout>				descriptorSetLayoutAS				= makeRayTracingDescriptorSetLayoutAS(context);
5020 	const Move<VkPipelineLayout>					pipelineLayout						= makeRayTracingPipelineLayout(context, *descriptorSetLayout, *descriptorSetLayoutAS);
5021 	Move<VkPipeline>								pipeline							= Move<VkPipeline>();
5022 	const de::MovePtr<RayTracingPipeline>			rayTracingPipeline					= makeRayTracingPipeline(context, shaderStageTested, *pipelineLayout, shaderStageCreateFlags, requiredSubgroupSize, pipeline);
5023 	const deUint32									shaderGroupHandleSize				= context.getRayTracingPipelineProperties().shaderGroupHandleSize;
5024 	const deUint32									shaderGroupBaseAlignment			= context.getRayTracingPipelineProperties().shaderGroupBaseAlignment;
5025 	de::MovePtr<BufferWithMemory>					rgenShaderBindingTable				= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, RAYGEN_GROUP, 1u);
5026 	de::MovePtr<BufferWithMemory>					missShaderBindingTable				= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, MISS_GROUP,   1u);
5027 	de::MovePtr<BufferWithMemory>					hitsShaderBindingTable				= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, HIT_GROUP,    1u);
5028 	de::MovePtr<BufferWithMemory>					callShaderBindingTable				= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, CALL_GROUP,   1u);
5029 	const VkStridedDeviceAddressRegionKHR			rgenShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, rgenShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
5030 	const VkStridedDeviceAddressRegionKHR			missShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, missShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
5031 	const VkStridedDeviceAddressRegionKHR			hitsShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, hitsShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
5032 	const VkStridedDeviceAddressRegionKHR			callShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, callShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
5033 	const Move<VkDescriptorPool>					descriptorPool						= makeRayTracingDescriptorPool(context, inputBuffers);
5034 	const Move<VkDescriptorSet>						descriptorSet						= makeRayTracingDescriptorSet(context, *descriptorPool, *descriptorSetLayout, extraDatas, extraDatasCount, stagesVector, inputBuffers);
5035 	const Move<VkDescriptorSet>						descriptorSetAS						= makeRayTracingDescriptorSetAS(context, *descriptorPool, *descriptorSetLayoutAS, topLevelAccelerationStructure);
5036 	const Move<VkCommandPool>						cmdPool								= makeCommandPool(vkd, device, queueFamilyIndex);
5037 	const Move<VkCommandBuffer>						cmdBuffer							= makeCommandBuffer(context, *cmdPool);
5038 	deUint32										passIterations						= 0u;
5039 	deUint32										failIterations						= 0u;
5040 
5041 	DE_ASSERT(shaderStageTested != 0);
5042 
5043 	for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
5044 	{
5045 
5046 		for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
5047 		{
5048 			// re-init the data
5049 			const Allocation& alloc = inputBuffers[ndx]->getAllocation();
5050 
5051 			initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
5052 		}
5053 
5054 		beginCommandBuffer(vkd, *cmdBuffer);
5055 		{
5056 			vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);
5057 
5058 			bottomLevelAccelerationStructure->build(vkd, device, *cmdBuffer);
5059 			topLevelAccelerationStructure->build(vkd, device, *cmdBuffer);
5060 
5061 			vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 1u, 1u, &descriptorSetAS.get(), 0u, DE_NULL);
5062 
5063 			if (stagesCount + extraDatasCount > 0)
5064 				vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
5065 
5066 			cmdTraceRays(vkd,
5067 				*cmdBuffer,
5068 				&rgenShaderBindingTableRegion,
5069 				&missShaderBindingTableRegion,
5070 				&hitsShaderBindingTableRegion,
5071 				&callShaderBindingTableRegion,
5072 				width, 1, 1);
5073 
5074 			const VkMemoryBarrier	postTraceMemoryBarrier	= makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
5075 			cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, VK_PIPELINE_STAGE_HOST_BIT, &postTraceMemoryBarrier);
5076 		}
5077 		endCommandBuffer(vkd, *cmdBuffer);
5078 
5079 		submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
5080 
5081 		for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
5082 		{
5083 			std::vector<const void*> datas;
5084 
5085 			if (!inputBuffers[ndx]->isImage())
5086 			{
5087 				const Allocation& resultAlloc = inputBuffers[ndx]->getAllocation();
5088 
5089 				invalidateAlloc(vkd, device, resultAlloc);
5090 
5091 				// we always have our result data first
5092 				datas.push_back(resultAlloc.getHostPtr());
5093 			}
5094 
5095 			for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
5096 			{
5097 				const deUint32 datasNdx = index - stagesCount;
5098 
5099 				if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
5100 				{
5101 					const Allocation& resultAlloc = inputBuffers[index]->getAllocation();
5102 
5103 					invalidateAlloc(vkd, device, resultAlloc);
5104 
5105 					// we always have our result data first
5106 					datas.push_back(resultAlloc.getHostPtr());
5107 				}
5108 			}
5109 
5110 			if (!checkResult(internalData, datas, width, subgroupSize, false))
5111 				failIterations++;
5112 			else
5113 				passIterations++;
5114 		}
5115 
5116 		context.resetCommandPoolForVKSC(device, *cmdPool);
5117 	}
5118 
5119 	if (failIterations > 0 || passIterations == 0)
5120 		return tcu::TestStatus::fail("Failed " + de::toString(failIterations) + " out of " + de::toString(failIterations + passIterations) + " iterations.");
5121 	else
5122 		return tcu::TestStatus::pass("OK");
5123 }
5124 #endif // CTS_USES_VULKANSC
5125 
5126 } // namespace subgroups
5127 } // namespace vkt
5128