• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2019 The Khronos Group Inc.
6  * Copyright (c) 2019 Google Inc.
7  * Copyright (c) 2017 Codeplay Software Ltd.
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  */ /*!
22  * \file
23  * \brief Subgroups Tests Utils
24  */ /*--------------------------------------------------------------------*/
25 
26 #include "vktSubgroupsTestsUtils.hpp"
27 #include "vkRayTracingUtil.hpp"
28 #include "deFloat16.h"
29 #include "deRandom.hpp"
30 #include "tcuCommandLine.hpp"
31 #include "tcuStringTemplate.hpp"
32 #include "vkBarrierUtil.hpp"
33 #include "vkImageUtil.hpp"
34 #include "vkTypeUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkObjUtil.hpp"
37 
38 using namespace tcu;
39 using namespace std;
40 using namespace vk;
41 using namespace vkt;
42 
43 namespace
44 {
45 
// Returns the fixed upper bound (1024) used for widths in this file.
// NOTE(review): presumably the largest width the tests iterate up to --
// confirm against the callers.
deUint32 getMaxWidth ()
{
	const deUint32 maxWidth = 1024u;

	return maxWidth;
}
50 
// Returns the width to test after \p width.
//
// Widths below 128 (the max subgroup size) advance one at a time so that every
// value in that range gets tested. From 128 upwards the width is doubled, so
// only powers of two are visited, which reduces testing time.
deUint32 getNextWidth (const deUint32 width)
{
	const bool doubleWidth = (width >= 128);

	return doubleWidth ? (width * 2) : (width + 1);
}
64 
getFormatSizeInBytes(const VkFormat format)65 deUint32 getFormatSizeInBytes (const VkFormat format)
66 {
67 	switch (format)
68 	{
69 		default:
70 			DE_FATAL("Unhandled format!");
71 			return 0;
72 		case VK_FORMAT_R8_SINT:
73 		case VK_FORMAT_R8_UINT:
74 			return static_cast<deUint32>(sizeof(deInt8));
75 		case VK_FORMAT_R8G8_SINT:
76 		case VK_FORMAT_R8G8_UINT:
77 			return static_cast<deUint32>(sizeof(deInt8) * 2);
78 		case VK_FORMAT_R8G8B8_SINT:
79 		case VK_FORMAT_R8G8B8_UINT:
80 		case VK_FORMAT_R8G8B8A8_SINT:
81 		case VK_FORMAT_R8G8B8A8_UINT:
82 			return static_cast<deUint32>(sizeof(deInt8) * 4);
83 		case VK_FORMAT_R16_SINT:
84 		case VK_FORMAT_R16_UINT:
85 		case VK_FORMAT_R16_SFLOAT:
86 			return static_cast<deUint32>(sizeof(deInt16));
87 		case VK_FORMAT_R16G16_SINT:
88 		case VK_FORMAT_R16G16_UINT:
89 		case VK_FORMAT_R16G16_SFLOAT:
90 			return static_cast<deUint32>(sizeof(deInt16) * 2);
91 		case VK_FORMAT_R16G16B16_UINT:
92 		case VK_FORMAT_R16G16B16_SINT:
93 		case VK_FORMAT_R16G16B16_SFLOAT:
94 		case VK_FORMAT_R16G16B16A16_SINT:
95 		case VK_FORMAT_R16G16B16A16_UINT:
96 		case VK_FORMAT_R16G16B16A16_SFLOAT:
97 			return static_cast<deUint32>(sizeof(deInt16) * 4);
98 		case VK_FORMAT_R32_SINT:
99 		case VK_FORMAT_R32_UINT:
100 		case VK_FORMAT_R32_SFLOAT:
101 			return static_cast<deUint32>(sizeof(deInt32));
102 		case VK_FORMAT_R32G32_SINT:
103 		case VK_FORMAT_R32G32_UINT:
104 		case VK_FORMAT_R32G32_SFLOAT:
105 			return static_cast<deUint32>(sizeof(deInt32) * 2);
106 		case VK_FORMAT_R32G32B32_SINT:
107 		case VK_FORMAT_R32G32B32_UINT:
108 		case VK_FORMAT_R32G32B32_SFLOAT:
109 		case VK_FORMAT_R32G32B32A32_SINT:
110 		case VK_FORMAT_R32G32B32A32_UINT:
111 		case VK_FORMAT_R32G32B32A32_SFLOAT:
112 			return static_cast<deUint32>(sizeof(deInt32) * 4);
113 		case VK_FORMAT_R64_SINT:
114 		case VK_FORMAT_R64_UINT:
115 		case VK_FORMAT_R64_SFLOAT:
116 			return static_cast<deUint32>(sizeof(deInt64));
117 		case VK_FORMAT_R64G64_SINT:
118 		case VK_FORMAT_R64G64_UINT:
119 		case VK_FORMAT_R64G64_SFLOAT:
120 			return static_cast<deUint32>(sizeof(deInt64) * 2);
121 		case VK_FORMAT_R64G64B64_SINT:
122 		case VK_FORMAT_R64G64B64_UINT:
123 		case VK_FORMAT_R64G64B64_SFLOAT:
124 		case VK_FORMAT_R64G64B64A64_SINT:
125 		case VK_FORMAT_R64G64B64A64_UINT:
126 		case VK_FORMAT_R64G64B64A64_SFLOAT:
127 			return static_cast<deUint32>(sizeof(deInt64) * 4);
128 		// The below formats are used to represent bool and bvec* types. These
129 		// types are passed to the shader as int and ivec* types, before the
130 		// calculations are done as booleans. We need a distinct type here so
131 		// that the shader generators can switch on it and generate the correct
132 		// shader source for testing.
133 		case VK_FORMAT_R8_USCALED:
134 			return static_cast<deUint32>(sizeof(deInt32));
135 		case VK_FORMAT_R8G8_USCALED:
136 			return static_cast<deUint32>(sizeof(deInt32) * 2);
137 		case VK_FORMAT_R8G8B8_USCALED:
138 		case VK_FORMAT_R8G8B8A8_USCALED:
139 			return static_cast<deUint32>(sizeof(deInt32) * 4);
140 	}
141 }
142 
getElementSizeInBytes(const VkFormat format,const subgroups::SSBOData::InputDataLayoutType layout)143 deUint32 getElementSizeInBytes (const VkFormat									format,
144 								const subgroups::SSBOData::InputDataLayoutType	layout)
145 {
146 	const deUint32 bytes = getFormatSizeInBytes(format);
147 
148 	if (layout == subgroups::SSBOData::LayoutStd140)
149 		return bytes < 16 ? 16 : bytes;
150 	else
151 		return bytes;
152 }
153 
makeRenderPass(Context & context,VkFormat format)154 Move<VkRenderPass> makeRenderPass (Context& context, VkFormat format)
155 {
156 	const VkAttachmentReference		colorReference			=
157 	{
158 		0,
159 		VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
160 	};
161 	const VkSubpassDescription		subpassDescription		=
162 	{
163 		0u,									//  VkSubpassDescriptionFlags		flags;
164 		VK_PIPELINE_BIND_POINT_GRAPHICS,	//  VkPipelineBindPoint				pipelineBindPoint;
165 		0,									//  deUint32						inputAttachmentCount;
166 		DE_NULL,							//  const VkAttachmentReference*	pInputAttachments;
167 		1,									//  deUint32						colorAttachmentCount;
168 		&colorReference,					//  const VkAttachmentReference*	pColorAttachments;
169 		DE_NULL,							//  const VkAttachmentReference*	pResolveAttachments;
170 		DE_NULL,							//  const VkAttachmentReference*	pDepthStencilAttachment;
171 		0,									//  deUint32						preserveAttachmentCount;
172 		DE_NULL								//  const deUint32*					pPreserveAttachments;
173 	};
174 	const VkSubpassDependency		subpassDependencies[2]	=
175 	{
176 		{
177 			VK_SUBPASS_EXTERNAL,															//  deUint32				srcSubpass;
178 			0u,																				//  deUint32				dstSubpass;
179 			VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,											//  VkPipelineStageFlags	srcStageMask;
180 			VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,									//  VkPipelineStageFlags	dstStageMask;
181 			VK_ACCESS_MEMORY_READ_BIT,														//  VkAccessFlags			srcAccessMask;
182 			VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,		//  VkAccessFlags			dstAccessMask;
183 			VK_DEPENDENCY_BY_REGION_BIT														//  VkDependencyFlags		dependencyFlags;
184 		},
185 		{
186 			0u,																				//  deUint32				srcSubpass;
187 			VK_SUBPASS_EXTERNAL,															//  deUint32				dstSubpass;
188 			VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,									//  VkPipelineStageFlags	srcStageMask;
189 			VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,											//  VkPipelineStageFlags	dstStageMask;
190 			VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,		//  VkAccessFlags			srcAccessMask;
191 			VK_ACCESS_MEMORY_READ_BIT,														//  VkAccessFlags			dstAccessMask;
192 			VK_DEPENDENCY_BY_REGION_BIT														//  VkDependencyFlags		dependencyFlags;
193 		},
194 	};
195 	const VkAttachmentDescription	attachmentDescription	=
196 	{
197 		0u,											//  VkAttachmentDescriptionFlags	flags;
198 		format,										//  VkFormat						format;
199 		VK_SAMPLE_COUNT_1_BIT,						//  VkSampleCountFlagBits			samples;
200 		VK_ATTACHMENT_LOAD_OP_CLEAR,				//  VkAttachmentLoadOp				loadOp;
201 		VK_ATTACHMENT_STORE_OP_STORE,				//  VkAttachmentStoreOp				storeOp;
202 		VK_ATTACHMENT_LOAD_OP_DONT_CARE,			//  VkAttachmentLoadOp				stencilLoadOp;
203 		VK_ATTACHMENT_STORE_OP_DONT_CARE,			//  VkAttachmentStoreOp				stencilStoreOp;
204 		VK_IMAGE_LAYOUT_UNDEFINED,					//  VkImageLayout					initialLayout;
205 		VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL		//  VkImageLayout					finalLayout;
206 	};
207 	const VkRenderPassCreateInfo	renderPassCreateInfo =
208 	{
209 		VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,	//  VkStructureType					sType;
210 		DE_NULL,									//  const void*						pNext;
211 		0u,											//  VkRenderPassCreateFlags			flags;
212 		1,											//  deUint32						attachmentCount;
213 		&attachmentDescription,						//  const VkAttachmentDescription*	pAttachments;
214 		1,											//  deUint32						subpassCount;
215 		&subpassDescription,						//  const VkSubpassDescription*		pSubpasses;
216 		2,											//  deUint32						dependencyCount;
217 		subpassDependencies							//  const VkSubpassDependency*		pDependencies;
218 	};
219 
220 	return createRenderPass(context.getDeviceInterface(), context.getDevice(), &renderPassCreateInfo);
221 }
222 
makeGraphicsPipeline(const DeviceInterface & vk,const VkDevice device,const VkPipelineLayout pipelineLayout,const VkShaderModule vertexShaderModule,const VkShaderModule tessellationControlShaderModule,const VkShaderModule tessellationEvalShaderModule,const VkShaderModule geometryShaderModule,const VkShaderModule fragmentShaderModule,const VkRenderPass renderPass,const std::vector<VkViewport> & viewports,const std::vector<VkRect2D> & scissors,const VkPrimitiveTopology topology,const deUint32 subpass,const deUint32 patchControlPoints,const VkPipelineVertexInputStateCreateInfo * vertexInputStateCreateInfo,const VkPipelineRasterizationStateCreateInfo * rasterizationStateCreateInfo,const VkPipelineMultisampleStateCreateInfo * multisampleStateCreateInfo,const VkPipelineDepthStencilStateCreateInfo * depthStencilStateCreateInfo,const VkPipelineColorBlendStateCreateInfo * colorBlendStateCreateInfo,const VkPipelineDynamicStateCreateInfo * dynamicStateCreateInfo,const deUint32 vertexShaderStageCreateFlags,const deUint32 tessellationControlShaderStageCreateFlags,const deUint32 tessellationEvalShaderStageCreateFlags,const deUint32 geometryShaderStageCreateFlags,const deUint32 fragmentShaderStageCreateFlags,const deUint32 requiredSubgroupSize[5])223 Move<VkPipeline> makeGraphicsPipeline (const DeviceInterface&							vk,
224 									   const VkDevice									device,
225 									   const VkPipelineLayout							pipelineLayout,
226 									   const VkShaderModule								vertexShaderModule,
227 									   const VkShaderModule								tessellationControlShaderModule,
228 									   const VkShaderModule								tessellationEvalShaderModule,
229 									   const VkShaderModule								geometryShaderModule,
230 									   const VkShaderModule								fragmentShaderModule,
231 									   const VkRenderPass								renderPass,
232 									   const std::vector<VkViewport>&					viewports,
233 									   const std::vector<VkRect2D>&						scissors,
234 									   const VkPrimitiveTopology						topology,
235 									   const deUint32									subpass,
236 									   const deUint32									patchControlPoints,
237 									   const VkPipelineVertexInputStateCreateInfo*		vertexInputStateCreateInfo,
238 									   const VkPipelineRasterizationStateCreateInfo*	rasterizationStateCreateInfo,
239 									   const VkPipelineMultisampleStateCreateInfo*		multisampleStateCreateInfo,
240 									   const VkPipelineDepthStencilStateCreateInfo*		depthStencilStateCreateInfo,
241 									   const VkPipelineColorBlendStateCreateInfo*		colorBlendStateCreateInfo,
242 									   const VkPipelineDynamicStateCreateInfo*			dynamicStateCreateInfo,
243 									   const deUint32									vertexShaderStageCreateFlags,
244 									   const deUint32									tessellationControlShaderStageCreateFlags,
245 									   const deUint32									tessellationEvalShaderStageCreateFlags,
246 									   const deUint32									geometryShaderStageCreateFlags,
247 									   const deUint32									fragmentShaderStageCreateFlags,
248 									   const deUint32									requiredSubgroupSize[5])
249 {
250 	const VkBool32									disableRasterization				= (fragmentShaderModule == DE_NULL);
251 	const bool										hasTessellation						= (tessellationControlShaderModule != DE_NULL || tessellationEvalShaderModule != DE_NULL);
252 
253 	VkPipelineShaderStageCreateInfo					stageCreateInfo						=
254 	{
255 		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType                     sType
256 		DE_NULL,												// const void*                         pNext
257 		0u,														// VkPipelineShaderStageCreateFlags    flags
258 		VK_SHADER_STAGE_VERTEX_BIT,								// VkShaderStageFlagBits               stage
259 		DE_NULL,												// VkShaderModule                      module
260 		"main",													// const char*                         pName
261 		DE_NULL													// const VkSpecializationInfo*         pSpecializationInfo
262 	};
263 
264 	std::vector<VkPipelineShaderStageCreateInfo>	pipelineShaderStageParams;
265 
266 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT requiredSubgroupSizeCreateInfo[5] =
267 	{
268 		{
269 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
270 			DE_NULL,
271 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[0] : 0u,
272 		},
273 		{
274 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
275 			DE_NULL,
276 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[1] : 0u,
277 		},
278 		{
279 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
280 			DE_NULL,
281 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[2] : 0u,
282 		},
283 		{
284 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
285 			DE_NULL,
286 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[3] : 0u,
287 		},
288 		{
289 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
290 			DE_NULL,
291 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[4] : 0u,
292 		},
293 	};
294 
295 	{
296 		stageCreateInfo.pNext	= (requiredSubgroupSizeCreateInfo[0].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[0] : DE_NULL;
297 		stageCreateInfo.flags	= vertexShaderStageCreateFlags;
298 		stageCreateInfo.stage	= VK_SHADER_STAGE_VERTEX_BIT;
299 		stageCreateInfo.module	= vertexShaderModule;
300 		pipelineShaderStageParams.push_back(stageCreateInfo);
301 	}
302 
303 	if (tessellationControlShaderModule != DE_NULL)
304 	{
305 		stageCreateInfo.pNext	= (requiredSubgroupSizeCreateInfo[1].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[1] : DE_NULL;
306 		stageCreateInfo.flags	= tessellationControlShaderStageCreateFlags;
307 		stageCreateInfo.stage	= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
308 		stageCreateInfo.module	= tessellationControlShaderModule;
309 		pipelineShaderStageParams.push_back(stageCreateInfo);
310 	}
311 
312 	if (tessellationEvalShaderModule != DE_NULL)
313 	{
314 		stageCreateInfo.pNext	= (requiredSubgroupSize != DE_NULL && requiredSubgroupSizeCreateInfo[2].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[2] : DE_NULL;
315 		stageCreateInfo.flags	= tessellationEvalShaderStageCreateFlags;
316 		stageCreateInfo.stage	= VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
317 		stageCreateInfo.module	= tessellationEvalShaderModule;
318 		pipelineShaderStageParams.push_back(stageCreateInfo);
319 	}
320 
321 	if (geometryShaderModule != DE_NULL)
322 	{
323 		stageCreateInfo.pNext	= (requiredSubgroupSizeCreateInfo[3].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[3] : DE_NULL;
324 		stageCreateInfo.flags	= geometryShaderStageCreateFlags;
325 		stageCreateInfo.stage	= VK_SHADER_STAGE_GEOMETRY_BIT;
326 		stageCreateInfo.module	= geometryShaderModule;
327 		pipelineShaderStageParams.push_back(stageCreateInfo);
328 	}
329 
330 	if (fragmentShaderModule != DE_NULL)
331 	{
332 		stageCreateInfo.pNext	= (requiredSubgroupSizeCreateInfo[4].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[4] : DE_NULL;
333 		stageCreateInfo.flags	= fragmentShaderStageCreateFlags;
334 		stageCreateInfo.stage	= VK_SHADER_STAGE_FRAGMENT_BIT;
335 		stageCreateInfo.module	= fragmentShaderModule;
336 		pipelineShaderStageParams.push_back(stageCreateInfo);
337 	}
338 
339 	const VkVertexInputBindingDescription			vertexInputBindingDescription		=
340 	{
341 		0u,								// deUint32             binding
342 		sizeof(tcu::Vec4),				// deUint32             stride
343 		VK_VERTEX_INPUT_RATE_VERTEX,	// VkVertexInputRate    inputRate
344 	};
345 
346 	const VkVertexInputAttributeDescription			vertexInputAttributeDescription		=
347 	{
348 		0u,								// deUint32    location
349 		0u,								// deUint32    binding
350 		VK_FORMAT_R32G32B32A32_SFLOAT,	// VkFormat    format
351 		0u								// deUint32    offset
352 	};
353 
354 	const VkPipelineVertexInputStateCreateInfo		vertexInputStateCreateInfoDefault	=
355 	{
356 		VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,	// VkStructureType                             sType
357 		DE_NULL,													// const void*                                 pNext
358 		(VkPipelineVertexInputStateCreateFlags)0,					// VkPipelineVertexInputStateCreateFlags       flags
359 		1u,															// deUint32                                    vertexBindingDescriptionCount
360 		&vertexInputBindingDescription,								// const VkVertexInputBindingDescription*      pVertexBindingDescriptions
361 		1u,															// deUint32                                    vertexAttributeDescriptionCount
362 		&vertexInputAttributeDescription							// const VkVertexInputAttributeDescription*    pVertexAttributeDescriptions
363 	};
364 
365 	const VkPipelineInputAssemblyStateCreateInfo	inputAssemblyStateCreateInfo		=
366 	{
367 		VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,	// VkStructureType                            sType
368 		DE_NULL,														// const void*                                pNext
369 		0u,																// VkPipelineInputAssemblyStateCreateFlags    flags
370 		topology,														// VkPrimitiveTopology                        topology
371 		VK_FALSE														// VkBool32                                   primitiveRestartEnable
372 	};
373 
374 	const VkPipelineTessellationStateCreateInfo		tessStateCreateInfo					=
375 	{
376 		VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,	// VkStructureType                           sType
377 		DE_NULL,													// const void*                               pNext
378 		0u,															// VkPipelineTessellationStateCreateFlags    flags
379 		patchControlPoints											// deUint32                                  patchControlPoints
380 	};
381 
382 	const VkPipelineViewportStateCreateInfo			viewportStateCreateInfo				=
383 	{
384 		VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,	// VkStructureType                             sType
385 		DE_NULL,												// const void*                                 pNext
386 		(VkPipelineViewportStateCreateFlags)0,					// VkPipelineViewportStateCreateFlags          flags
387 		viewports.empty() ? 1u : (deUint32)viewports.size(),	// deUint32                                    viewportCount
388 		viewports.empty() ? DE_NULL : &viewports[0],			// const VkViewport*                           pViewports
389 		viewports.empty() ? 1u : (deUint32)scissors.size(),		// deUint32                                    scissorCount
390 		scissors.empty() ? DE_NULL : &scissors[0]				// const VkRect2D*                             pScissors
391 	};
392 
393 	const VkPipelineRasterizationStateCreateInfo	rasterizationStateCreateInfoDefault	=
394 	{
395 		VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,	// VkStructureType                            sType
396 		DE_NULL,													// const void*                                pNext
397 		0u,															// VkPipelineRasterizationStateCreateFlags    flags
398 		VK_FALSE,													// VkBool32                                   depthClampEnable
399 		disableRasterization,										// VkBool32                                   rasterizerDiscardEnable
400 		VK_POLYGON_MODE_FILL,										// VkPolygonMode                              polygonMode
401 		VK_CULL_MODE_NONE,											// VkCullModeFlags                            cullMode
402 		VK_FRONT_FACE_COUNTER_CLOCKWISE,							// VkFrontFace                                frontFace
403 		VK_FALSE,													// VkBool32                                   depthBiasEnable
404 		0.0f,														// float                                      depthBiasConstantFactor
405 		0.0f,														// float                                      depthBiasClamp
406 		0.0f,														// float                                      depthBiasSlopeFactor
407 		1.0f														// float                                      lineWidth
408 	};
409 
410 	const VkPipelineMultisampleStateCreateInfo		multisampleStateCreateInfoDefault	=
411 	{
412 		VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,	// VkStructureType                          sType
413 		DE_NULL,													// const void*                              pNext
414 		0u,															// VkPipelineMultisampleStateCreateFlags    flags
415 		VK_SAMPLE_COUNT_1_BIT,										// VkSampleCountFlagBits                    rasterizationSamples
416 		VK_FALSE,													// VkBool32                                 sampleShadingEnable
417 		1.0f,														// float                                    minSampleShading
418 		DE_NULL,													// const VkSampleMask*                      pSampleMask
419 		VK_FALSE,													// VkBool32                                 alphaToCoverageEnable
420 		VK_FALSE													// VkBool32                                 alphaToOneEnable
421 	};
422 
423 	const VkStencilOpState							stencilOpState						=
424 	{
425 		VK_STENCIL_OP_KEEP,		// VkStencilOp    failOp
426 		VK_STENCIL_OP_KEEP,		// VkStencilOp    passOp
427 		VK_STENCIL_OP_KEEP,		// VkStencilOp    depthFailOp
428 		VK_COMPARE_OP_NEVER,	// VkCompareOp    compareOp
429 		0,						// deUint32       compareMask
430 		0,						// deUint32       writeMask
431 		0						// deUint32       reference
432 	};
433 
434 	const VkPipelineDepthStencilStateCreateInfo		depthStencilStateCreateInfoDefault	=
435 	{
436 		VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,	// VkStructureType                          sType
437 		DE_NULL,													// const void*                              pNext
438 		0u,															// VkPipelineDepthStencilStateCreateFlags   flags
439 		VK_FALSE,													// VkBool32                                 depthTestEnable
440 		VK_FALSE,													// VkBool32                                 depthWriteEnable
441 		VK_COMPARE_OP_LESS_OR_EQUAL,								// VkCompareOp                              depthCompareOp
442 		VK_FALSE,													// VkBool32                                 depthBoundsTestEnable
443 		VK_FALSE,													// VkBool32                                 stencilTestEnable
444 		stencilOpState,												// VkStencilOpState                         front
445 		stencilOpState,												// VkStencilOpState                         back
446 		0.0f,														// float                                    minDepthBounds
447 		1.0f,														// float                                    maxDepthBounds
448 	};
449 
450 	const VkPipelineColorBlendAttachmentState		colorBlendAttachmentState			=
451 	{
452 		VK_FALSE,					// VkBool32                 blendEnable
453 		VK_BLEND_FACTOR_ZERO,		// VkBlendFactor            srcColorBlendFactor
454 		VK_BLEND_FACTOR_ZERO,		// VkBlendFactor            dstColorBlendFactor
455 		VK_BLEND_OP_ADD,			// VkBlendOp                colorBlendOp
456 		VK_BLEND_FACTOR_ZERO,		// VkBlendFactor            srcAlphaBlendFactor
457 		VK_BLEND_FACTOR_ZERO,		// VkBlendFactor            dstAlphaBlendFactor
458 		VK_BLEND_OP_ADD,			// VkBlendOp                alphaBlendOp
459 		VK_COLOR_COMPONENT_R_BIT	// VkColorComponentFlags    colorWriteMask
460 		| VK_COLOR_COMPONENT_G_BIT
461 		| VK_COLOR_COMPONENT_B_BIT
462 		| VK_COLOR_COMPONENT_A_BIT
463 	};
464 
465 	const VkPipelineColorBlendStateCreateInfo		colorBlendStateCreateInfoDefault	=
466 	{
467 		VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,	// VkStructureType                               sType
468 		DE_NULL,													// const void*                                   pNext
469 		0u,															// VkPipelineColorBlendStateCreateFlags          flags
470 		VK_FALSE,													// VkBool32                                      logicOpEnable
471 		VK_LOGIC_OP_CLEAR,											// VkLogicOp                                     logicOp
472 		1u,															// deUint32                                      attachmentCount
473 		&colorBlendAttachmentState,									// const VkPipelineColorBlendAttachmentState*    pAttachments
474 		{ 0.0f, 0.0f, 0.0f, 0.0f }									// float                                         blendConstants[4]
475 	};
476 
477 	std::vector<VkDynamicState>						dynamicStates;
478 
479 	if (viewports.empty())
480 		dynamicStates.push_back(VK_DYNAMIC_STATE_VIEWPORT);
481 	if (scissors.empty())
482 		dynamicStates.push_back(VK_DYNAMIC_STATE_SCISSOR);
483 
484 	const VkPipelineDynamicStateCreateInfo			dynamicStateCreateInfoDefault		=
485 	{
486 		VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,	// VkStructureType                      sType
487 		DE_NULL,												// const void*                          pNext
488 		0u,														// VkPipelineDynamicStateCreateFlags    flags
489 		(deUint32)dynamicStates.size(),							// deUint32                             dynamicStateCount
490 		dynamicStates.empty() ? DE_NULL : &dynamicStates[0]		// const VkDynamicState*                pDynamicStates
491 	};
492 
493 	const VkPipelineDynamicStateCreateInfo*			dynamicStateCreateInfoDefaultPtr	= dynamicStates.empty() ? DE_NULL : &dynamicStateCreateInfoDefault;
494 
495 	const VkGraphicsPipelineCreateInfo				pipelineCreateInfo					=
496 	{
497 		VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,														// VkStructureType                                  sType
498 		DE_NULL,																								// const void*                                      pNext
499 		0u,																										// VkPipelineCreateFlags                            flags
500 		(deUint32)pipelineShaderStageParams.size(),																// deUint32                                         stageCount
501 		&pipelineShaderStageParams[0],																			// const VkPipelineShaderStageCreateInfo*           pStages
502 		vertexInputStateCreateInfo ? vertexInputStateCreateInfo : &vertexInputStateCreateInfoDefault,			// const VkPipelineVertexInputStateCreateInfo*      pVertexInputState
503 		&inputAssemblyStateCreateInfo,																			// const VkPipelineInputAssemblyStateCreateInfo*    pInputAssemblyState
504 		hasTessellation ? &tessStateCreateInfo : DE_NULL,														// const VkPipelineTessellationStateCreateInfo*     pTessellationState
505 		&viewportStateCreateInfo,																				// const VkPipelineViewportStateCreateInfo*         pViewportState
506 		rasterizationStateCreateInfo ? rasterizationStateCreateInfo : &rasterizationStateCreateInfoDefault,		// const VkPipelineRasterizationStateCreateInfo*    pRasterizationState
507 		multisampleStateCreateInfo ? multisampleStateCreateInfo: &multisampleStateCreateInfoDefault,			// const VkPipelineMultisampleStateCreateInfo*      pMultisampleState
508 		depthStencilStateCreateInfo ? depthStencilStateCreateInfo : &depthStencilStateCreateInfoDefault,		// const VkPipelineDepthStencilStateCreateInfo*     pDepthStencilState
509 		colorBlendStateCreateInfo ? colorBlendStateCreateInfo : &colorBlendStateCreateInfoDefault,				// const VkPipelineColorBlendStateCreateInfo*       pColorBlendState
510 		dynamicStateCreateInfo ? dynamicStateCreateInfo : dynamicStateCreateInfoDefaultPtr,						// const VkPipelineDynamicStateCreateInfo*          pDynamicState
511 		pipelineLayout,																							// VkPipelineLayout                                 layout
512 		renderPass,																								// VkRenderPass                                     renderPass
513 		subpass,																								// deUint32                                         subpass
514 		DE_NULL,																								// VkPipeline                                       basePipelineHandle
515 		0																										// deInt32                                          basePipelineIndex;
516 	};
517 
518 	return createGraphicsPipeline(vk, device, DE_NULL, &pipelineCreateInfo);
519 }
520 
makeGraphicsPipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderStageFlags stages,const VkShaderModule vertexShaderModule,const VkShaderModule fragmentShaderModule,const VkShaderModule geometryShaderModule,const VkShaderModule tessellationControlModule,const VkShaderModule tessellationEvaluationModule,const VkRenderPass renderPass,const VkPrimitiveTopology topology=VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,const VkVertexInputBindingDescription * vertexInputBindingDescription=DE_NULL,const VkVertexInputAttributeDescription * vertexInputAttributeDescriptions=DE_NULL,const bool frameBufferTests=false,const vk::VkFormat attachmentFormat=VK_FORMAT_R32G32B32A32_SFLOAT,const deUint32 vertexShaderStageCreateFlags=0u,const deUint32 tessellationControlShaderStageCreateFlags=0u,const deUint32 tessellationEvalShaderStageCreateFlags=0u,const deUint32 geometryShaderStageCreateFlags=0u,const deUint32 fragmentShaderStageCreateFlags=0u,const deUint32 requiredSubgroupSize[5]=DE_NULL)521 Move<VkPipeline> makeGraphicsPipeline (Context&									context,
522 									   const VkPipelineLayout					pipelineLayout,
523 									   const VkShaderStageFlags					stages,
524 									   const VkShaderModule						vertexShaderModule,
525 									   const VkShaderModule						fragmentShaderModule,
526 									   const VkShaderModule						geometryShaderModule,
527 									   const VkShaderModule						tessellationControlModule,
528 									   const VkShaderModule						tessellationEvaluationModule,
529 									   const VkRenderPass						renderPass,
530 									   const VkPrimitiveTopology				topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
531 									   const VkVertexInputBindingDescription*	vertexInputBindingDescription = DE_NULL,
532 									   const VkVertexInputAttributeDescription*	vertexInputAttributeDescriptions = DE_NULL,
533 									   const bool								frameBufferTests = false,
534 									   const vk::VkFormat						attachmentFormat = VK_FORMAT_R32G32B32A32_SFLOAT,
535 									   const deUint32							vertexShaderStageCreateFlags = 0u,
536 									   const deUint32							tessellationControlShaderStageCreateFlags = 0u,
537 									   const deUint32							tessellationEvalShaderStageCreateFlags = 0u,
538 									   const deUint32							geometryShaderStageCreateFlags = 0u,
539 									   const deUint32							fragmentShaderStageCreateFlags = 0u,
540 									   const deUint32							requiredSubgroupSize[5] = DE_NULL)
541 {
542 	const std::vector<VkViewport>				noViewports;
543 	const std::vector<VkRect2D>					noScissors;
544 	const VkPipelineVertexInputStateCreateInfo	vertexInputStateCreateInfo	=
545 	{
546 		VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,	// VkStructureType								sType;
547 		DE_NULL,													// const void*									pNext;
548 		0u,															// VkPipelineVertexInputStateCreateFlags		flags;
549 		vertexInputBindingDescription == DE_NULL ? 0u : 1u,			// deUint32										vertexBindingDescriptionCount;
550 		vertexInputBindingDescription,								// const VkVertexInputBindingDescription*		pVertexBindingDescriptions;
551 		vertexInputAttributeDescriptions == DE_NULL ? 0u : 1u,		// deUint32										vertexAttributeDescriptionCount;
552 		vertexInputAttributeDescriptions,							// const VkVertexInputAttributeDescription*		pVertexAttributeDescriptions;
553 	};
554 	const deUint32								numChannels					= getNumUsedChannels(mapVkFormat(attachmentFormat).order);
555 	const VkColorComponentFlags					colorComponent				= numChannels == 1 ? VK_COLOR_COMPONENT_R_BIT :
556 																			  numChannels == 2 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT :
557 																			  numChannels == 3 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT :
558 																			  VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
559 	const VkPipelineColorBlendAttachmentState	colorBlendAttachmentState	=
560 	{
561 		VK_FALSE,				//  VkBool32				blendEnable;
562 		VK_BLEND_FACTOR_ZERO,	//  VkBlendFactor			srcColorBlendFactor;
563 		VK_BLEND_FACTOR_ZERO,	//  VkBlendFactor			dstColorBlendFactor;
564 		VK_BLEND_OP_ADD,		//  VkBlendOp				colorBlendOp;
565 		VK_BLEND_FACTOR_ZERO,	//  VkBlendFactor			srcAlphaBlendFactor;
566 		VK_BLEND_FACTOR_ZERO,	//  VkBlendFactor			dstAlphaBlendFactor;
567 		VK_BLEND_OP_ADD,		//  VkBlendOp				alphaBlendOp;
568 		colorComponent			//  VkColorComponentFlags	colorWriteMask;
569 	};
570 	const VkPipelineColorBlendStateCreateInfo	colorBlendStateCreateInfo	=
571 	{
572 		VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,	//  VkStructureType								sType;
573 		DE_NULL,													//  const void*									pNext;
574 		0u,															//  VkPipelineColorBlendStateCreateFlags		flags;
575 		VK_FALSE,													//  VkBool32									logicOpEnable;
576 		VK_LOGIC_OP_CLEAR,											//  VkLogicOp									logicOp;
577 		1,															//  deUint32									attachmentCount;
578 		&colorBlendAttachmentState,									//  const VkPipelineColorBlendAttachmentState*	pAttachments;
579 		{ 0.0f, 0.0f, 0.0f, 0.0f }									//  float										blendConstants[4];
580 	};
581 	const deUint32								patchControlPoints			= (VK_SHADER_STAGE_FRAGMENT_BIT & stages && frameBufferTests) ? 2u : 1u;
582 
583 	return makeGraphicsPipeline(context.getDeviceInterface(),	// const DeviceInterface&                        vk
584 								context.getDevice(),			// const VkDevice                                device
585 								pipelineLayout,					// const VkPipelineLayout                        pipelineLayout
586 								vertexShaderModule,				// const VkShaderModule                          vertexShaderModule
587 								tessellationControlModule,		// const VkShaderModule                          tessellationControlShaderModule
588 								tessellationEvaluationModule,	// const VkShaderModule                          tessellationEvalShaderModule
589 								geometryShaderModule,			// const VkShaderModule                          geometryShaderModule
590 								fragmentShaderModule,			// const VkShaderModule                          fragmentShaderModule
591 								renderPass,						// const VkRenderPass                            renderPass
592 								noViewports,					// const std::vector<VkViewport>&                viewports
593 								noScissors,						// const std::vector<VkRect2D>&                  scissors
594 								topology,						// const VkPrimitiveTopology                     topology
595 								0u,								// const deUint32                                subpass
596 								patchControlPoints,				// const deUint32                                patchControlPoints
597 								&vertexInputStateCreateInfo,	// const VkPipelineVertexInputStateCreateInfo*   vertexInputStateCreateInfo
598 								DE_NULL,						// const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
599 								DE_NULL,						// const VkPipelineMultisampleStateCreateInfo*   multisampleStateCreateInfo
600 								DE_NULL,						// const VkPipelineDepthStencilStateCreateInfo*  depthStencilStateCreateInfo
601 								&colorBlendStateCreateInfo,		// const VkPipelineColorBlendStateCreateInfo*    colorBlendStateCreateInfo
602 								DE_NULL,						// const VkPipelineDynamicStateCreateInfo*
603 								vertexShaderStageCreateFlags,	// const deUint32								 vertexShaderStageCreateFlags,
604 								tessellationControlShaderStageCreateFlags,	// const deUint32					 tessellationControlShaderStageCreateFlags
605 								tessellationEvalShaderStageCreateFlags,		// const deUint32					 tessellationEvalShaderStageCreateFlags
606 								geometryShaderStageCreateFlags,	// const deUint32								 geometryShaderStageCreateFlags
607 								fragmentShaderStageCreateFlags,	// const deUint32								 fragmentShaderStageCreateFlags
608 								requiredSubgroupSize);			// const deUint32								 requiredSubgroupSize[5]
609 }
610 
makeCommandBuffer(Context & context,const VkCommandPool commandPool)611 Move<VkCommandBuffer> makeCommandBuffer (Context& context, const VkCommandPool commandPool)
612 {
613 	const VkCommandBufferAllocateInfo bufferAllocateParams =
614 	{
615 		VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,	// VkStructureType		sType;
616 		DE_NULL,										// const void*			pNext;
617 		commandPool,									// VkCommandPool		commandPool;
618 		VK_COMMAND_BUFFER_LEVEL_PRIMARY,				// VkCommandBufferLevel	level;
619 		1u,												// deUint32				bufferCount;
620 	};
621 	return allocateCommandBuffer(context.getDeviceInterface(),
622 								 context.getDevice(), &bufferAllocateParams);
623 }
624 
625 struct Buffer;
626 struct Image;
627 
628 struct BufferOrImage
629 {
isImage__anonfdc00a3a0111::BufferOrImage630 	bool isImage() const
631 	{
632 		return m_isImage;
633 	}
634 
getAsBuffer__anonfdc00a3a0111::BufferOrImage635 	Buffer* getAsBuffer()
636 	{
637 		if (m_isImage) DE_FATAL("Trying to get a buffer as an image!");
638 		return reinterpret_cast<Buffer* >(this);
639 	}
640 
getAsImage__anonfdc00a3a0111::BufferOrImage641 	Image* getAsImage()
642 	{
643 		if (!m_isImage) DE_FATAL("Trying to get an image as a buffer!");
644 		return reinterpret_cast<Image*>(this);
645 	}
646 
getType__anonfdc00a3a0111::BufferOrImage647 	virtual VkDescriptorType getType() const
648 	{
649 		if (m_isImage)
650 		{
651 			return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
652 		}
653 		else
654 		{
655 			return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
656 		}
657 	}
658 
getAllocation__anonfdc00a3a0111::BufferOrImage659 	Allocation& getAllocation() const
660 	{
661 		return *m_allocation;
662 	}
663 
~BufferOrImage__anonfdc00a3a0111::BufferOrImage664 	virtual ~BufferOrImage() {}
665 
666 protected:
BufferOrImage__anonfdc00a3a0111::BufferOrImage667 	explicit BufferOrImage(bool image) : m_isImage(image) {}
668 
669 	bool m_isImage;
670 	de::details::MovePtr<Allocation> m_allocation;
671 };
672 
673 struct Buffer : public BufferOrImage
674 {
Buffer__anonfdc00a3a0111::Buffer675 	explicit Buffer (Context& context, VkDeviceSize sizeInBytes, VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT)
676 		: BufferOrImage		(false)
677 		, m_sizeInBytes		(sizeInBytes)
678 		, m_usage			(usage)
679 	{
680 		const DeviceInterface&			vkd					= context.getDeviceInterface();
681 		const VkDevice					device				= context.getDevice();
682 
683 		const vk::VkBufferCreateInfo	bufferCreateInfo	=
684 		{
685 			VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
686 			DE_NULL,
687 			0u,
688 			m_sizeInBytes,
689 			m_usage,
690 			VK_SHARING_MODE_EXCLUSIVE,
691 			0u,
692 			DE_NULL,
693 		};
694 		m_buffer		= createBuffer(vkd, device, &bufferCreateInfo);
695 
696 		VkMemoryRequirements			req					= getBufferMemoryRequirements(vkd, device, *m_buffer);
697 
698 		m_allocation	= context.getDefaultAllocator().allocate(req, MemoryRequirement::HostVisible);
699 		VK_CHECK(vkd.bindBufferMemory(device, *m_buffer, m_allocation->getMemory(), m_allocation->getOffset()));
700 	}
701 
getType__anonfdc00a3a0111::Buffer702 	virtual VkDescriptorType getType() const
703 	{
704 		if (VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT == m_usage)
705 		{
706 			return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
707 		}
708 		return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
709 	}
710 
getBuffer__anonfdc00a3a0111::Buffer711 	VkBuffer getBuffer () const
712 	{
713 		return *m_buffer;
714 	}
715 
getBufferPtr__anonfdc00a3a0111::Buffer716 	const VkBuffer* getBufferPtr () const
717 	{
718 		return &(*m_buffer);
719 	}
720 
getSize__anonfdc00a3a0111::Buffer721 	VkDeviceSize getSize () const
722 	{
723 		return m_sizeInBytes;
724 	}
725 
726 private:
727 	Move<VkBuffer>				m_buffer;
728 	VkDeviceSize				m_sizeInBytes;
729 	const VkBufferUsageFlags	m_usage;
730 };
731 
732 struct Image : public BufferOrImage
733 {
Image__anonfdc00a3a0111::Image734 	explicit Image (Context& context, deUint32 width, deUint32 height, VkFormat format, VkImageUsageFlags usage = VK_IMAGE_USAGE_STORAGE_BIT)
735 		: BufferOrImage(true)
736 	{
737 		const DeviceInterface&			vk					= context.getDeviceInterface();
738 		const VkDevice					device				= context.getDevice();
739 		const deUint32					queueFamilyIndex	= context.getUniversalQueueFamilyIndex();
740 
741 		const VkImageCreateInfo			imageCreateInfo		=
742 		{
743 			VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//  VkStructureType			sType;
744 			DE_NULL,								//  const void*				pNext;
745 			0,										//  VkImageCreateFlags		flags;
746 			VK_IMAGE_TYPE_2D,						//  VkImageType				imageType;
747 			format,									//  VkFormat				format;
748 			{width, height, 1},						//  VkExtent3D				extent;
749 			1,										//  deUint32				mipLevels;
750 			1,										//  deUint32				arrayLayers;
751 			VK_SAMPLE_COUNT_1_BIT,					//  VkSampleCountFlagBits	samples;
752 			VK_IMAGE_TILING_OPTIMAL,				//  VkImageTiling			tiling;
753 			usage,									//  VkImageUsageFlags		usage;
754 			VK_SHARING_MODE_EXCLUSIVE,				//  VkSharingMode			sharingMode;
755 			0u,										//  deUint32				queueFamilyIndexCount;
756 			DE_NULL,								//  const deUint32*			pQueueFamilyIndices;
757 			VK_IMAGE_LAYOUT_UNDEFINED				//  VkImageLayout			initialLayout;
758 		};
759 
760 		const VkComponentMapping		componentMapping	=
761 		{
762 			VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
763 			VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY
764 		};
765 
766 		const VkImageSubresourceRange	subresourceRange	=
767 		{
768 			VK_IMAGE_ASPECT_COLOR_BIT,	//VkImageAspectFlags	aspectMask
769 			0u,							//deUint32				baseMipLevel
770 			1u,							//deUint32				levelCount
771 			0u,							//deUint32				baseArrayLayer
772 			1u							//deUint32				layerCount
773 		};
774 
775 		const VkSamplerCreateInfo		samplerCreateInfo	=
776 		{
777 			VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,		//  VkStructureType			sType;
778 			DE_NULL,									//  const void*				pNext;
779 			0u,											//  VkSamplerCreateFlags	flags;
780 			VK_FILTER_NEAREST,							//  VkFilter				magFilter;
781 			VK_FILTER_NEAREST,							//  VkFilter				minFilter;
782 			VK_SAMPLER_MIPMAP_MODE_NEAREST,				//  VkSamplerMipmapMode		mipmapMode;
783 			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,		//  VkSamplerAddressMode	addressModeU;
784 			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,		//  VkSamplerAddressMode	addressModeV;
785 			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,		//  VkSamplerAddressMode	addressModeW;
786 			0.0f,										//  float					mipLodBias;
787 			VK_FALSE,									//  VkBool32				anisotropyEnable;
788 			1.0f,										//  float					maxAnisotropy;
789 			DE_FALSE,									//  VkBool32				compareEnable;
790 			VK_COMPARE_OP_ALWAYS,						//  VkCompareOp				compareOp;
791 			0.0f,										//  float					minLod;
792 			0.0f,										//  float					maxLod;
793 			VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,	//  VkBorderColor			borderColor;
794 			VK_FALSE,									//  VkBool32				unnormalizedCoordinates;
795 		};
796 
797 		m_image			= createImage(vk, device, &imageCreateInfo);
798 
799 		VkMemoryRequirements			req					= getImageMemoryRequirements(vk, device, *m_image);
800 
801 		req.size		*= 2;
802 		m_allocation	= context.getDefaultAllocator().allocate(req, MemoryRequirement::Any);
803 
804 		VK_CHECK(vk.bindImageMemory(device, *m_image, m_allocation->getMemory(), m_allocation->getOffset()));
805 
806 		const VkImageViewCreateInfo		imageViewCreateInfo	=
807 		{
808 			VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,	//  VkStructureType			sType;
809 			DE_NULL,									//  const void*				pNext;
810 			0,											//  VkImageViewCreateFlags	flags;
811 			*m_image,									//  VkImage					image;
812 			VK_IMAGE_VIEW_TYPE_2D,						//  VkImageViewType			viewType;
813 			imageCreateInfo.format,						//  VkFormat				format;
814 			componentMapping,							//  VkComponentMapping		components;
815 			subresourceRange							//  VkImageSubresourceRange	subresourceRange;
816 		};
817 
818 		m_imageView		= createImageView(vk, device, &imageViewCreateInfo);
819 		m_sampler		= createSampler(vk, device, &samplerCreateInfo);
820 
821 		// Transition input image layouts
822 		{
823 			const Unique<VkCommandPool>		cmdPool			(makeCommandPool(vk, device, queueFamilyIndex));
824 			const Unique<VkCommandBuffer>	cmdBuffer		(makeCommandBuffer(context, *cmdPool));
825 
826 			beginCommandBuffer(vk, *cmdBuffer);
827 
828 			const VkImageMemoryBarrier		imageBarrier	= makeImageMemoryBarrier((VkAccessFlags)0u, VK_ACCESS_TRANSFER_WRITE_BIT,
829 																	VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, *m_image, subresourceRange);
830 
831 			vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
832 				(VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &imageBarrier);
833 
834 			endCommandBuffer(vk, *cmdBuffer);
835 			submitCommandsAndWait(vk, device, context.getUniversalQueue(), *cmdBuffer);
836 		}
837 	}
838 
getImage__anonfdc00a3a0111::Image839 	VkImage getImage () const
840 	{
841 		return *m_image;
842 	}
843 
getImageView__anonfdc00a3a0111::Image844 	VkImageView getImageView () const
845 	{
846 		return *m_imageView;
847 	}
848 
getSampler__anonfdc00a3a0111::Image849 	VkSampler getSampler () const
850 	{
851 		return *m_sampler;
852 	}
853 
854 private:
855 	Move<VkImage>		m_image;
856 	Move<VkImageView>	m_imageView;
857 	Move<VkSampler>		m_sampler;
858 };
859 }
860 
getStagesCount(const VkShaderStageFlags shaderStages)861 deUint32 vkt::subgroups::getStagesCount (const VkShaderStageFlags shaderStages)
862 {
863 	const deUint32	stageCount	= isAllRayTracingStages(shaderStages) ? 6
864 								: isAllGraphicsStages(shaderStages)   ? 4
865 								: isAllComputeStages(shaderStages)    ? 1
866 								: 0;
867 
868 	DE_ASSERT(stageCount != 0);
869 
870 	return stageCount;
871 }
872 
getSharedMemoryBallotHelper()873 std::string vkt::subgroups::getSharedMemoryBallotHelper ()
874 {
875 	return	"shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
876 			"uvec4 sharedMemoryBallot(bool vote)\n"
877 			"{\n"
878 			"  uint groupOffset = gl_SubgroupID;\n"
879 			"  // One invocation in the group 0's the whole group's data\n"
880 			"  if (subgroupElect())\n"
881 			"  {\n"
882 			"    superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
883 			"  }\n"
884 			"  subgroupMemoryBarrierShared();\n"
885 			"  if (vote)\n"
886 			"  {\n"
887 			"    const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
888 			"    const highp uint bitToSet = 1u << invocationId;\n"
889 			"    switch (gl_SubgroupInvocationID / 32)\n"
890 			"    {\n"
891 			"    case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
892 			"    case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
893 			"    case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
894 			"    case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
895 			"    }\n"
896 			"  }\n"
897 			"  subgroupMemoryBarrierShared();\n"
898 			"  return superSecretComputeShaderHelper[groupOffset];\n"
899 			"}\n";
900 }
901 
getSharedMemoryBallotHelperARB()902 std::string vkt::subgroups::getSharedMemoryBallotHelperARB ()
903 {
904 	return	"shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
905 			"uint64_t sharedMemoryBallot(bool vote)\n"
906 			"{\n"
907 			"  uint groupOffset = gl_SubgroupID;\n"
908 			"  // One invocation in the group 0's the whole group's data\n"
909 			"  if (subgroupElect())\n"
910 			"  {\n"
911 			"    superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
912 			"  }\n"
913 			"  subgroupMemoryBarrierShared();\n"
914 			"  if (vote)\n"
915 			"  {\n"
916 			"    const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
917 			"    const highp uint bitToSet = 1u << invocationId;\n"
918 			"    switch (gl_SubgroupInvocationID / 32)\n"
919 			"    {\n"
920 			"    case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
921 			"    case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
922 			"    case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
923 			"    case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
924 			"    }\n"
925 			"  }\n"
926 			"  subgroupMemoryBarrierShared();\n"
927 			"  return packUint2x32(superSecretComputeShaderHelper[groupOffset].xy);\n"
928 			"}\n";
929 }
930 
getSubgroupSize(Context & context)931 deUint32 vkt::subgroups::getSubgroupSize (Context& context)
932 {
933 	return context.getSubgroupProperties().subgroupSize;
934 }
935 
maxSupportedSubgroupSize()936 deUint32 vkt::subgroups::maxSupportedSubgroupSize ()
937 {
938 	return 128u;
939 }
940 
getShaderStageName(VkShaderStageFlags stage)941 std::string vkt::subgroups::getShaderStageName (VkShaderStageFlags stage)
942 {
943 	switch (stage)
944 	{
945 		case VK_SHADER_STAGE_COMPUTE_BIT:					return "compute";
946 		case VK_SHADER_STAGE_FRAGMENT_BIT:					return "fragment";
947 		case VK_SHADER_STAGE_VERTEX_BIT:					return "vertex";
948 		case VK_SHADER_STAGE_GEOMETRY_BIT:					return "geometry";
949 		case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:		return "tess_control";
950 		case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:	return "tess_eval";
951 		case VK_SHADER_STAGE_RAYGEN_BIT_KHR:				return "rgen";
952 		case VK_SHADER_STAGE_ANY_HIT_BIT_KHR:				return "ahit";
953 		case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:			return "chit";
954 		case VK_SHADER_STAGE_MISS_BIT_KHR:					return "miss";
955 		case VK_SHADER_STAGE_INTERSECTION_BIT_KHR:			return "sect";
956 		case VK_SHADER_STAGE_CALLABLE_BIT_KHR:				return "call";
957 		default:											TCU_THROW(InternalError, "Unhandled stage");
958 	}
959 }
960 
getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit)961 std::string vkt::subgroups::getSubgroupFeatureName (vk::VkSubgroupFeatureFlagBits bit)
962 {
963 	switch (bit)
964 	{
965 		case VK_SUBGROUP_FEATURE_BASIC_BIT:				return "VK_SUBGROUP_FEATURE_BASIC_BIT";
966 		case VK_SUBGROUP_FEATURE_VOTE_BIT:				return "VK_SUBGROUP_FEATURE_VOTE_BIT";
967 		case VK_SUBGROUP_FEATURE_ARITHMETIC_BIT:		return "VK_SUBGROUP_FEATURE_ARITHMETIC_BIT";
968 		case VK_SUBGROUP_FEATURE_BALLOT_BIT:			return "VK_SUBGROUP_FEATURE_BALLOT_BIT";
969 		case VK_SUBGROUP_FEATURE_SHUFFLE_BIT:			return "VK_SUBGROUP_FEATURE_SHUFFLE_BIT";
970 		case VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT:	return "VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT";
971 		case VK_SUBGROUP_FEATURE_CLUSTERED_BIT:			return "VK_SUBGROUP_FEATURE_CLUSTERED_BIT";
972 		case VK_SUBGROUP_FEATURE_QUAD_BIT:				return "VK_SUBGROUP_FEATURE_QUAD_BIT";
973 		default:										TCU_THROW(InternalError, "Unknown subgroup feature category");
974 	}
975 }
976 
addNoSubgroupShader(SourceCollections & programCollection)977 void vkt::subgroups::addNoSubgroupShader (SourceCollections& programCollection)
978 {
979 	{
980 	/*
981 		"#version 450\n"
982 		"void main (void)\n"
983 		"{\n"
984 		"  float pixelSize = 2.0f/1024.0f;\n"
985 		"   float pixelPosition = pixelSize/2.0f - 1.0f;\n"
986 		"  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
987 		"  gl_PointSize = 1.0f;\n"
988 		"}\n"
989 	*/
990 		const std::string vertNoSubgroup =
991 			"; SPIR-V\n"
992 			"; Version: 1.3\n"
993 			"; Generator: Khronos Glslang Reference Front End; 1\n"
994 			"; Bound: 37\n"
995 			"; Schema: 0\n"
996 			"OpCapability Shader\n"
997 			"%1 = OpExtInstImport \"GLSL.std.450\"\n"
998 			"OpMemoryModel Logical GLSL450\n"
999 			"OpEntryPoint Vertex %4 \"main\" %22 %26\n"
1000 			"OpMemberDecorate %20 0 BuiltIn Position\n"
1001 			"OpMemberDecorate %20 1 BuiltIn PointSize\n"
1002 			"OpMemberDecorate %20 2 BuiltIn ClipDistance\n"
1003 			"OpMemberDecorate %20 3 BuiltIn CullDistance\n"
1004 			"OpDecorate %20 Block\n"
1005 			"OpDecorate %26 BuiltIn VertexIndex\n"
1006 			"%2 = OpTypeVoid\n"
1007 			"%3 = OpTypeFunction %2\n"
1008 			"%6 = OpTypeFloat 32\n"
1009 			"%7 = OpTypePointer Function %6\n"
1010 			"%9 = OpConstant %6 0.00195313\n"
1011 			"%12 = OpConstant %6 2\n"
1012 			"%14 = OpConstant %6 1\n"
1013 			"%16 = OpTypeVector %6 4\n"
1014 			"%17 = OpTypeInt 32 0\n"
1015 			"%18 = OpConstant %17 1\n"
1016 			"%19 = OpTypeArray %6 %18\n"
1017 			"%20 = OpTypeStruct %16 %6 %19 %19\n"
1018 			"%21 = OpTypePointer Output %20\n"
1019 			"%22 = OpVariable %21 Output\n"
1020 			"%23 = OpTypeInt 32 1\n"
1021 			"%24 = OpConstant %23 0\n"
1022 			"%25 = OpTypePointer Input %23\n"
1023 			"%26 = OpVariable %25 Input\n"
1024 			"%33 = OpConstant %6 0\n"
1025 			"%35 = OpTypePointer Output %16\n"
1026 			"%37 = OpConstant %23 1\n"
1027 			"%38 = OpTypePointer Output %6\n"
1028 			"%4 = OpFunction %2 None %3\n"
1029 			"%5 = OpLabel\n"
1030 			"%8 = OpVariable %7 Function\n"
1031 			"%10 = OpVariable %7 Function\n"
1032 			"OpStore %8 %9\n"
1033 			"%11 = OpLoad %6 %8\n"
1034 			"%13 = OpFDiv %6 %11 %12\n"
1035 			"%15 = OpFSub %6 %13 %14\n"
1036 			"OpStore %10 %15\n"
1037 			"%27 = OpLoad %23 %26\n"
1038 			"%28 = OpConvertSToF %6 %27\n"
1039 			"%29 = OpLoad %6 %8\n"
1040 			"%30 = OpFMul %6 %28 %29\n"
1041 			"%31 = OpLoad %6 %10\n"
1042 			"%32 = OpFAdd %6 %30 %31\n"
1043 			"%34 = OpCompositeConstruct %16 %32 %33 %33 %14\n"
1044 			"%36 = OpAccessChain %35 %22 %24\n"
1045 			"OpStore %36 %34\n"
1046 			"%39 = OpAccessChain %38 %22 %37\n"
1047 			"OpStore %39 %14\n"
1048 			"OpReturn\n"
1049 			"OpFunctionEnd\n";
1050 		programCollection.spirvAsmSources.add("vert_noSubgroup") << vertNoSubgroup;
1051 	}
1052 
1053 	{
1054 	/*
1055 		"#version 450\n"
1056 		"layout(vertices=1) out;\n"
1057 		"\n"
1058 		"void main (void)\n"
1059 		"{\n"
1060 		"  if (gl_InvocationID == 0)\n"
1061 		"  {\n"
1062 		"    gl_TessLevelOuter[0] = 1.0f;\n"
1063 		"    gl_TessLevelOuter[1] = 1.0f;\n"
1064 		"  }\n"
1065 		"  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1066 		"}\n"
1067 	*/
1068 		const std::string tescNoSubgroup =
1069 			"; SPIR-V\n"
1070 			"; Version: 1.3\n"
1071 			"; Generator: Khronos Glslang Reference Front End; 1\n"
1072 			"; Bound: 45\n"
1073 			"; Schema: 0\n"
1074 			"OpCapability Tessellation\n"
1075 			"%1 = OpExtInstImport \"GLSL.std.450\"\n"
1076 			"OpMemoryModel Logical GLSL450\n"
1077 			"OpEntryPoint TessellationControl %4 \"main\" %8 %20 %32 %38\n"
1078 			"OpExecutionMode %4 OutputVertices 1\n"
1079 			"OpDecorate %8 BuiltIn InvocationId\n"
1080 			"OpDecorate %20 Patch\n"
1081 			"OpDecorate %20 BuiltIn TessLevelOuter\n"
1082 			"OpMemberDecorate %29 0 BuiltIn Position\n"
1083 			"OpMemberDecorate %29 1 BuiltIn PointSize\n"
1084 			"OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
1085 			"OpMemberDecorate %29 3 BuiltIn CullDistance\n"
1086 			"OpDecorate %29 Block\n"
1087 			"OpMemberDecorate %34 0 BuiltIn Position\n"
1088 			"OpMemberDecorate %34 1 BuiltIn PointSize\n"
1089 			"OpMemberDecorate %34 2 BuiltIn ClipDistance\n"
1090 			"OpMemberDecorate %34 3 BuiltIn CullDistance\n"
1091 			"OpDecorate %34 Block\n"
1092 			"%2 = OpTypeVoid\n"
1093 			"%3 = OpTypeFunction %2\n"
1094 			"%6 = OpTypeInt 32 1\n"
1095 			"%7 = OpTypePointer Input %6\n"
1096 			"%8 = OpVariable %7 Input\n"
1097 			"%10 = OpConstant %6 0\n"
1098 			"%11 = OpTypeBool\n"
1099 			"%15 = OpTypeFloat 32\n"
1100 			"%16 = OpTypeInt 32 0\n"
1101 			"%17 = OpConstant %16 4\n"
1102 			"%18 = OpTypeArray %15 %17\n"
1103 			"%19 = OpTypePointer Output %18\n"
1104 			"%20 = OpVariable %19 Output\n"
1105 			"%21 = OpConstant %15 1\n"
1106 			"%22 = OpTypePointer Output %15\n"
1107 			"%24 = OpConstant %6 1\n"
1108 			"%26 = OpTypeVector %15 4\n"
1109 			"%27 = OpConstant %16 1\n"
1110 			"%28 = OpTypeArray %15 %27\n"
1111 			"%29 = OpTypeStruct %26 %15 %28 %28\n"
1112 			"%30 = OpTypeArray %29 %27\n"
1113 			"%31 = OpTypePointer Output %30\n"
1114 			"%32 = OpVariable %31 Output\n"
1115 			"%34 = OpTypeStruct %26 %15 %28 %28\n"
1116 			"%35 = OpConstant %16 32\n"
1117 			"%36 = OpTypeArray %34 %35\n"
1118 			"%37 = OpTypePointer Input %36\n"
1119 			"%38 = OpVariable %37 Input\n"
1120 			"%40 = OpTypePointer Input %26\n"
1121 			"%43 = OpTypePointer Output %26\n"
1122 			"%4 = OpFunction %2 None %3\n"
1123 			"%5 = OpLabel\n"
1124 			"%9 = OpLoad %6 %8\n"
1125 			"%12 = OpIEqual %11 %9 %10\n"
1126 			"OpSelectionMerge %14 None\n"
1127 			"OpBranchConditional %12 %13 %14\n"
1128 			"%13 = OpLabel\n"
1129 			"%23 = OpAccessChain %22 %20 %10\n"
1130 			"OpStore %23 %21\n"
1131 			"%25 = OpAccessChain %22 %20 %24\n"
1132 			"OpStore %25 %21\n"
1133 			"OpBranch %14\n"
1134 			"%14 = OpLabel\n"
1135 			"%33 = OpLoad %6 %8\n"
1136 			"%39 = OpLoad %6 %8\n"
1137 			"%41 = OpAccessChain %40 %38 %39 %10\n"
1138 			"%42 = OpLoad %26 %41\n"
1139 			"%44 = OpAccessChain %43 %32 %33 %10\n"
1140 			"OpStore %44 %42\n"
1141 			"OpReturn\n"
1142 			"OpFunctionEnd\n";
1143 		programCollection.spirvAsmSources.add("tesc_noSubgroup") << tescNoSubgroup;
1144 	}
1145 
1146 	{
1147 	/*
1148 		"#version 450\n"
1149 		"layout(isolines) in;\n"
1150 		"\n"
1151 		"void main (void)\n"
1152 		"{\n"
1153 		"  float pixelSize = 2.0f/1024.0f;\n"
1154 		"  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
1155 		"}\n";
1156 	*/
1157 		const std::string teseNoSubgroup =
1158 			"; SPIR-V\n"
1159 			"; Version: 1.3\n"
1160 			"; Generator: Khronos Glslang Reference Front End; 2\n"
1161 			"; Bound: 42\n"
1162 			"; Schema: 0\n"
1163 			"OpCapability Tessellation\n"
1164 			"%1 = OpExtInstImport \"GLSL.std.450\"\n"
1165 			"OpMemoryModel Logical GLSL450\n"
1166 			"OpEntryPoint TessellationEvaluation %4 \"main\" %16 %23 %29\n"
1167 			"OpExecutionMode %4 Isolines\n"
1168 			"OpExecutionMode %4 SpacingEqual\n"
1169 			"OpExecutionMode %4 VertexOrderCcw\n"
1170 			"OpMemberDecorate %14 0 BuiltIn Position\n"
1171 			"OpMemberDecorate %14 1 BuiltIn PointSize\n"
1172 			"OpMemberDecorate %14 2 BuiltIn ClipDistance\n"
1173 			"OpMemberDecorate %14 3 BuiltIn CullDistance\n"
1174 			"OpDecorate %14 Block\n"
1175 			"OpMemberDecorate %19 0 BuiltIn Position\n"
1176 			"OpMemberDecorate %19 1 BuiltIn PointSize\n"
1177 			"OpMemberDecorate %19 2 BuiltIn ClipDistance\n"
1178 			"OpMemberDecorate %19 3 BuiltIn CullDistance\n"
1179 			"OpDecorate %19 Block\n"
1180 			"OpDecorate %29 BuiltIn TessCoord\n"
1181 			"%2 = OpTypeVoid\n"
1182 			"%3 = OpTypeFunction %2\n"
1183 			"%6 = OpTypeFloat 32\n"
1184 			"%7 = OpTypePointer Function %6\n"
1185 			"%9 = OpConstant %6 0.00195313\n"
1186 			"%10 = OpTypeVector %6 4\n"
1187 			"%11 = OpTypeInt 32 0\n"
1188 			"%12 = OpConstant %11 1\n"
1189 			"%13 = OpTypeArray %6 %12\n"
1190 			"%14 = OpTypeStruct %10 %6 %13 %13\n"
1191 			"%15 = OpTypePointer Output %14\n"
1192 			"%16 = OpVariable %15 Output\n"
1193 			"%17 = OpTypeInt 32 1\n"
1194 			"%18 = OpConstant %17 0\n"
1195 			"%19 = OpTypeStruct %10 %6 %13 %13\n"
1196 			"%20 = OpConstant %11 32\n"
1197 			"%21 = OpTypeArray %19 %20\n"
1198 			"%22 = OpTypePointer Input %21\n"
1199 			"%23 = OpVariable %22 Input\n"
1200 			"%24 = OpTypePointer Input %10\n"
1201 			"%27 = OpTypeVector %6 3\n"
1202 			"%28 = OpTypePointer Input %27\n"
1203 			"%29 = OpVariable %28 Input\n"
1204 			"%30 = OpConstant %11 0\n"
1205 			"%31 = OpTypePointer Input %6\n"
1206 			"%36 = OpConstant %6 2\n"
1207 			"%40 = OpTypePointer Output %10\n"
1208 			"%4 = OpFunction %2 None %3\n"
1209 			"%5 = OpLabel\n"
1210 			"%8 = OpVariable %7 Function\n"
1211 			"OpStore %8 %9\n"
1212 			"%25 = OpAccessChain %24 %23 %18 %18\n"
1213 			"%26 = OpLoad %10 %25\n"
1214 			"%32 = OpAccessChain %31 %29 %30\n"
1215 			"%33 = OpLoad %6 %32\n"
1216 			"%34 = OpLoad %6 %8\n"
1217 			"%35 = OpFMul %6 %33 %34\n"
1218 			"%37 = OpFDiv %6 %35 %36\n"
1219 			"%38 = OpCompositeConstruct %10 %37 %37 %37 %37\n"
1220 			"%39 = OpFAdd %10 %26 %38\n"
1221 			"%41 = OpAccessChain %40 %16 %18\n"
1222 			"OpStore %41 %39\n"
1223 			"OpReturn\n"
1224 			"OpFunctionEnd\n";
1225 		programCollection.spirvAsmSources.add("tese_noSubgroup") << teseNoSubgroup;
1226 	}
1227 
1228 }
1229 
getFramebufferBufferDeclarations(const VkFormat & format,const std::vector<std::string> & declarations,const deUint32 stage)1230 static std::string getFramebufferBufferDeclarations (const VkFormat&					format,
1231 													 const std::vector<std::string>&	declarations,
1232 													 const deUint32						stage)
1233 {
1234 	if (declarations.empty())
1235 	{
1236 		const std::string	name	= (stage == 0) ? "result" : "out_color";
1237 		const std::string	suffix	= (stage == 2) ? "[]" : "";
1238 		const std::string	result	=
1239 			"layout(location = 0) out float " + name + suffix + ";\n"
1240 			"layout(set = 0, binding = 0) uniform Buffer1\n"
1241 			"{\n"
1242 			"  " + de::toString(subgroups::getFormatNameForGLSL(format)) + " data[" + de::toString(subgroups::maxSupportedSubgroupSize()) + "];\n"
1243 			"};\n";
1244 
1245 		return result;
1246 	}
1247 	else
1248 	{
1249 		return declarations[stage];
1250 	}
1251 }
1252 
initStdFrameBufferPrograms(SourceCollections & programCollection,const vk::ShaderBuildOptions & buildOptions,VkShaderStageFlags shaderStage,VkFormat format,bool gsPointSize,const std::string & extHeader,const std::string & testSrc,const std::string & helperStr,const std::vector<std::string> & declarations)1253 void vkt::subgroups::initStdFrameBufferPrograms (SourceCollections&					programCollection,
1254 												 const vk::ShaderBuildOptions&		buildOptions,
1255 												 VkShaderStageFlags					shaderStage,
1256 												 VkFormat							format,
1257 												 bool								gsPointSize,
1258 												 const std::string&					extHeader,
1259 												 const std::string&					testSrc,
1260 												 const std::string&					helperStr,
1261 												 const std::vector<std::string>&	declarations)
1262 {
1263 	subgroups::setFragmentShaderFrameBuffer(programCollection);
1264 
1265 	if (shaderStage != VK_SHADER_STAGE_VERTEX_BIT)
1266 		subgroups::setVertexShaderFrameBuffer(programCollection);
1267 
1268 	if (shaderStage == VK_SHADER_STAGE_VERTEX_BIT)
1269 	{
1270 		std::ostringstream vertex;
1271 
1272 		vertex << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1273 			<< extHeader
1274 			<< "layout(location = 0) in highp vec4 in_position;\n"
1275 			<< getFramebufferBufferDeclarations(format, declarations, 0)
1276 			<< "\n"
1277 			<< helperStr
1278 			<< "void main (void)\n"
1279 			<< "{\n"
1280 			<< "  uint tempRes;\n"
1281 			<< testSrc
1282 			<< "  result = float(tempRes);\n"
1283 			<< "  gl_Position = in_position;\n"
1284 			<< "  gl_PointSize = 1.0f;\n"
1285 			<< "}\n";
1286 
1287 		programCollection.glslSources.add("vert") << glu::VertexSource(vertex.str()) << buildOptions;
1288 	}
1289 	else if (shaderStage == VK_SHADER_STAGE_GEOMETRY_BIT)
1290 	{
1291 		std::ostringstream geometry;
1292 
1293 		geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1294 			<< extHeader
1295 			<< "layout(points) in;\n"
1296 			<< "layout(points, max_vertices = 1) out;\n"
1297 			<< getFramebufferBufferDeclarations(format, declarations, 1)
1298 			<< "\n"
1299 			<< helperStr
1300 			<< "void main (void)\n"
1301 			<< "{\n"
1302 			<< "  uint tempRes;\n"
1303 			<< testSrc
1304 			<< "  out_color = float(tempRes);\n"
1305 			<< "  gl_Position = gl_in[0].gl_Position;\n"
1306 			<< (gsPointSize ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "")
1307 			<< "  EmitVertex();\n"
1308 			<< "  EndPrimitive();\n"
1309 			<< "}\n";
1310 
1311 		programCollection.glslSources.add("geometry") << glu::GeometrySource(geometry.str()) << buildOptions;
1312 	}
1313 	else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
1314 	{
1315 		std::ostringstream controlSource;
1316 
1317 		controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1318 			<< extHeader
1319 			<< "layout(vertices = 2) out;\n"
1320 			<< getFramebufferBufferDeclarations(format, declarations, 2)
1321 			<< "\n"
1322 			<< helperStr
1323 			<< "void main (void)\n"
1324 			<< "{\n"
1325 			<< "  if (gl_InvocationID == 0)\n"
1326 			<< "  {\n"
1327 			<< "    gl_TessLevelOuter[0] = 1.0f;\n"
1328 			<< "    gl_TessLevelOuter[1] = 1.0f;\n"
1329 			<< "  }\n"
1330 			<< "  uint tempRes;\n"
1331 			<< testSrc
1332 			<< "  out_color[gl_InvocationID] = float(tempRes);\n"
1333 			<< "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1334 			<< (gsPointSize ? "  gl_out[gl_InvocationID].gl_PointSize = gl_in[gl_InvocationID].gl_PointSize;\n" : "")
1335 			<< "}\n";
1336 
1337 		programCollection.glslSources.add("tesc") << glu::TessellationControlSource(controlSource.str()) << buildOptions;
1338 		subgroups::setTesEvalShaderFrameBuffer(programCollection);
1339 	}
1340 	else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
1341 	{
1342 		ostringstream evaluationSource;
1343 
1344 		evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1345 			<< extHeader
1346 			<< "layout(isolines, equal_spacing, ccw ) in;\n"
1347 			<< getFramebufferBufferDeclarations(format, declarations, 3)
1348 			<< "\n"
1349 			<< helperStr
1350 			<< "void main (void)\n"
1351 			<< "{\n"
1352 			<< "  uint tempRes;\n"
1353 			<< testSrc
1354 			<< "  out_color = float(tempRes);\n"
1355 			<< "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
1356 			<< (gsPointSize ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "")
1357 			<< "}\n";
1358 
1359 		subgroups::setTesCtrlShaderFrameBuffer(programCollection);
1360 		programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
1361 	}
1362 	else
1363 	{
1364 		DE_FATAL("Unsupported shader stage");
1365 	}
1366 }
1367 
getBufferDeclarations(vk::VkShaderStageFlags shaderStage,const std::string & formatName,const std::vector<std::string> & declarations,const deUint32 stage)1368 static std::string getBufferDeclarations (vk::VkShaderStageFlags			shaderStage,
1369 										  const std::string&				formatName,
1370 										  const std::vector<std::string>&	declarations,
1371 										  const deUint32					stage)
1372 {
1373 	if (declarations.empty())
1374 	{
1375 		const deUint32	stageCount	= vkt::subgroups::getStagesCount(shaderStage);
1376 		const deUint32	binding0	= stage;
1377 		const deUint32	binding1	= stageCount;
1378 		const bool		fragment	= (shaderStage & VK_SHADER_STAGE_FRAGMENT_BIT) && (stage == stageCount);
1379 		const string	buffer1		= fragment
1380 									? "layout(location = 0) out uint result;\n"
1381 									: "layout(set = 0, binding = " + de::toString(binding0) + ", std430) buffer Buffer1\n"
1382 									  "{\n"
1383 									  "  uint result[];\n"
1384 									  "};\n";
1385 		//todo boza I suppose it can be "layout(set = 0, binding = " + de::toString(binding1) + ", std430) readonly buffer Buffer2\n"
1386 		const string	buffer2		= "layout(set = 0, binding = " + de::toString(binding1) + ", std430)" + (stageCount == 1 ? "" : " readonly") + " buffer Buffer" + (fragment ? "1" : "2") + "\n"
1387 									  "{\n"
1388 									  "  " + formatName + " data[];\n"
1389 									  "};\n";
1390 
1391 		return buffer1 + buffer2;
1392 	}
1393 	else
1394 	{
1395 		return declarations[stage];
1396 	}
1397 }
1398 
initStdPrograms(vk::SourceCollections & programCollection,const vk::ShaderBuildOptions & buildOptions,vk::VkShaderStageFlags shaderStage,vk::VkFormat format,bool gsPointSize,const std::string & extHeader,const std::string & testSrc,const std::string & helperStr,const std::vector<std::string> & declarations,const bool avoidHelperInvocations,const std::string & tempRes)1399 void vkt::subgroups::initStdPrograms (vk::SourceCollections&			programCollection,
1400 									  const vk::ShaderBuildOptions&		buildOptions,
1401 									  vk::VkShaderStageFlags			shaderStage,
1402 									  vk::VkFormat						format,
1403 									  bool								gsPointSize,
1404 									  const std::string&				extHeader,
1405 									  const std::string&				testSrc,
1406 									  const std::string&				helperStr,
1407 									  const std::vector<std::string>&	declarations,
1408 									  const bool						avoidHelperInvocations,
1409 									  const std::string&				tempRes)
1410 {
1411 	const std::string	formatName	= subgroups::getFormatNameForGLSL(format);
1412 
1413 	if (isAllComputeStages(shaderStage))
1414 	{
1415 		std::ostringstream	src;
1416 
1417 		src << "#version 450\n"
1418 			<< extHeader
1419 			<< "layout (local_size_x_id = 0, local_size_y_id = 1, "
1420 			"local_size_z_id = 2) in;\n"
1421 			<< getBufferDeclarations(shaderStage, formatName, declarations, 0)
1422 			<< "\n"
1423 			<< helperStr
1424 			<< "void main (void)\n"
1425 			<< "{\n"
1426 			<< "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1427 			<< "  highp uint offset = globalSize.x * ((globalSize.y * "
1428 			"gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1429 			"gl_GlobalInvocationID.x;\n"
1430 			<< tempRes
1431 			<< testSrc
1432 			<< "  result[offset] = tempRes;\n"
1433 			<< "}\n";
1434 
1435 		programCollection.glslSources.add("comp") << glu::ComputeSource(src.str()) << buildOptions;
1436 	}
1437 	else if (isAllGraphicsStages(shaderStage))
1438 	{
1439 		const string vertex =
1440 			"#version 450\n"
1441 			+ extHeader
1442 			+ getBufferDeclarations(shaderStage, formatName, declarations, 0) +
1443 			"\n"
1444 			+ helperStr +
1445 			"void main (void)\n"
1446 			"{\n"
1447 			"  uint tempRes;\n"
1448 			+ testSrc +
1449 			"  result[gl_VertexIndex] = tempRes;\n"
1450 			"  float pixelSize = 2.0f/1024.0f;\n"
1451 			"  float pixelPosition = pixelSize/2.0f - 1.0f;\n"
1452 			"  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
1453 			"  gl_PointSize = 1.0f;\n"
1454 			"}\n";
1455 
1456 		const string tesc =
1457 			"#version 450\n"
1458 			+ extHeader +
1459 			"layout(vertices=1) out;\n"
1460 			+ getBufferDeclarations(shaderStage, formatName, declarations, 1) +
1461 			"\n"
1462 			+ helperStr +
1463 			"void main (void)\n"
1464 			"{\n"
1465 			+ tempRes
1466 			+ testSrc +
1467 			"  result[gl_PrimitiveID] = tempRes;\n"
1468 			"  if (gl_InvocationID == 0)\n"
1469 			"  {\n"
1470 			"    gl_TessLevelOuter[0] = 1.0f;\n"
1471 			"    gl_TessLevelOuter[1] = 1.0f;\n"
1472 			"  }\n"
1473 			"  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1474 			+ (gsPointSize ? "  gl_out[gl_InvocationID].gl_PointSize = gl_in[gl_InvocationID].gl_PointSize;\n" : "") +
1475 			"}\n";
1476 
1477 		const string tese =
1478 			"#version 450\n"
1479 			+ extHeader +
1480 			"layout(isolines) in;\n"
1481 			+ getBufferDeclarations(shaderStage, formatName, declarations, 2) +
1482 			"\n"
1483 			+ helperStr +
1484 			"void main (void)\n"
1485 			"{\n"
1486 			+ tempRes
1487 			+ testSrc +
1488 			"  result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempRes;\n"
1489 			"  float pixelSize = 2.0f/1024.0f;\n"
1490 			"  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
1491 			+ (gsPointSize ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "") +
1492 			"}\n";
1493 
1494 		const string geometry =
1495 			"#version 450\n"
1496 			+ extHeader +
1497 			"layout(${TOPOLOGY}) in;\n"
1498 			"layout(points, max_vertices = 1) out;\n"
1499 			+ getBufferDeclarations(shaderStage, formatName, declarations, 3) +
1500 			"\n"
1501 			+ helperStr +
1502 			"void main (void)\n"
1503 			"{\n"
1504 			+ tempRes
1505 			+ testSrc +
1506 			"  result[gl_PrimitiveIDIn] = tempRes;\n"
1507 			"  gl_Position = gl_in[0].gl_Position;\n"
1508 			+ (gsPointSize ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "") +
1509 			"  EmitVertex();\n"
1510 			"  EndPrimitive();\n"
1511 			"}\n";
1512 
1513 		const string fragment =
1514 			"#version 450\n"
1515 			+ extHeader
1516 			+ getBufferDeclarations(shaderStage, formatName, declarations, 4)
1517 			+ helperStr +
1518 			"void main (void)\n"
1519 			"{\n"
1520 			+ (avoidHelperInvocations ? "  if (gl_HelperInvocation) return;\n" : "")
1521 			+ tempRes
1522 			+ testSrc +
1523 			"  result = tempRes;\n"
1524 			"}\n";
1525 
1526 		subgroups::addNoSubgroupShader(programCollection);
1527 
1528 		programCollection.glslSources.add("vert") << glu::VertexSource(vertex) << buildOptions;
1529 		programCollection.glslSources.add("tesc") << glu::TessellationControlSource(tesc) << buildOptions;
1530 		programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(tese) << buildOptions;
1531 		subgroups::addGeometryShadersFromTemplate(geometry, buildOptions, programCollection.glslSources);
1532 		programCollection.glslSources.add("fragment") << glu::FragmentSource(fragment)<< buildOptions;
1533 	}
1534 	else if (isAllRayTracingStages(shaderStage))
1535 	{
1536 		const std::string	rgenShader	=
1537 			"#version 460 core\n"
1538 			"#extension GL_EXT_ray_tracing: require\n"
1539 			+ extHeader +
1540 			"layout(location = 0) rayPayloadEXT uvec4 payload;\n"
1541 			"layout(location = 0) callableDataEXT uvec4 callData;"
1542 			"layout(set = 1, binding = 0) uniform accelerationStructureEXT topLevelAS;\n"
1543 			+ getBufferDeclarations(shaderStage, formatName, declarations, 0) +
1544 			"\n"
1545 			+ helperStr +
1546 			"void main()\n"
1547 			"{\n"
1548 			+ tempRes
1549 			+ testSrc +
1550 			"  uint  rayFlags   = 0;\n"
1551 			"  uint  cullMask   = 0xFF;\n"
1552 			"  float tmin       = 0.0;\n"
1553 			"  float tmax       = 9.0;\n"
1554 			"  vec3  origin     = vec3((float(gl_LaunchIDEXT.x) + 0.5f) / float(gl_LaunchSizeEXT.x), (float(gl_LaunchIDEXT.y) + 0.5f) / float(gl_LaunchSizeEXT.y), 0.0);\n"
1555 			"  vec3  directHit  = vec3(0.0, 0.0, -1.0);\n"
1556 			"  vec3  directMiss = vec3(0.0, 0.0, +1.0);\n"
1557 			"\n"
1558 			"  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directHit, tmax, 0);\n"
1559 			"  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directMiss, tmax, 0);\n"
1560 			"  executeCallableEXT(0, 0);"
1561 			"  result[gl_LaunchIDEXT.x] = tempRes;\n"
1562 			"}\n";
1563 		const std::string	ahitShader	=
1564 			"#version 460 core\n"
1565 			"#extension GL_EXT_ray_tracing: require\n"
1566 			+ extHeader +
1567 			"hitAttributeEXT vec3 attribs;\n"
1568 			"layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
1569 			+ getBufferDeclarations(shaderStage, formatName, declarations, 1) +
1570 			"\n"
1571 			+ helperStr +
1572 			"void main()\n"
1573 			"{\n"
1574 			+ tempRes
1575 			+ testSrc +
1576 			"  result[gl_LaunchIDEXT.x] = tempRes;\n"
1577 			"}\n";
1578 		const std::string	chitShader	=
1579 			"#version 460 core\n"
1580 			"#extension GL_EXT_ray_tracing: require\n"
1581 			+ extHeader +
1582 			"hitAttributeEXT vec3 attribs;\n"
1583 			"layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
1584 			+ getBufferDeclarations(shaderStage, formatName, declarations, 2) +
1585 			"\n"
1586 			+ helperStr +
1587 			"void main()\n"
1588 			"{\n"
1589 			+ tempRes
1590 			+ testSrc +
1591 			"  result[gl_LaunchIDEXT.x] = tempRes;\n"
1592 			"}\n";
1593 		const std::string	missShader	=
1594 			"#version 460 core\n"
1595 			"#extension GL_EXT_ray_tracing: require\n"
1596 			+ extHeader +
1597 			"layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
1598 			+ getBufferDeclarations(shaderStage, formatName, declarations, 3) +
1599 			"\n"
1600 			+ helperStr +
1601 			"void main()\n"
1602 			"{\n"
1603 			+ tempRes
1604 			+ testSrc +
1605 			"  result[gl_LaunchIDEXT.x] = tempRes;\n"
1606 			"}\n";
1607 		const std::string	sectShader	=
1608 			"#version 460 core\n"
1609 			"#extension GL_EXT_ray_tracing: require\n"
1610 			+ extHeader +
1611 			"hitAttributeEXT vec3 hitAttribute;\n"
1612 			+ getBufferDeclarations(shaderStage, formatName, declarations, 4) +
1613 			"\n"
1614 			+ helperStr +
1615 			"void main()\n"
1616 			"{\n"
1617 			+ tempRes
1618 			+ testSrc +
1619 			"  reportIntersectionEXT(0.75f, gl_HitKindFrontFacingTriangleEXT);\n"
1620 			"  result[gl_LaunchIDEXT.x] = tempRes;\n"
1621 			"}\n";
1622 		const std::string	callShader	=
1623 			"#version 460 core\n"
1624 			"#extension GL_EXT_ray_tracing: require\n"
1625 			+ extHeader +
1626 			"layout(location = 0) callableDataInEXT float callData;\n"
1627 			+ getBufferDeclarations(shaderStage, formatName, declarations, 5) +
1628 			"\n"
1629 			+ helperStr +
1630 			"void main()\n"
1631 			"{\n"
1632 			+ tempRes
1633 			+ testSrc +
1634 			"  result[gl_LaunchIDEXT.x] = tempRes;\n"
1635 			"}\n";
1636 
1637 		programCollection.glslSources.add("rgen") << glu::RaygenSource		(rgenShader) << buildOptions;
1638 		programCollection.glslSources.add("ahit") << glu::AnyHitSource		(ahitShader) << buildOptions;
1639 		programCollection.glslSources.add("chit") << glu::ClosestHitSource	(chitShader) << buildOptions;
1640 		programCollection.glslSources.add("miss") << glu::MissSource		(missShader) << buildOptions;
1641 		programCollection.glslSources.add("sect") << glu::IntersectionSource(sectShader) << buildOptions;
1642 		programCollection.glslSources.add("call") << glu::CallableSource	(callShader) << buildOptions;
1643 
1644 		subgroups::addRayTracingNoSubgroupShader(programCollection);
1645 	}
1646 	else
1647 		TCU_THROW(InternalError, "Unknown stage or invalid stage set");
1648 
1649 }
1650 
isSubgroupSupported(Context & context)1651 bool vkt::subgroups::isSubgroupSupported (Context& context)
1652 {
1653 	return context.contextSupports(vk::ApiVersion(1, 1, 0));
1654 }
1655 
areSubgroupOperationsSupportedForStage(Context & context,const VkShaderStageFlags stage)1656 bool vkt::subgroups::areSubgroupOperationsSupportedForStage (Context& context, const VkShaderStageFlags stage)
1657 {
1658 	return (stage & (context.getSubgroupProperties().supportedStages)) ? true : false;
1659 }
1660 
isSubgroupFeatureSupportedForDevice(Context & context,VkSubgroupFeatureFlagBits bit)1661 bool vkt::subgroups::isSubgroupFeatureSupportedForDevice (Context& context, VkSubgroupFeatureFlagBits bit)
1662 {
1663 	return (bit & (context.getSubgroupProperties().supportedOperations)) ? true : false;
1664 }
1665 
isFragmentSSBOSupportedForDevice(Context & context)1666 bool vkt::subgroups::isFragmentSSBOSupportedForDevice (Context& context)
1667 {
1668 	return context.getDeviceFeatures().fragmentStoresAndAtomics ? true : false;
1669 }
1670 
isVertexSSBOSupportedForDevice(Context & context)1671 bool vkt::subgroups::isVertexSSBOSupportedForDevice (Context& context)
1672 {
1673 	return context.getDeviceFeatures().vertexPipelineStoresAndAtomics ? true : false;
1674 }
1675 
isInt64SupportedForDevice(Context & context)1676 bool vkt::subgroups::isInt64SupportedForDevice (Context& context)
1677 {
1678 	return context.getDeviceFeatures().shaderInt64 ? true : false;
1679 }
1680 
isTessellationAndGeometryPointSizeSupported(Context & context)1681 bool vkt::subgroups::isTessellationAndGeometryPointSizeSupported (Context& context)
1682 {
1683 	return context.getDeviceFeatures().shaderTessellationAndGeometryPointSize ? true : false;
1684 }
1685 
is16BitUBOStorageSupported(Context & context)1686 bool vkt::subgroups::is16BitUBOStorageSupported (Context& context)
1687 {
1688 	return context.get16BitStorageFeatures().uniformAndStorageBuffer16BitAccess ? true : false;
1689 }
1690 
is8BitUBOStorageSupported(Context & context)1691 bool vkt::subgroups::is8BitUBOStorageSupported (Context& context)
1692 {
1693 	return context.get8BitStorageFeatures().uniformAndStorageBuffer8BitAccess ? true : false;
1694 }
1695 
isFormatSupportedForDevice(Context & context,vk::VkFormat format)1696 bool vkt::subgroups::isFormatSupportedForDevice (Context& context, vk::VkFormat format)
1697 {
1698 	const VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures&	subgroupExtendedTypesFeatures	= context.getShaderSubgroupExtendedTypesFeatures();
1699 	const VkPhysicalDeviceShaderFloat16Int8Features&			float16Int8Features				= context.getShaderFloat16Int8Features();
1700 	const VkPhysicalDevice16BitStorageFeatures&					storage16bit					= context.get16BitStorageFeatures();
1701 	const VkPhysicalDevice8BitStorageFeatures&					storage8bit						= context.get8BitStorageFeatures();
1702 	const VkPhysicalDeviceFeatures&								features						= context.getDeviceFeatures();
1703 	bool														shaderFloat64					= features.shaderFloat64 ? true : false;
1704 	bool														shaderInt16						= features.shaderInt16 ? true : false;
1705 	bool														shaderInt64						= features.shaderInt64 ? true : false;
1706 	bool														shaderSubgroupExtendedTypes		= false;
1707 	bool														shaderFloat16					= false;
1708 	bool														shaderInt8						= false;
1709 	bool														storageBuffer16BitAccess		= false;
1710 	bool														storageBuffer8BitAccess			= false;
1711 
1712 	if (context.isDeviceFunctionalitySupported("VK_KHR_shader_subgroup_extended_types") &&
1713 		context.isDeviceFunctionalitySupported("VK_KHR_shader_float16_int8"))
1714 	{
1715 		shaderSubgroupExtendedTypes	= subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes ? true : false;
1716 		shaderFloat16				= float16Int8Features.shaderFloat16 ? true : false;
1717 		shaderInt8					= float16Int8Features.shaderInt8 ? true : false;
1718 
1719 		if ( context.isDeviceFunctionalitySupported("VK_KHR_16bit_storage") )
1720 			storageBuffer16BitAccess = storage16bit.storageBuffer16BitAccess ? true : false;
1721 
1722 		if (context.isDeviceFunctionalitySupported("VK_KHR_8bit_storage"))
1723 			storageBuffer8BitAccess = storage8bit.storageBuffer8BitAccess ? true : false;
1724 	}
1725 
1726 	switch (format)
1727 	{
1728 		default:
1729 			return true;
1730 		case VK_FORMAT_R16_SFLOAT:
1731 		case VK_FORMAT_R16G16_SFLOAT:
1732 		case VK_FORMAT_R16G16B16_SFLOAT:
1733 		case VK_FORMAT_R16G16B16A16_SFLOAT:
1734 			return shaderSubgroupExtendedTypes && shaderFloat16 && storageBuffer16BitAccess;
1735 		case VK_FORMAT_R64_SFLOAT:
1736 		case VK_FORMAT_R64G64_SFLOAT:
1737 		case VK_FORMAT_R64G64B64_SFLOAT:
1738 		case VK_FORMAT_R64G64B64A64_SFLOAT:
1739 			return shaderFloat64;
1740 		case VK_FORMAT_R8_SINT:
1741 		case VK_FORMAT_R8G8_SINT:
1742 		case VK_FORMAT_R8G8B8_SINT:
1743 		case VK_FORMAT_R8G8B8A8_SINT:
1744 		case VK_FORMAT_R8_UINT:
1745 		case VK_FORMAT_R8G8_UINT:
1746 		case VK_FORMAT_R8G8B8_UINT:
1747 		case VK_FORMAT_R8G8B8A8_UINT:
1748 			return shaderSubgroupExtendedTypes && shaderInt8 && storageBuffer8BitAccess;
1749 		case VK_FORMAT_R16_SINT:
1750 		case VK_FORMAT_R16G16_SINT:
1751 		case VK_FORMAT_R16G16B16_SINT:
1752 		case VK_FORMAT_R16G16B16A16_SINT:
1753 		case VK_FORMAT_R16_UINT:
1754 		case VK_FORMAT_R16G16_UINT:
1755 		case VK_FORMAT_R16G16B16_UINT:
1756 		case VK_FORMAT_R16G16B16A16_UINT:
1757 			return shaderSubgroupExtendedTypes && shaderInt16 && storageBuffer16BitAccess;
1758 		case VK_FORMAT_R64_SINT:
1759 		case VK_FORMAT_R64G64_SINT:
1760 		case VK_FORMAT_R64G64B64_SINT:
1761 		case VK_FORMAT_R64G64B64A64_SINT:
1762 		case VK_FORMAT_R64_UINT:
1763 		case VK_FORMAT_R64G64_UINT:
1764 		case VK_FORMAT_R64G64B64_UINT:
1765 		case VK_FORMAT_R64G64B64A64_UINT:
1766 			return shaderSubgroupExtendedTypes && shaderInt64;
1767 	}
1768 }
1769 
isSubgroupBroadcastDynamicIdSupported(Context & context)1770 bool vkt::subgroups::isSubgroupBroadcastDynamicIdSupported (Context& context)
1771 {
1772 	return context.contextSupports(vk::ApiVersion(1, 2, 0)) && context.getDeviceVulkan12Features().subgroupBroadcastDynamicId;
1773 }
1774 
getFormatNameForGLSL(VkFormat format)1775 std::string vkt::subgroups::getFormatNameForGLSL (VkFormat format)
1776 {
1777 	switch (format)
1778 	{
1779 		case VK_FORMAT_R8_SINT:				return "int8_t";
1780 		case VK_FORMAT_R8G8_SINT:			return "i8vec2";
1781 		case VK_FORMAT_R8G8B8_SINT:			return "i8vec3";
1782 		case VK_FORMAT_R8G8B8A8_SINT:		return "i8vec4";
1783 		case VK_FORMAT_R8_UINT:				return "uint8_t";
1784 		case VK_FORMAT_R8G8_UINT:			return "u8vec2";
1785 		case VK_FORMAT_R8G8B8_UINT:			return "u8vec3";
1786 		case VK_FORMAT_R8G8B8A8_UINT:		return "u8vec4";
1787 		case VK_FORMAT_R16_SINT:			return "int16_t";
1788 		case VK_FORMAT_R16G16_SINT:			return "i16vec2";
1789 		case VK_FORMAT_R16G16B16_SINT:		return "i16vec3";
1790 		case VK_FORMAT_R16G16B16A16_SINT:	return "i16vec4";
1791 		case VK_FORMAT_R16_UINT:			return "uint16_t";
1792 		case VK_FORMAT_R16G16_UINT:			return "u16vec2";
1793 		case VK_FORMAT_R16G16B16_UINT:		return "u16vec3";
1794 		case VK_FORMAT_R16G16B16A16_UINT:	return "u16vec4";
1795 		case VK_FORMAT_R32_SINT:			return "int";
1796 		case VK_FORMAT_R32G32_SINT:			return "ivec2";
1797 		case VK_FORMAT_R32G32B32_SINT:		return "ivec3";
1798 		case VK_FORMAT_R32G32B32A32_SINT:	return "ivec4";
1799 		case VK_FORMAT_R32_UINT:			return "uint";
1800 		case VK_FORMAT_R32G32_UINT:			return "uvec2";
1801 		case VK_FORMAT_R32G32B32_UINT:		return "uvec3";
1802 		case VK_FORMAT_R32G32B32A32_UINT:	return "uvec4";
1803 		case VK_FORMAT_R64_SINT:			return "int64_t";
1804 		case VK_FORMAT_R64G64_SINT:			return "i64vec2";
1805 		case VK_FORMAT_R64G64B64_SINT:		return "i64vec3";
1806 		case VK_FORMAT_R64G64B64A64_SINT:	return "i64vec4";
1807 		case VK_FORMAT_R64_UINT:			return "uint64_t";
1808 		case VK_FORMAT_R64G64_UINT:			return "u64vec2";
1809 		case VK_FORMAT_R64G64B64_UINT:		return "u64vec3";
1810 		case VK_FORMAT_R64G64B64A64_UINT:	return "u64vec4";
1811 		case VK_FORMAT_R16_SFLOAT:			return "float16_t";
1812 		case VK_FORMAT_R16G16_SFLOAT:		return "f16vec2";
1813 		case VK_FORMAT_R16G16B16_SFLOAT:	return "f16vec3";
1814 		case VK_FORMAT_R16G16B16A16_SFLOAT:	return "f16vec4";
1815 		case VK_FORMAT_R32_SFLOAT:			return "float";
1816 		case VK_FORMAT_R32G32_SFLOAT:		return "vec2";
1817 		case VK_FORMAT_R32G32B32_SFLOAT:	return "vec3";
1818 		case VK_FORMAT_R32G32B32A32_SFLOAT:	return "vec4";
1819 		case VK_FORMAT_R64_SFLOAT:			return "double";
1820 		case VK_FORMAT_R64G64_SFLOAT:		return "dvec2";
1821 		case VK_FORMAT_R64G64B64_SFLOAT:	return "dvec3";
1822 		case VK_FORMAT_R64G64B64A64_SFLOAT:	return "dvec4";
1823 		case VK_FORMAT_R8_USCALED:			return "bool";
1824 		case VK_FORMAT_R8G8_USCALED:		return "bvec2";
1825 		case VK_FORMAT_R8G8B8_USCALED:		return "bvec3";
1826 		case VK_FORMAT_R8G8B8A8_USCALED:	return "bvec4";
1827 		default:							TCU_THROW(InternalError, "Unhandled format");
1828 	}
1829 }
1830 
getAdditionalExtensionForFormat(vk::VkFormat format)1831 std::string vkt::subgroups::getAdditionalExtensionForFormat (vk::VkFormat format)
1832 {
1833 	switch (format)
1834 	{
1835 		default:
1836 			return "";
1837 		case VK_FORMAT_R8_SINT:
1838 		case VK_FORMAT_R8G8_SINT:
1839 		case VK_FORMAT_R8G8B8_SINT:
1840 		case VK_FORMAT_R8G8B8A8_SINT:
1841 		case VK_FORMAT_R8_UINT:
1842 		case VK_FORMAT_R8G8_UINT:
1843 		case VK_FORMAT_R8G8B8_UINT:
1844 		case VK_FORMAT_R8G8B8A8_UINT:
1845 			return "#extension GL_EXT_shader_subgroup_extended_types_int8 : enable\n";
1846 		case VK_FORMAT_R16_SINT:
1847 		case VK_FORMAT_R16G16_SINT:
1848 		case VK_FORMAT_R16G16B16_SINT:
1849 		case VK_FORMAT_R16G16B16A16_SINT:
1850 		case VK_FORMAT_R16_UINT:
1851 		case VK_FORMAT_R16G16_UINT:
1852 		case VK_FORMAT_R16G16B16_UINT:
1853 		case VK_FORMAT_R16G16B16A16_UINT:
1854 			return "#extension GL_EXT_shader_subgroup_extended_types_int16 : enable\n";
1855 		case VK_FORMAT_R64_SINT:
1856 		case VK_FORMAT_R64G64_SINT:
1857 		case VK_FORMAT_R64G64B64_SINT:
1858 		case VK_FORMAT_R64G64B64A64_SINT:
1859 		case VK_FORMAT_R64_UINT:
1860 		case VK_FORMAT_R64G64_UINT:
1861 		case VK_FORMAT_R64G64B64_UINT:
1862 		case VK_FORMAT_R64G64B64A64_UINT:
1863 			return "#extension GL_EXT_shader_subgroup_extended_types_int64 : enable\n";
1864 		case VK_FORMAT_R16_SFLOAT:
1865 		case VK_FORMAT_R16G16_SFLOAT:
1866 		case VK_FORMAT_R16G16B16_SFLOAT:
1867 		case VK_FORMAT_R16G16B16A16_SFLOAT:
1868 			return "#extension GL_EXT_shader_subgroup_extended_types_float16 : enable\n";
1869 	}
1870 }
1871 
getAllFormats()1872 const std::vector<vk::VkFormat> vkt::subgroups::getAllFormats ()
1873 {
1874 	std::vector<VkFormat> formats;
1875 
1876 	formats.push_back(VK_FORMAT_R8_SINT);
1877 	formats.push_back(VK_FORMAT_R8G8_SINT);
1878 	formats.push_back(VK_FORMAT_R8G8B8_SINT);
1879 	formats.push_back(VK_FORMAT_R8G8B8A8_SINT);
1880 	formats.push_back(VK_FORMAT_R8_UINT);
1881 	formats.push_back(VK_FORMAT_R8G8_UINT);
1882 	formats.push_back(VK_FORMAT_R8G8B8_UINT);
1883 	formats.push_back(VK_FORMAT_R8G8B8A8_UINT);
1884 	formats.push_back(VK_FORMAT_R16_SINT);
1885 	formats.push_back(VK_FORMAT_R16G16_SINT);
1886 	formats.push_back(VK_FORMAT_R16G16B16_SINT);
1887 	formats.push_back(VK_FORMAT_R16G16B16A16_SINT);
1888 	formats.push_back(VK_FORMAT_R16_UINT);
1889 	formats.push_back(VK_FORMAT_R16G16_UINT);
1890 	formats.push_back(VK_FORMAT_R16G16B16_UINT);
1891 	formats.push_back(VK_FORMAT_R16G16B16A16_UINT);
1892 	formats.push_back(VK_FORMAT_R32_SINT);
1893 	formats.push_back(VK_FORMAT_R32G32_SINT);
1894 	formats.push_back(VK_FORMAT_R32G32B32_SINT);
1895 	formats.push_back(VK_FORMAT_R32G32B32A32_SINT);
1896 	formats.push_back(VK_FORMAT_R32_UINT);
1897 	formats.push_back(VK_FORMAT_R32G32_UINT);
1898 	formats.push_back(VK_FORMAT_R32G32B32_UINT);
1899 	formats.push_back(VK_FORMAT_R32G32B32A32_UINT);
1900 	formats.push_back(VK_FORMAT_R64_SINT);
1901 	formats.push_back(VK_FORMAT_R64G64_SINT);
1902 	formats.push_back(VK_FORMAT_R64G64B64_SINT);
1903 	formats.push_back(VK_FORMAT_R64G64B64A64_SINT);
1904 	formats.push_back(VK_FORMAT_R64_UINT);
1905 	formats.push_back(VK_FORMAT_R64G64_UINT);
1906 	formats.push_back(VK_FORMAT_R64G64B64_UINT);
1907 	formats.push_back(VK_FORMAT_R64G64B64A64_UINT);
1908 	formats.push_back(VK_FORMAT_R16_SFLOAT);
1909 	formats.push_back(VK_FORMAT_R16G16_SFLOAT);
1910 	formats.push_back(VK_FORMAT_R16G16B16_SFLOAT);
1911 	formats.push_back(VK_FORMAT_R16G16B16A16_SFLOAT);
1912 	formats.push_back(VK_FORMAT_R32_SFLOAT);
1913 	formats.push_back(VK_FORMAT_R32G32_SFLOAT);
1914 	formats.push_back(VK_FORMAT_R32G32B32_SFLOAT);
1915 	formats.push_back(VK_FORMAT_R32G32B32A32_SFLOAT);
1916 	formats.push_back(VK_FORMAT_R64_SFLOAT);
1917 	formats.push_back(VK_FORMAT_R64G64_SFLOAT);
1918 	formats.push_back(VK_FORMAT_R64G64B64_SFLOAT);
1919 	formats.push_back(VK_FORMAT_R64G64B64A64_SFLOAT);
1920 	formats.push_back(VK_FORMAT_R8_USCALED);
1921 	formats.push_back(VK_FORMAT_R8G8_USCALED);
1922 	formats.push_back(VK_FORMAT_R8G8B8_USCALED);
1923 	formats.push_back(VK_FORMAT_R8G8B8A8_USCALED);
1924 
1925 	return formats;
1926 }
1927 
isFormatSigned(VkFormat format)1928 bool vkt::subgroups::isFormatSigned (VkFormat format)
1929 {
1930 	switch (format)
1931 	{
1932 		default:
1933 			return false;
1934 		case VK_FORMAT_R8_SINT:
1935 		case VK_FORMAT_R8G8_SINT:
1936 		case VK_FORMAT_R8G8B8_SINT:
1937 		case VK_FORMAT_R8G8B8A8_SINT:
1938 		case VK_FORMAT_R16_SINT:
1939 		case VK_FORMAT_R16G16_SINT:
1940 		case VK_FORMAT_R16G16B16_SINT:
1941 		case VK_FORMAT_R16G16B16A16_SINT:
1942 		case VK_FORMAT_R32_SINT:
1943 		case VK_FORMAT_R32G32_SINT:
1944 		case VK_FORMAT_R32G32B32_SINT:
1945 		case VK_FORMAT_R32G32B32A32_SINT:
1946 		case VK_FORMAT_R64_SINT:
1947 		case VK_FORMAT_R64G64_SINT:
1948 		case VK_FORMAT_R64G64B64_SINT:
1949 		case VK_FORMAT_R64G64B64A64_SINT:
1950 			return true;
1951 	}
1952 }
1953 
isFormatUnsigned(VkFormat format)1954 bool vkt::subgroups::isFormatUnsigned (VkFormat format)
1955 {
1956 	switch (format)
1957 	{
1958 		default:
1959 			return false;
1960 		case VK_FORMAT_R8_UINT:
1961 		case VK_FORMAT_R8G8_UINT:
1962 		case VK_FORMAT_R8G8B8_UINT:
1963 		case VK_FORMAT_R8G8B8A8_UINT:
1964 		case VK_FORMAT_R16_UINT:
1965 		case VK_FORMAT_R16G16_UINT:
1966 		case VK_FORMAT_R16G16B16_UINT:
1967 		case VK_FORMAT_R16G16B16A16_UINT:
1968 		case VK_FORMAT_R32_UINT:
1969 		case VK_FORMAT_R32G32_UINT:
1970 		case VK_FORMAT_R32G32B32_UINT:
1971 		case VK_FORMAT_R32G32B32A32_UINT:
1972 		case VK_FORMAT_R64_UINT:
1973 		case VK_FORMAT_R64G64_UINT:
1974 		case VK_FORMAT_R64G64B64_UINT:
1975 		case VK_FORMAT_R64G64B64A64_UINT:
1976 			return true;
1977 	}
1978 }
1979 
isFormatFloat(VkFormat format)1980 bool vkt::subgroups::isFormatFloat (VkFormat format)
1981 {
1982 	switch (format)
1983 	{
1984 		default:
1985 			return false;
1986 		case VK_FORMAT_R16_SFLOAT:
1987 		case VK_FORMAT_R16G16_SFLOAT:
1988 		case VK_FORMAT_R16G16B16_SFLOAT:
1989 		case VK_FORMAT_R16G16B16A16_SFLOAT:
1990 		case VK_FORMAT_R32_SFLOAT:
1991 		case VK_FORMAT_R32G32_SFLOAT:
1992 		case VK_FORMAT_R32G32B32_SFLOAT:
1993 		case VK_FORMAT_R32G32B32A32_SFLOAT:
1994 		case VK_FORMAT_R64_SFLOAT:
1995 		case VK_FORMAT_R64G64_SFLOAT:
1996 		case VK_FORMAT_R64G64B64_SFLOAT:
1997 		case VK_FORMAT_R64G64B64A64_SFLOAT:
1998 			return true;
1999 	}
2000 }
2001 
isFormatBool(VkFormat format)2002 bool vkt::subgroups::isFormatBool (VkFormat format)
2003 {
2004 	switch (format)
2005 	{
2006 		default:
2007 			return false;
2008 		case VK_FORMAT_R8_USCALED:
2009 		case VK_FORMAT_R8G8_USCALED:
2010 		case VK_FORMAT_R8G8B8_USCALED:
2011 		case VK_FORMAT_R8G8B8A8_USCALED:
2012 			return true;
2013 	}
2014 }
2015 
isFormat8bitTy(VkFormat format)2016 bool vkt::subgroups::isFormat8bitTy (VkFormat format)
2017 {
2018 	switch (format)
2019 	{
2020 	default:
2021 		return false;
2022 	case VK_FORMAT_R8_SINT:
2023 	case VK_FORMAT_R8G8_SINT:
2024 	case VK_FORMAT_R8G8B8_SINT:
2025 	case VK_FORMAT_R8G8B8A8_SINT:
2026 	case VK_FORMAT_R8_UINT:
2027 	case VK_FORMAT_R8G8_UINT:
2028 	case VK_FORMAT_R8G8B8_UINT:
2029 	case VK_FORMAT_R8G8B8A8_UINT:
2030 		return true;
2031 	}
2032 }
2033 
isFormat16BitTy(VkFormat format)2034 bool vkt::subgroups::isFormat16BitTy (VkFormat format)
2035 {
2036 	switch (format)
2037 	{
2038 	default:
2039 		return false;
2040 	case VK_FORMAT_R16_SFLOAT:
2041 	case VK_FORMAT_R16G16_SFLOAT:
2042 	case VK_FORMAT_R16G16B16_SFLOAT:
2043 	case VK_FORMAT_R16G16B16A16_SFLOAT:
2044 	case VK_FORMAT_R16_SINT:
2045 	case VK_FORMAT_R16G16_SINT:
2046 	case VK_FORMAT_R16G16B16_SINT:
2047 	case VK_FORMAT_R16G16B16A16_SINT:
2048 	case VK_FORMAT_R16_UINT:
2049 	case VK_FORMAT_R16G16_UINT:
2050 	case VK_FORMAT_R16G16B16_UINT:
2051 	case VK_FORMAT_R16G16B16A16_UINT:
2052 		return true;
2053 	}
2054 }
2055 
setVertexShaderFrameBuffer(SourceCollections & programCollection)2056 void vkt::subgroups::setVertexShaderFrameBuffer (SourceCollections& programCollection)
2057 {
2058 	/*
2059 		"layout(location = 0) in highp vec4 in_position;\n"
2060 		"void main (void)\n"
2061 		"{\n"
2062 		"  gl_Position = in_position;\n"
2063 		"  gl_PointSize = 1.0f;\n"
2064 		"}\n";
2065 	*/
2066 	programCollection.spirvAsmSources.add("vert") <<
2067 		"; SPIR-V\n"
2068 		"; Version: 1.3\n"
2069 		"; Generator: Khronos Glslang Reference Front End; 7\n"
2070 		"; Bound: 25\n"
2071 		"; Schema: 0\n"
2072 		"OpCapability Shader\n"
2073 		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
2074 		"OpMemoryModel Logical GLSL450\n"
2075 		"OpEntryPoint Vertex %4 \"main\" %13 %17\n"
2076 		"OpMemberDecorate %11 0 BuiltIn Position\n"
2077 		"OpMemberDecorate %11 1 BuiltIn PointSize\n"
2078 		"OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
2079 		"OpMemberDecorate %11 3 BuiltIn CullDistance\n"
2080 		"OpDecorate %11 Block\n"
2081 		"OpDecorate %17 Location 0\n"
2082 		"%2 = OpTypeVoid\n"
2083 		"%3 = OpTypeFunction %2\n"
2084 		"%6 = OpTypeFloat 32\n"
2085 		"%7 = OpTypeVector %6 4\n"
2086 		"%8 = OpTypeInt 32 0\n"
2087 		"%9 = OpConstant %8 1\n"
2088 		"%10 = OpTypeArray %6 %9\n"
2089 		"%11 = OpTypeStruct %7 %6 %10 %10\n"
2090 		"%12 = OpTypePointer Output %11\n"
2091 		"%13 = OpVariable %12 Output\n"
2092 		"%14 = OpTypeInt 32 1\n"
2093 		"%15 = OpConstant %14 0\n"
2094 		"%16 = OpTypePointer Input %7\n"
2095 		"%17 = OpVariable %16 Input\n"
2096 		"%19 = OpTypePointer Output %7\n"
2097 		"%21 = OpConstant %14 1\n"
2098 		"%22 = OpConstant %6 1\n"
2099 		"%23 = OpTypePointer Output %6\n"
2100 		"%4 = OpFunction %2 None %3\n"
2101 		"%5 = OpLabel\n"
2102 		"%18 = OpLoad %7 %17\n"
2103 		"%20 = OpAccessChain %19 %13 %15\n"
2104 		"OpStore %20 %18\n"
2105 		"%24 = OpAccessChain %23 %13 %21\n"
2106 		"OpStore %24 %22\n"
2107 		"OpReturn\n"
2108 		"OpFunctionEnd\n";
2109 }
2110 
setFragmentShaderFrameBuffer(vk::SourceCollections & programCollection)2111 void vkt::subgroups::setFragmentShaderFrameBuffer (vk::SourceCollections& programCollection)
2112 {
2113 	/*
2114 		"layout(location = 0) in float in_color;\n"
2115 		"layout(location = 0) out uint out_color;\n"
2116 		"void main()\n"
2117 		{\n"
2118 		"	out_color = uint(in_color);\n"
2119 		"}\n";
2120 	*/
2121 	programCollection.spirvAsmSources.add("fragment") <<
2122 		"; SPIR-V\n"
2123 		"; Version: 1.3\n"
2124 		"; Generator: Khronos Glslang Reference Front End; 2\n"
2125 		"; Bound: 14\n"
2126 		"; Schema: 0\n"
2127 		"OpCapability Shader\n"
2128 		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
2129 		"OpMemoryModel Logical GLSL450\n"
2130 		"OpEntryPoint Fragment %4 \"main\" %8 %11\n"
2131 		"OpExecutionMode %4 OriginUpperLeft\n"
2132 		"OpDecorate %8 Location 0\n"
2133 		"OpDecorate %11 Location 0\n"
2134 		"%2 = OpTypeVoid\n"
2135 		"%3 = OpTypeFunction %2\n"
2136 		"%6 = OpTypeInt 32 0\n"
2137 		"%7 = OpTypePointer Output %6\n"
2138 		"%8 = OpVariable %7 Output\n"
2139 		"%9 = OpTypeFloat 32\n"
2140 		"%10 = OpTypePointer Input %9\n"
2141 		"%11 = OpVariable %10 Input\n"
2142 		"%4 = OpFunction %2 None %3\n"
2143 		"%5 = OpLabel\n"
2144 		"%12 = OpLoad %9 %11\n"
2145 		"%13 = OpConvertFToU %6 %12\n"
2146 		"OpStore %8 %13\n"
2147 		"OpReturn\n"
2148 		"OpFunctionEnd\n";
2149 }
2150 
setTesCtrlShaderFrameBuffer(vk::SourceCollections & programCollection)2151 void vkt::subgroups::setTesCtrlShaderFrameBuffer (vk::SourceCollections& programCollection)
2152 {
2153 	/*
2154 		"#extension GL_KHR_shader_subgroup_basic: enable\n"
2155 		"#extension GL_EXT_tessellation_shader : require\n"
2156 		"layout(vertices = 2) out;\n"
2157 		"void main (void)\n"
2158 		"{\n"
2159 		"  if (gl_InvocationID == 0)\n"
2160 		"  {\n"
2161 		"    gl_TessLevelOuter[0] = 1.0f;\n"
2162 		"    gl_TessLevelOuter[1] = 1.0f;\n"
2163 		"  }\n"
2164 		"  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
2165 		"}\n";
2166 	*/
2167 	programCollection.spirvAsmSources.add("tesc") <<
2168 		"; SPIR-V\n"
2169 		"; Version: 1.3\n"
2170 		"; Generator: Khronos Glslang Reference Front End; 2\n"
2171 		"; Bound: 46\n"
2172 		"; Schema: 0\n"
2173 		"OpCapability Tessellation\n"
2174 		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
2175 		"OpMemoryModel Logical GLSL450\n"
2176 		"OpEntryPoint TessellationControl %4 \"main\" %8 %20 %33 %39\n"
2177 		"OpExecutionMode %4 OutputVertices 2\n"
2178 		"OpDecorate %8 BuiltIn InvocationId\n"
2179 		"OpDecorate %20 Patch\n"
2180 		"OpDecorate %20 BuiltIn TessLevelOuter\n"
2181 		"OpMemberDecorate %29 0 BuiltIn Position\n"
2182 		"OpMemberDecorate %29 1 BuiltIn PointSize\n"
2183 		"OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
2184 		"OpMemberDecorate %29 3 BuiltIn CullDistance\n"
2185 		"OpDecorate %29 Block\n"
2186 		"OpMemberDecorate %35 0 BuiltIn Position\n"
2187 		"OpMemberDecorate %35 1 BuiltIn PointSize\n"
2188 		"OpMemberDecorate %35 2 BuiltIn ClipDistance\n"
2189 		"OpMemberDecorate %35 3 BuiltIn CullDistance\n"
2190 		"OpDecorate %35 Block\n"
2191 		"%2 = OpTypeVoid\n"
2192 		"%3 = OpTypeFunction %2\n"
2193 		"%6 = OpTypeInt 32 1\n"
2194 		"%7 = OpTypePointer Input %6\n"
2195 		"%8 = OpVariable %7 Input\n"
2196 		"%10 = OpConstant %6 0\n"
2197 		"%11 = OpTypeBool\n"
2198 		"%15 = OpTypeFloat 32\n"
2199 		"%16 = OpTypeInt 32 0\n"
2200 		"%17 = OpConstant %16 4\n"
2201 		"%18 = OpTypeArray %15 %17\n"
2202 		"%19 = OpTypePointer Output %18\n"
2203 		"%20 = OpVariable %19 Output\n"
2204 		"%21 = OpConstant %15 1\n"
2205 		"%22 = OpTypePointer Output %15\n"
2206 		"%24 = OpConstant %6 1\n"
2207 		"%26 = OpTypeVector %15 4\n"
2208 		"%27 = OpConstant %16 1\n"
2209 		"%28 = OpTypeArray %15 %27\n"
2210 		"%29 = OpTypeStruct %26 %15 %28 %28\n"
2211 		"%30 = OpConstant %16 2\n"
2212 		"%31 = OpTypeArray %29 %30\n"
2213 		"%32 = OpTypePointer Output %31\n"
2214 		"%33 = OpVariable %32 Output\n"
2215 		"%35 = OpTypeStruct %26 %15 %28 %28\n"
2216 		"%36 = OpConstant %16 32\n"
2217 		"%37 = OpTypeArray %35 %36\n"
2218 		"%38 = OpTypePointer Input %37\n"
2219 		"%39 = OpVariable %38 Input\n"
2220 		"%41 = OpTypePointer Input %26\n"
2221 		"%44 = OpTypePointer Output %26\n"
2222 		"%4 = OpFunction %2 None %3\n"
2223 		"%5 = OpLabel\n"
2224 		"%9 = OpLoad %6 %8\n"
2225 		"%12 = OpIEqual %11 %9 %10\n"
2226 		"OpSelectionMerge %14 None\n"
2227 		"OpBranchConditional %12 %13 %14\n"
2228 		"%13 = OpLabel\n"
2229 		"%23 = OpAccessChain %22 %20 %10\n"
2230 		"OpStore %23 %21\n"
2231 		"%25 = OpAccessChain %22 %20 %24\n"
2232 		"OpStore %25 %21\n"
2233 		"OpBranch %14\n"
2234 		"%14 = OpLabel\n"
2235 		"%34 = OpLoad %6 %8\n"
2236 		"%40 = OpLoad %6 %8\n"
2237 		"%42 = OpAccessChain %41 %39 %40 %10\n"
2238 		"%43 = OpLoad %26 %42\n"
2239 		"%45 = OpAccessChain %44 %33 %34 %10\n"
2240 		"OpStore %45 %43\n"
2241 		"OpReturn\n"
2242 		"OpFunctionEnd\n";
2243 }
2244 
setTesEvalShaderFrameBuffer(vk::SourceCollections & programCollection)2245 void vkt::subgroups::setTesEvalShaderFrameBuffer (vk::SourceCollections& programCollection)
2246 {
2247 	/*
2248 		"#extension GL_KHR_shader_subgroup_ballot: enable\n"
2249 		"#extension GL_EXT_tessellation_shader : require\n"
2250 		"layout(isolines, equal_spacing, ccw ) in;\n"
2251 		"layout(location = 0) in float in_color[];\n"
2252 		"layout(location = 0) out float out_color;\n"
2253 		"\n"
2254 		"void main (void)\n"
2255 		"{\n"
2256 		"  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
2257 		"  out_color = in_color[0];\n"
2258 		"}\n";
2259 	*/
2260 	programCollection.spirvAsmSources.add("tese") <<
2261 		"; SPIR-V\n"
2262 		"; Version: 1.3\n"
2263 		"; Generator: Khronos Glslang Reference Front End; 2\n"
2264 		"; Bound: 45\n"
2265 		"; Schema: 0\n"
2266 		"OpCapability Tessellation\n"
2267 		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
2268 		"OpMemoryModel Logical GLSL450\n"
2269 		"OpEntryPoint TessellationEvaluation %4 \"main\" %13 %20 %29 %39 %42\n"
2270 		"OpExecutionMode %4 Isolines\n"
2271 		"OpExecutionMode %4 SpacingEqual\n"
2272 		"OpExecutionMode %4 VertexOrderCcw\n"
2273 		"OpMemberDecorate %11 0 BuiltIn Position\n"
2274 		"OpMemberDecorate %11 1 BuiltIn PointSize\n"
2275 		"OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
2276 		"OpMemberDecorate %11 3 BuiltIn CullDistance\n"
2277 		"OpDecorate %11 Block\n"
2278 		"OpMemberDecorate %16 0 BuiltIn Position\n"
2279 		"OpMemberDecorate %16 1 BuiltIn PointSize\n"
2280 		"OpMemberDecorate %16 2 BuiltIn ClipDistance\n"
2281 		"OpMemberDecorate %16 3 BuiltIn CullDistance\n"
2282 		"OpDecorate %16 Block\n"
2283 		"OpDecorate %29 BuiltIn TessCoord\n"
2284 		"OpDecorate %39 Location 0\n"
2285 		"OpDecorate %42 Location 0\n"
2286 		"%2 = OpTypeVoid\n"
2287 		"%3 = OpTypeFunction %2\n"
2288 		"%6 = OpTypeFloat 32\n"
2289 		"%7 = OpTypeVector %6 4\n"
2290 		"%8 = OpTypeInt 32 0\n"
2291 		"%9 = OpConstant %8 1\n"
2292 		"%10 = OpTypeArray %6 %9\n"
2293 		"%11 = OpTypeStruct %7 %6 %10 %10\n"
2294 		"%12 = OpTypePointer Output %11\n"
2295 		"%13 = OpVariable %12 Output\n"
2296 		"%14 = OpTypeInt 32 1\n"
2297 		"%15 = OpConstant %14 0\n"
2298 		"%16 = OpTypeStruct %7 %6 %10 %10\n"
2299 		"%17 = OpConstant %8 32\n"
2300 		"%18 = OpTypeArray %16 %17\n"
2301 		"%19 = OpTypePointer Input %18\n"
2302 		"%20 = OpVariable %19 Input\n"
2303 		"%21 = OpTypePointer Input %7\n"
2304 		"%24 = OpConstant %14 1\n"
2305 		"%27 = OpTypeVector %6 3\n"
2306 		"%28 = OpTypePointer Input %27\n"
2307 		"%29 = OpVariable %28 Input\n"
2308 		"%30 = OpConstant %8 0\n"
2309 		"%31 = OpTypePointer Input %6\n"
2310 		"%36 = OpTypePointer Output %7\n"
2311 		"%38 = OpTypePointer Output %6\n"
2312 		"%39 = OpVariable %38 Output\n"
2313 		"%40 = OpTypeArray %6 %17\n"
2314 		"%41 = OpTypePointer Input %40\n"
2315 		"%42 = OpVariable %41 Input\n"
2316 		"%4 = OpFunction %2 None %3\n"
2317 		"%5 = OpLabel\n"
2318 		"%22 = OpAccessChain %21 %20 %15 %15\n"
2319 		"%23 = OpLoad %7 %22\n"
2320 		"%25 = OpAccessChain %21 %20 %24 %15\n"
2321 		"%26 = OpLoad %7 %25\n"
2322 		"%32 = OpAccessChain %31 %29 %30\n"
2323 		"%33 = OpLoad %6 %32\n"
2324 		"%34 = OpCompositeConstruct %7 %33 %33 %33 %33\n"
2325 		"%35 = OpExtInst %7 %1 FMix %23 %26 %34\n"
2326 		"%37 = OpAccessChain %36 %13 %15\n"
2327 		"OpStore %37 %35\n"
2328 		"%43 = OpAccessChain %31 %42 %15\n"
2329 		"%44 = OpLoad %6 %43\n"
2330 		"OpStore %39 %44\n"
2331 		"OpReturn\n"
2332 		"OpFunctionEnd\n";
2333 }
2334 
addGeometryShadersFromTemplate(const std::string & glslTemplate,const vk::ShaderBuildOptions & options,vk::GlslSourceCollection & collection)2335 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& glslTemplate, const vk::ShaderBuildOptions& options,  vk::GlslSourceCollection& collection)
2336 {
2337 	tcu::StringTemplate geometryTemplate(glslTemplate);
2338 
2339 	map<string, string>		linesParams;
2340 	linesParams.insert(pair<string, string>("TOPOLOGY", "lines"));
2341 
2342 	map<string, string>		pointsParams;
2343 	pointsParams.insert(pair<string, string>("TOPOLOGY", "points"));
2344 
2345 	collection.add("geometry_lines")	<< glu::GeometrySource(geometryTemplate.specialize(linesParams))	<< options;
2346 	collection.add("geometry_points")	<< glu::GeometrySource(geometryTemplate.specialize(pointsParams))	<< options;
2347 }
2348 
addGeometryShadersFromTemplate(const std::string & spirvTemplate,const vk::SpirVAsmBuildOptions & options,vk::SpirVAsmCollection & collection)2349 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& spirvTemplate, const vk::SpirVAsmBuildOptions& options, vk::SpirVAsmCollection& collection)
2350 {
2351 	tcu::StringTemplate geometryTemplate(spirvTemplate);
2352 
2353 	map<string, string>		linesParams;
2354 	linesParams.insert(pair<string, string>("TOPOLOGY", "InputLines"));
2355 
2356 	map<string, string>		pointsParams;
2357 	pointsParams.insert(pair<string, string>("TOPOLOGY", "InputPoints"));
2358 
2359 	collection.add("geometry_lines")	<< geometryTemplate.specialize(linesParams)		<< options;
2360 	collection.add("geometry_points")	<< geometryTemplate.specialize(pointsParams)	<< options;
2361 }
2362 
initializeMemory(Context & context,const Allocation & alloc,const subgroups::SSBOData & data)2363 void initializeMemory (Context& context, const Allocation& alloc, const subgroups::SSBOData& data)
2364 {
2365 	const vk::VkFormat format = data.format;
2366 	const vk::VkDeviceSize size = data.numElements *
2367 		(data.isImage ? getFormatSizeInBytes(format) : getElementSizeInBytes(format, data.layout));
2368 	if (subgroups::SSBOData::InitializeNonZero == data.initializeType)
2369 	{
2370 		de::Random rnd(context.getTestContext().getCommandLine().getBaseSeed());
2371 
2372 		switch (format)
2373 		{
2374 			default:
2375 				DE_FATAL("Illegal buffer format");
2376 				break;
2377 			case VK_FORMAT_R8_SINT:
2378 			case VK_FORMAT_R8G8_SINT:
2379 			case VK_FORMAT_R8G8B8_SINT:
2380 			case VK_FORMAT_R8G8B8A8_SINT:
2381 			case VK_FORMAT_R8_UINT:
2382 			case VK_FORMAT_R8G8_UINT:
2383 			case VK_FORMAT_R8G8B8_UINT:
2384 			case VK_FORMAT_R8G8B8A8_UINT:
2385 			{
2386 				deUint8* ptr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
2387 
2388 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint8)); k++)
2389 				{
2390 					ptr[k] = rnd.getUint8();
2391 				}
2392 			}
2393 			break;
2394 			case VK_FORMAT_R16_SINT:
2395 			case VK_FORMAT_R16G16_SINT:
2396 			case VK_FORMAT_R16G16B16_SINT:
2397 			case VK_FORMAT_R16G16B16A16_SINT:
2398 			case VK_FORMAT_R16_UINT:
2399 			case VK_FORMAT_R16G16_UINT:
2400 			case VK_FORMAT_R16G16B16_UINT:
2401 			case VK_FORMAT_R16G16B16A16_UINT:
2402 			{
2403 				deUint16* ptr = reinterpret_cast<deUint16*>(alloc.getHostPtr());
2404 
2405 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint16)); k++)
2406 				{
2407 					ptr[k] = rnd.getUint16();
2408 				}
2409 			}
2410 			break;
2411 			case VK_FORMAT_R8_USCALED:
2412 			case VK_FORMAT_R8G8_USCALED:
2413 			case VK_FORMAT_R8G8B8_USCALED:
2414 			case VK_FORMAT_R8G8B8A8_USCALED:
2415 			{
2416 				deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2417 
2418 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
2419 				{
2420 					deUint32 r = rnd.getUint32();
2421 					ptr[k] = (r & 1) ? r : 0;
2422 				}
2423 			}
2424 			break;
2425 			case VK_FORMAT_R32_SINT:
2426 			case VK_FORMAT_R32G32_SINT:
2427 			case VK_FORMAT_R32G32B32_SINT:
2428 			case VK_FORMAT_R32G32B32A32_SINT:
2429 			case VK_FORMAT_R32_UINT:
2430 			case VK_FORMAT_R32G32_UINT:
2431 			case VK_FORMAT_R32G32B32_UINT:
2432 			case VK_FORMAT_R32G32B32A32_UINT:
2433 			{
2434 				deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2435 
2436 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
2437 				{
2438 					ptr[k] = rnd.getUint32();
2439 				}
2440 			}
2441 			break;
2442 			case VK_FORMAT_R64_SINT:
2443 			case VK_FORMAT_R64G64_SINT:
2444 			case VK_FORMAT_R64G64B64_SINT:
2445 			case VK_FORMAT_R64G64B64A64_SINT:
2446 			case VK_FORMAT_R64_UINT:
2447 			case VK_FORMAT_R64G64_UINT:
2448 			case VK_FORMAT_R64G64B64_UINT:
2449 			case VK_FORMAT_R64G64B64A64_UINT:
2450 			{
2451 				deUint64* ptr = reinterpret_cast<deUint64*>(alloc.getHostPtr());
2452 
2453 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint64)); k++)
2454 				{
2455 					ptr[k] = rnd.getUint64();
2456 				}
2457 			}
2458 			break;
2459 			case VK_FORMAT_R16_SFLOAT:
2460 			case VK_FORMAT_R16G16_SFLOAT:
2461 			case VK_FORMAT_R16G16B16_SFLOAT:
2462 			case VK_FORMAT_R16G16B16A16_SFLOAT:
2463 			{
2464 				deFloat16* ptr = reinterpret_cast<deFloat16*>(alloc.getHostPtr());
2465 
2466 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deFloat16)); k++)
2467 				{
2468 					ptr[k] = deFloat32To16(rnd.getFloat());
2469 				}
2470 			}
2471 			break;
2472 			case VK_FORMAT_R32_SFLOAT:
2473 			case VK_FORMAT_R32G32_SFLOAT:
2474 			case VK_FORMAT_R32G32B32_SFLOAT:
2475 			case VK_FORMAT_R32G32B32A32_SFLOAT:
2476 			{
2477 				float* ptr = reinterpret_cast<float*>(alloc.getHostPtr());
2478 
2479 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(float)); k++)
2480 				{
2481 					ptr[k] = rnd.getFloat();
2482 				}
2483 			}
2484 			break;
2485 			case VK_FORMAT_R64_SFLOAT:
2486 			case VK_FORMAT_R64G64_SFLOAT:
2487 			case VK_FORMAT_R64G64B64_SFLOAT:
2488 			case VK_FORMAT_R64G64B64A64_SFLOAT:
2489 			{
2490 				double* ptr = reinterpret_cast<double*>(alloc.getHostPtr());
2491 
2492 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(double)); k++)
2493 				{
2494 					ptr[k] = rnd.getDouble();
2495 				}
2496 			}
2497 			break;
2498 		}
2499 	}
2500 	else if (subgroups::SSBOData::InitializeZero == data.initializeType)
2501 	{
2502 		deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2503 
2504 		for (vk::VkDeviceSize k = 0; k < size / 4; k++)
2505 		{
2506 			ptr[k] = 0;
2507 		}
2508 	}
2509 
2510 	if (subgroups::SSBOData::InitializeNone != data.initializeType)
2511 	{
2512 		flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
2513 	}
2514 }
2515 
getResultBinding(const VkShaderStageFlagBits shaderStage)2516 deUint32 getResultBinding (const VkShaderStageFlagBits shaderStage)
2517 {
2518 	switch(shaderStage)
2519 	{
2520 		case VK_SHADER_STAGE_VERTEX_BIT:
2521 			return 0u;
2522 		case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
2523 			return 1u;
2524 		case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
2525 			return 2u;
2526 		case VK_SHADER_STAGE_GEOMETRY_BIT:
2527 			return 3u;
2528 		default:
2529 			DE_ASSERT(0);
2530 			return -1;
2531 	}
2532 	DE_ASSERT(0);
2533 	return -1;
2534 }
2535 
makeTessellationEvaluationFrameBufferTest(Context & context,VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,subgroups::CheckResult checkResult,const VkShaderStageFlags shaderStage)2536 tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTest (Context&					context,
2537 																		   VkFormat					format,
2538 																		   const SSBOData*			extraData,
2539 																		   deUint32					extraDataCount,
2540 																		   const void*				internalData,
2541 																		   subgroups::CheckResult	checkResult,
2542 																		   const VkShaderStageFlags	shaderStage)
2543 {
2544 	return makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, shaderStage, 0u, 0u);
2545 }
2546 
// Runs a framebuffer-based subgroup test through a vertex + tessellation
// control + tessellation evaluation + fragment pipeline.
//
// \param context                     Test context providing device, queue and shader binaries.
// \param format                      Format of the colour attachment that receives the results.
// \param extraData                   Array of extraDataCount descriptions of additional input resources.
// \param extraDataCount              Number of entries in extraData.
// \param internalData                Opaque pointer handed through to checkResult.
// \param checkResult                 Callback validating the data read back from the attachment.
// \param shaderStage                 Stage flags used for the descriptor bindings; also selects which
//                                    tessellation stage(s) receive the create flags / required size.
// \param tessShaderStageCreateFlags  Stage create flags applied to the selected tessellation stage(s).
// \param requiredSubgroupSize        Required subgroup size for the selected stage(s); 0 means none.
//
// \return pass("OK") when checkResult accepted every iteration, otherwise
//         fail("Failed!") after logging how many iterations passed.
tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize (Context&					context,
																							   VkFormat					format,
																							   const SSBOData*			extraData,
																							   deUint32					extraDataCount,
																							   const void*				internalData,
																							   subgroups::CheckResult	checkResult,
																							   const VkShaderStageFlags	shaderStage,
																							   const deUint32			tessShaderStageCreateFlags,
																							   const deUint32			requiredSubgroupSize)
{
	const DeviceInterface&					vk						= context.getDeviceInterface();
	const VkDevice							device					= context.getDevice();
	const deUint32							maxWidth				= getMaxWidth();
	vector<de::SharedPtr<BufferOrImage> >	inputBuffers			(extraDataCount);
	DescriptorSetLayoutBuilder				layoutBuilder;
	DescriptorPoolBuilder					poolBuilder;
	DescriptorSetUpdateBuilder				updateBuilder;
	Move <VkDescriptorPool>					descriptorPool;
	Move <VkDescriptorSet>					descriptorSet;
	// Shader binaries are looked up by the names "vert", "tesc", "tese" and "fragment".
	const Unique<VkShaderModule>			vertexShaderModule		(createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
	const Unique<VkShaderModule>			teCtrlShaderModule		(createShaderModule(vk, device, context.getBinaryCollection().get("tesc"), 0u));
	const Unique<VkShaderModule>			teEvalShaderModule		(createShaderModule(vk, device, context.getBinaryCollection().get("tese"), 0u));
	const Unique<VkShaderModule>			fragmentShaderModule	(createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
	const Unique<VkRenderPass>				renderPass				(makeRenderPass(context, format));
	// One tightly packed vec4 position attribute per vertex.
	const VkVertexInputBindingDescription	vertexInputBinding		=
	{
		0u,											//  deUint32			binding;
		static_cast<deUint32>(sizeof(tcu::Vec4)),	//  deUint32			stride;
		VK_VERTEX_INPUT_RATE_VERTEX					//  VkVertexInputRate	inputRate;
	};
	const VkVertexInputAttributeDescription	vertexInputAttribute	=
	{
		0u,									//  deUint32	location;
		0u,									//  deUint32	binding;
		VK_FORMAT_R32G32B32A32_SFLOAT,		//  VkFormat	format;
		0u									//  deUint32	offset;
	};

	// Create one input resource per extraData entry (an image or a uniform
	// buffer) and initialize its memory as requested by that entry.
	for (deUint32 i = 0u; i < extraDataCount; i++)
	{
		if (extraData[i].isImage)
		{
			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
		}
		else
		{
			vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
		}
		const Allocation& alloc = inputBuffers[i]->getAllocation();
		initializeMemory(context, alloc, extraData[i]);
	}

	// Bindings are added in extraData order, so binding N corresponds to extraData[N].
	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
		layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, shaderStage, DE_NULL);

	const Unique<VkDescriptorSetLayout>		descriptorSetLayout		(layoutBuilder.build(vk, device));

	const Unique<VkPipelineLayout>			pipelineLayout			(makePipelineLayout(vk, device, *descriptorSetLayout));

	// Per-stage required subgroup sizes handed to makeGraphicsPipeline(). Only
	// slots 1 and 2 are populated, for tessellation control and evaluation
	// respectively, and only when that stage is part of shaderStage.
	// NOTE(review): the meaning of slots 0, 3 and 4 is defined by
	// makeGraphicsPipeline(), which is not visible here.
	const deUint32 requiredSubgroupSizes[5] = {0u,
											   ((shaderStage & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? requiredSubgroupSize : 0u),
											   ((shaderStage & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? requiredSubgroupSize : 0u),
											   0u,
											   0u};

	// The size array is only passed when a non-zero required size was requested.
	const Unique<VkPipeline>				pipeline				(makeGraphicsPipeline(context, *pipelineLayout,
																						  VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT |
																						  VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
																						  *vertexShaderModule, *fragmentShaderModule, DE_NULL, *teCtrlShaderModule, *teEvalShaderModule,
																						  *renderPass, VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, &vertexInputBinding, &vertexInputAttribute, true, format,
																						  0u, ((shaderStage & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? tessShaderStageCreateFlags : 0u),
																						  ((shaderStage & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? tessShaderStageCreateFlags : 0u),
																						  0u, 0u, requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));

	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
		poolBuilder.addType(inputBuffers[ndx]->getType());

	// A descriptor pool and set are only created when there are extra inputs.
	if (extraDataCount > 0)
	{
		descriptorPool = poolBuilder.build(vk, device,
							VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
		descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
	}

	// Point each binding at its image (sampler + view, GENERAL layout) or at
	// the whole range of its buffer.
	for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
	{
		if (inputBuffers[buffersNdx]->isImage())
		{
			VkDescriptorImageInfo info =
				makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
										inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);

			updateBuilder.writeSingle(*descriptorSet,
										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
										inputBuffers[buffersNdx]->getType(), &info);
		}
		else
		{
			VkDescriptorBufferInfo info =
				makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
										0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());

			updateBuilder.writeSingle(*descriptorSet,
										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
										inputBuffers[buffersNdx]->getType(), &info);
		}
	}

	updateBuilder.update(vk, device);

	const VkQueue							queue					= context.getUniversalQueue();
	const deUint32							queueFamilyIndex		= context.getUniversalQueueFamilyIndex();
	const Unique<VkCommandPool>				cmdPool					(makeCommandPool(vk, device, queueFamilyIndex));
	const deUint32							subgroupSize			= getSubgroupSize(context);
	const Unique<VkCommandBuffer>			cmdBuffer				(makeCommandBuffer(context, *cmdPool));
	// Two vertices (one two-point patch) per framebuffer pixel, sized for the widest run.
	const vk::VkDeviceSize					vertexBufferSize		= 2ull * maxWidth * sizeof(tcu::Vec4);
	Buffer									vertexBuffer			(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
	unsigned								totalIterations			= 0u;
	unsigned								failedIterations		= 0u;
	Image									discardableImage		(context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);

	{
		const Allocation&		alloc				= vertexBuffer.getAllocation();
		std::vector<tcu::Vec4>	data				(2u * maxWidth, Vec4(1.0f, 0.0f, 1.0f, 1.0f));
		const float				pixelSize			= 2.0f / static_cast<float>(maxWidth);
		float					leftHandPosition	= -1.0f;

		// Each vertex pair spans one pixel-wide segment in x, starting at -1
		// and advancing by pixelSize per pair.
		for(deUint32 ndx = 0u; ndx < data.size(); ndx+=2u)
		{
			data[ndx][0] = leftHandPosition;
			leftHandPosition += pixelSize;
			data[ndx+1][0] = leftHandPosition;
		}

		deMemcpy(alloc.getHostPtr(), &data[0], data.size() * sizeof(tcu::Vec4));
		flushAlloc(vk, device, alloc);
	}

	const Unique<VkFramebuffer>	framebuffer			(makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
	const VkViewport			viewport			= makeViewport(maxWidth, 1u);
	const VkRect2D				scissor				= makeRect2D(maxWidth, 1u);
	// Readback buffer holding one full row of the colour attachment.
	const vk::VkDeviceSize		imageResultSize		= tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
	Buffer						imageBufferResult	(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
	const VkDeviceSize			vertexBufferOffset	= 0u;

	// Widths follow getNextWidth(); the loop stops before reaching maxWidth.
	for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
	{
		totalIterations++;

		// The same command buffer is re-recorded on every iteration.
		beginCommandBuffer(vk, *cmdBuffer);
		{

			vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
			vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);

			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));

			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);

			if (extraDataCount > 0)
			{
				vk.cmdBindDescriptorSets(*cmdBuffer,
					VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
					&descriptorSet.get(), 0u, DE_NULL);
			}

			vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
			// Draw 2 * width vertices for this iteration.
			vk.cmdDraw(*cmdBuffer, 2 * width, 1, 0, 0);

			endRenderPass(vk, *cmdBuffer);

			// Copy the full attachment row into the host-readable buffer.
			copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
			endCommandBuffer(vk, *cmdBuffer);

			submitCommandsAndWait(vk, device, queue, *cmdBuffer);
		}

		{
			const Allocation& allocResult = imageBufferResult.getAllocation();
			invalidateAlloc(vk, device, allocResult);

			// The checker receives the readback pointer as its only data
			// entry, together with width/2u and the device subgroup size.
			std::vector<const void*> datas;
			datas.push_back(allocResult.getHostPtr());
			if (!checkResult(internalData, datas, width/2u, subgroupSize))
				failedIterations++;
		}
	}

	if (0 < failedIterations)
	{
		// Clamp at zero rather than underflowing the unsigned subtraction.
		unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);

		context.getTestContext().getLog()
				<< TestLog::Message << valuesPassed << " / "
				<< totalIterations << " values passed" << TestLog::EndMessage;
		return tcu::TestStatus::fail("Failed!");
	}

	return tcu::TestStatus::pass("OK");
}
2748 
check(std::vector<const void * > datas,deUint32 width,deUint32 ref)2749 bool vkt::subgroups::check (std::vector<const void*> datas, deUint32 width, deUint32 ref)
2750 {
2751 	const deUint32* data = reinterpret_cast<const deUint32*>(datas[0]);
2752 
2753 	for (deUint32 n = 0; n < width; ++n)
2754 	{
2755 		if (data[n] != ref)
2756 		{
2757 			return false;
2758 		}
2759 	}
2760 
2761 	return true;
2762 }
2763 
checkCompute(std::vector<const void * > datas,const deUint32 numWorkgroups[3],const deUint32 localSize[3],deUint32 ref)2764 bool vkt::subgroups::checkCompute (std::vector<const void*>		datas,
2765 								   const deUint32				numWorkgroups[3],
2766 								   const deUint32				localSize[3],
2767 								   deUint32						ref)
2768 {
2769 	const deUint32 globalSizeX = numWorkgroups[0] * localSize[0];
2770 	const deUint32 globalSizeY = numWorkgroups[1] * localSize[1];
2771 	const deUint32 globalSizeZ = numWorkgroups[2] * localSize[2];
2772 
2773 	return check(datas, globalSizeX * globalSizeY * globalSizeZ, ref);
2774 }
2775 
makeGeometryFrameBufferTest(Context & context,VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,subgroups::CheckResult checkResult)2776 tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTest (Context&				context,
2777 															 VkFormat				format,
2778 															 const SSBOData*		extraData,
2779 															 deUint32				extraDataCount,
2780 															 const void*			internalData,
2781 															 subgroups::CheckResult	checkResult)
2782 {
2783 	return makeGeometryFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, 0u, 0u);
2784 }
2785 
makeGeometryFrameBufferTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,subgroups::CheckResult checkResult,const deUint32 geometryShaderStageCreateFlags,const deUint32 requiredSubgroupSize)2786 tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTestRequiredSubgroupSize (Context&					context,
2787 																				 VkFormat					format,
2788 																				 const SSBOData*			extraData,
2789 																				 deUint32					extraDataCount,
2790 																				 const void*				internalData,
2791 																				 subgroups::CheckResult		checkResult,
2792 																				 const deUint32				geometryShaderStageCreateFlags,
2793 																				 const deUint32				requiredSubgroupSize)
2794 {
2795 	const DeviceInterface&					vk						= context.getDeviceInterface();
2796 	const VkDevice							device					= context.getDevice();
2797 	const deUint32							maxWidth				= getMaxWidth();
2798 	vector<de::SharedPtr<BufferOrImage> >	inputBuffers			(extraDataCount);
2799 	DescriptorSetLayoutBuilder				layoutBuilder;
2800 	DescriptorPoolBuilder					poolBuilder;
2801 	DescriptorSetUpdateBuilder				updateBuilder;
2802 	Move <VkDescriptorPool>					descriptorPool;
2803 	Move <VkDescriptorSet>					descriptorSet;
2804 	const Unique<VkShaderModule>			vertexShaderModule		(createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
2805 	const Unique<VkShaderModule>			geometryShaderModule	(createShaderModule(vk, device, context.getBinaryCollection().get("geometry"), 0u));
2806 	const Unique<VkShaderModule>			fragmentShaderModule	(createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
2807 	const Unique<VkRenderPass>				renderPass				(makeRenderPass(context, format));
2808 	const VkVertexInputBindingDescription	vertexInputBinding		=
2809 	{
2810 		0u,											//  deUint32			binding;
2811 		static_cast<deUint32>(sizeof(tcu::Vec4)),	//  deUint32			stride;
2812 		VK_VERTEX_INPUT_RATE_VERTEX					//  VkVertexInputRate	inputRate;
2813 	};
2814 	const VkVertexInputAttributeDescription	vertexInputAttribute	=
2815 	{
2816 		0u,									//  deUint32	location;
2817 		0u,									//  deUint32	binding;
2818 		VK_FORMAT_R32G32B32A32_SFLOAT,		//  VkFormat	format;
2819 		0u									//  deUint32	offset;
2820 	};
2821 
2822 	for (deUint32 i = 0u; i < extraDataCount; i++)
2823 	{
2824 		if (extraData[i].isImage)
2825 		{
2826 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
2827 		}
2828 		else
2829 		{
2830 			vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
2831 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2832 		}
2833 		const Allocation& alloc = inputBuffers[i]->getAllocation();
2834 		initializeMemory(context, alloc, extraData[i]);
2835 	}
2836 
2837 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2838 		layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_GEOMETRY_BIT, DE_NULL);
2839 
2840 	const Unique<VkDescriptorSetLayout>		descriptorSetLayout		(layoutBuilder.build(vk, device));
2841 
2842 	const Unique<VkPipelineLayout>			pipelineLayout			(makePipelineLayout(vk, device, *descriptorSetLayout));
2843 
2844 	const deUint32 requiredSubgroupSizes[5] = {0u, 0u, 0u, requiredSubgroupSize, 0u};
2845 
2846 	const Unique<VkPipeline>				pipeline				(makeGraphicsPipeline(context, *pipelineLayout,
2847 																						  VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_GEOMETRY_BIT,
2848 																						  *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, DE_NULL, DE_NULL,
2849 																						  *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST, &vertexInputBinding, &vertexInputAttribute, true, format,
2850 																						  0u, 0u, 0u, geometryShaderStageCreateFlags, 0u,
2851 																						  requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
2852 
2853 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2854 		poolBuilder.addType(inputBuffers[ndx]->getType());
2855 
2856 	if (extraDataCount > 0)
2857 	{
2858 		descriptorPool = poolBuilder.build(vk, device,
2859 							VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2860 		descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
2861 	}
2862 
2863 	for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
2864 	{
2865 		if (inputBuffers[buffersNdx]->isImage())
2866 		{
2867 			VkDescriptorImageInfo info =
2868 				makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
2869 										inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2870 
2871 			updateBuilder.writeSingle(*descriptorSet,
2872 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2873 										inputBuffers[buffersNdx]->getType(), &info);
2874 		}
2875 		else
2876 		{
2877 			VkDescriptorBufferInfo info =
2878 				makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
2879 										0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
2880 
2881 			updateBuilder.writeSingle(*descriptorSet,
2882 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2883 										inputBuffers[buffersNdx]->getType(), &info);
2884 		}
2885 	}
2886 
2887 	updateBuilder.update(vk, device);
2888 
2889 	const VkQueue							queue					= context.getUniversalQueue();
2890 	const deUint32							queueFamilyIndex		= context.getUniversalQueueFamilyIndex();
2891 	const Unique<VkCommandPool>				cmdPool					(makeCommandPool(vk, device, queueFamilyIndex));
2892 	const deUint32							subgroupSize			= getSubgroupSize(context);
2893 	const Unique<VkCommandBuffer>			cmdBuffer				(makeCommandBuffer(context, *cmdPool));
2894 	const vk::VkDeviceSize					vertexBufferSize		= maxWidth * sizeof(tcu::Vec4);
2895 	Buffer									vertexBuffer			(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
2896 	unsigned								totalIterations			= 0u;
2897 	unsigned								failedIterations		= 0u;
2898 	Image									discardableImage		(context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2899 
2900 	{
2901 		const Allocation&		alloc				= vertexBuffer.getAllocation();
2902 		std::vector<tcu::Vec4>	data				(maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
2903 		const float				pixelSize			= 2.0f / static_cast<float>(maxWidth);
2904 		float					leftHandPosition	= -1.0f;
2905 
2906 		for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
2907 		{
2908 			data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
2909 			leftHandPosition += pixelSize;
2910 		}
2911 
2912 		deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
2913 		flushAlloc(vk, device, alloc);
2914 	}
2915 
2916 	const Unique<VkFramebuffer>	framebuffer			(makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
2917 	const VkViewport			viewport			= makeViewport(maxWidth, 1u);
2918 	const VkRect2D				scissor				= makeRect2D(maxWidth, 1u);
2919 	const vk::VkDeviceSize		imageResultSize		= tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
2920 	Buffer						imageBufferResult	(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2921 	const VkDeviceSize			vertexBufferOffset	= 0u;
2922 
2923 	for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
2924 	{
2925 		totalIterations++;
2926 
2927 		for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
2928 		{
2929 			const Allocation& alloc = inputBuffers[ndx]->getAllocation();
2930 			initializeMemory(context, alloc, extraData[ndx]);
2931 		}
2932 
2933 		beginCommandBuffer(vk, *cmdBuffer);
2934 		{
2935 			vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
2936 
2937 			vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
2938 
2939 			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
2940 
2941 			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
2942 
2943 			if (extraDataCount > 0)
2944 			{
2945 				vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2946 					&descriptorSet.get(), 0u, DE_NULL);
2947 			}
2948 
2949 			vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
2950 
2951 			vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
2952 
2953 			endRenderPass(vk, *cmdBuffer);
2954 
2955 			copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2956 
2957 			endCommandBuffer(vk, *cmdBuffer);
2958 
2959 			submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2960 		}
2961 
2962 		{
2963 			const Allocation& allocResult = imageBufferResult.getAllocation();
2964 			invalidateAlloc(vk, device, allocResult);
2965 
2966 			std::vector<const void*> datas;
2967 			datas.push_back(allocResult.getHostPtr());
2968 			if (!checkResult(internalData, datas, width, subgroupSize))
2969 				failedIterations++;
2970 		}
2971 	}
2972 
2973 	if (0 < failedIterations)
2974 	{
2975 		unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
2976 
2977 		context.getTestContext().getLog()
2978 				<< TestLog::Message << valuesPassed << " / "
2979 				<< totalIterations << " values passed" << TestLog::EndMessage;
2980 
2981 		return tcu::TestStatus::fail("Failed!");
2982 	}
2983 
2984 	return tcu::TestStatus::pass("OK");
2985 }
2986 
getPossibleGraphicsSubgroupStages(Context & context,const vk::VkShaderStageFlags testedStages)2987 vk::VkShaderStageFlags vkt::subgroups::getPossibleGraphicsSubgroupStages (Context& context, const vk::VkShaderStageFlags testedStages)
2988 {
2989 	const VkPhysicalDeviceSubgroupProperties&	subgroupProperties	= context.getSubgroupProperties();
2990 	VkShaderStageFlags							stages				= testedStages & subgroupProperties.supportedStages;
2991 
2992 	DE_ASSERT(isAllGraphicsStages(testedStages));
2993 
2994 	if (VK_SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
2995 	{
2996 		if ((stages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
2997 			TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
2998 		else
2999 			stages = VK_SHADER_STAGE_FRAGMENT_BIT;
3000 	}
3001 
3002 	if (static_cast<VkShaderStageFlags>(0u) == stages)
3003 		TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
3004 
3005 	return stages;
3006 }
3007 
allStages(Context & context,vk::VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,const VerificationFunctor & checkResult,const vk::VkShaderStageFlags shaderStage)3008 tcu::TestStatus vkt::subgroups::allStages (Context&						context,
3009 										   vk::VkFormat					format,
3010 										   const SSBOData*				extraData,
3011 										   deUint32						extraDataCount,
3012 										   const void*					internalData,
3013 										   const VerificationFunctor&	checkResult,
3014 										   const vk::VkShaderStageFlags	shaderStage)
3015 {
3016 	return vkt::subgroups::allStagesRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, shaderStage,
3017 														 0u, 0u, 0u, 0u, 0u, DE_NULL);
3018 }
3019 
allStagesRequiredSubgroupSize(Context & context,vk::VkFormat format,const SSBOData * extraDatas,deUint32 extraDatasCount,const void * internalData,const VerificationFunctor & checkResult,const vk::VkShaderStageFlags shaderStageTested,const deUint32 vertexShaderStageCreateFlags,const deUint32 tessellationControlShaderStageCreateFlags,const deUint32 tessellationEvalShaderStageCreateFlags,const deUint32 geometryShaderStageCreateFlags,const deUint32 fragmentShaderStageCreateFlags,const deUint32 requiredSubgroupSize[5])3020 tcu::TestStatus vkt::subgroups::allStagesRequiredSubgroupSize (Context&						context,
3021 															   vk::VkFormat					format,
3022 															   const SSBOData*				extraDatas,
3023 															   deUint32						extraDatasCount,
3024 															   const void*					internalData,
3025 															   const VerificationFunctor&	checkResult,
3026 															   const vk::VkShaderStageFlags	shaderStageTested,
3027 															   const deUint32				vertexShaderStageCreateFlags,
3028 															   const deUint32				tessellationControlShaderStageCreateFlags,
3029 															   const deUint32				tessellationEvalShaderStageCreateFlags,
3030 															   const deUint32				geometryShaderStageCreateFlags,
3031 															   const deUint32				fragmentShaderStageCreateFlags,
3032 															   const deUint32				requiredSubgroupSize[5])
3033 {
3034 	const DeviceInterface&			vk					= context.getDeviceInterface();
3035 	const VkDevice					device				= context.getDevice();
3036 	const deUint32					maxWidth			= getMaxWidth();
3037 	vector<VkShaderStageFlagBits>	stagesVector;
3038 	VkShaderStageFlags				shaderStageRequired	= (VkShaderStageFlags)0ull;
3039 
3040 	Move<VkShaderModule>			vertexShaderModule;
3041 	Move<VkShaderModule>			teCtrlShaderModule;
3042 	Move<VkShaderModule>			teEvalShaderModule;
3043 	Move<VkShaderModule>			geometryShaderModule;
3044 	Move<VkShaderModule>			fragmentShaderModule;
3045 
3046 	if (shaderStageTested & VK_SHADER_STAGE_VERTEX_BIT)
3047 	{
3048 		stagesVector.push_back(VK_SHADER_STAGE_VERTEX_BIT);
3049 	}
3050 	if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
3051 	{
3052 		stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
3053 		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
3054 		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
3055 	}
3056 	if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
3057 	{
3058 		stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
3059 		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
3060 		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
3061 	}
3062 	if (shaderStageTested & VK_SHADER_STAGE_GEOMETRY_BIT)
3063 	{
3064 		stagesVector.push_back(VK_SHADER_STAGE_GEOMETRY_BIT);
3065 		const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
3066 		shaderStageRequired |=  (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
3067 	}
3068 	if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
3069 	{
3070 		const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
3071 		shaderStageRequired |=  (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
3072 	}
3073 
3074 	const deUint32	stagesCount	= static_cast<deUint32>(stagesVector.size());
3075 	const string	vert		= (shaderStageRequired & VK_SHADER_STAGE_VERTEX_BIT)					? "vert_noSubgroup"		: "vert";
3076 	const string	tesc		= (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)		? "tesc_noSubgroup"		: "tesc";
3077 	const string	tese		= (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)	? "tese_noSubgroup"		: "tese";
3078 
3079 	shaderStageRequired = shaderStageTested | shaderStageRequired;
3080 
3081 	vertexShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(vert), 0u);
3082 	if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
3083 	{
3084 		teCtrlShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tesc), 0u);
3085 		teEvalShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tese), 0u);
3086 	}
3087 	if (shaderStageRequired & VK_SHADER_STAGE_GEOMETRY_BIT)
3088 	{
3089 		if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
3090 		{
3091 			// tessellation shaders output line primitives
3092 			geometryShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("geometry_lines"), 0u);
3093 		}
3094 		else
3095 		{
3096 			// otherwise points are processed by geometry shader
3097 			geometryShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("geometry_points"), 0u);
3098 		}
3099 	}
3100 	if (shaderStageRequired & VK_SHADER_STAGE_FRAGMENT_BIT)
3101 		fragmentShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u);
3102 
3103 	std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(stagesCount + extraDatasCount);
3104 
3105 	DescriptorSetLayoutBuilder layoutBuilder;
3106 	// The implicit result SSBO we use to store our outputs from the shader
3107 	for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
3108 	{
3109 		const VkDeviceSize shaderSize = (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? maxWidth * 2 : maxWidth;
3110 		const VkDeviceSize size = getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize;
3111 		inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
3112 
3113 		layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1, stagesVector[ndx], getResultBinding(stagesVector[ndx]), DE_NULL);
3114 	}
3115 
3116 	for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
3117 	{
3118 		const deUint32 datasNdx = ndx - stagesCount;
3119 		if (extraDatas[datasNdx].isImage)
3120 		{
3121 			inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
3122 		}
3123 		else
3124 		{
3125 			const vk::VkDeviceSize size = getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) * extraDatas[datasNdx].numElements;
3126 			inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
3127 		}
3128 
3129 		const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3130 		initializeMemory(context, alloc, extraDatas[datasNdx]);
3131 
3132 		layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1,
3133 								extraDatas[datasNdx].stages, extraDatas[datasNdx].binding, DE_NULL);
3134 	}
3135 
3136 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
3137 
3138 	const Unique<VkPipelineLayout> pipelineLayout(
3139 		makePipelineLayout(vk, device, *descriptorSetLayout));
3140 
3141 	const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
3142 	const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
3143 														   shaderStageRequired,
3144 														   *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, *teCtrlShaderModule, *teEvalShaderModule,
3145 														   *renderPass,
3146 														   (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
3147 														   DE_NULL, DE_NULL, false, VK_FORMAT_R32G32B32A32_SFLOAT,
3148 														   vertexShaderStageCreateFlags, tessellationControlShaderStageCreateFlags, tessellationEvalShaderStageCreateFlags,
3149 														   geometryShaderStageCreateFlags, fragmentShaderStageCreateFlags, requiredSubgroupSize));
3150 
3151 	Move <VkDescriptorPool>	descriptorPool;
3152 	Move <VkDescriptorSet>	descriptorSet;
3153 
3154 	if (inputBuffers.size() > 0)
3155 	{
3156 		DescriptorPoolBuilder poolBuilder;
3157 
3158 		for (deUint32 ndx = 0u; ndx < static_cast<deUint32>(inputBuffers.size()); ndx++)
3159 		{
3160 			poolBuilder.addType(inputBuffers[ndx]->getType());
3161 		}
3162 
3163 		descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3164 
3165 		// Create descriptor set
3166 		descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3167 
3168 		DescriptorSetUpdateBuilder updateBuilder;
3169 
3170 		for (deUint32 ndx = 0u; ndx < stagesCount + extraDatasCount; ndx++)
3171 		{
3172 			deUint32 binding;
3173 			if (ndx < stagesCount) binding = getResultBinding(stagesVector[ndx]);
3174 			else binding = extraDatas[ndx -stagesCount].binding;
3175 
3176 			if (inputBuffers[ndx]->isImage())
3177 			{
3178 				VkDescriptorImageInfo info =
3179 					makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
3180 											inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3181 
3182 				updateBuilder.writeSingle(	*descriptorSet,
3183 											DescriptorSetUpdateBuilder::Location::binding(binding),
3184 											inputBuffers[ndx]->getType(), &info);
3185 			}
3186 			else
3187 			{
3188 				VkDescriptorBufferInfo info =
3189 					makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(),
3190 							0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
3191 
3192 				updateBuilder.writeSingle(	*descriptorSet,
3193 													DescriptorSetUpdateBuilder::Location::binding(binding),
3194 													inputBuffers[ndx]->getType(), &info);
3195 			}
3196 		}
3197 
3198 		updateBuilder.update(vk, device);
3199 	}
3200 
3201 	{
3202 		const VkQueue					queue					= context.getUniversalQueue();
3203 		const deUint32					queueFamilyIndex		= context.getUniversalQueueFamilyIndex();
3204 		const Unique<VkCommandPool>		cmdPool					(makeCommandPool(vk, device, queueFamilyIndex));
3205 		const deUint32					subgroupSize			= getSubgroupSize(context);
3206 		const Unique<VkCommandBuffer>	cmdBuffer				(makeCommandBuffer(context, *cmdPool));
3207 		unsigned						totalIterations			= 0u;
3208 		unsigned						failedIterations		= 0u;
3209 		Image							resultImage				(context, maxWidth, 1, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3210 		const Unique<VkFramebuffer>		framebuffer				(makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), maxWidth, 1u));
3211 		const VkViewport				viewport				= makeViewport(maxWidth, 1u);
3212 		const VkRect2D					scissor					= makeRect2D(maxWidth, 1u);
3213 		const vk::VkDeviceSize			imageResultSize			= tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
3214 		Buffer							imageBufferResult		(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3215 		const VkImageSubresourceRange	subresourceRange		=
3216 		{
3217 			VK_IMAGE_ASPECT_COLOR_BIT,											//VkImageAspectFlags	aspectMask
3218 			0u,																	//deUint32				baseMipLevel
3219 			1u,																	//deUint32				levelCount
3220 			0u,																	//deUint32				baseArrayLayer
3221 			1u																	//deUint32				layerCount
3222 		};
3223 
3224 		const VkImageMemoryBarrier		colorAttachmentBarrier	= makeImageMemoryBarrier(
3225 			(VkAccessFlags)0u, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
3226 			VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
3227 			resultImage.getImage(), subresourceRange);
3228 
3229 		for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
3230 		{
3231 			for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
3232 			{
3233 				// re-init the data
3234 				const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3235 				initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
3236 			}
3237 
3238 			totalIterations++;
3239 
3240 			beginCommandBuffer(vk, *cmdBuffer);
3241 
3242 			vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &colorAttachmentBarrier);
3243 
3244 			vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3245 
3246 			vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3247 
3248 			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
3249 
3250 			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3251 
3252 			if (stagesCount + extraDatasCount > 0)
3253 				vk.cmdBindDescriptorSets(*cmdBuffer,
3254 						VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3255 						&descriptorSet.get(), 0u, DE_NULL);
3256 
3257 			vk.cmdDraw(*cmdBuffer, width, 1, 0, 0);
3258 
3259 			endRenderPass(vk, *cmdBuffer);
3260 
3261 			copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(width, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3262 
3263 			endCommandBuffer(vk, *cmdBuffer);
3264 
3265 			submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3266 
3267 			for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
3268 			{
3269 				std::vector<const void*> datas;
3270 				if (!inputBuffers[ndx]->isImage())
3271 				{
3272 					const Allocation& resultAlloc = inputBuffers[ndx]->getAllocation();
3273 					invalidateAlloc(vk, device, resultAlloc);
3274 					// we always have our result data first
3275 					datas.push_back(resultAlloc.getHostPtr());
3276 				}
3277 
3278 				for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
3279 				{
3280 					const deUint32 datasNdx = index - stagesCount;
3281 					if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
3282 					{
3283 						const Allocation& resultAlloc = inputBuffers[index]->getAllocation();
3284 						invalidateAlloc(vk, device, resultAlloc);
3285 						// we always have our result data first
3286 						datas.push_back(resultAlloc.getHostPtr());
3287 					}
3288 				}
3289 
3290 				// Any stage in the vertex pipeline may be called multiple times per vertex, so we may need >= non-strict comparisons.
3291 				const bool		multiCall	= (	stagesVector[ndx] == VK_SHADER_STAGE_VERTEX_BIT						||
3292 												stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT		||
3293 												stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT	||
3294 												stagesVector[ndx] == VK_SHADER_STAGE_GEOMETRY_BIT					);
3295 				const deUint32	usedWidth	= ((stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? width * 2 : width);
3296 
3297 				if (!checkResult(internalData, datas, usedWidth, subgroupSize, multiCall))
3298 					failedIterations++;
3299 			}
3300 			if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
3301 			{
3302 				std::vector<const void*> datas;
3303 				const Allocation& resultAlloc = imageBufferResult.getAllocation();
3304 				invalidateAlloc(vk, device, resultAlloc);
3305 
3306 				// we always have our result data first
3307 				datas.push_back(resultAlloc.getHostPtr());
3308 
3309 				for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
3310 				{
3311 					const deUint32 datasNdx = index - stagesCount;
3312 					if (VK_SHADER_STAGE_FRAGMENT_BIT & extraDatas[datasNdx].stages && (!inputBuffers[index]->isImage()))
3313 					{
3314 						const Allocation& alloc = inputBuffers[index]->getAllocation();
3315 						invalidateAlloc(vk, device, alloc);
3316 						// we always have our result data first
3317 						datas.push_back(alloc.getHostPtr());
3318 					}
3319 				}
3320 
3321 				if (!checkResult(internalData, datas, width, subgroupSize, false))
3322 					failedIterations++;
3323 			}
3324 
3325 			vk.resetCommandBuffer(*cmdBuffer, 0);
3326 		}
3327 
3328 		if (0 < failedIterations)
3329 		{
3330 			unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3331 
3332 			context.getTestContext().getLog()
3333 				<< TestLog::Message << valuesPassed << " / "
3334 				<< totalIterations << " values passed" << TestLog::EndMessage;
3335 
3336 			return tcu::TestStatus::fail("Failed!");
3337 		}
3338 	}
3339 
3340 	return tcu::TestStatus::pass("OK");
3341 }
3342 
makeVertexFrameBufferTest(Context & context,vk::VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,subgroups::CheckResult checkResult)3343 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTest (Context&					context,
3344 														   vk::VkFormat				format,
3345 														   const SSBOData*			extraData,
3346 														   deUint32					extraDataCount,
3347 														   const void*				internalData,
3348 														   subgroups::CheckResult	checkResult)
3349 {
3350 	return makeVertexFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, 0u, 0u);
3351 }
3352 
makeVertexFrameBufferTestRequiredSubgroupSize(Context & context,vk::VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,subgroups::CheckResult checkResult,const deUint32 vertexShaderStageCreateFlags,const deUint32 requiredSubgroupSize)3353 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTestRequiredSubgroupSize (Context&					context,
3354 																			   vk::VkFormat				format,
3355 																			   const SSBOData*			extraData,
3356 																			   deUint32					extraDataCount,
3357 																			   const void*				internalData,
3358 																			   subgroups::CheckResult	checkResult,
3359 																			   const deUint32			vertexShaderStageCreateFlags,
3360 																			   const deUint32			requiredSubgroupSize)
3361 {
3362 	const DeviceInterface&					vk						= context.getDeviceInterface();
3363 	const VkDevice							device					= context.getDevice();
3364 	const VkQueue							queue					= context.getUniversalQueue();
3365 	const deUint32							maxWidth				= getMaxWidth();
3366 	const deUint32							queueFamilyIndex		= context.getUniversalQueueFamilyIndex();
3367 	vector<de::SharedPtr<BufferOrImage> >	inputBuffers			(extraDataCount);
3368 	DescriptorSetLayoutBuilder				layoutBuilder;
3369 	const Unique<VkShaderModule>			vertexShaderModule		(createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
3370 	const Unique<VkShaderModule>			fragmentShaderModule	(createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
3371 	const Unique<VkRenderPass>				renderPass				(makeRenderPass(context, format));
3372 	const VkVertexInputBindingDescription	vertexInputBinding		=
3373 	{
3374 		0u,											// binding;
3375 		static_cast<deUint32>(sizeof(tcu::Vec4)),	// stride;
3376 		VK_VERTEX_INPUT_RATE_VERTEX					// inputRate
3377 	};
3378 	const VkVertexInputAttributeDescription	vertexInputAttribute	=
3379 	{
3380 		0u,
3381 		0u,
3382 		VK_FORMAT_R32G32B32A32_SFLOAT,
3383 		0u
3384 	};
3385 
3386 	for (deUint32 i = 0u; i < extraDataCount; i++)
3387 	{
3388 		if (extraData[i].isImage)
3389 		{
3390 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
3391 		}
3392 		else
3393 		{
3394 			vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
3395 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
3396 		}
3397 		const Allocation& alloc = inputBuffers[i]->getAllocation();
3398 		initializeMemory(context, alloc, extraData[i]);
3399 	}
3400 
3401 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
3402 		layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_VERTEX_BIT, DE_NULL);
3403 
3404 	const Unique<VkDescriptorSetLayout>		descriptorSetLayout		(layoutBuilder.build(vk, device));
3405 
3406 	const Unique<VkPipelineLayout>			pipelineLayout			(makePipelineLayout(vk, device, *descriptorSetLayout));
3407 
3408 	const deUint32 requiredSubgroupSizes[5] = {requiredSubgroupSize, 0u, 0u, 0u, 0u};
3409 	const Unique<VkPipeline>				pipeline				(makeGraphicsPipeline(context, *pipelineLayout,
3410 																						  VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
3411 																						  *vertexShaderModule, *fragmentShaderModule,
3412 																						  DE_NULL, DE_NULL, DE_NULL,
3413 																						  *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
3414 																						  &vertexInputBinding, &vertexInputAttribute, true, format,
3415 																						  vertexShaderStageCreateFlags, 0u, 0u, 0u, 0u,
3416 																						  requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
3417 	DescriptorPoolBuilder					poolBuilder;
3418 	DescriptorSetUpdateBuilder				updateBuilder;
3419 
3420 
3421 	for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
3422 		poolBuilder.addType(inputBuffers[ndx]->getType());
3423 
3424 	Move <VkDescriptorPool>					descriptorPool;
3425 	Move <VkDescriptorSet>					descriptorSet;
3426 
3427 	if (extraDataCount > 0)
3428 	{
3429 		descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3430 		descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3431 	}
3432 
3433 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
3434 	{
3435 		const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3436 		initializeMemory(context, alloc, extraData[ndx]);
3437 	}
3438 
3439 	for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
3440 	{
3441 		if (inputBuffers[buffersNdx]->isImage())
3442 		{
3443 			VkDescriptorImageInfo info =
3444 				makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
3445 										inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3446 
3447 			updateBuilder.writeSingle(*descriptorSet,
3448 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
3449 										inputBuffers[buffersNdx]->getType(), &info);
3450 		}
3451 		else
3452 		{
3453 			VkDescriptorBufferInfo info =
3454 				makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
3455 										0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
3456 
3457 			updateBuilder.writeSingle(*descriptorSet,
3458 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
3459 										inputBuffers[buffersNdx]->getType(), &info);
3460 		}
3461 	}
3462 	updateBuilder.update(vk, device);
3463 
3464 	const Unique<VkCommandPool>				cmdPool					(makeCommandPool(vk, device, queueFamilyIndex));
3465 
3466 	const deUint32							subgroupSize			= getSubgroupSize(context);
3467 
3468 	const Unique<VkCommandBuffer>			cmdBuffer				(makeCommandBuffer(context, *cmdPool));
3469 
3470 	const vk::VkDeviceSize					vertexBufferSize		= maxWidth * sizeof(tcu::Vec4);
3471 	Buffer									vertexBuffer			(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
3472 
3473 	unsigned								totalIterations			= 0u;
3474 	unsigned								failedIterations		= 0u;
3475 
3476 	Image									discardableImage		(context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3477 
3478 	{
3479 		const Allocation&		alloc				= vertexBuffer.getAllocation();
3480 		std::vector<tcu::Vec4>	data				(maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
3481 		const float				pixelSize			= 2.0f / static_cast<float>(maxWidth);
3482 		float					leftHandPosition	= -1.0f;
3483 
3484 		for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
3485 		{
3486 			data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
3487 			leftHandPosition += pixelSize;
3488 		}
3489 
3490 		deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
3491 		flushAlloc(vk, device, alloc);
3492 	}
3493 
3494 	const Unique<VkFramebuffer>	framebuffer			(makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
3495 	const VkViewport			viewport			= makeViewport(maxWidth, 1u);
3496 	const VkRect2D				scissor				= makeRect2D(maxWidth, 1u);
3497 	const vk::VkDeviceSize		imageResultSize		= tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
3498 	Buffer						imageBufferResult	(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3499 	const VkDeviceSize			vertexBufferOffset	= 0u;
3500 
3501 	for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
3502 	{
3503 		totalIterations++;
3504 
3505 		for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
3506 		{
3507 			const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3508 			initializeMemory(context, alloc, extraData[ndx]);
3509 		}
3510 
3511 		beginCommandBuffer(vk, *cmdBuffer);
3512 		{
3513 			vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3514 
3515 			vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3516 
3517 			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
3518 
3519 			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3520 
3521 			if (extraDataCount > 0)
3522 			{
3523 				vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3524 					&descriptorSet.get(), 0u, DE_NULL);
3525 			}
3526 
3527 			vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
3528 
3529 			vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
3530 
3531 			endRenderPass(vk, *cmdBuffer);
3532 
3533 			copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3534 
3535 			endCommandBuffer(vk, *cmdBuffer);
3536 
3537 			submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3538 		}
3539 
3540 		{
3541 			const Allocation& allocResult = imageBufferResult.getAllocation();
3542 			invalidateAlloc(vk, device, allocResult);
3543 
3544 			std::vector<const void*> datas;
3545 			datas.push_back(allocResult.getHostPtr());
3546 			if (!checkResult(internalData, datas, width, subgroupSize))
3547 				failedIterations++;
3548 		}
3549 	}
3550 
3551 	if (0 < failedIterations)
3552 	{
3553 		unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3554 
3555 		context.getTestContext().getLog()
3556 			<< TestLog::Message << valuesPassed << " / "
3557 			<< totalIterations << " values passed" << TestLog::EndMessage;
3558 
3559 		return tcu::TestStatus::fail("Failed!");
3560 	}
3561 
3562 	return tcu::TestStatus::pass("OK");
3563 }
3564 
makeFragmentFrameBufferTest(Context & context,VkFormat format,const SSBOData * extraDatas,deUint32 extraDatasCount,const void * internalData,CheckResultFragment checkResult)3565 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTest (Context&				context,
3566 															 VkFormat				format,
3567 															 const SSBOData*		extraDatas,
3568 															 deUint32				extraDatasCount,
3569 															 const void*			internalData,
3570 															 CheckResultFragment	checkResult)
3571 {
3572 	return makeFragmentFrameBufferTestRequiredSubgroupSize(context, format, extraDatas, extraDatasCount, internalData, checkResult, 0u, 0u);
3573 }
3574 
makeFragmentFrameBufferTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * extraDatas,deUint32 extraDatasCount,const void * internalData,CheckResultFragment checkResult,const deUint32 fragmentShaderStageCreateFlags,const deUint32 requiredSubgroupSize)3575 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTestRequiredSubgroupSize (Context&				context,
3576 																				 VkFormat				format,
3577 																				 const SSBOData*		extraDatas,
3578 																				 deUint32				extraDatasCount,
3579 																				 const void*			internalData,
3580 																				 CheckResultFragment	checkResult,
3581 																				 const deUint32			fragmentShaderStageCreateFlags,
3582 																				 const deUint32			requiredSubgroupSize)
3583 {
3584 	const DeviceInterface&						vk						= context.getDeviceInterface();
3585 	const VkDevice								device					= context.getDevice();
3586 	const VkQueue								queue					= context.getUniversalQueue();
3587 	const deUint32								queueFamilyIndex		= context.getUniversalQueueFamilyIndex();
3588 	const Unique<VkShaderModule>				vertexShaderModule		(createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
3589 	const Unique<VkShaderModule>				fragmentShaderModule	(createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
3590 	std::vector< de::SharedPtr<BufferOrImage> > inputBuffers			(extraDatasCount);
3591 
3592 	for (deUint32 i = 0; i < extraDatasCount; i++)
3593 	{
3594 		if (extraDatas[i].isImage)
3595 		{
3596 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[i].numElements), 1, extraDatas[i].format));
3597 		}
3598 		else
3599 		{
3600 			const vk::VkDeviceSize	size	= getElementSizeInBytes(extraDatas[i].format, extraDatas[i].layout) * extraDatas[i].numElements;
3601 
3602 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
3603 		}
3604 
3605 		const Allocation& alloc = inputBuffers[i]->getAllocation();
3606 
3607 		initializeMemory(context, alloc, extraDatas[i]);
3608 	}
3609 
3610 	DescriptorSetLayoutBuilder layoutBuilder;
3611 
3612 	for (deUint32 i = 0; i < extraDatasCount; i++)
3613 	{
3614 		layoutBuilder.addBinding(inputBuffers[i]->getType(), 1, VK_SHADER_STAGE_FRAGMENT_BIT, DE_NULL);
3615 	}
3616 
3617 	const Unique<VkDescriptorSetLayout>	descriptorSetLayout(layoutBuilder.build(vk, device));
3618 	const Unique<VkPipelineLayout>		pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
3619 	const Unique<VkRenderPass>			renderPass(makeRenderPass(context, format));
3620 	const deUint32						requiredSubgroupSizes[5] = {0u, 0u, 0u, 0u, requiredSubgroupSize};
3621 	const Unique<VkPipeline>			pipeline(makeGraphicsPipeline(context,
3622 																	  *pipelineLayout,
3623 																	  VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
3624 																	  *vertexShaderModule,
3625 																	  *fragmentShaderModule,
3626 																	  DE_NULL,
3627 																	  DE_NULL,
3628 																	  DE_NULL,
3629 																	  *renderPass,
3630 																	  VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
3631 																	  DE_NULL,
3632 																	  DE_NULL,
3633 																	  true,
3634 																	  VK_FORMAT_R32G32B32A32_SFLOAT,
3635 																	  0u,
3636 																	  0u,
3637 																	  0u,
3638 																	  0u,
3639 																	  fragmentShaderStageCreateFlags,
3640 																	  requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
3641 	DescriptorPoolBuilder				poolBuilder;
3642 
3643 	// To stop validation complaining, always add at least one type to pool.
3644 	poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3645 	for (deUint32 i = 0; i < extraDatasCount; i++)
3646 	{
3647 		poolBuilder.addType(inputBuffers[i]->getType());
3648 	}
3649 
3650 	Move<VkDescriptorPool> descriptorPool;
3651 	// Create descriptor set
3652 	Move<VkDescriptorSet> descriptorSet;
3653 
3654 	if (extraDatasCount > 0)
3655 	{
3656 		descriptorPool	= poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3657 
3658 		descriptorSet	= makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3659 	}
3660 
3661 	DescriptorSetUpdateBuilder updateBuilder;
3662 
3663 	for (deUint32 i = 0; i < extraDatasCount; i++)
3664 	{
3665 		if (inputBuffers[i]->isImage())
3666 		{
3667 			const VkDescriptorImageInfo info = makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(), inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3668 
3669 			updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i), inputBuffers[i]->getType(), &info);
3670 		}
3671 		else
3672 		{
3673 			const VkDescriptorBufferInfo	info	= makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, inputBuffers[i]->getAsBuffer()->getSize());
3674 
3675 			updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i), inputBuffers[i]->getType(), &info);
3676 		}
3677 	}
3678 
3679 	if (extraDatasCount > 0)
3680 		updateBuilder.update(vk, device);
3681 
3682 	const Unique<VkCommandPool>		cmdPool				(makeCommandPool(vk, device, queueFamilyIndex));
3683 	const deUint32					subgroupSize		= getSubgroupSize(context);
3684 	const Unique<VkCommandBuffer>	cmdBuffer			(makeCommandBuffer(context, *cmdPool));
3685 	unsigned						totalIterations		= 0;
3686 	unsigned						failedIterations	= 0;
3687 
3688 	for (deUint32 width = 8; width <= subgroupSize; width *= 2)
3689 	{
3690 		for (deUint32 height = 8; height <= subgroupSize; height *= 2)
3691 		{
3692 			totalIterations++;
3693 
3694 			// re-init the data
3695 			for (deUint32 i = 0; i < extraDatasCount; i++)
3696 			{
3697 				const Allocation& alloc = inputBuffers[i]->getAllocation();
3698 
3699 				initializeMemory(context, alloc, extraDatas[i]);
3700 			}
3701 
3702 			const VkDeviceSize			formatSize				= getFormatSizeInBytes(format);
3703 			const VkDeviceSize			resultImageSizeInBytes	= width * height * formatSize;
3704 			Image						resultImage				(context, width, height, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3705 			Buffer						resultBuffer			(context, resultImageSizeInBytes, VK_IMAGE_USAGE_TRANSFER_DST_BIT);
3706 			const Unique<VkFramebuffer>	framebuffer				(makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), width, height));
3707 			VkViewport					viewport				= makeViewport(width, height);
3708 			VkRect2D					scissor					= {{0, 0}, {width, height}};
3709 
3710 			beginCommandBuffer(vk, *cmdBuffer);
3711 
3712 			vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3713 
3714 			vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3715 
3716 			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, width, height), tcu::Vec4(0.0f));
3717 
3718 			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3719 
3720 			if (extraDatasCount > 0)
3721 				vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
3722 
3723 			vk.cmdDraw(*cmdBuffer, 4, 1, 0, 0);
3724 
3725 			endRenderPass(vk, *cmdBuffer);
3726 
3727 			copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), resultBuffer.getBuffer(), tcu::IVec2(width, height), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3728 
3729 			endCommandBuffer(vk, *cmdBuffer);
3730 
3731 			submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3732 
3733 			std::vector<const void*> datas;
3734 			{
3735 				const Allocation& resultAlloc = resultBuffer.getAllocation();
3736 				invalidateAlloc(vk, device, resultAlloc);
3737 
3738 				// we always have our result data first
3739 				datas.push_back(resultAlloc.getHostPtr());
3740 			}
3741 
3742 			if (!checkResult(internalData, datas, width, height, subgroupSize))
3743 			{
3744 				failedIterations++;
3745 			}
3746 
3747 			vk.resetCommandBuffer(*cmdBuffer, 0);
3748 		}
3749 	}
3750 
3751 	if (0 < failedIterations)
3752 	{
3753 		unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3754 
3755 		context.getTestContext().getLog()
3756 			<< TestLog::Message << valuesPassed << " / "
3757 			<< totalIterations << " values passed" << TestLog::EndMessage;
3758 
3759 		return tcu::TestStatus::fail("Failed!");
3760 	}
3761 
3762 	return tcu::TestStatus::pass("OK");
3763 }
3764 
makeComputePipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderModule shaderModule,const deUint32 pipelineShaderStageFlags,const deUint32 pipelineCreateFlags,VkPipeline basePipelineHandle,deUint32 localSizeX,deUint32 localSizeY,deUint32 localSizeZ,deUint32 requiredSubgroupSize)3765 Move<VkPipeline> makeComputePipeline (Context&					context,
3766 									  const VkPipelineLayout	pipelineLayout,
3767 									  const VkShaderModule		shaderModule,
3768 									  const deUint32			pipelineShaderStageFlags,
3769 									  const deUint32			pipelineCreateFlags,
3770 									  VkPipeline				basePipelineHandle,
3771 									  deUint32					localSizeX,
3772 									  deUint32					localSizeY,
3773 									  deUint32					localSizeZ,
3774 									  deUint32					requiredSubgroupSize)
3775 {
3776 	const deUint32														localSize[3]				= {localSizeX, localSizeY, localSizeZ};
3777 	const vk::VkSpecializationMapEntry									entries[3]					=
3778 	{
3779 		{0, sizeof(deUint32) * 0, sizeof(deUint32)},
3780 		{1, sizeof(deUint32) * 1, sizeof(deUint32)},
3781 		{2, static_cast<deUint32>(sizeof(deUint32) * 2), sizeof(deUint32)},
3782 	};
3783 	const vk::VkSpecializationInfo										info						=
3784 	{
3785 		/* mapEntryCount = */ 3,
3786 		/* pMapEntries   = */ entries,
3787 		/* dataSize      = */ sizeof(localSize),
3788 		/* pData         = */ localSize
3789 	};
3790 	const vk::VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT	subgroupSizeCreateInfo		=
3791 	{
3792 		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,	// VkStructureType    sType;
3793 		DE_NULL,																		// void*              pNext;
3794 		requiredSubgroupSize															// uint32_t           requiredSubgroupSize;
3795 	};
3796 	const vk::VkPipelineShaderStageCreateInfo							pipelineShaderStageParams	=
3797 	{
3798 		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,				// VkStructureType					sType;
3799 		(requiredSubgroupSize != 0u ? &subgroupSizeCreateInfo : DE_NULL),	// const void*						pNext;
3800 		pipelineShaderStageFlags,											// VkPipelineShaderStageCreateFlags	flags;
3801 		VK_SHADER_STAGE_COMPUTE_BIT,										// VkShaderStageFlagBits			stage;
3802 		shaderModule,														// VkShaderModule					module;
3803 		"main",																// const char*						pName;
3804 		&info,																// const VkSpecializationInfo*		pSpecializationInfo;
3805 	};
3806 	const vk::VkComputePipelineCreateInfo								pipelineCreateInfo			=
3807 	{
3808 		VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,	// VkStructureType	sType;
3809 		DE_NULL,										// const void*						pNext;
3810 		pipelineCreateFlags,							// VkPipelineCreateFlags			flags;
3811 		pipelineShaderStageParams,						// VkPipelineShaderStageCreateInfo	stage;
3812 		pipelineLayout,									// VkPipelineLayout					layout;
3813 		basePipelineHandle,								// VkPipeline						basePipelineHandle;
3814 		-1,												// deInt32							basePipelineIndex;
3815 	};
3816 
3817 	return createComputePipeline(context.getDeviceInterface(), context.getDevice(), DE_NULL, &pipelineCreateInfo);
3818 }
3819 
makeComputeTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * inputs,deUint32 inputsCount,const void * internalData,CheckResultCompute checkResult,const deUint32 pipelineShaderStageCreateFlags,const deUint32 numWorkgroups[3],const deBool isRequiredSubgroupSize,const deUint32 subgroupSize,const deUint32 localSizesToTest[][3],const deUint32 localSizesToTestCount)3820 tcu::TestStatus vkt::subgroups::makeComputeTestRequiredSubgroupSize (Context&			context,
3821 																	 VkFormat			format,
3822 																	 const SSBOData*	inputs,
3823 																	 deUint32			inputsCount,
3824 																	 const void*		internalData,
3825 																	 CheckResultCompute	checkResult,
3826 																	 const deUint32		pipelineShaderStageCreateFlags,
3827 																	 const deUint32		numWorkgroups[3],
3828 																	 const deBool		isRequiredSubgroupSize,
3829 																	 const deUint32		subgroupSize,
3830 																	 const deUint32		localSizesToTest[][3],
3831 																	 const deUint32		localSizesToTestCount)
3832 {
3833 	const DeviceInterface&									vk								= context.getDeviceInterface();
3834 	const VkDevice											device							= context.getDevice();
3835 	const VkQueue											queue							= context.getUniversalQueue();
3836 	const deUint32											queueFamilyIndex				= context.getUniversalQueueFamilyIndex();
3837 	const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT&	subgroupSizeControlProperties	= context.getSubgroupSizeControlPropertiesEXT();
3838 	const VkDeviceSize										elementSize						= getFormatSizeInBytes(format);
3839 	const VkDeviceSize										maxSubgroupSize					= isRequiredSubgroupSize
3840 																							? deMax32(subgroupSizeControlProperties.maxSubgroupSize, maxSupportedSubgroupSize())
3841 																							: maxSupportedSubgroupSize();
3842 	const VkDeviceSize										resultBufferSize				= maxSubgroupSize * maxSubgroupSize * maxSubgroupSize;
3843 	const VkDeviceSize										resultBufferSizeInBytes			= resultBufferSize * elementSize;
3844 	Buffer													resultBuffer					(context, resultBufferSizeInBytes);
3845 	std::vector< de::SharedPtr<BufferOrImage> >				inputBuffers					(inputsCount);
3846 
3847 	for (deUint32 i = 0; i < inputsCount; i++)
3848 	{
3849 		if (inputs[i].isImage)
3850 		{
3851 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(inputs[i].numElements), 1, inputs[i].format));
3852 		}
3853 		else
3854 		{
3855 			const vk::VkDeviceSize size = getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
3856 
3857 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
3858 		}
3859 
3860 		const Allocation& alloc = inputBuffers[i]->getAllocation();
3861 
3862 		initializeMemory(context, alloc, inputs[i]);
3863 	}
3864 
3865 	DescriptorSetLayoutBuilder layoutBuilder;
3866 	layoutBuilder.addBinding(
3867 		resultBuffer.getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);
3868 
3869 	for (deUint32 i = 0; i < inputsCount; i++)
3870 	{
3871 		layoutBuilder.addBinding(
3872 			inputBuffers[i]->getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);
3873 	}
3874 
3875 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
3876 		layoutBuilder.build(vk, device));
3877 
3878 	const Unique<VkShaderModule> shaderModule(
3879 		createShaderModule(vk, device,
3880 						   context.getBinaryCollection().get("comp"), 0u));
3881 	const Unique<VkPipelineLayout> pipelineLayout(
3882 		makePipelineLayout(vk, device, *descriptorSetLayout));
3883 
3884 	DescriptorPoolBuilder poolBuilder;
3885 
3886 	poolBuilder.addType(resultBuffer.getType());
3887 
3888 	for (deUint32 i = 0; i < inputsCount; i++)
3889 	{
3890 		poolBuilder.addType(inputBuffers[i]->getType());
3891 	}
3892 
3893 	const Unique<VkDescriptorPool>	descriptorPool			(poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
3894 	const Unique<VkDescriptorSet>	descriptorSet			(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
3895 	const VkDescriptorBufferInfo	resultDescriptorInfo =	makeDescriptorBufferInfo(resultBuffer.getBuffer(), 0ull, resultBufferSizeInBytes);
3896 	DescriptorSetUpdateBuilder		updateBuilder;
3897 
3898 	updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
3899 
3900 	for (deUint32 i = 0; i < inputsCount; i++)
3901 	{
3902 		if (inputBuffers[i]->isImage())
3903 		{
3904 			const VkDescriptorImageInfo		info	= makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(), inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3905 
3906 			updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i + 1), inputBuffers[i]->getType(), &info);
3907 		}
3908 		else
3909 		{
3910 			vk::VkDeviceSize		size	= getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
3911 			VkDescriptorBufferInfo	info	= makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, size);
3912 
3913 			updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i + 1), inputBuffers[i]->getType(), &info);
3914 		}
3915 	}
3916 
3917 	updateBuilder.update(vk, device);
3918 
3919 	const Unique<VkCommandPool>						cmdPool				(makeCommandPool(vk, device, queueFamilyIndex));
3920 	unsigned										totalIterations		= 0;
3921 	unsigned										failedIterations	= 0;
3922 	const Unique<VkCommandBuffer>					cmdBuffer			(makeCommandBuffer(context, *cmdPool));
3923 	std::vector<de::SharedPtr<Move<VkPipeline>>>	pipelines			(localSizesToTestCount);
3924 
3925 	context.getTestContext().touchWatchdog();
3926 	{
3927 		pipelines[0] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeComputePipeline(context,
3928 																								*pipelineLayout,
3929 																								*shaderModule,
3930 																								pipelineShaderStageCreateFlags,
3931 																								VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT,
3932 																								(VkPipeline) DE_NULL,
3933 																								localSizesToTest[0][0],
3934 																								localSizesToTest[0][1],
3935 																								localSizesToTest[0][2],
3936 																								isRequiredSubgroupSize ? subgroupSize : 0u)));
3937 	}
3938 	context.getTestContext().touchWatchdog();
3939 
3940 	for (deUint32 index = 1; index < (localSizesToTestCount - 1); index++)
3941 	{
3942 		const deUint32 nextX = localSizesToTest[index][0];
3943 		const deUint32 nextY = localSizesToTest[index][1];
3944 		const deUint32 nextZ = localSizesToTest[index][2];
3945 
3946 		context.getTestContext().touchWatchdog();
3947 		{
3948 			pipelines[index] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeComputePipeline(context,
3949 																										*pipelineLayout,
3950 																										*shaderModule,
3951 																										pipelineShaderStageCreateFlags,
3952 																										VK_PIPELINE_CREATE_DERIVATIVE_BIT,
3953 																										**pipelines[0],
3954 																										nextX,
3955 																										nextY,
3956 																										nextZ,
3957 																										isRequiredSubgroupSize ? subgroupSize : 0u)));
3958 		}
3959 		context.getTestContext().touchWatchdog();
3960 	}
3961 
3962 	for (deUint32 index = 0; index < (localSizesToTestCount - 1); index++)
3963 	{
3964 		// we are running one test
3965 		totalIterations++;
3966 
3967 		beginCommandBuffer(vk, *cmdBuffer);
3968 		{
3969 			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, **pipelines[index]);
3970 
3971 			vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
3972 
3973 			vk.cmdDispatch(*cmdBuffer,numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
3974 		}
3975 		endCommandBuffer(vk, *cmdBuffer);
3976 
3977 		submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3978 
3979 		std::vector<const void*> datas;
3980 
3981 		{
3982 			const Allocation& resultAlloc = resultBuffer.getAllocation();
3983 			invalidateAlloc(vk, device, resultAlloc);
3984 
3985 			// we always have our result data first
3986 			datas.push_back(resultAlloc.getHostPtr());
3987 		}
3988 
3989 		for (deUint32 i = 0; i < inputsCount; i++)
3990 		{
3991 			if (!inputBuffers[i]->isImage())
3992 			{
3993 				const Allocation& resultAlloc = inputBuffers[i]->getAllocation();
3994 				invalidateAlloc(vk, device, resultAlloc);
3995 
3996 				// we always have our result data first
3997 				datas.push_back(resultAlloc.getHostPtr());
3998 			}
3999 		}
4000 
4001 		if (!checkResult(internalData, datas, numWorkgroups, localSizesToTest[index], subgroupSize))
4002 		{
4003 			failedIterations++;
4004 		}
4005 
4006 		vk.resetCommandBuffer(*cmdBuffer, 0);
4007 	}
4008 
4009 	if (0 < failedIterations)
4010 	{
4011 		unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
4012 
4013 		context.getTestContext().getLog()
4014 			<< TestLog::Message << valuesPassed << " / "
4015 			<< totalIterations << " values passed" << TestLog::EndMessage;
4016 
4017 		return tcu::TestStatus::fail("Failed!");
4018 	}
4019 
4020 	return tcu::TestStatus::pass("OK");
4021 }
4022 
makeComputeTest(Context & context,VkFormat format,const SSBOData * inputs,deUint32 inputsCount,const void * internalData,CheckResultCompute checkResult,deUint32 requiredSubgroupSize,const deUint32 pipelineShaderStageCreateFlags)4023 tcu::TestStatus vkt::subgroups::makeComputeTest (Context&				context,
4024 												 VkFormat				format,
4025 												 const SSBOData*		inputs,
4026 												 deUint32				inputsCount,
4027 												 const void*			internalData,
4028 												 CheckResultCompute		checkResult,
4029 												 deUint32				requiredSubgroupSize,
4030 												 const deUint32			pipelineShaderStageCreateFlags)
4031 {
4032 	const deUint32 numWorkgroups[3] = {4, 2, 2};
4033 	deUint32 subgroupSize = requiredSubgroupSize;
4034 
4035 	if(requiredSubgroupSize == 0)
4036 		subgroupSize = vkt::subgroups::getSubgroupSize(context);
4037 
4038 	const deUint32 localSizesToTestCount = 8;
4039 	deUint32 localSizesToTest[localSizesToTestCount][3] =
4040 	{
4041 		{1, 1, 1},
4042 		{subgroupSize, 1, 1},
4043 		{1, subgroupSize, 1},
4044 		{1, 1, subgroupSize},
4045 		{32, 4, 1},
4046 		{1, 4, 32},
4047 		{3, 5, 7},
4048 		{1, 1, 1} // Isn't used, just here to make double buffering checks easier
4049 	};
4050 
4051 	return makeComputeTestRequiredSubgroupSize(context, format, inputs, inputsCount, internalData, checkResult, pipelineShaderStageCreateFlags,
4052 											   numWorkgroups, requiredSubgroupSize != 0u, subgroupSize, localSizesToTest, localSizesToTestCount);
4053 }
4054 
checkShaderStageSetValidity(const VkShaderStageFlags shaderStages)4055 static inline void checkShaderStageSetValidity (const VkShaderStageFlags shaderStages)
4056 {
4057 	if (shaderStages == 0)
4058 		TCU_THROW(InternalError, "Shader stage is not specified");
4059 
4060 	// It can actually be only 1 or 0.
4061 	const deUint32 exclusivePipelinesCount	= (isAllComputeStages(shaderStages) ? 1 :0)
4062 											+ (isAllGraphicsStages(shaderStages) ? 1 :0)
4063 											+ (isAllRayTracingStages(shaderStages) ? 1 :0);
4064 
4065 	if (exclusivePipelinesCount != 1)
4066 		TCU_THROW(InternalError, "Mix of shaders from different pipelines is detected");
4067 }
4068 
supportedCheckShader(Context & context,const VkShaderStageFlags shaderStages)4069 void vkt::subgroups::supportedCheckShader (Context& context, const VkShaderStageFlags shaderStages)
4070 {
4071 	checkShaderStageSetValidity(shaderStages);
4072 
4073 	if ((context.getSubgroupProperties().supportedStages & shaderStages) == 0)
4074 	{
4075 		if (isAllComputeStages(shaderStages))
4076 			TCU_FAIL("Compute shader is required to support subgroup operations");
4077 		else
4078 			TCU_THROW(NotSupportedError, "Subgroup support is not available for test shader stage(s)");
4079 	}
4080 
4081 	if ((VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) & shaderStages &&
4082 		context.isDeviceFunctionalitySupported("VK_KHR_portability_subset") &&
4083 		!context.getPortabilitySubsetFeatures().tessellationIsolines)
4084 	{
4085 		TCU_THROW(NotSupportedError, "VK_KHR_portability_subset: Tessellation iso lines are not supported by this implementation");
4086 	}
4087 }
4088 
4089 
4090 namespace vkt
4091 {
4092 namespace subgroups
4093 {
4094 typedef std::vector< de::SharedPtr<BufferOrImage> > vectorBufferOrImage;
4095 
// Shader group indices. Going by the enumerator names these are the raygen, miss, hit and
// callable groups (usage is outside this excerpt); GROUP_COUNT is the number of groups.
enum ShaderGroups
{
	FIRST_GROUP		= 0,
	RAYGEN_GROUP	= FIRST_GROUP,	// shares index 0 with FIRST_GROUP
	MISS_GROUP		= 1,
	HIT_GROUP		= 2,
	CALL_GROUP		= 3,
	GROUP_COUNT		= 4
};
4105 
getAllRayTracingFormats()4106 const std::vector<vk::VkFormat> getAllRayTracingFormats()
4107 {
4108 	std::vector<VkFormat> formats;
4109 
4110 	formats.push_back(VK_FORMAT_R8G8B8_SINT);
4111 	formats.push_back(VK_FORMAT_R8_UINT);
4112 	formats.push_back(VK_FORMAT_R8G8B8A8_UINT);
4113 	formats.push_back(VK_FORMAT_R16G16B16_SINT);
4114 	formats.push_back(VK_FORMAT_R16_UINT);
4115 	formats.push_back(VK_FORMAT_R16G16B16A16_UINT);
4116 	formats.push_back(VK_FORMAT_R32G32B32_SINT);
4117 	formats.push_back(VK_FORMAT_R32_UINT);
4118 	formats.push_back(VK_FORMAT_R32G32B32A32_UINT);
4119 	formats.push_back(VK_FORMAT_R64G64B64_SINT);
4120 	formats.push_back(VK_FORMAT_R64_UINT);
4121 	formats.push_back(VK_FORMAT_R64G64B64A64_UINT);
4122 	formats.push_back(VK_FORMAT_R16G16B16A16_SFLOAT);
4123 	formats.push_back(VK_FORMAT_R32_SFLOAT);
4124 	formats.push_back(VK_FORMAT_R32G32B32A32_SFLOAT);
4125 	formats.push_back(VK_FORMAT_R64_SFLOAT);
4126 	formats.push_back(VK_FORMAT_R64G64B64_SFLOAT);
4127 	formats.push_back(VK_FORMAT_R64G64B64A64_SFLOAT);
4128 	formats.push_back(VK_FORMAT_R8_USCALED);
4129 	formats.push_back(VK_FORMAT_R8G8_USCALED);
4130 	formats.push_back(VK_FORMAT_R8G8B8_USCALED);
4131 	formats.push_back(VK_FORMAT_R8G8B8A8_USCALED);
4132 
4133 	return formats;
4134 }
4135 
addRayTracingNoSubgroupShader(SourceCollections & programCollection)4136 void addRayTracingNoSubgroupShader (SourceCollections& programCollection)
4137 {
4138 	const vk::ShaderBuildOptions	buildOptions	(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
4139 
4140 	const std::string rgenShaderNoSubgroups =
4141 		"#version 460 core\n"
4142 		"#extension GL_EXT_ray_tracing: require\n"
4143 		"layout(location = 0) rayPayloadEXT uvec4 payload;\n"
4144 		"layout(location = 0) callableDataEXT uvec4 callData;"
4145 		"layout(set = 1, binding = 0) uniform accelerationStructureEXT topLevelAS;\n"
4146 		"\n"
4147 		"void main()\n"
4148 		"{\n"
4149 		"  uint  rayFlags   = 0;\n"
4150 		"  uint  cullMask   = 0xFF;\n"
4151 		"  float tmin       = 0.0;\n"
4152 		"  float tmax       = 9.0;\n"
4153 		"  vec3  origin     = vec3((float(gl_LaunchIDEXT.x) + 0.5f) / float(gl_LaunchSizeEXT.x), (float(gl_LaunchIDEXT.y) + 0.5f) / float(gl_LaunchSizeEXT.y), 0.0);\n"
4154 		"  vec3  directHit  = vec3(0.0, 0.0, -1.0);\n"
4155 		"  vec3  directMiss = vec3(0.0, 0.0, +1.0);\n"
4156 		"\n"
4157 		"  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directHit, tmax, 0);\n"
4158 		"  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directMiss, tmax, 0);\n"
4159 		"  executeCallableEXT(0, 0);"
4160 		"}\n";
4161 	const std::string hitShaderNoSubgroups =
4162 		"#version 460 core\n"
4163 		"#extension GL_EXT_ray_tracing: require\n"
4164 		"hitAttributeEXT vec3 attribs;\n"
4165 		"layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
4166 		"\n"
4167 		"void main()\n"
4168 		"{\n"
4169 		"}\n";
4170 	const std::string missShaderNoSubgroups =
4171 		"#version 460 core\n"
4172 		"#extension GL_EXT_ray_tracing: require\n"
4173 		"layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
4174 		"\n"
4175 		"void main()\n"
4176 		"{\n"
4177 		"}\n";
4178 	const std::string sectShaderNoSubgroups =
4179 		"#version 460 core\n"
4180 		"#extension GL_EXT_ray_tracing: require\n"
4181 		"hitAttributeEXT vec3 hitAttribute;\n"
4182 		"\n"
4183 		"void main()\n"
4184 		"{\n"
4185 		"  reportIntersectionEXT(0.75f, gl_HitKindFrontFacingTriangleEXT);\n"
4186 		"}\n";
4187 	const std::string callShaderNoSubgroups =
4188 		"#version 460 core\n"
4189 		"#extension GL_EXT_ray_tracing: require\n"
4190 		"layout(location = 0) callableDataInEXT float callData;\n"
4191 		"\n"
4192 		"void main()\n"
4193 		"{\n"
4194 		"}\n";
4195 
4196 	programCollection.glslSources.add("rgen_noSubgroup") << glu::RaygenSource		(rgenShaderNoSubgroups) << buildOptions;
4197 	programCollection.glslSources.add("ahit_noSubgroup") << glu::AnyHitSource		(hitShaderNoSubgroups)  << buildOptions;
4198 	programCollection.glslSources.add("chit_noSubgroup") << glu::ClosestHitSource	(hitShaderNoSubgroups)  << buildOptions;
4199 	programCollection.glslSources.add("miss_noSubgroup") << glu::MissSource			(missShaderNoSubgroups) << buildOptions;
4200 	programCollection.glslSources.add("sect_noSubgroup") << glu::IntersectionSource	(sectShaderNoSubgroups) << buildOptions;
4201 	programCollection.glslSources.add("call_noSubgroup") << glu::CallableSource		(callShaderNoSubgroups) << buildOptions;
4202 }
4203 
enumerateRayTracingShaderStages(const VkShaderStageFlags shaderStage)4204 static vector<VkShaderStageFlagBits> enumerateRayTracingShaderStages (const VkShaderStageFlags	shaderStage)
4205 {
4206 	vector<VkShaderStageFlagBits>	result;
4207 	const VkShaderStageFlagBits		shaderStageFlags[]	=
4208 	{
4209 		VK_SHADER_STAGE_RAYGEN_BIT_KHR,
4210 		VK_SHADER_STAGE_ANY_HIT_BIT_KHR,
4211 		VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,
4212 		VK_SHADER_STAGE_MISS_BIT_KHR,
4213 		VK_SHADER_STAGE_INTERSECTION_BIT_KHR,
4214 		VK_SHADER_STAGE_CALLABLE_BIT_KHR,
4215 	};
4216 
4217 	for (auto shaderStageFlag: shaderStageFlags)
4218 	{
4219 		if (0 != (shaderStage & shaderStageFlag))
4220 			result.push_back(shaderStageFlag);
4221 	}
4222 
4223 	return result;
4224 }
4225 
getRayTracingResultBinding(const VkShaderStageFlagBits shaderStage)4226 static deUint32 getRayTracingResultBinding (const VkShaderStageFlagBits shaderStage)
4227 {
4228 	const VkShaderStageFlags	shaderStageFlags[]	=
4229 	{
4230 		VK_SHADER_STAGE_RAYGEN_BIT_KHR,
4231 		VK_SHADER_STAGE_ANY_HIT_BIT_KHR,
4232 		VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,
4233 		VK_SHADER_STAGE_MISS_BIT_KHR,
4234 		VK_SHADER_STAGE_INTERSECTION_BIT_KHR,
4235 		VK_SHADER_STAGE_CALLABLE_BIT_KHR,
4236 	};
4237 
4238 	for (deUint32 shaderStageNdx = 0; shaderStageNdx < DE_LENGTH_OF_ARRAY(shaderStageFlags); ++shaderStageNdx)
4239 	{
4240 		if (0 != (shaderStage & shaderStageFlags[shaderStageNdx]))
4241 		{
4242 			DE_ASSERT(0 == (shaderStage & (~shaderStageFlags[shaderStageNdx])));
4243 
4244 			return shaderStageNdx;
4245 		}
4246 	}
4247 
4248 	TCU_THROW(InternalError, "Non-raytracing stage specified or no stage at all");
4249 }
4250 
makeRayTracingInputBuffers(Context & context,VkFormat format,const SSBOData * extraDatas,deUint32 extraDatasCount,const vector<VkShaderStageFlagBits> & stagesVector)4251 static vectorBufferOrImage makeRayTracingInputBuffers (Context&								context,
4252 													   VkFormat								format,
4253 													   const SSBOData*						extraDatas,
4254 													   deUint32								extraDatasCount,
4255 													   const vector<VkShaderStageFlagBits>&	stagesVector)
4256 {
4257 	const size_t		stagesCount		= stagesVector.size();
4258 	const VkDeviceSize	shaderSize		= getMaxWidth();
4259 	const VkDeviceSize	inputBufferSize	= getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize;
4260 	vectorBufferOrImage	inputBuffers	(stagesCount + extraDatasCount);
4261 
4262 	// The implicit result SSBO we use to store our outputs from the shader
4263 	for (size_t stageNdx = 0u; stageNdx < stagesCount; ++stageNdx)
4264 		inputBuffers[stageNdx]	= de::SharedPtr<BufferOrImage>(new Buffer(context, inputBufferSize));
4265 
4266 	for (size_t stageNdx = stagesCount; stageNdx < stagesCount + extraDatasCount; ++stageNdx)
4267 	{
4268 		const size_t	datasNdx	= stageNdx - stagesCount;
4269 
4270 		if (extraDatas[datasNdx].isImage)
4271 		{
4272 			inputBuffers[stageNdx] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
4273 		}
4274 		else
4275 		{
4276 			const VkDeviceSize size = getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) * extraDatas[datasNdx].numElements;
4277 
4278 			inputBuffers[stageNdx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
4279 		}
4280 
4281 		initializeMemory(context, inputBuffers[stageNdx]->getAllocation(), extraDatas[datasNdx]);
4282 	}
4283 
4284 	return inputBuffers;
4285 }
4286 
makeRayTracingDescriptorSetLayout(Context & context,const SSBOData * extraDatas,deUint32 extraDatasCount,const vector<VkShaderStageFlagBits> & stagesVector,const vectorBufferOrImage & inputBuffers)4287 static Move<VkDescriptorSetLayout> makeRayTracingDescriptorSetLayout (Context&								context,
4288 																	  const SSBOData*						extraDatas,
4289 																	  deUint32								extraDatasCount,
4290 																	  const vector<VkShaderStageFlagBits>&	stagesVector,
4291 																	  const vectorBufferOrImage&			inputBuffers)
4292 {
4293 	const DeviceInterface&		vkd				= context.getDeviceInterface();
4294 	const VkDevice				device			= context.getDevice();
4295 	const size_t				stagesCount		= stagesVector.size();
4296 	DescriptorSetLayoutBuilder	layoutBuilder;
4297 
4298 	// The implicit result SSBO we use to store our outputs from the shader
4299 	for (size_t stageNdx = 0u; stageNdx < stagesVector.size(); ++stageNdx)
4300 	{
4301 		const deUint32	stageBinding	= getRayTracingResultBinding(stagesVector[stageNdx]);
4302 
4303 		layoutBuilder.addIndexedBinding(inputBuffers[stageNdx]->getType(), 1, stagesVector[stageNdx], stageBinding, DE_NULL);
4304 	}
4305 
4306 	for (size_t stageNdx = stagesCount; stageNdx < stagesCount + extraDatasCount; ++stageNdx)
4307 	{
4308 		const size_t datasNdx = stageNdx - stagesCount;
4309 
4310 		layoutBuilder.addIndexedBinding(inputBuffers[stageNdx]->getType(), 1, extraDatas[datasNdx].stages, extraDatas[datasNdx].binding, DE_NULL);
4311 	}
4312 
4313 	return layoutBuilder.build(vkd, device);
4314 }
4315 
makeRayTracingDescriptorSetLayoutAS(Context & context)4316 static Move<VkDescriptorSetLayout> makeRayTracingDescriptorSetLayoutAS (Context&	context)
4317 {
4318 	const DeviceInterface&		vkd				= context.getDeviceInterface();
4319 	const VkDevice				device			= context.getDevice();
4320 	DescriptorSetLayoutBuilder	layoutBuilder;
4321 
4322 	layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, VK_SHADER_STAGE_RAYGEN_BIT_KHR);
4323 
4324 	return layoutBuilder.build(vkd, device);
4325 }
4326 
makeRayTracingDescriptorPool(Context & context,const vectorBufferOrImage & inputBuffers)4327 static Move<VkDescriptorPool> makeRayTracingDescriptorPool (Context&						context,
4328 															const vectorBufferOrImage&		inputBuffers)
4329 {
4330 	const DeviceInterface&	vkd					= context.getDeviceInterface();
4331 	const VkDevice			device				= context.getDevice();
4332 	const deUint32			maxDescriptorSets	= 2u;
4333 	DescriptorPoolBuilder	poolBuilder;
4334 	Move<VkDescriptorPool>	result;
4335 
4336 	if (inputBuffers.size() > 0)
4337 	{
4338 		for (size_t ndx = 0u; ndx < inputBuffers.size(); ndx++)
4339 			poolBuilder.addType(inputBuffers[ndx]->getType());
4340 	}
4341 
4342 	poolBuilder.addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR);
4343 
4344 	result = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, maxDescriptorSets);
4345 
4346 	return result;
4347 }
4348 
makeRayTracingDescriptorSet(Context & context,VkDescriptorPool descriptorPool,VkDescriptorSetLayout descriptorSetLayout,const SSBOData * extraDatas,deUint32 extraDatasCount,const vector<VkShaderStageFlagBits> & stagesVector,const vectorBufferOrImage & inputBuffers)4349 static Move<VkDescriptorSet> makeRayTracingDescriptorSet (Context&								context,
4350 														  VkDescriptorPool						descriptorPool,
4351 														  VkDescriptorSetLayout					descriptorSetLayout,
4352 														  const SSBOData*						extraDatas,
4353 														  deUint32								extraDatasCount,
4354 														  const vector<VkShaderStageFlagBits>&	stagesVector,
4355 														  const vectorBufferOrImage&			inputBuffers)
4356 {
4357 	const DeviceInterface&	vkd				= context.getDeviceInterface();
4358 	const VkDevice			device			= context.getDevice();
4359 	const size_t			stagesCount		= stagesVector.size();
4360 	Move<VkDescriptorSet>	descriptorSet;
4361 
4362 	if (inputBuffers.size() > 0)
4363 	{
4364 		DescriptorSetUpdateBuilder updateBuilder;
4365 
4366 		// Create descriptor set
4367 		descriptorSet = makeDescriptorSet(vkd, device, descriptorPool, descriptorSetLayout);
4368 
4369 		for (size_t ndx = 0u; ndx < stagesCount + extraDatasCount; ndx++)
4370 		{
4371 			const deUint32	binding	= (ndx < stagesCount)
4372 									? getRayTracingResultBinding(stagesVector[ndx])
4373 									: extraDatas[ndx - stagesCount].binding;
4374 
4375 			if (inputBuffers[ndx]->isImage())
4376 			{
4377 				const VkDescriptorImageInfo		info	= makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(), inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
4378 
4379 				updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(binding), inputBuffers[ndx]->getType(), &info);
4380 			}
4381 			else
4382 			{
4383 				const VkDescriptorBufferInfo	info	= makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(), 0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
4384 
4385 				updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(binding), inputBuffers[ndx]->getType(), &info);
4386 			}
4387 		}
4388 
4389 		updateBuilder.update(vkd, device);
4390 	}
4391 
4392 	return descriptorSet;
4393 }
4394 
makeRayTracingDescriptorSetAS(Context & context,VkDescriptorPool descriptorPool,VkDescriptorSetLayout descriptorSetLayout,de::MovePtr<TopLevelAccelerationStructure> & topLevelAccelerationStructure)4395 static Move<VkDescriptorSet> makeRayTracingDescriptorSetAS (Context&									context,
4396 															VkDescriptorPool							descriptorPool,
4397 															VkDescriptorSetLayout						descriptorSetLayout,
4398 															de::MovePtr<TopLevelAccelerationStructure>&	topLevelAccelerationStructure)
4399 {
4400 	const DeviceInterface&								vkd										= context.getDeviceInterface();
4401 	const VkDevice										device									= context.getDevice();
4402 	const TopLevelAccelerationStructure*				topLevelAccelerationStructurePtr		= topLevelAccelerationStructure.get();
4403 	const VkWriteDescriptorSetAccelerationStructureKHR	accelerationStructureWriteDescriptorSet	=
4404 	{
4405 		VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,	//  VkStructureType						sType;
4406 		DE_NULL,															//  const void*							pNext;
4407 		1u,																	//  deUint32							accelerationStructureCount;
4408 		topLevelAccelerationStructurePtr->getPtr(),							//  const VkAccelerationStructureKHR*	pAccelerationStructures;
4409 	};
4410 	Move<VkDescriptorSet>								descriptorSet = makeDescriptorSet(vkd, device, descriptorPool, descriptorSetLayout);
4411 
4412 	DescriptorSetUpdateBuilder()
4413 		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &accelerationStructureWriteDescriptorSet)
4414 		.update(vkd, device);
4415 
4416 	return descriptorSet;
4417 }
4418 
makeRayTracingPipelineLayout(Context & context,const VkDescriptorSetLayout descriptorSetLayout0,const VkDescriptorSetLayout descriptorSetLayout1)4419 static Move<VkPipelineLayout> makeRayTracingPipelineLayout (Context&					context,
4420 															const VkDescriptorSetLayout	descriptorSetLayout0,
4421 															const VkDescriptorSetLayout	descriptorSetLayout1)
4422 {
4423 	const DeviceInterface&						vkd							= context.getDeviceInterface();
4424 	const VkDevice								device						= context.getDevice();
4425 	const std::vector<VkDescriptorSetLayout>	descriptorSetLayouts		{ descriptorSetLayout0, descriptorSetLayout1 };
4426 	const deUint32								descriptorSetLayoutsSize	= static_cast<deUint32>(descriptorSetLayouts.size());
4427 
4428 	return makePipelineLayout(vkd, device, descriptorSetLayoutsSize, descriptorSetLayouts.data());
4429 }
4430 
createTopAccelerationStructure(Context & context,de::SharedPtr<BottomLevelAccelerationStructure> bottomLevelAccelerationStructure)4431 static de::MovePtr<TopLevelAccelerationStructure> createTopAccelerationStructure (Context&											context,
4432 																				  de::SharedPtr<BottomLevelAccelerationStructure>	bottomLevelAccelerationStructure)
4433 {
4434 	const DeviceInterface&						vkd			= context.getDeviceInterface();
4435 	const VkDevice								device		= context.getDevice();
4436 	Allocator&									allocator	= context.getDefaultAllocator();
4437 	de::MovePtr<TopLevelAccelerationStructure>	result		= makeTopLevelAccelerationStructure();
4438 
4439 	result->setInstanceCount(1);
4440 	result->addInstance(bottomLevelAccelerationStructure);
4441 	result->create(vkd, device, allocator);
4442 
4443 	return result;
4444 }
4445 
createBottomAccelerationStructure(Context & context)4446 static de::SharedPtr<BottomLevelAccelerationStructure> createBottomAccelerationStructure (Context&	context)
4447 {
4448 	const DeviceInterface&							vkd				= context.getDeviceInterface();
4449 	const VkDevice									device			= context.getDevice();
4450 	Allocator&										allocator		= context.getDefaultAllocator();
4451 	de::MovePtr<BottomLevelAccelerationStructure>	result			= makeBottomLevelAccelerationStructure();
4452 	const std::vector<tcu::Vec3>					geometryData	{ tcu::Vec3(-1.0f, -1.0f, -2.0f), tcu::Vec3(+1.0f, +1.0f, -1.0f) };
4453 
4454 	result->setGeometryCount(1u);
4455 	result->addGeometry(geometryData, false);
4456 	result->create(vkd, device, allocator, 0u);
4457 
4458 	return de::SharedPtr<BottomLevelAccelerationStructure>(result.release());
4459 }
4460 
makeRayTracingPipeline(Context & context,const VkShaderStageFlags shaderStageTested,const VkPipelineLayout pipelineLayout,const deUint32 shaderStageCreateFlags[6],const deUint32 requiredSubgroupSize[6],Move<VkPipeline> & pipelineOut)4461 static de::MovePtr<RayTracingPipeline> makeRayTracingPipeline (Context&					context,
4462 															   const VkShaderStageFlags	shaderStageTested,
4463 															   const VkPipelineLayout	pipelineLayout,
4464 															   const deUint32			shaderStageCreateFlags[6],
4465 															   const deUint32			requiredSubgroupSize[6],
4466 															   Move<VkPipeline>&		pipelineOut)
4467 {
4468 	const DeviceInterface&											vkd									= context.getDeviceInterface();
4469 	const VkDevice													device								= context.getDevice();
4470 	BinaryCollection&												collection							= context.getBinaryCollection();
4471 	const char*														shaderRgenName						= (0 != (shaderStageTested & VK_SHADER_STAGE_RAYGEN_BIT_KHR))			? "rgen" : "rgen_noSubgroup";
4472 	const char*														shaderAhitName						= (0 != (shaderStageTested & VK_SHADER_STAGE_ANY_HIT_BIT_KHR))			? "ahit" : "ahit_noSubgroup";
4473 	const char*														shaderChitName						= (0 != (shaderStageTested & VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR))		? "chit" : "chit_noSubgroup";
4474 	const char*														shaderMissName						= (0 != (shaderStageTested & VK_SHADER_STAGE_MISS_BIT_KHR))				? "miss" : "miss_noSubgroup";
4475 	const char*														shaderSectName						= (0 != (shaderStageTested & VK_SHADER_STAGE_INTERSECTION_BIT_KHR))		? "sect" : "sect_noSubgroup";
4476 	const char*														shaderCallName						= (0 != (shaderStageTested & VK_SHADER_STAGE_CALLABLE_BIT_KHR))			? "call" : "call_noSubgroup";
4477 	const VkShaderModuleCreateFlags									noShaderModuleCreateFlags			= static_cast<VkShaderModuleCreateFlags>(0);
4478 	Move<VkShaderModule>											rgenShaderModule					= createShaderModule(vkd, device, collection.get(shaderRgenName), noShaderModuleCreateFlags);
4479 	Move<VkShaderModule>											ahitShaderModule					= createShaderModule(vkd, device, collection.get(shaderAhitName), noShaderModuleCreateFlags);
4480 	Move<VkShaderModule>											chitShaderModule					= createShaderModule(vkd, device, collection.get(shaderChitName), noShaderModuleCreateFlags);
4481 	Move<VkShaderModule>											missShaderModule					= createShaderModule(vkd, device, collection.get(shaderMissName), noShaderModuleCreateFlags);
4482 	Move<VkShaderModule>											sectShaderModule					= createShaderModule(vkd, device, collection.get(shaderSectName), noShaderModuleCreateFlags);
4483 	Move<VkShaderModule>											callShaderModule					= createShaderModule(vkd, device, collection.get(shaderCallName), noShaderModuleCreateFlags);
4484 	const VkPipelineShaderStageCreateFlags							noPipelineShaderStageCreateFlags	= static_cast<VkPipelineShaderStageCreateFlags>(0);
4485 	const VkPipelineShaderStageCreateFlags							rgenPipelineShaderStageCreateFlags	= (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[0];
4486 	const VkPipelineShaderStageCreateFlags							ahitPipelineShaderStageCreateFlags	= (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[1];
4487 	const VkPipelineShaderStageCreateFlags							chitPipelineShaderStageCreateFlags	= (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[2];
4488 	const VkPipelineShaderStageCreateFlags							missPipelineShaderStageCreateFlags	= (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[3];
4489 	const VkPipelineShaderStageCreateFlags							sectPipelineShaderStageCreateFlags	= (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[4];
4490 	const VkPipelineShaderStageCreateFlags							callPipelineShaderStageCreateFlags	= (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[5];
4491 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT	requiredSubgroupSizeCreateInfo[6]	=
4492 	{
4493 		{
4494 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4495 			DE_NULL,
4496 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[0] : 0u,
4497 		},
4498 		{
4499 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4500 			DE_NULL,
4501 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[1] : 0u,
4502 		},
4503 		{
4504 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4505 			DE_NULL,
4506 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[2] : 0u,
4507 		},
4508 		{
4509 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4510 			DE_NULL,
4511 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[3] : 0u,
4512 		},
4513 		{
4514 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4515 			DE_NULL,
4516 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[4] : 0u,
4517 		},
4518 		{
4519 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4520 			DE_NULL,
4521 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[5] : 0u,
4522 		},
4523 	};
4524 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT*	rgenRequiredSubgroupSizeCreateInfo	= (requiredSubgroupSizeCreateInfo[0].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[0];
4525 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT*	ahitRequiredSubgroupSizeCreateInfo	= (requiredSubgroupSizeCreateInfo[1].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[1];
4526 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT*	chitRequiredSubgroupSizeCreateInfo	= (requiredSubgroupSizeCreateInfo[2].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[2];
4527 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT*	missRequiredSubgroupSizeCreateInfo	= (requiredSubgroupSizeCreateInfo[3].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[3];
4528 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT*	sectRequiredSubgroupSizeCreateInfo	= (requiredSubgroupSizeCreateInfo[4].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[4];
4529 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT*	callRequiredSubgroupSizeCreateInfo	= (requiredSubgroupSizeCreateInfo[5].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[5];
4530 	de::MovePtr<RayTracingPipeline>									rayTracingPipeline					= de::newMovePtr<RayTracingPipeline>();
4531 
4532 	rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR		, rgenShaderModule, RAYGEN_GROUP,	DE_NULL, rgenPipelineShaderStageCreateFlags, rgenRequiredSubgroupSizeCreateInfo);
4533 	rayTracingPipeline->addShader(VK_SHADER_STAGE_ANY_HIT_BIT_KHR		, ahitShaderModule, HIT_GROUP,		DE_NULL, ahitPipelineShaderStageCreateFlags, ahitRequiredSubgroupSizeCreateInfo);
4534 	rayTracingPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR	, chitShaderModule, HIT_GROUP,		DE_NULL, chitPipelineShaderStageCreateFlags, chitRequiredSubgroupSizeCreateInfo);
4535 	rayTracingPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR			, missShaderModule, MISS_GROUP,		DE_NULL, missPipelineShaderStageCreateFlags, missRequiredSubgroupSizeCreateInfo);
4536 	rayTracingPipeline->addShader(VK_SHADER_STAGE_INTERSECTION_BIT_KHR	, sectShaderModule, HIT_GROUP,		DE_NULL, sectPipelineShaderStageCreateFlags, sectRequiredSubgroupSizeCreateInfo);
4537 	rayTracingPipeline->addShader(VK_SHADER_STAGE_CALLABLE_BIT_KHR		, callShaderModule, CALL_GROUP,		DE_NULL, callPipelineShaderStageCreateFlags, callRequiredSubgroupSizeCreateInfo);
4538 
4539 	// Must execute createPipeline here, due to pNext pointers in calls to addShader are local
4540 	pipelineOut	= rayTracingPipeline->createPipeline(vkd, device, pipelineLayout);
4541 
4542 	return rayTracingPipeline;
4543 }
4544 
getPossibleRayTracingSubgroupStages(Context & context,const VkShaderStageFlags testedStages)4545 VkShaderStageFlags getPossibleRayTracingSubgroupStages (Context& context, const VkShaderStageFlags testedStages)
4546 {
4547 	const VkPhysicalDeviceSubgroupProperties&	subgroupProperties	= context.getSubgroupProperties();
4548 	const VkShaderStageFlags					stages				= testedStages & subgroupProperties.supportedStages;
4549 
4550 	DE_ASSERT(isAllRayTracingStages(testedStages));
4551 
4552 	return stages;
4553 }
4554 
allRayTracingStages(Context & context,VkFormat format,const SSBOData * extraDatas,deUint32 extraDataCount,const void * internalData,const VerificationFunctor & checkResult,const VkShaderStageFlags shaderStage)4555 tcu::TestStatus allRayTracingStages (Context&						context,
4556 									 VkFormat						format,
4557 									 const SSBOData*				extraDatas,
4558 									 deUint32						extraDataCount,
4559 									 const void*					internalData,
4560 									 const VerificationFunctor&		checkResult,
4561 									 const VkShaderStageFlags		shaderStage)
4562 {
4563 	return vkt::subgroups::allRayTracingStagesRequiredSubgroupSize(context,
4564 																   format,
4565 																   extraDatas,
4566 																   extraDataCount,
4567 																   internalData,
4568 																   checkResult,
4569 																   shaderStage,
4570 																   DE_NULL,
4571 																   DE_NULL);
4572 }
4573 
allRayTracingStagesRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * extraDatas,deUint32 extraDatasCount,const void * internalData,const VerificationFunctor & checkResult,const VkShaderStageFlags shaderStageTested,const deUint32 shaderStageCreateFlags[6],const deUint32 requiredSubgroupSize[6])4574 tcu::TestStatus allRayTracingStagesRequiredSubgroupSize (Context&					context,
4575 														 VkFormat					format,
4576 														 const SSBOData*			extraDatas,
4577 														 deUint32					extraDatasCount,
4578 														 const void*				internalData,
4579 														 const VerificationFunctor&	checkResult,
4580 														 const VkShaderStageFlags	shaderStageTested,
4581 														 const deUint32				shaderStageCreateFlags[6],
4582 														 const deUint32				requiredSubgroupSize[6])
4583 {
4584 	const DeviceInterface&							vkd									= context.getDeviceInterface();
4585 	const VkDevice									device								= context.getDevice();
4586 	const VkQueue									queue								= context.getUniversalQueue();
4587 	const deUint32									queueFamilyIndex					= context.getUniversalQueueFamilyIndex();
4588 	Allocator&										allocator							= context.getDefaultAllocator();
4589 	const deUint32									subgroupSize						= getSubgroupSize(context);
4590 	const deUint32									maxWidth							= getMaxWidth();
4591 	const vector<VkShaderStageFlagBits>				stagesVector						= enumerateRayTracingShaderStages(shaderStageTested);
4592 	const deUint32									stagesCount							= static_cast<deUint32>(stagesVector.size());
4593 	de::SharedPtr<BottomLevelAccelerationStructure>	bottomLevelAccelerationStructure	= createBottomAccelerationStructure(context);
4594 	de::MovePtr<TopLevelAccelerationStructure>		topLevelAccelerationStructure		= createTopAccelerationStructure(context, bottomLevelAccelerationStructure);
4595 	vectorBufferOrImage								inputBuffers						= makeRayTracingInputBuffers(context, format, extraDatas, extraDatasCount, stagesVector);
4596 	const Move<VkDescriptorSetLayout>				descriptorSetLayout					= makeRayTracingDescriptorSetLayout(context, extraDatas, extraDatasCount, stagesVector, inputBuffers);
4597 	const Move<VkDescriptorSetLayout>				descriptorSetLayoutAS				= makeRayTracingDescriptorSetLayoutAS(context);
4598 	const Move<VkPipelineLayout>					pipelineLayout						= makeRayTracingPipelineLayout(context, *descriptorSetLayout, *descriptorSetLayoutAS);
4599 	Move<VkPipeline>								pipeline							= Move<VkPipeline>();
4600 	const de::MovePtr<RayTracingPipeline>			rayTracingPipeline					= makeRayTracingPipeline(context, shaderStageTested, *pipelineLayout, shaderStageCreateFlags, requiredSubgroupSize, pipeline);
4601 	const deUint32									shaderGroupHandleSize				= context.getRayTracingPipelineProperties().shaderGroupHandleSize;
4602 	const deUint32									shaderGroupBaseAlignment			= context.getRayTracingPipelineProperties().shaderGroupBaseAlignment;
4603 	de::MovePtr<BufferWithMemory>					rgenShaderBindingTable				= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, RAYGEN_GROUP, 1u);
4604 	de::MovePtr<BufferWithMemory>					missShaderBindingTable				= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, MISS_GROUP,   1u);
4605 	de::MovePtr<BufferWithMemory>					hitsShaderBindingTable				= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, HIT_GROUP,    1u);
4606 	de::MovePtr<BufferWithMemory>					callShaderBindingTable				= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, CALL_GROUP,   1u);
4607 	const VkStridedDeviceAddressRegionKHR			rgenShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, rgenShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
4608 	const VkStridedDeviceAddressRegionKHR			missShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, missShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
4609 	const VkStridedDeviceAddressRegionKHR			hitsShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, hitsShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
4610 	const VkStridedDeviceAddressRegionKHR			callShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, callShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
4611 	const Move<VkDescriptorPool>					descriptorPool						= makeRayTracingDescriptorPool(context, inputBuffers);
4612 	const Move<VkDescriptorSet>						descriptorSet						= makeRayTracingDescriptorSet(context, *descriptorPool, *descriptorSetLayout, extraDatas, extraDatasCount, stagesVector, inputBuffers);
4613 	const Move<VkDescriptorSet>						descriptorSetAS						= makeRayTracingDescriptorSetAS(context, *descriptorPool, *descriptorSetLayoutAS, topLevelAccelerationStructure);
4614 	const Move<VkCommandPool>						cmdPool								= makeCommandPool(vkd, device, queueFamilyIndex);
4615 	const Move<VkCommandBuffer>						cmdBuffer							= makeCommandBuffer(context, *cmdPool);
4616 	deUint32										passIterations						= 0u;
4617 	deUint32										failIterations						= 0u;
4618 
4619 	DE_ASSERT(shaderStageTested != 0);
4620 
4621 	for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
4622 	{
4623 
4624 		for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
4625 		{
4626 			// re-init the data
4627 			const Allocation& alloc = inputBuffers[ndx]->getAllocation();
4628 
4629 			initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
4630 		}
4631 
4632 		beginCommandBuffer(vkd, *cmdBuffer);
4633 		{
4634 			vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);
4635 
4636 			bottomLevelAccelerationStructure->build(vkd, device, *cmdBuffer);
4637 			topLevelAccelerationStructure->build(vkd, device, *cmdBuffer);
4638 
4639 			vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 1u, 1u, &descriptorSetAS.get(), 0u, DE_NULL);
4640 
4641 			if (stagesCount + extraDatasCount > 0)
4642 				vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
4643 
4644 			cmdTraceRays(vkd,
4645 				*cmdBuffer,
4646 				&rgenShaderBindingTableRegion,
4647 				&missShaderBindingTableRegion,
4648 				&hitsShaderBindingTableRegion,
4649 				&callShaderBindingTableRegion,
4650 				width, 1, 1);
4651 
4652 			const VkMemoryBarrier	postTraceMemoryBarrier	= makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
4653 			cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, VK_PIPELINE_STAGE_HOST_BIT, &postTraceMemoryBarrier);
4654 		}
4655 		endCommandBuffer(vkd, *cmdBuffer);
4656 
4657 		submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
4658 
4659 		for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
4660 		{
4661 			std::vector<const void*> datas;
4662 
4663 			if (!inputBuffers[ndx]->isImage())
4664 			{
4665 				const Allocation& resultAlloc = inputBuffers[ndx]->getAllocation();
4666 
4667 				invalidateAlloc(vkd, device, resultAlloc);
4668 
4669 				// we always have our result data first
4670 				datas.push_back(resultAlloc.getHostPtr());
4671 			}
4672 
4673 			for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
4674 			{
4675 				const deUint32 datasNdx = index - stagesCount;
4676 
4677 				if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
4678 				{
4679 					const Allocation& resultAlloc = inputBuffers[index]->getAllocation();
4680 
4681 					invalidateAlloc(vkd, device, resultAlloc);
4682 
4683 					// we always have our result data first
4684 					datas.push_back(resultAlloc.getHostPtr());
4685 				}
4686 			}
4687 
4688 			if (!checkResult(internalData, datas, width, subgroupSize, false))
4689 				failIterations++;
4690 			else
4691 				passIterations++;
4692 		}
4693 
4694 		vkd.resetCommandBuffer(*cmdBuffer, 0);
4695 	}
4696 
4697 	if (failIterations > 0 || passIterations == 0)
4698 		return tcu::TestStatus::fail("Failed " + de::toString(failIterations) + " out of " + de::toString(failIterations + passIterations) + " iterations.");
4699 	else
4700 		return tcu::TestStatus::pass("OK");
4701 }
4702 } // namespace subgroups
4703 } // namespace vkt
4704