• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2019 The Khronos Group Inc.
6  * Copyright (c) 2019 Google Inc.
7  * Copyright (c) 2017 Codeplay Software Ltd.
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  */ /*!
22  * \file
23  * \brief Subgroups Tests Utils
24  */ /*--------------------------------------------------------------------*/
25 
26 #include "vktSubgroupsTestsUtils.hpp"
27 #include "vkRayTracingUtil.hpp"
28 #include "deFloat16.h"
29 #include "deRandom.hpp"
30 #include "tcuCommandLine.hpp"
31 #include "tcuStringTemplate.hpp"
32 #include "vkBarrierUtil.hpp"
33 #include "vkImageUtil.hpp"
34 #include "vkTypeUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkObjUtil.hpp"
37 
38 using namespace tcu;
39 using namespace std;
40 using namespace vk;
41 using namespace vkt;
42 
43 namespace
44 {
45 
// Two-valued tag: COMPUTE (0) and MESH (1).
// NOTE(review): presumably selects between compute and mesh-shading code paths;
// the users of this enum are outside this excerpt - confirm.
enum class ComputeLike
{
	COMPUTE = 0,
	MESH
};
47 
getMaxWidth()48 deUint32 getMaxWidth ()
49 {
50 	return 1024u;
51 }
52 
// Returns the width to test after `width`.
//
// Below 128 (the max subgroup size) every value is visited, so the step is one.
// From 128 onwards the width doubles each time, which keeps testing time down.
deUint32 getNextWidth (const deUint32 width)
{
	return (width < 128) ? (width + 1) : (width * 2);
}
66 
getFormatSizeInBytes(const VkFormat format)67 deUint32 getFormatSizeInBytes (const VkFormat format)
68 {
69 	switch (format)
70 	{
71 		default:
72 			DE_FATAL("Unhandled format!");
73 			return 0;
74 		case VK_FORMAT_R8_SINT:
75 		case VK_FORMAT_R8_UINT:
76 			return static_cast<deUint32>(sizeof(deInt8));
77 		case VK_FORMAT_R8G8_SINT:
78 		case VK_FORMAT_R8G8_UINT:
79 			return static_cast<deUint32>(sizeof(deInt8) * 2);
80 		case VK_FORMAT_R8G8B8_SINT:
81 		case VK_FORMAT_R8G8B8_UINT:
82 		case VK_FORMAT_R8G8B8A8_SINT:
83 		case VK_FORMAT_R8G8B8A8_UINT:
84 			return static_cast<deUint32>(sizeof(deInt8) * 4);
85 		case VK_FORMAT_R16_SINT:
86 		case VK_FORMAT_R16_UINT:
87 		case VK_FORMAT_R16_SFLOAT:
88 			return static_cast<deUint32>(sizeof(deInt16));
89 		case VK_FORMAT_R16G16_SINT:
90 		case VK_FORMAT_R16G16_UINT:
91 		case VK_FORMAT_R16G16_SFLOAT:
92 			return static_cast<deUint32>(sizeof(deInt16) * 2);
93 		case VK_FORMAT_R16G16B16_UINT:
94 		case VK_FORMAT_R16G16B16_SINT:
95 		case VK_FORMAT_R16G16B16_SFLOAT:
96 		case VK_FORMAT_R16G16B16A16_SINT:
97 		case VK_FORMAT_R16G16B16A16_UINT:
98 		case VK_FORMAT_R16G16B16A16_SFLOAT:
99 			return static_cast<deUint32>(sizeof(deInt16) * 4);
100 		case VK_FORMAT_R32_SINT:
101 		case VK_FORMAT_R32_UINT:
102 		case VK_FORMAT_R32_SFLOAT:
103 			return static_cast<deUint32>(sizeof(deInt32));
104 		case VK_FORMAT_R32G32_SINT:
105 		case VK_FORMAT_R32G32_UINT:
106 		case VK_FORMAT_R32G32_SFLOAT:
107 			return static_cast<deUint32>(sizeof(deInt32) * 2);
108 		case VK_FORMAT_R32G32B32_SINT:
109 		case VK_FORMAT_R32G32B32_UINT:
110 		case VK_FORMAT_R32G32B32_SFLOAT:
111 		case VK_FORMAT_R32G32B32A32_SINT:
112 		case VK_FORMAT_R32G32B32A32_UINT:
113 		case VK_FORMAT_R32G32B32A32_SFLOAT:
114 			return static_cast<deUint32>(sizeof(deInt32) * 4);
115 		case VK_FORMAT_R64_SINT:
116 		case VK_FORMAT_R64_UINT:
117 		case VK_FORMAT_R64_SFLOAT:
118 			return static_cast<deUint32>(sizeof(deInt64));
119 		case VK_FORMAT_R64G64_SINT:
120 		case VK_FORMAT_R64G64_UINT:
121 		case VK_FORMAT_R64G64_SFLOAT:
122 			return static_cast<deUint32>(sizeof(deInt64) * 2);
123 		case VK_FORMAT_R64G64B64_SINT:
124 		case VK_FORMAT_R64G64B64_UINT:
125 		case VK_FORMAT_R64G64B64_SFLOAT:
126 		case VK_FORMAT_R64G64B64A64_SINT:
127 		case VK_FORMAT_R64G64B64A64_UINT:
128 		case VK_FORMAT_R64G64B64A64_SFLOAT:
129 			return static_cast<deUint32>(sizeof(deInt64) * 4);
130 		// The below formats are used to represent bool and bvec* types. These
131 		// types are passed to the shader as int and ivec* types, before the
132 		// calculations are done as booleans. We need a distinct type here so
133 		// that the shader generators can switch on it and generate the correct
134 		// shader source for testing.
135 		case VK_FORMAT_R8_USCALED:
136 			return static_cast<deUint32>(sizeof(deInt32));
137 		case VK_FORMAT_R8G8_USCALED:
138 			return static_cast<deUint32>(sizeof(deInt32) * 2);
139 		case VK_FORMAT_R8G8B8_USCALED:
140 		case VK_FORMAT_R8G8B8A8_USCALED:
141 			return static_cast<deUint32>(sizeof(deInt32) * 4);
142 	}
143 }
144 
getElementSizeInBytes(const VkFormat format,const subgroups::SSBOData::InputDataLayoutType layout)145 deUint32 getElementSizeInBytes (const VkFormat									format,
146 								const subgroups::SSBOData::InputDataLayoutType	layout)
147 {
148 	const deUint32 bytes = getFormatSizeInBytes(format);
149 
150 	if (layout == subgroups::SSBOData::LayoutStd140)
151 		return bytes < 16 ? 16 : bytes;
152 	else
153 		return bytes;
154 }
155 
makeRenderPass(Context & context,VkFormat format)156 Move<VkRenderPass> makeRenderPass (Context& context, VkFormat format)
157 {
158 	const VkAttachmentReference		colorReference			=
159 	{
160 		0,
161 		VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
162 	};
163 	const VkSubpassDescription		subpassDescription		=
164 	{
165 		0u,									//  VkSubpassDescriptionFlags		flags;
166 		VK_PIPELINE_BIND_POINT_GRAPHICS,	//  VkPipelineBindPoint				pipelineBindPoint;
167 		0,									//  deUint32						inputAttachmentCount;
168 		DE_NULL,							//  const VkAttachmentReference*	pInputAttachments;
169 		1,									//  deUint32						colorAttachmentCount;
170 		&colorReference,					//  const VkAttachmentReference*	pColorAttachments;
171 		DE_NULL,							//  const VkAttachmentReference*	pResolveAttachments;
172 		DE_NULL,							//  const VkAttachmentReference*	pDepthStencilAttachment;
173 		0,									//  deUint32						preserveAttachmentCount;
174 		DE_NULL								//  const deUint32*					pPreserveAttachments;
175 	};
176 	const VkSubpassDependency		subpassDependencies[2]	=
177 	{
178 		{
179 			VK_SUBPASS_EXTERNAL,															//  deUint32				srcSubpass;
180 			0u,																				//  deUint32				dstSubpass;
181 			VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,											//  VkPipelineStageFlags	srcStageMask;
182 			VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,									//  VkPipelineStageFlags	dstStageMask;
183 			VK_ACCESS_MEMORY_READ_BIT,														//  VkAccessFlags			srcAccessMask;
184 			VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,		//  VkAccessFlags			dstAccessMask;
185 			VK_DEPENDENCY_BY_REGION_BIT														//  VkDependencyFlags		dependencyFlags;
186 		},
187 		{
188 			0u,																				//  deUint32				srcSubpass;
189 			VK_SUBPASS_EXTERNAL,															//  deUint32				dstSubpass;
190 			VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,									//  VkPipelineStageFlags	srcStageMask;
191 			VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,											//  VkPipelineStageFlags	dstStageMask;
192 			VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,		//  VkAccessFlags			srcAccessMask;
193 			VK_ACCESS_MEMORY_READ_BIT,														//  VkAccessFlags			dstAccessMask;
194 			VK_DEPENDENCY_BY_REGION_BIT														//  VkDependencyFlags		dependencyFlags;
195 		},
196 	};
197 	const VkAttachmentDescription	attachmentDescription	=
198 	{
199 		0u,											//  VkAttachmentDescriptionFlags	flags;
200 		format,										//  VkFormat						format;
201 		VK_SAMPLE_COUNT_1_BIT,						//  VkSampleCountFlagBits			samples;
202 		VK_ATTACHMENT_LOAD_OP_CLEAR,				//  VkAttachmentLoadOp				loadOp;
203 		VK_ATTACHMENT_STORE_OP_STORE,				//  VkAttachmentStoreOp				storeOp;
204 		VK_ATTACHMENT_LOAD_OP_DONT_CARE,			//  VkAttachmentLoadOp				stencilLoadOp;
205 		VK_ATTACHMENT_STORE_OP_DONT_CARE,			//  VkAttachmentStoreOp				stencilStoreOp;
206 		VK_IMAGE_LAYOUT_UNDEFINED,					//  VkImageLayout					initialLayout;
207 		VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL		//  VkImageLayout					finalLayout;
208 	};
209 	const VkRenderPassCreateInfo	renderPassCreateInfo =
210 	{
211 		VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,	//  VkStructureType					sType;
212 		DE_NULL,									//  const void*						pNext;
213 		0u,											//  VkRenderPassCreateFlags			flags;
214 		1,											//  deUint32						attachmentCount;
215 		&attachmentDescription,						//  const VkAttachmentDescription*	pAttachments;
216 		1,											//  deUint32						subpassCount;
217 		&subpassDescription,						//  const VkSubpassDescription*		pSubpasses;
218 		2,											//  deUint32						dependencyCount;
219 		subpassDependencies							//  const VkSubpassDependency*		pDependencies;
220 	};
221 
222 	return createRenderPass(context.getDeviceInterface(), context.getDevice(), &renderPassCreateInfo);
223 }
224 
makeGraphicsPipeline(const DeviceInterface & vk,const VkDevice device,const VkPipelineLayout pipelineLayout,const VkShaderModule vertexShaderModule,const VkShaderModule tessellationControlShaderModule,const VkShaderModule tessellationEvalShaderModule,const VkShaderModule geometryShaderModule,const VkShaderModule fragmentShaderModule,const VkRenderPass renderPass,const std::vector<VkViewport> & viewports,const std::vector<VkRect2D> & scissors,const VkPrimitiveTopology topology,const deUint32 subpass,const deUint32 patchControlPoints,const VkPipelineVertexInputStateCreateInfo * vertexInputStateCreateInfo,const VkPipelineRasterizationStateCreateInfo * rasterizationStateCreateInfo,const VkPipelineMultisampleStateCreateInfo * multisampleStateCreateInfo,const VkPipelineDepthStencilStateCreateInfo * depthStencilStateCreateInfo,const VkPipelineColorBlendStateCreateInfo * colorBlendStateCreateInfo,const VkPipelineDynamicStateCreateInfo * dynamicStateCreateInfo,const deUint32 vertexShaderStageCreateFlags,const deUint32 tessellationControlShaderStageCreateFlags,const deUint32 tessellationEvalShaderStageCreateFlags,const deUint32 geometryShaderStageCreateFlags,const deUint32 fragmentShaderStageCreateFlags,const deUint32 requiredSubgroupSize[5])225 Move<VkPipeline> makeGraphicsPipeline (const DeviceInterface&							vk,
226 									   const VkDevice									device,
227 									   const VkPipelineLayout							pipelineLayout,
228 									   const VkShaderModule								vertexShaderModule,
229 									   const VkShaderModule								tessellationControlShaderModule,
230 									   const VkShaderModule								tessellationEvalShaderModule,
231 									   const VkShaderModule								geometryShaderModule,
232 									   const VkShaderModule								fragmentShaderModule,
233 									   const VkRenderPass								renderPass,
234 									   const std::vector<VkViewport>&					viewports,
235 									   const std::vector<VkRect2D>&						scissors,
236 									   const VkPrimitiveTopology						topology,
237 									   const deUint32									subpass,
238 									   const deUint32									patchControlPoints,
239 									   const VkPipelineVertexInputStateCreateInfo*		vertexInputStateCreateInfo,
240 									   const VkPipelineRasterizationStateCreateInfo*	rasterizationStateCreateInfo,
241 									   const VkPipelineMultisampleStateCreateInfo*		multisampleStateCreateInfo,
242 									   const VkPipelineDepthStencilStateCreateInfo*		depthStencilStateCreateInfo,
243 									   const VkPipelineColorBlendStateCreateInfo*		colorBlendStateCreateInfo,
244 									   const VkPipelineDynamicStateCreateInfo*			dynamicStateCreateInfo,
245 									   const deUint32									vertexShaderStageCreateFlags,
246 									   const deUint32									tessellationControlShaderStageCreateFlags,
247 									   const deUint32									tessellationEvalShaderStageCreateFlags,
248 									   const deUint32									geometryShaderStageCreateFlags,
249 									   const deUint32									fragmentShaderStageCreateFlags,
250 									   const deUint32									requiredSubgroupSize[5])
251 {
252 	const VkBool32									disableRasterization				= (fragmentShaderModule == DE_NULL);
253 	const bool										hasTessellation						= (tessellationControlShaderModule != DE_NULL || tessellationEvalShaderModule != DE_NULL);
254 
255 	VkPipelineShaderStageCreateInfo					stageCreateInfo						=
256 	{
257 		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType                     sType
258 		DE_NULL,												// const void*                         pNext
259 		0u,														// VkPipelineShaderStageCreateFlags    flags
260 		VK_SHADER_STAGE_VERTEX_BIT,								// VkShaderStageFlagBits               stage
261 		DE_NULL,												// VkShaderModule                      module
262 		"main",													// const char*                         pName
263 		DE_NULL													// const VkSpecializationInfo*         pSpecializationInfo
264 	};
265 
266 	std::vector<VkPipelineShaderStageCreateInfo>	pipelineShaderStageParams;
267 
268 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT requiredSubgroupSizeCreateInfo[5] =
269 	{
270 		{
271 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
272 			DE_NULL,
273 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[0] : 0u,
274 		},
275 		{
276 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
277 			DE_NULL,
278 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[1] : 0u,
279 		},
280 		{
281 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
282 			DE_NULL,
283 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[2] : 0u,
284 		},
285 		{
286 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
287 			DE_NULL,
288 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[3] : 0u,
289 		},
290 		{
291 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
292 			DE_NULL,
293 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[4] : 0u,
294 		},
295 	};
296 
297 	{
298 		stageCreateInfo.pNext	= (requiredSubgroupSizeCreateInfo[0].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[0] : DE_NULL;
299 		stageCreateInfo.flags	= vertexShaderStageCreateFlags;
300 		stageCreateInfo.stage	= VK_SHADER_STAGE_VERTEX_BIT;
301 		stageCreateInfo.module	= vertexShaderModule;
302 		pipelineShaderStageParams.push_back(stageCreateInfo);
303 	}
304 
305 	if (tessellationControlShaderModule != DE_NULL)
306 	{
307 		stageCreateInfo.pNext	= (requiredSubgroupSizeCreateInfo[1].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[1] : DE_NULL;
308 		stageCreateInfo.flags	= tessellationControlShaderStageCreateFlags;
309 		stageCreateInfo.stage	= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
310 		stageCreateInfo.module	= tessellationControlShaderModule;
311 		pipelineShaderStageParams.push_back(stageCreateInfo);
312 	}
313 
314 	if (tessellationEvalShaderModule != DE_NULL)
315 	{
316 		stageCreateInfo.pNext	= (requiredSubgroupSize != DE_NULL && requiredSubgroupSizeCreateInfo[2].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[2] : DE_NULL;
317 		stageCreateInfo.flags	= tessellationEvalShaderStageCreateFlags;
318 		stageCreateInfo.stage	= VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
319 		stageCreateInfo.module	= tessellationEvalShaderModule;
320 		pipelineShaderStageParams.push_back(stageCreateInfo);
321 	}
322 
323 	if (geometryShaderModule != DE_NULL)
324 	{
325 		stageCreateInfo.pNext	= (requiredSubgroupSizeCreateInfo[3].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[3] : DE_NULL;
326 		stageCreateInfo.flags	= geometryShaderStageCreateFlags;
327 		stageCreateInfo.stage	= VK_SHADER_STAGE_GEOMETRY_BIT;
328 		stageCreateInfo.module	= geometryShaderModule;
329 		pipelineShaderStageParams.push_back(stageCreateInfo);
330 	}
331 
332 	if (fragmentShaderModule != DE_NULL)
333 	{
334 		stageCreateInfo.pNext	= (requiredSubgroupSizeCreateInfo[4].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[4] : DE_NULL;
335 		stageCreateInfo.flags	= fragmentShaderStageCreateFlags;
336 		stageCreateInfo.stage	= VK_SHADER_STAGE_FRAGMENT_BIT;
337 		stageCreateInfo.module	= fragmentShaderModule;
338 		pipelineShaderStageParams.push_back(stageCreateInfo);
339 	}
340 
341 	const VkVertexInputBindingDescription			vertexInputBindingDescription		=
342 	{
343 		0u,								// deUint32             binding
344 		sizeof(tcu::Vec4),				// deUint32             stride
345 		VK_VERTEX_INPUT_RATE_VERTEX,	// VkVertexInputRate    inputRate
346 	};
347 
348 	const VkVertexInputAttributeDescription			vertexInputAttributeDescription		=
349 	{
350 		0u,								// deUint32    location
351 		0u,								// deUint32    binding
352 		VK_FORMAT_R32G32B32A32_SFLOAT,	// VkFormat    format
353 		0u								// deUint32    offset
354 	};
355 
356 	const VkPipelineVertexInputStateCreateInfo		vertexInputStateCreateInfoDefault	=
357 	{
358 		VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,	// VkStructureType                             sType
359 		DE_NULL,													// const void*                                 pNext
360 		(VkPipelineVertexInputStateCreateFlags)0,					// VkPipelineVertexInputStateCreateFlags       flags
361 		1u,															// deUint32                                    vertexBindingDescriptionCount
362 		&vertexInputBindingDescription,								// const VkVertexInputBindingDescription*      pVertexBindingDescriptions
363 		1u,															// deUint32                                    vertexAttributeDescriptionCount
364 		&vertexInputAttributeDescription							// const VkVertexInputAttributeDescription*    pVertexAttributeDescriptions
365 	};
366 
367 	const VkPipelineInputAssemblyStateCreateInfo	inputAssemblyStateCreateInfo		=
368 	{
369 		VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,	// VkStructureType                            sType
370 		DE_NULL,														// const void*                                pNext
371 		0u,																// VkPipelineInputAssemblyStateCreateFlags    flags
372 		topology,														// VkPrimitiveTopology                        topology
373 		VK_FALSE														// VkBool32                                   primitiveRestartEnable
374 	};
375 
376 	const VkPipelineTessellationStateCreateInfo		tessStateCreateInfo					=
377 	{
378 		VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,	// VkStructureType                           sType
379 		DE_NULL,													// const void*                               pNext
380 		0u,															// VkPipelineTessellationStateCreateFlags    flags
381 		patchControlPoints											// deUint32                                  patchControlPoints
382 	};
383 
384 	const VkPipelineViewportStateCreateInfo			viewportStateCreateInfo				=
385 	{
386 		VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,	// VkStructureType                             sType
387 		DE_NULL,												// const void*                                 pNext
388 		(VkPipelineViewportStateCreateFlags)0,					// VkPipelineViewportStateCreateFlags          flags
389 		viewports.empty() ? 1u : (deUint32)viewports.size(),	// deUint32                                    viewportCount
390 		viewports.empty() ? DE_NULL : &viewports[0],			// const VkViewport*                           pViewports
391 		viewports.empty() ? 1u : (deUint32)scissors.size(),		// deUint32                                    scissorCount
392 		scissors.empty() ? DE_NULL : &scissors[0]				// const VkRect2D*                             pScissors
393 	};
394 
395 	const VkPipelineRasterizationStateCreateInfo	rasterizationStateCreateInfoDefault	=
396 	{
397 		VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,	// VkStructureType                            sType
398 		DE_NULL,													// const void*                                pNext
399 		0u,															// VkPipelineRasterizationStateCreateFlags    flags
400 		VK_FALSE,													// VkBool32                                   depthClampEnable
401 		disableRasterization,										// VkBool32                                   rasterizerDiscardEnable
402 		VK_POLYGON_MODE_FILL,										// VkPolygonMode                              polygonMode
403 		VK_CULL_MODE_NONE,											// VkCullModeFlags                            cullMode
404 		VK_FRONT_FACE_COUNTER_CLOCKWISE,							// VkFrontFace                                frontFace
405 		VK_FALSE,													// VkBool32                                   depthBiasEnable
406 		0.0f,														// float                                      depthBiasConstantFactor
407 		0.0f,														// float                                      depthBiasClamp
408 		0.0f,														// float                                      depthBiasSlopeFactor
409 		1.0f														// float                                      lineWidth
410 	};
411 
412 	const VkPipelineMultisampleStateCreateInfo		multisampleStateCreateInfoDefault	=
413 	{
414 		VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,	// VkStructureType                          sType
415 		DE_NULL,													// const void*                              pNext
416 		0u,															// VkPipelineMultisampleStateCreateFlags    flags
417 		VK_SAMPLE_COUNT_1_BIT,										// VkSampleCountFlagBits                    rasterizationSamples
418 		VK_FALSE,													// VkBool32                                 sampleShadingEnable
419 		1.0f,														// float                                    minSampleShading
420 		DE_NULL,													// const VkSampleMask*                      pSampleMask
421 		VK_FALSE,													// VkBool32                                 alphaToCoverageEnable
422 		VK_FALSE													// VkBool32                                 alphaToOneEnable
423 	};
424 
425 	const VkStencilOpState							stencilOpState						=
426 	{
427 		VK_STENCIL_OP_KEEP,		// VkStencilOp    failOp
428 		VK_STENCIL_OP_KEEP,		// VkStencilOp    passOp
429 		VK_STENCIL_OP_KEEP,		// VkStencilOp    depthFailOp
430 		VK_COMPARE_OP_NEVER,	// VkCompareOp    compareOp
431 		0,						// deUint32       compareMask
432 		0,						// deUint32       writeMask
433 		0						// deUint32       reference
434 	};
435 
436 	const VkPipelineDepthStencilStateCreateInfo		depthStencilStateCreateInfoDefault	=
437 	{
438 		VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,	// VkStructureType                          sType
439 		DE_NULL,													// const void*                              pNext
440 		0u,															// VkPipelineDepthStencilStateCreateFlags   flags
441 		VK_FALSE,													// VkBool32                                 depthTestEnable
442 		VK_FALSE,													// VkBool32                                 depthWriteEnable
443 		VK_COMPARE_OP_LESS_OR_EQUAL,								// VkCompareOp                              depthCompareOp
444 		VK_FALSE,													// VkBool32                                 depthBoundsTestEnable
445 		VK_FALSE,													// VkBool32                                 stencilTestEnable
446 		stencilOpState,												// VkStencilOpState                         front
447 		stencilOpState,												// VkStencilOpState                         back
448 		0.0f,														// float                                    minDepthBounds
449 		1.0f,														// float                                    maxDepthBounds
450 	};
451 
452 	const VkPipelineColorBlendAttachmentState		colorBlendAttachmentState			=
453 	{
454 		VK_FALSE,					// VkBool32                 blendEnable
455 		VK_BLEND_FACTOR_ZERO,		// VkBlendFactor            srcColorBlendFactor
456 		VK_BLEND_FACTOR_ZERO,		// VkBlendFactor            dstColorBlendFactor
457 		VK_BLEND_OP_ADD,			// VkBlendOp                colorBlendOp
458 		VK_BLEND_FACTOR_ZERO,		// VkBlendFactor            srcAlphaBlendFactor
459 		VK_BLEND_FACTOR_ZERO,		// VkBlendFactor            dstAlphaBlendFactor
460 		VK_BLEND_OP_ADD,			// VkBlendOp                alphaBlendOp
461 		VK_COLOR_COMPONENT_R_BIT	// VkColorComponentFlags    colorWriteMask
462 		| VK_COLOR_COMPONENT_G_BIT
463 		| VK_COLOR_COMPONENT_B_BIT
464 		| VK_COLOR_COMPONENT_A_BIT
465 	};
466 
467 	const VkPipelineColorBlendStateCreateInfo		colorBlendStateCreateInfoDefault	=
468 	{
469 		VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,	// VkStructureType                               sType
470 		DE_NULL,													// const void*                                   pNext
471 		0u,															// VkPipelineColorBlendStateCreateFlags          flags
472 		VK_FALSE,													// VkBool32                                      logicOpEnable
473 		VK_LOGIC_OP_CLEAR,											// VkLogicOp                                     logicOp
474 		1u,															// deUint32                                      attachmentCount
475 		&colorBlendAttachmentState,									// const VkPipelineColorBlendAttachmentState*    pAttachments
476 		{ 0.0f, 0.0f, 0.0f, 0.0f }									// float                                         blendConstants[4]
477 	};
478 
479 	std::vector<VkDynamicState>						dynamicStates;
480 
481 	if (viewports.empty())
482 		dynamicStates.push_back(VK_DYNAMIC_STATE_VIEWPORT);
483 	if (scissors.empty())
484 		dynamicStates.push_back(VK_DYNAMIC_STATE_SCISSOR);
485 
486 	const VkPipelineDynamicStateCreateInfo			dynamicStateCreateInfoDefault		=
487 	{
488 		VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,	// VkStructureType                      sType
489 		DE_NULL,												// const void*                          pNext
490 		0u,														// VkPipelineDynamicStateCreateFlags    flags
491 		(deUint32)dynamicStates.size(),							// deUint32                             dynamicStateCount
492 		dynamicStates.empty() ? DE_NULL : &dynamicStates[0]		// const VkDynamicState*                pDynamicStates
493 	};
494 
495 	const VkPipelineDynamicStateCreateInfo*			dynamicStateCreateInfoDefaultPtr	= dynamicStates.empty() ? DE_NULL : &dynamicStateCreateInfoDefault;
496 
497 	const VkGraphicsPipelineCreateInfo				pipelineCreateInfo					=
498 	{
499 		VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,														// VkStructureType                                  sType
500 		DE_NULL,																								// const void*                                      pNext
501 		0u,																										// VkPipelineCreateFlags                            flags
502 		(deUint32)pipelineShaderStageParams.size(),																// deUint32                                         stageCount
503 		&pipelineShaderStageParams[0],																			// const VkPipelineShaderStageCreateInfo*           pStages
504 		vertexInputStateCreateInfo ? vertexInputStateCreateInfo : &vertexInputStateCreateInfoDefault,			// const VkPipelineVertexInputStateCreateInfo*      pVertexInputState
505 		&inputAssemblyStateCreateInfo,																			// const VkPipelineInputAssemblyStateCreateInfo*    pInputAssemblyState
506 		hasTessellation ? &tessStateCreateInfo : DE_NULL,														// const VkPipelineTessellationStateCreateInfo*     pTessellationState
507 		&viewportStateCreateInfo,																				// const VkPipelineViewportStateCreateInfo*         pViewportState
508 		rasterizationStateCreateInfo ? rasterizationStateCreateInfo : &rasterizationStateCreateInfoDefault,		// const VkPipelineRasterizationStateCreateInfo*    pRasterizationState
509 		multisampleStateCreateInfo ? multisampleStateCreateInfo: &multisampleStateCreateInfoDefault,			// const VkPipelineMultisampleStateCreateInfo*      pMultisampleState
510 		depthStencilStateCreateInfo ? depthStencilStateCreateInfo : &depthStencilStateCreateInfoDefault,		// const VkPipelineDepthStencilStateCreateInfo*     pDepthStencilState
511 		colorBlendStateCreateInfo ? colorBlendStateCreateInfo : &colorBlendStateCreateInfoDefault,				// const VkPipelineColorBlendStateCreateInfo*       pColorBlendState
512 		dynamicStateCreateInfo ? dynamicStateCreateInfo : dynamicStateCreateInfoDefaultPtr,						// const VkPipelineDynamicStateCreateInfo*          pDynamicState
513 		pipelineLayout,																							// VkPipelineLayout                                 layout
514 		renderPass,																								// VkRenderPass                                     renderPass
515 		subpass,																								// deUint32                                         subpass
516 		DE_NULL,																								// VkPipeline                                       basePipelineHandle
517 		0																										// deInt32                                          basePipelineIndex;
518 	};
519 
520 	return createGraphicsPipeline(vk, device, DE_NULL, &pipelineCreateInfo);
521 }
522 
makeGraphicsPipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderStageFlags stages,const VkShaderModule vertexShaderModule,const VkShaderModule fragmentShaderModule,const VkShaderModule geometryShaderModule,const VkShaderModule tessellationControlModule,const VkShaderModule tessellationEvaluationModule,const VkRenderPass renderPass,const VkPrimitiveTopology topology=VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,const VkVertexInputBindingDescription * vertexInputBindingDescription=DE_NULL,const VkVertexInputAttributeDescription * vertexInputAttributeDescriptions=DE_NULL,const bool frameBufferTests=false,const vk::VkFormat attachmentFormat=VK_FORMAT_R32G32B32A32_SFLOAT,const deUint32 vertexShaderStageCreateFlags=0u,const deUint32 tessellationControlShaderStageCreateFlags=0u,const deUint32 tessellationEvalShaderStageCreateFlags=0u,const deUint32 geometryShaderStageCreateFlags=0u,const deUint32 fragmentShaderStageCreateFlags=0u,const deUint32 requiredSubgroupSize[5]=DE_NULL)523 Move<VkPipeline> makeGraphicsPipeline (Context&									context,
524 									   const VkPipelineLayout					pipelineLayout,
525 									   const VkShaderStageFlags					stages,
526 									   const VkShaderModule						vertexShaderModule,
527 									   const VkShaderModule						fragmentShaderModule,
528 									   const VkShaderModule						geometryShaderModule,
529 									   const VkShaderModule						tessellationControlModule,
530 									   const VkShaderModule						tessellationEvaluationModule,
531 									   const VkRenderPass						renderPass,
532 									   const VkPrimitiveTopology				topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
533 									   const VkVertexInputBindingDescription*	vertexInputBindingDescription = DE_NULL,
534 									   const VkVertexInputAttributeDescription*	vertexInputAttributeDescriptions = DE_NULL,
535 									   const bool								frameBufferTests = false,
536 									   const vk::VkFormat						attachmentFormat = VK_FORMAT_R32G32B32A32_SFLOAT,
537 									   const deUint32							vertexShaderStageCreateFlags = 0u,
538 									   const deUint32							tessellationControlShaderStageCreateFlags = 0u,
539 									   const deUint32							tessellationEvalShaderStageCreateFlags = 0u,
540 									   const deUint32							geometryShaderStageCreateFlags = 0u,
541 									   const deUint32							fragmentShaderStageCreateFlags = 0u,
542 									   const deUint32							requiredSubgroupSize[5] = DE_NULL)
543 {
544 	const std::vector<VkViewport>				noViewports;
545 	const std::vector<VkRect2D>					noScissors;
546 	const VkPipelineVertexInputStateCreateInfo	vertexInputStateCreateInfo	=
547 	{
548 		VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,	// VkStructureType								sType;
549 		DE_NULL,													// const void*									pNext;
550 		0u,															// VkPipelineVertexInputStateCreateFlags		flags;
551 		vertexInputBindingDescription == DE_NULL ? 0u : 1u,			// deUint32										vertexBindingDescriptionCount;
552 		vertexInputBindingDescription,								// const VkVertexInputBindingDescription*		pVertexBindingDescriptions;
553 		vertexInputAttributeDescriptions == DE_NULL ? 0u : 1u,		// deUint32										vertexAttributeDescriptionCount;
554 		vertexInputAttributeDescriptions,							// const VkVertexInputAttributeDescription*		pVertexAttributeDescriptions;
555 	};
556 	const deUint32								numChannels					= getNumUsedChannels(mapVkFormat(attachmentFormat).order);
557 	const VkColorComponentFlags					colorComponent				= numChannels == 1 ? VK_COLOR_COMPONENT_R_BIT :
558 																			  numChannels == 2 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT :
559 																			  numChannels == 3 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT :
560 																			  VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
561 	const VkPipelineColorBlendAttachmentState	colorBlendAttachmentState	=
562 	{
563 		VK_FALSE,				//  VkBool32				blendEnable;
564 		VK_BLEND_FACTOR_ZERO,	//  VkBlendFactor			srcColorBlendFactor;
565 		VK_BLEND_FACTOR_ZERO,	//  VkBlendFactor			dstColorBlendFactor;
566 		VK_BLEND_OP_ADD,		//  VkBlendOp				colorBlendOp;
567 		VK_BLEND_FACTOR_ZERO,	//  VkBlendFactor			srcAlphaBlendFactor;
568 		VK_BLEND_FACTOR_ZERO,	//  VkBlendFactor			dstAlphaBlendFactor;
569 		VK_BLEND_OP_ADD,		//  VkBlendOp				alphaBlendOp;
570 		colorComponent			//  VkColorComponentFlags	colorWriteMask;
571 	};
572 	const VkPipelineColorBlendStateCreateInfo	colorBlendStateCreateInfo	=
573 	{
574 		VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,	//  VkStructureType								sType;
575 		DE_NULL,													//  const void*									pNext;
576 		0u,															//  VkPipelineColorBlendStateCreateFlags		flags;
577 		VK_FALSE,													//  VkBool32									logicOpEnable;
578 		VK_LOGIC_OP_CLEAR,											//  VkLogicOp									logicOp;
579 		1,															//  deUint32									attachmentCount;
580 		&colorBlendAttachmentState,									//  const VkPipelineColorBlendAttachmentState*	pAttachments;
581 		{ 0.0f, 0.0f, 0.0f, 0.0f }									//  float										blendConstants[4];
582 	};
583 	const deUint32								patchControlPoints			= (VK_SHADER_STAGE_FRAGMENT_BIT & stages && frameBufferTests) ? 2u : 1u;
584 
585 	return makeGraphicsPipeline(context.getDeviceInterface(),	// const DeviceInterface&                        vk
586 								context.getDevice(),			// const VkDevice                                device
587 								pipelineLayout,					// const VkPipelineLayout                        pipelineLayout
588 								vertexShaderModule,				// const VkShaderModule                          vertexShaderModule
589 								tessellationControlModule,		// const VkShaderModule                          tessellationControlShaderModule
590 								tessellationEvaluationModule,	// const VkShaderModule                          tessellationEvalShaderModule
591 								geometryShaderModule,			// const VkShaderModule                          geometryShaderModule
592 								fragmentShaderModule,			// const VkShaderModule                          fragmentShaderModule
593 								renderPass,						// const VkRenderPass                            renderPass
594 								noViewports,					// const std::vector<VkViewport>&                viewports
595 								noScissors,						// const std::vector<VkRect2D>&                  scissors
596 								topology,						// const VkPrimitiveTopology                     topology
597 								0u,								// const deUint32                                subpass
598 								patchControlPoints,				// const deUint32                                patchControlPoints
599 								&vertexInputStateCreateInfo,	// const VkPipelineVertexInputStateCreateInfo*   vertexInputStateCreateInfo
600 								DE_NULL,						// const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
601 								DE_NULL,						// const VkPipelineMultisampleStateCreateInfo*   multisampleStateCreateInfo
602 								DE_NULL,						// const VkPipelineDepthStencilStateCreateInfo*  depthStencilStateCreateInfo
603 								&colorBlendStateCreateInfo,		// const VkPipelineColorBlendStateCreateInfo*    colorBlendStateCreateInfo
604 								DE_NULL,						// const VkPipelineDynamicStateCreateInfo*
605 								vertexShaderStageCreateFlags,	// const deUint32								 vertexShaderStageCreateFlags,
606 								tessellationControlShaderStageCreateFlags,	// const deUint32					 tessellationControlShaderStageCreateFlags
607 								tessellationEvalShaderStageCreateFlags,		// const deUint32					 tessellationEvalShaderStageCreateFlags
608 								geometryShaderStageCreateFlags,	// const deUint32								 geometryShaderStageCreateFlags
609 								fragmentShaderStageCreateFlags,	// const deUint32								 fragmentShaderStageCreateFlags
610 								requiredSubgroupSize);			// const deUint32								 requiredSubgroupSize[5]
611 }
612 
makeCommandBuffer(Context & context,const VkCommandPool commandPool)613 Move<VkCommandBuffer> makeCommandBuffer (Context& context, const VkCommandPool commandPool)
614 {
615 	const VkCommandBufferAllocateInfo bufferAllocateParams =
616 	{
617 		VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,	// VkStructureType		sType;
618 		DE_NULL,										// const void*			pNext;
619 		commandPool,									// VkCommandPool		commandPool;
620 		VK_COMMAND_BUFFER_LEVEL_PRIMARY,				// VkCommandBufferLevel	level;
621 		1u,												// deUint32				bufferCount;
622 	};
623 	return allocateCommandBuffer(context.getDeviceInterface(),
624 								 context.getDevice(), &bufferAllocateParams);
625 }
626 
627 struct Buffer;
628 struct Image;
629 
630 struct BufferOrImage
631 {
isImage__anon89c025990111::BufferOrImage632 	bool isImage() const
633 	{
634 		return m_isImage;
635 	}
636 
getAsBuffer__anon89c025990111::BufferOrImage637 	Buffer* getAsBuffer()
638 	{
639 		if (m_isImage) DE_FATAL("Trying to get a buffer as an image!");
640 		return reinterpret_cast<Buffer* >(this);
641 	}
642 
getAsImage__anon89c025990111::BufferOrImage643 	Image* getAsImage()
644 	{
645 		if (!m_isImage) DE_FATAL("Trying to get an image as a buffer!");
646 		return reinterpret_cast<Image*>(this);
647 	}
648 
getType__anon89c025990111::BufferOrImage649 	virtual VkDescriptorType getType() const
650 	{
651 		if (m_isImage)
652 		{
653 			return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
654 		}
655 		else
656 		{
657 			return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
658 		}
659 	}
660 
getAllocation__anon89c025990111::BufferOrImage661 	Allocation& getAllocation() const
662 	{
663 		return *m_allocation;
664 	}
665 
~BufferOrImage__anon89c025990111::BufferOrImage666 	virtual ~BufferOrImage() {}
667 
668 protected:
BufferOrImage__anon89c025990111::BufferOrImage669 	explicit BufferOrImage(bool image) : m_isImage(image) {}
670 
671 	bool m_isImage;
672 	de::details::MovePtr<Allocation> m_allocation;
673 };
674 
675 struct Buffer : public BufferOrImage
676 {
Buffer__anon89c025990111::Buffer677 	explicit Buffer (Context& context, VkDeviceSize sizeInBytes, VkBufferUsageFlags usage)
678 		: BufferOrImage		(false)
679 		, m_sizeInBytes		(sizeInBytes)
680 		, m_usage			(usage)
681 	{
682 		const DeviceInterface&			vkd					= context.getDeviceInterface();
683 		const VkDevice					device				= context.getDevice();
684 
685 		const vk::VkBufferCreateInfo	bufferCreateInfo	=
686 		{
687 			VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
688 			DE_NULL,
689 			0u,
690 			m_sizeInBytes,
691 			m_usage,
692 			VK_SHARING_MODE_EXCLUSIVE,
693 			0u,
694 			DE_NULL,
695 		};
696 		m_buffer		= createBuffer(vkd, device, &bufferCreateInfo);
697 
698 		VkMemoryRequirements			req					= getBufferMemoryRequirements(vkd, device, *m_buffer);
699 
700 		m_allocation	= context.getDefaultAllocator().allocate(req, MemoryRequirement::HostVisible);
701 		VK_CHECK(vkd.bindBufferMemory(device, *m_buffer, m_allocation->getMemory(), m_allocation->getOffset()));
702 	}
703 
getType__anon89c025990111::Buffer704 	virtual VkDescriptorType getType() const
705 	{
706 		if (VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT == m_usage)
707 		{
708 			return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
709 		}
710 		return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
711 	}
712 
getBuffer__anon89c025990111::Buffer713 	VkBuffer getBuffer () const
714 	{
715 		return *m_buffer;
716 	}
717 
getBufferPtr__anon89c025990111::Buffer718 	const VkBuffer* getBufferPtr () const
719 	{
720 		return &(*m_buffer);
721 	}
722 
getSize__anon89c025990111::Buffer723 	VkDeviceSize getSize () const
724 	{
725 		return m_sizeInBytes;
726 	}
727 
728 private:
729 	Move<VkBuffer>				m_buffer;
730 	VkDeviceSize				m_sizeInBytes;
731 	const VkBufferUsageFlags	m_usage;
732 };
733 
734 struct Image : public BufferOrImage
735 {
Image__anon89c025990111::Image736 	explicit Image (Context& context, deUint32 width, deUint32 height, VkFormat format, VkImageUsageFlags usage = VK_IMAGE_USAGE_STORAGE_BIT)
737 		: BufferOrImage(true)
738 	{
739 		const DeviceInterface&			vk					= context.getDeviceInterface();
740 		const VkDevice					device				= context.getDevice();
741 		const deUint32					queueFamilyIndex	= context.getUniversalQueueFamilyIndex();
742 
743 		const VkImageCreateInfo			imageCreateInfo		=
744 		{
745 			VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//  VkStructureType			sType;
746 			DE_NULL,								//  const void*				pNext;
747 			0,										//  VkImageCreateFlags		flags;
748 			VK_IMAGE_TYPE_2D,						//  VkImageType				imageType;
749 			format,									//  VkFormat				format;
750 			{width, height, 1},						//  VkExtent3D				extent;
751 			1,										//  deUint32				mipLevels;
752 			1,										//  deUint32				arrayLayers;
753 			VK_SAMPLE_COUNT_1_BIT,					//  VkSampleCountFlagBits	samples;
754 			VK_IMAGE_TILING_OPTIMAL,				//  VkImageTiling			tiling;
755 			usage,									//  VkImageUsageFlags		usage;
756 			VK_SHARING_MODE_EXCLUSIVE,				//  VkSharingMode			sharingMode;
757 			0u,										//  deUint32				queueFamilyIndexCount;
758 			DE_NULL,								//  const deUint32*			pQueueFamilyIndices;
759 			VK_IMAGE_LAYOUT_UNDEFINED				//  VkImageLayout			initialLayout;
760 		};
761 
762 		const VkComponentMapping		componentMapping	=
763 		{
764 			VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
765 			VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY
766 		};
767 
768 		const VkImageSubresourceRange	subresourceRange	=
769 		{
770 			VK_IMAGE_ASPECT_COLOR_BIT,	//VkImageAspectFlags	aspectMask
771 			0u,							//deUint32				baseMipLevel
772 			1u,							//deUint32				levelCount
773 			0u,							//deUint32				baseArrayLayer
774 			1u							//deUint32				layerCount
775 		};
776 
777 		const VkSamplerCreateInfo		samplerCreateInfo	=
778 		{
779 			VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,		//  VkStructureType			sType;
780 			DE_NULL,									//  const void*				pNext;
781 			0u,											//  VkSamplerCreateFlags	flags;
782 			VK_FILTER_NEAREST,							//  VkFilter				magFilter;
783 			VK_FILTER_NEAREST,							//  VkFilter				minFilter;
784 			VK_SAMPLER_MIPMAP_MODE_NEAREST,				//  VkSamplerMipmapMode		mipmapMode;
785 			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,		//  VkSamplerAddressMode	addressModeU;
786 			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,		//  VkSamplerAddressMode	addressModeV;
787 			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,		//  VkSamplerAddressMode	addressModeW;
788 			0.0f,										//  float					mipLodBias;
789 			VK_FALSE,									//  VkBool32				anisotropyEnable;
790 			1.0f,										//  float					maxAnisotropy;
791 			DE_FALSE,									//  VkBool32				compareEnable;
792 			VK_COMPARE_OP_ALWAYS,						//  VkCompareOp				compareOp;
793 			0.0f,										//  float					minLod;
794 			0.0f,										//  float					maxLod;
795 			VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,	//  VkBorderColor			borderColor;
796 			VK_FALSE,									//  VkBool32				unnormalizedCoordinates;
797 		};
798 
799 		m_image			= createImage(vk, device, &imageCreateInfo);
800 
801 		VkMemoryRequirements			req					= getImageMemoryRequirements(vk, device, *m_image);
802 
803 		req.size		*= 2;
804 		m_allocation	= context.getDefaultAllocator().allocate(req, MemoryRequirement::Any);
805 
806 		VK_CHECK(vk.bindImageMemory(device, *m_image, m_allocation->getMemory(), m_allocation->getOffset()));
807 
808 		const VkImageViewCreateInfo		imageViewCreateInfo	=
809 		{
810 			VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,	//  VkStructureType			sType;
811 			DE_NULL,									//  const void*				pNext;
812 			0,											//  VkImageViewCreateFlags	flags;
813 			*m_image,									//  VkImage					image;
814 			VK_IMAGE_VIEW_TYPE_2D,						//  VkImageViewType			viewType;
815 			imageCreateInfo.format,						//  VkFormat				format;
816 			componentMapping,							//  VkComponentMapping		components;
817 			subresourceRange							//  VkImageSubresourceRange	subresourceRange;
818 		};
819 
820 		m_imageView		= createImageView(vk, device, &imageViewCreateInfo);
821 		m_sampler		= createSampler(vk, device, &samplerCreateInfo);
822 
823 		// Transition input image layouts
824 		{
825 			const Unique<VkCommandPool>		cmdPool			(makeCommandPool(vk, device, queueFamilyIndex));
826 			const Unique<VkCommandBuffer>	cmdBuffer		(makeCommandBuffer(context, *cmdPool));
827 
828 			beginCommandBuffer(vk, *cmdBuffer);
829 
830 			const VkImageMemoryBarrier		imageBarrier	= makeImageMemoryBarrier((VkAccessFlags)0u, VK_ACCESS_TRANSFER_WRITE_BIT,
831 																	VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, *m_image, subresourceRange);
832 
833 			vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
834 				(VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &imageBarrier);
835 
836 			endCommandBuffer(vk, *cmdBuffer);
837 			submitCommandsAndWait(vk, device, context.getUniversalQueue(), *cmdBuffer);
838 		}
839 	}
840 
getImage__anon89c025990111::Image841 	VkImage getImage () const
842 	{
843 		return *m_image;
844 	}
845 
getImageView__anon89c025990111::Image846 	VkImageView getImageView () const
847 	{
848 		return *m_imageView;
849 	}
850 
getSampler__anon89c025990111::Image851 	VkSampler getSampler () const
852 	{
853 		return *m_sampler;
854 	}
855 
856 private:
857 	Move<VkImage>		m_image;
858 	Move<VkImageView>	m_imageView;
859 	Move<VkSampler>		m_sampler;
860 };
861 }
862 
getStagesCount(const VkShaderStageFlags shaderStages)863 deUint32 vkt::subgroups::getStagesCount (const VkShaderStageFlags shaderStages)
864 {
865 	const deUint32	stageCount	= isAllGraphicsStages(shaderStages)    ? 4
866 								: isAllComputeStages(shaderStages)     ? 1
867 #ifndef CTS_USES_VULKANSC
868 								: isAllRayTracingStages(shaderStages)  ? 6
869 								: isAllMeshShadingStages(shaderStages) ? 1
870 #endif // CTS_USES_VULKANSC
871 								: 0;
872 
873 	DE_ASSERT(stageCount != 0);
874 
875 	return stageCount;
876 }
877 
getSharedMemoryBallotHelper()878 std::string vkt::subgroups::getSharedMemoryBallotHelper ()
879 {
880 	return	"shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
881 			"uvec4 sharedMemoryBallot(bool vote)\n"
882 			"{\n"
883 			"  uint groupOffset = gl_SubgroupID;\n"
884 			"  // One invocation in the group 0's the whole group's data\n"
885 			"  if (subgroupElect())\n"
886 			"  {\n"
887 			"    superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
888 			"  }\n"
889 			"  subgroupMemoryBarrierShared();\n"
890 			"  if (vote)\n"
891 			"  {\n"
892 			"    const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
893 			"    const highp uint bitToSet = 1u << invocationId;\n"
894 			"    switch (gl_SubgroupInvocationID / 32)\n"
895 			"    {\n"
896 			"    case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
897 			"    case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
898 			"    case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
899 			"    case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
900 			"    }\n"
901 			"  }\n"
902 			"  subgroupMemoryBarrierShared();\n"
903 			"  return superSecretComputeShaderHelper[groupOffset];\n"
904 			"}\n";
905 }
906 
getSharedMemoryBallotHelperARB()907 std::string vkt::subgroups::getSharedMemoryBallotHelperARB ()
908 {
909 	return	"shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
910 			"uint64_t sharedMemoryBallot(bool vote)\n"
911 			"{\n"
912 			"  uint groupOffset = gl_SubgroupID;\n"
913 			"  // One invocation in the group 0's the whole group's data\n"
914 			"  if (subgroupElect())\n"
915 			"  {\n"
916 			"    superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
917 			"  }\n"
918 			"  subgroupMemoryBarrierShared();\n"
919 			"  if (vote)\n"
920 			"  {\n"
921 			"    const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
922 			"    const highp uint bitToSet = 1u << invocationId;\n"
923 			"    switch (gl_SubgroupInvocationID / 32)\n"
924 			"    {\n"
925 			"    case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
926 			"    case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
927 			"    case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
928 			"    case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
929 			"    }\n"
930 			"  }\n"
931 			"  subgroupMemoryBarrierShared();\n"
932 			"  return packUint2x32(superSecretComputeShaderHelper[groupOffset].xy);\n"
933 			"}\n";
934 }
935 
getSubgroupSize(Context & context)936 deUint32 vkt::subgroups::getSubgroupSize (Context& context)
937 {
938 	return context.getSubgroupProperties().subgroupSize;
939 }
940 
maxSupportedSubgroupSize()941 deUint32 vkt::subgroups::maxSupportedSubgroupSize ()
942 {
943 	return 128u;
944 }
945 
getShaderStageName(VkShaderStageFlags stage)946 std::string vkt::subgroups::getShaderStageName (VkShaderStageFlags stage)
947 {
948 	switch (stage)
949 	{
950 		case VK_SHADER_STAGE_COMPUTE_BIT:					return "compute";
951 		case VK_SHADER_STAGE_FRAGMENT_BIT:					return "fragment";
952 		case VK_SHADER_STAGE_VERTEX_BIT:					return "vertex";
953 		case VK_SHADER_STAGE_GEOMETRY_BIT:					return "geometry";
954 		case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:		return "tess_control";
955 		case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:	return "tess_eval";
956 #ifndef CTS_USES_VULKANSC
957 		case VK_SHADER_STAGE_RAYGEN_BIT_KHR:				return "rgen";
958 		case VK_SHADER_STAGE_ANY_HIT_BIT_KHR:				return "ahit";
959 		case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:			return "chit";
960 		case VK_SHADER_STAGE_MISS_BIT_KHR:					return "miss";
961 		case VK_SHADER_STAGE_INTERSECTION_BIT_KHR:			return "sect";
962 		case VK_SHADER_STAGE_CALLABLE_BIT_KHR:				return "call";
963 		case VK_SHADER_STAGE_MESH_BIT_EXT:					return "mesh";
964 		case VK_SHADER_STAGE_TASK_BIT_EXT:					return "task";
965 #endif // CTS_USES_VULKANSC
966 		default:											TCU_THROW(InternalError, "Unhandled stage");
967 	}
968 }
969 
getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit)970 std::string vkt::subgroups::getSubgroupFeatureName (vk::VkSubgroupFeatureFlagBits bit)
971 {
972 	switch (bit)
973 	{
974 		case VK_SUBGROUP_FEATURE_BASIC_BIT:				return "VK_SUBGROUP_FEATURE_BASIC_BIT";
975 		case VK_SUBGROUP_FEATURE_VOTE_BIT:				return "VK_SUBGROUP_FEATURE_VOTE_BIT";
976 		case VK_SUBGROUP_FEATURE_ARITHMETIC_BIT:		return "VK_SUBGROUP_FEATURE_ARITHMETIC_BIT";
977 		case VK_SUBGROUP_FEATURE_BALLOT_BIT:			return "VK_SUBGROUP_FEATURE_BALLOT_BIT";
978 		case VK_SUBGROUP_FEATURE_SHUFFLE_BIT:			return "VK_SUBGROUP_FEATURE_SHUFFLE_BIT";
979 		case VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT:	return "VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT";
980 		case VK_SUBGROUP_FEATURE_CLUSTERED_BIT:			return "VK_SUBGROUP_FEATURE_CLUSTERED_BIT";
981 		case VK_SUBGROUP_FEATURE_QUAD_BIT:				return "VK_SUBGROUP_FEATURE_QUAD_BIT";
982 		default:										TCU_THROW(InternalError, "Unknown subgroup feature category");
983 	}
984 }
985 
// Registers three SPIR-V assembly shaders that use no subgroup functionality
// with programCollection.spirvAsmSources, under the names "vert_noSubgroup",
// "tesc_noSubgroup" and "tese_noSubgroup". Each listing is preceded by a block
// comment holding the matching GLSL source (presumably what the assembly was
// generated from; the listings' Generator header names the Glslang front end).
void vkt::subgroups::addNoSubgroupShader (SourceCollections& programCollection)
{
	// Vertex shader: positions each vertex by gl_VertexIndex and writes a point size of 1.
	{
	/*
		"#version 450\n"
		"void main (void)\n"
		"{\n"
		"  float pixelSize = 2.0f/1024.0f;\n"
		"   float pixelPosition = pixelSize/2.0f - 1.0f;\n"
		"  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
		"  gl_PointSize = 1.0f;\n"
		"}\n"
	*/
		// NOTE(review): the header comment below says "Bound: 37" although result ids in this
		// listing go up to %39; presumably the assembler ignores ';' comment lines - confirm.
		const std::string vertNoSubgroup =
			"; SPIR-V\n"
			"; Version: 1.3\n"
			"; Generator: Khronos Glslang Reference Front End; 1\n"
			"; Bound: 37\n"
			"; Schema: 0\n"
			"OpCapability Shader\n"
			"%1 = OpExtInstImport \"GLSL.std.450\"\n"
			"OpMemoryModel Logical GLSL450\n"
			"OpEntryPoint Vertex %4 \"main\" %22 %26\n"
			"OpMemberDecorate %20 0 BuiltIn Position\n"
			"OpMemberDecorate %20 1 BuiltIn PointSize\n"
			"OpMemberDecorate %20 2 BuiltIn ClipDistance\n"
			"OpMemberDecorate %20 3 BuiltIn CullDistance\n"
			"OpDecorate %20 Block\n"
			"OpDecorate %26 BuiltIn VertexIndex\n"
			"%2 = OpTypeVoid\n"
			"%3 = OpTypeFunction %2\n"
			"%6 = OpTypeFloat 32\n"
			"%7 = OpTypePointer Function %6\n"
			"%9 = OpConstant %6 0.00195313\n"
			"%12 = OpConstant %6 2\n"
			"%14 = OpConstant %6 1\n"
			"%16 = OpTypeVector %6 4\n"
			"%17 = OpTypeInt 32 0\n"
			"%18 = OpConstant %17 1\n"
			"%19 = OpTypeArray %6 %18\n"
			"%20 = OpTypeStruct %16 %6 %19 %19\n"
			"%21 = OpTypePointer Output %20\n"
			"%22 = OpVariable %21 Output\n"
			"%23 = OpTypeInt 32 1\n"
			"%24 = OpConstant %23 0\n"
			"%25 = OpTypePointer Input %23\n"
			"%26 = OpVariable %25 Input\n"
			"%33 = OpConstant %6 0\n"
			"%35 = OpTypePointer Output %16\n"
			"%37 = OpConstant %23 1\n"
			"%38 = OpTypePointer Output %6\n"
			"%4 = OpFunction %2 None %3\n"
			"%5 = OpLabel\n"
			"%8 = OpVariable %7 Function\n"
			"%10 = OpVariable %7 Function\n"
			"OpStore %8 %9\n"
			"%11 = OpLoad %6 %8\n"
			"%13 = OpFDiv %6 %11 %12\n"
			"%15 = OpFSub %6 %13 %14\n"
			"OpStore %10 %15\n"
			"%27 = OpLoad %23 %26\n"
			"%28 = OpConvertSToF %6 %27\n"
			"%29 = OpLoad %6 %8\n"
			"%30 = OpFMul %6 %28 %29\n"
			"%31 = OpLoad %6 %10\n"
			"%32 = OpFAdd %6 %30 %31\n"
			"%34 = OpCompositeConstruct %16 %32 %33 %33 %14\n"
			"%36 = OpAccessChain %35 %22 %24\n"
			"OpStore %36 %34\n"
			"%39 = OpAccessChain %38 %22 %37\n"
			"OpStore %39 %14\n"
			"OpReturn\n"
			"OpFunctionEnd\n";
		programCollection.spirvAsmSources.add("vert_noSubgroup") << vertNoSubgroup;
	}

	// Tessellation control shader: one output vertex, sets the first two outer
	// tessellation levels to 1 from invocation 0 and passes the position through.
	{
	/*
		"#version 450\n"
		"layout(vertices=1) out;\n"
		"\n"
		"void main (void)\n"
		"{\n"
		"  if (gl_InvocationID == 0)\n"
		"  {\n"
		"    gl_TessLevelOuter[0] = 1.0f;\n"
		"    gl_TessLevelOuter[1] = 1.0f;\n"
		"  }\n"
		"  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
		"}\n"
	*/
		const std::string tescNoSubgroup =
			"; SPIR-V\n"
			"; Version: 1.3\n"
			"; Generator: Khronos Glslang Reference Front End; 1\n"
			"; Bound: 45\n"
			"; Schema: 0\n"
			"OpCapability Tessellation\n"
			"%1 = OpExtInstImport \"GLSL.std.450\"\n"
			"OpMemoryModel Logical GLSL450\n"
			"OpEntryPoint TessellationControl %4 \"main\" %8 %20 %32 %38\n"
			"OpExecutionMode %4 OutputVertices 1\n"
			"OpDecorate %8 BuiltIn InvocationId\n"
			"OpDecorate %20 Patch\n"
			"OpDecorate %20 BuiltIn TessLevelOuter\n"
			"OpMemberDecorate %29 0 BuiltIn Position\n"
			"OpMemberDecorate %29 1 BuiltIn PointSize\n"
			"OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
			"OpMemberDecorate %29 3 BuiltIn CullDistance\n"
			"OpDecorate %29 Block\n"
			"OpMemberDecorate %34 0 BuiltIn Position\n"
			"OpMemberDecorate %34 1 BuiltIn PointSize\n"
			"OpMemberDecorate %34 2 BuiltIn ClipDistance\n"
			"OpMemberDecorate %34 3 BuiltIn CullDistance\n"
			"OpDecorate %34 Block\n"
			"%2 = OpTypeVoid\n"
			"%3 = OpTypeFunction %2\n"
			"%6 = OpTypeInt 32 1\n"
			"%7 = OpTypePointer Input %6\n"
			"%8 = OpVariable %7 Input\n"
			"%10 = OpConstant %6 0\n"
			"%11 = OpTypeBool\n"
			"%15 = OpTypeFloat 32\n"
			"%16 = OpTypeInt 32 0\n"
			"%17 = OpConstant %16 4\n"
			"%18 = OpTypeArray %15 %17\n"
			"%19 = OpTypePointer Output %18\n"
			"%20 = OpVariable %19 Output\n"
			"%21 = OpConstant %15 1\n"
			"%22 = OpTypePointer Output %15\n"
			"%24 = OpConstant %6 1\n"
			"%26 = OpTypeVector %15 4\n"
			"%27 = OpConstant %16 1\n"
			"%28 = OpTypeArray %15 %27\n"
			"%29 = OpTypeStruct %26 %15 %28 %28\n"
			"%30 = OpTypeArray %29 %27\n"
			"%31 = OpTypePointer Output %30\n"
			"%32 = OpVariable %31 Output\n"
			"%34 = OpTypeStruct %26 %15 %28 %28\n"
			"%35 = OpConstant %16 32\n"
			"%36 = OpTypeArray %34 %35\n"
			"%37 = OpTypePointer Input %36\n"
			"%38 = OpVariable %37 Input\n"
			"%40 = OpTypePointer Input %26\n"
			"%43 = OpTypePointer Output %26\n"
			"%4 = OpFunction %2 None %3\n"
			"%5 = OpLabel\n"
			"%9 = OpLoad %6 %8\n"
			"%12 = OpIEqual %11 %9 %10\n"
			"OpSelectionMerge %14 None\n"
			"OpBranchConditional %12 %13 %14\n"
			"%13 = OpLabel\n"
			"%23 = OpAccessChain %22 %20 %10\n"
			"OpStore %23 %21\n"
			"%25 = OpAccessChain %22 %20 %24\n"
			"OpStore %25 %21\n"
			"OpBranch %14\n"
			"%14 = OpLabel\n"
			"%33 = OpLoad %6 %8\n"
			"%39 = OpLoad %6 %8\n"
			"%41 = OpAccessChain %40 %38 %39 %10\n"
			"%42 = OpLoad %26 %41\n"
			"%44 = OpAccessChain %43 %32 %33 %10\n"
			"OpStore %44 %42\n"
			"OpReturn\n"
			"OpFunctionEnd\n";
		programCollection.spirvAsmSources.add("tesc_noSubgroup") << tescNoSubgroup;
	}

	// Tessellation evaluation shader: isolines; offsets the first input
	// position by gl_TessCoord.x scaled to half a pixel.
	{
	/*
		"#version 450\n"
		"layout(isolines) in;\n"
		"\n"
		"void main (void)\n"
		"{\n"
		"  float pixelSize = 2.0f/1024.0f;\n"
		"  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
		"}\n";
	*/
		const std::string teseNoSubgroup =
			"; SPIR-V\n"
			"; Version: 1.3\n"
			"; Generator: Khronos Glslang Reference Front End; 2\n"
			"; Bound: 42\n"
			"; Schema: 0\n"
			"OpCapability Tessellation\n"
			"%1 = OpExtInstImport \"GLSL.std.450\"\n"
			"OpMemoryModel Logical GLSL450\n"
			"OpEntryPoint TessellationEvaluation %4 \"main\" %16 %23 %29\n"
			"OpExecutionMode %4 Isolines\n"
			"OpExecutionMode %4 SpacingEqual\n"
			"OpExecutionMode %4 VertexOrderCcw\n"
			"OpMemberDecorate %14 0 BuiltIn Position\n"
			"OpMemberDecorate %14 1 BuiltIn PointSize\n"
			"OpMemberDecorate %14 2 BuiltIn ClipDistance\n"
			"OpMemberDecorate %14 3 BuiltIn CullDistance\n"
			"OpDecorate %14 Block\n"
			"OpMemberDecorate %19 0 BuiltIn Position\n"
			"OpMemberDecorate %19 1 BuiltIn PointSize\n"
			"OpMemberDecorate %19 2 BuiltIn ClipDistance\n"
			"OpMemberDecorate %19 3 BuiltIn CullDistance\n"
			"OpDecorate %19 Block\n"
			"OpDecorate %29 BuiltIn TessCoord\n"
			"%2 = OpTypeVoid\n"
			"%3 = OpTypeFunction %2\n"
			"%6 = OpTypeFloat 32\n"
			"%7 = OpTypePointer Function %6\n"
			"%9 = OpConstant %6 0.00195313\n"
			"%10 = OpTypeVector %6 4\n"
			"%11 = OpTypeInt 32 0\n"
			"%12 = OpConstant %11 1\n"
			"%13 = OpTypeArray %6 %12\n"
			"%14 = OpTypeStruct %10 %6 %13 %13\n"
			"%15 = OpTypePointer Output %14\n"
			"%16 = OpVariable %15 Output\n"
			"%17 = OpTypeInt 32 1\n"
			"%18 = OpConstant %17 0\n"
			"%19 = OpTypeStruct %10 %6 %13 %13\n"
			"%20 = OpConstant %11 32\n"
			"%21 = OpTypeArray %19 %20\n"
			"%22 = OpTypePointer Input %21\n"
			"%23 = OpVariable %22 Input\n"
			"%24 = OpTypePointer Input %10\n"
			"%27 = OpTypeVector %6 3\n"
			"%28 = OpTypePointer Input %27\n"
			"%29 = OpVariable %28 Input\n"
			"%30 = OpConstant %11 0\n"
			"%31 = OpTypePointer Input %6\n"
			"%36 = OpConstant %6 2\n"
			"%40 = OpTypePointer Output %10\n"
			"%4 = OpFunction %2 None %3\n"
			"%5 = OpLabel\n"
			"%8 = OpVariable %7 Function\n"
			"OpStore %8 %9\n"
			"%25 = OpAccessChain %24 %23 %18 %18\n"
			"%26 = OpLoad %10 %25\n"
			"%32 = OpAccessChain %31 %29 %30\n"
			"%33 = OpLoad %6 %32\n"
			"%34 = OpLoad %6 %8\n"
			"%35 = OpFMul %6 %33 %34\n"
			"%37 = OpFDiv %6 %35 %36\n"
			"%38 = OpCompositeConstruct %10 %37 %37 %37 %37\n"
			"%39 = OpFAdd %10 %26 %38\n"
			"%41 = OpAccessChain %40 %16 %18\n"
			"OpStore %41 %39\n"
			"OpReturn\n"
			"OpFunctionEnd\n";
		programCollection.spirvAsmSources.add("tese_noSubgroup") << teseNoSubgroup;
	}

}
1238 
getFramebufferBufferDeclarations(const VkFormat & format,const std::vector<std::string> & declarations,const deUint32 stage)1239 static std::string getFramebufferBufferDeclarations (const VkFormat&					format,
1240 													 const std::vector<std::string>&	declarations,
1241 													 const deUint32						stage)
1242 {
1243 	if (declarations.empty())
1244 	{
1245 		const std::string	name	= (stage == 0) ? "result" : "out_color";
1246 		const std::string	suffix	= (stage == 2) ? "[]" : "";
1247 		const std::string	result	=
1248 			"layout(location = 0) out float " + name + suffix + ";\n"
1249 			"layout(set = 0, binding = 0) uniform Buffer1\n"
1250 			"{\n"
1251 			"  " + de::toString(subgroups::getFormatNameForGLSL(format)) + " data[" + de::toString(subgroups::maxSupportedSubgroupSize()) + "];\n"
1252 			"};\n";
1253 
1254 		return result;
1255 	}
1256 	else
1257 	{
1258 		return declarations[stage];
1259 	}
1260 }
1261 
initStdFrameBufferPrograms(SourceCollections & programCollection,const vk::ShaderBuildOptions & buildOptions,VkShaderStageFlags shaderStage,VkFormat format,bool gsPointSize,const std::string & extHeader,const std::string & testSrc,const std::string & helperStr,const std::vector<std::string> & declarations)1262 void vkt::subgroups::initStdFrameBufferPrograms (SourceCollections&					programCollection,
1263 												 const vk::ShaderBuildOptions&		buildOptions,
1264 												 VkShaderStageFlags					shaderStage,
1265 												 VkFormat							format,
1266 												 bool								gsPointSize,
1267 												 const std::string&					extHeader,
1268 												 const std::string&					testSrc,
1269 												 const std::string&					helperStr,
1270 												 const std::vector<std::string>&	declarations)
1271 {
1272 	subgroups::setFragmentShaderFrameBuffer(programCollection);
1273 
1274 	if (shaderStage != VK_SHADER_STAGE_VERTEX_BIT)
1275 		subgroups::setVertexShaderFrameBuffer(programCollection);
1276 
1277 	if (shaderStage == VK_SHADER_STAGE_VERTEX_BIT)
1278 	{
1279 		std::ostringstream vertex;
1280 
1281 		vertex << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1282 			<< extHeader
1283 			<< "layout(location = 0) in highp vec4 in_position;\n"
1284 			<< getFramebufferBufferDeclarations(format, declarations, 0)
1285 			<< "\n"
1286 			<< helperStr
1287 			<< "void main (void)\n"
1288 			<< "{\n"
1289 			<< "  uint tempRes;\n"
1290 			<< testSrc
1291 			<< "  result = float(tempRes);\n"
1292 			<< "  gl_Position = in_position;\n"
1293 			<< "  gl_PointSize = 1.0f;\n"
1294 			<< "}\n";
1295 
1296 		programCollection.glslSources.add("vert") << glu::VertexSource(vertex.str()) << buildOptions;
1297 	}
1298 	else if (shaderStage == VK_SHADER_STAGE_GEOMETRY_BIT)
1299 	{
1300 		std::ostringstream geometry;
1301 
1302 		geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1303 			<< extHeader
1304 			<< "layout(points) in;\n"
1305 			<< "layout(points, max_vertices = 1) out;\n"
1306 			<< getFramebufferBufferDeclarations(format, declarations, 1)
1307 			<< "\n"
1308 			<< helperStr
1309 			<< "void main (void)\n"
1310 			<< "{\n"
1311 			<< "  uint tempRes;\n"
1312 			<< testSrc
1313 			<< "  out_color = float(tempRes);\n"
1314 			<< "  gl_Position = gl_in[0].gl_Position;\n"
1315 			<< (gsPointSize ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "")
1316 			<< "  EmitVertex();\n"
1317 			<< "  EndPrimitive();\n"
1318 			<< "}\n";
1319 
1320 		programCollection.glslSources.add("geometry") << glu::GeometrySource(geometry.str()) << buildOptions;
1321 	}
1322 	else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
1323 	{
1324 		std::ostringstream controlSource;
1325 
1326 		controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1327 			<< extHeader
1328 			<< "layout(vertices = 2) out;\n"
1329 			<< getFramebufferBufferDeclarations(format, declarations, 2)
1330 			<< "\n"
1331 			<< helperStr
1332 			<< "void main (void)\n"
1333 			<< "{\n"
1334 			<< "  if (gl_InvocationID == 0)\n"
1335 			<< "  {\n"
1336 			<< "    gl_TessLevelOuter[0] = 1.0f;\n"
1337 			<< "    gl_TessLevelOuter[1] = 1.0f;\n"
1338 			<< "  }\n"
1339 			<< "  uint tempRes;\n"
1340 			<< testSrc
1341 			<< "  out_color[gl_InvocationID] = float(tempRes);\n"
1342 			<< "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1343 			<< (gsPointSize ? "  gl_out[gl_InvocationID].gl_PointSize = gl_in[gl_InvocationID].gl_PointSize;\n" : "")
1344 			<< "}\n";
1345 
1346 		programCollection.glslSources.add("tesc") << glu::TessellationControlSource(controlSource.str()) << buildOptions;
1347 		subgroups::setTesEvalShaderFrameBuffer(programCollection);
1348 	}
1349 	else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
1350 	{
1351 		ostringstream evaluationSource;
1352 
1353 		evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1354 			<< extHeader
1355 			<< "layout(isolines, equal_spacing, ccw ) in;\n"
1356 			<< getFramebufferBufferDeclarations(format, declarations, 3)
1357 			<< "\n"
1358 			<< helperStr
1359 			<< "void main (void)\n"
1360 			<< "{\n"
1361 			<< "  uint tempRes;\n"
1362 			<< testSrc
1363 			<< "  out_color = float(tempRes);\n"
1364 			<< "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
1365 			<< (gsPointSize ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "")
1366 			<< "}\n";
1367 
1368 		subgroups::setTesCtrlShaderFrameBuffer(programCollection);
1369 		programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
1370 	}
1371 	else
1372 	{
1373 		DE_FATAL("Unsupported shader stage");
1374 	}
1375 }
1376 
getBufferDeclarations(vk::VkShaderStageFlags shaderStage,const std::string & formatName,const std::vector<std::string> & declarations,const deUint32 stage)1377 static std::string getBufferDeclarations (vk::VkShaderStageFlags			shaderStage,
1378 										  const std::string&				formatName,
1379 										  const std::vector<std::string>&	declarations,
1380 										  const deUint32					stage)
1381 {
1382 	if (declarations.empty())
1383 	{
1384 		const deUint32	stageCount	= vkt::subgroups::getStagesCount(shaderStage);
1385 		const deUint32	binding0	= stage;
1386 		const deUint32	binding1	= stageCount;
1387 		const bool		fragment	= (shaderStage & VK_SHADER_STAGE_FRAGMENT_BIT) && (stage == stageCount);
1388 		const string	buffer1		= fragment
1389 									? "layout(location = 0) out uint result;\n"
1390 									: "layout(set = 0, binding = " + de::toString(binding0) + ", std430) buffer Buffer1\n"
1391 									  "{\n"
1392 									  "  uint result[];\n"
1393 									  "};\n";
1394 		//todo boza I suppose it can be "layout(set = 0, binding = " + de::toString(binding1) + ", std430) readonly buffer Buffer2\n"
1395 		const string	buffer2		= "layout(set = 0, binding = " + de::toString(binding1) + ", std430)" + (stageCount == 1 ? "" : " readonly") + " buffer Buffer" + (fragment ? "1" : "2") + "\n"
1396 									  "{\n"
1397 									  "  " + formatName + " data[];\n"
1398 									  "};\n";
1399 
1400 		return buffer1 + buffer2;
1401 	}
1402 	else
1403 	{
1404 		return declarations[stage];
1405 	}
1406 }
1407 
initStdPrograms(vk::SourceCollections & programCollection,const vk::ShaderBuildOptions & buildOptions,vk::VkShaderStageFlags shaderStage,vk::VkFormat format,bool gsPointSize,const std::string & extHeader,const std::string & testSrc,const std::string & helperStr,const std::vector<std::string> & declarations,const bool avoidHelperInvocations,const std::string & tempRes)1408 void vkt::subgroups::initStdPrograms (vk::SourceCollections&			programCollection,
1409 									  const vk::ShaderBuildOptions&		buildOptions,
1410 									  vk::VkShaderStageFlags			shaderStage,
1411 									  vk::VkFormat						format,
1412 									  bool								gsPointSize,
1413 									  const std::string&				extHeader,
1414 									  const std::string&				testSrc,
1415 									  const std::string&				helperStr,
1416 									  const std::vector<std::string>&	declarations,
1417 									  const bool						avoidHelperInvocations,
1418 									  const std::string&				tempRes)
1419 {
1420 	const std::string	formatName	= subgroups::getFormatNameForGLSL(format);
1421 
1422 	if (isAllComputeStages(shaderStage))
1423 	{
1424 		std::ostringstream	src;
1425 
1426 		src << "#version 450\n"
1427 			<< extHeader
1428 			<< "layout (local_size_x_id = 0, local_size_y_id = 1, "
1429 			"local_size_z_id = 2) in;\n"
1430 			<< getBufferDeclarations(shaderStage, formatName, declarations, 0)
1431 			<< "\n"
1432 			<< helperStr
1433 			<< "void main (void)\n"
1434 			<< "{\n"
1435 			<< "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1436 			<< "  highp uint offset = globalSize.x * ((globalSize.y * "
1437 			"gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1438 			"gl_GlobalInvocationID.x;\n"
1439 			<< tempRes
1440 			<< testSrc
1441 			<< "  result[offset] = tempRes;\n"
1442 			<< "}\n";
1443 
1444 		programCollection.glslSources.add("comp") << glu::ComputeSource(src.str()) << buildOptions;
1445 	}
1446 #ifndef CTS_USES_VULKANSC
1447 	else if (isAllMeshShadingStages(shaderStage))
1448 	{
1449 		const bool testMesh = ((shaderStage & VK_SHADER_STAGE_MESH_BIT_EXT) != 0u);
1450 		const bool testTask = ((shaderStage & VK_SHADER_STAGE_TASK_BIT_EXT) != 0u);
1451 
1452 		if (testMesh)
1453 		{
1454 			std::ostringstream mesh;
1455 
1456 			mesh
1457 				<< "#version 450\n"
1458 				<< "#extension GL_EXT_mesh_shader : enable\n"
1459 				//<< "#extension GL_NV_mesh_shader : enable\n"
1460 				<< extHeader
1461 				<< "layout (local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n"
1462 				<< "layout (points) out;\n"
1463 				<< "layout (max_vertices = 1, max_primitives = 1) out;\n"
1464 				<< getBufferDeclarations(shaderStage, formatName, declarations, 0)
1465 				<< "\n"
1466 				<< helperStr
1467 				<< "void main (void)\n"
1468 				<< "{\n"
1469 				<< "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1470 				//<< "  uvec3 globalSize = uvec3(0, 0, 0)/*gl_NumWorkGroups*/ * gl_WorkGroupSize;\n"
1471 				<< "  highp uint offset = globalSize.x * ((globalSize.y * "
1472 				"gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1473 				"gl_GlobalInvocationID.x;\n"
1474 				<< tempRes
1475 				<< testSrc
1476 				<< "  result[offset] = tempRes;\n"
1477 				<< "  SetMeshOutputsEXT(0u, 0u);\n"
1478 				//<< "  gl_PrimitiveCountNV = 0;\n"
1479 				<< "}\n";
1480 
1481 			programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
1482 		}
1483 		else
1484 		{
1485 			const std::string meshShaderNoSubgroups =
1486 				"#version 450\n"
1487 				"#extension GL_EXT_mesh_shader : enable\n"
1488 				"\n"
1489 				"layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
1490 				"layout (points) out;\n"
1491 				"layout (max_vertices = 1, max_primitives = 1) out;\n"
1492 				"\n"
1493 				"void main (void)\n"
1494 				"{\n"
1495 				"  SetMeshOutputsEXT(0u, 0u);\n"
1496 				"}\n"
1497 				;
1498 			programCollection.glslSources.add("mesh") << glu::MeshSource(meshShaderNoSubgroups) << buildOptions;
1499 		}
1500 
1501 		if (testTask)
1502 		{
1503 			const tcu::UVec3	emitSize = (testMesh ? tcu::UVec3(1u, 1u, 1u) : tcu::UVec3(0u, 0u, 0u));
1504 			std::ostringstream	task;
1505 
1506 			task
1507 				<< "#version 450\n"
1508 				<< "#extension GL_EXT_mesh_shader : enable\n"
1509 				//<< "#extension GL_NV_mesh_shader : enable\n"
1510 				<< extHeader
1511 				<< "layout (local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n"
1512 				<< getBufferDeclarations(shaderStage, formatName, declarations, 0)
1513 				<< "\n"
1514 				<< helperStr
1515 				<< "void main (void)\n"
1516 				<< "{\n"
1517 				<< "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1518 				//<< "  uvec3 globalSize = uvec3(0, 0, 0)/*gl_NumWorkGroups*/ * gl_WorkGroupSize;\n"
1519 				<< "  highp uint offset = globalSize.x * ((globalSize.y * "
1520 				"gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1521 				"gl_GlobalInvocationID.x;\n"
1522 				<< tempRes
1523 				<< testSrc
1524 				<< "  result[offset] = tempRes;\n"
1525 				<< "  EmitMeshTasksEXT(" << emitSize.x() << ", " << emitSize.y() << ", " << emitSize.z() << ");\n"
1526 				//<< "  gl_TaskCountNV = " << emitSize.x() << ";\n"
1527 				<< "}\n";
1528 
1529 			programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
1530 		}
1531 	}
1532 #endif // CTS_USES_VULKANSC
1533 	else if (isAllGraphicsStages(shaderStage))
1534 	{
1535 		const string vertex =
1536 			"#version 450\n"
1537 			+ extHeader
1538 			+ getBufferDeclarations(shaderStage, formatName, declarations, 0) +
1539 			"\n"
1540 			+ helperStr +
1541 			"void main (void)\n"
1542 			"{\n"
1543 			"  uint tempRes;\n"
1544 			+ testSrc +
1545 			"  result[gl_VertexIndex] = tempRes;\n"
1546 			"  float pixelSize = 2.0f/1024.0f;\n"
1547 			"  float pixelPosition = pixelSize/2.0f - 1.0f;\n"
1548 			"  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
1549 			"  gl_PointSize = 1.0f;\n"
1550 			"}\n";
1551 
1552 		const string tesc =
1553 			"#version 450\n"
1554 			+ extHeader +
1555 			"layout(vertices=1) out;\n"
1556 			+ getBufferDeclarations(shaderStage, formatName, declarations, 1) +
1557 			"\n"
1558 			+ helperStr +
1559 			"void main (void)\n"
1560 			"{\n"
1561 			+ tempRes
1562 			+ testSrc +
1563 			"  result[gl_PrimitiveID] = tempRes;\n"
1564 			"  if (gl_InvocationID == 0)\n"
1565 			"  {\n"
1566 			"    gl_TessLevelOuter[0] = 1.0f;\n"
1567 			"    gl_TessLevelOuter[1] = 1.0f;\n"
1568 			"  }\n"
1569 			"  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1570 			+ (gsPointSize ? "  gl_out[gl_InvocationID].gl_PointSize = gl_in[gl_InvocationID].gl_PointSize;\n" : "") +
1571 			"}\n";
1572 
1573 		const string tese =
1574 			"#version 450\n"
1575 			+ extHeader +
1576 			"layout(isolines) in;\n"
1577 			+ getBufferDeclarations(shaderStage, formatName, declarations, 2) +
1578 			"\n"
1579 			+ helperStr +
1580 			"void main (void)\n"
1581 			"{\n"
1582 			+ tempRes
1583 			+ testSrc +
1584 			"  result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempRes;\n"
1585 			"  float pixelSize = 2.0f/1024.0f;\n"
1586 			"  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
1587 			+ (gsPointSize ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "") +
1588 			"}\n";
1589 
1590 		const string geometry =
1591 			"#version 450\n"
1592 			+ extHeader +
1593 			"layout(${TOPOLOGY}) in;\n"
1594 			"layout(points, max_vertices = 1) out;\n"
1595 			+ getBufferDeclarations(shaderStage, formatName, declarations, 3) +
1596 			"\n"
1597 			+ helperStr +
1598 			"void main (void)\n"
1599 			"{\n"
1600 			+ tempRes
1601 			+ testSrc +
1602 			"  result[gl_PrimitiveIDIn] = tempRes;\n"
1603 			"  gl_Position = gl_in[0].gl_Position;\n"
1604 			+ (gsPointSize ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "") +
1605 			"  EmitVertex();\n"
1606 			"  EndPrimitive();\n"
1607 			"}\n";
1608 
1609 		const string fragment =
1610 			"#version 450\n"
1611 			+ extHeader
1612 			+ getBufferDeclarations(shaderStage, formatName, declarations, 4)
1613 			+ helperStr +
1614 			"void main (void)\n"
1615 			"{\n"
1616 			+ (avoidHelperInvocations ? "  if (gl_HelperInvocation) return;\n" : "")
1617 			+ tempRes
1618 			+ testSrc +
1619 			"  result = tempRes;\n"
1620 			"}\n";
1621 
1622 		subgroups::addNoSubgroupShader(programCollection);
1623 
1624 		programCollection.glslSources.add("vert") << glu::VertexSource(vertex) << buildOptions;
1625 		programCollection.glslSources.add("tesc") << glu::TessellationControlSource(tesc) << buildOptions;
1626 		programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(tese) << buildOptions;
1627 		subgroups::addGeometryShadersFromTemplate(geometry, buildOptions, programCollection.glslSources);
1628 		programCollection.glslSources.add("fragment") << glu::FragmentSource(fragment)<< buildOptions;
1629 	}
1630 #ifndef CTS_USES_VULKANSC
1631 	else if (isAllRayTracingStages(shaderStage))
1632 	{
1633 		const std::string	rgenShader	=
1634 			"#version 460 core\n"
1635 			"#extension GL_EXT_ray_tracing: require\n"
1636 			+ extHeader +
1637 			"layout(location = 0) rayPayloadEXT uvec4 payload;\n"
1638 			"layout(location = 0) callableDataEXT uvec4 callData;"
1639 			"layout(set = 1, binding = 0) uniform accelerationStructureEXT topLevelAS;\n"
1640 			+ getBufferDeclarations(shaderStage, formatName, declarations, 0) +
1641 			"\n"
1642 			+ helperStr +
1643 			"void main()\n"
1644 			"{\n"
1645 			+ tempRes
1646 			+ testSrc +
1647 			"  uint  rayFlags   = 0;\n"
1648 			"  uint  cullMask   = 0xFF;\n"
1649 			"  float tmin       = 0.0;\n"
1650 			"  float tmax       = 9.0;\n"
1651 			"  vec3  origin     = vec3((float(gl_LaunchIDEXT.x) + 0.5f) / float(gl_LaunchSizeEXT.x), (float(gl_LaunchIDEXT.y) + 0.5f) / float(gl_LaunchSizeEXT.y), 0.0);\n"
1652 			"  vec3  directHit  = vec3(0.0, 0.0, -1.0);\n"
1653 			"  vec3  directMiss = vec3(0.0, 0.0, +1.0);\n"
1654 			"\n"
1655 			"  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directHit, tmax, 0);\n"
1656 			"  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directMiss, tmax, 0);\n"
1657 			"  executeCallableEXT(0, 0);"
1658 			"  result[gl_LaunchIDEXT.x] = tempRes;\n"
1659 			"}\n";
1660 		const std::string	ahitShader	=
1661 			"#version 460 core\n"
1662 			"#extension GL_EXT_ray_tracing: require\n"
1663 			+ extHeader +
1664 			"hitAttributeEXT vec3 attribs;\n"
1665 			"layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
1666 			+ getBufferDeclarations(shaderStage, formatName, declarations, 1) +
1667 			"\n"
1668 			+ helperStr +
1669 			"void main()\n"
1670 			"{\n"
1671 			+ tempRes
1672 			+ testSrc +
1673 			"  result[gl_LaunchIDEXT.x] = tempRes;\n"
1674 			"}\n";
1675 		const std::string	chitShader	=
1676 			"#version 460 core\n"
1677 			"#extension GL_EXT_ray_tracing: require\n"
1678 			+ extHeader +
1679 			"hitAttributeEXT vec3 attribs;\n"
1680 			"layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
1681 			+ getBufferDeclarations(shaderStage, formatName, declarations, 2) +
1682 			"\n"
1683 			+ helperStr +
1684 			"void main()\n"
1685 			"{\n"
1686 			+ tempRes
1687 			+ testSrc +
1688 			"  result[gl_LaunchIDEXT.x] = tempRes;\n"
1689 			"}\n";
1690 		const std::string	missShader	=
1691 			"#version 460 core\n"
1692 			"#extension GL_EXT_ray_tracing: require\n"
1693 			+ extHeader +
1694 			"layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
1695 			+ getBufferDeclarations(shaderStage, formatName, declarations, 3) +
1696 			"\n"
1697 			+ helperStr +
1698 			"void main()\n"
1699 			"{\n"
1700 			+ tempRes
1701 			+ testSrc +
1702 			"  result[gl_LaunchIDEXT.x] = tempRes;\n"
1703 			"}\n";
1704 		const std::string	sectShader	=
1705 			"#version 460 core\n"
1706 			"#extension GL_EXT_ray_tracing: require\n"
1707 			+ extHeader +
1708 			"hitAttributeEXT vec3 hitAttribute;\n"
1709 			+ getBufferDeclarations(shaderStage, formatName, declarations, 4) +
1710 			"\n"
1711 			+ helperStr +
1712 			"void main()\n"
1713 			"{\n"
1714 			+ tempRes
1715 			+ testSrc +
1716 			"  reportIntersectionEXT(0.75f, 0x7Eu);\n"
1717 			"  result[gl_LaunchIDEXT.x] = tempRes;\n"
1718 			"}\n";
1719 		const std::string	callShader	=
1720 			"#version 460 core\n"
1721 			"#extension GL_EXT_ray_tracing: require\n"
1722 			+ extHeader +
1723 			"layout(location = 0) callableDataInEXT float callData;\n"
1724 			+ getBufferDeclarations(shaderStage, formatName, declarations, 5) +
1725 			"\n"
1726 			+ helperStr +
1727 			"void main()\n"
1728 			"{\n"
1729 			+ tempRes
1730 			+ testSrc +
1731 			"  result[gl_LaunchIDEXT.x] = tempRes;\n"
1732 			"}\n";
1733 
1734 		programCollection.glslSources.add("rgen") << glu::RaygenSource		(rgenShader) << buildOptions;
1735 		programCollection.glslSources.add("ahit") << glu::AnyHitSource		(ahitShader) << buildOptions;
1736 		programCollection.glslSources.add("chit") << glu::ClosestHitSource	(chitShader) << buildOptions;
1737 		programCollection.glslSources.add("miss") << glu::MissSource		(missShader) << buildOptions;
1738 		programCollection.glslSources.add("sect") << glu::IntersectionSource(sectShader) << buildOptions;
1739 		programCollection.glslSources.add("call") << glu::CallableSource	(callShader) << buildOptions;
1740 
1741 		subgroups::addRayTracingNoSubgroupShader(programCollection);
1742 	}
1743 #endif // CTS_USES_VULKANSC
1744 	else
1745 		TCU_THROW(InternalError, "Unknown stage or invalid stage set");
1746 
1747 }
1748 
isSubgroupSupported(Context & context)1749 bool vkt::subgroups::isSubgroupSupported (Context& context)
1750 {
1751 	return context.contextSupports(vk::ApiVersion(0, 1, 1, 0));
1752 }
1753 
areSubgroupOperationsSupportedForStage(Context & context,const VkShaderStageFlags stage)1754 bool vkt::subgroups::areSubgroupOperationsSupportedForStage (Context& context, const VkShaderStageFlags stage)
1755 {
1756 	return (stage & (context.getSubgroupProperties().supportedStages)) ? true : false;
1757 }
1758 
isSubgroupFeatureSupportedForDevice(Context & context,VkSubgroupFeatureFlagBits bit)1759 bool vkt::subgroups::isSubgroupFeatureSupportedForDevice (Context& context, VkSubgroupFeatureFlagBits bit)
1760 {
1761 	return (bit & (context.getSubgroupProperties().supportedOperations)) ? true : false;
1762 }
1763 
isFragmentSSBOSupportedForDevice(Context & context)1764 bool vkt::subgroups::isFragmentSSBOSupportedForDevice (Context& context)
1765 {
1766 	return context.getDeviceFeatures().fragmentStoresAndAtomics ? true : false;
1767 }
1768 
isVertexSSBOSupportedForDevice(Context & context)1769 bool vkt::subgroups::isVertexSSBOSupportedForDevice (Context& context)
1770 {
1771 	return context.getDeviceFeatures().vertexPipelineStoresAndAtomics ? true : false;
1772 }
1773 
isInt64SupportedForDevice(Context & context)1774 bool vkt::subgroups::isInt64SupportedForDevice (Context& context)
1775 {
1776 	return context.getDeviceFeatures().shaderInt64 ? true : false;
1777 }
1778 
isTessellationAndGeometryPointSizeSupported(Context & context)1779 bool vkt::subgroups::isTessellationAndGeometryPointSizeSupported (Context& context)
1780 {
1781 	return context.getDeviceFeatures().shaderTessellationAndGeometryPointSize ? true : false;
1782 }
1783 
is16BitUBOStorageSupported(Context & context)1784 bool vkt::subgroups::is16BitUBOStorageSupported (Context& context)
1785 {
1786 	return context.get16BitStorageFeatures().uniformAndStorageBuffer16BitAccess ? true : false;
1787 }
1788 
is8BitUBOStorageSupported(Context & context)1789 bool vkt::subgroups::is8BitUBOStorageSupported (Context& context)
1790 {
1791 	return context.get8BitStorageFeatures().uniformAndStorageBuffer8BitAccess ? true : false;
1792 }
1793 
isFormatSupportedForDevice(Context & context,vk::VkFormat format)1794 bool vkt::subgroups::isFormatSupportedForDevice (Context& context, vk::VkFormat format)
1795 {
1796 	const VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures&	subgroupExtendedTypesFeatures	= context.getShaderSubgroupExtendedTypesFeatures();
1797 	const VkPhysicalDeviceShaderFloat16Int8Features&			float16Int8Features				= context.getShaderFloat16Int8Features();
1798 	const VkPhysicalDevice16BitStorageFeatures&					storage16bit					= context.get16BitStorageFeatures();
1799 	const VkPhysicalDevice8BitStorageFeatures&					storage8bit						= context.get8BitStorageFeatures();
1800 	const VkPhysicalDeviceFeatures&								features						= context.getDeviceFeatures();
1801 	bool														shaderFloat64					= features.shaderFloat64 ? true : false;
1802 	bool														shaderInt16						= features.shaderInt16 ? true : false;
1803 	bool														shaderInt64						= features.shaderInt64 ? true : false;
1804 	bool														shaderSubgroupExtendedTypes		= false;
1805 	bool														shaderFloat16					= false;
1806 	bool														shaderInt8						= false;
1807 	bool														storageBuffer16BitAccess		= false;
1808 	bool														storageBuffer8BitAccess			= false;
1809 
1810 	if (context.isDeviceFunctionalitySupported("VK_KHR_shader_subgroup_extended_types") &&
1811 		context.isDeviceFunctionalitySupported("VK_KHR_shader_float16_int8"))
1812 	{
1813 		shaderSubgroupExtendedTypes	= subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes ? true : false;
1814 		shaderFloat16				= float16Int8Features.shaderFloat16 ? true : false;
1815 		shaderInt8					= float16Int8Features.shaderInt8 ? true : false;
1816 
1817 		if ( context.isDeviceFunctionalitySupported("VK_KHR_16bit_storage") )
1818 			storageBuffer16BitAccess = storage16bit.storageBuffer16BitAccess ? true : false;
1819 
1820 		if (context.isDeviceFunctionalitySupported("VK_KHR_8bit_storage"))
1821 			storageBuffer8BitAccess = storage8bit.storageBuffer8BitAccess ? true : false;
1822 	}
1823 
1824 	switch (format)
1825 	{
1826 		default:
1827 			return true;
1828 		case VK_FORMAT_R16_SFLOAT:
1829 		case VK_FORMAT_R16G16_SFLOAT:
1830 		case VK_FORMAT_R16G16B16_SFLOAT:
1831 		case VK_FORMAT_R16G16B16A16_SFLOAT:
1832 			return shaderSubgroupExtendedTypes && shaderFloat16 && storageBuffer16BitAccess;
1833 		case VK_FORMAT_R64_SFLOAT:
1834 		case VK_FORMAT_R64G64_SFLOAT:
1835 		case VK_FORMAT_R64G64B64_SFLOAT:
1836 		case VK_FORMAT_R64G64B64A64_SFLOAT:
1837 			return shaderFloat64;
1838 		case VK_FORMAT_R8_SINT:
1839 		case VK_FORMAT_R8G8_SINT:
1840 		case VK_FORMAT_R8G8B8_SINT:
1841 		case VK_FORMAT_R8G8B8A8_SINT:
1842 		case VK_FORMAT_R8_UINT:
1843 		case VK_FORMAT_R8G8_UINT:
1844 		case VK_FORMAT_R8G8B8_UINT:
1845 		case VK_FORMAT_R8G8B8A8_UINT:
1846 			return shaderSubgroupExtendedTypes && shaderInt8 && storageBuffer8BitAccess;
1847 		case VK_FORMAT_R16_SINT:
1848 		case VK_FORMAT_R16G16_SINT:
1849 		case VK_FORMAT_R16G16B16_SINT:
1850 		case VK_FORMAT_R16G16B16A16_SINT:
1851 		case VK_FORMAT_R16_UINT:
1852 		case VK_FORMAT_R16G16_UINT:
1853 		case VK_FORMAT_R16G16B16_UINT:
1854 		case VK_FORMAT_R16G16B16A16_UINT:
1855 			return shaderSubgroupExtendedTypes && shaderInt16 && storageBuffer16BitAccess;
1856 		case VK_FORMAT_R64_SINT:
1857 		case VK_FORMAT_R64G64_SINT:
1858 		case VK_FORMAT_R64G64B64_SINT:
1859 		case VK_FORMAT_R64G64B64A64_SINT:
1860 		case VK_FORMAT_R64_UINT:
1861 		case VK_FORMAT_R64G64_UINT:
1862 		case VK_FORMAT_R64G64B64_UINT:
1863 		case VK_FORMAT_R64G64B64A64_UINT:
1864 			return shaderSubgroupExtendedTypes && shaderInt64;
1865 	}
1866 }
1867 
isSubgroupBroadcastDynamicIdSupported(Context & context)1868 bool vkt::subgroups::isSubgroupBroadcastDynamicIdSupported (Context& context)
1869 {
1870 	return context.contextSupports(vk::ApiVersion(0, 1, 2, 0)) &&
1871 		vk::getPhysicalDeviceVulkan12Features(context.getInstanceInterface(), context.getPhysicalDevice()).subgroupBroadcastDynamicId;
1872 }
1873 
getFormatNameForGLSL(VkFormat format)1874 std::string vkt::subgroups::getFormatNameForGLSL (VkFormat format)
1875 {
1876 	switch (format)
1877 	{
1878 		case VK_FORMAT_R8_SINT:				return "int8_t";
1879 		case VK_FORMAT_R8G8_SINT:			return "i8vec2";
1880 		case VK_FORMAT_R8G8B8_SINT:			return "i8vec3";
1881 		case VK_FORMAT_R8G8B8A8_SINT:		return "i8vec4";
1882 		case VK_FORMAT_R8_UINT:				return "uint8_t";
1883 		case VK_FORMAT_R8G8_UINT:			return "u8vec2";
1884 		case VK_FORMAT_R8G8B8_UINT:			return "u8vec3";
1885 		case VK_FORMAT_R8G8B8A8_UINT:		return "u8vec4";
1886 		case VK_FORMAT_R16_SINT:			return "int16_t";
1887 		case VK_FORMAT_R16G16_SINT:			return "i16vec2";
1888 		case VK_FORMAT_R16G16B16_SINT:		return "i16vec3";
1889 		case VK_FORMAT_R16G16B16A16_SINT:	return "i16vec4";
1890 		case VK_FORMAT_R16_UINT:			return "uint16_t";
1891 		case VK_FORMAT_R16G16_UINT:			return "u16vec2";
1892 		case VK_FORMAT_R16G16B16_UINT:		return "u16vec3";
1893 		case VK_FORMAT_R16G16B16A16_UINT:	return "u16vec4";
1894 		case VK_FORMAT_R32_SINT:			return "int";
1895 		case VK_FORMAT_R32G32_SINT:			return "ivec2";
1896 		case VK_FORMAT_R32G32B32_SINT:		return "ivec3";
1897 		case VK_FORMAT_R32G32B32A32_SINT:	return "ivec4";
1898 		case VK_FORMAT_R32_UINT:			return "uint";
1899 		case VK_FORMAT_R32G32_UINT:			return "uvec2";
1900 		case VK_FORMAT_R32G32B32_UINT:		return "uvec3";
1901 		case VK_FORMAT_R32G32B32A32_UINT:	return "uvec4";
1902 		case VK_FORMAT_R64_SINT:			return "int64_t";
1903 		case VK_FORMAT_R64G64_SINT:			return "i64vec2";
1904 		case VK_FORMAT_R64G64B64_SINT:		return "i64vec3";
1905 		case VK_FORMAT_R64G64B64A64_SINT:	return "i64vec4";
1906 		case VK_FORMAT_R64_UINT:			return "uint64_t";
1907 		case VK_FORMAT_R64G64_UINT:			return "u64vec2";
1908 		case VK_FORMAT_R64G64B64_UINT:		return "u64vec3";
1909 		case VK_FORMAT_R64G64B64A64_UINT:	return "u64vec4";
1910 		case VK_FORMAT_R16_SFLOAT:			return "float16_t";
1911 		case VK_FORMAT_R16G16_SFLOAT:		return "f16vec2";
1912 		case VK_FORMAT_R16G16B16_SFLOAT:	return "f16vec3";
1913 		case VK_FORMAT_R16G16B16A16_SFLOAT:	return "f16vec4";
1914 		case VK_FORMAT_R32_SFLOAT:			return "float";
1915 		case VK_FORMAT_R32G32_SFLOAT:		return "vec2";
1916 		case VK_FORMAT_R32G32B32_SFLOAT:	return "vec3";
1917 		case VK_FORMAT_R32G32B32A32_SFLOAT:	return "vec4";
1918 		case VK_FORMAT_R64_SFLOAT:			return "double";
1919 		case VK_FORMAT_R64G64_SFLOAT:		return "dvec2";
1920 		case VK_FORMAT_R64G64B64_SFLOAT:	return "dvec3";
1921 		case VK_FORMAT_R64G64B64A64_SFLOAT:	return "dvec4";
1922 		case VK_FORMAT_R8_USCALED:			return "bool";
1923 		case VK_FORMAT_R8G8_USCALED:		return "bvec2";
1924 		case VK_FORMAT_R8G8B8_USCALED:		return "bvec3";
1925 		case VK_FORMAT_R8G8B8A8_USCALED:	return "bvec4";
1926 		default:							TCU_THROW(InternalError, "Unhandled format");
1927 	}
1928 }
1929 
getAdditionalExtensionForFormat(vk::VkFormat format)1930 std::string vkt::subgroups::getAdditionalExtensionForFormat (vk::VkFormat format)
1931 {
1932 	switch (format)
1933 	{
1934 		default:
1935 			return "";
1936 		case VK_FORMAT_R8_SINT:
1937 		case VK_FORMAT_R8G8_SINT:
1938 		case VK_FORMAT_R8G8B8_SINT:
1939 		case VK_FORMAT_R8G8B8A8_SINT:
1940 		case VK_FORMAT_R8_UINT:
1941 		case VK_FORMAT_R8G8_UINT:
1942 		case VK_FORMAT_R8G8B8_UINT:
1943 		case VK_FORMAT_R8G8B8A8_UINT:
1944 			return "#extension GL_EXT_shader_subgroup_extended_types_int8 : enable\n";
1945 		case VK_FORMAT_R16_SINT:
1946 		case VK_FORMAT_R16G16_SINT:
1947 		case VK_FORMAT_R16G16B16_SINT:
1948 		case VK_FORMAT_R16G16B16A16_SINT:
1949 		case VK_FORMAT_R16_UINT:
1950 		case VK_FORMAT_R16G16_UINT:
1951 		case VK_FORMAT_R16G16B16_UINT:
1952 		case VK_FORMAT_R16G16B16A16_UINT:
1953 			return "#extension GL_EXT_shader_subgroup_extended_types_int16 : enable\n";
1954 		case VK_FORMAT_R64_SINT:
1955 		case VK_FORMAT_R64G64_SINT:
1956 		case VK_FORMAT_R64G64B64_SINT:
1957 		case VK_FORMAT_R64G64B64A64_SINT:
1958 		case VK_FORMAT_R64_UINT:
1959 		case VK_FORMAT_R64G64_UINT:
1960 		case VK_FORMAT_R64G64B64_UINT:
1961 		case VK_FORMAT_R64G64B64A64_UINT:
1962 			return "#extension GL_EXT_shader_subgroup_extended_types_int64 : enable\n";
1963 		case VK_FORMAT_R16_SFLOAT:
1964 		case VK_FORMAT_R16G16_SFLOAT:
1965 		case VK_FORMAT_R16G16B16_SFLOAT:
1966 		case VK_FORMAT_R16G16B16A16_SFLOAT:
1967 			return "#extension GL_EXT_shader_subgroup_extended_types_float16 : enable\n";
1968 	}
1969 }
1970 
getAllFormats()1971 const std::vector<vk::VkFormat> vkt::subgroups::getAllFormats ()
1972 {
1973 	std::vector<VkFormat> formats;
1974 
1975 	formats.push_back(VK_FORMAT_R8_SINT);
1976 	formats.push_back(VK_FORMAT_R8G8_SINT);
1977 	formats.push_back(VK_FORMAT_R8G8B8_SINT);
1978 	formats.push_back(VK_FORMAT_R8G8B8A8_SINT);
1979 	formats.push_back(VK_FORMAT_R8_UINT);
1980 	formats.push_back(VK_FORMAT_R8G8_UINT);
1981 	formats.push_back(VK_FORMAT_R8G8B8_UINT);
1982 	formats.push_back(VK_FORMAT_R8G8B8A8_UINT);
1983 	formats.push_back(VK_FORMAT_R16_SINT);
1984 	formats.push_back(VK_FORMAT_R16G16_SINT);
1985 	formats.push_back(VK_FORMAT_R16G16B16_SINT);
1986 	formats.push_back(VK_FORMAT_R16G16B16A16_SINT);
1987 	formats.push_back(VK_FORMAT_R16_UINT);
1988 	formats.push_back(VK_FORMAT_R16G16_UINT);
1989 	formats.push_back(VK_FORMAT_R16G16B16_UINT);
1990 	formats.push_back(VK_FORMAT_R16G16B16A16_UINT);
1991 	formats.push_back(VK_FORMAT_R32_SINT);
1992 	formats.push_back(VK_FORMAT_R32G32_SINT);
1993 	formats.push_back(VK_FORMAT_R32G32B32_SINT);
1994 	formats.push_back(VK_FORMAT_R32G32B32A32_SINT);
1995 	formats.push_back(VK_FORMAT_R32_UINT);
1996 	formats.push_back(VK_FORMAT_R32G32_UINT);
1997 	formats.push_back(VK_FORMAT_R32G32B32_UINT);
1998 	formats.push_back(VK_FORMAT_R32G32B32A32_UINT);
1999 	formats.push_back(VK_FORMAT_R64_SINT);
2000 	formats.push_back(VK_FORMAT_R64G64_SINT);
2001 	formats.push_back(VK_FORMAT_R64G64B64_SINT);
2002 	formats.push_back(VK_FORMAT_R64G64B64A64_SINT);
2003 	formats.push_back(VK_FORMAT_R64_UINT);
2004 	formats.push_back(VK_FORMAT_R64G64_UINT);
2005 	formats.push_back(VK_FORMAT_R64G64B64_UINT);
2006 	formats.push_back(VK_FORMAT_R64G64B64A64_UINT);
2007 	formats.push_back(VK_FORMAT_R16_SFLOAT);
2008 	formats.push_back(VK_FORMAT_R16G16_SFLOAT);
2009 	formats.push_back(VK_FORMAT_R16G16B16_SFLOAT);
2010 	formats.push_back(VK_FORMAT_R16G16B16A16_SFLOAT);
2011 	formats.push_back(VK_FORMAT_R32_SFLOAT);
2012 	formats.push_back(VK_FORMAT_R32G32_SFLOAT);
2013 	formats.push_back(VK_FORMAT_R32G32B32_SFLOAT);
2014 	formats.push_back(VK_FORMAT_R32G32B32A32_SFLOAT);
2015 	formats.push_back(VK_FORMAT_R64_SFLOAT);
2016 	formats.push_back(VK_FORMAT_R64G64_SFLOAT);
2017 	formats.push_back(VK_FORMAT_R64G64B64_SFLOAT);
2018 	formats.push_back(VK_FORMAT_R64G64B64A64_SFLOAT);
2019 	formats.push_back(VK_FORMAT_R8_USCALED);
2020 	formats.push_back(VK_FORMAT_R8G8_USCALED);
2021 	formats.push_back(VK_FORMAT_R8G8B8_USCALED);
2022 	formats.push_back(VK_FORMAT_R8G8B8A8_USCALED);
2023 
2024 	return formats;
2025 }
2026 
isFormatSigned(VkFormat format)2027 bool vkt::subgroups::isFormatSigned (VkFormat format)
2028 {
2029 	switch (format)
2030 	{
2031 		default:
2032 			return false;
2033 		case VK_FORMAT_R8_SINT:
2034 		case VK_FORMAT_R8G8_SINT:
2035 		case VK_FORMAT_R8G8B8_SINT:
2036 		case VK_FORMAT_R8G8B8A8_SINT:
2037 		case VK_FORMAT_R16_SINT:
2038 		case VK_FORMAT_R16G16_SINT:
2039 		case VK_FORMAT_R16G16B16_SINT:
2040 		case VK_FORMAT_R16G16B16A16_SINT:
2041 		case VK_FORMAT_R32_SINT:
2042 		case VK_FORMAT_R32G32_SINT:
2043 		case VK_FORMAT_R32G32B32_SINT:
2044 		case VK_FORMAT_R32G32B32A32_SINT:
2045 		case VK_FORMAT_R64_SINT:
2046 		case VK_FORMAT_R64G64_SINT:
2047 		case VK_FORMAT_R64G64B64_SINT:
2048 		case VK_FORMAT_R64G64B64A64_SINT:
2049 			return true;
2050 	}
2051 }
2052 
isFormatUnsigned(VkFormat format)2053 bool vkt::subgroups::isFormatUnsigned (VkFormat format)
2054 {
2055 	switch (format)
2056 	{
2057 		default:
2058 			return false;
2059 		case VK_FORMAT_R8_UINT:
2060 		case VK_FORMAT_R8G8_UINT:
2061 		case VK_FORMAT_R8G8B8_UINT:
2062 		case VK_FORMAT_R8G8B8A8_UINT:
2063 		case VK_FORMAT_R16_UINT:
2064 		case VK_FORMAT_R16G16_UINT:
2065 		case VK_FORMAT_R16G16B16_UINT:
2066 		case VK_FORMAT_R16G16B16A16_UINT:
2067 		case VK_FORMAT_R32_UINT:
2068 		case VK_FORMAT_R32G32_UINT:
2069 		case VK_FORMAT_R32G32B32_UINT:
2070 		case VK_FORMAT_R32G32B32A32_UINT:
2071 		case VK_FORMAT_R64_UINT:
2072 		case VK_FORMAT_R64G64_UINT:
2073 		case VK_FORMAT_R64G64B64_UINT:
2074 		case VK_FORMAT_R64G64B64A64_UINT:
2075 			return true;
2076 	}
2077 }
2078 
isFormatFloat(VkFormat format)2079 bool vkt::subgroups::isFormatFloat (VkFormat format)
2080 {
2081 	switch (format)
2082 	{
2083 		default:
2084 			return false;
2085 		case VK_FORMAT_R16_SFLOAT:
2086 		case VK_FORMAT_R16G16_SFLOAT:
2087 		case VK_FORMAT_R16G16B16_SFLOAT:
2088 		case VK_FORMAT_R16G16B16A16_SFLOAT:
2089 		case VK_FORMAT_R32_SFLOAT:
2090 		case VK_FORMAT_R32G32_SFLOAT:
2091 		case VK_FORMAT_R32G32B32_SFLOAT:
2092 		case VK_FORMAT_R32G32B32A32_SFLOAT:
2093 		case VK_FORMAT_R64_SFLOAT:
2094 		case VK_FORMAT_R64G64_SFLOAT:
2095 		case VK_FORMAT_R64G64B64_SFLOAT:
2096 		case VK_FORMAT_R64G64B64A64_SFLOAT:
2097 			return true;
2098 	}
2099 }
2100 
isFormatBool(VkFormat format)2101 bool vkt::subgroups::isFormatBool (VkFormat format)
2102 {
2103 	switch (format)
2104 	{
2105 		default:
2106 			return false;
2107 		case VK_FORMAT_R8_USCALED:
2108 		case VK_FORMAT_R8G8_USCALED:
2109 		case VK_FORMAT_R8G8B8_USCALED:
2110 		case VK_FORMAT_R8G8B8A8_USCALED:
2111 			return true;
2112 	}
2113 }
2114 
isFormat8bitTy(VkFormat format)2115 bool vkt::subgroups::isFormat8bitTy (VkFormat format)
2116 {
2117 	switch (format)
2118 	{
2119 	default:
2120 		return false;
2121 	case VK_FORMAT_R8_SINT:
2122 	case VK_FORMAT_R8G8_SINT:
2123 	case VK_FORMAT_R8G8B8_SINT:
2124 	case VK_FORMAT_R8G8B8A8_SINT:
2125 	case VK_FORMAT_R8_UINT:
2126 	case VK_FORMAT_R8G8_UINT:
2127 	case VK_FORMAT_R8G8B8_UINT:
2128 	case VK_FORMAT_R8G8B8A8_UINT:
2129 		return true;
2130 	}
2131 }
2132 
isFormat16BitTy(VkFormat format)2133 bool vkt::subgroups::isFormat16BitTy (VkFormat format)
2134 {
2135 	switch (format)
2136 	{
2137 	default:
2138 		return false;
2139 	case VK_FORMAT_R16_SFLOAT:
2140 	case VK_FORMAT_R16G16_SFLOAT:
2141 	case VK_FORMAT_R16G16B16_SFLOAT:
2142 	case VK_FORMAT_R16G16B16A16_SFLOAT:
2143 	case VK_FORMAT_R16_SINT:
2144 	case VK_FORMAT_R16G16_SINT:
2145 	case VK_FORMAT_R16G16B16_SINT:
2146 	case VK_FORMAT_R16G16B16A16_SINT:
2147 	case VK_FORMAT_R16_UINT:
2148 	case VK_FORMAT_R16G16_UINT:
2149 	case VK_FORMAT_R16G16B16_UINT:
2150 	case VK_FORMAT_R16G16B16A16_UINT:
2151 		return true;
2152 	}
2153 }
2154 
setVertexShaderFrameBuffer(SourceCollections & programCollection)2155 void vkt::subgroups::setVertexShaderFrameBuffer (SourceCollections& programCollection)
2156 {
2157 	/*
2158 		"layout(location = 0) in highp vec4 in_position;\n"
2159 		"void main (void)\n"
2160 		"{\n"
2161 		"  gl_Position = in_position;\n"
2162 		"  gl_PointSize = 1.0f;\n"
2163 		"}\n";
2164 	*/
2165 	programCollection.spirvAsmSources.add("vert") <<
2166 		"; SPIR-V\n"
2167 		"; Version: 1.3\n"
2168 		"; Generator: Khronos Glslang Reference Front End; 7\n"
2169 		"; Bound: 25\n"
2170 		"; Schema: 0\n"
2171 		"OpCapability Shader\n"
2172 		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
2173 		"OpMemoryModel Logical GLSL450\n"
2174 		"OpEntryPoint Vertex %4 \"main\" %13 %17\n"
2175 		"OpMemberDecorate %11 0 BuiltIn Position\n"
2176 		"OpMemberDecorate %11 1 BuiltIn PointSize\n"
2177 		"OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
2178 		"OpMemberDecorate %11 3 BuiltIn CullDistance\n"
2179 		"OpDecorate %11 Block\n"
2180 		"OpDecorate %17 Location 0\n"
2181 		"%2 = OpTypeVoid\n"
2182 		"%3 = OpTypeFunction %2\n"
2183 		"%6 = OpTypeFloat 32\n"
2184 		"%7 = OpTypeVector %6 4\n"
2185 		"%8 = OpTypeInt 32 0\n"
2186 		"%9 = OpConstant %8 1\n"
2187 		"%10 = OpTypeArray %6 %9\n"
2188 		"%11 = OpTypeStruct %7 %6 %10 %10\n"
2189 		"%12 = OpTypePointer Output %11\n"
2190 		"%13 = OpVariable %12 Output\n"
2191 		"%14 = OpTypeInt 32 1\n"
2192 		"%15 = OpConstant %14 0\n"
2193 		"%16 = OpTypePointer Input %7\n"
2194 		"%17 = OpVariable %16 Input\n"
2195 		"%19 = OpTypePointer Output %7\n"
2196 		"%21 = OpConstant %14 1\n"
2197 		"%22 = OpConstant %6 1\n"
2198 		"%23 = OpTypePointer Output %6\n"
2199 		"%4 = OpFunction %2 None %3\n"
2200 		"%5 = OpLabel\n"
2201 		"%18 = OpLoad %7 %17\n"
2202 		"%20 = OpAccessChain %19 %13 %15\n"
2203 		"OpStore %20 %18\n"
2204 		"%24 = OpAccessChain %23 %13 %21\n"
2205 		"OpStore %24 %22\n"
2206 		"OpReturn\n"
2207 		"OpFunctionEnd\n";
2208 }
2209 
setFragmentShaderFrameBuffer(vk::SourceCollections & programCollection)2210 void vkt::subgroups::setFragmentShaderFrameBuffer (vk::SourceCollections& programCollection)
2211 {
2212 	/*
2213 		"layout(location = 0) in float in_color;\n"
2214 		"layout(location = 0) out uint out_color;\n"
2215 		"void main()\n"
2216 		{\n"
2217 		"	out_color = uint(in_color);\n"
2218 		"}\n";
2219 	*/
2220 	programCollection.spirvAsmSources.add("fragment") <<
2221 		"; SPIR-V\n"
2222 		"; Version: 1.3\n"
2223 		"; Generator: Khronos Glslang Reference Front End; 2\n"
2224 		"; Bound: 14\n"
2225 		"; Schema: 0\n"
2226 		"OpCapability Shader\n"
2227 		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
2228 		"OpMemoryModel Logical GLSL450\n"
2229 		"OpEntryPoint Fragment %4 \"main\" %8 %11\n"
2230 		"OpExecutionMode %4 OriginUpperLeft\n"
2231 		"OpDecorate %8 Location 0\n"
2232 		"OpDecorate %11 Location 0\n"
2233 		"%2 = OpTypeVoid\n"
2234 		"%3 = OpTypeFunction %2\n"
2235 		"%6 = OpTypeInt 32 0\n"
2236 		"%7 = OpTypePointer Output %6\n"
2237 		"%8 = OpVariable %7 Output\n"
2238 		"%9 = OpTypeFloat 32\n"
2239 		"%10 = OpTypePointer Input %9\n"
2240 		"%11 = OpVariable %10 Input\n"
2241 		"%4 = OpFunction %2 None %3\n"
2242 		"%5 = OpLabel\n"
2243 		"%12 = OpLoad %9 %11\n"
2244 		"%13 = OpConvertFToU %6 %12\n"
2245 		"OpStore %8 %13\n"
2246 		"OpReturn\n"
2247 		"OpFunctionEnd\n";
2248 }
2249 
setTesCtrlShaderFrameBuffer(vk::SourceCollections & programCollection)2250 void vkt::subgroups::setTesCtrlShaderFrameBuffer (vk::SourceCollections& programCollection)
2251 {
2252 	/*
2253 		"#extension GL_KHR_shader_subgroup_basic: enable\n"
2254 		"#extension GL_EXT_tessellation_shader : require\n"
2255 		"layout(vertices = 2) out;\n"
2256 		"void main (void)\n"
2257 		"{\n"
2258 		"  if (gl_InvocationID == 0)\n"
2259 		"  {\n"
2260 		"    gl_TessLevelOuter[0] = 1.0f;\n"
2261 		"    gl_TessLevelOuter[1] = 1.0f;\n"
2262 		"  }\n"
2263 		"  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
2264 		"}\n";
2265 	*/
2266 	programCollection.spirvAsmSources.add("tesc") <<
2267 		"; SPIR-V\n"
2268 		"; Version: 1.3\n"
2269 		"; Generator: Khronos Glslang Reference Front End; 2\n"
2270 		"; Bound: 46\n"
2271 		"; Schema: 0\n"
2272 		"OpCapability Tessellation\n"
2273 		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
2274 		"OpMemoryModel Logical GLSL450\n"
2275 		"OpEntryPoint TessellationControl %4 \"main\" %8 %20 %33 %39\n"
2276 		"OpExecutionMode %4 OutputVertices 2\n"
2277 		"OpDecorate %8 BuiltIn InvocationId\n"
2278 		"OpDecorate %20 Patch\n"
2279 		"OpDecorate %20 BuiltIn TessLevelOuter\n"
2280 		"OpMemberDecorate %29 0 BuiltIn Position\n"
2281 		"OpMemberDecorate %29 1 BuiltIn PointSize\n"
2282 		"OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
2283 		"OpMemberDecorate %29 3 BuiltIn CullDistance\n"
2284 		"OpDecorate %29 Block\n"
2285 		"OpMemberDecorate %35 0 BuiltIn Position\n"
2286 		"OpMemberDecorate %35 1 BuiltIn PointSize\n"
2287 		"OpMemberDecorate %35 2 BuiltIn ClipDistance\n"
2288 		"OpMemberDecorate %35 3 BuiltIn CullDistance\n"
2289 		"OpDecorate %35 Block\n"
2290 		"%2 = OpTypeVoid\n"
2291 		"%3 = OpTypeFunction %2\n"
2292 		"%6 = OpTypeInt 32 1\n"
2293 		"%7 = OpTypePointer Input %6\n"
2294 		"%8 = OpVariable %7 Input\n"
2295 		"%10 = OpConstant %6 0\n"
2296 		"%11 = OpTypeBool\n"
2297 		"%15 = OpTypeFloat 32\n"
2298 		"%16 = OpTypeInt 32 0\n"
2299 		"%17 = OpConstant %16 4\n"
2300 		"%18 = OpTypeArray %15 %17\n"
2301 		"%19 = OpTypePointer Output %18\n"
2302 		"%20 = OpVariable %19 Output\n"
2303 		"%21 = OpConstant %15 1\n"
2304 		"%22 = OpTypePointer Output %15\n"
2305 		"%24 = OpConstant %6 1\n"
2306 		"%26 = OpTypeVector %15 4\n"
2307 		"%27 = OpConstant %16 1\n"
2308 		"%28 = OpTypeArray %15 %27\n"
2309 		"%29 = OpTypeStruct %26 %15 %28 %28\n"
2310 		"%30 = OpConstant %16 2\n"
2311 		"%31 = OpTypeArray %29 %30\n"
2312 		"%32 = OpTypePointer Output %31\n"
2313 		"%33 = OpVariable %32 Output\n"
2314 		"%35 = OpTypeStruct %26 %15 %28 %28\n"
2315 		"%36 = OpConstant %16 32\n"
2316 		"%37 = OpTypeArray %35 %36\n"
2317 		"%38 = OpTypePointer Input %37\n"
2318 		"%39 = OpVariable %38 Input\n"
2319 		"%41 = OpTypePointer Input %26\n"
2320 		"%44 = OpTypePointer Output %26\n"
2321 		"%4 = OpFunction %2 None %3\n"
2322 		"%5 = OpLabel\n"
2323 		"%9 = OpLoad %6 %8\n"
2324 		"%12 = OpIEqual %11 %9 %10\n"
2325 		"OpSelectionMerge %14 None\n"
2326 		"OpBranchConditional %12 %13 %14\n"
2327 		"%13 = OpLabel\n"
2328 		"%23 = OpAccessChain %22 %20 %10\n"
2329 		"OpStore %23 %21\n"
2330 		"%25 = OpAccessChain %22 %20 %24\n"
2331 		"OpStore %25 %21\n"
2332 		"OpBranch %14\n"
2333 		"%14 = OpLabel\n"
2334 		"%34 = OpLoad %6 %8\n"
2335 		"%40 = OpLoad %6 %8\n"
2336 		"%42 = OpAccessChain %41 %39 %40 %10\n"
2337 		"%43 = OpLoad %26 %42\n"
2338 		"%45 = OpAccessChain %44 %33 %34 %10\n"
2339 		"OpStore %45 %43\n"
2340 		"OpReturn\n"
2341 		"OpFunctionEnd\n";
2342 }
2343 
setTesEvalShaderFrameBuffer(vk::SourceCollections & programCollection)2344 void vkt::subgroups::setTesEvalShaderFrameBuffer (vk::SourceCollections& programCollection)
2345 {
2346 	/*
2347 		"#extension GL_KHR_shader_subgroup_ballot: enable\n"
2348 		"#extension GL_EXT_tessellation_shader : require\n"
2349 		"layout(isolines, equal_spacing, ccw ) in;\n"
2350 		"layout(location = 0) in float in_color[];\n"
2351 		"layout(location = 0) out float out_color;\n"
2352 		"\n"
2353 		"void main (void)\n"
2354 		"{\n"
2355 		"  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
2356 		"  out_color = in_color[0];\n"
2357 		"}\n";
2358 	*/
2359 	programCollection.spirvAsmSources.add("tese") <<
2360 		"; SPIR-V\n"
2361 		"; Version: 1.3\n"
2362 		"; Generator: Khronos Glslang Reference Front End; 2\n"
2363 		"; Bound: 45\n"
2364 		"; Schema: 0\n"
2365 		"OpCapability Tessellation\n"
2366 		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
2367 		"OpMemoryModel Logical GLSL450\n"
2368 		"OpEntryPoint TessellationEvaluation %4 \"main\" %13 %20 %29 %39 %42\n"
2369 		"OpExecutionMode %4 Isolines\n"
2370 		"OpExecutionMode %4 SpacingEqual\n"
2371 		"OpExecutionMode %4 VertexOrderCcw\n"
2372 		"OpMemberDecorate %11 0 BuiltIn Position\n"
2373 		"OpMemberDecorate %11 1 BuiltIn PointSize\n"
2374 		"OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
2375 		"OpMemberDecorate %11 3 BuiltIn CullDistance\n"
2376 		"OpDecorate %11 Block\n"
2377 		"OpMemberDecorate %16 0 BuiltIn Position\n"
2378 		"OpMemberDecorate %16 1 BuiltIn PointSize\n"
2379 		"OpMemberDecorate %16 2 BuiltIn ClipDistance\n"
2380 		"OpMemberDecorate %16 3 BuiltIn CullDistance\n"
2381 		"OpDecorate %16 Block\n"
2382 		"OpDecorate %29 BuiltIn TessCoord\n"
2383 		"OpDecorate %39 Location 0\n"
2384 		"OpDecorate %42 Location 0\n"
2385 		"%2 = OpTypeVoid\n"
2386 		"%3 = OpTypeFunction %2\n"
2387 		"%6 = OpTypeFloat 32\n"
2388 		"%7 = OpTypeVector %6 4\n"
2389 		"%8 = OpTypeInt 32 0\n"
2390 		"%9 = OpConstant %8 1\n"
2391 		"%10 = OpTypeArray %6 %9\n"
2392 		"%11 = OpTypeStruct %7 %6 %10 %10\n"
2393 		"%12 = OpTypePointer Output %11\n"
2394 		"%13 = OpVariable %12 Output\n"
2395 		"%14 = OpTypeInt 32 1\n"
2396 		"%15 = OpConstant %14 0\n"
2397 		"%16 = OpTypeStruct %7 %6 %10 %10\n"
2398 		"%17 = OpConstant %8 32\n"
2399 		"%18 = OpTypeArray %16 %17\n"
2400 		"%19 = OpTypePointer Input %18\n"
2401 		"%20 = OpVariable %19 Input\n"
2402 		"%21 = OpTypePointer Input %7\n"
2403 		"%24 = OpConstant %14 1\n"
2404 		"%27 = OpTypeVector %6 3\n"
2405 		"%28 = OpTypePointer Input %27\n"
2406 		"%29 = OpVariable %28 Input\n"
2407 		"%30 = OpConstant %8 0\n"
2408 		"%31 = OpTypePointer Input %6\n"
2409 		"%36 = OpTypePointer Output %7\n"
2410 		"%38 = OpTypePointer Output %6\n"
2411 		"%39 = OpVariable %38 Output\n"
2412 		"%40 = OpTypeArray %6 %17\n"
2413 		"%41 = OpTypePointer Input %40\n"
2414 		"%42 = OpVariable %41 Input\n"
2415 		"%4 = OpFunction %2 None %3\n"
2416 		"%5 = OpLabel\n"
2417 		"%22 = OpAccessChain %21 %20 %15 %15\n"
2418 		"%23 = OpLoad %7 %22\n"
2419 		"%25 = OpAccessChain %21 %20 %24 %15\n"
2420 		"%26 = OpLoad %7 %25\n"
2421 		"%32 = OpAccessChain %31 %29 %30\n"
2422 		"%33 = OpLoad %6 %32\n"
2423 		"%34 = OpCompositeConstruct %7 %33 %33 %33 %33\n"
2424 		"%35 = OpExtInst %7 %1 FMix %23 %26 %34\n"
2425 		"%37 = OpAccessChain %36 %13 %15\n"
2426 		"OpStore %37 %35\n"
2427 		"%43 = OpAccessChain %31 %42 %15\n"
2428 		"%44 = OpLoad %6 %43\n"
2429 		"OpStore %39 %44\n"
2430 		"OpReturn\n"
2431 		"OpFunctionEnd\n";
2432 }
2433 
addGeometryShadersFromTemplate(const std::string & glslTemplate,const vk::ShaderBuildOptions & options,vk::GlslSourceCollection & collection)2434 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& glslTemplate, const vk::ShaderBuildOptions& options,  vk::GlslSourceCollection& collection)
2435 {
2436 	tcu::StringTemplate geometryTemplate(glslTemplate);
2437 
2438 	map<string, string>		linesParams;
2439 	linesParams.insert(pair<string, string>("TOPOLOGY", "lines"));
2440 
2441 	map<string, string>		pointsParams;
2442 	pointsParams.insert(pair<string, string>("TOPOLOGY", "points"));
2443 
2444 	collection.add("geometry_lines")	<< glu::GeometrySource(geometryTemplate.specialize(linesParams))	<< options;
2445 	collection.add("geometry_points")	<< glu::GeometrySource(geometryTemplate.specialize(pointsParams))	<< options;
2446 }
2447 
addGeometryShadersFromTemplate(const std::string & spirvTemplate,const vk::SpirVAsmBuildOptions & options,vk::SpirVAsmCollection & collection)2448 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& spirvTemplate, const vk::SpirVAsmBuildOptions& options, vk::SpirVAsmCollection& collection)
2449 {
2450 	tcu::StringTemplate geometryTemplate(spirvTemplate);
2451 
2452 	map<string, string>		linesParams;
2453 	linesParams.insert(pair<string, string>("TOPOLOGY", "InputLines"));
2454 
2455 	map<string, string>		pointsParams;
2456 	pointsParams.insert(pair<string, string>("TOPOLOGY", "InputPoints"));
2457 
2458 	collection.add("geometry_lines")	<< geometryTemplate.specialize(linesParams)		<< options;
2459 	collection.add("geometry_points")	<< geometryTemplate.specialize(pointsParams)	<< options;
2460 }
2461 
initializeMemory(Context & context,const Allocation & alloc,const subgroups::SSBOData & data)2462 void initializeMemory (Context& context, const Allocation& alloc, const subgroups::SSBOData& data)
2463 {
2464 	const vk::VkFormat format = data.format;
2465 	const vk::VkDeviceSize size = data.numElements *
2466 		(data.isImage() ? getFormatSizeInBytes(format) : getElementSizeInBytes(format, data.layout));
2467 	if (subgroups::SSBOData::InitializeNonZero == data.initializeType)
2468 	{
2469 		de::Random rnd(context.getTestContext().getCommandLine().getBaseSeed());
2470 
2471 		switch (format)
2472 		{
2473 			default:
2474 				DE_FATAL("Illegal buffer format");
2475 				break;
2476 			case VK_FORMAT_R8_SINT:
2477 			case VK_FORMAT_R8G8_SINT:
2478 			case VK_FORMAT_R8G8B8_SINT:
2479 			case VK_FORMAT_R8G8B8A8_SINT:
2480 			case VK_FORMAT_R8_UINT:
2481 			case VK_FORMAT_R8G8_UINT:
2482 			case VK_FORMAT_R8G8B8_UINT:
2483 			case VK_FORMAT_R8G8B8A8_UINT:
2484 			{
2485 				deUint8* ptr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
2486 
2487 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint8)); k++)
2488 				{
2489 					ptr[k] = rnd.getUint8();
2490 				}
2491 			}
2492 			break;
2493 			case VK_FORMAT_R16_SINT:
2494 			case VK_FORMAT_R16G16_SINT:
2495 			case VK_FORMAT_R16G16B16_SINT:
2496 			case VK_FORMAT_R16G16B16A16_SINT:
2497 			case VK_FORMAT_R16_UINT:
2498 			case VK_FORMAT_R16G16_UINT:
2499 			case VK_FORMAT_R16G16B16_UINT:
2500 			case VK_FORMAT_R16G16B16A16_UINT:
2501 			{
2502 				deUint16* ptr = reinterpret_cast<deUint16*>(alloc.getHostPtr());
2503 
2504 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint16)); k++)
2505 				{
2506 					ptr[k] = rnd.getUint16();
2507 				}
2508 			}
2509 			break;
2510 			case VK_FORMAT_R8_USCALED:
2511 			case VK_FORMAT_R8G8_USCALED:
2512 			case VK_FORMAT_R8G8B8_USCALED:
2513 			case VK_FORMAT_R8G8B8A8_USCALED:
2514 			{
2515 				deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2516 
2517 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
2518 				{
2519 					deUint32 r = rnd.getUint32();
2520 					ptr[k] = (r & 1) ? r : 0;
2521 				}
2522 			}
2523 			break;
2524 			case VK_FORMAT_R32_SINT:
2525 			case VK_FORMAT_R32G32_SINT:
2526 			case VK_FORMAT_R32G32B32_SINT:
2527 			case VK_FORMAT_R32G32B32A32_SINT:
2528 			case VK_FORMAT_R32_UINT:
2529 			case VK_FORMAT_R32G32_UINT:
2530 			case VK_FORMAT_R32G32B32_UINT:
2531 			case VK_FORMAT_R32G32B32A32_UINT:
2532 			{
2533 				deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2534 
2535 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
2536 				{
2537 					ptr[k] = rnd.getUint32();
2538 				}
2539 			}
2540 			break;
2541 			case VK_FORMAT_R64_SINT:
2542 			case VK_FORMAT_R64G64_SINT:
2543 			case VK_FORMAT_R64G64B64_SINT:
2544 			case VK_FORMAT_R64G64B64A64_SINT:
2545 			case VK_FORMAT_R64_UINT:
2546 			case VK_FORMAT_R64G64_UINT:
2547 			case VK_FORMAT_R64G64B64_UINT:
2548 			case VK_FORMAT_R64G64B64A64_UINT:
2549 			{
2550 				deUint64* ptr = reinterpret_cast<deUint64*>(alloc.getHostPtr());
2551 
2552 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint64)); k++)
2553 				{
2554 					ptr[k] = rnd.getUint64();
2555 				}
2556 			}
2557 			break;
2558 			case VK_FORMAT_R16_SFLOAT:
2559 			case VK_FORMAT_R16G16_SFLOAT:
2560 			case VK_FORMAT_R16G16B16_SFLOAT:
2561 			case VK_FORMAT_R16G16B16A16_SFLOAT:
2562 			{
2563 				deFloat16* ptr = reinterpret_cast<deFloat16*>(alloc.getHostPtr());
2564 
2565 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deFloat16)); k++)
2566 				{
2567 					ptr[k] = deFloat32To16(rnd.getFloat());
2568 				}
2569 			}
2570 			break;
2571 			case VK_FORMAT_R32_SFLOAT:
2572 			case VK_FORMAT_R32G32_SFLOAT:
2573 			case VK_FORMAT_R32G32B32_SFLOAT:
2574 			case VK_FORMAT_R32G32B32A32_SFLOAT:
2575 			{
2576 				float* ptr = reinterpret_cast<float*>(alloc.getHostPtr());
2577 
2578 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(float)); k++)
2579 				{
2580 					ptr[k] = rnd.getFloat();
2581 				}
2582 			}
2583 			break;
2584 			case VK_FORMAT_R64_SFLOAT:
2585 			case VK_FORMAT_R64G64_SFLOAT:
2586 			case VK_FORMAT_R64G64B64_SFLOAT:
2587 			case VK_FORMAT_R64G64B64A64_SFLOAT:
2588 			{
2589 				double* ptr = reinterpret_cast<double*>(alloc.getHostPtr());
2590 
2591 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(double)); k++)
2592 				{
2593 					ptr[k] = rnd.getDouble();
2594 				}
2595 			}
2596 			break;
2597 		}
2598 	}
2599 	else if (subgroups::SSBOData::InitializeZero == data.initializeType)
2600 	{
2601 		deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2602 
2603 		for (vk::VkDeviceSize k = 0; k < size / 4; k++)
2604 		{
2605 			ptr[k] = 0;
2606 		}
2607 	}
2608 
2609 	if (subgroups::SSBOData::InitializeNone != data.initializeType)
2610 	{
2611 		flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
2612 	}
2613 }
2614 
getResultBinding(const VkShaderStageFlagBits shaderStage)2615 deUint32 getResultBinding (const VkShaderStageFlagBits shaderStage)
2616 {
2617 	switch(shaderStage)
2618 	{
2619 		case VK_SHADER_STAGE_VERTEX_BIT:
2620 			return 0u;
2621 		case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
2622 			return 1u;
2623 		case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
2624 			return 2u;
2625 		case VK_SHADER_STAGE_GEOMETRY_BIT:
2626 			return 3u;
2627 		default:
2628 			DE_ASSERT(0);
2629 			return -1;
2630 	}
2631 	DE_ASSERT(0);
2632 	return -1;
2633 }
2634 
makeTessellationEvaluationFrameBufferTest(Context & context,VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,subgroups::CheckResult checkResult,const VkShaderStageFlags shaderStage)2635 tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTest (Context&					context,
2636 																		   VkFormat					format,
2637 																		   const SSBOData*			extraData,
2638 																		   deUint32					extraDataCount,
2639 																		   const void*				internalData,
2640 																		   subgroups::CheckResult	checkResult,
2641 																		   const VkShaderStageFlags	shaderStage)
2642 {
2643 	return makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, shaderStage, 0u, 0u);
2644 }
2645 
// Runs a framebuffer-based subgroup test through a vertex / tessellation
// control / tessellation evaluation / fragment pipeline.
//
// For every tested width the function draws 2 * width patch-list vertices
// into a (maxWidth x 1) colour image, copies that image into a buffer and
// hands the buffer's host pointer to 'checkResult'.
//
//   context                    - test context (device, queue, binaries, log).
//   format                     - format of the colour attachment / result image.
//   extraData, extraDataCount  - additional inputs; entry i is bound at
//                                binding i and must be an image or a UBO.
//   internalData               - opaque pointer forwarded to 'checkResult'.
//   checkResult                - verification callback; one failed call marks
//                                that iteration as failed.
//   shaderStage                - stage flags used for the descriptor bindings
//                                and to choose which tessellation stage(s)
//                                receive the create flags / required size.
//   tessShaderStageCreateFlags - create flags for the selected tessellation
//                                stage(s).
//   requiredSubgroupSize       - required subgroup size for the selected
//                                tessellation stage(s); 0 means none is passed.
//
// Returns pass("OK") when no iteration failed, otherwise logs the number of
// passing iterations and returns fail("Failed!").
tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize (Context&					context,
																							   VkFormat					format,
																							   const SSBOData*			extraData,
																							   deUint32					extraDataCount,
																							   const void*				internalData,
																							   subgroups::CheckResult	checkResult,
																							   const VkShaderStageFlags	shaderStage,
																							   const deUint32			tessShaderStageCreateFlags,
																							   const deUint32			requiredSubgroupSize)
{
	const DeviceInterface&					vk						= context.getDeviceInterface();
	const VkDevice							device					= context.getDevice();
	const deUint32							maxWidth				= getMaxWidth();
	vector<de::SharedPtr<BufferOrImage> >	inputBuffers			(extraDataCount);
	DescriptorSetLayoutBuilder				layoutBuilder;
	DescriptorPoolBuilder					poolBuilder;
	DescriptorSetUpdateBuilder				updateBuilder;
	Move <VkDescriptorPool>					descriptorPool;
	Move <VkDescriptorSet>					descriptorSet;
	// Shader binaries are looked up by the names "vert", "tesc", "tese" and "fragment".
	const Unique<VkShaderModule>			vertexShaderModule		(createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
	const Unique<VkShaderModule>			teCtrlShaderModule		(createShaderModule(vk, device, context.getBinaryCollection().get("tesc"), 0u));
	const Unique<VkShaderModule>			teEvalShaderModule		(createShaderModule(vk, device, context.getBinaryCollection().get("tese"), 0u));
	const Unique<VkShaderModule>			fragmentShaderModule	(createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
	const Unique<VkRenderPass>				renderPass				(makeRenderPass(context, format));
	// One tightly packed vec4 per vertex, consumed at location 0.
	const VkVertexInputBindingDescription	vertexInputBinding		=
	{
		0u,											//  deUint32			binding;
		static_cast<deUint32>(sizeof(tcu::Vec4)),	//  deUint32			stride;
		VK_VERTEX_INPUT_RATE_VERTEX					//  VkVertexInputRate	inputRate;
	};
	const VkVertexInputAttributeDescription	vertexInputAttribute	=
	{
		0u,									//  deUint32	location;
		0u,									//  deUint32	binding;
		VK_FORMAT_R32G32B32A32_SFLOAT,		//  VkFormat	format;
		0u									//  deUint32	offset;
	};

	// Create and initialize one image or uniform buffer per extra input.
	for (deUint32 i = 0u; i < extraDataCount; i++)
	{
		if (extraData[i].isImage())
		{
			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
		}
		else
		{
			// Non-image inputs are only supported as UBOs in this test.
			DE_ASSERT(extraData[i].isUBO());
			vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
		}
		const Allocation& alloc = inputBuffers[i]->getAllocation();
		initializeMemory(context, alloc, extraData[i]);
	}

	// One binding per extra input, in input order, for the requested stage(s).
	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
		layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, shaderStage, DE_NULL);

	const Unique<VkDescriptorSetLayout>		descriptorSetLayout		(layoutBuilder.build(vk, device));

	const Unique<VkPipelineLayout>			pipelineLayout			(makePipelineLayout(vk, device, *descriptorSetLayout));

	// Only the tessellation entries may be non-zero, and only for stages
	// selected in 'shaderStage'.
	// NOTE(review): the per-slot stage order expected by makeGraphicsPipeline
	// is not visible here - confirm against its declaration.
	const deUint32 requiredSubgroupSizes[5] = {0u,
											   ((shaderStage & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? requiredSubgroupSize : 0u),
											   ((shaderStage & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? requiredSubgroupSize : 0u),
											   0u,
											   0u};

	// The size array is passed only when a required subgroup size was requested.
	const Unique<VkPipeline>				pipeline				(makeGraphicsPipeline(context, *pipelineLayout,
																						  VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT |
																						  VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
																						  *vertexShaderModule, *fragmentShaderModule, DE_NULL, *teCtrlShaderModule, *teEvalShaderModule,
																						  *renderPass, VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, &vertexInputBinding, &vertexInputAttribute, true, format,
																						  0u, ((shaderStage & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? tessShaderStageCreateFlags : 0u),
																						  ((shaderStage & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? tessShaderStageCreateFlags : 0u),
																						  0u, 0u, requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));

	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
		poolBuilder.addType(inputBuffers[ndx]->getType());

	// A descriptor pool and set are only created when there is something to bind.
	if (extraDataCount > 0)
	{
		descriptorPool = poolBuilder.build(vk, device,
							VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
		descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
	}

	// Queue one descriptor write per input; the binding index equals the input index.
	for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
	{
		if (inputBuffers[buffersNdx]->isImage())
		{
			VkDescriptorImageInfo info =
				makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
										inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);

			updateBuilder.writeSingle(*descriptorSet,
										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
										inputBuffers[buffersNdx]->getType(), &info);
		}
		else
		{
			VkDescriptorBufferInfo info =
				makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
										0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());

			updateBuilder.writeSingle(*descriptorSet,
										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
										inputBuffers[buffersNdx]->getType(), &info);
		}
	}

	updateBuilder.update(vk, device);

	const VkQueue							queue					= context.getUniversalQueue();
	const deUint32							queueFamilyIndex		= context.getUniversalQueueFamilyIndex();
	const Unique<VkCommandPool>				cmdPool					(makeCommandPool(vk, device, queueFamilyIndex));
	const deUint32							subgroupSize			= getSubgroupSize(context);
	const Unique<VkCommandBuffer>			cmdBuffer				(makeCommandBuffer(context, *cmdPool));
	// Two vec4 vertices for each of the maxWidth pixels.
	const vk::VkDeviceSize					vertexBufferSize		= 2ull * maxWidth * sizeof(tcu::Vec4);
	Buffer									vertexBuffer			(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
	unsigned								totalIterations			= 0u;
	unsigned								failedIterations		= 0u;
	Image									discardableImage		(context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);

	{
		const Allocation&		alloc				= vertexBuffer.getAllocation();
		std::vector<tcu::Vec4>	data				(2u * maxWidth, Vec4(1.0f, 0.0f, 1.0f, 1.0f));
		// Width of one pixel in normalized device coordinates (x spans [-1, 1]).
		const float				pixelSize			= 2.0f / static_cast<float>(maxWidth);
		float					leftHandPosition	= -1.0f;

		// Each vertex pair spans one pixel: x = left edge, then x = left edge + pixelSize.
		for(deUint32 ndx = 0u; ndx < data.size(); ndx+=2u)
		{
			data[ndx][0] = leftHandPosition;
			leftHandPosition += pixelSize;
			data[ndx+1][0] = leftHandPosition;
		}

		deMemcpy(alloc.getHostPtr(), &data[0], data.size() * sizeof(tcu::Vec4));
		flushAlloc(vk, device, alloc);
	}

	const Unique<VkFramebuffer>	framebuffer			(makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
	const VkViewport			viewport			= makeViewport(maxWidth, 1u);
	const VkRect2D				scissor				= makeRect2D(maxWidth, 1u);
	// Host-readable copy target: one pixel of 'format' per column.
	const vk::VkDeviceSize		imageResultSize		= tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
	Buffer						imageBufferResult	(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
	const VkDeviceSize			vertexBufferOffset	= 0u;

	// Widths advance via getNextWidth(); the loop stops before maxWidth, so
	// maxWidth itself is never drawn.
	for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
	{
		totalIterations++;

		beginCommandBuffer(vk, *cmdBuffer);
		{

			vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
			vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);

			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));

			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);

			if (extraDataCount > 0)
			{
				vk.cmdBindDescriptorSets(*cmdBuffer,
					VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
					&descriptorSet.get(), 0u, DE_NULL);
			}

			vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
			// Two vertices per tested column.
			vk.cmdDraw(*cmdBuffer, 2 * width, 1, 0, 0);

			endRenderPass(vk, *cmdBuffer);

			// Read the full-width image back regardless of the drawn width.
			copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
			endCommandBuffer(vk, *cmdBuffer);

			submitCommandsAndWait(vk, device, queue, *cmdBuffer);
		}
		context.resetCommandPoolForVKSC(device, *cmdPool);

		{
			const Allocation& allocResult = imageBufferResult.getAllocation();
			// Make the device writes visible to the host before reading them.
			invalidateAlloc(vk, device, allocResult);

			std::vector<const void*> datas;
			datas.push_back(allocResult.getHostPtr());
			// NOTE(review): the callback is given width / 2, not width; the
			// reason for the halving is not visible in this function.
			if (!checkResult(internalData, datas, width/2u, subgroupSize))
				failedIterations++;
		}
	}

	if (0 < failedIterations)
	{
		unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);

		context.getTestContext().getLog()
				<< TestLog::Message << valuesPassed << " / "
				<< totalIterations << " values passed" << TestLog::EndMessage;
		return tcu::TestStatus::fail("Failed!");
	}

	return tcu::TestStatus::pass("OK");
}
2849 
check(std::vector<const void * > datas,deUint32 width,deUint32 ref)2850 bool vkt::subgroups::check (std::vector<const void*> datas, deUint32 width, deUint32 ref)
2851 {
2852 	const deUint32* data = reinterpret_cast<const deUint32*>(datas[0]);
2853 
2854 	for (deUint32 n = 0; n < width; ++n)
2855 	{
2856 		if (data[n] != ref)
2857 		{
2858 			return false;
2859 		}
2860 	}
2861 
2862 	return true;
2863 }
2864 
checkComputeOrMesh(std::vector<const void * > datas,const deUint32 numWorkgroups[3],const deUint32 localSize[3],deUint32 ref)2865 bool vkt::subgroups::checkComputeOrMesh (std::vector<const void*>	datas,
2866 										 const deUint32				numWorkgroups[3],
2867 										 const deUint32				localSize[3],
2868 										 deUint32					ref)
2869 {
2870 	const deUint32 globalSizeX = numWorkgroups[0] * localSize[0];
2871 	const deUint32 globalSizeY = numWorkgroups[1] * localSize[1];
2872 	const deUint32 globalSizeZ = numWorkgroups[2] * localSize[2];
2873 
2874 	return check(datas, globalSizeX * globalSizeY * globalSizeZ, ref);
2875 }
2876 
makeGeometryFrameBufferTest(Context & context,VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,subgroups::CheckResult checkResult)2877 tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTest (Context&				context,
2878 															 VkFormat				format,
2879 															 const SSBOData*		extraData,
2880 															 deUint32				extraDataCount,
2881 															 const void*			internalData,
2882 															 subgroups::CheckResult	checkResult)
2883 {
2884 	return makeGeometryFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, 0u, 0u);
2885 }
2886 
makeGeometryFrameBufferTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,subgroups::CheckResult checkResult,const deUint32 geometryShaderStageCreateFlags,const deUint32 requiredSubgroupSize)2887 tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTestRequiredSubgroupSize (Context&					context,
2888 																				 VkFormat					format,
2889 																				 const SSBOData*			extraData,
2890 																				 deUint32					extraDataCount,
2891 																				 const void*				internalData,
2892 																				 subgroups::CheckResult		checkResult,
2893 																				 const deUint32				geometryShaderStageCreateFlags,
2894 																				 const deUint32				requiredSubgroupSize)
2895 {
2896 	const DeviceInterface&					vk						= context.getDeviceInterface();
2897 	const VkDevice							device					= context.getDevice();
2898 	const deUint32							maxWidth				= getMaxWidth();
2899 	vector<de::SharedPtr<BufferOrImage> >	inputBuffers			(extraDataCount);
2900 	DescriptorSetLayoutBuilder				layoutBuilder;
2901 	DescriptorPoolBuilder					poolBuilder;
2902 	DescriptorSetUpdateBuilder				updateBuilder;
2903 	Move <VkDescriptorPool>					descriptorPool;
2904 	Move <VkDescriptorSet>					descriptorSet;
2905 	const Unique<VkShaderModule>			vertexShaderModule		(createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
2906 	const Unique<VkShaderModule>			geometryShaderModule	(createShaderModule(vk, device, context.getBinaryCollection().get("geometry"), 0u));
2907 	const Unique<VkShaderModule>			fragmentShaderModule	(createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
2908 	const Unique<VkRenderPass>				renderPass				(makeRenderPass(context, format));
2909 	const VkVertexInputBindingDescription	vertexInputBinding		=
2910 	{
2911 		0u,											//  deUint32			binding;
2912 		static_cast<deUint32>(sizeof(tcu::Vec4)),	//  deUint32			stride;
2913 		VK_VERTEX_INPUT_RATE_VERTEX					//  VkVertexInputRate	inputRate;
2914 	};
2915 	const VkVertexInputAttributeDescription	vertexInputAttribute	=
2916 	{
2917 		0u,									//  deUint32	location;
2918 		0u,									//  deUint32	binding;
2919 		VK_FORMAT_R32G32B32A32_SFLOAT,		//  VkFormat	format;
2920 		0u									//  deUint32	offset;
2921 	};
2922 
2923 	for (deUint32 i = 0u; i < extraDataCount; i++)
2924 	{
2925 		if (extraData[i].isImage())
2926 		{
2927 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
2928 		}
2929 		else
2930 		{
2931 			DE_ASSERT(extraData[i].isUBO());
2932 			vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
2933 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2934 		}
2935 		const Allocation& alloc = inputBuffers[i]->getAllocation();
2936 		initializeMemory(context, alloc, extraData[i]);
2937 	}
2938 
2939 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2940 		layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_GEOMETRY_BIT, DE_NULL);
2941 
2942 	const Unique<VkDescriptorSetLayout>		descriptorSetLayout		(layoutBuilder.build(vk, device));
2943 
2944 	const Unique<VkPipelineLayout>			pipelineLayout			(makePipelineLayout(vk, device, *descriptorSetLayout));
2945 
2946 	const deUint32 requiredSubgroupSizes[5] = {0u, 0u, 0u, requiredSubgroupSize, 0u};
2947 
2948 	const Unique<VkPipeline>				pipeline				(makeGraphicsPipeline(context, *pipelineLayout,
2949 																						  VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_GEOMETRY_BIT,
2950 																						  *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, DE_NULL, DE_NULL,
2951 																						  *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST, &vertexInputBinding, &vertexInputAttribute, true, format,
2952 																						  0u, 0u, 0u, geometryShaderStageCreateFlags, 0u,
2953 																						  requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
2954 
2955 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2956 		poolBuilder.addType(inputBuffers[ndx]->getType());
2957 
2958 	if (extraDataCount > 0)
2959 	{
2960 		descriptorPool = poolBuilder.build(vk, device,
2961 							VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2962 		descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
2963 	}
2964 
2965 	for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
2966 	{
2967 		if (inputBuffers[buffersNdx]->isImage())
2968 		{
2969 			VkDescriptorImageInfo info =
2970 				makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
2971 										inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2972 
2973 			updateBuilder.writeSingle(*descriptorSet,
2974 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2975 										inputBuffers[buffersNdx]->getType(), &info);
2976 		}
2977 		else
2978 		{
2979 			VkDescriptorBufferInfo info =
2980 				makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
2981 										0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
2982 
2983 			updateBuilder.writeSingle(*descriptorSet,
2984 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2985 										inputBuffers[buffersNdx]->getType(), &info);
2986 		}
2987 	}
2988 
2989 	updateBuilder.update(vk, device);
2990 
2991 	const VkQueue							queue					= context.getUniversalQueue();
2992 	const deUint32							queueFamilyIndex		= context.getUniversalQueueFamilyIndex();
2993 	const Unique<VkCommandPool>				cmdPool					(makeCommandPool(vk, device, queueFamilyIndex));
2994 	const deUint32							subgroupSize			= getSubgroupSize(context);
2995 	const Unique<VkCommandBuffer>			cmdBuffer				(makeCommandBuffer(context, *cmdPool));
2996 	const vk::VkDeviceSize					vertexBufferSize		= maxWidth * sizeof(tcu::Vec4);
2997 	Buffer									vertexBuffer			(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
2998 	unsigned								totalIterations			= 0u;
2999 	unsigned								failedIterations		= 0u;
3000 	Image									discardableImage		(context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3001 
3002 	{
3003 		const Allocation&		alloc				= vertexBuffer.getAllocation();
3004 		std::vector<tcu::Vec4>	data				(maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
3005 		const float				pixelSize			= 2.0f / static_cast<float>(maxWidth);
3006 		float					leftHandPosition	= -1.0f;
3007 
3008 		for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
3009 		{
3010 			data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
3011 			leftHandPosition += pixelSize;
3012 		}
3013 
3014 		deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
3015 		flushAlloc(vk, device, alloc);
3016 	}
3017 
3018 	const Unique<VkFramebuffer>	framebuffer			(makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
3019 	const VkViewport			viewport			= makeViewport(maxWidth, 1u);
3020 	const VkRect2D				scissor				= makeRect2D(maxWidth, 1u);
3021 	const vk::VkDeviceSize		imageResultSize		= tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
3022 	Buffer						imageBufferResult	(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3023 	const VkDeviceSize			vertexBufferOffset	= 0u;
3024 
3025 	for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
3026 	{
3027 		totalIterations++;
3028 
3029 		for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
3030 		{
3031 			const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3032 			initializeMemory(context, alloc, extraData[ndx]);
3033 		}
3034 
3035 		beginCommandBuffer(vk, *cmdBuffer);
3036 		{
3037 			vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3038 
3039 			vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3040 
3041 			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
3042 
3043 			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3044 
3045 			if (extraDataCount > 0)
3046 			{
3047 				vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3048 					&descriptorSet.get(), 0u, DE_NULL);
3049 			}
3050 
3051 			vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
3052 
3053 			vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
3054 
3055 			endRenderPass(vk, *cmdBuffer);
3056 
3057 			copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3058 
3059 			endCommandBuffer(vk, *cmdBuffer);
3060 
3061 			submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3062 		}
3063 		context.resetCommandPoolForVKSC(device, *cmdPool);
3064 
3065 		{
3066 			const Allocation& allocResult = imageBufferResult.getAllocation();
3067 			invalidateAlloc(vk, device, allocResult);
3068 
3069 			std::vector<const void*> datas;
3070 			datas.push_back(allocResult.getHostPtr());
3071 			if (!checkResult(internalData, datas, width, subgroupSize))
3072 				failedIterations++;
3073 		}
3074 	}
3075 
3076 	if (0 < failedIterations)
3077 	{
3078 		unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3079 
3080 		context.getTestContext().getLog()
3081 				<< TestLog::Message << valuesPassed << " / "
3082 				<< totalIterations << " values passed" << TestLog::EndMessage;
3083 
3084 		return tcu::TestStatus::fail("Failed!");
3085 	}
3086 
3087 	return tcu::TestStatus::pass("OK");
3088 }
3089 
getPossibleGraphicsSubgroupStages(Context & context,const vk::VkShaderStageFlags testedStages)3090 vk::VkShaderStageFlags vkt::subgroups::getPossibleGraphicsSubgroupStages (Context& context, const vk::VkShaderStageFlags testedStages)
3091 {
3092 	const VkPhysicalDeviceSubgroupProperties&	subgroupProperties	= context.getSubgroupProperties();
3093 	VkShaderStageFlags							stages				= testedStages & subgroupProperties.supportedStages;
3094 
3095 	DE_ASSERT(isAllGraphicsStages(testedStages));
3096 
3097 	if (VK_SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
3098 	{
3099 		if ((stages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
3100 			TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
3101 		else
3102 			stages = VK_SHADER_STAGE_FRAGMENT_BIT;
3103 	}
3104 
3105 	if (static_cast<VkShaderStageFlags>(0u) == stages)
3106 		TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
3107 
3108 	return stages;
3109 }
3110 
allStages(Context & context,vk::VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,const VerificationFunctor & checkResult,const vk::VkShaderStageFlags shaderStage)3111 tcu::TestStatus vkt::subgroups::allStages (Context&						context,
3112 										   vk::VkFormat					format,
3113 										   const SSBOData*				extraData,
3114 										   deUint32						extraDataCount,
3115 										   const void*					internalData,
3116 										   const VerificationFunctor&	checkResult,
3117 										   const vk::VkShaderStageFlags	shaderStage)
3118 {
3119 	return vkt::subgroups::allStagesRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, shaderStage,
3120 														 0u, 0u, 0u, 0u, 0u, DE_NULL);
3121 }
3122 
allStagesRequiredSubgroupSize(Context & context,vk::VkFormat format,const SSBOData * extraDatas,deUint32 extraDatasCount,const void * internalData,const VerificationFunctor & checkResult,const vk::VkShaderStageFlags shaderStageTested,const deUint32 vertexShaderStageCreateFlags,const deUint32 tessellationControlShaderStageCreateFlags,const deUint32 tessellationEvalShaderStageCreateFlags,const deUint32 geometryShaderStageCreateFlags,const deUint32 fragmentShaderStageCreateFlags,const deUint32 requiredSubgroupSize[5])3123 tcu::TestStatus vkt::subgroups::allStagesRequiredSubgroupSize (Context&						context,
3124 															   vk::VkFormat					format,
3125 															   const SSBOData*				extraDatas,
3126 															   deUint32						extraDatasCount,
3127 															   const void*					internalData,
3128 															   const VerificationFunctor&	checkResult,
3129 															   const vk::VkShaderStageFlags	shaderStageTested,
3130 															   const deUint32				vertexShaderStageCreateFlags,
3131 															   const deUint32				tessellationControlShaderStageCreateFlags,
3132 															   const deUint32				tessellationEvalShaderStageCreateFlags,
3133 															   const deUint32				geometryShaderStageCreateFlags,
3134 															   const deUint32				fragmentShaderStageCreateFlags,
3135 															   const deUint32				requiredSubgroupSize[5])
3136 {
3137 	const DeviceInterface&			vk					= context.getDeviceInterface();
3138 	const VkDevice					device				= context.getDevice();
3139 	const deUint32					maxWidth			= getMaxWidth();
3140 	vector<VkShaderStageFlagBits>	stagesVector;
3141 	VkShaderStageFlags				shaderStageRequired	= (VkShaderStageFlags)0ull;
3142 
3143 	Move<VkShaderModule>			vertexShaderModule;
3144 	Move<VkShaderModule>			teCtrlShaderModule;
3145 	Move<VkShaderModule>			teEvalShaderModule;
3146 	Move<VkShaderModule>			geometryShaderModule;
3147 	Move<VkShaderModule>			fragmentShaderModule;
3148 
3149 	if (shaderStageTested & VK_SHADER_STAGE_VERTEX_BIT)
3150 	{
3151 		stagesVector.push_back(VK_SHADER_STAGE_VERTEX_BIT);
3152 	}
3153 	if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
3154 	{
3155 		stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
3156 		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
3157 		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
3158 	}
3159 	if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
3160 	{
3161 		stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
3162 		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
3163 		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
3164 	}
3165 	if (shaderStageTested & VK_SHADER_STAGE_GEOMETRY_BIT)
3166 	{
3167 		stagesVector.push_back(VK_SHADER_STAGE_GEOMETRY_BIT);
3168 		const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
3169 		shaderStageRequired |=  (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
3170 	}
3171 	if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
3172 	{
3173 		const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
3174 		shaderStageRequired |=  (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
3175 	}
3176 
3177 	const deUint32	stagesCount	= static_cast<deUint32>(stagesVector.size());
3178 	const string	vert		= (shaderStageRequired & VK_SHADER_STAGE_VERTEX_BIT)					? "vert_noSubgroup"		: "vert";
3179 	const string	tesc		= (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)		? "tesc_noSubgroup"		: "tesc";
3180 	const string	tese		= (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)	? "tese_noSubgroup"		: "tese";
3181 
3182 	shaderStageRequired = shaderStageTested | shaderStageRequired;
3183 
3184 	vertexShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(vert), 0u);
3185 	if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
3186 	{
3187 		teCtrlShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tesc), 0u);
3188 		teEvalShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tese), 0u);
3189 	}
3190 	if (shaderStageRequired & VK_SHADER_STAGE_GEOMETRY_BIT)
3191 	{
3192 		if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
3193 		{
3194 			// tessellation shaders output line primitives
3195 			geometryShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("geometry_lines"), 0u);
3196 		}
3197 		else
3198 		{
3199 			// otherwise points are processed by geometry shader
3200 			geometryShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("geometry_points"), 0u);
3201 		}
3202 	}
3203 	if (shaderStageRequired & VK_SHADER_STAGE_FRAGMENT_BIT)
3204 		fragmentShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u);
3205 
3206 	std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(stagesCount + extraDatasCount);
3207 
3208 	DescriptorSetLayoutBuilder layoutBuilder;
3209 
3210 	// The implicit result SSBO we use to store our outputs from the shader
3211 	for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
3212 	{
3213 		const VkDeviceSize shaderSize = (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? maxWidth * 2 : maxWidth;
3214 		const VkDeviceSize size = getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize;
3215 		inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT));
3216 
3217 		layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1, stagesVector[ndx], getResultBinding(stagesVector[ndx]), DE_NULL);
3218 	}
3219 
3220 	for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
3221 	{
3222 		const deUint32 datasNdx = ndx - stagesCount;
3223 		if (extraDatas[datasNdx].isImage())
3224 		{
3225 			inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
3226 		}
3227 		else
3228 		{
3229 			const auto usage	= (extraDatas[datasNdx].isUBO() ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT : VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
3230 			const auto size		= getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) * extraDatas[datasNdx].numElements;
3231 			inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, usage));
3232 		}
3233 
3234 		const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3235 		initializeMemory(context, alloc, extraDatas[datasNdx]);
3236 
3237 		layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1,
3238 								extraDatas[datasNdx].stages, extraDatas[datasNdx].binding, DE_NULL);
3239 	}
3240 
3241 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
3242 
3243 	const Unique<VkPipelineLayout> pipelineLayout(
3244 		makePipelineLayout(vk, device, *descriptorSetLayout));
3245 
3246 	const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
3247 	const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
3248 														   shaderStageRequired,
3249 														   *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, *teCtrlShaderModule, *teEvalShaderModule,
3250 														   *renderPass,
3251 														   (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
3252 														   DE_NULL, DE_NULL, false, VK_FORMAT_R32G32B32A32_SFLOAT,
3253 														   vertexShaderStageCreateFlags, tessellationControlShaderStageCreateFlags, tessellationEvalShaderStageCreateFlags,
3254 														   geometryShaderStageCreateFlags, fragmentShaderStageCreateFlags, requiredSubgroupSize));
3255 
3256 	Move <VkDescriptorPool>	descriptorPool;
3257 	Move <VkDescriptorSet>	descriptorSet;
3258 
3259 	if (inputBuffers.size() > 0)
3260 	{
3261 		DescriptorPoolBuilder poolBuilder;
3262 
3263 		for (deUint32 ndx = 0u; ndx < static_cast<deUint32>(inputBuffers.size()); ndx++)
3264 		{
3265 			poolBuilder.addType(inputBuffers[ndx]->getType());
3266 		}
3267 
3268 		descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3269 
3270 		// Create descriptor set
3271 		descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3272 
3273 		DescriptorSetUpdateBuilder updateBuilder;
3274 
3275 		for (deUint32 ndx = 0u; ndx < stagesCount + extraDatasCount; ndx++)
3276 		{
3277 			deUint32 binding;
3278 			if (ndx < stagesCount) binding = getResultBinding(stagesVector[ndx]);
3279 			else binding = extraDatas[ndx -stagesCount].binding;
3280 
3281 			if (inputBuffers[ndx]->isImage())
3282 			{
3283 				VkDescriptorImageInfo info =
3284 					makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
3285 											inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3286 
3287 				updateBuilder.writeSingle(	*descriptorSet,
3288 											DescriptorSetUpdateBuilder::Location::binding(binding),
3289 											inputBuffers[ndx]->getType(), &info);
3290 			}
3291 			else
3292 			{
3293 				VkDescriptorBufferInfo info =
3294 					makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(),
3295 							0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
3296 
3297 				updateBuilder.writeSingle(	*descriptorSet,
3298 													DescriptorSetUpdateBuilder::Location::binding(binding),
3299 													inputBuffers[ndx]->getType(), &info);
3300 			}
3301 		}
3302 
3303 		updateBuilder.update(vk, device);
3304 	}
3305 
3306 	{
3307 		const VkQueue					queue					= context.getUniversalQueue();
3308 		const deUint32					queueFamilyIndex		= context.getUniversalQueueFamilyIndex();
3309 		const Unique<VkCommandPool>		cmdPool					(makeCommandPool(vk, device, queueFamilyIndex));
3310 		const deUint32					subgroupSize			= getSubgroupSize(context);
3311 		const Unique<VkCommandBuffer>	cmdBuffer				(makeCommandBuffer(context, *cmdPool));
3312 		unsigned						totalIterations			= 0u;
3313 		unsigned						failedIterations		= 0u;
3314 		Image							resultImage				(context, maxWidth, 1, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3315 		const Unique<VkFramebuffer>		framebuffer				(makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), maxWidth, 1u));
3316 		const VkViewport				viewport				= makeViewport(maxWidth, 1u);
3317 		const VkRect2D					scissor					= makeRect2D(maxWidth, 1u);
3318 		const vk::VkDeviceSize			imageResultSize			= tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
3319 		Buffer							imageBufferResult		(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3320 		const VkImageSubresourceRange	subresourceRange		=
3321 		{
3322 			VK_IMAGE_ASPECT_COLOR_BIT,											//VkImageAspectFlags	aspectMask
3323 			0u,																	//deUint32				baseMipLevel
3324 			1u,																	//deUint32				levelCount
3325 			0u,																	//deUint32				baseArrayLayer
3326 			1u																	//deUint32				layerCount
3327 		};
3328 
3329 		const VkImageMemoryBarrier		colorAttachmentBarrier	= makeImageMemoryBarrier(
3330 			(VkAccessFlags)0u, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
3331 			VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
3332 			resultImage.getImage(), subresourceRange);
3333 
3334 		for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
3335 		{
3336 			for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
3337 			{
3338 				// re-init the data
3339 				const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3340 				initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
3341 			}
3342 
3343 			totalIterations++;
3344 
3345 			beginCommandBuffer(vk, *cmdBuffer);
3346 
3347 			vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &colorAttachmentBarrier);
3348 
3349 			vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3350 
3351 			vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3352 
3353 			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
3354 
3355 			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3356 
3357 			if (stagesCount + extraDatasCount > 0)
3358 				vk.cmdBindDescriptorSets(*cmdBuffer,
3359 						VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3360 						&descriptorSet.get(), 0u, DE_NULL);
3361 
3362 			vk.cmdDraw(*cmdBuffer, width, 1, 0, 0);
3363 
3364 			endRenderPass(vk, *cmdBuffer);
3365 
3366 			copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(width, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3367 
3368 			endCommandBuffer(vk, *cmdBuffer);
3369 
3370 			submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3371 
3372 			for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
3373 			{
3374 				std::vector<const void*> datas;
3375 				if (!inputBuffers[ndx]->isImage())
3376 				{
3377 					const Allocation& resultAlloc = inputBuffers[ndx]->getAllocation();
3378 					invalidateAlloc(vk, device, resultAlloc);
3379 					// we always have our result data first
3380 					datas.push_back(resultAlloc.getHostPtr());
3381 				}
3382 
3383 				for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
3384 				{
3385 					const deUint32 datasNdx = index - stagesCount;
3386 					if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
3387 					{
3388 						const Allocation& resultAlloc = inputBuffers[index]->getAllocation();
3389 						invalidateAlloc(vk, device, resultAlloc);
3390 						// we always have our result data first
3391 						datas.push_back(resultAlloc.getHostPtr());
3392 					}
3393 				}
3394 
3395 				// Any stage in the vertex pipeline may be called multiple times per vertex, so we may need >= non-strict comparisons.
3396 				const bool		multiCall	= (	stagesVector[ndx] == VK_SHADER_STAGE_VERTEX_BIT						||
3397 												stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT		||
3398 												stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT	||
3399 												stagesVector[ndx] == VK_SHADER_STAGE_GEOMETRY_BIT					);
3400 				const deUint32	usedWidth	= ((stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? width * 2 : width);
3401 
3402 				if (!checkResult(internalData, datas, usedWidth, subgroupSize, multiCall))
3403 					failedIterations++;
3404 			}
3405 			if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
3406 			{
3407 				std::vector<const void*> datas;
3408 				const Allocation& resultAlloc = imageBufferResult.getAllocation();
3409 				invalidateAlloc(vk, device, resultAlloc);
3410 
3411 				// we always have our result data first
3412 				datas.push_back(resultAlloc.getHostPtr());
3413 
3414 				for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
3415 				{
3416 					const deUint32 datasNdx = index - stagesCount;
3417 					if (VK_SHADER_STAGE_FRAGMENT_BIT & extraDatas[datasNdx].stages && (!inputBuffers[index]->isImage()))
3418 					{
3419 						const Allocation& alloc = inputBuffers[index]->getAllocation();
3420 						invalidateAlloc(vk, device, alloc);
3421 						// we always have our result data first
3422 						datas.push_back(alloc.getHostPtr());
3423 					}
3424 				}
3425 
3426 				if (!checkResult(internalData, datas, width, subgroupSize, false))
3427 					failedIterations++;
3428 			}
3429 
3430 			context.resetCommandPoolForVKSC(device, *cmdPool);
3431 		}
3432 
3433 		if (0 < failedIterations)
3434 		{
3435 			unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3436 
3437 			context.getTestContext().getLog()
3438 				<< TestLog::Message << valuesPassed << " / "
3439 				<< totalIterations << " values passed" << TestLog::EndMessage;
3440 
3441 			return tcu::TestStatus::fail("Failed!");
3442 		}
3443 	}
3444 
3445 	return tcu::TestStatus::pass("OK");
3446 }
3447 
makeVertexFrameBufferTest(Context & context,vk::VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,subgroups::CheckResult checkResult)3448 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTest (Context&					context,
3449 														   vk::VkFormat				format,
3450 														   const SSBOData*			extraData,
3451 														   deUint32					extraDataCount,
3452 														   const void*				internalData,
3453 														   subgroups::CheckResult	checkResult)
3454 {
3455 	return makeVertexFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, 0u, 0u);
3456 }
3457 
makeVertexFrameBufferTestRequiredSubgroupSize(Context & context,vk::VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,subgroups::CheckResult checkResult,const deUint32 vertexShaderStageCreateFlags,const deUint32 requiredSubgroupSize)3458 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTestRequiredSubgroupSize (Context&					context,
3459 																			   vk::VkFormat				format,
3460 																			   const SSBOData*			extraData,
3461 																			   deUint32					extraDataCount,
3462 																			   const void*				internalData,
3463 																			   subgroups::CheckResult	checkResult,
3464 																			   const deUint32			vertexShaderStageCreateFlags,
3465 																			   const deUint32			requiredSubgroupSize)
3466 {
3467 	const DeviceInterface&					vk						= context.getDeviceInterface();
3468 	const VkDevice							device					= context.getDevice();
3469 	const VkQueue							queue					= context.getUniversalQueue();
3470 	const deUint32							maxWidth				= getMaxWidth();
3471 	const deUint32							queueFamilyIndex		= context.getUniversalQueueFamilyIndex();
3472 	vector<de::SharedPtr<BufferOrImage> >	inputBuffers			(extraDataCount);
3473 	DescriptorSetLayoutBuilder				layoutBuilder;
3474 	const Unique<VkShaderModule>			vertexShaderModule		(createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
3475 	const Unique<VkShaderModule>			fragmentShaderModule	(createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
3476 	const Unique<VkRenderPass>				renderPass				(makeRenderPass(context, format));
3477 	const VkVertexInputBindingDescription	vertexInputBinding		=
3478 	{
3479 		0u,											// binding;
3480 		static_cast<deUint32>(sizeof(tcu::Vec4)),	// stride;
3481 		VK_VERTEX_INPUT_RATE_VERTEX					// inputRate
3482 	};
3483 	const VkVertexInputAttributeDescription	vertexInputAttribute	=
3484 	{
3485 		0u,
3486 		0u,
3487 		VK_FORMAT_R32G32B32A32_SFLOAT,
3488 		0u
3489 	};
3490 
3491 	for (deUint32 i = 0u; i < extraDataCount; i++)
3492 	{
3493 		if (extraData[i].isImage())
3494 		{
3495 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
3496 		}
3497 		else
3498 		{
3499 			DE_ASSERT(extraData[i].isUBO());
3500 			vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
3501 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
3502 		}
3503 		const Allocation& alloc = inputBuffers[i]->getAllocation();
3504 		initializeMemory(context, alloc, extraData[i]);
3505 	}
3506 
3507 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
3508 		layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_VERTEX_BIT, DE_NULL);
3509 
3510 	const Unique<VkDescriptorSetLayout>		descriptorSetLayout		(layoutBuilder.build(vk, device));
3511 
3512 	const Unique<VkPipelineLayout>			pipelineLayout			(makePipelineLayout(vk, device, *descriptorSetLayout));
3513 
3514 	const deUint32 requiredSubgroupSizes[5] = {requiredSubgroupSize, 0u, 0u, 0u, 0u};
3515 	const Unique<VkPipeline>				pipeline				(makeGraphicsPipeline(context, *pipelineLayout,
3516 																						  VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
3517 																						  *vertexShaderModule, *fragmentShaderModule,
3518 																						  DE_NULL, DE_NULL, DE_NULL,
3519 																						  *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
3520 																						  &vertexInputBinding, &vertexInputAttribute, true, format,
3521 																						  vertexShaderStageCreateFlags, 0u, 0u, 0u, 0u,
3522 																						  requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
3523 	DescriptorPoolBuilder					poolBuilder;
3524 	DescriptorSetUpdateBuilder				updateBuilder;
3525 
3526 
3527 	for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
3528 		poolBuilder.addType(inputBuffers[ndx]->getType());
3529 
3530 	Move <VkDescriptorPool>					descriptorPool;
3531 	Move <VkDescriptorSet>					descriptorSet;
3532 
3533 	if (extraDataCount > 0)
3534 	{
3535 		descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3536 		descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3537 	}
3538 
3539 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
3540 	{
3541 		const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3542 		initializeMemory(context, alloc, extraData[ndx]);
3543 	}
3544 
3545 	for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
3546 	{
3547 		if (inputBuffers[buffersNdx]->isImage())
3548 		{
3549 			VkDescriptorImageInfo info =
3550 				makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
3551 										inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3552 
3553 			updateBuilder.writeSingle(*descriptorSet,
3554 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
3555 										inputBuffers[buffersNdx]->getType(), &info);
3556 		}
3557 		else
3558 		{
3559 			VkDescriptorBufferInfo info =
3560 				makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
3561 										0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
3562 
3563 			updateBuilder.writeSingle(*descriptorSet,
3564 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
3565 										inputBuffers[buffersNdx]->getType(), &info);
3566 		}
3567 	}
3568 	updateBuilder.update(vk, device);
3569 
3570 	const Unique<VkCommandPool>				cmdPool					(makeCommandPool(vk, device, queueFamilyIndex));
3571 
3572 	const deUint32							subgroupSize			= getSubgroupSize(context);
3573 
3574 	const Unique<VkCommandBuffer>			cmdBuffer				(makeCommandBuffer(context, *cmdPool));
3575 
3576 	const vk::VkDeviceSize					vertexBufferSize		= maxWidth * sizeof(tcu::Vec4);
3577 	Buffer									vertexBuffer			(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
3578 
3579 	unsigned								totalIterations			= 0u;
3580 	unsigned								failedIterations		= 0u;
3581 
3582 	Image									discardableImage		(context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3583 
3584 	{
3585 		const Allocation&		alloc				= vertexBuffer.getAllocation();
3586 		std::vector<tcu::Vec4>	data				(maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
3587 		const float				pixelSize			= 2.0f / static_cast<float>(maxWidth);
3588 		float					leftHandPosition	= -1.0f;
3589 
3590 		for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
3591 		{
3592 			data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
3593 			leftHandPosition += pixelSize;
3594 		}
3595 
3596 		deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
3597 		flushAlloc(vk, device, alloc);
3598 	}
3599 
3600 	const Unique<VkFramebuffer>	framebuffer			(makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
3601 	const VkViewport			viewport			= makeViewport(maxWidth, 1u);
3602 	const VkRect2D				scissor				= makeRect2D(maxWidth, 1u);
3603 	const vk::VkDeviceSize		imageResultSize		= tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
3604 	Buffer						imageBufferResult	(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3605 	const VkDeviceSize			vertexBufferOffset	= 0u;
3606 
3607 	for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
3608 	{
3609 		totalIterations++;
3610 
3611 		for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
3612 		{
3613 			const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3614 			initializeMemory(context, alloc, extraData[ndx]);
3615 		}
3616 
3617 		beginCommandBuffer(vk, *cmdBuffer);
3618 		{
3619 			vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3620 
3621 			vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3622 
3623 			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
3624 
3625 			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3626 
3627 			if (extraDataCount > 0)
3628 			{
3629 				vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3630 					&descriptorSet.get(), 0u, DE_NULL);
3631 			}
3632 
3633 			vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
3634 
3635 			vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
3636 
3637 			endRenderPass(vk, *cmdBuffer);
3638 
3639 			copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3640 
3641 			endCommandBuffer(vk, *cmdBuffer);
3642 
3643 			submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3644 		}
3645 		context.resetCommandPoolForVKSC(device, *cmdPool);
3646 
3647 		{
3648 			const Allocation& allocResult = imageBufferResult.getAllocation();
3649 			invalidateAlloc(vk, device, allocResult);
3650 
3651 			std::vector<const void*> datas;
3652 			datas.push_back(allocResult.getHostPtr());
3653 			if (!checkResult(internalData, datas, width, subgroupSize))
3654 				failedIterations++;
3655 		}
3656 	}
3657 
3658 	if (0 < failedIterations)
3659 	{
3660 		unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3661 
3662 		context.getTestContext().getLog()
3663 			<< TestLog::Message << valuesPassed << " / "
3664 			<< totalIterations << " values passed" << TestLog::EndMessage;
3665 
3666 		return tcu::TestStatus::fail("Failed!");
3667 	}
3668 
3669 	return tcu::TestStatus::pass("OK");
3670 }
3671 
makeFragmentFrameBufferTest(Context & context,VkFormat format,const SSBOData * extraDatas,deUint32 extraDatasCount,const void * internalData,CheckResultFragment checkResult)3672 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTest (Context&				context,
3673 															 VkFormat				format,
3674 															 const SSBOData*		extraDatas,
3675 															 deUint32				extraDatasCount,
3676 															 const void*			internalData,
3677 															 CheckResultFragment	checkResult)
3678 {
3679 	return makeFragmentFrameBufferTestRequiredSubgroupSize(context, format, extraDatas, extraDatasCount, internalData, checkResult, 0u, 0u);
3680 }
3681 
makeFragmentFrameBufferTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * extraDatas,deUint32 extraDatasCount,const void * internalData,CheckResultFragment checkResult,const deUint32 fragmentShaderStageCreateFlags,const deUint32 requiredSubgroupSize)3682 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTestRequiredSubgroupSize (Context&				context,
3683 																				 VkFormat				format,
3684 																				 const SSBOData*		extraDatas,
3685 																				 deUint32				extraDatasCount,
3686 																				 const void*			internalData,
3687 																				 CheckResultFragment	checkResult,
3688 																				 const deUint32			fragmentShaderStageCreateFlags,
3689 																				 const deUint32			requiredSubgroupSize)
3690 {
3691 	const DeviceInterface&						vk						= context.getDeviceInterface();
3692 	const VkDevice								device					= context.getDevice();
3693 	const VkQueue								queue					= context.getUniversalQueue();
3694 	const deUint32								queueFamilyIndex		= context.getUniversalQueueFamilyIndex();
3695 	const Unique<VkShaderModule>				vertexShaderModule		(createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
3696 	const Unique<VkShaderModule>				fragmentShaderModule	(createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
3697 	std::vector< de::SharedPtr<BufferOrImage> > inputBuffers			(extraDatasCount);
3698 
3699 	for (deUint32 i = 0; i < extraDatasCount; i++)
3700 	{
3701 		if (extraDatas[i].isImage())
3702 		{
3703 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[i].numElements), 1, extraDatas[i].format));
3704 		}
3705 		else
3706 		{
3707 			DE_ASSERT(extraDatas[i].isUBO());
3708 
3709 			const vk::VkDeviceSize	size	= getElementSizeInBytes(extraDatas[i].format, extraDatas[i].layout) * extraDatas[i].numElements;
3710 
3711 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
3712 		}
3713 
3714 		const Allocation& alloc = inputBuffers[i]->getAllocation();
3715 
3716 		initializeMemory(context, alloc, extraDatas[i]);
3717 	}
3718 
3719 	DescriptorSetLayoutBuilder layoutBuilder;
3720 
3721 	for (deUint32 i = 0; i < extraDatasCount; i++)
3722 	{
3723 		layoutBuilder.addBinding(inputBuffers[i]->getType(), 1, VK_SHADER_STAGE_FRAGMENT_BIT, DE_NULL);
3724 	}
3725 
3726 	const Unique<VkDescriptorSetLayout>	descriptorSetLayout(layoutBuilder.build(vk, device));
3727 	const Unique<VkPipelineLayout>		pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
3728 	const Unique<VkRenderPass>			renderPass(makeRenderPass(context, format));
3729 	const deUint32						requiredSubgroupSizes[5] = {0u, 0u, 0u, 0u, requiredSubgroupSize};
3730 	const Unique<VkPipeline>			pipeline(makeGraphicsPipeline(context,
3731 																	  *pipelineLayout,
3732 																	  VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
3733 																	  *vertexShaderModule,
3734 																	  *fragmentShaderModule,
3735 																	  DE_NULL,
3736 																	  DE_NULL,
3737 																	  DE_NULL,
3738 																	  *renderPass,
3739 																	  VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
3740 																	  DE_NULL,
3741 																	  DE_NULL,
3742 																	  true,
3743 																	  VK_FORMAT_R32G32B32A32_SFLOAT,
3744 																	  0u,
3745 																	  0u,
3746 																	  0u,
3747 																	  0u,
3748 																	  fragmentShaderStageCreateFlags,
3749 																	  requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
3750 	DescriptorPoolBuilder				poolBuilder;
3751 
3752 	// To stop validation complaining, always add at least one type to pool.
3753 	poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3754 	for (deUint32 i = 0; i < extraDatasCount; i++)
3755 	{
3756 		poolBuilder.addType(inputBuffers[i]->getType());
3757 	}
3758 
3759 	Move<VkDescriptorPool> descriptorPool;
3760 	// Create descriptor set
3761 	Move<VkDescriptorSet> descriptorSet;
3762 
3763 	if (extraDatasCount > 0)
3764 	{
3765 		descriptorPool	= poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3766 
3767 		descriptorSet	= makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3768 	}
3769 
3770 	DescriptorSetUpdateBuilder updateBuilder;
3771 
3772 	for (deUint32 i = 0; i < extraDatasCount; i++)
3773 	{
3774 		if (inputBuffers[i]->isImage())
3775 		{
3776 			const VkDescriptorImageInfo info = makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(), inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3777 
3778 			updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i), inputBuffers[i]->getType(), &info);
3779 		}
3780 		else
3781 		{
3782 			const VkDescriptorBufferInfo	info	= makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, inputBuffers[i]->getAsBuffer()->getSize());
3783 
3784 			updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i), inputBuffers[i]->getType(), &info);
3785 		}
3786 	}
3787 
3788 	if (extraDatasCount > 0)
3789 		updateBuilder.update(vk, device);
3790 
3791 	const Unique<VkCommandPool>		cmdPool				(makeCommandPool(vk, device, queueFamilyIndex));
3792 	const deUint32					subgroupSize		= getSubgroupSize(context);
3793 	const Unique<VkCommandBuffer>	cmdBuffer			(makeCommandBuffer(context, *cmdPool));
3794 	unsigned						totalIterations		= 0;
3795 	unsigned						failedIterations	= 0;
3796 
3797 	for (deUint32 width = 8; width <= subgroupSize; width *= 2)
3798 	{
3799 		for (deUint32 height = 8; height <= subgroupSize; height *= 2)
3800 		{
3801 			totalIterations++;
3802 
3803 			// re-init the data
3804 			for (deUint32 i = 0; i < extraDatasCount; i++)
3805 			{
3806 				const Allocation& alloc = inputBuffers[i]->getAllocation();
3807 
3808 				initializeMemory(context, alloc, extraDatas[i]);
3809 			}
3810 
3811 			const VkDeviceSize			formatSize				= getFormatSizeInBytes(format);
3812 			const VkDeviceSize			resultImageSizeInBytes	= width * height * formatSize;
3813 			Image						resultImage				(context, width, height, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3814 			Buffer						resultBuffer			(context, resultImageSizeInBytes, VK_IMAGE_USAGE_TRANSFER_DST_BIT);
3815 			const Unique<VkFramebuffer>	framebuffer				(makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), width, height));
3816 			VkViewport					viewport				= makeViewport(width, height);
3817 			VkRect2D					scissor					= {{0, 0}, {width, height}};
3818 
3819 			beginCommandBuffer(vk, *cmdBuffer);
3820 
3821 			vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3822 
3823 			vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3824 
3825 			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, width, height), tcu::Vec4(0.0f));
3826 
3827 			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3828 
3829 			if (extraDatasCount > 0)
3830 				vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
3831 
3832 			vk.cmdDraw(*cmdBuffer, 4, 1, 0, 0);
3833 
3834 			endRenderPass(vk, *cmdBuffer);
3835 
3836 			copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), resultBuffer.getBuffer(), tcu::IVec2(width, height), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3837 
3838 			endCommandBuffer(vk, *cmdBuffer);
3839 
3840 			submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3841 
3842 			std::vector<const void*> datas;
3843 			{
3844 				const Allocation& resultAlloc = resultBuffer.getAllocation();
3845 				invalidateAlloc(vk, device, resultAlloc);
3846 
3847 				// we always have our result data first
3848 				datas.push_back(resultAlloc.getHostPtr());
3849 			}
3850 
3851 			if (!checkResult(internalData, datas, width, height, subgroupSize))
3852 			{
3853 				failedIterations++;
3854 			}
3855 
3856 			context.resetCommandPoolForVKSC(device, *cmdPool);
3857 		}
3858 	}
3859 
3860 	if (0 < failedIterations)
3861 	{
3862 		unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3863 
3864 		context.getTestContext().getLog()
3865 			<< TestLog::Message << valuesPassed << " / "
3866 			<< totalIterations << " values passed" << TestLog::EndMessage;
3867 
3868 		return tcu::TestStatus::fail("Failed!");
3869 	}
3870 
3871 	return tcu::TestStatus::pass("OK");
3872 }
3873 
makeComputePipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderModule shaderModule,const deUint32 pipelineShaderStageFlags,const deUint32 pipelineCreateFlags,VkPipeline basePipelineHandle,deUint32 localSizeX,deUint32 localSizeY,deUint32 localSizeZ,deUint32 requiredSubgroupSize)3874 Move<VkPipeline> makeComputePipeline (Context&					context,
3875 									  const VkPipelineLayout	pipelineLayout,
3876 									  const VkShaderModule		shaderModule,
3877 									  const deUint32			pipelineShaderStageFlags,
3878 									  const deUint32			pipelineCreateFlags,
3879 									  VkPipeline				basePipelineHandle,
3880 									  deUint32					localSizeX,
3881 									  deUint32					localSizeY,
3882 									  deUint32					localSizeZ,
3883 									  deUint32					requiredSubgroupSize)
3884 {
3885 	const deUint32														localSize[3]				= {localSizeX, localSizeY, localSizeZ};
3886 	const vk::VkSpecializationMapEntry									entries[3]					=
3887 	{
3888 		{0, sizeof(deUint32) * 0, sizeof(deUint32)},
3889 		{1, sizeof(deUint32) * 1, sizeof(deUint32)},
3890 		{2, static_cast<deUint32>(sizeof(deUint32) * 2), sizeof(deUint32)},
3891 	};
3892 	const vk::VkSpecializationInfo										info						=
3893 	{
3894 		/* mapEntryCount = */ 3,
3895 		/* pMapEntries   = */ entries,
3896 		/* dataSize      = */ sizeof(localSize),
3897 		/* pData         = */ localSize
3898 	};
3899 	const vk::VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT	subgroupSizeCreateInfo		=
3900 	{
3901 		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,	// VkStructureType    sType;
3902 		DE_NULL,																		// void*              pNext;
3903 		requiredSubgroupSize															// uint32_t           requiredSubgroupSize;
3904 	};
3905 	const vk::VkPipelineShaderStageCreateInfo							pipelineShaderStageParams	=
3906 	{
3907 		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,				// VkStructureType					sType;
3908 		(requiredSubgroupSize != 0u ? &subgroupSizeCreateInfo : DE_NULL),	// const void*						pNext;
3909 		pipelineShaderStageFlags,											// VkPipelineShaderStageCreateFlags	flags;
3910 		VK_SHADER_STAGE_COMPUTE_BIT,										// VkShaderStageFlagBits			stage;
3911 		shaderModule,														// VkShaderModule					module;
3912 		"main",																// const char*						pName;
3913 		&info,																// const VkSpecializationInfo*		pSpecializationInfo;
3914 	};
3915 	const vk::VkComputePipelineCreateInfo								pipelineCreateInfo			=
3916 	{
3917 		VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,	// VkStructureType	sType;
3918 		DE_NULL,										// const void*						pNext;
3919 		pipelineCreateFlags,							// VkPipelineCreateFlags			flags;
3920 		pipelineShaderStageParams,						// VkPipelineShaderStageCreateInfo	stage;
3921 		pipelineLayout,									// VkPipelineLayout					layout;
3922 		basePipelineHandle,								// VkPipeline						basePipelineHandle;
3923 		-1,												// deInt32							basePipelineIndex;
3924 	};
3925 
3926 	return createComputePipeline(context.getDeviceInterface(), context.getDevice(), DE_NULL, &pipelineCreateInfo);
3927 }
3928 
3929 #ifndef CTS_USES_VULKANSC
makeMeshPipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderModule taskModule,const VkShaderModule meshModule,const deUint32 pipelineShaderStageFlags,const deUint32 pipelineCreateFlags,VkPipeline basePipelineHandle,deUint32 localSizeX,deUint32 localSizeY,deUint32 localSizeZ,deUint32 requiredSubgroupSize,const VkRenderPass renderPass)3930 Move<VkPipeline> makeMeshPipeline (Context&					context,
3931 								   const VkPipelineLayout	pipelineLayout,
3932 								   const VkShaderModule		taskModule,
3933 								   const VkShaderModule		meshModule,
3934 								   const deUint32			pipelineShaderStageFlags,
3935 								   const deUint32			pipelineCreateFlags,
3936 								   VkPipeline				basePipelineHandle,
3937 								   deUint32					localSizeX,
3938 								   deUint32					localSizeY,
3939 								   deUint32					localSizeZ,
3940 								   deUint32					requiredSubgroupSize,
3941 								   const VkRenderPass		renderPass)
3942 {
3943 	const deUint32														localSize[3]				= {localSizeX, localSizeY, localSizeZ};
3944 	const vk::VkSpecializationMapEntry									entries[3]					=
3945 	{
3946 		{0, sizeof(deUint32) * 0, sizeof(deUint32)},
3947 		{1, sizeof(deUint32) * 1, sizeof(deUint32)},
3948 		{2, static_cast<deUint32>(sizeof(deUint32) * 2), sizeof(deUint32)},
3949 	};
3950 	const vk::VkSpecializationInfo										info						=
3951 	{
3952 		/* mapEntryCount = */ 3,
3953 		/* pMapEntries   = */ entries,
3954 		/* dataSize      = */ sizeof(localSize),
3955 		/* pData         = */ localSize
3956 	};
3957 	const vk::VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT	subgroupSizeCreateInfo		=
3958 	{
3959 		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,	// VkStructureType    sType;
3960 		DE_NULL,																		// void*              pNext;
3961 		requiredSubgroupSize															// uint32_t           requiredSubgroupSize;
3962 	};
3963 
3964 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT*		pSubgroupSizeCreateInfo		= ((requiredSubgroupSize != 0u) ? &subgroupSizeCreateInfo : nullptr);
3965 
3966 	std::vector<VkPipelineShaderStageCreateInfo>						shaderStageParams;
3967 	vk::VkPipelineShaderStageCreateInfo									pipelineShaderStageParams	=
3968 	{
3969 		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType					sType;
3970 		nullptr,												// const void*						pNext;
3971 		pipelineShaderStageFlags,								// VkPipelineShaderStageCreateFlags	flags;
3972 		VK_SHADER_STAGE_FLAG_BITS_MAX_ENUM,						// VkShaderStageFlagBits			stage;
3973 		DE_NULL,												// VkShaderModule					module;
3974 		"main",													// const char*						pName;
3975 		&info,													// const VkSpecializationInfo*		pSpecializationInfo;
3976 	};
3977 
3978 	if (taskModule != DE_NULL)
3979 	{
3980 		pipelineShaderStageParams.module	= taskModule;
3981 		pipelineShaderStageParams.pNext		= pSubgroupSizeCreateInfo;
3982 		pipelineShaderStageParams.stage		= VK_SHADER_STAGE_TASK_BIT_EXT;
3983 		shaderStageParams.push_back(pipelineShaderStageParams);
3984 	}
3985 
3986 	if (meshModule != DE_NULL)
3987 	{
3988 		pipelineShaderStageParams.module	= meshModule;
3989 		pipelineShaderStageParams.pNext		= ((taskModule == DE_NULL) ? pSubgroupSizeCreateInfo : nullptr);
3990 		pipelineShaderStageParams.stage		= VK_SHADER_STAGE_MESH_BIT_EXT;
3991 		shaderStageParams.push_back(pipelineShaderStageParams);
3992 	}
3993 
3994 	const std::vector<VkViewport>	viewports	(1u, makeViewport(1u, 1u));
3995 	const std::vector<VkRect2D>		scissors	(1u, makeRect2D(1u, 1u));
3996 
3997 	return makeGraphicsPipeline(context.getDeviceInterface(), context.getDevice(), basePipelineHandle, pipelineLayout, pipelineCreateFlags, shaderStageParams, renderPass, viewports, scissors);
3998 }
3999 #endif // CTS_USES_VULKANSC
4000 
makeComputeOrMeshTestRequiredSubgroupSize(ComputeLike testType,Context & context,VkFormat format,const vkt::subgroups::SSBOData * inputs,deUint32 inputsCount,const void * internalData,vkt::subgroups::CheckResultCompute checkResult,const deUint32 pipelineShaderStageCreateFlags,const deUint32 numWorkgroups[3],const deBool isRequiredSubgroupSize,const deUint32 subgroupSize,const deUint32 localSizesToTest[][3],const deUint32 localSizesToTestCount)4001 tcu::TestStatus makeComputeOrMeshTestRequiredSubgroupSize (ComputeLike							testType,
4002 														   Context&								context,
4003 														   VkFormat								format,
4004 														   const vkt::subgroups::SSBOData*		inputs,
4005 														   deUint32								inputsCount,
4006 														   const void*							internalData,
4007 														   vkt::subgroups::CheckResultCompute	checkResult,
4008 														   const deUint32						pipelineShaderStageCreateFlags,
4009 														   const deUint32						numWorkgroups[3],
4010 														   const deBool							isRequiredSubgroupSize,
4011 														   const deUint32						subgroupSize,
4012 														   const deUint32						localSizesToTest[][3],
4013 														   const deUint32						localSizesToTestCount)
4014 {
4015 	const DeviceInterface&									vk								= context.getDeviceInterface();
4016 	const VkDevice											device							= context.getDevice();
4017 	const VkQueue											queue							= context.getUniversalQueue();
4018 	const deUint32											queueFamilyIndex				= context.getUniversalQueueFamilyIndex();
4019 #ifndef CTS_USES_VULKANSC
4020 	const VkPhysicalDeviceSubgroupSizeControlProperties&	subgroupSizeControlProperties	= context.getSubgroupSizeControlProperties();
4021 #else
4022 	const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT&	subgroupSizeControlProperties	= context.getSubgroupSizeControlPropertiesEXT();
4023 #endif // CTS_USES_VULKANSC
4024 	const VkDeviceSize										elementSize						= getFormatSizeInBytes(format);
4025 	const VkDeviceSize										maxSubgroupSize					= isRequiredSubgroupSize
4026 																							? deMax32(subgroupSizeControlProperties.maxSubgroupSize, vkt::subgroups::maxSupportedSubgroupSize())
4027 																							: vkt::subgroups::maxSupportedSubgroupSize();
4028 	const VkDeviceSize										resultBufferSize				= maxSubgroupSize * maxSubgroupSize * maxSubgroupSize;
4029 	const VkDeviceSize										resultBufferSizeInBytes			= resultBufferSize * elementSize;
4030 	Buffer													resultBuffer					(context, resultBufferSizeInBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
4031 	std::vector< de::SharedPtr<BufferOrImage> >				inputBuffers					(inputsCount);
4032 	const auto												shaderStageFlags				= ((testType == ComputeLike::COMPUTE)
4033 																								? VK_SHADER_STAGE_COMPUTE_BIT
4034 #ifndef CTS_USES_VULKANSC
4035 																								: (VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_TASK_BIT_EXT));
4036 #else
4037 																								: 0);
4038 #endif // CTS_USES_VULKANSC
4039 	const auto												pipelineBindPoint				= ((testType == ComputeLike::COMPUTE)
4040 																								? VK_PIPELINE_BIND_POINT_COMPUTE
4041 																								: VK_PIPELINE_BIND_POINT_GRAPHICS);
4042 	const auto												pipelineStage					= ((testType == ComputeLike::COMPUTE)
4043 																								? VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT
4044 #ifndef CTS_USES_VULKANSC
4045 																								: (VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT | VK_PIPELINE_STAGE_MESH_SHADER_BIT_EXT));
4046 #else
4047 																								: 0);
4048 #endif // CTS_USES_VULKANSC
4049 	const auto												renderArea						= makeRect2D(1u, 1u);
4050 
4051 	std::vector<tcu::UVec3>									usedLocalSizes;
4052 	for (deUint32 i = 0; i < localSizesToTestCount; ++i)
4053 	{
4054 		usedLocalSizes.push_back(tcu::UVec3(localSizesToTest[i][0], localSizesToTest[i][1], localSizesToTest[i][2]));
4055 	}
4056 
4057 	for (deUint32 i = 0; i < inputsCount; i++)
4058 	{
4059 		if (inputs[i].isImage())
4060 		{
4061 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(inputs[i].numElements), 1, inputs[i].format));
4062 		}
4063 		else
4064 		{
4065 			const auto usage	= (inputs[i].isUBO() ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT : VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
4066 			const auto size		= getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
4067 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, usage));
4068 		}
4069 
4070 		const Allocation& alloc = inputBuffers[i]->getAllocation();
4071 
4072 		initializeMemory(context, alloc, inputs[i]);
4073 	}
4074 
4075 	DescriptorSetLayoutBuilder layoutBuilder;
4076 	layoutBuilder.addBinding(
4077 		resultBuffer.getType(), 1, shaderStageFlags, DE_NULL);
4078 
4079 	for (deUint32 i = 0; i < inputsCount; i++)
4080 	{
4081 		layoutBuilder.addBinding(
4082 			inputBuffers[i]->getType(), 1, shaderStageFlags, DE_NULL);
4083 	}
4084 
4085 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
4086 		layoutBuilder.build(vk, device));
4087 
4088 	Move<VkShaderModule>	compShader;
4089 	Move<VkShaderModule>	meshShader;
4090 	Move<VkShaderModule>	taskShader;
4091 	const auto&				binaries	= context.getBinaryCollection();
4092 
4093 	if (testType == ComputeLike::COMPUTE)
4094 	{
4095 		compShader = createShaderModule(vk, device, binaries.get("comp"));
4096 	}
4097 	else if (testType == ComputeLike::MESH)
4098 	{
4099 		meshShader = createShaderModule(vk, device, binaries.get("mesh"));
4100 		if (binaries.contains("task"))
4101 			taskShader = createShaderModule(vk, device, binaries.get("task"));
4102 	}
4103 	else
4104 	{
4105 		DE_ASSERT(false);
4106 	}
4107 
4108 	const Unique<VkPipelineLayout> pipelineLayout(
4109 		makePipelineLayout(vk, device, *descriptorSetLayout));
4110 
4111 	DescriptorPoolBuilder poolBuilder;
4112 
4113 	poolBuilder.addType(resultBuffer.getType());
4114 
4115 	for (deUint32 i = 0; i < inputsCount; i++)
4116 	{
4117 		poolBuilder.addType(inputBuffers[i]->getType());
4118 	}
4119 
4120 	const Unique<VkDescriptorPool>	descriptorPool			(poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
4121 	const Unique<VkDescriptorSet>	descriptorSet			(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
4122 	const VkDescriptorBufferInfo	resultDescriptorInfo =	makeDescriptorBufferInfo(resultBuffer.getBuffer(), 0ull, resultBufferSizeInBytes);
4123 	DescriptorSetUpdateBuilder		updateBuilder;
4124 
4125 	updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
4126 
4127 	for (deUint32 i = 0; i < inputsCount; i++)
4128 	{
4129 		if (inputBuffers[i]->isImage())
4130 		{
4131 			const VkDescriptorImageInfo		info	= makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(), inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
4132 
4133 			updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i + 1), inputBuffers[i]->getType(), &info);
4134 		}
4135 		else
4136 		{
4137 			vk::VkDeviceSize		size	= getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
4138 			VkDescriptorBufferInfo	info	= makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, size);
4139 
4140 			updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i + 1), inputBuffers[i]->getType(), &info);
4141 		}
4142 	}
4143 
4144 	updateBuilder.update(vk, device);
4145 
4146 	const Unique<VkCommandPool>						cmdPool				(makeCommandPool(vk, device, queueFamilyIndex));
4147 	unsigned										totalIterations		= 0;
4148 	unsigned										failedIterations	= 0;
4149 	const Unique<VkCommandBuffer>					cmdBuffer			(makeCommandBuffer(context, *cmdPool));
4150 	std::vector<de::SharedPtr<Move<VkPipeline>>>	pipelines			(localSizesToTestCount);
4151 	const auto										reqSubgroupSize		= (isRequiredSubgroupSize ? subgroupSize : 0u);
4152 	Move<VkRenderPass>								renderPass;
4153 	Move<VkFramebuffer>								framebuffer;
4154 
4155 	if (testType == ComputeLike::MESH)
4156 	{
4157 		renderPass	= makeRenderPass(vk, device);
4158 		framebuffer	= makeFramebuffer(vk, device, renderPass.get(), 0u, nullptr, renderArea.extent.width, renderArea.extent.height);
4159 	}
4160 
4161 	context.getTestContext().touchWatchdog();
4162 	{
4163 		if (testType == ComputeLike::COMPUTE)
4164 		{
4165 			pipelines[0] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeComputePipeline(context,
4166 																									*pipelineLayout,
4167 																									*compShader,
4168 																									pipelineShaderStageCreateFlags,
4169 #ifndef CTS_USES_VULKANSC
4170 																									VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT,
4171 #else
4172 																									0u,
4173 #endif // CTS_USES_VULKANSC
4174 																									(VkPipeline) DE_NULL,
4175 																									usedLocalSizes[0][0],
4176 																									usedLocalSizes[0][1],
4177 																									usedLocalSizes[0][2],
4178 																									reqSubgroupSize)));
4179 		}
4180 #ifndef CTS_USES_VULKANSC
4181 		else if (testType == ComputeLike::MESH)
4182 		{
4183 			pipelines[0] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeMeshPipeline(context,
4184 																								 pipelineLayout.get(),
4185 																								 taskShader.get(),
4186 																								 meshShader.get(),
4187 																								 pipelineShaderStageCreateFlags,
4188 																								 VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT,
4189 																								 DE_NULL,
4190 																								 usedLocalSizes[0][0],
4191 																								 usedLocalSizes[0][1],
4192 																								 usedLocalSizes[0][2],
4193 																								 reqSubgroupSize,
4194 																								 renderPass.get())));
4195 		}
4196 #endif // CTS_USES_VULKANSC
4197 		else
4198 		{
4199 			DE_ASSERT(false);
4200 		}
4201 	}
4202 	context.getTestContext().touchWatchdog();
4203 
4204 	for (deUint32 index = 1; index < (localSizesToTestCount - 1); index++)
4205 	{
4206 		const deUint32 nextX = usedLocalSizes[index][0];
4207 		const deUint32 nextY = usedLocalSizes[index][1];
4208 		const deUint32 nextZ = usedLocalSizes[index][2];
4209 
4210 		context.getTestContext().touchWatchdog();
4211 		{
4212 			if (testType == ComputeLike::COMPUTE)
4213 			{
4214 				pipelines[index] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeComputePipeline(context,
4215 																											*pipelineLayout,
4216 																											*compShader,
4217 																											pipelineShaderStageCreateFlags,
4218 #ifndef CTS_USES_VULKANSC
4219 																											VK_PIPELINE_CREATE_DERIVATIVE_BIT,
4220 #else
4221 																											0u,
4222 #endif // CTS_USES_VULKANSC
4223 																											**pipelines[0],
4224 																											nextX,
4225 																											nextY,
4226 																											nextZ,
4227 																											reqSubgroupSize)));
4228 			}
4229 #ifndef CTS_USES_VULKANSC
4230 			else if (testType == ComputeLike::MESH)
4231 			{
4232 				pipelines[index] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeMeshPipeline(context,
4233 																										 pipelineLayout.get(),
4234 																										 taskShader.get(),
4235 																										 meshShader.get(),
4236 																										 pipelineShaderStageCreateFlags,
4237 																										 VK_PIPELINE_CREATE_DERIVATIVE_BIT,
4238 																										 pipelines[0].get()->get(),
4239 																										 nextX,
4240 																										 nextY,
4241 																										 nextZ,
4242 																										 reqSubgroupSize,
4243 																										 renderPass.get())));
4244 			}
4245 #endif // CTS_USES_VULKANSC
4246 			else
4247 			{
4248 				DE_ASSERT(false);
4249 			}
4250 		}
4251 		context.getTestContext().touchWatchdog();
4252 	}
4253 
4254 	for (deUint32 index = 0; index < (localSizesToTestCount - 1); index++)
4255 	{
4256 		// we are running one test
4257 		totalIterations++;
4258 
4259 		beginCommandBuffer(vk, *cmdBuffer);
4260 		{
4261 			if (testType == ComputeLike::MESH)
4262 				beginRenderPass(vk, *cmdBuffer, renderPass.get(), framebuffer.get(), renderArea);
4263 
4264 			vk.cmdBindPipeline(*cmdBuffer, pipelineBindPoint, **pipelines[index]);
4265 
4266 			vk.cmdBindDescriptorSets(*cmdBuffer, pipelineBindPoint, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
4267 
4268 			if (testType == ComputeLike::COMPUTE)
4269 				vk.cmdDispatch(*cmdBuffer, numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
4270 #ifndef CTS_USES_VULKANSC
4271 			else if (testType == ComputeLike::MESH)
4272 				vk.cmdDrawMeshTasksEXT(*cmdBuffer, numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
4273 				//vk.cmdDrawMeshTasksNV(*cmdBuffer, numWorkgroups[0], 0);
4274 #endif // CTS_USES_VULKANSC
4275 			else
4276 				DE_ASSERT(false);
4277 
4278 			if (testType == ComputeLike::MESH)
4279 				endRenderPass(vk, *cmdBuffer);
4280 		}
4281 
4282 		// Make shader writes available.
4283 		const auto postShaderBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
4284 		vk.cmdPipelineBarrier(*cmdBuffer, pipelineStage, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u, &postShaderBarrier, 0u, nullptr, 0u, nullptr);
4285 
4286 		endCommandBuffer(vk, *cmdBuffer);
4287 
4288 		submitCommandsAndWait(vk, device, queue, *cmdBuffer);
4289 
4290 		std::vector<const void*> datas;
4291 
4292 		{
4293 			const Allocation& resultAlloc = resultBuffer.getAllocation();
4294 			invalidateAlloc(vk, device, resultAlloc);
4295 
4296 			// we always have our result data first
4297 			datas.push_back(resultAlloc.getHostPtr());
4298 		}
4299 
4300 		for (deUint32 i = 0; i < inputsCount; i++)
4301 		{
4302 			if (!inputBuffers[i]->isImage())
4303 			{
4304 				const Allocation& resultAlloc = inputBuffers[i]->getAllocation();
4305 				invalidateAlloc(vk, device, resultAlloc);
4306 
4307 				// we always have our result data first
4308 				datas.push_back(resultAlloc.getHostPtr());
4309 			}
4310 		}
4311 
4312 		if (!checkResult(internalData, datas, numWorkgroups, usedLocalSizes[index].getPtr(), subgroupSize))
4313 		{
4314 			failedIterations++;
4315 		}
4316 		else
4317 		{
4318 			failedIterations = failedIterations + 0;
4319 		}
4320 
4321 		context.resetCommandPoolForVKSC(device, *cmdPool);
4322 	}
4323 
4324 	if (0 < failedIterations)
4325 	{
4326 		unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
4327 
4328 		context.getTestContext().getLog()
4329 			<< TestLog::Message << valuesPassed << " / "
4330 			<< totalIterations << " values passed" << TestLog::EndMessage;
4331 
4332 		return tcu::TestStatus::fail("Failed!");
4333 	}
4334 
4335 	return tcu::TestStatus::pass("OK");
4336 }
4337 
makeComputeTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * inputs,deUint32 inputsCount,const void * internalData,CheckResultCompute checkResult,const deUint32 pipelineShaderStageCreateFlags,const deUint32 numWorkgroups[3],const deBool isRequiredSubgroupSize,const deUint32 subgroupSize,const deUint32 localSizesToTest[][3],const deUint32 localSizesToTestCount)4338 tcu::TestStatus vkt::subgroups::makeComputeTestRequiredSubgroupSize (Context&			context,
4339 																	 VkFormat			format,
4340 																	 const SSBOData*	inputs,
4341 																	 deUint32			inputsCount,
4342 																	 const void*		internalData,
4343 																	 CheckResultCompute	checkResult,
4344 																	 const deUint32		pipelineShaderStageCreateFlags,
4345 																	 const deUint32		numWorkgroups[3],
4346 																	 const deBool		isRequiredSubgroupSize,
4347 																	 const deUint32		subgroupSize,
4348 																	 const deUint32		localSizesToTest[][3],
4349 																	 const deUint32		localSizesToTestCount)
4350 {
4351 	return makeComputeOrMeshTestRequiredSubgroupSize(
4352 		ComputeLike::COMPUTE,
4353 		context,
4354 		format,
4355 		inputs,
4356 		inputsCount,
4357 		internalData,
4358 		checkResult,
4359 		pipelineShaderStageCreateFlags,
4360 		numWorkgroups,
4361 		isRequiredSubgroupSize,
4362 		subgroupSize,
4363 		localSizesToTest,
4364 		localSizesToTestCount);
4365 }
4366 
makeMeshTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * inputs,deUint32 inputsCount,const void * internalData,CheckResultCompute checkResult,const deUint32 pipelineShaderStageCreateFlags,const deUint32 numWorkgroups[3],const deBool isRequiredSubgroupSize,const deUint32 subgroupSize,const deUint32 localSizesToTest[][3],const deUint32 localSizesToTestCount)4367 tcu::TestStatus vkt::subgroups::makeMeshTestRequiredSubgroupSize (Context&				context,
4368 																  VkFormat				format,
4369 																  const SSBOData*		inputs,
4370 																  deUint32				inputsCount,
4371 																  const void*			internalData,
4372 																  CheckResultCompute	checkResult,
4373 																  const deUint32		pipelineShaderStageCreateFlags,
4374 																  const deUint32		numWorkgroups[3],
4375 																  const deBool			isRequiredSubgroupSize,
4376 																  const deUint32		subgroupSize,
4377 																  const deUint32		localSizesToTest[][3],
4378 																  const deUint32		localSizesToTestCount)
4379 {
4380 	return makeComputeOrMeshTestRequiredSubgroupSize(
4381 		ComputeLike::MESH,
4382 		context,
4383 		format,
4384 		inputs,
4385 		inputsCount,
4386 		internalData,
4387 		checkResult,
4388 		pipelineShaderStageCreateFlags,
4389 		numWorkgroups,
4390 		isRequiredSubgroupSize,
4391 		subgroupSize,
4392 		localSizesToTest,
4393 		localSizesToTestCount);
4394 }
4395 
makeComputeOrMeshTest(ComputeLike testType,Context & context,VkFormat format,const vkt::subgroups::SSBOData * inputs,deUint32 inputsCount,const void * internalData,vkt::subgroups::CheckResultCompute checkResult,deUint32 requiredSubgroupSize,const deUint32 pipelineShaderStageCreateFlags)4396 tcu::TestStatus makeComputeOrMeshTest (ComputeLike							testType,
4397 									   Context&								context,
4398 									   VkFormat								format,
4399 									   const vkt::subgroups::SSBOData*		inputs,
4400 									   deUint32								inputsCount,
4401 									   const void*							internalData,
4402 									   vkt::subgroups::CheckResultCompute	checkResult,
4403 									   deUint32								requiredSubgroupSize,
4404 									   const deUint32						pipelineShaderStageCreateFlags)
4405 {
4406 	const uint32_t	numWorkgroups[3]		= {4, 2, 2};
4407 	const bool		isRequiredSubgroupSize	= (requiredSubgroupSize != 0u);
4408 	const uint32_t	subgroupSize			= (isRequiredSubgroupSize ? requiredSubgroupSize : vkt::subgroups::getSubgroupSize(context));
4409 
4410 	const deUint32 localSizesToTestCount = 8;
4411 	deUint32 localSizesToTest[localSizesToTestCount][3] =
4412 	{
4413 		{1, 1, 1},
4414 		{subgroupSize, 1, 1},
4415 		{1, subgroupSize, 1},
4416 		{1, 1, subgroupSize},
4417 		{32, 4, 1},
4418 		{1, 4, 32},
4419 		{3, 5, 7},
4420 		{1, 1, 1} // Isn't used, just here to make double buffering checks easier
4421 	};
4422 
4423 	if (testType == ComputeLike::COMPUTE)
4424 		return makeComputeTestRequiredSubgroupSize(context, format, inputs, inputsCount, internalData, checkResult, pipelineShaderStageCreateFlags,
4425 												   numWorkgroups, isRequiredSubgroupSize, subgroupSize, localSizesToTest, localSizesToTestCount);
4426 	else
4427 		return makeMeshTestRequiredSubgroupSize(context, format, inputs, inputsCount, internalData, checkResult, pipelineShaderStageCreateFlags,
4428 												numWorkgroups, isRequiredSubgroupSize, subgroupSize, localSizesToTest, localSizesToTestCount);
4429 }
4430 
makeComputeTest(Context & context,VkFormat format,const SSBOData * inputs,deUint32 inputsCount,const void * internalData,CheckResultCompute checkResult,deUint32 requiredSubgroupSize,const deUint32 pipelineShaderStageCreateFlags)4431 tcu::TestStatus vkt::subgroups::makeComputeTest (Context&				context,
4432 												 VkFormat				format,
4433 												 const SSBOData*		inputs,
4434 												 deUint32				inputsCount,
4435 												 const void*			internalData,
4436 												 CheckResultCompute		checkResult,
4437 												 deUint32				requiredSubgroupSize,
4438 												 const deUint32			pipelineShaderStageCreateFlags)
4439 {
4440 	return makeComputeOrMeshTest(ComputeLike::COMPUTE, context, format, inputs, inputsCount, internalData, checkResult, requiredSubgroupSize, pipelineShaderStageCreateFlags);
4441 }
4442 
makeMeshTest(Context & context,VkFormat format,const SSBOData * inputs,deUint32 inputsCount,const void * internalData,CheckResultCompute checkResult,deUint32 requiredSubgroupSize,const deUint32 pipelineShaderStageCreateFlags)4443 tcu::TestStatus vkt::subgroups::makeMeshTest (Context&				context,
4444 											  VkFormat				format,
4445 											  const SSBOData*		inputs,
4446 											  deUint32				inputsCount,
4447 											  const void*			internalData,
4448 											  CheckResultCompute	checkResult,
4449 											  deUint32				requiredSubgroupSize,
4450 											  const deUint32		pipelineShaderStageCreateFlags)
4451 {
4452 	return makeComputeOrMeshTest(ComputeLike::MESH, context, format, inputs, inputsCount, internalData, checkResult, requiredSubgroupSize, pipelineShaderStageCreateFlags);
4453 }
4454 
checkShaderStageSetValidity(const VkShaderStageFlags shaderStages)4455 static inline void checkShaderStageSetValidity (const VkShaderStageFlags shaderStages)
4456 {
4457 	if (shaderStages == 0)
4458 		TCU_THROW(InternalError, "Shader stage is not specified");
4459 
4460 	// It can actually be only 1 or 0.
4461 	const deUint32 exclusivePipelinesCount	= (isAllComputeStages(shaderStages) ? 1 : 0)
4462 											+ (isAllGraphicsStages(shaderStages) ? 1 : 0)
4463 #ifndef CTS_USES_VULKANSC
4464 											+ (isAllRayTracingStages(shaderStages) ? 1 : 0)
4465 											+ (isAllMeshShadingStages(shaderStages) ? 1 : 0)
4466 #endif // CTS_USES_VULKANSC
4467 											;
4468 
4469 	if (exclusivePipelinesCount != 1)
4470 		TCU_THROW(InternalError, "Mix of shaders from different pipelines is detected");
4471 }
4472 
supportedCheckShader(Context & context,const VkShaderStageFlags shaderStages)4473 void vkt::subgroups::supportedCheckShader (Context& context, const VkShaderStageFlags shaderStages)
4474 {
4475 	checkShaderStageSetValidity(shaderStages);
4476 
4477 	if ((shaderStages & VK_SHADER_STAGE_GEOMETRY_BIT) != 0)
4478 		context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_GEOMETRY_SHADER);
4479 
4480 	if ((context.getSubgroupProperties().supportedStages & shaderStages) == 0)
4481 	{
4482 		if (isAllComputeStages(shaderStages))
4483 			TCU_FAIL("Compute shader is required to support subgroup operations");
4484 		else
4485 			TCU_THROW(NotSupportedError, "Subgroup support is not available for test shader stage(s)");
4486 	}
4487 
4488 #ifndef CTS_USES_VULKANSC
4489 	if ((VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) & shaderStages &&
4490 		context.isDeviceFunctionalitySupported("VK_KHR_portability_subset") &&
4491 		!context.getPortabilitySubsetFeatures().tessellationIsolines)
4492 	{
4493 		TCU_THROW(NotSupportedError, "VK_KHR_portability_subset: Tessellation iso lines are not supported by this implementation");
4494 	}
4495 #endif // CTS_USES_VULKANSC
4496 }
4497 
4498 
4499 namespace vkt
4500 {
4501 namespace subgroups
4502 {
4503 typedef std::vector< de::SharedPtr<BufferOrImage> > vectorBufferOrImage;
4504 
// Shader group indices. Values are consecutive starting at FIRST_GROUP (0);
// GROUP_COUNT is the number of groups.
// NOTE(review): presumably these index the ray tracing pipeline's shader groups - confirm against the users.
enum ShaderGroups
{
	FIRST_GROUP		= 0,
	RAYGEN_GROUP	= FIRST_GROUP,
	MISS_GROUP		= RAYGEN_GROUP + 1,
	HIT_GROUP		= MISS_GROUP + 1,
	CALL_GROUP		= HIT_GROUP + 1,
	GROUP_COUNT		= CALL_GROUP + 1
};
4514 
getAllRayTracingFormats()4515 const std::vector<vk::VkFormat> getAllRayTracingFormats()
4516 {
4517 	std::vector<VkFormat> formats;
4518 
4519 	formats.push_back(VK_FORMAT_R8G8B8_SINT);
4520 	formats.push_back(VK_FORMAT_R8_UINT);
4521 	formats.push_back(VK_FORMAT_R8G8B8A8_UINT);
4522 	formats.push_back(VK_FORMAT_R16G16B16_SINT);
4523 	formats.push_back(VK_FORMAT_R16_UINT);
4524 	formats.push_back(VK_FORMAT_R16G16B16A16_UINT);
4525 	formats.push_back(VK_FORMAT_R32G32B32_SINT);
4526 	formats.push_back(VK_FORMAT_R32_UINT);
4527 	formats.push_back(VK_FORMAT_R32G32B32A32_UINT);
4528 	formats.push_back(VK_FORMAT_R64G64B64_SINT);
4529 	formats.push_back(VK_FORMAT_R64_UINT);
4530 	formats.push_back(VK_FORMAT_R64G64B64A64_UINT);
4531 	formats.push_back(VK_FORMAT_R16G16B16A16_SFLOAT);
4532 	formats.push_back(VK_FORMAT_R32_SFLOAT);
4533 	formats.push_back(VK_FORMAT_R32G32B32A32_SFLOAT);
4534 	formats.push_back(VK_FORMAT_R64_SFLOAT);
4535 	formats.push_back(VK_FORMAT_R64G64B64_SFLOAT);
4536 	formats.push_back(VK_FORMAT_R64G64B64A64_SFLOAT);
4537 	formats.push_back(VK_FORMAT_R8_USCALED);
4538 	formats.push_back(VK_FORMAT_R8G8_USCALED);
4539 	formats.push_back(VK_FORMAT_R8G8B8_USCALED);
4540 	formats.push_back(VK_FORMAT_R8G8B8A8_USCALED);
4541 
4542 	return formats;
4543 }
4544 
addRayTracingNoSubgroupShader(SourceCollections & programCollection)4545 void addRayTracingNoSubgroupShader (SourceCollections& programCollection)
4546 {
4547 	const vk::ShaderBuildOptions	buildOptions	(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
4548 
4549 	const std::string rgenShaderNoSubgroups =
4550 		"#version 460 core\n"
4551 		"#extension GL_EXT_ray_tracing: require\n"
4552 		"layout(location = 0) rayPayloadEXT uvec4 payload;\n"
4553 		"layout(location = 0) callableDataEXT uvec4 callData;"
4554 		"layout(set = 1, binding = 0) uniform accelerationStructureEXT topLevelAS;\n"
4555 		"\n"
4556 		"void main()\n"
4557 		"{\n"
4558 		"  uint  rayFlags   = 0;\n"
4559 		"  uint  cullMask   = 0xFF;\n"
4560 		"  float tmin       = 0.0;\n"
4561 		"  float tmax       = 9.0;\n"
4562 		"  vec3  origin     = vec3((float(gl_LaunchIDEXT.x) + 0.5f) / float(gl_LaunchSizeEXT.x), (float(gl_LaunchIDEXT.y) + 0.5f) / float(gl_LaunchSizeEXT.y), 0.0);\n"
4563 		"  vec3  directHit  = vec3(0.0, 0.0, -1.0);\n"
4564 		"  vec3  directMiss = vec3(0.0, 0.0, +1.0);\n"
4565 		"\n"
4566 		"  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directHit, tmax, 0);\n"
4567 		"  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directMiss, tmax, 0);\n"
4568 		"  executeCallableEXT(0, 0);"
4569 		"}\n";
4570 	const std::string hitShaderNoSubgroups =
4571 		"#version 460 core\n"
4572 		"#extension GL_EXT_ray_tracing: require\n"
4573 		"hitAttributeEXT vec3 attribs;\n"
4574 		"layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
4575 		"\n"
4576 		"void main()\n"
4577 		"{\n"
4578 		"}\n";
4579 	const std::string missShaderNoSubgroups =
4580 		"#version 460 core\n"
4581 		"#extension GL_EXT_ray_tracing: require\n"
4582 		"layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
4583 		"\n"
4584 		"void main()\n"
4585 		"{\n"
4586 		"}\n";
4587 	const std::string sectShaderNoSubgroups =
4588 		"#version 460 core\n"
4589 		"#extension GL_EXT_ray_tracing: require\n"
4590 		"hitAttributeEXT vec3 hitAttribute;\n"
4591 		"\n"
4592 		"void main()\n"
4593 		"{\n"
4594 		"  reportIntersectionEXT(0.75f, 0x7Eu);\n"
4595 		"}\n";
4596 	const std::string callShaderNoSubgroups =
4597 		"#version 460 core\n"
4598 		"#extension GL_EXT_ray_tracing: require\n"
4599 		"layout(location = 0) callableDataInEXT float callData;\n"
4600 		"\n"
4601 		"void main()\n"
4602 		"{\n"
4603 		"}\n";
4604 
4605 	programCollection.glslSources.add("rgen_noSubgroup") << glu::RaygenSource		(rgenShaderNoSubgroups) << buildOptions;
4606 	programCollection.glslSources.add("ahit_noSubgroup") << glu::AnyHitSource		(hitShaderNoSubgroups)  << buildOptions;
4607 	programCollection.glslSources.add("chit_noSubgroup") << glu::ClosestHitSource	(hitShaderNoSubgroups)  << buildOptions;
4608 	programCollection.glslSources.add("miss_noSubgroup") << glu::MissSource			(missShaderNoSubgroups) << buildOptions;
4609 	programCollection.glslSources.add("sect_noSubgroup") << glu::IntersectionSource	(sectShaderNoSubgroups) << buildOptions;
4610 	programCollection.glslSources.add("call_noSubgroup") << glu::CallableSource		(callShaderNoSubgroups) << buildOptions;
4611 }
4612 
4613 #ifndef CTS_USES_VULKANSC
4614 
enumerateRayTracingShaderStages(const VkShaderStageFlags shaderStage)4615 static vector<VkShaderStageFlagBits> enumerateRayTracingShaderStages (const VkShaderStageFlags	shaderStage)
4616 {
4617 	vector<VkShaderStageFlagBits>	result;
4618 	const VkShaderStageFlagBits		shaderStageFlags[]	=
4619 	{
4620 		VK_SHADER_STAGE_RAYGEN_BIT_KHR,
4621 		VK_SHADER_STAGE_ANY_HIT_BIT_KHR,
4622 		VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,
4623 		VK_SHADER_STAGE_MISS_BIT_KHR,
4624 		VK_SHADER_STAGE_INTERSECTION_BIT_KHR,
4625 		VK_SHADER_STAGE_CALLABLE_BIT_KHR,
4626 	};
4627 
4628 	for (auto shaderStageFlag: shaderStageFlags)
4629 	{
4630 		if (0 != (shaderStage & shaderStageFlag))
4631 			result.push_back(shaderStageFlag);
4632 	}
4633 
4634 	return result;
4635 }
4636 
getRayTracingResultBinding(const VkShaderStageFlagBits shaderStage)4637 static deUint32 getRayTracingResultBinding (const VkShaderStageFlagBits shaderStage)
4638 {
4639 	const VkShaderStageFlags	shaderStageFlags[]	=
4640 	{
4641 		VK_SHADER_STAGE_RAYGEN_BIT_KHR,
4642 		VK_SHADER_STAGE_ANY_HIT_BIT_KHR,
4643 		VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,
4644 		VK_SHADER_STAGE_MISS_BIT_KHR,
4645 		VK_SHADER_STAGE_INTERSECTION_BIT_KHR,
4646 		VK_SHADER_STAGE_CALLABLE_BIT_KHR,
4647 	};
4648 
4649 	for (deUint32 shaderStageNdx = 0; shaderStageNdx < DE_LENGTH_OF_ARRAY(shaderStageFlags); ++shaderStageNdx)
4650 	{
4651 		if (0 != (shaderStage & shaderStageFlags[shaderStageNdx]))
4652 		{
4653 			DE_ASSERT(0 == (shaderStage & (~shaderStageFlags[shaderStageNdx])));
4654 
4655 			return shaderStageNdx;
4656 		}
4657 	}
4658 
4659 	TCU_THROW(InternalError, "Non-raytracing stage specified or no stage at all");
4660 }
4661 
makeRayTracingInputBuffers(Context & context,VkFormat format,const SSBOData * extraDatas,deUint32 extraDatasCount,const vector<VkShaderStageFlagBits> & stagesVector)4662 static vectorBufferOrImage makeRayTracingInputBuffers (Context&								context,
4663 													   VkFormat								format,
4664 													   const SSBOData*						extraDatas,
4665 													   deUint32								extraDatasCount,
4666 													   const vector<VkShaderStageFlagBits>&	stagesVector)
4667 {
4668 	const size_t		stagesCount		= stagesVector.size();
4669 	const VkDeviceSize	shaderSize		= getMaxWidth();
4670 	const VkDeviceSize	inputBufferSize	= getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize;
4671 	vectorBufferOrImage	inputBuffers	(stagesCount + extraDatasCount);
4672 
4673 	// The implicit result SSBO we use to store our outputs from the shader
4674 	for (size_t stageNdx = 0u; stageNdx < stagesCount; ++stageNdx)
4675 		inputBuffers[stageNdx]	= de::SharedPtr<BufferOrImage>(new Buffer(context, inputBufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT));
4676 
4677 	for (size_t stageNdx = stagesCount; stageNdx < stagesCount + extraDatasCount; ++stageNdx)
4678 	{
4679 		const size_t	datasNdx	= stageNdx - stagesCount;
4680 
4681 		if (extraDatas[datasNdx].isImage())
4682 		{
4683 			inputBuffers[stageNdx] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
4684 		}
4685 		else
4686 		{
4687 			const auto usage	= (extraDatas[datasNdx].isUBO() ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT : VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
4688 			const auto size		= getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) * extraDatas[datasNdx].numElements;
4689 			inputBuffers[stageNdx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, usage));
4690 		}
4691 
4692 		initializeMemory(context, inputBuffers[stageNdx]->getAllocation(), extraDatas[datasNdx]);
4693 	}
4694 
4695 	return inputBuffers;
4696 }
4697 
makeRayTracingDescriptorSetLayout(Context & context,const SSBOData * extraDatas,deUint32 extraDatasCount,const vector<VkShaderStageFlagBits> & stagesVector,const vectorBufferOrImage & inputBuffers)4698 static Move<VkDescriptorSetLayout> makeRayTracingDescriptorSetLayout (Context&								context,
4699 																	  const SSBOData*						extraDatas,
4700 																	  deUint32								extraDatasCount,
4701 																	  const vector<VkShaderStageFlagBits>&	stagesVector,
4702 																	  const vectorBufferOrImage&			inputBuffers)
4703 {
4704 	const DeviceInterface&		vkd				= context.getDeviceInterface();
4705 	const VkDevice				device			= context.getDevice();
4706 	const size_t				stagesCount		= stagesVector.size();
4707 	DescriptorSetLayoutBuilder	layoutBuilder;
4708 
4709 	// The implicit result SSBO we use to store our outputs from the shader
4710 	for (size_t stageNdx = 0u; stageNdx < stagesVector.size(); ++stageNdx)
4711 	{
4712 		const deUint32	stageBinding	= getRayTracingResultBinding(stagesVector[stageNdx]);
4713 
4714 		layoutBuilder.addIndexedBinding(inputBuffers[stageNdx]->getType(), 1, stagesVector[stageNdx], stageBinding, DE_NULL);
4715 	}
4716 
4717 	for (size_t stageNdx = stagesCount; stageNdx < stagesCount + extraDatasCount; ++stageNdx)
4718 	{
4719 		const size_t datasNdx = stageNdx - stagesCount;
4720 
4721 		layoutBuilder.addIndexedBinding(inputBuffers[stageNdx]->getType(), 1, extraDatas[datasNdx].stages, extraDatas[datasNdx].binding, DE_NULL);
4722 	}
4723 
4724 	return layoutBuilder.build(vkd, device);
4725 }
4726 
makeRayTracingDescriptorSetLayoutAS(Context & context)4727 static Move<VkDescriptorSetLayout> makeRayTracingDescriptorSetLayoutAS (Context&	context)
4728 {
4729 	const DeviceInterface&		vkd				= context.getDeviceInterface();
4730 	const VkDevice				device			= context.getDevice();
4731 	DescriptorSetLayoutBuilder	layoutBuilder;
4732 
4733 	layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, VK_SHADER_STAGE_RAYGEN_BIT_KHR);
4734 
4735 	return layoutBuilder.build(vkd, device);
4736 }
4737 
makeRayTracingDescriptorPool(Context & context,const vectorBufferOrImage & inputBuffers)4738 static Move<VkDescriptorPool> makeRayTracingDescriptorPool (Context&						context,
4739 															const vectorBufferOrImage&		inputBuffers)
4740 {
4741 	const DeviceInterface&	vkd					= context.getDeviceInterface();
4742 	const VkDevice			device				= context.getDevice();
4743 	const deUint32			maxDescriptorSets	= 2u;
4744 	DescriptorPoolBuilder	poolBuilder;
4745 	Move<VkDescriptorPool>	result;
4746 
4747 	if (inputBuffers.size() > 0)
4748 	{
4749 		for (size_t ndx = 0u; ndx < inputBuffers.size(); ndx++)
4750 			poolBuilder.addType(inputBuffers[ndx]->getType());
4751 	}
4752 
4753 	poolBuilder.addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR);
4754 
4755 	result = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, maxDescriptorSets);
4756 
4757 	return result;
4758 }
4759 
makeRayTracingDescriptorSet(Context & context,VkDescriptorPool descriptorPool,VkDescriptorSetLayout descriptorSetLayout,const SSBOData * extraDatas,deUint32 extraDatasCount,const vector<VkShaderStageFlagBits> & stagesVector,const vectorBufferOrImage & inputBuffers)4760 static Move<VkDescriptorSet> makeRayTracingDescriptorSet (Context&								context,
4761 														  VkDescriptorPool						descriptorPool,
4762 														  VkDescriptorSetLayout					descriptorSetLayout,
4763 														  const SSBOData*						extraDatas,
4764 														  deUint32								extraDatasCount,
4765 														  const vector<VkShaderStageFlagBits>&	stagesVector,
4766 														  const vectorBufferOrImage&			inputBuffers)
4767 {
4768 	const DeviceInterface&	vkd				= context.getDeviceInterface();
4769 	const VkDevice			device			= context.getDevice();
4770 	const size_t			stagesCount		= stagesVector.size();
4771 	Move<VkDescriptorSet>	descriptorSet;
4772 
4773 	if (inputBuffers.size() > 0)
4774 	{
4775 		DescriptorSetUpdateBuilder updateBuilder;
4776 
4777 		// Create descriptor set
4778 		descriptorSet = makeDescriptorSet(vkd, device, descriptorPool, descriptorSetLayout);
4779 
4780 		for (size_t ndx = 0u; ndx < stagesCount + extraDatasCount; ndx++)
4781 		{
4782 			const deUint32	binding	= (ndx < stagesCount)
4783 									? getRayTracingResultBinding(stagesVector[ndx])
4784 									: extraDatas[ndx - stagesCount].binding;
4785 
4786 			if (inputBuffers[ndx]->isImage())
4787 			{
4788 				const VkDescriptorImageInfo		info	= makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(), inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
4789 
4790 				updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(binding), inputBuffers[ndx]->getType(), &info);
4791 			}
4792 			else
4793 			{
4794 				const VkDescriptorBufferInfo	info	= makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(), 0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
4795 
4796 				updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(binding), inputBuffers[ndx]->getType(), &info);
4797 			}
4798 		}
4799 
4800 		updateBuilder.update(vkd, device);
4801 	}
4802 
4803 	return descriptorSet;
4804 }
4805 
makeRayTracingDescriptorSetAS(Context & context,VkDescriptorPool descriptorPool,VkDescriptorSetLayout descriptorSetLayout,de::MovePtr<TopLevelAccelerationStructure> & topLevelAccelerationStructure)4806 static Move<VkDescriptorSet> makeRayTracingDescriptorSetAS (Context&									context,
4807 															VkDescriptorPool							descriptorPool,
4808 															VkDescriptorSetLayout						descriptorSetLayout,
4809 															de::MovePtr<TopLevelAccelerationStructure>&	topLevelAccelerationStructure)
4810 {
4811 	const DeviceInterface&								vkd										= context.getDeviceInterface();
4812 	const VkDevice										device									= context.getDevice();
4813 	const TopLevelAccelerationStructure*				topLevelAccelerationStructurePtr		= topLevelAccelerationStructure.get();
4814 	const VkWriteDescriptorSetAccelerationStructureKHR	accelerationStructureWriteDescriptorSet	=
4815 	{
4816 		VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,	//  VkStructureType						sType;
4817 		DE_NULL,															//  const void*							pNext;
4818 		1u,																	//  deUint32							accelerationStructureCount;
4819 		topLevelAccelerationStructurePtr->getPtr(),							//  const VkAccelerationStructureKHR*	pAccelerationStructures;
4820 	};
4821 	Move<VkDescriptorSet>								descriptorSet = makeDescriptorSet(vkd, device, descriptorPool, descriptorSetLayout);
4822 
4823 	DescriptorSetUpdateBuilder()
4824 		.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &accelerationStructureWriteDescriptorSet)
4825 		.update(vkd, device);
4826 
4827 	return descriptorSet;
4828 }
4829 
makeRayTracingPipelineLayout(Context & context,const VkDescriptorSetLayout descriptorSetLayout0,const VkDescriptorSetLayout descriptorSetLayout1)4830 static Move<VkPipelineLayout> makeRayTracingPipelineLayout (Context&					context,
4831 															const VkDescriptorSetLayout	descriptorSetLayout0,
4832 															const VkDescriptorSetLayout	descriptorSetLayout1)
4833 {
4834 	const DeviceInterface&						vkd							= context.getDeviceInterface();
4835 	const VkDevice								device						= context.getDevice();
4836 	const std::vector<VkDescriptorSetLayout>	descriptorSetLayouts		{ descriptorSetLayout0, descriptorSetLayout1 };
4837 	const deUint32								descriptorSetLayoutsSize	= static_cast<deUint32>(descriptorSetLayouts.size());
4838 
4839 	return makePipelineLayout(vkd, device, descriptorSetLayoutsSize, descriptorSetLayouts.data());
4840 }
4841 
createTopAccelerationStructure(Context & context,de::SharedPtr<BottomLevelAccelerationStructure> bottomLevelAccelerationStructure)4842 static de::MovePtr<TopLevelAccelerationStructure> createTopAccelerationStructure (Context&											context,
4843 																				  de::SharedPtr<BottomLevelAccelerationStructure>	bottomLevelAccelerationStructure)
4844 {
4845 	const DeviceInterface&						vkd			= context.getDeviceInterface();
4846 	const VkDevice								device		= context.getDevice();
4847 	Allocator&									allocator	= context.getDefaultAllocator();
4848 	de::MovePtr<TopLevelAccelerationStructure>	result		= makeTopLevelAccelerationStructure();
4849 
4850 	result->setInstanceCount(1);
4851 	result->addInstance(bottomLevelAccelerationStructure);
4852 	result->create(vkd, device, allocator);
4853 
4854 	return result;
4855 }
4856 
createBottomAccelerationStructure(Context & context)4857 static de::SharedPtr<BottomLevelAccelerationStructure> createBottomAccelerationStructure (Context&	context)
4858 {
4859 	const DeviceInterface&							vkd				= context.getDeviceInterface();
4860 	const VkDevice									device			= context.getDevice();
4861 	Allocator&										allocator		= context.getDefaultAllocator();
4862 	de::MovePtr<BottomLevelAccelerationStructure>	result			= makeBottomLevelAccelerationStructure();
4863 	const std::vector<tcu::Vec3>					geometryData	{ tcu::Vec3(-1.0f, -1.0f, -2.0f), tcu::Vec3(+1.0f, +1.0f, -1.0f) };
4864 
4865 	result->setGeometryCount(1u);
4866 	result->addGeometry(geometryData, false);
4867 	result->create(vkd, device, allocator, 0u);
4868 
4869 	return de::SharedPtr<BottomLevelAccelerationStructure>(result.release());
4870 }
4871 
makeRayTracingPipeline(Context & context,const VkShaderStageFlags shaderStageTested,const VkPipelineLayout pipelineLayout,const deUint32 shaderStageCreateFlags[6],const deUint32 requiredSubgroupSize[6],Move<VkPipeline> & pipelineOut)4872 static de::MovePtr<RayTracingPipeline> makeRayTracingPipeline (Context&					context,
4873 															   const VkShaderStageFlags	shaderStageTested,
4874 															   const VkPipelineLayout	pipelineLayout,
4875 															   const deUint32			shaderStageCreateFlags[6],
4876 															   const deUint32			requiredSubgroupSize[6],
4877 															   Move<VkPipeline>&		pipelineOut)
4878 {
4879 	const DeviceInterface&											vkd									= context.getDeviceInterface();
4880 	const VkDevice													device								= context.getDevice();
4881 	BinaryCollection&												collection							= context.getBinaryCollection();
4882 	const char*														shaderRgenName						= (0 != (shaderStageTested & VK_SHADER_STAGE_RAYGEN_BIT_KHR))			? "rgen" : "rgen_noSubgroup";
4883 	const char*														shaderAhitName						= (0 != (shaderStageTested & VK_SHADER_STAGE_ANY_HIT_BIT_KHR))			? "ahit" : "ahit_noSubgroup";
4884 	const char*														shaderChitName						= (0 != (shaderStageTested & VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR))		? "chit" : "chit_noSubgroup";
4885 	const char*														shaderMissName						= (0 != (shaderStageTested & VK_SHADER_STAGE_MISS_BIT_KHR))				? "miss" : "miss_noSubgroup";
4886 	const char*														shaderSectName						= (0 != (shaderStageTested & VK_SHADER_STAGE_INTERSECTION_BIT_KHR))		? "sect" : "sect_noSubgroup";
4887 	const char*														shaderCallName						= (0 != (shaderStageTested & VK_SHADER_STAGE_CALLABLE_BIT_KHR))			? "call" : "call_noSubgroup";
4888 	const VkShaderModuleCreateFlags									noShaderModuleCreateFlags			= static_cast<VkShaderModuleCreateFlags>(0);
4889 	Move<VkShaderModule>											rgenShaderModule					= createShaderModule(vkd, device, collection.get(shaderRgenName), noShaderModuleCreateFlags);
4890 	Move<VkShaderModule>											ahitShaderModule					= createShaderModule(vkd, device, collection.get(shaderAhitName), noShaderModuleCreateFlags);
4891 	Move<VkShaderModule>											chitShaderModule					= createShaderModule(vkd, device, collection.get(shaderChitName), noShaderModuleCreateFlags);
4892 	Move<VkShaderModule>											missShaderModule					= createShaderModule(vkd, device, collection.get(shaderMissName), noShaderModuleCreateFlags);
4893 	Move<VkShaderModule>											sectShaderModule					= createShaderModule(vkd, device, collection.get(shaderSectName), noShaderModuleCreateFlags);
4894 	Move<VkShaderModule>											callShaderModule					= createShaderModule(vkd, device, collection.get(shaderCallName), noShaderModuleCreateFlags);
4895 	const VkPipelineShaderStageCreateFlags							noPipelineShaderStageCreateFlags	= static_cast<VkPipelineShaderStageCreateFlags>(0);
4896 	const VkPipelineShaderStageCreateFlags							rgenPipelineShaderStageCreateFlags	= (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[0];
4897 	const VkPipelineShaderStageCreateFlags							ahitPipelineShaderStageCreateFlags	= (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[1];
4898 	const VkPipelineShaderStageCreateFlags							chitPipelineShaderStageCreateFlags	= (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[2];
4899 	const VkPipelineShaderStageCreateFlags							missPipelineShaderStageCreateFlags	= (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[3];
4900 	const VkPipelineShaderStageCreateFlags							sectPipelineShaderStageCreateFlags	= (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[4];
4901 	const VkPipelineShaderStageCreateFlags							callPipelineShaderStageCreateFlags	= (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[5];
4902 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT	requiredSubgroupSizeCreateInfo[6]	=
4903 	{
4904 		{
4905 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4906 			DE_NULL,
4907 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[0] : 0u,
4908 		},
4909 		{
4910 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4911 			DE_NULL,
4912 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[1] : 0u,
4913 		},
4914 		{
4915 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4916 			DE_NULL,
4917 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[2] : 0u,
4918 		},
4919 		{
4920 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4921 			DE_NULL,
4922 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[3] : 0u,
4923 		},
4924 		{
4925 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4926 			DE_NULL,
4927 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[4] : 0u,
4928 		},
4929 		{
4930 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4931 			DE_NULL,
4932 			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[5] : 0u,
4933 		},
4934 	};
4935 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT*	rgenRequiredSubgroupSizeCreateInfo	= (requiredSubgroupSizeCreateInfo[0].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[0];
4936 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT*	ahitRequiredSubgroupSizeCreateInfo	= (requiredSubgroupSizeCreateInfo[1].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[1];
4937 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT*	chitRequiredSubgroupSizeCreateInfo	= (requiredSubgroupSizeCreateInfo[2].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[2];
4938 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT*	missRequiredSubgroupSizeCreateInfo	= (requiredSubgroupSizeCreateInfo[3].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[3];
4939 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT*	sectRequiredSubgroupSizeCreateInfo	= (requiredSubgroupSizeCreateInfo[4].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[4];
4940 	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT*	callRequiredSubgroupSizeCreateInfo	= (requiredSubgroupSizeCreateInfo[5].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[5];
4941 	de::MovePtr<RayTracingPipeline>									rayTracingPipeline					= de::newMovePtr<RayTracingPipeline>();
4942 
4943 	rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR		, rgenShaderModule, RAYGEN_GROUP,	DE_NULL, rgenPipelineShaderStageCreateFlags, rgenRequiredSubgroupSizeCreateInfo);
4944 	rayTracingPipeline->addShader(VK_SHADER_STAGE_ANY_HIT_BIT_KHR		, ahitShaderModule, HIT_GROUP,		DE_NULL, ahitPipelineShaderStageCreateFlags, ahitRequiredSubgroupSizeCreateInfo);
4945 	rayTracingPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR	, chitShaderModule, HIT_GROUP,		DE_NULL, chitPipelineShaderStageCreateFlags, chitRequiredSubgroupSizeCreateInfo);
4946 	rayTracingPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR			, missShaderModule, MISS_GROUP,		DE_NULL, missPipelineShaderStageCreateFlags, missRequiredSubgroupSizeCreateInfo);
4947 	rayTracingPipeline->addShader(VK_SHADER_STAGE_INTERSECTION_BIT_KHR	, sectShaderModule, HIT_GROUP,		DE_NULL, sectPipelineShaderStageCreateFlags, sectRequiredSubgroupSizeCreateInfo);
4948 	rayTracingPipeline->addShader(VK_SHADER_STAGE_CALLABLE_BIT_KHR		, callShaderModule, CALL_GROUP,		DE_NULL, callPipelineShaderStageCreateFlags, callRequiredSubgroupSizeCreateInfo);
4949 
4950 	// Must execute createPipeline here, due to pNext pointers in calls to addShader are local
4951 	pipelineOut	= rayTracingPipeline->createPipeline(vkd, device, pipelineLayout);
4952 
4953 	return rayTracingPipeline;
4954 }
4955 
getPossibleRayTracingSubgroupStages(Context & context,const VkShaderStageFlags testedStages)4956 VkShaderStageFlags getPossibleRayTracingSubgroupStages (Context& context, const VkShaderStageFlags testedStages)
4957 {
4958 	const VkPhysicalDeviceSubgroupProperties&	subgroupProperties	= context.getSubgroupProperties();
4959 	const VkShaderStageFlags					stages				= testedStages & subgroupProperties.supportedStages;
4960 
4961 	DE_ASSERT(isAllRayTracingStages(testedStages));
4962 
4963 	return stages;
4964 }
4965 
allRayTracingStages(Context & context,VkFormat format,const SSBOData * extraDatas,deUint32 extraDataCount,const void * internalData,const VerificationFunctor & checkResult,const VkShaderStageFlags shaderStage)4966 tcu::TestStatus allRayTracingStages (Context&						context,
4967 									 VkFormat						format,
4968 									 const SSBOData*				extraDatas,
4969 									 deUint32						extraDataCount,
4970 									 const void*					internalData,
4971 									 const VerificationFunctor&		checkResult,
4972 									 const VkShaderStageFlags		shaderStage)
4973 {
4974 	return vkt::subgroups::allRayTracingStagesRequiredSubgroupSize(context,
4975 																   format,
4976 																   extraDatas,
4977 																   extraDataCount,
4978 																   internalData,
4979 																   checkResult,
4980 																   shaderStage,
4981 																   DE_NULL,
4982 																   DE_NULL);
4983 }
4984 
allRayTracingStagesRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * extraDatas,deUint32 extraDatasCount,const void * internalData,const VerificationFunctor & checkResult,const VkShaderStageFlags shaderStageTested,const deUint32 shaderStageCreateFlags[6],const deUint32 requiredSubgroupSize[6])4985 tcu::TestStatus allRayTracingStagesRequiredSubgroupSize (Context&					context,
4986 														 VkFormat					format,
4987 														 const SSBOData*			extraDatas,
4988 														 deUint32					extraDatasCount,
4989 														 const void*				internalData,
4990 														 const VerificationFunctor&	checkResult,
4991 														 const VkShaderStageFlags	shaderStageTested,
4992 														 const deUint32				shaderStageCreateFlags[6],
4993 														 const deUint32				requiredSubgroupSize[6])
4994 {
4995 	const DeviceInterface&							vkd									= context.getDeviceInterface();
4996 	const VkDevice									device								= context.getDevice();
4997 	const VkQueue									queue								= context.getUniversalQueue();
4998 	const deUint32									queueFamilyIndex					= context.getUniversalQueueFamilyIndex();
4999 	Allocator&										allocator							= context.getDefaultAllocator();
5000 	const deUint32									subgroupSize						= getSubgroupSize(context);
5001 	const deUint32									maxWidth							= getMaxWidth();
5002 	const vector<VkShaderStageFlagBits>				stagesVector						= enumerateRayTracingShaderStages(shaderStageTested);
5003 	const deUint32									stagesCount							= static_cast<deUint32>(stagesVector.size());
5004 	de::SharedPtr<BottomLevelAccelerationStructure>	bottomLevelAccelerationStructure	= createBottomAccelerationStructure(context);
5005 	de::MovePtr<TopLevelAccelerationStructure>		topLevelAccelerationStructure		= createTopAccelerationStructure(context, bottomLevelAccelerationStructure);
5006 	vectorBufferOrImage								inputBuffers						= makeRayTracingInputBuffers(context, format, extraDatas, extraDatasCount, stagesVector);
5007 	const Move<VkDescriptorSetLayout>				descriptorSetLayout					= makeRayTracingDescriptorSetLayout(context, extraDatas, extraDatasCount, stagesVector, inputBuffers);
5008 	const Move<VkDescriptorSetLayout>				descriptorSetLayoutAS				= makeRayTracingDescriptorSetLayoutAS(context);
5009 	const Move<VkPipelineLayout>					pipelineLayout						= makeRayTracingPipelineLayout(context, *descriptorSetLayout, *descriptorSetLayoutAS);
5010 	Move<VkPipeline>								pipeline							= Move<VkPipeline>();
5011 	const de::MovePtr<RayTracingPipeline>			rayTracingPipeline					= makeRayTracingPipeline(context, shaderStageTested, *pipelineLayout, shaderStageCreateFlags, requiredSubgroupSize, pipeline);
5012 	const deUint32									shaderGroupHandleSize				= context.getRayTracingPipelineProperties().shaderGroupHandleSize;
5013 	const deUint32									shaderGroupBaseAlignment			= context.getRayTracingPipelineProperties().shaderGroupBaseAlignment;
5014 	de::MovePtr<BufferWithMemory>					rgenShaderBindingTable				= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, RAYGEN_GROUP, 1u);
5015 	de::MovePtr<BufferWithMemory>					missShaderBindingTable				= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, MISS_GROUP,   1u);
5016 	de::MovePtr<BufferWithMemory>					hitsShaderBindingTable				= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, HIT_GROUP,    1u);
5017 	de::MovePtr<BufferWithMemory>					callShaderBindingTable				= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, CALL_GROUP,   1u);
5018 	const VkStridedDeviceAddressRegionKHR			rgenShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, rgenShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
5019 	const VkStridedDeviceAddressRegionKHR			missShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, missShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
5020 	const VkStridedDeviceAddressRegionKHR			hitsShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, hitsShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
5021 	const VkStridedDeviceAddressRegionKHR			callShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, callShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
5022 	const Move<VkDescriptorPool>					descriptorPool						= makeRayTracingDescriptorPool(context, inputBuffers);
5023 	const Move<VkDescriptorSet>						descriptorSet						= makeRayTracingDescriptorSet(context, *descriptorPool, *descriptorSetLayout, extraDatas, extraDatasCount, stagesVector, inputBuffers);
5024 	const Move<VkDescriptorSet>						descriptorSetAS						= makeRayTracingDescriptorSetAS(context, *descriptorPool, *descriptorSetLayoutAS, topLevelAccelerationStructure);
5025 	const Move<VkCommandPool>						cmdPool								= makeCommandPool(vkd, device, queueFamilyIndex);
5026 	const Move<VkCommandBuffer>						cmdBuffer							= makeCommandBuffer(context, *cmdPool);
5027 	deUint32										passIterations						= 0u;
5028 	deUint32										failIterations						= 0u;
5029 
5030 	DE_ASSERT(shaderStageTested != 0);
5031 
5032 	for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
5033 	{
5034 
5035 		for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
5036 		{
5037 			// re-init the data
5038 			const Allocation& alloc = inputBuffers[ndx]->getAllocation();
5039 
5040 			initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
5041 		}
5042 
5043 		beginCommandBuffer(vkd, *cmdBuffer);
5044 		{
5045 			vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);
5046 
5047 			bottomLevelAccelerationStructure->build(vkd, device, *cmdBuffer);
5048 			topLevelAccelerationStructure->build(vkd, device, *cmdBuffer);
5049 
5050 			vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 1u, 1u, &descriptorSetAS.get(), 0u, DE_NULL);
5051 
5052 			if (stagesCount + extraDatasCount > 0)
5053 				vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
5054 
5055 			cmdTraceRays(vkd,
5056 				*cmdBuffer,
5057 				&rgenShaderBindingTableRegion,
5058 				&missShaderBindingTableRegion,
5059 				&hitsShaderBindingTableRegion,
5060 				&callShaderBindingTableRegion,
5061 				width, 1, 1);
5062 
5063 			const VkMemoryBarrier	postTraceMemoryBarrier	= makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
5064 			cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, VK_PIPELINE_STAGE_HOST_BIT, &postTraceMemoryBarrier);
5065 		}
5066 		endCommandBuffer(vkd, *cmdBuffer);
5067 
5068 		submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
5069 
5070 		for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
5071 		{
5072 			std::vector<const void*> datas;
5073 
5074 			if (!inputBuffers[ndx]->isImage())
5075 			{
5076 				const Allocation& resultAlloc = inputBuffers[ndx]->getAllocation();
5077 
5078 				invalidateAlloc(vkd, device, resultAlloc);
5079 
5080 				// we always have our result data first
5081 				datas.push_back(resultAlloc.getHostPtr());
5082 			}
5083 
5084 			for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
5085 			{
5086 				const deUint32 datasNdx = index - stagesCount;
5087 
5088 				if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
5089 				{
5090 					const Allocation& resultAlloc = inputBuffers[index]->getAllocation();
5091 
5092 					invalidateAlloc(vkd, device, resultAlloc);
5093 
5094 					// we always have our result data first
5095 					datas.push_back(resultAlloc.getHostPtr());
5096 				}
5097 			}
5098 
5099 			if (!checkResult(internalData, datas, width, subgroupSize, false))
5100 				failIterations++;
5101 			else
5102 				passIterations++;
5103 		}
5104 
5105 		context.resetCommandPoolForVKSC(device, *cmdPool);
5106 	}
5107 
5108 	if (failIterations > 0 || passIterations == 0)
5109 		return tcu::TestStatus::fail("Failed " + de::toString(failIterations) + " out of " + de::toString(failIterations + passIterations) + " iterations.");
5110 	else
5111 		return tcu::TestStatus::pass("OK");
5112 }
5113 #endif // CTS_USES_VULKANSC
5114 
5115 } // namespace subgroups
} // namespace vkt
5117