• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2021 Google LLC.
6  *
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief Tests that compute shaders have a subgroup size that is uniform in
23  * command scope.
24  *//*--------------------------------------------------------------------*/
25 
26 #include "deUniquePtr.hpp"
27 
28 #include "vkRef.hpp"
29 #include "vkRefUtil.hpp"
30 #include "vkPrograms.hpp"
31 #include "vkMemUtil.hpp"
32 #include "vkBuilderUtil.hpp"
33 #include "vkCmdUtil.hpp"
34 #include "vkObjUtil.hpp"
35 #include "vkTypeUtil.hpp"
36 #include "vkImageWithMemory.hpp"
37 #include "vkBarrierUtil.hpp"
38 
39 #include "vktTestCaseUtil.hpp"
40 
41 using namespace vk;
42 
43 namespace vkt
44 {
45 namespace subgroups
46 {
47 namespace
48 {
49 using std::vector;
50 using de::MovePtr;
51 
// Test instance that runs two back-to-back compute dispatches with differing
// workgroup sizes and verifies that every subgroup size reported by the
// shaders is identical, i.e. the subgroup size is uniform in command scope.
class MultipleDispatchesUniformSubgroupSizeInstance : public TestInstance
{
public:
					MultipleDispatchesUniformSubgroupSizeInstance	(Context&	context);
	// Records, submits and checks the dispatches; returns pass/fail status.
	tcu::TestStatus	iterate											(void);
};
58 
MultipleDispatchesUniformSubgroupSizeInstance(Context & context)59 MultipleDispatchesUniformSubgroupSizeInstance::MultipleDispatchesUniformSubgroupSizeInstance	(Context&	context)
60 	:TestInstance																				(context)
61 {
62 }
63 
// Records two compute dispatches per iteration pair (localSize1, localSize2),
// each writing the shader-observed gl_SubgroupSize into its own half of a
// storage buffer, then checks on the host that all nonzero recorded sizes
// agree. Workgroup sizes are varied via specialization constant 0.
tcu::TestStatus MultipleDispatchesUniformSubgroupSizeInstance::iterate (void)
{
	const DeviceInterface&				vk						= m_context.getDeviceInterface();
	const VkDevice						device					= m_context.getDevice();
	Allocator&							allocator				= m_context.getDefaultAllocator();
	const VkQueue						queue					= m_context.getUniversalQueue();
	const deUint32						queueFamilyIndex		= m_context.getUniversalQueueFamilyIndex();

	const Move<VkCommandPool>			cmdPool					= createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex);
	const Move<VkCommandBuffer>			cmdBuffer				= allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

	Move<VkShaderModule>				computeShader			= createShaderModule (vk, device, m_context.getBinaryCollection().get("comp"), 0u);

	// The number of invocations in a workgroup.
	const deUint32						maxLocalSize			= m_context.getDeviceProperties().limits.maxComputeWorkGroupSize[0];

	// Create a storage buffer to hold the sizes of subgroups.
	// Two halves of maxLocalSize entries each: first half for the first
	// dispatch, second half (addressed via a dynamic offset) for the second.
	const VkDeviceSize					bufferSize				= maxLocalSize * 2 * sizeof(deUint32);

	const VkBufferCreateInfo			resultBufferCreateInfo	= makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
	Move<VkBuffer>						resultBuffer			= createBuffer(vk, device, &resultBufferCreateInfo);
	MovePtr<Allocation>					resultBufferMemory		= allocator.allocate(getBufferMemoryRequirements(vk, device, *resultBuffer), MemoryRequirement::HostVisible);

	VK_CHECK(vk.bindBufferMemory(device, *resultBuffer, resultBufferMemory->getMemory(), resultBufferMemory->getOffset()));

	// Build descriptors for the storage buffer.
	// A DYNAMIC storage buffer lets the same descriptor set address either
	// half of the buffer just by changing the dynamic offset at bind time.
	const Unique<VkDescriptorPool>		descriptorPool			(DescriptorPoolBuilder().addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
																						.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
	const auto							descriptorSetLayout1	(DescriptorSetLayoutBuilder().addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, VK_SHADER_STAGE_COMPUTE_BIT)
																							 .build(vk, device));
	// Descriptor range covers one half (bufferSize minus maxLocalSize words),
	// so offset 0 sees the first half and offset maxLocalSize*4 the second.
	const VkDescriptorBufferInfo		resultInfo				= makeDescriptorBufferInfo(*resultBuffer, 0u,
																						   (VkDeviceSize) bufferSize - maxLocalSize * sizeof(deUint32));

	const VkDescriptorSetAllocateInfo	allocInfo				=
	{
		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,	// sType
		DE_NULL,										// pNext
		*descriptorPool,								// descriptorPool
		1u,												// descriptorSetCount
		&(*descriptorSetLayout1)						// pSetLayouts
	};

	Move<VkDescriptorSet>				descriptorSet			= allocateDescriptorSet(vk, device, &allocInfo);
	DescriptorSetUpdateBuilder			builder;

	builder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, &resultInfo);
	builder.update(vk, device);

	// Compute pipeline
	const Move<VkPipelineLayout>		computePipelineLayout	= makePipelineLayout (vk, device, *descriptorSetLayout1);

	for (deUint32 localSize1 = 8; localSize1 < maxLocalSize + 1; localSize1 *= 2)
	{
		for (deUint32 localSize2 = 8; localSize2 < maxLocalSize + 1; localSize2 *= 2)
		{
			// On each iteration, change the number of invocations which might affect
			// the subgroup size if the driver doesn't behave as expected.
			// Specialization constant 0 feeds local_size_x_id in the shader.
			const VkSpecializationMapEntry			entries					=
			{
				0u,					// deUint32 constantID;
				0u,					// deUint32 offset;
				sizeof(localSize1)	// size_t size;
			};
			const VkSpecializationInfo				specInfo				=
			{
				1,					// mapEntryCount
				&entries,			// pMapEntries
				sizeof(localSize1),	// dataSize
				&localSize1			// pData
			};
			const VkSpecializationInfo				specInfo2				=
			{
				1,					// mapEntryCount
				&entries,			// pMapEntries
				sizeof(localSize2),	// dataSize
				&localSize2			// pData
			};

			// ALLOW_VARYING_SUBGROUP_SIZE permits the implementation to pick
			// any supported subgroup size; the test asserts it still picks the
			// same one for both dispatches in this command scope.
			const VkPipelineShaderStageCreateInfo	shaderStageCreateInfo	=
			{
				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,					// sType
				DE_NULL,																// pNext
				VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT,	// flags
				VK_SHADER_STAGE_COMPUTE_BIT,											// stage
				*computeShader,															// module
				"main",																	// pName
				&specInfo,																// pSpecializationInfo
			};

			const VkPipelineShaderStageCreateInfo	shaderStageCreateInfo2	=
			{
				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,					// sType
				DE_NULL,																// pNext
				VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT,	// flags
				VK_SHADER_STAGE_COMPUTE_BIT,											// stage
				*computeShader,															// module
				"main",																	// pName
				&specInfo2,																// pSpecializationInfo
			};

			const VkComputePipelineCreateInfo		pipelineCreateInfo		=
			{
				VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,	// sType
				DE_NULL,										// pNext
				0u,												// flags
				shaderStageCreateInfo,							// stage
				*computePipelineLayout,							// layout
				(VkPipeline) 0,									// basePipelineHandle
				0u,												// basePipelineIndex
			};

			const VkComputePipelineCreateInfo		pipelineCreateInfo2		=
			{
				VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,	// sType
				DE_NULL,										// pNext
				0u,												// flags
				shaderStageCreateInfo2,							// stage
				*computePipelineLayout,							// layout
				(VkPipeline) 0,									// basePipelineHandle
				0u,												// basePipelineIndex
			};

			Move<VkPipeline>						computePipeline			= createComputePipeline(vk, device, (VkPipelineCache) 0u, &pipelineCreateInfo);
			Move<VkPipeline>						computePipeline2		= createComputePipeline(vk, device, (VkPipelineCache) 0u, &pipelineCreateInfo2);

			beginCommandBuffer(vk, *cmdBuffer);

			// Clears the values written on the previous iteration.
			vk.cmdFillBuffer(*cmdBuffer, *resultBuffer, 0u, VK_WHOLE_SIZE, 0);

			// First dispatch writes into the first half (dynamic offset 0).
			const deUint32							zero					= 0u;
			vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipelineLayout, 0u, 1u, &descriptorSet.get(), 1, &zero);
			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
			vk.cmdDispatch(*cmdBuffer, 1, 1, 1);

			// Write-after-write barrier between the two compute dispatches on
			// the shared result buffer.
			const auto								barrier					= makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_WRITE_BIT, *resultBuffer, 0ull, bufferSize);
			vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags) 0,
								  0, (const VkMemoryBarrier *) DE_NULL, 1, &barrier, 0, (const VkImageMemoryBarrier *) DE_NULL);

			// Second dispatch targets the second half via the dynamic offset.
			const deUint32							offset					= static_cast<deUint32>(maxLocalSize * sizeof(deUint32));
			vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipelineLayout, 0u, 1u, &descriptorSet.get(), 1u, &offset);
			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline2);
			vk.cmdDispatch(*cmdBuffer, 1, 1, 1);

			endCommandBuffer(vk, *cmdBuffer);
			submitCommandsAndWait(vk, device, queue, *cmdBuffer);

			// NOTE(review): host visibility of the shader writes relies on the
			// full queue wait above plus invalidateAlloc; confirm an explicit
			// SHADER_WRITE -> HOST_READ barrier is not required here.
			invalidateAlloc(vk, device, *resultBufferMemory);

			const deUint32							*res					= static_cast<const deUint32 *>(resultBufferMemory->getHostPtr());
			deUint32								size					= 0;

			// Search for the first nonzero size. Then go through the data of both pipelines and check that
			// the first nonzero size matches with other nonzero values.
			// (Entries stay zero for invocations that were not subgroup-elected.)
			for (deUint32 i = 0; i < maxLocalSize; i++)
			{
				if (res[i] != 0)
				{
					size = res[i];
					break;
				}
			}

			// Subgroup size is guaranteed to be at least 1.
			DE_ASSERT(size > 0);

			// Any nonzero entry in either half disagreeing with the reference
			// size means the subgroup size was not uniform.
			for (deUint32 i = 0; i < maxLocalSize * 2; i++)
			{
				if (size != res[i] && res[i] != 0)
					return tcu::TestStatus::fail("Subgroup size not uniform in command scope. " + std::to_string(res[i]) + " != " + std::to_string(size));
			}
		}
	}

	return tcu::TestStatus::pass("pass");
}
240 
// Test case wrapper: declares shader sources, support requirements and the
// factory for MultipleDispatchesUniformSubgroupSizeInstance.
class MultipleDispatchesUniformSubgroupSize : public TestCase
{
public:
						MultipleDispatchesUniformSubgroupSize (tcu::TestContext&	testCtx,
															   const std::string&	name,
															   const std::string&	description);

	// Registers the compute shader that records gl_SubgroupSize values.
	void				initPrograms						  (SourceCollections&	programCollection) const;
	TestInstance*		createInstance						  (Context&				context) const;
	// Throws NotSupportedError when subgroup size control is unavailable.
	virtual void		checkSupport						  (Context&				context) const;

};
253 
MultipleDispatchesUniformSubgroupSize(tcu::TestContext & testCtx,const std::string & name,const std::string & description)254 MultipleDispatchesUniformSubgroupSize::MultipleDispatchesUniformSubgroupSize (tcu::TestContext&	testCtx,
255 																			  const std::string&	name,
256 																			  const std::string&	description)
257 	: TestCase	(testCtx, name, description)
258 {
259 }
260 
checkSupport(Context & context) const261 void MultipleDispatchesUniformSubgroupSize::checkSupport (Context& context) const
262 {
263 	const VkPhysicalDeviceSubgroupSizeControlFeaturesEXT&	subgroupSizeControlFeatures	= context.getSubgroupSizeControlFeaturesEXT();
264 
265 	if (subgroupSizeControlFeatures.subgroupSizeControl == DE_FALSE)
266 		TCU_THROW(NotSupportedError, "Device does not support varying subgroup sizes");
267 }
268 
initPrograms(SourceCollections & programCollection) const269 void MultipleDispatchesUniformSubgroupSize::initPrograms (SourceCollections& programCollection) const
270 {
271 	std::ostringstream computeSrc;
272 	computeSrc
273 		<< glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
274 		<< "#extension GL_KHR_shader_subgroup_basic : enable\n"
275 		<< "#extension GL_KHR_shader_subgroup_vote : enable\n"
276 		<< "#extension GL_KHR_shader_subgroup_ballot : enable\n"
277 		<< "layout(std430, binding = 0) buffer Outputs { uint sizes[]; };\n"
278 
279 		<< "layout(local_size_x_id = 0) in;\n"
280 
281 		<< "void main()\n"
282 		<< "{\n"
283 		<< "    if (subgroupElect())\n"
284 		<< "    {\n"
285 		<< "        sizes[gl_WorkGroupID.x * gl_NumSubgroups + gl_SubgroupID] = gl_SubgroupSize;\n"
286 		<< "    }\n"
287 		<< "}\n";
288 
289 	programCollection.glslSources.add("comp") << glu::ComputeSource(computeSrc.str())
290 	<< ShaderBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3, 0u);
291 }
292 
createInstance(Context & context) const293 TestInstance* MultipleDispatchesUniformSubgroupSize::createInstance (Context& context) const
294 {
295 	return new MultipleDispatchesUniformSubgroupSizeInstance(context);
296 }
297 
298 } // anonymous ns
299 
createMultipleDispatchesUniformSubgroupSizeTests(tcu::TestContext & testCtx)300 tcu::TestCaseGroup* createMultipleDispatchesUniformSubgroupSizeTests (tcu::TestContext& testCtx)
301 {
302 	de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "multiple_dispatches", "Multiple dispatches uniform subgroup size tests"));
303 
304 	testGroup->addChild(new MultipleDispatchesUniformSubgroupSize(testCtx, "uniform_subgroup_size", ""));
305 	return testGroup.release();
306 }
307 
308 } // compute
309 } // vkt
310