• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2021 Google LLC.
6  *
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief Tests that compute shaders have a subgroup size that is uniform in
23  * command scope.
24  *//*--------------------------------------------------------------------*/
25 
26 #include "deUniquePtr.hpp"
27 
28 #include "vkRef.hpp"
29 #include "vkRefUtil.hpp"
30 #include "vkPrograms.hpp"
31 #include "vkMemUtil.hpp"
32 #include "vkBuilderUtil.hpp"
33 #include "vkCmdUtil.hpp"
34 #include "vkObjUtil.hpp"
35 #include "vkTypeUtil.hpp"
36 #include "vkBufferWithMemory.hpp"
37 #include "vkBarrierUtil.hpp"
38 #include "vktTestCaseUtil.hpp"
39 #include "tcuTestLog.hpp"
40 #include <sstream>
41 
42 using namespace vk;
43 
44 namespace vkt
45 {
46 namespace subgroups
47 {
48 namespace
49 {
50 
// Test instance that runs several compute dispatches with different workgroup
// sizes and checks, per dispatch, that every subgroup reports the same size.
class MultipleDispatchesUniformSubgroupSizeInstance : public TestInstance
{
public:
					MultipleDispatchesUniformSubgroupSizeInstance	(Context&	context);
	// Records, submits and validates one dispatch per power-of-two local size.
	tcu::TestStatus	iterate											(void);
};
57 
MultipleDispatchesUniformSubgroupSizeInstance(Context & context)58 MultipleDispatchesUniformSubgroupSizeInstance::MultipleDispatchesUniformSubgroupSizeInstance	(Context&	context)
59 	:TestInstance																				(context)
60 {
61 }
62 
// Runs the test: for every power-of-two local size up to the device limit, it
// dispatches a compute shader that writes each subgroup's gl_SubgroupSize into
// a storage buffer, then verifies all non-zero recorded sizes are identical
// and that the number of subgroups matches ceil(localSize / subgroupSize).
tcu::TestStatus MultipleDispatchesUniformSubgroupSizeInstance::iterate (void)
{
	const DeviceInterface&				vk						= m_context.getDeviceInterface();
	const VkDevice						device					= m_context.getDevice();
	Allocator&							allocator				= m_context.getDefaultAllocator();
	const VkQueue						queue					= m_context.getUniversalQueue();
	const uint32_t						queueFamilyIndex		= m_context.getUniversalQueueFamilyIndex();

	// The same command buffer is re-recorded on every loop iteration below,
	// hence the RESET_COMMAND_BUFFER pool flag.
	const Move<VkCommandPool>			cmdPool					= createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex);
	const Move<VkCommandBuffer>			cmdBuffer				= allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

	Move<VkShaderModule>				computeShader			= createShaderModule (vk, device, m_context.getBinaryCollection().get("comp"), 0u);

	// The maximum number of invocations in a workgroup.
	const uint32_t						maxLocalSize			= m_context.getDeviceProperties().limits.maxComputeWorkGroupSize[0];
#ifndef CTS_USES_VULKANSC
	const uint32_t						minSubgroupSize			= m_context.getSubgroupSizeControlProperties().minSubgroupSize;
#else
	const uint32_t						minSubgroupSize			= m_context.getSubgroupSizeControlPropertiesEXT().minSubgroupSize;
#endif // CTS_USES_VULKANSC

	// Create a storage buffer to hold the sizes of subgroups.
	// maxLocalSize / minSubgroupSize is the largest possible subgroup count of
	// a single dispatch; one extra element is allocated as slack.
	const VkDeviceSize					bufferSize				= (maxLocalSize / minSubgroupSize + 1u) * sizeof(uint32_t);

	const VkBufferCreateInfo			resultBufferCreateInfo	= makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
	BufferWithMemory					resultBuffer			(vk, device, allocator, resultBufferCreateInfo, MemoryRequirement::HostVisible);
	auto&								resultBufferAlloc		= resultBuffer.getAllocation();

	// Build descriptors for the storage buffer
	const Unique<VkDescriptorPool>		descriptorPool			(DescriptorPoolBuilder().addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
																						.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
	const auto							descriptorSetLayout1	(DescriptorSetLayoutBuilder().addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
																							 .build(vk, device));
	const VkDescriptorBufferInfo		resultInfo				= makeDescriptorBufferInfo(*resultBuffer, 0u, bufferSize);

	const VkDescriptorSetAllocateInfo	allocInfo				=
	{
		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,	// sType
		DE_NULL,										// pNext
		*descriptorPool,								// descriptorPool
		1u,												// descriptorSetCount
		&(*descriptorSetLayout1)						// pSetLayouts
	};

	Move<VkDescriptorSet>				descriptorSet			= allocateDescriptorSet(vk, device, &allocInfo);
	DescriptorSetUpdateBuilder			builder;

	// Point binding 0 of the set at the whole result buffer.
	builder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultInfo);
	builder.update(vk, device);

	// Compute pipeline
	const Move<VkPipelineLayout>		computePipelineLayout	= makePipelineLayout (vk, device, *descriptorSetLayout1);

	for (uint32_t localSize = 1u; localSize <= maxLocalSize; localSize *= 2u)
	{
		// On each iteration, change the number of invocations which might affect
		// the subgroup size.  The local size is fed to the shader through
		// specialization constant 0 (local_size_x_id = 0 in the GLSL source).
		const VkSpecializationMapEntry			entries					=
		{
			0u,					// uint32_t constantID;
			0u,					// uint32_t offset;
			sizeof(localSize)	// size_t size;
		};

		const VkSpecializationInfo				specInfo				=
		{
			1,					// mapEntryCount
			&entries,			// pMapEntries
			sizeof(localSize),	// dataSize
			&localSize			// pData
		};

		// ALLOW_VARYING_SUBGROUP_SIZE lets the implementation pick any supported
		// subgroup size for this pipeline; the test then checks the chosen size
		// is uniform across the dispatch.
		const VkPipelineShaderStageCreateInfo	shaderStageCreateInfo	=
		{
			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,					// sType
			DE_NULL,																// pNext
			VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT,	// flags
			VK_SHADER_STAGE_COMPUTE_BIT,											// stage
			*computeShader,															// module
			"main",																	// pName
			&specInfo,																// pSpecializationInfo
		};

		const VkComputePipelineCreateInfo		pipelineCreateInfo		=
		{
			VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,	// sType
			DE_NULL,										// pNext
			0u,												// flags
			shaderStageCreateInfo,							// stage
			*computePipelineLayout,							// layout
			(VkPipeline) 0,									// basePipelineHandle
			0u,												// basePipelineIndex
		};

		Move<VkPipeline>						computePipeline			= createComputePipeline(vk, device, (VkPipelineCache) 0u, &pipelineCreateInfo);

		beginCommandBuffer(vk, *cmdBuffer);

		// Clears the values in the buffer.
		vk.cmdFillBuffer(*cmdBuffer, *resultBuffer, 0u, VK_WHOLE_SIZE, 0);

		// Make the transfer-stage fill visible to the compute shader's writes.
		const auto fillBarrier = makeBufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_WRITE_BIT, *resultBuffer, 0ull, bufferSize);
		cmdPipelineBufferMemoryBarrier(vk, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, &fillBarrier);

		// Runs pipeline.
		vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, nullptr);
		vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
		vk.cmdDispatch(*cmdBuffer, 1, 1, 1);

		// Make the shader's writes visible to the host before reading back.
		const auto computeToHostBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
		cmdPipelineMemoryBarrier(vk, *cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, &computeToHostBarrier);

		endCommandBuffer(vk, *cmdBuffer);
		submitCommandsAndWait(vk, device, queue, *cmdBuffer);

		// Host-visible memory may be non-coherent; invalidate before reading.
		invalidateAlloc(vk, device, resultBufferAlloc);

		// Validate results: all non-zero subgroup sizes must be the same.
		const uint32_t							*res					= static_cast<const uint32_t *>(resultBufferAlloc.getHostPtr());
		const uint32_t							maxIters				= static_cast<uint32_t>(bufferSize / sizeof(uint32_t));
		uint32_t								size					= 0u;	// First non-zero size seen; reference value.
		uint32_t								subgroupCount			= 0u;	// Number of non-zero entries (one per subgroup).
		auto&									log						= m_context.getTestContext().getLog();

		for (uint32_t sizeIdx = 0u; sizeIdx < maxIters; ++sizeIdx)
		{
			if (res[sizeIdx] != 0u)
			{
				if (size == 0u)
				{
					size = res[sizeIdx];
				}
				else if (res[sizeIdx] != size)
				{
					std::ostringstream msg;
					msg << "Subgroup size not uniform in command scope: " << res[sizeIdx] << " != " << size << " at position " << sizeIdx;
					TCU_FAIL(msg.str());
				}
				++subgroupCount;
			}
		}

		// Subgroup size is guaranteed to be at least 1.
		if (size == 0u)
			TCU_FAIL("Subgroup size must be at least 1");

		// The number of reported sizes must match ceil(localSize / size).
		const auto expectedSubgroupCount = (localSize / size + ((localSize % size != 0u) ? 1u : 0u));
		if (subgroupCount != expectedSubgroupCount)
		{
			std::ostringstream msg;
			msg << "Local size " << localSize << " with subgroup size " << size << " resulted in subgroup count " << subgroupCount << " != " << expectedSubgroupCount;
			TCU_FAIL(msg.str());
		}

		{
			std::ostringstream msg;
			msg << "Subgroup size " << size << " with local size " << localSize;
			log << tcu::TestLog::Message << msg.str() << tcu::TestLog::EndMessage;
		}
	}

	return tcu::TestStatus::pass("Pass");
}
227 
// Test case wrapper: declares support requirements, the compute shader, and
// the instance that performs the actual dispatches and validation.
class MultipleDispatchesUniformSubgroupSize : public TestCase
{
public:
						MultipleDispatchesUniformSubgroupSize (tcu::TestContext&	testCtx,
															   const std::string&	name,
															   const std::string&	description);

	// Adds the "comp" GLSL compute shader used by the test instance.
	void				initPrograms						  (SourceCollections&	programCollection) const;
	TestInstance*		createInstance						  (Context&				context) const;
	// Throws NotSupportedError when the subgroupSizeControl feature is absent.
	virtual void		checkSupport						  (Context&				context) const;

};
240 
MultipleDispatchesUniformSubgroupSize(tcu::TestContext & testCtx,const std::string & name,const std::string & description)241 MultipleDispatchesUniformSubgroupSize::MultipleDispatchesUniformSubgroupSize (tcu::TestContext&	testCtx,
242 																			  const std::string&	name,
243 																			  const std::string&	description)
244 	: TestCase	(testCtx, name, description)
245 {
246 }
247 
checkSupport(Context & context) const248 void MultipleDispatchesUniformSubgroupSize::checkSupport (Context& context) const
249 {
250 #ifndef CTS_USES_VULKANSC
251 	const VkPhysicalDeviceSubgroupSizeControlFeatures&		subgroupSizeControlFeatures = context.getSubgroupSizeControlFeatures();
252 #else
253 	const VkPhysicalDeviceSubgroupSizeControlFeaturesEXT&	subgroupSizeControlFeatures = context.getSubgroupSizeControlFeaturesEXT();
254 #endif // CTS_USES_VULKANSC
255 
256 	if (subgroupSizeControlFeatures.subgroupSizeControl == DE_FALSE)
257 		TCU_THROW(NotSupportedError, "Device does not support varying subgroup sizes");
258 }
259 
initPrograms(SourceCollections & programCollection) const260 void MultipleDispatchesUniformSubgroupSize::initPrograms (SourceCollections& programCollection) const
261 {
262 	std::ostringstream computeSrc;
263 	computeSrc
264 		<< glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
265 		<< "#extension GL_KHR_shader_subgroup_basic : enable\n"
266 		<< "#extension GL_KHR_shader_subgroup_vote : enable\n"
267 		<< "#extension GL_KHR_shader_subgroup_ballot : enable\n"
268 		<< "layout(std430, binding = 0) buffer Outputs { uint sizes[]; };\n"
269 
270 		<< "layout(local_size_x_id = 0) in;\n"
271 
272 		<< "void main()\n"
273 		<< "{\n"
274 		<< "    if (subgroupElect())\n"
275 		<< "    {\n"
276 		<< "        sizes[gl_WorkGroupID.x * gl_NumSubgroups + gl_SubgroupID] = gl_SubgroupSize;\n"
277 		<< "    }\n"
278 		<< "}\n";
279 
280 	programCollection.glslSources.add("comp") << glu::ComputeSource(computeSrc.str())
281 	<< ShaderBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3, 0u);
282 }
283 
createInstance(Context & context) const284 TestInstance* MultipleDispatchesUniformSubgroupSize::createInstance (Context& context) const
285 {
286 	return new MultipleDispatchesUniformSubgroupSizeInstance(context);
287 }
288 
289 } // anonymous ns
290 
createMultipleDispatchesUniformSubgroupSizeTests(tcu::TestContext & testCtx)291 tcu::TestCaseGroup* createMultipleDispatchesUniformSubgroupSizeTests (tcu::TestContext& testCtx)
292 {
293 	de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "multiple_dispatches", "Multiple dispatches uniform subgroup size tests"));
294 
295 	testGroup->addChild(new MultipleDispatchesUniformSubgroupSize(testCtx, "uniform_subgroup_size", ""));
296 	return testGroup.release();
297 }
298 
} // subgroups
300 } // vkt
301