/*-------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2019 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Test Case Skeleton Based on Compute Shaders
 *//*--------------------------------------------------------------------*/

#include "vktSpvAsmComputeShaderCase.hpp"

#include "deSharedPtr.hpp"
#include "deSTLUtil.hpp"

#include "vktSpvAsmUtils.hpp"

#include "vkBuilderUtil.hpp"
#include "vkMemUtil.hpp"
#include "vkPlatform.hpp"
#include "vkRefUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkImageUtil.hpp"

#include <cassert>
namespace
{

using namespace vk;
using std::vector;

typedef vkt::SpirVAssembly::AllocationMp			AllocationMp;
typedef vkt::SpirVAssembly::AllocationSp			AllocationSp;
typedef vk::Unique<VkBuffer>						BufferHandleUp;
typedef vk::Unique<VkImage>							ImageHandleUp;
typedef vk::Unique<VkImageView>						ImageViewHandleUp;
typedef vk::Unique<VkSampler>						SamplerHandleUp;
typedef de::SharedPtr<BufferHandleUp>				BufferHandleSp;
typedef de::SharedPtr<ImageHandleUp>				ImageHandleSp;
typedef de::SharedPtr<ImageViewHandleUp>			ImageViewHandleSp;
typedef de::SharedPtr<SamplerHandleUp>				SamplerHandleSp;
/*--------------------------------------------------------------------*//*!
 * \brief Create a buffer, then allocate and bind memory for it
 *
 * The memory is created as host visible and passed back as a vk::Allocation
 * instance via outMemory.
 *//*--------------------------------------------------------------------*/
Move<VkBuffer> createBufferAndBindMemory (vkt::Context&				context,
										  const DeviceInterface&	vkdi,
										  const VkDevice&			device,
										  VkDescriptorType			dtype,
										  Allocator&				allocator,
										  size_t					numBytes,
										  AllocationMp*				outMemory,
										  bool						physStorageBuffer,
										  bool						coherent = false)
{
	VkBufferUsageFlags			usageFlags			= (VkBufferUsageFlags)0u;

	if (physStorageBuffer)
		usageFlags |= VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;

	switch (dtype)
	{
		case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:			usageFlags |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;	break;
		case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:			usageFlags |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;	break;
		case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:			usageFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;		break;
		case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:			usageFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;		break;
		case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:	usageFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;		break;
		default:										DE_FATAL("Not implemented");
	}

	const VkBufferCreateInfo	bufferCreateInfo	=
	{
		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,	// sType
		DE_NULL,								// pNext
		0u,										// flags
		numBytes,								// size
		usageFlags,								// usage
		VK_SHARING_MODE_EXCLUSIVE,				// sharingMode
		0u,										// queueFamilyIndexCount
		DE_NULL,								// pQueueFamilyIndices
	};

	Move<VkBuffer>				buffer			(createBuffer(vkdi, device, &bufferCreateInfo));
	const VkMemoryRequirements	requirements	= getBufferMemoryRequirements(vkdi, device, *buffer);
	AllocationMp				bufferMemory	= allocator.allocate(requirements,
													(coherent ? MemoryRequirement::Coherent : MemoryRequirement::Any) |
													(context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address") && physStorageBuffer ? MemoryRequirement::DeviceAddress : MemoryRequirement::Any) |
													MemoryRequirement::HostVisible);

	VK_CHECK(vkdi.bindBufferMemory(device, *buffer, bufferMemory->getMemory(), bufferMemory->getOffset()));
	*outMemory = bufferMemory;

	return buffer;
}
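
// A minimal usage sketch for the helper above, assuming a vkt::Context
// "context" with its default allocator and some host data "inputData" of
// "numBytes" bytes (both hypothetical). Ownership of the host-visible
// allocation comes back through the outMemory parameter:
//
//	AllocationMp	memory;
//	Move<VkBuffer>	buffer	= createBufferAndBindMemory(context, vkdi, device,
//								VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allocator,
//								numBytes, &memory, false /*physStorageBuffer*/);
//	setMemory(vkdi, device, &*memory, numBytes, inputData);	// see setMemory() below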

/*--------------------------------------------------------------------*//*!
 * \brief Create an image, then allocate and bind memory for it
 *//*--------------------------------------------------------------------*/
Move<VkImage> createImageAndBindMemory (const DeviceInterface& vkdi, const VkDevice& device, VkDescriptorType dtype, Allocator& allocator, deUint32 queueFamilyIndex, AllocationMp* outMemory)
{
	VkImageUsageFlags			usageBits			= (VkImageUsageFlags)0;

	switch (dtype)
	{
		case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:			usageBits = VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;	break;
		case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:			usageBits = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;	break;
		case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:	usageBits = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;	break;
		default:										DE_FATAL("Not implemented");
	}

	const VkImageCreateInfo		resourceImageParams	=
	{
		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,									//	VkStructureType			sType;
		DE_NULL,																//	const void*				pNext;
		0u,																		//	VkImageCreateFlags		flags;
		VK_IMAGE_TYPE_2D,														//	VkImageType				imageType;
		VK_FORMAT_R32G32B32A32_SFLOAT,											//	VkFormat				format;
		{ 8, 8, 1 },															//	VkExtent3D				extent;
		1u,																		//	deUint32				mipLevels;
		1u,																		//	deUint32				arrayLayers;
		VK_SAMPLE_COUNT_1_BIT,													//	VkSampleCountFlagBits	samples;
		VK_IMAGE_TILING_OPTIMAL,												//	VkImageTiling			tiling;
		usageBits,																//	VkImageUsageFlags		usage;
		VK_SHARING_MODE_EXCLUSIVE,												//	VkSharingMode			sharingMode;
		1u,																		//	deUint32				queueFamilyIndexCount;
		&queueFamilyIndex,														//	const deUint32*			pQueueFamilyIndices;
		VK_IMAGE_LAYOUT_UNDEFINED,												//	VkImageLayout			initialLayout;
	};

	// Create image
	Move<VkImage>				image				= createImage(vkdi, device, &resourceImageParams);
	const VkMemoryRequirements	requirements		= getImageMemoryRequirements(vkdi, device, *image);
	de::MovePtr<Allocation>		imageMemory			= allocator.allocate(requirements, MemoryRequirement::Any);

	VK_CHECK(vkdi.bindImageMemory(device, *image, imageMemory->getMemory(), imageMemory->getOffset()));
	*outMemory = imageMemory;

	return image;
}
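
// A minimal sketch of how an image created above gets its contents: the
// allocation uses MemoryRequirement::Any, so it is generally not host
// visible, and texel data must be staged through a host-visible buffer and
// copied on the device (exactly what iterate() below does). "texels" and
// "texelBytes" are hypothetical:
//
//	AllocationMp	imageMemory;
//	Move<VkImage>	image	= createImageAndBindMemory(vkdi, device,
//								VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, allocator,
//								queueFamilyIndex, &imageMemory);
//	// Stage: createBufferAndBindMemory() + setMemory(texels, texelBytes),
//	// then copyBufferToImage() into the desired final layout.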

/*--------------------------------------------------------------------*//*!
 * \brief Copy the given data into host-visible memory, flushing if needed
 *//*--------------------------------------------------------------------*/
void setMemory (const DeviceInterface& vkdi, const VkDevice& device, Allocation* destAlloc, size_t numBytes, const void* data, bool coherent = false)
{
	void* const hostPtr = destAlloc->getHostPtr();

	deMemcpy((deUint8*)hostPtr, data, numBytes);

	if (!coherent)
		flushAlloc(vkdi, device, *destAlloc);
}

/*--------------------------------------------------------------------*//*!
 * \brief Fill host-visible memory with a byte value, flushing if needed
 *//*--------------------------------------------------------------------*/
void fillMemoryWithValue (const DeviceInterface& vkdi, const VkDevice& device, Allocation* destAlloc, size_t numBytes, deUint8 value, bool coherent = false)
{
	void* const hostPtr = destAlloc->getHostPtr();

	deMemset((deUint8*)hostPtr, value, numBytes);

	if (!coherent)
		flushAlloc(vkdi, device, *destAlloc);
}

/*--------------------------------------------------------------------*//*!
 * \brief Invalidate a mapped allocation unless the memory is coherent
 *//*--------------------------------------------------------------------*/
void invalidateMemory (const DeviceInterface& vkdi, const VkDevice& device, Allocation* srcAlloc, bool coherent = false)
{
	if (!coherent)
		invalidateAlloc(vkdi, device, *srcAlloc);
}
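
// The three helpers above encode the host-visible memory rules: a host write
// to non-coherent memory must be flushed before the device can see it, and
// after a device write the host must invalidate before reading. Coherent
// memory needs neither, hence the "coherent" flag short-circuits both calls.
// A minimal round trip, assuming "alloc" wraps non-coherent host-visible
// memory and "inputData"/"expected"/"numBytes" are hypothetical:
//
//	setMemory(vkdi, device, alloc, numBytes, inputData);	// deMemcpy + flushAlloc
//	// ... submit and wait for device work touching the memory ...
//	invalidateMemory(vkdi, device, alloc);					// invalidateAlloc
//	const bool ok = (deMemCmp(expected, alloc->getHostPtr(), numBytes) == 0);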

/*--------------------------------------------------------------------*//*!
 * \brief Create a descriptor set layout with the given descriptor types
 *
 * All descriptors are created for the compute pipeline.
 *//*--------------------------------------------------------------------*/
Move<VkDescriptorSetLayout> createDescriptorSetLayout (const DeviceInterface& vkdi, const VkDevice& device, const vector<VkDescriptorType>& dtypes)
{
	DescriptorSetLayoutBuilder builder;

	for (size_t bindingNdx = 0; bindingNdx < dtypes.size(); ++bindingNdx)
		builder.addSingleBinding(dtypes[bindingNdx], VK_SHADER_STAGE_COMPUTE_BIT);

	return builder.build(vkdi, device);
}

/*--------------------------------------------------------------------*//*!
 * \brief Create a pipeline layout with one descriptor set
 *//*--------------------------------------------------------------------*/
Move<VkPipelineLayout> createPipelineLayout (const DeviceInterface& vkdi, const VkDevice& device, VkDescriptorSetLayout descriptorSetLayout, const vkt::SpirVAssembly::BufferSp& pushConstants)
{
	VkPipelineLayoutCreateInfo		createInfo	=
	{
		VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,	// sType
		DE_NULL,										// pNext
		(VkPipelineLayoutCreateFlags)0,					// flags
		1u,												// setLayoutCount
		&descriptorSetLayout,							// pSetLayouts
		0u,												// pushConstantRangeCount
		DE_NULL,										// pPushConstantRanges
	};

	VkPushConstantRange				range		=
	{
		VK_SHADER_STAGE_COMPUTE_BIT,					// stageFlags
		0,												// offset
		0,												// size
	};

	if (pushConstants != DE_NULL)
	{
		vector<deUint8> pushConstantsBytes;
		pushConstants->getBytes(pushConstantsBytes);

		range.size							= static_cast<deUint32>(pushConstantsBytes.size());
		createInfo.pushConstantRangeCount	= 1;
		createInfo.pPushConstantRanges		= &range;
	}

	return createPipelineLayout(vkdi, device, &createInfo);
}
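
// When push constant data is supplied, the layout above exposes it as a
// single range covering the whole blob for the compute stage. With a
// hypothetical 16-byte pushConstants buffer, the layout ends up with one
// VkPushConstantRange of { VK_SHADER_STAGE_COMPUTE_BIT, 0, 16 }, and
// iterate() later feeds the same bytes through cmdPushConstants().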

/*--------------------------------------------------------------------*//*!
 * \brief Create a one-time descriptor pool for one descriptor set that
 * supports the given descriptor types.
 *//*--------------------------------------------------------------------*/
inline Move<VkDescriptorPool> createDescriptorPool (const DeviceInterface& vkdi, const VkDevice& device, const vector<VkDescriptorType>& dtypes)
{
	DescriptorPoolBuilder builder;

	for (size_t typeNdx = 0; typeNdx < dtypes.size(); ++typeNdx)
		builder.addType(dtypes[typeNdx], 1);

	return builder.build(vkdi, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, /* maxSets = */ 1);
}

/*--------------------------------------------------------------------*//*!
 * \brief Create a descriptor set
 *
 * The descriptor set's layout contains the given descriptor types,
 * bound sequentially to binding points starting from 0.
 *//*--------------------------------------------------------------------*/
Move<VkDescriptorSet> createDescriptorSet (const DeviceInterface& vkdi, const VkDevice& device, VkDescriptorPool pool, VkDescriptorSetLayout layout, const vector<VkDescriptorType>& dtypes, const vector<VkDescriptorBufferInfo>& descriptorInfos, const vector<VkDescriptorImageInfo>& descriptorImageInfos)
{
	DE_ASSERT(dtypes.size() == descriptorInfos.size() + descriptorImageInfos.size());

	const VkDescriptorSetAllocateInfo	allocInfo	=
	{
		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
		DE_NULL,
		pool,
		1u,
		&layout
	};

	Move<VkDescriptorSet>				descriptorSet	= allocateDescriptorSet(vkdi, device, &allocInfo);
	DescriptorSetUpdateBuilder			builder;

	deUint32							bufferNdx		= 0u;
	deUint32							imageNdx		= 0u;

	for (deUint32 descriptorNdx = 0; descriptorNdx < dtypes.size(); ++descriptorNdx)
	{
		switch (dtypes[descriptorNdx])
		{
			// Write buffer descriptor
			case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
			case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
				builder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(descriptorNdx), dtypes[descriptorNdx], &descriptorInfos[bufferNdx++]);
				break;

			// Write image/sampler descriptor
			case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
			case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
			case VK_DESCRIPTOR_TYPE_SAMPLER:
			case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
				builder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(descriptorNdx), dtypes[descriptorNdx], &descriptorImageInfos[imageNdx++]);
				break;

			default:
				DE_FATAL("Not implemented");
		}
	}
	builder.update(vkdi, device);

	return descriptorSet;
}
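
// The dtypes vector above is indexed by binding number, while buffer and
// image infos are consumed in order from their own vectors as matching
// types appear. A minimal sketch with one storage buffer at binding 0 and
// one sampled image at binding 1 ("buffer", "view", "pool" and "layout"
// are hypothetical handles):
//
//	const vector<VkDescriptorType>			dtypes		= { VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
//															VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE };
//	const vector<VkDescriptorBufferInfo>	bufInfos	= { makeDescriptorBufferInfo(buffer, 0u, VK_WHOLE_SIZE) };
//	const vector<VkDescriptorImageInfo>		imgInfos	= { { DE_NULL, view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL } };
//	Move<VkDescriptorSet>					set			= createDescriptorSet(vkdi, device, pool, layout,
//															dtypes, bufInfos, imgInfos);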

/*--------------------------------------------------------------------*//*!
 * \brief Create a compute pipeline based on the given shader
 *//*--------------------------------------------------------------------*/
Move<VkPipeline> createComputePipeline (const DeviceInterface& vkdi, const VkDevice& device, VkPipelineLayout pipelineLayout, VkShaderModule shader, const char* entryPoint, const vkt::SpirVAssembly::SpecConstants& specConstants)
{
	const deUint32							numSpecConstants				= (deUint32)specConstants.getValuesCount();
	vector<VkSpecializationMapEntry>		entries;
	VkSpecializationInfo					specInfo;
	size_t									offset							= 0;

	if (numSpecConstants != 0)
	{
		entries.resize(numSpecConstants);

		for (deUint32 ndx = 0; ndx < numSpecConstants; ++ndx)
		{
			const size_t valueSize	= specConstants.getValueSize(ndx);

			entries[ndx].constantID	= ndx;
			entries[ndx].offset		= static_cast<deUint32>(offset);
			entries[ndx].size		= valueSize;

			offset					+= valueSize;
		}

		specInfo.mapEntryCount		= numSpecConstants;
		specInfo.pMapEntries		= &entries[0];
		specInfo.dataSize			= offset;
		specInfo.pData				= specConstants.getValuesBuffer();
	}

	const VkPipelineShaderStageCreateInfo	pipelineShaderStageCreateInfo	=
	{
		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// sType
		DE_NULL,												// pNext
		(VkPipelineShaderStageCreateFlags)0,					// flags
		VK_SHADER_STAGE_COMPUTE_BIT,							// stage
		shader,													// module
		entryPoint,												// pName
		(numSpecConstants == 0) ? DE_NULL : &specInfo,			// pSpecializationInfo
	};
	const VkComputePipelineCreateInfo		pipelineCreateInfo				=
	{
		VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,			// sType
		DE_NULL,												// pNext
		(VkPipelineCreateFlags)0,								// flags
		pipelineShaderStageCreateInfo,							// stage
		pipelineLayout,											// layout
		(VkPipeline)0,											// basePipelineHandle
		0u,														// basePipelineIndex
	};

	return createComputePipeline(vkdi, device, (VkPipelineCache)0u, &pipelineCreateInfo);
}
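
// The specialization constants above are tightly packed: constant ID ndx
// maps to map entry ndx, and each offset is the running sum of the preceding
// value sizes. For example, two constants of 4 and 8 bytes produce entries
// { constantID 0, offset 0, size 4 } and { constantID 1, offset 4, size 8 },
// with specInfo.dataSize == 12 and pData pointing at the packed value buffer.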

} // anonymous

namespace vkt
{
namespace SpirVAssembly
{

// ComputeShaderTestCase implementations

SpvAsmComputeShaderCase::SpvAsmComputeShaderCase (tcu::TestContext& testCtx, const char* name, const char* description, const ComputeShaderSpec& spec)
	: TestCase		(testCtx, name, description)
	, m_shaderSpec	(spec)
{
}
void SpvAsmComputeShaderCase::checkSupport (Context& context) const
{
	if (getMinRequiredVulkanVersion(m_shaderSpec.spirvVersion) > context.getUsedApiVersion())
	{
		TCU_THROW(NotSupportedError, std::string("Vulkan " + getVulkanName(getMinRequiredVulkanVersion(m_shaderSpec.spirvVersion)) + " or higher is required for this test to run").c_str());
	}

	// Check that all required extensions are supported
	for (const auto& ext : m_shaderSpec.extensions)
		context.requireDeviceFunctionality(ext);

	// Core features
	// Check that we're not skipping tests needlessly based on things that don't affect compute.
	assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.fullDrawIndexUint32					== DE_FALSE);
	assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.independentBlend						== DE_FALSE);
	assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.geometryShader						== DE_FALSE);
	assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.tessellationShader					== DE_FALSE);
	assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.sampleRateShading						== DE_FALSE);
	assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.dualSrcBlend							== DE_FALSE);
	assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.logicOp								== DE_FALSE);
	assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.multiDrawIndirect						== DE_FALSE);
	assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.drawIndirectFirstInstance				== DE_FALSE);
	assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.depthClamp							== DE_FALSE);
	assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.depthBiasClamp						== DE_FALSE);
	assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.fillModeNonSolid						== DE_FALSE);
	assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.depthBounds							== DE_FALSE);
	assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.wideLines								== DE_FALSE);
	assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.largePoints							== DE_FALSE);
	assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.alphaToOne							== DE_FALSE);
	assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.multiViewport							== DE_FALSE);
	assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.occlusionQueryPrecise					== DE_FALSE);
	assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.vertexPipelineStoresAndAtomics		== DE_FALSE);
	assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.fragmentStoresAndAtomics				== DE_FALSE);
	assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.shaderTessellationAndGeometryPointSize	== DE_FALSE);
	assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.shaderClipDistance					== DE_FALSE);
	assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.shaderCullDistance					== DE_FALSE);
	assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.sparseBinding							== DE_FALSE);
	assert(m_shaderSpec.requestedVulkanFeatures.coreFeatures.variableMultisampleRate				== DE_FALSE);

	const char* unsupportedFeature = DE_NULL;
	if (!isVulkanFeaturesSupported(context, m_shaderSpec.requestedVulkanFeatures, &unsupportedFeature))
		TCU_THROW(NotSupportedError, std::string("At least the following requested feature is not supported: ") + unsupportedFeature);

	// Extension features
	if (m_shaderSpec.usesPhysStorageBuffer && !context.isBufferDeviceAddressSupported())
		TCU_THROW(NotSupportedError, "Requested physical storage buffer feature not supported");
}

void SpvAsmComputeShaderCase::initPrograms (SourceCollections& programCollection) const
{
	const auto&	extensions			= m_shaderSpec.extensions;
	const bool	allowSpirv14		= (std::find(extensions.begin(), extensions.end(), "VK_KHR_spirv_1_4") != extensions.end());
	const bool	allowMaintenance4	= (std::find(extensions.begin(), extensions.end(), "VK_KHR_maintenance4") != extensions.end());

	programCollection.spirvAsmSources.add("compute")
		<< m_shaderSpec.assembly.c_str()
		<< SpirVAsmBuildOptions(programCollection.usedVulkanVersion, m_shaderSpec.spirvVersion, allowSpirv14, allowMaintenance4);
}

TestInstance* SpvAsmComputeShaderCase::createInstance (Context& ctx) const
{
	return new SpvAsmComputeShaderInstance(ctx, m_shaderSpec);
}

// ComputeShaderTestInstance implementations

SpvAsmComputeShaderInstance::SpvAsmComputeShaderInstance (Context& ctx, const ComputeShaderSpec& spec)
	: TestInstance		(ctx)
	, m_shaderSpec		(spec)
{
}

VkImageUsageFlags getMatchingComputeImageUsageFlags (VkDescriptorType dType)
{
	switch (dType)
	{
		case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:			return VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
		case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:			return VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
		case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:	return VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
		default:										DE_FATAL("Not implemented");
	}
	return (VkImageUsageFlags)0;
}

tcu::TestStatus SpvAsmComputeShaderInstance::iterate (void)
{
	const deUint32						queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
	const VkDevice&						device				= m_context.getDevice();
	const DeviceInterface&				vkdi				= m_context.getDeviceInterface();
	Allocator&							allocator			= m_context.getDefaultAllocator();
	const VkQueue						queue				= m_context.getUniversalQueue();

	vector<AllocationSp>				inputAllocs;
	vector<AllocationSp>				outputAllocs;
	vector<BufferHandleSp>				inputBuffers;
	vector<ImageHandleSp>				inputImages;
	vector<ImageViewHandleSp>			inputImageViews;
	vector<SamplerHandleSp>				inputSamplers;
	vector<BufferHandleSp>				outputBuffers;
	vector<VkDescriptorBufferInfo>		descriptorInfos;
	vector<VkDescriptorImageInfo>		descriptorImageInfos;
	vector<VkDescriptorType>			descriptorTypes;

	DE_ASSERT(!m_shaderSpec.outputs.empty());

	// Create command pool and command buffer

	const Unique<VkCommandPool>			cmdPool				(createCommandPool(vkdi, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex));
	Unique<VkCommandBuffer>				cmdBuffer			(allocateCommandBuffer(vkdi, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

	// Create buffer and image objects, allocate storage, and create views for all input/output buffers and images.

	for (deUint32 inputNdx = 0; inputNdx < m_shaderSpec.inputs.size(); ++inputNdx)
	{
		const VkDescriptorType	descType	= m_shaderSpec.inputs[inputNdx].getDescriptorType();

		const bool				hasImage	= (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)	||
											  (descType == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE)	||
											  (descType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);

		const bool				hasSampler	= (descType == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE)	||
											  (descType == VK_DESCRIPTOR_TYPE_SAMPLER)			||
											  (descType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);

		descriptorTypes.push_back(descType);

		// Buffer
		if (!hasImage && !hasSampler)
		{
			const BufferSp&		input			= m_shaderSpec.inputs[inputNdx].getBuffer();
			vector<deUint8>		inputBytes;

			input->getBytes(inputBytes);

			const size_t		numBytes		= inputBytes.size();

			AllocationMp		bufferAlloc;
			BufferHandleUp*		buffer			= new BufferHandleUp(createBufferAndBindMemory(m_context, vkdi, device, descType, allocator, numBytes, &bufferAlloc, m_shaderSpec.usesPhysStorageBuffer, m_shaderSpec.coherentMemory));

			setMemory(vkdi, device, &*bufferAlloc, numBytes, &inputBytes.front(), m_shaderSpec.coherentMemory);
			inputBuffers.push_back(BufferHandleSp(buffer));
			inputAllocs.push_back(de::SharedPtr<Allocation>(bufferAlloc.release()));
		}
		// Image
		else if (hasImage)
		{
			const BufferSp&				input			= m_shaderSpec.inputs[inputNdx].getBuffer();
			vector<deUint8>				inputBytes;

			input->getBytes(inputBytes);

			const size_t				numBytes		= inputBytes.size();

			AllocationMp				bufferAlloc;
			BufferHandleUp*				buffer			= new BufferHandleUp(createBufferAndBindMemory(m_context, vkdi, device, descType, allocator, numBytes, &bufferAlloc, m_shaderSpec.usesPhysStorageBuffer));

			AllocationMp				imageAlloc;
			ImageHandleUp*				image			= new ImageHandleUp(createImageAndBindMemory(vkdi, device, descType, allocator, queueFamilyIndex, &imageAlloc));

			setMemory(vkdi, device, &*bufferAlloc, numBytes, &inputBytes.front());

			inputBuffers.push_back(BufferHandleSp(buffer));
			inputAllocs.push_back(de::SharedPtr<Allocation>(bufferAlloc.release()));

			inputImages.push_back(ImageHandleSp(image));
			inputAllocs.push_back(de::SharedPtr<Allocation>(imageAlloc.release()));

			const VkImageLayout			imageLayout		= (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
			const VkBufferImageCopy		copyRegion		=
			{
				0u,												// VkDeviceSize				bufferOffset;
				0u,												// deUint32					bufferRowLength;
				0u,												// deUint32					bufferImageHeight;
				{
					VK_IMAGE_ASPECT_COLOR_BIT,						// VkImageAspectFlags		aspectMask;
					0u,												// deUint32					mipLevel;
					0u,												// deUint32					baseArrayLayer;
					1u,												// deUint32					layerCount;
				},												// VkImageSubresourceLayers	imageSubresource;
				{ 0, 0, 0 },									// VkOffset3D				imageOffset;
				{ 8, 8, 1 }										// VkExtent3D				imageExtent;
			};
			vector<VkBufferImageCopy>	copyRegions;
			copyRegions.push_back(copyRegion);

			copyBufferToImage(vkdi, device, queue, queueFamilyIndex, buffer->get(), (deUint32)numBytes, copyRegions, DE_NULL, VK_IMAGE_ASPECT_COLOR_BIT, 1u, 1u, image->get(), imageLayout);
		}
	}

	deUint32							imageNdx			= 0u;
	deUint32							bufferNdx			= 0u;

	for (deUint32 inputNdx = 0; inputNdx < descriptorTypes.size(); ++inputNdx)
	{
		const VkDescriptorType	descType	= descriptorTypes[inputNdx];

		const bool				hasImage	= (descType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)	||
											  (descType == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE)	||
											  (descType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);

		const bool				hasSampler	= (descType == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE)	||
											  (descType == VK_DESCRIPTOR_TYPE_SAMPLER)			||
											  (descType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);

		// Create image view and sampler
		if (hasImage || hasSampler)
		{
			if (descType != VK_DESCRIPTOR_TYPE_SAMPLER)
			{
				const VkImageViewCreateInfo	imgViewParams	=
				{
					VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,	//	VkStructureType			sType;
					DE_NULL,									//	const void*				pNext;
					0u,											//	VkImageViewCreateFlags	flags;
					**inputImages[imageNdx++],					//	VkImage					image;
					VK_IMAGE_VIEW_TYPE_2D,						//	VkImageViewType			viewType;
					VK_FORMAT_R32G32B32A32_SFLOAT,				//	VkFormat				format;
					{
						VK_COMPONENT_SWIZZLE_R,
						VK_COMPONENT_SWIZZLE_G,
						VK_COMPONENT_SWIZZLE_B,
						VK_COMPONENT_SWIZZLE_A
					},											//	VkComponentMapping		components;
					{
						VK_IMAGE_ASPECT_COLOR_BIT,					//	VkImageAspectFlags		aspectMask;
						0u,											//	deUint32				baseMipLevel;
						1u,											//	deUint32				levelCount;
						0u,											//	deUint32				baseArrayLayer;
						1u,											//	deUint32				layerCount;
					},											//	VkImageSubresourceRange	subresourceRange;
				};

				Move<VkImageView>			imgView			(createImageView(vkdi, device, &imgViewParams));
				inputImageViews.push_back(ImageViewHandleSp(new ImageViewHandleUp(imgView)));
			}

			if (hasSampler)
			{
				const VkSamplerCreateInfo	samplerParams	=
				{
					VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,		// VkStructureType			sType;
					DE_NULL,									// const void*				pNext;
					0,											// VkSamplerCreateFlags		flags;
					VK_FILTER_NEAREST,							// VkFilter					magFilter;
					VK_FILTER_NEAREST,							// VkFilter					minFilter;
					VK_SAMPLER_MIPMAP_MODE_NEAREST,				// VkSamplerMipmapMode		mipmapMode;
					VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,		// VkSamplerAddressMode		addressModeU;
					VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,		// VkSamplerAddressMode		addressModeV;
					VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,		// VkSamplerAddressMode		addressModeW;
					0.0f,										// float					mipLodBias;
					VK_FALSE,									// VkBool32					anisotropyEnable;
					1.0f,										// float					maxAnisotropy;
					VK_FALSE,									// VkBool32					compareEnable;
					VK_COMPARE_OP_ALWAYS,						// VkCompareOp				compareOp;
					0.0f,										// float					minLod;
					0.0f,										// float					maxLod;
					VK_BORDER_COLOR_INT_OPAQUE_BLACK,			// VkBorderColor			borderColor;
					VK_FALSE									// VkBool32					unnormalizedCoordinates;
				};

				Move<VkSampler>				sampler			(createSampler(vkdi, device, &samplerParams));
				inputSamplers.push_back(SamplerHandleSp(new SamplerHandleUp(sampler)));
			}
		}

		// Create descriptor buffer and image infos
		switch (descType)
		{
			case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
			case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
			{
				const VkDescriptorBufferInfo bufInfo =
				{
					**inputBuffers[bufferNdx++],				// VkBuffer					buffer;
					0,											// VkDeviceSize				offset;
					VK_WHOLE_SIZE,								// VkDeviceSize				size;
				};

				descriptorInfos.push_back(bufInfo);
				break;
			}

			case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
			{
				const VkDescriptorImageInfo	imgInfo	=
				{
					DE_NULL,									// VkSampler				sampler;
					**inputImageViews.back(),					// VkImageView				imageView;
					VK_IMAGE_LAYOUT_GENERAL						// VkImageLayout			imageLayout;
				};

				descriptorImageInfos.push_back(imgInfo);
				break;
			}

			case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
			{
				const VkDescriptorImageInfo	imgInfo	=
				{
					DE_NULL,									// VkSampler				sampler;
					**inputImageViews.back(),					// VkImageView				imageView;
					VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL	// VkImageLayout			imageLayout;
				};

				descriptorImageInfos.push_back(imgInfo);
				break;
			}

			case VK_DESCRIPTOR_TYPE_SAMPLER:
			{
				const VkDescriptorImageInfo	imgInfo	=
				{
					**inputSamplers.back(),						// VkSampler				sampler;
					DE_NULL,									// VkImageView				imageView;
					VK_IMAGE_LAYOUT_GENERAL						// VkImageLayout			imageLayout;
				};

				descriptorImageInfos.push_back(imgInfo);
				break;
			}

			case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
			{
				const VkDescriptorImageInfo	imgInfo	=
				{
					**inputSamplers.back(),						// VkSampler				sampler;
					**inputImageViews.back(),					// VkImageView				imageView;
					VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL	// VkImageLayout			imageLayout;
				};

				descriptorImageInfos.push_back(imgInfo);
				break;
			}

			default:
				DE_FATAL("Not implemented");
		}
	}

	for (deUint32 outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
	{
		DE_ASSERT(m_shaderSpec.outputs[outputNdx].getDescriptorType() == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);

		descriptorTypes.push_back(m_shaderSpec.outputs[outputNdx].getDescriptorType());

		AllocationMp		alloc;
		const BufferSp&		output		= m_shaderSpec.outputs[outputNdx].getBuffer();
		vector<deUint8>		outputBytes;

		output->getBytes(outputBytes);

		const size_t		numBytes	= outputBytes.size();
		BufferHandleUp*		buffer		= new BufferHandleUp(createBufferAndBindMemory(m_context, vkdi, device, descriptorTypes.back(), allocator, numBytes, &alloc, m_shaderSpec.usesPhysStorageBuffer, m_shaderSpec.coherentMemory));

		fillMemoryWithValue(vkdi, device, &*alloc, numBytes, 0xff, m_shaderSpec.coherentMemory);
		descriptorInfos.push_back(vk::makeDescriptorBufferInfo(**buffer, 0u, numBytes));
		outputBuffers.push_back(BufferHandleSp(buffer));
		outputAllocs.push_back(de::SharedPtr<Allocation>(alloc.release()));
	}

	// Query the buffer device addresses, write them into a new buffer, and
	// replace all the descriptors with just a descriptor for this new buffer.
	std::vector<VkDeviceAddress> gpuAddrs;
	if (m_shaderSpec.usesPhysStorageBuffer)
	{
		VkBufferDeviceAddressInfo info
		{
			VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,		// VkStructureType	sType;
			DE_NULL,											// const void*		pNext;
			0,													// VkBuffer			buffer;
		};

		for (deUint32 inputNdx = 0; inputNdx < m_shaderSpec.inputs.size(); ++inputNdx)
		{
			info.buffer = **inputBuffers[inputNdx];
			VkDeviceAddress addr = vkdi.getBufferDeviceAddress(device, &info);

			gpuAddrs.push_back(addr);
		}
		for (deUint32 outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
		{
			info.buffer = **outputBuffers[outputNdx];
			VkDeviceAddress addr = vkdi.getBufferDeviceAddress(device, &info);

			gpuAddrs.push_back(addr);
		}

		descriptorInfos.clear();
		descriptorTypes.clear();
		descriptorTypes.push_back(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
		const size_t		numBytes		= gpuAddrs.size() * sizeof(VkDeviceAddress);

		AllocationMp		bufferAlloc;
		BufferHandleUp*		buffer			= new BufferHandleUp(createBufferAndBindMemory(m_context, vkdi, device, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
																						   allocator, numBytes, &bufferAlloc, false, m_shaderSpec.coherentMemory));

		setMemory(vkdi, device, &*bufferAlloc, numBytes, &gpuAddrs.front(), m_shaderSpec.coherentMemory);
		inputBuffers.push_back(BufferHandleSp(buffer));
		inputAllocs.push_back(de::SharedPtr<Allocation>(bufferAlloc.release()));

		descriptorInfos.push_back(vk::makeDescriptorBufferInfo(**buffer, 0u, numBytes));
	}
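
	// With physical storage buffers the shader no longer sees one descriptor
	// per resource: it sees a single storage buffer whose contents are the
	// device addresses of every input followed by every output, in
	// declaration order. Viewed from the host, the replacement buffer is laid
	// out as if it were (array sizes hypothetical):
	//
	//	struct AddressTable
	//	{
	//		VkDeviceAddress	inputs[numInputs];		// filled first, in input order
	//		VkDeviceAddress	outputs[numOutputs];	// then outputs, in output order
	//	};
	//
	// and the shader chases these addresses through PhysicalStorageBuffer
	// pointers instead of bound descriptors.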

	// Create layouts and descriptor set.

	Unique<VkDescriptorSetLayout>		descriptorSetLayout	(createDescriptorSetLayout(vkdi, device, descriptorTypes));
	Unique<VkPipelineLayout>			pipelineLayout		(createPipelineLayout(vkdi, device, *descriptorSetLayout, m_shaderSpec.pushConstants));
	Unique<VkDescriptorPool>			descriptorPool		(createDescriptorPool(vkdi, device, descriptorTypes));
	Unique<VkDescriptorSet>				descriptorSet		(createDescriptorSet(vkdi, device, *descriptorPool, *descriptorSetLayout, descriptorTypes, descriptorInfos, descriptorImageInfos));

	// Create compute shader and pipeline.

	const ProgramBinary&				binary				= m_context.getBinaryCollection().get("compute");
	if (m_shaderSpec.verifyBinary && !m_shaderSpec.verifyBinary(binary))
	{
		return tcu::TestStatus::fail("Binary verification of SPIR-V in the test failed");
	}
	Unique<VkShaderModule>				module				(createShaderModule(vkdi, device, binary, (VkShaderModuleCreateFlags)0u));

	Unique<VkPipeline>					computePipeline		(createComputePipeline(vkdi, device, *pipelineLayout, *module, m_shaderSpec.entryPoint.c_str(), m_shaderSpec.specConstants));

	// Record commands

	const tcu::IVec3&				numWorkGroups		= m_shaderSpec.numWorkGroups;

	beginCommandBuffer(vkdi, *cmdBuffer);
	vkdi.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
	vkdi.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0, 1, &descriptorSet.get(), 0, DE_NULL);
	if (m_shaderSpec.pushConstants != DE_NULL)
	{
		vector<deUint8>	pushConstantsBytes;
		m_shaderSpec.pushConstants->getBytes(pushConstantsBytes);

		const deUint32	size	= static_cast<deUint32>(pushConstantsBytes.size());
		const void*		data	= &pushConstantsBytes.front();

		vkdi.cmdPushConstants(*cmdBuffer, *pipelineLayout, VK_SHADER_STAGE_COMPUTE_BIT, /* offset = */ 0, /* size = */ size, data);
	}
	vkdi.cmdDispatch(*cmdBuffer, numWorkGroups.x(), numWorkGroups.y(), numWorkGroups.z());

	// Insert barriers so data written by the shader is available to the host
	for (deUint32 outputBufferNdx = 0; outputBufferNdx < outputBuffers.size(); ++outputBufferNdx)
	{
		const VkBufferMemoryBarrier buf_barrier =
		{
			VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,	//    VkStructureType    sType;
			DE_NULL,									//    const void*        pNext;
			VK_ACCESS_SHADER_WRITE_BIT,					//    VkAccessFlags      srcAccessMask;
			VK_ACCESS_HOST_READ_BIT,					//    VkAccessFlags      dstAccessMask;
			VK_QUEUE_FAMILY_IGNORED,					//    uint32_t           srcQueueFamilyIndex;
			VK_QUEUE_FAMILY_IGNORED,					//    uint32_t           dstQueueFamilyIndex;
			**outputBuffers[outputBufferNdx],			//    VkBuffer           buffer;
			0,											//    VkDeviceSize       offset;
			VK_WHOLE_SIZE								//    VkDeviceSize       size;
		};

		vkdi.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0, 0, DE_NULL, 1, &buf_barrier, 0, DE_NULL);
	}
	endCommandBuffer(vkdi, *cmdBuffer);

	submitCommandsAndWait(vkdi, device, queue, *cmdBuffer);
	m_context.resetCommandPoolForVKSC(device, *cmdPool);

	// Invalidate output memory ranges before checking on host.
	for (size_t outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
	{
		invalidateMemory(vkdi, device, outputAllocs[outputNdx].get(), m_shaderSpec.coherentMemory);
	}

	// Check output.
	if (m_shaderSpec.verifyIO)
	{
		if (!(*m_shaderSpec.verifyIO)(m_shaderSpec.inputs, outputAllocs, m_shaderSpec.outputs, m_context.getTestContext().getLog()))
			return tcu::TestStatus(m_shaderSpec.failResult, m_shaderSpec.failMessage);
	}
	else
	{
		for (size_t outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
		{
			const BufferSp&	expectedOutput = m_shaderSpec.outputs[outputNdx].getBuffer();
			vector<deUint8>	expectedBytes;

			expectedOutput->getBytes(expectedBytes);

			if (deMemCmp(&expectedBytes.front(), outputAllocs[outputNdx]->getHostPtr(), expectedBytes.size()))
			{
				const size_t	errorsMax	= 16u;
				const deUint8*	ptrHost		= static_cast<deUint8*>(outputAllocs[outputNdx]->getHostPtr());
				const deUint8*	ptrExpected	= static_cast<deUint8*>(&expectedBytes.front());
				size_t			errors		= 0u;
				size_t			ndx			= 0u;

				// Skip ahead to the first mismatching byte
				for (; ndx < expectedBytes.size(); ++ndx)
				{
					if (ptrHost[ndx] != ptrExpected[ndx])
						break;
				}

				// Log mismatches until the error cap is reached
				for (; ndx < expectedBytes.size(); ++ndx)
				{
					if (ptrHost[ndx] != ptrExpected[ndx])
					{
						m_context.getTestContext().getLog() << tcu::TestLog::Message
															<< "OutputBuffer:" << outputNdx
															<< " got:" << ((deUint32)ptrHost[ndx])
															<< " expected:" << ((deUint32)ptrExpected[ndx])
															<< " at byte " << ndx << tcu::TestLog::EndMessage;
						errors++;

						if (errors >= errorsMax)
						{
							m_context.getTestContext().getLog() << tcu::TestLog::Message << "Maximum error count reached (" << errors << "). Stopping output."
																<< tcu::TestLog::EndMessage;
							break;
						}
					}
				}

				return tcu::TestStatus(m_shaderSpec.failResult, m_shaderSpec.failMessage);
			}
		}
	}

	return tcu::TestStatus::pass("Output matches expected");
}

} // SpirVAssembly
} // vkt