/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2017-2019 The Khronos Group Inc.
 * Copyright (c) 2018-2019 NVIDIA Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *	  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Tests for VK_EXT_fragment_shader_interlock.
 * These tests render a set of overlapping full-screen quads that use image
 * or buffer reads and writes to accumulate values into a result image/buffer.
 * They use fragment shader interlock to avoid race conditions on the read/write
 * and validate that the final result includes all the writes.
 * Each fragment shader invocation computes a coordinate, and does a read/modify/write
 * into the image or buffer, inside the interlock. The value in memory accumulates a bitmask
 * indicating which primitives or samples have already run through the interlock. E.g.
 * for single-sample PIXEL_UNORDERED mode, there is one bit in the bitmask for each primitive
 * and each primitive ORs in its own bit. For PIXEL_ORDERED mode, each invocation also tests
 * that all the previous primitives (less significant bits) are already set, else it clobbers the
 * value. Sample and shading_rate interlock are variants of this where there is one value per
 * sample or per coarse fragment location, respectively. When there are multiple samples per
 * fragment, we merge in the whole sample mask. But within a pixel, we don't try to distinguish
 * primitive order between samples on the internal diagonal of the quad (triangle strip).
 *//*--------------------------------------------------------------------*/
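
/*
 * Worked example (annotation): in the single-sampled PIXEL_ORDERED case,
 * bitsPerQuad is 1, so the test draws N = 32 instances and instance i
 * contributes bit (1u << i). A conforming implementation executes the
 * critical sections for a given pixel in primitive order, so when instance
 * i runs, bits 0..i-1 are already set:
 *
 *   uint mask         = 1u << i;
 *   uint previousMask = mask - 1u;          // bits of all earlier instances
 *   temp = ((temp & previousMask) == previousMask) ? (temp | mask) : 0u;
 *
 * After all 32 instances, every pixel holds 0xFFFFFFFF; any ordering or
 * mutual-exclusion failure either zeroes the word or leaves bits unset,
 * which the final readback check detects.
 */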

#include "vktFragmentShaderInterlockBasic.hpp"

#include "vkBufferWithMemory.hpp"
#include "vkImageWithMemory.hpp"
#include "vkQueryUtil.hpp"
#include "vkDeviceUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkObjUtil.hpp"

#include "vktTestGroupUtil.hpp"
#include "vktTestCase.hpp"
#include "vktCustomInstancesDevices.hpp"

#include "deDefs.h"
#include "deMath.h"
#include "deRandom.h"
#include "deSharedPtr.hpp"
#include "deString.h"

#include "tcuTestCase.hpp"
#include "tcuTestLog.hpp"
#include "tcuCommandLine.hpp"

#include <string>
#include <sstream>

namespace vkt
{
namespace FragmentShaderInterlock
{
namespace
{
using namespace vk;
using namespace std;

typedef enum
{
	RES_SSBO = 0,
	RES_IMAGE,
} Resource;

typedef enum
{
	INT_PIXEL_ORDERED = 0,
	INT_PIXEL_UNORDERED,
	INT_SAMPLE_ORDERED,
	INT_SAMPLE_UNORDERED,
	INT_SHADING_RATE_ORDERED,
	INT_SHADING_RATE_UNORDERED,
} Interlock;

struct CaseDef
{
	deUint32 dim;
	Resource resType;
	Interlock interlock;
	VkSampleCountFlagBits samples;
	bool killOdd;
	bool sampleShading;

	bool isSampleInterlock() const
	{
		return sampleShading || interlock == INT_SAMPLE_ORDERED || interlock == INT_SAMPLE_UNORDERED;
	}
	bool isOrdered() const
	{
		return interlock == INT_PIXEL_ORDERED || interlock == INT_SAMPLE_ORDERED || interlock == INT_SHADING_RATE_ORDERED;
	}
};

class FSITestInstance : public TestInstance
{
public:
						FSITestInstance		(Context& context, const CaseDef& data);
						~FSITestInstance	(void);
	tcu::TestStatus		iterate				(void);

private:
	CaseDef				m_data;
};

FSITestInstance::FSITestInstance (Context& context, const CaseDef& data)
	: vkt::TestInstance		(context)
	, m_data				(data)
{
}

FSITestInstance::~FSITestInstance (void)
{
}

class FSITestCase : public TestCase
{
public:
								FSITestCase		(tcu::TestContext& context, const char* name, const CaseDef data);
								~FSITestCase	(void);
	virtual	void				initPrograms	(SourceCollections& programCollection) const;
	virtual TestInstance*		createInstance	(Context& context) const;
	virtual void				checkSupport	(Context& context) const;

private:
	CaseDef						m_data;
};

FSITestCase::FSITestCase (tcu::TestContext& context, const char* name, const CaseDef data)
	: vkt::TestCase	(context, name)
	, m_data		(data)
{
}

FSITestCase::~FSITestCase (void)
{
}

void FSITestCase::checkSupport(Context& context) const
{
	context.requireDeviceFunctionality("VK_EXT_fragment_shader_interlock");

	if ((m_data.interlock == INT_SAMPLE_ORDERED || m_data.interlock == INT_SAMPLE_UNORDERED) &&
		!context.getFragmentShaderInterlockFeaturesEXT().fragmentShaderSampleInterlock)
	{
		TCU_THROW(NotSupportedError, "Fragment shader sample interlock not supported");
	}

	if ((m_data.interlock == INT_PIXEL_ORDERED || m_data.interlock == INT_PIXEL_UNORDERED) &&
		!context.getFragmentShaderInterlockFeaturesEXT().fragmentShaderPixelInterlock)
	{
		TCU_THROW(NotSupportedError, "Fragment shader pixel interlock not supported");
	}

#ifndef CTS_USES_VULKANSC
	if ((m_data.interlock == INT_SHADING_RATE_ORDERED || m_data.interlock == INT_SHADING_RATE_UNORDERED) &&
		!context.getFragmentShaderInterlockFeaturesEXT().fragmentShaderShadingRateInterlock)
	{
		TCU_THROW(NotSupportedError, "Fragment shader shading rate interlock not supported");
	}
	if ((m_data.interlock == INT_SHADING_RATE_ORDERED || m_data.interlock == INT_SHADING_RATE_UNORDERED) &&
		(!context.getFragmentShadingRateFeatures().pipelineFragmentShadingRate ||
		 !context.getFragmentShadingRateProperties().fragmentShadingRateWithFragmentShaderInterlock))
	{
		TCU_THROW(NotSupportedError, "Fragment shading rate not supported");
	}
#endif // CTS_USES_VULKANSC
}

static int bitsPerQuad(const CaseDef &c)
{
	deUint32 bpq = c.samples;

	if (c.isSampleInterlock())
		bpq = 1;
	else if (c.interlock == INT_SHADING_RATE_ORDERED || c.interlock == INT_SHADING_RATE_UNORDERED)
		bpq *= 4;

	return bpq;
}
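
// Annotation: how many result bits each instanced quad contributes. E.g. at
// 4xaa, pixel interlock merges the whole 4-bit sample mask per fragment
// (bpq = 4, so 32 / 4 = 8 instances are drawn); sample interlock runs per
// sample and stores a single bit per invocation (bpq = 1, 32 instances);
// 2x2 shading-rate interlock covers four pixels per coarse fragment, so the
// mask is samples * 4 bits wide (bpq = 16 at 4xaa, giving 2 instances).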

void FSITestCase::initPrograms (SourceCollections& programCollection) const
{
	std::stringstream vss;

	vss <<
		"#version 450 core\n"
		"layout(location = 0) out int primID;\n"
		"void main()\n"
		"{\n"
		"  primID = gl_InstanceIndex;\n"
		// full-viewport quad
		"  gl_Position = vec4( 2.0*float(gl_VertexIndex&2) - 1.0, 4.0*(gl_VertexIndex&1)-1.0, 1.0 - 2.0 * float(gl_VertexIndex&1), 1);\n"
		"}\n";
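
	// Annotation: with gl_VertexIndex 0..3 the strip vertices land at
	// clip-space (-1,-1), (-1,3), (3,-1) and (3,3), i.e. an oversized quad
	// whose two triangles cover the whole viewport after clipping. z varies
	// across the quad, but positions don't depend on the instance, so the
	// only ordering of interest is primitive (instance) order.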

	programCollection.glslSources.add("vert") << glu::VertexSource(vss.str());

	std::stringstream fss;

	fss <<
		"#version 450 core\n"
		"#extension GL_ARB_fragment_shader_interlock : enable\n"
		"#extension GL_NV_shading_rate_image : enable\n"
		"layout(r32ui, set = 0, binding = 0) coherent uniform uimage2D image0;\n"
		"layout(std430, set = 0, binding = 1) coherent buffer B1 { uint x[]; } buf1;\n"
		"layout(location = 0) flat in int primID;\n";

	switch (m_data.interlock)
	{
		default:							DE_ASSERT(0);												// fallthrough
		case INT_PIXEL_ORDERED:				fss << "layout(pixel_interlock_ordered) in;\n";				break;
		case INT_PIXEL_UNORDERED:			fss << "layout(pixel_interlock_unordered) in;\n";			break;
		case INT_SAMPLE_ORDERED:			fss << "layout(sample_interlock_ordered) in;\n";			break;
		case INT_SAMPLE_UNORDERED:			fss << "layout(sample_interlock_unordered) in;\n";			break;
		case INT_SHADING_RATE_ORDERED:		fss << "layout(shading_rate_interlock_ordered) in;\n";		break;
		case INT_SHADING_RATE_UNORDERED:	fss << "layout(shading_rate_interlock_unordered) in;\n";	break;
	}

	// Each fragment shader invocation computes a coordinate, and does a read/modify/write
	// into the image or buffer, inside the interlock. The value in memory accumulates a bitmask
	// indicating which primitives or samples have already run through the interlock. E.g.
	// for single-sample PIXEL_UNORDERED mode, there is one bit in the bitmask for each primitive
	// and each primitive ORs in its own bit. For PIXEL_ORDERED mode, each invocation also tests
	// that all the previous primitives (less significant bits) are already set, else it clobbers the
	// value. Sample and shading_rate interlock are variants of this where there is one value per
	// sample or per coarse fragment location, respectively. When there are multiple samples per
	// fragment, we merge in the whole sample mask. But within a pixel, we don't try to distinguish
	// primitive order between samples on the internal diagonal of the quad (triangle strip).

	fss <<
		"void main()\n"
		"{\n"
		"  ivec2 coordxy = ivec2(gl_FragCoord.xy);\n"
		"  uint stride = " << m_data.dim << ";\n"
		"  uint bitsPerQuad = " << bitsPerQuad(m_data) << ";\n";

	// Compute the coordinate
	if (m_data.isSampleInterlock())
	{
		// Spread samples out in the x dimension
		fss << "  coordxy.x = coordxy.x * " << m_data.samples << " + gl_SampleID;\n";
		fss << "  stride *= " << m_data.samples << ";\n";
	}
	else if (m_data.interlock == INT_SHADING_RATE_ORDERED || m_data.interlock == INT_SHADING_RATE_UNORDERED)
	{
		// shading rate is 2x2. Divide xy by 2
		fss << "  coordxy /= 2;\n";
		fss << "  stride /= 2;\n";
	}

	if (m_data.isSampleInterlock())
	{
		// sample interlock runs per-sample, and stores one bit per sample
		fss << "  uint mask = 1 << primID;\n";
		fss << "  uint previousMask = (1 << primID)-1;\n";
	}
	else
	{
		// pixel and shading_rate interlock run per-fragment, and store the sample mask
		fss << "  uint mask = gl_SampleMaskIn[0] << (primID * bitsPerQuad);\n";
		fss << "  uint previousMask = (1 << (primID * bitsPerQuad))-1;\n";
	}
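
	// Annotation, worked example: for sample interlock with primID = 3 the
	// shader computes mask = 0b1000 and previousMask = 0b0111. For pixel
	// interlock at 4xaa with primID = 2 and full coverage, gl_SampleMaskIn[0]
	// is 0xF, so mask = 0xF << 8 and previousMask = 0xFF, i.e. quad i owns
	// the bit range [i*bitsPerQuad, (i+1)*bitsPerQuad).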

	// Exercise discard before and during the interlock
	if (m_data.killOdd)
		fss << "  if (coordxy.y < " << m_data.dim / 4 << " && (coordxy.x & 1) != 0) discard;\n";

	fss << "  beginInvocationInterlockARB();\n";

	if (m_data.killOdd)
		fss << "  if ((coordxy.x & 1) != 0) discard;\n";

	// Read the current value from the image or buffer
	if (m_data.resType == RES_IMAGE)
		fss << "  uint temp = imageLoad(image0, coordxy).x;\n";
	else
	{
		fss << "  uint coord = coordxy.y * stride + coordxy.x;\n";
		fss << "  uint temp = buf1.x[coord];\n";
	}

	// Update the value. For "ordered" modes, check that all the previous primitives'
	// bits are already set
	if (m_data.isOrdered())
		fss << "  if ((temp & previousMask) == previousMask) temp |= mask; else temp = 0;\n";
	else
		fss << "  temp |= mask;\n";
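
	// Annotation: on a conforming implementation the ordered interlock runs
	// the critical sections in primitive order, so the "else temp = 0" path
	// is never taken. If an implementation ran primitive i before some j < i,
	// bit j would still be clear within previousMask's range, the value would
	// be zeroed, and the final comparison against the expected bitmask fails.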

	// Store out the new value
	if (m_data.resType == RES_IMAGE)
		fss << "  imageStore(image0, coordxy, uvec4(temp, 0, 0, 0));\n";
	else
		fss << "  buf1.x[coord] = temp;\n";

	fss << "  endInvocationInterlockARB();\n";

	if (m_data.killOdd)
		fss << "  discard;\n";

	fss << "}\n";

	programCollection.glslSources.add("frag") << glu::FragmentSource(fss.str());
}

TestInstance* FSITestCase::createInstance (Context& context) const
{
	return new FSITestInstance(context, m_data);
}

tcu::TestStatus FSITestInstance::iterate (void)
{
	const DeviceInterface&	vk						= m_context.getDeviceInterface();
	const VkDevice			device					= m_context.getDevice();
	Allocator&				allocator				= m_context.getDefaultAllocator();
	VkFlags					allShaderStages			= VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
	VkFlags					allPipelineStages		= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;

	VkPipelineBindPoint bindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;

	Move<vk::VkDescriptorSetLayout>	descriptorSetLayout;
	Move<vk::VkDescriptorPool>		descriptorPool;
	Move<vk::VkDescriptorSet>		descriptorSet;

	VkDescriptorPoolCreateFlags poolCreateFlags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
	VkDescriptorSetLayoutCreateFlags layoutCreateFlags = 0;

	const VkDescriptorSetLayoutBinding bindings[2] =
	{
		{
			0u,										// binding
			VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,		// descriptorType
			1u,										// descriptorCount
			allShaderStages,						// stageFlags
			DE_NULL,								// pImmutableSamplers
		},
		{
			1u,										// binding
			VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,		// descriptorType
			1u,										// descriptorCount
			allShaderStages,						// stageFlags
			DE_NULL,								// pImmutableSamplers
		},
	};

	// Create a layout and allocate a descriptor set for it.
	const VkDescriptorSetLayoutCreateInfo setLayoutCreateInfo =
	{
		vk::VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,	// sType
		DE_NULL,													// pNext
		layoutCreateFlags,											// flags
		2u,															// bindingCount
		&bindings[0]												// pBindings
	};

	descriptorSetLayout = vk::createDescriptorSetLayout(vk, device, &setLayoutCreateInfo);

	vk::DescriptorPoolBuilder poolBuilder;
	poolBuilder.addType(bindings[0].descriptorType, 1);
	poolBuilder.addType(bindings[1].descriptorType, 1);

	descriptorPool = poolBuilder.build(vk, device, poolCreateFlags, 1u);
	descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);

	// one uint per sample (max of 4 samples)
	VkDeviceSize bufferSize = m_data.dim*m_data.dim*sizeof(deUint32)*4;
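
	// Annotation: the buffer is sized for the worst case. Sample interlock
	// at 4xaa spreads samples out in x, so up to dim * samples * dim uints
	// are addressed; e.g. dim = 64 gives 64 * 64 * 4 * 4 bytes = 64 KiB.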

	de::MovePtr<BufferWithMemory> buffer;
	buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
		vk, device, allocator, makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible));

	flushAlloc(vk, device, buffer->getAllocation());

	const VkQueue					queue					= getDeviceQueue(vk, device, m_context.getUniversalQueueFamilyIndex(), 0);
	Move<VkCommandPool>				cmdPool					= createCommandPool(vk, device, 0, m_context.getUniversalQueueFamilyIndex());
	Move<VkCommandBuffer>			cmdBuffer				= allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

	beginCommandBuffer(vk, *cmdBuffer, 0u);

	const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
	{
		VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,				// sType
		DE_NULL,													// pNext
		(VkPipelineLayoutCreateFlags)0,
		1,															// setLayoutCount
		&descriptorSetLayout.get(),									// pSetLayouts
		0u,															// pushConstantRangeCount
		DE_NULL,													// pPushConstantRanges
	};

	Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);

	de::MovePtr<BufferWithMemory> copyBuffer;
	copyBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
		vk, device, allocator, makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible | MemoryRequirement::Cached));

	const VkImageCreateInfo			imageCreateInfo			=
	{
		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	// VkStructureType			sType;
		DE_NULL,								// const void*				pNext;
		(VkImageCreateFlags)0u,					// VkImageCreateFlags		flags;
		VK_IMAGE_TYPE_2D,						// VkImageType				imageType;
		VK_FORMAT_R32_UINT,						// VkFormat					format;
		{
			m_data.dim * m_data.samples,		// deUint32	width;
			m_data.dim,							// deUint32	height;
			1u									// deUint32	depth;
		},										// VkExtent3D				extent;
		1u,										// deUint32					mipLevels;
		1u,										// deUint32					arrayLayers;
		VK_SAMPLE_COUNT_1_BIT,					// VkSampleCountFlagBits	samples;
		VK_IMAGE_TILING_OPTIMAL,				// VkImageTiling			tiling;
		VK_IMAGE_USAGE_STORAGE_BIT
		| VK_IMAGE_USAGE_TRANSFER_SRC_BIT
		| VK_IMAGE_USAGE_TRANSFER_DST_BIT,		// VkImageUsageFlags		usage;
		VK_SHARING_MODE_EXCLUSIVE,				// VkSharingMode			sharingMode;
		0u,										// deUint32					queueFamilyIndexCount;
		DE_NULL,								// const deUint32*			pQueueFamilyIndices;
		VK_IMAGE_LAYOUT_UNDEFINED				// VkImageLayout			initialLayout;
	};

	VkImageViewCreateInfo		imageViewCreateInfo		=
	{
		VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,	// VkStructureType			sType;
		DE_NULL,									// const void*				pNext;
		(VkImageViewCreateFlags)0u,					// VkImageViewCreateFlags	flags;
		DE_NULL,									// VkImage					image;
		VK_IMAGE_VIEW_TYPE_2D,						// VkImageViewType			viewType;
		VK_FORMAT_R32_UINT,							// VkFormat					format;
		{
			VK_COMPONENT_SWIZZLE_R,					// VkComponentSwizzle	r;
			VK_COMPONENT_SWIZZLE_G,					// VkComponentSwizzle	g;
			VK_COMPONENT_SWIZZLE_B,					// VkComponentSwizzle	b;
			VK_COMPONENT_SWIZZLE_A					// VkComponentSwizzle	a;
		},											// VkComponentMapping		components;
		{
			VK_IMAGE_ASPECT_COLOR_BIT,				// VkImageAspectFlags	aspectMask;
			0u,										// deUint32				baseMipLevel;
			1u,										// deUint32				levelCount;
			0u,										// deUint32				baseArrayLayer;
			1u										// deUint32				layerCount;
		}											// VkImageSubresourceRange	subresourceRange;
	};

	de::MovePtr<ImageWithMemory> image;
	Move<VkImageView> imageView;

	image = de::MovePtr<ImageWithMemory>(new ImageWithMemory(
		vk, device, allocator, imageCreateInfo, MemoryRequirement::Any));
	imageViewCreateInfo.image = **image;
	imageView = createImageView(vk, device, &imageViewCreateInfo, NULL);

	VkDescriptorImageInfo imageInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
	VkDescriptorBufferInfo bufferInfo = makeDescriptorBufferInfo(**buffer, 0, bufferSize);

	VkWriteDescriptorSet w =
	{
		VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,							// sType
		DE_NULL,														// pNext
		*descriptorSet,													// dstSet
		(deUint32)0,													// dstBinding
		0,																// dstArrayElement
		1u,																// descriptorCount
		bindings[0].descriptorType,										// descriptorType
		&imageInfo,														// pImageInfo
		&bufferInfo,													// pBufferInfo
		DE_NULL,														// pTexelBufferView
	};
	vk.updateDescriptorSets(device, 1, &w, 0, NULL);

	w.dstBinding = 1;
	w.descriptorType = bindings[1].descriptorType;
	vk.updateDescriptorSets(device, 1, &w, 0, NULL);

	vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0, 1, &descriptorSet.get(), 0, DE_NULL);

	VkBool32 shadingRateEnable = m_data.interlock == INT_SHADING_RATE_ORDERED ||
								 m_data.interlock == INT_SHADING_RATE_UNORDERED ? VK_TRUE : VK_FALSE;

	Move<VkPipeline> pipeline;
	Move<VkRenderPass> renderPass;
	Move<VkFramebuffer> framebuffer;

	{
		const vk::VkSubpassDescription		subpassDesc			=
		{
			(vk::VkSubpassDescriptionFlags)0,
			vk::VK_PIPELINE_BIND_POINT_GRAPHICS,					// pipelineBindPoint
			0u,														// inputCount
			DE_NULL,												// pInputAttachments
			0u,														// colorCount
			DE_NULL,												// pColorAttachments
			DE_NULL,												// pResolveAttachments
			DE_NULL,												// depthStencilAttachment
			0u,														// preserveCount
			DE_NULL,												// pPreserveAttachments
		};
		const vk::VkRenderPassCreateInfo	renderPassParams	=
		{
			vk::VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,			// sType
			DE_NULL,												// pNext
			(vk::VkRenderPassCreateFlags)0,
			0u,														// attachmentCount
			DE_NULL,												// pAttachments
			1u,														// subpassCount
			&subpassDesc,											// pSubpasses
			0u,														// dependencyCount
			DE_NULL,												// pDependencies
		};

		renderPass = createRenderPass(vk, device, &renderPassParams);

		const vk::VkFramebufferCreateInfo	framebufferParams	=
		{
			vk::VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,	// sType
			DE_NULL,										// pNext
			(vk::VkFramebufferCreateFlags)0,
			*renderPass,									// renderPass
			0u,												// attachmentCount
			DE_NULL,										// pAttachments
			m_data.dim,										// width
			m_data.dim,										// height
			1u,												// layers
		};

		framebuffer = createFramebuffer(vk, device, &framebufferParams);

		const VkPipelineVertexInputStateCreateInfo		vertexInputStateCreateInfo		=
		{
			VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,	// VkStructureType							sType;
			DE_NULL,													// const void*								pNext;
			(VkPipelineVertexInputStateCreateFlags)0,					// VkPipelineVertexInputStateCreateFlags	flags;
			0u,															// deUint32									vertexBindingDescriptionCount;
			DE_NULL,													// const VkVertexInputBindingDescription*	pVertexBindingDescriptions;
			0u,															// deUint32									vertexAttributeDescriptionCount;
			DE_NULL														// const VkVertexInputAttributeDescription*	pVertexAttributeDescriptions;
		};

		const VkPipelineInputAssemblyStateCreateInfo	inputAssemblyStateCreateInfo	=
		{
			VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,	// VkStructureType							sType;
			DE_NULL,														// const void*								pNext;
			(VkPipelineInputAssemblyStateCreateFlags)0,						// VkPipelineInputAssemblyStateCreateFlags	flags;
			VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,							// VkPrimitiveTopology						topology;
			VK_FALSE														// VkBool32									primitiveRestartEnable;
		};

		const VkPipelineRasterizationStateCreateInfo	rasterizationStateCreateInfo	=
		{
			VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,		// VkStructureType							sType;
			DE_NULL,														// const void*								pNext;
			(VkPipelineRasterizationStateCreateFlags)0,						// VkPipelineRasterizationStateCreateFlags	flags;
			VK_FALSE,														// VkBool32									depthClampEnable;
			VK_FALSE,														// VkBool32									rasterizerDiscardEnable;
			VK_POLYGON_MODE_FILL,											// VkPolygonMode							polygonMode;
			VK_CULL_MODE_NONE,												// VkCullModeFlags							cullMode;
			VK_FRONT_FACE_CLOCKWISE,										// VkFrontFace								frontFace;
			VK_FALSE,														// VkBool32									depthBiasEnable;
			0.0f,															// float									depthBiasConstantFactor;
			0.0f,															// float									depthBiasClamp;
			0.0f,															// float									depthBiasSlopeFactor;
			1.0f															// float									lineWidth;
		};

		const VkPipelineMultisampleStateCreateInfo		multisampleStateCreateInfo =
		{
			VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,	// VkStructureType							sType
			DE_NULL,													// const void*								pNext
			0u,															// VkPipelineMultisampleStateCreateFlags	flags
			(VkSampleCountFlagBits)m_data.samples,						// VkSampleCountFlagBits					rasterizationSamples
			m_data.sampleShading ? VK_TRUE : VK_FALSE,					// VkBool32									sampleShadingEnable
			1.0f,														// float									minSampleShading
			DE_NULL,													// const VkSampleMask*						pSampleMask
			VK_FALSE,													// VkBool32									alphaToCoverageEnable
			VK_FALSE													// VkBool32									alphaToOneEnable
		};

		VkViewport viewport = makeViewport(m_data.dim, m_data.dim);
		VkRect2D scissor = makeRect2D(m_data.dim, m_data.dim);

		VkPipelineFragmentShadingRateStateCreateInfoKHR shadingRateStateCreateInfo =
		{
			VK_STRUCTURE_TYPE_PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR,								// VkStructureType						sType;
			DE_NULL,																							// const void*							pNext;
			{ 2, 2 },																							// VkExtent2D							fragmentSize;
			{ VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR, VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR },	// VkFragmentShadingRateCombinerOpKHR	combinerOps[2];
		};
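
		// Annotation: the fixed 2x2 pipeline fragment size matches the
		// "coordxy /= 2" in the fragment shader, so each coarse-fragment
		// invocation maps to one result word covering a 2x2 pixel block.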

		const VkPipelineViewportStateCreateInfo			viewportStateCreateInfo				=
		{
			VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,		// VkStructureType							sType
			DE_NULL,													// const void*								pNext
			(VkPipelineViewportStateCreateFlags)0,						// VkPipelineViewportStateCreateFlags		flags
			1u,															// deUint32									viewportCount
			&viewport,													// const VkViewport*						pViewports
			1u,															// deUint32									scissorCount
			&scissor													// const VkRect2D*							pScissors
		};

		Move<VkShaderModule> fs = createShaderModule(vk, device, m_context.getBinaryCollection().get("frag"), 0);
		Move<VkShaderModule> vs = createShaderModule(vk, device, m_context.getBinaryCollection().get("vert"), 0);
		deUint32 numStages = 2u;

		const VkPipelineShaderStageCreateInfo	shaderCreateInfo[2] =
		{
			{
				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
				DE_NULL,
				(VkPipelineShaderStageCreateFlags)0,
				VK_SHADER_STAGE_VERTEX_BIT,									// stage
				*vs,														// shader
				"main",
				DE_NULL,													// pSpecializationInfo
			},
			{
				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
				DE_NULL,
				(VkPipelineShaderStageCreateFlags)0,
				VK_SHADER_STAGE_FRAGMENT_BIT,								// stage
				*fs,														// shader
				"main",
				DE_NULL,													// pSpecializationInfo
			}
		};

		const VkGraphicsPipelineCreateInfo				graphicsPipelineCreateInfo		=
		{
			VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,			// VkStructureType									sType;
			shadingRateEnable ? &shadingRateStateCreateInfo : DE_NULL,	// const void*										pNext;
			(VkPipelineCreateFlags)0,									// VkPipelineCreateFlags							flags;
			numStages,													// deUint32											stageCount;
			&shaderCreateInfo[0],										// const VkPipelineShaderStageCreateInfo*			pStages;
			&vertexInputStateCreateInfo,								// const VkPipelineVertexInputStateCreateInfo*		pVertexInputState;
			&inputAssemblyStateCreateInfo,								// const VkPipelineInputAssemblyStateCreateInfo*	pInputAssemblyState;
			DE_NULL,													// const VkPipelineTessellationStateCreateInfo*		pTessellationState;
			&viewportStateCreateInfo,									// const VkPipelineViewportStateCreateInfo*			pViewportState;
			&rasterizationStateCreateInfo,								// const VkPipelineRasterizationStateCreateInfo*	pRasterizationState;
			&multisampleStateCreateInfo,								// const VkPipelineMultisampleStateCreateInfo*		pMultisampleState;
			DE_NULL,													// const VkPipelineDepthStencilStateCreateInfo*		pDepthStencilState;
			DE_NULL,													// const VkPipelineColorBlendStateCreateInfo*		pColorBlendState;
			DE_NULL,													// const VkPipelineDynamicStateCreateInfo*			pDynamicState;
			pipelineLayout.get(),										// VkPipelineLayout									layout;
			renderPass.get(),											// VkRenderPass										renderPass;
			0u,															// deUint32											subpass;
			DE_NULL,													// VkPipeline										basePipelineHandle;
			0															// int												basePipelineIndex;
		};

		pipeline = createGraphicsPipeline(vk, device, DE_NULL, &graphicsPipelineCreateInfo);
	}

	const VkImageMemoryBarrier imageBarrier =
	{
		VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,				// VkStructureType		sType
		DE_NULL,											// const void*			pNext
		0u,													// VkAccessFlags		srcAccessMask
		VK_ACCESS_TRANSFER_WRITE_BIT,						// VkAccessFlags		dstAccessMask
		VK_IMAGE_LAYOUT_UNDEFINED,							// VkImageLayout		oldLayout
		VK_IMAGE_LAYOUT_GENERAL,							// VkImageLayout		newLayout
		VK_QUEUE_FAMILY_IGNORED,							// uint32_t				srcQueueFamilyIndex
		VK_QUEUE_FAMILY_IGNORED,							// uint32_t				dstQueueFamilyIndex
		**image,											// VkImage				image
		{
			VK_IMAGE_ASPECT_COLOR_BIT,				// VkImageAspectFlags	aspectMask
			0u,										// uint32_t				baseMipLevel
			1u,										// uint32_t				mipLevels,
			0u,										// uint32_t				baseArray
			1u,										// uint32_t				arraySize
		}
	};

	vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
							(VkDependencyFlags)0,
							0, (const VkMemoryBarrier*)DE_NULL,
							0, (const VkBufferMemoryBarrier*)DE_NULL,
							1, &imageBarrier);

	vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);

	VkImageSubresourceRange range = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
	VkClearValue clearColor = makeClearValueColorU32(0,0,0,0);

	VkMemoryBarrier					memBarrier =
	{
		VK_STRUCTURE_TYPE_MEMORY_BARRIER,	// sType
		DE_NULL,							// pNext
		0u,									// srcAccessMask
		0u,									// dstAccessMask
	};

	vk.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_GENERAL, &clearColor.color, 1, &range);

	vk.cmdFillBuffer(*cmdBuffer, **buffer, 0, bufferSize, 0);

	memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
	memBarrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
	vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, allPipelineStages,
		0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL);

	beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer,
					makeRect2D(m_data.dim, m_data.dim),
					0, DE_NULL, VK_SUBPASS_CONTENTS_INLINE);

	// Draw N fullscreen "quads", one per instance.
	deUint32 N = 32 / bitsPerQuad(m_data);
	deUint32 expectedValue = 0xFFFFFFFF;
	vk.cmdDraw(*cmdBuffer, 4u, N, 0u, 0u);
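
	// Annotation: each instance contributes bitsPerQuad bits, and instance i
	// owns bits [i*bitsPerQuad, (i+1)*bitsPerQuad), so N instances exactly
	// tile a 32-bit word. E.g. 4xaa pixel interlock: bitsPerQuad = 4, N = 8.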

	endRenderPass(vk, *cmdBuffer);

	memBarrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
	memBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
	vk.cmdPipelineBarrier(*cmdBuffer, allPipelineStages, VK_PIPELINE_STAGE_TRANSFER_BIT,
		0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL);

	deUint32 copyDimX = m_data.dim;
	deUint32 copyDimY = m_data.dim;

	if (m_data.isSampleInterlock())
		copyDimX *= m_data.samples;

	if (shadingRateEnable)
	{
		copyDimX /= 2;
		copyDimY /= 2;
	}
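
	// Annotation: sample-interlock results were spread out in x (one column
	// per sample), while shading-rate results are stored per 2x2 coarse
	// fragment, hence the halved dimensions.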

	if (m_data.resType == RES_IMAGE)
	{
		const VkBufferImageCopy copyRegion = makeBufferImageCopy(makeExtent3D(copyDimX, copyDimY, 1u),
																 makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u));
		vk.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_GENERAL, **copyBuffer, 1u, &copyRegion);
	}
	else
	{
		const VkBufferCopy		copyRegion	= makeBufferCopy(0u, 0u, copyDimX*copyDimY*sizeof(deUint32));
		vk.cmdCopyBuffer(*cmdBuffer, **buffer, **copyBuffer, 1, &copyRegion);
	}

	memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
	memBarrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT;
	vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
		0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL);

	endCommandBuffer(vk, *cmdBuffer);

	submitCommandsAndWait(vk, device, queue, cmdBuffer.get());

	deUint32 *ptr = (deUint32 *)copyBuffer->getAllocation().getHostPtr();
	invalidateAlloc(vk, device, copyBuffer->getAllocation());

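	// Annotation: in the killOdd cases all fragments with odd x were
	// discarded inside the critical section. The row stride (copyDimX) is
	// even for every tested configuration, so a linear index i has odd x
	// exactly when (i & 1) is set, and those words must still be zero.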
	qpTestResult res = QP_TEST_RESULT_PASS;

	for (deUint32 i = 0; i < copyDimX*copyDimY; ++i)
	{
		if (m_data.killOdd && (i & 1))
		{
			if (ptr[i] != 0)
				res = QP_TEST_RESULT_FAIL;
		}
		else if (ptr[i] != expectedValue)
			res = QP_TEST_RESULT_FAIL;
	}

	return tcu::TestStatus(res, qpGetTestResultName(res));
}

}	// anonymous

tcu::TestCaseGroup*	createBasicTests (tcu::TestContext& testCtx)
{
	de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "basic"));

	typedef struct
	{
		deUint32				count;
		const char*				name;
	} TestGroupCase;

	TestGroupCase dimCases[] =
	{
		{ 8,	"8x8"},
		{ 16,	"16x16"},
		{ 32,	"32x32"},
		{ 64,	"64x64"},
		{ 128,	"128x128"},
		{ 256,	"256x256"},
		{ 512,	"512x512"},
		{ 1024,	"1024x1024"},
	};

	TestGroupCase resCases[] =
	{
		{ RES_IMAGE,	"image"},
		{ RES_SSBO,		"ssbo"},
	};

	TestGroupCase killCases[] =
	{
		{ 0,	"nodiscard"},
		{ 1,	"discard"},
	};

	TestGroupCase sampCases[] =
	{
		{ 1,	"1xaa"},
		{ 4,	"4xaa"},
	};

	TestGroupCase ssCases[] =
	{
		{ 0,	"no_sample_shading"},
		{ 1,	"sample_shading"},
	};

	TestGroupCase intCases[] =
	{
		{ INT_PIXEL_ORDERED,	"pixel_ordered"},
		{ INT_PIXEL_UNORDERED,	"pixel_unordered"},
		{ INT_SAMPLE_ORDERED,	"sample_ordered"},
		{ INT_SAMPLE_UNORDERED,	"sample_unordered"},
#ifndef CTS_USES_VULKANSC
		{ INT_SHADING_RATE_ORDERED,		"shading_rate_ordered"},
		{ INT_SHADING_RATE_UNORDERED,	"shading_rate_unordered"},
#endif // CTS_USES_VULKANSC
	};

	for (int killNdx = 0; killNdx < DE_LENGTH_OF_ARRAY(killCases); killNdx++)
	{
		de::MovePtr<tcu::TestCaseGroup> killGroup(new tcu::TestCaseGroup(testCtx, killCases[killNdx].name));
		for (int resNdx = 0; resNdx < DE_LENGTH_OF_ARRAY(resCases); resNdx++)
		{
			de::MovePtr<tcu::TestCaseGroup> resGroup(new tcu::TestCaseGroup(testCtx, resCases[resNdx].name));
			for (int intNdx = 0; intNdx < DE_LENGTH_OF_ARRAY(intCases); intNdx++)
			{
				de::MovePtr<tcu::TestCaseGroup> intGroup(new tcu::TestCaseGroup(testCtx, intCases[intNdx].name));
				for (int sampNdx = 0; sampNdx < DE_LENGTH_OF_ARRAY(sampCases); sampNdx++)
				{
					de::MovePtr<tcu::TestCaseGroup> sampGroup(new tcu::TestCaseGroup(testCtx, sampCases[sampNdx].name));
					for (int ssNdx = 0; ssNdx < DE_LENGTH_OF_ARRAY(ssCases); ssNdx++)
					{
						de::MovePtr<tcu::TestCaseGroup> ssGroup(new tcu::TestCaseGroup(testCtx, ssCases[ssNdx].name));
						for (int dimNdx = 0; dimNdx < DE_LENGTH_OF_ARRAY(dimCases); dimNdx++)
						{
							CaseDef c =
							{
								dimCases[dimNdx].count,								// deUint32 dim;
								(Resource)resCases[resNdx].count,					// Resource resType;
								(Interlock)intCases[intNdx].count,					// Interlock interlock;
								(VkSampleCountFlagBits)sampCases[sampNdx].count,	// VkSampleCountFlagBits samples;
								(bool)killCases[killNdx].count,						// bool killOdd;
								(bool)ssCases[ssNdx].count,							// bool sampleShading;
							};

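							// Annotation: sample shading at 1xaa is presumably
							// redundant with the plain pixel cases (gl_SampleID
							// is always 0), so those combinations are skipped.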
							if (c.sampleShading && c.samples == 1)
								continue;

							ssGroup->addChild(new FSITestCase(testCtx, dimCases[dimNdx].name, c));
						}
						sampGroup->addChild(ssGroup.release());
					}
					intGroup->addChild(sampGroup.release());
				}
				resGroup->addChild(intGroup.release());
			}
			killGroup->addChild(resGroup.release());
		}
		group->addChild(killGroup.release());
	}
	return group.release();
}

}	// FragmentShaderInterlock
}	// vkt