/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2017-2019 The Khronos Group Inc.
 * Copyright (c) 2018-2019 NVIDIA Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Tests for VK_EXT_fragment_shader_interlock.
 *
 * These tests render a set of overlapping full-screen quads that use image
 * or buffer reads and writes to accumulate values into a result image/buffer.
 * They use fragment shader interlock to avoid race conditions on the
 * read/modify/write, and validate that the final result includes all the writes.
 *
 * Each fragment shader invocation computes a coordinate and does a
 * read/modify/write into the image or buffer, inside the interlock. The value
 * in memory accumulates a bitmask indicating which primitives or samples have
 * already run through the interlock. E.g. for single-sample PIXEL_UNORDERED
 * mode, there is one bit in the bitmask for each primitive, and each primitive
 * ORs in its own bit. For PIXEL_ORDERED mode, each invocation also tests that
 * all the previous primitives' bits (the less significant bits) are already
 * set, else it clobbers the value. Sample and shading_rate interlock are
 * variants of this where there is one value per sample or per coarse fragment
 * location, respectively. When there are multiple samples per fragment, we
 * merge in the whole sample mask. But within a pixel, we don't try to
 * distinguish primitive order between samples on the internal diagonal of the
 * quad (triangle strip).
 *//*--------------------------------------------------------------------*/
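/* Worked example (illustrative only, not additional test logic): with 4xaa,
 * PIXEL_ORDERED and no sample shading, bitsPerQuad() below evaluates to 4, so
 * the test draws 32/4 = 8 instanced quads. Quad i ORs
 * (gl_SampleMaskIn[0] << (i*4)) into its pixel's 32-bit word; once all 8
 * quads have passed through the interlock the word reads back as 0xFFFFFFFF,
 * which is exactly what iterate() validates at the end.
 */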

#include "vktFragmentShaderInterlockBasic.hpp"

#include "vkBufferWithMemory.hpp"
#include "vkImageWithMemory.hpp"
#include "vkQueryUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkObjUtil.hpp"

#include "vktTestGroupUtil.hpp"
#include "vktTestCase.hpp"
#include "vktCustomInstancesDevices.hpp"

#include "deDefs.h"
#include "deMath.h"
#include "deRandom.h"
#include "deSharedPtr.hpp"
#include "deString.h"

#include "tcuTestCase.hpp"
#include "tcuTestLog.hpp"
#include "tcuCommandLine.hpp"

#include <string>
#include <sstream>

namespace vkt
{
namespace FragmentShaderInterlock
{
namespace
{
using namespace vk;
using namespace std;

typedef enum
{
    RES_SSBO = 0,
    RES_IMAGE,
} Resource;

typedef enum
{
    INT_PIXEL_ORDERED = 0,
    INT_PIXEL_UNORDERED,
    INT_SAMPLE_ORDERED,
    INT_SAMPLE_UNORDERED,
    INT_SHADING_RATE_ORDERED,
    INT_SHADING_RATE_UNORDERED,
} Interlock;

de::SharedPtr<Move<vk::VkDevice>> g_singletonDevice;

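// The shading-rate interlock cases need VK_NV_shading_rate_image, which may be
// disabled in the default test device because it conflicts with
// VK_KHR_fragment_shading_rate (see checkSupport below). Those cases therefore
// share a lazily created custom device with the required extensions enabled;
// all other cases run on the default device.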
VkDevice getDevice(Context& context, Interlock interlock)
{
    if (interlock == INT_SHADING_RATE_ORDERED || interlock == INT_SHADING_RATE_UNORDERED)
    {
        if (!g_singletonDevice)
        {
            const float queuePriority = 1.0f;

            // Create a universal queue that supports graphics and compute
            const VkDeviceQueueCreateInfo queueParams =
            {
                VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, // VkStructureType sType;
                DE_NULL,                                    // const void* pNext;
                0u,                                         // VkDeviceQueueCreateFlags flags;
                context.getUniversalQueueFamilyIndex(),     // deUint32 queueFamilyIndex;
                1u,                                         // deUint32 queueCount;
                &queuePriority                              // const float* pQueuePriorities;
            };

            const char* extensions[] =
            {
                "VK_EXT_fragment_shader_interlock",
                "VK_NV_shading_rate_image",
            };

            // Query the feature structs so the new device enables exactly what
            // the physical device supports.
            VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT fragmentShaderInterlockFeatures = initVulkanStructure();
            VkPhysicalDeviceShadingRateImageFeaturesNV shadingRateImageFeatures = initVulkanStructure(&fragmentShaderInterlockFeatures);
            VkPhysicalDeviceFeatures2 features2 = initVulkanStructure(&shadingRateImageFeatures);

            context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);

            const VkDeviceCreateInfo deviceCreateInfo =
            {
                VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, // VkStructureType sType;
                &features2,                           // const void* pNext;
                (VkDeviceCreateFlags)0u,              // VkDeviceCreateFlags flags;
                1,                                    // deUint32 queueCreateInfoCount;
                &queueParams,                         // const VkDeviceQueueCreateInfo* pQueueCreateInfos;
                0,                                    // deUint32 enabledLayerCount;
                DE_NULL,                              // const char* const* ppEnabledLayerNames;
                DE_LENGTH_OF_ARRAY(extensions),       // deUint32 enabledExtensionCount;
                extensions,                           // const char* const* ppEnabledExtensionNames;
                DE_NULL,                              // const VkPhysicalDeviceFeatures* pEnabledFeatures;
            };

            Move<VkDevice> device = createCustomDevice(context.getTestContext().getCommandLine().isValidationEnabled(),
                                                       context.getPlatformInterface(), context.getInstance(),
                                                       context.getInstanceInterface(), context.getPhysicalDevice(),
                                                       &deviceCreateInfo);
            g_singletonDevice = de::SharedPtr<Move<VkDevice>>(new Move<VkDevice>(device));
        }

        return g_singletonDevice->get();
    }

    return context.getDevice();
}

struct CaseDef
{
    deUint32              dim;
    Resource              resType;
    Interlock             interlock;
    VkSampleCountFlagBits samples;
    bool                  killOdd;
    bool                  sampleShading;

    bool isSampleInterlock() const
    {
        return sampleShading || interlock == INT_SAMPLE_ORDERED || interlock == INT_SAMPLE_UNORDERED;
    }
    bool isOrdered() const
    {
        return interlock == INT_PIXEL_ORDERED || interlock == INT_SAMPLE_ORDERED || interlock == INT_SHADING_RATE_ORDERED;
    }
};

class FSITestInstance : public TestInstance
{
public:
                    FSITestInstance (Context& context, const CaseDef& data);
                    ~FSITestInstance (void);
    tcu::TestStatus iterate (void);

private:
    CaseDef m_data;
};

FSITestInstance::FSITestInstance (Context& context, const CaseDef& data)
    : vkt::TestInstance (context)
    , m_data            (data)
{
}

FSITestInstance::~FSITestInstance (void)
{
}

class FSITestCase : public TestCase
{
public:
                          FSITestCase (tcu::TestContext& context, const char* name, const char* desc, const CaseDef data);
                          ~FSITestCase (void);
    virtual void          initPrograms (SourceCollections& programCollection) const;
    virtual TestInstance* createInstance (Context& context) const;
    virtual void          checkSupport (Context& context) const;

private:
    CaseDef m_data;
};

FSITestCase::FSITestCase (tcu::TestContext& context, const char* name, const char* desc, const CaseDef data)
    : vkt::TestCase (context, name, desc)
    , m_data        (data)
{
}

FSITestCase::~FSITestCase (void)
{
}

void FSITestCase::checkSupport (Context& context) const
{
    context.requireDeviceFunctionality("VK_EXT_fragment_shader_interlock");

    if ((m_data.interlock == INT_SAMPLE_ORDERED || m_data.interlock == INT_SAMPLE_UNORDERED) &&
        !context.getFragmentShaderInterlockFeaturesEXT().fragmentShaderSampleInterlock)
    {
        TCU_THROW(NotSupportedError, "Fragment shader sample interlock not supported");
    }

    if ((m_data.interlock == INT_PIXEL_ORDERED || m_data.interlock == INT_PIXEL_UNORDERED) &&
        !context.getFragmentShaderInterlockFeaturesEXT().fragmentShaderPixelInterlock)
    {
        TCU_THROW(NotSupportedError, "Fragment shader pixel interlock not supported");
    }

    if (m_data.interlock == INT_SHADING_RATE_ORDERED || m_data.interlock == INT_SHADING_RATE_UNORDERED)
    {
        if (!context.getFragmentShaderInterlockFeaturesEXT().fragmentShaderShadingRateInterlock)
            TCU_THROW(NotSupportedError, "Fragment shader shading rate interlock not supported");

        context.requireDeviceFunctionality("VK_NV_shading_rate_image");

        // We need to query the VK_NV_shading_rate_image features ourselves, because
        // they might be disabled in the default context due to a conflict with
        // VK_KHR_fragment_shading_rate.
        VkPhysicalDeviceShadingRateImageFeaturesNV shadingRateImageFeatures = initVulkanStructure();
        VkPhysicalDeviceFeatures2KHR features2 = initVulkanStructure(&shadingRateImageFeatures);

        context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);

        if (!shadingRateImageFeatures.shadingRateImage)
            TCU_THROW(NotSupportedError, "Shading rate image not supported");
    }
}

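// Number of bits each instanced quad contributes to a 32-bit accumulator word:
// one bit per quad for the sample-interlock cases (each sample gets its own
// word), the full sample mask for the pixel-interlock cases, and samples*4 for
// the shading-rate cases, where one 2x2 coarse fragment covers four pixels.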
static int bitsPerQuad (const CaseDef& c)
{
    deUint32 bpq = c.samples;

    if (c.isSampleInterlock())
        bpq = 1;
    else if (c.interlock == INT_SHADING_RATE_ORDERED || c.interlock == INT_SHADING_RATE_UNORDERED)
        bpq *= 4;

    return bpq;
}

void FSITestCase::initPrograms (SourceCollections& programCollection) const
{
    std::stringstream vss;

    vss <<
        "#version 450 core\n"
        "layout(location = 0) out int primID;\n"
        "void main()\n"
        "{\n"
        "  primID = gl_InstanceIndex;\n"
        // Oversized triangle strip covering the full viewport
        "  gl_Position = vec4(2.0*float(gl_VertexIndex&2) - 1.0, 4.0*(gl_VertexIndex&1) - 1.0, 1.0 - 2.0*float(gl_VertexIndex&1), 1);\n"
        "}\n";

    programCollection.glslSources.add("vert") << glu::VertexSource(vss.str());

    std::stringstream fss;

    fss <<
        "#version 450 core\n"
        "#extension GL_ARB_fragment_shader_interlock : enable\n"
        "#extension GL_NV_shading_rate_image : enable\n"
        "layout(r32ui, set = 0, binding = 0) coherent uniform uimage2D image0;\n"
        "layout(std430, set = 0, binding = 1) coherent buffer B1 { uint x[]; } buf1;\n"
        "layout(location = 0) flat in int primID;\n";

    switch (m_data.interlock)
    {
        default: DE_ASSERT(0); // fallthrough
        case INT_PIXEL_ORDERED:          fss << "layout(pixel_interlock_ordered) in;\n";          break;
        case INT_PIXEL_UNORDERED:        fss << "layout(pixel_interlock_unordered) in;\n";        break;
        case INT_SAMPLE_ORDERED:         fss << "layout(sample_interlock_ordered) in;\n";         break;
        case INT_SAMPLE_UNORDERED:       fss << "layout(sample_interlock_unordered) in;\n";       break;
        case INT_SHADING_RATE_ORDERED:   fss << "layout(shading_rate_interlock_ordered) in;\n";   break;
        case INT_SHADING_RATE_UNORDERED: fss << "layout(shading_rate_interlock_unordered) in;\n"; break;
    }

    // Each fragment shader invocation computes a coordinate and does a
    // read/modify/write into the image or buffer, inside the interlock. The value
    // in memory accumulates a bitmask of which primitives or samples have already
    // run through the interlock; see the file comment at the top for the details
    // of each mode.

    fss <<
        "void main()\n"
        "{\n"
        "  ivec2 coordxy = ivec2(gl_FragCoord.xy);\n"
        "  uint stride = " << m_data.dim << ";\n"
        "  uint bitsPerQuad = " << bitsPerQuad(m_data) << ";\n";

    // Compute the coordinate
    if (m_data.isSampleInterlock())
    {
        // Spread samples out in the x dimension
        fss << "  coordxy.x = coordxy.x * " << m_data.samples << " + gl_SampleID;\n";
        fss << "  stride *= " << m_data.samples << ";\n";
    }
    else if (m_data.interlock == INT_SHADING_RATE_ORDERED || m_data.interlock == INT_SHADING_RATE_UNORDERED)
    {
        // Shading rate is 2x2. Divide xy by 2.
        fss << "  coordxy /= 2;\n";
        fss << "  stride /= 2;\n";
    }

    if (m_data.isSampleInterlock())
    {
        // Sample interlock runs per-sample, and stores one bit per sample
        fss << "  uint mask = 1 << primID;\n";
        fss << "  uint previousMask = (1 << primID)-1;\n";
    }
    else
    {
        // Pixel and shading_rate interlock run per-fragment, and store the sample mask
        fss << "  uint mask = gl_SampleMaskIn[0] << (primID * bitsPerQuad);\n";
        fss << "  uint previousMask = (1 << (primID * bitsPerQuad))-1;\n";
    }
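    // For example, with 4xaa pixel interlock (bitsPerQuad = 4) and full sample
    // coverage, the instance with primID == 2 contributes mask = 0xF << 8 and,
    // in the "ordered" modes, expects previousMask = 0xFF to be set already.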

    // Exercise discard before and during the interlock
    if (m_data.killOdd)
        fss << "  if (coordxy.y < " << m_data.dim / 4 << " && (coordxy.x & 1) != 0) discard;\n";

    fss << "  beginInvocationInterlockARB();\n";

    if (m_data.killOdd)
        fss << "  if ((coordxy.x & 1) != 0) discard;\n";

    // Read the current value from the image or buffer
    if (m_data.resType == RES_IMAGE)
        fss << "  uint temp = imageLoad(image0, coordxy).x;\n";
    else
    {
        fss << "  uint coord = coordxy.y * stride + coordxy.x;\n";
        fss << "  uint temp = buf1.x[coord];\n";
    }

    // Update the value. For "ordered" modes, check that all the previous primitives'
    // bits are already set.
    if (m_data.isOrdered())
        fss << "  if ((temp & previousMask) == previousMask) temp |= mask; else temp = 0;\n";
    else
        fss << "  temp |= mask;\n";

    // Store out the new value
    if (m_data.resType == RES_IMAGE)
        fss << "  imageStore(image0, coordxy, uvec4(temp, 0, 0, 0));\n";
    else
        fss << "  buf1.x[coord] = temp;\n";

    fss << "  endInvocationInterlockARB();\n";

    // ...and exercise discard after the interlock
    if (m_data.killOdd)
        fss << "  discard;\n";

    fss << "}\n";

    programCollection.glslSources.add("frag") << glu::FragmentSource(fss.str());
}

TestInstance* FSITestCase::createInstance (Context& context) const
{
    return new FSITestInstance(context, m_data);
}
tcu::TestStatus FSITestInstance::iterate (void)
{
    const DeviceInterface& vk = m_context.getDeviceInterface();
    const VkDevice device = getDevice(m_context, m_data.interlock);
    Allocator& allocator = m_context.getDefaultAllocator();
    VkFlags allShaderStages = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
    VkFlags allPipelineStages = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;

    VkPhysicalDeviceProperties2 properties;
    deMemset(&properties, 0, sizeof(properties));
    properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;

    m_context.getInstanceInterface().getPhysicalDeviceProperties2(m_context.getPhysicalDevice(), &properties);

    VkPipelineBindPoint bindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;

    Move<vk::VkDescriptorSetLayout> descriptorSetLayout;
    Move<vk::VkDescriptorPool> descriptorPool;
    Move<vk::VkDescriptorSet> descriptorSet;

    VkDescriptorPoolCreateFlags poolCreateFlags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
    VkDescriptorSetLayoutCreateFlags layoutCreateFlags = 0;

    const VkDescriptorSetLayoutBinding bindings[2] =
    {
        {
            0u,                               // binding
            VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, // descriptorType
            1u,                               // descriptorCount
            allShaderStages,                  // stageFlags
            DE_NULL,                          // pImmutableSamplers
        },
        {
            1u,                                // binding
            VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, // descriptorType
            1u,                                // descriptorCount
            allShaderStages,                   // stageFlags
            DE_NULL,                           // pImmutableSamplers
        },
    };

    // Create a layout and allocate a descriptor set for it.
    const VkDescriptorSetLayoutCreateInfo setLayoutCreateInfo =
    {
        vk::VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, // sType
        DE_NULL,                                                 // pNext
        layoutCreateFlags,                                       // flags
        2u,                                                      // bindingCount
        &bindings[0]                                             // pBindings
    };

    descriptorSetLayout = vk::createDescriptorSetLayout(vk, device, &setLayoutCreateInfo);

    vk::DescriptorPoolBuilder poolBuilder;
    poolBuilder.addType(bindings[0].descriptorType, 1);
    poolBuilder.addType(bindings[1].descriptorType, 1);

    descriptorPool = poolBuilder.build(vk, device, poolCreateFlags, 1u);
    descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);

    // One uint per sample, sized for the worst case of 4 samples
    VkDeviceSize bufferSize = m_data.dim * m_data.dim * sizeof(deUint32) * 4;

    de::MovePtr<BufferWithMemory> buffer;
    buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
        vk, device, allocator, makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::Any));

    flushAlloc(vk, device, buffer->getAllocation());

    const VkQueue queue = getDeviceQueue(vk, device, m_context.getUniversalQueueFamilyIndex(), 0);
    Move<VkCommandPool> cmdPool = createCommandPool(vk, device, 0, m_context.getUniversalQueueFamilyIndex());
    Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

    beginCommandBuffer(vk, *cmdBuffer, 0u);

    const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
    {
        VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // sType
        DE_NULL,                                       // pNext
        (VkPipelineLayoutCreateFlags)0,                // flags
        1,                                             // setLayoutCount
        &descriptorSetLayout.get(),                    // pSetLayouts
        0u,                                            // pushConstantRangeCount
        DE_NULL,                                       // pPushConstantRanges
    };

    Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);

    de::MovePtr<BufferWithMemory> copyBuffer;
    copyBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
        vk, device, allocator, makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible | MemoryRequirement::Cached));

    const VkImageCreateInfo imageCreateInfo =
    {
        VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
        DE_NULL,                             // const void* pNext;
        (VkImageCreateFlags)0u,              // VkImageCreateFlags flags;
        VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
        VK_FORMAT_R32_UINT,                  // VkFormat format;
        {
            m_data.dim * m_data.samples,     // deUint32 width; samples are spread out in x
            m_data.dim,                      // deUint32 height;
            1u                               // deUint32 depth;
        },                                   // VkExtent3D extent;
        1u,                                  // deUint32 mipLevels;
        1u,                                  // deUint32 arrayLayers;
        VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
        VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
        VK_IMAGE_USAGE_STORAGE_BIT
        | VK_IMAGE_USAGE_TRANSFER_SRC_BIT
        | VK_IMAGE_USAGE_TRANSFER_DST_BIT,   // VkImageUsageFlags usage;
        VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
        0u,                                  // deUint32 queueFamilyIndexCount;
        DE_NULL,                             // const deUint32* pQueueFamilyIndices;
        VK_IMAGE_LAYOUT_UNDEFINED            // VkImageLayout initialLayout;
    };

    VkImageViewCreateInfo imageViewCreateInfo =
    {
        VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // VkStructureType sType;
        DE_NULL,                                  // const void* pNext;
        (VkImageViewCreateFlags)0u,               // VkImageViewCreateFlags flags;
        DE_NULL,                                  // VkImage image;
        VK_IMAGE_VIEW_TYPE_2D,                    // VkImageViewType viewType;
        VK_FORMAT_R32_UINT,                       // VkFormat format;
        {
            VK_COMPONENT_SWIZZLE_R,               // VkComponentSwizzle r;
            VK_COMPONENT_SWIZZLE_G,               // VkComponentSwizzle g;
            VK_COMPONENT_SWIZZLE_B,               // VkComponentSwizzle b;
            VK_COMPONENT_SWIZZLE_A                // VkComponentSwizzle a;
        },                                        // VkComponentMapping components;
        {
            VK_IMAGE_ASPECT_COLOR_BIT,            // VkImageAspectFlags aspectMask;
            0u,                                   // deUint32 baseMipLevel;
            1u,                                   // deUint32 levelCount;
            0u,                                   // deUint32 baseArrayLayer;
            1u                                    // deUint32 layerCount;
        }                                         // VkImageSubresourceRange subresourceRange;
    };

    de::MovePtr<ImageWithMemory> image;
    Move<VkImageView> imageView;

    image = de::MovePtr<ImageWithMemory>(new ImageWithMemory(
        vk, device, allocator, imageCreateInfo, MemoryRequirement::Any));
    imageViewCreateInfo.image = **image;
    imageView = createImageView(vk, device, &imageViewCreateInfo, NULL);

    VkDescriptorImageInfo imageInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
    VkDescriptorBufferInfo bufferInfo = makeDescriptorBufferInfo(**buffer, 0, bufferSize);

    VkWriteDescriptorSet w =
    {
        VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, // sType
        DE_NULL,                                // pNext
        *descriptorSet,                         // dstSet
        (deUint32)0,                            // dstBinding
        0,                                      // dstArrayElement
        1u,                                     // descriptorCount
        bindings[0].descriptorType,             // descriptorType
        &imageInfo,                             // pImageInfo
        &bufferInfo,                            // pBufferInfo
        DE_NULL,                                // pTexelBufferView
    };
    vk.updateDescriptorSets(device, 1, &w, 0, NULL);

    // Reuse the same write for the buffer binding; the info pointer that
    // doesn't match the descriptor type is ignored.
    w.dstBinding = 1;
    w.descriptorType = bindings[1].descriptorType;
    vk.updateDescriptorSets(device, 1, &w, 0, NULL);

    vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0, 1, &descriptorSet.get(), 0, DE_NULL);

    VkBool32 shadingRateEnable = (m_data.interlock == INT_SHADING_RATE_ORDERED ||
                                  m_data.interlock == INT_SHADING_RATE_UNORDERED) ? VK_TRUE : VK_FALSE;

    Move<VkPipeline> pipeline;
    Move<VkRenderPass> renderPass;
    Move<VkFramebuffer> framebuffer;

    {
        // No attachments: all shader output goes through the storage image/buffer.
        const vk::VkSubpassDescription subpassDesc =
        {
            (vk::VkSubpassDescriptionFlags)0,    // flags
            vk::VK_PIPELINE_BIND_POINT_GRAPHICS, // pipelineBindPoint
            0u,                                  // inputCount
            DE_NULL,                             // pInputAttachments
            0u,                                  // colorCount
            DE_NULL,                             // pColorAttachments
            DE_NULL,                             // pResolveAttachments
            DE_NULL,                             // depthStencilAttachment
            0u,                                  // preserveCount
            DE_NULL,                             // pPreserveAttachments
        };
        const vk::VkRenderPassCreateInfo renderPassParams =
        {
            vk::VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // sType
            DE_NULL,                                       // pNext
            (vk::VkRenderPassCreateFlags)0,                // flags
            0u,                                            // attachmentCount
            DE_NULL,                                       // pAttachments
            1u,                                            // subpassCount
            &subpassDesc,                                  // pSubpasses
            0u,                                            // dependencyCount
            DE_NULL,                                       // pDependencies
        };

        renderPass = createRenderPass(vk, device, &renderPassParams);

        const vk::VkFramebufferCreateInfo framebufferParams =
        {
            vk::VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, // sType
            DE_NULL,                                       // pNext
            (vk::VkFramebufferCreateFlags)0,               // flags
            *renderPass,                                   // renderPass
            0u,                                            // attachmentCount
            DE_NULL,                                       // pAttachments
            m_data.dim,                                    // width
            m_data.dim,                                    // height
            1u,                                            // layers
        };

        framebuffer = createFramebuffer(vk, device, &framebufferParams);

        const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo =
        {
            VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
            DE_NULL,                                                   // const void* pNext;
            (VkPipelineVertexInputStateCreateFlags)0,                  // VkPipelineVertexInputStateCreateFlags flags;
            0u,                                                        // deUint32 vertexBindingDescriptionCount;
            DE_NULL,                                                   // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
            0u,                                                        // deUint32 vertexAttributeDescriptionCount;
            DE_NULL                                                    // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
        };

        const VkPipelineInputAssemblyStateCreateInfo inputAssemblyStateCreateInfo =
        {
            VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, // VkStructureType sType;
            DE_NULL,                                                     // const void* pNext;
            (VkPipelineInputAssemblyStateCreateFlags)0,                  // VkPipelineInputAssemblyStateCreateFlags flags;
            VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,                        // VkPrimitiveTopology topology;
            VK_FALSE                                                     // VkBool32 primitiveRestartEnable;
        };

        const VkPipelineRasterizationStateCreateInfo rasterizationStateCreateInfo =
        {
            VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, // VkStructureType sType;
            DE_NULL,                                                    // const void* pNext;
            (VkPipelineRasterizationStateCreateFlags)0,                 // VkPipelineRasterizationStateCreateFlags flags;
            VK_FALSE,                                                   // VkBool32 depthClampEnable;
            VK_FALSE,                                                   // VkBool32 rasterizerDiscardEnable;
            VK_POLYGON_MODE_FILL,                                       // VkPolygonMode polygonMode;
            VK_CULL_MODE_NONE,                                          // VkCullModeFlags cullMode;
            VK_FRONT_FACE_CLOCKWISE,                                    // VkFrontFace frontFace;
            VK_FALSE,                                                   // VkBool32 depthBiasEnable;
            0.0f,                                                       // float depthBiasConstantFactor;
            0.0f,                                                       // float depthBiasClamp;
            0.0f,                                                       // float depthBiasSlopeFactor;
            1.0f                                                        // float lineWidth;
        };

        const VkPipelineMultisampleStateCreateInfo multisampleStateCreateInfo =
        {
            VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, // VkStructureType sType
            DE_NULL,                                                  // const void* pNext
            0u,                                                       // VkPipelineMultisampleStateCreateFlags flags
            (VkSampleCountFlagBits)m_data.samples,                    // VkSampleCountFlagBits rasterizationSamples
            m_data.sampleShading ? VK_TRUE : VK_FALSE,                // VkBool32 sampleShadingEnable
            1.0f,                                                     // float minSampleShading
            DE_NULL,                                                  // const VkSampleMask* pSampleMask
            VK_FALSE,                                                 // VkBool32 alphaToCoverageEnable
            VK_FALSE                                                  // VkBool32 alphaToOneEnable
        };

        VkViewport viewport = makeViewport(m_data.dim, m_data.dim);
        VkRect2D scissor = makeRect2D(m_data.dim, m_data.dim);

        // 2x2 coarse fragments for the shading-rate interlock cases
        VkPipelineFragmentShadingRateStateCreateInfoKHR shadingRateStateCreateInfo =
        {
            VK_STRUCTURE_TYPE_PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR, // VkStructureType sType;
            DE_NULL,                                                                // const void* pNext;
            { 2, 2 },                                                               // VkExtent2D fragmentSize;
            { VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR, VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR }, // VkFragmentShadingRateCombinerOpKHR combinerOps[2];
        };

        const VkPipelineViewportStateCreateInfo viewportStateCreateInfo =
        {
            VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, // VkStructureType sType
            DE_NULL,                                               // const void* pNext
            (VkPipelineViewportStateCreateFlags)0,                 // VkPipelineViewportStateCreateFlags flags
            1u,                                                    // deUint32 viewportCount
            &viewport,                                             // const VkViewport* pViewports
            1u,                                                    // deUint32 scissorCount
            &scissor                                               // const VkRect2D* pScissors
        };

        Move<VkShaderModule> fs = createShaderModule(vk, device, m_context.getBinaryCollection().get("frag"), 0);
        Move<VkShaderModule> vs = createShaderModule(vk, device, m_context.getBinaryCollection().get("vert"), 0);
        deUint32 numStages = 2u;

        const VkPipelineShaderStageCreateInfo shaderCreateInfo[2] =
        {
            {
                VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
                DE_NULL,
                (VkPipelineShaderStageCreateFlags)0,
                VK_SHADER_STAGE_VERTEX_BIT, // stage
                *vs,                        // shader
                "main",
                DE_NULL,                    // pSpecializationInfo
            },
            {
                VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
                DE_NULL,
                (VkPipelineShaderStageCreateFlags)0,
                VK_SHADER_STAGE_FRAGMENT_BIT, // stage
                *fs,                          // shader
                "main",
                DE_NULL,                      // pSpecializationInfo
            }
        };

        const VkGraphicsPipelineCreateInfo graphicsPipelineCreateInfo =
        {
            VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,           // VkStructureType sType;
            shadingRateEnable ? &shadingRateStateCreateInfo : DE_NULL, // const void* pNext;
            (VkPipelineCreateFlags)0,                                  // VkPipelineCreateFlags flags;
            numStages,                                                 // deUint32 stageCount;
            &shaderCreateInfo[0],                                      // const VkPipelineShaderStageCreateInfo* pStages;
            &vertexInputStateCreateInfo,                               // const VkPipelineVertexInputStateCreateInfo* pVertexInputState;
            &inputAssemblyStateCreateInfo,                             // const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState;
            DE_NULL,                                                   // const VkPipelineTessellationStateCreateInfo* pTessellationState;
            &viewportStateCreateInfo,                                  // const VkPipelineViewportStateCreateInfo* pViewportState;
            &rasterizationStateCreateInfo,                             // const VkPipelineRasterizationStateCreateInfo* pRasterizationState;
            &multisampleStateCreateInfo,                               // const VkPipelineMultisampleStateCreateInfo* pMultisampleState;
            DE_NULL,                                                   // const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState;
            DE_NULL,                                                   // const VkPipelineColorBlendStateCreateInfo* pColorBlendState;
            DE_NULL,                                                   // const VkPipelineDynamicStateCreateInfo* pDynamicState;
            pipelineLayout.get(),                                      // VkPipelineLayout layout;
            renderPass.get(),                                          // VkRenderPass renderPass;
            0u,                                                        // deUint32 subpass;
            DE_NULL,                                                   // VkPipeline basePipelineHandle;
            0                                                          // int basePipelineIndex;
        };

        pipeline = createGraphicsPipeline(vk, device, DE_NULL, &graphicsPipelineCreateInfo);
    }

    const VkImageMemoryBarrier imageBarrier =
    {
        VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // VkStructureType sType
        DE_NULL,                                // const void* pNext
        0u,                                     // VkAccessFlags srcAccessMask
        VK_ACCESS_TRANSFER_WRITE_BIT,           // VkAccessFlags dstAccessMask
        VK_IMAGE_LAYOUT_UNDEFINED,              // VkImageLayout oldLayout
        VK_IMAGE_LAYOUT_GENERAL,                // VkImageLayout newLayout
        VK_QUEUE_FAMILY_IGNORED,                // deUint32 srcQueueFamilyIndex
        VK_QUEUE_FAMILY_IGNORED,                // deUint32 dstQueueFamilyIndex
        **image,                                // VkImage image
        {
            VK_IMAGE_ASPECT_COLOR_BIT,          // VkImageAspectFlags aspectMask
            0u,                                 // deUint32 baseMipLevel
            1u,                                 // deUint32 levelCount
            0u,                                 // deUint32 baseArrayLayer
            1u,                                 // deUint32 layerCount
        }
    };

    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
                          (VkDependencyFlags)0,
                          0, (const VkMemoryBarrier*)DE_NULL,
                          0, (const VkBufferMemoryBarrier*)DE_NULL,
                          1, &imageBarrier);

    vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);

    VkImageSubresourceRange range = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
    VkClearValue clearColor = makeClearValueColorU32(0, 0, 0, 0);

    VkMemoryBarrier memBarrier =
    {
        VK_STRUCTURE_TYPE_MEMORY_BARRIER, // sType
        DE_NULL,                          // pNext
        0u,                               // srcAccessMask
        0u,                               // dstAccessMask
    };

    // Zero-initialize both potential destinations
    vk.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_GENERAL, &clearColor.color, 1, &range);

    vk.cmdFillBuffer(*cmdBuffer, **buffer, 0, bufferSize, 0);

    memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
    memBarrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, allPipelineStages,
                          0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL);

    beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer,
                    makeRect2D(m_data.dim, m_data.dim),
                    0, DE_NULL, VK_SUBPASS_CONTENTS_INLINE);

    // Draw N fullscreen "quads", one per instance.
    deUint32 N = 32 / bitsPerQuad(m_data);
    deUint32 expectedValue = 0xFFFFFFFF;
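    // Each quad ORs bitsPerQuad(m_data) bits into every surviving 32-bit word,
    // so after N instances each word should be (1 << (N * bitsPerQuad)) - 1,
    // i.e. all 32 bits set.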
    vk.cmdDraw(*cmdBuffer, 4u, N, 0u, 0u);

    endRenderPass(vk, *cmdBuffer);

    memBarrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
    memBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
    vk.cmdPipelineBarrier(*cmdBuffer, allPipelineStages, VK_PIPELINE_STAGE_TRANSFER_BIT,
                          0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL);

    deUint32 copyDimX = m_data.dim;
    deUint32 copyDimY = m_data.dim;

    if (m_data.isSampleInterlock())
        copyDimX *= m_data.samples;

    if (shadingRateEnable)
    {
        copyDimX /= 2;
        copyDimY /= 2;
    }

    if (m_data.resType == RES_IMAGE)
    {
        const VkBufferImageCopy copyRegion = makeBufferImageCopy(makeExtent3D(copyDimX, copyDimY, 1u),
                                                                 makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u));
        vk.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_GENERAL, **copyBuffer, 1u, &copyRegion);
    }
    else
    {
        const VkBufferCopy copyRegion = makeBufferCopy(0u, 0u, copyDimX * copyDimY * sizeof(deUint32));
        vk.cmdCopyBuffer(*cmdBuffer, **buffer, **copyBuffer, 1, &copyRegion);
    }

    memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
    memBarrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT;
    vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
                          0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL);

    endCommandBuffer(vk, *cmdBuffer);

    submitCommandsAndWait(vk, device, queue, cmdBuffer.get());

    deUint32* ptr = (deUint32*)copyBuffer->getAllocation().getHostPtr();
    invalidateAlloc(vk, device, copyBuffer->getAllocation());

    qpTestResult res = QP_TEST_RESULT_PASS;

    for (deUint32 i = 0; i < copyDimX * copyDimY; ++i)
    {
        // Since copyDimX is even, (i & 1) is the low bit of the x coordinate;
        // discarded odd-x invocations must have left their words at zero.
        if (m_data.killOdd && (i & 1))
        {
            if (ptr[i] != 0)
                res = QP_TEST_RESULT_FAIL;
        }
        else if (ptr[i] != expectedValue)
            res = QP_TEST_RESULT_FAIL;
    }

    return tcu::TestStatus(res, qpGetTestResultName(res));
}

} // anonymous

tcu::TestCaseGroup* createBasicTests (tcu::TestContext& testCtx)
{
    de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "basic", "Test VK_EXT_fragment_shader_interlock"));

    typedef struct
    {
        deUint32    count;
        const char* name;
        const char* description;
    } TestGroupCase;

    TestGroupCase dimCases[] =
    {
        { 8,    "8x8",       "8x8"       },
        { 16,   "16x16",     "16x16"     },
        { 32,   "32x32",     "32x32"     },
        { 64,   "64x64",     "64x64"     },
        { 128,  "128x128",   "128x128"   },
        { 256,  "256x256",   "256x256"   },
        { 512,  "512x512",   "512x512"   },
        { 1024, "1024x1024", "1024x1024" },
    };

    TestGroupCase resCases[] =
    {
        { RES_IMAGE, "image", "image" },
        { RES_SSBO,  "ssbo",  "ssbo"  },
    };

    TestGroupCase killCases[] =
    {
        { 0, "nodiscard", "no discard"         },
        { 1, "discard",   "discard odd pixels" },
    };

    TestGroupCase sampCases[] =
    {
        { 1, "1xaa", "1 sample"  },
        { 4, "4xaa", "4 samples" },
    };

    TestGroupCase ssCases[] =
    {
        { 0, "no_sample_shading", "no sample shading" },
        { 1, "sample_shading",    "sample shading"    },
    };

    TestGroupCase intCases[] =
    {
        { INT_PIXEL_ORDERED,          "pixel_ordered",          "pixel_ordered"          },
        { INT_PIXEL_UNORDERED,        "pixel_unordered",        "pixel_unordered"        },
        { INT_SAMPLE_ORDERED,         "sample_ordered",         "sample_ordered"         },
        { INT_SAMPLE_UNORDERED,       "sample_unordered",       "sample_unordered"       },
        { INT_SHADING_RATE_ORDERED,   "shading_rate_ordered",   "shading_rate_ordered"   },
        { INT_SHADING_RATE_UNORDERED, "shading_rate_unordered", "shading_rate_unordered" },
    };

    for (int killNdx = 0; killNdx < DE_LENGTH_OF_ARRAY(killCases); killNdx++)
    {
        de::MovePtr<tcu::TestCaseGroup> killGroup(new tcu::TestCaseGroup(testCtx, killCases[killNdx].name, killCases[killNdx].description));
        for (int resNdx = 0; resNdx < DE_LENGTH_OF_ARRAY(resCases); resNdx++)
        {
            de::MovePtr<tcu::TestCaseGroup> resGroup(new tcu::TestCaseGroup(testCtx, resCases[resNdx].name, resCases[resNdx].description));
            for (int intNdx = 0; intNdx < DE_LENGTH_OF_ARRAY(intCases); intNdx++)
            {
                de::MovePtr<tcu::TestCaseGroup> intGroup(new tcu::TestCaseGroup(testCtx, intCases[intNdx].name, intCases[intNdx].description));
                for (int sampNdx = 0; sampNdx < DE_LENGTH_OF_ARRAY(sampCases); sampNdx++)
                {
                    de::MovePtr<tcu::TestCaseGroup> sampGroup(new tcu::TestCaseGroup(testCtx, sampCases[sampNdx].name, sampCases[sampNdx].description));
                    for (int ssNdx = 0; ssNdx < DE_LENGTH_OF_ARRAY(ssCases); ssNdx++)
                    {
                        de::MovePtr<tcu::TestCaseGroup> ssGroup(new tcu::TestCaseGroup(testCtx, ssCases[ssNdx].name, ssCases[ssNdx].description));
                        for (int dimNdx = 0; dimNdx < DE_LENGTH_OF_ARRAY(dimCases); dimNdx++)
                        {
                            CaseDef c =
                            {
                                dimCases[dimNdx].count,                          // deUint32 dim;
                                (Resource)resCases[resNdx].count,                // Resource resType;
                                (Interlock)intCases[intNdx].count,               // Interlock interlock;
                                (VkSampleCountFlagBits)sampCases[sampNdx].count, // VkSampleCountFlagBits samples;
                                (bool)killCases[killNdx].count,                  // bool killOdd;
                                (bool)ssCases[ssNdx].count,                      // bool sampleShading;
                            };

                            // Sample shading is only meaningful with multiple samples
                            if (c.sampleShading && c.samples == 1)
                                continue;

                            ssGroup->addChild(new FSITestCase(testCtx, dimCases[dimNdx].name, dimCases[dimNdx].description, c));
                        }
                        sampGroup->addChild(ssGroup.release());
                    }
                    intGroup->addChild(sampGroup.release());
                }
                resGroup->addChild(intGroup.release());
            }
            killGroup->addChild(resGroup.release());
        }
        group->addChild(killGroup.release());
    }
    return group.release();
}

void cleanupDevice ()
{
    g_singletonDevice.clear();
}

} // FragmentShaderInterlock
} // vkt