/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2017-2019 The Khronos Group Inc.
 * Copyright (c) 2018-2019 NVIDIA Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Tests for VK_EXT_fragment_shader_interlock.
 * These tests render a set of overlapping full-screen quads that use image
 * or buffer reads and writes to accumulate values into a result image/buffer.
 * They use fragment shader interlock to avoid race conditions on the read/write
 * and validate that the final result includes all the writes. The accumulation
 * scheme is described in detail in initPrograms below.
 *//*--------------------------------------------------------------------*/
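
// As a concrete illustration of the scheme (for reference only, derived from
// bitsPerQuad() and iterate() below): with 1 sample and pixel interlock,
// bitsPerQuad == 1, so 32 instanced quads are drawn and each invocation ORs
// (1u << primID) into its pixel's value. Once all 32 quads have run, every
// value should equal 0xFFFFFFFF, which is what iterate() compares against.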

#include "vktFragmentShaderInterlockBasic.hpp"

#include "vkBufferWithMemory.hpp"
#include "vkImageWithMemory.hpp"
#include "vkQueryUtil.hpp"
#include "vkDeviceUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkObjUtil.hpp"

#include "vktTestGroupUtil.hpp"
#include "vktTestCase.hpp"
#include "vktCustomInstancesDevices.hpp"

#include "deDefs.h"
#include "deMath.h"
#include "deRandom.h"
#include "deSharedPtr.hpp"
#include "deString.h"

#include "tcuTestCase.hpp"
#include "tcuTestLog.hpp"
#include "tcuCommandLine.hpp"

#include <string>
#include <sstream>

namespace vkt
{
namespace FragmentShaderInterlock
{
namespace
{
using namespace vk;
using namespace std;

typedef enum
{
	RES_SSBO = 0,
	RES_IMAGE,
} Resource;

typedef enum
{
	INT_PIXEL_ORDERED = 0,
	INT_PIXEL_UNORDERED,
	INT_SAMPLE_ORDERED,
	INT_SAMPLE_UNORDERED,
	INT_SHADING_RATE_ORDERED,
	INT_SHADING_RATE_UNORDERED,
} Interlock;

struct CaseDef
{
	deUint32				dim;
	Resource				resType;
	Interlock				interlock;
	VkSampleCountFlagBits	samples;
	bool					killOdd;
	bool					sampleShading;

	bool isSampleInterlock() const
	{
		return sampleShading || interlock == INT_SAMPLE_ORDERED || interlock == INT_SAMPLE_UNORDERED;
	}
	bool isOrdered() const
	{
		return interlock == INT_PIXEL_ORDERED || interlock == INT_SAMPLE_ORDERED || interlock == INT_SHADING_RATE_ORDERED;
	}
};

class FSITestInstance : public TestInstance
{
public:
						FSITestInstance		(Context& context, const CaseDef& data);
						~FSITestInstance	(void);
	tcu::TestStatus		iterate				(void);

private:
	CaseDef				m_data;
};

FSITestInstance::FSITestInstance (Context& context, const CaseDef& data)
	: vkt::TestInstance	(context)
	, m_data			(data)
{
}

FSITestInstance::~FSITestInstance (void)
{
}

class FSITestCase : public TestCase
{
public:
							FSITestCase		(tcu::TestContext& context, const char* name, const CaseDef data);
							~FSITestCase	(void);
	virtual void			initPrograms	(SourceCollections& programCollection) const;
	virtual TestInstance*	createInstance	(Context& context) const;
	virtual void			checkSupport	(Context& context) const;

private:
	CaseDef					m_data;
};

FSITestCase::FSITestCase (tcu::TestContext& context, const char* name, const CaseDef data)
	: vkt::TestCase	(context, name)
	, m_data		(data)
{
}

FSITestCase::~FSITestCase (void)
{
}

void FSITestCase::checkSupport (Context& context) const
{
	context.requireDeviceFunctionality("VK_EXT_fragment_shader_interlock");

	if ((m_data.interlock == INT_SAMPLE_ORDERED || m_data.interlock == INT_SAMPLE_UNORDERED) &&
		!context.getFragmentShaderInterlockFeaturesEXT().fragmentShaderSampleInterlock)
	{
		TCU_THROW(NotSupportedError, "Fragment shader sample interlock not supported");
	}

	if ((m_data.interlock == INT_PIXEL_ORDERED || m_data.interlock == INT_PIXEL_UNORDERED) &&
		!context.getFragmentShaderInterlockFeaturesEXT().fragmentShaderPixelInterlock)
	{
		TCU_THROW(NotSupportedError, "Fragment shader pixel interlock not supported");
	}

#ifndef CTS_USES_VULKANSC
	if ((m_data.interlock == INT_SHADING_RATE_ORDERED || m_data.interlock == INT_SHADING_RATE_UNORDERED) &&
		!context.getFragmentShaderInterlockFeaturesEXT().fragmentShaderShadingRateInterlock)
	{
		TCU_THROW(NotSupportedError, "Fragment shader shading rate interlock not supported");
	}
	if ((m_data.interlock == INT_SHADING_RATE_ORDERED || m_data.interlock == INT_SHADING_RATE_UNORDERED) &&
		(!context.getFragmentShadingRateFeatures().pipelineFragmentShadingRate ||
		 !context.getFragmentShadingRateProperties().fragmentShadingRateWithFragmentShaderInterlock))
	{
		TCU_THROW(NotSupportedError, "fragment shading rate not supported");
	}
#endif // CTS_USES_VULKANSC
}

static int bitsPerQuad (const CaseDef& c)
{
	deUint32 bpq = c.samples;

	if (c.isSampleInterlock())
		bpq = 1;
	else if (c.interlock == INT_SHADING_RATE_ORDERED || c.interlock == INT_SHADING_RATE_UNORDERED)
		bpq *= 4;

	return bpq;
}
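
// For reference, a few values this produces: 4xaa pixel interlock stores the
// 4-bit sample mask per quad (bpq == 4); sample interlock stores one bit per
// quad regardless of sample count (bpq == 1); 4xaa shading rate interlock
// covers a 2x2-pixel coarse fragment, so each quad contributes 4*4 == 16 bits.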

void FSITestCase::initPrograms (SourceCollections& programCollection) const
{
	std::stringstream vss;

	vss <<
		"#version 450 core\n"
		"layout(location = 0) out int primID;\n"
		"void main()\n"
		"{\n"
		"  primID = gl_InstanceIndex;\n"
		// full-viewport quad
		"  gl_Position = vec4( 2.0*float(gl_VertexIndex&2) - 1.0, 4.0*(gl_VertexIndex&1)-1.0, 1.0 - 2.0 * float(gl_VertexIndex&1), 1);\n"
		"}\n";

	programCollection.glslSources.add("vert") << glu::VertexSource(vss.str());

	std::stringstream fss;

	fss <<
		"#version 450 core\n"
		"#extension GL_ARB_fragment_shader_interlock : enable\n"
		"#extension GL_NV_shading_rate_image : enable\n"
		"layout(r32ui, set = 0, binding = 0) coherent uniform uimage2D image0;\n"
		"layout(std430, set = 0, binding = 1) coherent buffer B1 { uint x[]; } buf1;\n"
		"layout(location = 0) flat in int primID;\n";

	switch (m_data.interlock)
	{
		default: DE_ASSERT(0); // fallthrough
		case INT_PIXEL_ORDERED:				fss << "layout(pixel_interlock_ordered) in;\n";				break;
		case INT_PIXEL_UNORDERED:			fss << "layout(pixel_interlock_unordered) in;\n";			break;
		case INT_SAMPLE_ORDERED:			fss << "layout(sample_interlock_ordered) in;\n";			break;
		case INT_SAMPLE_UNORDERED:			fss << "layout(sample_interlock_unordered) in;\n";			break;
		case INT_SHADING_RATE_ORDERED:		fss << "layout(shading_rate_interlock_ordered) in;\n";		break;
		case INT_SHADING_RATE_UNORDERED:	fss << "layout(shading_rate_interlock_unordered) in;\n";	break;
	}

	// Each fragment shader invocation computes a coordinate, and does a read/modify/write
	// into the image or buffer, inside the interlock. The value in memory accumulates a bitmask
	// indicating which primitives or samples have already run through the interlock. e.g.
	// for single sample, PIXEL_UNORDERED mode, there is one bit in the bitmask for each primitive
	// and each primitive ORs in its own bit. For PIXEL_ORDERED mode, each invocation also tests
	// that all the previous primitives (less significant bits) are also set, else it clobbers the
	// value. Sample and shading_rate interlock are variants of this where there is one value per
	// sample or per coarse fragment location, respectively. When there are multiple samples per
	// fragment, we merge in the whole sample mask. But within a pixel, we don't try to distinguish
	// primitive order between samples on the internal diagonal of the quad (triangle strip).

	fss <<
		"void main()\n"
		"{\n"
		"  ivec2 coordxy = ivec2(gl_FragCoord.xy);\n"
		"  uint stride = " << m_data.dim << ";\n"
		"  uint bitsPerQuad = " << bitsPerQuad(m_data) << ";\n";

	// Compute the coordinate
	if (m_data.isSampleInterlock())
	{
		// Spread samples out in the x dimension
		fss << "  coordxy.x = coordxy.x * " << m_data.samples << " + gl_SampleID;\n";
		fss << "  stride *= " << m_data.samples << ";\n";
	}
	else if (m_data.interlock == INT_SHADING_RATE_ORDERED || m_data.interlock == INT_SHADING_RATE_UNORDERED)
	{
		// shading rate is 2x2. Divide xy by 2
		fss << "  coordxy /= 2;\n";
		fss << "  stride /= 2;\n";
	}

	if (m_data.isSampleInterlock())
	{
		// sample interlock runs per-sample, and stores one bit per sample
		fss << "  uint mask = 1 << primID;\n";
		fss << "  uint previousMask = (1 << primID)-1;\n";
	}
	else
	{
		// pixel and shading_rate interlock run per-fragment, and store the sample mask
		fss << "  uint mask = gl_SampleMaskIn[0] << (primID * bitsPerQuad);\n";
		fss << "  uint previousMask = (1 << (primID * bitsPerQuad))-1;\n";
	}

	// Exercise discard before and during the interlock
	if (m_data.killOdd)
		fss << "  if (coordxy.y < " << m_data.dim / 4 << " && (coordxy.x & 1) != 0) discard;\n";

	fss << "  beginInvocationInterlockARB();\n";

	if (m_data.killOdd)
		fss << "  if ((coordxy.x & 1) != 0) discard;\n";

	// Read the current value from the image or buffer
	if (m_data.resType == RES_IMAGE)
		fss << "  uint temp = imageLoad(image0, coordxy).x;\n";
	else
	{
		fss << "  uint coord = coordxy.y * stride + coordxy.x;\n";
		fss << "  uint temp = buf1.x[coord];\n";
	}

	// Update the value. For "ordered" modes, check that all the previous primitives'
	// bits are already set
	if (m_data.isOrdered())
		fss << "  if ((temp & previousMask) == previousMask) temp |= mask; else temp = 0;\n";
	else
		fss << "  temp |= mask;\n";

	// Store out the new value
	if (m_data.resType == RES_IMAGE)
		fss << "  imageStore(image0, coordxy, uvec4(temp, 0, 0, 0));\n";
	else
		fss << "  buf1.x[coord] = temp;\n";

	fss << "  endInvocationInterlockARB();\n";

	if (m_data.killOdd)
		fss << "  discard;\n";

	fss << "}\n";
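
	// For reference, one concrete assembly of the pieces above (pixel_ordered,
	// 1 sample, SSBO, dim == 8, no discard) produces this main():
	//
	//   void main()
	//   {
	//     ivec2 coordxy = ivec2(gl_FragCoord.xy);
	//     uint stride = 8;
	//     uint bitsPerQuad = 1;
	//     uint mask = gl_SampleMaskIn[0] << (primID * bitsPerQuad);
	//     uint previousMask = (1 << (primID * bitsPerQuad))-1;
	//     beginInvocationInterlockARB();
	//     uint coord = coordxy.y * stride + coordxy.x;
	//     uint temp = buf1.x[coord];
	//     if ((temp & previousMask) == previousMask) temp |= mask; else temp = 0;
	//     buf1.x[coord] = temp;
	//     endInvocationInterlockARB();
	//   }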

	programCollection.glslSources.add("frag") << glu::FragmentSource(fss.str());
}

TestInstance* FSITestCase::createInstance (Context& context) const
{
	return new FSITestInstance(context, m_data);
}

tcu::TestStatus FSITestInstance::iterate (void)
{
	const DeviceInterface&	vk					= m_context.getDeviceInterface();
	const VkDevice			device				= m_context.getDevice();
	Allocator&				allocator			= m_context.getDefaultAllocator();
	VkFlags					allShaderStages		= VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
	VkFlags					allPipelineStages	= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;

	VkPipelineBindPoint bindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;

	Move<vk::VkDescriptorSetLayout>	descriptorSetLayout;
	Move<vk::VkDescriptorPool>		descriptorPool;
	Move<vk::VkDescriptorSet>		descriptorSet;

	VkDescriptorPoolCreateFlags			poolCreateFlags		= VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
	VkDescriptorSetLayoutCreateFlags	layoutCreateFlags	= 0;

	const VkDescriptorSetLayoutBinding bindings[2] =
	{
		{
			0u,									// binding
			VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,	// descriptorType
			1u,									// descriptorCount
			allShaderStages,					// stageFlags
			DE_NULL,							// pImmutableSamplers
		},
		{
			1u,									// binding
			VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,	// descriptorType
			1u,									// descriptorCount
			allShaderStages,					// stageFlags
			DE_NULL,							// pImmutableSamplers
		},
	};

	// Create a layout and allocate a descriptor set for it.
	const VkDescriptorSetLayoutCreateInfo setLayoutCreateInfo =
	{
		vk::VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,	// sType
		DE_NULL,													// pNext
		layoutCreateFlags,											// flags
		2u,															// bindingCount
		&bindings[0]												// pBindings
	};

	descriptorSetLayout = vk::createDescriptorSetLayout(vk, device, &setLayoutCreateInfo);

	vk::DescriptorPoolBuilder poolBuilder;
	poolBuilder.addType(bindings[0].descriptorType, 1);
	poolBuilder.addType(bindings[1].descriptorType, 1);

	descriptorPool	= poolBuilder.build(vk, device, poolCreateFlags, 1u);
	descriptorSet	= makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);

	// one uint per sample (max of 4 samples)
	VkDeviceSize bufferSize = m_data.dim*m_data.dim*sizeof(deUint32)*4;

	de::MovePtr<BufferWithMemory> buffer;
	buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
		vk, device, allocator, makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible));

	flushAlloc(vk, device, buffer->getAllocation());

	const VkQueue			queue		= getDeviceQueue(vk, device, m_context.getUniversalQueueFamilyIndex(), 0);
	Move<VkCommandPool>		cmdPool		= createCommandPool(vk, device, 0, m_context.getUniversalQueueFamilyIndex());
	Move<VkCommandBuffer>	cmdBuffer	= allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

	beginCommandBuffer(vk, *cmdBuffer, 0u);

	const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
	{
		VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,	// sType
		DE_NULL,										// pNext
		(VkPipelineLayoutCreateFlags)0,
		1,												// setLayoutCount
		&descriptorSetLayout.get(),						// pSetLayouts
		0u,												// pushConstantRangeCount
		DE_NULL,										// pPushConstantRanges
	};

	Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);

	de::MovePtr<BufferWithMemory> copyBuffer;
	copyBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
		vk, device, allocator, makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible | MemoryRequirement::Cached));

	const VkImageCreateInfo imageCreateInfo =
	{
		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	// VkStructureType sType;
		DE_NULL,								// const void* pNext;
		(VkImageCreateFlags)0u,					// VkImageCreateFlags flags;
		VK_IMAGE_TYPE_2D,						// VkImageType imageType;
		VK_FORMAT_R32_UINT,						// VkFormat format;
		{
			m_data.dim * m_data.samples,		// deUint32 width;
			m_data.dim,							// deUint32 height;
			1u									// deUint32 depth;
		},										// VkExtent3D extent;
		1u,										// deUint32 mipLevels;
		1u,										// deUint32 arrayLayers;
		VK_SAMPLE_COUNT_1_BIT,					// VkSampleCountFlagBits samples;
		VK_IMAGE_TILING_OPTIMAL,				// VkImageTiling tiling;
		VK_IMAGE_USAGE_STORAGE_BIT
		| VK_IMAGE_USAGE_TRANSFER_SRC_BIT
		| VK_IMAGE_USAGE_TRANSFER_DST_BIT,		// VkImageUsageFlags usage;
		VK_SHARING_MODE_EXCLUSIVE,				// VkSharingMode sharingMode;
		0u,										// deUint32 queueFamilyIndexCount;
		DE_NULL,								// const deUint32* pQueueFamilyIndices;
		VK_IMAGE_LAYOUT_UNDEFINED				// VkImageLayout initialLayout;
	};

	VkImageViewCreateInfo imageViewCreateInfo =
	{
		VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,	// VkStructureType sType;
		DE_NULL,									// const void* pNext;
		(VkImageViewCreateFlags)0u,					// VkImageViewCreateFlags flags;
		DE_NULL,									// VkImage image;
		VK_IMAGE_VIEW_TYPE_2D,						// VkImageViewType viewType;
		VK_FORMAT_R32_UINT,							// VkFormat format;
		{
			VK_COMPONENT_SWIZZLE_R,					// VkComponentSwizzle r;
			VK_COMPONENT_SWIZZLE_G,					// VkComponentSwizzle g;
			VK_COMPONENT_SWIZZLE_B,					// VkComponentSwizzle b;
			VK_COMPONENT_SWIZZLE_A					// VkComponentSwizzle a;
		},											// VkComponentMapping components;
		{
			VK_IMAGE_ASPECT_COLOR_BIT,				// VkImageAspectFlags aspectMask;
			0u,										// deUint32 baseMipLevel;
			1u,										// deUint32 levelCount;
			0u,										// deUint32 baseArrayLayer;
			1u										// deUint32 layerCount;
		}											// VkImageSubresourceRange subresourceRange;
	};

	de::MovePtr<ImageWithMemory>	image;
	Move<VkImageView>				imageView;

	image = de::MovePtr<ImageWithMemory>(new ImageWithMemory(
		vk, device, allocator, imageCreateInfo, MemoryRequirement::Any));
	imageViewCreateInfo.image = **image;
	imageView = createImageView(vk, device, &imageViewCreateInfo, NULL);

	VkDescriptorImageInfo	imageInfo	= makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
	VkDescriptorBufferInfo	bufferInfo	= makeDescriptorBufferInfo(**buffer, 0, bufferSize);

	VkWriteDescriptorSet w =
	{
		VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,	// sType
		DE_NULL,								// pNext
		*descriptorSet,							// dstSet
		(deUint32)0,							// dstBinding
		0,										// dstArrayElement
		1u,										// descriptorCount
		bindings[0].descriptorType,				// descriptorType
		&imageInfo,								// pImageInfo
		&bufferInfo,							// pBufferInfo
		DE_NULL,								// pTexelBufferView
	};
	vk.updateDescriptorSets(device, 1, &w, 0, NULL);

	w.dstBinding		= 1;
	w.descriptorType	= bindings[1].descriptorType;
	vk.updateDescriptorSets(device, 1, &w, 0, NULL);

	vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0, 1, &descriptorSet.get(), 0, DE_NULL);

	VkBool32 shadingRateEnable = m_data.interlock == INT_SHADING_RATE_ORDERED ||
								 m_data.interlock == INT_SHADING_RATE_UNORDERED ? VK_TRUE : VK_FALSE;
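
	// The fragment shading rate set on the pipeline below is a fixed 2x2, which
	// matches the shader dividing gl_FragCoord.xy (and the stride) by 2.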

	Move<VkPipeline>	pipeline;
	Move<VkRenderPass>	renderPass;
	Move<VkFramebuffer>	framebuffer;

	{
		const vk::VkSubpassDescription subpassDesc =
		{
			(vk::VkSubpassDescriptionFlags)0,
			vk::VK_PIPELINE_BIND_POINT_GRAPHICS,	// pipelineBindPoint
			0u,										// inputCount
			DE_NULL,								// pInputAttachments
			0u,										// colorCount
			DE_NULL,								// pColorAttachments
			DE_NULL,								// pResolveAttachments
			DE_NULL,								// depthStencilAttachment
			0u,										// preserveCount
			DE_NULL,								// pPreserveAttachments
		};
		const vk::VkRenderPassCreateInfo renderPassParams =
		{
			vk::VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,	// sType
			DE_NULL,										// pNext
			(vk::VkRenderPassCreateFlags)0,
			0u,												// attachmentCount
			DE_NULL,										// pAttachments
			1u,												// subpassCount
			&subpassDesc,									// pSubpasses
			0u,												// dependencyCount
			DE_NULL,										// pDependencies
		};

		renderPass = createRenderPass(vk, device, &renderPassParams);

		const vk::VkFramebufferCreateInfo framebufferParams =
		{
			vk::VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,	// sType
			DE_NULL,										// pNext
			(vk::VkFramebufferCreateFlags)0,
			*renderPass,									// renderPass
			0u,												// attachmentCount
			DE_NULL,										// pAttachments
			m_data.dim,										// width
			m_data.dim,										// height
			1u,												// layers
		};

		framebuffer = createFramebuffer(vk, device, &framebufferParams);

		const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo =
		{
			VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,	// VkStructureType sType;
			DE_NULL,													// const void* pNext;
			(VkPipelineVertexInputStateCreateFlags)0,					// VkPipelineVertexInputStateCreateFlags flags;
			0u,															// deUint32 vertexBindingDescriptionCount;
			DE_NULL,													// const VkVertexInputBindingDescription* pVertexBindingDescriptions;
			0u,															// deUint32 vertexAttributeDescriptionCount;
			DE_NULL														// const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
		};

		const VkPipelineInputAssemblyStateCreateInfo inputAssemblyStateCreateInfo =
		{
			VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,	// VkStructureType sType;
			DE_NULL,														// const void* pNext;
			(VkPipelineInputAssemblyStateCreateFlags)0,						// VkPipelineInputAssemblyStateCreateFlags flags;
			VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,							// VkPrimitiveTopology topology;
			VK_FALSE														// VkBool32 primitiveRestartEnable;
		};

		const VkPipelineRasterizationStateCreateInfo rasterizationStateCreateInfo =
		{
			VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,	// VkStructureType sType;
			DE_NULL,													// const void* pNext;
			(VkPipelineRasterizationStateCreateFlags)0,					// VkPipelineRasterizationStateCreateFlags flags;
			VK_FALSE,													// VkBool32 depthClampEnable;
			VK_FALSE,													// VkBool32 rasterizerDiscardEnable;
			VK_POLYGON_MODE_FILL,										// VkPolygonMode polygonMode;
			VK_CULL_MODE_NONE,											// VkCullModeFlags cullMode;
			VK_FRONT_FACE_CLOCKWISE,									// VkFrontFace frontFace;
			VK_FALSE,													// VkBool32 depthBiasEnable;
			0.0f,														// float depthBiasConstantFactor;
			0.0f,														// float depthBiasClamp;
			0.0f,														// float depthBiasSlopeFactor;
			1.0f														// float lineWidth;
		};

		const VkPipelineMultisampleStateCreateInfo multisampleStateCreateInfo =
		{
			VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,	// VkStructureType sType
			DE_NULL,													// const void* pNext
			0u,															// VkPipelineMultisampleStateCreateFlags flags
			(VkSampleCountFlagBits)m_data.samples,						// VkSampleCountFlagBits rasterizationSamples
			m_data.sampleShading ? VK_TRUE : VK_FALSE,					// VkBool32 sampleShadingEnable
			1.0f,														// float minSampleShading
			DE_NULL,													// const VkSampleMask* pSampleMask
			VK_FALSE,													// VkBool32 alphaToCoverageEnable
			VK_FALSE													// VkBool32 alphaToOneEnable
		};

		VkViewport	viewport	= makeViewport(m_data.dim, m_data.dim);
		VkRect2D	scissor		= makeRect2D(m_data.dim, m_data.dim);

		VkPipelineFragmentShadingRateStateCreateInfoKHR shadingRateStateCreateInfo =
		{
			VK_STRUCTURE_TYPE_PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR,	// VkStructureType sType;
			DE_NULL,																// const void* pNext;
			{ 2, 2 },																// VkExtent2D fragmentSize;
			{ VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR, VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR },	// VkFragmentShadingRateCombinerOpKHR combinerOps[2];
		};

		const VkPipelineViewportStateCreateInfo viewportStateCreateInfo =
		{
			VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,	// VkStructureType sType
			DE_NULL,												// const void* pNext
			(VkPipelineViewportStateCreateFlags)0,					// VkPipelineViewportStateCreateFlags flags
			1u,														// deUint32 viewportCount
			&viewport,												// const VkViewport* pViewports
			1u,														// deUint32 scissorCount
			&scissor												// const VkRect2D* pScissors
		};

		Move<VkShaderModule>	fs			= createShaderModule(vk, device, m_context.getBinaryCollection().get("frag"), 0);
		Move<VkShaderModule>	vs			= createShaderModule(vk, device, m_context.getBinaryCollection().get("vert"), 0);
		deUint32				numStages	= 2u;

		const VkPipelineShaderStageCreateInfo shaderCreateInfo[2] =
		{
			{
				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
				DE_NULL,
				(VkPipelineShaderStageCreateFlags)0,
				VK_SHADER_STAGE_VERTEX_BIT,		// stage
				*vs,							// shader
				"main",
				DE_NULL,						// pSpecializationInfo
			},
			{
				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
				DE_NULL,
				(VkPipelineShaderStageCreateFlags)0,
				VK_SHADER_STAGE_FRAGMENT_BIT,	// stage
				*fs,							// shader
				"main",
				DE_NULL,						// pSpecializationInfo
			}
		};

		const VkGraphicsPipelineCreateInfo graphicsPipelineCreateInfo =
		{
			VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,			// VkStructureType sType;
			shadingRateEnable ? &shadingRateStateCreateInfo : DE_NULL,	// const void* pNext;
			(VkPipelineCreateFlags)0,									// VkPipelineCreateFlags flags;
			numStages,													// deUint32 stageCount;
			&shaderCreateInfo[0],										// const VkPipelineShaderStageCreateInfo* pStages;
			&vertexInputStateCreateInfo,								// const VkPipelineVertexInputStateCreateInfo* pVertexInputState;
			&inputAssemblyStateCreateInfo,								// const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState;
			DE_NULL,													// const VkPipelineTessellationStateCreateInfo* pTessellationState;
			&viewportStateCreateInfo,									// const VkPipelineViewportStateCreateInfo* pViewportState;
			&rasterizationStateCreateInfo,								// const VkPipelineRasterizationStateCreateInfo* pRasterizationState;
			&multisampleStateCreateInfo,								// const VkPipelineMultisampleStateCreateInfo* pMultisampleState;
			DE_NULL,													// const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState;
			DE_NULL,													// const VkPipelineColorBlendStateCreateInfo* pColorBlendState;
			DE_NULL,													// const VkPipelineDynamicStateCreateInfo* pDynamicState;
			pipelineLayout.get(),										// VkPipelineLayout layout;
			renderPass.get(),											// VkRenderPass renderPass;
			0u,															// deUint32 subpass;
			DE_NULL,													// VkPipeline basePipelineHandle;
			0															// int basePipelineIndex;
		};

		pipeline = createGraphicsPipeline(vk, device, DE_NULL, &graphicsPipelineCreateInfo);
	}

	const VkImageMemoryBarrier imageBarrier =
	{
		VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,	// VkStructureType sType
		DE_NULL,								// const void* pNext
		0u,										// VkAccessFlags srcAccessMask
		VK_ACCESS_TRANSFER_WRITE_BIT,			// VkAccessFlags dstAccessMask
		VK_IMAGE_LAYOUT_UNDEFINED,				// VkImageLayout oldLayout
		VK_IMAGE_LAYOUT_GENERAL,				// VkImageLayout newLayout
		VK_QUEUE_FAMILY_IGNORED,				// uint32_t srcQueueFamilyIndex
		VK_QUEUE_FAMILY_IGNORED,				// uint32_t dstQueueFamilyIndex
		**image,								// VkImage image
		{
			VK_IMAGE_ASPECT_COLOR_BIT,			// VkImageAspectFlags aspectMask
			0u,									// uint32_t baseMipLevel
			1u,									// uint32_t mipLevels,
			0u,									// uint32_t baseArray
			1u,									// uint32_t arraySize
		}
	};

	vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
						  (VkDependencyFlags)0,
						  0, (const VkMemoryBarrier*)DE_NULL,
						  0, (const VkBufferMemoryBarrier*)DE_NULL,
						  1, &imageBarrier);

	vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);

	VkImageSubresourceRange	range		= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
	VkClearValue			clearColor	= makeClearValueColorU32(0,0,0,0);

	VkMemoryBarrier memBarrier =
	{
		VK_STRUCTURE_TYPE_MEMORY_BARRIER,	// sType
		DE_NULL,							// pNext
		0u,									// srcAccessMask
		0u,									// dstAccessMask
	};

	vk.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_GENERAL, &clearColor.color, 1, &range);

	vk.cmdFillBuffer(*cmdBuffer, **buffer, 0, bufferSize, 0);

	memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
	memBarrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
	vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, allPipelineStages,
						  0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL);

	beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer,
					makeRect2D(m_data.dim, m_data.dim),
					0, DE_NULL, VK_SUBPASS_CONTENTS_INLINE);

	// Draw N fullscreen "quads", one per instance.
	deUint32 N = 32 / bitsPerQuad(m_data);
	deUint32 expectedValue = 0xFFFFFFFF;
	vk.cmdDraw(*cmdBuffer, 4u, N, 0u, 0u);
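
	// N * bitsPerQuad == 32, so after all N quads have passed through the
	// interlock every word should have all 32 bits set, matching expectedValue.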

	endRenderPass(vk, *cmdBuffer);

	memBarrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
	memBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
	vk.cmdPipelineBarrier(*cmdBuffer, allPipelineStages, VK_PIPELINE_STAGE_TRANSFER_BIT,
						  0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL);

	deUint32 copyDimX = m_data.dim;
	deUint32 copyDimY = m_data.dim;

	if (m_data.isSampleInterlock())
		copyDimX *= m_data.samples;

	if (shadingRateEnable)
	{
		copyDimX /= 2;
		copyDimY /= 2;
	}

	if (m_data.resType == RES_IMAGE)
	{
		const VkBufferImageCopy copyRegion = makeBufferImageCopy(makeExtent3D(copyDimX, copyDimY, 1u),
																 makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u));
		vk.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_GENERAL, **copyBuffer, 1u, &copyRegion);
	}
	else
	{
		const VkBufferCopy copyRegion = makeBufferCopy(0u, 0u, copyDimX*copyDimY*sizeof(deUint32));
		vk.cmdCopyBuffer(*cmdBuffer, **buffer, **copyBuffer, 1, &copyRegion);
	}

	memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
	memBarrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT;
	vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
						  0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL);

	endCommandBuffer(vk, *cmdBuffer);

	submitCommandsAndWait(vk, device, queue, cmdBuffer.get());

	deUint32 *ptr = (deUint32 *)copyBuffer->getAllocation().getHostPtr();
	invalidateAlloc(vk, device, copyBuffer->getAllocation());
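
	// Check the copied-back values. In the killOdd variants the odd-x
	// invocations always discard inside the interlock before their store, so
	// those locations must still hold the cleared value 0; everything else
	// must have accumulated all 32 bits.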

	qpTestResult res = QP_TEST_RESULT_PASS;

	for (deUint32 i = 0; i < copyDimX*copyDimY; ++i)
	{
		if (m_data.killOdd && (i & 1))
		{
			if (ptr[i] != 0)
				res = QP_TEST_RESULT_FAIL;
		}
		else if (ptr[i] != expectedValue)
			res = QP_TEST_RESULT_FAIL;
	}

	return tcu::TestStatus(res, qpGetTestResultName(res));
}

} // anonymous

tcu::TestCaseGroup* createBasicTests (tcu::TestContext& testCtx)
{
	de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "basic"));

	typedef struct
	{
		deUint32	count;
		const char*	name;
	} TestGroupCase;

	TestGroupCase dimCases[] =
	{
		{    8,	"8x8"		},
		{   16,	"16x16"		},
		{   32,	"32x32"		},
		{   64,	"64x64"		},
		{  128,	"128x128"	},
		{  256,	"256x256"	},
		{  512,	"512x512"	},
		{ 1024,	"1024x1024"	},
	};

	TestGroupCase resCases[] =
	{
		{ RES_IMAGE,	"image"	},
		{ RES_SSBO,		"ssbo"	},
	};

	TestGroupCase killCases[] =
	{
		{ 0,	"nodiscard"	},
		{ 1,	"discard"	},
	};

	TestGroupCase sampCases[] =
	{
		{ 1,	"1xaa"	},
		{ 4,	"4xaa"	},
	};

	TestGroupCase ssCases[] =
	{
		{ 0,	"no_sample_shading"	},
		{ 1,	"sample_shading"	},
	};

	TestGroupCase intCases[] =
	{
		{ INT_PIXEL_ORDERED,			"pixel_ordered"			},
		{ INT_PIXEL_UNORDERED,			"pixel_unordered"		},
		{ INT_SAMPLE_ORDERED,			"sample_ordered"		},
		{ INT_SAMPLE_UNORDERED,			"sample_unordered"		},
#ifndef CTS_USES_VULKANSC
		{ INT_SHADING_RATE_ORDERED,		"shading_rate_ordered"	},
		{ INT_SHADING_RATE_UNORDERED,	"shading_rate_unordered"},
#endif // CTS_USES_VULKANSC
	};

	for (int killNdx = 0; killNdx < DE_LENGTH_OF_ARRAY(killCases); killNdx++)
	{
		de::MovePtr<tcu::TestCaseGroup> killGroup(new tcu::TestCaseGroup(testCtx, killCases[killNdx].name));
		for (int resNdx = 0; resNdx < DE_LENGTH_OF_ARRAY(resCases); resNdx++)
		{
			de::MovePtr<tcu::TestCaseGroup> resGroup(new tcu::TestCaseGroup(testCtx, resCases[resNdx].name));
			for (int intNdx = 0; intNdx < DE_LENGTH_OF_ARRAY(intCases); intNdx++)
			{
				de::MovePtr<tcu::TestCaseGroup> intGroup(new tcu::TestCaseGroup(testCtx, intCases[intNdx].name));
				for (int sampNdx = 0; sampNdx < DE_LENGTH_OF_ARRAY(sampCases); sampNdx++)
				{
					de::MovePtr<tcu::TestCaseGroup> sampGroup(new tcu::TestCaseGroup(testCtx, sampCases[sampNdx].name));
					for (int ssNdx = 0; ssNdx < DE_LENGTH_OF_ARRAY(ssCases); ssNdx++)
					{
						de::MovePtr<tcu::TestCaseGroup> ssGroup(new tcu::TestCaseGroup(testCtx, ssCases[ssNdx].name));
						for (int dimNdx = 0; dimNdx < DE_LENGTH_OF_ARRAY(dimCases); dimNdx++)
						{
							CaseDef c =
							{
								dimCases[dimNdx].count,								// deUint32 dim;
								(Resource)resCases[resNdx].count,					// Resource resType;
								(Interlock)intCases[intNdx].count,					// Interlock interlock;
								(VkSampleCountFlagBits)sampCases[sampNdx].count,	// VkSampleCountFlagBits samples;
								(bool)killCases[killNdx].count,						// bool killOdd;
								(bool)ssCases[ssNdx].count,							// bool sampleShading;
							};

							// Sample shading is only meaningful with multisampling.
							if (c.sampleShading && c.samples == 1)
								continue;

							ssGroup->addChild(new FSITestCase(testCtx, dimCases[dimNdx].name, c));
						}
						sampGroup->addChild(ssGroup.release());
					}
					intGroup->addChild(sampGroup.release());
				}
				resGroup->addChild(intGroup.release());
			}
			killGroup->addChild(resGroup.release());
		}
		group->addChild(killGroup.release());
	}
	return group.release();
}

} // FragmentShaderInterlock
} // vkt