• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2019 The Khronos Group Inc.
6  * Copyright (c) 2019 Google Inc.
7  * Copyright (c) 2017 Codeplay Software Ltd.
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  */ /*!
22  * \file
23  * \brief Subgroups Tests Utils
24  */ /*--------------------------------------------------------------------*/
25 
26 #include "vktSubgroupsTestsUtils.hpp"
27 #include "vkRayTracingUtil.hpp"
28 #include "tcuFloat.hpp"
29 #include "deRandom.hpp"
30 #include "tcuCommandLine.hpp"
31 #include "tcuStringTemplate.hpp"
32 #include "vkBarrierUtil.hpp"
33 #include "vkImageUtil.hpp"
34 #include "vkTypeUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkObjUtil.hpp"
37 
38 using namespace tcu;
39 using namespace std;
40 using namespace vk;
41 using namespace vkt;
42 
43 namespace
44 {
45 
// Selects which compute-like shader stage a helper should target.
enum class ComputeLike
{
    COMPUTE = 0, // Compute stage.
    MESH    = 1  // Mesh stage.
};
51 
// Returns the upper bound (1024) used for the widths stepped through by getNextWidth().
uint32_t getMaxWidth()
{
    const uint32_t maxWidth = 1024u;
    return maxWidth;
}
56 
// Returns the width to test after `width`.
//
// Widths below 128 (the max subgroup size) advance by one so that every value
// up to 128 is covered. From 128 onwards the width is doubled, so only powers
// of two are visited, which reduces testing time.
uint32_t getNextWidth(const uint32_t width)
{
    const uint32_t exhaustiveLimit = 128u;

    return (width < exhaustiveLimit) ? (width + 1u) : (width * 2u);
}
70 
getFormatSizeInBytes(const VkFormat format)71 uint32_t getFormatSizeInBytes(const VkFormat format)
72 {
73     switch (format)
74     {
75     default:
76         DE_FATAL("Unhandled format!");
77         return 0;
78     case VK_FORMAT_R8_SINT:
79     case VK_FORMAT_R8_UINT:
80         return static_cast<uint32_t>(sizeof(int8_t));
81     case VK_FORMAT_R8G8_SINT:
82     case VK_FORMAT_R8G8_UINT:
83         return static_cast<uint32_t>(sizeof(int8_t) * 2);
84     case VK_FORMAT_R8G8B8_SINT:
85     case VK_FORMAT_R8G8B8_UINT:
86     case VK_FORMAT_R8G8B8A8_SINT:
87     case VK_FORMAT_R8G8B8A8_UINT:
88         return static_cast<uint32_t>(sizeof(int8_t) * 4);
89     case VK_FORMAT_R16_SINT:
90     case VK_FORMAT_R16_UINT:
91     case VK_FORMAT_R16_SFLOAT:
92         return static_cast<uint32_t>(sizeof(int16_t));
93     case VK_FORMAT_R16G16_SINT:
94     case VK_FORMAT_R16G16_UINT:
95     case VK_FORMAT_R16G16_SFLOAT:
96         return static_cast<uint32_t>(sizeof(int16_t) * 2);
97     case VK_FORMAT_R16G16B16_UINT:
98     case VK_FORMAT_R16G16B16_SINT:
99     case VK_FORMAT_R16G16B16_SFLOAT:
100     case VK_FORMAT_R16G16B16A16_SINT:
101     case VK_FORMAT_R16G16B16A16_UINT:
102     case VK_FORMAT_R16G16B16A16_SFLOAT:
103         return static_cast<uint32_t>(sizeof(int16_t) * 4);
104     case VK_FORMAT_R32_SINT:
105     case VK_FORMAT_R32_UINT:
106     case VK_FORMAT_R32_SFLOAT:
107         return static_cast<uint32_t>(sizeof(int32_t));
108     case VK_FORMAT_R32G32_SINT:
109     case VK_FORMAT_R32G32_UINT:
110     case VK_FORMAT_R32G32_SFLOAT:
111         return static_cast<uint32_t>(sizeof(int32_t) * 2);
112     case VK_FORMAT_R32G32B32_SINT:
113     case VK_FORMAT_R32G32B32_UINT:
114     case VK_FORMAT_R32G32B32_SFLOAT:
115     case VK_FORMAT_R32G32B32A32_SINT:
116     case VK_FORMAT_R32G32B32A32_UINT:
117     case VK_FORMAT_R32G32B32A32_SFLOAT:
118         return static_cast<uint32_t>(sizeof(int32_t) * 4);
119     case VK_FORMAT_R64_SINT:
120     case VK_FORMAT_R64_UINT:
121     case VK_FORMAT_R64_SFLOAT:
122         return static_cast<uint32_t>(sizeof(int64_t));
123     case VK_FORMAT_R64G64_SINT:
124     case VK_FORMAT_R64G64_UINT:
125     case VK_FORMAT_R64G64_SFLOAT:
126         return static_cast<uint32_t>(sizeof(int64_t) * 2);
127     case VK_FORMAT_R64G64B64_SINT:
128     case VK_FORMAT_R64G64B64_UINT:
129     case VK_FORMAT_R64G64B64_SFLOAT:
130     case VK_FORMAT_R64G64B64A64_SINT:
131     case VK_FORMAT_R64G64B64A64_UINT:
132     case VK_FORMAT_R64G64B64A64_SFLOAT:
133         return static_cast<uint32_t>(sizeof(int64_t) * 4);
134     // The below formats are used to represent bool and bvec* types. These
135     // types are passed to the shader as int and ivec* types, before the
136     // calculations are done as booleans. We need a distinct type here so
137     // that the shader generators can switch on it and generate the correct
138     // shader source for testing.
139     case VK_FORMAT_R8_USCALED:
140         return static_cast<uint32_t>(sizeof(int32_t));
141     case VK_FORMAT_R8G8_USCALED:
142         return static_cast<uint32_t>(sizeof(int32_t) * 2);
143     case VK_FORMAT_R8G8B8_USCALED:
144     case VK_FORMAT_R8G8B8A8_USCALED:
145         return static_cast<uint32_t>(sizeof(int32_t) * 4);
146     }
147 }
148 
getElementSizeInBytes(const VkFormat format,const subgroups::SSBOData::InputDataLayoutType layout)149 uint32_t getElementSizeInBytes(const VkFormat format, const subgroups::SSBOData::InputDataLayoutType layout)
150 {
151     const uint32_t bytes = getFormatSizeInBytes(format);
152 
153     if (layout == subgroups::SSBOData::LayoutStd140)
154         return bytes < 16 ? 16 : bytes;
155     else
156         return bytes;
157 }
158 
makeRenderPass(Context & context,VkFormat format)159 Move<VkRenderPass> makeRenderPass(Context &context, VkFormat format)
160 {
161     const VkAttachmentReference colorReference    = {0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL};
162     const VkSubpassDescription subpassDescription = {
163         0u,                              //  VkSubpassDescriptionFlags flags;
164         VK_PIPELINE_BIND_POINT_GRAPHICS, //  VkPipelineBindPoint pipelineBindPoint;
165         0,                               //  uint32_t inputAttachmentCount;
166         nullptr,                         //  const VkAttachmentReference* pInputAttachments;
167         1,                               //  uint32_t colorAttachmentCount;
168         &colorReference,                 //  const VkAttachmentReference* pColorAttachments;
169         nullptr,                         //  const VkAttachmentReference* pResolveAttachments;
170         nullptr,                         //  const VkAttachmentReference* pDepthStencilAttachment;
171         0,                               //  uint32_t preserveAttachmentCount;
172         nullptr                          //  const uint32_t* pPreserveAttachments;
173     };
174     const VkSubpassDependency subpassDependencies[2] = {
175         {
176             VK_SUBPASS_EXTERNAL,                           //  uint32_t srcSubpass;
177             0u,                                            //  uint32_t dstSubpass;
178             VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,          //  VkPipelineStageFlags srcStageMask;
179             VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, //  VkPipelineStageFlags dstStageMask;
180             VK_ACCESS_MEMORY_READ_BIT,                     //  VkAccessFlags srcAccessMask;
181             VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, //  VkAccessFlags dstAccessMask;
182             VK_DEPENDENCY_BY_REGION_BIT //  VkDependencyFlags dependencyFlags;
183         },
184         {
185             0u,                                            //  uint32_t srcSubpass;
186             VK_SUBPASS_EXTERNAL,                           //  uint32_t dstSubpass;
187             VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, //  VkPipelineStageFlags srcStageMask;
188             VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,          //  VkPipelineStageFlags dstStageMask;
189             VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, //  VkAccessFlags srcAccessMask;
190             VK_ACCESS_MEMORY_READ_BIT,                                                  //  VkAccessFlags dstAccessMask;
191             VK_DEPENDENCY_BY_REGION_BIT //  VkDependencyFlags dependencyFlags;
192         },
193     };
194     const VkAttachmentDescription attachmentDescription = {
195         0u,                                  //  VkAttachmentDescriptionFlags flags;
196         format,                              //  VkFormat format;
197         VK_SAMPLE_COUNT_1_BIT,               //  VkSampleCountFlagBits samples;
198         VK_ATTACHMENT_LOAD_OP_CLEAR,         //  VkAttachmentLoadOp loadOp;
199         VK_ATTACHMENT_STORE_OP_STORE,        //  VkAttachmentStoreOp storeOp;
200         VK_ATTACHMENT_LOAD_OP_DONT_CARE,     //  VkAttachmentLoadOp stencilLoadOp;
201         VK_ATTACHMENT_STORE_OP_DONT_CARE,    //  VkAttachmentStoreOp stencilStoreOp;
202         VK_IMAGE_LAYOUT_UNDEFINED,           //  VkImageLayout initialLayout;
203         VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL //  VkImageLayout finalLayout;
204     };
205     const VkRenderPassCreateInfo renderPassCreateInfo = {
206         VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, //  VkStructureType sType;
207         nullptr,                                   //  const void* pNext;
208         0u,                                        //  VkRenderPassCreateFlags flags;
209         1,                                         //  uint32_t attachmentCount;
210         &attachmentDescription,                    //  const VkAttachmentDescription* pAttachments;
211         1,                                         //  uint32_t subpassCount;
212         &subpassDescription,                       //  const VkSubpassDescription* pSubpasses;
213         2,                                         //  uint32_t dependencyCount;
214         subpassDependencies                        //  const VkSubpassDependency* pDependencies;
215     };
216 
217     return createRenderPass(context.getDeviceInterface(), context.getDevice(), &renderPassCreateInfo);
218 }
219 
makeGraphicsPipeline(const DeviceInterface & vk,const VkDevice device,const VkPipelineLayout pipelineLayout,const VkShaderModule vertexShaderModule,const VkShaderModule tessellationControlShaderModule,const VkShaderModule tessellationEvalShaderModule,const VkShaderModule geometryShaderModule,const VkShaderModule fragmentShaderModule,const VkRenderPass renderPass,const std::vector<VkViewport> & viewports,const std::vector<VkRect2D> & scissors,const VkPrimitiveTopology topology,const uint32_t subpass,const uint32_t patchControlPoints,const VkPipelineVertexInputStateCreateInfo * vertexInputStateCreateInfo,const VkPipelineRasterizationStateCreateInfo * rasterizationStateCreateInfo,const VkPipelineMultisampleStateCreateInfo * multisampleStateCreateInfo,const VkPipelineDepthStencilStateCreateInfo * depthStencilStateCreateInfo,const VkPipelineColorBlendStateCreateInfo * colorBlendStateCreateInfo,const VkPipelineDynamicStateCreateInfo * dynamicStateCreateInfo,const uint32_t vertexShaderStageCreateFlags,const uint32_t tessellationControlShaderStageCreateFlags,const uint32_t tessellationEvalShaderStageCreateFlags,const uint32_t geometryShaderStageCreateFlags,const uint32_t fragmentShaderStageCreateFlags,const uint32_t requiredSubgroupSize[5])220 Move<VkPipeline> makeGraphicsPipeline(
221     const DeviceInterface &vk, const VkDevice device, const VkPipelineLayout pipelineLayout,
222     const VkShaderModule vertexShaderModule, const VkShaderModule tessellationControlShaderModule,
223     const VkShaderModule tessellationEvalShaderModule, const VkShaderModule geometryShaderModule,
224     const VkShaderModule fragmentShaderModule, const VkRenderPass renderPass, const std::vector<VkViewport> &viewports,
225     const std::vector<VkRect2D> &scissors, const VkPrimitiveTopology topology, const uint32_t subpass,
226     const uint32_t patchControlPoints, const VkPipelineVertexInputStateCreateInfo *vertexInputStateCreateInfo,
227     const VkPipelineRasterizationStateCreateInfo *rasterizationStateCreateInfo,
228     const VkPipelineMultisampleStateCreateInfo *multisampleStateCreateInfo,
229     const VkPipelineDepthStencilStateCreateInfo *depthStencilStateCreateInfo,
230     const VkPipelineColorBlendStateCreateInfo *colorBlendStateCreateInfo,
231     const VkPipelineDynamicStateCreateInfo *dynamicStateCreateInfo, const uint32_t vertexShaderStageCreateFlags,
232     const uint32_t tessellationControlShaderStageCreateFlags, const uint32_t tessellationEvalShaderStageCreateFlags,
233     const uint32_t geometryShaderStageCreateFlags, const uint32_t fragmentShaderStageCreateFlags,
234     const uint32_t requiredSubgroupSize[5])
235 {
236     const VkBool32 disableRasterization = (fragmentShaderModule == VK_NULL_HANDLE);
237     const bool hasTessellation =
238         (tessellationControlShaderModule != VK_NULL_HANDLE || tessellationEvalShaderModule != VK_NULL_HANDLE);
239 
240     VkPipelineShaderStageCreateInfo stageCreateInfo = {
241         VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType                     sType
242         nullptr,                                             // const void*                         pNext
243         0u,                                                  // VkPipelineShaderStageCreateFlags    flags
244         VK_SHADER_STAGE_VERTEX_BIT,                          // VkShaderStageFlagBits               stage
245         VK_NULL_HANDLE,                                      // VkShaderModule                      module
246         "main",                                              // const char*                         pName
247         nullptr                                              // const VkSpecializationInfo*         pSpecializationInfo
248     };
249 
250     std::vector<VkPipelineShaderStageCreateInfo> pipelineShaderStageParams;
251 
252     const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT requiredSubgroupSizeCreateInfo[5] = {
253         {
254             VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
255             nullptr,
256             requiredSubgroupSize != nullptr ? requiredSubgroupSize[0] : 0u,
257         },
258         {
259             VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
260             nullptr,
261             requiredSubgroupSize != nullptr ? requiredSubgroupSize[1] : 0u,
262         },
263         {
264             VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
265             nullptr,
266             requiredSubgroupSize != nullptr ? requiredSubgroupSize[2] : 0u,
267         },
268         {
269             VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
270             nullptr,
271             requiredSubgroupSize != nullptr ? requiredSubgroupSize[3] : 0u,
272         },
273         {
274             VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
275             nullptr,
276             requiredSubgroupSize != nullptr ? requiredSubgroupSize[4] : 0u,
277         },
278     };
279 
280     {
281         stageCreateInfo.pNext  = (requiredSubgroupSizeCreateInfo[0].requiredSubgroupSize != 0u) ?
282                                      &requiredSubgroupSizeCreateInfo[0] :
283                                      nullptr;
284         stageCreateInfo.flags  = vertexShaderStageCreateFlags;
285         stageCreateInfo.stage  = VK_SHADER_STAGE_VERTEX_BIT;
286         stageCreateInfo.module = vertexShaderModule;
287         pipelineShaderStageParams.push_back(stageCreateInfo);
288     }
289 
290     if (tessellationControlShaderModule != VK_NULL_HANDLE)
291     {
292         stageCreateInfo.pNext  = (requiredSubgroupSizeCreateInfo[1].requiredSubgroupSize != 0u) ?
293                                      &requiredSubgroupSizeCreateInfo[1] :
294                                      nullptr;
295         stageCreateInfo.flags  = tessellationControlShaderStageCreateFlags;
296         stageCreateInfo.stage  = VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
297         stageCreateInfo.module = tessellationControlShaderModule;
298         pipelineShaderStageParams.push_back(stageCreateInfo);
299     }
300 
301     if (tessellationEvalShaderModule != VK_NULL_HANDLE)
302     {
303         stageCreateInfo.pNext =
304             (requiredSubgroupSize != nullptr && requiredSubgroupSizeCreateInfo[2].requiredSubgroupSize != 0u) ?
305                 &requiredSubgroupSizeCreateInfo[2] :
306                 nullptr;
307         stageCreateInfo.flags  = tessellationEvalShaderStageCreateFlags;
308         stageCreateInfo.stage  = VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
309         stageCreateInfo.module = tessellationEvalShaderModule;
310         pipelineShaderStageParams.push_back(stageCreateInfo);
311     }
312 
313     if (geometryShaderModule != VK_NULL_HANDLE)
314     {
315         stageCreateInfo.pNext  = (requiredSubgroupSizeCreateInfo[3].requiredSubgroupSize != 0u) ?
316                                      &requiredSubgroupSizeCreateInfo[3] :
317                                      nullptr;
318         stageCreateInfo.flags  = geometryShaderStageCreateFlags;
319         stageCreateInfo.stage  = VK_SHADER_STAGE_GEOMETRY_BIT;
320         stageCreateInfo.module = geometryShaderModule;
321         pipelineShaderStageParams.push_back(stageCreateInfo);
322     }
323 
324     if (fragmentShaderModule != VK_NULL_HANDLE)
325     {
326         stageCreateInfo.pNext  = (requiredSubgroupSizeCreateInfo[4].requiredSubgroupSize != 0u) ?
327                                      &requiredSubgroupSizeCreateInfo[4] :
328                                      nullptr;
329         stageCreateInfo.flags  = fragmentShaderStageCreateFlags;
330         stageCreateInfo.stage  = VK_SHADER_STAGE_FRAGMENT_BIT;
331         stageCreateInfo.module = fragmentShaderModule;
332         pipelineShaderStageParams.push_back(stageCreateInfo);
333     }
334 
335     const VkVertexInputBindingDescription vertexInputBindingDescription = {
336         0u,                          // uint32_t             binding
337         sizeof(tcu::Vec4),           // uint32_t             stride
338         VK_VERTEX_INPUT_RATE_VERTEX, // VkVertexInputRate    inputRate
339     };
340 
341     const VkVertexInputAttributeDescription vertexInputAttributeDescription = {
342         0u,                            // uint32_t    location
343         0u,                            // uint32_t    binding
344         VK_FORMAT_R32G32B32A32_SFLOAT, // VkFormat    format
345         0u                             // uint32_t    offset
346     };
347 
348     const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfoDefault = {
349         VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType                             sType
350         nullptr,                                                   // const void*                                 pNext
351         (VkPipelineVertexInputStateCreateFlags)0,                  // VkPipelineVertexInputStateCreateFlags       flags
352         1u,                              // uint32_t                                    vertexBindingDescriptionCount
353         &vertexInputBindingDescription,  // const VkVertexInputBindingDescription*      pVertexBindingDescriptions
354         1u,                              // uint32_t                                    vertexAttributeDescriptionCount
355         &vertexInputAttributeDescription // const VkVertexInputAttributeDescription*    pVertexAttributeDescriptions
356     };
357 
358     const VkPipelineInputAssemblyStateCreateInfo inputAssemblyStateCreateInfo = {
359         VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, // VkStructureType                            sType
360         nullptr,                                                     // const void*                                pNext
361         0u,                                                          // VkPipelineInputAssemblyStateCreateFlags    flags
362         topology, // VkPrimitiveTopology                        topology
363         VK_FALSE  // VkBool32                                   primitiveRestartEnable
364     };
365 
366     const VkPipelineTessellationStateCreateInfo tessStateCreateInfo = {
367         VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, // VkStructureType                           sType
368         nullptr,                                                   // const void*                               pNext
369         0u,                                                        // VkPipelineTessellationStateCreateFlags    flags
370         patchControlPoints // uint32_t                                  patchControlPoints
371     };
372 
373     const VkPipelineViewportStateCreateInfo viewportStateCreateInfo = {
374         VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, // VkStructureType                             sType
375         nullptr,                                               // const void*                                 pNext
376         (VkPipelineViewportStateCreateFlags)0,                 // VkPipelineViewportStateCreateFlags          flags
377         viewports.empty() ? 1u :
378                             (uint32_t)viewports.size(),     // uint32_t                                    viewportCount
379         viewports.empty() ? nullptr : &viewports[0],        // const VkViewport*                           pViewports
380         viewports.empty() ? 1u : (uint32_t)scissors.size(), // uint32_t                                    scissorCount
381         scissors.empty() ? nullptr : &scissors[0]           // const VkRect2D*                             pScissors
382     };
383 
384     const VkPipelineRasterizationStateCreateInfo rasterizationStateCreateInfoDefault = {
385         VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, // VkStructureType                            sType
386         nullptr,                                                    // const void*                                pNext
387         0u,                                                         // VkPipelineRasterizationStateCreateFlags    flags
388         VK_FALSE,                        // VkBool32                                   depthClampEnable
389         disableRasterization,            // VkBool32                                   rasterizerDiscardEnable
390         VK_POLYGON_MODE_FILL,            // VkPolygonMode                              polygonMode
391         VK_CULL_MODE_NONE,               // VkCullModeFlags                            cullMode
392         VK_FRONT_FACE_COUNTER_CLOCKWISE, // VkFrontFace                                frontFace
393         VK_FALSE,                        // VkBool32                                   depthBiasEnable
394         0.0f,                            // float                                      depthBiasConstantFactor
395         0.0f,                            // float                                      depthBiasClamp
396         0.0f,                            // float                                      depthBiasSlopeFactor
397         1.0f                             // float                                      lineWidth
398     };
399 
400     const VkPipelineMultisampleStateCreateInfo multisampleStateCreateInfoDefault = {
401         VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, // VkStructureType                          sType
402         nullptr,                                                  // const void*                              pNext
403         0u,                                                       // VkPipelineMultisampleStateCreateFlags    flags
404         VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits                    rasterizationSamples
405         VK_FALSE,              // VkBool32                                 sampleShadingEnable
406         1.0f,                  // float                                    minSampleShading
407         nullptr,               // const VkSampleMask*                      pSampleMask
408         VK_FALSE,              // VkBool32                                 alphaToCoverageEnable
409         VK_FALSE               // VkBool32                                 alphaToOneEnable
410     };
411 
412     const VkStencilOpState stencilOpState = {
413         VK_STENCIL_OP_KEEP,  // VkStencilOp    failOp
414         VK_STENCIL_OP_KEEP,  // VkStencilOp    passOp
415         VK_STENCIL_OP_KEEP,  // VkStencilOp    depthFailOp
416         VK_COMPARE_OP_NEVER, // VkCompareOp    compareOp
417         0,                   // uint32_t       compareMask
418         0,                   // uint32_t       writeMask
419         0                    // uint32_t       reference
420     };
421 
422     const VkPipelineDepthStencilStateCreateInfo depthStencilStateCreateInfoDefault = {
423         VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, // VkStructureType                          sType
424         nullptr,                                                    // const void*                              pNext
425         0u,                                                         // VkPipelineDepthStencilStateCreateFlags   flags
426         VK_FALSE,                    // VkBool32                                 depthTestEnable
427         VK_FALSE,                    // VkBool32                                 depthWriteEnable
428         VK_COMPARE_OP_LESS_OR_EQUAL, // VkCompareOp                              depthCompareOp
429         VK_FALSE,                    // VkBool32                                 depthBoundsTestEnable
430         VK_FALSE,                    // VkBool32                                 stencilTestEnable
431         stencilOpState,              // VkStencilOpState                         front
432         stencilOpState,              // VkStencilOpState                         back
433         0.0f,                        // float                                    minDepthBounds
434         1.0f,                        // float                                    maxDepthBounds
435     };
436 
437     const VkPipelineColorBlendAttachmentState colorBlendAttachmentState = {
438         VK_FALSE,                // VkBool32                 blendEnable
439         VK_BLEND_FACTOR_ZERO,    // VkBlendFactor            srcColorBlendFactor
440         VK_BLEND_FACTOR_ZERO,    // VkBlendFactor            dstColorBlendFactor
441         VK_BLEND_OP_ADD,         // VkBlendOp                colorBlendOp
442         VK_BLEND_FACTOR_ZERO,    // VkBlendFactor            srcAlphaBlendFactor
443         VK_BLEND_FACTOR_ZERO,    // VkBlendFactor            dstAlphaBlendFactor
444         VK_BLEND_OP_ADD,         // VkBlendOp                alphaBlendOp
445         VK_COLOR_COMPONENT_R_BIT // VkColorComponentFlags    colorWriteMask
446             | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT};
447 
448     const VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfoDefault = {
449         VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType                               sType
450         nullptr,                                                  // const void*                                   pNext
451         0u,                                                       // VkPipelineColorBlendStateCreateFlags          flags
452         VK_FALSE,                   // VkBool32                                      logicOpEnable
453         VK_LOGIC_OP_CLEAR,          // VkLogicOp                                     logicOp
454         1u,                         // uint32_t                                      attachmentCount
455         &colorBlendAttachmentState, // const VkPipelineColorBlendAttachmentState*    pAttachments
456         {0.0f, 0.0f, 0.0f, 0.0f}    // float                                         blendConstants[4]
457     };
458 
459     std::vector<VkDynamicState> dynamicStates;
460 
461     if (viewports.empty())
462         dynamicStates.push_back(VK_DYNAMIC_STATE_VIEWPORT);
463     if (scissors.empty())
464         dynamicStates.push_back(VK_DYNAMIC_STATE_SCISSOR);
465 
466     const VkPipelineDynamicStateCreateInfo dynamicStateCreateInfoDefault = {
467         VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, // VkStructureType                      sType
468         nullptr,                                              // const void*                          pNext
469         0u,                                                   // VkPipelineDynamicStateCreateFlags    flags
470         (uint32_t)dynamicStates.size(),                       // uint32_t                             dynamicStateCount
471         dynamicStates.empty() ? nullptr : &dynamicStates[0]   // const VkDynamicState*                pDynamicStates
472     };
473 
474     const VkPipelineDynamicStateCreateInfo *dynamicStateCreateInfoDefaultPtr =
475         dynamicStates.empty() ? nullptr : &dynamicStateCreateInfoDefault;
476 
477     const VkGraphicsPipelineCreateInfo pipelineCreateInfo = {
478         VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, // VkStructureType                                  sType
479         nullptr,                                         // const void*                                      pNext
480         0u,                                              // VkPipelineCreateFlags                            flags
481         (uint32_t)pipelineShaderStageParams.size(),      // uint32_t                                         stageCount
482         &pipelineShaderStageParams[0],                   // const VkPipelineShaderStageCreateInfo*           pStages
483         vertexInputStateCreateInfo ?
484             vertexInputStateCreateInfo :
485             &vertexInputStateCreateInfoDefault, // const VkPipelineVertexInputStateCreateInfo*      pVertexInputState
486         &inputAssemblyStateCreateInfo,          // const VkPipelineInputAssemblyStateCreateInfo*    pInputAssemblyState
487         hasTessellation ? &tessStateCreateInfo :
488                           nullptr, // const VkPipelineTessellationStateCreateInfo*     pTessellationState
489         &viewportStateCreateInfo,  // const VkPipelineViewportStateCreateInfo*         pViewportState
490         rasterizationStateCreateInfo ?
491             rasterizationStateCreateInfo :
492             &rasterizationStateCreateInfoDefault, // const VkPipelineRasterizationStateCreateInfo*    pRasterizationState
493         multisampleStateCreateInfo ?
494             multisampleStateCreateInfo :
495             &multisampleStateCreateInfoDefault, // const VkPipelineMultisampleStateCreateInfo*      pMultisampleState
496         depthStencilStateCreateInfo ?
497             depthStencilStateCreateInfo :
498             &depthStencilStateCreateInfoDefault, // const VkPipelineDepthStencilStateCreateInfo*     pDepthStencilState
499         colorBlendStateCreateInfo ?
500             colorBlendStateCreateInfo :
501             &colorBlendStateCreateInfoDefault, // const VkPipelineColorBlendStateCreateInfo*       pColorBlendState
502         dynamicStateCreateInfo ?
503             dynamicStateCreateInfo :
504             dynamicStateCreateInfoDefaultPtr, // const VkPipelineDynamicStateCreateInfo*          pDynamicState
505         pipelineLayout,                       // VkPipelineLayout                                 layout
506         renderPass,                           // VkRenderPass                                     renderPass
507         subpass,                              // uint32_t                                         subpass
508         VK_NULL_HANDLE,                       // VkPipeline                                       basePipelineHandle
509         0                                     // int32_t                                          basePipelineIndex;
510     };
511 
512     return createGraphicsPipeline(vk, device, VK_NULL_HANDLE, &pipelineCreateInfo);
513 }
514 
makeGraphicsPipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderStageFlags stages,const VkShaderModule vertexShaderModule,const VkShaderModule fragmentShaderModule,const VkShaderModule geometryShaderModule,const VkShaderModule tessellationControlModule,const VkShaderModule tessellationEvaluationModule,const VkRenderPass renderPass,const VkPrimitiveTopology topology=VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,const VkVertexInputBindingDescription * vertexInputBindingDescription=nullptr,const VkVertexInputAttributeDescription * vertexInputAttributeDescriptions=nullptr,const bool frameBufferTests=false,const vk::VkFormat attachmentFormat=VK_FORMAT_R32G32B32A32_SFLOAT,const uint32_t vertexShaderStageCreateFlags=0u,const uint32_t tessellationControlShaderStageCreateFlags=0u,const uint32_t tessellationEvalShaderStageCreateFlags=0u,const uint32_t geometryShaderStageCreateFlags=0u,const uint32_t fragmentShaderStageCreateFlags=0u,const uint32_t requiredSubgroupSize[5]=nullptr)515 Move<VkPipeline> makeGraphicsPipeline(
516     Context &context, const VkPipelineLayout pipelineLayout, const VkShaderStageFlags stages,
517     const VkShaderModule vertexShaderModule, const VkShaderModule fragmentShaderModule,
518     const VkShaderModule geometryShaderModule, const VkShaderModule tessellationControlModule,
519     const VkShaderModule tessellationEvaluationModule, const VkRenderPass renderPass,
520     const VkPrimitiveTopology topology                                        = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
521     const VkVertexInputBindingDescription *vertexInputBindingDescription      = nullptr,
522     const VkVertexInputAttributeDescription *vertexInputAttributeDescriptions = nullptr,
523     const bool frameBufferTests = false, const vk::VkFormat attachmentFormat = VK_FORMAT_R32G32B32A32_SFLOAT,
524     const uint32_t vertexShaderStageCreateFlags = 0u, const uint32_t tessellationControlShaderStageCreateFlags = 0u,
525     const uint32_t tessellationEvalShaderStageCreateFlags = 0u, const uint32_t geometryShaderStageCreateFlags = 0u,
526     const uint32_t fragmentShaderStageCreateFlags = 0u, const uint32_t requiredSubgroupSize[5] = nullptr)
527 {
528     const std::vector<VkViewport> noViewports;
529     const std::vector<VkRect2D> noScissors;
530     const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo = {
531         VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
532         nullptr,                                                   // const void* pNext;
533         0u,                                                        // VkPipelineVertexInputStateCreateFlags flags;
534         vertexInputBindingDescription == nullptr ? 0u : 1u,        // uint32_t vertexBindingDescriptionCount;
535         vertexInputBindingDescription, // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
536         vertexInputAttributeDescriptions == nullptr ? 0u : 1u, // uint32_t vertexAttributeDescriptionCount;
537         vertexInputAttributeDescriptions, // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
538     };
539     const uint32_t numChannels = getNumUsedChannels(mapVkFormat(attachmentFormat).order);
540     const VkColorComponentFlags colorComponent =
541         numChannels == 1 ? VK_COLOR_COMPONENT_R_BIT :
542         numChannels == 2 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT :
543         numChannels == 3 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT :
544                            VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT |
545                                VK_COLOR_COMPONENT_A_BIT;
546     const VkPipelineColorBlendAttachmentState colorBlendAttachmentState = {
547         VK_FALSE,             //  VkBool32 blendEnable;
548         VK_BLEND_FACTOR_ZERO, //  VkBlendFactor srcColorBlendFactor;
549         VK_BLEND_FACTOR_ZERO, //  VkBlendFactor dstColorBlendFactor;
550         VK_BLEND_OP_ADD,      //  VkBlendOp colorBlendOp;
551         VK_BLEND_FACTOR_ZERO, //  VkBlendFactor srcAlphaBlendFactor;
552         VK_BLEND_FACTOR_ZERO, //  VkBlendFactor dstAlphaBlendFactor;
553         VK_BLEND_OP_ADD,      //  VkBlendOp alphaBlendOp;
554         colorComponent        //  VkColorComponentFlags colorWriteMask;
555     };
556     const VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfo = {
557         VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, //  VkStructureType sType;
558         nullptr,                                                  //  const void* pNext;
559         0u,                                                       //  VkPipelineColorBlendStateCreateFlags flags;
560         VK_FALSE,                                                 //  VkBool32 logicOpEnable;
561         VK_LOGIC_OP_CLEAR,                                        //  VkLogicOp logicOp;
562         1,                                                        //  uint32_t attachmentCount;
563         &colorBlendAttachmentState, //  const VkPipelineColorBlendAttachmentState* pAttachments;
564         {0.0f, 0.0f, 0.0f, 0.0f}    //  float blendConstants[4];
565     };
566     const uint32_t patchControlPoints = (VK_SHADER_STAGE_FRAGMENT_BIT & stages && frameBufferTests) ? 2u : 1u;
567 
568     return makeGraphicsPipeline(
569         context.getDeviceInterface(), // const DeviceInterface&                        vk
570         context.getDevice(),          // const VkDevice                                device
571         pipelineLayout,               // const VkPipelineLayout                        pipelineLayout
572         vertexShaderModule,           // const VkShaderModule                          vertexShaderModule
573         tessellationControlModule,    // const VkShaderModule                          tessellationControlShaderModule
574         tessellationEvaluationModule, // const VkShaderModule                          tessellationEvalShaderModule
575         geometryShaderModule,         // const VkShaderModule                          geometryShaderModule
576         fragmentShaderModule,         // const VkShaderModule                          fragmentShaderModule
577         renderPass,                   // const VkRenderPass                            renderPass
578         noViewports,                  // const std::vector<VkViewport>&                viewports
579         noScissors,                   // const std::vector<VkRect2D>&                  scissors
580         topology,                     // const VkPrimitiveTopology                     topology
581         0u,                           // const uint32_t                                subpass
582         patchControlPoints,           // const uint32_t                                patchControlPoints
583         &vertexInputStateCreateInfo,  // const VkPipelineVertexInputStateCreateInfo*   vertexInputStateCreateInfo
584         nullptr,                      // const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
585         nullptr,                      // const VkPipelineMultisampleStateCreateInfo*   multisampleStateCreateInfo
586         nullptr,                      // const VkPipelineDepthStencilStateCreateInfo*  depthStencilStateCreateInfo
587         &colorBlendStateCreateInfo,   // const VkPipelineColorBlendStateCreateInfo*    colorBlendStateCreateInfo
588         nullptr,                      // const VkPipelineDynamicStateCreateInfo*
589         vertexShaderStageCreateFlags, // const uint32_t                                 vertexShaderStageCreateFlags,
590         tessellationControlShaderStageCreateFlags, // const uint32_t                     tessellationControlShaderStageCreateFlags
591         tessellationEvalShaderStageCreateFlags, // const uint32_t                     tessellationEvalShaderStageCreateFlags
592         geometryShaderStageCreateFlags, // const uint32_t                                 geometryShaderStageCreateFlags
593         fragmentShaderStageCreateFlags, // const uint32_t                                 fragmentShaderStageCreateFlags
594         requiredSubgroupSize);          // const uint32_t                                 requiredSubgroupSize[5]
595 }
596 
makeCommandBuffer(Context & context,const VkCommandPool commandPool)597 Move<VkCommandBuffer> makeCommandBuffer(Context &context, const VkCommandPool commandPool)
598 {
599     const VkCommandBufferAllocateInfo bufferAllocateParams = {
600         VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, // VkStructureType sType;
601         nullptr,                                        // const void* pNext;
602         commandPool,                                    // VkCommandPool commandPool;
603         VK_COMMAND_BUFFER_LEVEL_PRIMARY,                // VkCommandBufferLevel level;
604         1u,                                             // uint32_t bufferCount;
605     };
606     return allocateCommandBuffer(context.getDeviceInterface(), context.getDevice(), &bufferAllocateParams);
607 }
608 
609 struct Buffer;
610 struct Image;
611 
612 struct BufferOrImage
613 {
isImage__anon084a9e710111::BufferOrImage614     bool isImage() const
615     {
616         return m_isImage;
617     }
618 
getAsBuffer__anon084a9e710111::BufferOrImage619     Buffer *getAsBuffer()
620     {
621         if (m_isImage)
622             DE_FATAL("Trying to get a buffer as an image!");
623         return reinterpret_cast<Buffer *>(this);
624     }
625 
getAsImage__anon084a9e710111::BufferOrImage626     Image *getAsImage()
627     {
628         if (!m_isImage)
629             DE_FATAL("Trying to get an image as a buffer!");
630         return reinterpret_cast<Image *>(this);
631     }
632 
getType__anon084a9e710111::BufferOrImage633     virtual VkDescriptorType getType() const
634     {
635         if (m_isImage)
636         {
637             return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
638         }
639         else
640         {
641             return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
642         }
643     }
644 
getAllocation__anon084a9e710111::BufferOrImage645     Allocation &getAllocation() const
646     {
647         return *m_allocation;
648     }
649 
~BufferOrImage__anon084a9e710111::BufferOrImage650     virtual ~BufferOrImage()
651     {
652     }
653 
654 protected:
BufferOrImage__anon084a9e710111::BufferOrImage655     explicit BufferOrImage(bool image) : m_isImage(image)
656     {
657     }
658 
659     bool m_isImage;
660     de::details::MovePtr<Allocation> m_allocation;
661 };
662 
663 struct Buffer : public BufferOrImage
664 {
Buffer__anon084a9e710111::Buffer665     explicit Buffer(Context &context, VkDeviceSize sizeInBytes, VkBufferUsageFlags usage)
666         : BufferOrImage(false)
667         , m_sizeInBytes(sizeInBytes)
668         , m_usage(usage)
669     {
670         const DeviceInterface &vkd = context.getDeviceInterface();
671         const VkDevice device      = context.getDevice();
672 
673         const vk::VkBufferCreateInfo bufferCreateInfo = {
674             VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
675             nullptr,
676             0u,
677             m_sizeInBytes,
678             m_usage,
679             VK_SHARING_MODE_EXCLUSIVE,
680             0u,
681             nullptr,
682         };
683         m_buffer = createBuffer(vkd, device, &bufferCreateInfo);
684 
685         VkMemoryRequirements req = getBufferMemoryRequirements(vkd, device, *m_buffer);
686 
687         m_allocation = context.getDefaultAllocator().allocate(req, MemoryRequirement::HostVisible);
688         VK_CHECK(vkd.bindBufferMemory(device, *m_buffer, m_allocation->getMemory(), m_allocation->getOffset()));
689     }
690 
getType__anon084a9e710111::Buffer691     virtual VkDescriptorType getType() const
692     {
693         if (VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT == m_usage)
694         {
695             return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
696         }
697         return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
698     }
699 
getBuffer__anon084a9e710111::Buffer700     VkBuffer getBuffer() const
701     {
702         return *m_buffer;
703     }
704 
getBufferPtr__anon084a9e710111::Buffer705     const VkBuffer *getBufferPtr() const
706     {
707         return &(*m_buffer);
708     }
709 
getSize__anon084a9e710111::Buffer710     VkDeviceSize getSize() const
711     {
712         return m_sizeInBytes;
713     }
714 
715 private:
716     Move<VkBuffer> m_buffer;
717     VkDeviceSize m_sizeInBytes;
718     const VkBufferUsageFlags m_usage;
719 };
720 
721 struct Image : public BufferOrImage
722 {
Image__anon084a9e710111::Image723     explicit Image(Context &context, uint32_t width, uint32_t height, VkFormat format,
724                    VkImageUsageFlags usage = VK_IMAGE_USAGE_STORAGE_BIT)
725         : BufferOrImage(true)
726     {
727         const DeviceInterface &vk       = context.getDeviceInterface();
728         const VkDevice device           = context.getDevice();
729         const uint32_t queueFamilyIndex = context.getUniversalQueueFamilyIndex();
730 
731         const VkImageCreateInfo imageCreateInfo = {
732             VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, //  VkStructureType sType;
733             nullptr,                             //  const void* pNext;
734             0,                                   //  VkImageCreateFlags flags;
735             VK_IMAGE_TYPE_2D,                    //  VkImageType imageType;
736             format,                              //  VkFormat format;
737             {width, height, 1},                  //  VkExtent3D extent;
738             1,                                   //  uint32_t mipLevels;
739             1,                                   //  uint32_t arrayLayers;
740             VK_SAMPLE_COUNT_1_BIT,               //  VkSampleCountFlagBits samples;
741             VK_IMAGE_TILING_OPTIMAL,             //  VkImageTiling tiling;
742             usage,                               //  VkImageUsageFlags usage;
743             VK_SHARING_MODE_EXCLUSIVE,           //  VkSharingMode sharingMode;
744             0u,                                  //  uint32_t queueFamilyIndexCount;
745             nullptr,                             //  const uint32_t* pQueueFamilyIndices;
746             VK_IMAGE_LAYOUT_UNDEFINED            //  VkImageLayout initialLayout;
747         };
748 
749         const VkComponentMapping componentMapping = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
750                                                      VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY};
751 
752         const VkImageSubresourceRange subresourceRange = {
753             VK_IMAGE_ASPECT_COLOR_BIT, //VkImageAspectFlags    aspectMask
754             0u,                        //uint32_t                baseMipLevel
755             1u,                        //uint32_t                levelCount
756             0u,                        //uint32_t                baseArrayLayer
757             1u                         //uint32_t                layerCount
758         };
759 
760         const VkSamplerCreateInfo samplerCreateInfo = {
761             VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,   //  VkStructureType sType;
762             nullptr,                                 //  const void* pNext;
763             0u,                                      //  VkSamplerCreateFlags flags;
764             VK_FILTER_NEAREST,                       //  VkFilter magFilter;
765             VK_FILTER_NEAREST,                       //  VkFilter minFilter;
766             VK_SAMPLER_MIPMAP_MODE_NEAREST,          //  VkSamplerMipmapMode mipmapMode;
767             VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,   //  VkSamplerAddressMode addressModeU;
768             VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,   //  VkSamplerAddressMode addressModeV;
769             VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,   //  VkSamplerAddressMode addressModeW;
770             0.0f,                                    //  float mipLodBias;
771             VK_FALSE,                                //  VkBool32 anisotropyEnable;
772             1.0f,                                    //  float maxAnisotropy;
773             false,                                   //  VkBool32 compareEnable;
774             VK_COMPARE_OP_ALWAYS,                    //  VkCompareOp compareOp;
775             0.0f,                                    //  float minLod;
776             0.0f,                                    //  float maxLod;
777             VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK, //  VkBorderColor borderColor;
778             VK_FALSE,                                //  VkBool32 unnormalizedCoordinates;
779         };
780 
781         m_image = createImage(vk, device, &imageCreateInfo);
782 
783         VkMemoryRequirements req = getImageMemoryRequirements(vk, device, *m_image);
784 
785         req.size *= 2;
786         m_allocation = context.getDefaultAllocator().allocate(req, MemoryRequirement::Any);
787 
788         VK_CHECK(vk.bindImageMemory(device, *m_image, m_allocation->getMemory(), m_allocation->getOffset()));
789 
790         const VkImageViewCreateInfo imageViewCreateInfo = {
791             VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, //  VkStructureType sType;
792             nullptr,                                  //  const void* pNext;
793             0,                                        //  VkImageViewCreateFlags flags;
794             *m_image,                                 //  VkImage image;
795             VK_IMAGE_VIEW_TYPE_2D,                    //  VkImageViewType viewType;
796             imageCreateInfo.format,                   //  VkFormat format;
797             componentMapping,                         //  VkComponentMapping components;
798             subresourceRange                          //  VkImageSubresourceRange subresourceRange;
799         };
800 
801         m_imageView = createImageView(vk, device, &imageViewCreateInfo);
802         m_sampler   = createSampler(vk, device, &samplerCreateInfo);
803 
804         // Transition input image layouts
805         {
806             const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
807             const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(context, *cmdPool));
808 
809             beginCommandBuffer(vk, *cmdBuffer);
810 
811             const VkImageMemoryBarrier imageBarrier =
812                 makeImageMemoryBarrier((VkAccessFlags)0u, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
813                                        VK_IMAGE_LAYOUT_GENERAL, *m_image, subresourceRange);
814 
815             vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
816                                   (VkDependencyFlags)0, 0u, nullptr, 0u, nullptr, 1u, &imageBarrier);
817 
818             endCommandBuffer(vk, *cmdBuffer);
819             submitCommandsAndWait(vk, device, context.getUniversalQueue(), *cmdBuffer);
820         }
821     }
822 
getImage__anon084a9e710111::Image823     VkImage getImage() const
824     {
825         return *m_image;
826     }
827 
getImageView__anon084a9e710111::Image828     VkImageView getImageView() const
829     {
830         return *m_imageView;
831     }
832 
getSampler__anon084a9e710111::Image833     VkSampler getSampler() const
834     {
835         return *m_sampler;
836     }
837 
838 private:
839     Move<VkImage> m_image;
840     Move<VkImageView> m_imageView;
841     Move<VkSampler> m_sampler;
842 };
843 } // namespace
844 
getStagesCount(const VkShaderStageFlags shaderStages)845 uint32_t vkt::subgroups::getStagesCount(const VkShaderStageFlags shaderStages)
846 {
847     const uint32_t stageCount = isAllGraphicsStages(shaderStages) ? 4 :
848                                 isAllComputeStages(shaderStages)  ? 1
849 #ifndef CTS_USES_VULKANSC
850                                 :
851                                 isAllRayTracingStages(shaderStages)  ? 6 :
852                                 isAllMeshShadingStages(shaderStages) ? 1
853 #endif // CTS_USES_VULKANSC
854                                                                        :
855                                                                        0;
856 
857     DE_ASSERT(stageCount != 0);
858 
859     return stageCount;
860 }
861 
getSharedMemoryBallotHelper()862 std::string vkt::subgroups::getSharedMemoryBallotHelper()
863 {
864     return "shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * "
865            "gl_WorkGroupSize.z];\n"
866            "uvec4 sharedMemoryBallot(bool vote)\n"
867            "{\n"
868            "  uint groupOffset = gl_SubgroupID;\n"
869            "  // One invocation in the group 0's the whole group's data\n"
870            "  if (subgroupElect())\n"
871            "  {\n"
872            "    superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
873            "  }\n"
874            "  subgroupMemoryBarrierShared();\n"
875            "  if (vote)\n"
876            "  {\n"
877            "    const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
878            "    const highp uint bitToSet = 1u << invocationId;\n"
879            "    switch (gl_SubgroupInvocationID / 32)\n"
880            "    {\n"
881            "    case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
882            "    case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
883            "    case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
884            "    case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
885            "    }\n"
886            "  }\n"
887            "  subgroupMemoryBarrierShared();\n"
888            "  return superSecretComputeShaderHelper[groupOffset];\n"
889            "}\n";
890 }
891 
getSharedMemoryBallotHelperARB()892 std::string vkt::subgroups::getSharedMemoryBallotHelperARB()
893 {
894     return "shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * "
895            "gl_WorkGroupSize.z];\n"
896            "uint64_t sharedMemoryBallot(bool vote)\n"
897            "{\n"
898            "  uint groupOffset = gl_SubgroupID;\n"
899            "  // One invocation in the group 0's the whole group's data\n"
900            "  if (subgroupElect())\n"
901            "  {\n"
902            "    superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
903            "  }\n"
904            "  subgroupMemoryBarrierShared();\n"
905            "  if (vote)\n"
906            "  {\n"
907            "    const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
908            "    const highp uint bitToSet = 1u << invocationId;\n"
909            "    switch (gl_SubgroupInvocationID / 32)\n"
910            "    {\n"
911            "    case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
912            "    case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
913            "    case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
914            "    case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
915            "    }\n"
916            "  }\n"
917            "  subgroupMemoryBarrierShared();\n"
918            "  return packUint2x32(superSecretComputeShaderHelper[groupOffset].xy);\n"
919            "}\n";
920 }
921 
getSubgroupSize(Context & context)922 uint32_t vkt::subgroups::getSubgroupSize(Context &context)
923 {
924     return context.getSubgroupProperties().subgroupSize;
925 }
926 
maxSupportedSubgroupSize()927 uint32_t vkt::subgroups::maxSupportedSubgroupSize()
928 {
929     return 128u;
930 }
931 
getShaderStageName(VkShaderStageFlags stage)932 std::string vkt::subgroups::getShaderStageName(VkShaderStageFlags stage)
933 {
934     switch (stage)
935     {
936     case VK_SHADER_STAGE_COMPUTE_BIT:
937         return "compute";
938     case VK_SHADER_STAGE_FRAGMENT_BIT:
939         return "fragment";
940     case VK_SHADER_STAGE_VERTEX_BIT:
941         return "vertex";
942     case VK_SHADER_STAGE_GEOMETRY_BIT:
943         return "geometry";
944     case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
945         return "tess_control";
946     case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
947         return "tess_eval";
948 #ifndef CTS_USES_VULKANSC
949     case VK_SHADER_STAGE_RAYGEN_BIT_KHR:
950         return "rgen";
951     case VK_SHADER_STAGE_ANY_HIT_BIT_KHR:
952         return "ahit";
953     case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:
954         return "chit";
955     case VK_SHADER_STAGE_MISS_BIT_KHR:
956         return "miss";
957     case VK_SHADER_STAGE_INTERSECTION_BIT_KHR:
958         return "sect";
959     case VK_SHADER_STAGE_CALLABLE_BIT_KHR:
960         return "call";
961     case VK_SHADER_STAGE_MESH_BIT_EXT:
962         return "mesh";
963     case VK_SHADER_STAGE_TASK_BIT_EXT:
964         return "task";
965 #endif // CTS_USES_VULKANSC
966     default:
967         TCU_THROW(InternalError, "Unhandled stage");
968     }
969 }
970 
getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit)971 std::string vkt::subgroups::getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit)
972 {
973     switch (bit)
974     {
975     case VK_SUBGROUP_FEATURE_BASIC_BIT:
976         return "VK_SUBGROUP_FEATURE_BASIC_BIT";
977     case VK_SUBGROUP_FEATURE_VOTE_BIT:
978         return "VK_SUBGROUP_FEATURE_VOTE_BIT";
979     case VK_SUBGROUP_FEATURE_ARITHMETIC_BIT:
980         return "VK_SUBGROUP_FEATURE_ARITHMETIC_BIT";
981     case VK_SUBGROUP_FEATURE_BALLOT_BIT:
982         return "VK_SUBGROUP_FEATURE_BALLOT_BIT";
983     case VK_SUBGROUP_FEATURE_SHUFFLE_BIT:
984         return "VK_SUBGROUP_FEATURE_SHUFFLE_BIT";
985     case VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT:
986         return "VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT";
987     case VK_SUBGROUP_FEATURE_CLUSTERED_BIT:
988         return "VK_SUBGROUP_FEATURE_CLUSTERED_BIT";
989     case VK_SUBGROUP_FEATURE_QUAD_BIT:
990         return "VK_SUBGROUP_FEATURE_QUAD_BIT";
991     default:
992         TCU_THROW(InternalError, "Unknown subgroup feature category");
993     }
994 }
995 
addNoSubgroupShader(SourceCollections & programCollection)996 void vkt::subgroups::addNoSubgroupShader(SourceCollections &programCollection)
997 {
998     {
999         /*
1000             "#version 450\n"
1001             "void main (void)\n"
1002             "{\n"
1003             "  float pixelSize = 2.0f/1024.0f;\n"
1004             "   float pixelPosition = pixelSize/2.0f - 1.0f;\n"
1005             "  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
1006             "  gl_PointSize = 1.0f;\n"
1007             "}\n"
1008         */
1009         const std::string vertNoSubgroup = "; SPIR-V\n"
1010                                            "; Version: 1.3\n"
1011                                            "; Generator: Khronos Glslang Reference Front End; 1\n"
1012                                            "; Bound: 37\n"
1013                                            "; Schema: 0\n"
1014                                            "OpCapability Shader\n"
1015                                            "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1016                                            "OpMemoryModel Logical GLSL450\n"
1017                                            "OpEntryPoint Vertex %4 \"main\" %22 %26\n"
1018                                            "OpMemberDecorate %20 0 BuiltIn Position\n"
1019                                            "OpMemberDecorate %20 1 BuiltIn PointSize\n"
1020                                            "OpMemberDecorate %20 2 BuiltIn ClipDistance\n"
1021                                            "OpMemberDecorate %20 3 BuiltIn CullDistance\n"
1022                                            "OpDecorate %20 Block\n"
1023                                            "OpDecorate %26 BuiltIn VertexIndex\n"
1024                                            "%2 = OpTypeVoid\n"
1025                                            "%3 = OpTypeFunction %2\n"
1026                                            "%6 = OpTypeFloat 32\n"
1027                                            "%7 = OpTypePointer Function %6\n"
1028                                            "%9 = OpConstant %6 0.00195313\n"
1029                                            "%12 = OpConstant %6 2\n"
1030                                            "%14 = OpConstant %6 1\n"
1031                                            "%16 = OpTypeVector %6 4\n"
1032                                            "%17 = OpTypeInt 32 0\n"
1033                                            "%18 = OpConstant %17 1\n"
1034                                            "%19 = OpTypeArray %6 %18\n"
1035                                            "%20 = OpTypeStruct %16 %6 %19 %19\n"
1036                                            "%21 = OpTypePointer Output %20\n"
1037                                            "%22 = OpVariable %21 Output\n"
1038                                            "%23 = OpTypeInt 32 1\n"
1039                                            "%24 = OpConstant %23 0\n"
1040                                            "%25 = OpTypePointer Input %23\n"
1041                                            "%26 = OpVariable %25 Input\n"
1042                                            "%33 = OpConstant %6 0\n"
1043                                            "%35 = OpTypePointer Output %16\n"
1044                                            "%37 = OpConstant %23 1\n"
1045                                            "%38 = OpTypePointer Output %6\n"
1046                                            "%4 = OpFunction %2 None %3\n"
1047                                            "%5 = OpLabel\n"
1048                                            "%8 = OpVariable %7 Function\n"
1049                                            "%10 = OpVariable %7 Function\n"
1050                                            "OpStore %8 %9\n"
1051                                            "%11 = OpLoad %6 %8\n"
1052                                            "%13 = OpFDiv %6 %11 %12\n"
1053                                            "%15 = OpFSub %6 %13 %14\n"
1054                                            "OpStore %10 %15\n"
1055                                            "%27 = OpLoad %23 %26\n"
1056                                            "%28 = OpConvertSToF %6 %27\n"
1057                                            "%29 = OpLoad %6 %8\n"
1058                                            "%30 = OpFMul %6 %28 %29\n"
1059                                            "%31 = OpLoad %6 %10\n"
1060                                            "%32 = OpFAdd %6 %30 %31\n"
1061                                            "%34 = OpCompositeConstruct %16 %32 %33 %33 %14\n"
1062                                            "%36 = OpAccessChain %35 %22 %24\n"
1063                                            "OpStore %36 %34\n"
1064                                            "%39 = OpAccessChain %38 %22 %37\n"
1065                                            "OpStore %39 %14\n"
1066                                            "OpReturn\n"
1067                                            "OpFunctionEnd\n";
1068         programCollection.spirvAsmSources.add("vert_noSubgroup") << vertNoSubgroup;
1069     }
1070 
1071     {
1072         /*
1073             "#version 450\n"
1074             "layout(vertices=1) out;\n"
1075             "\n"
1076             "void main (void)\n"
1077             "{\n"
1078             "  if (gl_InvocationID == 0)\n"
1079             "  {\n"
1080             "    gl_TessLevelOuter[0] = 1.0f;\n"
1081             "    gl_TessLevelOuter[1] = 1.0f;\n"
1082             "  }\n"
1083             "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1084             "}\n"
1085         */
1086         const std::string tescNoSubgroup = "; SPIR-V\n"
1087                                            "; Version: 1.3\n"
1088                                            "; Generator: Khronos Glslang Reference Front End; 1\n"
1089                                            "; Bound: 45\n"
1090                                            "; Schema: 0\n"
1091                                            "OpCapability Tessellation\n"
1092                                            "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1093                                            "OpMemoryModel Logical GLSL450\n"
1094                                            "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %32 %38\n"
1095                                            "OpExecutionMode %4 OutputVertices 1\n"
1096                                            "OpDecorate %8 BuiltIn InvocationId\n"
1097                                            "OpDecorate %20 Patch\n"
1098                                            "OpDecorate %20 BuiltIn TessLevelOuter\n"
1099                                            "OpMemberDecorate %29 0 BuiltIn Position\n"
1100                                            "OpMemberDecorate %29 1 BuiltIn PointSize\n"
1101                                            "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
1102                                            "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
1103                                            "OpDecorate %29 Block\n"
1104                                            "OpMemberDecorate %34 0 BuiltIn Position\n"
1105                                            "OpMemberDecorate %34 1 BuiltIn PointSize\n"
1106                                            "OpMemberDecorate %34 2 BuiltIn ClipDistance\n"
1107                                            "OpMemberDecorate %34 3 BuiltIn CullDistance\n"
1108                                            "OpDecorate %34 Block\n"
1109                                            "%2 = OpTypeVoid\n"
1110                                            "%3 = OpTypeFunction %2\n"
1111                                            "%6 = OpTypeInt 32 1\n"
1112                                            "%7 = OpTypePointer Input %6\n"
1113                                            "%8 = OpVariable %7 Input\n"
1114                                            "%10 = OpConstant %6 0\n"
1115                                            "%11 = OpTypeBool\n"
1116                                            "%15 = OpTypeFloat 32\n"
1117                                            "%16 = OpTypeInt 32 0\n"
1118                                            "%17 = OpConstant %16 4\n"
1119                                            "%18 = OpTypeArray %15 %17\n"
1120                                            "%19 = OpTypePointer Output %18\n"
1121                                            "%20 = OpVariable %19 Output\n"
1122                                            "%21 = OpConstant %15 1\n"
1123                                            "%22 = OpTypePointer Output %15\n"
1124                                            "%24 = OpConstant %6 1\n"
1125                                            "%26 = OpTypeVector %15 4\n"
1126                                            "%27 = OpConstant %16 1\n"
1127                                            "%28 = OpTypeArray %15 %27\n"
1128                                            "%29 = OpTypeStruct %26 %15 %28 %28\n"
1129                                            "%30 = OpTypeArray %29 %27\n"
1130                                            "%31 = OpTypePointer Output %30\n"
1131                                            "%32 = OpVariable %31 Output\n"
1132                                            "%34 = OpTypeStruct %26 %15 %28 %28\n"
1133                                            "%35 = OpConstant %16 32\n"
1134                                            "%36 = OpTypeArray %34 %35\n"
1135                                            "%37 = OpTypePointer Input %36\n"
1136                                            "%38 = OpVariable %37 Input\n"
1137                                            "%40 = OpTypePointer Input %26\n"
1138                                            "%43 = OpTypePointer Output %26\n"
1139                                            "%4 = OpFunction %2 None %3\n"
1140                                            "%5 = OpLabel\n"
1141                                            "%9 = OpLoad %6 %8\n"
1142                                            "%12 = OpIEqual %11 %9 %10\n"
1143                                            "OpSelectionMerge %14 None\n"
1144                                            "OpBranchConditional %12 %13 %14\n"
1145                                            "%13 = OpLabel\n"
1146                                            "%23 = OpAccessChain %22 %20 %10\n"
1147                                            "OpStore %23 %21\n"
1148                                            "%25 = OpAccessChain %22 %20 %24\n"
1149                                            "OpStore %25 %21\n"
1150                                            "OpBranch %14\n"
1151                                            "%14 = OpLabel\n"
1152                                            "%33 = OpLoad %6 %8\n"
1153                                            "%39 = OpLoad %6 %8\n"
1154                                            "%41 = OpAccessChain %40 %38 %39 %10\n"
1155                                            "%42 = OpLoad %26 %41\n"
1156                                            "%44 = OpAccessChain %43 %32 %33 %10\n"
1157                                            "OpStore %44 %42\n"
1158                                            "OpReturn\n"
1159                                            "OpFunctionEnd\n";
1160         programCollection.spirvAsmSources.add("tesc_noSubgroup") << tescNoSubgroup;
1161     }
1162 
1163     {
1164         /*
1165             "#version 450\n"
1166             "layout(isolines) in;\n"
1167             "\n"
1168             "void main (void)\n"
1169             "{\n"
1170             "  float pixelSize = 2.0f/1024.0f;\n"
1171             "  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
1172             "}\n";
1173         */
1174         const std::string teseNoSubgroup = "; SPIR-V\n"
1175                                            "; Version: 1.3\n"
1176                                            "; Generator: Khronos Glslang Reference Front End; 2\n"
1177                                            "; Bound: 42\n"
1178                                            "; Schema: 0\n"
1179                                            "OpCapability Tessellation\n"
1180                                            "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1181                                            "OpMemoryModel Logical GLSL450\n"
1182                                            "OpEntryPoint TessellationEvaluation %4 \"main\" %16 %23 %29\n"
1183                                            "OpExecutionMode %4 Isolines\n"
1184                                            "OpExecutionMode %4 SpacingEqual\n"
1185                                            "OpExecutionMode %4 VertexOrderCcw\n"
1186                                            "OpMemberDecorate %14 0 BuiltIn Position\n"
1187                                            "OpMemberDecorate %14 1 BuiltIn PointSize\n"
1188                                            "OpMemberDecorate %14 2 BuiltIn ClipDistance\n"
1189                                            "OpMemberDecorate %14 3 BuiltIn CullDistance\n"
1190                                            "OpDecorate %14 Block\n"
1191                                            "OpMemberDecorate %19 0 BuiltIn Position\n"
1192                                            "OpMemberDecorate %19 1 BuiltIn PointSize\n"
1193                                            "OpMemberDecorate %19 2 BuiltIn ClipDistance\n"
1194                                            "OpMemberDecorate %19 3 BuiltIn CullDistance\n"
1195                                            "OpDecorate %19 Block\n"
1196                                            "OpDecorate %29 BuiltIn TessCoord\n"
1197                                            "%2 = OpTypeVoid\n"
1198                                            "%3 = OpTypeFunction %2\n"
1199                                            "%6 = OpTypeFloat 32\n"
1200                                            "%7 = OpTypePointer Function %6\n"
1201                                            "%9 = OpConstant %6 0.00195313\n"
1202                                            "%10 = OpTypeVector %6 4\n"
1203                                            "%11 = OpTypeInt 32 0\n"
1204                                            "%12 = OpConstant %11 1\n"
1205                                            "%13 = OpTypeArray %6 %12\n"
1206                                            "%14 = OpTypeStruct %10 %6 %13 %13\n"
1207                                            "%15 = OpTypePointer Output %14\n"
1208                                            "%16 = OpVariable %15 Output\n"
1209                                            "%17 = OpTypeInt 32 1\n"
1210                                            "%18 = OpConstant %17 0\n"
1211                                            "%19 = OpTypeStruct %10 %6 %13 %13\n"
1212                                            "%20 = OpConstant %11 32\n"
1213                                            "%21 = OpTypeArray %19 %20\n"
1214                                            "%22 = OpTypePointer Input %21\n"
1215                                            "%23 = OpVariable %22 Input\n"
1216                                            "%24 = OpTypePointer Input %10\n"
1217                                            "%27 = OpTypeVector %6 3\n"
1218                                            "%28 = OpTypePointer Input %27\n"
1219                                            "%29 = OpVariable %28 Input\n"
1220                                            "%30 = OpConstant %11 0\n"
1221                                            "%31 = OpTypePointer Input %6\n"
1222                                            "%36 = OpConstant %6 2\n"
1223                                            "%40 = OpTypePointer Output %10\n"
1224                                            "%4 = OpFunction %2 None %3\n"
1225                                            "%5 = OpLabel\n"
1226                                            "%8 = OpVariable %7 Function\n"
1227                                            "OpStore %8 %9\n"
1228                                            "%25 = OpAccessChain %24 %23 %18 %18\n"
1229                                            "%26 = OpLoad %10 %25\n"
1230                                            "%32 = OpAccessChain %31 %29 %30\n"
1231                                            "%33 = OpLoad %6 %32\n"
1232                                            "%34 = OpLoad %6 %8\n"
1233                                            "%35 = OpFMul %6 %33 %34\n"
1234                                            "%37 = OpFDiv %6 %35 %36\n"
1235                                            "%38 = OpCompositeConstruct %10 %37 %37 %37 %37\n"
1236                                            "%39 = OpFAdd %10 %26 %38\n"
1237                                            "%41 = OpAccessChain %40 %16 %18\n"
1238                                            "OpStore %41 %39\n"
1239                                            "OpReturn\n"
1240                                            "OpFunctionEnd\n";
1241         programCollection.spirvAsmSources.add("tese_noSubgroup") << teseNoSubgroup;
1242     }
1243 }
1244 
getFramebufferBufferDeclarations(const VkFormat & format,const std::vector<std::string> & declarations,const uint32_t stage)1245 static std::string getFramebufferBufferDeclarations(const VkFormat &format,
1246                                                     const std::vector<std::string> &declarations, const uint32_t stage)
1247 {
1248     if (declarations.empty())
1249     {
1250         const std::string name   = (stage == 0) ? "result" : "out_color";
1251         const std::string suffix = (stage == 2) ? "[]" : "";
1252         const std::string result = "layout(location = 0) out float " + name + suffix +
1253                                    ";\n"
1254                                    "layout(set = 0, binding = 0) uniform Buffer1\n"
1255                                    "{\n"
1256                                    "  " +
1257                                    de::toString(subgroups::getFormatNameForGLSL(format)) + " data[" +
1258                                    de::toString(subgroups::maxSupportedSubgroupSize()) +
1259                                    "];\n"
1260                                    "};\n";
1261 
1262         return result;
1263     }
1264     else
1265     {
1266         return declarations[stage];
1267     }
1268 }
1269 
initStdFrameBufferPrograms(SourceCollections & programCollection,const vk::ShaderBuildOptions & buildOptions,VkShaderStageFlags shaderStage,VkFormat format,bool gsPointSize,const std::string & extHeader,const std::string & testSrc,const std::string & helperStr,const std::vector<std::string> & declarations)1270 void vkt::subgroups::initStdFrameBufferPrograms(SourceCollections &programCollection,
1271                                                 const vk::ShaderBuildOptions &buildOptions,
1272                                                 VkShaderStageFlags shaderStage, VkFormat format, bool gsPointSize,
1273                                                 const std::string &extHeader, const std::string &testSrc,
1274                                                 const std::string &helperStr,
1275                                                 const std::vector<std::string> &declarations)
1276 {
1277     subgroups::setFragmentShaderFrameBuffer(programCollection);
1278 
1279     if (shaderStage != VK_SHADER_STAGE_VERTEX_BIT)
1280         subgroups::setVertexShaderFrameBuffer(programCollection);
1281 
1282     if (shaderStage == VK_SHADER_STAGE_VERTEX_BIT)
1283     {
1284         std::ostringstream vertex;
1285 
1286         vertex << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
1287                << extHeader << "layout(location = 0) in highp vec4 in_position;\n"
1288                << getFramebufferBufferDeclarations(format, declarations, 0) << "\n"
1289                << helperStr << "void main (void)\n"
1290                << "{\n"
1291                << "  uint tempRes;\n"
1292                << testSrc << "  result = float(tempRes);\n"
1293                << "  gl_Position = in_position;\n"
1294                << "  gl_PointSize = 1.0f;\n"
1295                << "}\n";
1296 
1297         programCollection.glslSources.add("vert") << glu::VertexSource(vertex.str()) << buildOptions;
1298     }
1299     else if (shaderStage == VK_SHADER_STAGE_GEOMETRY_BIT)
1300     {
1301         std::ostringstream geometry;
1302 
1303         geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
1304                  << extHeader << "layout(points) in;\n"
1305                  << "layout(points, max_vertices = 1) out;\n"
1306                  << getFramebufferBufferDeclarations(format, declarations, 1) << "\n"
1307                  << helperStr << "void main (void)\n"
1308                  << "{\n"
1309                  << "  uint tempRes;\n"
1310                  << testSrc << "  out_color = float(tempRes);\n"
1311                  << "  gl_Position = gl_in[0].gl_Position;\n"
1312                  << (gsPointSize ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "") << "  EmitVertex();\n"
1313                  << "  EndPrimitive();\n"
1314                  << "}\n";
1315 
1316         programCollection.glslSources.add("geometry") << glu::GeometrySource(geometry.str()) << buildOptions;
1317     }
1318     else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
1319     {
1320         std::ostringstream controlSource;
1321 
1322         controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
1323                       << extHeader << "layout(vertices = 2) out;\n"
1324                       << getFramebufferBufferDeclarations(format, declarations, 2) << "\n"
1325                       << helperStr << "void main (void)\n"
1326                       << "{\n"
1327                       << "  if (gl_InvocationID == 0)\n"
1328                       << "  {\n"
1329                       << "    gl_TessLevelOuter[0] = 1.0f;\n"
1330                       << "    gl_TessLevelOuter[1] = 1.0f;\n"
1331                       << "  }\n"
1332                       << "  uint tempRes;\n"
1333                       << testSrc << "  out_color[gl_InvocationID] = float(tempRes);\n"
1334                       << "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1335                       << (gsPointSize ?
1336                               "  gl_out[gl_InvocationID].gl_PointSize = gl_in[gl_InvocationID].gl_PointSize;\n" :
1337                               "")
1338                       << "}\n";
1339 
1340         programCollection.glslSources.add("tesc")
1341             << glu::TessellationControlSource(controlSource.str()) << buildOptions;
1342         subgroups::setTesEvalShaderFrameBuffer(programCollection);
1343     }
1344     else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
1345     {
1346         ostringstream evaluationSource;
1347 
1348         evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
1349                          << extHeader << "layout(isolines, equal_spacing, ccw ) in;\n"
1350                          << getFramebufferBufferDeclarations(format, declarations, 3) << "\n"
1351                          << helperStr << "void main (void)\n"
1352                          << "{\n"
1353                          << "  uint tempRes;\n"
1354                          << testSrc << "  out_color = float(tempRes);\n"
1355                          << "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
1356                          << (gsPointSize ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "") << "}\n";
1357 
1358         subgroups::setTesCtrlShaderFrameBuffer(programCollection);
1359         programCollection.glslSources.add("tese")
1360             << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
1361     }
1362     else
1363     {
1364         DE_FATAL("Unsupported shader stage");
1365     }
1366 }
1367 
getBufferDeclarations(vk::VkShaderStageFlags shaderStage,const std::string & formatName,const std::vector<std::string> & declarations,const uint32_t stage)1368 static std::string getBufferDeclarations(vk::VkShaderStageFlags shaderStage, const std::string &formatName,
1369                                          const std::vector<std::string> &declarations, const uint32_t stage)
1370 {
1371     if (declarations.empty())
1372     {
1373         const uint32_t stageCount = vkt::subgroups::getStagesCount(shaderStage);
1374         const uint32_t binding0   = stage;
1375         const uint32_t binding1   = stageCount;
1376         const bool fragment       = (shaderStage & VK_SHADER_STAGE_FRAGMENT_BIT) && (stage == stageCount);
1377         const string buffer1      = fragment ? "layout(location = 0) out uint result;\n" :
1378                                                "layout(set = 0, binding = " + de::toString(binding0) +
1379                                               ", std430) buffer Buffer1\n"
1380                                                    "{\n"
1381                                                    "  uint result[];\n"
1382                                                    "};\n";
1383         //todo boza I suppose it can be "layout(set = 0, binding = " + de::toString(binding1) + ", std430) readonly buffer Buffer2\n"
1384         const string buffer2 = "layout(set = 0, binding = " + de::toString(binding1) + ", std430)" +
1385                                (stageCount == 1 ? "" : " readonly") + " buffer Buffer" + (fragment ? "1" : "2") +
1386                                "\n"
1387                                "{\n"
1388                                "  " +
1389                                formatName +
1390                                " data[];\n"
1391                                "};\n";
1392 
1393         return buffer1 + buffer2;
1394     }
1395     else
1396     {
1397         return declarations[stage];
1398     }
1399 }
1400 
initStdPrograms(vk::SourceCollections & programCollection,const vk::ShaderBuildOptions & buildOptions,vk::VkShaderStageFlags shaderStage,vk::VkFormat format,bool gsPointSize,const std::string & extHeader,const std::string & testSrc,const std::string & helperStr,const std::vector<std::string> & declarations,const bool avoidHelperInvocations,const std::string & tempRes)1401 void vkt::subgroups::initStdPrograms(vk::SourceCollections &programCollection,
1402                                      const vk::ShaderBuildOptions &buildOptions, vk::VkShaderStageFlags shaderStage,
1403                                      vk::VkFormat format, bool gsPointSize, const std::string &extHeader,
1404                                      const std::string &testSrc, const std::string &helperStr,
1405                                      const std::vector<std::string> &declarations, const bool avoidHelperInvocations,
1406                                      const std::string &tempRes)
1407 {
1408     const std::string formatName = subgroups::getFormatNameForGLSL(format);
1409 
1410     if (isAllComputeStages(shaderStage))
1411     {
1412         std::ostringstream src;
1413 
1414         src << "#version 450\n"
1415             << extHeader
1416             << "layout (local_size_x_id = 0, local_size_y_id = 1, "
1417                "local_size_z_id = 2) in;\n"
1418             << getBufferDeclarations(shaderStage, formatName, declarations, 0) << "\n"
1419             << helperStr << "void main (void)\n"
1420             << "{\n"
1421             << "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1422             << "  highp uint offset = globalSize.x * ((globalSize.y * "
1423                "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1424                "gl_GlobalInvocationID.x;\n"
1425             << tempRes << testSrc << "  result[offset] = tempRes;\n"
1426             << "}\n";
1427 
1428         programCollection.glslSources.add("comp") << glu::ComputeSource(src.str()) << buildOptions;
1429     }
1430 #ifndef CTS_USES_VULKANSC
1431     else if (isAllMeshShadingStages(shaderStage))
1432     {
1433         const bool testMesh = ((shaderStage & VK_SHADER_STAGE_MESH_BIT_EXT) != 0u);
1434         const bool testTask = ((shaderStage & VK_SHADER_STAGE_TASK_BIT_EXT) != 0u);
1435 
1436         if (testMesh)
1437         {
1438             std::ostringstream mesh;
1439 
1440             mesh << "#version 450\n"
1441                  << "#extension GL_EXT_mesh_shader : enable\n"
1442                  << extHeader << "layout (local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n"
1443                  << "layout (points) out;\n"
1444                  << "layout (max_vertices = 1, max_primitives = 1) out;\n"
1445                  << getBufferDeclarations(shaderStage, formatName, declarations, 0) << "\n"
1446                  << helperStr << "void main (void)\n"
1447                  << "{\n"
1448                  << "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1449                  << "  highp uint offset = globalSize.x * ((globalSize.y * "
1450                     "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1451                     "gl_GlobalInvocationID.x;\n"
1452                  << tempRes << testSrc << "  result[offset] = tempRes;\n"
1453                  << "  SetMeshOutputsEXT(0u, 0u);\n"
1454                  << "}\n";
1455 
1456             programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
1457         }
1458         else
1459         {
1460             const std::string meshShaderNoSubgroups =
1461                 "#version 450\n"
1462                 "#extension GL_EXT_mesh_shader : enable\n"
1463                 "\n"
1464                 "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
1465                 "layout (points) out;\n"
1466                 "layout (max_vertices = 1, max_primitives = 1) out;\n"
1467                 "\n"
1468                 "void main (void)\n"
1469                 "{\n"
1470                 "  SetMeshOutputsEXT(0u, 0u);\n"
1471                 "}\n";
1472             programCollection.glslSources.add("mesh") << glu::MeshSource(meshShaderNoSubgroups) << buildOptions;
1473         }
1474 
1475         if (testTask)
1476         {
1477             const tcu::UVec3 emitSize = (testMesh ? tcu::UVec3(1u, 1u, 1u) : tcu::UVec3(0u, 0u, 0u));
1478             std::ostringstream task;
1479 
1480             task << "#version 450\n"
1481                  << "#extension GL_EXT_mesh_shader : enable\n"
1482                  //<< "#extension GL_NV_mesh_shader : enable\n"
1483                  << extHeader << "layout (local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n"
1484                  << getBufferDeclarations(shaderStage, formatName, declarations, 0) << "\n"
1485                  << helperStr << "void main (void)\n"
1486                  << "{\n"
1487                  << "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1488                  //<< "  uvec3 globalSize = uvec3(0, 0, 0)/*gl_NumWorkGroups*/ * gl_WorkGroupSize;\n"
1489                  << "  highp uint offset = globalSize.x * ((globalSize.y * "
1490                     "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1491                     "gl_GlobalInvocationID.x;\n"
1492                  << tempRes << testSrc << "  result[offset] = tempRes;\n"
1493                  << "  EmitMeshTasksEXT(" << emitSize.x() << ", " << emitSize.y() << ", " << emitSize.z()
1494                  << ");\n"
1495                  //<< "  gl_TaskCountNV = " << emitSize.x() << ";\n"
1496                  << "}\n";
1497 
1498             programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
1499         }
1500     }
1501 #endif // CTS_USES_VULKANSC
1502     else if (isAllGraphicsStages(shaderStage))
1503     {
1504         const string vertex =
1505             "#version 450\n" + extHeader + getBufferDeclarations(shaderStage, formatName, declarations, 0) + "\n" +
1506             helperStr +
1507             "void main (void)\n"
1508             "{\n"
1509             "  uint tempRes;\n" +
1510             testSrc +
1511             "  result[gl_VertexIndex] = tempRes;\n"
1512             "  float pixelSize = 2.0f/1024.0f;\n"
1513             "  float pixelPosition = pixelSize/2.0f - 1.0f;\n"
1514             "  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
1515             "  gl_PointSize = 1.0f;\n"
1516             "}\n";
1517 
1518         const string tesc =
1519             "#version 450\n" + extHeader + "layout(vertices=1) out;\n" +
1520             getBufferDeclarations(shaderStage, formatName, declarations, 1) + "\n" + helperStr +
1521             "void main (void)\n"
1522             "{\n" +
1523             tempRes + testSrc +
1524             "  result[gl_PrimitiveID] = tempRes;\n"
1525             "  if (gl_InvocationID == 0)\n"
1526             "  {\n"
1527             "    gl_TessLevelOuter[0] = 1.0f;\n"
1528             "    gl_TessLevelOuter[1] = 1.0f;\n"
1529             "  }\n"
1530             "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n" +
1531             (gsPointSize ? "  gl_out[gl_InvocationID].gl_PointSize = gl_in[gl_InvocationID].gl_PointSize;\n" : "") +
1532             "}\n";
1533 
1534         const string tese = "#version 450\n" + extHeader + "layout(isolines) in;\n" +
1535                             getBufferDeclarations(shaderStage, formatName, declarations, 2) + "\n" + helperStr +
1536                             "void main (void)\n"
1537                             "{\n" +
1538                             tempRes + testSrc +
1539                             "  result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempRes;\n"
1540                             "  float pixelSize = 2.0f/1024.0f;\n"
1541                             "  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n" +
1542                             (gsPointSize ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "") + "}\n";
1543 
1544         const string geometry = "#version 450\n" + extHeader +
1545                                 "layout(${TOPOLOGY}) in;\n"
1546                                 "layout(points, max_vertices = 1) out;\n" +
1547                                 getBufferDeclarations(shaderStage, formatName, declarations, 3) + "\n" + helperStr +
1548                                 "void main (void)\n"
1549                                 "{\n" +
1550                                 tempRes + testSrc +
1551                                 "  result[gl_PrimitiveIDIn] = tempRes;\n"
1552                                 "  gl_Position = gl_in[0].gl_Position;\n" +
1553                                 (gsPointSize ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "") +
1554                                 "  EmitVertex();\n"
1555                                 "  EndPrimitive();\n"
1556                                 "}\n";
1557 
1558         const string fragment =
1559             "#version 450\n" + extHeader + getBufferDeclarations(shaderStage, formatName, declarations, 4) + helperStr +
1560             "void main (void)\n"
1561             "{\n" +
1562             (avoidHelperInvocations ? "  if (gl_HelperInvocation) return;\n" : "") + tempRes + testSrc +
1563             "  result = tempRes;\n"
1564             "}\n";
1565 
1566         subgroups::addNoSubgroupShader(programCollection);
1567 
1568         programCollection.glslSources.add("vert") << glu::VertexSource(vertex) << buildOptions;
1569         programCollection.glslSources.add("tesc") << glu::TessellationControlSource(tesc) << buildOptions;
1570         programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(tese) << buildOptions;
1571         subgroups::addGeometryShadersFromTemplate(geometry, buildOptions, programCollection.glslSources);
1572         programCollection.glslSources.add("fragment") << glu::FragmentSource(fragment) << buildOptions;
1573     }
1574 #ifndef CTS_USES_VULKANSC
1575     else if (isAllRayTracingStages(shaderStage))
1576     {
1577         const std::string rgenShader =
1578             "#version 460 core\n"
1579             "#extension GL_EXT_ray_tracing: require\n" +
1580             extHeader +
1581             "layout(location = 0) rayPayloadEXT uvec4 payload;\n"
1582             "layout(location = 0) callableDataEXT uvec4 callData;"
1583             "layout(set = 1, binding = 0) uniform accelerationStructureEXT topLevelAS;\n" +
1584             getBufferDeclarations(shaderStage, formatName, declarations, 0) + "\n" + helperStr +
1585             "void main()\n"
1586             "{\n" +
1587             tempRes + testSrc +
1588             "  uint  rayFlags   = 0;\n"
1589             "  uint  cullMask   = 0xFF;\n"
1590             "  float tmin       = 0.0;\n"
1591             "  float tmax       = 9.0;\n"
1592             "  vec3  origin     = vec3((float(gl_LaunchIDEXT.x) + 0.5f) / float(gl_LaunchSizeEXT.x), "
1593             "(float(gl_LaunchIDEXT.y) + 0.5f) / float(gl_LaunchSizeEXT.y), 0.0);\n"
1594             "  vec3  directHit  = vec3(0.0, 0.0, -1.0);\n"
1595             "  vec3  directMiss = vec3(0.0, 0.0, +1.0);\n"
1596             "\n"
1597             "  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directHit, tmax, 0);\n"
1598             "  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directMiss, tmax, 0);\n"
1599             "  executeCallableEXT(0, 0);"
1600             "  result[gl_LaunchIDEXT.x] = tempRes;\n"
1601             "}\n";
1602         const std::string ahitShader = "#version 460 core\n"
1603                                        "#extension GL_EXT_ray_tracing: require\n" +
1604                                        extHeader +
1605                                        "hitAttributeEXT vec3 attribs;\n"
1606                                        "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n" +
1607                                        getBufferDeclarations(shaderStage, formatName, declarations, 1) + "\n" +
1608                                        helperStr +
1609                                        "void main()\n"
1610                                        "{\n" +
1611                                        tempRes + testSrc +
1612                                        "  result[gl_LaunchIDEXT.x] = tempRes;\n"
1613                                        "}\n";
1614         const std::string chitShader = "#version 460 core\n"
1615                                        "#extension GL_EXT_ray_tracing: require\n" +
1616                                        extHeader +
1617                                        "hitAttributeEXT vec3 attribs;\n"
1618                                        "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n" +
1619                                        getBufferDeclarations(shaderStage, formatName, declarations, 2) + "\n" +
1620                                        helperStr +
1621                                        "void main()\n"
1622                                        "{\n" +
1623                                        tempRes + testSrc +
1624                                        "  result[gl_LaunchIDEXT.x] = tempRes;\n"
1625                                        "}\n";
1626         const std::string missShader = "#version 460 core\n"
1627                                        "#extension GL_EXT_ray_tracing: require\n" +
1628                                        extHeader + "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n" +
1629                                        getBufferDeclarations(shaderStage, formatName, declarations, 3) + "\n" +
1630                                        helperStr +
1631                                        "void main()\n"
1632                                        "{\n" +
1633                                        tempRes + testSrc +
1634                                        "  result[gl_LaunchIDEXT.x] = tempRes;\n"
1635                                        "}\n";
1636         const std::string sectShader = "#version 460 core\n"
1637                                        "#extension GL_EXT_ray_tracing: require\n" +
1638                                        extHeader + "hitAttributeEXT vec3 hitAttribute;\n" +
1639                                        getBufferDeclarations(shaderStage, formatName, declarations, 4) + "\n" +
1640                                        helperStr +
1641                                        "void main()\n"
1642                                        "{\n" +
1643                                        tempRes + testSrc +
1644                                        "  reportIntersectionEXT(0.75f, 0x7Eu);\n"
1645                                        "  result[gl_LaunchIDEXT.x] = tempRes;\n"
1646                                        "}\n";
1647         const std::string callShader = "#version 460 core\n"
1648                                        "#extension GL_EXT_ray_tracing: require\n" +
1649                                        extHeader + "layout(location = 0) callableDataInEXT float callData;\n" +
1650                                        getBufferDeclarations(shaderStage, formatName, declarations, 5) + "\n" +
1651                                        helperStr +
1652                                        "void main()\n"
1653                                        "{\n" +
1654                                        tempRes + testSrc +
1655                                        "  result[gl_LaunchIDEXT.x] = tempRes;\n"
1656                                        "}\n";
1657 
1658         programCollection.glslSources.add("rgen") << glu::RaygenSource(rgenShader) << buildOptions;
1659         programCollection.glslSources.add("ahit") << glu::AnyHitSource(ahitShader) << buildOptions;
1660         programCollection.glslSources.add("chit") << glu::ClosestHitSource(chitShader) << buildOptions;
1661         programCollection.glslSources.add("miss") << glu::MissSource(missShader) << buildOptions;
1662         programCollection.glslSources.add("sect") << glu::IntersectionSource(sectShader) << buildOptions;
1663         programCollection.glslSources.add("call") << glu::CallableSource(callShader) << buildOptions;
1664 
1665         subgroups::addRayTracingNoSubgroupShader(programCollection);
1666     }
1667 #endif // CTS_USES_VULKANSC
1668     else
1669         TCU_THROW(InternalError, "Unknown stage or invalid stage set");
1670 }
1671 
isSubgroupSupported(Context & context)1672 bool vkt::subgroups::isSubgroupSupported(Context &context)
1673 {
1674     return context.contextSupports(vk::ApiVersion(0, 1, 1, 0));
1675 }
1676 
areSubgroupOperationsSupportedForStage(Context & context,const VkShaderStageFlags stage)1677 bool vkt::subgroups::areSubgroupOperationsSupportedForStage(Context &context, const VkShaderStageFlags stage)
1678 {
1679     return (stage & (context.getSubgroupProperties().supportedStages)) ? true : false;
1680 }
1681 
isSubgroupFeatureSupportedForDevice(Context & context,VkSubgroupFeatureFlagBits bit)1682 bool vkt::subgroups::isSubgroupFeatureSupportedForDevice(Context &context, VkSubgroupFeatureFlagBits bit)
1683 {
1684     return (bit & (context.getSubgroupProperties().supportedOperations)) ? true : false;
1685 }
1686 
areQuadOperationsSupportedForStages(Context & context,const VkShaderStageFlags stages)1687 bool vkt::subgroups::areQuadOperationsSupportedForStages(Context &context, const VkShaderStageFlags stages)
1688 {
1689     // Check general quad feature support first.
1690     if (!isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_QUAD_BIT))
1691         return false;
1692 
1693     if (context.getSubgroupProperties().quadOperationsInAllStages == VK_TRUE)
1694         return true; // No problem, any stage works.
1695 
1696     // Only frag and compute are supported.
1697     const VkShaderStageFlags fragCompute = (VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT);
1698     const VkShaderStageFlags otherStages = ~fragCompute;
1699     return ((stages & otherStages) == 0u);
1700 }
1701 
isFragmentSSBOSupportedForDevice(Context & context)1702 bool vkt::subgroups::isFragmentSSBOSupportedForDevice(Context &context)
1703 {
1704     return context.getDeviceFeatures().fragmentStoresAndAtomics ? true : false;
1705 }
1706 
isVertexSSBOSupportedForDevice(Context & context)1707 bool vkt::subgroups::isVertexSSBOSupportedForDevice(Context &context)
1708 {
1709     return context.getDeviceFeatures().vertexPipelineStoresAndAtomics ? true : false;
1710 }
1711 
isInt64SupportedForDevice(Context & context)1712 bool vkt::subgroups::isInt64SupportedForDevice(Context &context)
1713 {
1714     return context.getDeviceFeatures().shaderInt64 ? true : false;
1715 }
1716 
isTessellationAndGeometryPointSizeSupported(Context & context)1717 bool vkt::subgroups::isTessellationAndGeometryPointSizeSupported(Context &context)
1718 {
1719     return context.getDeviceFeatures().shaderTessellationAndGeometryPointSize ? true : false;
1720 }
1721 
is16BitUBOStorageSupported(Context & context)1722 bool vkt::subgroups::is16BitUBOStorageSupported(Context &context)
1723 {
1724     return context.get16BitStorageFeatures().uniformAndStorageBuffer16BitAccess ? true : false;
1725 }
1726 
is8BitUBOStorageSupported(Context & context)1727 bool vkt::subgroups::is8BitUBOStorageSupported(Context &context)
1728 {
1729     return context.get8BitStorageFeatures().uniformAndStorageBuffer8BitAccess ? true : false;
1730 }
1731 
isFormatSupportedForDevice(Context & context,vk::VkFormat format)1732 bool vkt::subgroups::isFormatSupportedForDevice(Context &context, vk::VkFormat format)
1733 {
1734     const VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures &subgroupExtendedTypesFeatures =
1735         context.getShaderSubgroupExtendedTypesFeatures();
1736     const VkPhysicalDeviceShaderFloat16Int8Features &float16Int8Features = context.getShaderFloat16Int8Features();
1737     const VkPhysicalDevice16BitStorageFeatures &storage16bit             = context.get16BitStorageFeatures();
1738     const VkPhysicalDevice8BitStorageFeatures &storage8bit               = context.get8BitStorageFeatures();
1739     const VkPhysicalDeviceFeatures &features                             = context.getDeviceFeatures();
1740     bool shaderFloat64                                                   = features.shaderFloat64 ? true : false;
1741     bool shaderInt16                                                     = features.shaderInt16 ? true : false;
1742     bool shaderInt64                                                     = features.shaderInt64 ? true : false;
1743     bool shaderSubgroupExtendedTypes                                     = false;
1744     bool shaderFloat16                                                   = false;
1745     bool shaderInt8                                                      = false;
1746     bool storageBuffer16BitAccess                                        = false;
1747     bool storageBuffer8BitAccess                                         = false;
1748 
1749     if (context.isDeviceFunctionalitySupported("VK_KHR_shader_subgroup_extended_types") &&
1750         context.isDeviceFunctionalitySupported("VK_KHR_shader_float16_int8"))
1751     {
1752         shaderSubgroupExtendedTypes = subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes ? true : false;
1753         shaderFloat16               = float16Int8Features.shaderFloat16 ? true : false;
1754         shaderInt8                  = float16Int8Features.shaderInt8 ? true : false;
1755 
1756         if (context.isDeviceFunctionalitySupported("VK_KHR_16bit_storage"))
1757             storageBuffer16BitAccess = storage16bit.storageBuffer16BitAccess ? true : false;
1758 
1759         if (context.isDeviceFunctionalitySupported("VK_KHR_8bit_storage"))
1760             storageBuffer8BitAccess = storage8bit.storageBuffer8BitAccess ? true : false;
1761     }
1762 
1763     switch (format)
1764     {
1765     default:
1766         return true;
1767     case VK_FORMAT_R16_SFLOAT:
1768     case VK_FORMAT_R16G16_SFLOAT:
1769     case VK_FORMAT_R16G16B16_SFLOAT:
1770     case VK_FORMAT_R16G16B16A16_SFLOAT:
1771         return shaderSubgroupExtendedTypes && shaderFloat16 && storageBuffer16BitAccess;
1772     case VK_FORMAT_R64_SFLOAT:
1773     case VK_FORMAT_R64G64_SFLOAT:
1774     case VK_FORMAT_R64G64B64_SFLOAT:
1775     case VK_FORMAT_R64G64B64A64_SFLOAT:
1776         return shaderFloat64;
1777     case VK_FORMAT_R8_SINT:
1778     case VK_FORMAT_R8G8_SINT:
1779     case VK_FORMAT_R8G8B8_SINT:
1780     case VK_FORMAT_R8G8B8A8_SINT:
1781     case VK_FORMAT_R8_UINT:
1782     case VK_FORMAT_R8G8_UINT:
1783     case VK_FORMAT_R8G8B8_UINT:
1784     case VK_FORMAT_R8G8B8A8_UINT:
1785         return shaderSubgroupExtendedTypes && shaderInt8 && storageBuffer8BitAccess;
1786     case VK_FORMAT_R16_SINT:
1787     case VK_FORMAT_R16G16_SINT:
1788     case VK_FORMAT_R16G16B16_SINT:
1789     case VK_FORMAT_R16G16B16A16_SINT:
1790     case VK_FORMAT_R16_UINT:
1791     case VK_FORMAT_R16G16_UINT:
1792     case VK_FORMAT_R16G16B16_UINT:
1793     case VK_FORMAT_R16G16B16A16_UINT:
1794         return shaderSubgroupExtendedTypes && shaderInt16 && storageBuffer16BitAccess;
1795     case VK_FORMAT_R64_SINT:
1796     case VK_FORMAT_R64G64_SINT:
1797     case VK_FORMAT_R64G64B64_SINT:
1798     case VK_FORMAT_R64G64B64A64_SINT:
1799     case VK_FORMAT_R64_UINT:
1800     case VK_FORMAT_R64G64_UINT:
1801     case VK_FORMAT_R64G64B64_UINT:
1802     case VK_FORMAT_R64G64B64A64_UINT:
1803         return shaderSubgroupExtendedTypes && shaderInt64;
1804     }
1805 }
1806 
isSubgroupBroadcastDynamicIdSupported(Context & context)1807 bool vkt::subgroups::isSubgroupBroadcastDynamicIdSupported(Context &context)
1808 {
1809     return context.contextSupports(vk::ApiVersion(0, 1, 2, 0)) &&
1810            vk::getPhysicalDeviceVulkan12Features(context.getInstanceInterface(), context.getPhysicalDevice())
1811                .subgroupBroadcastDynamicId;
1812 }
1813 
isSubgroupRotateSpecVersionValid(Context & context)1814 bool vkt::subgroups::isSubgroupRotateSpecVersionValid(Context &context)
1815 {
1816     // Ensure "VK_KHR_shader_subgroup_rotate" extension's spec version is at least 2
1817     {
1818         const std::string extensionName = "VK_KHR_shader_subgroup_rotate";
1819         const std::vector<VkExtensionProperties> deviceExtensionProperties =
1820             enumerateDeviceExtensionProperties(context.getInstanceInterface(), context.getPhysicalDevice(), nullptr);
1821 
1822         for (const auto &property : deviceExtensionProperties)
1823         {
1824             if (property.extensionName == extensionName && property.specVersion < 2)
1825             {
1826                 return false;
1827             }
1828         }
1829     }
1830     return true;
1831 }
1832 
getFormatNameForGLSL(VkFormat format)1833 std::string vkt::subgroups::getFormatNameForGLSL(VkFormat format)
1834 {
1835     switch (format)
1836     {
1837     case VK_FORMAT_R8_SINT:
1838         return "int8_t";
1839     case VK_FORMAT_R8G8_SINT:
1840         return "i8vec2";
1841     case VK_FORMAT_R8G8B8_SINT:
1842         return "i8vec3";
1843     case VK_FORMAT_R8G8B8A8_SINT:
1844         return "i8vec4";
1845     case VK_FORMAT_R8_UINT:
1846         return "uint8_t";
1847     case VK_FORMAT_R8G8_UINT:
1848         return "u8vec2";
1849     case VK_FORMAT_R8G8B8_UINT:
1850         return "u8vec3";
1851     case VK_FORMAT_R8G8B8A8_UINT:
1852         return "u8vec4";
1853     case VK_FORMAT_R16_SINT:
1854         return "int16_t";
1855     case VK_FORMAT_R16G16_SINT:
1856         return "i16vec2";
1857     case VK_FORMAT_R16G16B16_SINT:
1858         return "i16vec3";
1859     case VK_FORMAT_R16G16B16A16_SINT:
1860         return "i16vec4";
1861     case VK_FORMAT_R16_UINT:
1862         return "uint16_t";
1863     case VK_FORMAT_R16G16_UINT:
1864         return "u16vec2";
1865     case VK_FORMAT_R16G16B16_UINT:
1866         return "u16vec3";
1867     case VK_FORMAT_R16G16B16A16_UINT:
1868         return "u16vec4";
1869     case VK_FORMAT_R32_SINT:
1870         return "int";
1871     case VK_FORMAT_R32G32_SINT:
1872         return "ivec2";
1873     case VK_FORMAT_R32G32B32_SINT:
1874         return "ivec3";
1875     case VK_FORMAT_R32G32B32A32_SINT:
1876         return "ivec4";
1877     case VK_FORMAT_R32_UINT:
1878         return "uint";
1879     case VK_FORMAT_R32G32_UINT:
1880         return "uvec2";
1881     case VK_FORMAT_R32G32B32_UINT:
1882         return "uvec3";
1883     case VK_FORMAT_R32G32B32A32_UINT:
1884         return "uvec4";
1885     case VK_FORMAT_R64_SINT:
1886         return "int64_t";
1887     case VK_FORMAT_R64G64_SINT:
1888         return "i64vec2";
1889     case VK_FORMAT_R64G64B64_SINT:
1890         return "i64vec3";
1891     case VK_FORMAT_R64G64B64A64_SINT:
1892         return "i64vec4";
1893     case VK_FORMAT_R64_UINT:
1894         return "uint64_t";
1895     case VK_FORMAT_R64G64_UINT:
1896         return "u64vec2";
1897     case VK_FORMAT_R64G64B64_UINT:
1898         return "u64vec3";
1899     case VK_FORMAT_R64G64B64A64_UINT:
1900         return "u64vec4";
1901     case VK_FORMAT_R16_SFLOAT:
1902         return "float16_t";
1903     case VK_FORMAT_R16G16_SFLOAT:
1904         return "f16vec2";
1905     case VK_FORMAT_R16G16B16_SFLOAT:
1906         return "f16vec3";
1907     case VK_FORMAT_R16G16B16A16_SFLOAT:
1908         return "f16vec4";
1909     case VK_FORMAT_R32_SFLOAT:
1910         return "float";
1911     case VK_FORMAT_R32G32_SFLOAT:
1912         return "vec2";
1913     case VK_FORMAT_R32G32B32_SFLOAT:
1914         return "vec3";
1915     case VK_FORMAT_R32G32B32A32_SFLOAT:
1916         return "vec4";
1917     case VK_FORMAT_R64_SFLOAT:
1918         return "double";
1919     case VK_FORMAT_R64G64_SFLOAT:
1920         return "dvec2";
1921     case VK_FORMAT_R64G64B64_SFLOAT:
1922         return "dvec3";
1923     case VK_FORMAT_R64G64B64A64_SFLOAT:
1924         return "dvec4";
1925     case VK_FORMAT_R8_USCALED:
1926         return "bool";
1927     case VK_FORMAT_R8G8_USCALED:
1928         return "bvec2";
1929     case VK_FORMAT_R8G8B8_USCALED:
1930         return "bvec3";
1931     case VK_FORMAT_R8G8B8A8_USCALED:
1932         return "bvec4";
1933     default:
1934         TCU_THROW(InternalError, "Unhandled format");
1935     }
1936 }
1937 
getAdditionalExtensionForFormat(vk::VkFormat format)1938 std::string vkt::subgroups::getAdditionalExtensionForFormat(vk::VkFormat format)
1939 {
1940     switch (format)
1941     {
1942     default:
1943         return "";
1944     case VK_FORMAT_R8_SINT:
1945     case VK_FORMAT_R8G8_SINT:
1946     case VK_FORMAT_R8G8B8_SINT:
1947     case VK_FORMAT_R8G8B8A8_SINT:
1948     case VK_FORMAT_R8_UINT:
1949     case VK_FORMAT_R8G8_UINT:
1950     case VK_FORMAT_R8G8B8_UINT:
1951     case VK_FORMAT_R8G8B8A8_UINT:
1952         return "#extension GL_EXT_shader_subgroup_extended_types_int8 : enable\n";
1953     case VK_FORMAT_R16_SINT:
1954     case VK_FORMAT_R16G16_SINT:
1955     case VK_FORMAT_R16G16B16_SINT:
1956     case VK_FORMAT_R16G16B16A16_SINT:
1957     case VK_FORMAT_R16_UINT:
1958     case VK_FORMAT_R16G16_UINT:
1959     case VK_FORMAT_R16G16B16_UINT:
1960     case VK_FORMAT_R16G16B16A16_UINT:
1961         return "#extension GL_EXT_shader_subgroup_extended_types_int16 : enable\n";
1962     case VK_FORMAT_R64_SINT:
1963     case VK_FORMAT_R64G64_SINT:
1964     case VK_FORMAT_R64G64B64_SINT:
1965     case VK_FORMAT_R64G64B64A64_SINT:
1966     case VK_FORMAT_R64_UINT:
1967     case VK_FORMAT_R64G64_UINT:
1968     case VK_FORMAT_R64G64B64_UINT:
1969     case VK_FORMAT_R64G64B64A64_UINT:
1970         return "#extension GL_EXT_shader_subgroup_extended_types_int64 : enable\n";
1971     case VK_FORMAT_R16_SFLOAT:
1972     case VK_FORMAT_R16G16_SFLOAT:
1973     case VK_FORMAT_R16G16B16_SFLOAT:
1974     case VK_FORMAT_R16G16B16A16_SFLOAT:
1975         return "#extension GL_EXT_shader_subgroup_extended_types_float16 : enable\n";
1976     }
1977 }
1978 
getAllFormats()1979 const std::vector<vk::VkFormat> vkt::subgroups::getAllFormats()
1980 {
1981     std::vector<VkFormat> formats;
1982 
1983     formats.push_back(VK_FORMAT_R8_SINT);
1984     formats.push_back(VK_FORMAT_R8G8_SINT);
1985     formats.push_back(VK_FORMAT_R8G8B8_SINT);
1986     formats.push_back(VK_FORMAT_R8G8B8A8_SINT);
1987     formats.push_back(VK_FORMAT_R8_UINT);
1988     formats.push_back(VK_FORMAT_R8G8_UINT);
1989     formats.push_back(VK_FORMAT_R8G8B8_UINT);
1990     formats.push_back(VK_FORMAT_R8G8B8A8_UINT);
1991     formats.push_back(VK_FORMAT_R16_SINT);
1992     formats.push_back(VK_FORMAT_R16G16_SINT);
1993     formats.push_back(VK_FORMAT_R16G16B16_SINT);
1994     formats.push_back(VK_FORMAT_R16G16B16A16_SINT);
1995     formats.push_back(VK_FORMAT_R16_UINT);
1996     formats.push_back(VK_FORMAT_R16G16_UINT);
1997     formats.push_back(VK_FORMAT_R16G16B16_UINT);
1998     formats.push_back(VK_FORMAT_R16G16B16A16_UINT);
1999     formats.push_back(VK_FORMAT_R32_SINT);
2000     formats.push_back(VK_FORMAT_R32G32_SINT);
2001     formats.push_back(VK_FORMAT_R32G32B32_SINT);
2002     formats.push_back(VK_FORMAT_R32G32B32A32_SINT);
2003     formats.push_back(VK_FORMAT_R32_UINT);
2004     formats.push_back(VK_FORMAT_R32G32_UINT);
2005     formats.push_back(VK_FORMAT_R32G32B32_UINT);
2006     formats.push_back(VK_FORMAT_R32G32B32A32_UINT);
2007     formats.push_back(VK_FORMAT_R64_SINT);
2008     formats.push_back(VK_FORMAT_R64G64_SINT);
2009     formats.push_back(VK_FORMAT_R64G64B64_SINT);
2010     formats.push_back(VK_FORMAT_R64G64B64A64_SINT);
2011     formats.push_back(VK_FORMAT_R64_UINT);
2012     formats.push_back(VK_FORMAT_R64G64_UINT);
2013     formats.push_back(VK_FORMAT_R64G64B64_UINT);
2014     formats.push_back(VK_FORMAT_R64G64B64A64_UINT);
2015     formats.push_back(VK_FORMAT_R16_SFLOAT);
2016     formats.push_back(VK_FORMAT_R16G16_SFLOAT);
2017     formats.push_back(VK_FORMAT_R16G16B16_SFLOAT);
2018     formats.push_back(VK_FORMAT_R16G16B16A16_SFLOAT);
2019     formats.push_back(VK_FORMAT_R32_SFLOAT);
2020     formats.push_back(VK_FORMAT_R32G32_SFLOAT);
2021     formats.push_back(VK_FORMAT_R32G32B32_SFLOAT);
2022     formats.push_back(VK_FORMAT_R32G32B32A32_SFLOAT);
2023     formats.push_back(VK_FORMAT_R64_SFLOAT);
2024     formats.push_back(VK_FORMAT_R64G64_SFLOAT);
2025     formats.push_back(VK_FORMAT_R64G64B64_SFLOAT);
2026     formats.push_back(VK_FORMAT_R64G64B64A64_SFLOAT);
2027     formats.push_back(VK_FORMAT_R8_USCALED);
2028     formats.push_back(VK_FORMAT_R8G8_USCALED);
2029     formats.push_back(VK_FORMAT_R8G8B8_USCALED);
2030     formats.push_back(VK_FORMAT_R8G8B8A8_USCALED);
2031 
2032     return formats;
2033 }
2034 
isFormatSigned(VkFormat format)2035 bool vkt::subgroups::isFormatSigned(VkFormat format)
2036 {
2037     switch (format)
2038     {
2039     default:
2040         return false;
2041     case VK_FORMAT_R8_SINT:
2042     case VK_FORMAT_R8G8_SINT:
2043     case VK_FORMAT_R8G8B8_SINT:
2044     case VK_FORMAT_R8G8B8A8_SINT:
2045     case VK_FORMAT_R16_SINT:
2046     case VK_FORMAT_R16G16_SINT:
2047     case VK_FORMAT_R16G16B16_SINT:
2048     case VK_FORMAT_R16G16B16A16_SINT:
2049     case VK_FORMAT_R32_SINT:
2050     case VK_FORMAT_R32G32_SINT:
2051     case VK_FORMAT_R32G32B32_SINT:
2052     case VK_FORMAT_R32G32B32A32_SINT:
2053     case VK_FORMAT_R64_SINT:
2054     case VK_FORMAT_R64G64_SINT:
2055     case VK_FORMAT_R64G64B64_SINT:
2056     case VK_FORMAT_R64G64B64A64_SINT:
2057         return true;
2058     }
2059 }
2060 
isFormatUnsigned(VkFormat format)2061 bool vkt::subgroups::isFormatUnsigned(VkFormat format)
2062 {
2063     switch (format)
2064     {
2065     default:
2066         return false;
2067     case VK_FORMAT_R8_UINT:
2068     case VK_FORMAT_R8G8_UINT:
2069     case VK_FORMAT_R8G8B8_UINT:
2070     case VK_FORMAT_R8G8B8A8_UINT:
2071     case VK_FORMAT_R16_UINT:
2072     case VK_FORMAT_R16G16_UINT:
2073     case VK_FORMAT_R16G16B16_UINT:
2074     case VK_FORMAT_R16G16B16A16_UINT:
2075     case VK_FORMAT_R32_UINT:
2076     case VK_FORMAT_R32G32_UINT:
2077     case VK_FORMAT_R32G32B32_UINT:
2078     case VK_FORMAT_R32G32B32A32_UINT:
2079     case VK_FORMAT_R64_UINT:
2080     case VK_FORMAT_R64G64_UINT:
2081     case VK_FORMAT_R64G64B64_UINT:
2082     case VK_FORMAT_R64G64B64A64_UINT:
2083         return true;
2084     }
2085 }
2086 
isFormatFloat(VkFormat format)2087 bool vkt::subgroups::isFormatFloat(VkFormat format)
2088 {
2089     switch (format)
2090     {
2091     default:
2092         return false;
2093     case VK_FORMAT_R16_SFLOAT:
2094     case VK_FORMAT_R16G16_SFLOAT:
2095     case VK_FORMAT_R16G16B16_SFLOAT:
2096     case VK_FORMAT_R16G16B16A16_SFLOAT:
2097     case VK_FORMAT_R32_SFLOAT:
2098     case VK_FORMAT_R32G32_SFLOAT:
2099     case VK_FORMAT_R32G32B32_SFLOAT:
2100     case VK_FORMAT_R32G32B32A32_SFLOAT:
2101     case VK_FORMAT_R64_SFLOAT:
2102     case VK_FORMAT_R64G64_SFLOAT:
2103     case VK_FORMAT_R64G64B64_SFLOAT:
2104     case VK_FORMAT_R64G64B64A64_SFLOAT:
2105         return true;
2106     }
2107 }
2108 
isFormatBool(VkFormat format)2109 bool vkt::subgroups::isFormatBool(VkFormat format)
2110 {
2111     switch (format)
2112     {
2113     default:
2114         return false;
2115     case VK_FORMAT_R8_USCALED:
2116     case VK_FORMAT_R8G8_USCALED:
2117     case VK_FORMAT_R8G8B8_USCALED:
2118     case VK_FORMAT_R8G8B8A8_USCALED:
2119         return true;
2120     }
2121 }
2122 
isFormat8bitTy(VkFormat format)2123 bool vkt::subgroups::isFormat8bitTy(VkFormat format)
2124 {
2125     switch (format)
2126     {
2127     default:
2128         return false;
2129     case VK_FORMAT_R8_SINT:
2130     case VK_FORMAT_R8G8_SINT:
2131     case VK_FORMAT_R8G8B8_SINT:
2132     case VK_FORMAT_R8G8B8A8_SINT:
2133     case VK_FORMAT_R8_UINT:
2134     case VK_FORMAT_R8G8_UINT:
2135     case VK_FORMAT_R8G8B8_UINT:
2136     case VK_FORMAT_R8G8B8A8_UINT:
2137         return true;
2138     }
2139 }
2140 
isFormat16BitTy(VkFormat format)2141 bool vkt::subgroups::isFormat16BitTy(VkFormat format)
2142 {
2143     switch (format)
2144     {
2145     default:
2146         return false;
2147     case VK_FORMAT_R16_SFLOAT:
2148     case VK_FORMAT_R16G16_SFLOAT:
2149     case VK_FORMAT_R16G16B16_SFLOAT:
2150     case VK_FORMAT_R16G16B16A16_SFLOAT:
2151     case VK_FORMAT_R16_SINT:
2152     case VK_FORMAT_R16G16_SINT:
2153     case VK_FORMAT_R16G16B16_SINT:
2154     case VK_FORMAT_R16G16B16A16_SINT:
2155     case VK_FORMAT_R16_UINT:
2156     case VK_FORMAT_R16G16_UINT:
2157     case VK_FORMAT_R16G16B16_UINT:
2158     case VK_FORMAT_R16G16B16A16_UINT:
2159         return true;
2160     }
2161 }
2162 
// Registers a SPIR-V assembly vertex shader under the name "vert". The shader
// copies the vec4 input at location 0 to the Position built-in and writes 1.0
// to the PointSize built-in.
void vkt::subgroups::setVertexShaderFrameBuffer(SourceCollections &programCollection)
{
    /*
        GLSL equivalent of the assembly below:

        "layout(location = 0) in highp vec4 in_position;\n"
        "void main (void)\n"
        "{\n"
        "  gl_Position = in_position;\n"
        "  gl_PointSize = 1.0f;\n"
        "}\n";
    */
    programCollection.spirvAsmSources.add("vert") << "; SPIR-V\n"
                                                     "; Version: 1.3\n"
                                                     "; Generator: Khronos Glslang Reference Front End; 7\n"
                                                     "; Bound: 25\n"
                                                     "; Schema: 0\n"
                                                     "OpCapability Shader\n"
                                                     "%1 = OpExtInstImport \"GLSL.std.450\"\n"
                                                     "OpMemoryModel Logical GLSL450\n"
                                                     "OpEntryPoint Vertex %4 \"main\" %13 %17\n"
                                                     "OpMemberDecorate %11 0 BuiltIn Position\n"
                                                     "OpMemberDecorate %11 1 BuiltIn PointSize\n"
                                                     "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
                                                     "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
                                                     "OpDecorate %11 Block\n"
                                                     "OpDecorate %17 Location 0\n"
                                                     "%2 = OpTypeVoid\n"
                                                     "%3 = OpTypeFunction %2\n"
                                                     "%6 = OpTypeFloat 32\n"
                                                     "%7 = OpTypeVector %6 4\n"
                                                     "%8 = OpTypeInt 32 0\n"
                                                     "%9 = OpConstant %8 1\n"
                                                     "%10 = OpTypeArray %6 %9\n"
                                                     "%11 = OpTypeStruct %7 %6 %10 %10\n"
                                                     "%12 = OpTypePointer Output %11\n"
                                                     "%13 = OpVariable %12 Output\n"
                                                     "%14 = OpTypeInt 32 1\n"
                                                     "%15 = OpConstant %14 0\n"
                                                     "%16 = OpTypePointer Input %7\n"
                                                     "%17 = OpVariable %16 Input\n"
                                                     "%19 = OpTypePointer Output %7\n"
                                                     "%21 = OpConstant %14 1\n"
                                                     "%22 = OpConstant %6 1\n"
                                                     "%23 = OpTypePointer Output %6\n"
                                                     "%4 = OpFunction %2 None %3\n"
                                                     "%5 = OpLabel\n"
                                                     "%18 = OpLoad %7 %17\n"
                                                     "%20 = OpAccessChain %19 %13 %15\n"
                                                     "OpStore %20 %18\n"
                                                     "%24 = OpAccessChain %23 %13 %21\n"
                                                     "OpStore %24 %22\n"
                                                     "OpReturn\n"
                                                     "OpFunctionEnd\n";
}
2216 
// Registers a SPIR-V assembly fragment shader under the name "fragment". The
// shader loads the float input at location 0, converts it to an unsigned
// integer and stores it to the output at location 0.
void vkt::subgroups::setFragmentShaderFrameBuffer(vk::SourceCollections &programCollection)
{
    /*
        GLSL equivalent of the assembly below:

        "layout(location = 0) in float in_color;\n"
        "layout(location = 0) out uint out_color;\n"
        "void main()\n"
        "{\n"
        "    out_color = uint(in_color);\n"
        "}\n";
    */
    programCollection.spirvAsmSources.add("fragment") << "; SPIR-V\n"
                                                         "; Version: 1.3\n"
                                                         "; Generator: Khronos Glslang Reference Front End; 2\n"
                                                         "; Bound: 14\n"
                                                         "; Schema: 0\n"
                                                         "OpCapability Shader\n"
                                                         "%1 = OpExtInstImport \"GLSL.std.450\"\n"
                                                         "OpMemoryModel Logical GLSL450\n"
                                                         "OpEntryPoint Fragment %4 \"main\" %8 %11\n"
                                                         "OpExecutionMode %4 OriginUpperLeft\n"
                                                         "OpDecorate %8 Location 0\n"
                                                         "OpDecorate %11 Location 0\n"
                                                         "%2 = OpTypeVoid\n"
                                                         "%3 = OpTypeFunction %2\n"
                                                         "%6 = OpTypeInt 32 0\n"
                                                         "%7 = OpTypePointer Output %6\n"
                                                         "%8 = OpVariable %7 Output\n"
                                                         "%9 = OpTypeFloat 32\n"
                                                         "%10 = OpTypePointer Input %9\n"
                                                         "%11 = OpVariable %10 Input\n"
                                                         "%4 = OpFunction %2 None %3\n"
                                                         "%5 = OpLabel\n"
                                                         "%12 = OpLoad %9 %11\n"
                                                         "%13 = OpConvertFToU %6 %12\n"
                                                         "OpStore %8 %13\n"
                                                         "OpReturn\n"
                                                         "OpFunctionEnd\n";
}
2255 
setTesCtrlShaderFrameBuffer(vk::SourceCollections & programCollection)2256 void vkt::subgroups::setTesCtrlShaderFrameBuffer(vk::SourceCollections &programCollection)
2257 {
2258     /*
2259         "#extension GL_KHR_shader_subgroup_basic: enable\n"
2260         "#extension GL_EXT_tessellation_shader : require\n"
2261         "layout(vertices = 2) out;\n"
2262         "void main (void)\n"
2263         "{\n"
2264         "  if (gl_InvocationID == 0)\n"
2265         "  {\n"
2266         "    gl_TessLevelOuter[0] = 1.0f;\n"
2267         "    gl_TessLevelOuter[1] = 1.0f;\n"
2268         "  }\n"
2269         "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
2270         "}\n";
2271     */
2272     programCollection.spirvAsmSources.add("tesc") << "; SPIR-V\n"
2273                                                      "; Version: 1.3\n"
2274                                                      "; Generator: Khronos Glslang Reference Front End; 2\n"
2275                                                      "; Bound: 46\n"
2276                                                      "; Schema: 0\n"
2277                                                      "OpCapability Tessellation\n"
2278                                                      "%1 = OpExtInstImport \"GLSL.std.450\"\n"
2279                                                      "OpMemoryModel Logical GLSL450\n"
2280                                                      "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %33 %39\n"
2281                                                      "OpExecutionMode %4 OutputVertices 2\n"
2282                                                      "OpDecorate %8 BuiltIn InvocationId\n"
2283                                                      "OpDecorate %20 Patch\n"
2284                                                      "OpDecorate %20 BuiltIn TessLevelOuter\n"
2285                                                      "OpMemberDecorate %29 0 BuiltIn Position\n"
2286                                                      "OpMemberDecorate %29 1 BuiltIn PointSize\n"
2287                                                      "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
2288                                                      "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
2289                                                      "OpDecorate %29 Block\n"
2290                                                      "OpMemberDecorate %35 0 BuiltIn Position\n"
2291                                                      "OpMemberDecorate %35 1 BuiltIn PointSize\n"
2292                                                      "OpMemberDecorate %35 2 BuiltIn ClipDistance\n"
2293                                                      "OpMemberDecorate %35 3 BuiltIn CullDistance\n"
2294                                                      "OpDecorate %35 Block\n"
2295                                                      "%2 = OpTypeVoid\n"
2296                                                      "%3 = OpTypeFunction %2\n"
2297                                                      "%6 = OpTypeInt 32 1\n"
2298                                                      "%7 = OpTypePointer Input %6\n"
2299                                                      "%8 = OpVariable %7 Input\n"
2300                                                      "%10 = OpConstant %6 0\n"
2301                                                      "%11 = OpTypeBool\n"
2302                                                      "%15 = OpTypeFloat 32\n"
2303                                                      "%16 = OpTypeInt 32 0\n"
2304                                                      "%17 = OpConstant %16 4\n"
2305                                                      "%18 = OpTypeArray %15 %17\n"
2306                                                      "%19 = OpTypePointer Output %18\n"
2307                                                      "%20 = OpVariable %19 Output\n"
2308                                                      "%21 = OpConstant %15 1\n"
2309                                                      "%22 = OpTypePointer Output %15\n"
2310                                                      "%24 = OpConstant %6 1\n"
2311                                                      "%26 = OpTypeVector %15 4\n"
2312                                                      "%27 = OpConstant %16 1\n"
2313                                                      "%28 = OpTypeArray %15 %27\n"
2314                                                      "%29 = OpTypeStruct %26 %15 %28 %28\n"
2315                                                      "%30 = OpConstant %16 2\n"
2316                                                      "%31 = OpTypeArray %29 %30\n"
2317                                                      "%32 = OpTypePointer Output %31\n"
2318                                                      "%33 = OpVariable %32 Output\n"
2319                                                      "%35 = OpTypeStruct %26 %15 %28 %28\n"
2320                                                      "%36 = OpConstant %16 32\n"
2321                                                      "%37 = OpTypeArray %35 %36\n"
2322                                                      "%38 = OpTypePointer Input %37\n"
2323                                                      "%39 = OpVariable %38 Input\n"
2324                                                      "%41 = OpTypePointer Input %26\n"
2325                                                      "%44 = OpTypePointer Output %26\n"
2326                                                      "%4 = OpFunction %2 None %3\n"
2327                                                      "%5 = OpLabel\n"
2328                                                      "%9 = OpLoad %6 %8\n"
2329                                                      "%12 = OpIEqual %11 %9 %10\n"
2330                                                      "OpSelectionMerge %14 None\n"
2331                                                      "OpBranchConditional %12 %13 %14\n"
2332                                                      "%13 = OpLabel\n"
2333                                                      "%23 = OpAccessChain %22 %20 %10\n"
2334                                                      "OpStore %23 %21\n"
2335                                                      "%25 = OpAccessChain %22 %20 %24\n"
2336                                                      "OpStore %25 %21\n"
2337                                                      "OpBranch %14\n"
2338                                                      "%14 = OpLabel\n"
2339                                                      "%34 = OpLoad %6 %8\n"
2340                                                      "%40 = OpLoad %6 %8\n"
2341                                                      "%42 = OpAccessChain %41 %39 %40 %10\n"
2342                                                      "%43 = OpLoad %26 %42\n"
2343                                                      "%45 = OpAccessChain %44 %33 %34 %10\n"
2344                                                      "OpStore %45 %43\n"
2345                                                      "OpReturn\n"
2346                                                      "OpFunctionEnd\n";
2347 }
2348 
setTesEvalShaderFrameBuffer(vk::SourceCollections & programCollection)2349 void vkt::subgroups::setTesEvalShaderFrameBuffer(vk::SourceCollections &programCollection)
2350 {
2351     /*
2352         "#extension GL_KHR_shader_subgroup_ballot: enable\n"
2353         "#extension GL_EXT_tessellation_shader : require\n"
2354         "layout(isolines, equal_spacing, ccw ) in;\n"
2355         "layout(location = 0) in float in_color[];\n"
2356         "layout(location = 0) out float out_color;\n"
2357         "\n"
2358         "void main (void)\n"
2359         "{\n"
2360         "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
2361         "  out_color = in_color[0];\n"
2362         "}\n";
2363     */
2364     programCollection.spirvAsmSources.add("tese")
2365         << "; SPIR-V\n"
2366            "; Version: 1.3\n"
2367            "; Generator: Khronos Glslang Reference Front End; 2\n"
2368            "; Bound: 45\n"
2369            "; Schema: 0\n"
2370            "OpCapability Tessellation\n"
2371            "%1 = OpExtInstImport \"GLSL.std.450\"\n"
2372            "OpMemoryModel Logical GLSL450\n"
2373            "OpEntryPoint TessellationEvaluation %4 \"main\" %13 %20 %29 %39 %42\n"
2374            "OpExecutionMode %4 Isolines\n"
2375            "OpExecutionMode %4 SpacingEqual\n"
2376            "OpExecutionMode %4 VertexOrderCcw\n"
2377            "OpMemberDecorate %11 0 BuiltIn Position\n"
2378            "OpMemberDecorate %11 1 BuiltIn PointSize\n"
2379            "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
2380            "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
2381            "OpDecorate %11 Block\n"
2382            "OpMemberDecorate %16 0 BuiltIn Position\n"
2383            "OpMemberDecorate %16 1 BuiltIn PointSize\n"
2384            "OpMemberDecorate %16 2 BuiltIn ClipDistance\n"
2385            "OpMemberDecorate %16 3 BuiltIn CullDistance\n"
2386            "OpDecorate %16 Block\n"
2387            "OpDecorate %29 BuiltIn TessCoord\n"
2388            "OpDecorate %39 Location 0\n"
2389            "OpDecorate %42 Location 0\n"
2390            "%2 = OpTypeVoid\n"
2391            "%3 = OpTypeFunction %2\n"
2392            "%6 = OpTypeFloat 32\n"
2393            "%7 = OpTypeVector %6 4\n"
2394            "%8 = OpTypeInt 32 0\n"
2395            "%9 = OpConstant %8 1\n"
2396            "%10 = OpTypeArray %6 %9\n"
2397            "%11 = OpTypeStruct %7 %6 %10 %10\n"
2398            "%12 = OpTypePointer Output %11\n"
2399            "%13 = OpVariable %12 Output\n"
2400            "%14 = OpTypeInt 32 1\n"
2401            "%15 = OpConstant %14 0\n"
2402            "%16 = OpTypeStruct %7 %6 %10 %10\n"
2403            "%17 = OpConstant %8 32\n"
2404            "%18 = OpTypeArray %16 %17\n"
2405            "%19 = OpTypePointer Input %18\n"
2406            "%20 = OpVariable %19 Input\n"
2407            "%21 = OpTypePointer Input %7\n"
2408            "%24 = OpConstant %14 1\n"
2409            "%27 = OpTypeVector %6 3\n"
2410            "%28 = OpTypePointer Input %27\n"
2411            "%29 = OpVariable %28 Input\n"
2412            "%30 = OpConstant %8 0\n"
2413            "%31 = OpTypePointer Input %6\n"
2414            "%36 = OpTypePointer Output %7\n"
2415            "%38 = OpTypePointer Output %6\n"
2416            "%39 = OpVariable %38 Output\n"
2417            "%40 = OpTypeArray %6 %17\n"
2418            "%41 = OpTypePointer Input %40\n"
2419            "%42 = OpVariable %41 Input\n"
2420            "%4 = OpFunction %2 None %3\n"
2421            "%5 = OpLabel\n"
2422            "%22 = OpAccessChain %21 %20 %15 %15\n"
2423            "%23 = OpLoad %7 %22\n"
2424            "%25 = OpAccessChain %21 %20 %24 %15\n"
2425            "%26 = OpLoad %7 %25\n"
2426            "%32 = OpAccessChain %31 %29 %30\n"
2427            "%33 = OpLoad %6 %32\n"
2428            "%34 = OpCompositeConstruct %7 %33 %33 %33 %33\n"
2429            "%35 = OpExtInst %7 %1 FMix %23 %26 %34\n"
2430            "%37 = OpAccessChain %36 %13 %15\n"
2431            "OpStore %37 %35\n"
2432            "%43 = OpAccessChain %31 %42 %15\n"
2433            "%44 = OpLoad %6 %43\n"
2434            "OpStore %39 %44\n"
2435            "OpReturn\n"
2436            "OpFunctionEnd\n";
2437 }
2438 
addGeometryShadersFromTemplate(const std::string & glslTemplate,const vk::ShaderBuildOptions & options,vk::GlslSourceCollection & collection)2439 void vkt::subgroups::addGeometryShadersFromTemplate(const std::string &glslTemplate,
2440                                                     const vk::ShaderBuildOptions &options,
2441                                                     vk::GlslSourceCollection &collection)
2442 {
2443     tcu::StringTemplate geometryTemplate(glslTemplate);
2444 
2445     map<string, string> linesParams;
2446     linesParams.insert(pair<string, string>("TOPOLOGY", "lines"));
2447 
2448     map<string, string> pointsParams;
2449     pointsParams.insert(pair<string, string>("TOPOLOGY", "points"));
2450 
2451     collection.add("geometry_lines") << glu::GeometrySource(geometryTemplate.specialize(linesParams)) << options;
2452     collection.add("geometry_points") << glu::GeometrySource(geometryTemplate.specialize(pointsParams)) << options;
2453 }
2454 
addGeometryShadersFromTemplate(const std::string & spirvTemplate,const vk::SpirVAsmBuildOptions & options,vk::SpirVAsmCollection & collection)2455 void vkt::subgroups::addGeometryShadersFromTemplate(const std::string &spirvTemplate,
2456                                                     const vk::SpirVAsmBuildOptions &options,
2457                                                     vk::SpirVAsmCollection &collection)
2458 {
2459     tcu::StringTemplate geometryTemplate(spirvTemplate);
2460 
2461     map<string, string> linesParams;
2462     linesParams.insert(pair<string, string>("TOPOLOGY", "InputLines"));
2463 
2464     map<string, string> pointsParams;
2465     pointsParams.insert(pair<string, string>("TOPOLOGY", "InputPoints"));
2466 
2467     collection.add("geometry_lines") << geometryTemplate.specialize(linesParams) << options;
2468     collection.add("geometry_points") << geometryTemplate.specialize(pointsParams) << options;
2469 }
2470 
initializeMemory(Context & context,const Allocation & alloc,const subgroups::SSBOData & data)2471 void initializeMemory(Context &context, const Allocation &alloc, const subgroups::SSBOData &data)
2472 {
2473     const vk::VkFormat format = data.format;
2474     const vk::VkDeviceSize size =
2475         data.numElements * (data.isImage() ? getFormatSizeInBytes(format) : getElementSizeInBytes(format, data.layout));
2476     if (subgroups::SSBOData::InitializeNonZero == data.initializeType)
2477     {
2478         de::Random rnd(context.getTestContext().getCommandLine().getBaseSeed());
2479 
2480         switch (format)
2481         {
2482         default:
2483             DE_FATAL("Illegal buffer format");
2484             break;
2485         case VK_FORMAT_R8_SINT:
2486         case VK_FORMAT_R8G8_SINT:
2487         case VK_FORMAT_R8G8B8_SINT:
2488         case VK_FORMAT_R8G8B8A8_SINT:
2489         case VK_FORMAT_R8_UINT:
2490         case VK_FORMAT_R8G8_UINT:
2491         case VK_FORMAT_R8G8B8_UINT:
2492         case VK_FORMAT_R8G8B8A8_UINT:
2493         {
2494             uint8_t *ptr = reinterpret_cast<uint8_t *>(alloc.getHostPtr());
2495 
2496             for (vk::VkDeviceSize k = 0; k < (size / sizeof(uint8_t)); k++)
2497             {
2498                 ptr[k] = rnd.getUint8();
2499             }
2500         }
2501         break;
2502         case VK_FORMAT_R16_SINT:
2503         case VK_FORMAT_R16G16_SINT:
2504         case VK_FORMAT_R16G16B16_SINT:
2505         case VK_FORMAT_R16G16B16A16_SINT:
2506         case VK_FORMAT_R16_UINT:
2507         case VK_FORMAT_R16G16_UINT:
2508         case VK_FORMAT_R16G16B16_UINT:
2509         case VK_FORMAT_R16G16B16A16_UINT:
2510         {
2511             uint16_t *ptr = reinterpret_cast<uint16_t *>(alloc.getHostPtr());
2512 
2513             for (vk::VkDeviceSize k = 0; k < (size / sizeof(uint16_t)); k++)
2514             {
2515                 ptr[k] = rnd.getUint16();
2516             }
2517         }
2518         break;
2519         case VK_FORMAT_R8_USCALED:
2520         case VK_FORMAT_R8G8_USCALED:
2521         case VK_FORMAT_R8G8B8_USCALED:
2522         case VK_FORMAT_R8G8B8A8_USCALED:
2523         {
2524             uint32_t *ptr = reinterpret_cast<uint32_t *>(alloc.getHostPtr());
2525 
2526             for (vk::VkDeviceSize k = 0; k < (size / sizeof(uint32_t)); k++)
2527             {
2528                 uint32_t r = rnd.getUint32();
2529                 ptr[k]     = (r & 1) ? r : 0;
2530             }
2531         }
2532         break;
2533         case VK_FORMAT_R32_SINT:
2534         case VK_FORMAT_R32G32_SINT:
2535         case VK_FORMAT_R32G32B32_SINT:
2536         case VK_FORMAT_R32G32B32A32_SINT:
2537         case VK_FORMAT_R32_UINT:
2538         case VK_FORMAT_R32G32_UINT:
2539         case VK_FORMAT_R32G32B32_UINT:
2540         case VK_FORMAT_R32G32B32A32_UINT:
2541         {
2542             uint32_t *ptr = reinterpret_cast<uint32_t *>(alloc.getHostPtr());
2543 
2544             for (vk::VkDeviceSize k = 0; k < (size / sizeof(uint32_t)); k++)
2545             {
2546                 ptr[k] = rnd.getUint32();
2547             }
2548         }
2549         break;
2550         case VK_FORMAT_R64_SINT:
2551         case VK_FORMAT_R64G64_SINT:
2552         case VK_FORMAT_R64G64B64_SINT:
2553         case VK_FORMAT_R64G64B64A64_SINT:
2554         case VK_FORMAT_R64_UINT:
2555         case VK_FORMAT_R64G64_UINT:
2556         case VK_FORMAT_R64G64B64_UINT:
2557         case VK_FORMAT_R64G64B64A64_UINT:
2558         {
2559             uint64_t *ptr = reinterpret_cast<uint64_t *>(alloc.getHostPtr());
2560 
2561             for (vk::VkDeviceSize k = 0; k < (size / sizeof(uint64_t)); k++)
2562             {
2563                 ptr[k] = rnd.getUint64();
2564             }
2565         }
2566         break;
2567         case VK_FORMAT_R16_SFLOAT:
2568         case VK_FORMAT_R16G16_SFLOAT:
2569         case VK_FORMAT_R16G16B16_SFLOAT:
2570         case VK_FORMAT_R16G16B16A16_SFLOAT:
2571         {
2572             float16_t *const ptr = reinterpret_cast<float16_t *>(alloc.getHostPtr());
2573 
2574             for (vk::VkDeviceSize k = 0; k < (size / sizeof(float16_t)); k++)
2575             {
2576                 ptr[k] = tcu::Float16(rnd.getFloat()).bits();
2577             }
2578         }
2579         break;
2580         case VK_FORMAT_R32_SFLOAT:
2581         case VK_FORMAT_R32G32_SFLOAT:
2582         case VK_FORMAT_R32G32B32_SFLOAT:
2583         case VK_FORMAT_R32G32B32A32_SFLOAT:
2584         {
2585             float *ptr = reinterpret_cast<float *>(alloc.getHostPtr());
2586 
2587             for (vk::VkDeviceSize k = 0; k < (size / sizeof(float)); k++)
2588             {
2589                 ptr[k] = rnd.getFloat();
2590             }
2591         }
2592         break;
2593         case VK_FORMAT_R64_SFLOAT:
2594         case VK_FORMAT_R64G64_SFLOAT:
2595         case VK_FORMAT_R64G64B64_SFLOAT:
2596         case VK_FORMAT_R64G64B64A64_SFLOAT:
2597         {
2598             double *ptr = reinterpret_cast<double *>(alloc.getHostPtr());
2599 
2600             for (vk::VkDeviceSize k = 0; k < (size / sizeof(double)); k++)
2601             {
2602                 ptr[k] = rnd.getDouble();
2603             }
2604         }
2605         break;
2606         }
2607     }
2608     else if (subgroups::SSBOData::InitializeZero == data.initializeType)
2609     {
2610         uint32_t *ptr = reinterpret_cast<uint32_t *>(alloc.getHostPtr());
2611 
2612         for (vk::VkDeviceSize k = 0; k < size / 4; k++)
2613         {
2614             ptr[k] = 0;
2615         }
2616     }
2617 
2618     if (subgroups::SSBOData::InitializeNone != data.initializeType)
2619     {
2620         flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
2621     }
2622 }
2623 
getResultBinding(const VkShaderStageFlagBits shaderStage)2624 uint32_t getResultBinding(const VkShaderStageFlagBits shaderStage)
2625 {
2626     switch (shaderStage)
2627     {
2628     case VK_SHADER_STAGE_VERTEX_BIT:
2629         return 0u;
2630     case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
2631         return 1u;
2632     case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
2633         return 2u;
2634     case VK_SHADER_STAGE_GEOMETRY_BIT:
2635         return 3u;
2636     default:
2637         DE_ASSERT(0);
2638         return -1;
2639     }
2640     DE_ASSERT(0);
2641     return -1;
2642 }
2643 
makeTessellationEvaluationFrameBufferTest(Context & context,VkFormat format,const SSBOData * extraData,uint32_t extraDataCount,const void * internalData,subgroups::CheckResult checkResult,const VkShaderStageFlags shaderStage)2644 tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTest(
2645     Context &context, VkFormat format, const SSBOData *extraData, uint32_t extraDataCount, const void *internalData,
2646     subgroups::CheckResult checkResult, const VkShaderStageFlags shaderStage)
2647 {
2648     return makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(
2649         context, format, extraData, extraDataCount, internalData, checkResult, shaderStage, 0u, 0u);
2650 }
2651 
makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * extraData,uint32_t extraDataCount,const void * internalData,subgroups::CheckResult checkResult,const VkShaderStageFlags shaderStage,const uint32_t tessShaderStageCreateFlags,const uint32_t requiredSubgroupSize)2652 tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(
2653     Context &context, VkFormat format, const SSBOData *extraData, uint32_t extraDataCount, const void *internalData,
2654     subgroups::CheckResult checkResult, const VkShaderStageFlags shaderStage, const uint32_t tessShaderStageCreateFlags,
2655     const uint32_t requiredSubgroupSize)
2656 {
2657     const DeviceInterface &vk = context.getDeviceInterface();
2658     const VkDevice device     = context.getDevice();
2659     const uint32_t maxWidth   = getMaxWidth();
2660     vector<de::SharedPtr<BufferOrImage>> inputBuffers(extraDataCount);
2661     DescriptorSetLayoutBuilder layoutBuilder;
2662     DescriptorPoolBuilder poolBuilder;
2663     DescriptorSetUpdateBuilder updateBuilder;
2664     Move<VkDescriptorPool> descriptorPool;
2665     Move<VkDescriptorSet> descriptorSet;
2666     const Unique<VkShaderModule> vertexShaderModule(
2667         createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
2668     const Unique<VkShaderModule> teCtrlShaderModule(
2669         createShaderModule(vk, device, context.getBinaryCollection().get("tesc"), 0u));
2670     const Unique<VkShaderModule> teEvalShaderModule(
2671         createShaderModule(vk, device, context.getBinaryCollection().get("tese"), 0u));
2672     const Unique<VkShaderModule> fragmentShaderModule(
2673         createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
2674     const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
2675     const VkVertexInputBindingDescription vertexInputBinding = {
2676         0u,                                       //  uint32_t binding;
2677         static_cast<uint32_t>(sizeof(tcu::Vec4)), //  uint32_t stride;
2678         VK_VERTEX_INPUT_RATE_VERTEX               //  VkVertexInputRate inputRate;
2679     };
2680     const VkVertexInputAttributeDescription vertexInputAttribute = {
2681         0u,                            //  uint32_t location;
2682         0u,                            //  uint32_t binding;
2683         VK_FORMAT_R32G32B32A32_SFLOAT, //  VkFormat format;
2684         0u                             //  uint32_t offset;
2685     };
2686 
2687     for (uint32_t i = 0u; i < extraDataCount; i++)
2688     {
2689         if (extraData[i].isImage())
2690         {
2691             inputBuffers[i] = de::SharedPtr<BufferOrImage>(
2692                 new Image(context, static_cast<uint32_t>(extraData[i].numElements), 1u, extraData[i].format));
2693         }
2694         else
2695         {
2696             DE_ASSERT(extraData[i].isUBO());
2697             vk::VkDeviceSize size =
2698                 getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
2699             inputBuffers[i] =
2700                 de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2701         }
2702         const Allocation &alloc = inputBuffers[i]->getAllocation();
2703         initializeMemory(context, alloc, extraData[i]);
2704     }
2705 
2706     for (uint32_t ndx = 0u; ndx < extraDataCount; ndx++)
2707         layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, shaderStage, nullptr);
2708 
2709     const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
2710 
2711     const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
2712 
2713     const uint32_t requiredSubgroupSizes[5] = {
2714         0u, ((shaderStage & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? requiredSubgroupSize : 0u),
2715         ((shaderStage & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? requiredSubgroupSize : 0u), 0u, 0u};
2716 
2717     const Unique<VkPipeline> pipeline(makeGraphicsPipeline(
2718         context, *pipelineLayout,
2719         VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
2720             VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
2721         *vertexShaderModule, *fragmentShaderModule, VK_NULL_HANDLE, *teCtrlShaderModule, *teEvalShaderModule,
2722         *renderPass, VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, &vertexInputBinding, &vertexInputAttribute, true, format, 0u,
2723         ((shaderStage & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? tessShaderStageCreateFlags : 0u),
2724         ((shaderStage & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? tessShaderStageCreateFlags : 0u), 0u, 0u,
2725         requiredSubgroupSize != 0u ? requiredSubgroupSizes : nullptr));
2726 
2727     for (uint32_t ndx = 0u; ndx < extraDataCount; ndx++)
2728         poolBuilder.addType(inputBuffers[ndx]->getType());
2729 
2730     if (extraDataCount > 0)
2731     {
2732         descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2733         descriptorSet  = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
2734     }
2735 
2736     for (uint32_t buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
2737     {
2738         if (inputBuffers[buffersNdx]->isImage())
2739         {
2740             VkDescriptorImageInfo info = makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
2741                                                                  inputBuffers[buffersNdx]->getAsImage()->getImageView(),
2742                                                                  VK_IMAGE_LAYOUT_GENERAL);
2743 
2744             updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2745                                       inputBuffers[buffersNdx]->getType(), &info);
2746         }
2747         else
2748         {
2749             VkDescriptorBufferInfo info =
2750                 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(), 0ull,
2751                                          inputBuffers[buffersNdx]->getAsBuffer()->getSize());
2752 
2753             updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2754                                       inputBuffers[buffersNdx]->getType(), &info);
2755         }
2756     }
2757 
2758     updateBuilder.update(vk, device);
2759 
2760     const VkQueue queue             = context.getUniversalQueue();
2761     const uint32_t queueFamilyIndex = context.getUniversalQueueFamilyIndex();
2762     const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
2763     const uint32_t subgroupSize = getSubgroupSize(context);
2764     const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(context, *cmdPool));
2765     const vk::VkDeviceSize vertexBufferSize = 2ull * maxWidth * sizeof(tcu::Vec4);
2766     Buffer vertexBuffer(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
2767     unsigned totalIterations  = 0u;
2768     unsigned failedIterations = 0u;
2769     Image discardableImage(context, maxWidth, 1u, format,
2770                            VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2771 
2772     {
2773         const Allocation &alloc = vertexBuffer.getAllocation();
2774         std::vector<tcu::Vec4> data(2u * maxWidth, Vec4(1.0f, 0.0f, 1.0f, 1.0f));
2775         const float pixelSize  = 2.0f / static_cast<float>(maxWidth);
2776         float leftHandPosition = -1.0f;
2777 
2778         for (uint32_t ndx = 0u; ndx < data.size(); ndx += 2u)
2779         {
2780             data[ndx][0] = leftHandPosition;
2781             leftHandPosition += pixelSize;
2782             data[ndx + 1][0] = leftHandPosition;
2783         }
2784 
2785         deMemcpy(alloc.getHostPtr(), &data[0], data.size() * sizeof(tcu::Vec4));
2786         flushAlloc(vk, device, alloc);
2787     }
2788 
2789     const Unique<VkFramebuffer> framebuffer(
2790         makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
2791     const VkViewport viewport              = makeViewport(maxWidth, 1u);
2792     const VkRect2D scissor                 = makeRect2D(maxWidth, 1u);
2793     const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
2794     Buffer imageBufferResult(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2795     const VkDeviceSize vertexBufferOffset = 0u;
2796 
2797     for (uint32_t width = 1u; width < maxWidth; width = getNextWidth(width))
2798     {
2799         totalIterations++;
2800 
2801         beginCommandBuffer(vk, *cmdBuffer);
2802         {
2803 
2804             vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
2805             vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
2806 
2807             beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
2808 
2809             vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
2810 
2811             if (extraDataCount > 0)
2812             {
2813                 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2814                                          &descriptorSet.get(), 0u, nullptr);
2815             }
2816 
2817             vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
2818             vk.cmdDraw(*cmdBuffer, 2 * width, 1, 0, 0);
2819 
2820             endRenderPass(vk, *cmdBuffer);
2821 
2822             copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(),
2823                               tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
2824                               VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2825             endCommandBuffer(vk, *cmdBuffer);
2826 
2827             submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2828         }
2829         context.resetCommandPoolForVKSC(device, *cmdPool);
2830 
2831         {
2832             const Allocation &allocResult = imageBufferResult.getAllocation();
2833             invalidateAlloc(vk, device, allocResult);
2834 
2835             std::vector<const void *> datas;
2836             datas.push_back(allocResult.getHostPtr());
2837             if (!checkResult(internalData, datas, width / 2u, subgroupSize))
2838                 failedIterations++;
2839         }
2840     }
2841 
2842     if (0 < failedIterations)
2843     {
2844         unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
2845 
2846         context.getTestContext().getLog()
2847             << TestLog::Message << valuesPassed << " / " << totalIterations << " values passed" << TestLog::EndMessage;
2848         return tcu::TestStatus::fail("Failed!");
2849     }
2850 
2851     return tcu::TestStatus::pass("OK");
2852 }
2853 
// Verifies that the first `width` 32-bit words of the result buffer all equal `ref`.
//
// datas - host pointers to result buffers; only datas[0] is inspected and it is read as
//         an array of uint32_t.
// width - number of uint32_t elements to compare.
// ref   - value every inspected element has to hold.
//
// Returns true when all `width` elements equal `ref` (trivially true for width == 0).
// Returns false on the first mismatch, or when elements were requested but no readable
// result buffer was supplied.
bool vkt::subgroups::check(std::vector<const void *> datas, uint32_t width, uint32_t ref)
{
    // Nothing to compare: vacuously true, and datas[0] must not be touched.
    if (width == 0u)
        return true;

    // Indexing an empty vector or reading through a null pointer is undefined behaviour;
    // report a failed check instead of crashing.
    if (datas.empty() || datas[0] == nullptr)
        return false;

    const uint32_t *const data = static_cast<const uint32_t *>(datas[0]);

    for (uint32_t n = 0u; n < width; ++n)
    {
        if (data[n] != ref)
            return false;
    }

    return true;
}
2868 
checkComputeOrMesh(std::vector<const void * > datas,const uint32_t numWorkgroups[3],const uint32_t localSize[3],uint32_t ref)2869 bool vkt::subgroups::checkComputeOrMesh(std::vector<const void *> datas, const uint32_t numWorkgroups[3],
2870                                         const uint32_t localSize[3], uint32_t ref)
2871 {
2872     const uint32_t globalSizeX = numWorkgroups[0] * localSize[0];
2873     const uint32_t globalSizeY = numWorkgroups[1] * localSize[1];
2874     const uint32_t globalSizeZ = numWorkgroups[2] * localSize[2];
2875 
2876     return check(datas, globalSizeX * globalSizeY * globalSizeZ, ref);
2877 }
2878 
makeGeometryFrameBufferTest(Context & context,VkFormat format,const SSBOData * extraData,uint32_t extraDataCount,const void * internalData,subgroups::CheckResult checkResult)2879 tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTest(Context &context, VkFormat format,
2880                                                             const SSBOData *extraData, uint32_t extraDataCount,
2881                                                             const void *internalData,
2882                                                             subgroups::CheckResult checkResult)
2883 {
2884     return makeGeometryFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData,
2885                                                            checkResult, 0u, 0u);
2886 }
2887 
makeGeometryFrameBufferTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * extraData,uint32_t extraDataCount,const void * internalData,subgroups::CheckResult checkResult,const uint32_t geometryShaderStageCreateFlags,const uint32_t requiredSubgroupSize)2888 tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTestRequiredSubgroupSize(
2889     Context &context, VkFormat format, const SSBOData *extraData, uint32_t extraDataCount, const void *internalData,
2890     subgroups::CheckResult checkResult, const uint32_t geometryShaderStageCreateFlags,
2891     const uint32_t requiredSubgroupSize)
2892 {
2893     const DeviceInterface &vk = context.getDeviceInterface();
2894     const VkDevice device     = context.getDevice();
2895     const uint32_t maxWidth   = getMaxWidth();
2896     vector<de::SharedPtr<BufferOrImage>> inputBuffers(extraDataCount);
2897     DescriptorSetLayoutBuilder layoutBuilder;
2898     DescriptorPoolBuilder poolBuilder;
2899     DescriptorSetUpdateBuilder updateBuilder;
2900     Move<VkDescriptorPool> descriptorPool;
2901     Move<VkDescriptorSet> descriptorSet;
2902     const Unique<VkShaderModule> vertexShaderModule(
2903         createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
2904     const Unique<VkShaderModule> geometryShaderModule(
2905         createShaderModule(vk, device, context.getBinaryCollection().get("geometry"), 0u));
2906     const Unique<VkShaderModule> fragmentShaderModule(
2907         createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
2908     const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
2909     const VkVertexInputBindingDescription vertexInputBinding = {
2910         0u,                                       //  uint32_t binding;
2911         static_cast<uint32_t>(sizeof(tcu::Vec4)), //  uint32_t stride;
2912         VK_VERTEX_INPUT_RATE_VERTEX               //  VkVertexInputRate inputRate;
2913     };
2914     const VkVertexInputAttributeDescription vertexInputAttribute = {
2915         0u,                            //  uint32_t location;
2916         0u,                            //  uint32_t binding;
2917         VK_FORMAT_R32G32B32A32_SFLOAT, //  VkFormat format;
2918         0u                             //  uint32_t offset;
2919     };
2920 
2921     for (uint32_t i = 0u; i < extraDataCount; i++)
2922     {
2923         if (extraData[i].isImage())
2924         {
2925             inputBuffers[i] = de::SharedPtr<BufferOrImage>(
2926                 new Image(context, static_cast<uint32_t>(extraData[i].numElements), 1u, extraData[i].format));
2927         }
2928         else
2929         {
2930             DE_ASSERT(extraData[i].isUBO());
2931             vk::VkDeviceSize size =
2932                 getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
2933             inputBuffers[i] =
2934                 de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2935         }
2936         const Allocation &alloc = inputBuffers[i]->getAllocation();
2937         initializeMemory(context, alloc, extraData[i]);
2938     }
2939 
2940     for (uint32_t ndx = 0u; ndx < extraDataCount; ndx++)
2941         layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_GEOMETRY_BIT, nullptr);
2942 
2943     const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
2944 
2945     const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
2946 
2947     const uint32_t requiredSubgroupSizes[5] = {0u, 0u, 0u, requiredSubgroupSize, 0u};
2948 
2949     const Unique<VkPipeline> pipeline(makeGraphicsPipeline(
2950         context, *pipelineLayout,
2951         VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_GEOMETRY_BIT, *vertexShaderModule,
2952         *fragmentShaderModule, *geometryShaderModule, VK_NULL_HANDLE, VK_NULL_HANDLE, *renderPass,
2953         VK_PRIMITIVE_TOPOLOGY_POINT_LIST, &vertexInputBinding, &vertexInputAttribute, true, format, 0u, 0u, 0u,
2954         geometryShaderStageCreateFlags, 0u, requiredSubgroupSize != 0u ? requiredSubgroupSizes : nullptr));
2955 
2956     for (uint32_t ndx = 0u; ndx < extraDataCount; ndx++)
2957         poolBuilder.addType(inputBuffers[ndx]->getType());
2958 
2959     if (extraDataCount > 0)
2960     {
2961         descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2962         descriptorSet  = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
2963     }
2964 
2965     for (uint32_t buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
2966     {
2967         if (inputBuffers[buffersNdx]->isImage())
2968         {
2969             VkDescriptorImageInfo info = makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
2970                                                                  inputBuffers[buffersNdx]->getAsImage()->getImageView(),
2971                                                                  VK_IMAGE_LAYOUT_GENERAL);
2972 
2973             updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2974                                       inputBuffers[buffersNdx]->getType(), &info);
2975         }
2976         else
2977         {
2978             VkDescriptorBufferInfo info =
2979                 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(), 0ull,
2980                                          inputBuffers[buffersNdx]->getAsBuffer()->getSize());
2981 
2982             updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2983                                       inputBuffers[buffersNdx]->getType(), &info);
2984         }
2985     }
2986 
2987     updateBuilder.update(vk, device);
2988 
2989     const VkQueue queue             = context.getUniversalQueue();
2990     const uint32_t queueFamilyIndex = context.getUniversalQueueFamilyIndex();
2991     const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
2992     const uint32_t subgroupSize = getSubgroupSize(context);
2993     const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(context, *cmdPool));
2994     const vk::VkDeviceSize vertexBufferSize = maxWidth * sizeof(tcu::Vec4);
2995     Buffer vertexBuffer(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
2996     unsigned totalIterations  = 0u;
2997     unsigned failedIterations = 0u;
2998     Image discardableImage(context, maxWidth, 1u, format,
2999                            VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3000 
3001     {
3002         const Allocation &alloc = vertexBuffer.getAllocation();
3003         std::vector<tcu::Vec4> data(maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
3004         const float pixelSize  = 2.0f / static_cast<float>(maxWidth);
3005         float leftHandPosition = -1.0f;
3006 
3007         for (uint32_t ndx = 0u; ndx < maxWidth; ++ndx)
3008         {
3009             data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
3010             leftHandPosition += pixelSize;
3011         }
3012 
3013         deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
3014         flushAlloc(vk, device, alloc);
3015     }
3016 
3017     const Unique<VkFramebuffer> framebuffer(
3018         makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
3019     const VkViewport viewport              = makeViewport(maxWidth, 1u);
3020     const VkRect2D scissor                 = makeRect2D(maxWidth, 1u);
3021     const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
3022     Buffer imageBufferResult(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3023     const VkDeviceSize vertexBufferOffset = 0u;
3024 
3025     for (uint32_t width = 1u; width < maxWidth; width = getNextWidth(width))
3026     {
3027         totalIterations++;
3028 
3029         for (uint32_t ndx = 0u; ndx < inputBuffers.size(); ndx++)
3030         {
3031             const Allocation &alloc = inputBuffers[ndx]->getAllocation();
3032             initializeMemory(context, alloc, extraData[ndx]);
3033         }
3034 
3035         beginCommandBuffer(vk, *cmdBuffer);
3036         {
3037             vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3038 
3039             vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3040 
3041             beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
3042 
3043             vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3044 
3045             if (extraDataCount > 0)
3046             {
3047                 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3048                                          &descriptorSet.get(), 0u, nullptr);
3049             }
3050 
3051             vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
3052 
3053             vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
3054 
3055             endRenderPass(vk, *cmdBuffer);
3056 
3057             copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(),
3058                               tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
3059                               VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3060 
3061             endCommandBuffer(vk, *cmdBuffer);
3062 
3063             submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3064         }
3065         context.resetCommandPoolForVKSC(device, *cmdPool);
3066 
3067         {
3068             const Allocation &allocResult = imageBufferResult.getAllocation();
3069             invalidateAlloc(vk, device, allocResult);
3070 
3071             std::vector<const void *> datas;
3072             datas.push_back(allocResult.getHostPtr());
3073             if (!checkResult(internalData, datas, width, subgroupSize))
3074                 failedIterations++;
3075         }
3076     }
3077 
3078     if (0 < failedIterations)
3079     {
3080         unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3081 
3082         context.getTestContext().getLog()
3083             << TestLog::Message << valuesPassed << " / " << totalIterations << " values passed" << TestLog::EndMessage;
3084 
3085         return tcu::TestStatus::fail("Failed!");
3086     }
3087 
3088     return tcu::TestStatus::pass("OK");
3089 }
3090 
getPossibleGraphicsSubgroupStages(Context & context,const vk::VkShaderStageFlags testedStages)3091 vk::VkShaderStageFlags vkt::subgroups::getPossibleGraphicsSubgroupStages(Context &context,
3092                                                                          const vk::VkShaderStageFlags testedStages)
3093 {
3094     const VkPhysicalDeviceSubgroupProperties &subgroupProperties = context.getSubgroupProperties();
3095     VkShaderStageFlags stages                                    = testedStages & subgroupProperties.supportedStages;
3096 
3097     DE_ASSERT(isAllGraphicsStages(testedStages));
3098 
3099     if (VK_SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
3100     {
3101         if ((stages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
3102             TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
3103         else
3104             stages = VK_SHADER_STAGE_FRAGMENT_BIT;
3105     }
3106 
3107     if (static_cast<VkShaderStageFlags>(0u) == stages)
3108         TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
3109 
3110     return stages;
3111 }
3112 
allStages(Context & context,vk::VkFormat format,const SSBOData * extraData,uint32_t extraDataCount,const void * internalData,const VerificationFunctor & checkResult,const vk::VkShaderStageFlags shaderStage)3113 tcu::TestStatus vkt::subgroups::allStages(Context &context, vk::VkFormat format, const SSBOData *extraData,
3114                                           uint32_t extraDataCount, const void *internalData,
3115                                           const VerificationFunctor &checkResult,
3116                                           const vk::VkShaderStageFlags shaderStage)
3117 {
3118     return vkt::subgroups::allStagesRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData,
3119                                                          checkResult, shaderStage, 0u, 0u, 0u, 0u, 0u, nullptr);
3120 }
3121 
allStagesRequiredSubgroupSize(Context & context,vk::VkFormat format,const SSBOData * extraDatas,uint32_t extraDatasCount,const void * internalData,const VerificationFunctor & checkResult,const vk::VkShaderStageFlags shaderStageTested,const uint32_t vertexShaderStageCreateFlags,const uint32_t tessellationControlShaderStageCreateFlags,const uint32_t tessellationEvalShaderStageCreateFlags,const uint32_t geometryShaderStageCreateFlags,const uint32_t fragmentShaderStageCreateFlags,const uint32_t requiredSubgroupSize[5])3122 tcu::TestStatus vkt::subgroups::allStagesRequiredSubgroupSize(
3123     Context &context, vk::VkFormat format, const SSBOData *extraDatas, uint32_t extraDatasCount,
3124     const void *internalData, const VerificationFunctor &checkResult, const vk::VkShaderStageFlags shaderStageTested,
3125     const uint32_t vertexShaderStageCreateFlags, const uint32_t tessellationControlShaderStageCreateFlags,
3126     const uint32_t tessellationEvalShaderStageCreateFlags, const uint32_t geometryShaderStageCreateFlags,
3127     const uint32_t fragmentShaderStageCreateFlags, const uint32_t requiredSubgroupSize[5])
3128 {
3129     const DeviceInterface &vk = context.getDeviceInterface();
3130     const VkDevice device     = context.getDevice();
3131     const uint32_t maxWidth   = getMaxWidth();
3132     vector<VkShaderStageFlagBits> stagesVector;
3133     VkShaderStageFlags shaderStageRequired = (VkShaderStageFlags)0ull;
3134 
3135     Move<VkShaderModule> vertexShaderModule;
3136     Move<VkShaderModule> teCtrlShaderModule;
3137     Move<VkShaderModule> teEvalShaderModule;
3138     Move<VkShaderModule> geometryShaderModule;
3139     Move<VkShaderModule> fragmentShaderModule;
3140 
3141     if (shaderStageTested & VK_SHADER_STAGE_VERTEX_BIT)
3142     {
3143         stagesVector.push_back(VK_SHADER_STAGE_VERTEX_BIT);
3144     }
3145     if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
3146     {
3147         stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
3148         shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ?
3149                                    (VkShaderStageFlags)0u :
3150                                    (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
3151         shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ?
3152                                    (VkShaderStageFlags)0u :
3153                                    (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
3154     }
3155     if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
3156     {
3157         stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
3158         shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ?
3159                                    (VkShaderStageFlags)0u :
3160                                    (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
3161         shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ?
3162                                    (VkShaderStageFlags)0u :
3163                                    (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
3164     }
3165     if (shaderStageTested & VK_SHADER_STAGE_GEOMETRY_BIT)
3166     {
3167         stagesVector.push_back(VK_SHADER_STAGE_GEOMETRY_BIT);
3168         const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
3169         shaderStageRequired |= (shaderStageTested & required) ? (VkShaderStageFlags)0 : required;
3170     }
3171     if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
3172     {
3173         const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
3174         shaderStageRequired |= (shaderStageTested & required) ? (VkShaderStageFlags)0 : required;
3175     }
3176 
3177     const uint32_t stagesCount = static_cast<uint32_t>(stagesVector.size());
3178     const string vert          = (shaderStageRequired & VK_SHADER_STAGE_VERTEX_BIT) ? "vert_noSubgroup" : "vert";
3179     const string tesc = (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? "tesc_noSubgroup" : "tesc";
3180     const string tese =
3181         (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? "tese_noSubgroup" : "tese";
3182 
3183     shaderStageRequired = shaderStageTested | shaderStageRequired;
3184 
3185     vertexShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(vert), 0u);
3186     if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
3187     {
3188         teCtrlShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tesc), 0u);
3189         teEvalShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tese), 0u);
3190     }
3191     if (shaderStageRequired & VK_SHADER_STAGE_GEOMETRY_BIT)
3192     {
3193         if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
3194         {
3195             // tessellation shaders output line primitives
3196             geometryShaderModule =
3197                 createShaderModule(vk, device, context.getBinaryCollection().get("geometry_lines"), 0u);
3198         }
3199         else
3200         {
3201             // otherwise points are processed by geometry shader
3202             geometryShaderModule =
3203                 createShaderModule(vk, device, context.getBinaryCollection().get("geometry_points"), 0u);
3204         }
3205     }
3206     if (shaderStageRequired & VK_SHADER_STAGE_FRAGMENT_BIT)
3207         fragmentShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u);
3208 
3209     std::vector<de::SharedPtr<BufferOrImage>> inputBuffers(stagesCount + extraDatasCount);
3210 
3211     DescriptorSetLayoutBuilder layoutBuilder;
3212 
3213     // The implicit result SSBO we use to store our outputs from the shader
3214     for (uint32_t ndx = 0u; ndx < stagesCount; ++ndx)
3215     {
3216         const VkDeviceSize shaderSize =
3217             (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? maxWidth * 2 : maxWidth;
3218         const VkDeviceSize size = getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize;
3219         inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT));
3220 
3221         layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1, stagesVector[ndx],
3222                                         getResultBinding(stagesVector[ndx]), nullptr);
3223     }
3224 
3225     for (uint32_t ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
3226     {
3227         const uint32_t datasNdx = ndx - stagesCount;
3228         if (extraDatas[datasNdx].isImage())
3229         {
3230             inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Image(
3231                 context, static_cast<uint32_t>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
3232         }
3233         else
3234         {
3235             const auto usage = (extraDatas[datasNdx].isUBO() ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT :
3236                                                                VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
3237             const auto size  = getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) *
3238                               extraDatas[datasNdx].numElements;
3239             inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, usage));
3240         }
3241 
3242         const Allocation &alloc = inputBuffers[ndx]->getAllocation();
3243         initializeMemory(context, alloc, extraDatas[datasNdx]);
3244 
3245         layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1, extraDatas[datasNdx].stages,
3246                                         extraDatas[datasNdx].binding, nullptr);
3247     }
3248 
3249     const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
3250 
3251     const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
3252 
3253     const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
3254     const Unique<VkPipeline> pipeline(makeGraphicsPipeline(
3255         context, *pipelineLayout, shaderStageRequired, *vertexShaderModule, *fragmentShaderModule,
3256         *geometryShaderModule, *teCtrlShaderModule, *teEvalShaderModule, *renderPass,
3257         (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST :
3258                                                                            VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
3259         nullptr, nullptr, false, VK_FORMAT_R32G32B32A32_SFLOAT, vertexShaderStageCreateFlags,
3260         tessellationControlShaderStageCreateFlags, tessellationEvalShaderStageCreateFlags,
3261         geometryShaderStageCreateFlags, fragmentShaderStageCreateFlags, requiredSubgroupSize));
3262 
3263     Move<VkDescriptorPool> descriptorPool;
3264     Move<VkDescriptorSet> descriptorSet;
3265 
3266     if (inputBuffers.size() > 0)
3267     {
3268         DescriptorPoolBuilder poolBuilder;
3269 
3270         for (uint32_t ndx = 0u; ndx < static_cast<uint32_t>(inputBuffers.size()); ndx++)
3271         {
3272             poolBuilder.addType(inputBuffers[ndx]->getType());
3273         }
3274 
3275         descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3276 
3277         // Create descriptor set
3278         descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3279 
3280         DescriptorSetUpdateBuilder updateBuilder;
3281 
3282         for (uint32_t ndx = 0u; ndx < stagesCount + extraDatasCount; ndx++)
3283         {
3284             uint32_t binding;
3285             if (ndx < stagesCount)
3286                 binding = getResultBinding(stagesVector[ndx]);
3287             else
3288                 binding = extraDatas[ndx - stagesCount].binding;
3289 
3290             if (inputBuffers[ndx]->isImage())
3291             {
3292                 VkDescriptorImageInfo info =
3293                     makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
3294                                             inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3295 
3296                 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(binding),
3297                                           inputBuffers[ndx]->getType(), &info);
3298             }
3299             else
3300             {
3301                 VkDescriptorBufferInfo info = makeDescriptorBufferInfo(
3302                     inputBuffers[ndx]->getAsBuffer()->getBuffer(), 0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
3303 
3304                 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(binding),
3305                                           inputBuffers[ndx]->getType(), &info);
3306             }
3307         }
3308 
3309         updateBuilder.update(vk, device);
3310     }
3311 
3312     {
3313         const VkQueue queue             = context.getUniversalQueue();
3314         const uint32_t queueFamilyIndex = context.getUniversalQueueFamilyIndex();
3315         const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
3316         const uint32_t subgroupSize = getSubgroupSize(context);
3317         const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(context, *cmdPool));
3318         unsigned totalIterations  = 0u;
3319         unsigned failedIterations = 0u;
3320         Image resultImage(context, maxWidth, 1, format,
3321                           VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3322         const Unique<VkFramebuffer> framebuffer(
3323             makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), maxWidth, 1u));
3324         const VkViewport viewport              = makeViewport(maxWidth, 1u);
3325         const VkRect2D scissor                 = makeRect2D(maxWidth, 1u);
3326         const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
3327         Buffer imageBufferResult(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3328         const VkImageSubresourceRange subresourceRange = {
3329             VK_IMAGE_ASPECT_COLOR_BIT, //VkImageAspectFlags    aspectMask
3330             0u,                        //uint32_t                baseMipLevel
3331             1u,                        //uint32_t                levelCount
3332             0u,                        //uint32_t                baseArrayLayer
3333             1u                         //uint32_t                layerCount
3334         };
3335 
3336         const VkImageMemoryBarrier colorAttachmentBarrier =
3337             makeImageMemoryBarrier((VkAccessFlags)0u, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
3338                                    VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, resultImage.getImage(), subresourceRange);
3339 
3340         for (uint32_t width = 1u; width < maxWidth; width = getNextWidth(width))
3341         {
3342             for (uint32_t ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
3343             {
3344                 // re-init the data
3345                 const Allocation &alloc = inputBuffers[ndx]->getAllocation();
3346                 initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
3347             }
3348 
3349             totalIterations++;
3350 
3351             beginCommandBuffer(vk, *cmdBuffer);
3352 
3353             vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
3354                                   VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0, 0u, nullptr, 0u,
3355                                   nullptr, 1u, &colorAttachmentBarrier);
3356 
3357             vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3358 
3359             vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3360 
3361             beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
3362 
3363             vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3364 
3365             if (stagesCount + extraDatasCount > 0)
3366                 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3367                                          &descriptorSet.get(), 0u, nullptr);
3368 
3369             vk.cmdDraw(*cmdBuffer, width, 1, 0, 0);
3370 
3371             endRenderPass(vk, *cmdBuffer);
3372 
3373             copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), imageBufferResult.getBuffer(),
3374                               tcu::IVec2(width, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
3375                               VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3376 
3377             endCommandBuffer(vk, *cmdBuffer);
3378 
3379             submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3380 
3381             for (uint32_t ndx = 0u; ndx < stagesCount; ++ndx)
3382             {
3383                 std::vector<const void *> datas;
3384                 if (!inputBuffers[ndx]->isImage())
3385                 {
3386                     const Allocation &resultAlloc = inputBuffers[ndx]->getAllocation();
3387                     invalidateAlloc(vk, device, resultAlloc);
3388                     // we always have our result data first
3389                     datas.push_back(resultAlloc.getHostPtr());
3390                 }
3391 
3392                 for (uint32_t index = stagesCount; index < stagesCount + extraDatasCount; ++index)
3393                 {
3394                     const uint32_t datasNdx = index - stagesCount;
3395                     if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
3396                     {
3397                         const Allocation &resultAlloc = inputBuffers[index]->getAllocation();
3398                         invalidateAlloc(vk, device, resultAlloc);
3399                         // we always have our result data first
3400                         datas.push_back(resultAlloc.getHostPtr());
3401                     }
3402                 }
3403 
3404                 // Any stage in the vertex pipeline may be called multiple times per vertex, so we may need >= non-strict comparisons.
3405                 const bool multiCall = (stagesVector[ndx] == VK_SHADER_STAGE_VERTEX_BIT ||
3406                                         stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT ||
3407                                         stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT ||
3408                                         stagesVector[ndx] == VK_SHADER_STAGE_GEOMETRY_BIT);
3409                 const uint32_t usedWidth =
3410                     ((stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? width * 2 : width);
3411 
3412                 if (!checkResult(internalData, datas, usedWidth, subgroupSize, multiCall))
3413                     failedIterations++;
3414             }
3415             if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
3416             {
3417                 std::vector<const void *> datas;
3418                 const Allocation &resultAlloc = imageBufferResult.getAllocation();
3419                 invalidateAlloc(vk, device, resultAlloc);
3420 
3421                 // we always have our result data first
3422                 datas.push_back(resultAlloc.getHostPtr());
3423 
3424                 for (uint32_t index = stagesCount; index < stagesCount + extraDatasCount; ++index)
3425                 {
3426                     const uint32_t datasNdx = index - stagesCount;
3427                     if (VK_SHADER_STAGE_FRAGMENT_BIT & extraDatas[datasNdx].stages && (!inputBuffers[index]->isImage()))
3428                     {
3429                         const Allocation &alloc = inputBuffers[index]->getAllocation();
3430                         invalidateAlloc(vk, device, alloc);
3431                         // we always have our result data first
3432                         datas.push_back(alloc.getHostPtr());
3433                     }
3434                 }
3435 
3436                 if (!checkResult(internalData, datas, width, subgroupSize, false))
3437                     failedIterations++;
3438             }
3439 
3440             context.resetCommandPoolForVKSC(device, *cmdPool);
3441         }
3442 
3443         if (0 < failedIterations)
3444         {
3445             unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3446 
3447             context.getTestContext().getLog() << TestLog::Message << valuesPassed << " / " << totalIterations
3448                                               << " values passed" << TestLog::EndMessage;
3449 
3450             return tcu::TestStatus::fail("Failed!");
3451         }
3452     }
3453 
3454     return tcu::TestStatus::pass("OK");
3455 }
3456 
makeVertexFrameBufferTest(Context & context,vk::VkFormat format,const SSBOData * extraData,uint32_t extraDataCount,const void * internalData,subgroups::CheckResult checkResult)3457 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTest(Context &context, vk::VkFormat format,
3458                                                           const SSBOData *extraData, uint32_t extraDataCount,
3459                                                           const void *internalData, subgroups::CheckResult checkResult)
3460 {
3461     return makeVertexFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData,
3462                                                          checkResult, 0u, 0u);
3463 }
3464 
makeVertexFrameBufferTestRequiredSubgroupSize(Context & context,vk::VkFormat format,const SSBOData * extraData,uint32_t extraDataCount,const void * internalData,subgroups::CheckResult checkResult,const uint32_t vertexShaderStageCreateFlags,const uint32_t requiredSubgroupSize)3465 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTestRequiredSubgroupSize(
3466     Context &context, vk::VkFormat format, const SSBOData *extraData, uint32_t extraDataCount, const void *internalData,
3467     subgroups::CheckResult checkResult, const uint32_t vertexShaderStageCreateFlags,
3468     const uint32_t requiredSubgroupSize)
3469 {
3470     const DeviceInterface &vk       = context.getDeviceInterface();
3471     const VkDevice device           = context.getDevice();
3472     const VkQueue queue             = context.getUniversalQueue();
3473     const uint32_t maxWidth         = getMaxWidth();
3474     const uint32_t queueFamilyIndex = context.getUniversalQueueFamilyIndex();
3475     vector<de::SharedPtr<BufferOrImage>> inputBuffers(extraDataCount);
3476     DescriptorSetLayoutBuilder layoutBuilder;
3477     const Unique<VkShaderModule> vertexShaderModule(
3478         createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
3479     const Unique<VkShaderModule> fragmentShaderModule(
3480         createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
3481     const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
3482     const VkVertexInputBindingDescription vertexInputBinding = {
3483         0u,                                       // binding;
3484         static_cast<uint32_t>(sizeof(tcu::Vec4)), // stride;
3485         VK_VERTEX_INPUT_RATE_VERTEX               // inputRate
3486     };
3487     const VkVertexInputAttributeDescription vertexInputAttribute = {0u, 0u, VK_FORMAT_R32G32B32A32_SFLOAT, 0u};
3488 
3489     for (uint32_t i = 0u; i < extraDataCount; i++)
3490     {
3491         if (extraData[i].isImage())
3492         {
3493             inputBuffers[i] = de::SharedPtr<BufferOrImage>(
3494                 new Image(context, static_cast<uint32_t>(extraData[i].numElements), 1u, extraData[i].format));
3495         }
3496         else
3497         {
3498             DE_ASSERT(extraData[i].isUBO());
3499             vk::VkDeviceSize size =
3500                 getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
3501             inputBuffers[i] =
3502                 de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
3503         }
3504         const Allocation &alloc = inputBuffers[i]->getAllocation();
3505         initializeMemory(context, alloc, extraData[i]);
3506     }
3507 
3508     for (uint32_t ndx = 0u; ndx < extraDataCount; ndx++)
3509         layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_VERTEX_BIT, nullptr);
3510 
3511     const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
3512 
3513     const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
3514 
3515     const uint32_t requiredSubgroupSizes[5] = {requiredSubgroupSize, 0u, 0u, 0u, 0u};
3516     const Unique<VkPipeline> pipeline(makeGraphicsPipeline(
3517         context, *pipelineLayout, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, *vertexShaderModule,
3518         *fragmentShaderModule, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, *renderPass,
3519         VK_PRIMITIVE_TOPOLOGY_POINT_LIST, &vertexInputBinding, &vertexInputAttribute, true, format,
3520         vertexShaderStageCreateFlags, 0u, 0u, 0u, 0u, requiredSubgroupSize != 0u ? requiredSubgroupSizes : nullptr));
3521     DescriptorPoolBuilder poolBuilder;
3522     DescriptorSetUpdateBuilder updateBuilder;
3523 
3524     for (uint32_t ndx = 0u; ndx < inputBuffers.size(); ndx++)
3525         poolBuilder.addType(inputBuffers[ndx]->getType());
3526 
3527     Move<VkDescriptorPool> descriptorPool;
3528     Move<VkDescriptorSet> descriptorSet;
3529 
3530     if (extraDataCount > 0)
3531     {
3532         descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3533         descriptorSet  = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3534     }
3535 
3536     for (uint32_t ndx = 0u; ndx < extraDataCount; ndx++)
3537     {
3538         const Allocation &alloc = inputBuffers[ndx]->getAllocation();
3539         initializeMemory(context, alloc, extraData[ndx]);
3540     }
3541 
3542     for (uint32_t buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
3543     {
3544         if (inputBuffers[buffersNdx]->isImage())
3545         {
3546             VkDescriptorImageInfo info = makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
3547                                                                  inputBuffers[buffersNdx]->getAsImage()->getImageView(),
3548                                                                  VK_IMAGE_LAYOUT_GENERAL);
3549 
3550             updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
3551                                       inputBuffers[buffersNdx]->getType(), &info);
3552         }
3553         else
3554         {
3555             VkDescriptorBufferInfo info =
3556                 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(), 0ull,
3557                                          inputBuffers[buffersNdx]->getAsBuffer()->getSize());
3558 
3559             updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
3560                                       inputBuffers[buffersNdx]->getType(), &info);
3561         }
3562     }
3563     updateBuilder.update(vk, device);
3564 
3565     const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
3566 
3567     const uint32_t subgroupSize = getSubgroupSize(context);
3568 
3569     const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(context, *cmdPool));
3570 
3571     const vk::VkDeviceSize vertexBufferSize = maxWidth * sizeof(tcu::Vec4);
3572     Buffer vertexBuffer(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
3573 
3574     unsigned totalIterations  = 0u;
3575     unsigned failedIterations = 0u;
3576 
3577     Image discardableImage(context, maxWidth, 1u, format,
3578                            VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3579 
3580     {
3581         const Allocation &alloc = vertexBuffer.getAllocation();
3582         std::vector<tcu::Vec4> data(maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
3583         const float pixelSize  = 2.0f / static_cast<float>(maxWidth);
3584         float leftHandPosition = -1.0f;
3585 
3586         for (uint32_t ndx = 0u; ndx < maxWidth; ++ndx)
3587         {
3588             data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
3589             leftHandPosition += pixelSize;
3590         }
3591 
3592         deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
3593         flushAlloc(vk, device, alloc);
3594     }
3595 
3596     const Unique<VkFramebuffer> framebuffer(
3597         makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
3598     const VkViewport viewport              = makeViewport(maxWidth, 1u);
3599     const VkRect2D scissor                 = makeRect2D(maxWidth, 1u);
3600     const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
3601     Buffer imageBufferResult(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3602     const VkDeviceSize vertexBufferOffset = 0u;
3603 
3604     for (uint32_t width = 1u; width < maxWidth; width = getNextWidth(width))
3605     {
3606         totalIterations++;
3607 
3608         for (uint32_t ndx = 0u; ndx < inputBuffers.size(); ndx++)
3609         {
3610             const Allocation &alloc = inputBuffers[ndx]->getAllocation();
3611             initializeMemory(context, alloc, extraData[ndx]);
3612         }
3613 
3614         beginCommandBuffer(vk, *cmdBuffer);
3615         {
3616             vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3617 
3618             vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3619 
3620             beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
3621 
3622             vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3623 
3624             if (extraDataCount > 0)
3625             {
3626                 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3627                                          &descriptorSet.get(), 0u, nullptr);
3628             }
3629 
3630             vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
3631 
3632             vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
3633 
3634             endRenderPass(vk, *cmdBuffer);
3635 
3636             copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(),
3637                               tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
3638                               VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3639 
3640             endCommandBuffer(vk, *cmdBuffer);
3641 
3642             submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3643         }
3644         context.resetCommandPoolForVKSC(device, *cmdPool);
3645 
3646         {
3647             const Allocation &allocResult = imageBufferResult.getAllocation();
3648             invalidateAlloc(vk, device, allocResult);
3649 
3650             std::vector<const void *> datas;
3651             datas.push_back(allocResult.getHostPtr());
3652             if (!checkResult(internalData, datas, width, subgroupSize))
3653                 failedIterations++;
3654         }
3655     }
3656 
3657     if (0 < failedIterations)
3658     {
3659         unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3660 
3661         context.getTestContext().getLog()
3662             << TestLog::Message << valuesPassed << " / " << totalIterations << " values passed" << TestLog::EndMessage;
3663 
3664         return tcu::TestStatus::fail("Failed!");
3665     }
3666 
3667     return tcu::TestStatus::pass("OK");
3668 }
3669 
makeFragmentFrameBufferTest(Context & context,VkFormat format,const SSBOData * extraDatas,uint32_t extraDatasCount,const void * internalData,CheckResultFragment checkResult)3670 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTest(Context &context, VkFormat format,
3671                                                             const SSBOData *extraDatas, uint32_t extraDatasCount,
3672                                                             const void *internalData, CheckResultFragment checkResult)
3673 {
3674     return makeFragmentFrameBufferTestRequiredSubgroupSize(context, format, extraDatas, extraDatasCount, internalData,
3675                                                            checkResult, 0u, 0u);
3676 }
3677 
makeFragmentFrameBufferTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * extraDatas,uint32_t extraDatasCount,const void * internalData,CheckResultFragment checkResult,const uint32_t fragmentShaderStageCreateFlags,const uint32_t requiredSubgroupSize)3678 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTestRequiredSubgroupSize(
3679     Context &context, VkFormat format, const SSBOData *extraDatas, uint32_t extraDatasCount, const void *internalData,
3680     CheckResultFragment checkResult, const uint32_t fragmentShaderStageCreateFlags, const uint32_t requiredSubgroupSize)
3681 {
3682     const DeviceInterface &vk       = context.getDeviceInterface();
3683     const VkDevice device           = context.getDevice();
3684     const VkQueue queue             = context.getUniversalQueue();
3685     const uint32_t queueFamilyIndex = context.getUniversalQueueFamilyIndex();
3686     const Unique<VkShaderModule> vertexShaderModule(
3687         createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
3688     const Unique<VkShaderModule> fragmentShaderModule(
3689         createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
3690     std::vector<de::SharedPtr<BufferOrImage>> inputBuffers(extraDatasCount);
3691 
3692     for (uint32_t i = 0; i < extraDatasCount; i++)
3693     {
3694         if (extraDatas[i].isImage())
3695         {
3696             inputBuffers[i] = de::SharedPtr<BufferOrImage>(
3697                 new Image(context, static_cast<uint32_t>(extraDatas[i].numElements), 1, extraDatas[i].format));
3698         }
3699         else
3700         {
3701             DE_ASSERT(extraDatas[i].isUBO());
3702 
3703             const vk::VkDeviceSize size =
3704                 getElementSizeInBytes(extraDatas[i].format, extraDatas[i].layout) * extraDatas[i].numElements;
3705 
3706             inputBuffers[i] =
3707                 de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
3708         }
3709 
3710         const Allocation &alloc = inputBuffers[i]->getAllocation();
3711 
3712         initializeMemory(context, alloc, extraDatas[i]);
3713     }
3714 
3715     DescriptorSetLayoutBuilder layoutBuilder;
3716 
3717     for (uint32_t i = 0; i < extraDatasCount; i++)
3718     {
3719         layoutBuilder.addBinding(inputBuffers[i]->getType(), 1, VK_SHADER_STAGE_FRAGMENT_BIT, nullptr);
3720     }
3721 
3722     const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
3723     const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
3724     const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
3725     const uint32_t requiredSubgroupSizes[5] = {0u, 0u, 0u, 0u, requiredSubgroupSize};
3726     const Unique<VkPipeline> pipeline(makeGraphicsPipeline(
3727         context, *pipelineLayout, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, *vertexShaderModule,
3728         *fragmentShaderModule, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, *renderPass,
3729         VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, nullptr, nullptr, true, VK_FORMAT_R32G32B32A32_SFLOAT, 0u, 0u, 0u, 0u,
3730         fragmentShaderStageCreateFlags, requiredSubgroupSize != 0u ? requiredSubgroupSizes : nullptr));
3731     DescriptorPoolBuilder poolBuilder;
3732 
3733     // To stop validation complaining, always add at least one type to pool.
3734     poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3735     for (uint32_t i = 0; i < extraDatasCount; i++)
3736     {
3737         poolBuilder.addType(inputBuffers[i]->getType());
3738     }
3739 
3740     Move<VkDescriptorPool> descriptorPool;
3741     // Create descriptor set
3742     Move<VkDescriptorSet> descriptorSet;
3743 
3744     if (extraDatasCount > 0)
3745     {
3746         descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3747 
3748         descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3749     }
3750 
3751     DescriptorSetUpdateBuilder updateBuilder;
3752 
3753     for (uint32_t i = 0; i < extraDatasCount; i++)
3754     {
3755         if (inputBuffers[i]->isImage())
3756         {
3757             const VkDescriptorImageInfo info =
3758                 makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
3759                                         inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3760 
3761             updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i),
3762                                       inputBuffers[i]->getType(), &info);
3763         }
3764         else
3765         {
3766             const VkDescriptorBufferInfo info = makeDescriptorBufferInfo(
3767                 inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, inputBuffers[i]->getAsBuffer()->getSize());
3768 
3769             updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i),
3770                                       inputBuffers[i]->getType(), &info);
3771         }
3772     }
3773 
3774     if (extraDatasCount > 0)
3775         updateBuilder.update(vk, device);
3776 
3777     const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
3778     const uint32_t subgroupSize = getSubgroupSize(context);
3779     const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(context, *cmdPool));
3780     unsigned totalIterations  = 0;
3781     unsigned failedIterations = 0;
3782 
3783     for (uint32_t width = 8; width <= subgroupSize; width *= 2)
3784     {
3785         for (uint32_t height = 8; height <= subgroupSize; height *= 2)
3786         {
3787             totalIterations++;
3788 
3789             // re-init the data
3790             for (uint32_t i = 0; i < extraDatasCount; i++)
3791             {
3792                 const Allocation &alloc = inputBuffers[i]->getAllocation();
3793 
3794                 initializeMemory(context, alloc, extraDatas[i]);
3795             }
3796 
3797             const VkDeviceSize formatSize             = getFormatSizeInBytes(format);
3798             const VkDeviceSize resultImageSizeInBytes = width * height * formatSize;
3799             Image resultImage(context, width, height, format,
3800                               VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3801             Buffer resultBuffer(context, resultImageSizeInBytes, VK_IMAGE_USAGE_TRANSFER_DST_BIT);
3802             const Unique<VkFramebuffer> framebuffer(
3803                 makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), width, height));
3804             VkViewport viewport = makeViewport(width, height);
3805             VkRect2D scissor    = {{0, 0}, {width, height}};
3806 
3807             beginCommandBuffer(vk, *cmdBuffer);
3808 
3809             vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3810 
3811             vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3812 
3813             beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, width, height),
3814                             tcu::Vec4(0.0f));
3815 
3816             vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3817 
3818             if (extraDatasCount > 0)
3819                 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3820                                          &descriptorSet.get(), 0u, nullptr);
3821 
3822             vk.cmdDraw(*cmdBuffer, 4, 1, 0, 0);
3823 
3824             endRenderPass(vk, *cmdBuffer);
3825 
3826             copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), resultBuffer.getBuffer(),
3827                               tcu::IVec2(width, height), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
3828                               VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3829 
3830             endCommandBuffer(vk, *cmdBuffer);
3831 
3832             submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3833 
3834             std::vector<const void *> datas;
3835             {
3836                 const Allocation &resultAlloc = resultBuffer.getAllocation();
3837                 invalidateAlloc(vk, device, resultAlloc);
3838 
3839                 // we always have our result data first
3840                 datas.push_back(resultAlloc.getHostPtr());
3841             }
3842 
3843             if (!checkResult(internalData, datas, width, height, subgroupSize))
3844             {
3845                 failedIterations++;
3846             }
3847 
3848             context.resetCommandPoolForVKSC(device, *cmdPool);
3849         }
3850     }
3851 
3852     if (0 < failedIterations)
3853     {
3854         unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3855 
3856         context.getTestContext().getLog()
3857             << TestLog::Message << valuesPassed << " / " << totalIterations << " values passed" << TestLog::EndMessage;
3858 
3859         return tcu::TestStatus::fail("Failed!");
3860     }
3861 
3862     return tcu::TestStatus::pass("OK");
3863 }
3864 
makeComputePipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderModule shaderModule,const uint32_t pipelineShaderStageFlags,const uint32_t pipelineCreateFlags,VkPipeline basePipelineHandle,uint32_t localSizeX,uint32_t localSizeY,uint32_t localSizeZ,uint32_t requiredSubgroupSize)3865 Move<VkPipeline> makeComputePipeline(Context &context, const VkPipelineLayout pipelineLayout,
3866                                      const VkShaderModule shaderModule, const uint32_t pipelineShaderStageFlags,
3867                                      const uint32_t pipelineCreateFlags, VkPipeline basePipelineHandle,
3868                                      uint32_t localSizeX, uint32_t localSizeY, uint32_t localSizeZ,
3869                                      uint32_t requiredSubgroupSize)
3870 {
3871     const uint32_t localSize[3]                   = {localSizeX, localSizeY, localSizeZ};
3872     const vk::VkSpecializationMapEntry entries[3] = {
3873         {0, sizeof(uint32_t) * 0, sizeof(uint32_t)},
3874         {1, sizeof(uint32_t) * 1, sizeof(uint32_t)},
3875         {2, static_cast<uint32_t>(sizeof(uint32_t) * 2), sizeof(uint32_t)},
3876     };
3877     const vk::VkSpecializationInfo info                                                     = {/* mapEntryCount = */ 3,
3878                                            /* pMapEntries   = */ entries,
3879                                            /* dataSize      = */ sizeof(localSize),
3880                                            /* pData         = */ localSize};
3881     const vk::VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroupSizeCreateInfo = {
3882         VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, // VkStructureType    sType;
3883         nullptr,                                                                        // void*              pNext;
3884         requiredSubgroupSize // uint32_t           requiredSubgroupSize;
3885     };
3886     const vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams = {
3887         VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,              // VkStructureType sType;
3888         (requiredSubgroupSize != 0u ? &subgroupSizeCreateInfo : nullptr), // const void* pNext;
3889         pipelineShaderStageFlags,                                         // VkPipelineShaderStageCreateFlags flags;
3890         VK_SHADER_STAGE_COMPUTE_BIT,                                      // VkShaderStageFlagBits stage;
3891         shaderModule,                                                     // VkShaderModule module;
3892         "main",                                                           // const char* pName;
3893         &info, // const VkSpecializationInfo* pSpecializationInfo;
3894     };
3895     const vk::VkComputePipelineCreateInfo pipelineCreateInfo = {
3896         VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType sType;
3897         nullptr,                                        // const void* pNext;
3898         pipelineCreateFlags,                            // VkPipelineCreateFlags flags;
3899         pipelineShaderStageParams,                      // VkPipelineShaderStageCreateInfo stage;
3900         pipelineLayout,                                 // VkPipelineLayout layout;
3901 #ifndef CTS_USES_VULKANSC
3902         basePipelineHandle, // VkPipeline basePipelineHandle;
3903         -1,                 // int32_t basePipelineIndex;
3904 #else
3905         VK_NULL_HANDLE, // VkPipeline basePipelineHandle;
3906         0,              // int32_t basePipelineIndex;
3907 #endif // CTS_USES_VULKANSC
3908     };
3909     static_cast<void>(basePipelineHandle);
3910 
3911     return createComputePipeline(context.getDeviceInterface(), context.getDevice(), VK_NULL_HANDLE,
3912                                  &pipelineCreateInfo);
3913 }
3914 
3915 #ifndef CTS_USES_VULKANSC
makeMeshPipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderModule taskModule,const VkShaderModule meshModule,const uint32_t pipelineShaderStageFlags,const uint32_t pipelineCreateFlags,VkPipeline basePipelineHandle,uint32_t localSizeX,uint32_t localSizeY,uint32_t localSizeZ,uint32_t requiredSubgroupSize,const VkRenderPass renderPass)3916 Move<VkPipeline> makeMeshPipeline(Context &context, const VkPipelineLayout pipelineLayout,
3917                                   const VkShaderModule taskModule, const VkShaderModule meshModule,
3918                                   const uint32_t pipelineShaderStageFlags, const uint32_t pipelineCreateFlags,
3919                                   VkPipeline basePipelineHandle, uint32_t localSizeX, uint32_t localSizeY,
3920                                   uint32_t localSizeZ, uint32_t requiredSubgroupSize, const VkRenderPass renderPass)
3921 {
3922     const uint32_t localSize[3]                   = {localSizeX, localSizeY, localSizeZ};
3923     const vk::VkSpecializationMapEntry entries[3] = {
3924         {0, sizeof(uint32_t) * 0, sizeof(uint32_t)},
3925         {1, sizeof(uint32_t) * 1, sizeof(uint32_t)},
3926         {2, static_cast<uint32_t>(sizeof(uint32_t) * 2), sizeof(uint32_t)},
3927     };
3928     const vk::VkSpecializationInfo info                                                     = {/* mapEntryCount = */ 3,
3929                                            /* pMapEntries   = */ entries,
3930                                            /* dataSize      = */ sizeof(localSize),
3931                                            /* pData         = */ localSize};
3932     const vk::VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroupSizeCreateInfo = {
3933         VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, // VkStructureType    sType;
3934         nullptr,                                                                        // void*              pNext;
3935         requiredSubgroupSize // uint32_t           requiredSubgroupSize;
3936     };
3937 
3938     const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *pSubgroupSizeCreateInfo =
3939         ((requiredSubgroupSize != 0u) ? &subgroupSizeCreateInfo : nullptr);
3940 
3941     std::vector<VkPipelineShaderStageCreateInfo> shaderStageParams;
3942     vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams = {
3943         VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
3944         nullptr,                                             // const void* pNext;
3945         pipelineShaderStageFlags,                            // VkPipelineShaderStageCreateFlags flags;
3946         VK_SHADER_STAGE_FLAG_BITS_MAX_ENUM,                  // VkShaderStageFlagBits stage;
3947         VK_NULL_HANDLE,                                      // VkShaderModule module;
3948         "main",                                              // const char* pName;
3949         &info,                                               // const VkSpecializationInfo* pSpecializationInfo;
3950     };
3951 
3952     if (taskModule != VK_NULL_HANDLE)
3953     {
3954         pipelineShaderStageParams.module = taskModule;
3955         pipelineShaderStageParams.pNext  = pSubgroupSizeCreateInfo;
3956         pipelineShaderStageParams.stage  = VK_SHADER_STAGE_TASK_BIT_EXT;
3957         shaderStageParams.push_back(pipelineShaderStageParams);
3958     }
3959 
3960     if (meshModule != VK_NULL_HANDLE)
3961     {
3962         pipelineShaderStageParams.module = meshModule;
3963         pipelineShaderStageParams.pNext  = ((taskModule == VK_NULL_HANDLE) ? pSubgroupSizeCreateInfo : nullptr);
3964         pipelineShaderStageParams.stage  = VK_SHADER_STAGE_MESH_BIT_EXT;
3965         shaderStageParams.push_back(pipelineShaderStageParams);
3966     }
3967 
3968     const std::vector<VkViewport> viewports(1u, makeViewport(1u, 1u));
3969     const std::vector<VkRect2D> scissors(1u, makeRect2D(1u, 1u));
3970 
3971     return makeGraphicsPipeline(context.getDeviceInterface(), context.getDevice(), basePipelineHandle, pipelineLayout,
3972                                 pipelineCreateFlags, shaderStageParams, renderPass, viewports, scissors);
3973 }
3974 #endif // CTS_USES_VULKANSC
3975 
makeComputeOrMeshTestRequiredSubgroupSize(ComputeLike testType,Context & context,VkFormat format,const vkt::subgroups::SSBOData * inputs,uint32_t inputsCount,const void * internalData,vkt::subgroups::CheckResultCompute checkResult,const uint32_t pipelineShaderStageCreateFlags,const uint32_t numWorkgroups[3],const bool isRequiredSubgroupSize,const uint32_t subgroupSize,const uint32_t localSizesToTest[][3],const uint32_t localSizesToTestCount)3976 tcu::TestStatus makeComputeOrMeshTestRequiredSubgroupSize(
3977     ComputeLike testType, Context &context, VkFormat format, const vkt::subgroups::SSBOData *inputs,
3978     uint32_t inputsCount, const void *internalData, vkt::subgroups::CheckResultCompute checkResult,
3979     const uint32_t pipelineShaderStageCreateFlags, const uint32_t numWorkgroups[3], const bool isRequiredSubgroupSize,
3980     const uint32_t subgroupSize, const uint32_t localSizesToTest[][3], const uint32_t localSizesToTestCount)
3981 {
3982     const DeviceInterface &vk                 = context.getDeviceInterface();
3983     const VkDevice device                     = context.getDevice();
3984     const VkQueue queue                       = context.getUniversalQueue();
3985     const uint32_t queueFamilyIndex           = context.getUniversalQueueFamilyIndex();
3986     const auto &subgroupSizeControlProperties = context.getSubgroupSizeControlProperties();
3987     const VkDeviceSize elementSize            = getFormatSizeInBytes(format);
3988     const VkDeviceSize maxSubgroupSize = isRequiredSubgroupSize ? deMax32(subgroupSizeControlProperties.maxSubgroupSize,
3989                                                                           vkt::subgroups::maxSupportedSubgroupSize()) :
3990                                                                   vkt::subgroups::maxSupportedSubgroupSize();
3991     const VkDeviceSize resultBufferSize        = maxSubgroupSize * maxSubgroupSize * maxSubgroupSize;
3992     const VkDeviceSize resultBufferSizeInBytes = resultBufferSize * elementSize;
3993     Buffer resultBuffer(context, resultBufferSizeInBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
3994     std::vector<de::SharedPtr<BufferOrImage>> inputBuffers(inputsCount);
3995     const auto shaderStageFlags =
3996         ((testType == ComputeLike::COMPUTE) ? VK_SHADER_STAGE_COMPUTE_BIT
3997 #ifndef CTS_USES_VULKANSC
3998                                               :
3999                                               (VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_TASK_BIT_EXT));
4000 #else
4001                                               :
4002                                               0);
4003 #endif // CTS_USES_VULKANSC
4004     const auto pipelineBindPoint =
4005         ((testType == ComputeLike::COMPUTE) ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS);
4006     const auto pipelineStage = ((testType == ComputeLike::COMPUTE) ?
4007                                     VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT
4008 #ifndef CTS_USES_VULKANSC
4009                                     :
4010                                     (VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT | VK_PIPELINE_STAGE_MESH_SHADER_BIT_EXT));
4011 #else
4012                                     :
4013                                     0);
4014 #endif // CTS_USES_VULKANSC
4015     const auto renderArea = makeRect2D(1u, 1u);
4016 
4017     std::vector<tcu::UVec3> usedLocalSizes;
4018     for (uint32_t i = 0; i < localSizesToTestCount; ++i)
4019     {
4020         usedLocalSizes.push_back(tcu::UVec3(localSizesToTest[i][0], localSizesToTest[i][1], localSizesToTest[i][2]));
4021     }
4022 
4023     for (uint32_t i = 0; i < inputsCount; i++)
4024     {
4025         if (inputs[i].isImage())
4026         {
4027             inputBuffers[i] = de::SharedPtr<BufferOrImage>(
4028                 new Image(context, static_cast<uint32_t>(inputs[i].numElements), 1, inputs[i].format));
4029         }
4030         else
4031         {
4032             const auto usage =
4033                 (inputs[i].isUBO() ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT : VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
4034             const auto size = getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
4035             inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, usage));
4036         }
4037 
4038         const Allocation &alloc = inputBuffers[i]->getAllocation();
4039 
4040         initializeMemory(context, alloc, inputs[i]);
4041     }
4042 
4043     DescriptorSetLayoutBuilder layoutBuilder;
4044     layoutBuilder.addBinding(resultBuffer.getType(), 1, shaderStageFlags, nullptr);
4045 
4046     for (uint32_t i = 0; i < inputsCount; i++)
4047     {
4048         layoutBuilder.addBinding(inputBuffers[i]->getType(), 1, shaderStageFlags, nullptr);
4049     }
4050 
4051     const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
4052 
4053     Move<VkShaderModule> compShader;
4054     Move<VkShaderModule> meshShader;
4055     Move<VkShaderModule> taskShader;
4056     const auto &binaries = context.getBinaryCollection();
4057 
4058     if (testType == ComputeLike::COMPUTE)
4059     {
4060         compShader = createShaderModule(vk, device, binaries.get("comp"));
4061     }
4062     else if (testType == ComputeLike::MESH)
4063     {
4064         meshShader = createShaderModule(vk, device, binaries.get("mesh"));
4065         if (binaries.contains("task"))
4066             taskShader = createShaderModule(vk, device, binaries.get("task"));
4067     }
4068     else
4069     {
4070         DE_ASSERT(false);
4071     }
4072 
4073     const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
4074 
4075     DescriptorPoolBuilder poolBuilder;
4076 
4077     poolBuilder.addType(resultBuffer.getType());
4078 
4079     for (uint32_t i = 0; i < inputsCount; i++)
4080     {
4081         poolBuilder.addType(inputBuffers[i]->getType());
4082     }
4083 
4084     const Unique<VkDescriptorPool> descriptorPool(
4085         poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
4086     const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
4087     const VkDescriptorBufferInfo resultDescriptorInfo =
4088         makeDescriptorBufferInfo(resultBuffer.getBuffer(), 0ull, resultBufferSizeInBytes);
4089     DescriptorSetUpdateBuilder updateBuilder;
4090 
4091     updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
4092                               VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
4093 
4094     for (uint32_t i = 0; i < inputsCount; i++)
4095     {
4096         if (inputBuffers[i]->isImage())
4097         {
4098             const VkDescriptorImageInfo info =
4099                 makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
4100                                         inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
4101 
4102             updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i + 1),
4103                                       inputBuffers[i]->getType(), &info);
4104         }
4105         else
4106         {
4107             vk::VkDeviceSize size = getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
4108             VkDescriptorBufferInfo info =
4109                 makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, size);
4110 
4111             updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i + 1),
4112                                       inputBuffers[i]->getType(), &info);
4113         }
4114     }
4115 
4116     updateBuilder.update(vk, device);
4117 
4118     const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
4119     unsigned totalIterations  = 0;
4120     unsigned failedIterations = 0;
4121     const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(context, *cmdPool));
4122     std::vector<de::SharedPtr<Move<VkPipeline>>> pipelines(localSizesToTestCount);
4123     const auto reqSubgroupSize = (isRequiredSubgroupSize ? subgroupSize : 0u);
4124     Move<VkRenderPass> renderPass;
4125     Move<VkFramebuffer> framebuffer;
4126 
4127     if (testType == ComputeLike::MESH)
4128     {
4129         renderPass  = makeRenderPass(vk, device);
4130         framebuffer = makeFramebuffer(vk, device, renderPass.get(), 0u, nullptr, renderArea.extent.width,
4131                                       renderArea.extent.height);
4132     }
4133 
4134     context.getTestContext().touchWatchdog();
4135     {
4136         if (testType == ComputeLike::COMPUTE)
4137         {
4138             pipelines[0] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeComputePipeline(
4139                 context, *pipelineLayout, *compShader, pipelineShaderStageCreateFlags,
4140 #ifndef CTS_USES_VULKANSC
4141                 VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT,
4142 #else
4143                 0u,
4144 #endif // CTS_USES_VULKANSC
4145                 VK_NULL_HANDLE, usedLocalSizes[0][0], usedLocalSizes[0][1], usedLocalSizes[0][2], reqSubgroupSize)));
4146         }
4147 #ifndef CTS_USES_VULKANSC
4148         else if (testType == ComputeLike::MESH)
4149         {
4150             pipelines[0] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeMeshPipeline(
4151                 context, pipelineLayout.get(), taskShader.get(), meshShader.get(), pipelineShaderStageCreateFlags,
4152                 VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT, VK_NULL_HANDLE, usedLocalSizes[0][0], usedLocalSizes[0][1],
4153                 usedLocalSizes[0][2], reqSubgroupSize, renderPass.get())));
4154         }
4155 #endif // CTS_USES_VULKANSC
4156         else
4157         {
4158             DE_ASSERT(false);
4159         }
4160     }
4161     context.getTestContext().touchWatchdog();
4162 
4163     for (uint32_t index = 1; index < (localSizesToTestCount - 1); index++)
4164     {
4165         const uint32_t nextX = usedLocalSizes[index][0];
4166         const uint32_t nextY = usedLocalSizes[index][1];
4167         const uint32_t nextZ = usedLocalSizes[index][2];
4168 
4169         context.getTestContext().touchWatchdog();
4170         {
4171             if (testType == ComputeLike::COMPUTE)
4172             {
4173                 pipelines[index] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(
4174                     makeComputePipeline(context, *pipelineLayout, *compShader, pipelineShaderStageCreateFlags,
4175 #ifndef CTS_USES_VULKANSC
4176                                         VK_PIPELINE_CREATE_DERIVATIVE_BIT,
4177 #else
4178                                         0u,
4179 #endif // CTS_USES_VULKANSC
4180                                         **pipelines[0], nextX, nextY, nextZ, reqSubgroupSize)));
4181             }
4182 #ifndef CTS_USES_VULKANSC
4183             else if (testType == ComputeLike::MESH)
4184             {
4185                 pipelines[index] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeMeshPipeline(
4186                     context, pipelineLayout.get(), taskShader.get(), meshShader.get(), pipelineShaderStageCreateFlags,
4187                     VK_PIPELINE_CREATE_DERIVATIVE_BIT, pipelines[0].get()->get(), nextX, nextY, nextZ, reqSubgroupSize,
4188                     renderPass.get())));
4189             }
4190 #endif // CTS_USES_VULKANSC
4191             else
4192             {
4193                 DE_ASSERT(false);
4194             }
4195         }
4196         context.getTestContext().touchWatchdog();
4197     }
4198 
4199     for (uint32_t index = 0; index < (localSizesToTestCount - 1); index++)
4200     {
4201         // we are running one test
4202         totalIterations++;
4203 
4204         beginCommandBuffer(vk, *cmdBuffer);
4205         {
4206             if (testType == ComputeLike::MESH)
4207                 beginRenderPass(vk, *cmdBuffer, renderPass.get(), framebuffer.get(), renderArea);
4208 
4209             vk.cmdBindPipeline(*cmdBuffer, pipelineBindPoint, **pipelines[index]);
4210 
4211             vk.cmdBindDescriptorSets(*cmdBuffer, pipelineBindPoint, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u,
4212                                      nullptr);
4213 
4214             if (testType == ComputeLike::COMPUTE)
4215                 vk.cmdDispatch(*cmdBuffer, numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
4216 #ifndef CTS_USES_VULKANSC
4217             else if (testType == ComputeLike::MESH)
4218                 vk.cmdDrawMeshTasksEXT(*cmdBuffer, numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
4219                 //vk.cmdDrawMeshTasksNV(*cmdBuffer, numWorkgroups[0], 0);
4220 #endif // CTS_USES_VULKANSC
4221             else
4222                 DE_ASSERT(false);
4223 
4224             if (testType == ComputeLike::MESH)
4225                 endRenderPass(vk, *cmdBuffer);
4226         }
4227 
4228         // Make shader writes available.
4229         const auto postShaderBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
4230         vk.cmdPipelineBarrier(*cmdBuffer, pipelineStage, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u, &postShaderBarrier, 0u,
4231                               nullptr, 0u, nullptr);
4232 
4233         endCommandBuffer(vk, *cmdBuffer);
4234 
4235         submitCommandsAndWait(vk, device, queue, *cmdBuffer);
4236 
4237         std::vector<const void *> datas;
4238 
4239         {
4240             const Allocation &resultAlloc = resultBuffer.getAllocation();
4241             invalidateAlloc(vk, device, resultAlloc);
4242 
4243             // we always have our result data first
4244             datas.push_back(resultAlloc.getHostPtr());
4245         }
4246 
4247         for (uint32_t i = 0; i < inputsCount; i++)
4248         {
4249             if (!inputBuffers[i]->isImage())
4250             {
4251                 const Allocation &resultAlloc = inputBuffers[i]->getAllocation();
4252                 invalidateAlloc(vk, device, resultAlloc);
4253 
4254                 // we always have our result data first
4255                 datas.push_back(resultAlloc.getHostPtr());
4256             }
4257         }
4258 
4259         if (!checkResult(internalData, datas, numWorkgroups, usedLocalSizes[index].getPtr(), subgroupSize))
4260         {
4261             failedIterations++;
4262         }
4263 
4264         context.resetCommandPoolForVKSC(device, *cmdPool);
4265     }
4266 
4267     if (0 < failedIterations)
4268     {
4269         unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
4270 
4271         context.getTestContext().getLog()
4272             << TestLog::Message << valuesPassed << " / " << totalIterations << " values passed" << TestLog::EndMessage;
4273 
4274         return tcu::TestStatus::fail("Failed!");
4275     }
4276 
4277     return tcu::TestStatus::pass("OK");
4278 }
4279 
makeComputeTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * inputs,uint32_t inputsCount,const void * internalData,CheckResultCompute checkResult,const uint32_t pipelineShaderStageCreateFlags,const uint32_t numWorkgroups[3],const bool isRequiredSubgroupSize,const uint32_t subgroupSize,const uint32_t localSizesToTest[][3],const uint32_t localSizesToTestCount)4280 tcu::TestStatus vkt::subgroups::makeComputeTestRequiredSubgroupSize(
4281     Context &context, VkFormat format, const SSBOData *inputs, uint32_t inputsCount, const void *internalData,
4282     CheckResultCompute checkResult, const uint32_t pipelineShaderStageCreateFlags, const uint32_t numWorkgroups[3],
4283     const bool isRequiredSubgroupSize, const uint32_t subgroupSize, const uint32_t localSizesToTest[][3],
4284     const uint32_t localSizesToTestCount)
4285 {
4286     return makeComputeOrMeshTestRequiredSubgroupSize(ComputeLike::COMPUTE, context, format, inputs, inputsCount,
4287                                                      internalData, checkResult, pipelineShaderStageCreateFlags,
4288                                                      numWorkgroups, isRequiredSubgroupSize, subgroupSize,
4289                                                      localSizesToTest, localSizesToTestCount);
4290 }
4291 
makeMeshTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * inputs,uint32_t inputsCount,const void * internalData,CheckResultCompute checkResult,const uint32_t pipelineShaderStageCreateFlags,const uint32_t numWorkgroups[3],const bool isRequiredSubgroupSize,const uint32_t subgroupSize,const uint32_t localSizesToTest[][3],const uint32_t localSizesToTestCount)4292 tcu::TestStatus vkt::subgroups::makeMeshTestRequiredSubgroupSize(
4293     Context &context, VkFormat format, const SSBOData *inputs, uint32_t inputsCount, const void *internalData,
4294     CheckResultCompute checkResult, const uint32_t pipelineShaderStageCreateFlags, const uint32_t numWorkgroups[3],
4295     const bool isRequiredSubgroupSize, const uint32_t subgroupSize, const uint32_t localSizesToTest[][3],
4296     const uint32_t localSizesToTestCount)
4297 {
4298     return makeComputeOrMeshTestRequiredSubgroupSize(ComputeLike::MESH, context, format, inputs, inputsCount,
4299                                                      internalData, checkResult, pipelineShaderStageCreateFlags,
4300                                                      numWorkgroups, isRequiredSubgroupSize, subgroupSize,
4301                                                      localSizesToTest, localSizesToTestCount);
4302 }
4303 
makeComputeOrMeshTest(ComputeLike testType,Context & context,VkFormat format,const vkt::subgroups::SSBOData * inputs,uint32_t inputsCount,const void * internalData,vkt::subgroups::CheckResultCompute checkResult,uint32_t requiredSubgroupSize,const uint32_t pipelineShaderStageCreateFlags)4304 tcu::TestStatus makeComputeOrMeshTest(ComputeLike testType, Context &context, VkFormat format,
4305                                       const vkt::subgroups::SSBOData *inputs, uint32_t inputsCount,
4306                                       const void *internalData, vkt::subgroups::CheckResultCompute checkResult,
4307                                       uint32_t requiredSubgroupSize, const uint32_t pipelineShaderStageCreateFlags)
4308 {
4309     const uint32_t numWorkgroups[3]   = {4, 2, 2};
4310     const bool isRequiredSubgroupSize = (requiredSubgroupSize != 0u);
4311     const uint32_t subgroupSize =
4312         (isRequiredSubgroupSize ? requiredSubgroupSize : vkt::subgroups::getSubgroupSize(context));
4313 
4314     const uint32_t localSizesToTestCount                = 8;
4315     uint32_t localSizesToTest[localSizesToTestCount][3] = {
4316         {1, 1, 1}, {subgroupSize, 1, 1}, {1, subgroupSize, 1}, {1, 1, subgroupSize}, {32, 4, 1}, {1, 4, 32}, {3, 5, 7},
4317         {1, 1, 1} // Isn't used, just here to make double buffering checks easier
4318     };
4319 
4320     if (testType == ComputeLike::COMPUTE)
4321         return makeComputeTestRequiredSubgroupSize(
4322             context, format, inputs, inputsCount, internalData, checkResult, pipelineShaderStageCreateFlags,
4323             numWorkgroups, isRequiredSubgroupSize, subgroupSize, localSizesToTest, localSizesToTestCount);
4324     else
4325         return makeMeshTestRequiredSubgroupSize(context, format, inputs, inputsCount, internalData, checkResult,
4326                                                 pipelineShaderStageCreateFlags, numWorkgroups, isRequiredSubgroupSize,
4327                                                 subgroupSize, localSizesToTest, localSizesToTestCount);
4328 }
4329 
makeComputeTest(Context & context,VkFormat format,const SSBOData * inputs,uint32_t inputsCount,const void * internalData,CheckResultCompute checkResult,uint32_t requiredSubgroupSize,const uint32_t pipelineShaderStageCreateFlags)4330 tcu::TestStatus vkt::subgroups::makeComputeTest(Context &context, VkFormat format, const SSBOData *inputs,
4331                                                 uint32_t inputsCount, const void *internalData,
4332                                                 CheckResultCompute checkResult, uint32_t requiredSubgroupSize,
4333                                                 const uint32_t pipelineShaderStageCreateFlags)
4334 {
4335     return makeComputeOrMeshTest(ComputeLike::COMPUTE, context, format, inputs, inputsCount, internalData, checkResult,
4336                                  requiredSubgroupSize, pipelineShaderStageCreateFlags);
4337 }
4338 
makeMeshTest(Context & context,VkFormat format,const SSBOData * inputs,uint32_t inputsCount,const void * internalData,CheckResultCompute checkResult,uint32_t requiredSubgroupSize,const uint32_t pipelineShaderStageCreateFlags)4339 tcu::TestStatus vkt::subgroups::makeMeshTest(Context &context, VkFormat format, const SSBOData *inputs,
4340                                              uint32_t inputsCount, const void *internalData,
4341                                              CheckResultCompute checkResult, uint32_t requiredSubgroupSize,
4342                                              const uint32_t pipelineShaderStageCreateFlags)
4343 {
4344     return makeComputeOrMeshTest(ComputeLike::MESH, context, format, inputs, inputsCount, internalData, checkResult,
4345                                  requiredSubgroupSize, pipelineShaderStageCreateFlags);
4346 }
4347 
checkShaderStageSetValidity(const VkShaderStageFlags shaderStages)4348 static inline void checkShaderStageSetValidity(const VkShaderStageFlags shaderStages)
4349 {
4350     if (shaderStages == 0)
4351         TCU_THROW(InternalError, "Shader stage is not specified");
4352 
4353     // It can actually be only 1 or 0.
4354     const uint32_t exclusivePipelinesCount =
4355         (isAllComputeStages(shaderStages) ? 1 : 0) + (isAllGraphicsStages(shaderStages) ? 1 : 0)
4356 #ifndef CTS_USES_VULKANSC
4357         + (isAllRayTracingStages(shaderStages) ? 1 : 0) + (isAllMeshShadingStages(shaderStages) ? 1 : 0)
4358 #endif // CTS_USES_VULKANSC
4359         ;
4360 
4361     if (exclusivePipelinesCount != 1)
4362         TCU_THROW(InternalError, "Mix of shaders from different pipelines is detected");
4363 }
4364 
supportedCheckShader(Context & context,const VkShaderStageFlags shaderStages)4365 void vkt::subgroups::supportedCheckShader(Context &context, const VkShaderStageFlags shaderStages)
4366 {
4367     checkShaderStageSetValidity(shaderStages);
4368 
4369     if ((context.getSubgroupProperties().supportedStages & shaderStages) == 0)
4370     {
4371         if (isAllComputeStages(shaderStages))
4372             TCU_FAIL("Compute shader is required to support subgroup operations");
4373         else
4374             TCU_THROW(NotSupportedError, "Subgroup support is not available for test shader stage(s)");
4375     }
4376 
4377 #ifndef CTS_USES_VULKANSC
4378     if ((VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) & shaderStages &&
4379         context.isDeviceFunctionalitySupported("VK_KHR_portability_subset") &&
4380         !context.getPortabilitySubsetFeatures().tessellationIsolines)
4381     {
4382         TCU_THROW(NotSupportedError,
4383                   "VK_KHR_portability_subset: Tessellation iso lines are not supported by this implementation");
4384     }
4385 #endif // CTS_USES_VULKANSC
4386 }
4387 
4388 namespace vkt
4389 {
4390 namespace subgroups
4391 {
4392 typedef std::vector<de::SharedPtr<BufferOrImage>> vectorBufferOrImage;
4393 
// Shader group indices; RAYGEN_GROUP aliases FIRST_GROUP and GROUP_COUNT is the number of groups.
enum ShaderGroups
{
    FIRST_GROUP  = 0,
    RAYGEN_GROUP = FIRST_GROUP,
    MISS_GROUP   = 1,
    HIT_GROUP    = 2,
    CALL_GROUP   = 3,
    GROUP_COUNT  = 4
};
4403 
getAllRayTracingFormats()4404 const std::vector<vk::VkFormat> getAllRayTracingFormats()
4405 {
4406     std::vector<VkFormat> formats;
4407 
4408     formats.push_back(VK_FORMAT_R8G8B8_SINT);
4409     formats.push_back(VK_FORMAT_R8_UINT);
4410     formats.push_back(VK_FORMAT_R8G8B8A8_UINT);
4411     formats.push_back(VK_FORMAT_R16G16B16_SINT);
4412     formats.push_back(VK_FORMAT_R16_UINT);
4413     formats.push_back(VK_FORMAT_R16G16B16A16_UINT);
4414     formats.push_back(VK_FORMAT_R32G32B32_SINT);
4415     formats.push_back(VK_FORMAT_R32_UINT);
4416     formats.push_back(VK_FORMAT_R32G32B32A32_UINT);
4417     formats.push_back(VK_FORMAT_R64G64B64_SINT);
4418     formats.push_back(VK_FORMAT_R64_UINT);
4419     formats.push_back(VK_FORMAT_R64G64B64A64_UINT);
4420     formats.push_back(VK_FORMAT_R16G16B16A16_SFLOAT);
4421     formats.push_back(VK_FORMAT_R32_SFLOAT);
4422     formats.push_back(VK_FORMAT_R32G32B32A32_SFLOAT);
4423     formats.push_back(VK_FORMAT_R64_SFLOAT);
4424     formats.push_back(VK_FORMAT_R64G64B64_SFLOAT);
4425     formats.push_back(VK_FORMAT_R64G64B64A64_SFLOAT);
4426     formats.push_back(VK_FORMAT_R8_USCALED);
4427     formats.push_back(VK_FORMAT_R8G8_USCALED);
4428     formats.push_back(VK_FORMAT_R8G8B8_USCALED);
4429     formats.push_back(VK_FORMAT_R8G8B8A8_USCALED);
4430 
4431     return formats;
4432 }
4433 
addRayTracingNoSubgroupShader(SourceCollections & programCollection)4434 void addRayTracingNoSubgroupShader(SourceCollections &programCollection)
4435 {
4436     const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
4437 
4438     const std::string rgenShaderNoSubgroups =
4439         "#version 460 core\n"
4440         "#extension GL_EXT_ray_tracing: require\n"
4441         "layout(location = 0) rayPayloadEXT uvec4 payload;\n"
4442         "layout(location = 0) callableDataEXT uvec4 callData;"
4443         "layout(set = 1, binding = 0) uniform accelerationStructureEXT topLevelAS;\n"
4444         "\n"
4445         "void main()\n"
4446         "{\n"
4447         "  uint  rayFlags   = 0;\n"
4448         "  uint  cullMask   = 0xFF;\n"
4449         "  float tmin       = 0.0;\n"
4450         "  float tmax       = 9.0;\n"
4451         "  vec3  origin     = vec3((float(gl_LaunchIDEXT.x) + 0.5f) / float(gl_LaunchSizeEXT.x), "
4452         "(float(gl_LaunchIDEXT.y) + 0.5f) / float(gl_LaunchSizeEXT.y), 0.0);\n"
4453         "  vec3  directHit  = vec3(0.0, 0.0, -1.0);\n"
4454         "  vec3  directMiss = vec3(0.0, 0.0, +1.0);\n"
4455         "\n"
4456         "  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directHit, tmax, 0);\n"
4457         "  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directMiss, tmax, 0);\n"
4458         "  executeCallableEXT(0, 0);"
4459         "}\n";
4460     const std::string hitShaderNoSubgroups  = "#version 460 core\n"
4461                                               "#extension GL_EXT_ray_tracing: require\n"
4462                                               "hitAttributeEXT vec3 attribs;\n"
4463                                               "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
4464                                               "\n"
4465                                               "void main()\n"
4466                                               "{\n"
4467                                               "}\n";
4468     const std::string missShaderNoSubgroups = "#version 460 core\n"
4469                                               "#extension GL_EXT_ray_tracing: require\n"
4470                                               "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
4471                                               "\n"
4472                                               "void main()\n"
4473                                               "{\n"
4474                                               "}\n";
4475     const std::string sectShaderNoSubgroups = "#version 460 core\n"
4476                                               "#extension GL_EXT_ray_tracing: require\n"
4477                                               "hitAttributeEXT vec3 hitAttribute;\n"
4478                                               "\n"
4479                                               "void main()\n"
4480                                               "{\n"
4481                                               "  reportIntersectionEXT(0.75f, 0x7Eu);\n"
4482                                               "}\n";
4483     const std::string callShaderNoSubgroups = "#version 460 core\n"
4484                                               "#extension GL_EXT_ray_tracing: require\n"
4485                                               "layout(location = 0) callableDataInEXT float callData;\n"
4486                                               "\n"
4487                                               "void main()\n"
4488                                               "{\n"
4489                                               "}\n";
4490 
4491     programCollection.glslSources.add("rgen_noSubgroup") << glu::RaygenSource(rgenShaderNoSubgroups) << buildOptions;
4492     programCollection.glslSources.add("ahit_noSubgroup") << glu::AnyHitSource(hitShaderNoSubgroups) << buildOptions;
4493     programCollection.glslSources.add("chit_noSubgroup") << glu::ClosestHitSource(hitShaderNoSubgroups) << buildOptions;
4494     programCollection.glslSources.add("miss_noSubgroup") << glu::MissSource(missShaderNoSubgroups) << buildOptions;
4495     programCollection.glslSources.add("sect_noSubgroup")
4496         << glu::IntersectionSource(sectShaderNoSubgroups) << buildOptions;
4497     programCollection.glslSources.add("call_noSubgroup") << glu::CallableSource(callShaderNoSubgroups) << buildOptions;
4498 }
4499 
4500 #ifndef CTS_USES_VULKANSC
4501 
enumerateRayTracingShaderStages(const VkShaderStageFlags shaderStage)4502 static vector<VkShaderStageFlagBits> enumerateRayTracingShaderStages(const VkShaderStageFlags shaderStage)
4503 {
4504     vector<VkShaderStageFlagBits> result;
4505     const VkShaderStageFlagBits shaderStageFlags[] = {
4506         VK_SHADER_STAGE_RAYGEN_BIT_KHR, VK_SHADER_STAGE_ANY_HIT_BIT_KHR,      VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,
4507         VK_SHADER_STAGE_MISS_BIT_KHR,   VK_SHADER_STAGE_INTERSECTION_BIT_KHR, VK_SHADER_STAGE_CALLABLE_BIT_KHR,
4508     };
4509 
4510     for (auto shaderStageFlag : shaderStageFlags)
4511     {
4512         if (0 != (shaderStage & shaderStageFlag))
4513             result.push_back(shaderStageFlag);
4514     }
4515 
4516     return result;
4517 }
4518 
getRayTracingResultBinding(const VkShaderStageFlagBits shaderStage)4519 static uint32_t getRayTracingResultBinding(const VkShaderStageFlagBits shaderStage)
4520 {
4521     const VkShaderStageFlags shaderStageFlags[] = {
4522         VK_SHADER_STAGE_RAYGEN_BIT_KHR, VK_SHADER_STAGE_ANY_HIT_BIT_KHR,      VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,
4523         VK_SHADER_STAGE_MISS_BIT_KHR,   VK_SHADER_STAGE_INTERSECTION_BIT_KHR, VK_SHADER_STAGE_CALLABLE_BIT_KHR,
4524     };
4525 
4526     for (uint32_t shaderStageNdx = 0; shaderStageNdx < DE_LENGTH_OF_ARRAY(shaderStageFlags); ++shaderStageNdx)
4527     {
4528         if (0 != (shaderStage & shaderStageFlags[shaderStageNdx]))
4529         {
4530             DE_ASSERT(0 == (shaderStage & (~shaderStageFlags[shaderStageNdx])));
4531 
4532             return shaderStageNdx;
4533         }
4534     }
4535 
4536     TCU_THROW(InternalError, "Non-raytracing stage specified or no stage at all");
4537 }
4538 
makeRayTracingInputBuffers(Context & context,VkFormat format,const SSBOData * extraDatas,uint32_t extraDatasCount,const vector<VkShaderStageFlagBits> & stagesVector)4539 static vectorBufferOrImage makeRayTracingInputBuffers(Context &context, VkFormat format, const SSBOData *extraDatas,
4540                                                       uint32_t extraDatasCount,
4541                                                       const vector<VkShaderStageFlagBits> &stagesVector)
4542 {
4543     const size_t stagesCount           = stagesVector.size();
4544     const VkDeviceSize shaderSize      = getMaxWidth();
4545     const VkDeviceSize inputBufferSize = getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize;
4546     vectorBufferOrImage inputBuffers(stagesCount + extraDatasCount);
4547 
4548     // The implicit result SSBO we use to store our outputs from the shader
4549     for (size_t stageNdx = 0u; stageNdx < stagesCount; ++stageNdx)
4550         inputBuffers[stageNdx] =
4551             de::SharedPtr<BufferOrImage>(new Buffer(context, inputBufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT));
4552 
4553     for (size_t stageNdx = stagesCount; stageNdx < stagesCount + extraDatasCount; ++stageNdx)
4554     {
4555         const size_t datasNdx = stageNdx - stagesCount;
4556 
4557         if (extraDatas[datasNdx].isImage())
4558         {
4559             inputBuffers[stageNdx] = de::SharedPtr<BufferOrImage>(new Image(
4560                 context, static_cast<uint32_t>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
4561         }
4562         else
4563         {
4564             const auto usage = (extraDatas[datasNdx].isUBO() ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT :
4565                                                                VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
4566             const auto size  = getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) *
4567                               extraDatas[datasNdx].numElements;
4568             inputBuffers[stageNdx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, usage));
4569         }
4570 
4571         initializeMemory(context, inputBuffers[stageNdx]->getAllocation(), extraDatas[datasNdx]);
4572     }
4573 
4574     return inputBuffers;
4575 }
4576 
makeRayTracingDescriptorSetLayout(Context & context,const SSBOData * extraDatas,uint32_t extraDatasCount,const vector<VkShaderStageFlagBits> & stagesVector,const vectorBufferOrImage & inputBuffers)4577 static Move<VkDescriptorSetLayout> makeRayTracingDescriptorSetLayout(Context &context, const SSBOData *extraDatas,
4578                                                                      uint32_t extraDatasCount,
4579                                                                      const vector<VkShaderStageFlagBits> &stagesVector,
4580                                                                      const vectorBufferOrImage &inputBuffers)
4581 {
4582     const DeviceInterface &vkd = context.getDeviceInterface();
4583     const VkDevice device      = context.getDevice();
4584     const size_t stagesCount   = stagesVector.size();
4585     DescriptorSetLayoutBuilder layoutBuilder;
4586 
4587     // The implicit result SSBO we use to store our outputs from the shader
4588     for (size_t stageNdx = 0u; stageNdx < stagesVector.size(); ++stageNdx)
4589     {
4590         const uint32_t stageBinding = getRayTracingResultBinding(stagesVector[stageNdx]);
4591 
4592         layoutBuilder.addIndexedBinding(inputBuffers[stageNdx]->getType(), 1, stagesVector[stageNdx], stageBinding,
4593                                         nullptr);
4594     }
4595 
4596     for (size_t stageNdx = stagesCount; stageNdx < stagesCount + extraDatasCount; ++stageNdx)
4597     {
4598         const size_t datasNdx = stageNdx - stagesCount;
4599 
4600         layoutBuilder.addIndexedBinding(inputBuffers[stageNdx]->getType(), 1, extraDatas[datasNdx].stages,
4601                                         extraDatas[datasNdx].binding, nullptr);
4602     }
4603 
4604     return layoutBuilder.build(vkd, device);
4605 }
4606 
makeRayTracingDescriptorSetLayoutAS(Context & context)4607 static Move<VkDescriptorSetLayout> makeRayTracingDescriptorSetLayoutAS(Context &context)
4608 {
4609     const DeviceInterface &vkd = context.getDeviceInterface();
4610     const VkDevice device      = context.getDevice();
4611     DescriptorSetLayoutBuilder layoutBuilder;
4612 
4613     layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, VK_SHADER_STAGE_RAYGEN_BIT_KHR);
4614 
4615     return layoutBuilder.build(vkd, device);
4616 }
4617 
makeRayTracingDescriptorPool(Context & context,const vectorBufferOrImage & inputBuffers)4618 static Move<VkDescriptorPool> makeRayTracingDescriptorPool(Context &context, const vectorBufferOrImage &inputBuffers)
4619 {
4620     const DeviceInterface &vkd       = context.getDeviceInterface();
4621     const VkDevice device            = context.getDevice();
4622     const uint32_t maxDescriptorSets = 2u;
4623     DescriptorPoolBuilder poolBuilder;
4624     Move<VkDescriptorPool> result;
4625 
4626     if (inputBuffers.size() > 0)
4627     {
4628         for (size_t ndx = 0u; ndx < inputBuffers.size(); ndx++)
4629             poolBuilder.addType(inputBuffers[ndx]->getType());
4630     }
4631 
4632     poolBuilder.addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR);
4633 
4634     result = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, maxDescriptorSets);
4635 
4636     return result;
4637 }
4638 
makeRayTracingDescriptorSet(Context & context,VkDescriptorPool descriptorPool,VkDescriptorSetLayout descriptorSetLayout,const SSBOData * extraDatas,uint32_t extraDatasCount,const vector<VkShaderStageFlagBits> & stagesVector,const vectorBufferOrImage & inputBuffers)4639 static Move<VkDescriptorSet> makeRayTracingDescriptorSet(Context &context, VkDescriptorPool descriptorPool,
4640                                                          VkDescriptorSetLayout descriptorSetLayout,
4641                                                          const SSBOData *extraDatas, uint32_t extraDatasCount,
4642                                                          const vector<VkShaderStageFlagBits> &stagesVector,
4643                                                          const vectorBufferOrImage &inputBuffers)
4644 {
4645     const DeviceInterface &vkd = context.getDeviceInterface();
4646     const VkDevice device      = context.getDevice();
4647     const size_t stagesCount   = stagesVector.size();
4648     Move<VkDescriptorSet> descriptorSet;
4649 
4650     if (inputBuffers.size() > 0)
4651     {
4652         DescriptorSetUpdateBuilder updateBuilder;
4653 
4654         // Create descriptor set
4655         descriptorSet = makeDescriptorSet(vkd, device, descriptorPool, descriptorSetLayout);
4656 
4657         for (size_t ndx = 0u; ndx < stagesCount + extraDatasCount; ndx++)
4658         {
4659             const uint32_t binding = (ndx < stagesCount) ? getRayTracingResultBinding(stagesVector[ndx]) :
4660                                                            extraDatas[ndx - stagesCount].binding;
4661 
4662             if (inputBuffers[ndx]->isImage())
4663             {
4664                 const VkDescriptorImageInfo info =
4665                     makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
4666                                             inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
4667 
4668                 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(binding),
4669                                           inputBuffers[ndx]->getType(), &info);
4670             }
4671             else
4672             {
4673                 const VkDescriptorBufferInfo info = makeDescriptorBufferInfo(
4674                     inputBuffers[ndx]->getAsBuffer()->getBuffer(), 0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
4675 
4676                 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(binding),
4677                                           inputBuffers[ndx]->getType(), &info);
4678             }
4679         }
4680 
4681         updateBuilder.update(vkd, device);
4682     }
4683 
4684     return descriptorSet;
4685 }
4686 
makeRayTracingDescriptorSetAS(Context & context,VkDescriptorPool descriptorPool,VkDescriptorSetLayout descriptorSetLayout,de::MovePtr<TopLevelAccelerationStructure> & topLevelAccelerationStructure)4687 static Move<VkDescriptorSet> makeRayTracingDescriptorSetAS(
4688     Context &context, VkDescriptorPool descriptorPool, VkDescriptorSetLayout descriptorSetLayout,
4689     de::MovePtr<TopLevelAccelerationStructure> &topLevelAccelerationStructure)
4690 {
4691     const DeviceInterface &vkd                                            = context.getDeviceInterface();
4692     const VkDevice device                                                 = context.getDevice();
4693     const TopLevelAccelerationStructure *topLevelAccelerationStructurePtr = topLevelAccelerationStructure.get();
4694     const VkWriteDescriptorSetAccelerationStructureKHR accelerationStructureWriteDescriptorSet = {
4695         VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, //  VkStructureType sType;
4696         nullptr,                                                           //  const void* pNext;
4697         1u,                                                                //  uint32_t accelerationStructureCount;
4698         topLevelAccelerationStructurePtr->getPtr(), //  const VkAccelerationStructureKHR* pAccelerationStructures;
4699     };
4700     Move<VkDescriptorSet> descriptorSet = makeDescriptorSet(vkd, device, descriptorPool, descriptorSetLayout);
4701 
4702     DescriptorSetUpdateBuilder()
4703         .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
4704                      VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &accelerationStructureWriteDescriptorSet)
4705         .update(vkd, device);
4706 
4707     return descriptorSet;
4708 }
4709 
makeRayTracingPipelineLayout(Context & context,const VkDescriptorSetLayout descriptorSetLayout0,const VkDescriptorSetLayout descriptorSetLayout1)4710 static Move<VkPipelineLayout> makeRayTracingPipelineLayout(Context &context,
4711                                                            const VkDescriptorSetLayout descriptorSetLayout0,
4712                                                            const VkDescriptorSetLayout descriptorSetLayout1)
4713 {
4714     const DeviceInterface &vkd = context.getDeviceInterface();
4715     const VkDevice device      = context.getDevice();
4716     const std::vector<VkDescriptorSetLayout> descriptorSetLayouts{descriptorSetLayout0, descriptorSetLayout1};
4717     const uint32_t descriptorSetLayoutsSize = static_cast<uint32_t>(descriptorSetLayouts.size());
4718 
4719     return makePipelineLayout(vkd, device, descriptorSetLayoutsSize, descriptorSetLayouts.data());
4720 }
4721 
createTopAccelerationStructure(Context & context,de::SharedPtr<BottomLevelAccelerationStructure> bottomLevelAccelerationStructure)4722 static de::MovePtr<TopLevelAccelerationStructure> createTopAccelerationStructure(
4723     Context &context, de::SharedPtr<BottomLevelAccelerationStructure> bottomLevelAccelerationStructure)
4724 {
4725     const DeviceInterface &vkd                        = context.getDeviceInterface();
4726     const VkDevice device                             = context.getDevice();
4727     Allocator &allocator                              = context.getDefaultAllocator();
4728     de::MovePtr<TopLevelAccelerationStructure> result = makeTopLevelAccelerationStructure();
4729 
4730     result->setInstanceCount(1);
4731     result->addInstance(bottomLevelAccelerationStructure);
4732     result->create(vkd, device, allocator);
4733 
4734     return result;
4735 }
4736 
createBottomAccelerationStructure(Context & context)4737 static de::SharedPtr<BottomLevelAccelerationStructure> createBottomAccelerationStructure(Context &context)
4738 {
4739     const DeviceInterface &vkd                           = context.getDeviceInterface();
4740     const VkDevice device                                = context.getDevice();
4741     Allocator &allocator                                 = context.getDefaultAllocator();
4742     de::MovePtr<BottomLevelAccelerationStructure> result = makeBottomLevelAccelerationStructure();
4743     const std::vector<tcu::Vec3> geometryData{tcu::Vec3(-1.0f, -1.0f, -2.0f), tcu::Vec3(+1.0f, +1.0f, -1.0f)};
4744 
4745     result->setGeometryCount(1u);
4746     result->addGeometry(geometryData, false);
4747     result->create(vkd, device, allocator, 0u);
4748 
4749     return de::SharedPtr<BottomLevelAccelerationStructure>(result.release());
4750 }
4751 
makeRayTracingPipeline(Context & context,const VkShaderStageFlags shaderStageTested,const VkPipelineLayout pipelineLayout,const uint32_t shaderStageCreateFlags[6],const uint32_t requiredSubgroupSize[6],Move<VkPipeline> & pipelineOut)4752 static de::MovePtr<RayTracingPipeline> makeRayTracingPipeline(
4753     Context &context, const VkShaderStageFlags shaderStageTested, const VkPipelineLayout pipelineLayout,
4754     const uint32_t shaderStageCreateFlags[6], const uint32_t requiredSubgroupSize[6], Move<VkPipeline> &pipelineOut)
4755 {
4756     const DeviceInterface &vkd   = context.getDeviceInterface();
4757     const VkDevice device        = context.getDevice();
4758     BinaryCollection &collection = context.getBinaryCollection();
4759     const char *shaderRgenName =
4760         (0 != (shaderStageTested & VK_SHADER_STAGE_RAYGEN_BIT_KHR)) ? "rgen" : "rgen_noSubgroup";
4761     const char *shaderAhitName =
4762         (0 != (shaderStageTested & VK_SHADER_STAGE_ANY_HIT_BIT_KHR)) ? "ahit" : "ahit_noSubgroup";
4763     const char *shaderChitName =
4764         (0 != (shaderStageTested & VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR)) ? "chit" : "chit_noSubgroup";
4765     const char *shaderMissName = (0 != (shaderStageTested & VK_SHADER_STAGE_MISS_BIT_KHR)) ? "miss" : "miss_noSubgroup";
4766     const char *shaderSectName =
4767         (0 != (shaderStageTested & VK_SHADER_STAGE_INTERSECTION_BIT_KHR)) ? "sect" : "sect_noSubgroup";
4768     const char *shaderCallName =
4769         (0 != (shaderStageTested & VK_SHADER_STAGE_CALLABLE_BIT_KHR)) ? "call" : "call_noSubgroup";
4770     const VkShaderModuleCreateFlags noShaderModuleCreateFlags = static_cast<VkShaderModuleCreateFlags>(0);
4771     Move<VkShaderModule> rgenShaderModule =
4772         createShaderModule(vkd, device, collection.get(shaderRgenName), noShaderModuleCreateFlags);
4773     Move<VkShaderModule> ahitShaderModule =
4774         createShaderModule(vkd, device, collection.get(shaderAhitName), noShaderModuleCreateFlags);
4775     Move<VkShaderModule> chitShaderModule =
4776         createShaderModule(vkd, device, collection.get(shaderChitName), noShaderModuleCreateFlags);
4777     Move<VkShaderModule> missShaderModule =
4778         createShaderModule(vkd, device, collection.get(shaderMissName), noShaderModuleCreateFlags);
4779     Move<VkShaderModule> sectShaderModule =
4780         createShaderModule(vkd, device, collection.get(shaderSectName), noShaderModuleCreateFlags);
4781     Move<VkShaderModule> callShaderModule =
4782         createShaderModule(vkd, device, collection.get(shaderCallName), noShaderModuleCreateFlags);
4783     const VkPipelineShaderStageCreateFlags noPipelineShaderStageCreateFlags =
4784         static_cast<VkPipelineShaderStageCreateFlags>(0);
4785     const VkPipelineShaderStageCreateFlags rgenPipelineShaderStageCreateFlags =
4786         (shaderStageCreateFlags == nullptr) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[0];
4787     const VkPipelineShaderStageCreateFlags ahitPipelineShaderStageCreateFlags =
4788         (shaderStageCreateFlags == nullptr) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[1];
4789     const VkPipelineShaderStageCreateFlags chitPipelineShaderStageCreateFlags =
4790         (shaderStageCreateFlags == nullptr) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[2];
4791     const VkPipelineShaderStageCreateFlags missPipelineShaderStageCreateFlags =
4792         (shaderStageCreateFlags == nullptr) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[3];
4793     const VkPipelineShaderStageCreateFlags sectPipelineShaderStageCreateFlags =
4794         (shaderStageCreateFlags == nullptr) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[4];
4795     const VkPipelineShaderStageCreateFlags callPipelineShaderStageCreateFlags =
4796         (shaderStageCreateFlags == nullptr) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[5];
4797     const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT requiredSubgroupSizeCreateInfo[6] = {
4798         {
4799             VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4800             nullptr,
4801             requiredSubgroupSize != nullptr ? requiredSubgroupSize[0] : 0u,
4802         },
4803         {
4804             VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4805             nullptr,
4806             requiredSubgroupSize != nullptr ? requiredSubgroupSize[1] : 0u,
4807         },
4808         {
4809             VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4810             nullptr,
4811             requiredSubgroupSize != nullptr ? requiredSubgroupSize[2] : 0u,
4812         },
4813         {
4814             VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4815             nullptr,
4816             requiredSubgroupSize != nullptr ? requiredSubgroupSize[3] : 0u,
4817         },
4818         {
4819             VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4820             nullptr,
4821             requiredSubgroupSize != nullptr ? requiredSubgroupSize[4] : 0u,
4822         },
4823         {
4824             VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4825             nullptr,
4826             requiredSubgroupSize != nullptr ? requiredSubgroupSize[5] : 0u,
4827         },
4828     };
4829     const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rgenRequiredSubgroupSizeCreateInfo =
4830         (requiredSubgroupSizeCreateInfo[0].requiredSubgroupSize == 0) ? nullptr : &requiredSubgroupSizeCreateInfo[0];
4831     const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *ahitRequiredSubgroupSizeCreateInfo =
4832         (requiredSubgroupSizeCreateInfo[1].requiredSubgroupSize == 0) ? nullptr : &requiredSubgroupSizeCreateInfo[1];
4833     const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *chitRequiredSubgroupSizeCreateInfo =
4834         (requiredSubgroupSizeCreateInfo[2].requiredSubgroupSize == 0) ? nullptr : &requiredSubgroupSizeCreateInfo[2];
4835     const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *missRequiredSubgroupSizeCreateInfo =
4836         (requiredSubgroupSizeCreateInfo[3].requiredSubgroupSize == 0) ? nullptr : &requiredSubgroupSizeCreateInfo[3];
4837     const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *sectRequiredSubgroupSizeCreateInfo =
4838         (requiredSubgroupSizeCreateInfo[4].requiredSubgroupSize == 0) ? nullptr : &requiredSubgroupSizeCreateInfo[4];
4839     const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *callRequiredSubgroupSizeCreateInfo =
4840         (requiredSubgroupSizeCreateInfo[5].requiredSubgroupSize == 0) ? nullptr : &requiredSubgroupSizeCreateInfo[5];
4841     de::MovePtr<RayTracingPipeline> rayTracingPipeline = de::newMovePtr<RayTracingPipeline>();
4842 
4843     rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR, rgenShaderModule, RAYGEN_GROUP, nullptr,
4844                                   rgenPipelineShaderStageCreateFlags, rgenRequiredSubgroupSizeCreateInfo);
4845     rayTracingPipeline->addShader(VK_SHADER_STAGE_ANY_HIT_BIT_KHR, ahitShaderModule, HIT_GROUP, nullptr,
4846                                   ahitPipelineShaderStageCreateFlags, ahitRequiredSubgroupSizeCreateInfo);
4847     rayTracingPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, chitShaderModule, HIT_GROUP, nullptr,
4848                                   chitPipelineShaderStageCreateFlags, chitRequiredSubgroupSizeCreateInfo);
4849     rayTracingPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR, missShaderModule, MISS_GROUP, nullptr,
4850                                   missPipelineShaderStageCreateFlags, missRequiredSubgroupSizeCreateInfo);
4851     rayTracingPipeline->addShader(VK_SHADER_STAGE_INTERSECTION_BIT_KHR, sectShaderModule, HIT_GROUP, nullptr,
4852                                   sectPipelineShaderStageCreateFlags, sectRequiredSubgroupSizeCreateInfo);
4853     rayTracingPipeline->addShader(VK_SHADER_STAGE_CALLABLE_BIT_KHR, callShaderModule, CALL_GROUP, nullptr,
4854                                   callPipelineShaderStageCreateFlags, callRequiredSubgroupSizeCreateInfo);
4855 
4856     // Must execute createPipeline here, due to pNext pointers in calls to addShader are local
4857     pipelineOut = rayTracingPipeline->createPipeline(vkd, device, pipelineLayout);
4858 
4859     return rayTracingPipeline;
4860 }
4861 
getPossibleRayTracingSubgroupStages(Context & context,const VkShaderStageFlags testedStages)4862 VkShaderStageFlags getPossibleRayTracingSubgroupStages(Context &context, const VkShaderStageFlags testedStages)
4863 {
4864     const VkPhysicalDeviceSubgroupProperties &subgroupProperties = context.getSubgroupProperties();
4865     const VkShaderStageFlags stages                              = testedStages & subgroupProperties.supportedStages;
4866 
4867     DE_ASSERT(isAllRayTracingStages(testedStages));
4868 
4869     return stages;
4870 }
4871 
allRayTracingStages(Context & context,VkFormat format,const SSBOData * extraDatas,uint32_t extraDataCount,const void * internalData,const VerificationFunctor & checkResult,const VkShaderStageFlags shaderStage)4872 tcu::TestStatus allRayTracingStages(Context &context, VkFormat format, const SSBOData *extraDatas,
4873                                     uint32_t extraDataCount, const void *internalData,
4874                                     const VerificationFunctor &checkResult, const VkShaderStageFlags shaderStage)
4875 {
4876     return vkt::subgroups::allRayTracingStagesRequiredSubgroupSize(
4877         context, format, extraDatas, extraDataCount, internalData, checkResult, shaderStage, nullptr, nullptr);
4878 }
4879 
allRayTracingStagesRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * extraDatas,uint32_t extraDatasCount,const void * internalData,const VerificationFunctor & checkResult,const VkShaderStageFlags shaderStageTested,const uint32_t shaderStageCreateFlags[6],const uint32_t requiredSubgroupSize[6])4880 tcu::TestStatus allRayTracingStagesRequiredSubgroupSize(Context &context, VkFormat format, const SSBOData *extraDatas,
4881                                                         uint32_t extraDatasCount, const void *internalData,
4882                                                         const VerificationFunctor &checkResult,
4883                                                         const VkShaderStageFlags shaderStageTested,
4884                                                         const uint32_t shaderStageCreateFlags[6],
4885                                                         const uint32_t requiredSubgroupSize[6])
4886 {
4887     const DeviceInterface &vkd                       = context.getDeviceInterface();
4888     const VkDevice device                            = context.getDevice();
4889     const VkQueue queue                              = context.getUniversalQueue();
4890     const uint32_t queueFamilyIndex                  = context.getUniversalQueueFamilyIndex();
4891     Allocator &allocator                             = context.getDefaultAllocator();
4892     const uint32_t subgroupSize                      = getSubgroupSize(context);
4893     const uint32_t maxWidth                          = getMaxWidth();
4894     const vector<VkShaderStageFlagBits> stagesVector = enumerateRayTracingShaderStages(shaderStageTested);
4895     const uint32_t stagesCount                       = static_cast<uint32_t>(stagesVector.size());
4896     de::SharedPtr<BottomLevelAccelerationStructure> bottomLevelAccelerationStructure =
4897         createBottomAccelerationStructure(context);
4898     de::MovePtr<TopLevelAccelerationStructure> topLevelAccelerationStructure =
4899         createTopAccelerationStructure(context, bottomLevelAccelerationStructure);
4900     vectorBufferOrImage inputBuffers =
4901         makeRayTracingInputBuffers(context, format, extraDatas, extraDatasCount, stagesVector);
4902     const Move<VkDescriptorSetLayout> descriptorSetLayout =
4903         makeRayTracingDescriptorSetLayout(context, extraDatas, extraDatasCount, stagesVector, inputBuffers);
4904     const Move<VkDescriptorSetLayout> descriptorSetLayoutAS = makeRayTracingDescriptorSetLayoutAS(context);
4905     const Move<VkPipelineLayout> pipelineLayout =
4906         makeRayTracingPipelineLayout(context, *descriptorSetLayout, *descriptorSetLayoutAS);
4907     Move<VkPipeline> pipeline                                = Move<VkPipeline>();
4908     const de::MovePtr<RayTracingPipeline> rayTracingPipeline = makeRayTracingPipeline(
4909         context, shaderStageTested, *pipelineLayout, shaderStageCreateFlags, requiredSubgroupSize, pipeline);
4910     const uint32_t shaderGroupHandleSize    = context.getRayTracingPipelineProperties().shaderGroupHandleSize;
4911     const uint32_t shaderGroupBaseAlignment = context.getRayTracingPipelineProperties().shaderGroupBaseAlignment;
4912     de::MovePtr<BufferWithMemory> rgenShaderBindingTable = rayTracingPipeline->createShaderBindingTable(
4913         vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, RAYGEN_GROUP, 1u);
4914     de::MovePtr<BufferWithMemory> missShaderBindingTable = rayTracingPipeline->createShaderBindingTable(
4915         vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, MISS_GROUP, 1u);
4916     de::MovePtr<BufferWithMemory> hitsShaderBindingTable = rayTracingPipeline->createShaderBindingTable(
4917         vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, HIT_GROUP, 1u);
4918     de::MovePtr<BufferWithMemory> callShaderBindingTable = rayTracingPipeline->createShaderBindingTable(
4919         vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, CALL_GROUP, 1u);
4920     const VkStridedDeviceAddressRegionKHR rgenShaderBindingTableRegion =
4921         makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, rgenShaderBindingTable->get(), 0),
4922                                           shaderGroupHandleSize, shaderGroupHandleSize);
4923     const VkStridedDeviceAddressRegionKHR missShaderBindingTableRegion =
4924         makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, missShaderBindingTable->get(), 0),
4925                                           shaderGroupHandleSize, shaderGroupHandleSize);
4926     const VkStridedDeviceAddressRegionKHR hitsShaderBindingTableRegion =
4927         makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, hitsShaderBindingTable->get(), 0),
4928                                           shaderGroupHandleSize, shaderGroupHandleSize);
4929     const VkStridedDeviceAddressRegionKHR callShaderBindingTableRegion =
4930         makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, callShaderBindingTable->get(), 0),
4931                                           shaderGroupHandleSize, shaderGroupHandleSize);
4932     const Move<VkDescriptorPool> descriptorPool = makeRayTracingDescriptorPool(context, inputBuffers);
4933     const Move<VkDescriptorSet> descriptorSet   = makeRayTracingDescriptorSet(
4934         context, *descriptorPool, *descriptorSetLayout, extraDatas, extraDatasCount, stagesVector, inputBuffers);
4935     const Move<VkDescriptorSet> descriptorSetAS =
4936         makeRayTracingDescriptorSetAS(context, *descriptorPool, *descriptorSetLayoutAS, topLevelAccelerationStructure);
4937     const Move<VkCommandPool> cmdPool     = makeCommandPool(vkd, device, queueFamilyIndex);
4938     const Move<VkCommandBuffer> cmdBuffer = makeCommandBuffer(context, *cmdPool);
4939     uint32_t passIterations               = 0u;
4940     uint32_t failIterations               = 0u;
4941 
4942     DE_ASSERT(shaderStageTested != 0);
4943 
4944     for (uint32_t width = 1u; width < maxWidth; width = getNextWidth(width))
4945     {
4946 
4947         for (uint32_t ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
4948         {
4949             // re-init the data
4950             const Allocation &alloc = inputBuffers[ndx]->getAllocation();
4951 
4952             initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
4953         }
4954 
4955         beginCommandBuffer(vkd, *cmdBuffer);
4956         {
4957             vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);
4958 
4959             bottomLevelAccelerationStructure->build(vkd, device, *cmdBuffer);
4960             topLevelAccelerationStructure->build(vkd, device, *cmdBuffer);
4961 
4962             vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 1u, 1u,
4963                                       &descriptorSetAS.get(), 0u, nullptr);
4964 
4965             if (stagesCount + extraDatasCount > 0)
4966                 vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0u, 1u,
4967                                           &descriptorSet.get(), 0u, nullptr);
4968 
4969             cmdTraceRays(vkd, *cmdBuffer, &rgenShaderBindingTableRegion, &missShaderBindingTableRegion,
4970                          &hitsShaderBindingTableRegion, &callShaderBindingTableRegion, width, 1, 1);
4971 
4972             const VkMemoryBarrier postTraceMemoryBarrier =
4973                 makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
4974             cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR,
4975                                      VK_PIPELINE_STAGE_HOST_BIT, &postTraceMemoryBarrier);
4976         }
4977         endCommandBuffer(vkd, *cmdBuffer);
4978 
4979         submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
4980 
4981         for (uint32_t ndx = 0u; ndx < stagesCount; ++ndx)
4982         {
4983             std::vector<const void *> datas;
4984 
4985             if (!inputBuffers[ndx]->isImage())
4986             {
4987                 const Allocation &resultAlloc = inputBuffers[ndx]->getAllocation();
4988 
4989                 invalidateAlloc(vkd, device, resultAlloc);
4990 
4991                 // we always have our result data first
4992                 datas.push_back(resultAlloc.getHostPtr());
4993             }
4994 
4995             for (uint32_t index = stagesCount; index < stagesCount + extraDatasCount; ++index)
4996             {
4997                 const uint32_t datasNdx = index - stagesCount;
4998 
4999                 if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
5000                 {
5001                     const Allocation &resultAlloc = inputBuffers[index]->getAllocation();
5002 
5003                     invalidateAlloc(vkd, device, resultAlloc);
5004 
5005                     // we always have our result data first
5006                     datas.push_back(resultAlloc.getHostPtr());
5007                 }
5008             }
5009 
5010             if (!checkResult(internalData, datas, width, subgroupSize, false))
5011                 failIterations++;
5012             else
5013                 passIterations++;
5014         }
5015 
5016         context.resetCommandPoolForVKSC(device, *cmdPool);
5017     }
5018 
5019     if (failIterations > 0 || passIterations == 0)
5020         return tcu::TestStatus::fail("Failed " + de::toString(failIterations) + " out of " +
5021                                      de::toString(failIterations + passIterations) + " iterations.");
5022     else
5023         return tcu::TestStatus::pass("OK");
5024 }
5025 #endif // CTS_USES_VULKANSC
5026 
5027 } // namespace subgroups
5028 } // namespace vkt
5029