1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2019 The Khronos Group Inc.
6 * Copyright (c) 2019 Google Inc.
7 * Copyright (c) 2017 Codeplay Software Ltd.
8 *
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
20 *
21 */ /*!
22 * \file
23 * \brief Subgroups Tests Utils
24 */ /*--------------------------------------------------------------------*/
25
26 #include "vktSubgroupsTestsUtils.hpp"
27 #include "vkRayTracingUtil.hpp"
28 #include "deFloat16.h"
29 #include "deRandom.hpp"
30 #include "tcuCommandLine.hpp"
31 #include "tcuStringTemplate.hpp"
32 #include "vkBarrierUtil.hpp"
33 #include "vkImageUtil.hpp"
34 #include "vkTypeUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkObjUtil.hpp"
37
38 using namespace tcu;
39 using namespace std;
40 using namespace vk;
41 using namespace vkt;
42
43 namespace
44 {
45
// Identifies which compute-like shader stage a test targets: a classic compute shader or a mesh shader.
enum class ComputeLike { COMPUTE = 0, MESH };
47
getMaxWidth()48 deUint32 getMaxWidth ()
49 {
50 return 1024u;
51 }
52
// Produces the next invocation width to test after 'width'.
//
// Widths below 128 advance by one so that every value up to the maximum
// subgroup size (128) is covered exhaustively; from 128 onwards only
// powers of two are visited to keep total test time reasonable.
deUint32 getNextWidth (const deUint32 width)
{
    return (width < 128) ? (width + 1) : (width * 2);
}
66
// Returns the size in bytes of a single element of the given format as the
// subgroup tests lay it out in buffer memory. Unhandled formats trigger a
// fatal error and return 0.
deUint32 getFormatSizeInBytes (const VkFormat format)
{
    switch (format)
    {
        default:
            DE_FATAL("Unhandled format!");
            return 0;
        case VK_FORMAT_R8_SINT:
        case VK_FORMAT_R8_UINT:
            return static_cast<deUint32>(sizeof(deInt8));
        case VK_FORMAT_R8G8_SINT:
        case VK_FORMAT_R8G8_UINT:
            return static_cast<deUint32>(sizeof(deInt8) * 2);
        // Note: 3-component 8-bit formats are padded out to 4 components.
        case VK_FORMAT_R8G8B8_SINT:
        case VK_FORMAT_R8G8B8_UINT:
        case VK_FORMAT_R8G8B8A8_SINT:
        case VK_FORMAT_R8G8B8A8_UINT:
            return static_cast<deUint32>(sizeof(deInt8) * 4);
        case VK_FORMAT_R16_SINT:
        case VK_FORMAT_R16_UINT:
        case VK_FORMAT_R16_SFLOAT:
            return static_cast<deUint32>(sizeof(deInt16));
        case VK_FORMAT_R16G16_SINT:
        case VK_FORMAT_R16G16_UINT:
        case VK_FORMAT_R16G16_SFLOAT:
            return static_cast<deUint32>(sizeof(deInt16) * 2);
        // 3-component 16-bit formats are likewise padded to 4 components.
        case VK_FORMAT_R16G16B16_UINT:
        case VK_FORMAT_R16G16B16_SINT:
        case VK_FORMAT_R16G16B16_SFLOAT:
        case VK_FORMAT_R16G16B16A16_SINT:
        case VK_FORMAT_R16G16B16A16_UINT:
        case VK_FORMAT_R16G16B16A16_SFLOAT:
            return static_cast<deUint32>(sizeof(deInt16) * 4);
        case VK_FORMAT_R32_SINT:
        case VK_FORMAT_R32_UINT:
        case VK_FORMAT_R32_SFLOAT:
            return static_cast<deUint32>(sizeof(deInt32));
        case VK_FORMAT_R32G32_SINT:
        case VK_FORMAT_R32G32_UINT:
        case VK_FORMAT_R32G32_SFLOAT:
            return static_cast<deUint32>(sizeof(deInt32) * 2);
        // 3-component 32-bit formats are likewise padded to 4 components.
        case VK_FORMAT_R32G32B32_SINT:
        case VK_FORMAT_R32G32B32_UINT:
        case VK_FORMAT_R32G32B32_SFLOAT:
        case VK_FORMAT_R32G32B32A32_SINT:
        case VK_FORMAT_R32G32B32A32_UINT:
        case VK_FORMAT_R32G32B32A32_SFLOAT:
            return static_cast<deUint32>(sizeof(deInt32) * 4);
        case VK_FORMAT_R64_SINT:
        case VK_FORMAT_R64_UINT:
        case VK_FORMAT_R64_SFLOAT:
            return static_cast<deUint32>(sizeof(deInt64));
        case VK_FORMAT_R64G64_SINT:
        case VK_FORMAT_R64G64_UINT:
        case VK_FORMAT_R64G64_SFLOAT:
            return static_cast<deUint32>(sizeof(deInt64) * 2);
        // 3-component 64-bit formats are likewise padded to 4 components.
        case VK_FORMAT_R64G64B64_SINT:
        case VK_FORMAT_R64G64B64_UINT:
        case VK_FORMAT_R64G64B64_SFLOAT:
        case VK_FORMAT_R64G64B64A64_SINT:
        case VK_FORMAT_R64G64B64A64_UINT:
        case VK_FORMAT_R64G64B64A64_SFLOAT:
            return static_cast<deUint32>(sizeof(deInt64) * 4);
        // The below formats are used to represent bool and bvec* types. These
        // types are passed to the shader as int and ivec* types, before the
        // calculations are done as booleans. We need a distinct type here so
        // that the shader generators can switch on it and generate the correct
        // shader source for testing.
        case VK_FORMAT_R8_USCALED:
            return static_cast<deUint32>(sizeof(deInt32));
        case VK_FORMAT_R8G8_USCALED:
            return static_cast<deUint32>(sizeof(deInt32) * 2);
        case VK_FORMAT_R8G8B8_USCALED:
        case VK_FORMAT_R8G8B8A8_USCALED:
            return static_cast<deUint32>(sizeof(deInt32) * 4);
    }
}
144
getElementSizeInBytes(const VkFormat format,const subgroups::SSBOData::InputDataLayoutType layout)145 deUint32 getElementSizeInBytes (const VkFormat format,
146 const subgroups::SSBOData::InputDataLayoutType layout)
147 {
148 const deUint32 bytes = getFormatSizeInBytes(format);
149
150 if (layout == subgroups::SSBOData::LayoutStd140)
151 return bytes < 16 ? 16 : bytes;
152 else
153 return bytes;
154 }
155
// Creates a single-subpass render pass with one color attachment of the
// given format. The attachment is cleared on load, stored on completion,
// and transitioned to TRANSFER_SRC_OPTIMAL so the result can be copied out
// for verification. Two external subpass dependencies order the color
// writes against surrounding memory reads.
Move<VkRenderPass> makeRenderPass (Context& context, VkFormat format)
{
    const VkAttachmentReference colorReference =
    {
        0,                                          // first (and only) attachment
        VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
    };
    const VkSubpassDescription subpassDescription =
    {
        0u,                                         // VkSubpassDescriptionFlags flags;
        VK_PIPELINE_BIND_POINT_GRAPHICS,            // VkPipelineBindPoint pipelineBindPoint;
        0,                                          // deUint32 inputAttachmentCount;
        DE_NULL,                                    // const VkAttachmentReference* pInputAttachments;
        1,                                          // deUint32 colorAttachmentCount;
        &colorReference,                            // const VkAttachmentReference* pColorAttachments;
        DE_NULL,                                    // const VkAttachmentReference* pResolveAttachments;
        DE_NULL,                                    // const VkAttachmentReference* pDepthStencilAttachment;
        0,                                          // deUint32 preserveAttachmentCount;
        DE_NULL                                     // const deUint32* pPreserveAttachments;
    };
    // [0]: external -> subpass 0 (before rendering); [1]: subpass 0 -> external (after rendering).
    const VkSubpassDependency subpassDependencies[2] =
    {
        {
            VK_SUBPASS_EXTERNAL,                            // deUint32 srcSubpass;
            0u,                                             // deUint32 dstSubpass;
            VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,           // VkPipelineStageFlags srcStageMask;
            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,  // VkPipelineStageFlags dstStageMask;
            VK_ACCESS_MEMORY_READ_BIT,                      // VkAccessFlags srcAccessMask;
            VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, // VkAccessFlags dstAccessMask;
            VK_DEPENDENCY_BY_REGION_BIT                     // VkDependencyFlags dependencyFlags;
        },
        {
            0u,                                             // deUint32 srcSubpass;
            VK_SUBPASS_EXTERNAL,                            // deUint32 dstSubpass;
            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,  // VkPipelineStageFlags srcStageMask;
            VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,           // VkPipelineStageFlags dstStageMask;
            VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, // VkAccessFlags srcAccessMask;
            VK_ACCESS_MEMORY_READ_BIT,                      // VkAccessFlags dstAccessMask;
            VK_DEPENDENCY_BY_REGION_BIT                     // VkDependencyFlags dependencyFlags;
        },
    };
    const VkAttachmentDescription attachmentDescription =
    {
        0u,                                         // VkAttachmentDescriptionFlags flags;
        format,                                     // VkFormat format;
        VK_SAMPLE_COUNT_1_BIT,                      // VkSampleCountFlagBits samples;
        VK_ATTACHMENT_LOAD_OP_CLEAR,                // VkAttachmentLoadOp loadOp;
        VK_ATTACHMENT_STORE_OP_STORE,               // VkAttachmentStoreOp storeOp;
        VK_ATTACHMENT_LOAD_OP_DONT_CARE,            // VkAttachmentLoadOp stencilLoadOp;
        VK_ATTACHMENT_STORE_OP_DONT_CARE,           // VkAttachmentStoreOp stencilStoreOp;
        VK_IMAGE_LAYOUT_UNDEFINED,                  // VkImageLayout initialLayout;
        VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL        // VkImageLayout finalLayout;
    };
    const VkRenderPassCreateInfo renderPassCreateInfo =
    {
        VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,  // VkStructureType sType;
        DE_NULL,                                    // const void* pNext;
        0u,                                         // VkRenderPassCreateFlags flags;
        1,                                          // deUint32 attachmentCount;
        &attachmentDescription,                     // const VkAttachmentDescription* pAttachments;
        1,                                          // deUint32 subpassCount;
        &subpassDescription,                        // const VkSubpassDescription* pSubpasses;
        2,                                          // deUint32 dependencyCount;
        subpassDependencies                         // const VkSubpassDependency* pDependencies;
    };

    return createRenderPass(context.getDeviceInterface(), context.getDevice(), &renderPassCreateInfo);
}
224
makeGraphicsPipeline(const DeviceInterface & vk,const VkDevice device,const VkPipelineLayout pipelineLayout,const VkShaderModule vertexShaderModule,const VkShaderModule tessellationControlShaderModule,const VkShaderModule tessellationEvalShaderModule,const VkShaderModule geometryShaderModule,const VkShaderModule fragmentShaderModule,const VkRenderPass renderPass,const std::vector<VkViewport> & viewports,const std::vector<VkRect2D> & scissors,const VkPrimitiveTopology topology,const deUint32 subpass,const deUint32 patchControlPoints,const VkPipelineVertexInputStateCreateInfo * vertexInputStateCreateInfo,const VkPipelineRasterizationStateCreateInfo * rasterizationStateCreateInfo,const VkPipelineMultisampleStateCreateInfo * multisampleStateCreateInfo,const VkPipelineDepthStencilStateCreateInfo * depthStencilStateCreateInfo,const VkPipelineColorBlendStateCreateInfo * colorBlendStateCreateInfo,const VkPipelineDynamicStateCreateInfo * dynamicStateCreateInfo,const deUint32 vertexShaderStageCreateFlags,const deUint32 tessellationControlShaderStageCreateFlags,const deUint32 tessellationEvalShaderStageCreateFlags,const deUint32 geometryShaderStageCreateFlags,const deUint32 fragmentShaderStageCreateFlags,const deUint32 requiredSubgroupSize[5])225 Move<VkPipeline> makeGraphicsPipeline (const DeviceInterface& vk,
226 const VkDevice device,
227 const VkPipelineLayout pipelineLayout,
228 const VkShaderModule vertexShaderModule,
229 const VkShaderModule tessellationControlShaderModule,
230 const VkShaderModule tessellationEvalShaderModule,
231 const VkShaderModule geometryShaderModule,
232 const VkShaderModule fragmentShaderModule,
233 const VkRenderPass renderPass,
234 const std::vector<VkViewport>& viewports,
235 const std::vector<VkRect2D>& scissors,
236 const VkPrimitiveTopology topology,
237 const deUint32 subpass,
238 const deUint32 patchControlPoints,
239 const VkPipelineVertexInputStateCreateInfo* vertexInputStateCreateInfo,
240 const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo,
241 const VkPipelineMultisampleStateCreateInfo* multisampleStateCreateInfo,
242 const VkPipelineDepthStencilStateCreateInfo* depthStencilStateCreateInfo,
243 const VkPipelineColorBlendStateCreateInfo* colorBlendStateCreateInfo,
244 const VkPipelineDynamicStateCreateInfo* dynamicStateCreateInfo,
245 const deUint32 vertexShaderStageCreateFlags,
246 const deUint32 tessellationControlShaderStageCreateFlags,
247 const deUint32 tessellationEvalShaderStageCreateFlags,
248 const deUint32 geometryShaderStageCreateFlags,
249 const deUint32 fragmentShaderStageCreateFlags,
250 const deUint32 requiredSubgroupSize[5])
251 {
252 const VkBool32 disableRasterization = (fragmentShaderModule == DE_NULL);
253 const bool hasTessellation = (tessellationControlShaderModule != DE_NULL || tessellationEvalShaderModule != DE_NULL);
254
255 VkPipelineShaderStageCreateInfo stageCreateInfo =
256 {
257 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType
258 DE_NULL, // const void* pNext
259 0u, // VkPipelineShaderStageCreateFlags flags
260 VK_SHADER_STAGE_VERTEX_BIT, // VkShaderStageFlagBits stage
261 DE_NULL, // VkShaderModule module
262 "main", // const char* pName
263 DE_NULL // const VkSpecializationInfo* pSpecializationInfo
264 };
265
266 std::vector<VkPipelineShaderStageCreateInfo> pipelineShaderStageParams;
267
268 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT requiredSubgroupSizeCreateInfo[5] =
269 {
270 {
271 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
272 DE_NULL,
273 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[0] : 0u,
274 },
275 {
276 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
277 DE_NULL,
278 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[1] : 0u,
279 },
280 {
281 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
282 DE_NULL,
283 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[2] : 0u,
284 },
285 {
286 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
287 DE_NULL,
288 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[3] : 0u,
289 },
290 {
291 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
292 DE_NULL,
293 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[4] : 0u,
294 },
295 };
296
297 {
298 stageCreateInfo.pNext = (requiredSubgroupSizeCreateInfo[0].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[0] : DE_NULL;
299 stageCreateInfo.flags = vertexShaderStageCreateFlags;
300 stageCreateInfo.stage = VK_SHADER_STAGE_VERTEX_BIT;
301 stageCreateInfo.module = vertexShaderModule;
302 pipelineShaderStageParams.push_back(stageCreateInfo);
303 }
304
305 if (tessellationControlShaderModule != DE_NULL)
306 {
307 stageCreateInfo.pNext = (requiredSubgroupSizeCreateInfo[1].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[1] : DE_NULL;
308 stageCreateInfo.flags = tessellationControlShaderStageCreateFlags;
309 stageCreateInfo.stage = VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
310 stageCreateInfo.module = tessellationControlShaderModule;
311 pipelineShaderStageParams.push_back(stageCreateInfo);
312 }
313
314 if (tessellationEvalShaderModule != DE_NULL)
315 {
316 stageCreateInfo.pNext = (requiredSubgroupSize != DE_NULL && requiredSubgroupSizeCreateInfo[2].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[2] : DE_NULL;
317 stageCreateInfo.flags = tessellationEvalShaderStageCreateFlags;
318 stageCreateInfo.stage = VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
319 stageCreateInfo.module = tessellationEvalShaderModule;
320 pipelineShaderStageParams.push_back(stageCreateInfo);
321 }
322
323 if (geometryShaderModule != DE_NULL)
324 {
325 stageCreateInfo.pNext = (requiredSubgroupSizeCreateInfo[3].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[3] : DE_NULL;
326 stageCreateInfo.flags = geometryShaderStageCreateFlags;
327 stageCreateInfo.stage = VK_SHADER_STAGE_GEOMETRY_BIT;
328 stageCreateInfo.module = geometryShaderModule;
329 pipelineShaderStageParams.push_back(stageCreateInfo);
330 }
331
332 if (fragmentShaderModule != DE_NULL)
333 {
334 stageCreateInfo.pNext = (requiredSubgroupSizeCreateInfo[4].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[4] : DE_NULL;
335 stageCreateInfo.flags = fragmentShaderStageCreateFlags;
336 stageCreateInfo.stage = VK_SHADER_STAGE_FRAGMENT_BIT;
337 stageCreateInfo.module = fragmentShaderModule;
338 pipelineShaderStageParams.push_back(stageCreateInfo);
339 }
340
341 const VkVertexInputBindingDescription vertexInputBindingDescription =
342 {
343 0u, // deUint32 binding
344 sizeof(tcu::Vec4), // deUint32 stride
345 VK_VERTEX_INPUT_RATE_VERTEX, // VkVertexInputRate inputRate
346 };
347
348 const VkVertexInputAttributeDescription vertexInputAttributeDescription =
349 {
350 0u, // deUint32 location
351 0u, // deUint32 binding
352 VK_FORMAT_R32G32B32A32_SFLOAT, // VkFormat format
353 0u // deUint32 offset
354 };
355
356 const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfoDefault =
357 {
358 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType
359 DE_NULL, // const void* pNext
360 (VkPipelineVertexInputStateCreateFlags)0, // VkPipelineVertexInputStateCreateFlags flags
361 1u, // deUint32 vertexBindingDescriptionCount
362 &vertexInputBindingDescription, // const VkVertexInputBindingDescription* pVertexBindingDescriptions
363 1u, // deUint32 vertexAttributeDescriptionCount
364 &vertexInputAttributeDescription // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions
365 };
366
367 const VkPipelineInputAssemblyStateCreateInfo inputAssemblyStateCreateInfo =
368 {
369 VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, // VkStructureType sType
370 DE_NULL, // const void* pNext
371 0u, // VkPipelineInputAssemblyStateCreateFlags flags
372 topology, // VkPrimitiveTopology topology
373 VK_FALSE // VkBool32 primitiveRestartEnable
374 };
375
376 const VkPipelineTessellationStateCreateInfo tessStateCreateInfo =
377 {
378 VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, // VkStructureType sType
379 DE_NULL, // const void* pNext
380 0u, // VkPipelineTessellationStateCreateFlags flags
381 patchControlPoints // deUint32 patchControlPoints
382 };
383
384 const VkPipelineViewportStateCreateInfo viewportStateCreateInfo =
385 {
386 VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, // VkStructureType sType
387 DE_NULL, // const void* pNext
388 (VkPipelineViewportStateCreateFlags)0, // VkPipelineViewportStateCreateFlags flags
389 viewports.empty() ? 1u : (deUint32)viewports.size(), // deUint32 viewportCount
390 viewports.empty() ? DE_NULL : &viewports[0], // const VkViewport* pViewports
391 viewports.empty() ? 1u : (deUint32)scissors.size(), // deUint32 scissorCount
392 scissors.empty() ? DE_NULL : &scissors[0] // const VkRect2D* pScissors
393 };
394
395 const VkPipelineRasterizationStateCreateInfo rasterizationStateCreateInfoDefault =
396 {
397 VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, // VkStructureType sType
398 DE_NULL, // const void* pNext
399 0u, // VkPipelineRasterizationStateCreateFlags flags
400 VK_FALSE, // VkBool32 depthClampEnable
401 disableRasterization, // VkBool32 rasterizerDiscardEnable
402 VK_POLYGON_MODE_FILL, // VkPolygonMode polygonMode
403 VK_CULL_MODE_NONE, // VkCullModeFlags cullMode
404 VK_FRONT_FACE_COUNTER_CLOCKWISE, // VkFrontFace frontFace
405 VK_FALSE, // VkBool32 depthBiasEnable
406 0.0f, // float depthBiasConstantFactor
407 0.0f, // float depthBiasClamp
408 0.0f, // float depthBiasSlopeFactor
409 1.0f // float lineWidth
410 };
411
412 const VkPipelineMultisampleStateCreateInfo multisampleStateCreateInfoDefault =
413 {
414 VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, // VkStructureType sType
415 DE_NULL, // const void* pNext
416 0u, // VkPipelineMultisampleStateCreateFlags flags
417 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits rasterizationSamples
418 VK_FALSE, // VkBool32 sampleShadingEnable
419 1.0f, // float minSampleShading
420 DE_NULL, // const VkSampleMask* pSampleMask
421 VK_FALSE, // VkBool32 alphaToCoverageEnable
422 VK_FALSE // VkBool32 alphaToOneEnable
423 };
424
425 const VkStencilOpState stencilOpState =
426 {
427 VK_STENCIL_OP_KEEP, // VkStencilOp failOp
428 VK_STENCIL_OP_KEEP, // VkStencilOp passOp
429 VK_STENCIL_OP_KEEP, // VkStencilOp depthFailOp
430 VK_COMPARE_OP_NEVER, // VkCompareOp compareOp
431 0, // deUint32 compareMask
432 0, // deUint32 writeMask
433 0 // deUint32 reference
434 };
435
436 const VkPipelineDepthStencilStateCreateInfo depthStencilStateCreateInfoDefault =
437 {
438 VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, // VkStructureType sType
439 DE_NULL, // const void* pNext
440 0u, // VkPipelineDepthStencilStateCreateFlags flags
441 VK_FALSE, // VkBool32 depthTestEnable
442 VK_FALSE, // VkBool32 depthWriteEnable
443 VK_COMPARE_OP_LESS_OR_EQUAL, // VkCompareOp depthCompareOp
444 VK_FALSE, // VkBool32 depthBoundsTestEnable
445 VK_FALSE, // VkBool32 stencilTestEnable
446 stencilOpState, // VkStencilOpState front
447 stencilOpState, // VkStencilOpState back
448 0.0f, // float minDepthBounds
449 1.0f, // float maxDepthBounds
450 };
451
452 const VkPipelineColorBlendAttachmentState colorBlendAttachmentState =
453 {
454 VK_FALSE, // VkBool32 blendEnable
455 VK_BLEND_FACTOR_ZERO, // VkBlendFactor srcColorBlendFactor
456 VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstColorBlendFactor
457 VK_BLEND_OP_ADD, // VkBlendOp colorBlendOp
458 VK_BLEND_FACTOR_ZERO, // VkBlendFactor srcAlphaBlendFactor
459 VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstAlphaBlendFactor
460 VK_BLEND_OP_ADD, // VkBlendOp alphaBlendOp
461 VK_COLOR_COMPONENT_R_BIT // VkColorComponentFlags colorWriteMask
462 | VK_COLOR_COMPONENT_G_BIT
463 | VK_COLOR_COMPONENT_B_BIT
464 | VK_COLOR_COMPONENT_A_BIT
465 };
466
467 const VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfoDefault =
468 {
469 VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType sType
470 DE_NULL, // const void* pNext
471 0u, // VkPipelineColorBlendStateCreateFlags flags
472 VK_FALSE, // VkBool32 logicOpEnable
473 VK_LOGIC_OP_CLEAR, // VkLogicOp logicOp
474 1u, // deUint32 attachmentCount
475 &colorBlendAttachmentState, // const VkPipelineColorBlendAttachmentState* pAttachments
476 { 0.0f, 0.0f, 0.0f, 0.0f } // float blendConstants[4]
477 };
478
479 std::vector<VkDynamicState> dynamicStates;
480
481 if (viewports.empty())
482 dynamicStates.push_back(VK_DYNAMIC_STATE_VIEWPORT);
483 if (scissors.empty())
484 dynamicStates.push_back(VK_DYNAMIC_STATE_SCISSOR);
485
486 const VkPipelineDynamicStateCreateInfo dynamicStateCreateInfoDefault =
487 {
488 VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, // VkStructureType sType
489 DE_NULL, // const void* pNext
490 0u, // VkPipelineDynamicStateCreateFlags flags
491 (deUint32)dynamicStates.size(), // deUint32 dynamicStateCount
492 dynamicStates.empty() ? DE_NULL : &dynamicStates[0] // const VkDynamicState* pDynamicStates
493 };
494
495 const VkPipelineDynamicStateCreateInfo* dynamicStateCreateInfoDefaultPtr = dynamicStates.empty() ? DE_NULL : &dynamicStateCreateInfoDefault;
496
497 const VkGraphicsPipelineCreateInfo pipelineCreateInfo =
498 {
499 VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, // VkStructureType sType
500 DE_NULL, // const void* pNext
501 0u, // VkPipelineCreateFlags flags
502 (deUint32)pipelineShaderStageParams.size(), // deUint32 stageCount
503 &pipelineShaderStageParams[0], // const VkPipelineShaderStageCreateInfo* pStages
504 vertexInputStateCreateInfo ? vertexInputStateCreateInfo : &vertexInputStateCreateInfoDefault, // const VkPipelineVertexInputStateCreateInfo* pVertexInputState
505 &inputAssemblyStateCreateInfo, // const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState
506 hasTessellation ? &tessStateCreateInfo : DE_NULL, // const VkPipelineTessellationStateCreateInfo* pTessellationState
507 &viewportStateCreateInfo, // const VkPipelineViewportStateCreateInfo* pViewportState
508 rasterizationStateCreateInfo ? rasterizationStateCreateInfo : &rasterizationStateCreateInfoDefault, // const VkPipelineRasterizationStateCreateInfo* pRasterizationState
509 multisampleStateCreateInfo ? multisampleStateCreateInfo: &multisampleStateCreateInfoDefault, // const VkPipelineMultisampleStateCreateInfo* pMultisampleState
510 depthStencilStateCreateInfo ? depthStencilStateCreateInfo : &depthStencilStateCreateInfoDefault, // const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState
511 colorBlendStateCreateInfo ? colorBlendStateCreateInfo : &colorBlendStateCreateInfoDefault, // const VkPipelineColorBlendStateCreateInfo* pColorBlendState
512 dynamicStateCreateInfo ? dynamicStateCreateInfo : dynamicStateCreateInfoDefaultPtr, // const VkPipelineDynamicStateCreateInfo* pDynamicState
513 pipelineLayout, // VkPipelineLayout layout
514 renderPass, // VkRenderPass renderPass
515 subpass, // deUint32 subpass
516 DE_NULL, // VkPipeline basePipelineHandle
517 0 // deInt32 basePipelineIndex;
518 };
519
520 return createGraphicsPipeline(vk, device, DE_NULL, &pipelineCreateInfo);
521 }
522
// Convenience overload: derives common pipeline state from the Context and
// forwards to the full makeGraphicsPipeline() above. Viewport/scissor are
// left empty (i.e. dynamic), the color write mask is narrowed to the channels
// actually present in attachmentFormat, and at most one vertex binding /
// attribute description is supported (see note below).
Move<VkPipeline> makeGraphicsPipeline (Context&                                   context,
                                       const VkPipelineLayout                     pipelineLayout,
                                       const VkShaderStageFlags                   stages,
                                       const VkShaderModule                       vertexShaderModule,
                                       const VkShaderModule                       fragmentShaderModule,
                                       const VkShaderModule                       geometryShaderModule,
                                       const VkShaderModule                       tessellationControlModule,
                                       const VkShaderModule                       tessellationEvaluationModule,
                                       const VkRenderPass                         renderPass,
                                       const VkPrimitiveTopology                  topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
                                       const VkVertexInputBindingDescription*     vertexInputBindingDescription = DE_NULL,
                                       const VkVertexInputAttributeDescription*   vertexInputAttributeDescriptions = DE_NULL,
                                       const bool                                 frameBufferTests = false,
                                       const vk::VkFormat                         attachmentFormat = VK_FORMAT_R32G32B32A32_SFLOAT,
                                       const deUint32                             vertexShaderStageCreateFlags = 0u,
                                       const deUint32                             tessellationControlShaderStageCreateFlags = 0u,
                                       const deUint32                             tessellationEvalShaderStageCreateFlags = 0u,
                                       const deUint32                             geometryShaderStageCreateFlags = 0u,
                                       const deUint32                             fragmentShaderStageCreateFlags = 0u,
                                       const deUint32                             requiredSubgroupSize[5] = DE_NULL)
{
    // Empty vectors make viewport and scissor dynamic in the callee.
    const std::vector<VkViewport>   noViewports;
    const std::vector<VkRect2D>     noScissors;
    // NOTE(review): the counts assume at most one binding/attribute
    // description is passed — confirm no caller supplies arrays here.
    const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo =
    {
        VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,  // VkStructureType sType;
        DE_NULL,                                                    // const void* pNext;
        0u,                                                         // VkPipelineVertexInputStateCreateFlags flags;
        vertexInputBindingDescription == DE_NULL ? 0u : 1u,         // deUint32 vertexBindingDescriptionCount;
        vertexInputBindingDescription,                              // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
        vertexInputAttributeDescriptions == DE_NULL ? 0u : 1u,      // deUint32 vertexAttributeDescriptionCount;
        vertexInputAttributeDescriptions,                           // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
    };
    // Write only the channels the attachment format actually has.
    const deUint32 numChannels = getNumUsedChannels(mapVkFormat(attachmentFormat).order);
    const VkColorComponentFlags colorComponent = numChannels == 1 ? VK_COLOR_COMPONENT_R_BIT :
                                                 numChannels == 2 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT :
                                                 numChannels == 3 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT :
                                                 VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
    const VkPipelineColorBlendAttachmentState colorBlendAttachmentState =
    {
        VK_FALSE,               // VkBool32 blendEnable;
        VK_BLEND_FACTOR_ZERO,   // VkBlendFactor srcColorBlendFactor;
        VK_BLEND_FACTOR_ZERO,   // VkBlendFactor dstColorBlendFactor;
        VK_BLEND_OP_ADD,        // VkBlendOp colorBlendOp;
        VK_BLEND_FACTOR_ZERO,   // VkBlendFactor srcAlphaBlendFactor;
        VK_BLEND_FACTOR_ZERO,   // VkBlendFactor dstAlphaBlendFactor;
        VK_BLEND_OP_ADD,        // VkBlendOp alphaBlendOp;
        colorComponent          // VkColorComponentFlags colorWriteMask;
    };
    const VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfo =
    {
        VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,   // VkStructureType sType;
        DE_NULL,                                                    // const void* pNext;
        0u,                                                         // VkPipelineColorBlendStateCreateFlags flags;
        VK_FALSE,                                                   // VkBool32 logicOpEnable;
        VK_LOGIC_OP_CLEAR,                                          // VkLogicOp logicOp;
        1,                                                          // deUint32 attachmentCount;
        &colorBlendAttachmentState,                                 // const VkPipelineColorBlendAttachmentState* pAttachments;
        { 0.0f, 0.0f, 0.0f, 0.0f }                                  // float blendConstants[4];
    };
    // NOTE(review): the patch size keys off the FRAGMENT stage bit combined
    // with frameBufferTests — confirm this is intended rather than the
    // tessellation-control bit.
    const deUint32 patchControlPoints = (VK_SHADER_STAGE_FRAGMENT_BIT & stages && frameBufferTests) ? 2u : 1u;

    return makeGraphicsPipeline(context.getDeviceInterface(),                  // const DeviceInterface& vk
                                context.getDevice(),                           // const VkDevice device
                                pipelineLayout,                                // const VkPipelineLayout pipelineLayout
                                vertexShaderModule,                            // const VkShaderModule vertexShaderModule
                                tessellationControlModule,                     // const VkShaderModule tessellationControlShaderModule
                                tessellationEvaluationModule,                  // const VkShaderModule tessellationEvalShaderModule
                                geometryShaderModule,                          // const VkShaderModule geometryShaderModule
                                fragmentShaderModule,                          // const VkShaderModule fragmentShaderModule
                                renderPass,                                    // const VkRenderPass renderPass
                                noViewports,                                   // const std::vector<VkViewport>& viewports
                                noScissors,                                    // const std::vector<VkRect2D>& scissors
                                topology,                                      // const VkPrimitiveTopology topology
                                0u,                                            // const deUint32 subpass
                                patchControlPoints,                            // const deUint32 patchControlPoints
                                &vertexInputStateCreateInfo,                   // const VkPipelineVertexInputStateCreateInfo* vertexInputStateCreateInfo
                                DE_NULL,                                       // const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
                                DE_NULL,                                       // const VkPipelineMultisampleStateCreateInfo* multisampleStateCreateInfo
                                DE_NULL,                                       // const VkPipelineDepthStencilStateCreateInfo* depthStencilStateCreateInfo
                                &colorBlendStateCreateInfo,                    // const VkPipelineColorBlendStateCreateInfo* colorBlendStateCreateInfo
                                DE_NULL,                                       // const VkPipelineDynamicStateCreateInfo*
                                vertexShaderStageCreateFlags,                  // const deUint32 vertexShaderStageCreateFlags,
                                tessellationControlShaderStageCreateFlags,     // const deUint32 tessellationControlShaderStageCreateFlags
                                tessellationEvalShaderStageCreateFlags,        // const deUint32 tessellationEvalShaderStageCreateFlags
                                geometryShaderStageCreateFlags,                // const deUint32 geometryShaderStageCreateFlags
                                fragmentShaderStageCreateFlags,                // const deUint32 fragmentShaderStageCreateFlags
                                requiredSubgroupSize);                         // const deUint32 requiredSubgroupSize[5]
}
612
makeCommandBuffer(Context & context,const VkCommandPool commandPool)613 Move<VkCommandBuffer> makeCommandBuffer (Context& context, const VkCommandPool commandPool)
614 {
615 const VkCommandBufferAllocateInfo bufferAllocateParams =
616 {
617 VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, // VkStructureType sType;
618 DE_NULL, // const void* pNext;
619 commandPool, // VkCommandPool commandPool;
620 VK_COMMAND_BUFFER_LEVEL_PRIMARY, // VkCommandBufferLevel level;
621 1u, // deUint32 bufferCount;
622 };
623 return allocateCommandBuffer(context.getDeviceInterface(),
624 context.getDevice(), &bufferAllocateParams);
625 }
626
627 struct Buffer;
628 struct Image;
629
630 struct BufferOrImage
631 {
isImage__anonc13380c00111::BufferOrImage632 bool isImage() const
633 {
634 return m_isImage;
635 }
636
getAsBuffer__anonc13380c00111::BufferOrImage637 Buffer* getAsBuffer()
638 {
639 if (m_isImage) DE_FATAL("Trying to get a buffer as an image!");
640 return reinterpret_cast<Buffer* >(this);
641 }
642
getAsImage__anonc13380c00111::BufferOrImage643 Image* getAsImage()
644 {
645 if (!m_isImage) DE_FATAL("Trying to get an image as a buffer!");
646 return reinterpret_cast<Image*>(this);
647 }
648
getType__anonc13380c00111::BufferOrImage649 virtual VkDescriptorType getType() const
650 {
651 if (m_isImage)
652 {
653 return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
654 }
655 else
656 {
657 return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
658 }
659 }
660
getAllocation__anonc13380c00111::BufferOrImage661 Allocation& getAllocation() const
662 {
663 return *m_allocation;
664 }
665
~BufferOrImage__anonc13380c00111::BufferOrImage666 virtual ~BufferOrImage() {}
667
668 protected:
BufferOrImage__anonc13380c00111::BufferOrImage669 explicit BufferOrImage(bool image) : m_isImage(image) {}
670
671 bool m_isImage;
672 de::details::MovePtr<Allocation> m_allocation;
673 };
674
675 struct Buffer : public BufferOrImage
676 {
Buffer__anonc13380c00111::Buffer677 explicit Buffer (Context& context, VkDeviceSize sizeInBytes, VkBufferUsageFlags usage)
678 : BufferOrImage (false)
679 , m_sizeInBytes (sizeInBytes)
680 , m_usage (usage)
681 {
682 const DeviceInterface& vkd = context.getDeviceInterface();
683 const VkDevice device = context.getDevice();
684
685 const vk::VkBufferCreateInfo bufferCreateInfo =
686 {
687 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
688 DE_NULL,
689 0u,
690 m_sizeInBytes,
691 m_usage,
692 VK_SHARING_MODE_EXCLUSIVE,
693 0u,
694 DE_NULL,
695 };
696 m_buffer = createBuffer(vkd, device, &bufferCreateInfo);
697
698 VkMemoryRequirements req = getBufferMemoryRequirements(vkd, device, *m_buffer);
699
700 m_allocation = context.getDefaultAllocator().allocate(req, MemoryRequirement::HostVisible);
701 VK_CHECK(vkd.bindBufferMemory(device, *m_buffer, m_allocation->getMemory(), m_allocation->getOffset()));
702 }
703
getType__anonc13380c00111::Buffer704 virtual VkDescriptorType getType() const
705 {
706 if (VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT == m_usage)
707 {
708 return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
709 }
710 return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
711 }
712
getBuffer__anonc13380c00111::Buffer713 VkBuffer getBuffer () const
714 {
715 return *m_buffer;
716 }
717
getBufferPtr__anonc13380c00111::Buffer718 const VkBuffer* getBufferPtr () const
719 {
720 return &(*m_buffer);
721 }
722
getSize__anonc13380c00111::Buffer723 VkDeviceSize getSize () const
724 {
725 return m_sizeInBytes;
726 }
727
728 private:
729 Move<VkBuffer> m_buffer;
730 VkDeviceSize m_sizeInBytes;
731 const VkBufferUsageFlags m_usage;
732 };
733
// 2D image wrapper: creates the image plus its memory, an identity-swizzle
// view and a nearest-filter sampler, then transitions the image layout from
// UNDEFINED to GENERAL so it is immediately usable by the tests.
struct Image : public BufferOrImage
{
	// Creates a width x height single-mip, single-layer 2D image of the given
	// format and usage on the universal queue family.
	explicit Image (Context& context, deUint32 width, deUint32 height, VkFormat format, VkImageUsageFlags usage = VK_IMAGE_USAGE_STORAGE_BIT)
		: BufferOrImage(true)
	{
		const DeviceInterface&	vk					= context.getDeviceInterface();
		const VkDevice			device				= context.getDevice();
		const deUint32			queueFamilyIndex	= context.getUniversalQueueFamilyIndex();

		const VkImageCreateInfo imageCreateInfo =
		{
			VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	// VkStructureType			sType;
			DE_NULL,								// const void*				pNext;
			0,										// VkImageCreateFlags		flags;
			VK_IMAGE_TYPE_2D,						// VkImageType				imageType;
			format,									// VkFormat					format;
			{width, height, 1},						// VkExtent3D				extent;
			1,										// deUint32					mipLevels;
			1,										// deUint32					arrayLayers;
			VK_SAMPLE_COUNT_1_BIT,					// VkSampleCountFlagBits	samples;
			VK_IMAGE_TILING_OPTIMAL,				// VkImageTiling			tiling;
			usage,									// VkImageUsageFlags		usage;
			VK_SHARING_MODE_EXCLUSIVE,				// VkSharingMode			sharingMode;
			0u,										// deUint32					queueFamilyIndexCount;
			DE_NULL,								// const deUint32*			pQueueFamilyIndices;
			VK_IMAGE_LAYOUT_UNDEFINED				// VkImageLayout			initialLayout;
		};

		const VkComponentMapping componentMapping =
		{
			VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
			VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY
		};

		// Whole image: single mip level, single array layer, color aspect.
		const VkImageSubresourceRange subresourceRange =
		{
			VK_IMAGE_ASPECT_COLOR_BIT,	//VkImageAspectFlags	aspectMask
			0u,							//deUint32				baseMipLevel
			1u,							//deUint32				levelCount
			0u,							//deUint32				baseArrayLayer
			1u							//deUint32				layerCount
		};

		// Nearest filtering, clamp-to-edge, normalized coordinates.
		const VkSamplerCreateInfo samplerCreateInfo =
		{
			VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,		// VkStructureType			sType;
			DE_NULL,									// const void*				pNext;
			0u,											// VkSamplerCreateFlags		flags;
			VK_FILTER_NEAREST,							// VkFilter					magFilter;
			VK_FILTER_NEAREST,							// VkFilter					minFilter;
			VK_SAMPLER_MIPMAP_MODE_NEAREST,				// VkSamplerMipmapMode		mipmapMode;
			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,		// VkSamplerAddressMode		addressModeU;
			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,		// VkSamplerAddressMode		addressModeV;
			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,		// VkSamplerAddressMode		addressModeW;
			0.0f,										// float					mipLodBias;
			VK_FALSE,									// VkBool32					anisotropyEnable;
			1.0f,										// float					maxAnisotropy;
			DE_FALSE,									// VkBool32					compareEnable;
			VK_COMPARE_OP_ALWAYS,						// VkCompareOp				compareOp;
			0.0f,										// float					minLod;
			0.0f,										// float					maxLod;
			VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,	// VkBorderColor			borderColor;
			VK_FALSE,									// VkBool32					unnormalizedCoordinates;
		};

		m_image = createImage(vk, device, &imageCreateInfo);

		VkMemoryRequirements req = getImageMemoryRequirements(vk, device, *m_image);

		// NOTE(review): doubles the reported memory requirement — presumably a
		// workaround (e.g. for driver padding/alignment issues); confirm
		// whether this over-allocation is still needed.
		req.size *= 2;
		m_allocation = context.getDefaultAllocator().allocate(req, MemoryRequirement::Any);

		VK_CHECK(vk.bindImageMemory(device, *m_image, m_allocation->getMemory(), m_allocation->getOffset()));

		const VkImageViewCreateInfo imageViewCreateInfo =
		{
			VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,	// VkStructureType			sType;
			DE_NULL,									// const void*				pNext;
			0,											// VkImageViewCreateFlags	flags;
			*m_image,									// VkImage					image;
			VK_IMAGE_VIEW_TYPE_2D,						// VkImageViewType			viewType;
			imageCreateInfo.format,						// VkFormat					format;
			componentMapping,							// VkComponentMapping		components;
			subresourceRange							// VkImageSubresourceRange	subresourceRange;
		};

		m_imageView	= createImageView(vk, device, &imageViewCreateInfo);
		m_sampler	= createSampler(vk, device, &samplerCreateInfo);

		// Transition input image layouts
		// (UNDEFINED -> GENERAL, executed synchronously on the universal queue
		// with a throw-away command pool/buffer).
		{
			const Unique<VkCommandPool>		cmdPool		(makeCommandPool(vk, device, queueFamilyIndex));
			const Unique<VkCommandBuffer>	cmdBuffer	(makeCommandBuffer(context, *cmdPool));

			beginCommandBuffer(vk, *cmdBuffer);

			const VkImageMemoryBarrier imageBarrier = makeImageMemoryBarrier((VkAccessFlags)0u, VK_ACCESS_TRANSFER_WRITE_BIT,
				VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, *m_image, subresourceRange);

			vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
				(VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &imageBarrier);

			endCommandBuffer(vk, *cmdBuffer);
			submitCommandsAndWait(vk, device, context.getUniversalQueue(), *cmdBuffer);
		}
	}

	VkImage getImage () const
	{
		return *m_image;
	}

	VkImageView getImageView () const
	{
		return *m_imageView;
	}

	VkSampler getSampler () const
	{
		return *m_sampler;
	}

private:
	Move<VkImage>		m_image;
	Move<VkImageView>	m_imageView;
	Move<VkSampler>		m_sampler;
};
861 }
862
getStagesCount(const VkShaderStageFlags shaderStages)863 deUint32 vkt::subgroups::getStagesCount (const VkShaderStageFlags shaderStages)
864 {
865 const deUint32 stageCount = isAllGraphicsStages(shaderStages) ? 4
866 : isAllComputeStages(shaderStages) ? 1
867 #ifndef CTS_USES_VULKANSC
868 : isAllRayTracingStages(shaderStages) ? 6
869 : isAllMeshShadingStages(shaderStages) ? 1
870 #endif // CTS_USES_VULKANSC
871 : 0;
872
873 DE_ASSERT(stageCount != 0);
874
875 return stageCount;
876 }
877
// Returns GLSL source for a helper that emulates a 128-bit ballot using
// shared memory: each voting invocation atomically ORs its bit into a
// per-subgroup uvec4, and the combined mask is returned to all invocations.
std::string vkt::subgroups::getSharedMemoryBallotHelper ()
{
	return "shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
		"uvec4 sharedMemoryBallot(bool vote)\n"
		"{\n"
		"  uint groupOffset = gl_SubgroupID;\n"
		"  // One invocation in the group 0's the whole group's data\n"
		"  if (subgroupElect())\n"
		"  {\n"
		"    superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
		"  }\n"
		"  subgroupMemoryBarrierShared();\n"
		"  if (vote)\n"
		"  {\n"
		"    const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
		"    const highp uint bitToSet = 1u << invocationId;\n"
		"    switch (gl_SubgroupInvocationID / 32)\n"
		"    {\n"
		"    case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
		"    case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
		"    case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
		"    case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
		"    }\n"
		"  }\n"
		"  subgroupMemoryBarrierShared();\n"
		"  return superSecretComputeShaderHelper[groupOffset];\n"
		"}\n";
}
906
// ARB_shader_ballot flavor of the helper above: same shared-memory atomic-OR
// scheme, but the result is packed into a uint64_t (low two components only),
// matching the 64-bit ballot type used by the ARB extension.
std::string vkt::subgroups::getSharedMemoryBallotHelperARB ()
{
	return "shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
		"uint64_t sharedMemoryBallot(bool vote)\n"
		"{\n"
		"  uint groupOffset = gl_SubgroupID;\n"
		"  // One invocation in the group 0's the whole group's data\n"
		"  if (subgroupElect())\n"
		"  {\n"
		"    superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
		"  }\n"
		"  subgroupMemoryBarrierShared();\n"
		"  if (vote)\n"
		"  {\n"
		"    const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
		"    const highp uint bitToSet = 1u << invocationId;\n"
		"    switch (gl_SubgroupInvocationID / 32)\n"
		"    {\n"
		"    case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
		"    case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
		"    case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
		"    case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
		"    }\n"
		"  }\n"
		"  subgroupMemoryBarrierShared();\n"
		"  return packUint2x32(superSecretComputeShaderHelper[groupOffset].xy);\n"
		"}\n";
}
935
getSubgroupSize(Context & context)936 deUint32 vkt::subgroups::getSubgroupSize (Context& context)
937 {
938 return context.getSubgroupProperties().subgroupSize;
939 }
940
maxSupportedSubgroupSize()941 deUint32 vkt::subgroups::maxSupportedSubgroupSize ()
942 {
943 return 128u;
944 }
945
getShaderStageName(VkShaderStageFlags stage)946 std::string vkt::subgroups::getShaderStageName (VkShaderStageFlags stage)
947 {
948 switch (stage)
949 {
950 case VK_SHADER_STAGE_COMPUTE_BIT: return "compute";
951 case VK_SHADER_STAGE_FRAGMENT_BIT: return "fragment";
952 case VK_SHADER_STAGE_VERTEX_BIT: return "vertex";
953 case VK_SHADER_STAGE_GEOMETRY_BIT: return "geometry";
954 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: return "tess_control";
955 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: return "tess_eval";
956 #ifndef CTS_USES_VULKANSC
957 case VK_SHADER_STAGE_RAYGEN_BIT_KHR: return "rgen";
958 case VK_SHADER_STAGE_ANY_HIT_BIT_KHR: return "ahit";
959 case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR: return "chit";
960 case VK_SHADER_STAGE_MISS_BIT_KHR: return "miss";
961 case VK_SHADER_STAGE_INTERSECTION_BIT_KHR: return "sect";
962 case VK_SHADER_STAGE_CALLABLE_BIT_KHR: return "call";
963 case VK_SHADER_STAGE_MESH_BIT_EXT: return "mesh";
964 case VK_SHADER_STAGE_TASK_BIT_EXT: return "task";
965 #endif // CTS_USES_VULKANSC
966 default: TCU_THROW(InternalError, "Unhandled stage");
967 }
968 }
969
getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit)970 std::string vkt::subgroups::getSubgroupFeatureName (vk::VkSubgroupFeatureFlagBits bit)
971 {
972 switch (bit)
973 {
974 case VK_SUBGROUP_FEATURE_BASIC_BIT: return "VK_SUBGROUP_FEATURE_BASIC_BIT";
975 case VK_SUBGROUP_FEATURE_VOTE_BIT: return "VK_SUBGROUP_FEATURE_VOTE_BIT";
976 case VK_SUBGROUP_FEATURE_ARITHMETIC_BIT: return "VK_SUBGROUP_FEATURE_ARITHMETIC_BIT";
977 case VK_SUBGROUP_FEATURE_BALLOT_BIT: return "VK_SUBGROUP_FEATURE_BALLOT_BIT";
978 case VK_SUBGROUP_FEATURE_SHUFFLE_BIT: return "VK_SUBGROUP_FEATURE_SHUFFLE_BIT";
979 case VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT: return "VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT";
980 case VK_SUBGROUP_FEATURE_CLUSTERED_BIT: return "VK_SUBGROUP_FEATURE_CLUSTERED_BIT";
981 case VK_SUBGROUP_FEATURE_QUAD_BIT: return "VK_SUBGROUP_FEATURE_QUAD_BIT";
982 default: TCU_THROW(InternalError, "Unknown subgroup feature category");
983 }
984 }
985
// Registers pre-assembled SPIR-V pass-through shaders ("vert_noSubgroup",
// "tesc_noSubgroup", "tese_noSubgroup") that use no subgroup functionality.
// They are used for the pipeline stages that are not under test. Each SPIR-V
// blob is preceded by the GLSL it was compiled from (in the block comment).
void vkt::subgroups::addNoSubgroupShader (SourceCollections& programCollection)
{
	// Pass-through vertex shader: one point per gl_VertexIndex, spread across
	// a 1024-pixel-wide line.
	{
		/*
			"#version 450\n"
			"void main (void)\n"
			"{\n"
			"  float pixelSize = 2.0f/1024.0f;\n"
			"  float pixelPosition = pixelSize/2.0f - 1.0f;\n"
			"  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
			"  gl_PointSize = 1.0f;\n"
			"}\n"
		*/
		const std::string vertNoSubgroup =
			"; SPIR-V\n"
			"; Version: 1.3\n"
			"; Generator: Khronos Glslang Reference Front End; 1\n"
			"; Bound: 37\n"
			"; Schema: 0\n"
			"OpCapability Shader\n"
			"%1 = OpExtInstImport \"GLSL.std.450\"\n"
			"OpMemoryModel Logical GLSL450\n"
			"OpEntryPoint Vertex %4 \"main\" %22 %26\n"
			"OpMemberDecorate %20 0 BuiltIn Position\n"
			"OpMemberDecorate %20 1 BuiltIn PointSize\n"
			"OpMemberDecorate %20 2 BuiltIn ClipDistance\n"
			"OpMemberDecorate %20 3 BuiltIn CullDistance\n"
			"OpDecorate %20 Block\n"
			"OpDecorate %26 BuiltIn VertexIndex\n"
			"%2 = OpTypeVoid\n"
			"%3 = OpTypeFunction %2\n"
			"%6 = OpTypeFloat 32\n"
			"%7 = OpTypePointer Function %6\n"
			"%9 = OpConstant %6 0.00195313\n"
			"%12 = OpConstant %6 2\n"
			"%14 = OpConstant %6 1\n"
			"%16 = OpTypeVector %6 4\n"
			"%17 = OpTypeInt 32 0\n"
			"%18 = OpConstant %17 1\n"
			"%19 = OpTypeArray %6 %18\n"
			"%20 = OpTypeStruct %16 %6 %19 %19\n"
			"%21 = OpTypePointer Output %20\n"
			"%22 = OpVariable %21 Output\n"
			"%23 = OpTypeInt 32 1\n"
			"%24 = OpConstant %23 0\n"
			"%25 = OpTypePointer Input %23\n"
			"%26 = OpVariable %25 Input\n"
			"%33 = OpConstant %6 0\n"
			"%35 = OpTypePointer Output %16\n"
			"%37 = OpConstant %23 1\n"
			"%38 = OpTypePointer Output %6\n"
			"%4 = OpFunction %2 None %3\n"
			"%5 = OpLabel\n"
			"%8 = OpVariable %7 Function\n"
			"%10 = OpVariable %7 Function\n"
			"OpStore %8 %9\n"
			"%11 = OpLoad %6 %8\n"
			"%13 = OpFDiv %6 %11 %12\n"
			"%15 = OpFSub %6 %13 %14\n"
			"OpStore %10 %15\n"
			"%27 = OpLoad %23 %26\n"
			"%28 = OpConvertSToF %6 %27\n"
			"%29 = OpLoad %6 %8\n"
			"%30 = OpFMul %6 %28 %29\n"
			"%31 = OpLoad %6 %10\n"
			"%32 = OpFAdd %6 %30 %31\n"
			"%34 = OpCompositeConstruct %16 %32 %33 %33 %14\n"
			"%36 = OpAccessChain %35 %22 %24\n"
			"OpStore %36 %34\n"
			"%39 = OpAccessChain %38 %22 %37\n"
			"OpStore %39 %14\n"
			"OpReturn\n"
			"OpFunctionEnd\n";
		programCollection.spirvAsmSources.add("vert_noSubgroup") << vertNoSubgroup;
	}

	// Pass-through tessellation control shader: single output vertex, both
	// outer isoline tessellation levels set to 1.
	{
		/*
			"#version 450\n"
			"layout(vertices=1) out;\n"
			"\n"
			"void main (void)\n"
			"{\n"
			"  if (gl_InvocationID == 0)\n"
			"  {\n"
			"    gl_TessLevelOuter[0] = 1.0f;\n"
			"    gl_TessLevelOuter[1] = 1.0f;\n"
			"  }\n"
			"  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
			"}\n"
		*/
		const std::string tescNoSubgroup =
			"; SPIR-V\n"
			"; Version: 1.3\n"
			"; Generator: Khronos Glslang Reference Front End; 1\n"
			"; Bound: 45\n"
			"; Schema: 0\n"
			"OpCapability Tessellation\n"
			"%1 = OpExtInstImport \"GLSL.std.450\"\n"
			"OpMemoryModel Logical GLSL450\n"
			"OpEntryPoint TessellationControl %4 \"main\" %8 %20 %32 %38\n"
			"OpExecutionMode %4 OutputVertices 1\n"
			"OpDecorate %8 BuiltIn InvocationId\n"
			"OpDecorate %20 Patch\n"
			"OpDecorate %20 BuiltIn TessLevelOuter\n"
			"OpMemberDecorate %29 0 BuiltIn Position\n"
			"OpMemberDecorate %29 1 BuiltIn PointSize\n"
			"OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
			"OpMemberDecorate %29 3 BuiltIn CullDistance\n"
			"OpDecorate %29 Block\n"
			"OpMemberDecorate %34 0 BuiltIn Position\n"
			"OpMemberDecorate %34 1 BuiltIn PointSize\n"
			"OpMemberDecorate %34 2 BuiltIn ClipDistance\n"
			"OpMemberDecorate %34 3 BuiltIn CullDistance\n"
			"OpDecorate %34 Block\n"
			"%2 = OpTypeVoid\n"
			"%3 = OpTypeFunction %2\n"
			"%6 = OpTypeInt 32 1\n"
			"%7 = OpTypePointer Input %6\n"
			"%8 = OpVariable %7 Input\n"
			"%10 = OpConstant %6 0\n"
			"%11 = OpTypeBool\n"
			"%15 = OpTypeFloat 32\n"
			"%16 = OpTypeInt 32 0\n"
			"%17 = OpConstant %16 4\n"
			"%18 = OpTypeArray %15 %17\n"
			"%19 = OpTypePointer Output %18\n"
			"%20 = OpVariable %19 Output\n"
			"%21 = OpConstant %15 1\n"
			"%22 = OpTypePointer Output %15\n"
			"%24 = OpConstant %6 1\n"
			"%26 = OpTypeVector %15 4\n"
			"%27 = OpConstant %16 1\n"
			"%28 = OpTypeArray %15 %27\n"
			"%29 = OpTypeStruct %26 %15 %28 %28\n"
			"%30 = OpTypeArray %29 %27\n"
			"%31 = OpTypePointer Output %30\n"
			"%32 = OpVariable %31 Output\n"
			"%34 = OpTypeStruct %26 %15 %28 %28\n"
			"%35 = OpConstant %16 32\n"
			"%36 = OpTypeArray %34 %35\n"
			"%37 = OpTypePointer Input %36\n"
			"%38 = OpVariable %37 Input\n"
			"%40 = OpTypePointer Input %26\n"
			"%43 = OpTypePointer Output %26\n"
			"%4 = OpFunction %2 None %3\n"
			"%5 = OpLabel\n"
			"%9 = OpLoad %6 %8\n"
			"%12 = OpIEqual %11 %9 %10\n"
			"OpSelectionMerge %14 None\n"
			"OpBranchConditional %12 %13 %14\n"
			"%13 = OpLabel\n"
			"%23 = OpAccessChain %22 %20 %10\n"
			"OpStore %23 %21\n"
			"%25 = OpAccessChain %22 %20 %24\n"
			"OpStore %25 %21\n"
			"OpBranch %14\n"
			"%14 = OpLabel\n"
			"%33 = OpLoad %6 %8\n"
			"%39 = OpLoad %6 %8\n"
			"%41 = OpAccessChain %40 %38 %39 %10\n"
			"%42 = OpLoad %26 %41\n"
			"%44 = OpAccessChain %43 %32 %33 %10\n"
			"OpStore %44 %42\n"
			"OpReturn\n"
			"OpFunctionEnd\n";
		programCollection.spirvAsmSources.add("tesc_noSubgroup") << tescNoSubgroup;
	}

	// Pass-through tessellation evaluation shader for isolines: offsets the
	// patch position by the tessellation coordinate scaled to pixel size.
	{
		/*
			"#version 450\n"
			"layout(isolines) in;\n"
			"\n"
			"void main (void)\n"
			"{\n"
			"  float pixelSize = 2.0f/1024.0f;\n"
			"  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
			"}\n";
		*/
		const std::string teseNoSubgroup =
			"; SPIR-V\n"
			"; Version: 1.3\n"
			"; Generator: Khronos Glslang Reference Front End; 2\n"
			"; Bound: 42\n"
			"; Schema: 0\n"
			"OpCapability Tessellation\n"
			"%1 = OpExtInstImport \"GLSL.std.450\"\n"
			"OpMemoryModel Logical GLSL450\n"
			"OpEntryPoint TessellationEvaluation %4 \"main\" %16 %23 %29\n"
			"OpExecutionMode %4 Isolines\n"
			"OpExecutionMode %4 SpacingEqual\n"
			"OpExecutionMode %4 VertexOrderCcw\n"
			"OpMemberDecorate %14 0 BuiltIn Position\n"
			"OpMemberDecorate %14 1 BuiltIn PointSize\n"
			"OpMemberDecorate %14 2 BuiltIn ClipDistance\n"
			"OpMemberDecorate %14 3 BuiltIn CullDistance\n"
			"OpDecorate %14 Block\n"
			"OpMemberDecorate %19 0 BuiltIn Position\n"
			"OpMemberDecorate %19 1 BuiltIn PointSize\n"
			"OpMemberDecorate %19 2 BuiltIn ClipDistance\n"
			"OpMemberDecorate %19 3 BuiltIn CullDistance\n"
			"OpDecorate %19 Block\n"
			"OpDecorate %29 BuiltIn TessCoord\n"
			"%2 = OpTypeVoid\n"
			"%3 = OpTypeFunction %2\n"
			"%6 = OpTypeFloat 32\n"
			"%7 = OpTypePointer Function %6\n"
			"%9 = OpConstant %6 0.00195313\n"
			"%10 = OpTypeVector %6 4\n"
			"%11 = OpTypeInt 32 0\n"
			"%12 = OpConstant %11 1\n"
			"%13 = OpTypeArray %6 %12\n"
			"%14 = OpTypeStruct %10 %6 %13 %13\n"
			"%15 = OpTypePointer Output %14\n"
			"%16 = OpVariable %15 Output\n"
			"%17 = OpTypeInt 32 1\n"
			"%18 = OpConstant %17 0\n"
			"%19 = OpTypeStruct %10 %6 %13 %13\n"
			"%20 = OpConstant %11 32\n"
			"%21 = OpTypeArray %19 %20\n"
			"%22 = OpTypePointer Input %21\n"
			"%23 = OpVariable %22 Input\n"
			"%24 = OpTypePointer Input %10\n"
			"%27 = OpTypeVector %6 3\n"
			"%28 = OpTypePointer Input %27\n"
			"%29 = OpVariable %28 Input\n"
			"%30 = OpConstant %11 0\n"
			"%31 = OpTypePointer Input %6\n"
			"%36 = OpConstant %6 2\n"
			"%40 = OpTypePointer Output %10\n"
			"%4 = OpFunction %2 None %3\n"
			"%5 = OpLabel\n"
			"%8 = OpVariable %7 Function\n"
			"OpStore %8 %9\n"
			"%25 = OpAccessChain %24 %23 %18 %18\n"
			"%26 = OpLoad %10 %25\n"
			"%32 = OpAccessChain %31 %29 %30\n"
			"%33 = OpLoad %6 %32\n"
			"%34 = OpLoad %6 %8\n"
			"%35 = OpFMul %6 %33 %34\n"
			"%37 = OpFDiv %6 %35 %36\n"
			"%38 = OpCompositeConstruct %10 %37 %37 %37 %37\n"
			"%39 = OpFAdd %10 %26 %38\n"
			"%41 = OpAccessChain %40 %16 %18\n"
			"OpStore %41 %39\n"
			"OpReturn\n"
			"OpFunctionEnd\n";
		programCollection.spirvAsmSources.add("tese_noSubgroup") << teseNoSubgroup;
	}

}
1238
getFramebufferBufferDeclarations(const VkFormat & format,const std::vector<std::string> & declarations,const deUint32 stage)1239 static std::string getFramebufferBufferDeclarations (const VkFormat& format,
1240 const std::vector<std::string>& declarations,
1241 const deUint32 stage)
1242 {
1243 if (declarations.empty())
1244 {
1245 const std::string name = (stage == 0) ? "result" : "out_color";
1246 const std::string suffix = (stage == 2) ? "[]" : "";
1247 const std::string result =
1248 "layout(location = 0) out float " + name + suffix + ";\n"
1249 "layout(set = 0, binding = 0) uniform Buffer1\n"
1250 "{\n"
1251 " " + de::toString(subgroups::getFormatNameForGLSL(format)) + " data[" + de::toString(subgroups::maxSupportedSubgroupSize()) + "];\n"
1252 "};\n";
1253
1254 return result;
1255 }
1256 else
1257 {
1258 return declarations[stage];
1259 }
1260 }
1261
// Builds the standard framebuffer-based shader set for a single tested stage:
// the stage identified by shaderStage gets extHeader/testSrc/helperStr spliced
// into its source, and all other required stages get pass-through versions.
// gsPointSize controls whether gl_PointSize is forwarded where applicable.
void vkt::subgroups::initStdFrameBufferPrograms (SourceCollections&				programCollection,
												 const vk::ShaderBuildOptions&	buildOptions,
												 VkShaderStageFlags				shaderStage,
												 VkFormat						format,
												 bool							gsPointSize,
												 const std::string&				extHeader,
												 const std::string&				testSrc,
												 const std::string&				helperStr,
												 const std::vector<std::string>&	declarations)
{
	// Every variant needs a fragment shader; a pass-through vertex shader is
	// added unless the vertex stage itself is under test.
	subgroups::setFragmentShaderFrameBuffer(programCollection);

	if (shaderStage != VK_SHADER_STAGE_VERTEX_BIT)
		subgroups::setVertexShaderFrameBuffer(programCollection);

	if (shaderStage == VK_SHADER_STAGE_VERTEX_BIT)
	{
		// Tested vertex shader: writes the test result and positions a point.
		std::ostringstream vertex;

		vertex << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
			<< extHeader
			<< "layout(location = 0) in highp vec4 in_position;\n"
			<< getFramebufferBufferDeclarations(format, declarations, 0)
			<< "\n"
			<< helperStr
			<< "void main (void)\n"
			<< "{\n"
			<< "  uint tempRes;\n"
			<< testSrc
			<< "  result = float(tempRes);\n"
			<< "  gl_Position = in_position;\n"
			<< "  gl_PointSize = 1.0f;\n"
			<< "}\n";

		programCollection.glslSources.add("vert") << glu::VertexSource(vertex.str()) << buildOptions;
	}
	else if (shaderStage == VK_SHADER_STAGE_GEOMETRY_BIT)
	{
		// Tested geometry shader: points in, one point out, result written to
		// out_color.
		std::ostringstream geometry;

		geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
			<< extHeader
			<< "layout(points) in;\n"
			<< "layout(points, max_vertices = 1) out;\n"
			<< getFramebufferBufferDeclarations(format, declarations, 1)
			<< "\n"
			<< helperStr
			<< "void main (void)\n"
			<< "{\n"
			<< "  uint tempRes;\n"
			<< testSrc
			<< "  out_color = float(tempRes);\n"
			<< "  gl_Position = gl_in[0].gl_Position;\n"
			<< (gsPointSize ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "")
			<< "  EmitVertex();\n"
			<< "  EndPrimitive();\n"
			<< "}\n";

		programCollection.glslSources.add("geometry") << glu::GeometrySource(geometry.str()) << buildOptions;
	}
	else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
	{
		// Tested tess control shader: sets outer levels to 1 and writes the
		// per-invocation result; a pass-through tess eval shader is added.
		std::ostringstream controlSource;

		controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
			<< extHeader
			<< "layout(vertices = 2) out;\n"
			<< getFramebufferBufferDeclarations(format, declarations, 2)
			<< "\n"
			<< helperStr
			<< "void main (void)\n"
			<< "{\n"
			<< "  if (gl_InvocationID == 0)\n"
			<< "  {\n"
			<< "    gl_TessLevelOuter[0] = 1.0f;\n"
			<< "    gl_TessLevelOuter[1] = 1.0f;\n"
			<< "  }\n"
			<< "  uint tempRes;\n"
			<< testSrc
			<< "  out_color[gl_InvocationID] = float(tempRes);\n"
			<< "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
			<< (gsPointSize ? "  gl_out[gl_InvocationID].gl_PointSize = gl_in[gl_InvocationID].gl_PointSize;\n" : "")
			<< "}\n";

		programCollection.glslSources.add("tesc") << glu::TessellationControlSource(controlSource.str()) << buildOptions;
		subgroups::setTesEvalShaderFrameBuffer(programCollection);
	}
	else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
	{
		// Tested tess eval shader on isolines; a pass-through tess control
		// shader is added.
		ostringstream evaluationSource;

		evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
			<< extHeader
			<< "layout(isolines, equal_spacing, ccw ) in;\n"
			<< getFramebufferBufferDeclarations(format, declarations, 3)
			<< "\n"
			<< helperStr
			<< "void main (void)\n"
			<< "{\n"
			<< "  uint tempRes;\n"
			<< testSrc
			<< "  out_color = float(tempRes);\n"
			<< "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
			<< (gsPointSize ? "  gl_PointSize = gl_in[0].gl_PointSize;\n" : "")
			<< "}\n";

		subgroups::setTesCtrlShaderFrameBuffer(programCollection);
		programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
	}
	else
	{
		DE_FATAL("Unsupported shader stage");
	}
}
1376
getBufferDeclarations(vk::VkShaderStageFlags shaderStage,const std::string & formatName,const std::vector<std::string> & declarations,const deUint32 stage)1377 static std::string getBufferDeclarations (vk::VkShaderStageFlags shaderStage,
1378 const std::string& formatName,
1379 const std::vector<std::string>& declarations,
1380 const deUint32 stage)
1381 {
1382 if (declarations.empty())
1383 {
1384 const deUint32 stageCount = vkt::subgroups::getStagesCount(shaderStage);
1385 const deUint32 binding0 = stage;
1386 const deUint32 binding1 = stageCount;
1387 const bool fragment = (shaderStage & VK_SHADER_STAGE_FRAGMENT_BIT) && (stage == stageCount);
1388 const string buffer1 = fragment
1389 ? "layout(location = 0) out uint result;\n"
1390 : "layout(set = 0, binding = " + de::toString(binding0) + ", std430) buffer Buffer1\n"
1391 "{\n"
1392 " uint result[];\n"
1393 "};\n";
1394 //todo boza I suppose it can be "layout(set = 0, binding = " + de::toString(binding1) + ", std430) readonly buffer Buffer2\n"
1395 const string buffer2 = "layout(set = 0, binding = " + de::toString(binding1) + ", std430)" + (stageCount == 1 ? "" : " readonly") + " buffer Buffer" + (fragment ? "1" : "2") + "\n"
1396 "{\n"
1397 " " + formatName + " data[];\n"
1398 "};\n";
1399
1400 return buffer1 + buffer2;
1401 }
1402 else
1403 {
1404 return declarations[stage];
1405 }
1406 }
1407
initStdPrograms(vk::SourceCollections & programCollection,const vk::ShaderBuildOptions & buildOptions,vk::VkShaderStageFlags shaderStage,vk::VkFormat format,bool gsPointSize,const std::string & extHeader,const std::string & testSrc,const std::string & helperStr,const std::vector<std::string> & declarations,const bool avoidHelperInvocations,const std::string & tempRes)1408 void vkt::subgroups::initStdPrograms (vk::SourceCollections& programCollection,
1409 const vk::ShaderBuildOptions& buildOptions,
1410 vk::VkShaderStageFlags shaderStage,
1411 vk::VkFormat format,
1412 bool gsPointSize,
1413 const std::string& extHeader,
1414 const std::string& testSrc,
1415 const std::string& helperStr,
1416 const std::vector<std::string>& declarations,
1417 const bool avoidHelperInvocations,
1418 const std::string& tempRes)
1419 {
1420 const std::string formatName = subgroups::getFormatNameForGLSL(format);
1421
1422 if (isAllComputeStages(shaderStage))
1423 {
1424 std::ostringstream src;
1425
1426 src << "#version 450\n"
1427 << extHeader
1428 << "layout (local_size_x_id = 0, local_size_y_id = 1, "
1429 "local_size_z_id = 2) in;\n"
1430 << getBufferDeclarations(shaderStage, formatName, declarations, 0)
1431 << "\n"
1432 << helperStr
1433 << "void main (void)\n"
1434 << "{\n"
1435 << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1436 << " highp uint offset = globalSize.x * ((globalSize.y * "
1437 "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1438 "gl_GlobalInvocationID.x;\n"
1439 << tempRes
1440 << testSrc
1441 << " result[offset] = tempRes;\n"
1442 << "}\n";
1443
1444 programCollection.glslSources.add("comp") << glu::ComputeSource(src.str()) << buildOptions;
1445 }
1446 #ifndef CTS_USES_VULKANSC
1447 else if (isAllMeshShadingStages(shaderStage))
1448 {
1449 const bool testMesh = ((shaderStage & VK_SHADER_STAGE_MESH_BIT_EXT) != 0u);
1450 const bool testTask = ((shaderStage & VK_SHADER_STAGE_TASK_BIT_EXT) != 0u);
1451
1452 if (testMesh)
1453 {
1454 std::ostringstream mesh;
1455
1456 mesh
1457 << "#version 450\n"
1458 << "#extension GL_EXT_mesh_shader : enable\n"
1459 //<< "#extension GL_NV_mesh_shader : enable\n"
1460 << extHeader
1461 << "layout (local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n"
1462 << "layout (points) out;\n"
1463 << "layout (max_vertices = 1, max_primitives = 1) out;\n"
1464 << getBufferDeclarations(shaderStage, formatName, declarations, 0)
1465 << "\n"
1466 << helperStr
1467 << "void main (void)\n"
1468 << "{\n"
1469 << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1470 //<< " uvec3 globalSize = uvec3(0, 0, 0)/*gl_NumWorkGroups*/ * gl_WorkGroupSize;\n"
1471 << " highp uint offset = globalSize.x * ((globalSize.y * "
1472 "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1473 "gl_GlobalInvocationID.x;\n"
1474 << tempRes
1475 << testSrc
1476 << " result[offset] = tempRes;\n"
1477 << " SetMeshOutputsEXT(0u, 0u);\n"
1478 //<< " gl_PrimitiveCountNV = 0;\n"
1479 << "}\n";
1480
1481 programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
1482 }
1483 else
1484 {
1485 const std::string meshShaderNoSubgroups =
1486 "#version 450\n"
1487 "#extension GL_EXT_mesh_shader : enable\n"
1488 "\n"
1489 "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
1490 "layout (points) out;\n"
1491 "layout (max_vertices = 1, max_primitives = 1) out;\n"
1492 "\n"
1493 "void main (void)\n"
1494 "{\n"
1495 " SetMeshOutputsEXT(0u, 0u);\n"
1496 "}\n"
1497 ;
1498 programCollection.glslSources.add("mesh") << glu::MeshSource(meshShaderNoSubgroups) << buildOptions;
1499 }
1500
1501 if (testTask)
1502 {
1503 const tcu::UVec3 emitSize = (testMesh ? tcu::UVec3(1u, 1u, 1u) : tcu::UVec3(0u, 0u, 0u));
1504 std::ostringstream task;
1505
1506 task
1507 << "#version 450\n"
1508 << "#extension GL_EXT_mesh_shader : enable\n"
1509 //<< "#extension GL_NV_mesh_shader : enable\n"
1510 << extHeader
1511 << "layout (local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n"
1512 << getBufferDeclarations(shaderStage, formatName, declarations, 0)
1513 << "\n"
1514 << helperStr
1515 << "void main (void)\n"
1516 << "{\n"
1517 << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1518 //<< " uvec3 globalSize = uvec3(0, 0, 0)/*gl_NumWorkGroups*/ * gl_WorkGroupSize;\n"
1519 << " highp uint offset = globalSize.x * ((globalSize.y * "
1520 "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1521 "gl_GlobalInvocationID.x;\n"
1522 << tempRes
1523 << testSrc
1524 << " result[offset] = tempRes;\n"
1525 << " EmitMeshTasksEXT(" << emitSize.x() << ", " << emitSize.y() << ", " << emitSize.z() << ");\n"
1526 //<< " gl_TaskCountNV = " << emitSize.x() << ";\n"
1527 << "}\n";
1528
1529 programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
1530 }
1531 }
1532 #endif // CTS_USES_VULKANSC
1533 else if (isAllGraphicsStages(shaderStage))
1534 {
1535 const string vertex =
1536 "#version 450\n"
1537 + extHeader
1538 + getBufferDeclarations(shaderStage, formatName, declarations, 0) +
1539 "\n"
1540 + helperStr +
1541 "void main (void)\n"
1542 "{\n"
1543 " uint tempRes;\n"
1544 + testSrc +
1545 " result[gl_VertexIndex] = tempRes;\n"
1546 " float pixelSize = 2.0f/1024.0f;\n"
1547 " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
1548 " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
1549 " gl_PointSize = 1.0f;\n"
1550 "}\n";
1551
1552 const string tesc =
1553 "#version 450\n"
1554 + extHeader +
1555 "layout(vertices=1) out;\n"
1556 + getBufferDeclarations(shaderStage, formatName, declarations, 1) +
1557 "\n"
1558 + helperStr +
1559 "void main (void)\n"
1560 "{\n"
1561 + tempRes
1562 + testSrc +
1563 " result[gl_PrimitiveID] = tempRes;\n"
1564 " if (gl_InvocationID == 0)\n"
1565 " {\n"
1566 " gl_TessLevelOuter[0] = 1.0f;\n"
1567 " gl_TessLevelOuter[1] = 1.0f;\n"
1568 " }\n"
1569 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1570 + (gsPointSize ? " gl_out[gl_InvocationID].gl_PointSize = gl_in[gl_InvocationID].gl_PointSize;\n" : "") +
1571 "}\n";
1572
1573 const string tese =
1574 "#version 450\n"
1575 + extHeader +
1576 "layout(isolines) in;\n"
1577 + getBufferDeclarations(shaderStage, formatName, declarations, 2) +
1578 "\n"
1579 + helperStr +
1580 "void main (void)\n"
1581 "{\n"
1582 + tempRes
1583 + testSrc +
1584 " result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempRes;\n"
1585 " float pixelSize = 2.0f/1024.0f;\n"
1586 " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
1587 + (gsPointSize ? " gl_PointSize = gl_in[0].gl_PointSize;\n" : "") +
1588 "}\n";
1589
1590 const string geometry =
1591 "#version 450\n"
1592 + extHeader +
1593 "layout(${TOPOLOGY}) in;\n"
1594 "layout(points, max_vertices = 1) out;\n"
1595 + getBufferDeclarations(shaderStage, formatName, declarations, 3) +
1596 "\n"
1597 + helperStr +
1598 "void main (void)\n"
1599 "{\n"
1600 + tempRes
1601 + testSrc +
1602 " result[gl_PrimitiveIDIn] = tempRes;\n"
1603 " gl_Position = gl_in[0].gl_Position;\n"
1604 + (gsPointSize ? " gl_PointSize = gl_in[0].gl_PointSize;\n" : "") +
1605 " EmitVertex();\n"
1606 " EndPrimitive();\n"
1607 "}\n";
1608
1609 const string fragment =
1610 "#version 450\n"
1611 + extHeader
1612 + getBufferDeclarations(shaderStage, formatName, declarations, 4)
1613 + helperStr +
1614 "void main (void)\n"
1615 "{\n"
1616 + (avoidHelperInvocations ? " if (gl_HelperInvocation) return;\n" : "")
1617 + tempRes
1618 + testSrc +
1619 " result = tempRes;\n"
1620 "}\n";
1621
1622 subgroups::addNoSubgroupShader(programCollection);
1623
1624 programCollection.glslSources.add("vert") << glu::VertexSource(vertex) << buildOptions;
1625 programCollection.glslSources.add("tesc") << glu::TessellationControlSource(tesc) << buildOptions;
1626 programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(tese) << buildOptions;
1627 subgroups::addGeometryShadersFromTemplate(geometry, buildOptions, programCollection.glslSources);
1628 programCollection.glslSources.add("fragment") << glu::FragmentSource(fragment)<< buildOptions;
1629 }
1630 #ifndef CTS_USES_VULKANSC
1631 else if (isAllRayTracingStages(shaderStage))
1632 {
1633 const std::string rgenShader =
1634 "#version 460 core\n"
1635 "#extension GL_EXT_ray_tracing: require\n"
1636 + extHeader +
1637 "layout(location = 0) rayPayloadEXT uvec4 payload;\n"
1638 "layout(location = 0) callableDataEXT uvec4 callData;"
1639 "layout(set = 1, binding = 0) uniform accelerationStructureEXT topLevelAS;\n"
1640 + getBufferDeclarations(shaderStage, formatName, declarations, 0) +
1641 "\n"
1642 + helperStr +
1643 "void main()\n"
1644 "{\n"
1645 + tempRes
1646 + testSrc +
1647 " uint rayFlags = 0;\n"
1648 " uint cullMask = 0xFF;\n"
1649 " float tmin = 0.0;\n"
1650 " float tmax = 9.0;\n"
1651 " vec3 origin = vec3((float(gl_LaunchIDEXT.x) + 0.5f) / float(gl_LaunchSizeEXT.x), (float(gl_LaunchIDEXT.y) + 0.5f) / float(gl_LaunchSizeEXT.y), 0.0);\n"
1652 " vec3 directHit = vec3(0.0, 0.0, -1.0);\n"
1653 " vec3 directMiss = vec3(0.0, 0.0, +1.0);\n"
1654 "\n"
1655 " traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directHit, tmax, 0);\n"
1656 " traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directMiss, tmax, 0);\n"
1657 " executeCallableEXT(0, 0);"
1658 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1659 "}\n";
1660 const std::string ahitShader =
1661 "#version 460 core\n"
1662 "#extension GL_EXT_ray_tracing: require\n"
1663 + extHeader +
1664 "hitAttributeEXT vec3 attribs;\n"
1665 "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
1666 + getBufferDeclarations(shaderStage, formatName, declarations, 1) +
1667 "\n"
1668 + helperStr +
1669 "void main()\n"
1670 "{\n"
1671 + tempRes
1672 + testSrc +
1673 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1674 "}\n";
1675 const std::string chitShader =
1676 "#version 460 core\n"
1677 "#extension GL_EXT_ray_tracing: require\n"
1678 + extHeader +
1679 "hitAttributeEXT vec3 attribs;\n"
1680 "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
1681 + getBufferDeclarations(shaderStage, formatName, declarations, 2) +
1682 "\n"
1683 + helperStr +
1684 "void main()\n"
1685 "{\n"
1686 + tempRes
1687 + testSrc +
1688 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1689 "}\n";
1690 const std::string missShader =
1691 "#version 460 core\n"
1692 "#extension GL_EXT_ray_tracing: require\n"
1693 + extHeader +
1694 "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
1695 + getBufferDeclarations(shaderStage, formatName, declarations, 3) +
1696 "\n"
1697 + helperStr +
1698 "void main()\n"
1699 "{\n"
1700 + tempRes
1701 + testSrc +
1702 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1703 "}\n";
1704 const std::string sectShader =
1705 "#version 460 core\n"
1706 "#extension GL_EXT_ray_tracing: require\n"
1707 + extHeader +
1708 "hitAttributeEXT vec3 hitAttribute;\n"
1709 + getBufferDeclarations(shaderStage, formatName, declarations, 4) +
1710 "\n"
1711 + helperStr +
1712 "void main()\n"
1713 "{\n"
1714 + tempRes
1715 + testSrc +
1716 " reportIntersectionEXT(0.75f, 0x7Eu);\n"
1717 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1718 "}\n";
1719 const std::string callShader =
1720 "#version 460 core\n"
1721 "#extension GL_EXT_ray_tracing: require\n"
1722 + extHeader +
1723 "layout(location = 0) callableDataInEXT float callData;\n"
1724 + getBufferDeclarations(shaderStage, formatName, declarations, 5) +
1725 "\n"
1726 + helperStr +
1727 "void main()\n"
1728 "{\n"
1729 + tempRes
1730 + testSrc +
1731 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1732 "}\n";
1733
1734 programCollection.glslSources.add("rgen") << glu::RaygenSource (rgenShader) << buildOptions;
1735 programCollection.glslSources.add("ahit") << glu::AnyHitSource (ahitShader) << buildOptions;
1736 programCollection.glslSources.add("chit") << glu::ClosestHitSource (chitShader) << buildOptions;
1737 programCollection.glslSources.add("miss") << glu::MissSource (missShader) << buildOptions;
1738 programCollection.glslSources.add("sect") << glu::IntersectionSource(sectShader) << buildOptions;
1739 programCollection.glslSources.add("call") << glu::CallableSource (callShader) << buildOptions;
1740
1741 subgroups::addRayTracingNoSubgroupShader(programCollection);
1742 }
1743 #endif // CTS_USES_VULKANSC
1744 else
1745 TCU_THROW(InternalError, "Unknown stage or invalid stage set");
1746
1747 }
1748
isSubgroupSupported(Context & context)1749 bool vkt::subgroups::isSubgroupSupported (Context& context)
1750 {
1751 return context.contextSupports(vk::ApiVersion(0, 1, 1, 0));
1752 }
1753
areSubgroupOperationsSupportedForStage(Context & context,const VkShaderStageFlags stage)1754 bool vkt::subgroups::areSubgroupOperationsSupportedForStage (Context& context, const VkShaderStageFlags stage)
1755 {
1756 return (stage & (context.getSubgroupProperties().supportedStages)) ? true : false;
1757 }
1758
isSubgroupFeatureSupportedForDevice(Context & context,VkSubgroupFeatureFlagBits bit)1759 bool vkt::subgroups::isSubgroupFeatureSupportedForDevice (Context& context, VkSubgroupFeatureFlagBits bit)
1760 {
1761 return (bit & (context.getSubgroupProperties().supportedOperations)) ? true : false;
1762 }
1763
areQuadOperationsSupportedForStages(Context & context,const VkShaderStageFlags stages)1764 bool vkt::subgroups::areQuadOperationsSupportedForStages (Context& context, const VkShaderStageFlags stages)
1765 {
1766 // Check general quad feature support first.
1767 if (!isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_QUAD_BIT))
1768 return false;
1769
1770 if (context.getSubgroupProperties().quadOperationsInAllStages == VK_TRUE)
1771 return true; // No problem, any stage works.
1772
1773 // Only frag and compute are supported.
1774 const VkShaderStageFlags fragCompute = (VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT);
1775 const VkShaderStageFlags otherStages = ~fragCompute;
1776 return ((stages & otherStages) == 0u);
1777 }
1778
isFragmentSSBOSupportedForDevice(Context & context)1779 bool vkt::subgroups::isFragmentSSBOSupportedForDevice (Context& context)
1780 {
1781 return context.getDeviceFeatures().fragmentStoresAndAtomics ? true : false;
1782 }
1783
isVertexSSBOSupportedForDevice(Context & context)1784 bool vkt::subgroups::isVertexSSBOSupportedForDevice (Context& context)
1785 {
1786 return context.getDeviceFeatures().vertexPipelineStoresAndAtomics ? true : false;
1787 }
1788
isInt64SupportedForDevice(Context & context)1789 bool vkt::subgroups::isInt64SupportedForDevice (Context& context)
1790 {
1791 return context.getDeviceFeatures().shaderInt64 ? true : false;
1792 }
1793
isTessellationAndGeometryPointSizeSupported(Context & context)1794 bool vkt::subgroups::isTessellationAndGeometryPointSizeSupported (Context& context)
1795 {
1796 return context.getDeviceFeatures().shaderTessellationAndGeometryPointSize ? true : false;
1797 }
1798
is16BitUBOStorageSupported(Context & context)1799 bool vkt::subgroups::is16BitUBOStorageSupported (Context& context)
1800 {
1801 return context.get16BitStorageFeatures().uniformAndStorageBuffer16BitAccess ? true : false;
1802 }
1803
is8BitUBOStorageSupported(Context & context)1804 bool vkt::subgroups::is8BitUBOStorageSupported (Context& context)
1805 {
1806 return context.get8BitStorageFeatures().uniformAndStorageBuffer8BitAccess ? true : false;
1807 }
1808
// Checks whether the device exposes every feature needed to use "format" as a
// shader data type in the subgroup tests: 64-bit floats need shaderFloat64,
// 16/64-bit integers need shaderInt16/shaderInt64, and the narrow types also
// need the subgroup extended-types feature plus the matching float16/int8 and
// 8-bit/16-bit storage-buffer access features.
bool vkt::subgroups::isFormatSupportedForDevice (Context& context, vk::VkFormat format)
{
	const VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures&	subgroupExtendedTypesFeatures	= context.getShaderSubgroupExtendedTypesFeatures();
	const VkPhysicalDeviceShaderFloat16Int8Features&			float16Int8Features				= context.getShaderFloat16Int8Features();
	const VkPhysicalDevice16BitStorageFeatures&					storage16bit					= context.get16BitStorageFeatures();
	const VkPhysicalDevice8BitStorageFeatures&					storage8bit						= context.get8BitStorageFeatures();
	const VkPhysicalDeviceFeatures&								features						= context.getDeviceFeatures();
	bool														shaderFloat64					= features.shaderFloat64 ? true : false;
	bool														shaderInt16						= features.shaderInt16 ? true : false;
	bool														shaderInt64						= features.shaderInt64 ? true : false;
	bool														shaderSubgroupExtendedTypes		= false;
	bool														shaderFloat16					= false;
	bool														shaderInt8						= false;
	bool														storageBuffer16BitAccess		= false;
	bool														storageBuffer8BitAccess			= false;

	// The extended-types/float16/int8 feature structures are only consulted
	// when both underlying extensions are advertised by the device.
	if (context.isDeviceFunctionalitySupported("VK_KHR_shader_subgroup_extended_types") &&
		context.isDeviceFunctionalitySupported("VK_KHR_shader_float16_int8"))
	{
		shaderSubgroupExtendedTypes	= subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes ? true : false;
		shaderFloat16				= float16Int8Features.shaderFloat16 ? true : false;
		shaderInt8					= float16Int8Features.shaderInt8 ? true : false;

		if ( context.isDeviceFunctionalitySupported("VK_KHR_16bit_storage") )
			storageBuffer16BitAccess = storage16bit.storageBuffer16BitAccess ? true : false;

		if (context.isDeviceFunctionalitySupported("VK_KHR_8bit_storage"))
			storageBuffer8BitAccess = storage8bit.storageBuffer8BitAccess ? true : false;
	}

	switch (format)
	{
		// 32-bit and bool-like formats have no extra feature requirements.
		default:
			return true;
		// 16-bit floats.
		case VK_FORMAT_R16_SFLOAT:
		case VK_FORMAT_R16G16_SFLOAT:
		case VK_FORMAT_R16G16B16_SFLOAT:
		case VK_FORMAT_R16G16B16A16_SFLOAT:
			return shaderSubgroupExtendedTypes && shaderFloat16 && storageBuffer16BitAccess;
		// 64-bit floats.
		case VK_FORMAT_R64_SFLOAT:
		case VK_FORMAT_R64G64_SFLOAT:
		case VK_FORMAT_R64G64B64_SFLOAT:
		case VK_FORMAT_R64G64B64A64_SFLOAT:
			return shaderFloat64;
		// 8-bit integers.
		case VK_FORMAT_R8_SINT:
		case VK_FORMAT_R8G8_SINT:
		case VK_FORMAT_R8G8B8_SINT:
		case VK_FORMAT_R8G8B8A8_SINT:
		case VK_FORMAT_R8_UINT:
		case VK_FORMAT_R8G8_UINT:
		case VK_FORMAT_R8G8B8_UINT:
		case VK_FORMAT_R8G8B8A8_UINT:
			return shaderSubgroupExtendedTypes && shaderInt8 && storageBuffer8BitAccess;
		// 16-bit integers.
		case VK_FORMAT_R16_SINT:
		case VK_FORMAT_R16G16_SINT:
		case VK_FORMAT_R16G16B16_SINT:
		case VK_FORMAT_R16G16B16A16_SINT:
		case VK_FORMAT_R16_UINT:
		case VK_FORMAT_R16G16_UINT:
		case VK_FORMAT_R16G16B16_UINT:
		case VK_FORMAT_R16G16B16A16_UINT:
			return shaderSubgroupExtendedTypes && shaderInt16 && storageBuffer16BitAccess;
		// 64-bit integers.
		case VK_FORMAT_R64_SINT:
		case VK_FORMAT_R64G64_SINT:
		case VK_FORMAT_R64G64B64_SINT:
		case VK_FORMAT_R64G64B64A64_SINT:
		case VK_FORMAT_R64_UINT:
		case VK_FORMAT_R64G64_UINT:
		case VK_FORMAT_R64G64B64_UINT:
		case VK_FORMAT_R64G64B64A64_UINT:
			return shaderSubgroupExtendedTypes && shaderInt64;
	}
}
1882
isSubgroupBroadcastDynamicIdSupported(Context & context)1883 bool vkt::subgroups::isSubgroupBroadcastDynamicIdSupported (Context& context)
1884 {
1885 return context.contextSupports(vk::ApiVersion(0, 1, 2, 0)) &&
1886 vk::getPhysicalDeviceVulkan12Features(context.getInstanceInterface(), context.getPhysicalDevice()).subgroupBroadcastDynamicId;
1887 }
1888
// Maps a VkFormat used by the subgroup tests to the corresponding GLSL type
// name (e.g. VK_FORMAT_R32G32_SFLOAT -> "vec2").  The USCALED formats are
// mapped to the GLSL bool types.  Throws InternalError for any format not in
// the table.
std::string vkt::subgroups::getFormatNameForGLSL (VkFormat format)
{
	switch (format)
	{
		case VK_FORMAT_R8_SINT:				return "int8_t";
		case VK_FORMAT_R8G8_SINT:			return "i8vec2";
		case VK_FORMAT_R8G8B8_SINT:			return "i8vec3";
		case VK_FORMAT_R8G8B8A8_SINT:		return "i8vec4";
		case VK_FORMAT_R8_UINT:				return "uint8_t";
		case VK_FORMAT_R8G8_UINT:			return "u8vec2";
		case VK_FORMAT_R8G8B8_UINT:			return "u8vec3";
		case VK_FORMAT_R8G8B8A8_UINT:		return "u8vec4";
		case VK_FORMAT_R16_SINT:			return "int16_t";
		case VK_FORMAT_R16G16_SINT:			return "i16vec2";
		case VK_FORMAT_R16G16B16_SINT:		return "i16vec3";
		case VK_FORMAT_R16G16B16A16_SINT:	return "i16vec4";
		case VK_FORMAT_R16_UINT:			return "uint16_t";
		case VK_FORMAT_R16G16_UINT:			return "u16vec2";
		case VK_FORMAT_R16G16B16_UINT:		return "u16vec3";
		case VK_FORMAT_R16G16B16A16_UINT:	return "u16vec4";
		case VK_FORMAT_R32_SINT:			return "int";
		case VK_FORMAT_R32G32_SINT:			return "ivec2";
		case VK_FORMAT_R32G32B32_SINT:		return "ivec3";
		case VK_FORMAT_R32G32B32A32_SINT:	return "ivec4";
		case VK_FORMAT_R32_UINT:			return "uint";
		case VK_FORMAT_R32G32_UINT:			return "uvec2";
		case VK_FORMAT_R32G32B32_UINT:		return "uvec3";
		case VK_FORMAT_R32G32B32A32_UINT:	return "uvec4";
		case VK_FORMAT_R64_SINT:			return "int64_t";
		case VK_FORMAT_R64G64_SINT:			return "i64vec2";
		case VK_FORMAT_R64G64B64_SINT:		return "i64vec3";
		case VK_FORMAT_R64G64B64A64_SINT:	return "i64vec4";
		case VK_FORMAT_R64_UINT:			return "uint64_t";
		case VK_FORMAT_R64G64_UINT:			return "u64vec2";
		case VK_FORMAT_R64G64B64_UINT:		return "u64vec3";
		case VK_FORMAT_R64G64B64A64_UINT:	return "u64vec4";
		case VK_FORMAT_R16_SFLOAT:			return "float16_t";
		case VK_FORMAT_R16G16_SFLOAT:		return "f16vec2";
		case VK_FORMAT_R16G16B16_SFLOAT:	return "f16vec3";
		case VK_FORMAT_R16G16B16A16_SFLOAT:	return "f16vec4";
		case VK_FORMAT_R32_SFLOAT:			return "float";
		case VK_FORMAT_R32G32_SFLOAT:		return "vec2";
		case VK_FORMAT_R32G32B32_SFLOAT:	return "vec3";
		case VK_FORMAT_R32G32B32A32_SFLOAT:	return "vec4";
		case VK_FORMAT_R64_SFLOAT:			return "double";
		case VK_FORMAT_R64G64_SFLOAT:		return "dvec2";
		case VK_FORMAT_R64G64B64_SFLOAT:	return "dvec3";
		case VK_FORMAT_R64G64B64A64_SFLOAT:	return "dvec4";
		case VK_FORMAT_R8_USCALED:			return "bool";
		case VK_FORMAT_R8G8_USCALED:		return "bvec2";
		case VK_FORMAT_R8G8B8_USCALED:		return "bvec3";
		case VK_FORMAT_R8G8B8A8_USCALED:	return "bvec4";
		default:							TCU_THROW(InternalError, "Unhandled format");
	}
}
1944
// Returns the GLSL "#extension GL_EXT_shader_subgroup_extended_types_*" line
// needed to use the given format's type in subgroup operations, or an empty
// string for types that need no additional extension.
std::string vkt::subgroups::getAdditionalExtensionForFormat (vk::VkFormat format)
{
	switch (format)
	{
		// 32-bit types, 64-bit floats and bools need nothing extra.
		default:
			return "";
		// 8-bit integers.
		case VK_FORMAT_R8_SINT:
		case VK_FORMAT_R8G8_SINT:
		case VK_FORMAT_R8G8B8_SINT:
		case VK_FORMAT_R8G8B8A8_SINT:
		case VK_FORMAT_R8_UINT:
		case VK_FORMAT_R8G8_UINT:
		case VK_FORMAT_R8G8B8_UINT:
		case VK_FORMAT_R8G8B8A8_UINT:
			return "#extension GL_EXT_shader_subgroup_extended_types_int8 : enable\n";
		// 16-bit integers.
		case VK_FORMAT_R16_SINT:
		case VK_FORMAT_R16G16_SINT:
		case VK_FORMAT_R16G16B16_SINT:
		case VK_FORMAT_R16G16B16A16_SINT:
		case VK_FORMAT_R16_UINT:
		case VK_FORMAT_R16G16_UINT:
		case VK_FORMAT_R16G16B16_UINT:
		case VK_FORMAT_R16G16B16A16_UINT:
			return "#extension GL_EXT_shader_subgroup_extended_types_int16 : enable\n";
		// 64-bit integers.
		case VK_FORMAT_R64_SINT:
		case VK_FORMAT_R64G64_SINT:
		case VK_FORMAT_R64G64B64_SINT:
		case VK_FORMAT_R64G64B64A64_SINT:
		case VK_FORMAT_R64_UINT:
		case VK_FORMAT_R64G64_UINT:
		case VK_FORMAT_R64G64B64_UINT:
		case VK_FORMAT_R64G64B64A64_UINT:
			return "#extension GL_EXT_shader_subgroup_extended_types_int64 : enable\n";
		// 16-bit floats.
		case VK_FORMAT_R16_SFLOAT:
		case VK_FORMAT_R16G16_SFLOAT:
		case VK_FORMAT_R16G16B16_SFLOAT:
		case VK_FORMAT_R16G16B16A16_SFLOAT:
			return "#extension GL_EXT_shader_subgroup_extended_types_float16 : enable\n";
	}
}
1985
getAllFormats()1986 const std::vector<vk::VkFormat> vkt::subgroups::getAllFormats ()
1987 {
1988 std::vector<VkFormat> formats;
1989
1990 formats.push_back(VK_FORMAT_R8_SINT);
1991 formats.push_back(VK_FORMAT_R8G8_SINT);
1992 formats.push_back(VK_FORMAT_R8G8B8_SINT);
1993 formats.push_back(VK_FORMAT_R8G8B8A8_SINT);
1994 formats.push_back(VK_FORMAT_R8_UINT);
1995 formats.push_back(VK_FORMAT_R8G8_UINT);
1996 formats.push_back(VK_FORMAT_R8G8B8_UINT);
1997 formats.push_back(VK_FORMAT_R8G8B8A8_UINT);
1998 formats.push_back(VK_FORMAT_R16_SINT);
1999 formats.push_back(VK_FORMAT_R16G16_SINT);
2000 formats.push_back(VK_FORMAT_R16G16B16_SINT);
2001 formats.push_back(VK_FORMAT_R16G16B16A16_SINT);
2002 formats.push_back(VK_FORMAT_R16_UINT);
2003 formats.push_back(VK_FORMAT_R16G16_UINT);
2004 formats.push_back(VK_FORMAT_R16G16B16_UINT);
2005 formats.push_back(VK_FORMAT_R16G16B16A16_UINT);
2006 formats.push_back(VK_FORMAT_R32_SINT);
2007 formats.push_back(VK_FORMAT_R32G32_SINT);
2008 formats.push_back(VK_FORMAT_R32G32B32_SINT);
2009 formats.push_back(VK_FORMAT_R32G32B32A32_SINT);
2010 formats.push_back(VK_FORMAT_R32_UINT);
2011 formats.push_back(VK_FORMAT_R32G32_UINT);
2012 formats.push_back(VK_FORMAT_R32G32B32_UINT);
2013 formats.push_back(VK_FORMAT_R32G32B32A32_UINT);
2014 formats.push_back(VK_FORMAT_R64_SINT);
2015 formats.push_back(VK_FORMAT_R64G64_SINT);
2016 formats.push_back(VK_FORMAT_R64G64B64_SINT);
2017 formats.push_back(VK_FORMAT_R64G64B64A64_SINT);
2018 formats.push_back(VK_FORMAT_R64_UINT);
2019 formats.push_back(VK_FORMAT_R64G64_UINT);
2020 formats.push_back(VK_FORMAT_R64G64B64_UINT);
2021 formats.push_back(VK_FORMAT_R64G64B64A64_UINT);
2022 formats.push_back(VK_FORMAT_R16_SFLOAT);
2023 formats.push_back(VK_FORMAT_R16G16_SFLOAT);
2024 formats.push_back(VK_FORMAT_R16G16B16_SFLOAT);
2025 formats.push_back(VK_FORMAT_R16G16B16A16_SFLOAT);
2026 formats.push_back(VK_FORMAT_R32_SFLOAT);
2027 formats.push_back(VK_FORMAT_R32G32_SFLOAT);
2028 formats.push_back(VK_FORMAT_R32G32B32_SFLOAT);
2029 formats.push_back(VK_FORMAT_R32G32B32A32_SFLOAT);
2030 formats.push_back(VK_FORMAT_R64_SFLOAT);
2031 formats.push_back(VK_FORMAT_R64G64_SFLOAT);
2032 formats.push_back(VK_FORMAT_R64G64B64_SFLOAT);
2033 formats.push_back(VK_FORMAT_R64G64B64A64_SFLOAT);
2034 formats.push_back(VK_FORMAT_R8_USCALED);
2035 formats.push_back(VK_FORMAT_R8G8_USCALED);
2036 formats.push_back(VK_FORMAT_R8G8B8_USCALED);
2037 formats.push_back(VK_FORMAT_R8G8B8A8_USCALED);
2038
2039 return formats;
2040 }
2041
isFormatSigned(VkFormat format)2042 bool vkt::subgroups::isFormatSigned (VkFormat format)
2043 {
2044 switch (format)
2045 {
2046 default:
2047 return false;
2048 case VK_FORMAT_R8_SINT:
2049 case VK_FORMAT_R8G8_SINT:
2050 case VK_FORMAT_R8G8B8_SINT:
2051 case VK_FORMAT_R8G8B8A8_SINT:
2052 case VK_FORMAT_R16_SINT:
2053 case VK_FORMAT_R16G16_SINT:
2054 case VK_FORMAT_R16G16B16_SINT:
2055 case VK_FORMAT_R16G16B16A16_SINT:
2056 case VK_FORMAT_R32_SINT:
2057 case VK_FORMAT_R32G32_SINT:
2058 case VK_FORMAT_R32G32B32_SINT:
2059 case VK_FORMAT_R32G32B32A32_SINT:
2060 case VK_FORMAT_R64_SINT:
2061 case VK_FORMAT_R64G64_SINT:
2062 case VK_FORMAT_R64G64B64_SINT:
2063 case VK_FORMAT_R64G64B64A64_SINT:
2064 return true;
2065 }
2066 }
2067
isFormatUnsigned(VkFormat format)2068 bool vkt::subgroups::isFormatUnsigned (VkFormat format)
2069 {
2070 switch (format)
2071 {
2072 default:
2073 return false;
2074 case VK_FORMAT_R8_UINT:
2075 case VK_FORMAT_R8G8_UINT:
2076 case VK_FORMAT_R8G8B8_UINT:
2077 case VK_FORMAT_R8G8B8A8_UINT:
2078 case VK_FORMAT_R16_UINT:
2079 case VK_FORMAT_R16G16_UINT:
2080 case VK_FORMAT_R16G16B16_UINT:
2081 case VK_FORMAT_R16G16B16A16_UINT:
2082 case VK_FORMAT_R32_UINT:
2083 case VK_FORMAT_R32G32_UINT:
2084 case VK_FORMAT_R32G32B32_UINT:
2085 case VK_FORMAT_R32G32B32A32_UINT:
2086 case VK_FORMAT_R64_UINT:
2087 case VK_FORMAT_R64G64_UINT:
2088 case VK_FORMAT_R64G64B64_UINT:
2089 case VK_FORMAT_R64G64B64A64_UINT:
2090 return true;
2091 }
2092 }
2093
isFormatFloat(VkFormat format)2094 bool vkt::subgroups::isFormatFloat (VkFormat format)
2095 {
2096 switch (format)
2097 {
2098 default:
2099 return false;
2100 case VK_FORMAT_R16_SFLOAT:
2101 case VK_FORMAT_R16G16_SFLOAT:
2102 case VK_FORMAT_R16G16B16_SFLOAT:
2103 case VK_FORMAT_R16G16B16A16_SFLOAT:
2104 case VK_FORMAT_R32_SFLOAT:
2105 case VK_FORMAT_R32G32_SFLOAT:
2106 case VK_FORMAT_R32G32B32_SFLOAT:
2107 case VK_FORMAT_R32G32B32A32_SFLOAT:
2108 case VK_FORMAT_R64_SFLOAT:
2109 case VK_FORMAT_R64G64_SFLOAT:
2110 case VK_FORMAT_R64G64B64_SFLOAT:
2111 case VK_FORMAT_R64G64B64A64_SFLOAT:
2112 return true;
2113 }
2114 }
2115
isFormatBool(VkFormat format)2116 bool vkt::subgroups::isFormatBool (VkFormat format)
2117 {
2118 switch (format)
2119 {
2120 default:
2121 return false;
2122 case VK_FORMAT_R8_USCALED:
2123 case VK_FORMAT_R8G8_USCALED:
2124 case VK_FORMAT_R8G8B8_USCALED:
2125 case VK_FORMAT_R8G8B8A8_USCALED:
2126 return true;
2127 }
2128 }
2129
isFormat8bitTy(VkFormat format)2130 bool vkt::subgroups::isFormat8bitTy (VkFormat format)
2131 {
2132 switch (format)
2133 {
2134 default:
2135 return false;
2136 case VK_FORMAT_R8_SINT:
2137 case VK_FORMAT_R8G8_SINT:
2138 case VK_FORMAT_R8G8B8_SINT:
2139 case VK_FORMAT_R8G8B8A8_SINT:
2140 case VK_FORMAT_R8_UINT:
2141 case VK_FORMAT_R8G8_UINT:
2142 case VK_FORMAT_R8G8B8_UINT:
2143 case VK_FORMAT_R8G8B8A8_UINT:
2144 return true;
2145 }
2146 }
2147
isFormat16BitTy(VkFormat format)2148 bool vkt::subgroups::isFormat16BitTy (VkFormat format)
2149 {
2150 switch (format)
2151 {
2152 default:
2153 return false;
2154 case VK_FORMAT_R16_SFLOAT:
2155 case VK_FORMAT_R16G16_SFLOAT:
2156 case VK_FORMAT_R16G16B16_SFLOAT:
2157 case VK_FORMAT_R16G16B16A16_SFLOAT:
2158 case VK_FORMAT_R16_SINT:
2159 case VK_FORMAT_R16G16_SINT:
2160 case VK_FORMAT_R16G16B16_SINT:
2161 case VK_FORMAT_R16G16B16A16_SINT:
2162 case VK_FORMAT_R16_UINT:
2163 case VK_FORMAT_R16G16_UINT:
2164 case VK_FORMAT_R16G16B16_UINT:
2165 case VK_FORMAT_R16G16B16A16_UINT:
2166 return true;
2167 }
2168 }
2169
// Adds a pass-through vertex shader ("vert") as hand-written SPIR-V assembly
// to the program collection.  It copies the input position to gl_Position and
// sets gl_PointSize to 1.0; the equivalent GLSL is shown in the comment below.
void vkt::subgroups::setVertexShaderFrameBuffer (SourceCollections& programCollection)
{
	/*
		"layout(location = 0) in highp vec4 in_position;\n"
		"void main (void)\n"
		"{\n"
		"  gl_Position = in_position;\n"
		"  gl_PointSize = 1.0f;\n"
		"}\n";
	*/
	programCollection.spirvAsmSources.add("vert") <<
		"; SPIR-V\n"
		"; Version: 1.3\n"
		"; Generator: Khronos Glslang Reference Front End; 7\n"
		"; Bound: 25\n"
		"; Schema: 0\n"
		"OpCapability Shader\n"
		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
		"OpMemoryModel Logical GLSL450\n"
		"OpEntryPoint Vertex %4 \"main\" %13 %17\n"
		"OpMemberDecorate %11 0 BuiltIn Position\n"
		"OpMemberDecorate %11 1 BuiltIn PointSize\n"
		"OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
		"OpMemberDecorate %11 3 BuiltIn CullDistance\n"
		"OpDecorate %11 Block\n"
		"OpDecorate %17 Location 0\n"
		"%2 = OpTypeVoid\n"
		"%3 = OpTypeFunction %2\n"
		"%6 = OpTypeFloat 32\n"
		"%7 = OpTypeVector %6 4\n"
		"%8 = OpTypeInt 32 0\n"
		"%9 = OpConstant %8 1\n"
		"%10 = OpTypeArray %6 %9\n"
		"%11 = OpTypeStruct %7 %6 %10 %10\n"
		"%12 = OpTypePointer Output %11\n"
		"%13 = OpVariable %12 Output\n"
		"%14 = OpTypeInt 32 1\n"
		"%15 = OpConstant %14 0\n"
		"%16 = OpTypePointer Input %7\n"
		"%17 = OpVariable %16 Input\n"
		"%19 = OpTypePointer Output %7\n"
		"%21 = OpConstant %14 1\n"
		"%22 = OpConstant %6 1\n"
		"%23 = OpTypePointer Output %6\n"
		"%4 = OpFunction %2 None %3\n"
		"%5 = OpLabel\n"
		"%18 = OpLoad %7 %17\n"
		"%20 = OpAccessChain %19 %13 %15\n"
		"OpStore %20 %18\n"
		"%24 = OpAccessChain %23 %13 %21\n"
		"OpStore %24 %22\n"
		"OpReturn\n"
		"OpFunctionEnd\n";
}
2224
// Adds a fragment shader ("fragment") as hand-written SPIR-V assembly that
// converts its float input to uint and writes it to the color output; the
// equivalent GLSL is shown in the comment below.
void vkt::subgroups::setFragmentShaderFrameBuffer (vk::SourceCollections& programCollection)
{
	/*
		"layout(location = 0) in float in_color;\n"
		"layout(location = 0) out uint out_color;\n"
		"void main()\n"
		"{\n"
		"	out_color = uint(in_color);\n"
		"}\n";
	*/
	programCollection.spirvAsmSources.add("fragment") <<
		"; SPIR-V\n"
		"; Version: 1.3\n"
		"; Generator: Khronos Glslang Reference Front End; 2\n"
		"; Bound: 14\n"
		"; Schema: 0\n"
		"OpCapability Shader\n"
		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
		"OpMemoryModel Logical GLSL450\n"
		"OpEntryPoint Fragment %4 \"main\" %8 %11\n"
		"OpExecutionMode %4 OriginUpperLeft\n"
		"OpDecorate %8 Location 0\n"
		"OpDecorate %11 Location 0\n"
		"%2 = OpTypeVoid\n"
		"%3 = OpTypeFunction %2\n"
		"%6 = OpTypeInt 32 0\n"
		"%7 = OpTypePointer Output %6\n"
		"%8 = OpVariable %7 Output\n"
		"%9 = OpTypeFloat 32\n"
		"%10 = OpTypePointer Input %9\n"
		"%11 = OpVariable %10 Input\n"
		"%4 = OpFunction %2 None %3\n"
		"%5 = OpLabel\n"
		"%12 = OpLoad %9 %11\n"
		"%13 = OpConvertFToU %6 %12\n"
		"OpStore %8 %13\n"
		"OpReturn\n"
		"OpFunctionEnd\n";
}
2264
// Adds a tessellation control shader ("tesc") as hand-written SPIR-V assembly.
// It emits two control points, sets both outer tessellation levels to 1.0 from
// invocation 0, and passes positions through; the equivalent GLSL is shown in
// the comment below.
void vkt::subgroups::setTesCtrlShaderFrameBuffer (vk::SourceCollections& programCollection)
{
	/*
		"#extension GL_KHR_shader_subgroup_basic: enable\n"
		"#extension GL_EXT_tessellation_shader : require\n"
		"layout(vertices = 2) out;\n"
		"void main (void)\n"
		"{\n"
		"  if (gl_InvocationID == 0)\n"
		"  {\n"
		"    gl_TessLevelOuter[0] = 1.0f;\n"
		"    gl_TessLevelOuter[1] = 1.0f;\n"
		"  }\n"
		"  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
		"}\n";
	*/
	programCollection.spirvAsmSources.add("tesc") <<
		"; SPIR-V\n"
		"; Version: 1.3\n"
		"; Generator: Khronos Glslang Reference Front End; 2\n"
		"; Bound: 46\n"
		"; Schema: 0\n"
		"OpCapability Tessellation\n"
		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
		"OpMemoryModel Logical GLSL450\n"
		"OpEntryPoint TessellationControl %4 \"main\" %8 %20 %33 %39\n"
		"OpExecutionMode %4 OutputVertices 2\n"
		"OpDecorate %8 BuiltIn InvocationId\n"
		"OpDecorate %20 Patch\n"
		"OpDecorate %20 BuiltIn TessLevelOuter\n"
		"OpMemberDecorate %29 0 BuiltIn Position\n"
		"OpMemberDecorate %29 1 BuiltIn PointSize\n"
		"OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
		"OpMemberDecorate %29 3 BuiltIn CullDistance\n"
		"OpDecorate %29 Block\n"
		"OpMemberDecorate %35 0 BuiltIn Position\n"
		"OpMemberDecorate %35 1 BuiltIn PointSize\n"
		"OpMemberDecorate %35 2 BuiltIn ClipDistance\n"
		"OpMemberDecorate %35 3 BuiltIn CullDistance\n"
		"OpDecorate %35 Block\n"
		"%2 = OpTypeVoid\n"
		"%3 = OpTypeFunction %2\n"
		"%6 = OpTypeInt 32 1\n"
		"%7 = OpTypePointer Input %6\n"
		"%8 = OpVariable %7 Input\n"
		"%10 = OpConstant %6 0\n"
		"%11 = OpTypeBool\n"
		"%15 = OpTypeFloat 32\n"
		"%16 = OpTypeInt 32 0\n"
		"%17 = OpConstant %16 4\n"
		"%18 = OpTypeArray %15 %17\n"
		"%19 = OpTypePointer Output %18\n"
		"%20 = OpVariable %19 Output\n"
		"%21 = OpConstant %15 1\n"
		"%22 = OpTypePointer Output %15\n"
		"%24 = OpConstant %6 1\n"
		"%26 = OpTypeVector %15 4\n"
		"%27 = OpConstant %16 1\n"
		"%28 = OpTypeArray %15 %27\n"
		"%29 = OpTypeStruct %26 %15 %28 %28\n"
		"%30 = OpConstant %16 2\n"
		"%31 = OpTypeArray %29 %30\n"
		"%32 = OpTypePointer Output %31\n"
		"%33 = OpVariable %32 Output\n"
		"%35 = OpTypeStruct %26 %15 %28 %28\n"
		"%36 = OpConstant %16 32\n"
		"%37 = OpTypeArray %35 %36\n"
		"%38 = OpTypePointer Input %37\n"
		"%39 = OpVariable %38 Input\n"
		"%41 = OpTypePointer Input %26\n"
		"%44 = OpTypePointer Output %26\n"
		"%4 = OpFunction %2 None %3\n"
		"%5 = OpLabel\n"
		"%9 = OpLoad %6 %8\n"
		"%12 = OpIEqual %11 %9 %10\n"
		"OpSelectionMerge %14 None\n"
		"OpBranchConditional %12 %13 %14\n"
		"%13 = OpLabel\n"
		"%23 = OpAccessChain %22 %20 %10\n"
		"OpStore %23 %21\n"
		"%25 = OpAccessChain %22 %20 %24\n"
		"OpStore %25 %21\n"
		"OpBranch %14\n"
		"%14 = OpLabel\n"
		"%34 = OpLoad %6 %8\n"
		"%40 = OpLoad %6 %8\n"
		"%42 = OpAccessChain %41 %39 %40 %10\n"
		"%43 = OpLoad %26 %42\n"
		"%45 = OpAccessChain %44 %33 %34 %10\n"
		"OpStore %45 %43\n"
		"OpReturn\n"
		"OpFunctionEnd\n";
}
2358
// Adds a pass-through tessellation evaluation shader ("tese") to the program
// collection as hand-written SPIR-V assembly. It is the SPIR-V equivalent of
// the GLSL source quoted in the comment below: it emits isolines with equal
// spacing and CCW winding, interpolates gl_Position linearly between the two
// patch vertices using gl_TessCoord.x, and forwards in_color[0] to out_color.
void vkt::subgroups::setTesEvalShaderFrameBuffer (vk::SourceCollections& programCollection)
{
	/*
		"#extension GL_KHR_shader_subgroup_ballot: enable\n"
		"#extension GL_EXT_tessellation_shader : require\n"
		"layout(isolines, equal_spacing, ccw ) in;\n"
		"layout(location = 0) in float in_color[];\n"
		"layout(location = 0) out float out_color;\n"
		"\n"
		"void main (void)\n"
		"{\n"
		"  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
		"  out_color = in_color[0];\n"
		"}\n";
	*/
	programCollection.spirvAsmSources.add("tese") <<
		"; SPIR-V\n"
		"; Version: 1.3\n"
		"; Generator: Khronos Glslang Reference Front End; 2\n"
		"; Bound: 45\n"
		"; Schema: 0\n"
		"OpCapability Tessellation\n"
		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
		"OpMemoryModel Logical GLSL450\n"
		"OpEntryPoint TessellationEvaluation %4 \"main\" %13 %20 %29 %39 %42\n"
		"OpExecutionMode %4 Isolines\n"
		"OpExecutionMode %4 SpacingEqual\n"
		"OpExecutionMode %4 VertexOrderCcw\n"
		// Decorations: %11/%16 are the per-vertex gl_PerVertex blocks (output
		// and input respectively), %29 is gl_TessCoord, %39/%42 are the
		// location-0 color output/input.
		"OpMemberDecorate %11 0 BuiltIn Position\n"
		"OpMemberDecorate %11 1 BuiltIn PointSize\n"
		"OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
		"OpMemberDecorate %11 3 BuiltIn CullDistance\n"
		"OpDecorate %11 Block\n"
		"OpMemberDecorate %16 0 BuiltIn Position\n"
		"OpMemberDecorate %16 1 BuiltIn PointSize\n"
		"OpMemberDecorate %16 2 BuiltIn ClipDistance\n"
		"OpMemberDecorate %16 3 BuiltIn CullDistance\n"
		"OpDecorate %16 Block\n"
		"OpDecorate %29 BuiltIn TessCoord\n"
		"OpDecorate %39 Location 0\n"
		"OpDecorate %42 Location 0\n"
		// Types, constants and interface variables.
		"%2 = OpTypeVoid\n"
		"%3 = OpTypeFunction %2\n"
		"%6 = OpTypeFloat 32\n"
		"%7 = OpTypeVector %6 4\n"
		"%8 = OpTypeInt 32 0\n"
		"%9 = OpConstant %8 1\n"
		"%10 = OpTypeArray %6 %9\n"
		"%11 = OpTypeStruct %7 %6 %10 %10\n"
		"%12 = OpTypePointer Output %11\n"
		"%13 = OpVariable %12 Output\n"
		"%14 = OpTypeInt 32 1\n"
		"%15 = OpConstant %14 0\n"
		"%16 = OpTypeStruct %7 %6 %10 %10\n"
		"%17 = OpConstant %8 32\n"
		"%18 = OpTypeArray %16 %17\n"
		"%19 = OpTypePointer Input %18\n"
		"%20 = OpVariable %19 Input\n"
		"%21 = OpTypePointer Input %7\n"
		"%24 = OpConstant %14 1\n"
		"%27 = OpTypeVector %6 3\n"
		"%28 = OpTypePointer Input %27\n"
		"%29 = OpVariable %28 Input\n"
		"%30 = OpConstant %8 0\n"
		"%31 = OpTypePointer Input %6\n"
		"%36 = OpTypePointer Output %7\n"
		"%38 = OpTypePointer Output %6\n"
		"%39 = OpVariable %38 Output\n"
		"%40 = OpTypeArray %6 %17\n"
		"%41 = OpTypePointer Input %40\n"
		"%42 = OpVariable %41 Input\n"
		// main(): gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position,
		// gl_TessCoord.x); out_color = in_color[0];
		"%4 = OpFunction %2 None %3\n"
		"%5 = OpLabel\n"
		"%22 = OpAccessChain %21 %20 %15 %15\n"
		"%23 = OpLoad %7 %22\n"
		"%25 = OpAccessChain %21 %20 %24 %15\n"
		"%26 = OpLoad %7 %25\n"
		"%32 = OpAccessChain %31 %29 %30\n"
		"%33 = OpLoad %6 %32\n"
		"%34 = OpCompositeConstruct %7 %33 %33 %33 %33\n"
		"%35 = OpExtInst %7 %1 FMix %23 %26 %34\n"
		"%37 = OpAccessChain %36 %13 %15\n"
		"OpStore %37 %35\n"
		"%43 = OpAccessChain %31 %42 %15\n"
		"%44 = OpLoad %6 %43\n"
		"OpStore %39 %44\n"
		"OpReturn\n"
		"OpFunctionEnd\n";
}
2448
addGeometryShadersFromTemplate(const std::string & glslTemplate,const vk::ShaderBuildOptions & options,vk::GlslSourceCollection & collection)2449 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& glslTemplate, const vk::ShaderBuildOptions& options, vk::GlslSourceCollection& collection)
2450 {
2451 tcu::StringTemplate geometryTemplate(glslTemplate);
2452
2453 map<string, string> linesParams;
2454 linesParams.insert(pair<string, string>("TOPOLOGY", "lines"));
2455
2456 map<string, string> pointsParams;
2457 pointsParams.insert(pair<string, string>("TOPOLOGY", "points"));
2458
2459 collection.add("geometry_lines") << glu::GeometrySource(geometryTemplate.specialize(linesParams)) << options;
2460 collection.add("geometry_points") << glu::GeometrySource(geometryTemplate.specialize(pointsParams)) << options;
2461 }
2462
addGeometryShadersFromTemplate(const std::string & spirvTemplate,const vk::SpirVAsmBuildOptions & options,vk::SpirVAsmCollection & collection)2463 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& spirvTemplate, const vk::SpirVAsmBuildOptions& options, vk::SpirVAsmCollection& collection)
2464 {
2465 tcu::StringTemplate geometryTemplate(spirvTemplate);
2466
2467 map<string, string> linesParams;
2468 linesParams.insert(pair<string, string>("TOPOLOGY", "InputLines"));
2469
2470 map<string, string> pointsParams;
2471 pointsParams.insert(pair<string, string>("TOPOLOGY", "InputPoints"));
2472
2473 collection.add("geometry_lines") << geometryTemplate.specialize(linesParams) << options;
2474 collection.add("geometry_points") << geometryTemplate.specialize(pointsParams) << options;
2475 }
2476
// Fills the host-visible memory behind an input buffer/image with either
// pseudo-random or zero data, as requested by data.initializeType, then
// flushes the allocation so the device sees the contents. InitializeNone
// leaves the memory untouched (and skips the flush).
void initializeMemory (Context& context, const Allocation& alloc, const subgroups::SSBOData& data)
{
	const vk::VkFormat format = data.format;
	// Total byte size: images use the raw texel size, buffers use the
	// layout-dependent element size (std140/std430).
	const vk::VkDeviceSize size = data.numElements *
		(data.isImage() ? getFormatSizeInBytes(format) : getElementSizeInBytes(format, data.layout));
	if (subgroups::SSBOData::InitializeNonZero == data.initializeType)
	{
		// Deterministic pseudo-random data, seeded from the command line so
		// runs are reproducible.
		de::Random rnd(context.getTestContext().getCommandLine().getBaseSeed());

		// Each case group fills the whole allocation with random values of the
		// format's component width.
		switch (format)
		{
			default:
				DE_FATAL("Illegal buffer format");
				break;
			case VK_FORMAT_R8_SINT:
			case VK_FORMAT_R8G8_SINT:
			case VK_FORMAT_R8G8B8_SINT:
			case VK_FORMAT_R8G8B8A8_SINT:
			case VK_FORMAT_R8_UINT:
			case VK_FORMAT_R8G8_UINT:
			case VK_FORMAT_R8G8B8_UINT:
			case VK_FORMAT_R8G8B8A8_UINT:
			{
				deUint8* ptr = reinterpret_cast<deUint8*>(alloc.getHostPtr());

				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint8)); k++)
				{
					ptr[k] = rnd.getUint8();
				}
			}
			break;
			case VK_FORMAT_R16_SINT:
			case VK_FORMAT_R16G16_SINT:
			case VK_FORMAT_R16G16B16_SINT:
			case VK_FORMAT_R16G16B16A16_SINT:
			case VK_FORMAT_R16_UINT:
			case VK_FORMAT_R16G16_UINT:
			case VK_FORMAT_R16G16B16_UINT:
			case VK_FORMAT_R16G16B16A16_UINT:
			{
				deUint16* ptr = reinterpret_cast<deUint16*>(alloc.getHostPtr());

				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint16)); k++)
				{
					ptr[k] = rnd.getUint16();
				}
			}
			break;
			case VK_FORMAT_R8_USCALED:
			case VK_FORMAT_R8G8_USCALED:
			case VK_FORMAT_R8G8B8_USCALED:
			case VK_FORMAT_R8G8B8A8_USCALED:
			{
				// NOTE(review): the USCALED formats are written as 32-bit
				// words, and even random values are forced to zero —
				// presumably to guarantee a mix of zero and non-zero entries.
				// Confirm against the tests consuming these formats.
				deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());

				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
				{
					deUint32 r = rnd.getUint32();
					ptr[k] = (r & 1) ? r : 0;
				}
			}
			break;
			case VK_FORMAT_R32_SINT:
			case VK_FORMAT_R32G32_SINT:
			case VK_FORMAT_R32G32B32_SINT:
			case VK_FORMAT_R32G32B32A32_SINT:
			case VK_FORMAT_R32_UINT:
			case VK_FORMAT_R32G32_UINT:
			case VK_FORMAT_R32G32B32_UINT:
			case VK_FORMAT_R32G32B32A32_UINT:
			{
				deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());

				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
				{
					ptr[k] = rnd.getUint32();
				}
			}
			break;
			case VK_FORMAT_R64_SINT:
			case VK_FORMAT_R64G64_SINT:
			case VK_FORMAT_R64G64B64_SINT:
			case VK_FORMAT_R64G64B64A64_SINT:
			case VK_FORMAT_R64_UINT:
			case VK_FORMAT_R64G64_UINT:
			case VK_FORMAT_R64G64B64_UINT:
			case VK_FORMAT_R64G64B64A64_UINT:
			{
				deUint64* ptr = reinterpret_cast<deUint64*>(alloc.getHostPtr());

				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint64)); k++)
				{
					ptr[k] = rnd.getUint64();
				}
			}
			break;
			case VK_FORMAT_R16_SFLOAT:
			case VK_FORMAT_R16G16_SFLOAT:
			case VK_FORMAT_R16G16B16_SFLOAT:
			case VK_FORMAT_R16G16B16A16_SFLOAT:
			{
				deFloat16* ptr = reinterpret_cast<deFloat16*>(alloc.getHostPtr());

				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deFloat16)); k++)
				{
					// Random float in [0,1), converted to half precision.
					ptr[k] = deFloat32To16(rnd.getFloat());
				}
			}
			break;
			case VK_FORMAT_R32_SFLOAT:
			case VK_FORMAT_R32G32_SFLOAT:
			case VK_FORMAT_R32G32B32_SFLOAT:
			case VK_FORMAT_R32G32B32A32_SFLOAT:
			{
				float* ptr = reinterpret_cast<float*>(alloc.getHostPtr());

				for (vk::VkDeviceSize k = 0; k < (size / sizeof(float)); k++)
				{
					ptr[k] = rnd.getFloat();
				}
			}
			break;
			case VK_FORMAT_R64_SFLOAT:
			case VK_FORMAT_R64G64_SFLOAT:
			case VK_FORMAT_R64G64B64_SFLOAT:
			case VK_FORMAT_R64G64B64A64_SFLOAT:
			{
				double* ptr = reinterpret_cast<double*>(alloc.getHostPtr());

				for (vk::VkDeviceSize k = 0; k < (size / sizeof(double)); k++)
				{
					ptr[k] = rnd.getDouble();
				}
			}
			break;
		}
	}
	else if (subgroups::SSBOData::InitializeZero == data.initializeType)
	{
		// Zero-fill as 32-bit words (size is expected to be 4-byte aligned
		// for the formats used here).
		deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());

		for (vk::VkDeviceSize k = 0; k < size / 4; k++)
		{
			ptr[k] = 0;
		}
	}

	// Make the writes visible to the device unless the memory was left as-is.
	if (subgroups::SSBOData::InitializeNone != data.initializeType)
	{
		flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
	}
}
2629
getResultBinding(const VkShaderStageFlagBits shaderStage)2630 deUint32 getResultBinding (const VkShaderStageFlagBits shaderStage)
2631 {
2632 switch(shaderStage)
2633 {
2634 case VK_SHADER_STAGE_VERTEX_BIT:
2635 return 0u;
2636 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
2637 return 1u;
2638 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
2639 return 2u;
2640 case VK_SHADER_STAGE_GEOMETRY_BIT:
2641 return 3u;
2642 default:
2643 DE_ASSERT(0);
2644 return -1;
2645 }
2646 DE_ASSERT(0);
2647 return -1;
2648 }
2649
makeTessellationEvaluationFrameBufferTest(Context & context,VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,subgroups::CheckResult checkResult,const VkShaderStageFlags shaderStage)2650 tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTest (Context& context,
2651 VkFormat format,
2652 const SSBOData* extraData,
2653 deUint32 extraDataCount,
2654 const void* internalData,
2655 subgroups::CheckResult checkResult,
2656 const VkShaderStageFlags shaderStage)
2657 {
2658 return makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, shaderStage, 0u, 0u);
2659 }
2660
makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,subgroups::CheckResult checkResult,const VkShaderStageFlags shaderStage,const deUint32 tessShaderStageCreateFlags,const deUint32 requiredSubgroupSize)2661 tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize (Context& context,
2662 VkFormat format,
2663 const SSBOData* extraData,
2664 deUint32 extraDataCount,
2665 const void* internalData,
2666 subgroups::CheckResult checkResult,
2667 const VkShaderStageFlags shaderStage,
2668 const deUint32 tessShaderStageCreateFlags,
2669 const deUint32 requiredSubgroupSize)
2670 {
2671 const DeviceInterface& vk = context.getDeviceInterface();
2672 const VkDevice device = context.getDevice();
2673 const deUint32 maxWidth = getMaxWidth();
2674 vector<de::SharedPtr<BufferOrImage> > inputBuffers (extraDataCount);
2675 DescriptorSetLayoutBuilder layoutBuilder;
2676 DescriptorPoolBuilder poolBuilder;
2677 DescriptorSetUpdateBuilder updateBuilder;
2678 Move <VkDescriptorPool> descriptorPool;
2679 Move <VkDescriptorSet> descriptorSet;
2680 const Unique<VkShaderModule> vertexShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
2681 const Unique<VkShaderModule> teCtrlShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("tesc"), 0u));
2682 const Unique<VkShaderModule> teEvalShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("tese"), 0u));
2683 const Unique<VkShaderModule> fragmentShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
2684 const Unique<VkRenderPass> renderPass (makeRenderPass(context, format));
2685 const VkVertexInputBindingDescription vertexInputBinding =
2686 {
2687 0u, // deUint32 binding;
2688 static_cast<deUint32>(sizeof(tcu::Vec4)), // deUint32 stride;
2689 VK_VERTEX_INPUT_RATE_VERTEX // VkVertexInputRate inputRate;
2690 };
2691 const VkVertexInputAttributeDescription vertexInputAttribute =
2692 {
2693 0u, // deUint32 location;
2694 0u, // deUint32 binding;
2695 VK_FORMAT_R32G32B32A32_SFLOAT, // VkFormat format;
2696 0u // deUint32 offset;
2697 };
2698
2699 for (deUint32 i = 0u; i < extraDataCount; i++)
2700 {
2701 if (extraData[i].isImage())
2702 {
2703 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
2704 }
2705 else
2706 {
2707 DE_ASSERT(extraData[i].isUBO());
2708 vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
2709 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2710 }
2711 const Allocation& alloc = inputBuffers[i]->getAllocation();
2712 initializeMemory(context, alloc, extraData[i]);
2713 }
2714
2715 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2716 layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, shaderStage, DE_NULL);
2717
2718 const Unique<VkDescriptorSetLayout> descriptorSetLayout (layoutBuilder.build(vk, device));
2719
2720 const Unique<VkPipelineLayout> pipelineLayout (makePipelineLayout(vk, device, *descriptorSetLayout));
2721
2722 const deUint32 requiredSubgroupSizes[5] = {0u,
2723 ((shaderStage & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? requiredSubgroupSize : 0u),
2724 ((shaderStage & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? requiredSubgroupSize : 0u),
2725 0u,
2726 0u};
2727
2728 const Unique<VkPipeline> pipeline (makeGraphicsPipeline(context, *pipelineLayout,
2729 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT |
2730 VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
2731 *vertexShaderModule, *fragmentShaderModule, DE_NULL, *teCtrlShaderModule, *teEvalShaderModule,
2732 *renderPass, VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, &vertexInputBinding, &vertexInputAttribute, true, format,
2733 0u, ((shaderStage & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? tessShaderStageCreateFlags : 0u),
2734 ((shaderStage & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? tessShaderStageCreateFlags : 0u),
2735 0u, 0u, requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
2736
2737 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2738 poolBuilder.addType(inputBuffers[ndx]->getType());
2739
2740 if (extraDataCount > 0)
2741 {
2742 descriptorPool = poolBuilder.build(vk, device,
2743 VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2744 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
2745 }
2746
2747 for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
2748 {
2749 if (inputBuffers[buffersNdx]->isImage())
2750 {
2751 VkDescriptorImageInfo info =
2752 makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
2753 inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2754
2755 updateBuilder.writeSingle(*descriptorSet,
2756 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2757 inputBuffers[buffersNdx]->getType(), &info);
2758 }
2759 else
2760 {
2761 VkDescriptorBufferInfo info =
2762 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
2763 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
2764
2765 updateBuilder.writeSingle(*descriptorSet,
2766 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2767 inputBuffers[buffersNdx]->getType(), &info);
2768 }
2769 }
2770
2771 updateBuilder.update(vk, device);
2772
2773 const VkQueue queue = context.getUniversalQueue();
2774 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
2775 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
2776 const deUint32 subgroupSize = getSubgroupSize(context);
2777 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
2778 const vk::VkDeviceSize vertexBufferSize = 2ull * maxWidth * sizeof(tcu::Vec4);
2779 Buffer vertexBuffer (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
2780 unsigned totalIterations = 0u;
2781 unsigned failedIterations = 0u;
2782 Image discardableImage (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2783
2784 {
2785 const Allocation& alloc = vertexBuffer.getAllocation();
2786 std::vector<tcu::Vec4> data (2u * maxWidth, Vec4(1.0f, 0.0f, 1.0f, 1.0f));
2787 const float pixelSize = 2.0f / static_cast<float>(maxWidth);
2788 float leftHandPosition = -1.0f;
2789
2790 for(deUint32 ndx = 0u; ndx < data.size(); ndx+=2u)
2791 {
2792 data[ndx][0] = leftHandPosition;
2793 leftHandPosition += pixelSize;
2794 data[ndx+1][0] = leftHandPosition;
2795 }
2796
2797 deMemcpy(alloc.getHostPtr(), &data[0], data.size() * sizeof(tcu::Vec4));
2798 flushAlloc(vk, device, alloc);
2799 }
2800
2801 const Unique<VkFramebuffer> framebuffer (makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
2802 const VkViewport viewport = makeViewport(maxWidth, 1u);
2803 const VkRect2D scissor = makeRect2D(maxWidth, 1u);
2804 const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
2805 Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2806 const VkDeviceSize vertexBufferOffset = 0u;
2807
2808 for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
2809 {
2810 totalIterations++;
2811
2812 beginCommandBuffer(vk, *cmdBuffer);
2813 {
2814
2815 vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
2816 vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
2817
2818 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
2819
2820 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
2821
2822 if (extraDataCount > 0)
2823 {
2824 vk.cmdBindDescriptorSets(*cmdBuffer,
2825 VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2826 &descriptorSet.get(), 0u, DE_NULL);
2827 }
2828
2829 vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
2830 vk.cmdDraw(*cmdBuffer, 2 * width, 1, 0, 0);
2831
2832 endRenderPass(vk, *cmdBuffer);
2833
2834 copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2835 endCommandBuffer(vk, *cmdBuffer);
2836
2837 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2838 }
2839 context.resetCommandPoolForVKSC(device, *cmdPool);
2840
2841 {
2842 const Allocation& allocResult = imageBufferResult.getAllocation();
2843 invalidateAlloc(vk, device, allocResult);
2844
2845 std::vector<const void*> datas;
2846 datas.push_back(allocResult.getHostPtr());
2847 if (!checkResult(internalData, datas, width/2u, subgroupSize))
2848 failedIterations++;
2849 }
2850 }
2851
2852 if (0 < failedIterations)
2853 {
2854 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
2855
2856 context.getTestContext().getLog()
2857 << TestLog::Message << valuesPassed << " / "
2858 << totalIterations << " values passed" << TestLog::EndMessage;
2859 return tcu::TestStatus::fail("Failed!");
2860 }
2861
2862 return tcu::TestStatus::pass("OK");
2863 }
2864
check(std::vector<const void * > datas,deUint32 width,deUint32 ref)2865 bool vkt::subgroups::check (std::vector<const void*> datas, deUint32 width, deUint32 ref)
2866 {
2867 const deUint32* data = reinterpret_cast<const deUint32*>(datas[0]);
2868
2869 for (deUint32 n = 0; n < width; ++n)
2870 {
2871 if (data[n] != ref)
2872 {
2873 return false;
2874 }
2875 }
2876
2877 return true;
2878 }
2879
checkComputeOrMesh(std::vector<const void * > datas,const deUint32 numWorkgroups[3],const deUint32 localSize[3],deUint32 ref)2880 bool vkt::subgroups::checkComputeOrMesh (std::vector<const void*> datas,
2881 const deUint32 numWorkgroups[3],
2882 const deUint32 localSize[3],
2883 deUint32 ref)
2884 {
2885 const deUint32 globalSizeX = numWorkgroups[0] * localSize[0];
2886 const deUint32 globalSizeY = numWorkgroups[1] * localSize[1];
2887 const deUint32 globalSizeZ = numWorkgroups[2] * localSize[2];
2888
2889 return check(datas, globalSizeX * globalSizeY * globalSizeZ, ref);
2890 }
2891
makeGeometryFrameBufferTest(Context & context,VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,subgroups::CheckResult checkResult)2892 tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTest (Context& context,
2893 VkFormat format,
2894 const SSBOData* extraData,
2895 deUint32 extraDataCount,
2896 const void* internalData,
2897 subgroups::CheckResult checkResult)
2898 {
2899 return makeGeometryFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, 0u, 0u);
2900 }
2901
makeGeometryFrameBufferTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,subgroups::CheckResult checkResult,const deUint32 geometryShaderStageCreateFlags,const deUint32 requiredSubgroupSize)2902 tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTestRequiredSubgroupSize (Context& context,
2903 VkFormat format,
2904 const SSBOData* extraData,
2905 deUint32 extraDataCount,
2906 const void* internalData,
2907 subgroups::CheckResult checkResult,
2908 const deUint32 geometryShaderStageCreateFlags,
2909 const deUint32 requiredSubgroupSize)
2910 {
2911 const DeviceInterface& vk = context.getDeviceInterface();
2912 const VkDevice device = context.getDevice();
2913 const deUint32 maxWidth = getMaxWidth();
2914 vector<de::SharedPtr<BufferOrImage> > inputBuffers (extraDataCount);
2915 DescriptorSetLayoutBuilder layoutBuilder;
2916 DescriptorPoolBuilder poolBuilder;
2917 DescriptorSetUpdateBuilder updateBuilder;
2918 Move <VkDescriptorPool> descriptorPool;
2919 Move <VkDescriptorSet> descriptorSet;
2920 const Unique<VkShaderModule> vertexShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
2921 const Unique<VkShaderModule> geometryShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("geometry"), 0u));
2922 const Unique<VkShaderModule> fragmentShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
2923 const Unique<VkRenderPass> renderPass (makeRenderPass(context, format));
2924 const VkVertexInputBindingDescription vertexInputBinding =
2925 {
2926 0u, // deUint32 binding;
2927 static_cast<deUint32>(sizeof(tcu::Vec4)), // deUint32 stride;
2928 VK_VERTEX_INPUT_RATE_VERTEX // VkVertexInputRate inputRate;
2929 };
2930 const VkVertexInputAttributeDescription vertexInputAttribute =
2931 {
2932 0u, // deUint32 location;
2933 0u, // deUint32 binding;
2934 VK_FORMAT_R32G32B32A32_SFLOAT, // VkFormat format;
2935 0u // deUint32 offset;
2936 };
2937
2938 for (deUint32 i = 0u; i < extraDataCount; i++)
2939 {
2940 if (extraData[i].isImage())
2941 {
2942 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
2943 }
2944 else
2945 {
2946 DE_ASSERT(extraData[i].isUBO());
2947 vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
2948 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2949 }
2950 const Allocation& alloc = inputBuffers[i]->getAllocation();
2951 initializeMemory(context, alloc, extraData[i]);
2952 }
2953
2954 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2955 layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_GEOMETRY_BIT, DE_NULL);
2956
2957 const Unique<VkDescriptorSetLayout> descriptorSetLayout (layoutBuilder.build(vk, device));
2958
2959 const Unique<VkPipelineLayout> pipelineLayout (makePipelineLayout(vk, device, *descriptorSetLayout));
2960
2961 const deUint32 requiredSubgroupSizes[5] = {0u, 0u, 0u, requiredSubgroupSize, 0u};
2962
2963 const Unique<VkPipeline> pipeline (makeGraphicsPipeline(context, *pipelineLayout,
2964 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_GEOMETRY_BIT,
2965 *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, DE_NULL, DE_NULL,
2966 *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST, &vertexInputBinding, &vertexInputAttribute, true, format,
2967 0u, 0u, 0u, geometryShaderStageCreateFlags, 0u,
2968 requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
2969
2970 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2971 poolBuilder.addType(inputBuffers[ndx]->getType());
2972
2973 if (extraDataCount > 0)
2974 {
2975 descriptorPool = poolBuilder.build(vk, device,
2976 VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2977 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
2978 }
2979
2980 for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
2981 {
2982 if (inputBuffers[buffersNdx]->isImage())
2983 {
2984 VkDescriptorImageInfo info =
2985 makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
2986 inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2987
2988 updateBuilder.writeSingle(*descriptorSet,
2989 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2990 inputBuffers[buffersNdx]->getType(), &info);
2991 }
2992 else
2993 {
2994 VkDescriptorBufferInfo info =
2995 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
2996 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
2997
2998 updateBuilder.writeSingle(*descriptorSet,
2999 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
3000 inputBuffers[buffersNdx]->getType(), &info);
3001 }
3002 }
3003
3004 updateBuilder.update(vk, device);
3005
3006 const VkQueue queue = context.getUniversalQueue();
3007 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
3008 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
3009 const deUint32 subgroupSize = getSubgroupSize(context);
3010 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
3011 const vk::VkDeviceSize vertexBufferSize = maxWidth * sizeof(tcu::Vec4);
3012 Buffer vertexBuffer (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
3013 unsigned totalIterations = 0u;
3014 unsigned failedIterations = 0u;
3015 Image discardableImage (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3016
3017 {
3018 const Allocation& alloc = vertexBuffer.getAllocation();
3019 std::vector<tcu::Vec4> data (maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
3020 const float pixelSize = 2.0f / static_cast<float>(maxWidth);
3021 float leftHandPosition = -1.0f;
3022
3023 for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
3024 {
3025 data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
3026 leftHandPosition += pixelSize;
3027 }
3028
3029 deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
3030 flushAlloc(vk, device, alloc);
3031 }
3032
3033 const Unique<VkFramebuffer> framebuffer (makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
3034 const VkViewport viewport = makeViewport(maxWidth, 1u);
3035 const VkRect2D scissor = makeRect2D(maxWidth, 1u);
3036 const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
3037 Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3038 const VkDeviceSize vertexBufferOffset = 0u;
3039
3040 for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
3041 {
3042 totalIterations++;
3043
3044 for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
3045 {
3046 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3047 initializeMemory(context, alloc, extraData[ndx]);
3048 }
3049
3050 beginCommandBuffer(vk, *cmdBuffer);
3051 {
3052 vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3053
3054 vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3055
3056 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
3057
3058 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3059
3060 if (extraDataCount > 0)
3061 {
3062 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3063 &descriptorSet.get(), 0u, DE_NULL);
3064 }
3065
3066 vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
3067
3068 vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
3069
3070 endRenderPass(vk, *cmdBuffer);
3071
3072 copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3073
3074 endCommandBuffer(vk, *cmdBuffer);
3075
3076 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3077 }
3078 context.resetCommandPoolForVKSC(device, *cmdPool);
3079
3080 {
3081 const Allocation& allocResult = imageBufferResult.getAllocation();
3082 invalidateAlloc(vk, device, allocResult);
3083
3084 std::vector<const void*> datas;
3085 datas.push_back(allocResult.getHostPtr());
3086 if (!checkResult(internalData, datas, width, subgroupSize))
3087 failedIterations++;
3088 }
3089 }
3090
3091 if (0 < failedIterations)
3092 {
3093 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3094
3095 context.getTestContext().getLog()
3096 << TestLog::Message << valuesPassed << " / "
3097 << totalIterations << " values passed" << TestLog::EndMessage;
3098
3099 return tcu::TestStatus::fail("Failed!");
3100 }
3101
3102 return tcu::TestStatus::pass("OK");
3103 }
3104
getPossibleGraphicsSubgroupStages(Context & context,const vk::VkShaderStageFlags testedStages)3105 vk::VkShaderStageFlags vkt::subgroups::getPossibleGraphicsSubgroupStages (Context& context, const vk::VkShaderStageFlags testedStages)
3106 {
3107 const VkPhysicalDeviceSubgroupProperties& subgroupProperties = context.getSubgroupProperties();
3108 VkShaderStageFlags stages = testedStages & subgroupProperties.supportedStages;
3109
3110 DE_ASSERT(isAllGraphicsStages(testedStages));
3111
3112 if (VK_SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
3113 {
3114 if ((stages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
3115 TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
3116 else
3117 stages = VK_SHADER_STAGE_FRAGMENT_BIT;
3118 }
3119
3120 if (static_cast<VkShaderStageFlags>(0u) == stages)
3121 TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
3122
3123 return stages;
3124 }
3125
allStages(Context & context,vk::VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,const VerificationFunctor & checkResult,const vk::VkShaderStageFlags shaderStage)3126 tcu::TestStatus vkt::subgroups::allStages (Context& context,
3127 vk::VkFormat format,
3128 const SSBOData* extraData,
3129 deUint32 extraDataCount,
3130 const void* internalData,
3131 const VerificationFunctor& checkResult,
3132 const vk::VkShaderStageFlags shaderStage)
3133 {
3134 return vkt::subgroups::allStagesRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, shaderStage,
3135 0u, 0u, 0u, 0u, 0u, DE_NULL);
3136 }
3137
/*
 * Core implementation behind allStages(): builds one graphics pipeline
 * containing every stage in shaderStageTested (plus any passthrough stages
 * needed to make the pipeline valid), gives each tested stage its own result
 * SSBO plus the caller-provided extra buffers/images, then draws at a range
 * of widths and verifies every stage's output with checkResult.
 *
 * requiredSubgroupSize, when not DE_NULL, holds per-stage required subgroup
 * sizes in the order [0]=vertex, [1]=tesc, [2]=tese, [3]=geometry,
 * [4]=fragment (forwarded to makeGraphicsPipeline together with the
 * per-stage create flags).
 */
tcu::TestStatus vkt::subgroups::allStagesRequiredSubgroupSize (Context& context,
															   vk::VkFormat format,
															   const SSBOData* extraDatas,
															   deUint32 extraDatasCount,
															   const void* internalData,
															   const VerificationFunctor& checkResult,
															   const vk::VkShaderStageFlags shaderStageTested,
															   const deUint32 vertexShaderStageCreateFlags,
															   const deUint32 tessellationControlShaderStageCreateFlags,
															   const deUint32 tessellationEvalShaderStageCreateFlags,
															   const deUint32 geometryShaderStageCreateFlags,
															   const deUint32 fragmentShaderStageCreateFlags,
															   const deUint32 requiredSubgroupSize[5])
{
	const DeviceInterface&			vk					= context.getDeviceInterface();
	const VkDevice					device				= context.getDevice();
	const deUint32					maxWidth			= getMaxWidth();
	vector<VkShaderStageFlagBits>	stagesVector;
	VkShaderStageFlags				shaderStageRequired	= (VkShaderStageFlags)0ull;

	Move<VkShaderModule>			vertexShaderModule;
	Move<VkShaderModule>			teCtrlShaderModule;
	Move<VkShaderModule>			teEvalShaderModule;
	Move<VkShaderModule>			geometryShaderModule;
	Move<VkShaderModule>			fragmentShaderModule;

	// Collect the tested stages; shaderStageRequired accumulates the extra
	// (untested) stages needed so the resulting pipeline is complete:
	// tesc needs tese+vert, tese needs vert+tesc, geometry/fragment need vert.
	if (shaderStageTested & VK_SHADER_STAGE_VERTEX_BIT)
	{
		stagesVector.push_back(VK_SHADER_STAGE_VERTEX_BIT);
	}
	if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
	{
		stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
	}
	if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
	{
		stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
	}
	if (shaderStageTested & VK_SHADER_STAGE_GEOMETRY_BIT)
	{
		stagesVector.push_back(VK_SHADER_STAGE_GEOMETRY_BIT);
		const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
		shaderStageRequired |= (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
	}
	if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
	{
		const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
		shaderStageRequired |= (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
	}

	// Stages that are only present as passthrough (in shaderStageRequired but
	// not tested) use the "*_noSubgroup" shader binary variants.
	const deUint32 stagesCount = static_cast<deUint32>(stagesVector.size());
	const string vert = (shaderStageRequired & VK_SHADER_STAGE_VERTEX_BIT) ? "vert_noSubgroup" : "vert";
	const string tesc = (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? "tesc_noSubgroup" : "tesc";
	const string tese = (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? "tese_noSubgroup" : "tese";

	// From here on shaderStageRequired is the full set of stages present in
	// the pipeline (tested + passthrough).
	shaderStageRequired = shaderStageTested | shaderStageRequired;

	vertexShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(vert), 0u);
	if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
	{
		teCtrlShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tesc), 0u);
		teEvalShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tese), 0u);
	}
	if (shaderStageRequired & VK_SHADER_STAGE_GEOMETRY_BIT)
	{
		if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
		{
			// tessellation shaders output line primitives
			geometryShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("geometry_lines"), 0u);
		}
		else
		{
			// otherwise points are processed by geometry shader
			geometryShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("geometry_points"), 0u);
		}
	}
	if (shaderStageRequired & VK_SHADER_STAGE_FRAGMENT_BIT)
		fragmentShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u);

	// One result buffer per tested stage, followed by the extra data entries.
	std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(stagesCount + extraDatasCount);

	DescriptorSetLayoutBuilder layoutBuilder;

	// The implicit result SSBO we use to store our outputs from the shader
	for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
	{
		// Tessellation evaluation produces two results per input element
		// (matches the doubled usedWidth in the verification loop below).
		const VkDeviceSize shaderSize = (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? maxWidth * 2 : maxWidth;
		const VkDeviceSize size = getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize;
		inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT));

		layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1, stagesVector[ndx], getResultBinding(stagesVector[ndx]), DE_NULL);
	}

	// Allocate and initialize the caller-provided extra inputs (image, UBO or SSBO).
	for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
	{
		const deUint32 datasNdx = ndx - stagesCount;
		if (extraDatas[datasNdx].isImage())
		{
			inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
		}
		else
		{
			const auto usage = (extraDatas[datasNdx].isUBO() ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT : VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
			const auto size = getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) * extraDatas[datasNdx].numElements;
			inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, usage));
		}

		const Allocation& alloc = inputBuffers[ndx]->getAllocation();
		initializeMemory(context, alloc, extraDatas[datasNdx]);

		layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1,
										extraDatas[datasNdx].stages, extraDatas[datasNdx].binding, DE_NULL);
	}

	const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));

	const Unique<VkPipelineLayout> pipelineLayout(
		makePipelineLayout(vk, device, *descriptorSetLayout));

	// Patch lists when tessellation is present, point list otherwise.
	const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
	const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
										shaderStageRequired,
										*vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, *teCtrlShaderModule, *teEvalShaderModule,
										*renderPass,
										(shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
										DE_NULL, DE_NULL, false, VK_FORMAT_R32G32B32A32_SFLOAT,
										vertexShaderStageCreateFlags, tessellationControlShaderStageCreateFlags, tessellationEvalShaderStageCreateFlags,
										geometryShaderStageCreateFlags, fragmentShaderStageCreateFlags, requiredSubgroupSize));

	Move <VkDescriptorPool>	descriptorPool;
	Move <VkDescriptorSet>	descriptorSet;

	if (inputBuffers.size() > 0)
	{
		DescriptorPoolBuilder poolBuilder;

		for (deUint32 ndx = 0u; ndx < static_cast<deUint32>(inputBuffers.size()); ndx++)
		{
			poolBuilder.addType(inputBuffers[ndx]->getType());
		}

		descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

		// Create descriptor set
		descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);

		DescriptorSetUpdateBuilder updateBuilder;

		for (deUint32 ndx = 0u; ndx < stagesCount + extraDatasCount; ndx++)
		{
			// Result buffers use the per-stage result binding; extra data
			// entries use the binding the caller specified.
			deUint32 binding;
			if (ndx < stagesCount) binding = getResultBinding(stagesVector[ndx]);
			else binding = extraDatas[ndx -stagesCount].binding;

			if (inputBuffers[ndx]->isImage())
			{
				VkDescriptorImageInfo info =
					makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
											inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);

				updateBuilder.writeSingle(	*descriptorSet,
											DescriptorSetUpdateBuilder::Location::binding(binding),
											inputBuffers[ndx]->getType(), &info);
			}
			else
			{
				VkDescriptorBufferInfo info =
					makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(),
							0ull, inputBuffers[ndx]->getAsBuffer()->getSize());

				updateBuilder.writeSingle(	*descriptorSet,
											DescriptorSetUpdateBuilder::Location::binding(binding),
											inputBuffers[ndx]->getType(), &info);
			}
		}

		updateBuilder.update(vk, device);
	}

	{
		const VkQueue					queue				= context.getUniversalQueue();
		const deUint32					queueFamilyIndex	= context.getUniversalQueueFamilyIndex();
		const Unique<VkCommandPool>		cmdPool				(makeCommandPool(vk, device, queueFamilyIndex));
		const deUint32					subgroupSize		= getSubgroupSize(context);
		const Unique<VkCommandBuffer>	cmdBuffer			(makeCommandBuffer(context, *cmdPool));
		unsigned						totalIterations		= 0u;
		unsigned						failedIterations	= 0u;
		Image							resultImage			(context, maxWidth, 1, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
		const Unique<VkFramebuffer>		framebuffer			(makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), maxWidth, 1u));
		const VkViewport				viewport			= makeViewport(maxWidth, 1u);
		const VkRect2D					scissor				= makeRect2D(maxWidth, 1u);
		const vk::VkDeviceSize			imageResultSize		= tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
		Buffer							imageBufferResult	(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
		const VkImageSubresourceRange	subresourceRange	=
		{
			VK_IMAGE_ASPECT_COLOR_BIT,											//VkImageAspectFlags	aspectMask
			0u,																	//deUint32				baseMipLevel
			1u,																	//deUint32				levelCount
			0u,																	//deUint32				baseArrayLayer
			1u																	//deUint32				layerCount
		};

		const VkImageMemoryBarrier		colorAttachmentBarrier = makeImageMemoryBarrier(
			(VkAccessFlags)0u, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
			VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
			resultImage.getImage(), subresourceRange);

		// Draw at every width from 1 up to maxWidth (see getNextWidth for the
		// step pattern), re-running the whole record/submit/verify cycle.
		for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
		{
			for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
			{
				// re-init the data
				const Allocation& alloc = inputBuffers[ndx]->getAllocation();
				initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
			}

			totalIterations++;

			beginCommandBuffer(vk, *cmdBuffer);

			vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &colorAttachmentBarrier);

			vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);

			vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);

			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));

			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);

			if (stagesCount + extraDatasCount > 0)
				vk.cmdBindDescriptorSets(*cmdBuffer,
						VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
						&descriptorSet.get(), 0u, DE_NULL);

			vk.cmdDraw(*cmdBuffer, width, 1, 0, 0);

			endRenderPass(vk, *cmdBuffer);

			copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(width, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);

			endCommandBuffer(vk, *cmdBuffer);

			submitCommandsAndWait(vk, device, queue, *cmdBuffer);

			// Verify each tested (non-fragment) stage against its result SSBO
			// plus any extra SSBOs visible to that stage.
			for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
			{
				std::vector<const void*> datas;
				if (!inputBuffers[ndx]->isImage())
				{
					const Allocation& resultAlloc = inputBuffers[ndx]->getAllocation();
					invalidateAlloc(vk, device, resultAlloc);
					// we always have our result data first
					datas.push_back(resultAlloc.getHostPtr());
				}

				for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
				{
					const deUint32 datasNdx = index - stagesCount;
					if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
					{
						const Allocation& resultAlloc = inputBuffers[index]->getAllocation();
						invalidateAlloc(vk, device, resultAlloc);
						// we always have our result data first
						datas.push_back(resultAlloc.getHostPtr());
					}
				}

				// Any stage in the vertex pipeline may be called multiple times per vertex, so we may need >= non-strict comparisons.
				const bool multiCall	= (	stagesVector[ndx] == VK_SHADER_STAGE_VERTEX_BIT					||
											stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT	||
											stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT	||
											stagesVector[ndx] == VK_SHADER_STAGE_GEOMETRY_BIT				);
				const deUint32 usedWidth = ((stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? width * 2 : width);

				if (!checkResult(internalData, datas, usedWidth, subgroupSize, multiCall))
					failedIterations++;
			}
			// The fragment stage is verified from the color attachment copy
			// rather than an SSBO.
			if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
			{
				std::vector<const void*> datas;
				const Allocation& resultAlloc = imageBufferResult.getAllocation();
				invalidateAlloc(vk, device, resultAlloc);

				// we always have our result data first
				datas.push_back(resultAlloc.getHostPtr());

				for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
				{
					const deUint32 datasNdx = index - stagesCount;
					if (VK_SHADER_STAGE_FRAGMENT_BIT & extraDatas[datasNdx].stages && (!inputBuffers[index]->isImage()))
					{
						const Allocation& alloc = inputBuffers[index]->getAllocation();
						invalidateAlloc(vk, device, alloc);
						// we always have our result data first
						datas.push_back(alloc.getHostPtr());
					}
				}

				if (!checkResult(internalData, datas, width, subgroupSize, false))
					failedIterations++;
			}

			context.resetCommandPoolForVKSC(device, *cmdPool);
		}

		if (0 < failedIterations)
		{
			unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);

			context.getTestContext().getLog()
				<< TestLog::Message << valuesPassed << " / "
				<< totalIterations << " values passed" << TestLog::EndMessage;

			return tcu::TestStatus::fail("Failed!");
		}
	}

	return tcu::TestStatus::pass("OK");
}
3462
makeVertexFrameBufferTest(Context & context,vk::VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,subgroups::CheckResult checkResult)3463 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTest (Context& context,
3464 vk::VkFormat format,
3465 const SSBOData* extraData,
3466 deUint32 extraDataCount,
3467 const void* internalData,
3468 subgroups::CheckResult checkResult)
3469 {
3470 return makeVertexFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, 0u, 0u);
3471 }
3472
/*
 * Runs a subgroup test in the vertex stage using the framebuffer path: a
 * vertex buffer with one point per pixel across a 1-pixel-high framebuffer,
 * the "vert"/"fragment" shader pair, and verification of the rendered image
 * via checkResult at increasing draw widths.
 *
 * vertexShaderStageCreateFlags is forwarded to the vertex stage of the
 * pipeline; requiredSubgroupSize (0 means "no requirement") is placed in
 * slot [0] of the 5-element per-stage array passed to makeGraphicsPipeline.
 * Extra data entries must be images or UBOs (asserted below).
 */
tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTestRequiredSubgroupSize (Context& context,
																			   vk::VkFormat format,
																			   const SSBOData* extraData,
																			   deUint32 extraDataCount,
																			   const void* internalData,
																			   subgroups::CheckResult checkResult,
																			   const deUint32 vertexShaderStageCreateFlags,
																			   const deUint32 requiredSubgroupSize)
{
	const DeviceInterface&					vk						= context.getDeviceInterface();
	const VkDevice							device					= context.getDevice();
	const VkQueue							queue					= context.getUniversalQueue();
	const deUint32							maxWidth				= getMaxWidth();
	const deUint32							queueFamilyIndex		= context.getUniversalQueueFamilyIndex();
	vector<de::SharedPtr<BufferOrImage> >	inputBuffers			(extraDataCount);
	DescriptorSetLayoutBuilder				layoutBuilder;
	const Unique<VkShaderModule>			vertexShaderModule		(createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
	const Unique<VkShaderModule>			fragmentShaderModule	(createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
	const Unique<VkRenderPass>				renderPass				(makeRenderPass(context, format));
	// One vec4 position per vertex, consumed at vertex rate in location 0.
	const VkVertexInputBindingDescription	vertexInputBinding		=
	{
		0u,											// binding;
		static_cast<deUint32>(sizeof(tcu::Vec4)),	// stride;
		VK_VERTEX_INPUT_RATE_VERTEX					// inputRate
	};
	const VkVertexInputAttributeDescription	vertexInputAttribute	=
	{
		0u,
		0u,
		VK_FORMAT_R32G32B32A32_SFLOAT,
		0u
	};

	// Allocate and initialize the caller-provided extra inputs (image or UBO only).
	for (deUint32 i = 0u; i < extraDataCount; i++)
	{
		if (extraData[i].isImage())
		{
			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
		}
		else
		{
			DE_ASSERT(extraData[i].isUBO());
			vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
		}
		const Allocation& alloc = inputBuffers[i]->getAllocation();
		initializeMemory(context, alloc, extraData[i]);
	}

	// All extra bindings are visible to the vertex stage only.
	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
		layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_VERTEX_BIT, DE_NULL);

	const Unique<VkDescriptorSetLayout>		descriptorSetLayout		(layoutBuilder.build(vk, device));

	const Unique<VkPipelineLayout>			pipelineLayout			(makePipelineLayout(vk, device, *descriptorSetLayout));

	// Slot [0] = vertex; the remaining stages have no size requirement.
	const deUint32							requiredSubgroupSizes[5] = {requiredSubgroupSize, 0u, 0u, 0u, 0u};
	const Unique<VkPipeline>				pipeline				(makeGraphicsPipeline(context, *pipelineLayout,
																	 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
																	 *vertexShaderModule, *fragmentShaderModule,
																	 DE_NULL, DE_NULL, DE_NULL,
																	 *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
																	 &vertexInputBinding, &vertexInputAttribute, true, format,
																	 vertexShaderStageCreateFlags, 0u, 0u, 0u, 0u,
																	 requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
	DescriptorPoolBuilder					poolBuilder;
	DescriptorSetUpdateBuilder				updateBuilder;


	for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
		poolBuilder.addType(inputBuffers[ndx]->getType());

	Move <VkDescriptorPool>					descriptorPool;
	Move <VkDescriptorSet>					descriptorSet;

	if (extraDataCount > 0)
	{
		descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
		descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
	}

	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
	{
		const Allocation& alloc = inputBuffers[ndx]->getAllocation();
		initializeMemory(context, alloc, extraData[ndx]);
	}

	for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
	{
		if (inputBuffers[buffersNdx]->isImage())
		{
			VkDescriptorImageInfo info =
				makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
										inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);

			updateBuilder.writeSingle(*descriptorSet,
										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
										inputBuffers[buffersNdx]->getType(), &info);
		}
		else
		{
			VkDescriptorBufferInfo info =
				makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
										0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());

			updateBuilder.writeSingle(*descriptorSet,
										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
										inputBuffers[buffersNdx]->getType(), &info);
		}
	}
	updateBuilder.update(vk, device);

	const Unique<VkCommandPool>				cmdPool					(makeCommandPool(vk, device, queueFamilyIndex));

	const deUint32							subgroupSize			= getSubgroupSize(context);

	const Unique<VkCommandBuffer>			cmdBuffer				(makeCommandBuffer(context, *cmdPool));

	const vk::VkDeviceSize					vertexBufferSize		= maxWidth * sizeof(tcu::Vec4);
	Buffer									vertexBuffer			(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);

	unsigned								totalIterations			= 0u;
	unsigned								failedIterations		= 0u;

	Image									discardableImage		(context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);

	// Fill the vertex buffer with one point per pixel, centered in each pixel
	// of the 1-pixel-high framebuffer (NDC x from -1 to 1).
	{
		const Allocation&		alloc				= vertexBuffer.getAllocation();
		std::vector<tcu::Vec4>	data				(maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
		const float				pixelSize			= 2.0f / static_cast<float>(maxWidth);
		float					leftHandPosition	= -1.0f;

		for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
		{
			data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
			leftHandPosition += pixelSize;
		}

		deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
		flushAlloc(vk, device, alloc);
	}

	const Unique<VkFramebuffer>				framebuffer				(makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
	const VkViewport						viewport				= makeViewport(maxWidth, 1u);
	const VkRect2D							scissor					= makeRect2D(maxWidth, 1u);
	const vk::VkDeviceSize					imageResultSize			= tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
	Buffer									imageBufferResult		(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
	const VkDeviceSize						vertexBufferOffset		= 0u;

	// Draw at every width from 1 up to maxWidth (see getNextWidth), verifying
	// the rendered image each iteration.
	for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
	{
		totalIterations++;

		for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
		{
			const Allocation& alloc = inputBuffers[ndx]->getAllocation();
			initializeMemory(context, alloc, extraData[ndx]);
		}

		beginCommandBuffer(vk, *cmdBuffer);
		{
			vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);

			vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);

			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));

			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);

			if (extraDataCount > 0)
			{
				vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
					&descriptorSet.get(), 0u, DE_NULL);
			}

			vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);

			vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);

			endRenderPass(vk, *cmdBuffer);

			copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);

			endCommandBuffer(vk, *cmdBuffer);

			submitCommandsAndWait(vk, device, queue, *cmdBuffer);
		}
		context.resetCommandPoolForVKSC(device, *cmdPool);

		{
			const Allocation& allocResult = imageBufferResult.getAllocation();
			invalidateAlloc(vk, device, allocResult);

			std::vector<const void*> datas;
			datas.push_back(allocResult.getHostPtr());
			if (!checkResult(internalData, datas, width, subgroupSize))
				failedIterations++;
		}
	}

	if (0 < failedIterations)
	{
		unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);

		context.getTestContext().getLog()
			<< TestLog::Message << valuesPassed << " / "
			<< totalIterations << " values passed" << TestLog::EndMessage;

		return tcu::TestStatus::fail("Failed!");
	}

	return tcu::TestStatus::pass("OK");
}
3686
makeFragmentFrameBufferTest(Context & context,VkFormat format,const SSBOData * extraDatas,deUint32 extraDatasCount,const void * internalData,CheckResultFragment checkResult)3687 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTest (Context& context,
3688 VkFormat format,
3689 const SSBOData* extraDatas,
3690 deUint32 extraDatasCount,
3691 const void* internalData,
3692 CheckResultFragment checkResult)
3693 {
3694 return makeFragmentFrameBufferTestRequiredSubgroupSize(context, format, extraDatas, extraDatasCount, internalData, checkResult, 0u, 0u);
3695 }
3696
makeFragmentFrameBufferTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * extraDatas,deUint32 extraDatasCount,const void * internalData,CheckResultFragment checkResult,const deUint32 fragmentShaderStageCreateFlags,const deUint32 requiredSubgroupSize)3697 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTestRequiredSubgroupSize (Context& context,
3698 VkFormat format,
3699 const SSBOData* extraDatas,
3700 deUint32 extraDatasCount,
3701 const void* internalData,
3702 CheckResultFragment checkResult,
3703 const deUint32 fragmentShaderStageCreateFlags,
3704 const deUint32 requiredSubgroupSize)
3705 {
3706 const DeviceInterface& vk = context.getDeviceInterface();
3707 const VkDevice device = context.getDevice();
3708 const VkQueue queue = context.getUniversalQueue();
3709 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
3710 const Unique<VkShaderModule> vertexShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
3711 const Unique<VkShaderModule> fragmentShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
3712 std::vector< de::SharedPtr<BufferOrImage> > inputBuffers (extraDatasCount);
3713
3714 for (deUint32 i = 0; i < extraDatasCount; i++)
3715 {
3716 if (extraDatas[i].isImage())
3717 {
3718 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[i].numElements), 1, extraDatas[i].format));
3719 }
3720 else
3721 {
3722 DE_ASSERT(extraDatas[i].isUBO());
3723
3724 const vk::VkDeviceSize size = getElementSizeInBytes(extraDatas[i].format, extraDatas[i].layout) * extraDatas[i].numElements;
3725
3726 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
3727 }
3728
3729 const Allocation& alloc = inputBuffers[i]->getAllocation();
3730
3731 initializeMemory(context, alloc, extraDatas[i]);
3732 }
3733
3734 DescriptorSetLayoutBuilder layoutBuilder;
3735
3736 for (deUint32 i = 0; i < extraDatasCount; i++)
3737 {
3738 layoutBuilder.addBinding(inputBuffers[i]->getType(), 1, VK_SHADER_STAGE_FRAGMENT_BIT, DE_NULL);
3739 }
3740
3741 const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
3742 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
3743 const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
3744 const deUint32 requiredSubgroupSizes[5] = {0u, 0u, 0u, 0u, requiredSubgroupSize};
3745 const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context,
3746 *pipelineLayout,
3747 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
3748 *vertexShaderModule,
3749 *fragmentShaderModule,
3750 DE_NULL,
3751 DE_NULL,
3752 DE_NULL,
3753 *renderPass,
3754 VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
3755 DE_NULL,
3756 DE_NULL,
3757 true,
3758 VK_FORMAT_R32G32B32A32_SFLOAT,
3759 0u,
3760 0u,
3761 0u,
3762 0u,
3763 fragmentShaderStageCreateFlags,
3764 requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
3765 DescriptorPoolBuilder poolBuilder;
3766
3767 // To stop validation complaining, always add at least one type to pool.
3768 poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3769 for (deUint32 i = 0; i < extraDatasCount; i++)
3770 {
3771 poolBuilder.addType(inputBuffers[i]->getType());
3772 }
3773
3774 Move<VkDescriptorPool> descriptorPool;
3775 // Create descriptor set
3776 Move<VkDescriptorSet> descriptorSet;
3777
3778 if (extraDatasCount > 0)
3779 {
3780 descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3781
3782 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3783 }
3784
3785 DescriptorSetUpdateBuilder updateBuilder;
3786
3787 for (deUint32 i = 0; i < extraDatasCount; i++)
3788 {
3789 if (inputBuffers[i]->isImage())
3790 {
3791 const VkDescriptorImageInfo info = makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(), inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3792
3793 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i), inputBuffers[i]->getType(), &info);
3794 }
3795 else
3796 {
3797 const VkDescriptorBufferInfo info = makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, inputBuffers[i]->getAsBuffer()->getSize());
3798
3799 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i), inputBuffers[i]->getType(), &info);
3800 }
3801 }
3802
3803 if (extraDatasCount > 0)
3804 updateBuilder.update(vk, device);
3805
3806 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
3807 const deUint32 subgroupSize = getSubgroupSize(context);
3808 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
3809 unsigned totalIterations = 0;
3810 unsigned failedIterations = 0;
3811
3812 for (deUint32 width = 8; width <= subgroupSize; width *= 2)
3813 {
3814 for (deUint32 height = 8; height <= subgroupSize; height *= 2)
3815 {
3816 totalIterations++;
3817
3818 // re-init the data
3819 for (deUint32 i = 0; i < extraDatasCount; i++)
3820 {
3821 const Allocation& alloc = inputBuffers[i]->getAllocation();
3822
3823 initializeMemory(context, alloc, extraDatas[i]);
3824 }
3825
3826 const VkDeviceSize formatSize = getFormatSizeInBytes(format);
3827 const VkDeviceSize resultImageSizeInBytes = width * height * formatSize;
3828 Image resultImage (context, width, height, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3829 Buffer resultBuffer (context, resultImageSizeInBytes, VK_IMAGE_USAGE_TRANSFER_DST_BIT);
3830 const Unique<VkFramebuffer> framebuffer (makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), width, height));
3831 VkViewport viewport = makeViewport(width, height);
3832 VkRect2D scissor = {{0, 0}, {width, height}};
3833
3834 beginCommandBuffer(vk, *cmdBuffer);
3835
3836 vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3837
3838 vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3839
3840 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, width, height), tcu::Vec4(0.0f));
3841
3842 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3843
3844 if (extraDatasCount > 0)
3845 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
3846
3847 vk.cmdDraw(*cmdBuffer, 4, 1, 0, 0);
3848
3849 endRenderPass(vk, *cmdBuffer);
3850
3851 copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), resultBuffer.getBuffer(), tcu::IVec2(width, height), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3852
3853 endCommandBuffer(vk, *cmdBuffer);
3854
3855 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3856
3857 std::vector<const void*> datas;
3858 {
3859 const Allocation& resultAlloc = resultBuffer.getAllocation();
3860 invalidateAlloc(vk, device, resultAlloc);
3861
3862 // we always have our result data first
3863 datas.push_back(resultAlloc.getHostPtr());
3864 }
3865
3866 if (!checkResult(internalData, datas, width, height, subgroupSize))
3867 {
3868 failedIterations++;
3869 }
3870
3871 context.resetCommandPoolForVKSC(device, *cmdPool);
3872 }
3873 }
3874
3875 if (0 < failedIterations)
3876 {
3877 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3878
3879 context.getTestContext().getLog()
3880 << TestLog::Message << valuesPassed << " / "
3881 << totalIterations << " values passed" << TestLog::EndMessage;
3882
3883 return tcu::TestStatus::fail("Failed!");
3884 }
3885
3886 return tcu::TestStatus::pass("OK");
3887 }
3888
makeComputePipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderModule shaderModule,const deUint32 pipelineShaderStageFlags,const deUint32 pipelineCreateFlags,VkPipeline basePipelineHandle,deUint32 localSizeX,deUint32 localSizeY,deUint32 localSizeZ,deUint32 requiredSubgroupSize)3889 Move<VkPipeline> makeComputePipeline (Context& context,
3890 const VkPipelineLayout pipelineLayout,
3891 const VkShaderModule shaderModule,
3892 const deUint32 pipelineShaderStageFlags,
3893 const deUint32 pipelineCreateFlags,
3894 VkPipeline basePipelineHandle,
3895 deUint32 localSizeX,
3896 deUint32 localSizeY,
3897 deUint32 localSizeZ,
3898 deUint32 requiredSubgroupSize)
3899 {
3900 const deUint32 localSize[3] = {localSizeX, localSizeY, localSizeZ};
3901 const vk::VkSpecializationMapEntry entries[3] =
3902 {
3903 {0, sizeof(deUint32) * 0, sizeof(deUint32)},
3904 {1, sizeof(deUint32) * 1, sizeof(deUint32)},
3905 {2, static_cast<deUint32>(sizeof(deUint32) * 2), sizeof(deUint32)},
3906 };
3907 const vk::VkSpecializationInfo info =
3908 {
3909 /* mapEntryCount = */ 3,
3910 /* pMapEntries = */ entries,
3911 /* dataSize = */ sizeof(localSize),
3912 /* pData = */ localSize
3913 };
3914 const vk::VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroupSizeCreateInfo =
3915 {
3916 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, // VkStructureType sType;
3917 DE_NULL, // void* pNext;
3918 requiredSubgroupSize // uint32_t requiredSubgroupSize;
3919 };
3920 const vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
3921 {
3922 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
3923 (requiredSubgroupSize != 0u ? &subgroupSizeCreateInfo : DE_NULL), // const void* pNext;
3924 pipelineShaderStageFlags, // VkPipelineShaderStageCreateFlags flags;
3925 VK_SHADER_STAGE_COMPUTE_BIT, // VkShaderStageFlagBits stage;
3926 shaderModule, // VkShaderModule module;
3927 "main", // const char* pName;
3928 &info, // const VkSpecializationInfo* pSpecializationInfo;
3929 };
3930 const vk::VkComputePipelineCreateInfo pipelineCreateInfo =
3931 {
3932 VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType sType;
3933 DE_NULL, // const void* pNext;
3934 pipelineCreateFlags, // VkPipelineCreateFlags flags;
3935 pipelineShaderStageParams, // VkPipelineShaderStageCreateInfo stage;
3936 pipelineLayout, // VkPipelineLayout layout;
3937 basePipelineHandle, // VkPipeline basePipelineHandle;
3938 -1, // deInt32 basePipelineIndex;
3939 };
3940
3941 return createComputePipeline(context.getDeviceInterface(), context.getDevice(), DE_NULL, &pipelineCreateInfo);
3942 }
3943
3944 #ifndef CTS_USES_VULKANSC
makeMeshPipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderModule taskModule,const VkShaderModule meshModule,const deUint32 pipelineShaderStageFlags,const deUint32 pipelineCreateFlags,VkPipeline basePipelineHandle,deUint32 localSizeX,deUint32 localSizeY,deUint32 localSizeZ,deUint32 requiredSubgroupSize,const VkRenderPass renderPass)3945 Move<VkPipeline> makeMeshPipeline (Context& context,
3946 const VkPipelineLayout pipelineLayout,
3947 const VkShaderModule taskModule,
3948 const VkShaderModule meshModule,
3949 const deUint32 pipelineShaderStageFlags,
3950 const deUint32 pipelineCreateFlags,
3951 VkPipeline basePipelineHandle,
3952 deUint32 localSizeX,
3953 deUint32 localSizeY,
3954 deUint32 localSizeZ,
3955 deUint32 requiredSubgroupSize,
3956 const VkRenderPass renderPass)
3957 {
3958 const deUint32 localSize[3] = {localSizeX, localSizeY, localSizeZ};
3959 const vk::VkSpecializationMapEntry entries[3] =
3960 {
3961 {0, sizeof(deUint32) * 0, sizeof(deUint32)},
3962 {1, sizeof(deUint32) * 1, sizeof(deUint32)},
3963 {2, static_cast<deUint32>(sizeof(deUint32) * 2), sizeof(deUint32)},
3964 };
3965 const vk::VkSpecializationInfo info =
3966 {
3967 /* mapEntryCount = */ 3,
3968 /* pMapEntries = */ entries,
3969 /* dataSize = */ sizeof(localSize),
3970 /* pData = */ localSize
3971 };
3972 const vk::VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroupSizeCreateInfo =
3973 {
3974 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, // VkStructureType sType;
3975 DE_NULL, // void* pNext;
3976 requiredSubgroupSize // uint32_t requiredSubgroupSize;
3977 };
3978
3979 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT* pSubgroupSizeCreateInfo = ((requiredSubgroupSize != 0u) ? &subgroupSizeCreateInfo : nullptr);
3980
3981 std::vector<VkPipelineShaderStageCreateInfo> shaderStageParams;
3982 vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
3983 {
3984 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
3985 nullptr, // const void* pNext;
3986 pipelineShaderStageFlags, // VkPipelineShaderStageCreateFlags flags;
3987 VK_SHADER_STAGE_FLAG_BITS_MAX_ENUM, // VkShaderStageFlagBits stage;
3988 DE_NULL, // VkShaderModule module;
3989 "main", // const char* pName;
3990 &info, // const VkSpecializationInfo* pSpecializationInfo;
3991 };
3992
3993 if (taskModule != DE_NULL)
3994 {
3995 pipelineShaderStageParams.module = taskModule;
3996 pipelineShaderStageParams.pNext = pSubgroupSizeCreateInfo;
3997 pipelineShaderStageParams.stage = VK_SHADER_STAGE_TASK_BIT_EXT;
3998 shaderStageParams.push_back(pipelineShaderStageParams);
3999 }
4000
4001 if (meshModule != DE_NULL)
4002 {
4003 pipelineShaderStageParams.module = meshModule;
4004 pipelineShaderStageParams.pNext = ((taskModule == DE_NULL) ? pSubgroupSizeCreateInfo : nullptr);
4005 pipelineShaderStageParams.stage = VK_SHADER_STAGE_MESH_BIT_EXT;
4006 shaderStageParams.push_back(pipelineShaderStageParams);
4007 }
4008
4009 const std::vector<VkViewport> viewports (1u, makeViewport(1u, 1u));
4010 const std::vector<VkRect2D> scissors (1u, makeRect2D(1u, 1u));
4011
4012 return makeGraphicsPipeline(context.getDeviceInterface(), context.getDevice(), basePipelineHandle, pipelineLayout, pipelineCreateFlags, shaderStageParams, renderPass, viewports, scissors);
4013 }
4014 #endif // CTS_USES_VULKANSC
4015
makeComputeOrMeshTestRequiredSubgroupSize(ComputeLike testType,Context & context,VkFormat format,const vkt::subgroups::SSBOData * inputs,deUint32 inputsCount,const void * internalData,vkt::subgroups::CheckResultCompute checkResult,const deUint32 pipelineShaderStageCreateFlags,const deUint32 numWorkgroups[3],const deBool isRequiredSubgroupSize,const deUint32 subgroupSize,const deUint32 localSizesToTest[][3],const deUint32 localSizesToTestCount)4016 tcu::TestStatus makeComputeOrMeshTestRequiredSubgroupSize (ComputeLike testType,
4017 Context& context,
4018 VkFormat format,
4019 const vkt::subgroups::SSBOData* inputs,
4020 deUint32 inputsCount,
4021 const void* internalData,
4022 vkt::subgroups::CheckResultCompute checkResult,
4023 const deUint32 pipelineShaderStageCreateFlags,
4024 const deUint32 numWorkgroups[3],
4025 const deBool isRequiredSubgroupSize,
4026 const deUint32 subgroupSize,
4027 const deUint32 localSizesToTest[][3],
4028 const deUint32 localSizesToTestCount)
4029 {
4030 const DeviceInterface& vk = context.getDeviceInterface();
4031 const VkDevice device = context.getDevice();
4032 const VkQueue queue = context.getUniversalQueue();
4033 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
4034 #ifndef CTS_USES_VULKANSC
4035 const VkPhysicalDeviceSubgroupSizeControlProperties& subgroupSizeControlProperties = context.getSubgroupSizeControlProperties();
4036 #else
4037 const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& subgroupSizeControlProperties = context.getSubgroupSizeControlPropertiesEXT();
4038 #endif // CTS_USES_VULKANSC
4039 const VkDeviceSize elementSize = getFormatSizeInBytes(format);
4040 const VkDeviceSize maxSubgroupSize = isRequiredSubgroupSize
4041 ? deMax32(subgroupSizeControlProperties.maxSubgroupSize, vkt::subgroups::maxSupportedSubgroupSize())
4042 : vkt::subgroups::maxSupportedSubgroupSize();
4043 const VkDeviceSize resultBufferSize = maxSubgroupSize * maxSubgroupSize * maxSubgroupSize;
4044 const VkDeviceSize resultBufferSizeInBytes = resultBufferSize * elementSize;
4045 Buffer resultBuffer (context, resultBufferSizeInBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
4046 std::vector< de::SharedPtr<BufferOrImage> > inputBuffers (inputsCount);
4047 const auto shaderStageFlags = ((testType == ComputeLike::COMPUTE)
4048 ? VK_SHADER_STAGE_COMPUTE_BIT
4049 #ifndef CTS_USES_VULKANSC
4050 : (VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_TASK_BIT_EXT));
4051 #else
4052 : 0);
4053 #endif // CTS_USES_VULKANSC
4054 const auto pipelineBindPoint = ((testType == ComputeLike::COMPUTE)
4055 ? VK_PIPELINE_BIND_POINT_COMPUTE
4056 : VK_PIPELINE_BIND_POINT_GRAPHICS);
4057 const auto pipelineStage = ((testType == ComputeLike::COMPUTE)
4058 ? VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT
4059 #ifndef CTS_USES_VULKANSC
4060 : (VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT | VK_PIPELINE_STAGE_MESH_SHADER_BIT_EXT));
4061 #else
4062 : 0);
4063 #endif // CTS_USES_VULKANSC
4064 const auto renderArea = makeRect2D(1u, 1u);
4065
4066 std::vector<tcu::UVec3> usedLocalSizes;
4067 for (deUint32 i = 0; i < localSizesToTestCount; ++i)
4068 {
4069 usedLocalSizes.push_back(tcu::UVec3(localSizesToTest[i][0], localSizesToTest[i][1], localSizesToTest[i][2]));
4070 }
4071
4072 for (deUint32 i = 0; i < inputsCount; i++)
4073 {
4074 if (inputs[i].isImage())
4075 {
4076 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(inputs[i].numElements), 1, inputs[i].format));
4077 }
4078 else
4079 {
4080 const auto usage = (inputs[i].isUBO() ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT : VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
4081 const auto size = getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
4082 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, usage));
4083 }
4084
4085 const Allocation& alloc = inputBuffers[i]->getAllocation();
4086
4087 initializeMemory(context, alloc, inputs[i]);
4088 }
4089
4090 DescriptorSetLayoutBuilder layoutBuilder;
4091 layoutBuilder.addBinding(
4092 resultBuffer.getType(), 1, shaderStageFlags, DE_NULL);
4093
4094 for (deUint32 i = 0; i < inputsCount; i++)
4095 {
4096 layoutBuilder.addBinding(
4097 inputBuffers[i]->getType(), 1, shaderStageFlags, DE_NULL);
4098 }
4099
4100 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
4101 layoutBuilder.build(vk, device));
4102
4103 Move<VkShaderModule> compShader;
4104 Move<VkShaderModule> meshShader;
4105 Move<VkShaderModule> taskShader;
4106 const auto& binaries = context.getBinaryCollection();
4107
4108 if (testType == ComputeLike::COMPUTE)
4109 {
4110 compShader = createShaderModule(vk, device, binaries.get("comp"));
4111 }
4112 else if (testType == ComputeLike::MESH)
4113 {
4114 meshShader = createShaderModule(vk, device, binaries.get("mesh"));
4115 if (binaries.contains("task"))
4116 taskShader = createShaderModule(vk, device, binaries.get("task"));
4117 }
4118 else
4119 {
4120 DE_ASSERT(false);
4121 }
4122
4123 const Unique<VkPipelineLayout> pipelineLayout(
4124 makePipelineLayout(vk, device, *descriptorSetLayout));
4125
4126 DescriptorPoolBuilder poolBuilder;
4127
4128 poolBuilder.addType(resultBuffer.getType());
4129
4130 for (deUint32 i = 0; i < inputsCount; i++)
4131 {
4132 poolBuilder.addType(inputBuffers[i]->getType());
4133 }
4134
4135 const Unique<VkDescriptorPool> descriptorPool (poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
4136 const Unique<VkDescriptorSet> descriptorSet (makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
4137 const VkDescriptorBufferInfo resultDescriptorInfo = makeDescriptorBufferInfo(resultBuffer.getBuffer(), 0ull, resultBufferSizeInBytes);
4138 DescriptorSetUpdateBuilder updateBuilder;
4139
4140 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
4141
4142 for (deUint32 i = 0; i < inputsCount; i++)
4143 {
4144 if (inputBuffers[i]->isImage())
4145 {
4146 const VkDescriptorImageInfo info = makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(), inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
4147
4148 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i + 1), inputBuffers[i]->getType(), &info);
4149 }
4150 else
4151 {
4152 vk::VkDeviceSize size = getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
4153 VkDescriptorBufferInfo info = makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, size);
4154
4155 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i + 1), inputBuffers[i]->getType(), &info);
4156 }
4157 }
4158
4159 updateBuilder.update(vk, device);
4160
4161 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
4162 unsigned totalIterations = 0;
4163 unsigned failedIterations = 0;
4164 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
4165 std::vector<de::SharedPtr<Move<VkPipeline>>> pipelines (localSizesToTestCount);
4166 const auto reqSubgroupSize = (isRequiredSubgroupSize ? subgroupSize : 0u);
4167 Move<VkRenderPass> renderPass;
4168 Move<VkFramebuffer> framebuffer;
4169
4170 if (testType == ComputeLike::MESH)
4171 {
4172 renderPass = makeRenderPass(vk, device);
4173 framebuffer = makeFramebuffer(vk, device, renderPass.get(), 0u, nullptr, renderArea.extent.width, renderArea.extent.height);
4174 }
4175
4176 context.getTestContext().touchWatchdog();
4177 {
4178 if (testType == ComputeLike::COMPUTE)
4179 {
4180 pipelines[0] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeComputePipeline(context,
4181 *pipelineLayout,
4182 *compShader,
4183 pipelineShaderStageCreateFlags,
4184 #ifndef CTS_USES_VULKANSC
4185 VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT,
4186 #else
4187 0u,
4188 #endif // CTS_USES_VULKANSC
4189 (VkPipeline) DE_NULL,
4190 usedLocalSizes[0][0],
4191 usedLocalSizes[0][1],
4192 usedLocalSizes[0][2],
4193 reqSubgroupSize)));
4194 }
4195 #ifndef CTS_USES_VULKANSC
4196 else if (testType == ComputeLike::MESH)
4197 {
4198 pipelines[0] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeMeshPipeline(context,
4199 pipelineLayout.get(),
4200 taskShader.get(),
4201 meshShader.get(),
4202 pipelineShaderStageCreateFlags,
4203 VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT,
4204 DE_NULL,
4205 usedLocalSizes[0][0],
4206 usedLocalSizes[0][1],
4207 usedLocalSizes[0][2],
4208 reqSubgroupSize,
4209 renderPass.get())));
4210 }
4211 #endif // CTS_USES_VULKANSC
4212 else
4213 {
4214 DE_ASSERT(false);
4215 }
4216 }
4217 context.getTestContext().touchWatchdog();
4218
4219 for (deUint32 index = 1; index < (localSizesToTestCount - 1); index++)
4220 {
4221 const deUint32 nextX = usedLocalSizes[index][0];
4222 const deUint32 nextY = usedLocalSizes[index][1];
4223 const deUint32 nextZ = usedLocalSizes[index][2];
4224
4225 context.getTestContext().touchWatchdog();
4226 {
4227 if (testType == ComputeLike::COMPUTE)
4228 {
4229 pipelines[index] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeComputePipeline(context,
4230 *pipelineLayout,
4231 *compShader,
4232 pipelineShaderStageCreateFlags,
4233 #ifndef CTS_USES_VULKANSC
4234 VK_PIPELINE_CREATE_DERIVATIVE_BIT,
4235 #else
4236 0u,
4237 #endif // CTS_USES_VULKANSC
4238 **pipelines[0],
4239 nextX,
4240 nextY,
4241 nextZ,
4242 reqSubgroupSize)));
4243 }
4244 #ifndef CTS_USES_VULKANSC
4245 else if (testType == ComputeLike::MESH)
4246 {
4247 pipelines[index] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeMeshPipeline(context,
4248 pipelineLayout.get(),
4249 taskShader.get(),
4250 meshShader.get(),
4251 pipelineShaderStageCreateFlags,
4252 VK_PIPELINE_CREATE_DERIVATIVE_BIT,
4253 pipelines[0].get()->get(),
4254 nextX,
4255 nextY,
4256 nextZ,
4257 reqSubgroupSize,
4258 renderPass.get())));
4259 }
4260 #endif // CTS_USES_VULKANSC
4261 else
4262 {
4263 DE_ASSERT(false);
4264 }
4265 }
4266 context.getTestContext().touchWatchdog();
4267 }
4268
4269 for (deUint32 index = 0; index < (localSizesToTestCount - 1); index++)
4270 {
4271 // we are running one test
4272 totalIterations++;
4273
4274 beginCommandBuffer(vk, *cmdBuffer);
4275 {
4276 if (testType == ComputeLike::MESH)
4277 beginRenderPass(vk, *cmdBuffer, renderPass.get(), framebuffer.get(), renderArea);
4278
4279 vk.cmdBindPipeline(*cmdBuffer, pipelineBindPoint, **pipelines[index]);
4280
4281 vk.cmdBindDescriptorSets(*cmdBuffer, pipelineBindPoint, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
4282
4283 if (testType == ComputeLike::COMPUTE)
4284 vk.cmdDispatch(*cmdBuffer, numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
4285 #ifndef CTS_USES_VULKANSC
4286 else if (testType == ComputeLike::MESH)
4287 vk.cmdDrawMeshTasksEXT(*cmdBuffer, numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
4288 //vk.cmdDrawMeshTasksNV(*cmdBuffer, numWorkgroups[0], 0);
4289 #endif // CTS_USES_VULKANSC
4290 else
4291 DE_ASSERT(false);
4292
4293 if (testType == ComputeLike::MESH)
4294 endRenderPass(vk, *cmdBuffer);
4295 }
4296
4297 // Make shader writes available.
4298 const auto postShaderBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
4299 vk.cmdPipelineBarrier(*cmdBuffer, pipelineStage, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u, &postShaderBarrier, 0u, nullptr, 0u, nullptr);
4300
4301 endCommandBuffer(vk, *cmdBuffer);
4302
4303 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
4304
4305 std::vector<const void*> datas;
4306
4307 {
4308 const Allocation& resultAlloc = resultBuffer.getAllocation();
4309 invalidateAlloc(vk, device, resultAlloc);
4310
4311 // we always have our result data first
4312 datas.push_back(resultAlloc.getHostPtr());
4313 }
4314
4315 for (deUint32 i = 0; i < inputsCount; i++)
4316 {
4317 if (!inputBuffers[i]->isImage())
4318 {
4319 const Allocation& resultAlloc = inputBuffers[i]->getAllocation();
4320 invalidateAlloc(vk, device, resultAlloc);
4321
4322 // we always have our result data first
4323 datas.push_back(resultAlloc.getHostPtr());
4324 }
4325 }
4326
4327 if (!checkResult(internalData, datas, numWorkgroups, usedLocalSizes[index].getPtr(), subgroupSize))
4328 {
4329 failedIterations++;
4330 }
4331 else
4332 {
4333 failedIterations = failedIterations + 0;
4334 }
4335
4336 context.resetCommandPoolForVKSC(device, *cmdPool);
4337 }
4338
4339 if (0 < failedIterations)
4340 {
4341 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
4342
4343 context.getTestContext().getLog()
4344 << TestLog::Message << valuesPassed << " / "
4345 << totalIterations << " values passed" << TestLog::EndMessage;
4346
4347 return tcu::TestStatus::fail("Failed!");
4348 }
4349
4350 return tcu::TestStatus::pass("OK");
4351 }
4352
makeComputeTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * inputs,deUint32 inputsCount,const void * internalData,CheckResultCompute checkResult,const deUint32 pipelineShaderStageCreateFlags,const deUint32 numWorkgroups[3],const deBool isRequiredSubgroupSize,const deUint32 subgroupSize,const deUint32 localSizesToTest[][3],const deUint32 localSizesToTestCount)4353 tcu::TestStatus vkt::subgroups::makeComputeTestRequiredSubgroupSize (Context& context,
4354 VkFormat format,
4355 const SSBOData* inputs,
4356 deUint32 inputsCount,
4357 const void* internalData,
4358 CheckResultCompute checkResult,
4359 const deUint32 pipelineShaderStageCreateFlags,
4360 const deUint32 numWorkgroups[3],
4361 const deBool isRequiredSubgroupSize,
4362 const deUint32 subgroupSize,
4363 const deUint32 localSizesToTest[][3],
4364 const deUint32 localSizesToTestCount)
4365 {
4366 return makeComputeOrMeshTestRequiredSubgroupSize(
4367 ComputeLike::COMPUTE,
4368 context,
4369 format,
4370 inputs,
4371 inputsCount,
4372 internalData,
4373 checkResult,
4374 pipelineShaderStageCreateFlags,
4375 numWorkgroups,
4376 isRequiredSubgroupSize,
4377 subgroupSize,
4378 localSizesToTest,
4379 localSizesToTestCount);
4380 }
4381
makeMeshTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * inputs,deUint32 inputsCount,const void * internalData,CheckResultCompute checkResult,const deUint32 pipelineShaderStageCreateFlags,const deUint32 numWorkgroups[3],const deBool isRequiredSubgroupSize,const deUint32 subgroupSize,const deUint32 localSizesToTest[][3],const deUint32 localSizesToTestCount)4382 tcu::TestStatus vkt::subgroups::makeMeshTestRequiredSubgroupSize (Context& context,
4383 VkFormat format,
4384 const SSBOData* inputs,
4385 deUint32 inputsCount,
4386 const void* internalData,
4387 CheckResultCompute checkResult,
4388 const deUint32 pipelineShaderStageCreateFlags,
4389 const deUint32 numWorkgroups[3],
4390 const deBool isRequiredSubgroupSize,
4391 const deUint32 subgroupSize,
4392 const deUint32 localSizesToTest[][3],
4393 const deUint32 localSizesToTestCount)
4394 {
4395 return makeComputeOrMeshTestRequiredSubgroupSize(
4396 ComputeLike::MESH,
4397 context,
4398 format,
4399 inputs,
4400 inputsCount,
4401 internalData,
4402 checkResult,
4403 pipelineShaderStageCreateFlags,
4404 numWorkgroups,
4405 isRequiredSubgroupSize,
4406 subgroupSize,
4407 localSizesToTest,
4408 localSizesToTestCount);
4409 }
4410
makeComputeOrMeshTest(ComputeLike testType,Context & context,VkFormat format,const vkt::subgroups::SSBOData * inputs,deUint32 inputsCount,const void * internalData,vkt::subgroups::CheckResultCompute checkResult,deUint32 requiredSubgroupSize,const deUint32 pipelineShaderStageCreateFlags)4411 tcu::TestStatus makeComputeOrMeshTest (ComputeLike testType,
4412 Context& context,
4413 VkFormat format,
4414 const vkt::subgroups::SSBOData* inputs,
4415 deUint32 inputsCount,
4416 const void* internalData,
4417 vkt::subgroups::CheckResultCompute checkResult,
4418 deUint32 requiredSubgroupSize,
4419 const deUint32 pipelineShaderStageCreateFlags)
4420 {
4421 const uint32_t numWorkgroups[3] = {4, 2, 2};
4422 const bool isRequiredSubgroupSize = (requiredSubgroupSize != 0u);
4423 const uint32_t subgroupSize = (isRequiredSubgroupSize ? requiredSubgroupSize : vkt::subgroups::getSubgroupSize(context));
4424
4425 const deUint32 localSizesToTestCount = 8;
4426 deUint32 localSizesToTest[localSizesToTestCount][3] =
4427 {
4428 {1, 1, 1},
4429 {subgroupSize, 1, 1},
4430 {1, subgroupSize, 1},
4431 {1, 1, subgroupSize},
4432 {32, 4, 1},
4433 {1, 4, 32},
4434 {3, 5, 7},
4435 {1, 1, 1} // Isn't used, just here to make double buffering checks easier
4436 };
4437
4438 if (testType == ComputeLike::COMPUTE)
4439 return makeComputeTestRequiredSubgroupSize(context, format, inputs, inputsCount, internalData, checkResult, pipelineShaderStageCreateFlags,
4440 numWorkgroups, isRequiredSubgroupSize, subgroupSize, localSizesToTest, localSizesToTestCount);
4441 else
4442 return makeMeshTestRequiredSubgroupSize(context, format, inputs, inputsCount, internalData, checkResult, pipelineShaderStageCreateFlags,
4443 numWorkgroups, isRequiredSubgroupSize, subgroupSize, localSizesToTest, localSizesToTestCount);
4444 }
4445
makeComputeTest(Context & context,VkFormat format,const SSBOData * inputs,deUint32 inputsCount,const void * internalData,CheckResultCompute checkResult,deUint32 requiredSubgroupSize,const deUint32 pipelineShaderStageCreateFlags)4446 tcu::TestStatus vkt::subgroups::makeComputeTest (Context& context,
4447 VkFormat format,
4448 const SSBOData* inputs,
4449 deUint32 inputsCount,
4450 const void* internalData,
4451 CheckResultCompute checkResult,
4452 deUint32 requiredSubgroupSize,
4453 const deUint32 pipelineShaderStageCreateFlags)
4454 {
4455 return makeComputeOrMeshTest(ComputeLike::COMPUTE, context, format, inputs, inputsCount, internalData, checkResult, requiredSubgroupSize, pipelineShaderStageCreateFlags);
4456 }
4457
makeMeshTest(Context & context,VkFormat format,const SSBOData * inputs,deUint32 inputsCount,const void * internalData,CheckResultCompute checkResult,deUint32 requiredSubgroupSize,const deUint32 pipelineShaderStageCreateFlags)4458 tcu::TestStatus vkt::subgroups::makeMeshTest (Context& context,
4459 VkFormat format,
4460 const SSBOData* inputs,
4461 deUint32 inputsCount,
4462 const void* internalData,
4463 CheckResultCompute checkResult,
4464 deUint32 requiredSubgroupSize,
4465 const deUint32 pipelineShaderStageCreateFlags)
4466 {
4467 return makeComputeOrMeshTest(ComputeLike::MESH, context, format, inputs, inputsCount, internalData, checkResult, requiredSubgroupSize, pipelineShaderStageCreateFlags);
4468 }
4469
checkShaderStageSetValidity(const VkShaderStageFlags shaderStages)4470 static inline void checkShaderStageSetValidity (const VkShaderStageFlags shaderStages)
4471 {
4472 if (shaderStages == 0)
4473 TCU_THROW(InternalError, "Shader stage is not specified");
4474
4475 // It can actually be only 1 or 0.
4476 const deUint32 exclusivePipelinesCount = (isAllComputeStages(shaderStages) ? 1 : 0)
4477 + (isAllGraphicsStages(shaderStages) ? 1 : 0)
4478 #ifndef CTS_USES_VULKANSC
4479 + (isAllRayTracingStages(shaderStages) ? 1 : 0)
4480 + (isAllMeshShadingStages(shaderStages) ? 1 : 0)
4481 #endif // CTS_USES_VULKANSC
4482 ;
4483
4484 if (exclusivePipelinesCount != 1)
4485 TCU_THROW(InternalError, "Mix of shaders from different pipelines is detected");
4486 }
4487
supportedCheckShader(Context & context,const VkShaderStageFlags shaderStages)4488 void vkt::subgroups::supportedCheckShader (Context& context, const VkShaderStageFlags shaderStages)
4489 {
4490 checkShaderStageSetValidity(shaderStages);
4491
4492 if ((shaderStages & VK_SHADER_STAGE_GEOMETRY_BIT) != 0)
4493 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_GEOMETRY_SHADER);
4494
4495 if ((context.getSubgroupProperties().supportedStages & shaderStages) == 0)
4496 {
4497 if (isAllComputeStages(shaderStages))
4498 TCU_FAIL("Compute shader is required to support subgroup operations");
4499 else
4500 TCU_THROW(NotSupportedError, "Subgroup support is not available for test shader stage(s)");
4501 }
4502
4503 #ifndef CTS_USES_VULKANSC
4504 if ((VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) & shaderStages &&
4505 context.isDeviceFunctionalitySupported("VK_KHR_portability_subset") &&
4506 !context.getPortabilitySubsetFeatures().tessellationIsolines)
4507 {
4508 TCU_THROW(NotSupportedError, "VK_KHR_portability_subset: Tessellation iso lines are not supported by this implementation");
4509 }
4510 #endif // CTS_USES_VULKANSC
4511 }
4512
4513
4514 namespace vkt
4515 {
4516 namespace subgroups
4517 {
// Convenience alias for a heterogeneous list of test input resources
// (storage buffers, uniform buffers or images) shared via de::SharedPtr.
typedef std::vector< de::SharedPtr<BufferOrImage> > vectorBufferOrImage;

// Indices of the shader groups used when building the ray tracing
// shader binding table for the subgroup tests.
enum ShaderGroups
{
	FIRST_GROUP	= 0,
	RAYGEN_GROUP	= FIRST_GROUP,	// ray generation group
	MISS_GROUP,			// miss group
	HIT_GROUP,			// hit group (any-hit/closest-hit/intersection)
	CALL_GROUP,			// callable group
	GROUP_COUNT			// total number of groups in the table
};
4529
getAllRayTracingFormats()4530 const std::vector<vk::VkFormat> getAllRayTracingFormats()
4531 {
4532 std::vector<VkFormat> formats;
4533
4534 formats.push_back(VK_FORMAT_R8G8B8_SINT);
4535 formats.push_back(VK_FORMAT_R8_UINT);
4536 formats.push_back(VK_FORMAT_R8G8B8A8_UINT);
4537 formats.push_back(VK_FORMAT_R16G16B16_SINT);
4538 formats.push_back(VK_FORMAT_R16_UINT);
4539 formats.push_back(VK_FORMAT_R16G16B16A16_UINT);
4540 formats.push_back(VK_FORMAT_R32G32B32_SINT);
4541 formats.push_back(VK_FORMAT_R32_UINT);
4542 formats.push_back(VK_FORMAT_R32G32B32A32_UINT);
4543 formats.push_back(VK_FORMAT_R64G64B64_SINT);
4544 formats.push_back(VK_FORMAT_R64_UINT);
4545 formats.push_back(VK_FORMAT_R64G64B64A64_UINT);
4546 formats.push_back(VK_FORMAT_R16G16B16A16_SFLOAT);
4547 formats.push_back(VK_FORMAT_R32_SFLOAT);
4548 formats.push_back(VK_FORMAT_R32G32B32A32_SFLOAT);
4549 formats.push_back(VK_FORMAT_R64_SFLOAT);
4550 formats.push_back(VK_FORMAT_R64G64B64_SFLOAT);
4551 formats.push_back(VK_FORMAT_R64G64B64A64_SFLOAT);
4552 formats.push_back(VK_FORMAT_R8_USCALED);
4553 formats.push_back(VK_FORMAT_R8G8_USCALED);
4554 formats.push_back(VK_FORMAT_R8G8B8_USCALED);
4555 formats.push_back(VK_FORMAT_R8G8B8A8_USCALED);
4556
4557 return formats;
4558 }
4559
addRayTracingNoSubgroupShader(SourceCollections & programCollection)4560 void addRayTracingNoSubgroupShader (SourceCollections& programCollection)
4561 {
4562 const vk::ShaderBuildOptions buildOptions (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
4563
4564 const std::string rgenShaderNoSubgroups =
4565 "#version 460 core\n"
4566 "#extension GL_EXT_ray_tracing: require\n"
4567 "layout(location = 0) rayPayloadEXT uvec4 payload;\n"
4568 "layout(location = 0) callableDataEXT uvec4 callData;"
4569 "layout(set = 1, binding = 0) uniform accelerationStructureEXT topLevelAS;\n"
4570 "\n"
4571 "void main()\n"
4572 "{\n"
4573 " uint rayFlags = 0;\n"
4574 " uint cullMask = 0xFF;\n"
4575 " float tmin = 0.0;\n"
4576 " float tmax = 9.0;\n"
4577 " vec3 origin = vec3((float(gl_LaunchIDEXT.x) + 0.5f) / float(gl_LaunchSizeEXT.x), (float(gl_LaunchIDEXT.y) + 0.5f) / float(gl_LaunchSizeEXT.y), 0.0);\n"
4578 " vec3 directHit = vec3(0.0, 0.0, -1.0);\n"
4579 " vec3 directMiss = vec3(0.0, 0.0, +1.0);\n"
4580 "\n"
4581 " traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directHit, tmax, 0);\n"
4582 " traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directMiss, tmax, 0);\n"
4583 " executeCallableEXT(0, 0);"
4584 "}\n";
4585 const std::string hitShaderNoSubgroups =
4586 "#version 460 core\n"
4587 "#extension GL_EXT_ray_tracing: require\n"
4588 "hitAttributeEXT vec3 attribs;\n"
4589 "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
4590 "\n"
4591 "void main()\n"
4592 "{\n"
4593 "}\n";
4594 const std::string missShaderNoSubgroups =
4595 "#version 460 core\n"
4596 "#extension GL_EXT_ray_tracing: require\n"
4597 "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
4598 "\n"
4599 "void main()\n"
4600 "{\n"
4601 "}\n";
4602 const std::string sectShaderNoSubgroups =
4603 "#version 460 core\n"
4604 "#extension GL_EXT_ray_tracing: require\n"
4605 "hitAttributeEXT vec3 hitAttribute;\n"
4606 "\n"
4607 "void main()\n"
4608 "{\n"
4609 " reportIntersectionEXT(0.75f, 0x7Eu);\n"
4610 "}\n";
4611 const std::string callShaderNoSubgroups =
4612 "#version 460 core\n"
4613 "#extension GL_EXT_ray_tracing: require\n"
4614 "layout(location = 0) callableDataInEXT float callData;\n"
4615 "\n"
4616 "void main()\n"
4617 "{\n"
4618 "}\n";
4619
4620 programCollection.glslSources.add("rgen_noSubgroup") << glu::RaygenSource (rgenShaderNoSubgroups) << buildOptions;
4621 programCollection.glslSources.add("ahit_noSubgroup") << glu::AnyHitSource (hitShaderNoSubgroups) << buildOptions;
4622 programCollection.glslSources.add("chit_noSubgroup") << glu::ClosestHitSource (hitShaderNoSubgroups) << buildOptions;
4623 programCollection.glslSources.add("miss_noSubgroup") << glu::MissSource (missShaderNoSubgroups) << buildOptions;
4624 programCollection.glslSources.add("sect_noSubgroup") << glu::IntersectionSource (sectShaderNoSubgroups) << buildOptions;
4625 programCollection.glslSources.add("call_noSubgroup") << glu::CallableSource (callShaderNoSubgroups) << buildOptions;
4626 }
4627
4628 #ifndef CTS_USES_VULKANSC
4629
enumerateRayTracingShaderStages(const VkShaderStageFlags shaderStage)4630 static vector<VkShaderStageFlagBits> enumerateRayTracingShaderStages (const VkShaderStageFlags shaderStage)
4631 {
4632 vector<VkShaderStageFlagBits> result;
4633 const VkShaderStageFlagBits shaderStageFlags[] =
4634 {
4635 VK_SHADER_STAGE_RAYGEN_BIT_KHR,
4636 VK_SHADER_STAGE_ANY_HIT_BIT_KHR,
4637 VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,
4638 VK_SHADER_STAGE_MISS_BIT_KHR,
4639 VK_SHADER_STAGE_INTERSECTION_BIT_KHR,
4640 VK_SHADER_STAGE_CALLABLE_BIT_KHR,
4641 };
4642
4643 for (auto shaderStageFlag: shaderStageFlags)
4644 {
4645 if (0 != (shaderStage & shaderStageFlag))
4646 result.push_back(shaderStageFlag);
4647 }
4648
4649 return result;
4650 }
4651
getRayTracingResultBinding(const VkShaderStageFlagBits shaderStage)4652 static deUint32 getRayTracingResultBinding (const VkShaderStageFlagBits shaderStage)
4653 {
4654 const VkShaderStageFlags shaderStageFlags[] =
4655 {
4656 VK_SHADER_STAGE_RAYGEN_BIT_KHR,
4657 VK_SHADER_STAGE_ANY_HIT_BIT_KHR,
4658 VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,
4659 VK_SHADER_STAGE_MISS_BIT_KHR,
4660 VK_SHADER_STAGE_INTERSECTION_BIT_KHR,
4661 VK_SHADER_STAGE_CALLABLE_BIT_KHR,
4662 };
4663
4664 for (deUint32 shaderStageNdx = 0; shaderStageNdx < DE_LENGTH_OF_ARRAY(shaderStageFlags); ++shaderStageNdx)
4665 {
4666 if (0 != (shaderStage & shaderStageFlags[shaderStageNdx]))
4667 {
4668 DE_ASSERT(0 == (shaderStage & (~shaderStageFlags[shaderStageNdx])));
4669
4670 return shaderStageNdx;
4671 }
4672 }
4673
4674 TCU_THROW(InternalError, "Non-raytracing stage specified or no stage at all");
4675 }
4676
makeRayTracingInputBuffers(Context & context,VkFormat format,const SSBOData * extraDatas,deUint32 extraDatasCount,const vector<VkShaderStageFlagBits> & stagesVector)4677 static vectorBufferOrImage makeRayTracingInputBuffers (Context& context,
4678 VkFormat format,
4679 const SSBOData* extraDatas,
4680 deUint32 extraDatasCount,
4681 const vector<VkShaderStageFlagBits>& stagesVector)
4682 {
4683 const size_t stagesCount = stagesVector.size();
4684 const VkDeviceSize shaderSize = getMaxWidth();
4685 const VkDeviceSize inputBufferSize = getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize;
4686 vectorBufferOrImage inputBuffers (stagesCount + extraDatasCount);
4687
4688 // The implicit result SSBO we use to store our outputs from the shader
4689 for (size_t stageNdx = 0u; stageNdx < stagesCount; ++stageNdx)
4690 inputBuffers[stageNdx] = de::SharedPtr<BufferOrImage>(new Buffer(context, inputBufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT));
4691
4692 for (size_t stageNdx = stagesCount; stageNdx < stagesCount + extraDatasCount; ++stageNdx)
4693 {
4694 const size_t datasNdx = stageNdx - stagesCount;
4695
4696 if (extraDatas[datasNdx].isImage())
4697 {
4698 inputBuffers[stageNdx] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
4699 }
4700 else
4701 {
4702 const auto usage = (extraDatas[datasNdx].isUBO() ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT : VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
4703 const auto size = getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) * extraDatas[datasNdx].numElements;
4704 inputBuffers[stageNdx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, usage));
4705 }
4706
4707 initializeMemory(context, inputBuffers[stageNdx]->getAllocation(), extraDatas[datasNdx]);
4708 }
4709
4710 return inputBuffers;
4711 }
4712
makeRayTracingDescriptorSetLayout(Context & context,const SSBOData * extraDatas,deUint32 extraDatasCount,const vector<VkShaderStageFlagBits> & stagesVector,const vectorBufferOrImage & inputBuffers)4713 static Move<VkDescriptorSetLayout> makeRayTracingDescriptorSetLayout (Context& context,
4714 const SSBOData* extraDatas,
4715 deUint32 extraDatasCount,
4716 const vector<VkShaderStageFlagBits>& stagesVector,
4717 const vectorBufferOrImage& inputBuffers)
4718 {
4719 const DeviceInterface& vkd = context.getDeviceInterface();
4720 const VkDevice device = context.getDevice();
4721 const size_t stagesCount = stagesVector.size();
4722 DescriptorSetLayoutBuilder layoutBuilder;
4723
4724 // The implicit result SSBO we use to store our outputs from the shader
4725 for (size_t stageNdx = 0u; stageNdx < stagesVector.size(); ++stageNdx)
4726 {
4727 const deUint32 stageBinding = getRayTracingResultBinding(stagesVector[stageNdx]);
4728
4729 layoutBuilder.addIndexedBinding(inputBuffers[stageNdx]->getType(), 1, stagesVector[stageNdx], stageBinding, DE_NULL);
4730 }
4731
4732 for (size_t stageNdx = stagesCount; stageNdx < stagesCount + extraDatasCount; ++stageNdx)
4733 {
4734 const size_t datasNdx = stageNdx - stagesCount;
4735
4736 layoutBuilder.addIndexedBinding(inputBuffers[stageNdx]->getType(), 1, extraDatas[datasNdx].stages, extraDatas[datasNdx].binding, DE_NULL);
4737 }
4738
4739 return layoutBuilder.build(vkd, device);
4740 }
4741
makeRayTracingDescriptorSetLayoutAS(Context & context)4742 static Move<VkDescriptorSetLayout> makeRayTracingDescriptorSetLayoutAS (Context& context)
4743 {
4744 const DeviceInterface& vkd = context.getDeviceInterface();
4745 const VkDevice device = context.getDevice();
4746 DescriptorSetLayoutBuilder layoutBuilder;
4747
4748 layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, VK_SHADER_STAGE_RAYGEN_BIT_KHR);
4749
4750 return layoutBuilder.build(vkd, device);
4751 }
4752
makeRayTracingDescriptorPool(Context & context,const vectorBufferOrImage & inputBuffers)4753 static Move<VkDescriptorPool> makeRayTracingDescriptorPool (Context& context,
4754 const vectorBufferOrImage& inputBuffers)
4755 {
4756 const DeviceInterface& vkd = context.getDeviceInterface();
4757 const VkDevice device = context.getDevice();
4758 const deUint32 maxDescriptorSets = 2u;
4759 DescriptorPoolBuilder poolBuilder;
4760 Move<VkDescriptorPool> result;
4761
4762 if (inputBuffers.size() > 0)
4763 {
4764 for (size_t ndx = 0u; ndx < inputBuffers.size(); ndx++)
4765 poolBuilder.addType(inputBuffers[ndx]->getType());
4766 }
4767
4768 poolBuilder.addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR);
4769
4770 result = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, maxDescriptorSets);
4771
4772 return result;
4773 }
4774
makeRayTracingDescriptorSet(Context & context,VkDescriptorPool descriptorPool,VkDescriptorSetLayout descriptorSetLayout,const SSBOData * extraDatas,deUint32 extraDatasCount,const vector<VkShaderStageFlagBits> & stagesVector,const vectorBufferOrImage & inputBuffers)4775 static Move<VkDescriptorSet> makeRayTracingDescriptorSet (Context& context,
4776 VkDescriptorPool descriptorPool,
4777 VkDescriptorSetLayout descriptorSetLayout,
4778 const SSBOData* extraDatas,
4779 deUint32 extraDatasCount,
4780 const vector<VkShaderStageFlagBits>& stagesVector,
4781 const vectorBufferOrImage& inputBuffers)
4782 {
4783 const DeviceInterface& vkd = context.getDeviceInterface();
4784 const VkDevice device = context.getDevice();
4785 const size_t stagesCount = stagesVector.size();
4786 Move<VkDescriptorSet> descriptorSet;
4787
4788 if (inputBuffers.size() > 0)
4789 {
4790 DescriptorSetUpdateBuilder updateBuilder;
4791
4792 // Create descriptor set
4793 descriptorSet = makeDescriptorSet(vkd, device, descriptorPool, descriptorSetLayout);
4794
4795 for (size_t ndx = 0u; ndx < stagesCount + extraDatasCount; ndx++)
4796 {
4797 const deUint32 binding = (ndx < stagesCount)
4798 ? getRayTracingResultBinding(stagesVector[ndx])
4799 : extraDatas[ndx - stagesCount].binding;
4800
4801 if (inputBuffers[ndx]->isImage())
4802 {
4803 const VkDescriptorImageInfo info = makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(), inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
4804
4805 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(binding), inputBuffers[ndx]->getType(), &info);
4806 }
4807 else
4808 {
4809 const VkDescriptorBufferInfo info = makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(), 0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
4810
4811 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(binding), inputBuffers[ndx]->getType(), &info);
4812 }
4813 }
4814
4815 updateBuilder.update(vkd, device);
4816 }
4817
4818 return descriptorSet;
4819 }
4820
makeRayTracingDescriptorSetAS(Context & context,VkDescriptorPool descriptorPool,VkDescriptorSetLayout descriptorSetLayout,de::MovePtr<TopLevelAccelerationStructure> & topLevelAccelerationStructure)4821 static Move<VkDescriptorSet> makeRayTracingDescriptorSetAS (Context& context,
4822 VkDescriptorPool descriptorPool,
4823 VkDescriptorSetLayout descriptorSetLayout,
4824 de::MovePtr<TopLevelAccelerationStructure>& topLevelAccelerationStructure)
4825 {
4826 const DeviceInterface& vkd = context.getDeviceInterface();
4827 const VkDevice device = context.getDevice();
4828 const TopLevelAccelerationStructure* topLevelAccelerationStructurePtr = topLevelAccelerationStructure.get();
4829 const VkWriteDescriptorSetAccelerationStructureKHR accelerationStructureWriteDescriptorSet =
4830 {
4831 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, // VkStructureType sType;
4832 DE_NULL, // const void* pNext;
4833 1u, // deUint32 accelerationStructureCount;
4834 topLevelAccelerationStructurePtr->getPtr(), // const VkAccelerationStructureKHR* pAccelerationStructures;
4835 };
4836 Move<VkDescriptorSet> descriptorSet = makeDescriptorSet(vkd, device, descriptorPool, descriptorSetLayout);
4837
4838 DescriptorSetUpdateBuilder()
4839 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &accelerationStructureWriteDescriptorSet)
4840 .update(vkd, device);
4841
4842 return descriptorSet;
4843 }
4844
makeRayTracingPipelineLayout(Context & context,const VkDescriptorSetLayout descriptorSetLayout0,const VkDescriptorSetLayout descriptorSetLayout1)4845 static Move<VkPipelineLayout> makeRayTracingPipelineLayout (Context& context,
4846 const VkDescriptorSetLayout descriptorSetLayout0,
4847 const VkDescriptorSetLayout descriptorSetLayout1)
4848 {
4849 const DeviceInterface& vkd = context.getDeviceInterface();
4850 const VkDevice device = context.getDevice();
4851 const std::vector<VkDescriptorSetLayout> descriptorSetLayouts { descriptorSetLayout0, descriptorSetLayout1 };
4852 const deUint32 descriptorSetLayoutsSize = static_cast<deUint32>(descriptorSetLayouts.size());
4853
4854 return makePipelineLayout(vkd, device, descriptorSetLayoutsSize, descriptorSetLayouts.data());
4855 }
4856
createTopAccelerationStructure(Context & context,de::SharedPtr<BottomLevelAccelerationStructure> bottomLevelAccelerationStructure)4857 static de::MovePtr<TopLevelAccelerationStructure> createTopAccelerationStructure (Context& context,
4858 de::SharedPtr<BottomLevelAccelerationStructure> bottomLevelAccelerationStructure)
4859 {
4860 const DeviceInterface& vkd = context.getDeviceInterface();
4861 const VkDevice device = context.getDevice();
4862 Allocator& allocator = context.getDefaultAllocator();
4863 de::MovePtr<TopLevelAccelerationStructure> result = makeTopLevelAccelerationStructure();
4864
4865 result->setInstanceCount(1);
4866 result->addInstance(bottomLevelAccelerationStructure);
4867 result->create(vkd, device, allocator);
4868
4869 return result;
4870 }
4871
createBottomAccelerationStructure(Context & context)4872 static de::SharedPtr<BottomLevelAccelerationStructure> createBottomAccelerationStructure (Context& context)
4873 {
4874 const DeviceInterface& vkd = context.getDeviceInterface();
4875 const VkDevice device = context.getDevice();
4876 Allocator& allocator = context.getDefaultAllocator();
4877 de::MovePtr<BottomLevelAccelerationStructure> result = makeBottomLevelAccelerationStructure();
4878 const std::vector<tcu::Vec3> geometryData { tcu::Vec3(-1.0f, -1.0f, -2.0f), tcu::Vec3(+1.0f, +1.0f, -1.0f) };
4879
4880 result->setGeometryCount(1u);
4881 result->addGeometry(geometryData, false);
4882 result->create(vkd, device, allocator, 0u);
4883
4884 return de::SharedPtr<BottomLevelAccelerationStructure>(result.release());
4885 }
4886
// Creates the ray tracing pipeline used by the subgroup tests.
//
// For each of the six ray tracing stages (raygen, any-hit, closest-hit, miss,
// intersection, callable) the shader containing subgroup operations is used only
// when the corresponding bit is set in shaderStageTested; otherwise the trivial
// "*_noSubgroup" variant is used so the pipeline is still complete.
//
// shaderStageCreateFlags and requiredSubgroupSize are indexed [0..5] in the stage
// order above and may each be DE_NULL. A requiredSubgroupSize entry of 0 means
// "no required subgroup size": no VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT
// is chained for that stage.
//
// The compiled VkPipeline is returned through pipelineOut; the RayTracingPipeline
// wrapper itself is returned so the caller can create shader binding tables from it.
static de::MovePtr<RayTracingPipeline> makeRayTracingPipeline (Context& context,
															   const VkShaderStageFlags shaderStageTested,
															   const VkPipelineLayout pipelineLayout,
															   const deUint32 shaderStageCreateFlags[6],
															   const deUint32 requiredSubgroupSize[6],
															   Move<VkPipeline>& pipelineOut)
{
	const DeviceInterface& vkd = context.getDeviceInterface();
	const VkDevice device = context.getDevice();
	BinaryCollection& collection = context.getBinaryCollection();
	// Pick the subgroup-testing shader when the stage is tested, the no-op variant otherwise.
	const char* shaderRgenName = (0 != (shaderStageTested & VK_SHADER_STAGE_RAYGEN_BIT_KHR)) ? "rgen" : "rgen_noSubgroup";
	const char* shaderAhitName = (0 != (shaderStageTested & VK_SHADER_STAGE_ANY_HIT_BIT_KHR)) ? "ahit" : "ahit_noSubgroup";
	const char* shaderChitName = (0 != (shaderStageTested & VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR)) ? "chit" : "chit_noSubgroup";
	const char* shaderMissName = (0 != (shaderStageTested & VK_SHADER_STAGE_MISS_BIT_KHR)) ? "miss" : "miss_noSubgroup";
	const char* shaderSectName = (0 != (shaderStageTested & VK_SHADER_STAGE_INTERSECTION_BIT_KHR)) ? "sect" : "sect_noSubgroup";
	const char* shaderCallName = (0 != (shaderStageTested & VK_SHADER_STAGE_CALLABLE_BIT_KHR)) ? "call" : "call_noSubgroup";
	const VkShaderModuleCreateFlags noShaderModuleCreateFlags = static_cast<VkShaderModuleCreateFlags>(0);
	Move<VkShaderModule> rgenShaderModule = createShaderModule(vkd, device, collection.get(shaderRgenName), noShaderModuleCreateFlags);
	Move<VkShaderModule> ahitShaderModule = createShaderModule(vkd, device, collection.get(shaderAhitName), noShaderModuleCreateFlags);
	Move<VkShaderModule> chitShaderModule = createShaderModule(vkd, device, collection.get(shaderChitName), noShaderModuleCreateFlags);
	Move<VkShaderModule> missShaderModule = createShaderModule(vkd, device, collection.get(shaderMissName), noShaderModuleCreateFlags);
	Move<VkShaderModule> sectShaderModule = createShaderModule(vkd, device, collection.get(shaderSectName), noShaderModuleCreateFlags);
	Move<VkShaderModule> callShaderModule = createShaderModule(vkd, device, collection.get(shaderCallName), noShaderModuleCreateFlags);
	// Per-stage create flags: all zero when the caller passed DE_NULL.
	const VkPipelineShaderStageCreateFlags noPipelineShaderStageCreateFlags = static_cast<VkPipelineShaderStageCreateFlags>(0);
	const VkPipelineShaderStageCreateFlags rgenPipelineShaderStageCreateFlags = (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[0];
	const VkPipelineShaderStageCreateFlags ahitPipelineShaderStageCreateFlags = (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[1];
	const VkPipelineShaderStageCreateFlags chitPipelineShaderStageCreateFlags = (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[2];
	const VkPipelineShaderStageCreateFlags missPipelineShaderStageCreateFlags = (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[3];
	const VkPipelineShaderStageCreateFlags sectPipelineShaderStageCreateFlags = (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[4];
	const VkPipelineShaderStageCreateFlags callPipelineShaderStageCreateFlags = (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[5];
	// One create-info per stage; requiredSubgroupSize == 0 marks the entry as unused
	// (the pointer selection below turns it into a DE_NULL pNext).
	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT requiredSubgroupSizeCreateInfo[6] =
	{
		{
			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
			DE_NULL,
			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[0] : 0u,
		},
		{
			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
			DE_NULL,
			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[1] : 0u,
		},
		{
			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
			DE_NULL,
			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[2] : 0u,
		},
		{
			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
			DE_NULL,
			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[3] : 0u,
		},
		{
			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
			DE_NULL,
			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[4] : 0u,
		},
		{
			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
			DE_NULL,
			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[5] : 0u,
		},
	};
	// Only chain a required-subgroup-size create-info when a non-zero size was requested.
	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT* rgenRequiredSubgroupSizeCreateInfo = (requiredSubgroupSizeCreateInfo[0].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[0];
	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT* ahitRequiredSubgroupSizeCreateInfo = (requiredSubgroupSizeCreateInfo[1].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[1];
	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT* chitRequiredSubgroupSizeCreateInfo = (requiredSubgroupSizeCreateInfo[2].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[2];
	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT* missRequiredSubgroupSizeCreateInfo = (requiredSubgroupSizeCreateInfo[3].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[3];
	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT* sectRequiredSubgroupSizeCreateInfo = (requiredSubgroupSizeCreateInfo[4].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[4];
	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT* callRequiredSubgroupSizeCreateInfo = (requiredSubgroupSizeCreateInfo[5].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[5];
	de::MovePtr<RayTracingPipeline> rayTracingPipeline = de::newMovePtr<RayTracingPipeline>();

	rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR , rgenShaderModule, RAYGEN_GROUP, DE_NULL, rgenPipelineShaderStageCreateFlags, rgenRequiredSubgroupSizeCreateInfo);
	rayTracingPipeline->addShader(VK_SHADER_STAGE_ANY_HIT_BIT_KHR , ahitShaderModule, HIT_GROUP, DE_NULL, ahitPipelineShaderStageCreateFlags, ahitRequiredSubgroupSizeCreateInfo);
	rayTracingPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR , chitShaderModule, HIT_GROUP, DE_NULL, chitPipelineShaderStageCreateFlags, chitRequiredSubgroupSizeCreateInfo);
	rayTracingPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR , missShaderModule, MISS_GROUP, DE_NULL, missPipelineShaderStageCreateFlags, missRequiredSubgroupSizeCreateInfo);
	rayTracingPipeline->addShader(VK_SHADER_STAGE_INTERSECTION_BIT_KHR , sectShaderModule, HIT_GROUP, DE_NULL, sectPipelineShaderStageCreateFlags, sectRequiredSubgroupSizeCreateInfo);
	rayTracingPipeline->addShader(VK_SHADER_STAGE_CALLABLE_BIT_KHR , callShaderModule, CALL_GROUP, DE_NULL, callPipelineShaderStageCreateFlags, callRequiredSubgroupSizeCreateInfo);

	// Must execute createPipeline here, due to pNext pointers in calls to addShader are local
	pipelineOut = rayTracingPipeline->createPipeline(vkd, device, pipelineLayout);

	return rayTracingPipeline;
}
4970
getPossibleRayTracingSubgroupStages(Context & context,const VkShaderStageFlags testedStages)4971 VkShaderStageFlags getPossibleRayTracingSubgroupStages (Context& context, const VkShaderStageFlags testedStages)
4972 {
4973 const VkPhysicalDeviceSubgroupProperties& subgroupProperties = context.getSubgroupProperties();
4974 const VkShaderStageFlags stages = testedStages & subgroupProperties.supportedStages;
4975
4976 DE_ASSERT(isAllRayTracingStages(testedStages));
4977
4978 return stages;
4979 }
4980
allRayTracingStages(Context & context,VkFormat format,const SSBOData * extraDatas,deUint32 extraDataCount,const void * internalData,const VerificationFunctor & checkResult,const VkShaderStageFlags shaderStage)4981 tcu::TestStatus allRayTracingStages (Context& context,
4982 VkFormat format,
4983 const SSBOData* extraDatas,
4984 deUint32 extraDataCount,
4985 const void* internalData,
4986 const VerificationFunctor& checkResult,
4987 const VkShaderStageFlags shaderStage)
4988 {
4989 return vkt::subgroups::allRayTracingStagesRequiredSubgroupSize(context,
4990 format,
4991 extraDatas,
4992 extraDataCount,
4993 internalData,
4994 checkResult,
4995 shaderStage,
4996 DE_NULL,
4997 DE_NULL);
4998 }
4999
// Core ray tracing subgroup test runner.
//
// Builds the acceleration structures, pipeline, shader binding tables and
// descriptor sets once, then for each width in [1, getMaxWidth()) traces a
// width x 1 x 1 ray grid and verifies the per-stage result buffers (plus any
// matching extra-data buffers) with checkResult. Fails if any iteration fails
// or if no iteration passed at all.
//
// shaderStageCreateFlags / requiredSubgroupSize are forwarded per stage to
// makeRayTracingPipeline and may be DE_NULL.
tcu::TestStatus allRayTracingStagesRequiredSubgroupSize (Context& context,
														 VkFormat format,
														 const SSBOData* extraDatas,
														 deUint32 extraDatasCount,
														 const void* internalData,
														 const VerificationFunctor& checkResult,
														 const VkShaderStageFlags shaderStageTested,
														 const deUint32 shaderStageCreateFlags[6],
														 const deUint32 requiredSubgroupSize[6])
{
	const DeviceInterface& vkd = context.getDeviceInterface();
	const VkDevice device = context.getDevice();
	const VkQueue queue = context.getUniversalQueue();
	const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
	Allocator& allocator = context.getDefaultAllocator();
	const deUint32 subgroupSize = getSubgroupSize(context);
	const deUint32 maxWidth = getMaxWidth();
	// Stages actually under test, in canonical order; drives result buffer layout.
	const vector<VkShaderStageFlagBits> stagesVector = enumerateRayTracingShaderStages(shaderStageTested);
	const deUint32 stagesCount = static_cast<deUint32>(stagesVector.size());
	// AS objects are created here; they are built on the command buffer below.
	de::SharedPtr<BottomLevelAccelerationStructure> bottomLevelAccelerationStructure = createBottomAccelerationStructure(context);
	de::MovePtr<TopLevelAccelerationStructure> topLevelAccelerationStructure = createTopAccelerationStructure(context, bottomLevelAccelerationStructure);
	// inputBuffers = one result buffer per tested stage, then the extra-data resources.
	vectorBufferOrImage inputBuffers = makeRayTracingInputBuffers(context, format, extraDatas, extraDatasCount, stagesVector);
	const Move<VkDescriptorSetLayout> descriptorSetLayout = makeRayTracingDescriptorSetLayout(context, extraDatas, extraDatasCount, stagesVector, inputBuffers);
	const Move<VkDescriptorSetLayout> descriptorSetLayoutAS = makeRayTracingDescriptorSetLayoutAS(context);
	const Move<VkPipelineLayout> pipelineLayout = makeRayTracingPipelineLayout(context, *descriptorSetLayout, *descriptorSetLayoutAS);
	Move<VkPipeline> pipeline = Move<VkPipeline>();
	const de::MovePtr<RayTracingPipeline> rayTracingPipeline = makeRayTracingPipeline(context, shaderStageTested, *pipelineLayout, shaderStageCreateFlags, requiredSubgroupSize, pipeline);
	// One shader binding table (and address region) per shader group.
	const deUint32 shaderGroupHandleSize = context.getRayTracingPipelineProperties().shaderGroupHandleSize;
	const deUint32 shaderGroupBaseAlignment = context.getRayTracingPipelineProperties().shaderGroupBaseAlignment;
	de::MovePtr<BufferWithMemory> rgenShaderBindingTable = rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, RAYGEN_GROUP, 1u);
	de::MovePtr<BufferWithMemory> missShaderBindingTable = rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, MISS_GROUP, 1u);
	de::MovePtr<BufferWithMemory> hitsShaderBindingTable = rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, HIT_GROUP, 1u);
	de::MovePtr<BufferWithMemory> callShaderBindingTable = rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, CALL_GROUP, 1u);
	const VkStridedDeviceAddressRegionKHR rgenShaderBindingTableRegion = makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, rgenShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
	const VkStridedDeviceAddressRegionKHR missShaderBindingTableRegion = makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, missShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
	const VkStridedDeviceAddressRegionKHR hitsShaderBindingTableRegion = makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, hitsShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
	const VkStridedDeviceAddressRegionKHR callShaderBindingTableRegion = makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, callShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
	const Move<VkDescriptorPool> descriptorPool = makeRayTracingDescriptorPool(context, inputBuffers);
	const Move<VkDescriptorSet> descriptorSet = makeRayTracingDescriptorSet(context, *descriptorPool, *descriptorSetLayout, extraDatas, extraDatasCount, stagesVector, inputBuffers);
	const Move<VkDescriptorSet> descriptorSetAS = makeRayTracingDescriptorSetAS(context, *descriptorPool, *descriptorSetLayoutAS, topLevelAccelerationStructure);
	const Move<VkCommandPool> cmdPool = makeCommandPool(vkd, device, queueFamilyIndex);
	const Move<VkCommandBuffer> cmdBuffer = makeCommandBuffer(context, *cmdPool);
	deUint32 passIterations = 0u;
	deUint32 failIterations = 0u;

	DE_ASSERT(shaderStageTested != 0);

	for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
	{

		for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
		{
			// re-init the data: each iteration starts from the caller-provided contents
			const Allocation& alloc = inputBuffers[ndx]->getAllocation();

			initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
		}

		beginCommandBuffer(vkd, *cmdBuffer);
		{
			vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);

			// Build the BLAS/TLAS on-device before tracing.
			bottomLevelAccelerationStructure->build(vkd, device, *cmdBuffer);
			topLevelAccelerationStructure->build(vkd, device, *cmdBuffer);

			// Set 1: acceleration structure.
			vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 1u, 1u, &descriptorSetAS.get(), 0u, DE_NULL);

			// Set 0: result buffers + extra data (only when there is anything to bind).
			if (stagesCount + extraDatasCount > 0)
				vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);

			cmdTraceRays(vkd,
						 *cmdBuffer,
						 &rgenShaderBindingTableRegion,
						 &missShaderBindingTableRegion,
						 &hitsShaderBindingTableRegion,
						 &callShaderBindingTableRegion,
						 width, 1, 1);

			// Make shader writes visible to the host readback below.
			const VkMemoryBarrier postTraceMemoryBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
			cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, VK_PIPELINE_STAGE_HOST_BIT, &postTraceMemoryBarrier);
		}
		endCommandBuffer(vkd, *cmdBuffer);

		submitCommandsAndWait(vkd, device, queue, *cmdBuffer);

		// Verify each tested stage independently.
		for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
		{
			std::vector<const void*> datas;

			if (!inputBuffers[ndx]->isImage())
			{
				const Allocation& resultAlloc = inputBuffers[ndx]->getAllocation();

				invalidateAlloc(vkd, device, resultAlloc);

				// we always have our result data first
				datas.push_back(resultAlloc.getHostPtr());
			}

			// Append host pointers for extra-data buffers visible to this stage.
			for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
			{
				const deUint32 datasNdx = index - stagesCount;

				if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
				{
					const Allocation& resultAlloc = inputBuffers[index]->getAllocation();

					invalidateAlloc(vkd, device, resultAlloc);

					// we always have our result data first
					datas.push_back(resultAlloc.getHostPtr());
				}
			}

			if (!checkResult(internalData, datas, width, subgroupSize, false))
				failIterations++;
			else
				passIterations++;
		}

		context.resetCommandPoolForVKSC(device, *cmdPool);
	}

	if (failIterations > 0 || passIterations == 0)
		return tcu::TestStatus::fail("Failed " + de::toString(failIterations) + " out of " + de::toString(failIterations + passIterations) + " iterations.");
	else
		return tcu::TestStatus::pass("OK");
}
5128 #endif // CTS_USES_VULKANSC
5129
5130 } // namespace subgroups
} // namespace vkt
5132