1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2019 The Khronos Group Inc.
6 * Copyright (c) 2019 Google Inc.
7 * Copyright (c) 2017 Codeplay Software Ltd.
8 *
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
20 *
21 */ /*!
22 * \file
23 * \brief Subgroups Tests Utils
24 */ /*--------------------------------------------------------------------*/
25
26 #include "vktSubgroupsTestsUtils.hpp"
27 #include "vkRayTracingUtil.hpp"
28 #include "deFloat16.h"
29 #include "deRandom.hpp"
30 #include "tcuCommandLine.hpp"
31 #include "tcuStringTemplate.hpp"
32 #include "vkBarrierUtil.hpp"
33 #include "vkImageUtil.hpp"
34 #include "vkTypeUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkObjUtil.hpp"
37
38 using namespace tcu;
39 using namespace std;
40 using namespace vk;
41 using namespace vkt;
42
43 namespace
44 {
45
// Discriminates compute-pipeline tests from mesh-shading tests that are
// presumably driven with compute-like dispatch semantics — confirm against users.
enum class ComputeLike { COMPUTE = 0, MESH };
47
getMaxWidth()48 deUint32 getMaxWidth ()
49 {
50 return 1024u;
51 }
52
// Advances the test width: every value up to 128 (the maximum subgroup size)
// is covered exhaustively, after which only powers of two are visited to keep
// total testing time reasonable.
deUint32 getNextWidth (const deUint32 width)
{
	return (width < 128) ? (width + 1) : (width * 2);
}
66
getFormatSizeInBytes(const VkFormat format)67 deUint32 getFormatSizeInBytes (const VkFormat format)
68 {
69 switch (format)
70 {
71 default:
72 DE_FATAL("Unhandled format!");
73 return 0;
74 case VK_FORMAT_R8_SINT:
75 case VK_FORMAT_R8_UINT:
76 return static_cast<deUint32>(sizeof(deInt8));
77 case VK_FORMAT_R8G8_SINT:
78 case VK_FORMAT_R8G8_UINT:
79 return static_cast<deUint32>(sizeof(deInt8) * 2);
80 case VK_FORMAT_R8G8B8_SINT:
81 case VK_FORMAT_R8G8B8_UINT:
82 case VK_FORMAT_R8G8B8A8_SINT:
83 case VK_FORMAT_R8G8B8A8_UINT:
84 return static_cast<deUint32>(sizeof(deInt8) * 4);
85 case VK_FORMAT_R16_SINT:
86 case VK_FORMAT_R16_UINT:
87 case VK_FORMAT_R16_SFLOAT:
88 return static_cast<deUint32>(sizeof(deInt16));
89 case VK_FORMAT_R16G16_SINT:
90 case VK_FORMAT_R16G16_UINT:
91 case VK_FORMAT_R16G16_SFLOAT:
92 return static_cast<deUint32>(sizeof(deInt16) * 2);
93 case VK_FORMAT_R16G16B16_UINT:
94 case VK_FORMAT_R16G16B16_SINT:
95 case VK_FORMAT_R16G16B16_SFLOAT:
96 case VK_FORMAT_R16G16B16A16_SINT:
97 case VK_FORMAT_R16G16B16A16_UINT:
98 case VK_FORMAT_R16G16B16A16_SFLOAT:
99 return static_cast<deUint32>(sizeof(deInt16) * 4);
100 case VK_FORMAT_R32_SINT:
101 case VK_FORMAT_R32_UINT:
102 case VK_FORMAT_R32_SFLOAT:
103 return static_cast<deUint32>(sizeof(deInt32));
104 case VK_FORMAT_R32G32_SINT:
105 case VK_FORMAT_R32G32_UINT:
106 case VK_FORMAT_R32G32_SFLOAT:
107 return static_cast<deUint32>(sizeof(deInt32) * 2);
108 case VK_FORMAT_R32G32B32_SINT:
109 case VK_FORMAT_R32G32B32_UINT:
110 case VK_FORMAT_R32G32B32_SFLOAT:
111 case VK_FORMAT_R32G32B32A32_SINT:
112 case VK_FORMAT_R32G32B32A32_UINT:
113 case VK_FORMAT_R32G32B32A32_SFLOAT:
114 return static_cast<deUint32>(sizeof(deInt32) * 4);
115 case VK_FORMAT_R64_SINT:
116 case VK_FORMAT_R64_UINT:
117 case VK_FORMAT_R64_SFLOAT:
118 return static_cast<deUint32>(sizeof(deInt64));
119 case VK_FORMAT_R64G64_SINT:
120 case VK_FORMAT_R64G64_UINT:
121 case VK_FORMAT_R64G64_SFLOAT:
122 return static_cast<deUint32>(sizeof(deInt64) * 2);
123 case VK_FORMAT_R64G64B64_SINT:
124 case VK_FORMAT_R64G64B64_UINT:
125 case VK_FORMAT_R64G64B64_SFLOAT:
126 case VK_FORMAT_R64G64B64A64_SINT:
127 case VK_FORMAT_R64G64B64A64_UINT:
128 case VK_FORMAT_R64G64B64A64_SFLOAT:
129 return static_cast<deUint32>(sizeof(deInt64) * 4);
130 // The below formats are used to represent bool and bvec* types. These
131 // types are passed to the shader as int and ivec* types, before the
132 // calculations are done as booleans. We need a distinct type here so
133 // that the shader generators can switch on it and generate the correct
134 // shader source for testing.
135 case VK_FORMAT_R8_USCALED:
136 return static_cast<deUint32>(sizeof(deInt32));
137 case VK_FORMAT_R8G8_USCALED:
138 return static_cast<deUint32>(sizeof(deInt32) * 2);
139 case VK_FORMAT_R8G8B8_USCALED:
140 case VK_FORMAT_R8G8B8A8_USCALED:
141 return static_cast<deUint32>(sizeof(deInt32) * 4);
142 }
143 }
144
getElementSizeInBytes(const VkFormat format,const subgroups::SSBOData::InputDataLayoutType layout)145 deUint32 getElementSizeInBytes (const VkFormat format,
146 const subgroups::SSBOData::InputDataLayoutType layout)
147 {
148 const deUint32 bytes = getFormatSizeInBytes(format);
149
150 if (layout == subgroups::SSBOData::LayoutStd140)
151 return bytes < 16 ? 16 : bytes;
152 else
153 return bytes;
154 }
155
// Creates a minimal render pass with one subpass and a single color
// attachment of the given format. The attachment is cleared on load, stored
// on write, and transitioned UNDEFINED -> TRANSFER_SRC_OPTIMAL so the
// rendered result can be copied out afterwards. Two external dependencies
// bracket the subpass so color-attachment accesses are ordered against
// surrounding memory reads.
Move<VkRenderPass> makeRenderPass (Context& context, VkFormat format)
{
	const VkAttachmentReference colorReference =
	{
		0,
		VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
	};
	const VkSubpassDescription subpassDescription =
	{
		0u,												// VkSubpassDescriptionFlags	flags;
		VK_PIPELINE_BIND_POINT_GRAPHICS,				// VkPipelineBindPoint			pipelineBindPoint;
		0,												// deUint32						inputAttachmentCount;
		DE_NULL,										// const VkAttachmentReference*	pInputAttachments;
		1,												// deUint32						colorAttachmentCount;
		&colorReference,								// const VkAttachmentReference*	pColorAttachments;
		DE_NULL,										// const VkAttachmentReference*	pResolveAttachments;
		DE_NULL,										// const VkAttachmentReference*	pDepthStencilAttachment;
		0,												// deUint32						preserveAttachmentCount;
		DE_NULL											// const deUint32*				pPreserveAttachments;
	};
	// [0]: external -> subpass 0 (protects the clear/write), [1]: subpass 0 -> external.
	const VkSubpassDependency subpassDependencies[2] =
	{
		{
			VK_SUBPASS_EXTERNAL,							// deUint32				srcSubpass;
			0u,												// deUint32				dstSubpass;
			VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,			// VkPipelineStageFlags	srcStageMask;
			VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,	// VkPipelineStageFlags	dstStageMask;
			VK_ACCESS_MEMORY_READ_BIT,						// VkAccessFlags		srcAccessMask;
			VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,	// VkAccessFlags	dstAccessMask;
			VK_DEPENDENCY_BY_REGION_BIT						// VkDependencyFlags	dependencyFlags;
		},
		{
			0u,												// deUint32				srcSubpass;
			VK_SUBPASS_EXTERNAL,							// deUint32				dstSubpass;
			VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,	// VkPipelineStageFlags	srcStageMask;
			VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,			// VkPipelineStageFlags	dstStageMask;
			VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,	// VkAccessFlags	srcAccessMask;
			VK_ACCESS_MEMORY_READ_BIT,						// VkAccessFlags		dstAccessMask;
			VK_DEPENDENCY_BY_REGION_BIT						// VkDependencyFlags	dependencyFlags;
		},
	};
	const VkAttachmentDescription attachmentDescription =
	{
		0u,												// VkAttachmentDescriptionFlags	flags;
		format,											// VkFormat						format;
		VK_SAMPLE_COUNT_1_BIT,							// VkSampleCountFlagBits		samples;
		VK_ATTACHMENT_LOAD_OP_CLEAR,					// VkAttachmentLoadOp			loadOp;
		VK_ATTACHMENT_STORE_OP_STORE,					// VkAttachmentStoreOp			storeOp;
		VK_ATTACHMENT_LOAD_OP_DONT_CARE,				// VkAttachmentLoadOp			stencilLoadOp;
		VK_ATTACHMENT_STORE_OP_DONT_CARE,				// VkAttachmentStoreOp			stencilStoreOp;
		VK_IMAGE_LAYOUT_UNDEFINED,						// VkImageLayout				initialLayout;
		VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL			// VkImageLayout				finalLayout;
	};
	const VkRenderPassCreateInfo renderPassCreateInfo =
	{
		VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,		// VkStructureType					sType;
		DE_NULL,										// const void*						pNext;
		0u,												// VkRenderPassCreateFlags			flags;
		1,												// deUint32							attachmentCount;
		&attachmentDescription,							// const VkAttachmentDescription*	pAttachments;
		1,												// deUint32							subpassCount;
		&subpassDescription,							// const VkSubpassDescription*		pSubpasses;
		2,												// deUint32							dependencyCount;
		subpassDependencies								// const VkSubpassDependency*		pDependencies;
	};

	return createRenderPass(context.getDeviceInterface(), context.getDevice(), &renderPassCreateInfo);
}
224
makeGraphicsPipeline(const DeviceInterface & vk,const VkDevice device,const VkPipelineLayout pipelineLayout,const VkShaderModule vertexShaderModule,const VkShaderModule tessellationControlShaderModule,const VkShaderModule tessellationEvalShaderModule,const VkShaderModule geometryShaderModule,const VkShaderModule fragmentShaderModule,const VkRenderPass renderPass,const std::vector<VkViewport> & viewports,const std::vector<VkRect2D> & scissors,const VkPrimitiveTopology topology,const deUint32 subpass,const deUint32 patchControlPoints,const VkPipelineVertexInputStateCreateInfo * vertexInputStateCreateInfo,const VkPipelineRasterizationStateCreateInfo * rasterizationStateCreateInfo,const VkPipelineMultisampleStateCreateInfo * multisampleStateCreateInfo,const VkPipelineDepthStencilStateCreateInfo * depthStencilStateCreateInfo,const VkPipelineColorBlendStateCreateInfo * colorBlendStateCreateInfo,const VkPipelineDynamicStateCreateInfo * dynamicStateCreateInfo,const deUint32 vertexShaderStageCreateFlags,const deUint32 tessellationControlShaderStageCreateFlags,const deUint32 tessellationEvalShaderStageCreateFlags,const deUint32 geometryShaderStageCreateFlags,const deUint32 fragmentShaderStageCreateFlags,const deUint32 requiredSubgroupSize[5])225 Move<VkPipeline> makeGraphicsPipeline (const DeviceInterface& vk,
226 const VkDevice device,
227 const VkPipelineLayout pipelineLayout,
228 const VkShaderModule vertexShaderModule,
229 const VkShaderModule tessellationControlShaderModule,
230 const VkShaderModule tessellationEvalShaderModule,
231 const VkShaderModule geometryShaderModule,
232 const VkShaderModule fragmentShaderModule,
233 const VkRenderPass renderPass,
234 const std::vector<VkViewport>& viewports,
235 const std::vector<VkRect2D>& scissors,
236 const VkPrimitiveTopology topology,
237 const deUint32 subpass,
238 const deUint32 patchControlPoints,
239 const VkPipelineVertexInputStateCreateInfo* vertexInputStateCreateInfo,
240 const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo,
241 const VkPipelineMultisampleStateCreateInfo* multisampleStateCreateInfo,
242 const VkPipelineDepthStencilStateCreateInfo* depthStencilStateCreateInfo,
243 const VkPipelineColorBlendStateCreateInfo* colorBlendStateCreateInfo,
244 const VkPipelineDynamicStateCreateInfo* dynamicStateCreateInfo,
245 const deUint32 vertexShaderStageCreateFlags,
246 const deUint32 tessellationControlShaderStageCreateFlags,
247 const deUint32 tessellationEvalShaderStageCreateFlags,
248 const deUint32 geometryShaderStageCreateFlags,
249 const deUint32 fragmentShaderStageCreateFlags,
250 const deUint32 requiredSubgroupSize[5])
251 {
252 const VkBool32 disableRasterization = (fragmentShaderModule == DE_NULL);
253 const bool hasTessellation = (tessellationControlShaderModule != DE_NULL || tessellationEvalShaderModule != DE_NULL);
254
255 VkPipelineShaderStageCreateInfo stageCreateInfo =
256 {
257 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType
258 DE_NULL, // const void* pNext
259 0u, // VkPipelineShaderStageCreateFlags flags
260 VK_SHADER_STAGE_VERTEX_BIT, // VkShaderStageFlagBits stage
261 DE_NULL, // VkShaderModule module
262 "main", // const char* pName
263 DE_NULL // const VkSpecializationInfo* pSpecializationInfo
264 };
265
266 std::vector<VkPipelineShaderStageCreateInfo> pipelineShaderStageParams;
267
268 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT requiredSubgroupSizeCreateInfo[5] =
269 {
270 {
271 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
272 DE_NULL,
273 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[0] : 0u,
274 },
275 {
276 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
277 DE_NULL,
278 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[1] : 0u,
279 },
280 {
281 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
282 DE_NULL,
283 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[2] : 0u,
284 },
285 {
286 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
287 DE_NULL,
288 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[3] : 0u,
289 },
290 {
291 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
292 DE_NULL,
293 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[4] : 0u,
294 },
295 };
296
297 {
298 stageCreateInfo.pNext = (requiredSubgroupSizeCreateInfo[0].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[0] : DE_NULL;
299 stageCreateInfo.flags = vertexShaderStageCreateFlags;
300 stageCreateInfo.stage = VK_SHADER_STAGE_VERTEX_BIT;
301 stageCreateInfo.module = vertexShaderModule;
302 pipelineShaderStageParams.push_back(stageCreateInfo);
303 }
304
305 if (tessellationControlShaderModule != DE_NULL)
306 {
307 stageCreateInfo.pNext = (requiredSubgroupSizeCreateInfo[1].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[1] : DE_NULL;
308 stageCreateInfo.flags = tessellationControlShaderStageCreateFlags;
309 stageCreateInfo.stage = VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
310 stageCreateInfo.module = tessellationControlShaderModule;
311 pipelineShaderStageParams.push_back(stageCreateInfo);
312 }
313
314 if (tessellationEvalShaderModule != DE_NULL)
315 {
316 stageCreateInfo.pNext = (requiredSubgroupSize != DE_NULL && requiredSubgroupSizeCreateInfo[2].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[2] : DE_NULL;
317 stageCreateInfo.flags = tessellationEvalShaderStageCreateFlags;
318 stageCreateInfo.stage = VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
319 stageCreateInfo.module = tessellationEvalShaderModule;
320 pipelineShaderStageParams.push_back(stageCreateInfo);
321 }
322
323 if (geometryShaderModule != DE_NULL)
324 {
325 stageCreateInfo.pNext = (requiredSubgroupSizeCreateInfo[3].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[3] : DE_NULL;
326 stageCreateInfo.flags = geometryShaderStageCreateFlags;
327 stageCreateInfo.stage = VK_SHADER_STAGE_GEOMETRY_BIT;
328 stageCreateInfo.module = geometryShaderModule;
329 pipelineShaderStageParams.push_back(stageCreateInfo);
330 }
331
332 if (fragmentShaderModule != DE_NULL)
333 {
334 stageCreateInfo.pNext = (requiredSubgroupSizeCreateInfo[4].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[4] : DE_NULL;
335 stageCreateInfo.flags = fragmentShaderStageCreateFlags;
336 stageCreateInfo.stage = VK_SHADER_STAGE_FRAGMENT_BIT;
337 stageCreateInfo.module = fragmentShaderModule;
338 pipelineShaderStageParams.push_back(stageCreateInfo);
339 }
340
341 const VkVertexInputBindingDescription vertexInputBindingDescription =
342 {
343 0u, // deUint32 binding
344 sizeof(tcu::Vec4), // deUint32 stride
345 VK_VERTEX_INPUT_RATE_VERTEX, // VkVertexInputRate inputRate
346 };
347
348 const VkVertexInputAttributeDescription vertexInputAttributeDescription =
349 {
350 0u, // deUint32 location
351 0u, // deUint32 binding
352 VK_FORMAT_R32G32B32A32_SFLOAT, // VkFormat format
353 0u // deUint32 offset
354 };
355
356 const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfoDefault =
357 {
358 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType
359 DE_NULL, // const void* pNext
360 (VkPipelineVertexInputStateCreateFlags)0, // VkPipelineVertexInputStateCreateFlags flags
361 1u, // deUint32 vertexBindingDescriptionCount
362 &vertexInputBindingDescription, // const VkVertexInputBindingDescription* pVertexBindingDescriptions
363 1u, // deUint32 vertexAttributeDescriptionCount
364 &vertexInputAttributeDescription // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions
365 };
366
367 const VkPipelineInputAssemblyStateCreateInfo inputAssemblyStateCreateInfo =
368 {
369 VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, // VkStructureType sType
370 DE_NULL, // const void* pNext
371 0u, // VkPipelineInputAssemblyStateCreateFlags flags
372 topology, // VkPrimitiveTopology topology
373 VK_FALSE // VkBool32 primitiveRestartEnable
374 };
375
376 const VkPipelineTessellationStateCreateInfo tessStateCreateInfo =
377 {
378 VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, // VkStructureType sType
379 DE_NULL, // const void* pNext
380 0u, // VkPipelineTessellationStateCreateFlags flags
381 patchControlPoints // deUint32 patchControlPoints
382 };
383
384 const VkPipelineViewportStateCreateInfo viewportStateCreateInfo =
385 {
386 VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, // VkStructureType sType
387 DE_NULL, // const void* pNext
388 (VkPipelineViewportStateCreateFlags)0, // VkPipelineViewportStateCreateFlags flags
389 viewports.empty() ? 1u : (deUint32)viewports.size(), // deUint32 viewportCount
390 viewports.empty() ? DE_NULL : &viewports[0], // const VkViewport* pViewports
391 viewports.empty() ? 1u : (deUint32)scissors.size(), // deUint32 scissorCount
392 scissors.empty() ? DE_NULL : &scissors[0] // const VkRect2D* pScissors
393 };
394
395 const VkPipelineRasterizationStateCreateInfo rasterizationStateCreateInfoDefault =
396 {
397 VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, // VkStructureType sType
398 DE_NULL, // const void* pNext
399 0u, // VkPipelineRasterizationStateCreateFlags flags
400 VK_FALSE, // VkBool32 depthClampEnable
401 disableRasterization, // VkBool32 rasterizerDiscardEnable
402 VK_POLYGON_MODE_FILL, // VkPolygonMode polygonMode
403 VK_CULL_MODE_NONE, // VkCullModeFlags cullMode
404 VK_FRONT_FACE_COUNTER_CLOCKWISE, // VkFrontFace frontFace
405 VK_FALSE, // VkBool32 depthBiasEnable
406 0.0f, // float depthBiasConstantFactor
407 0.0f, // float depthBiasClamp
408 0.0f, // float depthBiasSlopeFactor
409 1.0f // float lineWidth
410 };
411
412 const VkPipelineMultisampleStateCreateInfo multisampleStateCreateInfoDefault =
413 {
414 VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, // VkStructureType sType
415 DE_NULL, // const void* pNext
416 0u, // VkPipelineMultisampleStateCreateFlags flags
417 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits rasterizationSamples
418 VK_FALSE, // VkBool32 sampleShadingEnable
419 1.0f, // float minSampleShading
420 DE_NULL, // const VkSampleMask* pSampleMask
421 VK_FALSE, // VkBool32 alphaToCoverageEnable
422 VK_FALSE // VkBool32 alphaToOneEnable
423 };
424
425 const VkStencilOpState stencilOpState =
426 {
427 VK_STENCIL_OP_KEEP, // VkStencilOp failOp
428 VK_STENCIL_OP_KEEP, // VkStencilOp passOp
429 VK_STENCIL_OP_KEEP, // VkStencilOp depthFailOp
430 VK_COMPARE_OP_NEVER, // VkCompareOp compareOp
431 0, // deUint32 compareMask
432 0, // deUint32 writeMask
433 0 // deUint32 reference
434 };
435
436 const VkPipelineDepthStencilStateCreateInfo depthStencilStateCreateInfoDefault =
437 {
438 VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, // VkStructureType sType
439 DE_NULL, // const void* pNext
440 0u, // VkPipelineDepthStencilStateCreateFlags flags
441 VK_FALSE, // VkBool32 depthTestEnable
442 VK_FALSE, // VkBool32 depthWriteEnable
443 VK_COMPARE_OP_LESS_OR_EQUAL, // VkCompareOp depthCompareOp
444 VK_FALSE, // VkBool32 depthBoundsTestEnable
445 VK_FALSE, // VkBool32 stencilTestEnable
446 stencilOpState, // VkStencilOpState front
447 stencilOpState, // VkStencilOpState back
448 0.0f, // float minDepthBounds
449 1.0f, // float maxDepthBounds
450 };
451
452 const VkPipelineColorBlendAttachmentState colorBlendAttachmentState =
453 {
454 VK_FALSE, // VkBool32 blendEnable
455 VK_BLEND_FACTOR_ZERO, // VkBlendFactor srcColorBlendFactor
456 VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstColorBlendFactor
457 VK_BLEND_OP_ADD, // VkBlendOp colorBlendOp
458 VK_BLEND_FACTOR_ZERO, // VkBlendFactor srcAlphaBlendFactor
459 VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstAlphaBlendFactor
460 VK_BLEND_OP_ADD, // VkBlendOp alphaBlendOp
461 VK_COLOR_COMPONENT_R_BIT // VkColorComponentFlags colorWriteMask
462 | VK_COLOR_COMPONENT_G_BIT
463 | VK_COLOR_COMPONENT_B_BIT
464 | VK_COLOR_COMPONENT_A_BIT
465 };
466
467 const VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfoDefault =
468 {
469 VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType sType
470 DE_NULL, // const void* pNext
471 0u, // VkPipelineColorBlendStateCreateFlags flags
472 VK_FALSE, // VkBool32 logicOpEnable
473 VK_LOGIC_OP_CLEAR, // VkLogicOp logicOp
474 1u, // deUint32 attachmentCount
475 &colorBlendAttachmentState, // const VkPipelineColorBlendAttachmentState* pAttachments
476 { 0.0f, 0.0f, 0.0f, 0.0f } // float blendConstants[4]
477 };
478
479 std::vector<VkDynamicState> dynamicStates;
480
481 if (viewports.empty())
482 dynamicStates.push_back(VK_DYNAMIC_STATE_VIEWPORT);
483 if (scissors.empty())
484 dynamicStates.push_back(VK_DYNAMIC_STATE_SCISSOR);
485
486 const VkPipelineDynamicStateCreateInfo dynamicStateCreateInfoDefault =
487 {
488 VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, // VkStructureType sType
489 DE_NULL, // const void* pNext
490 0u, // VkPipelineDynamicStateCreateFlags flags
491 (deUint32)dynamicStates.size(), // deUint32 dynamicStateCount
492 dynamicStates.empty() ? DE_NULL : &dynamicStates[0] // const VkDynamicState* pDynamicStates
493 };
494
495 const VkPipelineDynamicStateCreateInfo* dynamicStateCreateInfoDefaultPtr = dynamicStates.empty() ? DE_NULL : &dynamicStateCreateInfoDefault;
496
497 const VkGraphicsPipelineCreateInfo pipelineCreateInfo =
498 {
499 VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, // VkStructureType sType
500 DE_NULL, // const void* pNext
501 0u, // VkPipelineCreateFlags flags
502 (deUint32)pipelineShaderStageParams.size(), // deUint32 stageCount
503 &pipelineShaderStageParams[0], // const VkPipelineShaderStageCreateInfo* pStages
504 vertexInputStateCreateInfo ? vertexInputStateCreateInfo : &vertexInputStateCreateInfoDefault, // const VkPipelineVertexInputStateCreateInfo* pVertexInputState
505 &inputAssemblyStateCreateInfo, // const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState
506 hasTessellation ? &tessStateCreateInfo : DE_NULL, // const VkPipelineTessellationStateCreateInfo* pTessellationState
507 &viewportStateCreateInfo, // const VkPipelineViewportStateCreateInfo* pViewportState
508 rasterizationStateCreateInfo ? rasterizationStateCreateInfo : &rasterizationStateCreateInfoDefault, // const VkPipelineRasterizationStateCreateInfo* pRasterizationState
509 multisampleStateCreateInfo ? multisampleStateCreateInfo: &multisampleStateCreateInfoDefault, // const VkPipelineMultisampleStateCreateInfo* pMultisampleState
510 depthStencilStateCreateInfo ? depthStencilStateCreateInfo : &depthStencilStateCreateInfoDefault, // const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState
511 colorBlendStateCreateInfo ? colorBlendStateCreateInfo : &colorBlendStateCreateInfoDefault, // const VkPipelineColorBlendStateCreateInfo* pColorBlendState
512 dynamicStateCreateInfo ? dynamicStateCreateInfo : dynamicStateCreateInfoDefaultPtr, // const VkPipelineDynamicStateCreateInfo* pDynamicState
513 pipelineLayout, // VkPipelineLayout layout
514 renderPass, // VkRenderPass renderPass
515 subpass, // deUint32 subpass
516 DE_NULL, // VkPipeline basePipelineHandle
517 0 // deInt32 basePipelineIndex;
518 };
519
520 return createGraphicsPipeline(vk, device, DE_NULL, &pipelineCreateInfo);
521 }
522
// Convenience overload: builds the vertex-input and color-blend state from
// the optional binding/attribute descriptions and the attachment format
// (write mask restricted to the channels the format actually has), then
// forwards to the full makeGraphicsPipeline() above with default viewports,
// scissors (i.e. dynamic state) and rasterization/multisample/depth-stencil
// defaults.
Move<VkPipeline> makeGraphicsPipeline (Context&									context,
									   const VkPipelineLayout					pipelineLayout,
									   const VkShaderStageFlags					stages,
									   const VkShaderModule						vertexShaderModule,
									   const VkShaderModule						fragmentShaderModule,
									   const VkShaderModule						geometryShaderModule,
									   const VkShaderModule						tessellationControlModule,
									   const VkShaderModule						tessellationEvaluationModule,
									   const VkRenderPass						renderPass,
									   const VkPrimitiveTopology				topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
									   const VkVertexInputBindingDescription*	vertexInputBindingDescription = DE_NULL,
									   const VkVertexInputAttributeDescription*	vertexInputAttributeDescriptions = DE_NULL,
									   const bool								frameBufferTests = false,
									   const vk::VkFormat						attachmentFormat = VK_FORMAT_R32G32B32A32_SFLOAT,
									   const deUint32							vertexShaderStageCreateFlags = 0u,
									   const deUint32							tessellationControlShaderStageCreateFlags = 0u,
									   const deUint32							tessellationEvalShaderStageCreateFlags = 0u,
									   const deUint32							geometryShaderStageCreateFlags = 0u,
									   const deUint32							fragmentShaderStageCreateFlags = 0u,
									   const deUint32							requiredSubgroupSize[5] = DE_NULL)
{
	// Empty vectors make viewport/scissor dynamic state in the callee.
	const std::vector<VkViewport>	noViewports;
	const std::vector<VkRect2D>		noScissors;
	// Vertex input is empty unless the caller provided descriptions; at most
	// one binding and one attribute are supported through this overload.
	const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo =
	{
		VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,	// VkStructureType							sType;
		DE_NULL,													// const void*								pNext;
		0u,															// VkPipelineVertexInputStateCreateFlags	flags;
		vertexInputBindingDescription == DE_NULL ? 0u : 1u,			// deUint32									vertexBindingDescriptionCount;
		vertexInputBindingDescription,								// const VkVertexInputBindingDescription*	pVertexBindingDescriptions;
		vertexInputAttributeDescriptions == DE_NULL ? 0u : 1u,		// deUint32									vertexAttributeDescriptionCount;
		vertexInputAttributeDescriptions,							// const VkVertexInputAttributeDescription*	pVertexAttributeDescriptions;
	};
	// Only write the channels the attachment format actually contains.
	const deUint32 numChannels = getNumUsedChannels(mapVkFormat(attachmentFormat).order);
	const VkColorComponentFlags colorComponent =
												numChannels == 1 ? VK_COLOR_COMPONENT_R_BIT :
												numChannels == 2 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT :
												numChannels == 3 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT :
												VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
	const VkPipelineColorBlendAttachmentState colorBlendAttachmentState =
	{
		VK_FALSE,				// VkBool32					blendEnable;
		VK_BLEND_FACTOR_ZERO,	// VkBlendFactor			srcColorBlendFactor;
		VK_BLEND_FACTOR_ZERO,	// VkBlendFactor			dstColorBlendFactor;
		VK_BLEND_OP_ADD,		// VkBlendOp				colorBlendOp;
		VK_BLEND_FACTOR_ZERO,	// VkBlendFactor			srcAlphaBlendFactor;
		VK_BLEND_FACTOR_ZERO,	// VkBlendFactor			dstAlphaBlendFactor;
		VK_BLEND_OP_ADD,		// VkBlendOp				alphaBlendOp;
		colorComponent			// VkColorComponentFlags	colorWriteMask;
	};
	const VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfo =
	{
		VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,	// VkStructureType								sType;
		DE_NULL,													// const void*									pNext;
		0u,															// VkPipelineColorBlendStateCreateFlags			flags;
		VK_FALSE,													// VkBool32										logicOpEnable;
		VK_LOGIC_OP_CLEAR,											// VkLogicOp									logicOp;
		1,															// deUint32										attachmentCount;
		&colorBlendAttachmentState,									// const VkPipelineColorBlendAttachmentState*	pAttachments;
		{ 0.0f, 0.0f, 0.0f, 0.0f }									// float										blendConstants[4];
	};
	// NOTE(review): patch control points keyed on the fragment stage bit (not
	// the tessellation stages) for framebuffer tests looks deliberate, but
	// confirm against the callers before changing.
	const deUint32 patchControlPoints = (VK_SHADER_STAGE_FRAGMENT_BIT & stages && frameBufferTests) ? 2u : 1u;

	return makeGraphicsPipeline(context.getDeviceInterface(),	// const DeviceInterface&						vk
								context.getDevice(),			// const VkDevice								device
								pipelineLayout,					// const VkPipelineLayout						pipelineLayout
								vertexShaderModule,				// const VkShaderModule							vertexShaderModule
								tessellationControlModule,		// const VkShaderModule							tessellationControlShaderModule
								tessellationEvaluationModule,	// const VkShaderModule							tessellationEvalShaderModule
								geometryShaderModule,			// const VkShaderModule							geometryShaderModule
								fragmentShaderModule,			// const VkShaderModule							fragmentShaderModule
								renderPass,						// const VkRenderPass							renderPass
								noViewports,					// const std::vector<VkViewport>&				viewports
								noScissors,						// const std::vector<VkRect2D>&					scissors
								topology,						// const VkPrimitiveTopology					topology
								0u,								// const deUint32								subpass
								patchControlPoints,				// const deUint32								patchControlPoints
								&vertexInputStateCreateInfo,	// const VkPipelineVertexInputStateCreateInfo*	vertexInputStateCreateInfo
								DE_NULL,						// const VkPipelineRasterizationStateCreateInfo*	rasterizationStateCreateInfo
								DE_NULL,						// const VkPipelineMultisampleStateCreateInfo*	multisampleStateCreateInfo
								DE_NULL,						// const VkPipelineDepthStencilStateCreateInfo*	depthStencilStateCreateInfo
								&colorBlendStateCreateInfo,		// const VkPipelineColorBlendStateCreateInfo*	colorBlendStateCreateInfo
								DE_NULL,						// const VkPipelineDynamicStateCreateInfo*
								vertexShaderStageCreateFlags,	// const deUint32								vertexShaderStageCreateFlags,
								tessellationControlShaderStageCreateFlags,	// const deUint32					tessellationControlShaderStageCreateFlags
								tessellationEvalShaderStageCreateFlags,		// const deUint32					tessellationEvalShaderStageCreateFlags
								geometryShaderStageCreateFlags,	// const deUint32								geometryShaderStageCreateFlags
								fragmentShaderStageCreateFlags,	// const deUint32								fragmentShaderStageCreateFlags
								requiredSubgroupSize);			// const deUint32								requiredSubgroupSize[5]
}
612
makeCommandBuffer(Context & context,const VkCommandPool commandPool)613 Move<VkCommandBuffer> makeCommandBuffer (Context& context, const VkCommandPool commandPool)
614 {
615 const VkCommandBufferAllocateInfo bufferAllocateParams =
616 {
617 VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, // VkStructureType sType;
618 DE_NULL, // const void* pNext;
619 commandPool, // VkCommandPool commandPool;
620 VK_COMMAND_BUFFER_LEVEL_PRIMARY, // VkCommandBufferLevel level;
621 1u, // deUint32 bufferCount;
622 };
623 return allocateCommandBuffer(context.getDeviceInterface(),
624 context.getDevice(), &bufferAllocateParams);
625 }
626
627 struct Buffer;
628 struct Image;
629
630 struct BufferOrImage
631 {
isImage__anon27672efa0111::BufferOrImage632 bool isImage() const
633 {
634 return m_isImage;
635 }
636
getAsBuffer__anon27672efa0111::BufferOrImage637 Buffer* getAsBuffer()
638 {
639 if (m_isImage) DE_FATAL("Trying to get a buffer as an image!");
640 return reinterpret_cast<Buffer* >(this);
641 }
642
getAsImage__anon27672efa0111::BufferOrImage643 Image* getAsImage()
644 {
645 if (!m_isImage) DE_FATAL("Trying to get an image as a buffer!");
646 return reinterpret_cast<Image*>(this);
647 }
648
getType__anon27672efa0111::BufferOrImage649 virtual VkDescriptorType getType() const
650 {
651 if (m_isImage)
652 {
653 return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
654 }
655 else
656 {
657 return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
658 }
659 }
660
getAllocation__anon27672efa0111::BufferOrImage661 Allocation& getAllocation() const
662 {
663 return *m_allocation;
664 }
665
~BufferOrImage__anon27672efa0111::BufferOrImage666 virtual ~BufferOrImage() {}
667
668 protected:
BufferOrImage__anon27672efa0111::BufferOrImage669 explicit BufferOrImage(bool image) : m_isImage(image) {}
670
671 bool m_isImage;
672 de::details::MovePtr<Allocation> m_allocation;
673 };
674
675 struct Buffer : public BufferOrImage
676 {
Buffer__anon27672efa0111::Buffer677 explicit Buffer (Context& context, VkDeviceSize sizeInBytes, VkBufferUsageFlags usage)
678 : BufferOrImage (false)
679 , m_sizeInBytes (sizeInBytes)
680 , m_usage (usage)
681 {
682 const DeviceInterface& vkd = context.getDeviceInterface();
683 const VkDevice device = context.getDevice();
684
685 const vk::VkBufferCreateInfo bufferCreateInfo =
686 {
687 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
688 DE_NULL,
689 0u,
690 m_sizeInBytes,
691 m_usage,
692 VK_SHARING_MODE_EXCLUSIVE,
693 0u,
694 DE_NULL,
695 };
696 m_buffer = createBuffer(vkd, device, &bufferCreateInfo);
697
698 VkMemoryRequirements req = getBufferMemoryRequirements(vkd, device, *m_buffer);
699
700 m_allocation = context.getDefaultAllocator().allocate(req, MemoryRequirement::HostVisible);
701 VK_CHECK(vkd.bindBufferMemory(device, *m_buffer, m_allocation->getMemory(), m_allocation->getOffset()));
702 }
703
getType__anon27672efa0111::Buffer704 virtual VkDescriptorType getType() const
705 {
706 if (VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT == m_usage)
707 {
708 return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
709 }
710 return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
711 }
712
getBuffer__anon27672efa0111::Buffer713 VkBuffer getBuffer () const
714 {
715 return *m_buffer;
716 }
717
getBufferPtr__anon27672efa0111::Buffer718 const VkBuffer* getBufferPtr () const
719 {
720 return &(*m_buffer);
721 }
722
getSize__anon27672efa0111::Buffer723 VkDeviceSize getSize () const
724 {
725 return m_sizeInBytes;
726 }
727
728 private:
729 Move<VkBuffer> m_buffer;
730 VkDeviceSize m_sizeInBytes;
731 const VkBufferUsageFlags m_usage;
732 };
733
// 2D, single-mip, single-layer image wrapper. Construction also creates an
// image view and a nearest-filter sampler, and transitions the image from
// UNDEFINED to GENERAL so the tests can use it immediately.
struct Image : public BufferOrImage
{
	// Creates the image, binds freshly allocated device memory, builds the
	// view and sampler, and performs the layout transition on the universal
	// queue (submitting and waiting before returning).
	explicit Image (Context& context, deUint32 width, deUint32 height, VkFormat format, VkImageUsageFlags usage = VK_IMAGE_USAGE_STORAGE_BIT)
		: BufferOrImage(true)
	{
		const DeviceInterface&	vk					= context.getDeviceInterface();
		const VkDevice			device				= context.getDevice();
		const deUint32			queueFamilyIndex	= context.getUniversalQueueFamilyIndex();

		const VkImageCreateInfo imageCreateInfo =
		{
			VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	// VkStructureType			sType;
			DE_NULL,								// const void*				pNext;
			0,										// VkImageCreateFlags		flags;
			VK_IMAGE_TYPE_2D,						// VkImageType				imageType;
			format,									// VkFormat					format;
			{width, height, 1},						// VkExtent3D				extent;
			1,										// deUint32					mipLevels;
			1,										// deUint32					arrayLayers;
			VK_SAMPLE_COUNT_1_BIT,					// VkSampleCountFlagBits	samples;
			VK_IMAGE_TILING_OPTIMAL,				// VkImageTiling			tiling;
			usage,									// VkImageUsageFlags		usage;
			VK_SHARING_MODE_EXCLUSIVE,				// VkSharingMode			sharingMode;
			0u,										// deUint32					queueFamilyIndexCount;
			DE_NULL,								// const deUint32*			pQueueFamilyIndices;
			VK_IMAGE_LAYOUT_UNDEFINED				// VkImageLayout			initialLayout;
		};

		const VkComponentMapping componentMapping =
		{
			VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
			VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY
		};

		// Whole image: single mip level, single array layer.
		const VkImageSubresourceRange subresourceRange =
		{
			VK_IMAGE_ASPECT_COLOR_BIT,				//VkImageAspectFlags	aspectMask
			0u,										//deUint32				baseMipLevel
			1u,										//deUint32				levelCount
			0u,										//deUint32				baseArrayLayer
			1u										//deUint32				layerCount
		};

		// Nearest filtering, clamped addressing, no mips or anisotropy —
		// the tests only need exact texel fetches.
		const VkSamplerCreateInfo samplerCreateInfo =
		{
			VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,		// VkStructureType			sType;
			DE_NULL,									// const void*				pNext;
			0u,											// VkSamplerCreateFlags		flags;
			VK_FILTER_NEAREST,							// VkFilter					magFilter;
			VK_FILTER_NEAREST,							// VkFilter					minFilter;
			VK_SAMPLER_MIPMAP_MODE_NEAREST,				// VkSamplerMipmapMode		mipmapMode;
			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,		// VkSamplerAddressMode		addressModeU;
			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,		// VkSamplerAddressMode		addressModeV;
			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,		// VkSamplerAddressMode		addressModeW;
			0.0f,										// float					mipLodBias;
			VK_FALSE,									// VkBool32					anisotropyEnable;
			1.0f,										// float					maxAnisotropy;
			DE_FALSE,									// VkBool32					compareEnable;
			VK_COMPARE_OP_ALWAYS,						// VkCompareOp				compareOp;
			0.0f,										// float					minLod;
			0.0f,										// float					maxLod;
			VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,	// VkBorderColor			borderColor;
			VK_FALSE,									// VkBool32					unnormalizedCoordinates;
		};

		m_image = createImage(vk, device, &imageCreateInfo);

		VkMemoryRequirements req = getImageMemoryRequirements(vk, device, *m_image);

		// NOTE(review): the allocation is deliberately twice the reported
		// requirement — presumably a safety margin/workaround; no rationale
		// is visible here. TODO confirm why the doubling is needed.
		req.size *= 2;
		m_allocation = context.getDefaultAllocator().allocate(req, MemoryRequirement::Any);

		VK_CHECK(vk.bindImageMemory(device, *m_image, m_allocation->getMemory(), m_allocation->getOffset()));

		const VkImageViewCreateInfo imageViewCreateInfo =
		{
			VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,	// VkStructureType			sType;
			DE_NULL,									// const void*				pNext;
			0,											// VkImageViewCreateFlags	flags;
			*m_image,									// VkImage					image;
			VK_IMAGE_VIEW_TYPE_2D,						// VkImageViewType			viewType;
			imageCreateInfo.format,						// VkFormat					format;
			componentMapping,							// VkComponentMapping		components;
			subresourceRange							// VkImageSubresourceRange	subresourceRange;
		};

		m_imageView	= createImageView(vk, device, &imageViewCreateInfo);
		m_sampler	= createSampler(vk, device, &samplerCreateInfo);

		// Transition input image layouts
		// (UNDEFINED -> GENERAL on a throwaway command buffer; waits for
		// completion so the image is ready when the constructor returns).
		{
			const Unique<VkCommandPool>		cmdPool				(makeCommandPool(vk, device, queueFamilyIndex));
			const Unique<VkCommandBuffer>	cmdBuffer			(makeCommandBuffer(context, *cmdPool));

			beginCommandBuffer(vk, *cmdBuffer);

			const VkImageMemoryBarrier imageBarrier = makeImageMemoryBarrier((VkAccessFlags)0u, VK_ACCESS_TRANSFER_WRITE_BIT,
				VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, *m_image, subresourceRange);

			vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
				(VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &imageBarrier);

			endCommandBuffer(vk, *cmdBuffer);
			submitCommandsAndWait(vk, device, context.getUniversalQueue(), *cmdBuffer);
		}
	}

	VkImage getImage () const
	{
		return *m_image;
	}

	VkImageView getImageView () const
	{
		return *m_imageView;
	}

	VkSampler getSampler () const
	{
		return *m_sampler;
	}

private:
	Move<VkImage>		m_image;
	Move<VkImageView>	m_imageView;
	Move<VkSampler>		m_sampler;
};
861 }
862
getStagesCount(const VkShaderStageFlags shaderStages)863 deUint32 vkt::subgroups::getStagesCount (const VkShaderStageFlags shaderStages)
864 {
865 const deUint32 stageCount = isAllGraphicsStages(shaderStages) ? 4
866 : isAllComputeStages(shaderStages) ? 1
867 #ifndef CTS_USES_VULKANSC
868 : isAllRayTracingStages(shaderStages) ? 6
869 : isAllMeshShadingStages(shaderStages) ? 1
870 #endif // CTS_USES_VULKANSC
871 : 0;
872
873 DE_ASSERT(stageCount != 0);
874
875 return stageCount;
876 }
877
// Returns GLSL source for sharedMemoryBallot(), a reference implementation of
// a subgroup ballot built from shared memory and atomics: each invocation that
// votes true ORs its bit into a per-subgroup uvec4 mask (indexed by
// gl_SubgroupID), with shared-memory barriers separating the clear, write and
// read phases. Requires one uvec4 of shared memory per subgroup.
std::string vkt::subgroups::getSharedMemoryBallotHelper ()
{
	return	"shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
			"uvec4 sharedMemoryBallot(bool vote)\n"
			"{\n"
			"  uint groupOffset = gl_SubgroupID;\n"
			"  // One invocation in the group 0's the whole group's data\n"
			"  if (subgroupElect())\n"
			"  {\n"
			"    superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
			"  }\n"
			"  subgroupMemoryBarrierShared();\n"
			"  if (vote)\n"
			"  {\n"
			"    const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
			"    const highp uint bitToSet = 1u << invocationId;\n"
			"    switch (gl_SubgroupInvocationID / 32)\n"
			"    {\n"
			"    case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
			"    case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
			"    case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
			"    case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
			"    }\n"
			"  }\n"
			"  subgroupMemoryBarrierShared();\n"
			"  return superSecretComputeShaderHelper[groupOffset];\n"
			"}\n";
}
906
// ARB_shader_ballot flavor of the helper above: same shared-memory/atomic
// ballot construction, but returns the low 64 bits packed into a uint64_t
// (packUint2x32 of the .xy components) to match the ARB ballot return type.
std::string vkt::subgroups::getSharedMemoryBallotHelperARB ()
{
	return	"shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
			"uint64_t sharedMemoryBallot(bool vote)\n"
			"{\n"
			"  uint groupOffset = gl_SubgroupID;\n"
			"  // One invocation in the group 0's the whole group's data\n"
			"  if (subgroupElect())\n"
			"  {\n"
			"    superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
			"  }\n"
			"  subgroupMemoryBarrierShared();\n"
			"  if (vote)\n"
			"  {\n"
			"    const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
			"    const highp uint bitToSet = 1u << invocationId;\n"
			"    switch (gl_SubgroupInvocationID / 32)\n"
			"    {\n"
			"    case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
			"    case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
			"    case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
			"    case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
			"    }\n"
			"  }\n"
			"  subgroupMemoryBarrierShared();\n"
			"  return packUint2x32(superSecretComputeShaderHelper[groupOffset].xy);\n"
			"}\n";
}
935
// Subgroup size reported by the physical device (VkPhysicalDeviceSubgroupProperties).
deUint32 vkt::subgroups::getSubgroupSize (Context& context)
{
	return context.getSubgroupProperties().subgroupSize;
}
940
// Upper bound on subgroup size these tests are written for; buffer sizes and
// the width-stepping in getNextWidth() are derived from this constant.
deUint32 vkt::subgroups::maxSupportedSubgroupSize ()
{
	return 128u;
}
945
getShaderStageName(VkShaderStageFlags stage)946 std::string vkt::subgroups::getShaderStageName (VkShaderStageFlags stage)
947 {
948 switch (stage)
949 {
950 case VK_SHADER_STAGE_COMPUTE_BIT: return "compute";
951 case VK_SHADER_STAGE_FRAGMENT_BIT: return "fragment";
952 case VK_SHADER_STAGE_VERTEX_BIT: return "vertex";
953 case VK_SHADER_STAGE_GEOMETRY_BIT: return "geometry";
954 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: return "tess_control";
955 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: return "tess_eval";
956 #ifndef CTS_USES_VULKANSC
957 case VK_SHADER_STAGE_RAYGEN_BIT_KHR: return "rgen";
958 case VK_SHADER_STAGE_ANY_HIT_BIT_KHR: return "ahit";
959 case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR: return "chit";
960 case VK_SHADER_STAGE_MISS_BIT_KHR: return "miss";
961 case VK_SHADER_STAGE_INTERSECTION_BIT_KHR: return "sect";
962 case VK_SHADER_STAGE_CALLABLE_BIT_KHR: return "call";
963 case VK_SHADER_STAGE_MESH_BIT_EXT: return "mesh";
964 case VK_SHADER_STAGE_TASK_BIT_EXT: return "task";
965 #endif // CTS_USES_VULKANSC
966 default: TCU_THROW(InternalError, "Unhandled stage");
967 }
968 }
969
getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit)970 std::string vkt::subgroups::getSubgroupFeatureName (vk::VkSubgroupFeatureFlagBits bit)
971 {
972 switch (bit)
973 {
974 case VK_SUBGROUP_FEATURE_BASIC_BIT: return "VK_SUBGROUP_FEATURE_BASIC_BIT";
975 case VK_SUBGROUP_FEATURE_VOTE_BIT: return "VK_SUBGROUP_FEATURE_VOTE_BIT";
976 case VK_SUBGROUP_FEATURE_ARITHMETIC_BIT: return "VK_SUBGROUP_FEATURE_ARITHMETIC_BIT";
977 case VK_SUBGROUP_FEATURE_BALLOT_BIT: return "VK_SUBGROUP_FEATURE_BALLOT_BIT";
978 case VK_SUBGROUP_FEATURE_SHUFFLE_BIT: return "VK_SUBGROUP_FEATURE_SHUFFLE_BIT";
979 case VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT: return "VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT";
980 case VK_SUBGROUP_FEATURE_CLUSTERED_BIT: return "VK_SUBGROUP_FEATURE_CLUSTERED_BIT";
981 case VK_SUBGROUP_FEATURE_QUAD_BIT: return "VK_SUBGROUP_FEATURE_QUAD_BIT";
982 default: TCU_THROW(InternalError, "Unknown subgroup feature category");
983 }
984 }
985
// Registers pre-assembled SPIR-V fallback shaders ("vert_noSubgroup",
// "tesc_noSubgroup", "tese_noSubgroup") that use no subgroup functionality.
// They are hand-written assembly (with the equivalent GLSL shown in comments)
// so the assembly stays byte-stable regardless of compiler version; tests use
// them to fill pipeline stages that are not under test.
void vkt::subgroups::addNoSubgroupShader (SourceCollections& programCollection)
{
	// Pass-through vertex shader: positions one point per gl_VertexIndex on
	// a 1024-pixel-wide line.
	{
		/*
			"#version 450\n"
			"void main (void)\n"
			"{\n"
			"  float pixelSize = 2.0f/1024.0f;\n"
			"  float pixelPosition = pixelSize/2.0f - 1.0f;\n"
			"  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
			"  gl_PointSize = 1.0f;\n"
			"}\n"
		*/
		const std::string vertNoSubgroup =
			"; SPIR-V\n"
			"; Version: 1.3\n"
			"; Generator: Khronos Glslang Reference Front End; 1\n"
			"; Bound: 37\n"
			"; Schema: 0\n"
			"OpCapability Shader\n"
			"%1 = OpExtInstImport \"GLSL.std.450\"\n"
			"OpMemoryModel Logical GLSL450\n"
			"OpEntryPoint Vertex %4 \"main\" %22 %26\n"
			"OpMemberDecorate %20 0 BuiltIn Position\n"
			"OpMemberDecorate %20 1 BuiltIn PointSize\n"
			"OpMemberDecorate %20 2 BuiltIn ClipDistance\n"
			"OpMemberDecorate %20 3 BuiltIn CullDistance\n"
			"OpDecorate %20 Block\n"
			"OpDecorate %26 BuiltIn VertexIndex\n"
			"%2 = OpTypeVoid\n"
			"%3 = OpTypeFunction %2\n"
			"%6 = OpTypeFloat 32\n"
			"%7 = OpTypePointer Function %6\n"
			"%9 = OpConstant %6 0.00195313\n"
			"%12 = OpConstant %6 2\n"
			"%14 = OpConstant %6 1\n"
			"%16 = OpTypeVector %6 4\n"
			"%17 = OpTypeInt 32 0\n"
			"%18 = OpConstant %17 1\n"
			"%19 = OpTypeArray %6 %18\n"
			"%20 = OpTypeStruct %16 %6 %19 %19\n"
			"%21 = OpTypePointer Output %20\n"
			"%22 = OpVariable %21 Output\n"
			"%23 = OpTypeInt 32 1\n"
			"%24 = OpConstant %23 0\n"
			"%25 = OpTypePointer Input %23\n"
			"%26 = OpVariable %25 Input\n"
			"%33 = OpConstant %6 0\n"
			"%35 = OpTypePointer Output %16\n"
			"%37 = OpConstant %23 1\n"
			"%38 = OpTypePointer Output %6\n"
			"%4 = OpFunction %2 None %3\n"
			"%5 = OpLabel\n"
			"%8 = OpVariable %7 Function\n"
			"%10 = OpVariable %7 Function\n"
			"OpStore %8 %9\n"
			"%11 = OpLoad %6 %8\n"
			"%13 = OpFDiv %6 %11 %12\n"
			"%15 = OpFSub %6 %13 %14\n"
			"OpStore %10 %15\n"
			"%27 = OpLoad %23 %26\n"
			"%28 = OpConvertSToF %6 %27\n"
			"%29 = OpLoad %6 %8\n"
			"%30 = OpFMul %6 %28 %29\n"
			"%31 = OpLoad %6 %10\n"
			"%32 = OpFAdd %6 %30 %31\n"
			"%34 = OpCompositeConstruct %16 %32 %33 %33 %14\n"
			"%36 = OpAccessChain %35 %22 %24\n"
			"OpStore %36 %34\n"
			"%39 = OpAccessChain %38 %22 %37\n"
			"OpStore %39 %14\n"
			"OpReturn\n"
			"OpFunctionEnd\n";
		programCollection.spirvAsmSources.add("vert_noSubgroup") << vertNoSubgroup;
	}

	// Tessellation control shader: single-vertex patches, outer tess levels
	// fixed to 1.0, positions passed through.
	{
		/*
			"#version 450\n"
			"layout(vertices=1) out;\n"
			"\n"
			"void main (void)\n"
			"{\n"
			"  if (gl_InvocationID == 0)\n"
			"  {\n"
			"    gl_TessLevelOuter[0] = 1.0f;\n"
			"    gl_TessLevelOuter[1] = 1.0f;\n"
			"  }\n"
			"  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
			"}\n"
		*/
		const std::string tescNoSubgroup =
			"; SPIR-V\n"
			"; Version: 1.3\n"
			"; Generator: Khronos Glslang Reference Front End; 1\n"
			"; Bound: 45\n"
			"; Schema: 0\n"
			"OpCapability Tessellation\n"
			"%1 = OpExtInstImport \"GLSL.std.450\"\n"
			"OpMemoryModel Logical GLSL450\n"
			"OpEntryPoint TessellationControl %4 \"main\" %8 %20 %32 %38\n"
			"OpExecutionMode %4 OutputVertices 1\n"
			"OpDecorate %8 BuiltIn InvocationId\n"
			"OpDecorate %20 Patch\n"
			"OpDecorate %20 BuiltIn TessLevelOuter\n"
			"OpMemberDecorate %29 0 BuiltIn Position\n"
			"OpMemberDecorate %29 1 BuiltIn PointSize\n"
			"OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
			"OpMemberDecorate %29 3 BuiltIn CullDistance\n"
			"OpDecorate %29 Block\n"
			"OpMemberDecorate %34 0 BuiltIn Position\n"
			"OpMemberDecorate %34 1 BuiltIn PointSize\n"
			"OpMemberDecorate %34 2 BuiltIn ClipDistance\n"
			"OpMemberDecorate %34 3 BuiltIn CullDistance\n"
			"OpDecorate %34 Block\n"
			"%2 = OpTypeVoid\n"
			"%3 = OpTypeFunction %2\n"
			"%6 = OpTypeInt 32 1\n"
			"%7 = OpTypePointer Input %6\n"
			"%8 = OpVariable %7 Input\n"
			"%10 = OpConstant %6 0\n"
			"%11 = OpTypeBool\n"
			"%15 = OpTypeFloat 32\n"
			"%16 = OpTypeInt 32 0\n"
			"%17 = OpConstant %16 4\n"
			"%18 = OpTypeArray %15 %17\n"
			"%19 = OpTypePointer Output %18\n"
			"%20 = OpVariable %19 Output\n"
			"%21 = OpConstant %15 1\n"
			"%22 = OpTypePointer Output %15\n"
			"%24 = OpConstant %6 1\n"
			"%26 = OpTypeVector %15 4\n"
			"%27 = OpConstant %16 1\n"
			"%28 = OpTypeArray %15 %27\n"
			"%29 = OpTypeStruct %26 %15 %28 %28\n"
			"%30 = OpTypeArray %29 %27\n"
			"%31 = OpTypePointer Output %30\n"
			"%32 = OpVariable %31 Output\n"
			"%34 = OpTypeStruct %26 %15 %28 %28\n"
			"%35 = OpConstant %16 32\n"
			"%36 = OpTypeArray %34 %35\n"
			"%37 = OpTypePointer Input %36\n"
			"%38 = OpVariable %37 Input\n"
			"%40 = OpTypePointer Input %26\n"
			"%43 = OpTypePointer Output %26\n"
			"%4 = OpFunction %2 None %3\n"
			"%5 = OpLabel\n"
			"%9 = OpLoad %6 %8\n"
			"%12 = OpIEqual %11 %9 %10\n"
			"OpSelectionMerge %14 None\n"
			"OpBranchConditional %12 %13 %14\n"
			"%13 = OpLabel\n"
			"%23 = OpAccessChain %22 %20 %10\n"
			"OpStore %23 %21\n"
			"%25 = OpAccessChain %22 %20 %24\n"
			"OpStore %25 %21\n"
			"OpBranch %14\n"
			"%14 = OpLabel\n"
			"%33 = OpLoad %6 %8\n"
			"%39 = OpLoad %6 %8\n"
			"%41 = OpAccessChain %40 %38 %39 %10\n"
			"%42 = OpLoad %26 %41\n"
			"%44 = OpAccessChain %43 %32 %33 %10\n"
			"OpStore %44 %42\n"
			"OpReturn\n"
			"OpFunctionEnd\n";
		programCollection.spirvAsmSources.add("tesc_noSubgroup") << tescNoSubgroup;
	}

	// Tessellation evaluation shader: isolines domain, offsets the patch
	// position by the tess coordinate scaled to half a pixel.
	{
		/*
			"#version 450\n"
			"layout(isolines) in;\n"
			"\n"
			"void main (void)\n"
			"{\n"
			"  float pixelSize = 2.0f/1024.0f;\n"
			"  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
			"}\n";
		*/
		const std::string teseNoSubgroup =
			"; SPIR-V\n"
			"; Version: 1.3\n"
			"; Generator: Khronos Glslang Reference Front End; 2\n"
			"; Bound: 42\n"
			"; Schema: 0\n"
			"OpCapability Tessellation\n"
			"%1 = OpExtInstImport \"GLSL.std.450\"\n"
			"OpMemoryModel Logical GLSL450\n"
			"OpEntryPoint TessellationEvaluation %4 \"main\" %16 %23 %29\n"
			"OpExecutionMode %4 Isolines\n"
			"OpExecutionMode %4 SpacingEqual\n"
			"OpExecutionMode %4 VertexOrderCcw\n"
			"OpMemberDecorate %14 0 BuiltIn Position\n"
			"OpMemberDecorate %14 1 BuiltIn PointSize\n"
			"OpMemberDecorate %14 2 BuiltIn ClipDistance\n"
			"OpMemberDecorate %14 3 BuiltIn CullDistance\n"
			"OpDecorate %14 Block\n"
			"OpMemberDecorate %19 0 BuiltIn Position\n"
			"OpMemberDecorate %19 1 BuiltIn PointSize\n"
			"OpMemberDecorate %19 2 BuiltIn ClipDistance\n"
			"OpMemberDecorate %19 3 BuiltIn CullDistance\n"
			"OpDecorate %19 Block\n"
			"OpDecorate %29 BuiltIn TessCoord\n"
			"%2 = OpTypeVoid\n"
			"%3 = OpTypeFunction %2\n"
			"%6 = OpTypeFloat 32\n"
			"%7 = OpTypePointer Function %6\n"
			"%9 = OpConstant %6 0.00195313\n"
			"%10 = OpTypeVector %6 4\n"
			"%11 = OpTypeInt 32 0\n"
			"%12 = OpConstant %11 1\n"
			"%13 = OpTypeArray %6 %12\n"
			"%14 = OpTypeStruct %10 %6 %13 %13\n"
			"%15 = OpTypePointer Output %14\n"
			"%16 = OpVariable %15 Output\n"
			"%17 = OpTypeInt 32 1\n"
			"%18 = OpConstant %17 0\n"
			"%19 = OpTypeStruct %10 %6 %13 %13\n"
			"%20 = OpConstant %11 32\n"
			"%21 = OpTypeArray %19 %20\n"
			"%22 = OpTypePointer Input %21\n"
			"%23 = OpVariable %22 Input\n"
			"%24 = OpTypePointer Input %10\n"
			"%27 = OpTypeVector %6 3\n"
			"%28 = OpTypePointer Input %27\n"
			"%29 = OpVariable %28 Input\n"
			"%30 = OpConstant %11 0\n"
			"%31 = OpTypePointer Input %6\n"
			"%36 = OpConstant %6 2\n"
			"%40 = OpTypePointer Output %10\n"
			"%4 = OpFunction %2 None %3\n"
			"%5 = OpLabel\n"
			"%8 = OpVariable %7 Function\n"
			"OpStore %8 %9\n"
			"%25 = OpAccessChain %24 %23 %18 %18\n"
			"%26 = OpLoad %10 %25\n"
			"%32 = OpAccessChain %31 %29 %30\n"
			"%33 = OpLoad %6 %32\n"
			"%34 = OpLoad %6 %8\n"
			"%35 = OpFMul %6 %33 %34\n"
			"%37 = OpFDiv %6 %35 %36\n"
			"%38 = OpCompositeConstruct %10 %37 %37 %37 %37\n"
			"%39 = OpFAdd %10 %26 %38\n"
			"%41 = OpAccessChain %40 %16 %18\n"
			"OpStore %41 %39\n"
			"OpReturn\n"
			"OpFunctionEnd\n";
		programCollection.spirvAsmSources.add("tese_noSubgroup") << teseNoSubgroup;
	}

}
1238
getFramebufferBufferDeclarations(const VkFormat & format,const std::vector<std::string> & declarations,const deUint32 stage)1239 static std::string getFramebufferBufferDeclarations (const VkFormat& format,
1240 const std::vector<std::string>& declarations,
1241 const deUint32 stage)
1242 {
1243 if (declarations.empty())
1244 {
1245 const std::string name = (stage == 0) ? "result" : "out_color";
1246 const std::string suffix = (stage == 2) ? "[]" : "";
1247 const std::string result =
1248 "layout(location = 0) out float " + name + suffix + ";\n"
1249 "layout(set = 0, binding = 0) uniform Buffer1\n"
1250 "{\n"
1251 " " + de::toString(subgroups::getFormatNameForGLSL(format)) + " data[" + de::toString(subgroups::maxSupportedSubgroupSize()) + "];\n"
1252 "};\n";
1253
1254 return result;
1255 }
1256 else
1257 {
1258 return declarations[stage];
1259 }
1260 }
1261
initStdFrameBufferPrograms(SourceCollections & programCollection,const vk::ShaderBuildOptions & buildOptions,VkShaderStageFlags shaderStage,VkFormat format,bool gsPointSize,const std::string & extHeader,const std::string & testSrc,const std::string & helperStr,const std::vector<std::string> & declarations)1262 void vkt::subgroups::initStdFrameBufferPrograms (SourceCollections& programCollection,
1263 const vk::ShaderBuildOptions& buildOptions,
1264 VkShaderStageFlags shaderStage,
1265 VkFormat format,
1266 bool gsPointSize,
1267 const std::string& extHeader,
1268 const std::string& testSrc,
1269 const std::string& helperStr,
1270 const std::vector<std::string>& declarations)
1271 {
1272 subgroups::setFragmentShaderFrameBuffer(programCollection);
1273
1274 if (shaderStage != VK_SHADER_STAGE_VERTEX_BIT)
1275 subgroups::setVertexShaderFrameBuffer(programCollection);
1276
1277 if (shaderStage == VK_SHADER_STAGE_VERTEX_BIT)
1278 {
1279 std::ostringstream vertex;
1280
1281 vertex << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1282 << extHeader
1283 << "layout(location = 0) in highp vec4 in_position;\n"
1284 << getFramebufferBufferDeclarations(format, declarations, 0)
1285 << "\n"
1286 << helperStr
1287 << "void main (void)\n"
1288 << "{\n"
1289 << " uint tempRes;\n"
1290 << testSrc
1291 << " result = float(tempRes);\n"
1292 << " gl_Position = in_position;\n"
1293 << " gl_PointSize = 1.0f;\n"
1294 << "}\n";
1295
1296 programCollection.glslSources.add("vert") << glu::VertexSource(vertex.str()) << buildOptions;
1297 }
1298 else if (shaderStage == VK_SHADER_STAGE_GEOMETRY_BIT)
1299 {
1300 std::ostringstream geometry;
1301
1302 geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1303 << extHeader
1304 << "layout(points) in;\n"
1305 << "layout(points, max_vertices = 1) out;\n"
1306 << getFramebufferBufferDeclarations(format, declarations, 1)
1307 << "\n"
1308 << helperStr
1309 << "void main (void)\n"
1310 << "{\n"
1311 << " uint tempRes;\n"
1312 << testSrc
1313 << " out_color = float(tempRes);\n"
1314 << " gl_Position = gl_in[0].gl_Position;\n"
1315 << (gsPointSize ? " gl_PointSize = gl_in[0].gl_PointSize;\n" : "")
1316 << " EmitVertex();\n"
1317 << " EndPrimitive();\n"
1318 << "}\n";
1319
1320 programCollection.glslSources.add("geometry") << glu::GeometrySource(geometry.str()) << buildOptions;
1321 }
1322 else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
1323 {
1324 std::ostringstream controlSource;
1325
1326 controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1327 << extHeader
1328 << "layout(vertices = 2) out;\n"
1329 << getFramebufferBufferDeclarations(format, declarations, 2)
1330 << "\n"
1331 << helperStr
1332 << "void main (void)\n"
1333 << "{\n"
1334 << " if (gl_InvocationID == 0)\n"
1335 << " {\n"
1336 << " gl_TessLevelOuter[0] = 1.0f;\n"
1337 << " gl_TessLevelOuter[1] = 1.0f;\n"
1338 << " }\n"
1339 << " uint tempRes;\n"
1340 << testSrc
1341 << " out_color[gl_InvocationID] = float(tempRes);\n"
1342 << " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1343 << (gsPointSize ? " gl_out[gl_InvocationID].gl_PointSize = gl_in[gl_InvocationID].gl_PointSize;\n" : "")
1344 << "}\n";
1345
1346 programCollection.glslSources.add("tesc") << glu::TessellationControlSource(controlSource.str()) << buildOptions;
1347 subgroups::setTesEvalShaderFrameBuffer(programCollection);
1348 }
1349 else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
1350 {
1351 ostringstream evaluationSource;
1352
1353 evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1354 << extHeader
1355 << "layout(isolines, equal_spacing, ccw ) in;\n"
1356 << getFramebufferBufferDeclarations(format, declarations, 3)
1357 << "\n"
1358 << helperStr
1359 << "void main (void)\n"
1360 << "{\n"
1361 << " uint tempRes;\n"
1362 << testSrc
1363 << " out_color = float(tempRes);\n"
1364 << " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
1365 << (gsPointSize ? " gl_PointSize = gl_in[0].gl_PointSize;\n" : "")
1366 << "}\n";
1367
1368 subgroups::setTesCtrlShaderFrameBuffer(programCollection);
1369 programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
1370 }
1371 else
1372 {
1373 DE_FATAL("Unsupported shader stage");
1374 }
1375 }
1376
getBufferDeclarations(vk::VkShaderStageFlags shaderStage,const std::string & formatName,const std::vector<std::string> & declarations,const deUint32 stage)1377 static std::string getBufferDeclarations (vk::VkShaderStageFlags shaderStage,
1378 const std::string& formatName,
1379 const std::vector<std::string>& declarations,
1380 const deUint32 stage)
1381 {
1382 if (declarations.empty())
1383 {
1384 const deUint32 stageCount = vkt::subgroups::getStagesCount(shaderStage);
1385 const deUint32 binding0 = stage;
1386 const deUint32 binding1 = stageCount;
1387 const bool fragment = (shaderStage & VK_SHADER_STAGE_FRAGMENT_BIT) && (stage == stageCount);
1388 const string buffer1 = fragment
1389 ? "layout(location = 0) out uint result;\n"
1390 : "layout(set = 0, binding = " + de::toString(binding0) + ", std430) buffer Buffer1\n"
1391 "{\n"
1392 " uint result[];\n"
1393 "};\n";
1394 //todo boza I suppose it can be "layout(set = 0, binding = " + de::toString(binding1) + ", std430) readonly buffer Buffer2\n"
1395 const string buffer2 = "layout(set = 0, binding = " + de::toString(binding1) + ", std430)" + (stageCount == 1 ? "" : " readonly") + " buffer Buffer" + (fragment ? "1" : "2") + "\n"
1396 "{\n"
1397 " " + formatName + " data[];\n"
1398 "};\n";
1399
1400 return buffer1 + buffer2;
1401 }
1402 else
1403 {
1404 return declarations[stage];
1405 }
1406 }
1407
initStdPrograms(vk::SourceCollections & programCollection,const vk::ShaderBuildOptions & buildOptions,vk::VkShaderStageFlags shaderStage,vk::VkFormat format,bool gsPointSize,const std::string & extHeader,const std::string & testSrc,const std::string & helperStr,const std::vector<std::string> & declarations,const bool avoidHelperInvocations,const std::string & tempRes)1408 void vkt::subgroups::initStdPrograms (vk::SourceCollections& programCollection,
1409 const vk::ShaderBuildOptions& buildOptions,
1410 vk::VkShaderStageFlags shaderStage,
1411 vk::VkFormat format,
1412 bool gsPointSize,
1413 const std::string& extHeader,
1414 const std::string& testSrc,
1415 const std::string& helperStr,
1416 const std::vector<std::string>& declarations,
1417 const bool avoidHelperInvocations,
1418 const std::string& tempRes)
1419 {
1420 const std::string formatName = subgroups::getFormatNameForGLSL(format);
1421
1422 if (isAllComputeStages(shaderStage))
1423 {
1424 std::ostringstream src;
1425
1426 src << "#version 450\n"
1427 << extHeader
1428 << "layout (local_size_x_id = 0, local_size_y_id = 1, "
1429 "local_size_z_id = 2) in;\n"
1430 << getBufferDeclarations(shaderStage, formatName, declarations, 0)
1431 << "\n"
1432 << helperStr
1433 << "void main (void)\n"
1434 << "{\n"
1435 << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1436 << " highp uint offset = globalSize.x * ((globalSize.y * "
1437 "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1438 "gl_GlobalInvocationID.x;\n"
1439 << tempRes
1440 << testSrc
1441 << " result[offset] = tempRes;\n"
1442 << "}\n";
1443
1444 programCollection.glslSources.add("comp") << glu::ComputeSource(src.str()) << buildOptions;
1445 }
1446 #ifndef CTS_USES_VULKANSC
1447 else if (isAllMeshShadingStages(shaderStage))
1448 {
1449 const bool testMesh = ((shaderStage & VK_SHADER_STAGE_MESH_BIT_EXT) != 0u);
1450 const bool testTask = ((shaderStage & VK_SHADER_STAGE_TASK_BIT_EXT) != 0u);
1451
1452 if (testMesh)
1453 {
1454 std::ostringstream mesh;
1455
1456 mesh
1457 << "#version 450\n"
1458 << "#extension GL_EXT_mesh_shader : enable\n"
1459 << extHeader
1460 << "layout (local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n"
1461 << "layout (points) out;\n"
1462 << "layout (max_vertices = 1, max_primitives = 1) out;\n"
1463 << getBufferDeclarations(shaderStage, formatName, declarations, 0)
1464 << "\n"
1465 << helperStr
1466 << "void main (void)\n"
1467 << "{\n"
1468 << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1469 << " highp uint offset = globalSize.x * ((globalSize.y * "
1470 "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1471 "gl_GlobalInvocationID.x;\n"
1472 << tempRes
1473 << testSrc
1474 << " result[offset] = tempRes;\n"
1475 << " SetMeshOutputsEXT(0u, 0u);\n"
1476 << "}\n";
1477
1478 programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
1479 }
1480 else
1481 {
1482 const std::string meshShaderNoSubgroups =
1483 "#version 450\n"
1484 "#extension GL_EXT_mesh_shader : enable\n"
1485 "\n"
1486 "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
1487 "layout (points) out;\n"
1488 "layout (max_vertices = 1, max_primitives = 1) out;\n"
1489 "\n"
1490 "void main (void)\n"
1491 "{\n"
1492 " SetMeshOutputsEXT(0u, 0u);\n"
1493 "}\n"
1494 ;
1495 programCollection.glslSources.add("mesh") << glu::MeshSource(meshShaderNoSubgroups) << buildOptions;
1496 }
1497
1498 if (testTask)
1499 {
1500 const tcu::UVec3 emitSize = (testMesh ? tcu::UVec3(1u, 1u, 1u) : tcu::UVec3(0u, 0u, 0u));
1501 std::ostringstream task;
1502
1503 task
1504 << "#version 450\n"
1505 << "#extension GL_EXT_mesh_shader : enable\n"
1506 //<< "#extension GL_NV_mesh_shader : enable\n"
1507 << extHeader
1508 << "layout (local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n"
1509 << getBufferDeclarations(shaderStage, formatName, declarations, 0)
1510 << "\n"
1511 << helperStr
1512 << "void main (void)\n"
1513 << "{\n"
1514 << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1515 //<< " uvec3 globalSize = uvec3(0, 0, 0)/*gl_NumWorkGroups*/ * gl_WorkGroupSize;\n"
1516 << " highp uint offset = globalSize.x * ((globalSize.y * "
1517 "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1518 "gl_GlobalInvocationID.x;\n"
1519 << tempRes
1520 << testSrc
1521 << " result[offset] = tempRes;\n"
1522 << " EmitMeshTasksEXT(" << emitSize.x() << ", " << emitSize.y() << ", " << emitSize.z() << ");\n"
1523 //<< " gl_TaskCountNV = " << emitSize.x() << ";\n"
1524 << "}\n";
1525
1526 programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
1527 }
1528 }
1529 #endif // CTS_USES_VULKANSC
1530 else if (isAllGraphicsStages(shaderStage))
1531 {
1532 const string vertex =
1533 "#version 450\n"
1534 + extHeader
1535 + getBufferDeclarations(shaderStage, formatName, declarations, 0) +
1536 "\n"
1537 + helperStr +
1538 "void main (void)\n"
1539 "{\n"
1540 " uint tempRes;\n"
1541 + testSrc +
1542 " result[gl_VertexIndex] = tempRes;\n"
1543 " float pixelSize = 2.0f/1024.0f;\n"
1544 " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
1545 " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
1546 " gl_PointSize = 1.0f;\n"
1547 "}\n";
1548
1549 const string tesc =
1550 "#version 450\n"
1551 + extHeader +
1552 "layout(vertices=1) out;\n"
1553 + getBufferDeclarations(shaderStage, formatName, declarations, 1) +
1554 "\n"
1555 + helperStr +
1556 "void main (void)\n"
1557 "{\n"
1558 + tempRes
1559 + testSrc +
1560 " result[gl_PrimitiveID] = tempRes;\n"
1561 " if (gl_InvocationID == 0)\n"
1562 " {\n"
1563 " gl_TessLevelOuter[0] = 1.0f;\n"
1564 " gl_TessLevelOuter[1] = 1.0f;\n"
1565 " }\n"
1566 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1567 + (gsPointSize ? " gl_out[gl_InvocationID].gl_PointSize = gl_in[gl_InvocationID].gl_PointSize;\n" : "") +
1568 "}\n";
1569
1570 const string tese =
1571 "#version 450\n"
1572 + extHeader +
1573 "layout(isolines) in;\n"
1574 + getBufferDeclarations(shaderStage, formatName, declarations, 2) +
1575 "\n"
1576 + helperStr +
1577 "void main (void)\n"
1578 "{\n"
1579 + tempRes
1580 + testSrc +
1581 " result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempRes;\n"
1582 " float pixelSize = 2.0f/1024.0f;\n"
1583 " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
1584 + (gsPointSize ? " gl_PointSize = gl_in[0].gl_PointSize;\n" : "") +
1585 "}\n";
1586
1587 const string geometry =
1588 "#version 450\n"
1589 + extHeader +
1590 "layout(${TOPOLOGY}) in;\n"
1591 "layout(points, max_vertices = 1) out;\n"
1592 + getBufferDeclarations(shaderStage, formatName, declarations, 3) +
1593 "\n"
1594 + helperStr +
1595 "void main (void)\n"
1596 "{\n"
1597 + tempRes
1598 + testSrc +
1599 " result[gl_PrimitiveIDIn] = tempRes;\n"
1600 " gl_Position = gl_in[0].gl_Position;\n"
1601 + (gsPointSize ? " gl_PointSize = gl_in[0].gl_PointSize;\n" : "") +
1602 " EmitVertex();\n"
1603 " EndPrimitive();\n"
1604 "}\n";
1605
1606 const string fragment =
1607 "#version 450\n"
1608 + extHeader
1609 + getBufferDeclarations(shaderStage, formatName, declarations, 4)
1610 + helperStr +
1611 "void main (void)\n"
1612 "{\n"
1613 + (avoidHelperInvocations ? " if (gl_HelperInvocation) return;\n" : "")
1614 + tempRes
1615 + testSrc +
1616 " result = tempRes;\n"
1617 "}\n";
1618
1619 subgroups::addNoSubgroupShader(programCollection);
1620
1621 programCollection.glslSources.add("vert") << glu::VertexSource(vertex) << buildOptions;
1622 programCollection.glslSources.add("tesc") << glu::TessellationControlSource(tesc) << buildOptions;
1623 programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(tese) << buildOptions;
1624 subgroups::addGeometryShadersFromTemplate(geometry, buildOptions, programCollection.glslSources);
1625 programCollection.glslSources.add("fragment") << glu::FragmentSource(fragment)<< buildOptions;
1626 }
1627 #ifndef CTS_USES_VULKANSC
1628 else if (isAllRayTracingStages(shaderStage))
1629 {
1630 const std::string rgenShader =
1631 "#version 460 core\n"
1632 "#extension GL_EXT_ray_tracing: require\n"
1633 + extHeader +
1634 "layout(location = 0) rayPayloadEXT uvec4 payload;\n"
1635 "layout(location = 0) callableDataEXT uvec4 callData;"
1636 "layout(set = 1, binding = 0) uniform accelerationStructureEXT topLevelAS;\n"
1637 + getBufferDeclarations(shaderStage, formatName, declarations, 0) +
1638 "\n"
1639 + helperStr +
1640 "void main()\n"
1641 "{\n"
1642 + tempRes
1643 + testSrc +
1644 " uint rayFlags = 0;\n"
1645 " uint cullMask = 0xFF;\n"
1646 " float tmin = 0.0;\n"
1647 " float tmax = 9.0;\n"
1648 " vec3 origin = vec3((float(gl_LaunchIDEXT.x) + 0.5f) / float(gl_LaunchSizeEXT.x), (float(gl_LaunchIDEXT.y) + 0.5f) / float(gl_LaunchSizeEXT.y), 0.0);\n"
1649 " vec3 directHit = vec3(0.0, 0.0, -1.0);\n"
1650 " vec3 directMiss = vec3(0.0, 0.0, +1.0);\n"
1651 "\n"
1652 " traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directHit, tmax, 0);\n"
1653 " traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directMiss, tmax, 0);\n"
1654 " executeCallableEXT(0, 0);"
1655 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1656 "}\n";
1657 const std::string ahitShader =
1658 "#version 460 core\n"
1659 "#extension GL_EXT_ray_tracing: require\n"
1660 + extHeader +
1661 "hitAttributeEXT vec3 attribs;\n"
1662 "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
1663 + getBufferDeclarations(shaderStage, formatName, declarations, 1) +
1664 "\n"
1665 + helperStr +
1666 "void main()\n"
1667 "{\n"
1668 + tempRes
1669 + testSrc +
1670 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1671 "}\n";
1672 const std::string chitShader =
1673 "#version 460 core\n"
1674 "#extension GL_EXT_ray_tracing: require\n"
1675 + extHeader +
1676 "hitAttributeEXT vec3 attribs;\n"
1677 "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
1678 + getBufferDeclarations(shaderStage, formatName, declarations, 2) +
1679 "\n"
1680 + helperStr +
1681 "void main()\n"
1682 "{\n"
1683 + tempRes
1684 + testSrc +
1685 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1686 "}\n";
1687 const std::string missShader =
1688 "#version 460 core\n"
1689 "#extension GL_EXT_ray_tracing: require\n"
1690 + extHeader +
1691 "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
1692 + getBufferDeclarations(shaderStage, formatName, declarations, 3) +
1693 "\n"
1694 + helperStr +
1695 "void main()\n"
1696 "{\n"
1697 + tempRes
1698 + testSrc +
1699 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1700 "}\n";
1701 const std::string sectShader =
1702 "#version 460 core\n"
1703 "#extension GL_EXT_ray_tracing: require\n"
1704 + extHeader +
1705 "hitAttributeEXT vec3 hitAttribute;\n"
1706 + getBufferDeclarations(shaderStage, formatName, declarations, 4) +
1707 "\n"
1708 + helperStr +
1709 "void main()\n"
1710 "{\n"
1711 + tempRes
1712 + testSrc +
1713 " reportIntersectionEXT(0.75f, 0x7Eu);\n"
1714 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1715 "}\n";
1716 const std::string callShader =
1717 "#version 460 core\n"
1718 "#extension GL_EXT_ray_tracing: require\n"
1719 + extHeader +
1720 "layout(location = 0) callableDataInEXT float callData;\n"
1721 + getBufferDeclarations(shaderStage, formatName, declarations, 5) +
1722 "\n"
1723 + helperStr +
1724 "void main()\n"
1725 "{\n"
1726 + tempRes
1727 + testSrc +
1728 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1729 "}\n";
1730
1731 programCollection.glslSources.add("rgen") << glu::RaygenSource (rgenShader) << buildOptions;
1732 programCollection.glslSources.add("ahit") << glu::AnyHitSource (ahitShader) << buildOptions;
1733 programCollection.glslSources.add("chit") << glu::ClosestHitSource (chitShader) << buildOptions;
1734 programCollection.glslSources.add("miss") << glu::MissSource (missShader) << buildOptions;
1735 programCollection.glslSources.add("sect") << glu::IntersectionSource(sectShader) << buildOptions;
1736 programCollection.glslSources.add("call") << glu::CallableSource (callShader) << buildOptions;
1737
1738 subgroups::addRayTracingNoSubgroupShader(programCollection);
1739 }
1740 #endif // CTS_USES_VULKANSC
1741 else
1742 TCU_THROW(InternalError, "Unknown stage or invalid stage set");
1743
1744 }
1745
isSubgroupSupported(Context & context)1746 bool vkt::subgroups::isSubgroupSupported (Context& context)
1747 {
1748 return context.contextSupports(vk::ApiVersion(0, 1, 1, 0));
1749 }
1750
areSubgroupOperationsSupportedForStage(Context & context,const VkShaderStageFlags stage)1751 bool vkt::subgroups::areSubgroupOperationsSupportedForStage (Context& context, const VkShaderStageFlags stage)
1752 {
1753 return (stage & (context.getSubgroupProperties().supportedStages)) ? true : false;
1754 }
1755
isSubgroupFeatureSupportedForDevice(Context & context,VkSubgroupFeatureFlagBits bit)1756 bool vkt::subgroups::isSubgroupFeatureSupportedForDevice (Context& context, VkSubgroupFeatureFlagBits bit)
1757 {
1758 return (bit & (context.getSubgroupProperties().supportedOperations)) ? true : false;
1759 }
1760
areQuadOperationsSupportedForStages(Context & context,const VkShaderStageFlags stages)1761 bool vkt::subgroups::areQuadOperationsSupportedForStages (Context& context, const VkShaderStageFlags stages)
1762 {
1763 // Check general quad feature support first.
1764 if (!isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_QUAD_BIT))
1765 return false;
1766
1767 if (context.getSubgroupProperties().quadOperationsInAllStages == VK_TRUE)
1768 return true; // No problem, any stage works.
1769
1770 // Only frag and compute are supported.
1771 const VkShaderStageFlags fragCompute = (VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT);
1772 const VkShaderStageFlags otherStages = ~fragCompute;
1773 return ((stages & otherStages) == 0u);
1774 }
1775
isFragmentSSBOSupportedForDevice(Context & context)1776 bool vkt::subgroups::isFragmentSSBOSupportedForDevice (Context& context)
1777 {
1778 return context.getDeviceFeatures().fragmentStoresAndAtomics ? true : false;
1779 }
1780
isVertexSSBOSupportedForDevice(Context & context)1781 bool vkt::subgroups::isVertexSSBOSupportedForDevice (Context& context)
1782 {
1783 return context.getDeviceFeatures().vertexPipelineStoresAndAtomics ? true : false;
1784 }
1785
isInt64SupportedForDevice(Context & context)1786 bool vkt::subgroups::isInt64SupportedForDevice (Context& context)
1787 {
1788 return context.getDeviceFeatures().shaderInt64 ? true : false;
1789 }
1790
isTessellationAndGeometryPointSizeSupported(Context & context)1791 bool vkt::subgroups::isTessellationAndGeometryPointSizeSupported (Context& context)
1792 {
1793 return context.getDeviceFeatures().shaderTessellationAndGeometryPointSize ? true : false;
1794 }
1795
is16BitUBOStorageSupported(Context & context)1796 bool vkt::subgroups::is16BitUBOStorageSupported (Context& context)
1797 {
1798 return context.get16BitStorageFeatures().uniformAndStorageBuffer16BitAccess ? true : false;
1799 }
1800
is8BitUBOStorageSupported(Context & context)1801 bool vkt::subgroups::is8BitUBOStorageSupported (Context& context)
1802 {
1803 return context.get8BitStorageFeatures().uniformAndStorageBuffer8BitAccess ? true : false;
1804 }
1805
isFormatSupportedForDevice(Context & context,vk::VkFormat format)1806 bool vkt::subgroups::isFormatSupportedForDevice (Context& context, vk::VkFormat format)
1807 {
1808 const VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures& subgroupExtendedTypesFeatures = context.getShaderSubgroupExtendedTypesFeatures();
1809 const VkPhysicalDeviceShaderFloat16Int8Features& float16Int8Features = context.getShaderFloat16Int8Features();
1810 const VkPhysicalDevice16BitStorageFeatures& storage16bit = context.get16BitStorageFeatures();
1811 const VkPhysicalDevice8BitStorageFeatures& storage8bit = context.get8BitStorageFeatures();
1812 const VkPhysicalDeviceFeatures& features = context.getDeviceFeatures();
1813 bool shaderFloat64 = features.shaderFloat64 ? true : false;
1814 bool shaderInt16 = features.shaderInt16 ? true : false;
1815 bool shaderInt64 = features.shaderInt64 ? true : false;
1816 bool shaderSubgroupExtendedTypes = false;
1817 bool shaderFloat16 = false;
1818 bool shaderInt8 = false;
1819 bool storageBuffer16BitAccess = false;
1820 bool storageBuffer8BitAccess = false;
1821
1822 if (context.isDeviceFunctionalitySupported("VK_KHR_shader_subgroup_extended_types") &&
1823 context.isDeviceFunctionalitySupported("VK_KHR_shader_float16_int8"))
1824 {
1825 shaderSubgroupExtendedTypes = subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes ? true : false;
1826 shaderFloat16 = float16Int8Features.shaderFloat16 ? true : false;
1827 shaderInt8 = float16Int8Features.shaderInt8 ? true : false;
1828
1829 if ( context.isDeviceFunctionalitySupported("VK_KHR_16bit_storage") )
1830 storageBuffer16BitAccess = storage16bit.storageBuffer16BitAccess ? true : false;
1831
1832 if (context.isDeviceFunctionalitySupported("VK_KHR_8bit_storage"))
1833 storageBuffer8BitAccess = storage8bit.storageBuffer8BitAccess ? true : false;
1834 }
1835
1836 switch (format)
1837 {
1838 default:
1839 return true;
1840 case VK_FORMAT_R16_SFLOAT:
1841 case VK_FORMAT_R16G16_SFLOAT:
1842 case VK_FORMAT_R16G16B16_SFLOAT:
1843 case VK_FORMAT_R16G16B16A16_SFLOAT:
1844 return shaderSubgroupExtendedTypes && shaderFloat16 && storageBuffer16BitAccess;
1845 case VK_FORMAT_R64_SFLOAT:
1846 case VK_FORMAT_R64G64_SFLOAT:
1847 case VK_FORMAT_R64G64B64_SFLOAT:
1848 case VK_FORMAT_R64G64B64A64_SFLOAT:
1849 return shaderFloat64;
1850 case VK_FORMAT_R8_SINT:
1851 case VK_FORMAT_R8G8_SINT:
1852 case VK_FORMAT_R8G8B8_SINT:
1853 case VK_FORMAT_R8G8B8A8_SINT:
1854 case VK_FORMAT_R8_UINT:
1855 case VK_FORMAT_R8G8_UINT:
1856 case VK_FORMAT_R8G8B8_UINT:
1857 case VK_FORMAT_R8G8B8A8_UINT:
1858 return shaderSubgroupExtendedTypes && shaderInt8 && storageBuffer8BitAccess;
1859 case VK_FORMAT_R16_SINT:
1860 case VK_FORMAT_R16G16_SINT:
1861 case VK_FORMAT_R16G16B16_SINT:
1862 case VK_FORMAT_R16G16B16A16_SINT:
1863 case VK_FORMAT_R16_UINT:
1864 case VK_FORMAT_R16G16_UINT:
1865 case VK_FORMAT_R16G16B16_UINT:
1866 case VK_FORMAT_R16G16B16A16_UINT:
1867 return shaderSubgroupExtendedTypes && shaderInt16 && storageBuffer16BitAccess;
1868 case VK_FORMAT_R64_SINT:
1869 case VK_FORMAT_R64G64_SINT:
1870 case VK_FORMAT_R64G64B64_SINT:
1871 case VK_FORMAT_R64G64B64A64_SINT:
1872 case VK_FORMAT_R64_UINT:
1873 case VK_FORMAT_R64G64_UINT:
1874 case VK_FORMAT_R64G64B64_UINT:
1875 case VK_FORMAT_R64G64B64A64_UINT:
1876 return shaderSubgroupExtendedTypes && shaderInt64;
1877 }
1878 }
1879
isSubgroupBroadcastDynamicIdSupported(Context & context)1880 bool vkt::subgroups::isSubgroupBroadcastDynamicIdSupported (Context& context)
1881 {
1882 return context.contextSupports(vk::ApiVersion(0, 1, 2, 0)) &&
1883 vk::getPhysicalDeviceVulkan12Features(context.getInstanceInterface(), context.getPhysicalDevice()).subgroupBroadcastDynamicId;
1884 }
1885
getFormatNameForGLSL(VkFormat format)1886 std::string vkt::subgroups::getFormatNameForGLSL (VkFormat format)
1887 {
1888 switch (format)
1889 {
1890 case VK_FORMAT_R8_SINT: return "int8_t";
1891 case VK_FORMAT_R8G8_SINT: return "i8vec2";
1892 case VK_FORMAT_R8G8B8_SINT: return "i8vec3";
1893 case VK_FORMAT_R8G8B8A8_SINT: return "i8vec4";
1894 case VK_FORMAT_R8_UINT: return "uint8_t";
1895 case VK_FORMAT_R8G8_UINT: return "u8vec2";
1896 case VK_FORMAT_R8G8B8_UINT: return "u8vec3";
1897 case VK_FORMAT_R8G8B8A8_UINT: return "u8vec4";
1898 case VK_FORMAT_R16_SINT: return "int16_t";
1899 case VK_FORMAT_R16G16_SINT: return "i16vec2";
1900 case VK_FORMAT_R16G16B16_SINT: return "i16vec3";
1901 case VK_FORMAT_R16G16B16A16_SINT: return "i16vec4";
1902 case VK_FORMAT_R16_UINT: return "uint16_t";
1903 case VK_FORMAT_R16G16_UINT: return "u16vec2";
1904 case VK_FORMAT_R16G16B16_UINT: return "u16vec3";
1905 case VK_FORMAT_R16G16B16A16_UINT: return "u16vec4";
1906 case VK_FORMAT_R32_SINT: return "int";
1907 case VK_FORMAT_R32G32_SINT: return "ivec2";
1908 case VK_FORMAT_R32G32B32_SINT: return "ivec3";
1909 case VK_FORMAT_R32G32B32A32_SINT: return "ivec4";
1910 case VK_FORMAT_R32_UINT: return "uint";
1911 case VK_FORMAT_R32G32_UINT: return "uvec2";
1912 case VK_FORMAT_R32G32B32_UINT: return "uvec3";
1913 case VK_FORMAT_R32G32B32A32_UINT: return "uvec4";
1914 case VK_FORMAT_R64_SINT: return "int64_t";
1915 case VK_FORMAT_R64G64_SINT: return "i64vec2";
1916 case VK_FORMAT_R64G64B64_SINT: return "i64vec3";
1917 case VK_FORMAT_R64G64B64A64_SINT: return "i64vec4";
1918 case VK_FORMAT_R64_UINT: return "uint64_t";
1919 case VK_FORMAT_R64G64_UINT: return "u64vec2";
1920 case VK_FORMAT_R64G64B64_UINT: return "u64vec3";
1921 case VK_FORMAT_R64G64B64A64_UINT: return "u64vec4";
1922 case VK_FORMAT_R16_SFLOAT: return "float16_t";
1923 case VK_FORMAT_R16G16_SFLOAT: return "f16vec2";
1924 case VK_FORMAT_R16G16B16_SFLOAT: return "f16vec3";
1925 case VK_FORMAT_R16G16B16A16_SFLOAT: return "f16vec4";
1926 case VK_FORMAT_R32_SFLOAT: return "float";
1927 case VK_FORMAT_R32G32_SFLOAT: return "vec2";
1928 case VK_FORMAT_R32G32B32_SFLOAT: return "vec3";
1929 case VK_FORMAT_R32G32B32A32_SFLOAT: return "vec4";
1930 case VK_FORMAT_R64_SFLOAT: return "double";
1931 case VK_FORMAT_R64G64_SFLOAT: return "dvec2";
1932 case VK_FORMAT_R64G64B64_SFLOAT: return "dvec3";
1933 case VK_FORMAT_R64G64B64A64_SFLOAT: return "dvec4";
1934 case VK_FORMAT_R8_USCALED: return "bool";
1935 case VK_FORMAT_R8G8_USCALED: return "bvec2";
1936 case VK_FORMAT_R8G8B8_USCALED: return "bvec3";
1937 case VK_FORMAT_R8G8B8A8_USCALED: return "bvec4";
1938 default: TCU_THROW(InternalError, "Unhandled format");
1939 }
1940 }
1941
getAdditionalExtensionForFormat(vk::VkFormat format)1942 std::string vkt::subgroups::getAdditionalExtensionForFormat (vk::VkFormat format)
1943 {
1944 switch (format)
1945 {
1946 default:
1947 return "";
1948 case VK_FORMAT_R8_SINT:
1949 case VK_FORMAT_R8G8_SINT:
1950 case VK_FORMAT_R8G8B8_SINT:
1951 case VK_FORMAT_R8G8B8A8_SINT:
1952 case VK_FORMAT_R8_UINT:
1953 case VK_FORMAT_R8G8_UINT:
1954 case VK_FORMAT_R8G8B8_UINT:
1955 case VK_FORMAT_R8G8B8A8_UINT:
1956 return "#extension GL_EXT_shader_subgroup_extended_types_int8 : enable\n";
1957 case VK_FORMAT_R16_SINT:
1958 case VK_FORMAT_R16G16_SINT:
1959 case VK_FORMAT_R16G16B16_SINT:
1960 case VK_FORMAT_R16G16B16A16_SINT:
1961 case VK_FORMAT_R16_UINT:
1962 case VK_FORMAT_R16G16_UINT:
1963 case VK_FORMAT_R16G16B16_UINT:
1964 case VK_FORMAT_R16G16B16A16_UINT:
1965 return "#extension GL_EXT_shader_subgroup_extended_types_int16 : enable\n";
1966 case VK_FORMAT_R64_SINT:
1967 case VK_FORMAT_R64G64_SINT:
1968 case VK_FORMAT_R64G64B64_SINT:
1969 case VK_FORMAT_R64G64B64A64_SINT:
1970 case VK_FORMAT_R64_UINT:
1971 case VK_FORMAT_R64G64_UINT:
1972 case VK_FORMAT_R64G64B64_UINT:
1973 case VK_FORMAT_R64G64B64A64_UINT:
1974 return "#extension GL_EXT_shader_subgroup_extended_types_int64 : enable\n";
1975 case VK_FORMAT_R16_SFLOAT:
1976 case VK_FORMAT_R16G16_SFLOAT:
1977 case VK_FORMAT_R16G16B16_SFLOAT:
1978 case VK_FORMAT_R16G16B16A16_SFLOAT:
1979 return "#extension GL_EXT_shader_subgroup_extended_types_float16 : enable\n";
1980 }
1981 }
1982
getAllFormats()1983 const std::vector<vk::VkFormat> vkt::subgroups::getAllFormats ()
1984 {
1985 std::vector<VkFormat> formats;
1986
1987 formats.push_back(VK_FORMAT_R8_SINT);
1988 formats.push_back(VK_FORMAT_R8G8_SINT);
1989 formats.push_back(VK_FORMAT_R8G8B8_SINT);
1990 formats.push_back(VK_FORMAT_R8G8B8A8_SINT);
1991 formats.push_back(VK_FORMAT_R8_UINT);
1992 formats.push_back(VK_FORMAT_R8G8_UINT);
1993 formats.push_back(VK_FORMAT_R8G8B8_UINT);
1994 formats.push_back(VK_FORMAT_R8G8B8A8_UINT);
1995 formats.push_back(VK_FORMAT_R16_SINT);
1996 formats.push_back(VK_FORMAT_R16G16_SINT);
1997 formats.push_back(VK_FORMAT_R16G16B16_SINT);
1998 formats.push_back(VK_FORMAT_R16G16B16A16_SINT);
1999 formats.push_back(VK_FORMAT_R16_UINT);
2000 formats.push_back(VK_FORMAT_R16G16_UINT);
2001 formats.push_back(VK_FORMAT_R16G16B16_UINT);
2002 formats.push_back(VK_FORMAT_R16G16B16A16_UINT);
2003 formats.push_back(VK_FORMAT_R32_SINT);
2004 formats.push_back(VK_FORMAT_R32G32_SINT);
2005 formats.push_back(VK_FORMAT_R32G32B32_SINT);
2006 formats.push_back(VK_FORMAT_R32G32B32A32_SINT);
2007 formats.push_back(VK_FORMAT_R32_UINT);
2008 formats.push_back(VK_FORMAT_R32G32_UINT);
2009 formats.push_back(VK_FORMAT_R32G32B32_UINT);
2010 formats.push_back(VK_FORMAT_R32G32B32A32_UINT);
2011 formats.push_back(VK_FORMAT_R64_SINT);
2012 formats.push_back(VK_FORMAT_R64G64_SINT);
2013 formats.push_back(VK_FORMAT_R64G64B64_SINT);
2014 formats.push_back(VK_FORMAT_R64G64B64A64_SINT);
2015 formats.push_back(VK_FORMAT_R64_UINT);
2016 formats.push_back(VK_FORMAT_R64G64_UINT);
2017 formats.push_back(VK_FORMAT_R64G64B64_UINT);
2018 formats.push_back(VK_FORMAT_R64G64B64A64_UINT);
2019 formats.push_back(VK_FORMAT_R16_SFLOAT);
2020 formats.push_back(VK_FORMAT_R16G16_SFLOAT);
2021 formats.push_back(VK_FORMAT_R16G16B16_SFLOAT);
2022 formats.push_back(VK_FORMAT_R16G16B16A16_SFLOAT);
2023 formats.push_back(VK_FORMAT_R32_SFLOAT);
2024 formats.push_back(VK_FORMAT_R32G32_SFLOAT);
2025 formats.push_back(VK_FORMAT_R32G32B32_SFLOAT);
2026 formats.push_back(VK_FORMAT_R32G32B32A32_SFLOAT);
2027 formats.push_back(VK_FORMAT_R64_SFLOAT);
2028 formats.push_back(VK_FORMAT_R64G64_SFLOAT);
2029 formats.push_back(VK_FORMAT_R64G64B64_SFLOAT);
2030 formats.push_back(VK_FORMAT_R64G64B64A64_SFLOAT);
2031 formats.push_back(VK_FORMAT_R8_USCALED);
2032 formats.push_back(VK_FORMAT_R8G8_USCALED);
2033 formats.push_back(VK_FORMAT_R8G8B8_USCALED);
2034 formats.push_back(VK_FORMAT_R8G8B8A8_USCALED);
2035
2036 return formats;
2037 }
2038
isFormatSigned(VkFormat format)2039 bool vkt::subgroups::isFormatSigned (VkFormat format)
2040 {
2041 switch (format)
2042 {
2043 default:
2044 return false;
2045 case VK_FORMAT_R8_SINT:
2046 case VK_FORMAT_R8G8_SINT:
2047 case VK_FORMAT_R8G8B8_SINT:
2048 case VK_FORMAT_R8G8B8A8_SINT:
2049 case VK_FORMAT_R16_SINT:
2050 case VK_FORMAT_R16G16_SINT:
2051 case VK_FORMAT_R16G16B16_SINT:
2052 case VK_FORMAT_R16G16B16A16_SINT:
2053 case VK_FORMAT_R32_SINT:
2054 case VK_FORMAT_R32G32_SINT:
2055 case VK_FORMAT_R32G32B32_SINT:
2056 case VK_FORMAT_R32G32B32A32_SINT:
2057 case VK_FORMAT_R64_SINT:
2058 case VK_FORMAT_R64G64_SINT:
2059 case VK_FORMAT_R64G64B64_SINT:
2060 case VK_FORMAT_R64G64B64A64_SINT:
2061 return true;
2062 }
2063 }
2064
isFormatUnsigned(VkFormat format)2065 bool vkt::subgroups::isFormatUnsigned (VkFormat format)
2066 {
2067 switch (format)
2068 {
2069 default:
2070 return false;
2071 case VK_FORMAT_R8_UINT:
2072 case VK_FORMAT_R8G8_UINT:
2073 case VK_FORMAT_R8G8B8_UINT:
2074 case VK_FORMAT_R8G8B8A8_UINT:
2075 case VK_FORMAT_R16_UINT:
2076 case VK_FORMAT_R16G16_UINT:
2077 case VK_FORMAT_R16G16B16_UINT:
2078 case VK_FORMAT_R16G16B16A16_UINT:
2079 case VK_FORMAT_R32_UINT:
2080 case VK_FORMAT_R32G32_UINT:
2081 case VK_FORMAT_R32G32B32_UINT:
2082 case VK_FORMAT_R32G32B32A32_UINT:
2083 case VK_FORMAT_R64_UINT:
2084 case VK_FORMAT_R64G64_UINT:
2085 case VK_FORMAT_R64G64B64_UINT:
2086 case VK_FORMAT_R64G64B64A64_UINT:
2087 return true;
2088 }
2089 }
2090
isFormatFloat(VkFormat format)2091 bool vkt::subgroups::isFormatFloat (VkFormat format)
2092 {
2093 switch (format)
2094 {
2095 default:
2096 return false;
2097 case VK_FORMAT_R16_SFLOAT:
2098 case VK_FORMAT_R16G16_SFLOAT:
2099 case VK_FORMAT_R16G16B16_SFLOAT:
2100 case VK_FORMAT_R16G16B16A16_SFLOAT:
2101 case VK_FORMAT_R32_SFLOAT:
2102 case VK_FORMAT_R32G32_SFLOAT:
2103 case VK_FORMAT_R32G32B32_SFLOAT:
2104 case VK_FORMAT_R32G32B32A32_SFLOAT:
2105 case VK_FORMAT_R64_SFLOAT:
2106 case VK_FORMAT_R64G64_SFLOAT:
2107 case VK_FORMAT_R64G64B64_SFLOAT:
2108 case VK_FORMAT_R64G64B64A64_SFLOAT:
2109 return true;
2110 }
2111 }
2112
isFormatBool(VkFormat format)2113 bool vkt::subgroups::isFormatBool (VkFormat format)
2114 {
2115 switch (format)
2116 {
2117 default:
2118 return false;
2119 case VK_FORMAT_R8_USCALED:
2120 case VK_FORMAT_R8G8_USCALED:
2121 case VK_FORMAT_R8G8B8_USCALED:
2122 case VK_FORMAT_R8G8B8A8_USCALED:
2123 return true;
2124 }
2125 }
2126
isFormat8bitTy(VkFormat format)2127 bool vkt::subgroups::isFormat8bitTy (VkFormat format)
2128 {
2129 switch (format)
2130 {
2131 default:
2132 return false;
2133 case VK_FORMAT_R8_SINT:
2134 case VK_FORMAT_R8G8_SINT:
2135 case VK_FORMAT_R8G8B8_SINT:
2136 case VK_FORMAT_R8G8B8A8_SINT:
2137 case VK_FORMAT_R8_UINT:
2138 case VK_FORMAT_R8G8_UINT:
2139 case VK_FORMAT_R8G8B8_UINT:
2140 case VK_FORMAT_R8G8B8A8_UINT:
2141 return true;
2142 }
2143 }
2144
isFormat16BitTy(VkFormat format)2145 bool vkt::subgroups::isFormat16BitTy (VkFormat format)
2146 {
2147 switch (format)
2148 {
2149 default:
2150 return false;
2151 case VK_FORMAT_R16_SFLOAT:
2152 case VK_FORMAT_R16G16_SFLOAT:
2153 case VK_FORMAT_R16G16B16_SFLOAT:
2154 case VK_FORMAT_R16G16B16A16_SFLOAT:
2155 case VK_FORMAT_R16_SINT:
2156 case VK_FORMAT_R16G16_SINT:
2157 case VK_FORMAT_R16G16B16_SINT:
2158 case VK_FORMAT_R16G16B16A16_SINT:
2159 case VK_FORMAT_R16_UINT:
2160 case VK_FORMAT_R16G16_UINT:
2161 case VK_FORMAT_R16G16B16_UINT:
2162 case VK_FORMAT_R16G16B16A16_UINT:
2163 return true;
2164 }
2165 }
2166
setVertexShaderFrameBuffer(SourceCollections & programCollection)2167 void vkt::subgroups::setVertexShaderFrameBuffer (SourceCollections& programCollection)
2168 {
2169 /*
2170 "layout(location = 0) in highp vec4 in_position;\n"
2171 "void main (void)\n"
2172 "{\n"
2173 " gl_Position = in_position;\n"
2174 " gl_PointSize = 1.0f;\n"
2175 "}\n";
2176 */
2177 programCollection.spirvAsmSources.add("vert") <<
2178 "; SPIR-V\n"
2179 "; Version: 1.3\n"
2180 "; Generator: Khronos Glslang Reference Front End; 7\n"
2181 "; Bound: 25\n"
2182 "; Schema: 0\n"
2183 "OpCapability Shader\n"
2184 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
2185 "OpMemoryModel Logical GLSL450\n"
2186 "OpEntryPoint Vertex %4 \"main\" %13 %17\n"
2187 "OpMemberDecorate %11 0 BuiltIn Position\n"
2188 "OpMemberDecorate %11 1 BuiltIn PointSize\n"
2189 "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
2190 "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
2191 "OpDecorate %11 Block\n"
2192 "OpDecorate %17 Location 0\n"
2193 "%2 = OpTypeVoid\n"
2194 "%3 = OpTypeFunction %2\n"
2195 "%6 = OpTypeFloat 32\n"
2196 "%7 = OpTypeVector %6 4\n"
2197 "%8 = OpTypeInt 32 0\n"
2198 "%9 = OpConstant %8 1\n"
2199 "%10 = OpTypeArray %6 %9\n"
2200 "%11 = OpTypeStruct %7 %6 %10 %10\n"
2201 "%12 = OpTypePointer Output %11\n"
2202 "%13 = OpVariable %12 Output\n"
2203 "%14 = OpTypeInt 32 1\n"
2204 "%15 = OpConstant %14 0\n"
2205 "%16 = OpTypePointer Input %7\n"
2206 "%17 = OpVariable %16 Input\n"
2207 "%19 = OpTypePointer Output %7\n"
2208 "%21 = OpConstant %14 1\n"
2209 "%22 = OpConstant %6 1\n"
2210 "%23 = OpTypePointer Output %6\n"
2211 "%4 = OpFunction %2 None %3\n"
2212 "%5 = OpLabel\n"
2213 "%18 = OpLoad %7 %17\n"
2214 "%20 = OpAccessChain %19 %13 %15\n"
2215 "OpStore %20 %18\n"
2216 "%24 = OpAccessChain %23 %13 %21\n"
2217 "OpStore %24 %22\n"
2218 "OpReturn\n"
2219 "OpFunctionEnd\n";
2220 }
2221
setFragmentShaderFrameBuffer(vk::SourceCollections & programCollection)2222 void vkt::subgroups::setFragmentShaderFrameBuffer (vk::SourceCollections& programCollection)
2223 {
2224 /*
2225 "layout(location = 0) in float in_color;\n"
2226 "layout(location = 0) out uint out_color;\n"
2227 "void main()\n"
2228 {\n"
2229 " out_color = uint(in_color);\n"
2230 "}\n";
2231 */
2232 programCollection.spirvAsmSources.add("fragment") <<
2233 "; SPIR-V\n"
2234 "; Version: 1.3\n"
2235 "; Generator: Khronos Glslang Reference Front End; 2\n"
2236 "; Bound: 14\n"
2237 "; Schema: 0\n"
2238 "OpCapability Shader\n"
2239 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
2240 "OpMemoryModel Logical GLSL450\n"
2241 "OpEntryPoint Fragment %4 \"main\" %8 %11\n"
2242 "OpExecutionMode %4 OriginUpperLeft\n"
2243 "OpDecorate %8 Location 0\n"
2244 "OpDecorate %11 Location 0\n"
2245 "%2 = OpTypeVoid\n"
2246 "%3 = OpTypeFunction %2\n"
2247 "%6 = OpTypeInt 32 0\n"
2248 "%7 = OpTypePointer Output %6\n"
2249 "%8 = OpVariable %7 Output\n"
2250 "%9 = OpTypeFloat 32\n"
2251 "%10 = OpTypePointer Input %9\n"
2252 "%11 = OpVariable %10 Input\n"
2253 "%4 = OpFunction %2 None %3\n"
2254 "%5 = OpLabel\n"
2255 "%12 = OpLoad %9 %11\n"
2256 "%13 = OpConvertFToU %6 %12\n"
2257 "OpStore %8 %13\n"
2258 "OpReturn\n"
2259 "OpFunctionEnd\n";
2260 }
2261
setTesCtrlShaderFrameBuffer(vk::SourceCollections & programCollection)2262 void vkt::subgroups::setTesCtrlShaderFrameBuffer (vk::SourceCollections& programCollection)
2263 {
2264 /*
2265 "#extension GL_KHR_shader_subgroup_basic: enable\n"
2266 "#extension GL_EXT_tessellation_shader : require\n"
2267 "layout(vertices = 2) out;\n"
2268 "void main (void)\n"
2269 "{\n"
2270 " if (gl_InvocationID == 0)\n"
2271 " {\n"
2272 " gl_TessLevelOuter[0] = 1.0f;\n"
2273 " gl_TessLevelOuter[1] = 1.0f;\n"
2274 " }\n"
2275 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
2276 "}\n";
2277 */
2278 programCollection.spirvAsmSources.add("tesc") <<
2279 "; SPIR-V\n"
2280 "; Version: 1.3\n"
2281 "; Generator: Khronos Glslang Reference Front End; 2\n"
2282 "; Bound: 46\n"
2283 "; Schema: 0\n"
2284 "OpCapability Tessellation\n"
2285 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
2286 "OpMemoryModel Logical GLSL450\n"
2287 "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %33 %39\n"
2288 "OpExecutionMode %4 OutputVertices 2\n"
2289 "OpDecorate %8 BuiltIn InvocationId\n"
2290 "OpDecorate %20 Patch\n"
2291 "OpDecorate %20 BuiltIn TessLevelOuter\n"
2292 "OpMemberDecorate %29 0 BuiltIn Position\n"
2293 "OpMemberDecorate %29 1 BuiltIn PointSize\n"
2294 "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
2295 "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
2296 "OpDecorate %29 Block\n"
2297 "OpMemberDecorate %35 0 BuiltIn Position\n"
2298 "OpMemberDecorate %35 1 BuiltIn PointSize\n"
2299 "OpMemberDecorate %35 2 BuiltIn ClipDistance\n"
2300 "OpMemberDecorate %35 3 BuiltIn CullDistance\n"
2301 "OpDecorate %35 Block\n"
2302 "%2 = OpTypeVoid\n"
2303 "%3 = OpTypeFunction %2\n"
2304 "%6 = OpTypeInt 32 1\n"
2305 "%7 = OpTypePointer Input %6\n"
2306 "%8 = OpVariable %7 Input\n"
2307 "%10 = OpConstant %6 0\n"
2308 "%11 = OpTypeBool\n"
2309 "%15 = OpTypeFloat 32\n"
2310 "%16 = OpTypeInt 32 0\n"
2311 "%17 = OpConstant %16 4\n"
2312 "%18 = OpTypeArray %15 %17\n"
2313 "%19 = OpTypePointer Output %18\n"
2314 "%20 = OpVariable %19 Output\n"
2315 "%21 = OpConstant %15 1\n"
2316 "%22 = OpTypePointer Output %15\n"
2317 "%24 = OpConstant %6 1\n"
2318 "%26 = OpTypeVector %15 4\n"
2319 "%27 = OpConstant %16 1\n"
2320 "%28 = OpTypeArray %15 %27\n"
2321 "%29 = OpTypeStruct %26 %15 %28 %28\n"
2322 "%30 = OpConstant %16 2\n"
2323 "%31 = OpTypeArray %29 %30\n"
2324 "%32 = OpTypePointer Output %31\n"
2325 "%33 = OpVariable %32 Output\n"
2326 "%35 = OpTypeStruct %26 %15 %28 %28\n"
2327 "%36 = OpConstant %16 32\n"
2328 "%37 = OpTypeArray %35 %36\n"
2329 "%38 = OpTypePointer Input %37\n"
2330 "%39 = OpVariable %38 Input\n"
2331 "%41 = OpTypePointer Input %26\n"
2332 "%44 = OpTypePointer Output %26\n"
2333 "%4 = OpFunction %2 None %3\n"
2334 "%5 = OpLabel\n"
2335 "%9 = OpLoad %6 %8\n"
2336 "%12 = OpIEqual %11 %9 %10\n"
2337 "OpSelectionMerge %14 None\n"
2338 "OpBranchConditional %12 %13 %14\n"
2339 "%13 = OpLabel\n"
2340 "%23 = OpAccessChain %22 %20 %10\n"
2341 "OpStore %23 %21\n"
2342 "%25 = OpAccessChain %22 %20 %24\n"
2343 "OpStore %25 %21\n"
2344 "OpBranch %14\n"
2345 "%14 = OpLabel\n"
2346 "%34 = OpLoad %6 %8\n"
2347 "%40 = OpLoad %6 %8\n"
2348 "%42 = OpAccessChain %41 %39 %40 %10\n"
2349 "%43 = OpLoad %26 %42\n"
2350 "%45 = OpAccessChain %44 %33 %34 %10\n"
2351 "OpStore %45 %43\n"
2352 "OpReturn\n"
2353 "OpFunctionEnd\n";
2354 }
2355
// Registers the hand-written SPIR-V assembly for the pass-through tessellation
// evaluation shader ("tese") used by the frame buffer tests.  The shader
// interpolates gl_Position between the two patch vertices using gl_TessCoord.x
// and forwards in_color[0] to out_color.  The GLSL it was generated from is
// preserved in the comment below; the assembly must stay byte-exact.
void vkt::subgroups::setTesEvalShaderFrameBuffer (vk::SourceCollections& programCollection)
{
	/*
	"#extension GL_KHR_shader_subgroup_ballot: enable\n"
	"#extension GL_EXT_tessellation_shader : require\n"
	"layout(isolines, equal_spacing, ccw ) in;\n"
	"layout(location = 0) in float in_color[];\n"
	"layout(location = 0) out float out_color;\n"
	"\n"
	"void main (void)\n"
	"{\n"
	"  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
	"  out_color = in_color[0];\n"
	"}\n";
	*/
	programCollection.spirvAsmSources.add("tese") <<
	"; SPIR-V\n"
	"; Version: 1.3\n"
	"; Generator: Khronos Glslang Reference Front End; 2\n"
	"; Bound: 45\n"
	"; Schema: 0\n"
	// Entry point and isolines/equal-spacing/ccw execution modes.
	"OpCapability Tessellation\n"
	"%1 = OpExtInstImport \"GLSL.std.450\"\n"
	"OpMemoryModel Logical GLSL450\n"
	"OpEntryPoint TessellationEvaluation %4 \"main\" %13 %20 %29 %39 %42\n"
	"OpExecutionMode %4 Isolines\n"
	"OpExecutionMode %4 SpacingEqual\n"
	"OpExecutionMode %4 VertexOrderCcw\n"
	// %11: per-vertex output block, %16: per-vertex input block (gl_in).
	"OpMemberDecorate %11 0 BuiltIn Position\n"
	"OpMemberDecorate %11 1 BuiltIn PointSize\n"
	"OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
	"OpMemberDecorate %11 3 BuiltIn CullDistance\n"
	"OpDecorate %11 Block\n"
	"OpMemberDecorate %16 0 BuiltIn Position\n"
	"OpMemberDecorate %16 1 BuiltIn PointSize\n"
	"OpMemberDecorate %16 2 BuiltIn ClipDistance\n"
	"OpMemberDecorate %16 3 BuiltIn CullDistance\n"
	"OpDecorate %16 Block\n"
	"OpDecorate %29 BuiltIn TessCoord\n"
	"OpDecorate %39 Location 0\n"
	"OpDecorate %42 Location 0\n"
	// Types, constants and interface variables.
	"%2 = OpTypeVoid\n"
	"%3 = OpTypeFunction %2\n"
	"%6 = OpTypeFloat 32\n"
	"%7 = OpTypeVector %6 4\n"
	"%8 = OpTypeInt 32 0\n"
	"%9 = OpConstant %8 1\n"
	"%10 = OpTypeArray %6 %9\n"
	"%11 = OpTypeStruct %7 %6 %10 %10\n"
	"%12 = OpTypePointer Output %11\n"
	"%13 = OpVariable %12 Output\n"
	"%14 = OpTypeInt 32 1\n"
	"%15 = OpConstant %14 0\n"
	"%16 = OpTypeStruct %7 %6 %10 %10\n"
	"%17 = OpConstant %8 32\n"
	"%18 = OpTypeArray %16 %17\n"
	"%19 = OpTypePointer Input %18\n"
	"%20 = OpVariable %19 Input\n"
	"%21 = OpTypePointer Input %7\n"
	"%24 = OpConstant %14 1\n"
	"%27 = OpTypeVector %6 3\n"
	"%28 = OpTypePointer Input %27\n"
	"%29 = OpVariable %28 Input\n"
	"%30 = OpConstant %8 0\n"
	"%31 = OpTypePointer Input %6\n"
	"%36 = OpTypePointer Output %7\n"
	"%38 = OpTypePointer Output %6\n"
	"%39 = OpVariable %38 Output\n"
	"%40 = OpTypeArray %6 %17\n"
	"%41 = OpTypePointer Input %40\n"
	"%42 = OpVariable %41 Input\n"
	// main(): gl_Position = FMix(gl_in[0].gl_Position, gl_in[1].gl_Position,
	// gl_TessCoord.xxxx); out_color = in_color[0].
	"%4 = OpFunction %2 None %3\n"
	"%5 = OpLabel\n"
	"%22 = OpAccessChain %21 %20 %15 %15\n"
	"%23 = OpLoad %7 %22\n"
	"%25 = OpAccessChain %21 %20 %24 %15\n"
	"%26 = OpLoad %7 %25\n"
	"%32 = OpAccessChain %31 %29 %30\n"
	"%33 = OpLoad %6 %32\n"
	"%34 = OpCompositeConstruct %7 %33 %33 %33 %33\n"
	"%35 = OpExtInst %7 %1 FMix %23 %26 %34\n"
	"%37 = OpAccessChain %36 %13 %15\n"
	"OpStore %37 %35\n"
	"%43 = OpAccessChain %31 %42 %15\n"
	"%44 = OpLoad %6 %43\n"
	"OpStore %39 %44\n"
	"OpReturn\n"
	"OpFunctionEnd\n";
}
2445
addGeometryShadersFromTemplate(const std::string & glslTemplate,const vk::ShaderBuildOptions & options,vk::GlslSourceCollection & collection)2446 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& glslTemplate, const vk::ShaderBuildOptions& options, vk::GlslSourceCollection& collection)
2447 {
2448 tcu::StringTemplate geometryTemplate(glslTemplate);
2449
2450 map<string, string> linesParams;
2451 linesParams.insert(pair<string, string>("TOPOLOGY", "lines"));
2452
2453 map<string, string> pointsParams;
2454 pointsParams.insert(pair<string, string>("TOPOLOGY", "points"));
2455
2456 collection.add("geometry_lines") << glu::GeometrySource(geometryTemplate.specialize(linesParams)) << options;
2457 collection.add("geometry_points") << glu::GeometrySource(geometryTemplate.specialize(pointsParams)) << options;
2458 }
2459
addGeometryShadersFromTemplate(const std::string & spirvTemplate,const vk::SpirVAsmBuildOptions & options,vk::SpirVAsmCollection & collection)2460 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& spirvTemplate, const vk::SpirVAsmBuildOptions& options, vk::SpirVAsmCollection& collection)
2461 {
2462 tcu::StringTemplate geometryTemplate(spirvTemplate);
2463
2464 map<string, string> linesParams;
2465 linesParams.insert(pair<string, string>("TOPOLOGY", "InputLines"));
2466
2467 map<string, string> pointsParams;
2468 pointsParams.insert(pair<string, string>("TOPOLOGY", "InputPoints"));
2469
2470 collection.add("geometry_lines") << geometryTemplate.specialize(linesParams) << options;
2471 collection.add("geometry_points") << geometryTemplate.specialize(pointsParams) << options;
2472 }
2473
initializeMemory(Context & context,const Allocation & alloc,const subgroups::SSBOData & data)2474 void initializeMemory (Context& context, const Allocation& alloc, const subgroups::SSBOData& data)
2475 {
2476 const vk::VkFormat format = data.format;
2477 const vk::VkDeviceSize size = data.numElements *
2478 (data.isImage() ? getFormatSizeInBytes(format) : getElementSizeInBytes(format, data.layout));
2479 if (subgroups::SSBOData::InitializeNonZero == data.initializeType)
2480 {
2481 de::Random rnd(context.getTestContext().getCommandLine().getBaseSeed());
2482
2483 switch (format)
2484 {
2485 default:
2486 DE_FATAL("Illegal buffer format");
2487 break;
2488 case VK_FORMAT_R8_SINT:
2489 case VK_FORMAT_R8G8_SINT:
2490 case VK_FORMAT_R8G8B8_SINT:
2491 case VK_FORMAT_R8G8B8A8_SINT:
2492 case VK_FORMAT_R8_UINT:
2493 case VK_FORMAT_R8G8_UINT:
2494 case VK_FORMAT_R8G8B8_UINT:
2495 case VK_FORMAT_R8G8B8A8_UINT:
2496 {
2497 deUint8* ptr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
2498
2499 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint8)); k++)
2500 {
2501 ptr[k] = rnd.getUint8();
2502 }
2503 }
2504 break;
2505 case VK_FORMAT_R16_SINT:
2506 case VK_FORMAT_R16G16_SINT:
2507 case VK_FORMAT_R16G16B16_SINT:
2508 case VK_FORMAT_R16G16B16A16_SINT:
2509 case VK_FORMAT_R16_UINT:
2510 case VK_FORMAT_R16G16_UINT:
2511 case VK_FORMAT_R16G16B16_UINT:
2512 case VK_FORMAT_R16G16B16A16_UINT:
2513 {
2514 deUint16* ptr = reinterpret_cast<deUint16*>(alloc.getHostPtr());
2515
2516 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint16)); k++)
2517 {
2518 ptr[k] = rnd.getUint16();
2519 }
2520 }
2521 break;
2522 case VK_FORMAT_R8_USCALED:
2523 case VK_FORMAT_R8G8_USCALED:
2524 case VK_FORMAT_R8G8B8_USCALED:
2525 case VK_FORMAT_R8G8B8A8_USCALED:
2526 {
2527 deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2528
2529 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
2530 {
2531 deUint32 r = rnd.getUint32();
2532 ptr[k] = (r & 1) ? r : 0;
2533 }
2534 }
2535 break;
2536 case VK_FORMAT_R32_SINT:
2537 case VK_FORMAT_R32G32_SINT:
2538 case VK_FORMAT_R32G32B32_SINT:
2539 case VK_FORMAT_R32G32B32A32_SINT:
2540 case VK_FORMAT_R32_UINT:
2541 case VK_FORMAT_R32G32_UINT:
2542 case VK_FORMAT_R32G32B32_UINT:
2543 case VK_FORMAT_R32G32B32A32_UINT:
2544 {
2545 deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2546
2547 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
2548 {
2549 ptr[k] = rnd.getUint32();
2550 }
2551 }
2552 break;
2553 case VK_FORMAT_R64_SINT:
2554 case VK_FORMAT_R64G64_SINT:
2555 case VK_FORMAT_R64G64B64_SINT:
2556 case VK_FORMAT_R64G64B64A64_SINT:
2557 case VK_FORMAT_R64_UINT:
2558 case VK_FORMAT_R64G64_UINT:
2559 case VK_FORMAT_R64G64B64_UINT:
2560 case VK_FORMAT_R64G64B64A64_UINT:
2561 {
2562 deUint64* ptr = reinterpret_cast<deUint64*>(alloc.getHostPtr());
2563
2564 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint64)); k++)
2565 {
2566 ptr[k] = rnd.getUint64();
2567 }
2568 }
2569 break;
2570 case VK_FORMAT_R16_SFLOAT:
2571 case VK_FORMAT_R16G16_SFLOAT:
2572 case VK_FORMAT_R16G16B16_SFLOAT:
2573 case VK_FORMAT_R16G16B16A16_SFLOAT:
2574 {
2575 deFloat16* ptr = reinterpret_cast<deFloat16*>(alloc.getHostPtr());
2576
2577 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deFloat16)); k++)
2578 {
2579 ptr[k] = deFloat32To16(rnd.getFloat());
2580 }
2581 }
2582 break;
2583 case VK_FORMAT_R32_SFLOAT:
2584 case VK_FORMAT_R32G32_SFLOAT:
2585 case VK_FORMAT_R32G32B32_SFLOAT:
2586 case VK_FORMAT_R32G32B32A32_SFLOAT:
2587 {
2588 float* ptr = reinterpret_cast<float*>(alloc.getHostPtr());
2589
2590 for (vk::VkDeviceSize k = 0; k < (size / sizeof(float)); k++)
2591 {
2592 ptr[k] = rnd.getFloat();
2593 }
2594 }
2595 break;
2596 case VK_FORMAT_R64_SFLOAT:
2597 case VK_FORMAT_R64G64_SFLOAT:
2598 case VK_FORMAT_R64G64B64_SFLOAT:
2599 case VK_FORMAT_R64G64B64A64_SFLOAT:
2600 {
2601 double* ptr = reinterpret_cast<double*>(alloc.getHostPtr());
2602
2603 for (vk::VkDeviceSize k = 0; k < (size / sizeof(double)); k++)
2604 {
2605 ptr[k] = rnd.getDouble();
2606 }
2607 }
2608 break;
2609 }
2610 }
2611 else if (subgroups::SSBOData::InitializeZero == data.initializeType)
2612 {
2613 deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2614
2615 for (vk::VkDeviceSize k = 0; k < size / 4; k++)
2616 {
2617 ptr[k] = 0;
2618 }
2619 }
2620
2621 if (subgroups::SSBOData::InitializeNone != data.initializeType)
2622 {
2623 flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
2624 }
2625 }
2626
getResultBinding(const VkShaderStageFlagBits shaderStage)2627 deUint32 getResultBinding (const VkShaderStageFlagBits shaderStage)
2628 {
2629 switch(shaderStage)
2630 {
2631 case VK_SHADER_STAGE_VERTEX_BIT:
2632 return 0u;
2633 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
2634 return 1u;
2635 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
2636 return 2u;
2637 case VK_SHADER_STAGE_GEOMETRY_BIT:
2638 return 3u;
2639 default:
2640 DE_ASSERT(0);
2641 return -1;
2642 }
2643 DE_ASSERT(0);
2644 return -1;
2645 }
2646
makeTessellationEvaluationFrameBufferTest(Context & context,VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,subgroups::CheckResult checkResult,const VkShaderStageFlags shaderStage)2647 tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTest (Context& context,
2648 VkFormat format,
2649 const SSBOData* extraData,
2650 deUint32 extraDataCount,
2651 const void* internalData,
2652 subgroups::CheckResult checkResult,
2653 const VkShaderStageFlags shaderStage)
2654 {
2655 return makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, shaderStage, 0u, 0u);
2656 }
2657
// Renders 1-pixel-high framebuffers of increasing width with a
// vertex + tessellation (isolines) + fragment pipeline and verifies the
// rendered pixels through the caller-supplied checkResult callback.
// tessShaderStageCreateFlags / requiredSubgroupSize are applied only to the
// tessellation stage(s) selected by shaderStage.
tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize (Context& context,
											VkFormat format,
											const SSBOData* extraData,
											deUint32 extraDataCount,
											const void* internalData,
											subgroups::CheckResult checkResult,
											const VkShaderStageFlags shaderStage,
											const deUint32 tessShaderStageCreateFlags,
											const deUint32 requiredSubgroupSize)
{
	const DeviceInterface& vk = context.getDeviceInterface();
	const VkDevice device = context.getDevice();
	const deUint32 maxWidth = getMaxWidth();
	// One input buffer or image per extra data entry, bound at binding i below.
	vector<de::SharedPtr<BufferOrImage> > inputBuffers (extraDataCount);
	DescriptorSetLayoutBuilder layoutBuilder;
	DescriptorPoolBuilder poolBuilder;
	DescriptorSetUpdateBuilder updateBuilder;
	Move <VkDescriptorPool> descriptorPool;
	Move <VkDescriptorSet> descriptorSet;
	const Unique<VkShaderModule> vertexShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
	const Unique<VkShaderModule> teCtrlShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("tesc"), 0u));
	const Unique<VkShaderModule> teEvalShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("tese"), 0u));
	const Unique<VkShaderModule> fragmentShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
	const Unique<VkRenderPass> renderPass (makeRenderPass(context, format));
	// Single vec4 position attribute at location 0.
	const VkVertexInputBindingDescription vertexInputBinding =
	{
		0u, // deUint32 binding;
		static_cast<deUint32>(sizeof(tcu::Vec4)), // deUint32 stride;
		VK_VERTEX_INPUT_RATE_VERTEX // VkVertexInputRate inputRate;
	};
	const VkVertexInputAttributeDescription vertexInputAttribute =
	{
		0u, // deUint32 location;
		0u, // deUint32 binding;
		VK_FORMAT_R32G32B32A32_SFLOAT, // VkFormat format;
		0u // deUint32 offset;
	};

	// Create and initialize the extra input resources (sampled images or UBOs).
	for (deUint32 i = 0u; i < extraDataCount; i++)
	{
		if (extraData[i].isImage())
		{
			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
		}
		else
		{
			DE_ASSERT(extraData[i].isUBO());
			vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
		}
		const Allocation& alloc = inputBuffers[i]->getAllocation();
		initializeMemory(context, alloc, extraData[i]);
	}

	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
		layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, shaderStage, DE_NULL);

	const Unique<VkDescriptorSetLayout> descriptorSetLayout (layoutBuilder.build(vk, device));

	const Unique<VkPipelineLayout> pipelineLayout (makePipelineLayout(vk, device, *descriptorSetLayout));

	// Per-stage required subgroup sizes, indexed vert/tesc/tese/geom/frag;
	// only the tessellation stage(s) under test get a non-zero entry.
	const deUint32 requiredSubgroupSizes[5] = {0u,
											   ((shaderStage & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? requiredSubgroupSize : 0u),
											   ((shaderStage & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? requiredSubgroupSize : 0u),
											   0u,
											   0u};

	const Unique<VkPipeline> pipeline (makeGraphicsPipeline(context, *pipelineLayout,
											VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT |
											VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
											*vertexShaderModule, *fragmentShaderModule, DE_NULL, *teCtrlShaderModule, *teEvalShaderModule,
											*renderPass, VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, &vertexInputBinding, &vertexInputAttribute, true, format,
											0u, ((shaderStage & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? tessShaderStageCreateFlags : 0u),
											((shaderStage & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? tessShaderStageCreateFlags : 0u),
											0u, 0u, requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));

	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
		poolBuilder.addType(inputBuffers[ndx]->getType());

	// Descriptor pool/set are only needed when there are extra inputs.
	if (extraDataCount > 0)
	{
		descriptorPool = poolBuilder.build(vk, device,
							VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
		descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
	}

	// Write one descriptor per extra input at the matching binding index.
	for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
	{
		if (inputBuffers[buffersNdx]->isImage())
		{
			VkDescriptorImageInfo info =
				makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
										inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);

			updateBuilder.writeSingle(*descriptorSet,
										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
										inputBuffers[buffersNdx]->getType(), &info);
		}
		else
		{
			VkDescriptorBufferInfo info =
				makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
										 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());

			updateBuilder.writeSingle(*descriptorSet,
										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
										inputBuffers[buffersNdx]->getType(), &info);
		}
	}

	updateBuilder.update(vk, device);

	const VkQueue queue = context.getUniversalQueue();
	const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
	const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
	const deUint32 subgroupSize = getSubgroupSize(context);
	const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
	// Two vertices per isoline patch, hence 2 * maxWidth positions.
	const vk::VkDeviceSize vertexBufferSize = 2ull * maxWidth * sizeof(tcu::Vec4);
	Buffer vertexBuffer (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
	unsigned totalIterations = 0u;
	unsigned failedIterations = 0u;
	Image discardableImage (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);

	{
		// Build vertex pairs: each patch spans exactly one pixel column in
		// NDC x, covering the whole [-1, 1] range over maxWidth pixels.
		const Allocation& alloc = vertexBuffer.getAllocation();
		std::vector<tcu::Vec4> data (2u * maxWidth, Vec4(1.0f, 0.0f, 1.0f, 1.0f));
		const float pixelSize = 2.0f / static_cast<float>(maxWidth);
		float leftHandPosition = -1.0f;

		for(deUint32 ndx = 0u; ndx < data.size(); ndx+=2u)
		{
			data[ndx][0] = leftHandPosition;
			leftHandPosition += pixelSize;
			data[ndx+1][0] = leftHandPosition;
		}

		deMemcpy(alloc.getHostPtr(), &data[0], data.size() * sizeof(tcu::Vec4));
		flushAlloc(vk, device, alloc);
	}

	const Unique<VkFramebuffer> framebuffer (makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
	const VkViewport viewport = makeViewport(maxWidth, 1u);
	const VkRect2D scissor = makeRect2D(maxWidth, 1u);
	const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
	Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
	const VkDeviceSize vertexBufferOffset = 0u;

	// One render + readback + check per tested width (see getNextWidth).
	for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
	{
		totalIterations++;

		beginCommandBuffer(vk, *cmdBuffer);
		{

			vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
			vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);

			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));

			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);

			if (extraDataCount > 0)
			{
				vk.cmdBindDescriptorSets(*cmdBuffer,
					VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
					&descriptorSet.get(), 0u, DE_NULL);
			}

			// Two vertices per patch -> 2 * width vertices drawn.
			vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
			vk.cmdDraw(*cmdBuffer, 2 * width, 1, 0, 0);

			endRenderPass(vk, *cmdBuffer);

			copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
			endCommandBuffer(vk, *cmdBuffer);

			submitCommandsAndWait(vk, device, queue, *cmdBuffer);
		}
		context.resetCommandPoolForVKSC(device, *cmdPool);

		{
			const Allocation& allocResult = imageBufferResult.getAllocation();
			invalidateAlloc(vk, device, allocResult);

			std::vector<const void*> datas;
			datas.push_back(allocResult.getHostPtr());
			// NOTE(review): the callback receives width/2u as the element
			// count even though 2*width vertices were drawn — confirm the
			// expected pixel count against the checkResult implementations.
			if (!checkResult(internalData, datas, width/2u, subgroupSize))
				failedIterations++;
		}
	}

	if (0 < failedIterations)
	{
		unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);

		context.getTestContext().getLog()
			<< TestLog::Message << valuesPassed << " / "
			<< totalIterations << " values passed" << TestLog::EndMessage;
		return tcu::TestStatus::fail("Failed!");
	}

	return tcu::TestStatus::pass("OK");
}
2861
check(std::vector<const void * > datas,deUint32 width,deUint32 ref)2862 bool vkt::subgroups::check (std::vector<const void*> datas, deUint32 width, deUint32 ref)
2863 {
2864 const deUint32* data = reinterpret_cast<const deUint32*>(datas[0]);
2865
2866 for (deUint32 n = 0; n < width; ++n)
2867 {
2868 if (data[n] != ref)
2869 {
2870 return false;
2871 }
2872 }
2873
2874 return true;
2875 }
2876
checkComputeOrMesh(std::vector<const void * > datas,const deUint32 numWorkgroups[3],const deUint32 localSize[3],deUint32 ref)2877 bool vkt::subgroups::checkComputeOrMesh (std::vector<const void*> datas,
2878 const deUint32 numWorkgroups[3],
2879 const deUint32 localSize[3],
2880 deUint32 ref)
2881 {
2882 const deUint32 globalSizeX = numWorkgroups[0] * localSize[0];
2883 const deUint32 globalSizeY = numWorkgroups[1] * localSize[1];
2884 const deUint32 globalSizeZ = numWorkgroups[2] * localSize[2];
2885
2886 return check(datas, globalSizeX * globalSizeY * globalSizeZ, ref);
2887 }
2888
makeGeometryFrameBufferTest(Context & context,VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,subgroups::CheckResult checkResult)2889 tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTest (Context& context,
2890 VkFormat format,
2891 const SSBOData* extraData,
2892 deUint32 extraDataCount,
2893 const void* internalData,
2894 subgroups::CheckResult checkResult)
2895 {
2896 return makeGeometryFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, 0u, 0u);
2897 }
2898
// Renders 1-pixel-high framebuffers of increasing width with a
// vertex + geometry + fragment pipeline (one point per pixel) and verifies
// the rendered pixels through the caller-supplied checkResult callback.
// geometryShaderStageCreateFlags / requiredSubgroupSize apply only to the
// geometry stage.
tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTestRequiredSubgroupSize (Context& context,
											VkFormat format,
											const SSBOData* extraData,
											deUint32 extraDataCount,
											const void* internalData,
											subgroups::CheckResult checkResult,
											const deUint32 geometryShaderStageCreateFlags,
											const deUint32 requiredSubgroupSize)
{
	const DeviceInterface& vk = context.getDeviceInterface();
	const VkDevice device = context.getDevice();
	const deUint32 maxWidth = getMaxWidth();
	// One input buffer or image per extra data entry, bound at binding i below.
	vector<de::SharedPtr<BufferOrImage> > inputBuffers (extraDataCount);
	DescriptorSetLayoutBuilder layoutBuilder;
	DescriptorPoolBuilder poolBuilder;
	DescriptorSetUpdateBuilder updateBuilder;
	Move <VkDescriptorPool> descriptorPool;
	Move <VkDescriptorSet> descriptorSet;
	const Unique<VkShaderModule> vertexShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
	const Unique<VkShaderModule> geometryShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("geometry"), 0u));
	const Unique<VkShaderModule> fragmentShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
	const Unique<VkRenderPass> renderPass (makeRenderPass(context, format));
	// Single vec4 position attribute at location 0.
	const VkVertexInputBindingDescription vertexInputBinding =
	{
		0u, // deUint32 binding;
		static_cast<deUint32>(sizeof(tcu::Vec4)), // deUint32 stride;
		VK_VERTEX_INPUT_RATE_VERTEX // VkVertexInputRate inputRate;
	};
	const VkVertexInputAttributeDescription vertexInputAttribute =
	{
		0u, // deUint32 location;
		0u, // deUint32 binding;
		VK_FORMAT_R32G32B32A32_SFLOAT, // VkFormat format;
		0u // deUint32 offset;
	};

	// Create and initialize the extra input resources (sampled images or UBOs).
	for (deUint32 i = 0u; i < extraDataCount; i++)
	{
		if (extraData[i].isImage())
		{
			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
		}
		else
		{
			DE_ASSERT(extraData[i].isUBO());
			vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
		}
		const Allocation& alloc = inputBuffers[i]->getAllocation();
		initializeMemory(context, alloc, extraData[i]);
	}

	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
		layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_GEOMETRY_BIT, DE_NULL);

	const Unique<VkDescriptorSetLayout> descriptorSetLayout (layoutBuilder.build(vk, device));

	const Unique<VkPipelineLayout> pipelineLayout (makePipelineLayout(vk, device, *descriptorSetLayout));

	// Per-stage required subgroup sizes, indexed vert/tesc/tese/geom/frag;
	// only the geometry entry may be non-zero here.
	const deUint32 requiredSubgroupSizes[5] = {0u, 0u, 0u, requiredSubgroupSize, 0u};

	const Unique<VkPipeline> pipeline (makeGraphicsPipeline(context, *pipelineLayout,
											VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_GEOMETRY_BIT,
											*vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, DE_NULL, DE_NULL,
											*renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST, &vertexInputBinding, &vertexInputAttribute, true, format,
											0u, 0u, 0u, geometryShaderStageCreateFlags, 0u,
											requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));

	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
		poolBuilder.addType(inputBuffers[ndx]->getType());

	// Descriptor pool/set are only needed when there are extra inputs.
	if (extraDataCount > 0)
	{
		descriptorPool = poolBuilder.build(vk, device,
							VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
		descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
	}

	// Write one descriptor per extra input at the matching binding index.
	for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
	{
		if (inputBuffers[buffersNdx]->isImage())
		{
			VkDescriptorImageInfo info =
				makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
										inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);

			updateBuilder.writeSingle(*descriptorSet,
										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
										inputBuffers[buffersNdx]->getType(), &info);
		}
		else
		{
			VkDescriptorBufferInfo info =
				makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
										 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());

			updateBuilder.writeSingle(*descriptorSet,
										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
										inputBuffers[buffersNdx]->getType(), &info);
		}
	}

	updateBuilder.update(vk, device);

	const VkQueue queue = context.getUniversalQueue();
	const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
	const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
	const deUint32 subgroupSize = getSubgroupSize(context);
	const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
	// One point per pixel, hence maxWidth positions.
	const vk::VkDeviceSize vertexBufferSize = maxWidth * sizeof(tcu::Vec4);
	Buffer vertexBuffer (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
	unsigned totalIterations = 0u;
	unsigned failedIterations = 0u;
	Image discardableImage (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);

	{
		// Build point positions: one point at the center of each pixel column
		// in NDC x, covering [-1, 1] over maxWidth pixels.
		const Allocation& alloc = vertexBuffer.getAllocation();
		std::vector<tcu::Vec4> data (maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
		const float pixelSize = 2.0f / static_cast<float>(maxWidth);
		float leftHandPosition = -1.0f;

		for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
		{
			data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
			leftHandPosition += pixelSize;
		}

		deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
		flushAlloc(vk, device, alloc);
	}

	const Unique<VkFramebuffer> framebuffer (makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
	const VkViewport viewport = makeViewport(maxWidth, 1u);
	const VkRect2D scissor = makeRect2D(maxWidth, 1u);
	const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
	Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
	const VkDeviceSize vertexBufferOffset = 0u;

	// One render + readback + check per tested width (see getNextWidth).
	for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
	{
		totalIterations++;

		// Unlike the tessellation variant, the extra inputs are re-randomized
		// before every iteration.
		for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
		{
			const Allocation& alloc = inputBuffers[ndx]->getAllocation();
			initializeMemory(context, alloc, extraData[ndx]);
		}

		beginCommandBuffer(vk, *cmdBuffer);
		{
			vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);

			vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);

			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));

			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);

			if (extraDataCount > 0)
			{
				vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
					&descriptorSet.get(), 0u, DE_NULL);
			}

			vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);

			vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);

			endRenderPass(vk, *cmdBuffer);

			copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);

			endCommandBuffer(vk, *cmdBuffer);

			submitCommandsAndWait(vk, device, queue, *cmdBuffer);
		}
		context.resetCommandPoolForVKSC(device, *cmdPool);

		{
			const Allocation& allocResult = imageBufferResult.getAllocation();
			invalidateAlloc(vk, device, allocResult);

			std::vector<const void*> datas;
			datas.push_back(allocResult.getHostPtr());
			if (!checkResult(internalData, datas, width, subgroupSize))
				failedIterations++;
		}
	}

	if (0 < failedIterations)
	{
		unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);

		context.getTestContext().getLog()
			<< TestLog::Message << valuesPassed << " / "
			<< totalIterations << " values passed" << TestLog::EndMessage;

		return tcu::TestStatus::fail("Failed!");
	}

	return tcu::TestStatus::pass("OK");
}
3101
getPossibleGraphicsSubgroupStages(Context & context,const vk::VkShaderStageFlags testedStages)3102 vk::VkShaderStageFlags vkt::subgroups::getPossibleGraphicsSubgroupStages (Context& context, const vk::VkShaderStageFlags testedStages)
3103 {
3104 const VkPhysicalDeviceSubgroupProperties& subgroupProperties = context.getSubgroupProperties();
3105 VkShaderStageFlags stages = testedStages & subgroupProperties.supportedStages;
3106
3107 DE_ASSERT(isAllGraphicsStages(testedStages));
3108
3109 if (VK_SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
3110 {
3111 if ((stages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
3112 TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
3113 else
3114 stages = VK_SHADER_STAGE_FRAGMENT_BIT;
3115 }
3116
3117 if (static_cast<VkShaderStageFlags>(0u) == stages)
3118 TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
3119
3120 return stages;
3121 }
3122
allStages(Context & context,vk::VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,const VerificationFunctor & checkResult,const vk::VkShaderStageFlags shaderStage)3123 tcu::TestStatus vkt::subgroups::allStages (Context& context,
3124 vk::VkFormat format,
3125 const SSBOData* extraData,
3126 deUint32 extraDataCount,
3127 const void* internalData,
3128 const VerificationFunctor& checkResult,
3129 const vk::VkShaderStageFlags shaderStage)
3130 {
3131 return vkt::subgroups::allStagesRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, shaderStage,
3132 0u, 0u, 0u, 0u, 0u, DE_NULL);
3133 }
3134
// Runs a subgroup test across an arbitrary combination of graphics stages.
//
// A pipeline is built containing every stage in shaderStageTested; stages that
// are needed only to complete a valid pipeline (e.g. a vertex stage when only
// tessellation is tested) use "*_noSubgroup" passthrough shader variants. Each
// tested non-fragment stage writes its results into a dedicated SSBO, while the
// fragment stage result is read back from the color attachment. The test draws
// 'width' points for widths from 1 up to getMaxWidth() and calls checkResult
// for every tested stage at every width.
//
// The *ShaderStageCreateFlags parameters are forwarded into the corresponding
// VkPipelineShaderStageCreateInfo. requiredSubgroupSize, when non-null, holds
// one required subgroup size per stage in the order: vertex, tessellation
// control, tessellation evaluation, geometry, fragment.
tcu::TestStatus vkt::subgroups::allStagesRequiredSubgroupSize (Context&						context,
															   vk::VkFormat					format,
															   const SSBOData*				extraDatas,
															   deUint32						extraDatasCount,
															   const void*					internalData,
															   const VerificationFunctor&	checkResult,
															   const vk::VkShaderStageFlags	shaderStageTested,
															   const deUint32				vertexShaderStageCreateFlags,
															   const deUint32				tessellationControlShaderStageCreateFlags,
															   const deUint32				tessellationEvalShaderStageCreateFlags,
															   const deUint32				geometryShaderStageCreateFlags,
															   const deUint32				fragmentShaderStageCreateFlags,
															   const deUint32				requiredSubgroupSize[5])
{
	const DeviceInterface&			vk					= context.getDeviceInterface();
	const VkDevice					device				= context.getDevice();
	const deUint32					maxWidth			= getMaxWidth();
	vector<VkShaderStageFlagBits>	stagesVector;
	// Stages that must be added to the pipeline in addition to the tested ones.
	VkShaderStageFlags				shaderStageRequired	= (VkShaderStageFlags)0ull;

	Move<VkShaderModule>			vertexShaderModule;
	Move<VkShaderModule>			teCtrlShaderModule;
	Move<VkShaderModule>			teEvalShaderModule;
	Move<VkShaderModule>			geometryShaderModule;
	Move<VkShaderModule>			fragmentShaderModule;

	if (shaderStageTested & VK_SHADER_STAGE_VERTEX_BIT)
	{
		stagesVector.push_back(VK_SHADER_STAGE_VERTEX_BIT);
	}
	if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
	{
		// Tessellation control needs an evaluation stage and a vertex stage to form a valid pipeline.
		stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
	}
	if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
	{
		// Tessellation evaluation likewise needs vertex and control stages.
		stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
	}
	if (shaderStageTested & VK_SHADER_STAGE_GEOMETRY_BIT)
	{
		// Geometry needs at least a vertex stage.
		stagesVector.push_back(VK_SHADER_STAGE_GEOMETRY_BIT);
		const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
		shaderStageRequired |= (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
	}
	if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
	{
		// Note: fragment is intentionally not pushed into stagesVector; its results
		// are read back from the color attachment rather than an SSBO (see below).
		const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
		shaderStageRequired |= (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
	}

	const deUint32	stagesCount	= static_cast<deUint32>(stagesVector.size());
	// Stages that are only required (not tested) use the "_noSubgroup" passthrough shader variants.
	const string	vert		= (shaderStageRequired & VK_SHADER_STAGE_VERTEX_BIT)					? "vert_noSubgroup"		: "vert";
	const string	tesc		= (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)		? "tesc_noSubgroup"		: "tesc";
	const string	tese		= (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)	? "tese_noSubgroup"		: "tese";

	// From here on, shaderStageRequired is the complete set of stages in the pipeline.
	shaderStageRequired = shaderStageTested | shaderStageRequired;

	vertexShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(vert), 0u);
	if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
	{
		teCtrlShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tesc), 0u);
		teEvalShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tese), 0u);
	}
	if (shaderStageRequired & VK_SHADER_STAGE_GEOMETRY_BIT)
	{
		if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
		{
			// tessellation shaders output line primitives
			geometryShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("geometry_lines"), 0u);
		}
		else
		{
			// otherwise points are processed by geometry shader
			geometryShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("geometry_points"), 0u);
		}
	}
	if (shaderStageRequired & VK_SHADER_STAGE_FRAGMENT_BIT)
		fragmentShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u);

	// One result buffer per tested stage, followed by the caller-supplied extra data resources.
	std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(stagesCount + extraDatasCount);

	DescriptorSetLayoutBuilder layoutBuilder;

	// The implicit result SSBO we use to store our outputs from the shader
	for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
	{
		// The tessellation evaluation stage produces two results per input vertex
		// (its results are also verified over width*2 below), hence the doubled size.
		const VkDeviceSize shaderSize = (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? maxWidth * 2 : maxWidth;
		const VkDeviceSize size = getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize;
		inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT));

		layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1, stagesVector[ndx], getResultBinding(stagesVector[ndx]), DE_NULL);
	}

	// Create and initialize the extra data resources (images, UBOs or SSBOs).
	for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
	{
		const deUint32 datasNdx = ndx - stagesCount;
		if (extraDatas[datasNdx].isImage())
		{
			inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
		}
		else
		{
			const auto usage = (extraDatas[datasNdx].isUBO() ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT : VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
			const auto size = getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) * extraDatas[datasNdx].numElements;
			inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, usage));
		}

		const Allocation& alloc = inputBuffers[ndx]->getAllocation();
		initializeMemory(context, alloc, extraDatas[datasNdx]);

		layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1,
								extraDatas[datasNdx].stages, extraDatas[datasNdx].binding, DE_NULL);
	}

	const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));

	const Unique<VkPipelineLayout> pipelineLayout(
		makePipelineLayout(vk, device, *descriptorSetLayout));

	const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
	// Patch list topology when tessellation is present, point list otherwise.
	const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
										   shaderStageRequired,
										   *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, *teCtrlShaderModule, *teEvalShaderModule,
										   *renderPass,
										   (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
										   DE_NULL, DE_NULL, false, VK_FORMAT_R32G32B32A32_SFLOAT,
										   vertexShaderStageCreateFlags, tessellationControlShaderStageCreateFlags, tessellationEvalShaderStageCreateFlags,
										   geometryShaderStageCreateFlags, fragmentShaderStageCreateFlags, requiredSubgroupSize));

	Move <VkDescriptorPool>	descriptorPool;
	Move <VkDescriptorSet>	descriptorSet;

	if (inputBuffers.size() > 0)
	{
		DescriptorPoolBuilder poolBuilder;

		for (deUint32 ndx = 0u; ndx < static_cast<deUint32>(inputBuffers.size()); ndx++)
		{
			poolBuilder.addType(inputBuffers[ndx]->getType());
		}

		descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

		// Create descriptor set
		descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);

		DescriptorSetUpdateBuilder updateBuilder;

		// Write a descriptor for every result buffer and every extra data resource.
		for (deUint32 ndx = 0u; ndx < stagesCount + extraDatasCount; ndx++)
		{
			deUint32 binding;
			if (ndx < stagesCount) binding = getResultBinding(stagesVector[ndx]);
			else binding = extraDatas[ndx -stagesCount].binding;

			if (inputBuffers[ndx]->isImage())
			{
				VkDescriptorImageInfo info =
					makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
											inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);

				updateBuilder.writeSingle(	*descriptorSet,
											DescriptorSetUpdateBuilder::Location::binding(binding),
											inputBuffers[ndx]->getType(), &info);
			}
			else
			{
				VkDescriptorBufferInfo info =
					makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(),
							0ull, inputBuffers[ndx]->getAsBuffer()->getSize());

				updateBuilder.writeSingle(	*descriptorSet,
												DescriptorSetUpdateBuilder::Location::binding(binding),
												inputBuffers[ndx]->getType(), &info);
			}
		}

		updateBuilder.update(vk, device);
	}

	{
		const VkQueue					queue					= context.getUniversalQueue();
		const deUint32					queueFamilyIndex		= context.getUniversalQueueFamilyIndex();
		const Unique<VkCommandPool>		cmdPool					(makeCommandPool(vk, device, queueFamilyIndex));
		const deUint32					subgroupSize			= getSubgroupSize(context);
		const Unique<VkCommandBuffer>	cmdBuffer				(makeCommandBuffer(context, *cmdPool));
		unsigned						totalIterations			= 0u;
		unsigned						failedIterations		= 0u;
		Image							resultImage				(context, maxWidth, 1, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
		const Unique<VkFramebuffer>		framebuffer				(makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), maxWidth, 1u));
		const VkViewport				viewport				= makeViewport(maxWidth, 1u);
		const VkRect2D					scissor					= makeRect2D(maxWidth, 1u);
		const vk::VkDeviceSize			imageResultSize			= tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
		// Host-readable staging buffer the color attachment is copied into.
		Buffer							imageBufferResult		(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
		const VkImageSubresourceRange	subresourceRange		=
		{
			VK_IMAGE_ASPECT_COLOR_BIT,											//VkImageAspectFlags	aspectMask
			0u,																	//deUint32				baseMipLevel
			1u,																	//deUint32				levelCount
			0u,																	//deUint32				baseArrayLayer
			1u																	//deUint32				layerCount
		};

		// Transition the result image from UNDEFINED to COLOR_ATTACHMENT_OPTIMAL before rendering.
		const VkImageMemoryBarrier		colorAttachmentBarrier	= makeImageMemoryBarrier(
			(VkAccessFlags)0u, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
			VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
			resultImage.getImage(), subresourceRange);

		for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
		{
			for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
			{
				// re-init the data
				const Allocation& alloc = inputBuffers[ndx]->getAllocation();
				initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
			}

			totalIterations++;

			beginCommandBuffer(vk, *cmdBuffer);

			vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &colorAttachmentBarrier);

			vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);

			vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);

			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));

			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);

			if (stagesCount + extraDatasCount > 0)
				vk.cmdBindDescriptorSets(*cmdBuffer,
						VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
						&descriptorSet.get(), 0u, DE_NULL);

			vk.cmdDraw(*cmdBuffer, width, 1, 0, 0);

			endRenderPass(vk, *cmdBuffer);

			// Copy the rendered row back so the fragment results can be inspected on the host.
			copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(width, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);

			endCommandBuffer(vk, *cmdBuffer);

			submitCommandsAndWait(vk, device, queue, *cmdBuffer);

			// Verify the SSBO results of every tested non-fragment stage.
			for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
			{
				std::vector<const void*> datas;
				if (!inputBuffers[ndx]->isImage())
				{
					const Allocation& resultAlloc = inputBuffers[ndx]->getAllocation();
					invalidateAlloc(vk, device, resultAlloc);
					// we always have our result data first
					datas.push_back(resultAlloc.getHostPtr());
				}

				// Append host pointers of any non-image extra data used by this stage.
				for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
				{
					const deUint32 datasNdx = index - stagesCount;
					if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
					{
						const Allocation& resultAlloc = inputBuffers[index]->getAllocation();
						invalidateAlloc(vk, device, resultAlloc);
						// we always have our result data first
						datas.push_back(resultAlloc.getHostPtr());
					}
				}

				// Any stage in the vertex pipeline may be called multiple times per vertex, so we may need >= non-strict comparisons.
				const bool		multiCall	= (	stagesVector[ndx] == VK_SHADER_STAGE_VERTEX_BIT ||
												stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT ||
												stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT ||
												stagesVector[ndx] == VK_SHADER_STAGE_GEOMETRY_BIT );
				const deUint32	usedWidth	= ((stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? width * 2 : width);

				if (!checkResult(internalData, datas, usedWidth, subgroupSize, multiCall))
					failedIterations++;
			}
			// The fragment stage is verified against the copied color attachment contents.
			if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
			{
				std::vector<const void*> datas;
				const Allocation& resultAlloc = imageBufferResult.getAllocation();
				invalidateAlloc(vk, device, resultAlloc);

				// we always have our result data first
				datas.push_back(resultAlloc.getHostPtr());

				for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
				{
					const deUint32 datasNdx = index - stagesCount;
					if (VK_SHADER_STAGE_FRAGMENT_BIT & extraDatas[datasNdx].stages && (!inputBuffers[index]->isImage()))
					{
						const Allocation& alloc = inputBuffers[index]->getAllocation();
						invalidateAlloc(vk, device, alloc);
						// we always have our result data first
						datas.push_back(alloc.getHostPtr());
					}
				}

				if (!checkResult(internalData, datas, width, subgroupSize, false))
					failedIterations++;
			}

			context.resetCommandPoolForVKSC(device, *cmdPool);
		}

		if (0 < failedIterations)
		{
			unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);

			context.getTestContext().getLog()
				<< TestLog::Message << valuesPassed << " / "
				<< totalIterations << " values passed" << TestLog::EndMessage;

			return tcu::TestStatus::fail("Failed!");
		}
	}

	return tcu::TestStatus::pass("OK");
}
3459
makeVertexFrameBufferTest(Context & context,vk::VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,subgroups::CheckResult checkResult)3460 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTest (Context& context,
3461 vk::VkFormat format,
3462 const SSBOData* extraData,
3463 deUint32 extraDataCount,
3464 const void* internalData,
3465 subgroups::CheckResult checkResult)
3466 {
3467 return makeVertexFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, 0u, 0u);
3468 }
3469
// Runs a vertex-stage subgroup framebuffer test.
//
// A pipeline with a "vert" shader under test and a passthrough "fragment"
// shader draws 'width' points (width grows from 1 up to getMaxWidth()), one
// point per pixel of a 1-pixel-high image. The vertex shader under test writes
// its result into the point's color output, the image is copied to a host
// buffer and checkResult is invoked on the pixel data for every width.
//
// extraData entries must be images or UBOs (asserted below); SSBOs are not
// supported on this framebuffer path. vertexShaderStageCreateFlags is passed
// to the vertex VkPipelineShaderStageCreateInfo; requiredSubgroupSize forces a
// specific subgroup size for the vertex stage when non-zero.
tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTestRequiredSubgroupSize (Context&						context,
																			   vk::VkFormat					format,
																			   const SSBOData*				extraData,
																			   deUint32						extraDataCount,
																			   const void*					internalData,
																			   subgroups::CheckResult		checkResult,
																			   const deUint32				vertexShaderStageCreateFlags,
																			   const deUint32				requiredSubgroupSize)
{
	const DeviceInterface&					vk						= context.getDeviceInterface();
	const VkDevice							device					= context.getDevice();
	const VkQueue							queue					= context.getUniversalQueue();
	const deUint32							maxWidth				= getMaxWidth();
	const deUint32							queueFamilyIndex		= context.getUniversalQueueFamilyIndex();
	vector<de::SharedPtr<BufferOrImage> >	inputBuffers			(extraDataCount);
	DescriptorSetLayoutBuilder				layoutBuilder;
	const Unique<VkShaderModule>			vertexShaderModule		(createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
	const Unique<VkShaderModule>			fragmentShaderModule	(createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
	const Unique<VkRenderPass>				renderPass				(makeRenderPass(context, format));
	// One vec4 position per vertex, fetched per-vertex from the vertex buffer.
	const VkVertexInputBindingDescription	vertexInputBinding		=
	{
		0u,											// binding;
		static_cast<deUint32>(sizeof(tcu::Vec4)),	// stride;
		VK_VERTEX_INPUT_RATE_VERTEX					// inputRate
	};
	const VkVertexInputAttributeDescription	vertexInputAttribute	=
	{
		0u,
		0u,
		VK_FORMAT_R32G32B32A32_SFLOAT,
		0u
	};

	// Create and initialize the caller-supplied extra data resources.
	for (deUint32 i = 0u; i < extraDataCount; i++)
	{
		if (extraData[i].isImage())
		{
			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
		}
		else
		{
			// Only UBOs are supported as buffer inputs on this path.
			DE_ASSERT(extraData[i].isUBO());
			vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
		}
		const Allocation& alloc = inputBuffers[i]->getAllocation();
		initializeMemory(context, alloc, extraData[i]);
	}

	// All extra data resources are visible to the vertex stage only.
	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
		layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_VERTEX_BIT, DE_NULL);

	const Unique<VkDescriptorSetLayout>		descriptorSetLayout		(layoutBuilder.build(vk, device));

	const Unique<VkPipelineLayout>			pipelineLayout			(makePipelineLayout(vk, device, *descriptorSetLayout));

	// Slot 0 (vertex) carries the required subgroup size; other stages are unconstrained.
	const deUint32							requiredSubgroupSizes[5] = {requiredSubgroupSize, 0u, 0u, 0u, 0u};
	const Unique<VkPipeline>				pipeline				(makeGraphicsPipeline(context, *pipelineLayout,
																	  VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
																	  *vertexShaderModule, *fragmentShaderModule,
																	  DE_NULL, DE_NULL, DE_NULL,
																	  *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
																	  &vertexInputBinding, &vertexInputAttribute, true, format,
																	  vertexShaderStageCreateFlags, 0u, 0u, 0u, 0u,
																	  requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
	DescriptorPoolBuilder					poolBuilder;
	DescriptorSetUpdateBuilder				updateBuilder;


	for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
		poolBuilder.addType(inputBuffers[ndx]->getType());

	Move <VkDescriptorPool>					descriptorPool;
	Move <VkDescriptorSet>					descriptorSet;

	if (extraDataCount > 0)
	{
		descriptorPool	= poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
		descriptorSet	= makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
	}

	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
	{
		const Allocation& alloc = inputBuffers[ndx]->getAllocation();
		initializeMemory(context, alloc, extraData[ndx]);
	}

	// Write one descriptor per extra data resource, bound at its index.
	for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
	{
		if (inputBuffers[buffersNdx]->isImage())
		{
			VkDescriptorImageInfo info =
				makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
										inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);

			updateBuilder.writeSingle(*descriptorSet,
										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
										inputBuffers[buffersNdx]->getType(), &info);
		}
		else
		{
			VkDescriptorBufferInfo info =
				makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
										 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());

			updateBuilder.writeSingle(*descriptorSet,
										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
										inputBuffers[buffersNdx]->getType(), &info);
		}
	}
	updateBuilder.update(vk, device);

	const Unique<VkCommandPool>				cmdPool					(makeCommandPool(vk, device, queueFamilyIndex));

	const deUint32							subgroupSize			= getSubgroupSize(context);

	const Unique<VkCommandBuffer>			cmdBuffer				(makeCommandBuffer(context, *cmdPool));

	const vk::VkDeviceSize					vertexBufferSize		= maxWidth * sizeof(tcu::Vec4);
	Buffer									vertexBuffer			(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);

	unsigned								totalIterations			= 0u;
	unsigned								failedIterations		= 0u;

	// Render target; its contents are only inspected through the copied buffer below.
	Image									discardableImage		(context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);

	{
		// Fill the vertex buffer with one point per pixel, centered in its pixel
		// across the [-1, 1] NDC range.
		const Allocation&		alloc				= vertexBuffer.getAllocation();
		std::vector<tcu::Vec4>	data				(maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
		const float				pixelSize			= 2.0f / static_cast<float>(maxWidth);
		float					leftHandPosition	= -1.0f;

		for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
		{
			data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
			leftHandPosition += pixelSize;
		}

		deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
		flushAlloc(vk, device, alloc);
	}

	const Unique<VkFramebuffer>				framebuffer				(makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
	const VkViewport						viewport				= makeViewport(maxWidth, 1u);
	const VkRect2D							scissor					= makeRect2D(maxWidth, 1u);
	const vk::VkDeviceSize					imageResultSize			= tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
	// Host-readable staging buffer for the rendered row of pixels.
	Buffer									imageBufferResult		(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
	const VkDeviceSize						vertexBufferOffset		= 0u;

	for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
	{
		totalIterations++;

		// re-initialize the extra data before every draw
		for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
		{
			const Allocation& alloc = inputBuffers[ndx]->getAllocation();
			initializeMemory(context, alloc, extraData[ndx]);
		}

		beginCommandBuffer(vk, *cmdBuffer);
		{
			vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);

			vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);

			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));

			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);

			if (extraDataCount > 0)
			{
				vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
					&descriptorSet.get(), 0u, DE_NULL);
			}

			vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);

			vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);

			endRenderPass(vk, *cmdBuffer);

			// Copy the full maxWidth row back; checkResult only inspects the first 'width' pixels.
			copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);

			endCommandBuffer(vk, *cmdBuffer);

			submitCommandsAndWait(vk, device, queue, *cmdBuffer);
		}
		context.resetCommandPoolForVKSC(device, *cmdPool);

		{
			const Allocation& allocResult = imageBufferResult.getAllocation();
			invalidateAlloc(vk, device, allocResult);

			std::vector<const void*> datas;
			datas.push_back(allocResult.getHostPtr());
			if (!checkResult(internalData, datas, width, subgroupSize))
				failedIterations++;
		}
	}

	if (0 < failedIterations)
	{
		unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);

		context.getTestContext().getLog()
			<< TestLog::Message << valuesPassed << " / "
			<< totalIterations << " values passed" << TestLog::EndMessage;

		return tcu::TestStatus::fail("Failed!");
	}

	return tcu::TestStatus::pass("OK");
}
3683
makeFragmentFrameBufferTest(Context & context,VkFormat format,const SSBOData * extraDatas,deUint32 extraDatasCount,const void * internalData,CheckResultFragment checkResult)3684 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTest (Context& context,
3685 VkFormat format,
3686 const SSBOData* extraDatas,
3687 deUint32 extraDatasCount,
3688 const void* internalData,
3689 CheckResultFragment checkResult)
3690 {
3691 return makeFragmentFrameBufferTestRequiredSubgroupSize(context, format, extraDatas, extraDatasCount, internalData, checkResult, 0u, 0u);
3692 }
3693
makeFragmentFrameBufferTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * extraDatas,deUint32 extraDatasCount,const void * internalData,CheckResultFragment checkResult,const deUint32 fragmentShaderStageCreateFlags,const deUint32 requiredSubgroupSize)3694 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTestRequiredSubgroupSize (Context& context,
3695 VkFormat format,
3696 const SSBOData* extraDatas,
3697 deUint32 extraDatasCount,
3698 const void* internalData,
3699 CheckResultFragment checkResult,
3700 const deUint32 fragmentShaderStageCreateFlags,
3701 const deUint32 requiredSubgroupSize)
3702 {
3703 const DeviceInterface& vk = context.getDeviceInterface();
3704 const VkDevice device = context.getDevice();
3705 const VkQueue queue = context.getUniversalQueue();
3706 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
3707 const Unique<VkShaderModule> vertexShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
3708 const Unique<VkShaderModule> fragmentShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
3709 std::vector< de::SharedPtr<BufferOrImage> > inputBuffers (extraDatasCount);
3710
3711 for (deUint32 i = 0; i < extraDatasCount; i++)
3712 {
3713 if (extraDatas[i].isImage())
3714 {
3715 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[i].numElements), 1, extraDatas[i].format));
3716 }
3717 else
3718 {
3719 DE_ASSERT(extraDatas[i].isUBO());
3720
3721 const vk::VkDeviceSize size = getElementSizeInBytes(extraDatas[i].format, extraDatas[i].layout) * extraDatas[i].numElements;
3722
3723 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
3724 }
3725
3726 const Allocation& alloc = inputBuffers[i]->getAllocation();
3727
3728 initializeMemory(context, alloc, extraDatas[i]);
3729 }
3730
3731 DescriptorSetLayoutBuilder layoutBuilder;
3732
3733 for (deUint32 i = 0; i < extraDatasCount; i++)
3734 {
3735 layoutBuilder.addBinding(inputBuffers[i]->getType(), 1, VK_SHADER_STAGE_FRAGMENT_BIT, DE_NULL);
3736 }
3737
3738 const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
3739 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
3740 const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
3741 const deUint32 requiredSubgroupSizes[5] = {0u, 0u, 0u, 0u, requiredSubgroupSize};
3742 const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context,
3743 *pipelineLayout,
3744 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
3745 *vertexShaderModule,
3746 *fragmentShaderModule,
3747 DE_NULL,
3748 DE_NULL,
3749 DE_NULL,
3750 *renderPass,
3751 VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
3752 DE_NULL,
3753 DE_NULL,
3754 true,
3755 VK_FORMAT_R32G32B32A32_SFLOAT,
3756 0u,
3757 0u,
3758 0u,
3759 0u,
3760 fragmentShaderStageCreateFlags,
3761 requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
3762 DescriptorPoolBuilder poolBuilder;
3763
3764 // To stop validation complaining, always add at least one type to pool.
3765 poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3766 for (deUint32 i = 0; i < extraDatasCount; i++)
3767 {
3768 poolBuilder.addType(inputBuffers[i]->getType());
3769 }
3770
3771 Move<VkDescriptorPool> descriptorPool;
3772 // Create descriptor set
3773 Move<VkDescriptorSet> descriptorSet;
3774
3775 if (extraDatasCount > 0)
3776 {
3777 descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3778
3779 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3780 }
3781
3782 DescriptorSetUpdateBuilder updateBuilder;
3783
3784 for (deUint32 i = 0; i < extraDatasCount; i++)
3785 {
3786 if (inputBuffers[i]->isImage())
3787 {
3788 const VkDescriptorImageInfo info = makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(), inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3789
3790 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i), inputBuffers[i]->getType(), &info);
3791 }
3792 else
3793 {
3794 const VkDescriptorBufferInfo info = makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, inputBuffers[i]->getAsBuffer()->getSize());
3795
3796 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i), inputBuffers[i]->getType(), &info);
3797 }
3798 }
3799
3800 if (extraDatasCount > 0)
3801 updateBuilder.update(vk, device);
3802
3803 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
3804 const deUint32 subgroupSize = getSubgroupSize(context);
3805 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
3806 unsigned totalIterations = 0;
3807 unsigned failedIterations = 0;
3808
3809 for (deUint32 width = 8; width <= subgroupSize; width *= 2)
3810 {
3811 for (deUint32 height = 8; height <= subgroupSize; height *= 2)
3812 {
3813 totalIterations++;
3814
3815 // re-init the data
3816 for (deUint32 i = 0; i < extraDatasCount; i++)
3817 {
3818 const Allocation& alloc = inputBuffers[i]->getAllocation();
3819
3820 initializeMemory(context, alloc, extraDatas[i]);
3821 }
3822
3823 const VkDeviceSize formatSize = getFormatSizeInBytes(format);
3824 const VkDeviceSize resultImageSizeInBytes = width * height * formatSize;
3825 Image resultImage (context, width, height, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3826 Buffer resultBuffer (context, resultImageSizeInBytes, VK_IMAGE_USAGE_TRANSFER_DST_BIT);
3827 const Unique<VkFramebuffer> framebuffer (makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), width, height));
3828 VkViewport viewport = makeViewport(width, height);
3829 VkRect2D scissor = {{0, 0}, {width, height}};
3830
3831 beginCommandBuffer(vk, *cmdBuffer);
3832
3833 vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3834
3835 vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3836
3837 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, width, height), tcu::Vec4(0.0f));
3838
3839 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3840
3841 if (extraDatasCount > 0)
3842 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
3843
3844 vk.cmdDraw(*cmdBuffer, 4, 1, 0, 0);
3845
3846 endRenderPass(vk, *cmdBuffer);
3847
3848 copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), resultBuffer.getBuffer(), tcu::IVec2(width, height), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3849
3850 endCommandBuffer(vk, *cmdBuffer);
3851
3852 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3853
3854 std::vector<const void*> datas;
3855 {
3856 const Allocation& resultAlloc = resultBuffer.getAllocation();
3857 invalidateAlloc(vk, device, resultAlloc);
3858
3859 // we always have our result data first
3860 datas.push_back(resultAlloc.getHostPtr());
3861 }
3862
3863 if (!checkResult(internalData, datas, width, height, subgroupSize))
3864 {
3865 failedIterations++;
3866 }
3867
3868 context.resetCommandPoolForVKSC(device, *cmdPool);
3869 }
3870 }
3871
3872 if (0 < failedIterations)
3873 {
3874 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3875
3876 context.getTestContext().getLog()
3877 << TestLog::Message << valuesPassed << " / "
3878 << totalIterations << " values passed" << TestLog::EndMessage;
3879
3880 return tcu::TestStatus::fail("Failed!");
3881 }
3882
3883 return tcu::TestStatus::pass("OK");
3884 }
3885
makeComputePipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderModule shaderModule,const deUint32 pipelineShaderStageFlags,const deUint32 pipelineCreateFlags,VkPipeline basePipelineHandle,deUint32 localSizeX,deUint32 localSizeY,deUint32 localSizeZ,deUint32 requiredSubgroupSize)3886 Move<VkPipeline> makeComputePipeline (Context& context,
3887 const VkPipelineLayout pipelineLayout,
3888 const VkShaderModule shaderModule,
3889 const deUint32 pipelineShaderStageFlags,
3890 const deUint32 pipelineCreateFlags,
3891 VkPipeline basePipelineHandle,
3892 deUint32 localSizeX,
3893 deUint32 localSizeY,
3894 deUint32 localSizeZ,
3895 deUint32 requiredSubgroupSize)
3896 {
3897 const deUint32 localSize[3] = {localSizeX, localSizeY, localSizeZ};
3898 const vk::VkSpecializationMapEntry entries[3] =
3899 {
3900 {0, sizeof(deUint32) * 0, sizeof(deUint32)},
3901 {1, sizeof(deUint32) * 1, sizeof(deUint32)},
3902 {2, static_cast<deUint32>(sizeof(deUint32) * 2), sizeof(deUint32)},
3903 };
3904 const vk::VkSpecializationInfo info =
3905 {
3906 /* mapEntryCount = */ 3,
3907 /* pMapEntries = */ entries,
3908 /* dataSize = */ sizeof(localSize),
3909 /* pData = */ localSize
3910 };
3911 const vk::VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroupSizeCreateInfo =
3912 {
3913 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, // VkStructureType sType;
3914 DE_NULL, // void* pNext;
3915 requiredSubgroupSize // uint32_t requiredSubgroupSize;
3916 };
3917 const vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
3918 {
3919 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
3920 (requiredSubgroupSize != 0u ? &subgroupSizeCreateInfo : DE_NULL), // const void* pNext;
3921 pipelineShaderStageFlags, // VkPipelineShaderStageCreateFlags flags;
3922 VK_SHADER_STAGE_COMPUTE_BIT, // VkShaderStageFlagBits stage;
3923 shaderModule, // VkShaderModule module;
3924 "main", // const char* pName;
3925 &info, // const VkSpecializationInfo* pSpecializationInfo;
3926 };
3927 const vk::VkComputePipelineCreateInfo pipelineCreateInfo =
3928 {
3929 VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType sType;
3930 DE_NULL, // const void* pNext;
3931 pipelineCreateFlags, // VkPipelineCreateFlags flags;
3932 pipelineShaderStageParams, // VkPipelineShaderStageCreateInfo stage;
3933 pipelineLayout, // VkPipelineLayout layout;
3934 #ifndef CTS_USES_VULKANSC
3935 basePipelineHandle, // VkPipeline basePipelineHandle;
3936 -1, // deInt32 basePipelineIndex;
3937 #else
3938 DE_NULL, // VkPipeline basePipelineHandle;
3939 0, // deInt32 basePipelineIndex;
3940 #endif // CTS_USES_VULKANSC
3941 };
3942 static_cast<void>(basePipelineHandle);
3943
3944 return createComputePipeline(context.getDeviceInterface(), context.getDevice(), DE_NULL, &pipelineCreateInfo);
3945 }
3946
3947 #ifndef CTS_USES_VULKANSC
makeMeshPipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderModule taskModule,const VkShaderModule meshModule,const deUint32 pipelineShaderStageFlags,const deUint32 pipelineCreateFlags,VkPipeline basePipelineHandle,deUint32 localSizeX,deUint32 localSizeY,deUint32 localSizeZ,deUint32 requiredSubgroupSize,const VkRenderPass renderPass)3948 Move<VkPipeline> makeMeshPipeline (Context& context,
3949 const VkPipelineLayout pipelineLayout,
3950 const VkShaderModule taskModule,
3951 const VkShaderModule meshModule,
3952 const deUint32 pipelineShaderStageFlags,
3953 const deUint32 pipelineCreateFlags,
3954 VkPipeline basePipelineHandle,
3955 deUint32 localSizeX,
3956 deUint32 localSizeY,
3957 deUint32 localSizeZ,
3958 deUint32 requiredSubgroupSize,
3959 const VkRenderPass renderPass)
3960 {
3961 const deUint32 localSize[3] = {localSizeX, localSizeY, localSizeZ};
3962 const vk::VkSpecializationMapEntry entries[3] =
3963 {
3964 {0, sizeof(deUint32) * 0, sizeof(deUint32)},
3965 {1, sizeof(deUint32) * 1, sizeof(deUint32)},
3966 {2, static_cast<deUint32>(sizeof(deUint32) * 2), sizeof(deUint32)},
3967 };
3968 const vk::VkSpecializationInfo info =
3969 {
3970 /* mapEntryCount = */ 3,
3971 /* pMapEntries = */ entries,
3972 /* dataSize = */ sizeof(localSize),
3973 /* pData = */ localSize
3974 };
3975 const vk::VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroupSizeCreateInfo =
3976 {
3977 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, // VkStructureType sType;
3978 DE_NULL, // void* pNext;
3979 requiredSubgroupSize // uint32_t requiredSubgroupSize;
3980 };
3981
3982 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT* pSubgroupSizeCreateInfo = ((requiredSubgroupSize != 0u) ? &subgroupSizeCreateInfo : nullptr);
3983
3984 std::vector<VkPipelineShaderStageCreateInfo> shaderStageParams;
3985 vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
3986 {
3987 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
3988 nullptr, // const void* pNext;
3989 pipelineShaderStageFlags, // VkPipelineShaderStageCreateFlags flags;
3990 VK_SHADER_STAGE_FLAG_BITS_MAX_ENUM, // VkShaderStageFlagBits stage;
3991 DE_NULL, // VkShaderModule module;
3992 "main", // const char* pName;
3993 &info, // const VkSpecializationInfo* pSpecializationInfo;
3994 };
3995
3996 if (taskModule != DE_NULL)
3997 {
3998 pipelineShaderStageParams.module = taskModule;
3999 pipelineShaderStageParams.pNext = pSubgroupSizeCreateInfo;
4000 pipelineShaderStageParams.stage = VK_SHADER_STAGE_TASK_BIT_EXT;
4001 shaderStageParams.push_back(pipelineShaderStageParams);
4002 }
4003
4004 if (meshModule != DE_NULL)
4005 {
4006 pipelineShaderStageParams.module = meshModule;
4007 pipelineShaderStageParams.pNext = ((taskModule == DE_NULL) ? pSubgroupSizeCreateInfo : nullptr);
4008 pipelineShaderStageParams.stage = VK_SHADER_STAGE_MESH_BIT_EXT;
4009 shaderStageParams.push_back(pipelineShaderStageParams);
4010 }
4011
4012 const std::vector<VkViewport> viewports (1u, makeViewport(1u, 1u));
4013 const std::vector<VkRect2D> scissors (1u, makeRect2D(1u, 1u));
4014
4015 return makeGraphicsPipeline(context.getDeviceInterface(), context.getDevice(), basePipelineHandle, pipelineLayout, pipelineCreateFlags, shaderStageParams, renderPass, viewports, scissors);
4016 }
4017 #endif // CTS_USES_VULKANSC
4018
makeComputeOrMeshTestRequiredSubgroupSize(ComputeLike testType,Context & context,VkFormat format,const vkt::subgroups::SSBOData * inputs,deUint32 inputsCount,const void * internalData,vkt::subgroups::CheckResultCompute checkResult,const deUint32 pipelineShaderStageCreateFlags,const deUint32 numWorkgroups[3],const deBool isRequiredSubgroupSize,const deUint32 subgroupSize,const deUint32 localSizesToTest[][3],const deUint32 localSizesToTestCount)4019 tcu::TestStatus makeComputeOrMeshTestRequiredSubgroupSize (ComputeLike testType,
4020 Context& context,
4021 VkFormat format,
4022 const vkt::subgroups::SSBOData* inputs,
4023 deUint32 inputsCount,
4024 const void* internalData,
4025 vkt::subgroups::CheckResultCompute checkResult,
4026 const deUint32 pipelineShaderStageCreateFlags,
4027 const deUint32 numWorkgroups[3],
4028 const deBool isRequiredSubgroupSize,
4029 const deUint32 subgroupSize,
4030 const deUint32 localSizesToTest[][3],
4031 const deUint32 localSizesToTestCount)
4032 {
4033 const DeviceInterface& vk = context.getDeviceInterface();
4034 const VkDevice device = context.getDevice();
4035 const VkQueue queue = context.getUniversalQueue();
4036 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
4037 #ifndef CTS_USES_VULKANSC
4038 const VkPhysicalDeviceSubgroupSizeControlProperties& subgroupSizeControlProperties = context.getSubgroupSizeControlProperties();
4039 #else
4040 const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& subgroupSizeControlProperties = context.getSubgroupSizeControlPropertiesEXT();
4041 #endif // CTS_USES_VULKANSC
4042 const VkDeviceSize elementSize = getFormatSizeInBytes(format);
4043 const VkDeviceSize maxSubgroupSize = isRequiredSubgroupSize
4044 ? deMax32(subgroupSizeControlProperties.maxSubgroupSize, vkt::subgroups::maxSupportedSubgroupSize())
4045 : vkt::subgroups::maxSupportedSubgroupSize();
4046 const VkDeviceSize resultBufferSize = maxSubgroupSize * maxSubgroupSize * maxSubgroupSize;
4047 const VkDeviceSize resultBufferSizeInBytes = resultBufferSize * elementSize;
4048 Buffer resultBuffer (context, resultBufferSizeInBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
4049 std::vector< de::SharedPtr<BufferOrImage> > inputBuffers (inputsCount);
4050 const auto shaderStageFlags = ((testType == ComputeLike::COMPUTE)
4051 ? VK_SHADER_STAGE_COMPUTE_BIT
4052 #ifndef CTS_USES_VULKANSC
4053 : (VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_TASK_BIT_EXT));
4054 #else
4055 : 0);
4056 #endif // CTS_USES_VULKANSC
4057 const auto pipelineBindPoint = ((testType == ComputeLike::COMPUTE)
4058 ? VK_PIPELINE_BIND_POINT_COMPUTE
4059 : VK_PIPELINE_BIND_POINT_GRAPHICS);
4060 const auto pipelineStage = ((testType == ComputeLike::COMPUTE)
4061 ? VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT
4062 #ifndef CTS_USES_VULKANSC
4063 : (VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT | VK_PIPELINE_STAGE_MESH_SHADER_BIT_EXT));
4064 #else
4065 : 0);
4066 #endif // CTS_USES_VULKANSC
4067 const auto renderArea = makeRect2D(1u, 1u);
4068
4069 std::vector<tcu::UVec3> usedLocalSizes;
4070 for (deUint32 i = 0; i < localSizesToTestCount; ++i)
4071 {
4072 usedLocalSizes.push_back(tcu::UVec3(localSizesToTest[i][0], localSizesToTest[i][1], localSizesToTest[i][2]));
4073 }
4074
4075 for (deUint32 i = 0; i < inputsCount; i++)
4076 {
4077 if (inputs[i].isImage())
4078 {
4079 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(inputs[i].numElements), 1, inputs[i].format));
4080 }
4081 else
4082 {
4083 const auto usage = (inputs[i].isUBO() ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT : VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
4084 const auto size = getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
4085 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, usage));
4086 }
4087
4088 const Allocation& alloc = inputBuffers[i]->getAllocation();
4089
4090 initializeMemory(context, alloc, inputs[i]);
4091 }
4092
4093 DescriptorSetLayoutBuilder layoutBuilder;
4094 layoutBuilder.addBinding(
4095 resultBuffer.getType(), 1, shaderStageFlags, DE_NULL);
4096
4097 for (deUint32 i = 0; i < inputsCount; i++)
4098 {
4099 layoutBuilder.addBinding(
4100 inputBuffers[i]->getType(), 1, shaderStageFlags, DE_NULL);
4101 }
4102
4103 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
4104 layoutBuilder.build(vk, device));
4105
4106 Move<VkShaderModule> compShader;
4107 Move<VkShaderModule> meshShader;
4108 Move<VkShaderModule> taskShader;
4109 const auto& binaries = context.getBinaryCollection();
4110
4111 if (testType == ComputeLike::COMPUTE)
4112 {
4113 compShader = createShaderModule(vk, device, binaries.get("comp"));
4114 }
4115 else if (testType == ComputeLike::MESH)
4116 {
4117 meshShader = createShaderModule(vk, device, binaries.get("mesh"));
4118 if (binaries.contains("task"))
4119 taskShader = createShaderModule(vk, device, binaries.get("task"));
4120 }
4121 else
4122 {
4123 DE_ASSERT(false);
4124 }
4125
4126 const Unique<VkPipelineLayout> pipelineLayout(
4127 makePipelineLayout(vk, device, *descriptorSetLayout));
4128
4129 DescriptorPoolBuilder poolBuilder;
4130
4131 poolBuilder.addType(resultBuffer.getType());
4132
4133 for (deUint32 i = 0; i < inputsCount; i++)
4134 {
4135 poolBuilder.addType(inputBuffers[i]->getType());
4136 }
4137
4138 const Unique<VkDescriptorPool> descriptorPool (poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
4139 const Unique<VkDescriptorSet> descriptorSet (makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
4140 const VkDescriptorBufferInfo resultDescriptorInfo = makeDescriptorBufferInfo(resultBuffer.getBuffer(), 0ull, resultBufferSizeInBytes);
4141 DescriptorSetUpdateBuilder updateBuilder;
4142
4143 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
4144
4145 for (deUint32 i = 0; i < inputsCount; i++)
4146 {
4147 if (inputBuffers[i]->isImage())
4148 {
4149 const VkDescriptorImageInfo info = makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(), inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
4150
4151 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i + 1), inputBuffers[i]->getType(), &info);
4152 }
4153 else
4154 {
4155 vk::VkDeviceSize size = getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
4156 VkDescriptorBufferInfo info = makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, size);
4157
4158 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i + 1), inputBuffers[i]->getType(), &info);
4159 }
4160 }
4161
4162 updateBuilder.update(vk, device);
4163
4164 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
4165 unsigned totalIterations = 0;
4166 unsigned failedIterations = 0;
4167 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
4168 std::vector<de::SharedPtr<Move<VkPipeline>>> pipelines (localSizesToTestCount);
4169 const auto reqSubgroupSize = (isRequiredSubgroupSize ? subgroupSize : 0u);
4170 Move<VkRenderPass> renderPass;
4171 Move<VkFramebuffer> framebuffer;
4172
4173 if (testType == ComputeLike::MESH)
4174 {
4175 renderPass = makeRenderPass(vk, device);
4176 framebuffer = makeFramebuffer(vk, device, renderPass.get(), 0u, nullptr, renderArea.extent.width, renderArea.extent.height);
4177 }
4178
4179 context.getTestContext().touchWatchdog();
4180 {
4181 if (testType == ComputeLike::COMPUTE)
4182 {
4183 pipelines[0] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeComputePipeline(context,
4184 *pipelineLayout,
4185 *compShader,
4186 pipelineShaderStageCreateFlags,
4187 #ifndef CTS_USES_VULKANSC
4188 VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT,
4189 #else
4190 0u,
4191 #endif // CTS_USES_VULKANSC
4192 (VkPipeline) DE_NULL,
4193 usedLocalSizes[0][0],
4194 usedLocalSizes[0][1],
4195 usedLocalSizes[0][2],
4196 reqSubgroupSize)));
4197 }
4198 #ifndef CTS_USES_VULKANSC
4199 else if (testType == ComputeLike::MESH)
4200 {
4201 pipelines[0] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeMeshPipeline(context,
4202 pipelineLayout.get(),
4203 taskShader.get(),
4204 meshShader.get(),
4205 pipelineShaderStageCreateFlags,
4206 VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT,
4207 DE_NULL,
4208 usedLocalSizes[0][0],
4209 usedLocalSizes[0][1],
4210 usedLocalSizes[0][2],
4211 reqSubgroupSize,
4212 renderPass.get())));
4213 }
4214 #endif // CTS_USES_VULKANSC
4215 else
4216 {
4217 DE_ASSERT(false);
4218 }
4219 }
4220 context.getTestContext().touchWatchdog();
4221
4222 for (deUint32 index = 1; index < (localSizesToTestCount - 1); index++)
4223 {
4224 const deUint32 nextX = usedLocalSizes[index][0];
4225 const deUint32 nextY = usedLocalSizes[index][1];
4226 const deUint32 nextZ = usedLocalSizes[index][2];
4227
4228 context.getTestContext().touchWatchdog();
4229 {
4230 if (testType == ComputeLike::COMPUTE)
4231 {
4232 pipelines[index] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeComputePipeline(context,
4233 *pipelineLayout,
4234 *compShader,
4235 pipelineShaderStageCreateFlags,
4236 #ifndef CTS_USES_VULKANSC
4237 VK_PIPELINE_CREATE_DERIVATIVE_BIT,
4238 #else
4239 0u,
4240 #endif // CTS_USES_VULKANSC
4241 **pipelines[0],
4242 nextX,
4243 nextY,
4244 nextZ,
4245 reqSubgroupSize)));
4246 }
4247 #ifndef CTS_USES_VULKANSC
4248 else if (testType == ComputeLike::MESH)
4249 {
4250 pipelines[index] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeMeshPipeline(context,
4251 pipelineLayout.get(),
4252 taskShader.get(),
4253 meshShader.get(),
4254 pipelineShaderStageCreateFlags,
4255 VK_PIPELINE_CREATE_DERIVATIVE_BIT,
4256 pipelines[0].get()->get(),
4257 nextX,
4258 nextY,
4259 nextZ,
4260 reqSubgroupSize,
4261 renderPass.get())));
4262 }
4263 #endif // CTS_USES_VULKANSC
4264 else
4265 {
4266 DE_ASSERT(false);
4267 }
4268 }
4269 context.getTestContext().touchWatchdog();
4270 }
4271
4272 for (deUint32 index = 0; index < (localSizesToTestCount - 1); index++)
4273 {
4274 // we are running one test
4275 totalIterations++;
4276
4277 beginCommandBuffer(vk, *cmdBuffer);
4278 {
4279 if (testType == ComputeLike::MESH)
4280 beginRenderPass(vk, *cmdBuffer, renderPass.get(), framebuffer.get(), renderArea);
4281
4282 vk.cmdBindPipeline(*cmdBuffer, pipelineBindPoint, **pipelines[index]);
4283
4284 vk.cmdBindDescriptorSets(*cmdBuffer, pipelineBindPoint, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
4285
4286 if (testType == ComputeLike::COMPUTE)
4287 vk.cmdDispatch(*cmdBuffer, numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
4288 #ifndef CTS_USES_VULKANSC
4289 else if (testType == ComputeLike::MESH)
4290 vk.cmdDrawMeshTasksEXT(*cmdBuffer, numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
4291 //vk.cmdDrawMeshTasksNV(*cmdBuffer, numWorkgroups[0], 0);
4292 #endif // CTS_USES_VULKANSC
4293 else
4294 DE_ASSERT(false);
4295
4296 if (testType == ComputeLike::MESH)
4297 endRenderPass(vk, *cmdBuffer);
4298 }
4299
4300 // Make shader writes available.
4301 const auto postShaderBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
4302 vk.cmdPipelineBarrier(*cmdBuffer, pipelineStage, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u, &postShaderBarrier, 0u, nullptr, 0u, nullptr);
4303
4304 endCommandBuffer(vk, *cmdBuffer);
4305
4306 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
4307
4308 std::vector<const void*> datas;
4309
4310 {
4311 const Allocation& resultAlloc = resultBuffer.getAllocation();
4312 invalidateAlloc(vk, device, resultAlloc);
4313
4314 // we always have our result data first
4315 datas.push_back(resultAlloc.getHostPtr());
4316 }
4317
4318 for (deUint32 i = 0; i < inputsCount; i++)
4319 {
4320 if (!inputBuffers[i]->isImage())
4321 {
4322 const Allocation& resultAlloc = inputBuffers[i]->getAllocation();
4323 invalidateAlloc(vk, device, resultAlloc);
4324
4325 // we always have our result data first
4326 datas.push_back(resultAlloc.getHostPtr());
4327 }
4328 }
4329
4330 if (!checkResult(internalData, datas, numWorkgroups, usedLocalSizes[index].getPtr(), subgroupSize))
4331 {
4332 failedIterations++;
4333 }
4334
4335 context.resetCommandPoolForVKSC(device, *cmdPool);
4336 }
4337
4338 if (0 < failedIterations)
4339 {
4340 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
4341
4342 context.getTestContext().getLog()
4343 << TestLog::Message << valuesPassed << " / "
4344 << totalIterations << " values passed" << TestLog::EndMessage;
4345
4346 return tcu::TestStatus::fail("Failed!");
4347 }
4348
4349 return tcu::TestStatus::pass("OK");
4350 }
4351
makeComputeTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * inputs,deUint32 inputsCount,const void * internalData,CheckResultCompute checkResult,const deUint32 pipelineShaderStageCreateFlags,const deUint32 numWorkgroups[3],const deBool isRequiredSubgroupSize,const deUint32 subgroupSize,const deUint32 localSizesToTest[][3],const deUint32 localSizesToTestCount)4352 tcu::TestStatus vkt::subgroups::makeComputeTestRequiredSubgroupSize (Context& context,
4353 VkFormat format,
4354 const SSBOData* inputs,
4355 deUint32 inputsCount,
4356 const void* internalData,
4357 CheckResultCompute checkResult,
4358 const deUint32 pipelineShaderStageCreateFlags,
4359 const deUint32 numWorkgroups[3],
4360 const deBool isRequiredSubgroupSize,
4361 const deUint32 subgroupSize,
4362 const deUint32 localSizesToTest[][3],
4363 const deUint32 localSizesToTestCount)
4364 {
4365 return makeComputeOrMeshTestRequiredSubgroupSize(
4366 ComputeLike::COMPUTE,
4367 context,
4368 format,
4369 inputs,
4370 inputsCount,
4371 internalData,
4372 checkResult,
4373 pipelineShaderStageCreateFlags,
4374 numWorkgroups,
4375 isRequiredSubgroupSize,
4376 subgroupSize,
4377 localSizesToTest,
4378 localSizesToTestCount);
4379 }
4380
makeMeshTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * inputs,deUint32 inputsCount,const void * internalData,CheckResultCompute checkResult,const deUint32 pipelineShaderStageCreateFlags,const deUint32 numWorkgroups[3],const deBool isRequiredSubgroupSize,const deUint32 subgroupSize,const deUint32 localSizesToTest[][3],const deUint32 localSizesToTestCount)4381 tcu::TestStatus vkt::subgroups::makeMeshTestRequiredSubgroupSize (Context& context,
4382 VkFormat format,
4383 const SSBOData* inputs,
4384 deUint32 inputsCount,
4385 const void* internalData,
4386 CheckResultCompute checkResult,
4387 const deUint32 pipelineShaderStageCreateFlags,
4388 const deUint32 numWorkgroups[3],
4389 const deBool isRequiredSubgroupSize,
4390 const deUint32 subgroupSize,
4391 const deUint32 localSizesToTest[][3],
4392 const deUint32 localSizesToTestCount)
4393 {
4394 return makeComputeOrMeshTestRequiredSubgroupSize(
4395 ComputeLike::MESH,
4396 context,
4397 format,
4398 inputs,
4399 inputsCount,
4400 internalData,
4401 checkResult,
4402 pipelineShaderStageCreateFlags,
4403 numWorkgroups,
4404 isRequiredSubgroupSize,
4405 subgroupSize,
4406 localSizesToTest,
4407 localSizesToTestCount);
4408 }
4409
makeComputeOrMeshTest(ComputeLike testType,Context & context,VkFormat format,const vkt::subgroups::SSBOData * inputs,deUint32 inputsCount,const void * internalData,vkt::subgroups::CheckResultCompute checkResult,deUint32 requiredSubgroupSize,const deUint32 pipelineShaderStageCreateFlags)4410 tcu::TestStatus makeComputeOrMeshTest (ComputeLike testType,
4411 Context& context,
4412 VkFormat format,
4413 const vkt::subgroups::SSBOData* inputs,
4414 deUint32 inputsCount,
4415 const void* internalData,
4416 vkt::subgroups::CheckResultCompute checkResult,
4417 deUint32 requiredSubgroupSize,
4418 const deUint32 pipelineShaderStageCreateFlags)
4419 {
4420 const uint32_t numWorkgroups[3] = {4, 2, 2};
4421 const bool isRequiredSubgroupSize = (requiredSubgroupSize != 0u);
4422 const uint32_t subgroupSize = (isRequiredSubgroupSize ? requiredSubgroupSize : vkt::subgroups::getSubgroupSize(context));
4423
4424 const deUint32 localSizesToTestCount = 8;
4425 deUint32 localSizesToTest[localSizesToTestCount][3] =
4426 {
4427 {1, 1, 1},
4428 {subgroupSize, 1, 1},
4429 {1, subgroupSize, 1},
4430 {1, 1, subgroupSize},
4431 {32, 4, 1},
4432 {1, 4, 32},
4433 {3, 5, 7},
4434 {1, 1, 1} // Isn't used, just here to make double buffering checks easier
4435 };
4436
4437 if (testType == ComputeLike::COMPUTE)
4438 return makeComputeTestRequiredSubgroupSize(context, format, inputs, inputsCount, internalData, checkResult, pipelineShaderStageCreateFlags,
4439 numWorkgroups, isRequiredSubgroupSize, subgroupSize, localSizesToTest, localSizesToTestCount);
4440 else
4441 return makeMeshTestRequiredSubgroupSize(context, format, inputs, inputsCount, internalData, checkResult, pipelineShaderStageCreateFlags,
4442 numWorkgroups, isRequiredSubgroupSize, subgroupSize, localSizesToTest, localSizesToTestCount);
4443 }
4444
makeComputeTest(Context & context,VkFormat format,const SSBOData * inputs,deUint32 inputsCount,const void * internalData,CheckResultCompute checkResult,deUint32 requiredSubgroupSize,const deUint32 pipelineShaderStageCreateFlags)4445 tcu::TestStatus vkt::subgroups::makeComputeTest (Context& context,
4446 VkFormat format,
4447 const SSBOData* inputs,
4448 deUint32 inputsCount,
4449 const void* internalData,
4450 CheckResultCompute checkResult,
4451 deUint32 requiredSubgroupSize,
4452 const deUint32 pipelineShaderStageCreateFlags)
4453 {
4454 return makeComputeOrMeshTest(ComputeLike::COMPUTE, context, format, inputs, inputsCount, internalData, checkResult, requiredSubgroupSize, pipelineShaderStageCreateFlags);
4455 }
4456
makeMeshTest(Context & context,VkFormat format,const SSBOData * inputs,deUint32 inputsCount,const void * internalData,CheckResultCompute checkResult,deUint32 requiredSubgroupSize,const deUint32 pipelineShaderStageCreateFlags)4457 tcu::TestStatus vkt::subgroups::makeMeshTest (Context& context,
4458 VkFormat format,
4459 const SSBOData* inputs,
4460 deUint32 inputsCount,
4461 const void* internalData,
4462 CheckResultCompute checkResult,
4463 deUint32 requiredSubgroupSize,
4464 const deUint32 pipelineShaderStageCreateFlags)
4465 {
4466 return makeComputeOrMeshTest(ComputeLike::MESH, context, format, inputs, inputsCount, internalData, checkResult, requiredSubgroupSize, pipelineShaderStageCreateFlags);
4467 }
4468
checkShaderStageSetValidity(const VkShaderStageFlags shaderStages)4469 static inline void checkShaderStageSetValidity (const VkShaderStageFlags shaderStages)
4470 {
4471 if (shaderStages == 0)
4472 TCU_THROW(InternalError, "Shader stage is not specified");
4473
4474 // It can actually be only 1 or 0.
4475 const deUint32 exclusivePipelinesCount = (isAllComputeStages(shaderStages) ? 1 : 0)
4476 + (isAllGraphicsStages(shaderStages) ? 1 : 0)
4477 #ifndef CTS_USES_VULKANSC
4478 + (isAllRayTracingStages(shaderStages) ? 1 : 0)
4479 + (isAllMeshShadingStages(shaderStages) ? 1 : 0)
4480 #endif // CTS_USES_VULKANSC
4481 ;
4482
4483 if (exclusivePipelinesCount != 1)
4484 TCU_THROW(InternalError, "Mix of shaders from different pipelines is detected");
4485 }
4486
supportedCheckShader(Context & context,const VkShaderStageFlags shaderStages)4487 void vkt::subgroups::supportedCheckShader (Context& context, const VkShaderStageFlags shaderStages)
4488 {
4489 checkShaderStageSetValidity(shaderStages);
4490
4491 if ((context.getSubgroupProperties().supportedStages & shaderStages) == 0)
4492 {
4493 if (isAllComputeStages(shaderStages))
4494 TCU_FAIL("Compute shader is required to support subgroup operations");
4495 else
4496 TCU_THROW(NotSupportedError, "Subgroup support is not available for test shader stage(s)");
4497 }
4498
4499 #ifndef CTS_USES_VULKANSC
4500 if ((VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) & shaderStages &&
4501 context.isDeviceFunctionalitySupported("VK_KHR_portability_subset") &&
4502 !context.getPortabilitySubsetFeatures().tessellationIsolines)
4503 {
4504 TCU_THROW(NotSupportedError, "VK_KHR_portability_subset: Tessellation iso lines are not supported by this implementation");
4505 }
4506 #endif // CTS_USES_VULKANSC
4507 }
4508
4509
4510 namespace vkt
4511 {
4512 namespace subgroups
4513 {
4514 typedef std::vector< de::SharedPtr<BufferOrImage> > vectorBufferOrImage;
4515
// Shader group indices used when building the ray tracing shader binding table.
enum ShaderGroups
{
	FIRST_GROUP		= 0,
	RAYGEN_GROUP	= FIRST_GROUP,	// ray generation shader group
	MISS_GROUP,						// miss shader group
	HIT_GROUP,						// hit group (any-hit/closest-hit/intersection)
	CALL_GROUP,						// callable shader group
	GROUP_COUNT						// total number of groups
};
4525
getAllRayTracingFormats()4526 const std::vector<vk::VkFormat> getAllRayTracingFormats()
4527 {
4528 std::vector<VkFormat> formats;
4529
4530 formats.push_back(VK_FORMAT_R8G8B8_SINT);
4531 formats.push_back(VK_FORMAT_R8_UINT);
4532 formats.push_back(VK_FORMAT_R8G8B8A8_UINT);
4533 formats.push_back(VK_FORMAT_R16G16B16_SINT);
4534 formats.push_back(VK_FORMAT_R16_UINT);
4535 formats.push_back(VK_FORMAT_R16G16B16A16_UINT);
4536 formats.push_back(VK_FORMAT_R32G32B32_SINT);
4537 formats.push_back(VK_FORMAT_R32_UINT);
4538 formats.push_back(VK_FORMAT_R32G32B32A32_UINT);
4539 formats.push_back(VK_FORMAT_R64G64B64_SINT);
4540 formats.push_back(VK_FORMAT_R64_UINT);
4541 formats.push_back(VK_FORMAT_R64G64B64A64_UINT);
4542 formats.push_back(VK_FORMAT_R16G16B16A16_SFLOAT);
4543 formats.push_back(VK_FORMAT_R32_SFLOAT);
4544 formats.push_back(VK_FORMAT_R32G32B32A32_SFLOAT);
4545 formats.push_back(VK_FORMAT_R64_SFLOAT);
4546 formats.push_back(VK_FORMAT_R64G64B64_SFLOAT);
4547 formats.push_back(VK_FORMAT_R64G64B64A64_SFLOAT);
4548 formats.push_back(VK_FORMAT_R8_USCALED);
4549 formats.push_back(VK_FORMAT_R8G8_USCALED);
4550 formats.push_back(VK_FORMAT_R8G8B8_USCALED);
4551 formats.push_back(VK_FORMAT_R8G8B8A8_USCALED);
4552
4553 return formats;
4554 }
4555
addRayTracingNoSubgroupShader(SourceCollections & programCollection)4556 void addRayTracingNoSubgroupShader (SourceCollections& programCollection)
4557 {
4558 const vk::ShaderBuildOptions buildOptions (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
4559
4560 const std::string rgenShaderNoSubgroups =
4561 "#version 460 core\n"
4562 "#extension GL_EXT_ray_tracing: require\n"
4563 "layout(location = 0) rayPayloadEXT uvec4 payload;\n"
4564 "layout(location = 0) callableDataEXT uvec4 callData;"
4565 "layout(set = 1, binding = 0) uniform accelerationStructureEXT topLevelAS;\n"
4566 "\n"
4567 "void main()\n"
4568 "{\n"
4569 " uint rayFlags = 0;\n"
4570 " uint cullMask = 0xFF;\n"
4571 " float tmin = 0.0;\n"
4572 " float tmax = 9.0;\n"
4573 " vec3 origin = vec3((float(gl_LaunchIDEXT.x) + 0.5f) / float(gl_LaunchSizeEXT.x), (float(gl_LaunchIDEXT.y) + 0.5f) / float(gl_LaunchSizeEXT.y), 0.0);\n"
4574 " vec3 directHit = vec3(0.0, 0.0, -1.0);\n"
4575 " vec3 directMiss = vec3(0.0, 0.0, +1.0);\n"
4576 "\n"
4577 " traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directHit, tmax, 0);\n"
4578 " traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directMiss, tmax, 0);\n"
4579 " executeCallableEXT(0, 0);"
4580 "}\n";
4581 const std::string hitShaderNoSubgroups =
4582 "#version 460 core\n"
4583 "#extension GL_EXT_ray_tracing: require\n"
4584 "hitAttributeEXT vec3 attribs;\n"
4585 "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
4586 "\n"
4587 "void main()\n"
4588 "{\n"
4589 "}\n";
4590 const std::string missShaderNoSubgroups =
4591 "#version 460 core\n"
4592 "#extension GL_EXT_ray_tracing: require\n"
4593 "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
4594 "\n"
4595 "void main()\n"
4596 "{\n"
4597 "}\n";
4598 const std::string sectShaderNoSubgroups =
4599 "#version 460 core\n"
4600 "#extension GL_EXT_ray_tracing: require\n"
4601 "hitAttributeEXT vec3 hitAttribute;\n"
4602 "\n"
4603 "void main()\n"
4604 "{\n"
4605 " reportIntersectionEXT(0.75f, 0x7Eu);\n"
4606 "}\n";
4607 const std::string callShaderNoSubgroups =
4608 "#version 460 core\n"
4609 "#extension GL_EXT_ray_tracing: require\n"
4610 "layout(location = 0) callableDataInEXT float callData;\n"
4611 "\n"
4612 "void main()\n"
4613 "{\n"
4614 "}\n";
4615
4616 programCollection.glslSources.add("rgen_noSubgroup") << glu::RaygenSource (rgenShaderNoSubgroups) << buildOptions;
4617 programCollection.glslSources.add("ahit_noSubgroup") << glu::AnyHitSource (hitShaderNoSubgroups) << buildOptions;
4618 programCollection.glslSources.add("chit_noSubgroup") << glu::ClosestHitSource (hitShaderNoSubgroups) << buildOptions;
4619 programCollection.glslSources.add("miss_noSubgroup") << glu::MissSource (missShaderNoSubgroups) << buildOptions;
4620 programCollection.glslSources.add("sect_noSubgroup") << glu::IntersectionSource (sectShaderNoSubgroups) << buildOptions;
4621 programCollection.glslSources.add("call_noSubgroup") << glu::CallableSource (callShaderNoSubgroups) << buildOptions;
4622 }
4623
4624 #ifndef CTS_USES_VULKANSC
4625
enumerateRayTracingShaderStages(const VkShaderStageFlags shaderStage)4626 static vector<VkShaderStageFlagBits> enumerateRayTracingShaderStages (const VkShaderStageFlags shaderStage)
4627 {
4628 vector<VkShaderStageFlagBits> result;
4629 const VkShaderStageFlagBits shaderStageFlags[] =
4630 {
4631 VK_SHADER_STAGE_RAYGEN_BIT_KHR,
4632 VK_SHADER_STAGE_ANY_HIT_BIT_KHR,
4633 VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,
4634 VK_SHADER_STAGE_MISS_BIT_KHR,
4635 VK_SHADER_STAGE_INTERSECTION_BIT_KHR,
4636 VK_SHADER_STAGE_CALLABLE_BIT_KHR,
4637 };
4638
4639 for (auto shaderStageFlag: shaderStageFlags)
4640 {
4641 if (0 != (shaderStage & shaderStageFlag))
4642 result.push_back(shaderStageFlag);
4643 }
4644
4645 return result;
4646 }
4647
getRayTracingResultBinding(const VkShaderStageFlagBits shaderStage)4648 static deUint32 getRayTracingResultBinding (const VkShaderStageFlagBits shaderStage)
4649 {
4650 const VkShaderStageFlags shaderStageFlags[] =
4651 {
4652 VK_SHADER_STAGE_RAYGEN_BIT_KHR,
4653 VK_SHADER_STAGE_ANY_HIT_BIT_KHR,
4654 VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,
4655 VK_SHADER_STAGE_MISS_BIT_KHR,
4656 VK_SHADER_STAGE_INTERSECTION_BIT_KHR,
4657 VK_SHADER_STAGE_CALLABLE_BIT_KHR,
4658 };
4659
4660 for (deUint32 shaderStageNdx = 0; shaderStageNdx < DE_LENGTH_OF_ARRAY(shaderStageFlags); ++shaderStageNdx)
4661 {
4662 if (0 != (shaderStage & shaderStageFlags[shaderStageNdx]))
4663 {
4664 DE_ASSERT(0 == (shaderStage & (~shaderStageFlags[shaderStageNdx])));
4665
4666 return shaderStageNdx;
4667 }
4668 }
4669
4670 TCU_THROW(InternalError, "Non-raytracing stage specified or no stage at all");
4671 }
4672
makeRayTracingInputBuffers(Context & context,VkFormat format,const SSBOData * extraDatas,deUint32 extraDatasCount,const vector<VkShaderStageFlagBits> & stagesVector)4673 static vectorBufferOrImage makeRayTracingInputBuffers (Context& context,
4674 VkFormat format,
4675 const SSBOData* extraDatas,
4676 deUint32 extraDatasCount,
4677 const vector<VkShaderStageFlagBits>& stagesVector)
4678 {
4679 const size_t stagesCount = stagesVector.size();
4680 const VkDeviceSize shaderSize = getMaxWidth();
4681 const VkDeviceSize inputBufferSize = getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize;
4682 vectorBufferOrImage inputBuffers (stagesCount + extraDatasCount);
4683
4684 // The implicit result SSBO we use to store our outputs from the shader
4685 for (size_t stageNdx = 0u; stageNdx < stagesCount; ++stageNdx)
4686 inputBuffers[stageNdx] = de::SharedPtr<BufferOrImage>(new Buffer(context, inputBufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT));
4687
4688 for (size_t stageNdx = stagesCount; stageNdx < stagesCount + extraDatasCount; ++stageNdx)
4689 {
4690 const size_t datasNdx = stageNdx - stagesCount;
4691
4692 if (extraDatas[datasNdx].isImage())
4693 {
4694 inputBuffers[stageNdx] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
4695 }
4696 else
4697 {
4698 const auto usage = (extraDatas[datasNdx].isUBO() ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT : VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
4699 const auto size = getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) * extraDatas[datasNdx].numElements;
4700 inputBuffers[stageNdx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, usage));
4701 }
4702
4703 initializeMemory(context, inputBuffers[stageNdx]->getAllocation(), extraDatas[datasNdx]);
4704 }
4705
4706 return inputBuffers;
4707 }
4708
makeRayTracingDescriptorSetLayout(Context & context,const SSBOData * extraDatas,deUint32 extraDatasCount,const vector<VkShaderStageFlagBits> & stagesVector,const vectorBufferOrImage & inputBuffers)4709 static Move<VkDescriptorSetLayout> makeRayTracingDescriptorSetLayout (Context& context,
4710 const SSBOData* extraDatas,
4711 deUint32 extraDatasCount,
4712 const vector<VkShaderStageFlagBits>& stagesVector,
4713 const vectorBufferOrImage& inputBuffers)
4714 {
4715 const DeviceInterface& vkd = context.getDeviceInterface();
4716 const VkDevice device = context.getDevice();
4717 const size_t stagesCount = stagesVector.size();
4718 DescriptorSetLayoutBuilder layoutBuilder;
4719
4720 // The implicit result SSBO we use to store our outputs from the shader
4721 for (size_t stageNdx = 0u; stageNdx < stagesVector.size(); ++stageNdx)
4722 {
4723 const deUint32 stageBinding = getRayTracingResultBinding(stagesVector[stageNdx]);
4724
4725 layoutBuilder.addIndexedBinding(inputBuffers[stageNdx]->getType(), 1, stagesVector[stageNdx], stageBinding, DE_NULL);
4726 }
4727
4728 for (size_t stageNdx = stagesCount; stageNdx < stagesCount + extraDatasCount; ++stageNdx)
4729 {
4730 const size_t datasNdx = stageNdx - stagesCount;
4731
4732 layoutBuilder.addIndexedBinding(inputBuffers[stageNdx]->getType(), 1, extraDatas[datasNdx].stages, extraDatas[datasNdx].binding, DE_NULL);
4733 }
4734
4735 return layoutBuilder.build(vkd, device);
4736 }
4737
makeRayTracingDescriptorSetLayoutAS(Context & context)4738 static Move<VkDescriptorSetLayout> makeRayTracingDescriptorSetLayoutAS (Context& context)
4739 {
4740 const DeviceInterface& vkd = context.getDeviceInterface();
4741 const VkDevice device = context.getDevice();
4742 DescriptorSetLayoutBuilder layoutBuilder;
4743
4744 layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, VK_SHADER_STAGE_RAYGEN_BIT_KHR);
4745
4746 return layoutBuilder.build(vkd, device);
4747 }
4748
makeRayTracingDescriptorPool(Context & context,const vectorBufferOrImage & inputBuffers)4749 static Move<VkDescriptorPool> makeRayTracingDescriptorPool (Context& context,
4750 const vectorBufferOrImage& inputBuffers)
4751 {
4752 const DeviceInterface& vkd = context.getDeviceInterface();
4753 const VkDevice device = context.getDevice();
4754 const deUint32 maxDescriptorSets = 2u;
4755 DescriptorPoolBuilder poolBuilder;
4756 Move<VkDescriptorPool> result;
4757
4758 if (inputBuffers.size() > 0)
4759 {
4760 for (size_t ndx = 0u; ndx < inputBuffers.size(); ndx++)
4761 poolBuilder.addType(inputBuffers[ndx]->getType());
4762 }
4763
4764 poolBuilder.addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR);
4765
4766 result = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, maxDescriptorSets);
4767
4768 return result;
4769 }
4770
makeRayTracingDescriptorSet(Context & context,VkDescriptorPool descriptorPool,VkDescriptorSetLayout descriptorSetLayout,const SSBOData * extraDatas,deUint32 extraDatasCount,const vector<VkShaderStageFlagBits> & stagesVector,const vectorBufferOrImage & inputBuffers)4771 static Move<VkDescriptorSet> makeRayTracingDescriptorSet (Context& context,
4772 VkDescriptorPool descriptorPool,
4773 VkDescriptorSetLayout descriptorSetLayout,
4774 const SSBOData* extraDatas,
4775 deUint32 extraDatasCount,
4776 const vector<VkShaderStageFlagBits>& stagesVector,
4777 const vectorBufferOrImage& inputBuffers)
4778 {
4779 const DeviceInterface& vkd = context.getDeviceInterface();
4780 const VkDevice device = context.getDevice();
4781 const size_t stagesCount = stagesVector.size();
4782 Move<VkDescriptorSet> descriptorSet;
4783
4784 if (inputBuffers.size() > 0)
4785 {
4786 DescriptorSetUpdateBuilder updateBuilder;
4787
4788 // Create descriptor set
4789 descriptorSet = makeDescriptorSet(vkd, device, descriptorPool, descriptorSetLayout);
4790
4791 for (size_t ndx = 0u; ndx < stagesCount + extraDatasCount; ndx++)
4792 {
4793 const deUint32 binding = (ndx < stagesCount)
4794 ? getRayTracingResultBinding(stagesVector[ndx])
4795 : extraDatas[ndx - stagesCount].binding;
4796
4797 if (inputBuffers[ndx]->isImage())
4798 {
4799 const VkDescriptorImageInfo info = makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(), inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
4800
4801 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(binding), inputBuffers[ndx]->getType(), &info);
4802 }
4803 else
4804 {
4805 const VkDescriptorBufferInfo info = makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(), 0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
4806
4807 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(binding), inputBuffers[ndx]->getType(), &info);
4808 }
4809 }
4810
4811 updateBuilder.update(vkd, device);
4812 }
4813
4814 return descriptorSet;
4815 }
4816
makeRayTracingDescriptorSetAS(Context & context,VkDescriptorPool descriptorPool,VkDescriptorSetLayout descriptorSetLayout,de::MovePtr<TopLevelAccelerationStructure> & topLevelAccelerationStructure)4817 static Move<VkDescriptorSet> makeRayTracingDescriptorSetAS (Context& context,
4818 VkDescriptorPool descriptorPool,
4819 VkDescriptorSetLayout descriptorSetLayout,
4820 de::MovePtr<TopLevelAccelerationStructure>& topLevelAccelerationStructure)
4821 {
4822 const DeviceInterface& vkd = context.getDeviceInterface();
4823 const VkDevice device = context.getDevice();
4824 const TopLevelAccelerationStructure* topLevelAccelerationStructurePtr = topLevelAccelerationStructure.get();
4825 const VkWriteDescriptorSetAccelerationStructureKHR accelerationStructureWriteDescriptorSet =
4826 {
4827 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, // VkStructureType sType;
4828 DE_NULL, // const void* pNext;
4829 1u, // deUint32 accelerationStructureCount;
4830 topLevelAccelerationStructurePtr->getPtr(), // const VkAccelerationStructureKHR* pAccelerationStructures;
4831 };
4832 Move<VkDescriptorSet> descriptorSet = makeDescriptorSet(vkd, device, descriptorPool, descriptorSetLayout);
4833
4834 DescriptorSetUpdateBuilder()
4835 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &accelerationStructureWriteDescriptorSet)
4836 .update(vkd, device);
4837
4838 return descriptorSet;
4839 }
4840
makeRayTracingPipelineLayout(Context & context,const VkDescriptorSetLayout descriptorSetLayout0,const VkDescriptorSetLayout descriptorSetLayout1)4841 static Move<VkPipelineLayout> makeRayTracingPipelineLayout (Context& context,
4842 const VkDescriptorSetLayout descriptorSetLayout0,
4843 const VkDescriptorSetLayout descriptorSetLayout1)
4844 {
4845 const DeviceInterface& vkd = context.getDeviceInterface();
4846 const VkDevice device = context.getDevice();
4847 const std::vector<VkDescriptorSetLayout> descriptorSetLayouts { descriptorSetLayout0, descriptorSetLayout1 };
4848 const deUint32 descriptorSetLayoutsSize = static_cast<deUint32>(descriptorSetLayouts.size());
4849
4850 return makePipelineLayout(vkd, device, descriptorSetLayoutsSize, descriptorSetLayouts.data());
4851 }
4852
createTopAccelerationStructure(Context & context,de::SharedPtr<BottomLevelAccelerationStructure> bottomLevelAccelerationStructure)4853 static de::MovePtr<TopLevelAccelerationStructure> createTopAccelerationStructure (Context& context,
4854 de::SharedPtr<BottomLevelAccelerationStructure> bottomLevelAccelerationStructure)
4855 {
4856 const DeviceInterface& vkd = context.getDeviceInterface();
4857 const VkDevice device = context.getDevice();
4858 Allocator& allocator = context.getDefaultAllocator();
4859 de::MovePtr<TopLevelAccelerationStructure> result = makeTopLevelAccelerationStructure();
4860
4861 result->setInstanceCount(1);
4862 result->addInstance(bottomLevelAccelerationStructure);
4863 result->create(vkd, device, allocator);
4864
4865 return result;
4866 }
4867
createBottomAccelerationStructure(Context & context)4868 static de::SharedPtr<BottomLevelAccelerationStructure> createBottomAccelerationStructure (Context& context)
4869 {
4870 const DeviceInterface& vkd = context.getDeviceInterface();
4871 const VkDevice device = context.getDevice();
4872 Allocator& allocator = context.getDefaultAllocator();
4873 de::MovePtr<BottomLevelAccelerationStructure> result = makeBottomLevelAccelerationStructure();
4874 const std::vector<tcu::Vec3> geometryData { tcu::Vec3(-1.0f, -1.0f, -2.0f), tcu::Vec3(+1.0f, +1.0f, -1.0f) };
4875
4876 result->setGeometryCount(1u);
4877 result->addGeometry(geometryData, false);
4878 result->create(vkd, device, allocator, 0u);
4879
4880 return de::SharedPtr<BottomLevelAccelerationStructure>(result.release());
4881 }
4882
// Builds a complete ray tracing pipeline covering all six ray tracing stages.
// Stages whose bit is set in shaderStageTested use the subgroup test shaders
// ("rgen", "ahit", ...); all other stages fall back to the corresponding
// "*_noSubgroup" shaders so the pipeline always contains every stage.
//
// shaderStageCreateFlags and requiredSubgroupSize are optional arrays of six
// entries in the order rgen, ahit, chit, miss, sect, call; DE_NULL means
// "none". A requiredSubgroupSize entry of 0 means no
// VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT is chained for that
// stage.
//
// The created VkPipeline is returned through pipelineOut; the returned
// RayTracingPipeline object is kept alive by the caller alongside it.
static de::MovePtr<RayTracingPipeline> makeRayTracingPipeline (Context& context,
															   const VkShaderStageFlags shaderStageTested,
															   const VkPipelineLayout pipelineLayout,
															   const deUint32 shaderStageCreateFlags[6],
															   const deUint32 requiredSubgroupSize[6],
															   Move<VkPipeline>& pipelineOut)
{
	const DeviceInterface&	vkd			= context.getDeviceInterface();
	const VkDevice			device		= context.getDevice();
	BinaryCollection&		collection	= context.getBinaryCollection();
	// Pick the subgroup test shader for tested stages, pass-through otherwise.
	const char*				shaderRgenName	= (0 != (shaderStageTested & VK_SHADER_STAGE_RAYGEN_BIT_KHR)) ? "rgen" : "rgen_noSubgroup";
	const char*				shaderAhitName	= (0 != (shaderStageTested & VK_SHADER_STAGE_ANY_HIT_BIT_KHR)) ? "ahit" : "ahit_noSubgroup";
	const char*				shaderChitName	= (0 != (shaderStageTested & VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR)) ? "chit" : "chit_noSubgroup";
	const char*				shaderMissName	= (0 != (shaderStageTested & VK_SHADER_STAGE_MISS_BIT_KHR)) ? "miss" : "miss_noSubgroup";
	const char*				shaderSectName	= (0 != (shaderStageTested & VK_SHADER_STAGE_INTERSECTION_BIT_KHR)) ? "sect" : "sect_noSubgroup";
	const char*				shaderCallName	= (0 != (shaderStageTested & VK_SHADER_STAGE_CALLABLE_BIT_KHR)) ? "call" : "call_noSubgroup";
	const VkShaderModuleCreateFlags	noShaderModuleCreateFlags	= static_cast<VkShaderModuleCreateFlags>(0);
	Move<VkShaderModule>	rgenShaderModule	= createShaderModule(vkd, device, collection.get(shaderRgenName), noShaderModuleCreateFlags);
	Move<VkShaderModule>	ahitShaderModule	= createShaderModule(vkd, device, collection.get(shaderAhitName), noShaderModuleCreateFlags);
	Move<VkShaderModule>	chitShaderModule	= createShaderModule(vkd, device, collection.get(shaderChitName), noShaderModuleCreateFlags);
	Move<VkShaderModule>	missShaderModule	= createShaderModule(vkd, device, collection.get(shaderMissName), noShaderModuleCreateFlags);
	Move<VkShaderModule>	sectShaderModule	= createShaderModule(vkd, device, collection.get(shaderSectName), noShaderModuleCreateFlags);
	Move<VkShaderModule>	callShaderModule	= createShaderModule(vkd, device, collection.get(shaderCallName), noShaderModuleCreateFlags);
	// Per-stage pipeline shader stage create flags (0 when no array supplied).
	const VkPipelineShaderStageCreateFlags	noPipelineShaderStageCreateFlags	= static_cast<VkPipelineShaderStageCreateFlags>(0);
	const VkPipelineShaderStageCreateFlags	rgenPipelineShaderStageCreateFlags	= (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[0];
	const VkPipelineShaderStageCreateFlags	ahitPipelineShaderStageCreateFlags	= (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[1];
	const VkPipelineShaderStageCreateFlags	chitPipelineShaderStageCreateFlags	= (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[2];
	const VkPipelineShaderStageCreateFlags	missPipelineShaderStageCreateFlags	= (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[3];
	const VkPipelineShaderStageCreateFlags	sectPipelineShaderStageCreateFlags	= (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[4];
	const VkPipelineShaderStageCreateFlags	callPipelineShaderStageCreateFlags	= (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[5];
	// One required-subgroup-size structure per stage; requiredSubgroupSize of
	// 0u marks "unused" and is filtered out via the pointers below.
	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT requiredSubgroupSizeCreateInfo[6] =
	{
		{
			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
			DE_NULL,
			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[0] : 0u,
		},
		{
			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
			DE_NULL,
			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[1] : 0u,
		},
		{
			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
			DE_NULL,
			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[2] : 0u,
		},
		{
			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
			DE_NULL,
			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[3] : 0u,
		},
		{
			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
			DE_NULL,
			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[4] : 0u,
		},
		{
			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
			DE_NULL,
			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[5] : 0u,
		},
	};
	// Null pointer for a stage means: do not chain the structure for it.
	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT*	rgenRequiredSubgroupSizeCreateInfo	= (requiredSubgroupSizeCreateInfo[0].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[0];
	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT*	ahitRequiredSubgroupSizeCreateInfo	= (requiredSubgroupSizeCreateInfo[1].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[1];
	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT*	chitRequiredSubgroupSizeCreateInfo	= (requiredSubgroupSizeCreateInfo[2].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[2];
	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT*	missRequiredSubgroupSizeCreateInfo	= (requiredSubgroupSizeCreateInfo[3].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[3];
	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT*	sectRequiredSubgroupSizeCreateInfo	= (requiredSubgroupSizeCreateInfo[4].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[4];
	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT*	callRequiredSubgroupSizeCreateInfo	= (requiredSubgroupSizeCreateInfo[5].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[5];
	de::MovePtr<RayTracingPipeline>	rayTracingPipeline	= de::newMovePtr<RayTracingPipeline>();

	// Register all six stages; ahit/chit/sect share HIT_GROUP.
	rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR		, rgenShaderModule, RAYGEN_GROUP, DE_NULL, rgenPipelineShaderStageCreateFlags, rgenRequiredSubgroupSizeCreateInfo);
	rayTracingPipeline->addShader(VK_SHADER_STAGE_ANY_HIT_BIT_KHR		, ahitShaderModule, HIT_GROUP, DE_NULL, ahitPipelineShaderStageCreateFlags, ahitRequiredSubgroupSizeCreateInfo);
	rayTracingPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR	, chitShaderModule, HIT_GROUP, DE_NULL, chitPipelineShaderStageCreateFlags, chitRequiredSubgroupSizeCreateInfo);
	rayTracingPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR			, missShaderModule, MISS_GROUP, DE_NULL, missPipelineShaderStageCreateFlags, missRequiredSubgroupSizeCreateInfo);
	rayTracingPipeline->addShader(VK_SHADER_STAGE_INTERSECTION_BIT_KHR	, sectShaderModule, HIT_GROUP, DE_NULL, sectPipelineShaderStageCreateFlags, sectRequiredSubgroupSizeCreateInfo);
	rayTracingPipeline->addShader(VK_SHADER_STAGE_CALLABLE_BIT_KHR		, callShaderModule, CALL_GROUP, DE_NULL, callPipelineShaderStageCreateFlags, callRequiredSubgroupSizeCreateInfo);

	// Must execute createPipeline here, due to pNext pointers in calls to addShader are local
	pipelineOut = rayTracingPipeline->createPipeline(vkd, device, pipelineLayout);

	return rayTracingPipeline;
}
4966
getPossibleRayTracingSubgroupStages(Context & context,const VkShaderStageFlags testedStages)4967 VkShaderStageFlags getPossibleRayTracingSubgroupStages (Context& context, const VkShaderStageFlags testedStages)
4968 {
4969 const VkPhysicalDeviceSubgroupProperties& subgroupProperties = context.getSubgroupProperties();
4970 const VkShaderStageFlags stages = testedStages & subgroupProperties.supportedStages;
4971
4972 DE_ASSERT(isAllRayTracingStages(testedStages));
4973
4974 return stages;
4975 }
4976
allRayTracingStages(Context & context,VkFormat format,const SSBOData * extraDatas,deUint32 extraDataCount,const void * internalData,const VerificationFunctor & checkResult,const VkShaderStageFlags shaderStage)4977 tcu::TestStatus allRayTracingStages (Context& context,
4978 VkFormat format,
4979 const SSBOData* extraDatas,
4980 deUint32 extraDataCount,
4981 const void* internalData,
4982 const VerificationFunctor& checkResult,
4983 const VkShaderStageFlags shaderStage)
4984 {
4985 return vkt::subgroups::allRayTracingStagesRequiredSubgroupSize(context,
4986 format,
4987 extraDatas,
4988 extraDataCount,
4989 internalData,
4990 checkResult,
4991 shaderStage,
4992 DE_NULL,
4993 DE_NULL);
4994 }
4995
// Runs the ray tracing subgroup test for every width from 1 up to
// getMaxWidth() (see getNextWidth for the step pattern). For each width it
// (re)initializes the extra input buffers, records and submits a command
// buffer that builds the acceleration structures and traces width x 1 x 1
// rays, then reads back each tested stage's result SSBO and validates it
// with checkResult. Fails if any iteration fails or none passed.
//
// shaderStageCreateFlags / requiredSubgroupSize are optional 6-entry arrays
// forwarded to makeRayTracingPipeline (DE_NULL means "none").
tcu::TestStatus allRayTracingStagesRequiredSubgroupSize (Context& context,
														 VkFormat format,
														 const SSBOData* extraDatas,
														 deUint32 extraDatasCount,
														 const void* internalData,
														 const VerificationFunctor& checkResult,
														 const VkShaderStageFlags shaderStageTested,
														 const deUint32 shaderStageCreateFlags[6],
														 const deUint32 requiredSubgroupSize[6])
{
	const DeviceInterface&							vkd									= context.getDeviceInterface();
	const VkDevice									device								= context.getDevice();
	const VkQueue									queue								= context.getUniversalQueue();
	const deUint32									queueFamilyIndex					= context.getUniversalQueueFamilyIndex();
	Allocator&										allocator							= context.getDefaultAllocator();
	const deUint32									subgroupSize						= getSubgroupSize(context);
	const deUint32									maxWidth							= getMaxWidth();
	const vector<VkShaderStageFlagBits>				stagesVector						= enumerateRayTracingShaderStages(shaderStageTested);
	const deUint32									stagesCount							= static_cast<deUint32>(stagesVector.size());
	// Acceleration structures are created (allocated) here; they are built on
	// the command buffer inside the loop below.
	de::SharedPtr<BottomLevelAccelerationStructure>	bottomLevelAccelerationStructure	= createBottomAccelerationStructure(context);
	de::MovePtr<TopLevelAccelerationStructure>		topLevelAccelerationStructure		= createTopAccelerationStructure(context, bottomLevelAccelerationStructure);
	// inputBuffers: one result SSBO per tested stage followed by the extras.
	vectorBufferOrImage								inputBuffers						= makeRayTracingInputBuffers(context, format, extraDatas, extraDatasCount, stagesVector);
	const Move<VkDescriptorSetLayout>				descriptorSetLayout					= makeRayTracingDescriptorSetLayout(context, extraDatas, extraDatasCount, stagesVector, inputBuffers);
	const Move<VkDescriptorSetLayout>				descriptorSetLayoutAS				= makeRayTracingDescriptorSetLayoutAS(context);
	const Move<VkPipelineLayout>					pipelineLayout						= makeRayTracingPipelineLayout(context, *descriptorSetLayout, *descriptorSetLayoutAS);
	Move<VkPipeline>								pipeline							= Move<VkPipeline>();
	// rayTracingPipeline must be kept alive alongside pipeline (it owns group data).
	const de::MovePtr<RayTracingPipeline>			rayTracingPipeline					= makeRayTracingPipeline(context, shaderStageTested, *pipelineLayout, shaderStageCreateFlags, requiredSubgroupSize, pipeline);
	const deUint32									shaderGroupHandleSize				= context.getRayTracingPipelineProperties().shaderGroupHandleSize;
	const deUint32									shaderGroupBaseAlignment			= context.getRayTracingPipelineProperties().shaderGroupBaseAlignment;
	// One shader binding table (and address region) per shader group.
	de::MovePtr<BufferWithMemory>					rgenShaderBindingTable				= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, RAYGEN_GROUP, 1u);
	de::MovePtr<BufferWithMemory>					missShaderBindingTable				= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, MISS_GROUP, 1u);
	de::MovePtr<BufferWithMemory>					hitsShaderBindingTable				= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, HIT_GROUP, 1u);
	de::MovePtr<BufferWithMemory>					callShaderBindingTable				= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, CALL_GROUP, 1u);
	const VkStridedDeviceAddressRegionKHR			rgenShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, rgenShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
	const VkStridedDeviceAddressRegionKHR			missShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, missShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
	const VkStridedDeviceAddressRegionKHR			hitsShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, hitsShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
	const VkStridedDeviceAddressRegionKHR			callShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, callShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
	const Move<VkDescriptorPool>					descriptorPool						= makeRayTracingDescriptorPool(context, inputBuffers);
	const Move<VkDescriptorSet>						descriptorSet						= makeRayTracingDescriptorSet(context, *descriptorPool, *descriptorSetLayout, extraDatas, extraDatasCount, stagesVector, inputBuffers);
	const Move<VkDescriptorSet>						descriptorSetAS						= makeRayTracingDescriptorSetAS(context, *descriptorPool, *descriptorSetLayoutAS, topLevelAccelerationStructure);
	const Move<VkCommandPool>						cmdPool								= makeCommandPool(vkd, device, queueFamilyIndex);
	const Move<VkCommandBuffer>						cmdBuffer							= makeCommandBuffer(context, *cmdPool);
	deUint32										passIterations						= 0u;
	deUint32										failIterations						= 0u;

	DE_ASSERT(shaderStageTested != 0);

	for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
	{

		for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
		{
			// re-init the data
			const Allocation& alloc = inputBuffers[ndx]->getAllocation();

			initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
		}

		beginCommandBuffer(vkd, *cmdBuffer);
		{
			vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);

			// Record acceleration structure builds before tracing.
			bottomLevelAccelerationStructure->build(vkd, device, *cmdBuffer);
			topLevelAccelerationStructure->build(vkd, device, *cmdBuffer);

			// Set 1: acceleration structure; set 0: result/extra buffers.
			vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 1u, 1u, &descriptorSetAS.get(), 0u, DE_NULL);

			if (stagesCount + extraDatasCount > 0)
				vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);

			cmdTraceRays(vkd,
				*cmdBuffer,
				&rgenShaderBindingTableRegion,
				&missShaderBindingTableRegion,
				&hitsShaderBindingTableRegion,
				&callShaderBindingTableRegion,
				width, 1, 1);

			// Make shader writes visible to the host readback below.
			const VkMemoryBarrier	postTraceMemoryBarrier	= makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
			cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, VK_PIPELINE_STAGE_HOST_BIT, &postTraceMemoryBarrier);
		}
		endCommandBuffer(vkd, *cmdBuffer);

		submitCommandsAndWait(vkd, device, queue, *cmdBuffer);

		// Verify each tested stage's results independently.
		for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
		{
			std::vector<const void*> datas;

			if (!inputBuffers[ndx]->isImage())
			{
				const Allocation& resultAlloc = inputBuffers[ndx]->getAllocation();

				invalidateAlloc(vkd, device, resultAlloc);

				// we always have our result data first
				datas.push_back(resultAlloc.getHostPtr());
			}

			// Append host pointers of extra buffers visible to this stage.
			for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
			{
				const deUint32 datasNdx = index - stagesCount;

				if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
				{
					const Allocation& resultAlloc = inputBuffers[index]->getAllocation();

					invalidateAlloc(vkd, device, resultAlloc);

					// we always have our result data first
					datas.push_back(resultAlloc.getHostPtr());
				}
			}

			if (!checkResult(internalData, datas, width, subgroupSize, false))
				failIterations++;
			else
				passIterations++;
		}

		context.resetCommandPoolForVKSC(device, *cmdPool);
	}

	if (failIterations > 0 || passIterations == 0)
		return tcu::TestStatus::fail("Failed " + de::toString(failIterations) + " out of " + de::toString(failIterations + passIterations) + " iterations.");
	else
		return tcu::TestStatus::pass("OK");
}
5124 #endif // CTS_USES_VULKANSC
5125
5126 } // namespace subgroups
} // namespace vkt
5128