1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2019 The Khronos Group Inc.
6 * Copyright (c) 2019 Google Inc.
7 * Copyright (c) 2017 Codeplay Software Ltd.
8 *
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
20 *
21 */ /*!
22 * \file
23 * \brief Subgroups Tests Utils
24 */ /*--------------------------------------------------------------------*/
25
26 #include "vktSubgroupsTestsUtils.hpp"
27 #include "vkRayTracingUtil.hpp"
28 #include "deFloat16.h"
29 #include "deRandom.hpp"
30 #include "tcuCommandLine.hpp"
31 #include "tcuStringTemplate.hpp"
32 #include "vkBarrierUtil.hpp"
33 #include "vkImageUtil.hpp"
34 #include "vkTypeUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkObjUtil.hpp"
37
38 using namespace tcu;
39 using namespace std;
40 using namespace vk;
41 using namespace vkt;
42
43 namespace
44 {
45
// Flavors of "compute-like" pipelines these utilities can target.
// COMPUTE is a regular compute pipeline; MESH presumably selects the
// mesh-shading path that reuses the compute-style test plumbing -- TODO confirm.
enum class ComputeLike { COMPUTE = 0, MESH };
47
getMaxWidth()48 deUint32 getMaxWidth ()
49 {
50 return 1024u;
51 }
52
// Advances the tested width: every value up to 128 (the maximum subgroup
// size) is covered exhaustively; past that, widths double to keep the
// total test time reasonable.
deUint32 getNextWidth (const deUint32 width)
{
	return (width < 128u) ? (width + 1u) : (width * 2u);
}
66
getFormatSizeInBytes(const VkFormat format)67 deUint32 getFormatSizeInBytes (const VkFormat format)
68 {
69 switch (format)
70 {
71 default:
72 DE_FATAL("Unhandled format!");
73 return 0;
74 case VK_FORMAT_R8_SINT:
75 case VK_FORMAT_R8_UINT:
76 return static_cast<deUint32>(sizeof(deInt8));
77 case VK_FORMAT_R8G8_SINT:
78 case VK_FORMAT_R8G8_UINT:
79 return static_cast<deUint32>(sizeof(deInt8) * 2);
80 case VK_FORMAT_R8G8B8_SINT:
81 case VK_FORMAT_R8G8B8_UINT:
82 case VK_FORMAT_R8G8B8A8_SINT:
83 case VK_FORMAT_R8G8B8A8_UINT:
84 return static_cast<deUint32>(sizeof(deInt8) * 4);
85 case VK_FORMAT_R16_SINT:
86 case VK_FORMAT_R16_UINT:
87 case VK_FORMAT_R16_SFLOAT:
88 return static_cast<deUint32>(sizeof(deInt16));
89 case VK_FORMAT_R16G16_SINT:
90 case VK_FORMAT_R16G16_UINT:
91 case VK_FORMAT_R16G16_SFLOAT:
92 return static_cast<deUint32>(sizeof(deInt16) * 2);
93 case VK_FORMAT_R16G16B16_UINT:
94 case VK_FORMAT_R16G16B16_SINT:
95 case VK_FORMAT_R16G16B16_SFLOAT:
96 case VK_FORMAT_R16G16B16A16_SINT:
97 case VK_FORMAT_R16G16B16A16_UINT:
98 case VK_FORMAT_R16G16B16A16_SFLOAT:
99 return static_cast<deUint32>(sizeof(deInt16) * 4);
100 case VK_FORMAT_R32_SINT:
101 case VK_FORMAT_R32_UINT:
102 case VK_FORMAT_R32_SFLOAT:
103 return static_cast<deUint32>(sizeof(deInt32));
104 case VK_FORMAT_R32G32_SINT:
105 case VK_FORMAT_R32G32_UINT:
106 case VK_FORMAT_R32G32_SFLOAT:
107 return static_cast<deUint32>(sizeof(deInt32) * 2);
108 case VK_FORMAT_R32G32B32_SINT:
109 case VK_FORMAT_R32G32B32_UINT:
110 case VK_FORMAT_R32G32B32_SFLOAT:
111 case VK_FORMAT_R32G32B32A32_SINT:
112 case VK_FORMAT_R32G32B32A32_UINT:
113 case VK_FORMAT_R32G32B32A32_SFLOAT:
114 return static_cast<deUint32>(sizeof(deInt32) * 4);
115 case VK_FORMAT_R64_SINT:
116 case VK_FORMAT_R64_UINT:
117 case VK_FORMAT_R64_SFLOAT:
118 return static_cast<deUint32>(sizeof(deInt64));
119 case VK_FORMAT_R64G64_SINT:
120 case VK_FORMAT_R64G64_UINT:
121 case VK_FORMAT_R64G64_SFLOAT:
122 return static_cast<deUint32>(sizeof(deInt64) * 2);
123 case VK_FORMAT_R64G64B64_SINT:
124 case VK_FORMAT_R64G64B64_UINT:
125 case VK_FORMAT_R64G64B64_SFLOAT:
126 case VK_FORMAT_R64G64B64A64_SINT:
127 case VK_FORMAT_R64G64B64A64_UINT:
128 case VK_FORMAT_R64G64B64A64_SFLOAT:
129 return static_cast<deUint32>(sizeof(deInt64) * 4);
130 // The below formats are used to represent bool and bvec* types. These
131 // types are passed to the shader as int and ivec* types, before the
132 // calculations are done as booleans. We need a distinct type here so
133 // that the shader generators can switch on it and generate the correct
134 // shader source for testing.
135 case VK_FORMAT_R8_USCALED:
136 return static_cast<deUint32>(sizeof(deInt32));
137 case VK_FORMAT_R8G8_USCALED:
138 return static_cast<deUint32>(sizeof(deInt32) * 2);
139 case VK_FORMAT_R8G8B8_USCALED:
140 case VK_FORMAT_R8G8B8A8_USCALED:
141 return static_cast<deUint32>(sizeof(deInt32) * 4);
142 }
143 }
144
getElementSizeInBytes(const VkFormat format,const subgroups::SSBOData::InputDataLayoutType layout)145 deUint32 getElementSizeInBytes (const VkFormat format,
146 const subgroups::SSBOData::InputDataLayoutType layout)
147 {
148 const deUint32 bytes = getFormatSizeInBytes(format);
149
150 if (layout == subgroups::SSBOData::LayoutStd140)
151 return bytes < 16 ? 16 : bytes;
152 else
153 return bytes;
154 }
155
makeRenderPass(Context & context,VkFormat format)156 Move<VkRenderPass> makeRenderPass (Context& context, VkFormat format)
157 {
158 const VkAttachmentReference colorReference =
159 {
160 0,
161 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
162 };
163 const VkSubpassDescription subpassDescription =
164 {
165 0u, // VkSubpassDescriptionFlags flags;
166 VK_PIPELINE_BIND_POINT_GRAPHICS, // VkPipelineBindPoint pipelineBindPoint;
167 0, // deUint32 inputAttachmentCount;
168 DE_NULL, // const VkAttachmentReference* pInputAttachments;
169 1, // deUint32 colorAttachmentCount;
170 &colorReference, // const VkAttachmentReference* pColorAttachments;
171 DE_NULL, // const VkAttachmentReference* pResolveAttachments;
172 DE_NULL, // const VkAttachmentReference* pDepthStencilAttachment;
173 0, // deUint32 preserveAttachmentCount;
174 DE_NULL // const deUint32* pPreserveAttachments;
175 };
176 const VkSubpassDependency subpassDependencies[2] =
177 {
178 {
179 VK_SUBPASS_EXTERNAL, // deUint32 srcSubpass;
180 0u, // deUint32 dstSubpass;
181 VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, // VkPipelineStageFlags srcStageMask;
182 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, // VkPipelineStageFlags dstStageMask;
183 VK_ACCESS_MEMORY_READ_BIT, // VkAccessFlags srcAccessMask;
184 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, // VkAccessFlags dstAccessMask;
185 VK_DEPENDENCY_BY_REGION_BIT // VkDependencyFlags dependencyFlags;
186 },
187 {
188 0u, // deUint32 srcSubpass;
189 VK_SUBPASS_EXTERNAL, // deUint32 dstSubpass;
190 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, // VkPipelineStageFlags srcStageMask;
191 VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, // VkPipelineStageFlags dstStageMask;
192 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, // VkAccessFlags srcAccessMask;
193 VK_ACCESS_MEMORY_READ_BIT, // VkAccessFlags dstAccessMask;
194 VK_DEPENDENCY_BY_REGION_BIT // VkDependencyFlags dependencyFlags;
195 },
196 };
197 const VkAttachmentDescription attachmentDescription =
198 {
199 0u, // VkAttachmentDescriptionFlags flags;
200 format, // VkFormat format;
201 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
202 VK_ATTACHMENT_LOAD_OP_CLEAR, // VkAttachmentLoadOp loadOp;
203 VK_ATTACHMENT_STORE_OP_STORE, // VkAttachmentStoreOp storeOp;
204 VK_ATTACHMENT_LOAD_OP_DONT_CARE, // VkAttachmentLoadOp stencilLoadOp;
205 VK_ATTACHMENT_STORE_OP_DONT_CARE, // VkAttachmentStoreOp stencilStoreOp;
206 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
207 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL // VkImageLayout finalLayout;
208 };
209 const VkRenderPassCreateInfo renderPassCreateInfo =
210 {
211 VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // VkStructureType sType;
212 DE_NULL, // const void* pNext;
213 0u, // VkRenderPassCreateFlags flags;
214 1, // deUint32 attachmentCount;
215 &attachmentDescription, // const VkAttachmentDescription* pAttachments;
216 1, // deUint32 subpassCount;
217 &subpassDescription, // const VkSubpassDescription* pSubpasses;
218 2, // deUint32 dependencyCount;
219 subpassDependencies // const VkSubpassDependency* pDependencies;
220 };
221
222 return createRenderPass(context.getDeviceInterface(), context.getDevice(), &renderPassCreateInfo);
223 }
224
makeGraphicsPipeline(const DeviceInterface & vk,const VkDevice device,const VkPipelineLayout pipelineLayout,const VkShaderModule vertexShaderModule,const VkShaderModule tessellationControlShaderModule,const VkShaderModule tessellationEvalShaderModule,const VkShaderModule geometryShaderModule,const VkShaderModule fragmentShaderModule,const VkRenderPass renderPass,const std::vector<VkViewport> & viewports,const std::vector<VkRect2D> & scissors,const VkPrimitiveTopology topology,const deUint32 subpass,const deUint32 patchControlPoints,const VkPipelineVertexInputStateCreateInfo * vertexInputStateCreateInfo,const VkPipelineRasterizationStateCreateInfo * rasterizationStateCreateInfo,const VkPipelineMultisampleStateCreateInfo * multisampleStateCreateInfo,const VkPipelineDepthStencilStateCreateInfo * depthStencilStateCreateInfo,const VkPipelineColorBlendStateCreateInfo * colorBlendStateCreateInfo,const VkPipelineDynamicStateCreateInfo * dynamicStateCreateInfo,const deUint32 vertexShaderStageCreateFlags,const deUint32 tessellationControlShaderStageCreateFlags,const deUint32 tessellationEvalShaderStageCreateFlags,const deUint32 geometryShaderStageCreateFlags,const deUint32 fragmentShaderStageCreateFlags,const deUint32 requiredSubgroupSize[5])225 Move<VkPipeline> makeGraphicsPipeline (const DeviceInterface& vk,
226 const VkDevice device,
227 const VkPipelineLayout pipelineLayout,
228 const VkShaderModule vertexShaderModule,
229 const VkShaderModule tessellationControlShaderModule,
230 const VkShaderModule tessellationEvalShaderModule,
231 const VkShaderModule geometryShaderModule,
232 const VkShaderModule fragmentShaderModule,
233 const VkRenderPass renderPass,
234 const std::vector<VkViewport>& viewports,
235 const std::vector<VkRect2D>& scissors,
236 const VkPrimitiveTopology topology,
237 const deUint32 subpass,
238 const deUint32 patchControlPoints,
239 const VkPipelineVertexInputStateCreateInfo* vertexInputStateCreateInfo,
240 const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo,
241 const VkPipelineMultisampleStateCreateInfo* multisampleStateCreateInfo,
242 const VkPipelineDepthStencilStateCreateInfo* depthStencilStateCreateInfo,
243 const VkPipelineColorBlendStateCreateInfo* colorBlendStateCreateInfo,
244 const VkPipelineDynamicStateCreateInfo* dynamicStateCreateInfo,
245 const deUint32 vertexShaderStageCreateFlags,
246 const deUint32 tessellationControlShaderStageCreateFlags,
247 const deUint32 tessellationEvalShaderStageCreateFlags,
248 const deUint32 geometryShaderStageCreateFlags,
249 const deUint32 fragmentShaderStageCreateFlags,
250 const deUint32 requiredSubgroupSize[5])
251 {
252 const VkBool32 disableRasterization = (fragmentShaderModule == DE_NULL);
253 const bool hasTessellation = (tessellationControlShaderModule != DE_NULL || tessellationEvalShaderModule != DE_NULL);
254
255 VkPipelineShaderStageCreateInfo stageCreateInfo =
256 {
257 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType
258 DE_NULL, // const void* pNext
259 0u, // VkPipelineShaderStageCreateFlags flags
260 VK_SHADER_STAGE_VERTEX_BIT, // VkShaderStageFlagBits stage
261 DE_NULL, // VkShaderModule module
262 "main", // const char* pName
263 DE_NULL // const VkSpecializationInfo* pSpecializationInfo
264 };
265
266 std::vector<VkPipelineShaderStageCreateInfo> pipelineShaderStageParams;
267
268 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT requiredSubgroupSizeCreateInfo[5] =
269 {
270 {
271 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
272 DE_NULL,
273 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[0] : 0u,
274 },
275 {
276 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
277 DE_NULL,
278 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[1] : 0u,
279 },
280 {
281 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
282 DE_NULL,
283 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[2] : 0u,
284 },
285 {
286 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
287 DE_NULL,
288 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[3] : 0u,
289 },
290 {
291 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
292 DE_NULL,
293 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[4] : 0u,
294 },
295 };
296
297 {
298 stageCreateInfo.pNext = (requiredSubgroupSizeCreateInfo[0].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[0] : DE_NULL;
299 stageCreateInfo.flags = vertexShaderStageCreateFlags;
300 stageCreateInfo.stage = VK_SHADER_STAGE_VERTEX_BIT;
301 stageCreateInfo.module = vertexShaderModule;
302 pipelineShaderStageParams.push_back(stageCreateInfo);
303 }
304
305 if (tessellationControlShaderModule != DE_NULL)
306 {
307 stageCreateInfo.pNext = (requiredSubgroupSizeCreateInfo[1].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[1] : DE_NULL;
308 stageCreateInfo.flags = tessellationControlShaderStageCreateFlags;
309 stageCreateInfo.stage = VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
310 stageCreateInfo.module = tessellationControlShaderModule;
311 pipelineShaderStageParams.push_back(stageCreateInfo);
312 }
313
314 if (tessellationEvalShaderModule != DE_NULL)
315 {
316 stageCreateInfo.pNext = (requiredSubgroupSize != DE_NULL && requiredSubgroupSizeCreateInfo[2].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[2] : DE_NULL;
317 stageCreateInfo.flags = tessellationEvalShaderStageCreateFlags;
318 stageCreateInfo.stage = VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
319 stageCreateInfo.module = tessellationEvalShaderModule;
320 pipelineShaderStageParams.push_back(stageCreateInfo);
321 }
322
323 if (geometryShaderModule != DE_NULL)
324 {
325 stageCreateInfo.pNext = (requiredSubgroupSizeCreateInfo[3].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[3] : DE_NULL;
326 stageCreateInfo.flags = geometryShaderStageCreateFlags;
327 stageCreateInfo.stage = VK_SHADER_STAGE_GEOMETRY_BIT;
328 stageCreateInfo.module = geometryShaderModule;
329 pipelineShaderStageParams.push_back(stageCreateInfo);
330 }
331
332 if (fragmentShaderModule != DE_NULL)
333 {
334 stageCreateInfo.pNext = (requiredSubgroupSizeCreateInfo[4].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[4] : DE_NULL;
335 stageCreateInfo.flags = fragmentShaderStageCreateFlags;
336 stageCreateInfo.stage = VK_SHADER_STAGE_FRAGMENT_BIT;
337 stageCreateInfo.module = fragmentShaderModule;
338 pipelineShaderStageParams.push_back(stageCreateInfo);
339 }
340
341 const VkVertexInputBindingDescription vertexInputBindingDescription =
342 {
343 0u, // deUint32 binding
344 sizeof(tcu::Vec4), // deUint32 stride
345 VK_VERTEX_INPUT_RATE_VERTEX, // VkVertexInputRate inputRate
346 };
347
348 const VkVertexInputAttributeDescription vertexInputAttributeDescription =
349 {
350 0u, // deUint32 location
351 0u, // deUint32 binding
352 VK_FORMAT_R32G32B32A32_SFLOAT, // VkFormat format
353 0u // deUint32 offset
354 };
355
356 const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfoDefault =
357 {
358 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType
359 DE_NULL, // const void* pNext
360 (VkPipelineVertexInputStateCreateFlags)0, // VkPipelineVertexInputStateCreateFlags flags
361 1u, // deUint32 vertexBindingDescriptionCount
362 &vertexInputBindingDescription, // const VkVertexInputBindingDescription* pVertexBindingDescriptions
363 1u, // deUint32 vertexAttributeDescriptionCount
364 &vertexInputAttributeDescription // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions
365 };
366
367 const VkPipelineInputAssemblyStateCreateInfo inputAssemblyStateCreateInfo =
368 {
369 VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, // VkStructureType sType
370 DE_NULL, // const void* pNext
371 0u, // VkPipelineInputAssemblyStateCreateFlags flags
372 topology, // VkPrimitiveTopology topology
373 VK_FALSE // VkBool32 primitiveRestartEnable
374 };
375
376 const VkPipelineTessellationStateCreateInfo tessStateCreateInfo =
377 {
378 VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, // VkStructureType sType
379 DE_NULL, // const void* pNext
380 0u, // VkPipelineTessellationStateCreateFlags flags
381 patchControlPoints // deUint32 patchControlPoints
382 };
383
384 const VkPipelineViewportStateCreateInfo viewportStateCreateInfo =
385 {
386 VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, // VkStructureType sType
387 DE_NULL, // const void* pNext
388 (VkPipelineViewportStateCreateFlags)0, // VkPipelineViewportStateCreateFlags flags
389 viewports.empty() ? 1u : (deUint32)viewports.size(), // deUint32 viewportCount
390 viewports.empty() ? DE_NULL : &viewports[0], // const VkViewport* pViewports
391 viewports.empty() ? 1u : (deUint32)scissors.size(), // deUint32 scissorCount
392 scissors.empty() ? DE_NULL : &scissors[0] // const VkRect2D* pScissors
393 };
394
395 const VkPipelineRasterizationStateCreateInfo rasterizationStateCreateInfoDefault =
396 {
397 VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, // VkStructureType sType
398 DE_NULL, // const void* pNext
399 0u, // VkPipelineRasterizationStateCreateFlags flags
400 VK_FALSE, // VkBool32 depthClampEnable
401 disableRasterization, // VkBool32 rasterizerDiscardEnable
402 VK_POLYGON_MODE_FILL, // VkPolygonMode polygonMode
403 VK_CULL_MODE_NONE, // VkCullModeFlags cullMode
404 VK_FRONT_FACE_COUNTER_CLOCKWISE, // VkFrontFace frontFace
405 VK_FALSE, // VkBool32 depthBiasEnable
406 0.0f, // float depthBiasConstantFactor
407 0.0f, // float depthBiasClamp
408 0.0f, // float depthBiasSlopeFactor
409 1.0f // float lineWidth
410 };
411
412 const VkPipelineMultisampleStateCreateInfo multisampleStateCreateInfoDefault =
413 {
414 VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, // VkStructureType sType
415 DE_NULL, // const void* pNext
416 0u, // VkPipelineMultisampleStateCreateFlags flags
417 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits rasterizationSamples
418 VK_FALSE, // VkBool32 sampleShadingEnable
419 1.0f, // float minSampleShading
420 DE_NULL, // const VkSampleMask* pSampleMask
421 VK_FALSE, // VkBool32 alphaToCoverageEnable
422 VK_FALSE // VkBool32 alphaToOneEnable
423 };
424
425 const VkStencilOpState stencilOpState =
426 {
427 VK_STENCIL_OP_KEEP, // VkStencilOp failOp
428 VK_STENCIL_OP_KEEP, // VkStencilOp passOp
429 VK_STENCIL_OP_KEEP, // VkStencilOp depthFailOp
430 VK_COMPARE_OP_NEVER, // VkCompareOp compareOp
431 0, // deUint32 compareMask
432 0, // deUint32 writeMask
433 0 // deUint32 reference
434 };
435
436 const VkPipelineDepthStencilStateCreateInfo depthStencilStateCreateInfoDefault =
437 {
438 VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, // VkStructureType sType
439 DE_NULL, // const void* pNext
440 0u, // VkPipelineDepthStencilStateCreateFlags flags
441 VK_FALSE, // VkBool32 depthTestEnable
442 VK_FALSE, // VkBool32 depthWriteEnable
443 VK_COMPARE_OP_LESS_OR_EQUAL, // VkCompareOp depthCompareOp
444 VK_FALSE, // VkBool32 depthBoundsTestEnable
445 VK_FALSE, // VkBool32 stencilTestEnable
446 stencilOpState, // VkStencilOpState front
447 stencilOpState, // VkStencilOpState back
448 0.0f, // float minDepthBounds
449 1.0f, // float maxDepthBounds
450 };
451
452 const VkPipelineColorBlendAttachmentState colorBlendAttachmentState =
453 {
454 VK_FALSE, // VkBool32 blendEnable
455 VK_BLEND_FACTOR_ZERO, // VkBlendFactor srcColorBlendFactor
456 VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstColorBlendFactor
457 VK_BLEND_OP_ADD, // VkBlendOp colorBlendOp
458 VK_BLEND_FACTOR_ZERO, // VkBlendFactor srcAlphaBlendFactor
459 VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstAlphaBlendFactor
460 VK_BLEND_OP_ADD, // VkBlendOp alphaBlendOp
461 VK_COLOR_COMPONENT_R_BIT // VkColorComponentFlags colorWriteMask
462 | VK_COLOR_COMPONENT_G_BIT
463 | VK_COLOR_COMPONENT_B_BIT
464 | VK_COLOR_COMPONENT_A_BIT
465 };
466
467 const VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfoDefault =
468 {
469 VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType sType
470 DE_NULL, // const void* pNext
471 0u, // VkPipelineColorBlendStateCreateFlags flags
472 VK_FALSE, // VkBool32 logicOpEnable
473 VK_LOGIC_OP_CLEAR, // VkLogicOp logicOp
474 1u, // deUint32 attachmentCount
475 &colorBlendAttachmentState, // const VkPipelineColorBlendAttachmentState* pAttachments
476 { 0.0f, 0.0f, 0.0f, 0.0f } // float blendConstants[4]
477 };
478
479 std::vector<VkDynamicState> dynamicStates;
480
481 if (viewports.empty())
482 dynamicStates.push_back(VK_DYNAMIC_STATE_VIEWPORT);
483 if (scissors.empty())
484 dynamicStates.push_back(VK_DYNAMIC_STATE_SCISSOR);
485
486 const VkPipelineDynamicStateCreateInfo dynamicStateCreateInfoDefault =
487 {
488 VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, // VkStructureType sType
489 DE_NULL, // const void* pNext
490 0u, // VkPipelineDynamicStateCreateFlags flags
491 (deUint32)dynamicStates.size(), // deUint32 dynamicStateCount
492 dynamicStates.empty() ? DE_NULL : &dynamicStates[0] // const VkDynamicState* pDynamicStates
493 };
494
495 const VkPipelineDynamicStateCreateInfo* dynamicStateCreateInfoDefaultPtr = dynamicStates.empty() ? DE_NULL : &dynamicStateCreateInfoDefault;
496
497 const VkGraphicsPipelineCreateInfo pipelineCreateInfo =
498 {
499 VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, // VkStructureType sType
500 DE_NULL, // const void* pNext
501 0u, // VkPipelineCreateFlags flags
502 (deUint32)pipelineShaderStageParams.size(), // deUint32 stageCount
503 &pipelineShaderStageParams[0], // const VkPipelineShaderStageCreateInfo* pStages
504 vertexInputStateCreateInfo ? vertexInputStateCreateInfo : &vertexInputStateCreateInfoDefault, // const VkPipelineVertexInputStateCreateInfo* pVertexInputState
505 &inputAssemblyStateCreateInfo, // const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState
506 hasTessellation ? &tessStateCreateInfo : DE_NULL, // const VkPipelineTessellationStateCreateInfo* pTessellationState
507 &viewportStateCreateInfo, // const VkPipelineViewportStateCreateInfo* pViewportState
508 rasterizationStateCreateInfo ? rasterizationStateCreateInfo : &rasterizationStateCreateInfoDefault, // const VkPipelineRasterizationStateCreateInfo* pRasterizationState
509 multisampleStateCreateInfo ? multisampleStateCreateInfo: &multisampleStateCreateInfoDefault, // const VkPipelineMultisampleStateCreateInfo* pMultisampleState
510 depthStencilStateCreateInfo ? depthStencilStateCreateInfo : &depthStencilStateCreateInfoDefault, // const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState
511 colorBlendStateCreateInfo ? colorBlendStateCreateInfo : &colorBlendStateCreateInfoDefault, // const VkPipelineColorBlendStateCreateInfo* pColorBlendState
512 dynamicStateCreateInfo ? dynamicStateCreateInfo : dynamicStateCreateInfoDefaultPtr, // const VkPipelineDynamicStateCreateInfo* pDynamicState
513 pipelineLayout, // VkPipelineLayout layout
514 renderPass, // VkRenderPass renderPass
515 subpass, // deUint32 subpass
516 DE_NULL, // VkPipeline basePipelineHandle
517 0 // deInt32 basePipelineIndex;
518 };
519
520 return createGraphicsPipeline(vk, device, DE_NULL, &pipelineCreateInfo);
521 }
522
makeGraphicsPipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderStageFlags stages,const VkShaderModule vertexShaderModule,const VkShaderModule fragmentShaderModule,const VkShaderModule geometryShaderModule,const VkShaderModule tessellationControlModule,const VkShaderModule tessellationEvaluationModule,const VkRenderPass renderPass,const VkPrimitiveTopology topology=VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,const VkVertexInputBindingDescription * vertexInputBindingDescription=DE_NULL,const VkVertexInputAttributeDescription * vertexInputAttributeDescriptions=DE_NULL,const bool frameBufferTests=false,const vk::VkFormat attachmentFormat=VK_FORMAT_R32G32B32A32_SFLOAT,const deUint32 vertexShaderStageCreateFlags=0u,const deUint32 tessellationControlShaderStageCreateFlags=0u,const deUint32 tessellationEvalShaderStageCreateFlags=0u,const deUint32 geometryShaderStageCreateFlags=0u,const deUint32 fragmentShaderStageCreateFlags=0u,const deUint32 requiredSubgroupSize[5]=DE_NULL)523 Move<VkPipeline> makeGraphicsPipeline (Context& context,
524 const VkPipelineLayout pipelineLayout,
525 const VkShaderStageFlags stages,
526 const VkShaderModule vertexShaderModule,
527 const VkShaderModule fragmentShaderModule,
528 const VkShaderModule geometryShaderModule,
529 const VkShaderModule tessellationControlModule,
530 const VkShaderModule tessellationEvaluationModule,
531 const VkRenderPass renderPass,
532 const VkPrimitiveTopology topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
533 const VkVertexInputBindingDescription* vertexInputBindingDescription = DE_NULL,
534 const VkVertexInputAttributeDescription* vertexInputAttributeDescriptions = DE_NULL,
535 const bool frameBufferTests = false,
536 const vk::VkFormat attachmentFormat = VK_FORMAT_R32G32B32A32_SFLOAT,
537 const deUint32 vertexShaderStageCreateFlags = 0u,
538 const deUint32 tessellationControlShaderStageCreateFlags = 0u,
539 const deUint32 tessellationEvalShaderStageCreateFlags = 0u,
540 const deUint32 geometryShaderStageCreateFlags = 0u,
541 const deUint32 fragmentShaderStageCreateFlags = 0u,
542 const deUint32 requiredSubgroupSize[5] = DE_NULL)
543 {
544 const std::vector<VkViewport> noViewports;
545 const std::vector<VkRect2D> noScissors;
546 const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo =
547 {
548 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
549 DE_NULL, // const void* pNext;
550 0u, // VkPipelineVertexInputStateCreateFlags flags;
551 vertexInputBindingDescription == DE_NULL ? 0u : 1u, // deUint32 vertexBindingDescriptionCount;
552 vertexInputBindingDescription, // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
553 vertexInputAttributeDescriptions == DE_NULL ? 0u : 1u, // deUint32 vertexAttributeDescriptionCount;
554 vertexInputAttributeDescriptions, // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
555 };
556 const deUint32 numChannels = getNumUsedChannels(mapVkFormat(attachmentFormat).order);
557 const VkColorComponentFlags colorComponent = numChannels == 1 ? VK_COLOR_COMPONENT_R_BIT :
558 numChannels == 2 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT :
559 numChannels == 3 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT :
560 VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
561 const VkPipelineColorBlendAttachmentState colorBlendAttachmentState =
562 {
563 VK_FALSE, // VkBool32 blendEnable;
564 VK_BLEND_FACTOR_ZERO, // VkBlendFactor srcColorBlendFactor;
565 VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstColorBlendFactor;
566 VK_BLEND_OP_ADD, // VkBlendOp colorBlendOp;
567 VK_BLEND_FACTOR_ZERO, // VkBlendFactor srcAlphaBlendFactor;
568 VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstAlphaBlendFactor;
569 VK_BLEND_OP_ADD, // VkBlendOp alphaBlendOp;
570 colorComponent // VkColorComponentFlags colorWriteMask;
571 };
572 const VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfo =
573 {
574 VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType sType;
575 DE_NULL, // const void* pNext;
576 0u, // VkPipelineColorBlendStateCreateFlags flags;
577 VK_FALSE, // VkBool32 logicOpEnable;
578 VK_LOGIC_OP_CLEAR, // VkLogicOp logicOp;
579 1, // deUint32 attachmentCount;
580 &colorBlendAttachmentState, // const VkPipelineColorBlendAttachmentState* pAttachments;
581 { 0.0f, 0.0f, 0.0f, 0.0f } // float blendConstants[4];
582 };
583 const deUint32 patchControlPoints = (VK_SHADER_STAGE_FRAGMENT_BIT & stages && frameBufferTests) ? 2u : 1u;
584
585 return makeGraphicsPipeline(context.getDeviceInterface(), // const DeviceInterface& vk
586 context.getDevice(), // const VkDevice device
587 pipelineLayout, // const VkPipelineLayout pipelineLayout
588 vertexShaderModule, // const VkShaderModule vertexShaderModule
589 tessellationControlModule, // const VkShaderModule tessellationControlShaderModule
590 tessellationEvaluationModule, // const VkShaderModule tessellationEvalShaderModule
591 geometryShaderModule, // const VkShaderModule geometryShaderModule
592 fragmentShaderModule, // const VkShaderModule fragmentShaderModule
593 renderPass, // const VkRenderPass renderPass
594 noViewports, // const std::vector<VkViewport>& viewports
595 noScissors, // const std::vector<VkRect2D>& scissors
596 topology, // const VkPrimitiveTopology topology
597 0u, // const deUint32 subpass
598 patchControlPoints, // const deUint32 patchControlPoints
599 &vertexInputStateCreateInfo, // const VkPipelineVertexInputStateCreateInfo* vertexInputStateCreateInfo
600 DE_NULL, // const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
601 DE_NULL, // const VkPipelineMultisampleStateCreateInfo* multisampleStateCreateInfo
602 DE_NULL, // const VkPipelineDepthStencilStateCreateInfo* depthStencilStateCreateInfo
603 &colorBlendStateCreateInfo, // const VkPipelineColorBlendStateCreateInfo* colorBlendStateCreateInfo
604 DE_NULL, // const VkPipelineDynamicStateCreateInfo*
605 vertexShaderStageCreateFlags, // const deUint32 vertexShaderStageCreateFlags,
606 tessellationControlShaderStageCreateFlags, // const deUint32 tessellationControlShaderStageCreateFlags
607 tessellationEvalShaderStageCreateFlags, // const deUint32 tessellationEvalShaderStageCreateFlags
608 geometryShaderStageCreateFlags, // const deUint32 geometryShaderStageCreateFlags
609 fragmentShaderStageCreateFlags, // const deUint32 fragmentShaderStageCreateFlags
610 requiredSubgroupSize); // const deUint32 requiredSubgroupSize[5]
611 }
612
makeCommandBuffer(Context & context,const VkCommandPool commandPool)613 Move<VkCommandBuffer> makeCommandBuffer (Context& context, const VkCommandPool commandPool)
614 {
615 const VkCommandBufferAllocateInfo bufferAllocateParams =
616 {
617 VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, // VkStructureType sType;
618 DE_NULL, // const void* pNext;
619 commandPool, // VkCommandPool commandPool;
620 VK_COMMAND_BUFFER_LEVEL_PRIMARY, // VkCommandBufferLevel level;
621 1u, // deUint32 bufferCount;
622 };
623 return allocateCommandBuffer(context.getDeviceInterface(),
624 context.getDevice(), &bufferAllocateParams);
625 }
626
627 struct Buffer;
628 struct Image;
629
630 struct BufferOrImage
631 {
isImage__anon89c025990111::BufferOrImage632 bool isImage() const
633 {
634 return m_isImage;
635 }
636
getAsBuffer__anon89c025990111::BufferOrImage637 Buffer* getAsBuffer()
638 {
639 if (m_isImage) DE_FATAL("Trying to get a buffer as an image!");
640 return reinterpret_cast<Buffer* >(this);
641 }
642
getAsImage__anon89c025990111::BufferOrImage643 Image* getAsImage()
644 {
645 if (!m_isImage) DE_FATAL("Trying to get an image as a buffer!");
646 return reinterpret_cast<Image*>(this);
647 }
648
getType__anon89c025990111::BufferOrImage649 virtual VkDescriptorType getType() const
650 {
651 if (m_isImage)
652 {
653 return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
654 }
655 else
656 {
657 return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
658 }
659 }
660
getAllocation__anon89c025990111::BufferOrImage661 Allocation& getAllocation() const
662 {
663 return *m_allocation;
664 }
665
~BufferOrImage__anon89c025990111::BufferOrImage666 virtual ~BufferOrImage() {}
667
668 protected:
BufferOrImage__anon89c025990111::BufferOrImage669 explicit BufferOrImage(bool image) : m_isImage(image) {}
670
671 bool m_isImage;
672 de::details::MovePtr<Allocation> m_allocation;
673 };
674
675 struct Buffer : public BufferOrImage
676 {
Buffer__anon89c025990111::Buffer677 explicit Buffer (Context& context, VkDeviceSize sizeInBytes, VkBufferUsageFlags usage)
678 : BufferOrImage (false)
679 , m_sizeInBytes (sizeInBytes)
680 , m_usage (usage)
681 {
682 const DeviceInterface& vkd = context.getDeviceInterface();
683 const VkDevice device = context.getDevice();
684
685 const vk::VkBufferCreateInfo bufferCreateInfo =
686 {
687 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
688 DE_NULL,
689 0u,
690 m_sizeInBytes,
691 m_usage,
692 VK_SHARING_MODE_EXCLUSIVE,
693 0u,
694 DE_NULL,
695 };
696 m_buffer = createBuffer(vkd, device, &bufferCreateInfo);
697
698 VkMemoryRequirements req = getBufferMemoryRequirements(vkd, device, *m_buffer);
699
700 m_allocation = context.getDefaultAllocator().allocate(req, MemoryRequirement::HostVisible);
701 VK_CHECK(vkd.bindBufferMemory(device, *m_buffer, m_allocation->getMemory(), m_allocation->getOffset()));
702 }
703
getType__anon89c025990111::Buffer704 virtual VkDescriptorType getType() const
705 {
706 if (VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT == m_usage)
707 {
708 return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
709 }
710 return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
711 }
712
getBuffer__anon89c025990111::Buffer713 VkBuffer getBuffer () const
714 {
715 return *m_buffer;
716 }
717
getBufferPtr__anon89c025990111::Buffer718 const VkBuffer* getBufferPtr () const
719 {
720 return &(*m_buffer);
721 }
722
getSize__anon89c025990111::Buffer723 VkDeviceSize getSize () const
724 {
725 return m_sizeInBytes;
726 }
727
728 private:
729 Move<VkBuffer> m_buffer;
730 VkDeviceSize m_sizeInBytes;
731 const VkBufferUsageFlags m_usage;
732 };
733
// 2D image wrapper: creates the VkImage, binds device memory, and also
// creates a matching image view and a nearest-filtering sampler. On
// construction the image is transitioned to VK_IMAGE_LAYOUT_GENERAL with a
// one-shot command buffer, so it is immediately usable as a storage image.
struct Image : public BufferOrImage
{
	explicit Image (Context& context, deUint32 width, deUint32 height, VkFormat format, VkImageUsageFlags usage = VK_IMAGE_USAGE_STORAGE_BIT)
		: BufferOrImage(true)
	{
		const DeviceInterface& vk = context.getDeviceInterface();
		const VkDevice device = context.getDevice();
		const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();

		// Single-mip, single-layer optimally-tiled 2D image.
		const VkImageCreateInfo imageCreateInfo =
		{
			VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	// VkStructureType			sType;
			DE_NULL,								// const void*				pNext;
			0,										// VkImageCreateFlags		flags;
			VK_IMAGE_TYPE_2D,						// VkImageType				imageType;
			format,									// VkFormat					format;
			{width, height, 1},						// VkExtent3D				extent;
			1,										// deUint32					mipLevels;
			1,										// deUint32					arrayLayers;
			VK_SAMPLE_COUNT_1_BIT,					// VkSampleCountFlagBits	samples;
			VK_IMAGE_TILING_OPTIMAL,				// VkImageTiling			tiling;
			usage,									// VkImageUsageFlags		usage;
			VK_SHARING_MODE_EXCLUSIVE,				// VkSharingMode			sharingMode;
			0u,										// deUint32					queueFamilyIndexCount;
			DE_NULL,								// const deUint32*			pQueueFamilyIndices;
			VK_IMAGE_LAYOUT_UNDEFINED				// VkImageLayout			initialLayout;
		};

		const VkComponentMapping componentMapping =
		{
			VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
			VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY
		};

		// Whole image: the single color mip level and layer created above.
		const VkImageSubresourceRange subresourceRange =
		{
			VK_IMAGE_ASPECT_COLOR_BIT,	//VkImageAspectFlags	aspectMask
			0u,							//deUint32				baseMipLevel
			1u,							//deUint32				levelCount
			0u,							//deUint32				baseArrayLayer
			1u							//deUint32				layerCount
		};

		// Nearest/clamp sampler with normalized coordinates and no compare.
		const VkSamplerCreateInfo samplerCreateInfo =
		{
			VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,		// VkStructureType			sType;
			DE_NULL,									// const void*				pNext;
			0u,											// VkSamplerCreateFlags		flags;
			VK_FILTER_NEAREST,							// VkFilter					magFilter;
			VK_FILTER_NEAREST,							// VkFilter					minFilter;
			VK_SAMPLER_MIPMAP_MODE_NEAREST,				// VkSamplerMipmapMode		mipmapMode;
			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,		// VkSamplerAddressMode		addressModeU;
			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,		// VkSamplerAddressMode		addressModeV;
			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,		// VkSamplerAddressMode		addressModeW;
			0.0f,										// float					mipLodBias;
			VK_FALSE,									// VkBool32					anisotropyEnable;
			1.0f,										// float					maxAnisotropy;
			DE_FALSE,									// VkBool32					compareEnable;
			VK_COMPARE_OP_ALWAYS,						// VkCompareOp				compareOp;
			0.0f,										// float					minLod;
			0.0f,										// float					maxLod;
			VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,	// VkBorderColor			borderColor;
			VK_FALSE,									// VkBool32					unnormalizedCoordinates;
		};

		m_image = createImage(vk, device, &imageCreateInfo);

		VkMemoryRequirements req = getImageMemoryRequirements(vk, device, *m_image);

		// NOTE(review): allocation is deliberately twice the reported
		// requirement -- presumably extra headroom; confirm the reason before
		// changing this.
		req.size *= 2;
		m_allocation = context.getDefaultAllocator().allocate(req, MemoryRequirement::Any);

		VK_CHECK(vk.bindImageMemory(device, *m_image, m_allocation->getMemory(), m_allocation->getOffset()));

		const VkImageViewCreateInfo imageViewCreateInfo =
		{
			VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,	// VkStructureType			sType;
			DE_NULL,									// const void*				pNext;
			0,											// VkImageViewCreateFlags	flags;
			*m_image,									// VkImage					image;
			VK_IMAGE_VIEW_TYPE_2D,						// VkImageViewType			viewType;
			imageCreateInfo.format,						// VkFormat					format;
			componentMapping,							// VkComponentMapping		components;
			subresourceRange							// VkImageSubresourceRange	subresourceRange;
		};

		m_imageView = createImageView(vk, device, &imageViewCreateInfo);
		m_sampler = createSampler(vk, device, &samplerCreateInfo);

		// Transition input image layouts
		{
			// One-shot command buffer: UNDEFINED -> GENERAL barrier, then a
			// blocking submit so the image is ready when the ctor returns.
			const Unique<VkCommandPool>		cmdPool		(makeCommandPool(vk, device, queueFamilyIndex));
			const Unique<VkCommandBuffer>	cmdBuffer	(makeCommandBuffer(context, *cmdPool));

			beginCommandBuffer(vk, *cmdBuffer);

			const VkImageMemoryBarrier imageBarrier = makeImageMemoryBarrier((VkAccessFlags)0u, VK_ACCESS_TRANSFER_WRITE_BIT,
				VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, *m_image, subresourceRange);

			vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
				(VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &imageBarrier);

			endCommandBuffer(vk, *cmdBuffer);
			submitCommandsAndWait(vk, device, context.getUniversalQueue(), *cmdBuffer);
		}
	}

	VkImage getImage () const
	{
		return *m_image;
	}

	VkImageView getImageView () const
	{
		return *m_imageView;
	}

	VkSampler getSampler () const
	{
		return *m_sampler;
	}

private:
	Move<VkImage>		m_image;
	Move<VkImageView>	m_imageView;
	Move<VkSampler>		m_sampler;
};
861 }
862
getStagesCount(const VkShaderStageFlags shaderStages)863 deUint32 vkt::subgroups::getStagesCount (const VkShaderStageFlags shaderStages)
864 {
865 const deUint32 stageCount = isAllGraphicsStages(shaderStages) ? 4
866 : isAllComputeStages(shaderStages) ? 1
867 #ifndef CTS_USES_VULKANSC
868 : isAllRayTracingStages(shaderStages) ? 6
869 : isAllMeshShadingStages(shaderStages) ? 1
870 #endif // CTS_USES_VULKANSC
871 : 0;
872
873 DE_ASSERT(stageCount != 0);
874
875 return stageCount;
876 }
877
// Returns GLSL source for a sharedMemoryBallot() helper that emulates
// subgroupBallot() via shared memory: one elected invocation clears the
// subgroup's uvec4 slot, then every invocation that votes true atomically
// ORs its bit into the 128-bit mask (4 x 32 bits, selected by
// gl_SubgroupInvocationID), with shared-memory barriers around both phases.
std::string vkt::subgroups::getSharedMemoryBallotHelper ()
{
	return	"shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
			"uvec4 sharedMemoryBallot(bool vote)\n"
			"{\n"
			"  uint groupOffset = gl_SubgroupID;\n"
			"  // One invocation in the group 0's the whole group's data\n"
			"  if (subgroupElect())\n"
			"  {\n"
			"    superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
			"  }\n"
			"  subgroupMemoryBarrierShared();\n"
			"  if (vote)\n"
			"  {\n"
			"    const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
			"    const highp uint bitToSet = 1u << invocationId;\n"
			"    switch (gl_SubgroupInvocationID / 32)\n"
			"    {\n"
			"    case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
			"    case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
			"    case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
			"    case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
			"    }\n"
			"  }\n"
			"  subgroupMemoryBarrierShared();\n"
			"  return superSecretComputeShaderHelper[groupOffset];\n"
			"}\n";
}
906
// ARB variant of getSharedMemoryBallotHelper(): same shared-memory ballot
// emulation, but the returned GLSL helper yields a uint64_t built by packing
// only the low two 32-bit words (packUint2x32 of .xy) -- i.e. the lower 64
// bits of the accumulated mask.
std::string vkt::subgroups::getSharedMemoryBallotHelperARB ()
{
	return	"shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
			"uint64_t sharedMemoryBallot(bool vote)\n"
			"{\n"
			"  uint groupOffset = gl_SubgroupID;\n"
			"  // One invocation in the group 0's the whole group's data\n"
			"  if (subgroupElect())\n"
			"  {\n"
			"    superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
			"  }\n"
			"  subgroupMemoryBarrierShared();\n"
			"  if (vote)\n"
			"  {\n"
			"    const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
			"    const highp uint bitToSet = 1u << invocationId;\n"
			"    switch (gl_SubgroupInvocationID / 32)\n"
			"    {\n"
			"    case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
			"    case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
			"    case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
			"    case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
			"    }\n"
			"  }\n"
			"  subgroupMemoryBarrierShared();\n"
			"  return packUint2x32(superSecretComputeShaderHelper[groupOffset].xy);\n"
			"}\n";
}
935
getSubgroupSize(Context & context)936 deUint32 vkt::subgroups::getSubgroupSize (Context& context)
937 {
938 return context.getSubgroupProperties().subgroupSize;
939 }
940
maxSupportedSubgroupSize()941 deUint32 vkt::subgroups::maxSupportedSubgroupSize ()
942 {
943 return 128u;
944 }
945
getShaderStageName(VkShaderStageFlags stage)946 std::string vkt::subgroups::getShaderStageName (VkShaderStageFlags stage)
947 {
948 switch (stage)
949 {
950 case VK_SHADER_STAGE_COMPUTE_BIT: return "compute";
951 case VK_SHADER_STAGE_FRAGMENT_BIT: return "fragment";
952 case VK_SHADER_STAGE_VERTEX_BIT: return "vertex";
953 case VK_SHADER_STAGE_GEOMETRY_BIT: return "geometry";
954 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: return "tess_control";
955 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: return "tess_eval";
956 #ifndef CTS_USES_VULKANSC
957 case VK_SHADER_STAGE_RAYGEN_BIT_KHR: return "rgen";
958 case VK_SHADER_STAGE_ANY_HIT_BIT_KHR: return "ahit";
959 case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR: return "chit";
960 case VK_SHADER_STAGE_MISS_BIT_KHR: return "miss";
961 case VK_SHADER_STAGE_INTERSECTION_BIT_KHR: return "sect";
962 case VK_SHADER_STAGE_CALLABLE_BIT_KHR: return "call";
963 case VK_SHADER_STAGE_MESH_BIT_EXT: return "mesh";
964 case VK_SHADER_STAGE_TASK_BIT_EXT: return "task";
965 #endif // CTS_USES_VULKANSC
966 default: TCU_THROW(InternalError, "Unhandled stage");
967 }
968 }
969
getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit)970 std::string vkt::subgroups::getSubgroupFeatureName (vk::VkSubgroupFeatureFlagBits bit)
971 {
972 switch (bit)
973 {
974 case VK_SUBGROUP_FEATURE_BASIC_BIT: return "VK_SUBGROUP_FEATURE_BASIC_BIT";
975 case VK_SUBGROUP_FEATURE_VOTE_BIT: return "VK_SUBGROUP_FEATURE_VOTE_BIT";
976 case VK_SUBGROUP_FEATURE_ARITHMETIC_BIT: return "VK_SUBGROUP_FEATURE_ARITHMETIC_BIT";
977 case VK_SUBGROUP_FEATURE_BALLOT_BIT: return "VK_SUBGROUP_FEATURE_BALLOT_BIT";
978 case VK_SUBGROUP_FEATURE_SHUFFLE_BIT: return "VK_SUBGROUP_FEATURE_SHUFFLE_BIT";
979 case VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT: return "VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT";
980 case VK_SUBGROUP_FEATURE_CLUSTERED_BIT: return "VK_SUBGROUP_FEATURE_CLUSTERED_BIT";
981 case VK_SUBGROUP_FEATURE_QUAD_BIT: return "VK_SUBGROUP_FEATURE_QUAD_BIT";
982 default: TCU_THROW(InternalError, "Unknown subgroup feature category");
983 }
984 }
985
// Registers pre-assembled SPIR-V fallback shaders ("*_noSubgroup") that use
// no subgroup functionality. They fill pipeline stages that are not under
// test: a pass-through point-rendering vertex shader, a single-vertex
// tessellation control shader, and an isoline tessellation evaluation
// shader. Each assembly block is preceded by the GLSL it was compiled from.
void vkt::subgroups::addNoSubgroupShader (SourceCollections& programCollection)
{
	{
		// Vertex shader: places one point per vertex index across a
		// 1024-pixel-wide strip.
		/*
			"#version 450\n"
			"void main (void)\n"
			"{\n"
			"  float pixelSize = 2.0f/1024.0f;\n"
			"  float pixelPosition = pixelSize/2.0f - 1.0f;\n"
			"  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
			"  gl_PointSize = 1.0f;\n"
			"}\n"
		*/
		const std::string vertNoSubgroup =
			"; SPIR-V\n"
			"; Version: 1.3\n"
			"; Generator: Khronos Glslang Reference Front End; 1\n"
			"; Bound: 37\n"
			"; Schema: 0\n"
			"OpCapability Shader\n"
			"%1 = OpExtInstImport \"GLSL.std.450\"\n"
			"OpMemoryModel Logical GLSL450\n"
			"OpEntryPoint Vertex %4 \"main\" %22 %26\n"
			"OpMemberDecorate %20 0 BuiltIn Position\n"
			"OpMemberDecorate %20 1 BuiltIn PointSize\n"
			"OpMemberDecorate %20 2 BuiltIn ClipDistance\n"
			"OpMemberDecorate %20 3 BuiltIn CullDistance\n"
			"OpDecorate %20 Block\n"
			"OpDecorate %26 BuiltIn VertexIndex\n"
			"%2 = OpTypeVoid\n"
			"%3 = OpTypeFunction %2\n"
			"%6 = OpTypeFloat 32\n"
			"%7 = OpTypePointer Function %6\n"
			"%9 = OpConstant %6 0.00195313\n"
			"%12 = OpConstant %6 2\n"
			"%14 = OpConstant %6 1\n"
			"%16 = OpTypeVector %6 4\n"
			"%17 = OpTypeInt 32 0\n"
			"%18 = OpConstant %17 1\n"
			"%19 = OpTypeArray %6 %18\n"
			"%20 = OpTypeStruct %16 %6 %19 %19\n"
			"%21 = OpTypePointer Output %20\n"
			"%22 = OpVariable %21 Output\n"
			"%23 = OpTypeInt 32 1\n"
			"%24 = OpConstant %23 0\n"
			"%25 = OpTypePointer Input %23\n"
			"%26 = OpVariable %25 Input\n"
			"%33 = OpConstant %6 0\n"
			"%35 = OpTypePointer Output %16\n"
			"%37 = OpConstant %23 1\n"
			"%38 = OpTypePointer Output %6\n"
			"%4 = OpFunction %2 None %3\n"
			"%5 = OpLabel\n"
			"%8 = OpVariable %7 Function\n"
			"%10 = OpVariable %7 Function\n"
			"OpStore %8 %9\n"
			"%11 = OpLoad %6 %8\n"
			"%13 = OpFDiv %6 %11 %12\n"
			"%15 = OpFSub %6 %13 %14\n"
			"OpStore %10 %15\n"
			"%27 = OpLoad %23 %26\n"
			"%28 = OpConvertSToF %6 %27\n"
			"%29 = OpLoad %6 %8\n"
			"%30 = OpFMul %6 %28 %29\n"
			"%31 = OpLoad %6 %10\n"
			"%32 = OpFAdd %6 %30 %31\n"
			"%34 = OpCompositeConstruct %16 %32 %33 %33 %14\n"
			"%36 = OpAccessChain %35 %22 %24\n"
			"OpStore %36 %34\n"
			"%39 = OpAccessChain %38 %22 %37\n"
			"OpStore %39 %14\n"
			"OpReturn\n"
			"OpFunctionEnd\n";
		programCollection.spirvAsmSources.add("vert_noSubgroup") << vertNoSubgroup;
	}

	{
		// Tessellation control shader: one output vertex, outer levels set
		// to 1.0, position passed through.
		/*
			"#version 450\n"
			"layout(vertices=1) out;\n"
			"\n"
			"void main (void)\n"
			"{\n"
			"  if (gl_InvocationID == 0)\n"
			"  {\n"
			"    gl_TessLevelOuter[0] = 1.0f;\n"
			"    gl_TessLevelOuter[1] = 1.0f;\n"
			"  }\n"
			"  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
			"}\n"
		*/
		const std::string tescNoSubgroup =
			"; SPIR-V\n"
			"; Version: 1.3\n"
			"; Generator: Khronos Glslang Reference Front End; 1\n"
			"; Bound: 45\n"
			"; Schema: 0\n"
			"OpCapability Tessellation\n"
			"%1 = OpExtInstImport \"GLSL.std.450\"\n"
			"OpMemoryModel Logical GLSL450\n"
			"OpEntryPoint TessellationControl %4 \"main\" %8 %20 %32 %38\n"
			"OpExecutionMode %4 OutputVertices 1\n"
			"OpDecorate %8 BuiltIn InvocationId\n"
			"OpDecorate %20 Patch\n"
			"OpDecorate %20 BuiltIn TessLevelOuter\n"
			"OpMemberDecorate %29 0 BuiltIn Position\n"
			"OpMemberDecorate %29 1 BuiltIn PointSize\n"
			"OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
			"OpMemberDecorate %29 3 BuiltIn CullDistance\n"
			"OpDecorate %29 Block\n"
			"OpMemberDecorate %34 0 BuiltIn Position\n"
			"OpMemberDecorate %34 1 BuiltIn PointSize\n"
			"OpMemberDecorate %34 2 BuiltIn ClipDistance\n"
			"OpMemberDecorate %34 3 BuiltIn CullDistance\n"
			"OpDecorate %34 Block\n"
			"%2 = OpTypeVoid\n"
			"%3 = OpTypeFunction %2\n"
			"%6 = OpTypeInt 32 1\n"
			"%7 = OpTypePointer Input %6\n"
			"%8 = OpVariable %7 Input\n"
			"%10 = OpConstant %6 0\n"
			"%11 = OpTypeBool\n"
			"%15 = OpTypeFloat 32\n"
			"%16 = OpTypeInt 32 0\n"
			"%17 = OpConstant %16 4\n"
			"%18 = OpTypeArray %15 %17\n"
			"%19 = OpTypePointer Output %18\n"
			"%20 = OpVariable %19 Output\n"
			"%21 = OpConstant %15 1\n"
			"%22 = OpTypePointer Output %15\n"
			"%24 = OpConstant %6 1\n"
			"%26 = OpTypeVector %15 4\n"
			"%27 = OpConstant %16 1\n"
			"%28 = OpTypeArray %15 %27\n"
			"%29 = OpTypeStruct %26 %15 %28 %28\n"
			"%30 = OpTypeArray %29 %27\n"
			"%31 = OpTypePointer Output %30\n"
			"%32 = OpVariable %31 Output\n"
			"%34 = OpTypeStruct %26 %15 %28 %28\n"
			"%35 = OpConstant %16 32\n"
			"%36 = OpTypeArray %34 %35\n"
			"%37 = OpTypePointer Input %36\n"
			"%38 = OpVariable %37 Input\n"
			"%40 = OpTypePointer Input %26\n"
			"%43 = OpTypePointer Output %26\n"
			"%4 = OpFunction %2 None %3\n"
			"%5 = OpLabel\n"
			"%9 = OpLoad %6 %8\n"
			"%12 = OpIEqual %11 %9 %10\n"
			"OpSelectionMerge %14 None\n"
			"OpBranchConditional %12 %13 %14\n"
			"%13 = OpLabel\n"
			"%23 = OpAccessChain %22 %20 %10\n"
			"OpStore %23 %21\n"
			"%25 = OpAccessChain %22 %20 %24\n"
			"OpStore %25 %21\n"
			"OpBranch %14\n"
			"%14 = OpLabel\n"
			"%33 = OpLoad %6 %8\n"
			"%39 = OpLoad %6 %8\n"
			"%41 = OpAccessChain %40 %38 %39 %10\n"
			"%42 = OpLoad %26 %41\n"
			"%44 = OpAccessChain %43 %32 %33 %10\n"
			"OpStore %44 %42\n"
			"OpReturn\n"
			"OpFunctionEnd\n";
		programCollection.spirvAsmSources.add("tesc_noSubgroup") << tescNoSubgroup;
	}

	{
		// Tessellation evaluation shader: isolines; nudges the patch
		// position by gl_TessCoord.x * half a pixel.
		/*
			"#version 450\n"
			"layout(isolines) in;\n"
			"\n"
			"void main (void)\n"
			"{\n"
			"  float pixelSize = 2.0f/1024.0f;\n"
			"  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
			"}\n";
		*/
		const std::string teseNoSubgroup =
			"; SPIR-V\n"
			"; Version: 1.3\n"
			"; Generator: Khronos Glslang Reference Front End; 2\n"
			"; Bound: 42\n"
			"; Schema: 0\n"
			"OpCapability Tessellation\n"
			"%1 = OpExtInstImport \"GLSL.std.450\"\n"
			"OpMemoryModel Logical GLSL450\n"
			"OpEntryPoint TessellationEvaluation %4 \"main\" %16 %23 %29\n"
			"OpExecutionMode %4 Isolines\n"
			"OpExecutionMode %4 SpacingEqual\n"
			"OpExecutionMode %4 VertexOrderCcw\n"
			"OpMemberDecorate %14 0 BuiltIn Position\n"
			"OpMemberDecorate %14 1 BuiltIn PointSize\n"
			"OpMemberDecorate %14 2 BuiltIn ClipDistance\n"
			"OpMemberDecorate %14 3 BuiltIn CullDistance\n"
			"OpDecorate %14 Block\n"
			"OpMemberDecorate %19 0 BuiltIn Position\n"
			"OpMemberDecorate %19 1 BuiltIn PointSize\n"
			"OpMemberDecorate %19 2 BuiltIn ClipDistance\n"
			"OpMemberDecorate %19 3 BuiltIn CullDistance\n"
			"OpDecorate %19 Block\n"
			"OpDecorate %29 BuiltIn TessCoord\n"
			"%2 = OpTypeVoid\n"
			"%3 = OpTypeFunction %2\n"
			"%6 = OpTypeFloat 32\n"
			"%7 = OpTypePointer Function %6\n"
			"%9 = OpConstant %6 0.00195313\n"
			"%10 = OpTypeVector %6 4\n"
			"%11 = OpTypeInt 32 0\n"
			"%12 = OpConstant %11 1\n"
			"%13 = OpTypeArray %6 %12\n"
			"%14 = OpTypeStruct %10 %6 %13 %13\n"
			"%15 = OpTypePointer Output %14\n"
			"%16 = OpVariable %15 Output\n"
			"%17 = OpTypeInt 32 1\n"
			"%18 = OpConstant %17 0\n"
			"%19 = OpTypeStruct %10 %6 %13 %13\n"
			"%20 = OpConstant %11 32\n"
			"%21 = OpTypeArray %19 %20\n"
			"%22 = OpTypePointer Input %21\n"
			"%23 = OpVariable %22 Input\n"
			"%24 = OpTypePointer Input %10\n"
			"%27 = OpTypeVector %6 3\n"
			"%28 = OpTypePointer Input %27\n"
			"%29 = OpVariable %28 Input\n"
			"%30 = OpConstant %11 0\n"
			"%31 = OpTypePointer Input %6\n"
			"%36 = OpConstant %6 2\n"
			"%40 = OpTypePointer Output %10\n"
			"%4 = OpFunction %2 None %3\n"
			"%5 = OpLabel\n"
			"%8 = OpVariable %7 Function\n"
			"OpStore %8 %9\n"
			"%25 = OpAccessChain %24 %23 %18 %18\n"
			"%26 = OpLoad %10 %25\n"
			"%32 = OpAccessChain %31 %29 %30\n"
			"%33 = OpLoad %6 %32\n"
			"%34 = OpLoad %6 %8\n"
			"%35 = OpFMul %6 %33 %34\n"
			"%37 = OpFDiv %6 %35 %36\n"
			"%38 = OpCompositeConstruct %10 %37 %37 %37 %37\n"
			"%39 = OpFAdd %10 %26 %38\n"
			"%41 = OpAccessChain %40 %16 %18\n"
			"OpStore %41 %39\n"
			"OpReturn\n"
			"OpFunctionEnd\n";
		programCollection.spirvAsmSources.add("tese_noSubgroup") << teseNoSubgroup;
	}

}
1238
getFramebufferBufferDeclarations(const VkFormat & format,const std::vector<std::string> & declarations,const deUint32 stage)1239 static std::string getFramebufferBufferDeclarations (const VkFormat& format,
1240 const std::vector<std::string>& declarations,
1241 const deUint32 stage)
1242 {
1243 if (declarations.empty())
1244 {
1245 const std::string name = (stage == 0) ? "result" : "out_color";
1246 const std::string suffix = (stage == 2) ? "[]" : "";
1247 const std::string result =
1248 "layout(location = 0) out float " + name + suffix + ";\n"
1249 "layout(set = 0, binding = 0) uniform Buffer1\n"
1250 "{\n"
1251 " " + de::toString(subgroups::getFormatNameForGLSL(format)) + " data[" + de::toString(subgroups::maxSupportedSubgroupSize()) + "];\n"
1252 "};\n";
1253
1254 return result;
1255 }
1256 else
1257 {
1258 return declarations[stage];
1259 }
1260 }
1261
initStdFrameBufferPrograms(SourceCollections & programCollection,const vk::ShaderBuildOptions & buildOptions,VkShaderStageFlags shaderStage,VkFormat format,bool gsPointSize,const std::string & extHeader,const std::string & testSrc,const std::string & helperStr,const std::vector<std::string> & declarations)1262 void vkt::subgroups::initStdFrameBufferPrograms (SourceCollections& programCollection,
1263 const vk::ShaderBuildOptions& buildOptions,
1264 VkShaderStageFlags shaderStage,
1265 VkFormat format,
1266 bool gsPointSize,
1267 const std::string& extHeader,
1268 const std::string& testSrc,
1269 const std::string& helperStr,
1270 const std::vector<std::string>& declarations)
1271 {
1272 subgroups::setFragmentShaderFrameBuffer(programCollection);
1273
1274 if (shaderStage != VK_SHADER_STAGE_VERTEX_BIT)
1275 subgroups::setVertexShaderFrameBuffer(programCollection);
1276
1277 if (shaderStage == VK_SHADER_STAGE_VERTEX_BIT)
1278 {
1279 std::ostringstream vertex;
1280
1281 vertex << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1282 << extHeader
1283 << "layout(location = 0) in highp vec4 in_position;\n"
1284 << getFramebufferBufferDeclarations(format, declarations, 0)
1285 << "\n"
1286 << helperStr
1287 << "void main (void)\n"
1288 << "{\n"
1289 << " uint tempRes;\n"
1290 << testSrc
1291 << " result = float(tempRes);\n"
1292 << " gl_Position = in_position;\n"
1293 << " gl_PointSize = 1.0f;\n"
1294 << "}\n";
1295
1296 programCollection.glslSources.add("vert") << glu::VertexSource(vertex.str()) << buildOptions;
1297 }
1298 else if (shaderStage == VK_SHADER_STAGE_GEOMETRY_BIT)
1299 {
1300 std::ostringstream geometry;
1301
1302 geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1303 << extHeader
1304 << "layout(points) in;\n"
1305 << "layout(points, max_vertices = 1) out;\n"
1306 << getFramebufferBufferDeclarations(format, declarations, 1)
1307 << "\n"
1308 << helperStr
1309 << "void main (void)\n"
1310 << "{\n"
1311 << " uint tempRes;\n"
1312 << testSrc
1313 << " out_color = float(tempRes);\n"
1314 << " gl_Position = gl_in[0].gl_Position;\n"
1315 << (gsPointSize ? " gl_PointSize = gl_in[0].gl_PointSize;\n" : "")
1316 << " EmitVertex();\n"
1317 << " EndPrimitive();\n"
1318 << "}\n";
1319
1320 programCollection.glslSources.add("geometry") << glu::GeometrySource(geometry.str()) << buildOptions;
1321 }
1322 else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
1323 {
1324 std::ostringstream controlSource;
1325
1326 controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1327 << extHeader
1328 << "layout(vertices = 2) out;\n"
1329 << getFramebufferBufferDeclarations(format, declarations, 2)
1330 << "\n"
1331 << helperStr
1332 << "void main (void)\n"
1333 << "{\n"
1334 << " if (gl_InvocationID == 0)\n"
1335 << " {\n"
1336 << " gl_TessLevelOuter[0] = 1.0f;\n"
1337 << " gl_TessLevelOuter[1] = 1.0f;\n"
1338 << " }\n"
1339 << " uint tempRes;\n"
1340 << testSrc
1341 << " out_color[gl_InvocationID] = float(tempRes);\n"
1342 << " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1343 << (gsPointSize ? " gl_out[gl_InvocationID].gl_PointSize = gl_in[gl_InvocationID].gl_PointSize;\n" : "")
1344 << "}\n";
1345
1346 programCollection.glslSources.add("tesc") << glu::TessellationControlSource(controlSource.str()) << buildOptions;
1347 subgroups::setTesEvalShaderFrameBuffer(programCollection);
1348 }
1349 else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
1350 {
1351 ostringstream evaluationSource;
1352
1353 evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1354 << extHeader
1355 << "layout(isolines, equal_spacing, ccw ) in;\n"
1356 << getFramebufferBufferDeclarations(format, declarations, 3)
1357 << "\n"
1358 << helperStr
1359 << "void main (void)\n"
1360 << "{\n"
1361 << " uint tempRes;\n"
1362 << testSrc
1363 << " out_color = float(tempRes);\n"
1364 << " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
1365 << (gsPointSize ? " gl_PointSize = gl_in[0].gl_PointSize;\n" : "")
1366 << "}\n";
1367
1368 subgroups::setTesCtrlShaderFrameBuffer(programCollection);
1369 programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
1370 }
1371 else
1372 {
1373 DE_FATAL("Unsupported shader stage");
1374 }
1375 }
1376
getBufferDeclarations(vk::VkShaderStageFlags shaderStage,const std::string & formatName,const std::vector<std::string> & declarations,const deUint32 stage)1377 static std::string getBufferDeclarations (vk::VkShaderStageFlags shaderStage,
1378 const std::string& formatName,
1379 const std::vector<std::string>& declarations,
1380 const deUint32 stage)
1381 {
1382 if (declarations.empty())
1383 {
1384 const deUint32 stageCount = vkt::subgroups::getStagesCount(shaderStage);
1385 const deUint32 binding0 = stage;
1386 const deUint32 binding1 = stageCount;
1387 const bool fragment = (shaderStage & VK_SHADER_STAGE_FRAGMENT_BIT) && (stage == stageCount);
1388 const string buffer1 = fragment
1389 ? "layout(location = 0) out uint result;\n"
1390 : "layout(set = 0, binding = " + de::toString(binding0) + ", std430) buffer Buffer1\n"
1391 "{\n"
1392 " uint result[];\n"
1393 "};\n";
1394 //todo boza I suppose it can be "layout(set = 0, binding = " + de::toString(binding1) + ", std430) readonly buffer Buffer2\n"
1395 const string buffer2 = "layout(set = 0, binding = " + de::toString(binding1) + ", std430)" + (stageCount == 1 ? "" : " readonly") + " buffer Buffer" + (fragment ? "1" : "2") + "\n"
1396 "{\n"
1397 " " + formatName + " data[];\n"
1398 "};\n";
1399
1400 return buffer1 + buffer2;
1401 }
1402 else
1403 {
1404 return declarations[stage];
1405 }
1406 }
1407
initStdPrograms(vk::SourceCollections & programCollection,const vk::ShaderBuildOptions & buildOptions,vk::VkShaderStageFlags shaderStage,vk::VkFormat format,bool gsPointSize,const std::string & extHeader,const std::string & testSrc,const std::string & helperStr,const std::vector<std::string> & declarations,const bool avoidHelperInvocations,const std::string & tempRes)1408 void vkt::subgroups::initStdPrograms (vk::SourceCollections& programCollection,
1409 const vk::ShaderBuildOptions& buildOptions,
1410 vk::VkShaderStageFlags shaderStage,
1411 vk::VkFormat format,
1412 bool gsPointSize,
1413 const std::string& extHeader,
1414 const std::string& testSrc,
1415 const std::string& helperStr,
1416 const std::vector<std::string>& declarations,
1417 const bool avoidHelperInvocations,
1418 const std::string& tempRes)
1419 {
1420 const std::string formatName = subgroups::getFormatNameForGLSL(format);
1421
1422 if (isAllComputeStages(shaderStage))
1423 {
1424 std::ostringstream src;
1425
1426 src << "#version 450\n"
1427 << extHeader
1428 << "layout (local_size_x_id = 0, local_size_y_id = 1, "
1429 "local_size_z_id = 2) in;\n"
1430 << getBufferDeclarations(shaderStage, formatName, declarations, 0)
1431 << "\n"
1432 << helperStr
1433 << "void main (void)\n"
1434 << "{\n"
1435 << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1436 << " highp uint offset = globalSize.x * ((globalSize.y * "
1437 "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1438 "gl_GlobalInvocationID.x;\n"
1439 << tempRes
1440 << testSrc
1441 << " result[offset] = tempRes;\n"
1442 << "}\n";
1443
1444 programCollection.glslSources.add("comp") << glu::ComputeSource(src.str()) << buildOptions;
1445 }
1446 #ifndef CTS_USES_VULKANSC
1447 else if (isAllMeshShadingStages(shaderStage))
1448 {
1449 const bool testMesh = ((shaderStage & VK_SHADER_STAGE_MESH_BIT_EXT) != 0u);
1450 const bool testTask = ((shaderStage & VK_SHADER_STAGE_TASK_BIT_EXT) != 0u);
1451
1452 if (testMesh)
1453 {
1454 std::ostringstream mesh;
1455
1456 mesh
1457 << "#version 450\n"
1458 << "#extension GL_EXT_mesh_shader : enable\n"
1459 //<< "#extension GL_NV_mesh_shader : enable\n"
1460 << extHeader
1461 << "layout (local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n"
1462 << "layout (points) out;\n"
1463 << "layout (max_vertices = 1, max_primitives = 1) out;\n"
1464 << getBufferDeclarations(shaderStage, formatName, declarations, 0)
1465 << "\n"
1466 << helperStr
1467 << "void main (void)\n"
1468 << "{\n"
1469 << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1470 //<< " uvec3 globalSize = uvec3(0, 0, 0)/*gl_NumWorkGroups*/ * gl_WorkGroupSize;\n"
1471 << " highp uint offset = globalSize.x * ((globalSize.y * "
1472 "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1473 "gl_GlobalInvocationID.x;\n"
1474 << tempRes
1475 << testSrc
1476 << " result[offset] = tempRes;\n"
1477 << " SetMeshOutputsEXT(0u, 0u);\n"
1478 //<< " gl_PrimitiveCountNV = 0;\n"
1479 << "}\n";
1480
1481 programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
1482 }
1483 else
1484 {
1485 const std::string meshShaderNoSubgroups =
1486 "#version 450\n"
1487 "#extension GL_EXT_mesh_shader : enable\n"
1488 "\n"
1489 "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
1490 "layout (points) out;\n"
1491 "layout (max_vertices = 1, max_primitives = 1) out;\n"
1492 "\n"
1493 "void main (void)\n"
1494 "{\n"
1495 " SetMeshOutputsEXT(0u, 0u);\n"
1496 "}\n"
1497 ;
1498 programCollection.glslSources.add("mesh") << glu::MeshSource(meshShaderNoSubgroups) << buildOptions;
1499 }
1500
1501 if (testTask)
1502 {
1503 const tcu::UVec3 emitSize = (testMesh ? tcu::UVec3(1u, 1u, 1u) : tcu::UVec3(0u, 0u, 0u));
1504 std::ostringstream task;
1505
1506 task
1507 << "#version 450\n"
1508 << "#extension GL_EXT_mesh_shader : enable\n"
1509 //<< "#extension GL_NV_mesh_shader : enable\n"
1510 << extHeader
1511 << "layout (local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n"
1512 << getBufferDeclarations(shaderStage, formatName, declarations, 0)
1513 << "\n"
1514 << helperStr
1515 << "void main (void)\n"
1516 << "{\n"
1517 << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1518 //<< " uvec3 globalSize = uvec3(0, 0, 0)/*gl_NumWorkGroups*/ * gl_WorkGroupSize;\n"
1519 << " highp uint offset = globalSize.x * ((globalSize.y * "
1520 "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1521 "gl_GlobalInvocationID.x;\n"
1522 << tempRes
1523 << testSrc
1524 << " result[offset] = tempRes;\n"
1525 << " EmitMeshTasksEXT(" << emitSize.x() << ", " << emitSize.y() << ", " << emitSize.z() << ");\n"
1526 //<< " gl_TaskCountNV = " << emitSize.x() << ";\n"
1527 << "}\n";
1528
1529 programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
1530 }
1531 }
1532 #endif // CTS_USES_VULKANSC
1533 else if (isAllGraphicsStages(shaderStage))
1534 {
1535 const string vertex =
1536 "#version 450\n"
1537 + extHeader
1538 + getBufferDeclarations(shaderStage, formatName, declarations, 0) +
1539 "\n"
1540 + helperStr +
1541 "void main (void)\n"
1542 "{\n"
1543 " uint tempRes;\n"
1544 + testSrc +
1545 " result[gl_VertexIndex] = tempRes;\n"
1546 " float pixelSize = 2.0f/1024.0f;\n"
1547 " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
1548 " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
1549 " gl_PointSize = 1.0f;\n"
1550 "}\n";
1551
1552 const string tesc =
1553 "#version 450\n"
1554 + extHeader +
1555 "layout(vertices=1) out;\n"
1556 + getBufferDeclarations(shaderStage, formatName, declarations, 1) +
1557 "\n"
1558 + helperStr +
1559 "void main (void)\n"
1560 "{\n"
1561 + tempRes
1562 + testSrc +
1563 " result[gl_PrimitiveID] = tempRes;\n"
1564 " if (gl_InvocationID == 0)\n"
1565 " {\n"
1566 " gl_TessLevelOuter[0] = 1.0f;\n"
1567 " gl_TessLevelOuter[1] = 1.0f;\n"
1568 " }\n"
1569 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1570 + (gsPointSize ? " gl_out[gl_InvocationID].gl_PointSize = gl_in[gl_InvocationID].gl_PointSize;\n" : "") +
1571 "}\n";
1572
1573 const string tese =
1574 "#version 450\n"
1575 + extHeader +
1576 "layout(isolines) in;\n"
1577 + getBufferDeclarations(shaderStage, formatName, declarations, 2) +
1578 "\n"
1579 + helperStr +
1580 "void main (void)\n"
1581 "{\n"
1582 + tempRes
1583 + testSrc +
1584 " result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempRes;\n"
1585 " float pixelSize = 2.0f/1024.0f;\n"
1586 " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
1587 + (gsPointSize ? " gl_PointSize = gl_in[0].gl_PointSize;\n" : "") +
1588 "}\n";
1589
1590 const string geometry =
1591 "#version 450\n"
1592 + extHeader +
1593 "layout(${TOPOLOGY}) in;\n"
1594 "layout(points, max_vertices = 1) out;\n"
1595 + getBufferDeclarations(shaderStage, formatName, declarations, 3) +
1596 "\n"
1597 + helperStr +
1598 "void main (void)\n"
1599 "{\n"
1600 + tempRes
1601 + testSrc +
1602 " result[gl_PrimitiveIDIn] = tempRes;\n"
1603 " gl_Position = gl_in[0].gl_Position;\n"
1604 + (gsPointSize ? " gl_PointSize = gl_in[0].gl_PointSize;\n" : "") +
1605 " EmitVertex();\n"
1606 " EndPrimitive();\n"
1607 "}\n";
1608
1609 const string fragment =
1610 "#version 450\n"
1611 + extHeader
1612 + getBufferDeclarations(shaderStage, formatName, declarations, 4)
1613 + helperStr +
1614 "void main (void)\n"
1615 "{\n"
1616 + (avoidHelperInvocations ? " if (gl_HelperInvocation) return;\n" : "")
1617 + tempRes
1618 + testSrc +
1619 " result = tempRes;\n"
1620 "}\n";
1621
1622 subgroups::addNoSubgroupShader(programCollection);
1623
1624 programCollection.glslSources.add("vert") << glu::VertexSource(vertex) << buildOptions;
1625 programCollection.glslSources.add("tesc") << glu::TessellationControlSource(tesc) << buildOptions;
1626 programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(tese) << buildOptions;
1627 subgroups::addGeometryShadersFromTemplate(geometry, buildOptions, programCollection.glslSources);
1628 programCollection.glslSources.add("fragment") << glu::FragmentSource(fragment)<< buildOptions;
1629 }
1630 #ifndef CTS_USES_VULKANSC
1631 else if (isAllRayTracingStages(shaderStage))
1632 {
1633 const std::string rgenShader =
1634 "#version 460 core\n"
1635 "#extension GL_EXT_ray_tracing: require\n"
1636 + extHeader +
1637 "layout(location = 0) rayPayloadEXT uvec4 payload;\n"
1638 "layout(location = 0) callableDataEXT uvec4 callData;"
1639 "layout(set = 1, binding = 0) uniform accelerationStructureEXT topLevelAS;\n"
1640 + getBufferDeclarations(shaderStage, formatName, declarations, 0) +
1641 "\n"
1642 + helperStr +
1643 "void main()\n"
1644 "{\n"
1645 + tempRes
1646 + testSrc +
1647 " uint rayFlags = 0;\n"
1648 " uint cullMask = 0xFF;\n"
1649 " float tmin = 0.0;\n"
1650 " float tmax = 9.0;\n"
1651 " vec3 origin = vec3((float(gl_LaunchIDEXT.x) + 0.5f) / float(gl_LaunchSizeEXT.x), (float(gl_LaunchIDEXT.y) + 0.5f) / float(gl_LaunchSizeEXT.y), 0.0);\n"
1652 " vec3 directHit = vec3(0.0, 0.0, -1.0);\n"
1653 " vec3 directMiss = vec3(0.0, 0.0, +1.0);\n"
1654 "\n"
1655 " traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directHit, tmax, 0);\n"
1656 " traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directMiss, tmax, 0);\n"
1657 " executeCallableEXT(0, 0);"
1658 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1659 "}\n";
1660 const std::string ahitShader =
1661 "#version 460 core\n"
1662 "#extension GL_EXT_ray_tracing: require\n"
1663 + extHeader +
1664 "hitAttributeEXT vec3 attribs;\n"
1665 "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
1666 + getBufferDeclarations(shaderStage, formatName, declarations, 1) +
1667 "\n"
1668 + helperStr +
1669 "void main()\n"
1670 "{\n"
1671 + tempRes
1672 + testSrc +
1673 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1674 "}\n";
1675 const std::string chitShader =
1676 "#version 460 core\n"
1677 "#extension GL_EXT_ray_tracing: require\n"
1678 + extHeader +
1679 "hitAttributeEXT vec3 attribs;\n"
1680 "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
1681 + getBufferDeclarations(shaderStage, formatName, declarations, 2) +
1682 "\n"
1683 + helperStr +
1684 "void main()\n"
1685 "{\n"
1686 + tempRes
1687 + testSrc +
1688 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1689 "}\n";
1690 const std::string missShader =
1691 "#version 460 core\n"
1692 "#extension GL_EXT_ray_tracing: require\n"
1693 + extHeader +
1694 "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
1695 + getBufferDeclarations(shaderStage, formatName, declarations, 3) +
1696 "\n"
1697 + helperStr +
1698 "void main()\n"
1699 "{\n"
1700 + tempRes
1701 + testSrc +
1702 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1703 "}\n";
1704 const std::string sectShader =
1705 "#version 460 core\n"
1706 "#extension GL_EXT_ray_tracing: require\n"
1707 + extHeader +
1708 "hitAttributeEXT vec3 hitAttribute;\n"
1709 + getBufferDeclarations(shaderStage, formatName, declarations, 4) +
1710 "\n"
1711 + helperStr +
1712 "void main()\n"
1713 "{\n"
1714 + tempRes
1715 + testSrc +
1716 " reportIntersectionEXT(0.75f, 0x7Eu);\n"
1717 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1718 "}\n";
1719 const std::string callShader =
1720 "#version 460 core\n"
1721 "#extension GL_EXT_ray_tracing: require\n"
1722 + extHeader +
1723 "layout(location = 0) callableDataInEXT float callData;\n"
1724 + getBufferDeclarations(shaderStage, formatName, declarations, 5) +
1725 "\n"
1726 + helperStr +
1727 "void main()\n"
1728 "{\n"
1729 + tempRes
1730 + testSrc +
1731 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1732 "}\n";
1733
1734 programCollection.glslSources.add("rgen") << glu::RaygenSource (rgenShader) << buildOptions;
1735 programCollection.glslSources.add("ahit") << glu::AnyHitSource (ahitShader) << buildOptions;
1736 programCollection.glslSources.add("chit") << glu::ClosestHitSource (chitShader) << buildOptions;
1737 programCollection.glslSources.add("miss") << glu::MissSource (missShader) << buildOptions;
1738 programCollection.glslSources.add("sect") << glu::IntersectionSource(sectShader) << buildOptions;
1739 programCollection.glslSources.add("call") << glu::CallableSource (callShader) << buildOptions;
1740
1741 subgroups::addRayTracingNoSubgroupShader(programCollection);
1742 }
1743 #endif // CTS_USES_VULKANSC
1744 else
1745 TCU_THROW(InternalError, "Unknown stage or invalid stage set");
1746
1747 }
1748
isSubgroupSupported(Context & context)1749 bool vkt::subgroups::isSubgroupSupported (Context& context)
1750 {
1751 return context.contextSupports(vk::ApiVersion(0, 1, 1, 0));
1752 }
1753
areSubgroupOperationsSupportedForStage(Context & context,const VkShaderStageFlags stage)1754 bool vkt::subgroups::areSubgroupOperationsSupportedForStage (Context& context, const VkShaderStageFlags stage)
1755 {
1756 return (stage & (context.getSubgroupProperties().supportedStages)) ? true : false;
1757 }
1758
isSubgroupFeatureSupportedForDevice(Context & context,VkSubgroupFeatureFlagBits bit)1759 bool vkt::subgroups::isSubgroupFeatureSupportedForDevice (Context& context, VkSubgroupFeatureFlagBits bit)
1760 {
1761 return (bit & (context.getSubgroupProperties().supportedOperations)) ? true : false;
1762 }
1763
isFragmentSSBOSupportedForDevice(Context & context)1764 bool vkt::subgroups::isFragmentSSBOSupportedForDevice (Context& context)
1765 {
1766 return context.getDeviceFeatures().fragmentStoresAndAtomics ? true : false;
1767 }
1768
isVertexSSBOSupportedForDevice(Context & context)1769 bool vkt::subgroups::isVertexSSBOSupportedForDevice (Context& context)
1770 {
1771 return context.getDeviceFeatures().vertexPipelineStoresAndAtomics ? true : false;
1772 }
1773
isInt64SupportedForDevice(Context & context)1774 bool vkt::subgroups::isInt64SupportedForDevice (Context& context)
1775 {
1776 return context.getDeviceFeatures().shaderInt64 ? true : false;
1777 }
1778
isTessellationAndGeometryPointSizeSupported(Context & context)1779 bool vkt::subgroups::isTessellationAndGeometryPointSizeSupported (Context& context)
1780 {
1781 return context.getDeviceFeatures().shaderTessellationAndGeometryPointSize ? true : false;
1782 }
1783
is16BitUBOStorageSupported(Context & context)1784 bool vkt::subgroups::is16BitUBOStorageSupported (Context& context)
1785 {
1786 return context.get16BitStorageFeatures().uniformAndStorageBuffer16BitAccess ? true : false;
1787 }
1788
is8BitUBOStorageSupported(Context & context)1789 bool vkt::subgroups::is8BitUBOStorageSupported (Context& context)
1790 {
1791 return context.get8BitStorageFeatures().uniformAndStorageBuffer8BitAccess ? true : false;
1792 }
1793
isFormatSupportedForDevice(Context & context,vk::VkFormat format)1794 bool vkt::subgroups::isFormatSupportedForDevice (Context& context, vk::VkFormat format)
1795 {
1796 const VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures& subgroupExtendedTypesFeatures = context.getShaderSubgroupExtendedTypesFeatures();
1797 const VkPhysicalDeviceShaderFloat16Int8Features& float16Int8Features = context.getShaderFloat16Int8Features();
1798 const VkPhysicalDevice16BitStorageFeatures& storage16bit = context.get16BitStorageFeatures();
1799 const VkPhysicalDevice8BitStorageFeatures& storage8bit = context.get8BitStorageFeatures();
1800 const VkPhysicalDeviceFeatures& features = context.getDeviceFeatures();
1801 bool shaderFloat64 = features.shaderFloat64 ? true : false;
1802 bool shaderInt16 = features.shaderInt16 ? true : false;
1803 bool shaderInt64 = features.shaderInt64 ? true : false;
1804 bool shaderSubgroupExtendedTypes = false;
1805 bool shaderFloat16 = false;
1806 bool shaderInt8 = false;
1807 bool storageBuffer16BitAccess = false;
1808 bool storageBuffer8BitAccess = false;
1809
1810 if (context.isDeviceFunctionalitySupported("VK_KHR_shader_subgroup_extended_types") &&
1811 context.isDeviceFunctionalitySupported("VK_KHR_shader_float16_int8"))
1812 {
1813 shaderSubgroupExtendedTypes = subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes ? true : false;
1814 shaderFloat16 = float16Int8Features.shaderFloat16 ? true : false;
1815 shaderInt8 = float16Int8Features.shaderInt8 ? true : false;
1816
1817 if ( context.isDeviceFunctionalitySupported("VK_KHR_16bit_storage") )
1818 storageBuffer16BitAccess = storage16bit.storageBuffer16BitAccess ? true : false;
1819
1820 if (context.isDeviceFunctionalitySupported("VK_KHR_8bit_storage"))
1821 storageBuffer8BitAccess = storage8bit.storageBuffer8BitAccess ? true : false;
1822 }
1823
1824 switch (format)
1825 {
1826 default:
1827 return true;
1828 case VK_FORMAT_R16_SFLOAT:
1829 case VK_FORMAT_R16G16_SFLOAT:
1830 case VK_FORMAT_R16G16B16_SFLOAT:
1831 case VK_FORMAT_R16G16B16A16_SFLOAT:
1832 return shaderSubgroupExtendedTypes && shaderFloat16 && storageBuffer16BitAccess;
1833 case VK_FORMAT_R64_SFLOAT:
1834 case VK_FORMAT_R64G64_SFLOAT:
1835 case VK_FORMAT_R64G64B64_SFLOAT:
1836 case VK_FORMAT_R64G64B64A64_SFLOAT:
1837 return shaderFloat64;
1838 case VK_FORMAT_R8_SINT:
1839 case VK_FORMAT_R8G8_SINT:
1840 case VK_FORMAT_R8G8B8_SINT:
1841 case VK_FORMAT_R8G8B8A8_SINT:
1842 case VK_FORMAT_R8_UINT:
1843 case VK_FORMAT_R8G8_UINT:
1844 case VK_FORMAT_R8G8B8_UINT:
1845 case VK_FORMAT_R8G8B8A8_UINT:
1846 return shaderSubgroupExtendedTypes && shaderInt8 && storageBuffer8BitAccess;
1847 case VK_FORMAT_R16_SINT:
1848 case VK_FORMAT_R16G16_SINT:
1849 case VK_FORMAT_R16G16B16_SINT:
1850 case VK_FORMAT_R16G16B16A16_SINT:
1851 case VK_FORMAT_R16_UINT:
1852 case VK_FORMAT_R16G16_UINT:
1853 case VK_FORMAT_R16G16B16_UINT:
1854 case VK_FORMAT_R16G16B16A16_UINT:
1855 return shaderSubgroupExtendedTypes && shaderInt16 && storageBuffer16BitAccess;
1856 case VK_FORMAT_R64_SINT:
1857 case VK_FORMAT_R64G64_SINT:
1858 case VK_FORMAT_R64G64B64_SINT:
1859 case VK_FORMAT_R64G64B64A64_SINT:
1860 case VK_FORMAT_R64_UINT:
1861 case VK_FORMAT_R64G64_UINT:
1862 case VK_FORMAT_R64G64B64_UINT:
1863 case VK_FORMAT_R64G64B64A64_UINT:
1864 return shaderSubgroupExtendedTypes && shaderInt64;
1865 }
1866 }
1867
isSubgroupBroadcastDynamicIdSupported(Context & context)1868 bool vkt::subgroups::isSubgroupBroadcastDynamicIdSupported (Context& context)
1869 {
1870 return context.contextSupports(vk::ApiVersion(0, 1, 2, 0)) &&
1871 vk::getPhysicalDeviceVulkan12Features(context.getInstanceInterface(), context.getPhysicalDevice()).subgroupBroadcastDynamicId;
1872 }
1873
getFormatNameForGLSL(VkFormat format)1874 std::string vkt::subgroups::getFormatNameForGLSL (VkFormat format)
1875 {
1876 switch (format)
1877 {
1878 case VK_FORMAT_R8_SINT: return "int8_t";
1879 case VK_FORMAT_R8G8_SINT: return "i8vec2";
1880 case VK_FORMAT_R8G8B8_SINT: return "i8vec3";
1881 case VK_FORMAT_R8G8B8A8_SINT: return "i8vec4";
1882 case VK_FORMAT_R8_UINT: return "uint8_t";
1883 case VK_FORMAT_R8G8_UINT: return "u8vec2";
1884 case VK_FORMAT_R8G8B8_UINT: return "u8vec3";
1885 case VK_FORMAT_R8G8B8A8_UINT: return "u8vec4";
1886 case VK_FORMAT_R16_SINT: return "int16_t";
1887 case VK_FORMAT_R16G16_SINT: return "i16vec2";
1888 case VK_FORMAT_R16G16B16_SINT: return "i16vec3";
1889 case VK_FORMAT_R16G16B16A16_SINT: return "i16vec4";
1890 case VK_FORMAT_R16_UINT: return "uint16_t";
1891 case VK_FORMAT_R16G16_UINT: return "u16vec2";
1892 case VK_FORMAT_R16G16B16_UINT: return "u16vec3";
1893 case VK_FORMAT_R16G16B16A16_UINT: return "u16vec4";
1894 case VK_FORMAT_R32_SINT: return "int";
1895 case VK_FORMAT_R32G32_SINT: return "ivec2";
1896 case VK_FORMAT_R32G32B32_SINT: return "ivec3";
1897 case VK_FORMAT_R32G32B32A32_SINT: return "ivec4";
1898 case VK_FORMAT_R32_UINT: return "uint";
1899 case VK_FORMAT_R32G32_UINT: return "uvec2";
1900 case VK_FORMAT_R32G32B32_UINT: return "uvec3";
1901 case VK_FORMAT_R32G32B32A32_UINT: return "uvec4";
1902 case VK_FORMAT_R64_SINT: return "int64_t";
1903 case VK_FORMAT_R64G64_SINT: return "i64vec2";
1904 case VK_FORMAT_R64G64B64_SINT: return "i64vec3";
1905 case VK_FORMAT_R64G64B64A64_SINT: return "i64vec4";
1906 case VK_FORMAT_R64_UINT: return "uint64_t";
1907 case VK_FORMAT_R64G64_UINT: return "u64vec2";
1908 case VK_FORMAT_R64G64B64_UINT: return "u64vec3";
1909 case VK_FORMAT_R64G64B64A64_UINT: return "u64vec4";
1910 case VK_FORMAT_R16_SFLOAT: return "float16_t";
1911 case VK_FORMAT_R16G16_SFLOAT: return "f16vec2";
1912 case VK_FORMAT_R16G16B16_SFLOAT: return "f16vec3";
1913 case VK_FORMAT_R16G16B16A16_SFLOAT: return "f16vec4";
1914 case VK_FORMAT_R32_SFLOAT: return "float";
1915 case VK_FORMAT_R32G32_SFLOAT: return "vec2";
1916 case VK_FORMAT_R32G32B32_SFLOAT: return "vec3";
1917 case VK_FORMAT_R32G32B32A32_SFLOAT: return "vec4";
1918 case VK_FORMAT_R64_SFLOAT: return "double";
1919 case VK_FORMAT_R64G64_SFLOAT: return "dvec2";
1920 case VK_FORMAT_R64G64B64_SFLOAT: return "dvec3";
1921 case VK_FORMAT_R64G64B64A64_SFLOAT: return "dvec4";
1922 case VK_FORMAT_R8_USCALED: return "bool";
1923 case VK_FORMAT_R8G8_USCALED: return "bvec2";
1924 case VK_FORMAT_R8G8B8_USCALED: return "bvec3";
1925 case VK_FORMAT_R8G8B8A8_USCALED: return "bvec4";
1926 default: TCU_THROW(InternalError, "Unhandled format");
1927 }
1928 }
1929
getAdditionalExtensionForFormat(vk::VkFormat format)1930 std::string vkt::subgroups::getAdditionalExtensionForFormat (vk::VkFormat format)
1931 {
1932 switch (format)
1933 {
1934 default:
1935 return "";
1936 case VK_FORMAT_R8_SINT:
1937 case VK_FORMAT_R8G8_SINT:
1938 case VK_FORMAT_R8G8B8_SINT:
1939 case VK_FORMAT_R8G8B8A8_SINT:
1940 case VK_FORMAT_R8_UINT:
1941 case VK_FORMAT_R8G8_UINT:
1942 case VK_FORMAT_R8G8B8_UINT:
1943 case VK_FORMAT_R8G8B8A8_UINT:
1944 return "#extension GL_EXT_shader_subgroup_extended_types_int8 : enable\n";
1945 case VK_FORMAT_R16_SINT:
1946 case VK_FORMAT_R16G16_SINT:
1947 case VK_FORMAT_R16G16B16_SINT:
1948 case VK_FORMAT_R16G16B16A16_SINT:
1949 case VK_FORMAT_R16_UINT:
1950 case VK_FORMAT_R16G16_UINT:
1951 case VK_FORMAT_R16G16B16_UINT:
1952 case VK_FORMAT_R16G16B16A16_UINT:
1953 return "#extension GL_EXT_shader_subgroup_extended_types_int16 : enable\n";
1954 case VK_FORMAT_R64_SINT:
1955 case VK_FORMAT_R64G64_SINT:
1956 case VK_FORMAT_R64G64B64_SINT:
1957 case VK_FORMAT_R64G64B64A64_SINT:
1958 case VK_FORMAT_R64_UINT:
1959 case VK_FORMAT_R64G64_UINT:
1960 case VK_FORMAT_R64G64B64_UINT:
1961 case VK_FORMAT_R64G64B64A64_UINT:
1962 return "#extension GL_EXT_shader_subgroup_extended_types_int64 : enable\n";
1963 case VK_FORMAT_R16_SFLOAT:
1964 case VK_FORMAT_R16G16_SFLOAT:
1965 case VK_FORMAT_R16G16B16_SFLOAT:
1966 case VK_FORMAT_R16G16B16A16_SFLOAT:
1967 return "#extension GL_EXT_shader_subgroup_extended_types_float16 : enable\n";
1968 }
1969 }
1970
getAllFormats()1971 const std::vector<vk::VkFormat> vkt::subgroups::getAllFormats ()
1972 {
1973 std::vector<VkFormat> formats;
1974
1975 formats.push_back(VK_FORMAT_R8_SINT);
1976 formats.push_back(VK_FORMAT_R8G8_SINT);
1977 formats.push_back(VK_FORMAT_R8G8B8_SINT);
1978 formats.push_back(VK_FORMAT_R8G8B8A8_SINT);
1979 formats.push_back(VK_FORMAT_R8_UINT);
1980 formats.push_back(VK_FORMAT_R8G8_UINT);
1981 formats.push_back(VK_FORMAT_R8G8B8_UINT);
1982 formats.push_back(VK_FORMAT_R8G8B8A8_UINT);
1983 formats.push_back(VK_FORMAT_R16_SINT);
1984 formats.push_back(VK_FORMAT_R16G16_SINT);
1985 formats.push_back(VK_FORMAT_R16G16B16_SINT);
1986 formats.push_back(VK_FORMAT_R16G16B16A16_SINT);
1987 formats.push_back(VK_FORMAT_R16_UINT);
1988 formats.push_back(VK_FORMAT_R16G16_UINT);
1989 formats.push_back(VK_FORMAT_R16G16B16_UINT);
1990 formats.push_back(VK_FORMAT_R16G16B16A16_UINT);
1991 formats.push_back(VK_FORMAT_R32_SINT);
1992 formats.push_back(VK_FORMAT_R32G32_SINT);
1993 formats.push_back(VK_FORMAT_R32G32B32_SINT);
1994 formats.push_back(VK_FORMAT_R32G32B32A32_SINT);
1995 formats.push_back(VK_FORMAT_R32_UINT);
1996 formats.push_back(VK_FORMAT_R32G32_UINT);
1997 formats.push_back(VK_FORMAT_R32G32B32_UINT);
1998 formats.push_back(VK_FORMAT_R32G32B32A32_UINT);
1999 formats.push_back(VK_FORMAT_R64_SINT);
2000 formats.push_back(VK_FORMAT_R64G64_SINT);
2001 formats.push_back(VK_FORMAT_R64G64B64_SINT);
2002 formats.push_back(VK_FORMAT_R64G64B64A64_SINT);
2003 formats.push_back(VK_FORMAT_R64_UINT);
2004 formats.push_back(VK_FORMAT_R64G64_UINT);
2005 formats.push_back(VK_FORMAT_R64G64B64_UINT);
2006 formats.push_back(VK_FORMAT_R64G64B64A64_UINT);
2007 formats.push_back(VK_FORMAT_R16_SFLOAT);
2008 formats.push_back(VK_FORMAT_R16G16_SFLOAT);
2009 formats.push_back(VK_FORMAT_R16G16B16_SFLOAT);
2010 formats.push_back(VK_FORMAT_R16G16B16A16_SFLOAT);
2011 formats.push_back(VK_FORMAT_R32_SFLOAT);
2012 formats.push_back(VK_FORMAT_R32G32_SFLOAT);
2013 formats.push_back(VK_FORMAT_R32G32B32_SFLOAT);
2014 formats.push_back(VK_FORMAT_R32G32B32A32_SFLOAT);
2015 formats.push_back(VK_FORMAT_R64_SFLOAT);
2016 formats.push_back(VK_FORMAT_R64G64_SFLOAT);
2017 formats.push_back(VK_FORMAT_R64G64B64_SFLOAT);
2018 formats.push_back(VK_FORMAT_R64G64B64A64_SFLOAT);
2019 formats.push_back(VK_FORMAT_R8_USCALED);
2020 formats.push_back(VK_FORMAT_R8G8_USCALED);
2021 formats.push_back(VK_FORMAT_R8G8B8_USCALED);
2022 formats.push_back(VK_FORMAT_R8G8B8A8_USCALED);
2023
2024 return formats;
2025 }
2026
isFormatSigned(VkFormat format)2027 bool vkt::subgroups::isFormatSigned (VkFormat format)
2028 {
2029 switch (format)
2030 {
2031 default:
2032 return false;
2033 case VK_FORMAT_R8_SINT:
2034 case VK_FORMAT_R8G8_SINT:
2035 case VK_FORMAT_R8G8B8_SINT:
2036 case VK_FORMAT_R8G8B8A8_SINT:
2037 case VK_FORMAT_R16_SINT:
2038 case VK_FORMAT_R16G16_SINT:
2039 case VK_FORMAT_R16G16B16_SINT:
2040 case VK_FORMAT_R16G16B16A16_SINT:
2041 case VK_FORMAT_R32_SINT:
2042 case VK_FORMAT_R32G32_SINT:
2043 case VK_FORMAT_R32G32B32_SINT:
2044 case VK_FORMAT_R32G32B32A32_SINT:
2045 case VK_FORMAT_R64_SINT:
2046 case VK_FORMAT_R64G64_SINT:
2047 case VK_FORMAT_R64G64B64_SINT:
2048 case VK_FORMAT_R64G64B64A64_SINT:
2049 return true;
2050 }
2051 }
2052
isFormatUnsigned(VkFormat format)2053 bool vkt::subgroups::isFormatUnsigned (VkFormat format)
2054 {
2055 switch (format)
2056 {
2057 default:
2058 return false;
2059 case VK_FORMAT_R8_UINT:
2060 case VK_FORMAT_R8G8_UINT:
2061 case VK_FORMAT_R8G8B8_UINT:
2062 case VK_FORMAT_R8G8B8A8_UINT:
2063 case VK_FORMAT_R16_UINT:
2064 case VK_FORMAT_R16G16_UINT:
2065 case VK_FORMAT_R16G16B16_UINT:
2066 case VK_FORMAT_R16G16B16A16_UINT:
2067 case VK_FORMAT_R32_UINT:
2068 case VK_FORMAT_R32G32_UINT:
2069 case VK_FORMAT_R32G32B32_UINT:
2070 case VK_FORMAT_R32G32B32A32_UINT:
2071 case VK_FORMAT_R64_UINT:
2072 case VK_FORMAT_R64G64_UINT:
2073 case VK_FORMAT_R64G64B64_UINT:
2074 case VK_FORMAT_R64G64B64A64_UINT:
2075 return true;
2076 }
2077 }
2078
isFormatFloat(VkFormat format)2079 bool vkt::subgroups::isFormatFloat (VkFormat format)
2080 {
2081 switch (format)
2082 {
2083 default:
2084 return false;
2085 case VK_FORMAT_R16_SFLOAT:
2086 case VK_FORMAT_R16G16_SFLOAT:
2087 case VK_FORMAT_R16G16B16_SFLOAT:
2088 case VK_FORMAT_R16G16B16A16_SFLOAT:
2089 case VK_FORMAT_R32_SFLOAT:
2090 case VK_FORMAT_R32G32_SFLOAT:
2091 case VK_FORMAT_R32G32B32_SFLOAT:
2092 case VK_FORMAT_R32G32B32A32_SFLOAT:
2093 case VK_FORMAT_R64_SFLOAT:
2094 case VK_FORMAT_R64G64_SFLOAT:
2095 case VK_FORMAT_R64G64B64_SFLOAT:
2096 case VK_FORMAT_R64G64B64A64_SFLOAT:
2097 return true;
2098 }
2099 }
2100
isFormatBool(VkFormat format)2101 bool vkt::subgroups::isFormatBool (VkFormat format)
2102 {
2103 switch (format)
2104 {
2105 default:
2106 return false;
2107 case VK_FORMAT_R8_USCALED:
2108 case VK_FORMAT_R8G8_USCALED:
2109 case VK_FORMAT_R8G8B8_USCALED:
2110 case VK_FORMAT_R8G8B8A8_USCALED:
2111 return true;
2112 }
2113 }
2114
isFormat8bitTy(VkFormat format)2115 bool vkt::subgroups::isFormat8bitTy (VkFormat format)
2116 {
2117 switch (format)
2118 {
2119 default:
2120 return false;
2121 case VK_FORMAT_R8_SINT:
2122 case VK_FORMAT_R8G8_SINT:
2123 case VK_FORMAT_R8G8B8_SINT:
2124 case VK_FORMAT_R8G8B8A8_SINT:
2125 case VK_FORMAT_R8_UINT:
2126 case VK_FORMAT_R8G8_UINT:
2127 case VK_FORMAT_R8G8B8_UINT:
2128 case VK_FORMAT_R8G8B8A8_UINT:
2129 return true;
2130 }
2131 }
2132
isFormat16BitTy(VkFormat format)2133 bool vkt::subgroups::isFormat16BitTy (VkFormat format)
2134 {
2135 switch (format)
2136 {
2137 default:
2138 return false;
2139 case VK_FORMAT_R16_SFLOAT:
2140 case VK_FORMAT_R16G16_SFLOAT:
2141 case VK_FORMAT_R16G16B16_SFLOAT:
2142 case VK_FORMAT_R16G16B16A16_SFLOAT:
2143 case VK_FORMAT_R16_SINT:
2144 case VK_FORMAT_R16G16_SINT:
2145 case VK_FORMAT_R16G16B16_SINT:
2146 case VK_FORMAT_R16G16B16A16_SINT:
2147 case VK_FORMAT_R16_UINT:
2148 case VK_FORMAT_R16G16_UINT:
2149 case VK_FORMAT_R16G16B16_UINT:
2150 case VK_FORMAT_R16G16B16A16_UINT:
2151 return true;
2152 }
2153 }
2154
// Adds a pass-through vertex shader named "vert" to the collection as
// hand-written SPIR-V assembly. It forwards the input position to gl_Position
// and sets gl_PointSize to 1.0. Kept as SPIR-V asm (not GLSL) so the exact
// module is under the test's control; the GLSL equivalent is shown below.
void vkt::subgroups::setVertexShaderFrameBuffer (SourceCollections& programCollection)
{
	/*
		"layout(location = 0) in highp vec4 in_position;\n"
		"void main (void)\n"
		"{\n"
		"  gl_Position = in_position;\n"
		"  gl_PointSize = 1.0f;\n"
		"}\n";
	*/
	programCollection.spirvAsmSources.add("vert") <<
		"; SPIR-V\n"
		"; Version: 1.3\n"
		"; Generator: Khronos Glslang Reference Front End; 7\n"
		"; Bound: 25\n"
		"; Schema: 0\n"
		"OpCapability Shader\n"
		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
		"OpMemoryModel Logical GLSL450\n"
		"OpEntryPoint Vertex %4 \"main\" %13 %17\n"
		"OpMemberDecorate %11 0 BuiltIn Position\n"
		"OpMemberDecorate %11 1 BuiltIn PointSize\n"
		"OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
		"OpMemberDecorate %11 3 BuiltIn CullDistance\n"
		"OpDecorate %11 Block\n"
		"OpDecorate %17 Location 0\n"
		"%2 = OpTypeVoid\n"
		"%3 = OpTypeFunction %2\n"
		"%6 = OpTypeFloat 32\n"
		"%7 = OpTypeVector %6 4\n"
		"%8 = OpTypeInt 32 0\n"
		"%9 = OpConstant %8 1\n"
		"%10 = OpTypeArray %6 %9\n"
		"%11 = OpTypeStruct %7 %6 %10 %10\n"
		"%12 = OpTypePointer Output %11\n"
		"%13 = OpVariable %12 Output\n"
		"%14 = OpTypeInt 32 1\n"
		"%15 = OpConstant %14 0\n"
		"%16 = OpTypePointer Input %7\n"
		"%17 = OpVariable %16 Input\n"
		"%19 = OpTypePointer Output %7\n"
		"%21 = OpConstant %14 1\n"
		"%22 = OpConstant %6 1\n"
		"%23 = OpTypePointer Output %6\n"
		"%4 = OpFunction %2 None %3\n"
		"%5 = OpLabel\n"
		"%18 = OpLoad %7 %17\n"
		"%20 = OpAccessChain %19 %13 %15\n"
		"OpStore %20 %18\n"
		"%24 = OpAccessChain %23 %13 %21\n"
		"OpStore %24 %22\n"
		"OpReturn\n"
		"OpFunctionEnd\n";
}
2209
// Adds a minimal fragment shader named "fragment" to the collection as
// hand-written SPIR-V assembly. It converts the interpolated float input at
// location 0 to uint and writes it to the color output at location 0; the
// GLSL equivalent is shown below.
void vkt::subgroups::setFragmentShaderFrameBuffer (vk::SourceCollections& programCollection)
{
	/*
		"layout(location = 0) in float in_color;\n"
		"layout(location = 0) out uint out_color;\n"
		"void main()\n"
		{\n"
		"	out_color = uint(in_color);\n"
		"}\n";
	*/
	programCollection.spirvAsmSources.add("fragment") <<
		"; SPIR-V\n"
		"; Version: 1.3\n"
		"; Generator: Khronos Glslang Reference Front End; 2\n"
		"; Bound: 14\n"
		"; Schema: 0\n"
		"OpCapability Shader\n"
		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
		"OpMemoryModel Logical GLSL450\n"
		"OpEntryPoint Fragment %4 \"main\" %8 %11\n"
		"OpExecutionMode %4 OriginUpperLeft\n"
		"OpDecorate %8 Location 0\n"
		"OpDecorate %11 Location 0\n"
		"%2 = OpTypeVoid\n"
		"%3 = OpTypeFunction %2\n"
		"%6 = OpTypeInt 32 0\n"
		"%7 = OpTypePointer Output %6\n"
		"%8 = OpVariable %7 Output\n"
		"%9 = OpTypeFloat 32\n"
		"%10 = OpTypePointer Input %9\n"
		"%11 = OpVariable %10 Input\n"
		"%4 = OpFunction %2 None %3\n"
		"%5 = OpLabel\n"
		"%12 = OpLoad %9 %11\n"
		"%13 = OpConvertFToU %6 %12\n"
		"OpStore %8 %13\n"
		"OpReturn\n"
		"OpFunctionEnd\n";
}
2249
// Adds a basic tessellation control shader named "tesc" to the collection as
// hand-written SPIR-V assembly. It emits a 2-vertex patch, sets both outer
// tessellation levels to 1.0 from invocation 0, and passes positions through;
// the GLSL equivalent is shown below.
void vkt::subgroups::setTesCtrlShaderFrameBuffer (vk::SourceCollections& programCollection)
{
	/*
		"#extension GL_KHR_shader_subgroup_basic: enable\n"
		"#extension GL_EXT_tessellation_shader : require\n"
		"layout(vertices = 2) out;\n"
		"void main (void)\n"
		"{\n"
		"  if (gl_InvocationID == 0)\n"
		"  {\n"
		"    gl_TessLevelOuter[0] = 1.0f;\n"
		"    gl_TessLevelOuter[1] = 1.0f;\n"
		"  }\n"
		"  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
		"}\n";
	*/
	programCollection.spirvAsmSources.add("tesc") <<
		"; SPIR-V\n"
		"; Version: 1.3\n"
		"; Generator: Khronos Glslang Reference Front End; 2\n"
		"; Bound: 46\n"
		"; Schema: 0\n"
		"OpCapability Tessellation\n"
		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
		"OpMemoryModel Logical GLSL450\n"
		"OpEntryPoint TessellationControl %4 \"main\" %8 %20 %33 %39\n"
		"OpExecutionMode %4 OutputVertices 2\n"
		"OpDecorate %8 BuiltIn InvocationId\n"
		"OpDecorate %20 Patch\n"
		"OpDecorate %20 BuiltIn TessLevelOuter\n"
		"OpMemberDecorate %29 0 BuiltIn Position\n"
		"OpMemberDecorate %29 1 BuiltIn PointSize\n"
		"OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
		"OpMemberDecorate %29 3 BuiltIn CullDistance\n"
		"OpDecorate %29 Block\n"
		"OpMemberDecorate %35 0 BuiltIn Position\n"
		"OpMemberDecorate %35 1 BuiltIn PointSize\n"
		"OpMemberDecorate %35 2 BuiltIn ClipDistance\n"
		"OpMemberDecorate %35 3 BuiltIn CullDistance\n"
		"OpDecorate %35 Block\n"
		"%2 = OpTypeVoid\n"
		"%3 = OpTypeFunction %2\n"
		"%6 = OpTypeInt 32 1\n"
		"%7 = OpTypePointer Input %6\n"
		"%8 = OpVariable %7 Input\n"
		"%10 = OpConstant %6 0\n"
		"%11 = OpTypeBool\n"
		"%15 = OpTypeFloat 32\n"
		"%16 = OpTypeInt 32 0\n"
		"%17 = OpConstant %16 4\n"
		"%18 = OpTypeArray %15 %17\n"
		"%19 = OpTypePointer Output %18\n"
		"%20 = OpVariable %19 Output\n"
		"%21 = OpConstant %15 1\n"
		"%22 = OpTypePointer Output %15\n"
		"%24 = OpConstant %6 1\n"
		"%26 = OpTypeVector %15 4\n"
		"%27 = OpConstant %16 1\n"
		"%28 = OpTypeArray %15 %27\n"
		"%29 = OpTypeStruct %26 %15 %28 %28\n"
		"%30 = OpConstant %16 2\n"
		"%31 = OpTypeArray %29 %30\n"
		"%32 = OpTypePointer Output %31\n"
		"%33 = OpVariable %32 Output\n"
		"%35 = OpTypeStruct %26 %15 %28 %28\n"
		"%36 = OpConstant %16 32\n"
		"%37 = OpTypeArray %35 %36\n"
		"%38 = OpTypePointer Input %37\n"
		"%39 = OpVariable %38 Input\n"
		"%41 = OpTypePointer Input %26\n"
		"%44 = OpTypePointer Output %26\n"
		"%4 = OpFunction %2 None %3\n"
		"%5 = OpLabel\n"
		"%9 = OpLoad %6 %8\n"
		"%12 = OpIEqual %11 %9 %10\n"
		"OpSelectionMerge %14 None\n"
		"OpBranchConditional %12 %13 %14\n"
		"%13 = OpLabel\n"
		"%23 = OpAccessChain %22 %20 %10\n"
		"OpStore %23 %21\n"
		"%25 = OpAccessChain %22 %20 %24\n"
		"OpStore %25 %21\n"
		"OpBranch %14\n"
		"%14 = OpLabel\n"
		"%34 = OpLoad %6 %8\n"
		"%40 = OpLoad %6 %8\n"
		"%42 = OpAccessChain %41 %39 %40 %10\n"
		"%43 = OpLoad %26 %42\n"
		"%45 = OpAccessChain %44 %33 %34 %10\n"
		"OpStore %45 %43\n"
		"OpReturn\n"
		"OpFunctionEnd\n";
}
2343
// Adds a basic tessellation evaluation shader named "tese" to the collection
// as hand-written SPIR-V assembly. It interpolates gl_Position between the two
// patch vertices using gl_TessCoord.x (isolines, equal spacing, ccw) and
// forwards the per-vertex color at location 0; the GLSL equivalent is below.
void vkt::subgroups::setTesEvalShaderFrameBuffer (vk::SourceCollections& programCollection)
{
	/*
		"#extension GL_KHR_shader_subgroup_ballot: enable\n"
		"#extension GL_EXT_tessellation_shader : require\n"
		"layout(isolines, equal_spacing, ccw ) in;\n"
		"layout(location = 0) in float in_color[];\n"
		"layout(location = 0) out float out_color;\n"
		"\n"
		"void main (void)\n"
		"{\n"
		"  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
		"  out_color = in_color[0];\n"
		"}\n";
	*/
	programCollection.spirvAsmSources.add("tese") <<
		"; SPIR-V\n"
		"; Version: 1.3\n"
		"; Generator: Khronos Glslang Reference Front End; 2\n"
		"; Bound: 45\n"
		"; Schema: 0\n"
		"OpCapability Tessellation\n"
		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
		"OpMemoryModel Logical GLSL450\n"
		"OpEntryPoint TessellationEvaluation %4 \"main\" %13 %20 %29 %39 %42\n"
		"OpExecutionMode %4 Isolines\n"
		"OpExecutionMode %4 SpacingEqual\n"
		"OpExecutionMode %4 VertexOrderCcw\n"
		"OpMemberDecorate %11 0 BuiltIn Position\n"
		"OpMemberDecorate %11 1 BuiltIn PointSize\n"
		"OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
		"OpMemberDecorate %11 3 BuiltIn CullDistance\n"
		"OpDecorate %11 Block\n"
		"OpMemberDecorate %16 0 BuiltIn Position\n"
		"OpMemberDecorate %16 1 BuiltIn PointSize\n"
		"OpMemberDecorate %16 2 BuiltIn ClipDistance\n"
		"OpMemberDecorate %16 3 BuiltIn CullDistance\n"
		"OpDecorate %16 Block\n"
		"OpDecorate %29 BuiltIn TessCoord\n"
		"OpDecorate %39 Location 0\n"
		"OpDecorate %42 Location 0\n"
		"%2 = OpTypeVoid\n"
		"%3 = OpTypeFunction %2\n"
		"%6 = OpTypeFloat 32\n"
		"%7 = OpTypeVector %6 4\n"
		"%8 = OpTypeInt 32 0\n"
		"%9 = OpConstant %8 1\n"
		"%10 = OpTypeArray %6 %9\n"
		"%11 = OpTypeStruct %7 %6 %10 %10\n"
		"%12 = OpTypePointer Output %11\n"
		"%13 = OpVariable %12 Output\n"
		"%14 = OpTypeInt 32 1\n"
		"%15 = OpConstant %14 0\n"
		"%16 = OpTypeStruct %7 %6 %10 %10\n"
		"%17 = OpConstant %8 32\n"
		"%18 = OpTypeArray %16 %17\n"
		"%19 = OpTypePointer Input %18\n"
		"%20 = OpVariable %19 Input\n"
		"%21 = OpTypePointer Input %7\n"
		"%24 = OpConstant %14 1\n"
		"%27 = OpTypeVector %6 3\n"
		"%28 = OpTypePointer Input %27\n"
		"%29 = OpVariable %28 Input\n"
		"%30 = OpConstant %8 0\n"
		"%31 = OpTypePointer Input %6\n"
		"%36 = OpTypePointer Output %7\n"
		"%38 = OpTypePointer Output %6\n"
		"%39 = OpVariable %38 Output\n"
		"%40 = OpTypeArray %6 %17\n"
		"%41 = OpTypePointer Input %40\n"
		"%42 = OpVariable %41 Input\n"
		"%4 = OpFunction %2 None %3\n"
		"%5 = OpLabel\n"
		"%22 = OpAccessChain %21 %20 %15 %15\n"
		"%23 = OpLoad %7 %22\n"
		"%25 = OpAccessChain %21 %20 %24 %15\n"
		"%26 = OpLoad %7 %25\n"
		"%32 = OpAccessChain %31 %29 %30\n"
		"%33 = OpLoad %6 %32\n"
		"%34 = OpCompositeConstruct %7 %33 %33 %33 %33\n"
		"%35 = OpExtInst %7 %1 FMix %23 %26 %34\n"
		"%37 = OpAccessChain %36 %13 %15\n"
		"OpStore %37 %35\n"
		"%43 = OpAccessChain %31 %42 %15\n"
		"%44 = OpLoad %6 %43\n"
		"OpStore %39 %44\n"
		"OpReturn\n"
		"OpFunctionEnd\n";
}
2433
addGeometryShadersFromTemplate(const std::string & glslTemplate,const vk::ShaderBuildOptions & options,vk::GlslSourceCollection & collection)2434 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& glslTemplate, const vk::ShaderBuildOptions& options, vk::GlslSourceCollection& collection)
2435 {
2436 tcu::StringTemplate geometryTemplate(glslTemplate);
2437
2438 map<string, string> linesParams;
2439 linesParams.insert(pair<string, string>("TOPOLOGY", "lines"));
2440
2441 map<string, string> pointsParams;
2442 pointsParams.insert(pair<string, string>("TOPOLOGY", "points"));
2443
2444 collection.add("geometry_lines") << glu::GeometrySource(geometryTemplate.specialize(linesParams)) << options;
2445 collection.add("geometry_points") << glu::GeometrySource(geometryTemplate.specialize(pointsParams)) << options;
2446 }
2447
addGeometryShadersFromTemplate(const std::string & spirvTemplate,const vk::SpirVAsmBuildOptions & options,vk::SpirVAsmCollection & collection)2448 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& spirvTemplate, const vk::SpirVAsmBuildOptions& options, vk::SpirVAsmCollection& collection)
2449 {
2450 tcu::StringTemplate geometryTemplate(spirvTemplate);
2451
2452 map<string, string> linesParams;
2453 linesParams.insert(pair<string, string>("TOPOLOGY", "InputLines"));
2454
2455 map<string, string> pointsParams;
2456 pointsParams.insert(pair<string, string>("TOPOLOGY", "InputPoints"));
2457
2458 collection.add("geometry_lines") << geometryTemplate.specialize(linesParams) << options;
2459 collection.add("geometry_points") << geometryTemplate.specialize(pointsParams) << options;
2460 }
2461
// Fills the host-visible memory backing an input buffer/image with the
// initial contents requested by 'data', then flushes so the device sees
// the writes.
//
// - InitializeNonZero: fills the allocation with pseudo-random values of
//   the format's component width (seeded from the command line, so runs
//   are reproducible).
// - InitializeZero:    clears the allocation with 32-bit zero writes.
// - InitializeNone:    leaves the memory untouched and skips the flush.
void initializeMemory (Context& context, const Allocation& alloc, const subgroups::SSBOData& data)
{
	const vk::VkFormat format = data.format;
	// Images are tightly packed; buffers use the layout-dependent element
	// size (so e.g. std140 padding is included in the byte count).
	const vk::VkDeviceSize size = data.numElements *
		(data.isImage() ? getFormatSizeInBytes(format) : getElementSizeInBytes(format, data.layout));
	if (subgroups::SSBOData::InitializeNonZero == data.initializeType)
	{
		de::Random rnd(context.getTestContext().getCommandLine().getBaseSeed());

		// Multi-component formats share a case with their single-component
		// variant: the fill only depends on the component byte width, and
		// 'size' already accounts for the component count.
		switch (format)
		{
			default:
				DE_FATAL("Illegal buffer format");
				break;
			case VK_FORMAT_R8_SINT:
			case VK_FORMAT_R8G8_SINT:
			case VK_FORMAT_R8G8B8_SINT:
			case VK_FORMAT_R8G8B8A8_SINT:
			case VK_FORMAT_R8_UINT:
			case VK_FORMAT_R8G8_UINT:
			case VK_FORMAT_R8G8B8_UINT:
			case VK_FORMAT_R8G8B8A8_UINT:
			{
				// 8-bit integer components: one random byte each.
				deUint8* ptr = reinterpret_cast<deUint8*>(alloc.getHostPtr());

				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint8)); k++)
				{
					ptr[k] = rnd.getUint8();
				}
			}
			break;
			case VK_FORMAT_R16_SINT:
			case VK_FORMAT_R16G16_SINT:
			case VK_FORMAT_R16G16B16_SINT:
			case VK_FORMAT_R16G16B16A16_SINT:
			case VK_FORMAT_R16_UINT:
			case VK_FORMAT_R16G16_UINT:
			case VK_FORMAT_R16G16B16_UINT:
			case VK_FORMAT_R16G16B16A16_UINT:
			{
				// 16-bit integer components.
				deUint16* ptr = reinterpret_cast<deUint16*>(alloc.getHostPtr());

				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint16)); k++)
				{
					ptr[k] = rnd.getUint16();
				}
			}
			break;
			case VK_FORMAT_R8_USCALED:
			case VK_FORMAT_R8G8_USCALED:
			case VK_FORMAT_R8G8B8_USCALED:
			case VK_FORMAT_R8G8B8A8_USCALED:
			{
				// USCALED formats are filled as 32-bit words; even random
				// draws are forced to zero, so roughly half the values are
				// zero and the rest are odd.
				deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());

				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
				{
					deUint32 r = rnd.getUint32();
					ptr[k] = (r & 1) ? r : 0;
				}
			}
			break;
			case VK_FORMAT_R32_SINT:
			case VK_FORMAT_R32G32_SINT:
			case VK_FORMAT_R32G32B32_SINT:
			case VK_FORMAT_R32G32B32A32_SINT:
			case VK_FORMAT_R32_UINT:
			case VK_FORMAT_R32G32_UINT:
			case VK_FORMAT_R32G32B32_UINT:
			case VK_FORMAT_R32G32B32A32_UINT:
			{
				// 32-bit integer components.
				deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());

				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
				{
					ptr[k] = rnd.getUint32();
				}
			}
			break;
			case VK_FORMAT_R64_SINT:
			case VK_FORMAT_R64G64_SINT:
			case VK_FORMAT_R64G64B64_SINT:
			case VK_FORMAT_R64G64B64A64_SINT:
			case VK_FORMAT_R64_UINT:
			case VK_FORMAT_R64G64_UINT:
			case VK_FORMAT_R64G64B64_UINT:
			case VK_FORMAT_R64G64B64A64_UINT:
			{
				// 64-bit integer components.
				deUint64* ptr = reinterpret_cast<deUint64*>(alloc.getHostPtr());

				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint64)); k++)
				{
					ptr[k] = rnd.getUint64();
				}
			}
			break;
			case VK_FORMAT_R16_SFLOAT:
			case VK_FORMAT_R16G16_SFLOAT:
			case VK_FORMAT_R16G16B16_SFLOAT:
			case VK_FORMAT_R16G16B16A16_SFLOAT:
			{
				// Half floats: generate a random 32-bit float and convert.
				deFloat16* ptr = reinterpret_cast<deFloat16*>(alloc.getHostPtr());

				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deFloat16)); k++)
				{
					ptr[k] = deFloat32To16(rnd.getFloat());
				}
			}
			break;
			case VK_FORMAT_R32_SFLOAT:
			case VK_FORMAT_R32G32_SFLOAT:
			case VK_FORMAT_R32G32B32_SFLOAT:
			case VK_FORMAT_R32G32B32A32_SFLOAT:
			{
				// 32-bit float components.
				float* ptr = reinterpret_cast<float*>(alloc.getHostPtr());

				for (vk::VkDeviceSize k = 0; k < (size / sizeof(float)); k++)
				{
					ptr[k] = rnd.getFloat();
				}
			}
			break;
			case VK_FORMAT_R64_SFLOAT:
			case VK_FORMAT_R64G64_SFLOAT:
			case VK_FORMAT_R64G64B64_SFLOAT:
			case VK_FORMAT_R64G64B64A64_SFLOAT:
			{
				// 64-bit float components.
				double* ptr = reinterpret_cast<double*>(alloc.getHostPtr());

				for (vk::VkDeviceSize k = 0; k < (size / sizeof(double)); k++)
				{
					ptr[k] = rnd.getDouble();
				}
			}
			break;
		}
	}
	else if (subgroups::SSBOData::InitializeZero == data.initializeType)
	{
		// Zero-fill in 32-bit chunks. Note this writes size/4 dwords, so it
		// assumes 'size' is a multiple of 4; any trailing bytes would be
		// left uninitialized.
		deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());

		for (vk::VkDeviceSize k = 0; k < size / 4; k++)
		{
			ptr[k] = 0;
		}
	}

	// Make the host writes visible to the device (no-op for InitializeNone,
	// which wrote nothing).
	if (subgroups::SSBOData::InitializeNone != data.initializeType)
	{
		flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
	}
}
2614
getResultBinding(const VkShaderStageFlagBits shaderStage)2615 deUint32 getResultBinding (const VkShaderStageFlagBits shaderStage)
2616 {
2617 switch(shaderStage)
2618 {
2619 case VK_SHADER_STAGE_VERTEX_BIT:
2620 return 0u;
2621 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
2622 return 1u;
2623 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
2624 return 2u;
2625 case VK_SHADER_STAGE_GEOMETRY_BIT:
2626 return 3u;
2627 default:
2628 DE_ASSERT(0);
2629 return -1;
2630 }
2631 DE_ASSERT(0);
2632 return -1;
2633 }
2634
// Convenience wrapper around the RequiredSubgroupSize variant: runs the
// tessellation frame-buffer subgroup test with no extra shader-stage
// create flags and no explicit subgroup size requirement (both 0).
tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTest (Context& context,
	VkFormat format,
	const SSBOData* extraData,
	deUint32 extraDataCount,
	const void* internalData,
	subgroups::CheckResult checkResult,
	const VkShaderStageFlags shaderStage)
{
	return makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, shaderStage, 0u, 0u);
}
2645
// Runs a tessellation-stage frame-buffer subgroup test: renders isoline
// patches (two vertices per patch) at increasing widths and validates the
// resulting image with 'checkResult'.
//
// 'shaderStage' selects which tessellation stage(s) receive
// 'tessShaderStageCreateFlags' and 'requiredSubgroupSize';
// requiredSubgroupSize == 0 means "no explicit subgroup size requirement".
tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize (Context& context,
	VkFormat format,
	const SSBOData* extraData,
	deUint32 extraDataCount,
	const void* internalData,
	subgroups::CheckResult checkResult,
	const VkShaderStageFlags shaderStage,
	const deUint32 tessShaderStageCreateFlags,
	const deUint32 requiredSubgroupSize)
{
	const DeviceInterface& vk = context.getDeviceInterface();
	const VkDevice device = context.getDevice();
	const deUint32 maxWidth = getMaxWidth();
	vector<de::SharedPtr<BufferOrImage> > inputBuffers (extraDataCount);
	DescriptorSetLayoutBuilder layoutBuilder;
	DescriptorPoolBuilder poolBuilder;
	DescriptorSetUpdateBuilder updateBuilder;
	Move <VkDescriptorPool> descriptorPool;
	Move <VkDescriptorSet> descriptorSet;
	const Unique<VkShaderModule> vertexShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
	const Unique<VkShaderModule> teCtrlShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("tesc"), 0u));
	const Unique<VkShaderModule> teEvalShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("tese"), 0u));
	const Unique<VkShaderModule> fragmentShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
	const Unique<VkRenderPass> renderPass (makeRenderPass(context, format));
	// Single vertex stream of vec4 positions fed per vertex.
	const VkVertexInputBindingDescription vertexInputBinding =
	{
		0u, // deUint32 binding;
		static_cast<deUint32>(sizeof(tcu::Vec4)), // deUint32 stride;
		VK_VERTEX_INPUT_RATE_VERTEX // VkVertexInputRate inputRate;
	};
	const VkVertexInputAttributeDescription vertexInputAttribute =
	{
		0u, // deUint32 location;
		0u, // deUint32 binding;
		VK_FORMAT_R32G32B32A32_SFLOAT, // VkFormat format;
		0u // deUint32 offset;
	};

	// Create and initialize the extra input resources (sampled images or
	// UBOs) consumed by the shaders under test.
	// NOTE(review): unlike the geometry variant below, inputs are
	// initialized once here rather than re-randomized per width iteration
	// -- presumably intentional; confirm if per-iteration data matters.
	for (deUint32 i = 0u; i < extraDataCount; i++)
	{
		if (extraData[i].isImage())
		{
			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
		}
		else
		{
			DE_ASSERT(extraData[i].isUBO());
			vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
		}
		const Allocation& alloc = inputBuffers[i]->getAllocation();
		initializeMemory(context, alloc, extraData[i]);
	}

	// One descriptor binding per extra input, visible to the tested stage.
	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
		layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, shaderStage, DE_NULL);

	const Unique<VkDescriptorSetLayout> descriptorSetLayout (layoutBuilder.build(vk, device));

	const Unique<VkPipelineLayout> pipelineLayout (makePipelineLayout(vk, device, *descriptorSetLayout));

	// Per-stage required subgroup sizes in the order the pipeline helper
	// expects (vertex, tesc, tese, geometry, fragment); only the tested
	// tessellation stage(s) get a non-zero requirement.
	const deUint32 requiredSubgroupSizes[5] = {0u,
		((shaderStage & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? requiredSubgroupSize : 0u),
		((shaderStage & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? requiredSubgroupSize : 0u),
		0u,
		0u};

	const Unique<VkPipeline> pipeline (makeGraphicsPipeline(context, *pipelineLayout,
		VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT |
		VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
		*vertexShaderModule, *fragmentShaderModule, DE_NULL, *teCtrlShaderModule, *teEvalShaderModule,
		*renderPass, VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, &vertexInputBinding, &vertexInputAttribute, true, format,
		0u, ((shaderStage & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? tessShaderStageCreateFlags : 0u),
		((shaderStage & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? tessShaderStageCreateFlags : 0u),
		0u, 0u, requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));

	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
		poolBuilder.addType(inputBuffers[ndx]->getType());

	// Descriptor pool/set are only needed when there are inputs to bind.
	if (extraDataCount > 0)
	{
		descriptorPool = poolBuilder.build(vk, device,
			VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
		descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
	}

	// Point each binding at the corresponding image or buffer resource.
	for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
	{
		if (inputBuffers[buffersNdx]->isImage())
		{
			VkDescriptorImageInfo info =
				makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
					inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);

			updateBuilder.writeSingle(*descriptorSet,
				DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
				inputBuffers[buffersNdx]->getType(), &info);
		}
		else
		{
			VkDescriptorBufferInfo info =
				makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
					0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());

			updateBuilder.writeSingle(*descriptorSet,
				DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
				inputBuffers[buffersNdx]->getType(), &info);
		}
	}

	updateBuilder.update(vk, device);

	const VkQueue queue = context.getUniversalQueue();
	const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
	const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
	const deUint32 subgroupSize = getSubgroupSize(context);
	const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
	// Two vertices per patch, one patch per output pixel at full width.
	const vk::VkDeviceSize vertexBufferSize = 2ull * maxWidth * sizeof(tcu::Vec4);
	Buffer vertexBuffer (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
	unsigned totalIterations = 0u;
	unsigned failedIterations = 0u;
	// Render target; its contents are only inspected via the readback
	// buffer below, hence "discardable".
	Image discardableImage (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);

	// Fill the vertex buffer: consecutive vertex pairs span one pixel each
	// in NDC x, covering [-1, 1] across maxWidth pixels.
	{
		const Allocation& alloc = vertexBuffer.getAllocation();
		std::vector<tcu::Vec4> data (2u * maxWidth, Vec4(1.0f, 0.0f, 1.0f, 1.0f));
		const float pixelSize = 2.0f / static_cast<float>(maxWidth);
		float leftHandPosition = -1.0f;

		for(deUint32 ndx = 0u; ndx < data.size(); ndx+=2u)
		{
			data[ndx][0] = leftHandPosition;
			leftHandPosition += pixelSize;
			data[ndx+1][0] = leftHandPosition;
		}

		deMemcpy(alloc.getHostPtr(), &data[0], data.size() * sizeof(tcu::Vec4));
		flushAlloc(vk, device, alloc);
	}

	const Unique<VkFramebuffer> framebuffer (makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
	const VkViewport viewport = makeViewport(maxWidth, 1u);
	const VkRect2D scissor = makeRect2D(maxWidth, 1u);
	const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
	Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
	const VkDeviceSize vertexBufferOffset = 0u;

	// Render and check once per width (getNextWidth covers 1..128 densely,
	// then powers of two up to maxWidth).
	for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
	{
		totalIterations++;

		beginCommandBuffer(vk, *cmdBuffer);
		{

			vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
			vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);

			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));

			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);

			if (extraDataCount > 0)
			{
				vk.cmdBindDescriptorSets(*cmdBuffer,
					VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
					&descriptorSet.get(), 0u, DE_NULL);
			}

			vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
			// Two vertices per patch -> 'width' patches this iteration.
			vk.cmdDraw(*cmdBuffer, 2 * width, 1, 0, 0);

			endRenderPass(vk, *cmdBuffer);

			// Read the rendered row back into a host-visible buffer.
			copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
			endCommandBuffer(vk, *cmdBuffer);

			submitCommandsAndWait(vk, device, queue, *cmdBuffer);
		}
		context.resetCommandPoolForVKSC(device, *cmdPool);

		// Validate this iteration's output; note the checker receives the
		// patch count (width/2u given 2*width drawn vertices).
		{
			const Allocation& allocResult = imageBufferResult.getAllocation();
			invalidateAlloc(vk, device, allocResult);

			std::vector<const void*> datas;
			datas.push_back(allocResult.getHostPtr());
			if (!checkResult(internalData, datas, width/2u, subgroupSize))
				failedIterations++;
		}
	}

	// Report pass/fail summary: any failed iteration fails the test.
	if (0 < failedIterations)
	{
		unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);

		context.getTestContext().getLog()
			<< TestLog::Message << valuesPassed << " / "
			<< totalIterations << " values passed" << TestLog::EndMessage;
		return tcu::TestStatus::fail("Failed!");
	}

	return tcu::TestStatus::pass("OK");
}
2849
check(std::vector<const void * > datas,deUint32 width,deUint32 ref)2850 bool vkt::subgroups::check (std::vector<const void*> datas, deUint32 width, deUint32 ref)
2851 {
2852 const deUint32* data = reinterpret_cast<const deUint32*>(datas[0]);
2853
2854 for (deUint32 n = 0; n < width; ++n)
2855 {
2856 if (data[n] != ref)
2857 {
2858 return false;
2859 }
2860 }
2861
2862 return true;
2863 }
2864
checkComputeOrMesh(std::vector<const void * > datas,const deUint32 numWorkgroups[3],const deUint32 localSize[3],deUint32 ref)2865 bool vkt::subgroups::checkComputeOrMesh (std::vector<const void*> datas,
2866 const deUint32 numWorkgroups[3],
2867 const deUint32 localSize[3],
2868 deUint32 ref)
2869 {
2870 const deUint32 globalSizeX = numWorkgroups[0] * localSize[0];
2871 const deUint32 globalSizeY = numWorkgroups[1] * localSize[1];
2872 const deUint32 globalSizeZ = numWorkgroups[2] * localSize[2];
2873
2874 return check(datas, globalSizeX * globalSizeY * globalSizeZ, ref);
2875 }
2876
// Convenience wrapper around the RequiredSubgroupSize variant: runs the
// geometry frame-buffer subgroup test with no geometry-stage create flags
// and no explicit subgroup size requirement (both 0).
tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTest (Context& context,
	VkFormat format,
	const SSBOData* extraData,
	deUint32 extraDataCount,
	const void* internalData,
	subgroups::CheckResult checkResult)
{
	return makeGeometryFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, 0u, 0u);
}
2886
// Runs a geometry-stage frame-buffer subgroup test: draws one point per
// pixel at increasing widths, lets the geometry shader write the result
// color, and validates the rendered row with 'checkResult'.
//
// requiredSubgroupSize == 0 means "no explicit subgroup size requirement"
// for the geometry stage.
tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTestRequiredSubgroupSize (Context& context,
	VkFormat format,
	const SSBOData* extraData,
	deUint32 extraDataCount,
	const void* internalData,
	subgroups::CheckResult checkResult,
	const deUint32 geometryShaderStageCreateFlags,
	const deUint32 requiredSubgroupSize)
{
	const DeviceInterface& vk = context.getDeviceInterface();
	const VkDevice device = context.getDevice();
	const deUint32 maxWidth = getMaxWidth();
	vector<de::SharedPtr<BufferOrImage> > inputBuffers (extraDataCount);
	DescriptorSetLayoutBuilder layoutBuilder;
	DescriptorPoolBuilder poolBuilder;
	DescriptorSetUpdateBuilder updateBuilder;
	Move <VkDescriptorPool> descriptorPool;
	Move <VkDescriptorSet> descriptorSet;
	const Unique<VkShaderModule> vertexShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
	const Unique<VkShaderModule> geometryShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("geometry"), 0u));
	const Unique<VkShaderModule> fragmentShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
	const Unique<VkRenderPass> renderPass (makeRenderPass(context, format));
	// Single vertex stream of vec4 positions fed per vertex.
	const VkVertexInputBindingDescription vertexInputBinding =
	{
		0u, // deUint32 binding;
		static_cast<deUint32>(sizeof(tcu::Vec4)), // deUint32 stride;
		VK_VERTEX_INPUT_RATE_VERTEX // VkVertexInputRate inputRate;
	};
	const VkVertexInputAttributeDescription vertexInputAttribute =
	{
		0u, // deUint32 location;
		0u, // deUint32 binding;
		VK_FORMAT_R32G32B32A32_SFLOAT, // VkFormat format;
		0u // deUint32 offset;
	};

	// Create and initialize the extra input resources (sampled images or
	// UBOs) consumed by the geometry shader.
	for (deUint32 i = 0u; i < extraDataCount; i++)
	{
		if (extraData[i].isImage())
		{
			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
		}
		else
		{
			DE_ASSERT(extraData[i].isUBO());
			vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
		}
		const Allocation& alloc = inputBuffers[i]->getAllocation();
		initializeMemory(context, alloc, extraData[i]);
	}

	// One descriptor binding per extra input, visible to the geometry stage.
	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
		layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_GEOMETRY_BIT, DE_NULL);

	const Unique<VkDescriptorSetLayout> descriptorSetLayout (layoutBuilder.build(vk, device));

	const Unique<VkPipelineLayout> pipelineLayout (makePipelineLayout(vk, device, *descriptorSetLayout));

	// Per-stage required subgroup sizes (vertex, tesc, tese, geometry,
	// fragment); only the geometry entry is populated here.
	const deUint32 requiredSubgroupSizes[5] = {0u, 0u, 0u, requiredSubgroupSize, 0u};

	const Unique<VkPipeline> pipeline (makeGraphicsPipeline(context, *pipelineLayout,
		VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_GEOMETRY_BIT,
		*vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, DE_NULL, DE_NULL,
		*renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST, &vertexInputBinding, &vertexInputAttribute, true, format,
		0u, 0u, 0u, geometryShaderStageCreateFlags, 0u,
		requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));

	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
		poolBuilder.addType(inputBuffers[ndx]->getType());

	// Descriptor pool/set are only needed when there are inputs to bind.
	if (extraDataCount > 0)
	{
		descriptorPool = poolBuilder.build(vk, device,
			VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
		descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
	}

	// Point each binding at the corresponding image or buffer resource.
	for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
	{
		if (inputBuffers[buffersNdx]->isImage())
		{
			VkDescriptorImageInfo info =
				makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
					inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);

			updateBuilder.writeSingle(*descriptorSet,
				DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
				inputBuffers[buffersNdx]->getType(), &info);
		}
		else
		{
			VkDescriptorBufferInfo info =
				makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
					0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());

			updateBuilder.writeSingle(*descriptorSet,
				DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
				inputBuffers[buffersNdx]->getType(), &info);
		}
	}

	updateBuilder.update(vk, device);

	const VkQueue queue = context.getUniversalQueue();
	const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
	const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
	const deUint32 subgroupSize = getSubgroupSize(context);
	const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
	// One point vertex per output pixel at full width.
	const vk::VkDeviceSize vertexBufferSize = maxWidth * sizeof(tcu::Vec4);
	Buffer vertexBuffer (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
	unsigned totalIterations = 0u;
	unsigned failedIterations = 0u;
	// Render target; its contents are only inspected via the readback
	// buffer below, hence "discardable".
	Image discardableImage (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);

	// Fill the vertex buffer: one point per pixel, centered in each pixel
	// (hence the + pixelSize / 2 offset) across [-1, 1] in NDC x.
	{
		const Allocation& alloc = vertexBuffer.getAllocation();
		std::vector<tcu::Vec4> data (maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
		const float pixelSize = 2.0f / static_cast<float>(maxWidth);
		float leftHandPosition = -1.0f;

		for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
		{
			data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
			leftHandPosition += pixelSize;
		}

		deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
		flushAlloc(vk, device, alloc);
	}

	const Unique<VkFramebuffer> framebuffer (makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
	const VkViewport viewport = makeViewport(maxWidth, 1u);
	const VkRect2D scissor = makeRect2D(maxWidth, 1u);
	const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
	Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
	const VkDeviceSize vertexBufferOffset = 0u;

	// Render and check once per width (getNextWidth covers 1..128 densely,
	// then powers of two up to maxWidth).
	for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
	{
		totalIterations++;

		// Re-randomize/reset the input resources for every iteration.
		for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
		{
			const Allocation& alloc = inputBuffers[ndx]->getAllocation();
			initializeMemory(context, alloc, extraData[ndx]);
		}

		beginCommandBuffer(vk, *cmdBuffer);
		{
			vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);

			vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);

			beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));

			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);

			if (extraDataCount > 0)
			{
				vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
					&descriptorSet.get(), 0u, DE_NULL);
			}

			vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);

			// One point per tested pixel this iteration.
			vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);

			endRenderPass(vk, *cmdBuffer);

			// Read the rendered row back into a host-visible buffer.
			copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);

			endCommandBuffer(vk, *cmdBuffer);

			submitCommandsAndWait(vk, device, queue, *cmdBuffer);
		}
		context.resetCommandPoolForVKSC(device, *cmdPool);

		// Validate this iteration's output (one result per drawn point).
		{
			const Allocation& allocResult = imageBufferResult.getAllocation();
			invalidateAlloc(vk, device, allocResult);

			std::vector<const void*> datas;
			datas.push_back(allocResult.getHostPtr());
			if (!checkResult(internalData, datas, width, subgroupSize))
				failedIterations++;
		}
	}

	// Report pass/fail summary: any failed iteration fails the test.
	if (0 < failedIterations)
	{
		unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);

		context.getTestContext().getLog()
			<< TestLog::Message << valuesPassed << " / "
			<< totalIterations << " values passed" << TestLog::EndMessage;

		return tcu::TestStatus::fail("Failed!");
	}

	return tcu::TestStatus::pass("OK");
}
3089
getPossibleGraphicsSubgroupStages(Context & context,const vk::VkShaderStageFlags testedStages)3090 vk::VkShaderStageFlags vkt::subgroups::getPossibleGraphicsSubgroupStages (Context& context, const vk::VkShaderStageFlags testedStages)
3091 {
3092 const VkPhysicalDeviceSubgroupProperties& subgroupProperties = context.getSubgroupProperties();
3093 VkShaderStageFlags stages = testedStages & subgroupProperties.supportedStages;
3094
3095 DE_ASSERT(isAllGraphicsStages(testedStages));
3096
3097 if (VK_SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
3098 {
3099 if ((stages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
3100 TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
3101 else
3102 stages = VK_SHADER_STAGE_FRAGMENT_BIT;
3103 }
3104
3105 if (static_cast<VkShaderStageFlags>(0u) == stages)
3106 TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
3107
3108 return stages;
3109 }
3110
// Convenience wrapper around allStagesRequiredSubgroupSize: runs the
// all-stages subgroup test with no per-stage create flags and no required
// subgroup sizes (all zeros / null).
tcu::TestStatus vkt::subgroups::allStages (Context& context,
	vk::VkFormat format,
	const SSBOData* extraData,
	deUint32 extraDataCount,
	const void* internalData,
	const VerificationFunctor& checkResult,
	const vk::VkShaderStageFlags shaderStage)
{
	return vkt::subgroups::allStagesRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, shaderStage,
		0u, 0u, 0u, 0u, 0u, DE_NULL);
}
3122
allStagesRequiredSubgroupSize(Context & context,vk::VkFormat format,const SSBOData * extraDatas,deUint32 extraDatasCount,const void * internalData,const VerificationFunctor & checkResult,const vk::VkShaderStageFlags shaderStageTested,const deUint32 vertexShaderStageCreateFlags,const deUint32 tessellationControlShaderStageCreateFlags,const deUint32 tessellationEvalShaderStageCreateFlags,const deUint32 geometryShaderStageCreateFlags,const deUint32 fragmentShaderStageCreateFlags,const deUint32 requiredSubgroupSize[5])3123 tcu::TestStatus vkt::subgroups::allStagesRequiredSubgroupSize (Context& context,
3124 vk::VkFormat format,
3125 const SSBOData* extraDatas,
3126 deUint32 extraDatasCount,
3127 const void* internalData,
3128 const VerificationFunctor& checkResult,
3129 const vk::VkShaderStageFlags shaderStageTested,
3130 const deUint32 vertexShaderStageCreateFlags,
3131 const deUint32 tessellationControlShaderStageCreateFlags,
3132 const deUint32 tessellationEvalShaderStageCreateFlags,
3133 const deUint32 geometryShaderStageCreateFlags,
3134 const deUint32 fragmentShaderStageCreateFlags,
3135 const deUint32 requiredSubgroupSize[5])
3136 {
3137 const DeviceInterface& vk = context.getDeviceInterface();
3138 const VkDevice device = context.getDevice();
3139 const deUint32 maxWidth = getMaxWidth();
3140 vector<VkShaderStageFlagBits> stagesVector;
3141 VkShaderStageFlags shaderStageRequired = (VkShaderStageFlags)0ull;
3142
3143 Move<VkShaderModule> vertexShaderModule;
3144 Move<VkShaderModule> teCtrlShaderModule;
3145 Move<VkShaderModule> teEvalShaderModule;
3146 Move<VkShaderModule> geometryShaderModule;
3147 Move<VkShaderModule> fragmentShaderModule;
3148
3149 if (shaderStageTested & VK_SHADER_STAGE_VERTEX_BIT)
3150 {
3151 stagesVector.push_back(VK_SHADER_STAGE_VERTEX_BIT);
3152 }
3153 if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
3154 {
3155 stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
3156 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
3157 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
3158 }
3159 if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
3160 {
3161 stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
3162 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
3163 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
3164 }
3165 if (shaderStageTested & VK_SHADER_STAGE_GEOMETRY_BIT)
3166 {
3167 stagesVector.push_back(VK_SHADER_STAGE_GEOMETRY_BIT);
3168 const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
3169 shaderStageRequired |= (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
3170 }
3171 if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
3172 {
3173 const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
3174 shaderStageRequired |= (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
3175 }
3176
3177 const deUint32 stagesCount = static_cast<deUint32>(stagesVector.size());
3178 const string vert = (shaderStageRequired & VK_SHADER_STAGE_VERTEX_BIT) ? "vert_noSubgroup" : "vert";
3179 const string tesc = (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? "tesc_noSubgroup" : "tesc";
3180 const string tese = (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? "tese_noSubgroup" : "tese";
3181
3182 shaderStageRequired = shaderStageTested | shaderStageRequired;
3183
3184 vertexShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(vert), 0u);
3185 if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
3186 {
3187 teCtrlShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tesc), 0u);
3188 teEvalShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tese), 0u);
3189 }
3190 if (shaderStageRequired & VK_SHADER_STAGE_GEOMETRY_BIT)
3191 {
3192 if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
3193 {
3194 // tessellation shaders output line primitives
3195 geometryShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("geometry_lines"), 0u);
3196 }
3197 else
3198 {
3199 // otherwise points are processed by geometry shader
3200 geometryShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("geometry_points"), 0u);
3201 }
3202 }
3203 if (shaderStageRequired & VK_SHADER_STAGE_FRAGMENT_BIT)
3204 fragmentShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u);
3205
3206 std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(stagesCount + extraDatasCount);
3207
3208 DescriptorSetLayoutBuilder layoutBuilder;
3209
3210 // The implicit result SSBO we use to store our outputs from the shader
3211 for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
3212 {
3213 const VkDeviceSize shaderSize = (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? maxWidth * 2 : maxWidth;
3214 const VkDeviceSize size = getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize;
3215 inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT));
3216
3217 layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1, stagesVector[ndx], getResultBinding(stagesVector[ndx]), DE_NULL);
3218 }
3219
3220 for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
3221 {
3222 const deUint32 datasNdx = ndx - stagesCount;
3223 if (extraDatas[datasNdx].isImage())
3224 {
3225 inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
3226 }
3227 else
3228 {
3229 const auto usage = (extraDatas[datasNdx].isUBO() ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT : VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
3230 const auto size = getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) * extraDatas[datasNdx].numElements;
3231 inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, usage));
3232 }
3233
3234 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3235 initializeMemory(context, alloc, extraDatas[datasNdx]);
3236
3237 layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1,
3238 extraDatas[datasNdx].stages, extraDatas[datasNdx].binding, DE_NULL);
3239 }
3240
3241 const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
3242
3243 const Unique<VkPipelineLayout> pipelineLayout(
3244 makePipelineLayout(vk, device, *descriptorSetLayout));
3245
3246 const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
3247 const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
3248 shaderStageRequired,
3249 *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, *teCtrlShaderModule, *teEvalShaderModule,
3250 *renderPass,
3251 (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
3252 DE_NULL, DE_NULL, false, VK_FORMAT_R32G32B32A32_SFLOAT,
3253 vertexShaderStageCreateFlags, tessellationControlShaderStageCreateFlags, tessellationEvalShaderStageCreateFlags,
3254 geometryShaderStageCreateFlags, fragmentShaderStageCreateFlags, requiredSubgroupSize));
3255
3256 Move <VkDescriptorPool> descriptorPool;
3257 Move <VkDescriptorSet> descriptorSet;
3258
3259 if (inputBuffers.size() > 0)
3260 {
3261 DescriptorPoolBuilder poolBuilder;
3262
3263 for (deUint32 ndx = 0u; ndx < static_cast<deUint32>(inputBuffers.size()); ndx++)
3264 {
3265 poolBuilder.addType(inputBuffers[ndx]->getType());
3266 }
3267
3268 descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3269
3270 // Create descriptor set
3271 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3272
3273 DescriptorSetUpdateBuilder updateBuilder;
3274
3275 for (deUint32 ndx = 0u; ndx < stagesCount + extraDatasCount; ndx++)
3276 {
3277 deUint32 binding;
3278 if (ndx < stagesCount) binding = getResultBinding(stagesVector[ndx]);
3279 else binding = extraDatas[ndx -stagesCount].binding;
3280
3281 if (inputBuffers[ndx]->isImage())
3282 {
3283 VkDescriptorImageInfo info =
3284 makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
3285 inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3286
3287 updateBuilder.writeSingle( *descriptorSet,
3288 DescriptorSetUpdateBuilder::Location::binding(binding),
3289 inputBuffers[ndx]->getType(), &info);
3290 }
3291 else
3292 {
3293 VkDescriptorBufferInfo info =
3294 makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(),
3295 0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
3296
3297 updateBuilder.writeSingle( *descriptorSet,
3298 DescriptorSetUpdateBuilder::Location::binding(binding),
3299 inputBuffers[ndx]->getType(), &info);
3300 }
3301 }
3302
3303 updateBuilder.update(vk, device);
3304 }
3305
3306 {
3307 const VkQueue queue = context.getUniversalQueue();
3308 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
3309 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
3310 const deUint32 subgroupSize = getSubgroupSize(context);
3311 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
3312 unsigned totalIterations = 0u;
3313 unsigned failedIterations = 0u;
3314 Image resultImage (context, maxWidth, 1, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3315 const Unique<VkFramebuffer> framebuffer (makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), maxWidth, 1u));
3316 const VkViewport viewport = makeViewport(maxWidth, 1u);
3317 const VkRect2D scissor = makeRect2D(maxWidth, 1u);
3318 const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
3319 Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3320 const VkImageSubresourceRange subresourceRange =
3321 {
3322 VK_IMAGE_ASPECT_COLOR_BIT, //VkImageAspectFlags aspectMask
3323 0u, //deUint32 baseMipLevel
3324 1u, //deUint32 levelCount
3325 0u, //deUint32 baseArrayLayer
3326 1u //deUint32 layerCount
3327 };
3328
3329 const VkImageMemoryBarrier colorAttachmentBarrier = makeImageMemoryBarrier(
3330 (VkAccessFlags)0u, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
3331 VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
3332 resultImage.getImage(), subresourceRange);
3333
3334 for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
3335 {
3336 for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
3337 {
3338 // re-init the data
3339 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3340 initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
3341 }
3342
3343 totalIterations++;
3344
3345 beginCommandBuffer(vk, *cmdBuffer);
3346
3347 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &colorAttachmentBarrier);
3348
3349 vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3350
3351 vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3352
3353 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
3354
3355 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3356
3357 if (stagesCount + extraDatasCount > 0)
3358 vk.cmdBindDescriptorSets(*cmdBuffer,
3359 VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3360 &descriptorSet.get(), 0u, DE_NULL);
3361
3362 vk.cmdDraw(*cmdBuffer, width, 1, 0, 0);
3363
3364 endRenderPass(vk, *cmdBuffer);
3365
3366 copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(width, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3367
3368 endCommandBuffer(vk, *cmdBuffer);
3369
3370 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3371
3372 for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
3373 {
3374 std::vector<const void*> datas;
3375 if (!inputBuffers[ndx]->isImage())
3376 {
3377 const Allocation& resultAlloc = inputBuffers[ndx]->getAllocation();
3378 invalidateAlloc(vk, device, resultAlloc);
3379 // we always have our result data first
3380 datas.push_back(resultAlloc.getHostPtr());
3381 }
3382
3383 for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
3384 {
3385 const deUint32 datasNdx = index - stagesCount;
3386 if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
3387 {
3388 const Allocation& resultAlloc = inputBuffers[index]->getAllocation();
3389 invalidateAlloc(vk, device, resultAlloc);
3390 // we always have our result data first
3391 datas.push_back(resultAlloc.getHostPtr());
3392 }
3393 }
3394
3395 // Any stage in the vertex pipeline may be called multiple times per vertex, so we may need >= non-strict comparisons.
3396 const bool multiCall = ( stagesVector[ndx] == VK_SHADER_STAGE_VERTEX_BIT ||
3397 stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT ||
3398 stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT ||
3399 stagesVector[ndx] == VK_SHADER_STAGE_GEOMETRY_BIT );
3400 const deUint32 usedWidth = ((stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? width * 2 : width);
3401
3402 if (!checkResult(internalData, datas, usedWidth, subgroupSize, multiCall))
3403 failedIterations++;
3404 }
3405 if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
3406 {
3407 std::vector<const void*> datas;
3408 const Allocation& resultAlloc = imageBufferResult.getAllocation();
3409 invalidateAlloc(vk, device, resultAlloc);
3410
3411 // we always have our result data first
3412 datas.push_back(resultAlloc.getHostPtr());
3413
3414 for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
3415 {
3416 const deUint32 datasNdx = index - stagesCount;
3417 if (VK_SHADER_STAGE_FRAGMENT_BIT & extraDatas[datasNdx].stages && (!inputBuffers[index]->isImage()))
3418 {
3419 const Allocation& alloc = inputBuffers[index]->getAllocation();
3420 invalidateAlloc(vk, device, alloc);
3421 // we always have our result data first
3422 datas.push_back(alloc.getHostPtr());
3423 }
3424 }
3425
3426 if (!checkResult(internalData, datas, width, subgroupSize, false))
3427 failedIterations++;
3428 }
3429
3430 context.resetCommandPoolForVKSC(device, *cmdPool);
3431 }
3432
3433 if (0 < failedIterations)
3434 {
3435 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3436
3437 context.getTestContext().getLog()
3438 << TestLog::Message << valuesPassed << " / "
3439 << totalIterations << " values passed" << TestLog::EndMessage;
3440
3441 return tcu::TestStatus::fail("Failed!");
3442 }
3443 }
3444
3445 return tcu::TestStatus::pass("OK");
3446 }
3447
makeVertexFrameBufferTest(Context & context,vk::VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,subgroups::CheckResult checkResult)3448 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTest (Context& context,
3449 vk::VkFormat format,
3450 const SSBOData* extraData,
3451 deUint32 extraDataCount,
3452 const void* internalData,
3453 subgroups::CheckResult checkResult)
3454 {
3455 return makeVertexFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, 0u, 0u);
3456 }
3457
makeVertexFrameBufferTestRequiredSubgroupSize(Context & context,vk::VkFormat format,const SSBOData * extraData,deUint32 extraDataCount,const void * internalData,subgroups::CheckResult checkResult,const deUint32 vertexShaderStageCreateFlags,const deUint32 requiredSubgroupSize)3458 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTestRequiredSubgroupSize (Context& context,
3459 vk::VkFormat format,
3460 const SSBOData* extraData,
3461 deUint32 extraDataCount,
3462 const void* internalData,
3463 subgroups::CheckResult checkResult,
3464 const deUint32 vertexShaderStageCreateFlags,
3465 const deUint32 requiredSubgroupSize)
3466 {
3467 const DeviceInterface& vk = context.getDeviceInterface();
3468 const VkDevice device = context.getDevice();
3469 const VkQueue queue = context.getUniversalQueue();
3470 const deUint32 maxWidth = getMaxWidth();
3471 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
3472 vector<de::SharedPtr<BufferOrImage> > inputBuffers (extraDataCount);
3473 DescriptorSetLayoutBuilder layoutBuilder;
3474 const Unique<VkShaderModule> vertexShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
3475 const Unique<VkShaderModule> fragmentShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
3476 const Unique<VkRenderPass> renderPass (makeRenderPass(context, format));
3477 const VkVertexInputBindingDescription vertexInputBinding =
3478 {
3479 0u, // binding;
3480 static_cast<deUint32>(sizeof(tcu::Vec4)), // stride;
3481 VK_VERTEX_INPUT_RATE_VERTEX // inputRate
3482 };
3483 const VkVertexInputAttributeDescription vertexInputAttribute =
3484 {
3485 0u,
3486 0u,
3487 VK_FORMAT_R32G32B32A32_SFLOAT,
3488 0u
3489 };
3490
3491 for (deUint32 i = 0u; i < extraDataCount; i++)
3492 {
3493 if (extraData[i].isImage())
3494 {
3495 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
3496 }
3497 else
3498 {
3499 DE_ASSERT(extraData[i].isUBO());
3500 vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
3501 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
3502 }
3503 const Allocation& alloc = inputBuffers[i]->getAllocation();
3504 initializeMemory(context, alloc, extraData[i]);
3505 }
3506
3507 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
3508 layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_VERTEX_BIT, DE_NULL);
3509
3510 const Unique<VkDescriptorSetLayout> descriptorSetLayout (layoutBuilder.build(vk, device));
3511
3512 const Unique<VkPipelineLayout> pipelineLayout (makePipelineLayout(vk, device, *descriptorSetLayout));
3513
3514 const deUint32 requiredSubgroupSizes[5] = {requiredSubgroupSize, 0u, 0u, 0u, 0u};
3515 const Unique<VkPipeline> pipeline (makeGraphicsPipeline(context, *pipelineLayout,
3516 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
3517 *vertexShaderModule, *fragmentShaderModule,
3518 DE_NULL, DE_NULL, DE_NULL,
3519 *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
3520 &vertexInputBinding, &vertexInputAttribute, true, format,
3521 vertexShaderStageCreateFlags, 0u, 0u, 0u, 0u,
3522 requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
3523 DescriptorPoolBuilder poolBuilder;
3524 DescriptorSetUpdateBuilder updateBuilder;
3525
3526
3527 for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
3528 poolBuilder.addType(inputBuffers[ndx]->getType());
3529
3530 Move <VkDescriptorPool> descriptorPool;
3531 Move <VkDescriptorSet> descriptorSet;
3532
3533 if (extraDataCount > 0)
3534 {
3535 descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3536 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3537 }
3538
3539 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
3540 {
3541 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3542 initializeMemory(context, alloc, extraData[ndx]);
3543 }
3544
3545 for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
3546 {
3547 if (inputBuffers[buffersNdx]->isImage())
3548 {
3549 VkDescriptorImageInfo info =
3550 makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
3551 inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3552
3553 updateBuilder.writeSingle(*descriptorSet,
3554 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
3555 inputBuffers[buffersNdx]->getType(), &info);
3556 }
3557 else
3558 {
3559 VkDescriptorBufferInfo info =
3560 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
3561 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
3562
3563 updateBuilder.writeSingle(*descriptorSet,
3564 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
3565 inputBuffers[buffersNdx]->getType(), &info);
3566 }
3567 }
3568 updateBuilder.update(vk, device);
3569
3570 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
3571
3572 const deUint32 subgroupSize = getSubgroupSize(context);
3573
3574 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
3575
3576 const vk::VkDeviceSize vertexBufferSize = maxWidth * sizeof(tcu::Vec4);
3577 Buffer vertexBuffer (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
3578
3579 unsigned totalIterations = 0u;
3580 unsigned failedIterations = 0u;
3581
3582 Image discardableImage (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3583
3584 {
3585 const Allocation& alloc = vertexBuffer.getAllocation();
3586 std::vector<tcu::Vec4> data (maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
3587 const float pixelSize = 2.0f / static_cast<float>(maxWidth);
3588 float leftHandPosition = -1.0f;
3589
3590 for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
3591 {
3592 data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
3593 leftHandPosition += pixelSize;
3594 }
3595
3596 deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
3597 flushAlloc(vk, device, alloc);
3598 }
3599
3600 const Unique<VkFramebuffer> framebuffer (makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
3601 const VkViewport viewport = makeViewport(maxWidth, 1u);
3602 const VkRect2D scissor = makeRect2D(maxWidth, 1u);
3603 const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
3604 Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3605 const VkDeviceSize vertexBufferOffset = 0u;
3606
3607 for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
3608 {
3609 totalIterations++;
3610
3611 for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
3612 {
3613 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3614 initializeMemory(context, alloc, extraData[ndx]);
3615 }
3616
3617 beginCommandBuffer(vk, *cmdBuffer);
3618 {
3619 vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3620
3621 vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3622
3623 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
3624
3625 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3626
3627 if (extraDataCount > 0)
3628 {
3629 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3630 &descriptorSet.get(), 0u, DE_NULL);
3631 }
3632
3633 vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
3634
3635 vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
3636
3637 endRenderPass(vk, *cmdBuffer);
3638
3639 copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3640
3641 endCommandBuffer(vk, *cmdBuffer);
3642
3643 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3644 }
3645 context.resetCommandPoolForVKSC(device, *cmdPool);
3646
3647 {
3648 const Allocation& allocResult = imageBufferResult.getAllocation();
3649 invalidateAlloc(vk, device, allocResult);
3650
3651 std::vector<const void*> datas;
3652 datas.push_back(allocResult.getHostPtr());
3653 if (!checkResult(internalData, datas, width, subgroupSize))
3654 failedIterations++;
3655 }
3656 }
3657
3658 if (0 < failedIterations)
3659 {
3660 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3661
3662 context.getTestContext().getLog()
3663 << TestLog::Message << valuesPassed << " / "
3664 << totalIterations << " values passed" << TestLog::EndMessage;
3665
3666 return tcu::TestStatus::fail("Failed!");
3667 }
3668
3669 return tcu::TestStatus::pass("OK");
3670 }
3671
makeFragmentFrameBufferTest(Context & context,VkFormat format,const SSBOData * extraDatas,deUint32 extraDatasCount,const void * internalData,CheckResultFragment checkResult)3672 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTest (Context& context,
3673 VkFormat format,
3674 const SSBOData* extraDatas,
3675 deUint32 extraDatasCount,
3676 const void* internalData,
3677 CheckResultFragment checkResult)
3678 {
3679 return makeFragmentFrameBufferTestRequiredSubgroupSize(context, format, extraDatas, extraDatasCount, internalData, checkResult, 0u, 0u);
3680 }
3681
makeFragmentFrameBufferTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * extraDatas,deUint32 extraDatasCount,const void * internalData,CheckResultFragment checkResult,const deUint32 fragmentShaderStageCreateFlags,const deUint32 requiredSubgroupSize)3682 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTestRequiredSubgroupSize (Context& context,
3683 VkFormat format,
3684 const SSBOData* extraDatas,
3685 deUint32 extraDatasCount,
3686 const void* internalData,
3687 CheckResultFragment checkResult,
3688 const deUint32 fragmentShaderStageCreateFlags,
3689 const deUint32 requiredSubgroupSize)
3690 {
3691 const DeviceInterface& vk = context.getDeviceInterface();
3692 const VkDevice device = context.getDevice();
3693 const VkQueue queue = context.getUniversalQueue();
3694 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
3695 const Unique<VkShaderModule> vertexShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
3696 const Unique<VkShaderModule> fragmentShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
3697 std::vector< de::SharedPtr<BufferOrImage> > inputBuffers (extraDatasCount);
3698
3699 for (deUint32 i = 0; i < extraDatasCount; i++)
3700 {
3701 if (extraDatas[i].isImage())
3702 {
3703 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[i].numElements), 1, extraDatas[i].format));
3704 }
3705 else
3706 {
3707 DE_ASSERT(extraDatas[i].isUBO());
3708
3709 const vk::VkDeviceSize size = getElementSizeInBytes(extraDatas[i].format, extraDatas[i].layout) * extraDatas[i].numElements;
3710
3711 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
3712 }
3713
3714 const Allocation& alloc = inputBuffers[i]->getAllocation();
3715
3716 initializeMemory(context, alloc, extraDatas[i]);
3717 }
3718
3719 DescriptorSetLayoutBuilder layoutBuilder;
3720
3721 for (deUint32 i = 0; i < extraDatasCount; i++)
3722 {
3723 layoutBuilder.addBinding(inputBuffers[i]->getType(), 1, VK_SHADER_STAGE_FRAGMENT_BIT, DE_NULL);
3724 }
3725
3726 const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
3727 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
3728 const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
3729 const deUint32 requiredSubgroupSizes[5] = {0u, 0u, 0u, 0u, requiredSubgroupSize};
3730 const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context,
3731 *pipelineLayout,
3732 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
3733 *vertexShaderModule,
3734 *fragmentShaderModule,
3735 DE_NULL,
3736 DE_NULL,
3737 DE_NULL,
3738 *renderPass,
3739 VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
3740 DE_NULL,
3741 DE_NULL,
3742 true,
3743 VK_FORMAT_R32G32B32A32_SFLOAT,
3744 0u,
3745 0u,
3746 0u,
3747 0u,
3748 fragmentShaderStageCreateFlags,
3749 requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
3750 DescriptorPoolBuilder poolBuilder;
3751
3752 // To stop validation complaining, always add at least one type to pool.
3753 poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3754 for (deUint32 i = 0; i < extraDatasCount; i++)
3755 {
3756 poolBuilder.addType(inputBuffers[i]->getType());
3757 }
3758
3759 Move<VkDescriptorPool> descriptorPool;
3760 // Create descriptor set
3761 Move<VkDescriptorSet> descriptorSet;
3762
3763 if (extraDatasCount > 0)
3764 {
3765 descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3766
3767 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3768 }
3769
3770 DescriptorSetUpdateBuilder updateBuilder;
3771
3772 for (deUint32 i = 0; i < extraDatasCount; i++)
3773 {
3774 if (inputBuffers[i]->isImage())
3775 {
3776 const VkDescriptorImageInfo info = makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(), inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3777
3778 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i), inputBuffers[i]->getType(), &info);
3779 }
3780 else
3781 {
3782 const VkDescriptorBufferInfo info = makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, inputBuffers[i]->getAsBuffer()->getSize());
3783
3784 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i), inputBuffers[i]->getType(), &info);
3785 }
3786 }
3787
3788 if (extraDatasCount > 0)
3789 updateBuilder.update(vk, device);
3790
3791 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
3792 const deUint32 subgroupSize = getSubgroupSize(context);
3793 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
3794 unsigned totalIterations = 0;
3795 unsigned failedIterations = 0;
3796
3797 for (deUint32 width = 8; width <= subgroupSize; width *= 2)
3798 {
3799 for (deUint32 height = 8; height <= subgroupSize; height *= 2)
3800 {
3801 totalIterations++;
3802
3803 // re-init the data
3804 for (deUint32 i = 0; i < extraDatasCount; i++)
3805 {
3806 const Allocation& alloc = inputBuffers[i]->getAllocation();
3807
3808 initializeMemory(context, alloc, extraDatas[i]);
3809 }
3810
3811 const VkDeviceSize formatSize = getFormatSizeInBytes(format);
3812 const VkDeviceSize resultImageSizeInBytes = width * height * formatSize;
3813 Image resultImage (context, width, height, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3814 Buffer resultBuffer (context, resultImageSizeInBytes, VK_IMAGE_USAGE_TRANSFER_DST_BIT);
3815 const Unique<VkFramebuffer> framebuffer (makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), width, height));
3816 VkViewport viewport = makeViewport(width, height);
3817 VkRect2D scissor = {{0, 0}, {width, height}};
3818
3819 beginCommandBuffer(vk, *cmdBuffer);
3820
3821 vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3822
3823 vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3824
3825 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, width, height), tcu::Vec4(0.0f));
3826
3827 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3828
3829 if (extraDatasCount > 0)
3830 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
3831
3832 vk.cmdDraw(*cmdBuffer, 4, 1, 0, 0);
3833
3834 endRenderPass(vk, *cmdBuffer);
3835
3836 copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), resultBuffer.getBuffer(), tcu::IVec2(width, height), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3837
3838 endCommandBuffer(vk, *cmdBuffer);
3839
3840 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3841
3842 std::vector<const void*> datas;
3843 {
3844 const Allocation& resultAlloc = resultBuffer.getAllocation();
3845 invalidateAlloc(vk, device, resultAlloc);
3846
3847 // we always have our result data first
3848 datas.push_back(resultAlloc.getHostPtr());
3849 }
3850
3851 if (!checkResult(internalData, datas, width, height, subgroupSize))
3852 {
3853 failedIterations++;
3854 }
3855
3856 context.resetCommandPoolForVKSC(device, *cmdPool);
3857 }
3858 }
3859
3860 if (0 < failedIterations)
3861 {
3862 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3863
3864 context.getTestContext().getLog()
3865 << TestLog::Message << valuesPassed << " / "
3866 << totalIterations << " values passed" << TestLog::EndMessage;
3867
3868 return tcu::TestStatus::fail("Failed!");
3869 }
3870
3871 return tcu::TestStatus::pass("OK");
3872 }
3873
makeComputePipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderModule shaderModule,const deUint32 pipelineShaderStageFlags,const deUint32 pipelineCreateFlags,VkPipeline basePipelineHandle,deUint32 localSizeX,deUint32 localSizeY,deUint32 localSizeZ,deUint32 requiredSubgroupSize)3874 Move<VkPipeline> makeComputePipeline (Context& context,
3875 const VkPipelineLayout pipelineLayout,
3876 const VkShaderModule shaderModule,
3877 const deUint32 pipelineShaderStageFlags,
3878 const deUint32 pipelineCreateFlags,
3879 VkPipeline basePipelineHandle,
3880 deUint32 localSizeX,
3881 deUint32 localSizeY,
3882 deUint32 localSizeZ,
3883 deUint32 requiredSubgroupSize)
3884 {
3885 const deUint32 localSize[3] = {localSizeX, localSizeY, localSizeZ};
3886 const vk::VkSpecializationMapEntry entries[3] =
3887 {
3888 {0, sizeof(deUint32) * 0, sizeof(deUint32)},
3889 {1, sizeof(deUint32) * 1, sizeof(deUint32)},
3890 {2, static_cast<deUint32>(sizeof(deUint32) * 2), sizeof(deUint32)},
3891 };
3892 const vk::VkSpecializationInfo info =
3893 {
3894 /* mapEntryCount = */ 3,
3895 /* pMapEntries = */ entries,
3896 /* dataSize = */ sizeof(localSize),
3897 /* pData = */ localSize
3898 };
3899 const vk::VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroupSizeCreateInfo =
3900 {
3901 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, // VkStructureType sType;
3902 DE_NULL, // void* pNext;
3903 requiredSubgroupSize // uint32_t requiredSubgroupSize;
3904 };
3905 const vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
3906 {
3907 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
3908 (requiredSubgroupSize != 0u ? &subgroupSizeCreateInfo : DE_NULL), // const void* pNext;
3909 pipelineShaderStageFlags, // VkPipelineShaderStageCreateFlags flags;
3910 VK_SHADER_STAGE_COMPUTE_BIT, // VkShaderStageFlagBits stage;
3911 shaderModule, // VkShaderModule module;
3912 "main", // const char* pName;
3913 &info, // const VkSpecializationInfo* pSpecializationInfo;
3914 };
3915 const vk::VkComputePipelineCreateInfo pipelineCreateInfo =
3916 {
3917 VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType sType;
3918 DE_NULL, // const void* pNext;
3919 pipelineCreateFlags, // VkPipelineCreateFlags flags;
3920 pipelineShaderStageParams, // VkPipelineShaderStageCreateInfo stage;
3921 pipelineLayout, // VkPipelineLayout layout;
3922 basePipelineHandle, // VkPipeline basePipelineHandle;
3923 -1, // deInt32 basePipelineIndex;
3924 };
3925
3926 return createComputePipeline(context.getDeviceInterface(), context.getDevice(), DE_NULL, &pipelineCreateInfo);
3927 }
3928
3929 #ifndef CTS_USES_VULKANSC
makeMeshPipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderModule taskModule,const VkShaderModule meshModule,const deUint32 pipelineShaderStageFlags,const deUint32 pipelineCreateFlags,VkPipeline basePipelineHandle,deUint32 localSizeX,deUint32 localSizeY,deUint32 localSizeZ,deUint32 requiredSubgroupSize,const VkRenderPass renderPass)3930 Move<VkPipeline> makeMeshPipeline (Context& context,
3931 const VkPipelineLayout pipelineLayout,
3932 const VkShaderModule taskModule,
3933 const VkShaderModule meshModule,
3934 const deUint32 pipelineShaderStageFlags,
3935 const deUint32 pipelineCreateFlags,
3936 VkPipeline basePipelineHandle,
3937 deUint32 localSizeX,
3938 deUint32 localSizeY,
3939 deUint32 localSizeZ,
3940 deUint32 requiredSubgroupSize,
3941 const VkRenderPass renderPass)
3942 {
3943 const deUint32 localSize[3] = {localSizeX, localSizeY, localSizeZ};
3944 const vk::VkSpecializationMapEntry entries[3] =
3945 {
3946 {0, sizeof(deUint32) * 0, sizeof(deUint32)},
3947 {1, sizeof(deUint32) * 1, sizeof(deUint32)},
3948 {2, static_cast<deUint32>(sizeof(deUint32) * 2), sizeof(deUint32)},
3949 };
3950 const vk::VkSpecializationInfo info =
3951 {
3952 /* mapEntryCount = */ 3,
3953 /* pMapEntries = */ entries,
3954 /* dataSize = */ sizeof(localSize),
3955 /* pData = */ localSize
3956 };
3957 const vk::VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroupSizeCreateInfo =
3958 {
3959 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, // VkStructureType sType;
3960 DE_NULL, // void* pNext;
3961 requiredSubgroupSize // uint32_t requiredSubgroupSize;
3962 };
3963
3964 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT* pSubgroupSizeCreateInfo = ((requiredSubgroupSize != 0u) ? &subgroupSizeCreateInfo : nullptr);
3965
3966 std::vector<VkPipelineShaderStageCreateInfo> shaderStageParams;
3967 vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
3968 {
3969 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
3970 nullptr, // const void* pNext;
3971 pipelineShaderStageFlags, // VkPipelineShaderStageCreateFlags flags;
3972 VK_SHADER_STAGE_FLAG_BITS_MAX_ENUM, // VkShaderStageFlagBits stage;
3973 DE_NULL, // VkShaderModule module;
3974 "main", // const char* pName;
3975 &info, // const VkSpecializationInfo* pSpecializationInfo;
3976 };
3977
3978 if (taskModule != DE_NULL)
3979 {
3980 pipelineShaderStageParams.module = taskModule;
3981 pipelineShaderStageParams.pNext = pSubgroupSizeCreateInfo;
3982 pipelineShaderStageParams.stage = VK_SHADER_STAGE_TASK_BIT_EXT;
3983 shaderStageParams.push_back(pipelineShaderStageParams);
3984 }
3985
3986 if (meshModule != DE_NULL)
3987 {
3988 pipelineShaderStageParams.module = meshModule;
3989 pipelineShaderStageParams.pNext = ((taskModule == DE_NULL) ? pSubgroupSizeCreateInfo : nullptr);
3990 pipelineShaderStageParams.stage = VK_SHADER_STAGE_MESH_BIT_EXT;
3991 shaderStageParams.push_back(pipelineShaderStageParams);
3992 }
3993
3994 const std::vector<VkViewport> viewports (1u, makeViewport(1u, 1u));
3995 const std::vector<VkRect2D> scissors (1u, makeRect2D(1u, 1u));
3996
3997 return makeGraphicsPipeline(context.getDeviceInterface(), context.getDevice(), basePipelineHandle, pipelineLayout, pipelineCreateFlags, shaderStageParams, renderPass, viewports, scissors);
3998 }
3999 #endif // CTS_USES_VULKANSC
4000
makeComputeOrMeshTestRequiredSubgroupSize(ComputeLike testType,Context & context,VkFormat format,const vkt::subgroups::SSBOData * inputs,deUint32 inputsCount,const void * internalData,vkt::subgroups::CheckResultCompute checkResult,const deUint32 pipelineShaderStageCreateFlags,const deUint32 numWorkgroups[3],const deBool isRequiredSubgroupSize,const deUint32 subgroupSize,const deUint32 localSizesToTest[][3],const deUint32 localSizesToTestCount)4001 tcu::TestStatus makeComputeOrMeshTestRequiredSubgroupSize (ComputeLike testType,
4002 Context& context,
4003 VkFormat format,
4004 const vkt::subgroups::SSBOData* inputs,
4005 deUint32 inputsCount,
4006 const void* internalData,
4007 vkt::subgroups::CheckResultCompute checkResult,
4008 const deUint32 pipelineShaderStageCreateFlags,
4009 const deUint32 numWorkgroups[3],
4010 const deBool isRequiredSubgroupSize,
4011 const deUint32 subgroupSize,
4012 const deUint32 localSizesToTest[][3],
4013 const deUint32 localSizesToTestCount)
4014 {
4015 const DeviceInterface& vk = context.getDeviceInterface();
4016 const VkDevice device = context.getDevice();
4017 const VkQueue queue = context.getUniversalQueue();
4018 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
4019 #ifndef CTS_USES_VULKANSC
4020 const VkPhysicalDeviceSubgroupSizeControlProperties& subgroupSizeControlProperties = context.getSubgroupSizeControlProperties();
4021 #else
4022 const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& subgroupSizeControlProperties = context.getSubgroupSizeControlPropertiesEXT();
4023 #endif // CTS_USES_VULKANSC
4024 const VkDeviceSize elementSize = getFormatSizeInBytes(format);
4025 const VkDeviceSize maxSubgroupSize = isRequiredSubgroupSize
4026 ? deMax32(subgroupSizeControlProperties.maxSubgroupSize, vkt::subgroups::maxSupportedSubgroupSize())
4027 : vkt::subgroups::maxSupportedSubgroupSize();
4028 const VkDeviceSize resultBufferSize = maxSubgroupSize * maxSubgroupSize * maxSubgroupSize;
4029 const VkDeviceSize resultBufferSizeInBytes = resultBufferSize * elementSize;
4030 Buffer resultBuffer (context, resultBufferSizeInBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
4031 std::vector< de::SharedPtr<BufferOrImage> > inputBuffers (inputsCount);
4032 const auto shaderStageFlags = ((testType == ComputeLike::COMPUTE)
4033 ? VK_SHADER_STAGE_COMPUTE_BIT
4034 #ifndef CTS_USES_VULKANSC
4035 : (VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_TASK_BIT_EXT));
4036 #else
4037 : 0);
4038 #endif // CTS_USES_VULKANSC
4039 const auto pipelineBindPoint = ((testType == ComputeLike::COMPUTE)
4040 ? VK_PIPELINE_BIND_POINT_COMPUTE
4041 : VK_PIPELINE_BIND_POINT_GRAPHICS);
4042 const auto pipelineStage = ((testType == ComputeLike::COMPUTE)
4043 ? VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT
4044 #ifndef CTS_USES_VULKANSC
4045 : (VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT | VK_PIPELINE_STAGE_MESH_SHADER_BIT_EXT));
4046 #else
4047 : 0);
4048 #endif // CTS_USES_VULKANSC
4049 const auto renderArea = makeRect2D(1u, 1u);
4050
4051 std::vector<tcu::UVec3> usedLocalSizes;
4052 for (deUint32 i = 0; i < localSizesToTestCount; ++i)
4053 {
4054 usedLocalSizes.push_back(tcu::UVec3(localSizesToTest[i][0], localSizesToTest[i][1], localSizesToTest[i][2]));
4055 }
4056
4057 for (deUint32 i = 0; i < inputsCount; i++)
4058 {
4059 if (inputs[i].isImage())
4060 {
4061 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(inputs[i].numElements), 1, inputs[i].format));
4062 }
4063 else
4064 {
4065 const auto usage = (inputs[i].isUBO() ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT : VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
4066 const auto size = getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
4067 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, usage));
4068 }
4069
4070 const Allocation& alloc = inputBuffers[i]->getAllocation();
4071
4072 initializeMemory(context, alloc, inputs[i]);
4073 }
4074
4075 DescriptorSetLayoutBuilder layoutBuilder;
4076 layoutBuilder.addBinding(
4077 resultBuffer.getType(), 1, shaderStageFlags, DE_NULL);
4078
4079 for (deUint32 i = 0; i < inputsCount; i++)
4080 {
4081 layoutBuilder.addBinding(
4082 inputBuffers[i]->getType(), 1, shaderStageFlags, DE_NULL);
4083 }
4084
4085 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
4086 layoutBuilder.build(vk, device));
4087
4088 Move<VkShaderModule> compShader;
4089 Move<VkShaderModule> meshShader;
4090 Move<VkShaderModule> taskShader;
4091 const auto& binaries = context.getBinaryCollection();
4092
4093 if (testType == ComputeLike::COMPUTE)
4094 {
4095 compShader = createShaderModule(vk, device, binaries.get("comp"));
4096 }
4097 else if (testType == ComputeLike::MESH)
4098 {
4099 meshShader = createShaderModule(vk, device, binaries.get("mesh"));
4100 if (binaries.contains("task"))
4101 taskShader = createShaderModule(vk, device, binaries.get("task"));
4102 }
4103 else
4104 {
4105 DE_ASSERT(false);
4106 }
4107
4108 const Unique<VkPipelineLayout> pipelineLayout(
4109 makePipelineLayout(vk, device, *descriptorSetLayout));
4110
4111 DescriptorPoolBuilder poolBuilder;
4112
4113 poolBuilder.addType(resultBuffer.getType());
4114
4115 for (deUint32 i = 0; i < inputsCount; i++)
4116 {
4117 poolBuilder.addType(inputBuffers[i]->getType());
4118 }
4119
4120 const Unique<VkDescriptorPool> descriptorPool (poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
4121 const Unique<VkDescriptorSet> descriptorSet (makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
4122 const VkDescriptorBufferInfo resultDescriptorInfo = makeDescriptorBufferInfo(resultBuffer.getBuffer(), 0ull, resultBufferSizeInBytes);
4123 DescriptorSetUpdateBuilder updateBuilder;
4124
4125 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
4126
4127 for (deUint32 i = 0; i < inputsCount; i++)
4128 {
4129 if (inputBuffers[i]->isImage())
4130 {
4131 const VkDescriptorImageInfo info = makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(), inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
4132
4133 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i + 1), inputBuffers[i]->getType(), &info);
4134 }
4135 else
4136 {
4137 vk::VkDeviceSize size = getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
4138 VkDescriptorBufferInfo info = makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, size);
4139
4140 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i + 1), inputBuffers[i]->getType(), &info);
4141 }
4142 }
4143
4144 updateBuilder.update(vk, device);
4145
4146 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
4147 unsigned totalIterations = 0;
4148 unsigned failedIterations = 0;
4149 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
4150 std::vector<de::SharedPtr<Move<VkPipeline>>> pipelines (localSizesToTestCount);
4151 const auto reqSubgroupSize = (isRequiredSubgroupSize ? subgroupSize : 0u);
4152 Move<VkRenderPass> renderPass;
4153 Move<VkFramebuffer> framebuffer;
4154
4155 if (testType == ComputeLike::MESH)
4156 {
4157 renderPass = makeRenderPass(vk, device);
4158 framebuffer = makeFramebuffer(vk, device, renderPass.get(), 0u, nullptr, renderArea.extent.width, renderArea.extent.height);
4159 }
4160
4161 context.getTestContext().touchWatchdog();
4162 {
4163 if (testType == ComputeLike::COMPUTE)
4164 {
4165 pipelines[0] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeComputePipeline(context,
4166 *pipelineLayout,
4167 *compShader,
4168 pipelineShaderStageCreateFlags,
4169 #ifndef CTS_USES_VULKANSC
4170 VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT,
4171 #else
4172 0u,
4173 #endif // CTS_USES_VULKANSC
4174 (VkPipeline) DE_NULL,
4175 usedLocalSizes[0][0],
4176 usedLocalSizes[0][1],
4177 usedLocalSizes[0][2],
4178 reqSubgroupSize)));
4179 }
4180 #ifndef CTS_USES_VULKANSC
4181 else if (testType == ComputeLike::MESH)
4182 {
4183 pipelines[0] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeMeshPipeline(context,
4184 pipelineLayout.get(),
4185 taskShader.get(),
4186 meshShader.get(),
4187 pipelineShaderStageCreateFlags,
4188 VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT,
4189 DE_NULL,
4190 usedLocalSizes[0][0],
4191 usedLocalSizes[0][1],
4192 usedLocalSizes[0][2],
4193 reqSubgroupSize,
4194 renderPass.get())));
4195 }
4196 #endif // CTS_USES_VULKANSC
4197 else
4198 {
4199 DE_ASSERT(false);
4200 }
4201 }
4202 context.getTestContext().touchWatchdog();
4203
4204 for (deUint32 index = 1; index < (localSizesToTestCount - 1); index++)
4205 {
4206 const deUint32 nextX = usedLocalSizes[index][0];
4207 const deUint32 nextY = usedLocalSizes[index][1];
4208 const deUint32 nextZ = usedLocalSizes[index][2];
4209
4210 context.getTestContext().touchWatchdog();
4211 {
4212 if (testType == ComputeLike::COMPUTE)
4213 {
4214 pipelines[index] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeComputePipeline(context,
4215 *pipelineLayout,
4216 *compShader,
4217 pipelineShaderStageCreateFlags,
4218 #ifndef CTS_USES_VULKANSC
4219 VK_PIPELINE_CREATE_DERIVATIVE_BIT,
4220 #else
4221 0u,
4222 #endif // CTS_USES_VULKANSC
4223 **pipelines[0],
4224 nextX,
4225 nextY,
4226 nextZ,
4227 reqSubgroupSize)));
4228 }
4229 #ifndef CTS_USES_VULKANSC
4230 else if (testType == ComputeLike::MESH)
4231 {
4232 pipelines[index] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeMeshPipeline(context,
4233 pipelineLayout.get(),
4234 taskShader.get(),
4235 meshShader.get(),
4236 pipelineShaderStageCreateFlags,
4237 VK_PIPELINE_CREATE_DERIVATIVE_BIT,
4238 pipelines[0].get()->get(),
4239 nextX,
4240 nextY,
4241 nextZ,
4242 reqSubgroupSize,
4243 renderPass.get())));
4244 }
4245 #endif // CTS_USES_VULKANSC
4246 else
4247 {
4248 DE_ASSERT(false);
4249 }
4250 }
4251 context.getTestContext().touchWatchdog();
4252 }
4253
4254 for (deUint32 index = 0; index < (localSizesToTestCount - 1); index++)
4255 {
4256 // we are running one test
4257 totalIterations++;
4258
4259 beginCommandBuffer(vk, *cmdBuffer);
4260 {
4261 if (testType == ComputeLike::MESH)
4262 beginRenderPass(vk, *cmdBuffer, renderPass.get(), framebuffer.get(), renderArea);
4263
4264 vk.cmdBindPipeline(*cmdBuffer, pipelineBindPoint, **pipelines[index]);
4265
4266 vk.cmdBindDescriptorSets(*cmdBuffer, pipelineBindPoint, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
4267
4268 if (testType == ComputeLike::COMPUTE)
4269 vk.cmdDispatch(*cmdBuffer, numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
4270 #ifndef CTS_USES_VULKANSC
4271 else if (testType == ComputeLike::MESH)
4272 vk.cmdDrawMeshTasksEXT(*cmdBuffer, numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
4273 //vk.cmdDrawMeshTasksNV(*cmdBuffer, numWorkgroups[0], 0);
4274 #endif // CTS_USES_VULKANSC
4275 else
4276 DE_ASSERT(false);
4277
4278 if (testType == ComputeLike::MESH)
4279 endRenderPass(vk, *cmdBuffer);
4280 }
4281
4282 // Make shader writes available.
4283 const auto postShaderBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
4284 vk.cmdPipelineBarrier(*cmdBuffer, pipelineStage, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u, &postShaderBarrier, 0u, nullptr, 0u, nullptr);
4285
4286 endCommandBuffer(vk, *cmdBuffer);
4287
4288 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
4289
4290 std::vector<const void*> datas;
4291
4292 {
4293 const Allocation& resultAlloc = resultBuffer.getAllocation();
4294 invalidateAlloc(vk, device, resultAlloc);
4295
4296 // we always have our result data first
4297 datas.push_back(resultAlloc.getHostPtr());
4298 }
4299
4300 for (deUint32 i = 0; i < inputsCount; i++)
4301 {
4302 if (!inputBuffers[i]->isImage())
4303 {
4304 const Allocation& resultAlloc = inputBuffers[i]->getAllocation();
4305 invalidateAlloc(vk, device, resultAlloc);
4306
4307 // we always have our result data first
4308 datas.push_back(resultAlloc.getHostPtr());
4309 }
4310 }
4311
4312 if (!checkResult(internalData, datas, numWorkgroups, usedLocalSizes[index].getPtr(), subgroupSize))
4313 {
4314 failedIterations++;
4315 }
4316 else
4317 {
4318 failedIterations = failedIterations + 0;
4319 }
4320
4321 context.resetCommandPoolForVKSC(device, *cmdPool);
4322 }
4323
4324 if (0 < failedIterations)
4325 {
4326 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
4327
4328 context.getTestContext().getLog()
4329 << TestLog::Message << valuesPassed << " / "
4330 << totalIterations << " values passed" << TestLog::EndMessage;
4331
4332 return tcu::TestStatus::fail("Failed!");
4333 }
4334
4335 return tcu::TestStatus::pass("OK");
4336 }
4337
makeComputeTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * inputs,deUint32 inputsCount,const void * internalData,CheckResultCompute checkResult,const deUint32 pipelineShaderStageCreateFlags,const deUint32 numWorkgroups[3],const deBool isRequiredSubgroupSize,const deUint32 subgroupSize,const deUint32 localSizesToTest[][3],const deUint32 localSizesToTestCount)4338 tcu::TestStatus vkt::subgroups::makeComputeTestRequiredSubgroupSize (Context& context,
4339 VkFormat format,
4340 const SSBOData* inputs,
4341 deUint32 inputsCount,
4342 const void* internalData,
4343 CheckResultCompute checkResult,
4344 const deUint32 pipelineShaderStageCreateFlags,
4345 const deUint32 numWorkgroups[3],
4346 const deBool isRequiredSubgroupSize,
4347 const deUint32 subgroupSize,
4348 const deUint32 localSizesToTest[][3],
4349 const deUint32 localSizesToTestCount)
4350 {
4351 return makeComputeOrMeshTestRequiredSubgroupSize(
4352 ComputeLike::COMPUTE,
4353 context,
4354 format,
4355 inputs,
4356 inputsCount,
4357 internalData,
4358 checkResult,
4359 pipelineShaderStageCreateFlags,
4360 numWorkgroups,
4361 isRequiredSubgroupSize,
4362 subgroupSize,
4363 localSizesToTest,
4364 localSizesToTestCount);
4365 }
4366
makeMeshTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * inputs,deUint32 inputsCount,const void * internalData,CheckResultCompute checkResult,const deUint32 pipelineShaderStageCreateFlags,const deUint32 numWorkgroups[3],const deBool isRequiredSubgroupSize,const deUint32 subgroupSize,const deUint32 localSizesToTest[][3],const deUint32 localSizesToTestCount)4367 tcu::TestStatus vkt::subgroups::makeMeshTestRequiredSubgroupSize (Context& context,
4368 VkFormat format,
4369 const SSBOData* inputs,
4370 deUint32 inputsCount,
4371 const void* internalData,
4372 CheckResultCompute checkResult,
4373 const deUint32 pipelineShaderStageCreateFlags,
4374 const deUint32 numWorkgroups[3],
4375 const deBool isRequiredSubgroupSize,
4376 const deUint32 subgroupSize,
4377 const deUint32 localSizesToTest[][3],
4378 const deUint32 localSizesToTestCount)
4379 {
4380 return makeComputeOrMeshTestRequiredSubgroupSize(
4381 ComputeLike::MESH,
4382 context,
4383 format,
4384 inputs,
4385 inputsCount,
4386 internalData,
4387 checkResult,
4388 pipelineShaderStageCreateFlags,
4389 numWorkgroups,
4390 isRequiredSubgroupSize,
4391 subgroupSize,
4392 localSizesToTest,
4393 localSizesToTestCount);
4394 }
4395
makeComputeOrMeshTest(ComputeLike testType,Context & context,VkFormat format,const vkt::subgroups::SSBOData * inputs,deUint32 inputsCount,const void * internalData,vkt::subgroups::CheckResultCompute checkResult,deUint32 requiredSubgroupSize,const deUint32 pipelineShaderStageCreateFlags)4396 tcu::TestStatus makeComputeOrMeshTest (ComputeLike testType,
4397 Context& context,
4398 VkFormat format,
4399 const vkt::subgroups::SSBOData* inputs,
4400 deUint32 inputsCount,
4401 const void* internalData,
4402 vkt::subgroups::CheckResultCompute checkResult,
4403 deUint32 requiredSubgroupSize,
4404 const deUint32 pipelineShaderStageCreateFlags)
4405 {
4406 const uint32_t numWorkgroups[3] = {4, 2, 2};
4407 const bool isRequiredSubgroupSize = (requiredSubgroupSize != 0u);
4408 const uint32_t subgroupSize = (isRequiredSubgroupSize ? requiredSubgroupSize : vkt::subgroups::getSubgroupSize(context));
4409
4410 const deUint32 localSizesToTestCount = 8;
4411 deUint32 localSizesToTest[localSizesToTestCount][3] =
4412 {
4413 {1, 1, 1},
4414 {subgroupSize, 1, 1},
4415 {1, subgroupSize, 1},
4416 {1, 1, subgroupSize},
4417 {32, 4, 1},
4418 {1, 4, 32},
4419 {3, 5, 7},
4420 {1, 1, 1} // Isn't used, just here to make double buffering checks easier
4421 };
4422
4423 if (testType == ComputeLike::COMPUTE)
4424 return makeComputeTestRequiredSubgroupSize(context, format, inputs, inputsCount, internalData, checkResult, pipelineShaderStageCreateFlags,
4425 numWorkgroups, isRequiredSubgroupSize, subgroupSize, localSizesToTest, localSizesToTestCount);
4426 else
4427 return makeMeshTestRequiredSubgroupSize(context, format, inputs, inputsCount, internalData, checkResult, pipelineShaderStageCreateFlags,
4428 numWorkgroups, isRequiredSubgroupSize, subgroupSize, localSizesToTest, localSizesToTestCount);
4429 }
4430
makeComputeTest(Context & context,VkFormat format,const SSBOData * inputs,deUint32 inputsCount,const void * internalData,CheckResultCompute checkResult,deUint32 requiredSubgroupSize,const deUint32 pipelineShaderStageCreateFlags)4431 tcu::TestStatus vkt::subgroups::makeComputeTest (Context& context,
4432 VkFormat format,
4433 const SSBOData* inputs,
4434 deUint32 inputsCount,
4435 const void* internalData,
4436 CheckResultCompute checkResult,
4437 deUint32 requiredSubgroupSize,
4438 const deUint32 pipelineShaderStageCreateFlags)
4439 {
4440 return makeComputeOrMeshTest(ComputeLike::COMPUTE, context, format, inputs, inputsCount, internalData, checkResult, requiredSubgroupSize, pipelineShaderStageCreateFlags);
4441 }
4442
makeMeshTest(Context & context,VkFormat format,const SSBOData * inputs,deUint32 inputsCount,const void * internalData,CheckResultCompute checkResult,deUint32 requiredSubgroupSize,const deUint32 pipelineShaderStageCreateFlags)4443 tcu::TestStatus vkt::subgroups::makeMeshTest (Context& context,
4444 VkFormat format,
4445 const SSBOData* inputs,
4446 deUint32 inputsCount,
4447 const void* internalData,
4448 CheckResultCompute checkResult,
4449 deUint32 requiredSubgroupSize,
4450 const deUint32 pipelineShaderStageCreateFlags)
4451 {
4452 return makeComputeOrMeshTest(ComputeLike::MESH, context, format, inputs, inputsCount, internalData, checkResult, requiredSubgroupSize, pipelineShaderStageCreateFlags);
4453 }
4454
checkShaderStageSetValidity(const VkShaderStageFlags shaderStages)4455 static inline void checkShaderStageSetValidity (const VkShaderStageFlags shaderStages)
4456 {
4457 if (shaderStages == 0)
4458 TCU_THROW(InternalError, "Shader stage is not specified");
4459
4460 // It can actually be only 1 or 0.
4461 const deUint32 exclusivePipelinesCount = (isAllComputeStages(shaderStages) ? 1 : 0)
4462 + (isAllGraphicsStages(shaderStages) ? 1 : 0)
4463 #ifndef CTS_USES_VULKANSC
4464 + (isAllRayTracingStages(shaderStages) ? 1 : 0)
4465 + (isAllMeshShadingStages(shaderStages) ? 1 : 0)
4466 #endif // CTS_USES_VULKANSC
4467 ;
4468
4469 if (exclusivePipelinesCount != 1)
4470 TCU_THROW(InternalError, "Mix of shaders from different pipelines is detected");
4471 }
4472
supportedCheckShader(Context & context,const VkShaderStageFlags shaderStages)4473 void vkt::subgroups::supportedCheckShader (Context& context, const VkShaderStageFlags shaderStages)
4474 {
4475 checkShaderStageSetValidity(shaderStages);
4476
4477 if ((shaderStages & VK_SHADER_STAGE_GEOMETRY_BIT) != 0)
4478 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_GEOMETRY_SHADER);
4479
4480 if ((context.getSubgroupProperties().supportedStages & shaderStages) == 0)
4481 {
4482 if (isAllComputeStages(shaderStages))
4483 TCU_FAIL("Compute shader is required to support subgroup operations");
4484 else
4485 TCU_THROW(NotSupportedError, "Subgroup support is not available for test shader stage(s)");
4486 }
4487
4488 #ifndef CTS_USES_VULKANSC
4489 if ((VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) & shaderStages &&
4490 context.isDeviceFunctionalitySupported("VK_KHR_portability_subset") &&
4491 !context.getPortabilitySubsetFeatures().tessellationIsolines)
4492 {
4493 TCU_THROW(NotSupportedError, "VK_KHR_portability_subset: Tessellation iso lines are not supported by this implementation");
4494 }
4495 #endif // CTS_USES_VULKANSC
4496 }
4497
4498
4499 namespace vkt
4500 {
4501 namespace subgroups
4502 {
// Convenience alias for a list of shared input/output buffers or images.
typedef std::vector< de::SharedPtr<BufferOrImage> > vectorBufferOrImage;

// Indices of the shader groups used by the ray tracing subgroup tests.
enum ShaderGroups
{
	FIRST_GROUP = 0,
	RAYGEN_GROUP = FIRST_GROUP,	// Ray generation shader group.
	MISS_GROUP,					// Miss shader group.
	HIT_GROUP,					// Hit shader group (any-hit/closest-hit/intersection).
	CALL_GROUP,					// Callable shader group.
	GROUP_COUNT					// Total number of groups; keep last.
};
4514
getAllRayTracingFormats()4515 const std::vector<vk::VkFormat> getAllRayTracingFormats()
4516 {
4517 std::vector<VkFormat> formats;
4518
4519 formats.push_back(VK_FORMAT_R8G8B8_SINT);
4520 formats.push_back(VK_FORMAT_R8_UINT);
4521 formats.push_back(VK_FORMAT_R8G8B8A8_UINT);
4522 formats.push_back(VK_FORMAT_R16G16B16_SINT);
4523 formats.push_back(VK_FORMAT_R16_UINT);
4524 formats.push_back(VK_FORMAT_R16G16B16A16_UINT);
4525 formats.push_back(VK_FORMAT_R32G32B32_SINT);
4526 formats.push_back(VK_FORMAT_R32_UINT);
4527 formats.push_back(VK_FORMAT_R32G32B32A32_UINT);
4528 formats.push_back(VK_FORMAT_R64G64B64_SINT);
4529 formats.push_back(VK_FORMAT_R64_UINT);
4530 formats.push_back(VK_FORMAT_R64G64B64A64_UINT);
4531 formats.push_back(VK_FORMAT_R16G16B16A16_SFLOAT);
4532 formats.push_back(VK_FORMAT_R32_SFLOAT);
4533 formats.push_back(VK_FORMAT_R32G32B32A32_SFLOAT);
4534 formats.push_back(VK_FORMAT_R64_SFLOAT);
4535 formats.push_back(VK_FORMAT_R64G64B64_SFLOAT);
4536 formats.push_back(VK_FORMAT_R64G64B64A64_SFLOAT);
4537 formats.push_back(VK_FORMAT_R8_USCALED);
4538 formats.push_back(VK_FORMAT_R8G8_USCALED);
4539 formats.push_back(VK_FORMAT_R8G8B8_USCALED);
4540 formats.push_back(VK_FORMAT_R8G8B8A8_USCALED);
4541
4542 return formats;
4543 }
4544
addRayTracingNoSubgroupShader(SourceCollections & programCollection)4545 void addRayTracingNoSubgroupShader (SourceCollections& programCollection)
4546 {
4547 const vk::ShaderBuildOptions buildOptions (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
4548
4549 const std::string rgenShaderNoSubgroups =
4550 "#version 460 core\n"
4551 "#extension GL_EXT_ray_tracing: require\n"
4552 "layout(location = 0) rayPayloadEXT uvec4 payload;\n"
4553 "layout(location = 0) callableDataEXT uvec4 callData;"
4554 "layout(set = 1, binding = 0) uniform accelerationStructureEXT topLevelAS;\n"
4555 "\n"
4556 "void main()\n"
4557 "{\n"
4558 " uint rayFlags = 0;\n"
4559 " uint cullMask = 0xFF;\n"
4560 " float tmin = 0.0;\n"
4561 " float tmax = 9.0;\n"
4562 " vec3 origin = vec3((float(gl_LaunchIDEXT.x) + 0.5f) / float(gl_LaunchSizeEXT.x), (float(gl_LaunchIDEXT.y) + 0.5f) / float(gl_LaunchSizeEXT.y), 0.0);\n"
4563 " vec3 directHit = vec3(0.0, 0.0, -1.0);\n"
4564 " vec3 directMiss = vec3(0.0, 0.0, +1.0);\n"
4565 "\n"
4566 " traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directHit, tmax, 0);\n"
4567 " traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directMiss, tmax, 0);\n"
4568 " executeCallableEXT(0, 0);"
4569 "}\n";
4570 const std::string hitShaderNoSubgroups =
4571 "#version 460 core\n"
4572 "#extension GL_EXT_ray_tracing: require\n"
4573 "hitAttributeEXT vec3 attribs;\n"
4574 "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
4575 "\n"
4576 "void main()\n"
4577 "{\n"
4578 "}\n";
4579 const std::string missShaderNoSubgroups =
4580 "#version 460 core\n"
4581 "#extension GL_EXT_ray_tracing: require\n"
4582 "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
4583 "\n"
4584 "void main()\n"
4585 "{\n"
4586 "}\n";
4587 const std::string sectShaderNoSubgroups =
4588 "#version 460 core\n"
4589 "#extension GL_EXT_ray_tracing: require\n"
4590 "hitAttributeEXT vec3 hitAttribute;\n"
4591 "\n"
4592 "void main()\n"
4593 "{\n"
4594 " reportIntersectionEXT(0.75f, 0x7Eu);\n"
4595 "}\n";
4596 const std::string callShaderNoSubgroups =
4597 "#version 460 core\n"
4598 "#extension GL_EXT_ray_tracing: require\n"
4599 "layout(location = 0) callableDataInEXT float callData;\n"
4600 "\n"
4601 "void main()\n"
4602 "{\n"
4603 "}\n";
4604
4605 programCollection.glslSources.add("rgen_noSubgroup") << glu::RaygenSource (rgenShaderNoSubgroups) << buildOptions;
4606 programCollection.glslSources.add("ahit_noSubgroup") << glu::AnyHitSource (hitShaderNoSubgroups) << buildOptions;
4607 programCollection.glslSources.add("chit_noSubgroup") << glu::ClosestHitSource (hitShaderNoSubgroups) << buildOptions;
4608 programCollection.glslSources.add("miss_noSubgroup") << glu::MissSource (missShaderNoSubgroups) << buildOptions;
4609 programCollection.glslSources.add("sect_noSubgroup") << glu::IntersectionSource (sectShaderNoSubgroups) << buildOptions;
4610 programCollection.glslSources.add("call_noSubgroup") << glu::CallableSource (callShaderNoSubgroups) << buildOptions;
4611 }
4612
4613 #ifndef CTS_USES_VULKANSC
4614
enumerateRayTracingShaderStages(const VkShaderStageFlags shaderStage)4615 static vector<VkShaderStageFlagBits> enumerateRayTracingShaderStages (const VkShaderStageFlags shaderStage)
4616 {
4617 vector<VkShaderStageFlagBits> result;
4618 const VkShaderStageFlagBits shaderStageFlags[] =
4619 {
4620 VK_SHADER_STAGE_RAYGEN_BIT_KHR,
4621 VK_SHADER_STAGE_ANY_HIT_BIT_KHR,
4622 VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,
4623 VK_SHADER_STAGE_MISS_BIT_KHR,
4624 VK_SHADER_STAGE_INTERSECTION_BIT_KHR,
4625 VK_SHADER_STAGE_CALLABLE_BIT_KHR,
4626 };
4627
4628 for (auto shaderStageFlag: shaderStageFlags)
4629 {
4630 if (0 != (shaderStage & shaderStageFlag))
4631 result.push_back(shaderStageFlag);
4632 }
4633
4634 return result;
4635 }
4636
getRayTracingResultBinding(const VkShaderStageFlagBits shaderStage)4637 static deUint32 getRayTracingResultBinding (const VkShaderStageFlagBits shaderStage)
4638 {
4639 const VkShaderStageFlags shaderStageFlags[] =
4640 {
4641 VK_SHADER_STAGE_RAYGEN_BIT_KHR,
4642 VK_SHADER_STAGE_ANY_HIT_BIT_KHR,
4643 VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,
4644 VK_SHADER_STAGE_MISS_BIT_KHR,
4645 VK_SHADER_STAGE_INTERSECTION_BIT_KHR,
4646 VK_SHADER_STAGE_CALLABLE_BIT_KHR,
4647 };
4648
4649 for (deUint32 shaderStageNdx = 0; shaderStageNdx < DE_LENGTH_OF_ARRAY(shaderStageFlags); ++shaderStageNdx)
4650 {
4651 if (0 != (shaderStage & shaderStageFlags[shaderStageNdx]))
4652 {
4653 DE_ASSERT(0 == (shaderStage & (~shaderStageFlags[shaderStageNdx])));
4654
4655 return shaderStageNdx;
4656 }
4657 }
4658
4659 TCU_THROW(InternalError, "Non-raytracing stage specified or no stage at all");
4660 }
4661
makeRayTracingInputBuffers(Context & context,VkFormat format,const SSBOData * extraDatas,deUint32 extraDatasCount,const vector<VkShaderStageFlagBits> & stagesVector)4662 static vectorBufferOrImage makeRayTracingInputBuffers (Context& context,
4663 VkFormat format,
4664 const SSBOData* extraDatas,
4665 deUint32 extraDatasCount,
4666 const vector<VkShaderStageFlagBits>& stagesVector)
4667 {
4668 const size_t stagesCount = stagesVector.size();
4669 const VkDeviceSize shaderSize = getMaxWidth();
4670 const VkDeviceSize inputBufferSize = getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize;
4671 vectorBufferOrImage inputBuffers (stagesCount + extraDatasCount);
4672
4673 // The implicit result SSBO we use to store our outputs from the shader
4674 for (size_t stageNdx = 0u; stageNdx < stagesCount; ++stageNdx)
4675 inputBuffers[stageNdx] = de::SharedPtr<BufferOrImage>(new Buffer(context, inputBufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT));
4676
4677 for (size_t stageNdx = stagesCount; stageNdx < stagesCount + extraDatasCount; ++stageNdx)
4678 {
4679 const size_t datasNdx = stageNdx - stagesCount;
4680
4681 if (extraDatas[datasNdx].isImage())
4682 {
4683 inputBuffers[stageNdx] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
4684 }
4685 else
4686 {
4687 const auto usage = (extraDatas[datasNdx].isUBO() ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT : VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
4688 const auto size = getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) * extraDatas[datasNdx].numElements;
4689 inputBuffers[stageNdx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, usage));
4690 }
4691
4692 initializeMemory(context, inputBuffers[stageNdx]->getAllocation(), extraDatas[datasNdx]);
4693 }
4694
4695 return inputBuffers;
4696 }
4697
makeRayTracingDescriptorSetLayout(Context & context,const SSBOData * extraDatas,deUint32 extraDatasCount,const vector<VkShaderStageFlagBits> & stagesVector,const vectorBufferOrImage & inputBuffers)4698 static Move<VkDescriptorSetLayout> makeRayTracingDescriptorSetLayout (Context& context,
4699 const SSBOData* extraDatas,
4700 deUint32 extraDatasCount,
4701 const vector<VkShaderStageFlagBits>& stagesVector,
4702 const vectorBufferOrImage& inputBuffers)
4703 {
4704 const DeviceInterface& vkd = context.getDeviceInterface();
4705 const VkDevice device = context.getDevice();
4706 const size_t stagesCount = stagesVector.size();
4707 DescriptorSetLayoutBuilder layoutBuilder;
4708
4709 // The implicit result SSBO we use to store our outputs from the shader
4710 for (size_t stageNdx = 0u; stageNdx < stagesVector.size(); ++stageNdx)
4711 {
4712 const deUint32 stageBinding = getRayTracingResultBinding(stagesVector[stageNdx]);
4713
4714 layoutBuilder.addIndexedBinding(inputBuffers[stageNdx]->getType(), 1, stagesVector[stageNdx], stageBinding, DE_NULL);
4715 }
4716
4717 for (size_t stageNdx = stagesCount; stageNdx < stagesCount + extraDatasCount; ++stageNdx)
4718 {
4719 const size_t datasNdx = stageNdx - stagesCount;
4720
4721 layoutBuilder.addIndexedBinding(inputBuffers[stageNdx]->getType(), 1, extraDatas[datasNdx].stages, extraDatas[datasNdx].binding, DE_NULL);
4722 }
4723
4724 return layoutBuilder.build(vkd, device);
4725 }
4726
makeRayTracingDescriptorSetLayoutAS(Context & context)4727 static Move<VkDescriptorSetLayout> makeRayTracingDescriptorSetLayoutAS (Context& context)
4728 {
4729 const DeviceInterface& vkd = context.getDeviceInterface();
4730 const VkDevice device = context.getDevice();
4731 DescriptorSetLayoutBuilder layoutBuilder;
4732
4733 layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, VK_SHADER_STAGE_RAYGEN_BIT_KHR);
4734
4735 return layoutBuilder.build(vkd, device);
4736 }
4737
makeRayTracingDescriptorPool(Context & context,const vectorBufferOrImage & inputBuffers)4738 static Move<VkDescriptorPool> makeRayTracingDescriptorPool (Context& context,
4739 const vectorBufferOrImage& inputBuffers)
4740 {
4741 const DeviceInterface& vkd = context.getDeviceInterface();
4742 const VkDevice device = context.getDevice();
4743 const deUint32 maxDescriptorSets = 2u;
4744 DescriptorPoolBuilder poolBuilder;
4745 Move<VkDescriptorPool> result;
4746
4747 if (inputBuffers.size() > 0)
4748 {
4749 for (size_t ndx = 0u; ndx < inputBuffers.size(); ndx++)
4750 poolBuilder.addType(inputBuffers[ndx]->getType());
4751 }
4752
4753 poolBuilder.addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR);
4754
4755 result = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, maxDescriptorSets);
4756
4757 return result;
4758 }
4759
makeRayTracingDescriptorSet(Context & context,VkDescriptorPool descriptorPool,VkDescriptorSetLayout descriptorSetLayout,const SSBOData * extraDatas,deUint32 extraDatasCount,const vector<VkShaderStageFlagBits> & stagesVector,const vectorBufferOrImage & inputBuffers)4760 static Move<VkDescriptorSet> makeRayTracingDescriptorSet (Context& context,
4761 VkDescriptorPool descriptorPool,
4762 VkDescriptorSetLayout descriptorSetLayout,
4763 const SSBOData* extraDatas,
4764 deUint32 extraDatasCount,
4765 const vector<VkShaderStageFlagBits>& stagesVector,
4766 const vectorBufferOrImage& inputBuffers)
4767 {
4768 const DeviceInterface& vkd = context.getDeviceInterface();
4769 const VkDevice device = context.getDevice();
4770 const size_t stagesCount = stagesVector.size();
4771 Move<VkDescriptorSet> descriptorSet;
4772
4773 if (inputBuffers.size() > 0)
4774 {
4775 DescriptorSetUpdateBuilder updateBuilder;
4776
4777 // Create descriptor set
4778 descriptorSet = makeDescriptorSet(vkd, device, descriptorPool, descriptorSetLayout);
4779
4780 for (size_t ndx = 0u; ndx < stagesCount + extraDatasCount; ndx++)
4781 {
4782 const deUint32 binding = (ndx < stagesCount)
4783 ? getRayTracingResultBinding(stagesVector[ndx])
4784 : extraDatas[ndx - stagesCount].binding;
4785
4786 if (inputBuffers[ndx]->isImage())
4787 {
4788 const VkDescriptorImageInfo info = makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(), inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
4789
4790 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(binding), inputBuffers[ndx]->getType(), &info);
4791 }
4792 else
4793 {
4794 const VkDescriptorBufferInfo info = makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(), 0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
4795
4796 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(binding), inputBuffers[ndx]->getType(), &info);
4797 }
4798 }
4799
4800 updateBuilder.update(vkd, device);
4801 }
4802
4803 return descriptorSet;
4804 }
4805
makeRayTracingDescriptorSetAS(Context & context,VkDescriptorPool descriptorPool,VkDescriptorSetLayout descriptorSetLayout,de::MovePtr<TopLevelAccelerationStructure> & topLevelAccelerationStructure)4806 static Move<VkDescriptorSet> makeRayTracingDescriptorSetAS (Context& context,
4807 VkDescriptorPool descriptorPool,
4808 VkDescriptorSetLayout descriptorSetLayout,
4809 de::MovePtr<TopLevelAccelerationStructure>& topLevelAccelerationStructure)
4810 {
4811 const DeviceInterface& vkd = context.getDeviceInterface();
4812 const VkDevice device = context.getDevice();
4813 const TopLevelAccelerationStructure* topLevelAccelerationStructurePtr = topLevelAccelerationStructure.get();
4814 const VkWriteDescriptorSetAccelerationStructureKHR accelerationStructureWriteDescriptorSet =
4815 {
4816 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, // VkStructureType sType;
4817 DE_NULL, // const void* pNext;
4818 1u, // deUint32 accelerationStructureCount;
4819 topLevelAccelerationStructurePtr->getPtr(), // const VkAccelerationStructureKHR* pAccelerationStructures;
4820 };
4821 Move<VkDescriptorSet> descriptorSet = makeDescriptorSet(vkd, device, descriptorPool, descriptorSetLayout);
4822
4823 DescriptorSetUpdateBuilder()
4824 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &accelerationStructureWriteDescriptorSet)
4825 .update(vkd, device);
4826
4827 return descriptorSet;
4828 }
4829
makeRayTracingPipelineLayout(Context & context,const VkDescriptorSetLayout descriptorSetLayout0,const VkDescriptorSetLayout descriptorSetLayout1)4830 static Move<VkPipelineLayout> makeRayTracingPipelineLayout (Context& context,
4831 const VkDescriptorSetLayout descriptorSetLayout0,
4832 const VkDescriptorSetLayout descriptorSetLayout1)
4833 {
4834 const DeviceInterface& vkd = context.getDeviceInterface();
4835 const VkDevice device = context.getDevice();
4836 const std::vector<VkDescriptorSetLayout> descriptorSetLayouts { descriptorSetLayout0, descriptorSetLayout1 };
4837 const deUint32 descriptorSetLayoutsSize = static_cast<deUint32>(descriptorSetLayouts.size());
4838
4839 return makePipelineLayout(vkd, device, descriptorSetLayoutsSize, descriptorSetLayouts.data());
4840 }
4841
createTopAccelerationStructure(Context & context,de::SharedPtr<BottomLevelAccelerationStructure> bottomLevelAccelerationStructure)4842 static de::MovePtr<TopLevelAccelerationStructure> createTopAccelerationStructure (Context& context,
4843 de::SharedPtr<BottomLevelAccelerationStructure> bottomLevelAccelerationStructure)
4844 {
4845 const DeviceInterface& vkd = context.getDeviceInterface();
4846 const VkDevice device = context.getDevice();
4847 Allocator& allocator = context.getDefaultAllocator();
4848 de::MovePtr<TopLevelAccelerationStructure> result = makeTopLevelAccelerationStructure();
4849
4850 result->setInstanceCount(1);
4851 result->addInstance(bottomLevelAccelerationStructure);
4852 result->create(vkd, device, allocator);
4853
4854 return result;
4855 }
4856
createBottomAccelerationStructure(Context & context)4857 static de::SharedPtr<BottomLevelAccelerationStructure> createBottomAccelerationStructure (Context& context)
4858 {
4859 const DeviceInterface& vkd = context.getDeviceInterface();
4860 const VkDevice device = context.getDevice();
4861 Allocator& allocator = context.getDefaultAllocator();
4862 de::MovePtr<BottomLevelAccelerationStructure> result = makeBottomLevelAccelerationStructure();
4863 const std::vector<tcu::Vec3> geometryData { tcu::Vec3(-1.0f, -1.0f, -2.0f), tcu::Vec3(+1.0f, +1.0f, -1.0f) };
4864
4865 result->setGeometryCount(1u);
4866 result->addGeometry(geometryData, false);
4867 result->create(vkd, device, allocator, 0u);
4868
4869 return de::SharedPtr<BottomLevelAccelerationStructure>(result.release());
4870 }
4871
// Builds the ray tracing pipeline used by the subgroup tests.
//
// For every stage bit set in shaderStageTested the "real" test shader is used;
// all other stages get a minimal "_noSubgroup" placeholder so the full pipeline
// is always complete. Optional per-stage pipeline create flags and required
// subgroup sizes (arrays indexed rgen/ahit/chit/miss/sect/call, i.e. 0..5) are
// attached when the corresponding pointers are non-null; a requiredSubgroupSize
// entry of 0 means "no requirement" for that stage.
//
// The pipeline is created inside this function (see note near the bottom) and
// returned through pipelineOut; the returned RayTracingPipeline object is kept
// alive by the caller so shader binding tables can be created from it.
static de::MovePtr<RayTracingPipeline> makeRayTracingPipeline (Context& context,
							       const VkShaderStageFlags shaderStageTested,
							       const VkPipelineLayout pipelineLayout,
							       const deUint32 shaderStageCreateFlags[6],
							       const deUint32 requiredSubgroupSize[6],
							       Move<VkPipeline>& pipelineOut)
{
	const DeviceInterface& vkd = context.getDeviceInterface();
	const VkDevice device = context.getDevice();
	BinaryCollection& collection = context.getBinaryCollection();
	// Pick the tested shader or the no-op placeholder for each stage.
	const char* shaderRgenName = (0 != (shaderStageTested & VK_SHADER_STAGE_RAYGEN_BIT_KHR)) ? "rgen" : "rgen_noSubgroup";
	const char* shaderAhitName = (0 != (shaderStageTested & VK_SHADER_STAGE_ANY_HIT_BIT_KHR)) ? "ahit" : "ahit_noSubgroup";
	const char* shaderChitName = (0 != (shaderStageTested & VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR)) ? "chit" : "chit_noSubgroup";
	const char* shaderMissName = (0 != (shaderStageTested & VK_SHADER_STAGE_MISS_BIT_KHR)) ? "miss" : "miss_noSubgroup";
	const char* shaderSectName = (0 != (shaderStageTested & VK_SHADER_STAGE_INTERSECTION_BIT_KHR)) ? "sect" : "sect_noSubgroup";
	const char* shaderCallName = (0 != (shaderStageTested & VK_SHADER_STAGE_CALLABLE_BIT_KHR)) ? "call" : "call_noSubgroup";
	const VkShaderModuleCreateFlags noShaderModuleCreateFlags = static_cast<VkShaderModuleCreateFlags>(0);
	Move<VkShaderModule> rgenShaderModule = createShaderModule(vkd, device, collection.get(shaderRgenName), noShaderModuleCreateFlags);
	Move<VkShaderModule> ahitShaderModule = createShaderModule(vkd, device, collection.get(shaderAhitName), noShaderModuleCreateFlags);
	Move<VkShaderModule> chitShaderModule = createShaderModule(vkd, device, collection.get(shaderChitName), noShaderModuleCreateFlags);
	Move<VkShaderModule> missShaderModule = createShaderModule(vkd, device, collection.get(shaderMissName), noShaderModuleCreateFlags);
	Move<VkShaderModule> sectShaderModule = createShaderModule(vkd, device, collection.get(shaderSectName), noShaderModuleCreateFlags);
	Move<VkShaderModule> callShaderModule = createShaderModule(vkd, device, collection.get(shaderCallName), noShaderModuleCreateFlags);
	// Per-stage pipeline create flags: all zero unless the caller supplied an array.
	const VkPipelineShaderStageCreateFlags noPipelineShaderStageCreateFlags = static_cast<VkPipelineShaderStageCreateFlags>(0);
	const VkPipelineShaderStageCreateFlags rgenPipelineShaderStageCreateFlags = (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[0];
	const VkPipelineShaderStageCreateFlags ahitPipelineShaderStageCreateFlags = (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[1];
	const VkPipelineShaderStageCreateFlags chitPipelineShaderStageCreateFlags = (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[2];
	const VkPipelineShaderStageCreateFlags missPipelineShaderStageCreateFlags = (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[3];
	const VkPipelineShaderStageCreateFlags sectPipelineShaderStageCreateFlags = (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[4];
	const VkPipelineShaderStageCreateFlags callPipelineShaderStageCreateFlags = (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[5];
	// Required-subgroup-size structs for all six stages; a size of 0 in an entry
	// below causes the pointer for that stage to be left null (no requirement).
	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT requiredSubgroupSizeCreateInfo[6] =
	{
		{
			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
			DE_NULL,
			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[0] : 0u,
		},
		{
			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
			DE_NULL,
			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[1] : 0u,
		},
		{
			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
			DE_NULL,
			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[2] : 0u,
		},
		{
			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
			DE_NULL,
			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[3] : 0u,
		},
		{
			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
			DE_NULL,
			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[4] : 0u,
		},
		{
			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
			DE_NULL,
			requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[5] : 0u,
		},
	};
	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT* rgenRequiredSubgroupSizeCreateInfo = (requiredSubgroupSizeCreateInfo[0].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[0];
	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT* ahitRequiredSubgroupSizeCreateInfo = (requiredSubgroupSizeCreateInfo[1].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[1];
	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT* chitRequiredSubgroupSizeCreateInfo = (requiredSubgroupSizeCreateInfo[2].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[2];
	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT* missRequiredSubgroupSizeCreateInfo = (requiredSubgroupSizeCreateInfo[3].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[3];
	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT* sectRequiredSubgroupSizeCreateInfo = (requiredSubgroupSizeCreateInfo[4].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[4];
	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT* callRequiredSubgroupSizeCreateInfo = (requiredSubgroupSizeCreateInfo[5].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[5];
	de::MovePtr<RayTracingPipeline> rayTracingPipeline = de::newMovePtr<RayTracingPipeline>();

	rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR		, rgenShaderModule, RAYGEN_GROUP, DE_NULL, rgenPipelineShaderStageCreateFlags, rgenRequiredSubgroupSizeCreateInfo);
	rayTracingPipeline->addShader(VK_SHADER_STAGE_ANY_HIT_BIT_KHR		, ahitShaderModule, HIT_GROUP, DE_NULL, ahitPipelineShaderStageCreateFlags, ahitRequiredSubgroupSizeCreateInfo);
	rayTracingPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR	, chitShaderModule, HIT_GROUP, DE_NULL, chitPipelineShaderStageCreateFlags, chitRequiredSubgroupSizeCreateInfo);
	rayTracingPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR			, missShaderModule, MISS_GROUP, DE_NULL, missPipelineShaderStageCreateFlags, missRequiredSubgroupSizeCreateInfo);
	rayTracingPipeline->addShader(VK_SHADER_STAGE_INTERSECTION_BIT_KHR	, sectShaderModule, HIT_GROUP, DE_NULL, sectPipelineShaderStageCreateFlags, sectRequiredSubgroupSizeCreateInfo);
	rayTracingPipeline->addShader(VK_SHADER_STAGE_CALLABLE_BIT_KHR		, callShaderModule, CALL_GROUP, DE_NULL, callPipelineShaderStageCreateFlags, callRequiredSubgroupSizeCreateInfo);

	// Must execute createPipeline here, due to pNext pointers in calls to addShader are local
	pipelineOut = rayTracingPipeline->createPipeline(vkd, device, pipelineLayout);

	return rayTracingPipeline;
}
4955
getPossibleRayTracingSubgroupStages(Context & context,const VkShaderStageFlags testedStages)4956 VkShaderStageFlags getPossibleRayTracingSubgroupStages (Context& context, const VkShaderStageFlags testedStages)
4957 {
4958 const VkPhysicalDeviceSubgroupProperties& subgroupProperties = context.getSubgroupProperties();
4959 const VkShaderStageFlags stages = testedStages & subgroupProperties.supportedStages;
4960
4961 DE_ASSERT(isAllRayTracingStages(testedStages));
4962
4963 return stages;
4964 }
4965
allRayTracingStages(Context & context,VkFormat format,const SSBOData * extraDatas,deUint32 extraDataCount,const void * internalData,const VerificationFunctor & checkResult,const VkShaderStageFlags shaderStage)4966 tcu::TestStatus allRayTracingStages (Context& context,
4967 VkFormat format,
4968 const SSBOData* extraDatas,
4969 deUint32 extraDataCount,
4970 const void* internalData,
4971 const VerificationFunctor& checkResult,
4972 const VkShaderStageFlags shaderStage)
4973 {
4974 return vkt::subgroups::allRayTracingStagesRequiredSubgroupSize(context,
4975 format,
4976 extraDatas,
4977 extraDataCount,
4978 internalData,
4979 checkResult,
4980 shaderStage,
4981 DE_NULL,
4982 DE_NULL);
4983 }
4984
// Core ray tracing subgroup test runner.
//
// Builds the acceleration structures, pipeline, shader binding tables and
// descriptor sets, then traces rays at a sequence of increasing widths
// (see getNextWidth). After each trace, every tested stage's result buffer
// (plus any matching extra-data buffers) is handed to checkResult for
// verification. Fails if any iteration fails or if nothing ran at all.
//
// shaderStageCreateFlags / requiredSubgroupSize are optional 6-entry arrays
// (rgen/ahit/chit/miss/sect/call order) forwarded to makeRayTracingPipeline;
// either may be DE_NULL.
tcu::TestStatus allRayTracingStagesRequiredSubgroupSize (Context& context,
							 VkFormat format,
							 const SSBOData* extraDatas,
							 deUint32 extraDatasCount,
							 const void* internalData,
							 const VerificationFunctor& checkResult,
							 const VkShaderStageFlags shaderStageTested,
							 const deUint32 shaderStageCreateFlags[6],
							 const deUint32 requiredSubgroupSize[6])
{
	const DeviceInterface& vkd = context.getDeviceInterface();
	const VkDevice device = context.getDevice();
	const VkQueue queue = context.getUniversalQueue();
	const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
	Allocator& allocator = context.getDefaultAllocator();
	const deUint32 subgroupSize = getSubgroupSize(context);
	const deUint32 maxWidth = getMaxWidth();
	const vector<VkShaderStageFlagBits> stagesVector = enumerateRayTracingShaderStages(shaderStageTested);
	const deUint32 stagesCount = static_cast<deUint32>(stagesVector.size());
	// Acceleration structures are created here and built on the command buffer below.
	de::SharedPtr<BottomLevelAccelerationStructure> bottomLevelAccelerationStructure = createBottomAccelerationStructure(context);
	de::MovePtr<TopLevelAccelerationStructure> topLevelAccelerationStructure = createTopAccelerationStructure(context, bottomLevelAccelerationStructure);
	// Result buffers (one per stage) followed by extra-data buffers/images.
	vectorBufferOrImage inputBuffers = makeRayTracingInputBuffers(context, format, extraDatas, extraDatasCount, stagesVector);
	const Move<VkDescriptorSetLayout> descriptorSetLayout = makeRayTracingDescriptorSetLayout(context, extraDatas, extraDatasCount, stagesVector, inputBuffers);
	const Move<VkDescriptorSetLayout> descriptorSetLayoutAS = makeRayTracingDescriptorSetLayoutAS(context);
	const Move<VkPipelineLayout> pipelineLayout = makeRayTracingPipelineLayout(context, *descriptorSetLayout, *descriptorSetLayoutAS);
	Move<VkPipeline> pipeline = Move<VkPipeline>();
	// The RayTracingPipeline object must outlive pipeline creation; it also owns the shader group info used for the SBTs below.
	const de::MovePtr<RayTracingPipeline> rayTracingPipeline = makeRayTracingPipeline(context, shaderStageTested, *pipelineLayout, shaderStageCreateFlags, requiredSubgroupSize, pipeline);
	const deUint32 shaderGroupHandleSize = context.getRayTracingPipelineProperties().shaderGroupHandleSize;
	const deUint32 shaderGroupBaseAlignment = context.getRayTracingPipelineProperties().shaderGroupBaseAlignment;
	// One shader binding table per group type (raygen, miss, hit, callable).
	de::MovePtr<BufferWithMemory> rgenShaderBindingTable = rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, RAYGEN_GROUP, 1u);
	de::MovePtr<BufferWithMemory> missShaderBindingTable = rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, MISS_GROUP, 1u);
	de::MovePtr<BufferWithMemory> hitsShaderBindingTable = rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, HIT_GROUP, 1u);
	de::MovePtr<BufferWithMemory> callShaderBindingTable = rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, CALL_GROUP, 1u);
	const VkStridedDeviceAddressRegionKHR rgenShaderBindingTableRegion = makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, rgenShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
	const VkStridedDeviceAddressRegionKHR missShaderBindingTableRegion = makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, missShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
	const VkStridedDeviceAddressRegionKHR hitsShaderBindingTableRegion = makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, hitsShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
	const VkStridedDeviceAddressRegionKHR callShaderBindingTableRegion = makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, callShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
	const Move<VkDescriptorPool> descriptorPool = makeRayTracingDescriptorPool(context, inputBuffers);
	const Move<VkDescriptorSet> descriptorSet = makeRayTracingDescriptorSet(context, *descriptorPool, *descriptorSetLayout, extraDatas, extraDatasCount, stagesVector, inputBuffers);
	const Move<VkDescriptorSet> descriptorSetAS = makeRayTracingDescriptorSetAS(context, *descriptorPool, *descriptorSetLayoutAS, topLevelAccelerationStructure);
	const Move<VkCommandPool> cmdPool = makeCommandPool(vkd, device, queueFamilyIndex);
	const Move<VkCommandBuffer> cmdBuffer = makeCommandBuffer(context, *cmdPool);
	deUint32 passIterations = 0u;
	deUint32 failIterations = 0u;

	DE_ASSERT(shaderStageTested != 0);

	for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
	{

		for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
		{
			// re-init the data
			const Allocation& alloc = inputBuffers[ndx]->getAllocation();

			initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
		}

		beginCommandBuffer(vkd, *cmdBuffer);
		{
			vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);

			// Build BLAS before TLAS: the TLAS instance references the BLAS.
			bottomLevelAccelerationStructure->build(vkd, device, *cmdBuffer);
			topLevelAccelerationStructure->build(vkd, device, *cmdBuffer);

			// Set 1: acceleration structure.
			vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 1u, 1u, &descriptorSetAS.get(), 0u, DE_NULL);

			// Set 0: result/extra-data resources (only when any exist).
			if (stagesCount + extraDatasCount > 0)
				vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);

			cmdTraceRays(vkd,
				*cmdBuffer,
				&rgenShaderBindingTableRegion,
				&missShaderBindingTableRegion,
				&hitsShaderBindingTableRegion,
				&callShaderBindingTableRegion,
				width, 1, 1);

			// Make shader writes visible to host reads below.
			const VkMemoryBarrier postTraceMemoryBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
			cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, VK_PIPELINE_STAGE_HOST_BIT, &postTraceMemoryBarrier);
		}
		endCommandBuffer(vkd, *cmdBuffer);

		submitCommandsAndWait(vkd, device, queue, *cmdBuffer);

		// Verify each tested stage's outputs independently.
		for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
		{
			std::vector<const void*> datas;

			if (!inputBuffers[ndx]->isImage())
			{
				const Allocation& resultAlloc = inputBuffers[ndx]->getAllocation();

				invalidateAlloc(vkd, device, resultAlloc);

				// we always have our result data first
				datas.push_back(resultAlloc.getHostPtr());
			}

			// Append extra-data buffers visible to this stage.
			for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
			{
				const deUint32 datasNdx = index - stagesCount;

				if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
				{
					const Allocation& resultAlloc = inputBuffers[index]->getAllocation();

					invalidateAlloc(vkd, device, resultAlloc);

					// we always have our result data first
					datas.push_back(resultAlloc.getHostPtr());
				}
			}

			if (!checkResult(internalData, datas, width, subgroupSize, false))
				failIterations++;
			else
				passIterations++;
		}

		// NOTE(review): presumably a no-op outside Vulkan SC builds — confirm.
		context.resetCommandPoolForVKSC(device, *cmdPool);
	}

	if (failIterations > 0 || passIterations == 0)
		return tcu::TestStatus::fail("Failed " + de::toString(failIterations) + " out of " + de::toString(failIterations + passIterations) + " iterations.");
	else
		return tcu::TestStatus::pass("OK");
}
5113 #endif // CTS_USES_VULKANSC
5114
5115 } // namespace subgroups
} // namespace vkt
5117