1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2019 The Khronos Group Inc.
6 * Copyright (c) 2019 Google Inc.
7 * Copyright (c) 2017 Codeplay Software Ltd.
8 *
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
20 *
21 */ /*!
22 * \file
23 * \brief Subgroups Tests Utils
24 */ /*--------------------------------------------------------------------*/
25
26 #include "vktSubgroupsTestsUtils.hpp"
27 #include "vkRayTracingUtil.hpp"
28 #include "tcuFloat.hpp"
29 #include "deRandom.hpp"
30 #include "tcuCommandLine.hpp"
31 #include "tcuStringTemplate.hpp"
32 #include "vkBarrierUtil.hpp"
33 #include "vkImageUtil.hpp"
34 #include "vkTypeUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkObjUtil.hpp"
37
38 using namespace tcu;
39 using namespace std;
40 using namespace vk;
41 using namespace vkt;
42
43 namespace
44 {
45
// Discriminates the two "compute-like" pipeline flavors used by these tests.
// NOTE(review): no users are visible in this chunk — presumably selects between
// a classic compute pipeline and a mesh-shading pipeline; confirm at call sites.
enum class ComputeLike
{
    COMPUTE = 0,
    MESH
};
51
// Upper bound of the width sweep performed by the tests (see getNextWidth()).
uint32_t getMaxWidth()
{
    constexpr uint32_t kMaxTestedWidth = 1024u;
    return kMaxTestedWidth;
}
56
// Advances the tested width: every value up to 128 (the maximum subgroup size)
// is covered exhaustively; beyond that only powers of two are visited to keep
// the total test time reasonable.
uint32_t getNextWidth(const uint32_t width)
{
    return (width < 128u) ? (width + 1u) : (width * 2u);
}
70
// Returns the tightly-packed size in bytes of one element of the given format.
// Note that 3-component formats are treated as 4-component (vec3 occupies a
// vec4 slot in the layouts used by these tests), and the *_USCALED formats are
// a local convention representing bool/bvec* (passed to shaders as 32-bit ints).
uint32_t getFormatSizeInBytes(const VkFormat format)
{
    switch (format)
    {
    default:
        DE_FATAL("Unhandled format!");
        return 0;
    case VK_FORMAT_R8_SINT:
    case VK_FORMAT_R8_UINT:
        return static_cast<uint32_t>(sizeof(int8_t));
    case VK_FORMAT_R8G8_SINT:
    case VK_FORMAT_R8G8_UINT:
        return static_cast<uint32_t>(sizeof(int8_t) * 2);
    case VK_FORMAT_R8G8B8_SINT:
    case VK_FORMAT_R8G8B8_UINT:
    case VK_FORMAT_R8G8B8A8_SINT:
    case VK_FORMAT_R8G8B8A8_UINT:
        return static_cast<uint32_t>(sizeof(int8_t) * 4);
    case VK_FORMAT_R16_SINT:
    case VK_FORMAT_R16_UINT:
    case VK_FORMAT_R16_SFLOAT:
        return static_cast<uint32_t>(sizeof(int16_t));
    case VK_FORMAT_R16G16_SINT:
    case VK_FORMAT_R16G16_UINT:
    case VK_FORMAT_R16G16_SFLOAT:
        return static_cast<uint32_t>(sizeof(int16_t) * 2);
    case VK_FORMAT_R16G16B16_UINT:
    case VK_FORMAT_R16G16B16_SINT:
    case VK_FORMAT_R16G16B16_SFLOAT:
    case VK_FORMAT_R16G16B16A16_SINT:
    case VK_FORMAT_R16G16B16A16_UINT:
    case VK_FORMAT_R16G16B16A16_SFLOAT:
        return static_cast<uint32_t>(sizeof(int16_t) * 4);
    case VK_FORMAT_R32_SINT:
    case VK_FORMAT_R32_UINT:
    case VK_FORMAT_R32_SFLOAT:
        return static_cast<uint32_t>(sizeof(int32_t));
    case VK_FORMAT_R32G32_SINT:
    case VK_FORMAT_R32G32_UINT:
    case VK_FORMAT_R32G32_SFLOAT:
        return static_cast<uint32_t>(sizeof(int32_t) * 2);
    case VK_FORMAT_R32G32B32_SINT:
    case VK_FORMAT_R32G32B32_UINT:
    case VK_FORMAT_R32G32B32_SFLOAT:
    case VK_FORMAT_R32G32B32A32_SINT:
    case VK_FORMAT_R32G32B32A32_UINT:
    case VK_FORMAT_R32G32B32A32_SFLOAT:
        return static_cast<uint32_t>(sizeof(int32_t) * 4);
    case VK_FORMAT_R64_SINT:
    case VK_FORMAT_R64_UINT:
    case VK_FORMAT_R64_SFLOAT:
        return static_cast<uint32_t>(sizeof(int64_t));
    case VK_FORMAT_R64G64_SINT:
    case VK_FORMAT_R64G64_UINT:
    case VK_FORMAT_R64G64_SFLOAT:
        return static_cast<uint32_t>(sizeof(int64_t) * 2);
    case VK_FORMAT_R64G64B64_SINT:
    case VK_FORMAT_R64G64B64_UINT:
    case VK_FORMAT_R64G64B64_SFLOAT:
    case VK_FORMAT_R64G64B64A64_SINT:
    case VK_FORMAT_R64G64B64A64_UINT:
    case VK_FORMAT_R64G64B64A64_SFLOAT:
        return static_cast<uint32_t>(sizeof(int64_t) * 4);
    // The below formats are used to represent bool and bvec* types. These
    // types are passed to the shader as int and ivec* types, before the
    // calculations are done as booleans. We need a distinct type here so
    // that the shader generators can switch on it and generate the correct
    // shader source for testing.
    case VK_FORMAT_R8_USCALED:
        return static_cast<uint32_t>(sizeof(int32_t));
    case VK_FORMAT_R8G8_USCALED:
        return static_cast<uint32_t>(sizeof(int32_t) * 2);
    case VK_FORMAT_R8G8B8_USCALED:
    case VK_FORMAT_R8G8B8A8_USCALED:
        return static_cast<uint32_t>(sizeof(int32_t) * 4);
    }
}
148
getElementSizeInBytes(const VkFormat format,const subgroups::SSBOData::InputDataLayoutType layout)149 uint32_t getElementSizeInBytes(const VkFormat format, const subgroups::SSBOData::InputDataLayoutType layout)
150 {
151 const uint32_t bytes = getFormatSizeInBytes(format);
152
153 if (layout == subgroups::SSBOData::LayoutStd140)
154 return bytes < 16 ? 16 : bytes;
155 else
156 return bytes;
157 }
158
// Creates a single-subpass render pass with one color attachment of the given
// format. The attachment is cleared on load, stored on completion, and ends in
// TRANSFER_SRC_OPTIMAL so the rendered result can be copied back for checking.
// Two external dependencies bracket the color-attachment-output stage.
Move<VkRenderPass> makeRenderPass(Context &context, VkFormat format)
{
    const VkAttachmentReference colorReference = {0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL};
    const VkSubpassDescription subpassDescription = {
        0u,                              // VkSubpassDescriptionFlags flags;
        VK_PIPELINE_BIND_POINT_GRAPHICS, // VkPipelineBindPoint pipelineBindPoint;
        0,                               // uint32_t inputAttachmentCount;
        nullptr,                         // const VkAttachmentReference* pInputAttachments;
        1,                               // uint32_t colorAttachmentCount;
        &colorReference,                 // const VkAttachmentReference* pColorAttachments;
        nullptr,                         // const VkAttachmentReference* pResolveAttachments;
        nullptr,                         // const VkAttachmentReference* pDepthStencilAttachment;
        0,                               // uint32_t preserveAttachmentCount;
        nullptr                          // const uint32_t* pPreserveAttachments;
    };
    // Incoming dependency: wait for anything before the pass; outgoing
    // dependency: make color writes visible to subsequent reads (the copy-out).
    const VkSubpassDependency subpassDependencies[2] = {
        {
            VK_SUBPASS_EXTERNAL,                           // uint32_t srcSubpass;
            0u,                                            // uint32_t dstSubpass;
            VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,          // VkPipelineStageFlags srcStageMask;
            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, // VkPipelineStageFlags dstStageMask;
            VK_ACCESS_MEMORY_READ_BIT,                     // VkAccessFlags srcAccessMask;
            VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, // VkAccessFlags dstAccessMask;
            VK_DEPENDENCY_BY_REGION_BIT // VkDependencyFlags dependencyFlags;
        },
        {
            0u,                                            // uint32_t srcSubpass;
            VK_SUBPASS_EXTERNAL,                           // uint32_t dstSubpass;
            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, // VkPipelineStageFlags srcStageMask;
            VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,          // VkPipelineStageFlags dstStageMask;
            VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, // VkAccessFlags srcAccessMask;
            VK_ACCESS_MEMORY_READ_BIT,  // VkAccessFlags dstAccessMask;
            VK_DEPENDENCY_BY_REGION_BIT // VkDependencyFlags dependencyFlags;
        },
    };
    const VkAttachmentDescription attachmentDescription = {
        0u,                                  // VkAttachmentDescriptionFlags flags;
        format,                              // VkFormat format;
        VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
        VK_ATTACHMENT_LOAD_OP_CLEAR,         // VkAttachmentLoadOp loadOp;
        VK_ATTACHMENT_STORE_OP_STORE,        // VkAttachmentStoreOp storeOp;
        VK_ATTACHMENT_LOAD_OP_DONT_CARE,     // VkAttachmentLoadOp stencilLoadOp;
        VK_ATTACHMENT_STORE_OP_DONT_CARE,    // VkAttachmentStoreOp stencilStoreOp;
        VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout initialLayout;
        VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL // VkImageLayout finalLayout;
    };
    const VkRenderPassCreateInfo renderPassCreateInfo = {
        VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // VkStructureType sType;
        nullptr,                                   // const void* pNext;
        0u,                                        // VkRenderPassCreateFlags flags;
        1,                                         // uint32_t attachmentCount;
        &attachmentDescription,                    // const VkAttachmentDescription* pAttachments;
        1,                                         // uint32_t subpassCount;
        &subpassDescription,                       // const VkSubpassDescription* pSubpasses;
        2,                                         // uint32_t dependencyCount;
        subpassDependencies                        // const VkSubpassDependency* pDependencies;
    };

    return createRenderPass(context.getDeviceInterface(), context.getDevice(), &renderPassCreateInfo);
}
219
makeGraphicsPipeline(const DeviceInterface & vk,const VkDevice device,const VkPipelineLayout pipelineLayout,const VkShaderModule vertexShaderModule,const VkShaderModule tessellationControlShaderModule,const VkShaderModule tessellationEvalShaderModule,const VkShaderModule geometryShaderModule,const VkShaderModule fragmentShaderModule,const VkRenderPass renderPass,const std::vector<VkViewport> & viewports,const std::vector<VkRect2D> & scissors,const VkPrimitiveTopology topology,const uint32_t subpass,const uint32_t patchControlPoints,const VkPipelineVertexInputStateCreateInfo * vertexInputStateCreateInfo,const VkPipelineRasterizationStateCreateInfo * rasterizationStateCreateInfo,const VkPipelineMultisampleStateCreateInfo * multisampleStateCreateInfo,const VkPipelineDepthStencilStateCreateInfo * depthStencilStateCreateInfo,const VkPipelineColorBlendStateCreateInfo * colorBlendStateCreateInfo,const VkPipelineDynamicStateCreateInfo * dynamicStateCreateInfo,const uint32_t vertexShaderStageCreateFlags,const uint32_t tessellationControlShaderStageCreateFlags,const uint32_t tessellationEvalShaderStageCreateFlags,const uint32_t geometryShaderStageCreateFlags,const uint32_t fragmentShaderStageCreateFlags,const uint32_t requiredSubgroupSize[5])220 Move<VkPipeline> makeGraphicsPipeline(
221 const DeviceInterface &vk, const VkDevice device, const VkPipelineLayout pipelineLayout,
222 const VkShaderModule vertexShaderModule, const VkShaderModule tessellationControlShaderModule,
223 const VkShaderModule tessellationEvalShaderModule, const VkShaderModule geometryShaderModule,
224 const VkShaderModule fragmentShaderModule, const VkRenderPass renderPass, const std::vector<VkViewport> &viewports,
225 const std::vector<VkRect2D> &scissors, const VkPrimitiveTopology topology, const uint32_t subpass,
226 const uint32_t patchControlPoints, const VkPipelineVertexInputStateCreateInfo *vertexInputStateCreateInfo,
227 const VkPipelineRasterizationStateCreateInfo *rasterizationStateCreateInfo,
228 const VkPipelineMultisampleStateCreateInfo *multisampleStateCreateInfo,
229 const VkPipelineDepthStencilStateCreateInfo *depthStencilStateCreateInfo,
230 const VkPipelineColorBlendStateCreateInfo *colorBlendStateCreateInfo,
231 const VkPipelineDynamicStateCreateInfo *dynamicStateCreateInfo, const uint32_t vertexShaderStageCreateFlags,
232 const uint32_t tessellationControlShaderStageCreateFlags, const uint32_t tessellationEvalShaderStageCreateFlags,
233 const uint32_t geometryShaderStageCreateFlags, const uint32_t fragmentShaderStageCreateFlags,
234 const uint32_t requiredSubgroupSize[5])
235 {
236 const VkBool32 disableRasterization = (fragmentShaderModule == VK_NULL_HANDLE);
237 const bool hasTessellation =
238 (tessellationControlShaderModule != VK_NULL_HANDLE || tessellationEvalShaderModule != VK_NULL_HANDLE);
239
240 VkPipelineShaderStageCreateInfo stageCreateInfo = {
241 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType
242 nullptr, // const void* pNext
243 0u, // VkPipelineShaderStageCreateFlags flags
244 VK_SHADER_STAGE_VERTEX_BIT, // VkShaderStageFlagBits stage
245 VK_NULL_HANDLE, // VkShaderModule module
246 "main", // const char* pName
247 nullptr // const VkSpecializationInfo* pSpecializationInfo
248 };
249
250 std::vector<VkPipelineShaderStageCreateInfo> pipelineShaderStageParams;
251
252 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT requiredSubgroupSizeCreateInfo[5] = {
253 {
254 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
255 nullptr,
256 requiredSubgroupSize != nullptr ? requiredSubgroupSize[0] : 0u,
257 },
258 {
259 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
260 nullptr,
261 requiredSubgroupSize != nullptr ? requiredSubgroupSize[1] : 0u,
262 },
263 {
264 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
265 nullptr,
266 requiredSubgroupSize != nullptr ? requiredSubgroupSize[2] : 0u,
267 },
268 {
269 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
270 nullptr,
271 requiredSubgroupSize != nullptr ? requiredSubgroupSize[3] : 0u,
272 },
273 {
274 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
275 nullptr,
276 requiredSubgroupSize != nullptr ? requiredSubgroupSize[4] : 0u,
277 },
278 };
279
280 {
281 stageCreateInfo.pNext = (requiredSubgroupSizeCreateInfo[0].requiredSubgroupSize != 0u) ?
282 &requiredSubgroupSizeCreateInfo[0] :
283 nullptr;
284 stageCreateInfo.flags = vertexShaderStageCreateFlags;
285 stageCreateInfo.stage = VK_SHADER_STAGE_VERTEX_BIT;
286 stageCreateInfo.module = vertexShaderModule;
287 pipelineShaderStageParams.push_back(stageCreateInfo);
288 }
289
290 if (tessellationControlShaderModule != VK_NULL_HANDLE)
291 {
292 stageCreateInfo.pNext = (requiredSubgroupSizeCreateInfo[1].requiredSubgroupSize != 0u) ?
293 &requiredSubgroupSizeCreateInfo[1] :
294 nullptr;
295 stageCreateInfo.flags = tessellationControlShaderStageCreateFlags;
296 stageCreateInfo.stage = VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
297 stageCreateInfo.module = tessellationControlShaderModule;
298 pipelineShaderStageParams.push_back(stageCreateInfo);
299 }
300
301 if (tessellationEvalShaderModule != VK_NULL_HANDLE)
302 {
303 stageCreateInfo.pNext =
304 (requiredSubgroupSize != nullptr && requiredSubgroupSizeCreateInfo[2].requiredSubgroupSize != 0u) ?
305 &requiredSubgroupSizeCreateInfo[2] :
306 nullptr;
307 stageCreateInfo.flags = tessellationEvalShaderStageCreateFlags;
308 stageCreateInfo.stage = VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
309 stageCreateInfo.module = tessellationEvalShaderModule;
310 pipelineShaderStageParams.push_back(stageCreateInfo);
311 }
312
313 if (geometryShaderModule != VK_NULL_HANDLE)
314 {
315 stageCreateInfo.pNext = (requiredSubgroupSizeCreateInfo[3].requiredSubgroupSize != 0u) ?
316 &requiredSubgroupSizeCreateInfo[3] :
317 nullptr;
318 stageCreateInfo.flags = geometryShaderStageCreateFlags;
319 stageCreateInfo.stage = VK_SHADER_STAGE_GEOMETRY_BIT;
320 stageCreateInfo.module = geometryShaderModule;
321 pipelineShaderStageParams.push_back(stageCreateInfo);
322 }
323
324 if (fragmentShaderModule != VK_NULL_HANDLE)
325 {
326 stageCreateInfo.pNext = (requiredSubgroupSizeCreateInfo[4].requiredSubgroupSize != 0u) ?
327 &requiredSubgroupSizeCreateInfo[4] :
328 nullptr;
329 stageCreateInfo.flags = fragmentShaderStageCreateFlags;
330 stageCreateInfo.stage = VK_SHADER_STAGE_FRAGMENT_BIT;
331 stageCreateInfo.module = fragmentShaderModule;
332 pipelineShaderStageParams.push_back(stageCreateInfo);
333 }
334
335 const VkVertexInputBindingDescription vertexInputBindingDescription = {
336 0u, // uint32_t binding
337 sizeof(tcu::Vec4), // uint32_t stride
338 VK_VERTEX_INPUT_RATE_VERTEX, // VkVertexInputRate inputRate
339 };
340
341 const VkVertexInputAttributeDescription vertexInputAttributeDescription = {
342 0u, // uint32_t location
343 0u, // uint32_t binding
344 VK_FORMAT_R32G32B32A32_SFLOAT, // VkFormat format
345 0u // uint32_t offset
346 };
347
348 const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfoDefault = {
349 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType
350 nullptr, // const void* pNext
351 (VkPipelineVertexInputStateCreateFlags)0, // VkPipelineVertexInputStateCreateFlags flags
352 1u, // uint32_t vertexBindingDescriptionCount
353 &vertexInputBindingDescription, // const VkVertexInputBindingDescription* pVertexBindingDescriptions
354 1u, // uint32_t vertexAttributeDescriptionCount
355 &vertexInputAttributeDescription // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions
356 };
357
358 const VkPipelineInputAssemblyStateCreateInfo inputAssemblyStateCreateInfo = {
359 VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, // VkStructureType sType
360 nullptr, // const void* pNext
361 0u, // VkPipelineInputAssemblyStateCreateFlags flags
362 topology, // VkPrimitiveTopology topology
363 VK_FALSE // VkBool32 primitiveRestartEnable
364 };
365
366 const VkPipelineTessellationStateCreateInfo tessStateCreateInfo = {
367 VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, // VkStructureType sType
368 nullptr, // const void* pNext
369 0u, // VkPipelineTessellationStateCreateFlags flags
370 patchControlPoints // uint32_t patchControlPoints
371 };
372
373 const VkPipelineViewportStateCreateInfo viewportStateCreateInfo = {
374 VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, // VkStructureType sType
375 nullptr, // const void* pNext
376 (VkPipelineViewportStateCreateFlags)0, // VkPipelineViewportStateCreateFlags flags
377 viewports.empty() ? 1u :
378 (uint32_t)viewports.size(), // uint32_t viewportCount
379 viewports.empty() ? nullptr : &viewports[0], // const VkViewport* pViewports
380 viewports.empty() ? 1u : (uint32_t)scissors.size(), // uint32_t scissorCount
381 scissors.empty() ? nullptr : &scissors[0] // const VkRect2D* pScissors
382 };
383
384 const VkPipelineRasterizationStateCreateInfo rasterizationStateCreateInfoDefault = {
385 VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, // VkStructureType sType
386 nullptr, // const void* pNext
387 0u, // VkPipelineRasterizationStateCreateFlags flags
388 VK_FALSE, // VkBool32 depthClampEnable
389 disableRasterization, // VkBool32 rasterizerDiscardEnable
390 VK_POLYGON_MODE_FILL, // VkPolygonMode polygonMode
391 VK_CULL_MODE_NONE, // VkCullModeFlags cullMode
392 VK_FRONT_FACE_COUNTER_CLOCKWISE, // VkFrontFace frontFace
393 VK_FALSE, // VkBool32 depthBiasEnable
394 0.0f, // float depthBiasConstantFactor
395 0.0f, // float depthBiasClamp
396 0.0f, // float depthBiasSlopeFactor
397 1.0f // float lineWidth
398 };
399
400 const VkPipelineMultisampleStateCreateInfo multisampleStateCreateInfoDefault = {
401 VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, // VkStructureType sType
402 nullptr, // const void* pNext
403 0u, // VkPipelineMultisampleStateCreateFlags flags
404 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits rasterizationSamples
405 VK_FALSE, // VkBool32 sampleShadingEnable
406 1.0f, // float minSampleShading
407 nullptr, // const VkSampleMask* pSampleMask
408 VK_FALSE, // VkBool32 alphaToCoverageEnable
409 VK_FALSE // VkBool32 alphaToOneEnable
410 };
411
412 const VkStencilOpState stencilOpState = {
413 VK_STENCIL_OP_KEEP, // VkStencilOp failOp
414 VK_STENCIL_OP_KEEP, // VkStencilOp passOp
415 VK_STENCIL_OP_KEEP, // VkStencilOp depthFailOp
416 VK_COMPARE_OP_NEVER, // VkCompareOp compareOp
417 0, // uint32_t compareMask
418 0, // uint32_t writeMask
419 0 // uint32_t reference
420 };
421
422 const VkPipelineDepthStencilStateCreateInfo depthStencilStateCreateInfoDefault = {
423 VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, // VkStructureType sType
424 nullptr, // const void* pNext
425 0u, // VkPipelineDepthStencilStateCreateFlags flags
426 VK_FALSE, // VkBool32 depthTestEnable
427 VK_FALSE, // VkBool32 depthWriteEnable
428 VK_COMPARE_OP_LESS_OR_EQUAL, // VkCompareOp depthCompareOp
429 VK_FALSE, // VkBool32 depthBoundsTestEnable
430 VK_FALSE, // VkBool32 stencilTestEnable
431 stencilOpState, // VkStencilOpState front
432 stencilOpState, // VkStencilOpState back
433 0.0f, // float minDepthBounds
434 1.0f, // float maxDepthBounds
435 };
436
437 const VkPipelineColorBlendAttachmentState colorBlendAttachmentState = {
438 VK_FALSE, // VkBool32 blendEnable
439 VK_BLEND_FACTOR_ZERO, // VkBlendFactor srcColorBlendFactor
440 VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstColorBlendFactor
441 VK_BLEND_OP_ADD, // VkBlendOp colorBlendOp
442 VK_BLEND_FACTOR_ZERO, // VkBlendFactor srcAlphaBlendFactor
443 VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstAlphaBlendFactor
444 VK_BLEND_OP_ADD, // VkBlendOp alphaBlendOp
445 VK_COLOR_COMPONENT_R_BIT // VkColorComponentFlags colorWriteMask
446 | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT};
447
448 const VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfoDefault = {
449 VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType sType
450 nullptr, // const void* pNext
451 0u, // VkPipelineColorBlendStateCreateFlags flags
452 VK_FALSE, // VkBool32 logicOpEnable
453 VK_LOGIC_OP_CLEAR, // VkLogicOp logicOp
454 1u, // uint32_t attachmentCount
455 &colorBlendAttachmentState, // const VkPipelineColorBlendAttachmentState* pAttachments
456 {0.0f, 0.0f, 0.0f, 0.0f} // float blendConstants[4]
457 };
458
459 std::vector<VkDynamicState> dynamicStates;
460
461 if (viewports.empty())
462 dynamicStates.push_back(VK_DYNAMIC_STATE_VIEWPORT);
463 if (scissors.empty())
464 dynamicStates.push_back(VK_DYNAMIC_STATE_SCISSOR);
465
466 const VkPipelineDynamicStateCreateInfo dynamicStateCreateInfoDefault = {
467 VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, // VkStructureType sType
468 nullptr, // const void* pNext
469 0u, // VkPipelineDynamicStateCreateFlags flags
470 (uint32_t)dynamicStates.size(), // uint32_t dynamicStateCount
471 dynamicStates.empty() ? nullptr : &dynamicStates[0] // const VkDynamicState* pDynamicStates
472 };
473
474 const VkPipelineDynamicStateCreateInfo *dynamicStateCreateInfoDefaultPtr =
475 dynamicStates.empty() ? nullptr : &dynamicStateCreateInfoDefault;
476
477 const VkGraphicsPipelineCreateInfo pipelineCreateInfo = {
478 VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, // VkStructureType sType
479 nullptr, // const void* pNext
480 0u, // VkPipelineCreateFlags flags
481 (uint32_t)pipelineShaderStageParams.size(), // uint32_t stageCount
482 &pipelineShaderStageParams[0], // const VkPipelineShaderStageCreateInfo* pStages
483 vertexInputStateCreateInfo ?
484 vertexInputStateCreateInfo :
485 &vertexInputStateCreateInfoDefault, // const VkPipelineVertexInputStateCreateInfo* pVertexInputState
486 &inputAssemblyStateCreateInfo, // const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState
487 hasTessellation ? &tessStateCreateInfo :
488 nullptr, // const VkPipelineTessellationStateCreateInfo* pTessellationState
489 &viewportStateCreateInfo, // const VkPipelineViewportStateCreateInfo* pViewportState
490 rasterizationStateCreateInfo ?
491 rasterizationStateCreateInfo :
492 &rasterizationStateCreateInfoDefault, // const VkPipelineRasterizationStateCreateInfo* pRasterizationState
493 multisampleStateCreateInfo ?
494 multisampleStateCreateInfo :
495 &multisampleStateCreateInfoDefault, // const VkPipelineMultisampleStateCreateInfo* pMultisampleState
496 depthStencilStateCreateInfo ?
497 depthStencilStateCreateInfo :
498 &depthStencilStateCreateInfoDefault, // const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState
499 colorBlendStateCreateInfo ?
500 colorBlendStateCreateInfo :
501 &colorBlendStateCreateInfoDefault, // const VkPipelineColorBlendStateCreateInfo* pColorBlendState
502 dynamicStateCreateInfo ?
503 dynamicStateCreateInfo :
504 dynamicStateCreateInfoDefaultPtr, // const VkPipelineDynamicStateCreateInfo* pDynamicState
505 pipelineLayout, // VkPipelineLayout layout
506 renderPass, // VkRenderPass renderPass
507 subpass, // uint32_t subpass
508 VK_NULL_HANDLE, // VkPipeline basePipelineHandle
509 0 // int32_t basePipelineIndex;
510 };
511
512 return createGraphicsPipeline(vk, device, VK_NULL_HANDLE, &pipelineCreateInfo);
513 }
514
makeGraphicsPipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderStageFlags stages,const VkShaderModule vertexShaderModule,const VkShaderModule fragmentShaderModule,const VkShaderModule geometryShaderModule,const VkShaderModule tessellationControlModule,const VkShaderModule tessellationEvaluationModule,const VkRenderPass renderPass,const VkPrimitiveTopology topology=VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,const VkVertexInputBindingDescription * vertexInputBindingDescription=nullptr,const VkVertexInputAttributeDescription * vertexInputAttributeDescriptions=nullptr,const bool frameBufferTests=false,const vk::VkFormat attachmentFormat=VK_FORMAT_R32G32B32A32_SFLOAT,const uint32_t vertexShaderStageCreateFlags=0u,const uint32_t tessellationControlShaderStageCreateFlags=0u,const uint32_t tessellationEvalShaderStageCreateFlags=0u,const uint32_t geometryShaderStageCreateFlags=0u,const uint32_t fragmentShaderStageCreateFlags=0u,const uint32_t requiredSubgroupSize[5]=nullptr)515 Move<VkPipeline> makeGraphicsPipeline(
516 Context &context, const VkPipelineLayout pipelineLayout, const VkShaderStageFlags stages,
517 const VkShaderModule vertexShaderModule, const VkShaderModule fragmentShaderModule,
518 const VkShaderModule geometryShaderModule, const VkShaderModule tessellationControlModule,
519 const VkShaderModule tessellationEvaluationModule, const VkRenderPass renderPass,
520 const VkPrimitiveTopology topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
521 const VkVertexInputBindingDescription *vertexInputBindingDescription = nullptr,
522 const VkVertexInputAttributeDescription *vertexInputAttributeDescriptions = nullptr,
523 const bool frameBufferTests = false, const vk::VkFormat attachmentFormat = VK_FORMAT_R32G32B32A32_SFLOAT,
524 const uint32_t vertexShaderStageCreateFlags = 0u, const uint32_t tessellationControlShaderStageCreateFlags = 0u,
525 const uint32_t tessellationEvalShaderStageCreateFlags = 0u, const uint32_t geometryShaderStageCreateFlags = 0u,
526 const uint32_t fragmentShaderStageCreateFlags = 0u, const uint32_t requiredSubgroupSize[5] = nullptr)
527 {
528 const std::vector<VkViewport> noViewports;
529 const std::vector<VkRect2D> noScissors;
530 const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo = {
531 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
532 nullptr, // const void* pNext;
533 0u, // VkPipelineVertexInputStateCreateFlags flags;
534 vertexInputBindingDescription == nullptr ? 0u : 1u, // uint32_t vertexBindingDescriptionCount;
535 vertexInputBindingDescription, // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
536 vertexInputAttributeDescriptions == nullptr ? 0u : 1u, // uint32_t vertexAttributeDescriptionCount;
537 vertexInputAttributeDescriptions, // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
538 };
539 const uint32_t numChannels = getNumUsedChannels(mapVkFormat(attachmentFormat).order);
540 const VkColorComponentFlags colorComponent =
541 numChannels == 1 ? VK_COLOR_COMPONENT_R_BIT :
542 numChannels == 2 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT :
543 numChannels == 3 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT :
544 VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT |
545 VK_COLOR_COMPONENT_A_BIT;
546 const VkPipelineColorBlendAttachmentState colorBlendAttachmentState = {
547 VK_FALSE, // VkBool32 blendEnable;
548 VK_BLEND_FACTOR_ZERO, // VkBlendFactor srcColorBlendFactor;
549 VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstColorBlendFactor;
550 VK_BLEND_OP_ADD, // VkBlendOp colorBlendOp;
551 VK_BLEND_FACTOR_ZERO, // VkBlendFactor srcAlphaBlendFactor;
552 VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstAlphaBlendFactor;
553 VK_BLEND_OP_ADD, // VkBlendOp alphaBlendOp;
554 colorComponent // VkColorComponentFlags colorWriteMask;
555 };
556 const VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfo = {
557 VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType sType;
558 nullptr, // const void* pNext;
559 0u, // VkPipelineColorBlendStateCreateFlags flags;
560 VK_FALSE, // VkBool32 logicOpEnable;
561 VK_LOGIC_OP_CLEAR, // VkLogicOp logicOp;
562 1, // uint32_t attachmentCount;
563 &colorBlendAttachmentState, // const VkPipelineColorBlendAttachmentState* pAttachments;
564 {0.0f, 0.0f, 0.0f, 0.0f} // float blendConstants[4];
565 };
566 const uint32_t patchControlPoints = (VK_SHADER_STAGE_FRAGMENT_BIT & stages && frameBufferTests) ? 2u : 1u;
567
568 return makeGraphicsPipeline(
569 context.getDeviceInterface(), // const DeviceInterface& vk
570 context.getDevice(), // const VkDevice device
571 pipelineLayout, // const VkPipelineLayout pipelineLayout
572 vertexShaderModule, // const VkShaderModule vertexShaderModule
573 tessellationControlModule, // const VkShaderModule tessellationControlShaderModule
574 tessellationEvaluationModule, // const VkShaderModule tessellationEvalShaderModule
575 geometryShaderModule, // const VkShaderModule geometryShaderModule
576 fragmentShaderModule, // const VkShaderModule fragmentShaderModule
577 renderPass, // const VkRenderPass renderPass
578 noViewports, // const std::vector<VkViewport>& viewports
579 noScissors, // const std::vector<VkRect2D>& scissors
580 topology, // const VkPrimitiveTopology topology
581 0u, // const uint32_t subpass
582 patchControlPoints, // const uint32_t patchControlPoints
583 &vertexInputStateCreateInfo, // const VkPipelineVertexInputStateCreateInfo* vertexInputStateCreateInfo
584 nullptr, // const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
585 nullptr, // const VkPipelineMultisampleStateCreateInfo* multisampleStateCreateInfo
586 nullptr, // const VkPipelineDepthStencilStateCreateInfo* depthStencilStateCreateInfo
587 &colorBlendStateCreateInfo, // const VkPipelineColorBlendStateCreateInfo* colorBlendStateCreateInfo
588 nullptr, // const VkPipelineDynamicStateCreateInfo*
589 vertexShaderStageCreateFlags, // const uint32_t vertexShaderStageCreateFlags,
590 tessellationControlShaderStageCreateFlags, // const uint32_t tessellationControlShaderStageCreateFlags
591 tessellationEvalShaderStageCreateFlags, // const uint32_t tessellationEvalShaderStageCreateFlags
592 geometryShaderStageCreateFlags, // const uint32_t geometryShaderStageCreateFlags
593 fragmentShaderStageCreateFlags, // const uint32_t fragmentShaderStageCreateFlags
594 requiredSubgroupSize); // const uint32_t requiredSubgroupSize[5]
595 }
596
makeCommandBuffer(Context & context,const VkCommandPool commandPool)597 Move<VkCommandBuffer> makeCommandBuffer(Context &context, const VkCommandPool commandPool)
598 {
599 const VkCommandBufferAllocateInfo bufferAllocateParams = {
600 VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, // VkStructureType sType;
601 nullptr, // const void* pNext;
602 commandPool, // VkCommandPool commandPool;
603 VK_COMMAND_BUFFER_LEVEL_PRIMARY, // VkCommandBufferLevel level;
604 1u, // uint32_t bufferCount;
605 };
606 return allocateCommandBuffer(context.getDeviceInterface(), context.getDevice(), &bufferAllocateParams);
607 }
608
609 struct Buffer;
610 struct Image;
611
// Type-erased base for the Buffer and Image descriptor resources declared
// below. The concrete type is recorded in m_isImage and recovered via
// getAsBuffer()/getAsImage(). Buffer and Image are still incomplete types
// here, so the downcasts use reinterpret_cast; this assumes both derive
// directly (and solely) from BufferOrImage so the pointer value is unchanged.
struct BufferOrImage
{
    // True when the derived object is an Image, false for a Buffer.
    bool isImage() const
    {
        return m_isImage;
    }

    // Downcast to Buffer; DE_FATAL's on type mismatch.
    // NOTE(review): the fatal message wording looks inverted relative to the
    // check (it fires when the object IS an image) — confirm intent upstream.
    Buffer *getAsBuffer()
    {
        if (m_isImage)
            DE_FATAL("Trying to get a buffer as an image!");
        return reinterpret_cast<Buffer *>(this);
    }

    // Downcast to Image; DE_FATAL's on type mismatch.
    Image *getAsImage()
    {
        if (!m_isImage)
            DE_FATAL("Trying to get an image as a buffer!");
        return reinterpret_cast<Image *>(this);
    }

    // Descriptor type this resource binds as; Buffer overrides this to report
    // uniform buffers when appropriate.
    virtual VkDescriptorType getType() const
    {
        if (m_isImage)
        {
            return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
        }
        else
        {
            return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
        }
    }

    // Backing device-memory allocation; derived constructors must set it.
    Allocation &getAllocation() const
    {
        return *m_allocation;
    }

    virtual ~BufferOrImage()
    {
    }

protected:
    // Derived classes declare their kind at construction time.
    explicit BufferOrImage(bool image) : m_isImage(image)
    {
    }

    bool m_isImage;
    de::details::MovePtr<Allocation> m_allocation;
};
662
663 struct Buffer : public BufferOrImage
664 {
Buffer__anon084a9e710111::Buffer665 explicit Buffer(Context &context, VkDeviceSize sizeInBytes, VkBufferUsageFlags usage)
666 : BufferOrImage(false)
667 , m_sizeInBytes(sizeInBytes)
668 , m_usage(usage)
669 {
670 const DeviceInterface &vkd = context.getDeviceInterface();
671 const VkDevice device = context.getDevice();
672
673 const vk::VkBufferCreateInfo bufferCreateInfo = {
674 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
675 nullptr,
676 0u,
677 m_sizeInBytes,
678 m_usage,
679 VK_SHARING_MODE_EXCLUSIVE,
680 0u,
681 nullptr,
682 };
683 m_buffer = createBuffer(vkd, device, &bufferCreateInfo);
684
685 VkMemoryRequirements req = getBufferMemoryRequirements(vkd, device, *m_buffer);
686
687 m_allocation = context.getDefaultAllocator().allocate(req, MemoryRequirement::HostVisible);
688 VK_CHECK(vkd.bindBufferMemory(device, *m_buffer, m_allocation->getMemory(), m_allocation->getOffset()));
689 }
690
getType__anon084a9e710111::Buffer691 virtual VkDescriptorType getType() const
692 {
693 if (VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT == m_usage)
694 {
695 return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
696 }
697 return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
698 }
699
getBuffer__anon084a9e710111::Buffer700 VkBuffer getBuffer() const
701 {
702 return *m_buffer;
703 }
704
getBufferPtr__anon084a9e710111::Buffer705 const VkBuffer *getBufferPtr() const
706 {
707 return &(*m_buffer);
708 }
709
getSize__anon084a9e710111::Buffer710 VkDeviceSize getSize() const
711 {
712 return m_sizeInBytes;
713 }
714
715 private:
716 Move<VkBuffer> m_buffer;
717 VkDeviceSize m_sizeInBytes;
718 const VkBufferUsageFlags m_usage;
719 };
720
// 2D single-mip, single-layer image plus a matching image view and a
// nearest-filtering sampler. The constructor also transitions the image to
// VK_IMAGE_LAYOUT_GENERAL on the universal queue before returning.
struct Image : public BufferOrImage
{
    explicit Image(Context &context, uint32_t width, uint32_t height, VkFormat format,
                   VkImageUsageFlags usage = VK_IMAGE_USAGE_STORAGE_BIT)
        : BufferOrImage(true)
    {
        const DeviceInterface &vk = context.getDeviceInterface();
        const VkDevice device = context.getDevice();
        const uint32_t queueFamilyIndex = context.getUniversalQueueFamilyIndex();

        const VkImageCreateInfo imageCreateInfo = {
            VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
            nullptr, // const void* pNext;
            0, // VkImageCreateFlags flags;
            VK_IMAGE_TYPE_2D, // VkImageType imageType;
            format, // VkFormat format;
            {width, height, 1}, // VkExtent3D extent;
            1, // uint32_t mipLevels;
            1, // uint32_t arrayLayers;
            VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
            VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
            usage, // VkImageUsageFlags usage;
            VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
            0u, // uint32_t queueFamilyIndexCount;
            nullptr, // const uint32_t* pQueueFamilyIndices;
            VK_IMAGE_LAYOUT_UNDEFINED // VkImageLayout initialLayout;
        };

        const VkComponentMapping componentMapping = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
                                                     VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY};

        // Single color subresource: mip 0, layer 0. Reused for both the view
        // and the layout-transition barrier below.
        const VkImageSubresourceRange subresourceRange = {
            VK_IMAGE_ASPECT_COLOR_BIT, //VkImageAspectFlags aspectMask
            0u, //uint32_t baseMipLevel
            1u, //uint32_t levelCount
            0u, //uint32_t baseArrayLayer
            1u //uint32_t layerCount
        };

        const VkSamplerCreateInfo samplerCreateInfo = {
            VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, // VkStructureType sType;
            nullptr, // const void* pNext;
            0u, // VkSamplerCreateFlags flags;
            VK_FILTER_NEAREST, // VkFilter magFilter;
            VK_FILTER_NEAREST, // VkFilter minFilter;
            VK_SAMPLER_MIPMAP_MODE_NEAREST, // VkSamplerMipmapMode mipmapMode;
            VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeU;
            VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeV;
            VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeW;
            0.0f, // float mipLodBias;
            VK_FALSE, // VkBool32 anisotropyEnable;
            1.0f, // float maxAnisotropy;
            false, // VkBool32 compareEnable;
            VK_COMPARE_OP_ALWAYS, // VkCompareOp compareOp;
            0.0f, // float minLod;
            0.0f, // float maxLod;
            VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK, // VkBorderColor borderColor;
            VK_FALSE, // VkBool32 unnormalizedCoordinates;
        };

        m_image = createImage(vk, device, &imageCreateInfo);

        VkMemoryRequirements req = getImageMemoryRequirements(vk, device, *m_image);

        // NOTE(review): the allocation is twice the reported size requirement.
        // Presumably a workaround (e.g. for driver over-reads) -- confirm
        // before removing.
        req.size *= 2;
        m_allocation = context.getDefaultAllocator().allocate(req, MemoryRequirement::Any);

        VK_CHECK(vk.bindImageMemory(device, *m_image, m_allocation->getMemory(), m_allocation->getOffset()));

        const VkImageViewCreateInfo imageViewCreateInfo = {
            VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // VkStructureType sType;
            nullptr, // const void* pNext;
            0, // VkImageViewCreateFlags flags;
            *m_image, // VkImage image;
            VK_IMAGE_VIEW_TYPE_2D, // VkImageViewType viewType;
            imageCreateInfo.format, // VkFormat format;
            componentMapping, // VkComponentMapping components;
            subresourceRange // VkImageSubresourceRange subresourceRange;
        };

        m_imageView = createImageView(vk, device, &imageViewCreateInfo);
        m_sampler = createSampler(vk, device, &samplerCreateInfo);

        // Transition input image layouts
        // (UNDEFINED -> GENERAL, recorded on a throwaway command buffer and
        // submitted synchronously on the universal queue).
        {
            const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
            const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(context, *cmdPool));

            beginCommandBuffer(vk, *cmdBuffer);

            const VkImageMemoryBarrier imageBarrier =
                makeImageMemoryBarrier((VkAccessFlags)0u, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
                                       VK_IMAGE_LAYOUT_GENERAL, *m_image, subresourceRange);

            vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
                                  (VkDependencyFlags)0, 0u, nullptr, 0u, nullptr, 1u, &imageBarrier);

            endCommandBuffer(vk, *cmdBuffer);
            submitCommandsAndWait(vk, device, context.getUniversalQueue(), *cmdBuffer);
        }
    }

    VkImage getImage() const
    {
        return *m_image;
    }

    VkImageView getImageView() const
    {
        return *m_imageView;
    }

    VkSampler getSampler() const
    {
        return *m_sampler;
    }

private:
    Move<VkImage> m_image;
    Move<VkImageView> m_imageView;
    Move<VkSampler> m_sampler;
};
843 } // namespace
844
getStagesCount(const VkShaderStageFlags shaderStages)845 uint32_t vkt::subgroups::getStagesCount(const VkShaderStageFlags shaderStages)
846 {
847 const uint32_t stageCount = isAllGraphicsStages(shaderStages) ? 4 :
848 isAllComputeStages(shaderStages) ? 1
849 #ifndef CTS_USES_VULKANSC
850 :
851 isAllRayTracingStages(shaderStages) ? 6 :
852 isAllMeshShadingStages(shaderStages) ? 1
853 #endif // CTS_USES_VULKANSC
854 :
855 0;
856
857 DE_ASSERT(stageCount != 0);
858
859 return stageCount;
860 }
861
getSharedMemoryBallotHelper()862 std::string vkt::subgroups::getSharedMemoryBallotHelper()
863 {
864 return "shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * "
865 "gl_WorkGroupSize.z];\n"
866 "uvec4 sharedMemoryBallot(bool vote)\n"
867 "{\n"
868 " uint groupOffset = gl_SubgroupID;\n"
869 " // One invocation in the group 0's the whole group's data\n"
870 " if (subgroupElect())\n"
871 " {\n"
872 " superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
873 " }\n"
874 " subgroupMemoryBarrierShared();\n"
875 " if (vote)\n"
876 " {\n"
877 " const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
878 " const highp uint bitToSet = 1u << invocationId;\n"
879 " switch (gl_SubgroupInvocationID / 32)\n"
880 " {\n"
881 " case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
882 " case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
883 " case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
884 " case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
885 " }\n"
886 " }\n"
887 " subgroupMemoryBarrierShared();\n"
888 " return superSecretComputeShaderHelper[groupOffset];\n"
889 "}\n";
890 }
891
getSharedMemoryBallotHelperARB()892 std::string vkt::subgroups::getSharedMemoryBallotHelperARB()
893 {
894 return "shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * "
895 "gl_WorkGroupSize.z];\n"
896 "uint64_t sharedMemoryBallot(bool vote)\n"
897 "{\n"
898 " uint groupOffset = gl_SubgroupID;\n"
899 " // One invocation in the group 0's the whole group's data\n"
900 " if (subgroupElect())\n"
901 " {\n"
902 " superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
903 " }\n"
904 " subgroupMemoryBarrierShared();\n"
905 " if (vote)\n"
906 " {\n"
907 " const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
908 " const highp uint bitToSet = 1u << invocationId;\n"
909 " switch (gl_SubgroupInvocationID / 32)\n"
910 " {\n"
911 " case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
912 " case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
913 " case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
914 " case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
915 " }\n"
916 " }\n"
917 " subgroupMemoryBarrierShared();\n"
918 " return packUint2x32(superSecretComputeShaderHelper[groupOffset].xy);\n"
919 "}\n";
920 }
921
getSubgroupSize(Context & context)922 uint32_t vkt::subgroups::getSubgroupSize(Context &context)
923 {
924 return context.getSubgroupProperties().subgroupSize;
925 }
926
maxSupportedSubgroupSize()927 uint32_t vkt::subgroups::maxSupportedSubgroupSize()
928 {
929 return 128u;
930 }
931
getShaderStageName(VkShaderStageFlags stage)932 std::string vkt::subgroups::getShaderStageName(VkShaderStageFlags stage)
933 {
934 switch (stage)
935 {
936 case VK_SHADER_STAGE_COMPUTE_BIT:
937 return "compute";
938 case VK_SHADER_STAGE_FRAGMENT_BIT:
939 return "fragment";
940 case VK_SHADER_STAGE_VERTEX_BIT:
941 return "vertex";
942 case VK_SHADER_STAGE_GEOMETRY_BIT:
943 return "geometry";
944 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
945 return "tess_control";
946 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
947 return "tess_eval";
948 #ifndef CTS_USES_VULKANSC
949 case VK_SHADER_STAGE_RAYGEN_BIT_KHR:
950 return "rgen";
951 case VK_SHADER_STAGE_ANY_HIT_BIT_KHR:
952 return "ahit";
953 case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR:
954 return "chit";
955 case VK_SHADER_STAGE_MISS_BIT_KHR:
956 return "miss";
957 case VK_SHADER_STAGE_INTERSECTION_BIT_KHR:
958 return "sect";
959 case VK_SHADER_STAGE_CALLABLE_BIT_KHR:
960 return "call";
961 case VK_SHADER_STAGE_MESH_BIT_EXT:
962 return "mesh";
963 case VK_SHADER_STAGE_TASK_BIT_EXT:
964 return "task";
965 #endif // CTS_USES_VULKANSC
966 default:
967 TCU_THROW(InternalError, "Unhandled stage");
968 }
969 }
970
getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit)971 std::string vkt::subgroups::getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit)
972 {
973 switch (bit)
974 {
975 case VK_SUBGROUP_FEATURE_BASIC_BIT:
976 return "VK_SUBGROUP_FEATURE_BASIC_BIT";
977 case VK_SUBGROUP_FEATURE_VOTE_BIT:
978 return "VK_SUBGROUP_FEATURE_VOTE_BIT";
979 case VK_SUBGROUP_FEATURE_ARITHMETIC_BIT:
980 return "VK_SUBGROUP_FEATURE_ARITHMETIC_BIT";
981 case VK_SUBGROUP_FEATURE_BALLOT_BIT:
982 return "VK_SUBGROUP_FEATURE_BALLOT_BIT";
983 case VK_SUBGROUP_FEATURE_SHUFFLE_BIT:
984 return "VK_SUBGROUP_FEATURE_SHUFFLE_BIT";
985 case VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT:
986 return "VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT";
987 case VK_SUBGROUP_FEATURE_CLUSTERED_BIT:
988 return "VK_SUBGROUP_FEATURE_CLUSTERED_BIT";
989 case VK_SUBGROUP_FEATURE_QUAD_BIT:
990 return "VK_SUBGROUP_FEATURE_QUAD_BIT";
991 default:
992 TCU_THROW(InternalError, "Unknown subgroup feature category");
993 }
994 }
995
addNoSubgroupShader(SourceCollections & programCollection)996 void vkt::subgroups::addNoSubgroupShader(SourceCollections &programCollection)
997 {
998 {
999 /*
1000 "#version 450\n"
1001 "void main (void)\n"
1002 "{\n"
1003 " float pixelSize = 2.0f/1024.0f;\n"
1004 " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
1005 " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
1006 " gl_PointSize = 1.0f;\n"
1007 "}\n"
1008 */
1009 const std::string vertNoSubgroup = "; SPIR-V\n"
1010 "; Version: 1.3\n"
1011 "; Generator: Khronos Glslang Reference Front End; 1\n"
1012 "; Bound: 37\n"
1013 "; Schema: 0\n"
1014 "OpCapability Shader\n"
1015 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1016 "OpMemoryModel Logical GLSL450\n"
1017 "OpEntryPoint Vertex %4 \"main\" %22 %26\n"
1018 "OpMemberDecorate %20 0 BuiltIn Position\n"
1019 "OpMemberDecorate %20 1 BuiltIn PointSize\n"
1020 "OpMemberDecorate %20 2 BuiltIn ClipDistance\n"
1021 "OpMemberDecorate %20 3 BuiltIn CullDistance\n"
1022 "OpDecorate %20 Block\n"
1023 "OpDecorate %26 BuiltIn VertexIndex\n"
1024 "%2 = OpTypeVoid\n"
1025 "%3 = OpTypeFunction %2\n"
1026 "%6 = OpTypeFloat 32\n"
1027 "%7 = OpTypePointer Function %6\n"
1028 "%9 = OpConstant %6 0.00195313\n"
1029 "%12 = OpConstant %6 2\n"
1030 "%14 = OpConstant %6 1\n"
1031 "%16 = OpTypeVector %6 4\n"
1032 "%17 = OpTypeInt 32 0\n"
1033 "%18 = OpConstant %17 1\n"
1034 "%19 = OpTypeArray %6 %18\n"
1035 "%20 = OpTypeStruct %16 %6 %19 %19\n"
1036 "%21 = OpTypePointer Output %20\n"
1037 "%22 = OpVariable %21 Output\n"
1038 "%23 = OpTypeInt 32 1\n"
1039 "%24 = OpConstant %23 0\n"
1040 "%25 = OpTypePointer Input %23\n"
1041 "%26 = OpVariable %25 Input\n"
1042 "%33 = OpConstant %6 0\n"
1043 "%35 = OpTypePointer Output %16\n"
1044 "%37 = OpConstant %23 1\n"
1045 "%38 = OpTypePointer Output %6\n"
1046 "%4 = OpFunction %2 None %3\n"
1047 "%5 = OpLabel\n"
1048 "%8 = OpVariable %7 Function\n"
1049 "%10 = OpVariable %7 Function\n"
1050 "OpStore %8 %9\n"
1051 "%11 = OpLoad %6 %8\n"
1052 "%13 = OpFDiv %6 %11 %12\n"
1053 "%15 = OpFSub %6 %13 %14\n"
1054 "OpStore %10 %15\n"
1055 "%27 = OpLoad %23 %26\n"
1056 "%28 = OpConvertSToF %6 %27\n"
1057 "%29 = OpLoad %6 %8\n"
1058 "%30 = OpFMul %6 %28 %29\n"
1059 "%31 = OpLoad %6 %10\n"
1060 "%32 = OpFAdd %6 %30 %31\n"
1061 "%34 = OpCompositeConstruct %16 %32 %33 %33 %14\n"
1062 "%36 = OpAccessChain %35 %22 %24\n"
1063 "OpStore %36 %34\n"
1064 "%39 = OpAccessChain %38 %22 %37\n"
1065 "OpStore %39 %14\n"
1066 "OpReturn\n"
1067 "OpFunctionEnd\n";
1068 programCollection.spirvAsmSources.add("vert_noSubgroup") << vertNoSubgroup;
1069 }
1070
1071 {
1072 /*
1073 "#version 450\n"
1074 "layout(vertices=1) out;\n"
1075 "\n"
1076 "void main (void)\n"
1077 "{\n"
1078 " if (gl_InvocationID == 0)\n"
1079 " {\n"
1080 " gl_TessLevelOuter[0] = 1.0f;\n"
1081 " gl_TessLevelOuter[1] = 1.0f;\n"
1082 " }\n"
1083 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1084 "}\n"
1085 */
1086 const std::string tescNoSubgroup = "; SPIR-V\n"
1087 "; Version: 1.3\n"
1088 "; Generator: Khronos Glslang Reference Front End; 1\n"
1089 "; Bound: 45\n"
1090 "; Schema: 0\n"
1091 "OpCapability Tessellation\n"
1092 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1093 "OpMemoryModel Logical GLSL450\n"
1094 "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %32 %38\n"
1095 "OpExecutionMode %4 OutputVertices 1\n"
1096 "OpDecorate %8 BuiltIn InvocationId\n"
1097 "OpDecorate %20 Patch\n"
1098 "OpDecorate %20 BuiltIn TessLevelOuter\n"
1099 "OpMemberDecorate %29 0 BuiltIn Position\n"
1100 "OpMemberDecorate %29 1 BuiltIn PointSize\n"
1101 "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
1102 "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
1103 "OpDecorate %29 Block\n"
1104 "OpMemberDecorate %34 0 BuiltIn Position\n"
1105 "OpMemberDecorate %34 1 BuiltIn PointSize\n"
1106 "OpMemberDecorate %34 2 BuiltIn ClipDistance\n"
1107 "OpMemberDecorate %34 3 BuiltIn CullDistance\n"
1108 "OpDecorate %34 Block\n"
1109 "%2 = OpTypeVoid\n"
1110 "%3 = OpTypeFunction %2\n"
1111 "%6 = OpTypeInt 32 1\n"
1112 "%7 = OpTypePointer Input %6\n"
1113 "%8 = OpVariable %7 Input\n"
1114 "%10 = OpConstant %6 0\n"
1115 "%11 = OpTypeBool\n"
1116 "%15 = OpTypeFloat 32\n"
1117 "%16 = OpTypeInt 32 0\n"
1118 "%17 = OpConstant %16 4\n"
1119 "%18 = OpTypeArray %15 %17\n"
1120 "%19 = OpTypePointer Output %18\n"
1121 "%20 = OpVariable %19 Output\n"
1122 "%21 = OpConstant %15 1\n"
1123 "%22 = OpTypePointer Output %15\n"
1124 "%24 = OpConstant %6 1\n"
1125 "%26 = OpTypeVector %15 4\n"
1126 "%27 = OpConstant %16 1\n"
1127 "%28 = OpTypeArray %15 %27\n"
1128 "%29 = OpTypeStruct %26 %15 %28 %28\n"
1129 "%30 = OpTypeArray %29 %27\n"
1130 "%31 = OpTypePointer Output %30\n"
1131 "%32 = OpVariable %31 Output\n"
1132 "%34 = OpTypeStruct %26 %15 %28 %28\n"
1133 "%35 = OpConstant %16 32\n"
1134 "%36 = OpTypeArray %34 %35\n"
1135 "%37 = OpTypePointer Input %36\n"
1136 "%38 = OpVariable %37 Input\n"
1137 "%40 = OpTypePointer Input %26\n"
1138 "%43 = OpTypePointer Output %26\n"
1139 "%4 = OpFunction %2 None %3\n"
1140 "%5 = OpLabel\n"
1141 "%9 = OpLoad %6 %8\n"
1142 "%12 = OpIEqual %11 %9 %10\n"
1143 "OpSelectionMerge %14 None\n"
1144 "OpBranchConditional %12 %13 %14\n"
1145 "%13 = OpLabel\n"
1146 "%23 = OpAccessChain %22 %20 %10\n"
1147 "OpStore %23 %21\n"
1148 "%25 = OpAccessChain %22 %20 %24\n"
1149 "OpStore %25 %21\n"
1150 "OpBranch %14\n"
1151 "%14 = OpLabel\n"
1152 "%33 = OpLoad %6 %8\n"
1153 "%39 = OpLoad %6 %8\n"
1154 "%41 = OpAccessChain %40 %38 %39 %10\n"
1155 "%42 = OpLoad %26 %41\n"
1156 "%44 = OpAccessChain %43 %32 %33 %10\n"
1157 "OpStore %44 %42\n"
1158 "OpReturn\n"
1159 "OpFunctionEnd\n";
1160 programCollection.spirvAsmSources.add("tesc_noSubgroup") << tescNoSubgroup;
1161 }
1162
1163 {
1164 /*
1165 "#version 450\n"
1166 "layout(isolines) in;\n"
1167 "\n"
1168 "void main (void)\n"
1169 "{\n"
1170 " float pixelSize = 2.0f/1024.0f;\n"
1171 " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
1172 "}\n";
1173 */
1174 const std::string teseNoSubgroup = "; SPIR-V\n"
1175 "; Version: 1.3\n"
1176 "; Generator: Khronos Glslang Reference Front End; 2\n"
1177 "; Bound: 42\n"
1178 "; Schema: 0\n"
1179 "OpCapability Tessellation\n"
1180 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1181 "OpMemoryModel Logical GLSL450\n"
1182 "OpEntryPoint TessellationEvaluation %4 \"main\" %16 %23 %29\n"
1183 "OpExecutionMode %4 Isolines\n"
1184 "OpExecutionMode %4 SpacingEqual\n"
1185 "OpExecutionMode %4 VertexOrderCcw\n"
1186 "OpMemberDecorate %14 0 BuiltIn Position\n"
1187 "OpMemberDecorate %14 1 BuiltIn PointSize\n"
1188 "OpMemberDecorate %14 2 BuiltIn ClipDistance\n"
1189 "OpMemberDecorate %14 3 BuiltIn CullDistance\n"
1190 "OpDecorate %14 Block\n"
1191 "OpMemberDecorate %19 0 BuiltIn Position\n"
1192 "OpMemberDecorate %19 1 BuiltIn PointSize\n"
1193 "OpMemberDecorate %19 2 BuiltIn ClipDistance\n"
1194 "OpMemberDecorate %19 3 BuiltIn CullDistance\n"
1195 "OpDecorate %19 Block\n"
1196 "OpDecorate %29 BuiltIn TessCoord\n"
1197 "%2 = OpTypeVoid\n"
1198 "%3 = OpTypeFunction %2\n"
1199 "%6 = OpTypeFloat 32\n"
1200 "%7 = OpTypePointer Function %6\n"
1201 "%9 = OpConstant %6 0.00195313\n"
1202 "%10 = OpTypeVector %6 4\n"
1203 "%11 = OpTypeInt 32 0\n"
1204 "%12 = OpConstant %11 1\n"
1205 "%13 = OpTypeArray %6 %12\n"
1206 "%14 = OpTypeStruct %10 %6 %13 %13\n"
1207 "%15 = OpTypePointer Output %14\n"
1208 "%16 = OpVariable %15 Output\n"
1209 "%17 = OpTypeInt 32 1\n"
1210 "%18 = OpConstant %17 0\n"
1211 "%19 = OpTypeStruct %10 %6 %13 %13\n"
1212 "%20 = OpConstant %11 32\n"
1213 "%21 = OpTypeArray %19 %20\n"
1214 "%22 = OpTypePointer Input %21\n"
1215 "%23 = OpVariable %22 Input\n"
1216 "%24 = OpTypePointer Input %10\n"
1217 "%27 = OpTypeVector %6 3\n"
1218 "%28 = OpTypePointer Input %27\n"
1219 "%29 = OpVariable %28 Input\n"
1220 "%30 = OpConstant %11 0\n"
1221 "%31 = OpTypePointer Input %6\n"
1222 "%36 = OpConstant %6 2\n"
1223 "%40 = OpTypePointer Output %10\n"
1224 "%4 = OpFunction %2 None %3\n"
1225 "%5 = OpLabel\n"
1226 "%8 = OpVariable %7 Function\n"
1227 "OpStore %8 %9\n"
1228 "%25 = OpAccessChain %24 %23 %18 %18\n"
1229 "%26 = OpLoad %10 %25\n"
1230 "%32 = OpAccessChain %31 %29 %30\n"
1231 "%33 = OpLoad %6 %32\n"
1232 "%34 = OpLoad %6 %8\n"
1233 "%35 = OpFMul %6 %33 %34\n"
1234 "%37 = OpFDiv %6 %35 %36\n"
1235 "%38 = OpCompositeConstruct %10 %37 %37 %37 %37\n"
1236 "%39 = OpFAdd %10 %26 %38\n"
1237 "%41 = OpAccessChain %40 %16 %18\n"
1238 "OpStore %41 %39\n"
1239 "OpReturn\n"
1240 "OpFunctionEnd\n";
1241 programCollection.spirvAsmSources.add("tese_noSubgroup") << teseNoSubgroup;
1242 }
1243 }
1244
getFramebufferBufferDeclarations(const VkFormat & format,const std::vector<std::string> & declarations,const uint32_t stage)1245 static std::string getFramebufferBufferDeclarations(const VkFormat &format,
1246 const std::vector<std::string> &declarations, const uint32_t stage)
1247 {
1248 if (declarations.empty())
1249 {
1250 const std::string name = (stage == 0) ? "result" : "out_color";
1251 const std::string suffix = (stage == 2) ? "[]" : "";
1252 const std::string result = "layout(location = 0) out float " + name + suffix +
1253 ";\n"
1254 "layout(set = 0, binding = 0) uniform Buffer1\n"
1255 "{\n"
1256 " " +
1257 de::toString(subgroups::getFormatNameForGLSL(format)) + " data[" +
1258 de::toString(subgroups::maxSupportedSubgroupSize()) +
1259 "];\n"
1260 "};\n";
1261
1262 return result;
1263 }
1264 else
1265 {
1266 return declarations[stage];
1267 }
1268 }
1269
initStdFrameBufferPrograms(SourceCollections & programCollection,const vk::ShaderBuildOptions & buildOptions,VkShaderStageFlags shaderStage,VkFormat format,bool gsPointSize,const std::string & extHeader,const std::string & testSrc,const std::string & helperStr,const std::vector<std::string> & declarations)1270 void vkt::subgroups::initStdFrameBufferPrograms(SourceCollections &programCollection,
1271 const vk::ShaderBuildOptions &buildOptions,
1272 VkShaderStageFlags shaderStage, VkFormat format, bool gsPointSize,
1273 const std::string &extHeader, const std::string &testSrc,
1274 const std::string &helperStr,
1275 const std::vector<std::string> &declarations)
1276 {
1277 subgroups::setFragmentShaderFrameBuffer(programCollection);
1278
1279 if (shaderStage != VK_SHADER_STAGE_VERTEX_BIT)
1280 subgroups::setVertexShaderFrameBuffer(programCollection);
1281
1282 if (shaderStage == VK_SHADER_STAGE_VERTEX_BIT)
1283 {
1284 std::ostringstream vertex;
1285
1286 vertex << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
1287 << extHeader << "layout(location = 0) in highp vec4 in_position;\n"
1288 << getFramebufferBufferDeclarations(format, declarations, 0) << "\n"
1289 << helperStr << "void main (void)\n"
1290 << "{\n"
1291 << " uint tempRes;\n"
1292 << testSrc << " result = float(tempRes);\n"
1293 << " gl_Position = in_position;\n"
1294 << " gl_PointSize = 1.0f;\n"
1295 << "}\n";
1296
1297 programCollection.glslSources.add("vert") << glu::VertexSource(vertex.str()) << buildOptions;
1298 }
1299 else if (shaderStage == VK_SHADER_STAGE_GEOMETRY_BIT)
1300 {
1301 std::ostringstream geometry;
1302
1303 geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
1304 << extHeader << "layout(points) in;\n"
1305 << "layout(points, max_vertices = 1) out;\n"
1306 << getFramebufferBufferDeclarations(format, declarations, 1) << "\n"
1307 << helperStr << "void main (void)\n"
1308 << "{\n"
1309 << " uint tempRes;\n"
1310 << testSrc << " out_color = float(tempRes);\n"
1311 << " gl_Position = gl_in[0].gl_Position;\n"
1312 << (gsPointSize ? " gl_PointSize = gl_in[0].gl_PointSize;\n" : "") << " EmitVertex();\n"
1313 << " EndPrimitive();\n"
1314 << "}\n";
1315
1316 programCollection.glslSources.add("geometry") << glu::GeometrySource(geometry.str()) << buildOptions;
1317 }
1318 else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
1319 {
1320 std::ostringstream controlSource;
1321
1322 controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
1323 << extHeader << "layout(vertices = 2) out;\n"
1324 << getFramebufferBufferDeclarations(format, declarations, 2) << "\n"
1325 << helperStr << "void main (void)\n"
1326 << "{\n"
1327 << " if (gl_InvocationID == 0)\n"
1328 << " {\n"
1329 << " gl_TessLevelOuter[0] = 1.0f;\n"
1330 << " gl_TessLevelOuter[1] = 1.0f;\n"
1331 << " }\n"
1332 << " uint tempRes;\n"
1333 << testSrc << " out_color[gl_InvocationID] = float(tempRes);\n"
1334 << " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1335 << (gsPointSize ?
1336 " gl_out[gl_InvocationID].gl_PointSize = gl_in[gl_InvocationID].gl_PointSize;\n" :
1337 "")
1338 << "}\n";
1339
1340 programCollection.glslSources.add("tesc")
1341 << glu::TessellationControlSource(controlSource.str()) << buildOptions;
1342 subgroups::setTesEvalShaderFrameBuffer(programCollection);
1343 }
1344 else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
1345 {
1346 ostringstream evaluationSource;
1347
1348 evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
1349 << extHeader << "layout(isolines, equal_spacing, ccw ) in;\n"
1350 << getFramebufferBufferDeclarations(format, declarations, 3) << "\n"
1351 << helperStr << "void main (void)\n"
1352 << "{\n"
1353 << " uint tempRes;\n"
1354 << testSrc << " out_color = float(tempRes);\n"
1355 << " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
1356 << (gsPointSize ? " gl_PointSize = gl_in[0].gl_PointSize;\n" : "") << "}\n";
1357
1358 subgroups::setTesCtrlShaderFrameBuffer(programCollection);
1359 programCollection.glslSources.add("tese")
1360 << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
1361 }
1362 else
1363 {
1364 DE_FATAL("Unsupported shader stage");
1365 }
1366 }
1367
getBufferDeclarations(vk::VkShaderStageFlags shaderStage,const std::string & formatName,const std::vector<std::string> & declarations,const uint32_t stage)1368 static std::string getBufferDeclarations(vk::VkShaderStageFlags shaderStage, const std::string &formatName,
1369 const std::vector<std::string> &declarations, const uint32_t stage)
1370 {
1371 if (declarations.empty())
1372 {
1373 const uint32_t stageCount = vkt::subgroups::getStagesCount(shaderStage);
1374 const uint32_t binding0 = stage;
1375 const uint32_t binding1 = stageCount;
1376 const bool fragment = (shaderStage & VK_SHADER_STAGE_FRAGMENT_BIT) && (stage == stageCount);
1377 const string buffer1 = fragment ? "layout(location = 0) out uint result;\n" :
1378 "layout(set = 0, binding = " + de::toString(binding0) +
1379 ", std430) buffer Buffer1\n"
1380 "{\n"
1381 " uint result[];\n"
1382 "};\n";
1383 //todo boza I suppose it can be "layout(set = 0, binding = " + de::toString(binding1) + ", std430) readonly buffer Buffer2\n"
1384 const string buffer2 = "layout(set = 0, binding = " + de::toString(binding1) + ", std430)" +
1385 (stageCount == 1 ? "" : " readonly") + " buffer Buffer" + (fragment ? "1" : "2") +
1386 "\n"
1387 "{\n"
1388 " " +
1389 formatName +
1390 " data[];\n"
1391 "};\n";
1392
1393 return buffer1 + buffer2;
1394 }
1395 else
1396 {
1397 return declarations[stage];
1398 }
1399 }
1400
initStdPrograms(vk::SourceCollections & programCollection,const vk::ShaderBuildOptions & buildOptions,vk::VkShaderStageFlags shaderStage,vk::VkFormat format,bool gsPointSize,const std::string & extHeader,const std::string & testSrc,const std::string & helperStr,const std::vector<std::string> & declarations,const bool avoidHelperInvocations,const std::string & tempRes)1401 void vkt::subgroups::initStdPrograms(vk::SourceCollections &programCollection,
1402 const vk::ShaderBuildOptions &buildOptions, vk::VkShaderStageFlags shaderStage,
1403 vk::VkFormat format, bool gsPointSize, const std::string &extHeader,
1404 const std::string &testSrc, const std::string &helperStr,
1405 const std::vector<std::string> &declarations, const bool avoidHelperInvocations,
1406 const std::string &tempRes)
1407 {
1408 const std::string formatName = subgroups::getFormatNameForGLSL(format);
1409
1410 if (isAllComputeStages(shaderStage))
1411 {
1412 std::ostringstream src;
1413
1414 src << "#version 450\n"
1415 << extHeader
1416 << "layout (local_size_x_id = 0, local_size_y_id = 1, "
1417 "local_size_z_id = 2) in;\n"
1418 << getBufferDeclarations(shaderStage, formatName, declarations, 0) << "\n"
1419 << helperStr << "void main (void)\n"
1420 << "{\n"
1421 << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1422 << " highp uint offset = globalSize.x * ((globalSize.y * "
1423 "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1424 "gl_GlobalInvocationID.x;\n"
1425 << tempRes << testSrc << " result[offset] = tempRes;\n"
1426 << "}\n";
1427
1428 programCollection.glslSources.add("comp") << glu::ComputeSource(src.str()) << buildOptions;
1429 }
1430 #ifndef CTS_USES_VULKANSC
1431 else if (isAllMeshShadingStages(shaderStage))
1432 {
1433 const bool testMesh = ((shaderStage & VK_SHADER_STAGE_MESH_BIT_EXT) != 0u);
1434 const bool testTask = ((shaderStage & VK_SHADER_STAGE_TASK_BIT_EXT) != 0u);
1435
1436 if (testMesh)
1437 {
1438 std::ostringstream mesh;
1439
1440 mesh << "#version 450\n"
1441 << "#extension GL_EXT_mesh_shader : enable\n"
1442 << extHeader << "layout (local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n"
1443 << "layout (points) out;\n"
1444 << "layout (max_vertices = 1, max_primitives = 1) out;\n"
1445 << getBufferDeclarations(shaderStage, formatName, declarations, 0) << "\n"
1446 << helperStr << "void main (void)\n"
1447 << "{\n"
1448 << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1449 << " highp uint offset = globalSize.x * ((globalSize.y * "
1450 "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1451 "gl_GlobalInvocationID.x;\n"
1452 << tempRes << testSrc << " result[offset] = tempRes;\n"
1453 << " SetMeshOutputsEXT(0u, 0u);\n"
1454 << "}\n";
1455
1456 programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
1457 }
1458 else
1459 {
1460 const std::string meshShaderNoSubgroups =
1461 "#version 450\n"
1462 "#extension GL_EXT_mesh_shader : enable\n"
1463 "\n"
1464 "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
1465 "layout (points) out;\n"
1466 "layout (max_vertices = 1, max_primitives = 1) out;\n"
1467 "\n"
1468 "void main (void)\n"
1469 "{\n"
1470 " SetMeshOutputsEXT(0u, 0u);\n"
1471 "}\n";
1472 programCollection.glslSources.add("mesh") << glu::MeshSource(meshShaderNoSubgroups) << buildOptions;
1473 }
1474
1475 if (testTask)
1476 {
1477 const tcu::UVec3 emitSize = (testMesh ? tcu::UVec3(1u, 1u, 1u) : tcu::UVec3(0u, 0u, 0u));
1478 std::ostringstream task;
1479
1480 task << "#version 450\n"
1481 << "#extension GL_EXT_mesh_shader : enable\n"
1482 //<< "#extension GL_NV_mesh_shader : enable\n"
1483 << extHeader << "layout (local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n"
1484 << getBufferDeclarations(shaderStage, formatName, declarations, 0) << "\n"
1485 << helperStr << "void main (void)\n"
1486 << "{\n"
1487 << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1488 //<< " uvec3 globalSize = uvec3(0, 0, 0)/*gl_NumWorkGroups*/ * gl_WorkGroupSize;\n"
1489 << " highp uint offset = globalSize.x * ((globalSize.y * "
1490 "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1491 "gl_GlobalInvocationID.x;\n"
1492 << tempRes << testSrc << " result[offset] = tempRes;\n"
1493 << " EmitMeshTasksEXT(" << emitSize.x() << ", " << emitSize.y() << ", " << emitSize.z()
1494 << ");\n"
1495 //<< " gl_TaskCountNV = " << emitSize.x() << ";\n"
1496 << "}\n";
1497
1498 programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
1499 }
1500 }
1501 #endif // CTS_USES_VULKANSC
1502 else if (isAllGraphicsStages(shaderStage))
1503 {
1504 const string vertex =
1505 "#version 450\n" + extHeader + getBufferDeclarations(shaderStage, formatName, declarations, 0) + "\n" +
1506 helperStr +
1507 "void main (void)\n"
1508 "{\n"
1509 " uint tempRes;\n" +
1510 testSrc +
1511 " result[gl_VertexIndex] = tempRes;\n"
1512 " float pixelSize = 2.0f/1024.0f;\n"
1513 " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
1514 " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
1515 " gl_PointSize = 1.0f;\n"
1516 "}\n";
1517
1518 const string tesc =
1519 "#version 450\n" + extHeader + "layout(vertices=1) out;\n" +
1520 getBufferDeclarations(shaderStage, formatName, declarations, 1) + "\n" + helperStr +
1521 "void main (void)\n"
1522 "{\n" +
1523 tempRes + testSrc +
1524 " result[gl_PrimitiveID] = tempRes;\n"
1525 " if (gl_InvocationID == 0)\n"
1526 " {\n"
1527 " gl_TessLevelOuter[0] = 1.0f;\n"
1528 " gl_TessLevelOuter[1] = 1.0f;\n"
1529 " }\n"
1530 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n" +
1531 (gsPointSize ? " gl_out[gl_InvocationID].gl_PointSize = gl_in[gl_InvocationID].gl_PointSize;\n" : "") +
1532 "}\n";
1533
1534 const string tese = "#version 450\n" + extHeader + "layout(isolines) in;\n" +
1535 getBufferDeclarations(shaderStage, formatName, declarations, 2) + "\n" + helperStr +
1536 "void main (void)\n"
1537 "{\n" +
1538 tempRes + testSrc +
1539 " result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempRes;\n"
1540 " float pixelSize = 2.0f/1024.0f;\n"
1541 " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n" +
1542 (gsPointSize ? " gl_PointSize = gl_in[0].gl_PointSize;\n" : "") + "}\n";
1543
1544 const string geometry = "#version 450\n" + extHeader +
1545 "layout(${TOPOLOGY}) in;\n"
1546 "layout(points, max_vertices = 1) out;\n" +
1547 getBufferDeclarations(shaderStage, formatName, declarations, 3) + "\n" + helperStr +
1548 "void main (void)\n"
1549 "{\n" +
1550 tempRes + testSrc +
1551 " result[gl_PrimitiveIDIn] = tempRes;\n"
1552 " gl_Position = gl_in[0].gl_Position;\n" +
1553 (gsPointSize ? " gl_PointSize = gl_in[0].gl_PointSize;\n" : "") +
1554 " EmitVertex();\n"
1555 " EndPrimitive();\n"
1556 "}\n";
1557
1558 const string fragment =
1559 "#version 450\n" + extHeader + getBufferDeclarations(shaderStage, formatName, declarations, 4) + helperStr +
1560 "void main (void)\n"
1561 "{\n" +
1562 (avoidHelperInvocations ? " if (gl_HelperInvocation) return;\n" : "") + tempRes + testSrc +
1563 " result = tempRes;\n"
1564 "}\n";
1565
1566 subgroups::addNoSubgroupShader(programCollection);
1567
1568 programCollection.glslSources.add("vert") << glu::VertexSource(vertex) << buildOptions;
1569 programCollection.glslSources.add("tesc") << glu::TessellationControlSource(tesc) << buildOptions;
1570 programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(tese) << buildOptions;
1571 subgroups::addGeometryShadersFromTemplate(geometry, buildOptions, programCollection.glslSources);
1572 programCollection.glslSources.add("fragment") << glu::FragmentSource(fragment) << buildOptions;
1573 }
1574 #ifndef CTS_USES_VULKANSC
1575 else if (isAllRayTracingStages(shaderStage))
1576 {
1577 const std::string rgenShader =
1578 "#version 460 core\n"
1579 "#extension GL_EXT_ray_tracing: require\n" +
1580 extHeader +
1581 "layout(location = 0) rayPayloadEXT uvec4 payload;\n"
1582 "layout(location = 0) callableDataEXT uvec4 callData;"
1583 "layout(set = 1, binding = 0) uniform accelerationStructureEXT topLevelAS;\n" +
1584 getBufferDeclarations(shaderStage, formatName, declarations, 0) + "\n" + helperStr +
1585 "void main()\n"
1586 "{\n" +
1587 tempRes + testSrc +
1588 " uint rayFlags = 0;\n"
1589 " uint cullMask = 0xFF;\n"
1590 " float tmin = 0.0;\n"
1591 " float tmax = 9.0;\n"
1592 " vec3 origin = vec3((float(gl_LaunchIDEXT.x) + 0.5f) / float(gl_LaunchSizeEXT.x), "
1593 "(float(gl_LaunchIDEXT.y) + 0.5f) / float(gl_LaunchSizeEXT.y), 0.0);\n"
1594 " vec3 directHit = vec3(0.0, 0.0, -1.0);\n"
1595 " vec3 directMiss = vec3(0.0, 0.0, +1.0);\n"
1596 "\n"
1597 " traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directHit, tmax, 0);\n"
1598 " traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directMiss, tmax, 0);\n"
1599 " executeCallableEXT(0, 0);"
1600 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1601 "}\n";
1602 const std::string ahitShader = "#version 460 core\n"
1603 "#extension GL_EXT_ray_tracing: require\n" +
1604 extHeader +
1605 "hitAttributeEXT vec3 attribs;\n"
1606 "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n" +
1607 getBufferDeclarations(shaderStage, formatName, declarations, 1) + "\n" +
1608 helperStr +
1609 "void main()\n"
1610 "{\n" +
1611 tempRes + testSrc +
1612 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1613 "}\n";
1614 const std::string chitShader = "#version 460 core\n"
1615 "#extension GL_EXT_ray_tracing: require\n" +
1616 extHeader +
1617 "hitAttributeEXT vec3 attribs;\n"
1618 "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n" +
1619 getBufferDeclarations(shaderStage, formatName, declarations, 2) + "\n" +
1620 helperStr +
1621 "void main()\n"
1622 "{\n" +
1623 tempRes + testSrc +
1624 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1625 "}\n";
1626 const std::string missShader = "#version 460 core\n"
1627 "#extension GL_EXT_ray_tracing: require\n" +
1628 extHeader + "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n" +
1629 getBufferDeclarations(shaderStage, formatName, declarations, 3) + "\n" +
1630 helperStr +
1631 "void main()\n"
1632 "{\n" +
1633 tempRes + testSrc +
1634 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1635 "}\n";
1636 const std::string sectShader = "#version 460 core\n"
1637 "#extension GL_EXT_ray_tracing: require\n" +
1638 extHeader + "hitAttributeEXT vec3 hitAttribute;\n" +
1639 getBufferDeclarations(shaderStage, formatName, declarations, 4) + "\n" +
1640 helperStr +
1641 "void main()\n"
1642 "{\n" +
1643 tempRes + testSrc +
1644 " reportIntersectionEXT(0.75f, 0x7Eu);\n"
1645 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1646 "}\n";
1647 const std::string callShader = "#version 460 core\n"
1648 "#extension GL_EXT_ray_tracing: require\n" +
1649 extHeader + "layout(location = 0) callableDataInEXT float callData;\n" +
1650 getBufferDeclarations(shaderStage, formatName, declarations, 5) + "\n" +
1651 helperStr +
1652 "void main()\n"
1653 "{\n" +
1654 tempRes + testSrc +
1655 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1656 "}\n";
1657
1658 programCollection.glslSources.add("rgen") << glu::RaygenSource(rgenShader) << buildOptions;
1659 programCollection.glslSources.add("ahit") << glu::AnyHitSource(ahitShader) << buildOptions;
1660 programCollection.glslSources.add("chit") << glu::ClosestHitSource(chitShader) << buildOptions;
1661 programCollection.glslSources.add("miss") << glu::MissSource(missShader) << buildOptions;
1662 programCollection.glslSources.add("sect") << glu::IntersectionSource(sectShader) << buildOptions;
1663 programCollection.glslSources.add("call") << glu::CallableSource(callShader) << buildOptions;
1664
1665 subgroups::addRayTracingNoSubgroupShader(programCollection);
1666 }
1667 #endif // CTS_USES_VULKANSC
1668 else
1669 TCU_THROW(InternalError, "Unknown stage or invalid stage set");
1670 }
1671
isSubgroupSupported(Context & context)1672 bool vkt::subgroups::isSubgroupSupported(Context &context)
1673 {
1674 return context.contextSupports(vk::ApiVersion(0, 1, 1, 0));
1675 }
1676
areSubgroupOperationsSupportedForStage(Context & context,const VkShaderStageFlags stage)1677 bool vkt::subgroups::areSubgroupOperationsSupportedForStage(Context &context, const VkShaderStageFlags stage)
1678 {
1679 return (stage & (context.getSubgroupProperties().supportedStages)) ? true : false;
1680 }
1681
isSubgroupFeatureSupportedForDevice(Context & context,VkSubgroupFeatureFlagBits bit)1682 bool vkt::subgroups::isSubgroupFeatureSupportedForDevice(Context &context, VkSubgroupFeatureFlagBits bit)
1683 {
1684 return (bit & (context.getSubgroupProperties().supportedOperations)) ? true : false;
1685 }
1686
areQuadOperationsSupportedForStages(Context & context,const VkShaderStageFlags stages)1687 bool vkt::subgroups::areQuadOperationsSupportedForStages(Context &context, const VkShaderStageFlags stages)
1688 {
1689 // Check general quad feature support first.
1690 if (!isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_QUAD_BIT))
1691 return false;
1692
1693 if (context.getSubgroupProperties().quadOperationsInAllStages == VK_TRUE)
1694 return true; // No problem, any stage works.
1695
1696 // Only frag and compute are supported.
1697 const VkShaderStageFlags fragCompute = (VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_COMPUTE_BIT);
1698 const VkShaderStageFlags otherStages = ~fragCompute;
1699 return ((stages & otherStages) == 0u);
1700 }
1701
isFragmentSSBOSupportedForDevice(Context & context)1702 bool vkt::subgroups::isFragmentSSBOSupportedForDevice(Context &context)
1703 {
1704 return context.getDeviceFeatures().fragmentStoresAndAtomics ? true : false;
1705 }
1706
isVertexSSBOSupportedForDevice(Context & context)1707 bool vkt::subgroups::isVertexSSBOSupportedForDevice(Context &context)
1708 {
1709 return context.getDeviceFeatures().vertexPipelineStoresAndAtomics ? true : false;
1710 }
1711
isInt64SupportedForDevice(Context & context)1712 bool vkt::subgroups::isInt64SupportedForDevice(Context &context)
1713 {
1714 return context.getDeviceFeatures().shaderInt64 ? true : false;
1715 }
1716
isTessellationAndGeometryPointSizeSupported(Context & context)1717 bool vkt::subgroups::isTessellationAndGeometryPointSizeSupported(Context &context)
1718 {
1719 return context.getDeviceFeatures().shaderTessellationAndGeometryPointSize ? true : false;
1720 }
1721
is16BitUBOStorageSupported(Context & context)1722 bool vkt::subgroups::is16BitUBOStorageSupported(Context &context)
1723 {
1724 return context.get16BitStorageFeatures().uniformAndStorageBuffer16BitAccess ? true : false;
1725 }
1726
is8BitUBOStorageSupported(Context & context)1727 bool vkt::subgroups::is8BitUBOStorageSupported(Context &context)
1728 {
1729 return context.get8BitStorageFeatures().uniformAndStorageBuffer8BitAccess ? true : false;
1730 }
1731
isFormatSupportedForDevice(Context & context,vk::VkFormat format)1732 bool vkt::subgroups::isFormatSupportedForDevice(Context &context, vk::VkFormat format)
1733 {
1734 const VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures &subgroupExtendedTypesFeatures =
1735 context.getShaderSubgroupExtendedTypesFeatures();
1736 const VkPhysicalDeviceShaderFloat16Int8Features &float16Int8Features = context.getShaderFloat16Int8Features();
1737 const VkPhysicalDevice16BitStorageFeatures &storage16bit = context.get16BitStorageFeatures();
1738 const VkPhysicalDevice8BitStorageFeatures &storage8bit = context.get8BitStorageFeatures();
1739 const VkPhysicalDeviceFeatures &features = context.getDeviceFeatures();
1740 bool shaderFloat64 = features.shaderFloat64 ? true : false;
1741 bool shaderInt16 = features.shaderInt16 ? true : false;
1742 bool shaderInt64 = features.shaderInt64 ? true : false;
1743 bool shaderSubgroupExtendedTypes = false;
1744 bool shaderFloat16 = false;
1745 bool shaderInt8 = false;
1746 bool storageBuffer16BitAccess = false;
1747 bool storageBuffer8BitAccess = false;
1748
1749 if (context.isDeviceFunctionalitySupported("VK_KHR_shader_subgroup_extended_types") &&
1750 context.isDeviceFunctionalitySupported("VK_KHR_shader_float16_int8"))
1751 {
1752 shaderSubgroupExtendedTypes = subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes ? true : false;
1753 shaderFloat16 = float16Int8Features.shaderFloat16 ? true : false;
1754 shaderInt8 = float16Int8Features.shaderInt8 ? true : false;
1755
1756 if (context.isDeviceFunctionalitySupported("VK_KHR_16bit_storage"))
1757 storageBuffer16BitAccess = storage16bit.storageBuffer16BitAccess ? true : false;
1758
1759 if (context.isDeviceFunctionalitySupported("VK_KHR_8bit_storage"))
1760 storageBuffer8BitAccess = storage8bit.storageBuffer8BitAccess ? true : false;
1761 }
1762
1763 switch (format)
1764 {
1765 default:
1766 return true;
1767 case VK_FORMAT_R16_SFLOAT:
1768 case VK_FORMAT_R16G16_SFLOAT:
1769 case VK_FORMAT_R16G16B16_SFLOAT:
1770 case VK_FORMAT_R16G16B16A16_SFLOAT:
1771 return shaderSubgroupExtendedTypes && shaderFloat16 && storageBuffer16BitAccess;
1772 case VK_FORMAT_R64_SFLOAT:
1773 case VK_FORMAT_R64G64_SFLOAT:
1774 case VK_FORMAT_R64G64B64_SFLOAT:
1775 case VK_FORMAT_R64G64B64A64_SFLOAT:
1776 return shaderFloat64;
1777 case VK_FORMAT_R8_SINT:
1778 case VK_FORMAT_R8G8_SINT:
1779 case VK_FORMAT_R8G8B8_SINT:
1780 case VK_FORMAT_R8G8B8A8_SINT:
1781 case VK_FORMAT_R8_UINT:
1782 case VK_FORMAT_R8G8_UINT:
1783 case VK_FORMAT_R8G8B8_UINT:
1784 case VK_FORMAT_R8G8B8A8_UINT:
1785 return shaderSubgroupExtendedTypes && shaderInt8 && storageBuffer8BitAccess;
1786 case VK_FORMAT_R16_SINT:
1787 case VK_FORMAT_R16G16_SINT:
1788 case VK_FORMAT_R16G16B16_SINT:
1789 case VK_FORMAT_R16G16B16A16_SINT:
1790 case VK_FORMAT_R16_UINT:
1791 case VK_FORMAT_R16G16_UINT:
1792 case VK_FORMAT_R16G16B16_UINT:
1793 case VK_FORMAT_R16G16B16A16_UINT:
1794 return shaderSubgroupExtendedTypes && shaderInt16 && storageBuffer16BitAccess;
1795 case VK_FORMAT_R64_SINT:
1796 case VK_FORMAT_R64G64_SINT:
1797 case VK_FORMAT_R64G64B64_SINT:
1798 case VK_FORMAT_R64G64B64A64_SINT:
1799 case VK_FORMAT_R64_UINT:
1800 case VK_FORMAT_R64G64_UINT:
1801 case VK_FORMAT_R64G64B64_UINT:
1802 case VK_FORMAT_R64G64B64A64_UINT:
1803 return shaderSubgroupExtendedTypes && shaderInt64;
1804 }
1805 }
1806
isSubgroupBroadcastDynamicIdSupported(Context & context)1807 bool vkt::subgroups::isSubgroupBroadcastDynamicIdSupported(Context &context)
1808 {
1809 return context.contextSupports(vk::ApiVersion(0, 1, 2, 0)) &&
1810 vk::getPhysicalDeviceVulkan12Features(context.getInstanceInterface(), context.getPhysicalDevice())
1811 .subgroupBroadcastDynamicId;
1812 }
1813
isSubgroupRotateSpecVersionValid(Context & context)1814 bool vkt::subgroups::isSubgroupRotateSpecVersionValid(Context &context)
1815 {
1816 // Ensure "VK_KHR_shader_subgroup_rotate" extension's spec version is at least 2
1817 {
1818 const std::string extensionName = "VK_KHR_shader_subgroup_rotate";
1819 const std::vector<VkExtensionProperties> deviceExtensionProperties =
1820 enumerateDeviceExtensionProperties(context.getInstanceInterface(), context.getPhysicalDevice(), nullptr);
1821
1822 for (const auto &property : deviceExtensionProperties)
1823 {
1824 if (property.extensionName == extensionName && property.specVersion < 2)
1825 {
1826 return false;
1827 }
1828 }
1829 }
1830 return true;
1831 }
1832
getFormatNameForGLSL(VkFormat format)1833 std::string vkt::subgroups::getFormatNameForGLSL(VkFormat format)
1834 {
1835 switch (format)
1836 {
1837 case VK_FORMAT_R8_SINT:
1838 return "int8_t";
1839 case VK_FORMAT_R8G8_SINT:
1840 return "i8vec2";
1841 case VK_FORMAT_R8G8B8_SINT:
1842 return "i8vec3";
1843 case VK_FORMAT_R8G8B8A8_SINT:
1844 return "i8vec4";
1845 case VK_FORMAT_R8_UINT:
1846 return "uint8_t";
1847 case VK_FORMAT_R8G8_UINT:
1848 return "u8vec2";
1849 case VK_FORMAT_R8G8B8_UINT:
1850 return "u8vec3";
1851 case VK_FORMAT_R8G8B8A8_UINT:
1852 return "u8vec4";
1853 case VK_FORMAT_R16_SINT:
1854 return "int16_t";
1855 case VK_FORMAT_R16G16_SINT:
1856 return "i16vec2";
1857 case VK_FORMAT_R16G16B16_SINT:
1858 return "i16vec3";
1859 case VK_FORMAT_R16G16B16A16_SINT:
1860 return "i16vec4";
1861 case VK_FORMAT_R16_UINT:
1862 return "uint16_t";
1863 case VK_FORMAT_R16G16_UINT:
1864 return "u16vec2";
1865 case VK_FORMAT_R16G16B16_UINT:
1866 return "u16vec3";
1867 case VK_FORMAT_R16G16B16A16_UINT:
1868 return "u16vec4";
1869 case VK_FORMAT_R32_SINT:
1870 return "int";
1871 case VK_FORMAT_R32G32_SINT:
1872 return "ivec2";
1873 case VK_FORMAT_R32G32B32_SINT:
1874 return "ivec3";
1875 case VK_FORMAT_R32G32B32A32_SINT:
1876 return "ivec4";
1877 case VK_FORMAT_R32_UINT:
1878 return "uint";
1879 case VK_FORMAT_R32G32_UINT:
1880 return "uvec2";
1881 case VK_FORMAT_R32G32B32_UINT:
1882 return "uvec3";
1883 case VK_FORMAT_R32G32B32A32_UINT:
1884 return "uvec4";
1885 case VK_FORMAT_R64_SINT:
1886 return "int64_t";
1887 case VK_FORMAT_R64G64_SINT:
1888 return "i64vec2";
1889 case VK_FORMAT_R64G64B64_SINT:
1890 return "i64vec3";
1891 case VK_FORMAT_R64G64B64A64_SINT:
1892 return "i64vec4";
1893 case VK_FORMAT_R64_UINT:
1894 return "uint64_t";
1895 case VK_FORMAT_R64G64_UINT:
1896 return "u64vec2";
1897 case VK_FORMAT_R64G64B64_UINT:
1898 return "u64vec3";
1899 case VK_FORMAT_R64G64B64A64_UINT:
1900 return "u64vec4";
1901 case VK_FORMAT_R16_SFLOAT:
1902 return "float16_t";
1903 case VK_FORMAT_R16G16_SFLOAT:
1904 return "f16vec2";
1905 case VK_FORMAT_R16G16B16_SFLOAT:
1906 return "f16vec3";
1907 case VK_FORMAT_R16G16B16A16_SFLOAT:
1908 return "f16vec4";
1909 case VK_FORMAT_R32_SFLOAT:
1910 return "float";
1911 case VK_FORMAT_R32G32_SFLOAT:
1912 return "vec2";
1913 case VK_FORMAT_R32G32B32_SFLOAT:
1914 return "vec3";
1915 case VK_FORMAT_R32G32B32A32_SFLOAT:
1916 return "vec4";
1917 case VK_FORMAT_R64_SFLOAT:
1918 return "double";
1919 case VK_FORMAT_R64G64_SFLOAT:
1920 return "dvec2";
1921 case VK_FORMAT_R64G64B64_SFLOAT:
1922 return "dvec3";
1923 case VK_FORMAT_R64G64B64A64_SFLOAT:
1924 return "dvec4";
1925 case VK_FORMAT_R8_USCALED:
1926 return "bool";
1927 case VK_FORMAT_R8G8_USCALED:
1928 return "bvec2";
1929 case VK_FORMAT_R8G8B8_USCALED:
1930 return "bvec3";
1931 case VK_FORMAT_R8G8B8A8_USCALED:
1932 return "bvec4";
1933 default:
1934 TCU_THROW(InternalError, "Unhandled format");
1935 }
1936 }
1937
getAdditionalExtensionForFormat(vk::VkFormat format)1938 std::string vkt::subgroups::getAdditionalExtensionForFormat(vk::VkFormat format)
1939 {
1940 switch (format)
1941 {
1942 default:
1943 return "";
1944 case VK_FORMAT_R8_SINT:
1945 case VK_FORMAT_R8G8_SINT:
1946 case VK_FORMAT_R8G8B8_SINT:
1947 case VK_FORMAT_R8G8B8A8_SINT:
1948 case VK_FORMAT_R8_UINT:
1949 case VK_FORMAT_R8G8_UINT:
1950 case VK_FORMAT_R8G8B8_UINT:
1951 case VK_FORMAT_R8G8B8A8_UINT:
1952 return "#extension GL_EXT_shader_subgroup_extended_types_int8 : enable\n";
1953 case VK_FORMAT_R16_SINT:
1954 case VK_FORMAT_R16G16_SINT:
1955 case VK_FORMAT_R16G16B16_SINT:
1956 case VK_FORMAT_R16G16B16A16_SINT:
1957 case VK_FORMAT_R16_UINT:
1958 case VK_FORMAT_R16G16_UINT:
1959 case VK_FORMAT_R16G16B16_UINT:
1960 case VK_FORMAT_R16G16B16A16_UINT:
1961 return "#extension GL_EXT_shader_subgroup_extended_types_int16 : enable\n";
1962 case VK_FORMAT_R64_SINT:
1963 case VK_FORMAT_R64G64_SINT:
1964 case VK_FORMAT_R64G64B64_SINT:
1965 case VK_FORMAT_R64G64B64A64_SINT:
1966 case VK_FORMAT_R64_UINT:
1967 case VK_FORMAT_R64G64_UINT:
1968 case VK_FORMAT_R64G64B64_UINT:
1969 case VK_FORMAT_R64G64B64A64_UINT:
1970 return "#extension GL_EXT_shader_subgroup_extended_types_int64 : enable\n";
1971 case VK_FORMAT_R16_SFLOAT:
1972 case VK_FORMAT_R16G16_SFLOAT:
1973 case VK_FORMAT_R16G16B16_SFLOAT:
1974 case VK_FORMAT_R16G16B16A16_SFLOAT:
1975 return "#extension GL_EXT_shader_subgroup_extended_types_float16 : enable\n";
1976 }
1977 }
1978
getAllFormats()1979 const std::vector<vk::VkFormat> vkt::subgroups::getAllFormats()
1980 {
1981 std::vector<VkFormat> formats;
1982
1983 formats.push_back(VK_FORMAT_R8_SINT);
1984 formats.push_back(VK_FORMAT_R8G8_SINT);
1985 formats.push_back(VK_FORMAT_R8G8B8_SINT);
1986 formats.push_back(VK_FORMAT_R8G8B8A8_SINT);
1987 formats.push_back(VK_FORMAT_R8_UINT);
1988 formats.push_back(VK_FORMAT_R8G8_UINT);
1989 formats.push_back(VK_FORMAT_R8G8B8_UINT);
1990 formats.push_back(VK_FORMAT_R8G8B8A8_UINT);
1991 formats.push_back(VK_FORMAT_R16_SINT);
1992 formats.push_back(VK_FORMAT_R16G16_SINT);
1993 formats.push_back(VK_FORMAT_R16G16B16_SINT);
1994 formats.push_back(VK_FORMAT_R16G16B16A16_SINT);
1995 formats.push_back(VK_FORMAT_R16_UINT);
1996 formats.push_back(VK_FORMAT_R16G16_UINT);
1997 formats.push_back(VK_FORMAT_R16G16B16_UINT);
1998 formats.push_back(VK_FORMAT_R16G16B16A16_UINT);
1999 formats.push_back(VK_FORMAT_R32_SINT);
2000 formats.push_back(VK_FORMAT_R32G32_SINT);
2001 formats.push_back(VK_FORMAT_R32G32B32_SINT);
2002 formats.push_back(VK_FORMAT_R32G32B32A32_SINT);
2003 formats.push_back(VK_FORMAT_R32_UINT);
2004 formats.push_back(VK_FORMAT_R32G32_UINT);
2005 formats.push_back(VK_FORMAT_R32G32B32_UINT);
2006 formats.push_back(VK_FORMAT_R32G32B32A32_UINT);
2007 formats.push_back(VK_FORMAT_R64_SINT);
2008 formats.push_back(VK_FORMAT_R64G64_SINT);
2009 formats.push_back(VK_FORMAT_R64G64B64_SINT);
2010 formats.push_back(VK_FORMAT_R64G64B64A64_SINT);
2011 formats.push_back(VK_FORMAT_R64_UINT);
2012 formats.push_back(VK_FORMAT_R64G64_UINT);
2013 formats.push_back(VK_FORMAT_R64G64B64_UINT);
2014 formats.push_back(VK_FORMAT_R64G64B64A64_UINT);
2015 formats.push_back(VK_FORMAT_R16_SFLOAT);
2016 formats.push_back(VK_FORMAT_R16G16_SFLOAT);
2017 formats.push_back(VK_FORMAT_R16G16B16_SFLOAT);
2018 formats.push_back(VK_FORMAT_R16G16B16A16_SFLOAT);
2019 formats.push_back(VK_FORMAT_R32_SFLOAT);
2020 formats.push_back(VK_FORMAT_R32G32_SFLOAT);
2021 formats.push_back(VK_FORMAT_R32G32B32_SFLOAT);
2022 formats.push_back(VK_FORMAT_R32G32B32A32_SFLOAT);
2023 formats.push_back(VK_FORMAT_R64_SFLOAT);
2024 formats.push_back(VK_FORMAT_R64G64_SFLOAT);
2025 formats.push_back(VK_FORMAT_R64G64B64_SFLOAT);
2026 formats.push_back(VK_FORMAT_R64G64B64A64_SFLOAT);
2027 formats.push_back(VK_FORMAT_R8_USCALED);
2028 formats.push_back(VK_FORMAT_R8G8_USCALED);
2029 formats.push_back(VK_FORMAT_R8G8B8_USCALED);
2030 formats.push_back(VK_FORMAT_R8G8B8A8_USCALED);
2031
2032 return formats;
2033 }
2034
isFormatSigned(VkFormat format)2035 bool vkt::subgroups::isFormatSigned(VkFormat format)
2036 {
2037 switch (format)
2038 {
2039 default:
2040 return false;
2041 case VK_FORMAT_R8_SINT:
2042 case VK_FORMAT_R8G8_SINT:
2043 case VK_FORMAT_R8G8B8_SINT:
2044 case VK_FORMAT_R8G8B8A8_SINT:
2045 case VK_FORMAT_R16_SINT:
2046 case VK_FORMAT_R16G16_SINT:
2047 case VK_FORMAT_R16G16B16_SINT:
2048 case VK_FORMAT_R16G16B16A16_SINT:
2049 case VK_FORMAT_R32_SINT:
2050 case VK_FORMAT_R32G32_SINT:
2051 case VK_FORMAT_R32G32B32_SINT:
2052 case VK_FORMAT_R32G32B32A32_SINT:
2053 case VK_FORMAT_R64_SINT:
2054 case VK_FORMAT_R64G64_SINT:
2055 case VK_FORMAT_R64G64B64_SINT:
2056 case VK_FORMAT_R64G64B64A64_SINT:
2057 return true;
2058 }
2059 }
2060
isFormatUnsigned(VkFormat format)2061 bool vkt::subgroups::isFormatUnsigned(VkFormat format)
2062 {
2063 switch (format)
2064 {
2065 default:
2066 return false;
2067 case VK_FORMAT_R8_UINT:
2068 case VK_FORMAT_R8G8_UINT:
2069 case VK_FORMAT_R8G8B8_UINT:
2070 case VK_FORMAT_R8G8B8A8_UINT:
2071 case VK_FORMAT_R16_UINT:
2072 case VK_FORMAT_R16G16_UINT:
2073 case VK_FORMAT_R16G16B16_UINT:
2074 case VK_FORMAT_R16G16B16A16_UINT:
2075 case VK_FORMAT_R32_UINT:
2076 case VK_FORMAT_R32G32_UINT:
2077 case VK_FORMAT_R32G32B32_UINT:
2078 case VK_FORMAT_R32G32B32A32_UINT:
2079 case VK_FORMAT_R64_UINT:
2080 case VK_FORMAT_R64G64_UINT:
2081 case VK_FORMAT_R64G64B64_UINT:
2082 case VK_FORMAT_R64G64B64A64_UINT:
2083 return true;
2084 }
2085 }
2086
isFormatFloat(VkFormat format)2087 bool vkt::subgroups::isFormatFloat(VkFormat format)
2088 {
2089 switch (format)
2090 {
2091 default:
2092 return false;
2093 case VK_FORMAT_R16_SFLOAT:
2094 case VK_FORMAT_R16G16_SFLOAT:
2095 case VK_FORMAT_R16G16B16_SFLOAT:
2096 case VK_FORMAT_R16G16B16A16_SFLOAT:
2097 case VK_FORMAT_R32_SFLOAT:
2098 case VK_FORMAT_R32G32_SFLOAT:
2099 case VK_FORMAT_R32G32B32_SFLOAT:
2100 case VK_FORMAT_R32G32B32A32_SFLOAT:
2101 case VK_FORMAT_R64_SFLOAT:
2102 case VK_FORMAT_R64G64_SFLOAT:
2103 case VK_FORMAT_R64G64B64_SFLOAT:
2104 case VK_FORMAT_R64G64B64A64_SFLOAT:
2105 return true;
2106 }
2107 }
2108
isFormatBool(VkFormat format)2109 bool vkt::subgroups::isFormatBool(VkFormat format)
2110 {
2111 switch (format)
2112 {
2113 default:
2114 return false;
2115 case VK_FORMAT_R8_USCALED:
2116 case VK_FORMAT_R8G8_USCALED:
2117 case VK_FORMAT_R8G8B8_USCALED:
2118 case VK_FORMAT_R8G8B8A8_USCALED:
2119 return true;
2120 }
2121 }
2122
isFormat8bitTy(VkFormat format)2123 bool vkt::subgroups::isFormat8bitTy(VkFormat format)
2124 {
2125 switch (format)
2126 {
2127 default:
2128 return false;
2129 case VK_FORMAT_R8_SINT:
2130 case VK_FORMAT_R8G8_SINT:
2131 case VK_FORMAT_R8G8B8_SINT:
2132 case VK_FORMAT_R8G8B8A8_SINT:
2133 case VK_FORMAT_R8_UINT:
2134 case VK_FORMAT_R8G8_UINT:
2135 case VK_FORMAT_R8G8B8_UINT:
2136 case VK_FORMAT_R8G8B8A8_UINT:
2137 return true;
2138 }
2139 }
2140
isFormat16BitTy(VkFormat format)2141 bool vkt::subgroups::isFormat16BitTy(VkFormat format)
2142 {
2143 switch (format)
2144 {
2145 default:
2146 return false;
2147 case VK_FORMAT_R16_SFLOAT:
2148 case VK_FORMAT_R16G16_SFLOAT:
2149 case VK_FORMAT_R16G16B16_SFLOAT:
2150 case VK_FORMAT_R16G16B16A16_SFLOAT:
2151 case VK_FORMAT_R16_SINT:
2152 case VK_FORMAT_R16G16_SINT:
2153 case VK_FORMAT_R16G16B16_SINT:
2154 case VK_FORMAT_R16G16B16A16_SINT:
2155 case VK_FORMAT_R16_UINT:
2156 case VK_FORMAT_R16G16_UINT:
2157 case VK_FORMAT_R16G16B16_UINT:
2158 case VK_FORMAT_R16G16B16A16_UINT:
2159 return true;
2160 }
2161 }
2162
void vkt::subgroups::setVertexShaderFrameBuffer(SourceCollections &programCollection)
{
    // Registers a fixed pass-through vertex shader under the name "vert",
    // written directly in SPIR-V assembly. It forwards the input position to
    // gl_Position and sets gl_PointSize to 1.0. GLSL equivalent:
    /*
        "layout(location = 0) in highp vec4 in_position;\n"
        "void main (void)\n"
        "{\n"
        "  gl_Position = in_position;\n"
        "  gl_PointSize = 1.0f;\n"
        "}\n";
    */
    programCollection.spirvAsmSources.add("vert") << "; SPIR-V\n"
                                                     "; Version: 1.3\n"
                                                     "; Generator: Khronos Glslang Reference Front End; 7\n"
                                                     "; Bound: 25\n"
                                                     "; Schema: 0\n"
                                                     "OpCapability Shader\n"
                                                     "%1 = OpExtInstImport \"GLSL.std.450\"\n"
                                                     "OpMemoryModel Logical GLSL450\n"
                                                     // %13 = gl_PerVertex output block, %17 = in_position.
                                                     "OpEntryPoint Vertex %4 \"main\" %13 %17\n"
                                                     "OpMemberDecorate %11 0 BuiltIn Position\n"
                                                     "OpMemberDecorate %11 1 BuiltIn PointSize\n"
                                                     "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
                                                     "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
                                                     "OpDecorate %11 Block\n"
                                                     "OpDecorate %17 Location 0\n"
                                                     "%2 = OpTypeVoid\n"
                                                     "%3 = OpTypeFunction %2\n"
                                                     "%6 = OpTypeFloat 32\n"
                                                     "%7 = OpTypeVector %6 4\n"
                                                     "%8 = OpTypeInt 32 0\n"
                                                     "%9 = OpConstant %8 1\n"
                                                     "%10 = OpTypeArray %6 %9\n"
                                                     "%11 = OpTypeStruct %7 %6 %10 %10\n"
                                                     "%12 = OpTypePointer Output %11\n"
                                                     "%13 = OpVariable %12 Output\n"
                                                     "%14 = OpTypeInt 32 1\n"
                                                     "%15 = OpConstant %14 0\n"
                                                     "%16 = OpTypePointer Input %7\n"
                                                     "%17 = OpVariable %16 Input\n"
                                                     "%19 = OpTypePointer Output %7\n"
                                                     "%21 = OpConstant %14 1\n"
                                                     "%22 = OpConstant %6 1\n"
                                                     "%23 = OpTypePointer Output %6\n"
                                                     "%4 = OpFunction %2 None %3\n"
                                                     "%5 = OpLabel\n"
                                                     // gl_Position = in_position
                                                     "%18 = OpLoad %7 %17\n"
                                                     "%20 = OpAccessChain %19 %13 %15\n"
                                                     "OpStore %20 %18\n"
                                                     // gl_PointSize = 1.0
                                                     "%24 = OpAccessChain %23 %13 %21\n"
                                                     "OpStore %24 %22\n"
                                                     "OpReturn\n"
                                                     "OpFunctionEnd\n";
}
2216
void vkt::subgroups::setFragmentShaderFrameBuffer(vk::SourceCollections &programCollection)
{
    // Registers a fixed fragment shader under the name "fragment", written
    // directly in SPIR-V assembly. It converts the interpolated float input
    // to uint and writes it to the color output. GLSL equivalent:
    /*
        "layout(location = 0) in float in_color;\n"
        "layout(location = 0) out uint out_color;\n"
        "void main()\n"
        {\n"
        "    out_color = uint(in_color);\n"
        "}\n";
    */
    programCollection.spirvAsmSources.add("fragment") << "; SPIR-V\n"
                                                         "; Version: 1.3\n"
                                                         "; Generator: Khronos Glslang Reference Front End; 2\n"
                                                         "; Bound: 14\n"
                                                         "; Schema: 0\n"
                                                         "OpCapability Shader\n"
                                                         "%1 = OpExtInstImport \"GLSL.std.450\"\n"
                                                         "OpMemoryModel Logical GLSL450\n"
                                                         // %8 = out_color, %11 = in_color.
                                                         "OpEntryPoint Fragment %4 \"main\" %8 %11\n"
                                                         "OpExecutionMode %4 OriginUpperLeft\n"
                                                         "OpDecorate %8 Location 0\n"
                                                         "OpDecorate %11 Location 0\n"
                                                         "%2 = OpTypeVoid\n"
                                                         "%3 = OpTypeFunction %2\n"
                                                         "%6 = OpTypeInt 32 0\n"
                                                         "%7 = OpTypePointer Output %6\n"
                                                         "%8 = OpVariable %7 Output\n"
                                                         "%9 = OpTypeFloat 32\n"
                                                         "%10 = OpTypePointer Input %9\n"
                                                         "%11 = OpVariable %10 Input\n"
                                                         "%4 = OpFunction %2 None %3\n"
                                                         "%5 = OpLabel\n"
                                                         // out_color = uint(in_color)
                                                         "%12 = OpLoad %9 %11\n"
                                                         "%13 = OpConvertFToU %6 %12\n"
                                                         "OpStore %8 %13\n"
                                                         "OpReturn\n"
                                                         "OpFunctionEnd\n";
}
2255
void vkt::subgroups::setTesCtrlShaderFrameBuffer(vk::SourceCollections &programCollection)
{
    // Registers a fixed tessellation control shader under the name "tesc",
    // written directly in SPIR-V assembly. Invocation 0 sets both outer
    // tessellation levels to 1.0 and every invocation forwards its input
    // position. GLSL equivalent:
    /*
        "#extension GL_KHR_shader_subgroup_basic: enable\n"
        "#extension GL_EXT_tessellation_shader : require\n"
        "layout(vertices = 2) out;\n"
        "void main (void)\n"
        "{\n"
        "  if (gl_InvocationID == 0)\n"
        "  {\n"
        "    gl_TessLevelOuter[0] = 1.0f;\n"
        "    gl_TessLevelOuter[1] = 1.0f;\n"
        "  }\n"
        "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
        "}\n";
    */
    programCollection.spirvAsmSources.add("tesc") << "; SPIR-V\n"
                                                     "; Version: 1.3\n"
                                                     "; Generator: Khronos Glslang Reference Front End; 2\n"
                                                     "; Bound: 46\n"
                                                     "; Schema: 0\n"
                                                     "OpCapability Tessellation\n"
                                                     "%1 = OpExtInstImport \"GLSL.std.450\"\n"
                                                     "OpMemoryModel Logical GLSL450\n"
                                                     // %8 = gl_InvocationID, %20 = gl_TessLevelOuter,
                                                     // %33 = gl_out, %39 = gl_in.
                                                     "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %33 %39\n"
                                                     "OpExecutionMode %4 OutputVertices 2\n"
                                                     "OpDecorate %8 BuiltIn InvocationId\n"
                                                     "OpDecorate %20 Patch\n"
                                                     "OpDecorate %20 BuiltIn TessLevelOuter\n"
                                                     "OpMemberDecorate %29 0 BuiltIn Position\n"
                                                     "OpMemberDecorate %29 1 BuiltIn PointSize\n"
                                                     "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
                                                     "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
                                                     "OpDecorate %29 Block\n"
                                                     "OpMemberDecorate %35 0 BuiltIn Position\n"
                                                     "OpMemberDecorate %35 1 BuiltIn PointSize\n"
                                                     "OpMemberDecorate %35 2 BuiltIn ClipDistance\n"
                                                     "OpMemberDecorate %35 3 BuiltIn CullDistance\n"
                                                     "OpDecorate %35 Block\n"
                                                     "%2 = OpTypeVoid\n"
                                                     "%3 = OpTypeFunction %2\n"
                                                     "%6 = OpTypeInt 32 1\n"
                                                     "%7 = OpTypePointer Input %6\n"
                                                     "%8 = OpVariable %7 Input\n"
                                                     "%10 = OpConstant %6 0\n"
                                                     "%11 = OpTypeBool\n"
                                                     "%15 = OpTypeFloat 32\n"
                                                     "%16 = OpTypeInt 32 0\n"
                                                     "%17 = OpConstant %16 4\n"
                                                     "%18 = OpTypeArray %15 %17\n"
                                                     "%19 = OpTypePointer Output %18\n"
                                                     "%20 = OpVariable %19 Output\n"
                                                     "%21 = OpConstant %15 1\n"
                                                     "%22 = OpTypePointer Output %15\n"
                                                     "%24 = OpConstant %6 1\n"
                                                     "%26 = OpTypeVector %15 4\n"
                                                     "%27 = OpConstant %16 1\n"
                                                     "%28 = OpTypeArray %15 %27\n"
                                                     "%29 = OpTypeStruct %26 %15 %28 %28\n"
                                                     "%30 = OpConstant %16 2\n"
                                                     "%31 = OpTypeArray %29 %30\n"
                                                     "%32 = OpTypePointer Output %31\n"
                                                     "%33 = OpVariable %32 Output\n"
                                                     "%35 = OpTypeStruct %26 %15 %28 %28\n"
                                                     "%36 = OpConstant %16 32\n"
                                                     "%37 = OpTypeArray %35 %36\n"
                                                     "%38 = OpTypePointer Input %37\n"
                                                     "%39 = OpVariable %38 Input\n"
                                                     "%41 = OpTypePointer Input %26\n"
                                                     "%44 = OpTypePointer Output %26\n"
                                                     "%4 = OpFunction %2 None %3\n"
                                                     "%5 = OpLabel\n"
                                                     // if (gl_InvocationID == 0) { ... }
                                                     "%9 = OpLoad %6 %8\n"
                                                     "%12 = OpIEqual %11 %9 %10\n"
                                                     "OpSelectionMerge %14 None\n"
                                                     "OpBranchConditional %12 %13 %14\n"
                                                     "%13 = OpLabel\n"
                                                     // gl_TessLevelOuter[0] = gl_TessLevelOuter[1] = 1.0
                                                     "%23 = OpAccessChain %22 %20 %10\n"
                                                     "OpStore %23 %21\n"
                                                     "%25 = OpAccessChain %22 %20 %24\n"
                                                     "OpStore %25 %21\n"
                                                     "OpBranch %14\n"
                                                     "%14 = OpLabel\n"
                                                     // gl_out[gl_InvocationID].gl_Position =
                                                     //     gl_in[gl_InvocationID].gl_Position
                                                     "%34 = OpLoad %6 %8\n"
                                                     "%40 = OpLoad %6 %8\n"
                                                     "%42 = OpAccessChain %41 %39 %40 %10\n"
                                                     "%43 = OpLoad %26 %42\n"
                                                     "%45 = OpAccessChain %44 %33 %34 %10\n"
                                                     "OpStore %45 %43\n"
                                                     "OpReturn\n"
                                                     "OpFunctionEnd\n";
}
2348
// Registers the pre-assembled SPIR-V tessellation evaluation shader ("tese") used by the
// frame-buffer subgroup tests. The assembly is the compiled form of the GLSL shown in the
// comment below: each emitted isoline vertex is mix()-interpolated between the two patch
// vertices using gl_TessCoord.x, and the per-vertex input color of vertex 0 is forwarded
// unchanged to location 0.
void vkt::subgroups::setTesEvalShaderFrameBuffer(vk::SourceCollections &programCollection)
{
    /*
    "#extension GL_KHR_shader_subgroup_ballot: enable\n"
    "#extension GL_EXT_tessellation_shader : require\n"
    "layout(isolines, equal_spacing, ccw ) in;\n"
    "layout(location = 0) in float in_color[];\n"
    "layout(location = 0) out float out_color;\n"
    "\n"
    "void main (void)\n"
    "{\n"
    "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
    "  out_color = in_color[0];\n"
    "}\n";
    */
    programCollection.spirvAsmSources.add("tese")
        // Header: SPIR-V 1.3 module, tessellation-evaluation entry point "main" (%4).
        << "; SPIR-V\n"
           "; Version: 1.3\n"
           "; Generator: Khronos Glslang Reference Front End; 2\n"
           "; Bound: 45\n"
           "; Schema: 0\n"
           "OpCapability Tessellation\n"
           "%1 = OpExtInstImport \"GLSL.std.450\"\n"
           "OpMemoryModel Logical GLSL450\n"
           "OpEntryPoint TessellationEvaluation %4 \"main\" %13 %20 %29 %39 %42\n"
           // Execution modes match the GLSL layout(isolines, equal_spacing, ccw).
           "OpExecutionMode %4 Isolines\n"
           "OpExecutionMode %4 SpacingEqual\n"
           "OpExecutionMode %4 VertexOrderCcw\n"
           // %11/%16: gl_PerVertex blocks for the output vertex and the input patch array.
           "OpMemberDecorate %11 0 BuiltIn Position\n"
           "OpMemberDecorate %11 1 BuiltIn PointSize\n"
           "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
           "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
           "OpDecorate %11 Block\n"
           "OpMemberDecorate %16 0 BuiltIn Position\n"
           "OpMemberDecorate %16 1 BuiltIn PointSize\n"
           "OpMemberDecorate %16 2 BuiltIn ClipDistance\n"
           "OpMemberDecorate %16 3 BuiltIn CullDistance\n"
           "OpDecorate %16 Block\n"
           "OpDecorate %29 BuiltIn TessCoord\n"
           "OpDecorate %39 Location 0\n"
           "OpDecorate %42 Location 0\n"
           // Types, constants and interface variables.
           "%2 = OpTypeVoid\n"
           "%3 = OpTypeFunction %2\n"
           "%6 = OpTypeFloat 32\n"
           "%7 = OpTypeVector %6 4\n"
           "%8 = OpTypeInt 32 0\n"
           "%9 = OpConstant %8 1\n"
           "%10 = OpTypeArray %6 %9\n"
           "%11 = OpTypeStruct %7 %6 %10 %10\n"
           "%12 = OpTypePointer Output %11\n"
           "%13 = OpVariable %12 Output\n"
           "%14 = OpTypeInt 32 1\n"
           "%15 = OpConstant %14 0\n"
           "%16 = OpTypeStruct %7 %6 %10 %10\n"
           "%17 = OpConstant %8 32\n"
           "%18 = OpTypeArray %16 %17\n"
           "%19 = OpTypePointer Input %18\n"
           "%20 = OpVariable %19 Input\n"
           "%21 = OpTypePointer Input %7\n"
           "%24 = OpConstant %14 1\n"
           "%27 = OpTypeVector %6 3\n"
           "%28 = OpTypePointer Input %27\n"
           "%29 = OpVariable %28 Input\n"
           "%30 = OpConstant %8 0\n"
           "%31 = OpTypePointer Input %6\n"
           "%36 = OpTypePointer Output %7\n"
           "%38 = OpTypePointer Output %6\n"
           "%39 = OpVariable %38 Output\n"
           "%40 = OpTypeArray %6 %17\n"
           "%41 = OpTypePointer Input %40\n"
           "%42 = OpVariable %41 Input\n"
           // main(): gl_Position = FMix(gl_in[0].gl_Position, gl_in[1].gl_Position, TessCoord.x);
           //         out_color = in_color[0];
           "%4 = OpFunction %2 None %3\n"
           "%5 = OpLabel\n"
           "%22 = OpAccessChain %21 %20 %15 %15\n"
           "%23 = OpLoad %7 %22\n"
           "%25 = OpAccessChain %21 %20 %24 %15\n"
           "%26 = OpLoad %7 %25\n"
           "%32 = OpAccessChain %31 %29 %30\n"
           "%33 = OpLoad %6 %32\n"
           "%34 = OpCompositeConstruct %7 %33 %33 %33 %33\n"
           "%35 = OpExtInst %7 %1 FMix %23 %26 %34\n"
           "%37 = OpAccessChain %36 %13 %15\n"
           "OpStore %37 %35\n"
           "%43 = OpAccessChain %31 %42 %15\n"
           "%44 = OpLoad %6 %43\n"
           "OpStore %39 %44\n"
           "OpReturn\n"
           "OpFunctionEnd\n";
}
2438
addGeometryShadersFromTemplate(const std::string & glslTemplate,const vk::ShaderBuildOptions & options,vk::GlslSourceCollection & collection)2439 void vkt::subgroups::addGeometryShadersFromTemplate(const std::string &glslTemplate,
2440 const vk::ShaderBuildOptions &options,
2441 vk::GlslSourceCollection &collection)
2442 {
2443 tcu::StringTemplate geometryTemplate(glslTemplate);
2444
2445 map<string, string> linesParams;
2446 linesParams.insert(pair<string, string>("TOPOLOGY", "lines"));
2447
2448 map<string, string> pointsParams;
2449 pointsParams.insert(pair<string, string>("TOPOLOGY", "points"));
2450
2451 collection.add("geometry_lines") << glu::GeometrySource(geometryTemplate.specialize(linesParams)) << options;
2452 collection.add("geometry_points") << glu::GeometrySource(geometryTemplate.specialize(pointsParams)) << options;
2453 }
2454
addGeometryShadersFromTemplate(const std::string & spirvTemplate,const vk::SpirVAsmBuildOptions & options,vk::SpirVAsmCollection & collection)2455 void vkt::subgroups::addGeometryShadersFromTemplate(const std::string &spirvTemplate,
2456 const vk::SpirVAsmBuildOptions &options,
2457 vk::SpirVAsmCollection &collection)
2458 {
2459 tcu::StringTemplate geometryTemplate(spirvTemplate);
2460
2461 map<string, string> linesParams;
2462 linesParams.insert(pair<string, string>("TOPOLOGY", "InputLines"));
2463
2464 map<string, string> pointsParams;
2465 pointsParams.insert(pair<string, string>("TOPOLOGY", "InputPoints"));
2466
2467 collection.add("geometry_lines") << geometryTemplate.specialize(linesParams) << options;
2468 collection.add("geometry_points") << geometryTemplate.specialize(pointsParams) << options;
2469 }
2470
// Fills the host-visible allocation backing |data| according to data.initializeType and,
// unless initialization is InitializeNone, flushes the allocation so the device sees it.
// The random contents are deterministic: seeded from the command-line base seed.
void initializeMemory(Context &context, const Allocation &alloc, const subgroups::SSBOData &data)
{
    const vk::VkFormat format = data.format;
    // Images are sized by texel format, buffers by the layout-dependent element size.
    const vk::VkDeviceSize size =
        data.numElements * (data.isImage() ? getFormatSizeInBytes(format) : getElementSizeInBytes(format, data.layout));
    if (subgroups::SSBOData::InitializeNonZero == data.initializeType)
    {
        // NOTE: "NonZero" means "randomized" as opposed to the all-zeros mode below;
        // individual random values may still happen to be zero.
        de::Random rnd(context.getTestContext().getCommandLine().getBaseSeed());

        // Each case fills the whole allocation with random values of the format's
        // component width (signedness does not matter for raw bit patterns).
        switch (format)
        {
        default:
            DE_FATAL("Illegal buffer format");
            break;
        case VK_FORMAT_R8_SINT:
        case VK_FORMAT_R8G8_SINT:
        case VK_FORMAT_R8G8B8_SINT:
        case VK_FORMAT_R8G8B8A8_SINT:
        case VK_FORMAT_R8_UINT:
        case VK_FORMAT_R8G8_UINT:
        case VK_FORMAT_R8G8B8_UINT:
        case VK_FORMAT_R8G8B8A8_UINT:
        {
            uint8_t *ptr = reinterpret_cast<uint8_t *>(alloc.getHostPtr());

            for (vk::VkDeviceSize k = 0; k < (size / sizeof(uint8_t)); k++)
            {
                ptr[k] = rnd.getUint8();
            }
        }
        break;
        case VK_FORMAT_R16_SINT:
        case VK_FORMAT_R16G16_SINT:
        case VK_FORMAT_R16G16B16_SINT:
        case VK_FORMAT_R16G16B16A16_SINT:
        case VK_FORMAT_R16_UINT:
        case VK_FORMAT_R16G16_UINT:
        case VK_FORMAT_R16G16B16_UINT:
        case VK_FORMAT_R16G16B16A16_UINT:
        {
            uint16_t *ptr = reinterpret_cast<uint16_t *>(alloc.getHostPtr());

            for (vk::VkDeviceSize k = 0; k < (size / sizeof(uint16_t)); k++)
            {
                ptr[k] = rnd.getUint16();
            }
        }
        break;
        case VK_FORMAT_R8_USCALED:
        case VK_FORMAT_R8G8_USCALED:
        case VK_FORMAT_R8G8B8_USCALED:
        case VK_FORMAT_R8G8B8A8_USCALED:
        {
            uint32_t *ptr = reinterpret_cast<uint32_t *>(alloc.getHostPtr());

            for (vk::VkDeviceSize k = 0; k < (size / sizeof(uint32_t)); k++)
            {
                // Only odd random values are kept; even draws are replaced by zero,
                // so roughly half of the entries end up being 0.
                uint32_t r = rnd.getUint32();
                ptr[k] = (r & 1) ? r : 0;
            }
        }
        break;
        case VK_FORMAT_R32_SINT:
        case VK_FORMAT_R32G32_SINT:
        case VK_FORMAT_R32G32B32_SINT:
        case VK_FORMAT_R32G32B32A32_SINT:
        case VK_FORMAT_R32_UINT:
        case VK_FORMAT_R32G32_UINT:
        case VK_FORMAT_R32G32B32_UINT:
        case VK_FORMAT_R32G32B32A32_UINT:
        {
            uint32_t *ptr = reinterpret_cast<uint32_t *>(alloc.getHostPtr());

            for (vk::VkDeviceSize k = 0; k < (size / sizeof(uint32_t)); k++)
            {
                ptr[k] = rnd.getUint32();
            }
        }
        break;
        case VK_FORMAT_R64_SINT:
        case VK_FORMAT_R64G64_SINT:
        case VK_FORMAT_R64G64B64_SINT:
        case VK_FORMAT_R64G64B64A64_SINT:
        case VK_FORMAT_R64_UINT:
        case VK_FORMAT_R64G64_UINT:
        case VK_FORMAT_R64G64B64_UINT:
        case VK_FORMAT_R64G64B64A64_UINT:
        {
            uint64_t *ptr = reinterpret_cast<uint64_t *>(alloc.getHostPtr());

            for (vk::VkDeviceSize k = 0; k < (size / sizeof(uint64_t)); k++)
            {
                ptr[k] = rnd.getUint64();
            }
        }
        break;
        case VK_FORMAT_R16_SFLOAT:
        case VK_FORMAT_R16G16_SFLOAT:
        case VK_FORMAT_R16G16B16_SFLOAT:
        case VK_FORMAT_R16G16B16A16_SFLOAT:
        {
            // Random floats converted to their half-precision bit pattern.
            float16_t *const ptr = reinterpret_cast<float16_t *>(alloc.getHostPtr());

            for (vk::VkDeviceSize k = 0; k < (size / sizeof(float16_t)); k++)
            {
                ptr[k] = tcu::Float16(rnd.getFloat()).bits();
            }
        }
        break;
        case VK_FORMAT_R32_SFLOAT:
        case VK_FORMAT_R32G32_SFLOAT:
        case VK_FORMAT_R32G32B32_SFLOAT:
        case VK_FORMAT_R32G32B32A32_SFLOAT:
        {
            float *ptr = reinterpret_cast<float *>(alloc.getHostPtr());

            for (vk::VkDeviceSize k = 0; k < (size / sizeof(float)); k++)
            {
                ptr[k] = rnd.getFloat();
            }
        }
        break;
        case VK_FORMAT_R64_SFLOAT:
        case VK_FORMAT_R64G64_SFLOAT:
        case VK_FORMAT_R64G64B64_SFLOAT:
        case VK_FORMAT_R64G64B64A64_SFLOAT:
        {
            double *ptr = reinterpret_cast<double *>(alloc.getHostPtr());

            for (vk::VkDeviceSize k = 0; k < (size / sizeof(double)); k++)
            {
                ptr[k] = rnd.getDouble();
            }
        }
        break;
        }
    }
    else if (subgroups::SSBOData::InitializeZero == data.initializeType)
    {
        // Zero-fill in 32-bit words. Any trailing bytes when size is not a multiple
        // of 4 are left untouched — presumably all element sizes used here are
        // 4-byte aligned; TODO confirm.
        uint32_t *ptr = reinterpret_cast<uint32_t *>(alloc.getHostPtr());

        for (vk::VkDeviceSize k = 0; k < size / 4; k++)
        {
            ptr[k] = 0;
        }
    }

    // InitializeNone leaves host memory untouched and skips the flush entirely.
    if (subgroups::SSBOData::InitializeNone != data.initializeType)
    {
        flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
    }
}
2623
getResultBinding(const VkShaderStageFlagBits shaderStage)2624 uint32_t getResultBinding(const VkShaderStageFlagBits shaderStage)
2625 {
2626 switch (shaderStage)
2627 {
2628 case VK_SHADER_STAGE_VERTEX_BIT:
2629 return 0u;
2630 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
2631 return 1u;
2632 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
2633 return 2u;
2634 case VK_SHADER_STAGE_GEOMETRY_BIT:
2635 return 3u;
2636 default:
2637 DE_ASSERT(0);
2638 return -1;
2639 }
2640 DE_ASSERT(0);
2641 return -1;
2642 }
2643
makeTessellationEvaluationFrameBufferTest(Context & context,VkFormat format,const SSBOData * extraData,uint32_t extraDataCount,const void * internalData,subgroups::CheckResult checkResult,const VkShaderStageFlags shaderStage)2644 tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTest(
2645 Context &context, VkFormat format, const SSBOData *extraData, uint32_t extraDataCount, const void *internalData,
2646 subgroups::CheckResult checkResult, const VkShaderStageFlags shaderStage)
2647 {
2648 return makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(
2649 context, format, extraData, extraDataCount, internalData, checkResult, shaderStage, 0u, 0u);
2650 }
2651
// Runs a vertex+tesc+tese+fragment frame-buffer test over a series of widths
// (see getNextWidth), rendering isoline patches into a 1-pixel-high image and
// validating the read-back pixels with |checkResult|.
//
// tessShaderStageCreateFlags / requiredSubgroupSize are applied only to the
// tessellation stage(s) selected by |shaderStage|; a requiredSubgroupSize of 0
// means "do not force a subgroup size".
tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(
    Context &context, VkFormat format, const SSBOData *extraData, uint32_t extraDataCount, const void *internalData,
    subgroups::CheckResult checkResult, const VkShaderStageFlags shaderStage, const uint32_t tessShaderStageCreateFlags,
    const uint32_t requiredSubgroupSize)
{
    const DeviceInterface &vk = context.getDeviceInterface();
    const VkDevice device = context.getDevice();
    const uint32_t maxWidth = getMaxWidth();
    vector<de::SharedPtr<BufferOrImage>> inputBuffers(extraDataCount);
    DescriptorSetLayoutBuilder layoutBuilder;
    DescriptorPoolBuilder poolBuilder;
    DescriptorSetUpdateBuilder updateBuilder;
    Move<VkDescriptorPool> descriptorPool;
    Move<VkDescriptorSet> descriptorSet;
    // Shader modules are looked up by the fixed names used by the shader setup helpers.
    const Unique<VkShaderModule> vertexShaderModule(
        createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
    const Unique<VkShaderModule> teCtrlShaderModule(
        createShaderModule(vk, device, context.getBinaryCollection().get("tesc"), 0u));
    const Unique<VkShaderModule> teEvalShaderModule(
        createShaderModule(vk, device, context.getBinaryCollection().get("tese"), 0u));
    const Unique<VkShaderModule> fragmentShaderModule(
        createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
    const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
    // One vec4 position per vertex, tightly packed.
    const VkVertexInputBindingDescription vertexInputBinding = {
        0u,                                      // uint32_t binding;
        static_cast<uint32_t>(sizeof(tcu::Vec4)), // uint32_t stride;
        VK_VERTEX_INPUT_RATE_VERTEX              // VkVertexInputRate inputRate;
    };
    const VkVertexInputAttributeDescription vertexInputAttribute = {
        0u,                            // uint32_t location;
        0u,                            // uint32_t binding;
        VK_FORMAT_R32G32B32A32_SFLOAT, // VkFormat format;
        0u                             // uint32_t offset;
    };

    // Create and initialize the extra inputs (UBOs or sampled images). Unlike the
    // geometry-stage variant, these are initialized only once, before the width loop.
    for (uint32_t i = 0u; i < extraDataCount; i++)
    {
        if (extraData[i].isImage())
        {
            inputBuffers[i] = de::SharedPtr<BufferOrImage>(
                new Image(context, static_cast<uint32_t>(extraData[i].numElements), 1u, extraData[i].format));
        }
        else
        {
            DE_ASSERT(extraData[i].isUBO());
            vk::VkDeviceSize size =
                getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
            inputBuffers[i] =
                de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
        }
        const Allocation &alloc = inputBuffers[i]->getAllocation();
        initializeMemory(context, alloc, extraData[i]);
    }

    // Bindings are indexed 0..extraDataCount-1, visible to the tested stage(s) only.
    for (uint32_t ndx = 0u; ndx < extraDataCount; ndx++)
        layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, shaderStage, nullptr);

    const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));

    const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));

    // One entry per pipeline stage slot — order appears to be vertex, tesc, tese,
    // geometry, fragment (matches the geometry variant's use of index 3);
    // TODO confirm against makeGraphicsPipeline.
    const uint32_t requiredSubgroupSizes[5] = {
        0u, ((shaderStage & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? requiredSubgroupSize : 0u),
        ((shaderStage & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? requiredSubgroupSize : 0u), 0u, 0u};

    const Unique<VkPipeline> pipeline(makeGraphicsPipeline(
        context, *pipelineLayout,
        VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
            VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
        *vertexShaderModule, *fragmentShaderModule, VK_NULL_HANDLE, *teCtrlShaderModule, *teEvalShaderModule,
        *renderPass, VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, &vertexInputBinding, &vertexInputAttribute, true, format, 0u,
        ((shaderStage & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? tessShaderStageCreateFlags : 0u),
        ((shaderStage & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? tessShaderStageCreateFlags : 0u), 0u, 0u,
        requiredSubgroupSize != 0u ? requiredSubgroupSizes : nullptr));

    for (uint32_t ndx = 0u; ndx < extraDataCount; ndx++)
        poolBuilder.addType(inputBuffers[ndx]->getType());

    // The descriptor pool/set are only needed when there is at least one input.
    if (extraDataCount > 0)
    {
        descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
        descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
    }

    // Point each binding at its image view or buffer range.
    for (uint32_t buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
    {
        if (inputBuffers[buffersNdx]->isImage())
        {
            VkDescriptorImageInfo info = makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
                                                                 inputBuffers[buffersNdx]->getAsImage()->getImageView(),
                                                                 VK_IMAGE_LAYOUT_GENERAL);

            updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
                                      inputBuffers[buffersNdx]->getType(), &info);
        }
        else
        {
            VkDescriptorBufferInfo info =
                makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(), 0ull,
                                         inputBuffers[buffersNdx]->getAsBuffer()->getSize());

            updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
                                      inputBuffers[buffersNdx]->getType(), &info);
        }
    }

    updateBuilder.update(vk, device);

    const VkQueue queue = context.getUniversalQueue();
    const uint32_t queueFamilyIndex = context.getUniversalQueueFamilyIndex();
    const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
    const uint32_t subgroupSize = getSubgroupSize(context);
    const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(context, *cmdPool));
    // Two vertices per patch, one patch per output pixel at the maximum width.
    const vk::VkDeviceSize vertexBufferSize = 2ull * maxWidth * sizeof(tcu::Vec4);
    Buffer vertexBuffer(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
    unsigned totalIterations = 0u;
    unsigned failedIterations = 0u;
    Image discardableImage(context, maxWidth, 1u, format,
                           VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);

    {
        // Fill the vertex buffer with pixel-wide segments: each vertex pair spans one
        // pixel of the [-1, 1] horizontal range (only the x coordinate varies).
        const Allocation &alloc = vertexBuffer.getAllocation();
        std::vector<tcu::Vec4> data(2u * maxWidth, Vec4(1.0f, 0.0f, 1.0f, 1.0f));
        const float pixelSize = 2.0f / static_cast<float>(maxWidth);
        float leftHandPosition = -1.0f;

        for (uint32_t ndx = 0u; ndx < data.size(); ndx += 2u)
        {
            data[ndx][0] = leftHandPosition;
            leftHandPosition += pixelSize;
            data[ndx + 1][0] = leftHandPosition;
        }

        deMemcpy(alloc.getHostPtr(), &data[0], data.size() * sizeof(tcu::Vec4));
        flushAlloc(vk, device, alloc);
    }

    const Unique<VkFramebuffer> framebuffer(
        makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
    const VkViewport viewport = makeViewport(maxWidth, 1u);
    const VkRect2D scissor = makeRect2D(maxWidth, 1u);
    const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
    Buffer imageBufferResult(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
    const VkDeviceSize vertexBufferOffset = 0u;

    // Record, submit and verify one iteration per tested width.
    for (uint32_t width = 1u; width < maxWidth; width = getNextWidth(width))
    {
        totalIterations++;

        beginCommandBuffer(vk, *cmdBuffer);
        {

            vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
            vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);

            beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));

            vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);

            if (extraDataCount > 0)
            {
                vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
                                         &descriptorSet.get(), 0u, nullptr);
            }

            // Two vertices per patch -> "width" patches.
            vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
            vk.cmdDraw(*cmdBuffer, 2 * width, 1, 0, 0);

            endRenderPass(vk, *cmdBuffer);

            copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(),
                              tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
                              VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
            endCommandBuffer(vk, *cmdBuffer);

            submitCommandsAndWait(vk, device, queue, *cmdBuffer);
        }
        context.resetCommandPoolForVKSC(device, *cmdPool);

        {
            const Allocation &allocResult = imageBufferResult.getAllocation();
            invalidateAlloc(vk, device, allocResult);

            // The checker is handed width/2 — NOTE(review): presumably the number of
            // verifiable result slots per iteration; confirm against the CheckResult
            // implementations.
            std::vector<const void *> datas;
            datas.push_back(allocResult.getHostPtr());
            if (!checkResult(internalData, datas, width / 2u, subgroupSize))
                failedIterations++;
        }
    }

    // Report an aggregate pass/fail over all width iterations.
    if (0 < failedIterations)
    {
        unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);

        context.getTestContext().getLog()
            << TestLog::Message << valuesPassed << " / " << totalIterations << " values passed" << TestLog::EndMessage;
        return tcu::TestStatus::fail("Failed!");
    }

    return tcu::TestStatus::pass("OK");
}
2853
check(std::vector<const void * > datas,uint32_t width,uint32_t ref)2854 bool vkt::subgroups::check(std::vector<const void *> datas, uint32_t width, uint32_t ref)
2855 {
2856 const uint32_t *data = reinterpret_cast<const uint32_t *>(datas[0]);
2857
2858 for (uint32_t n = 0; n < width; ++n)
2859 {
2860 if (data[n] != ref)
2861 {
2862 return false;
2863 }
2864 }
2865
2866 return true;
2867 }
2868
checkComputeOrMesh(std::vector<const void * > datas,const uint32_t numWorkgroups[3],const uint32_t localSize[3],uint32_t ref)2869 bool vkt::subgroups::checkComputeOrMesh(std::vector<const void *> datas, const uint32_t numWorkgroups[3],
2870 const uint32_t localSize[3], uint32_t ref)
2871 {
2872 const uint32_t globalSizeX = numWorkgroups[0] * localSize[0];
2873 const uint32_t globalSizeY = numWorkgroups[1] * localSize[1];
2874 const uint32_t globalSizeZ = numWorkgroups[2] * localSize[2];
2875
2876 return check(datas, globalSizeX * globalSizeY * globalSizeZ, ref);
2877 }
2878
makeGeometryFrameBufferTest(Context & context,VkFormat format,const SSBOData * extraData,uint32_t extraDataCount,const void * internalData,subgroups::CheckResult checkResult)2879 tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTest(Context &context, VkFormat format,
2880 const SSBOData *extraData, uint32_t extraDataCount,
2881 const void *internalData,
2882 subgroups::CheckResult checkResult)
2883 {
2884 return makeGeometryFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData,
2885 checkResult, 0u, 0u);
2886 }
2887
// Runs a vertex+geometry+fragment frame-buffer test over a series of widths
// (see getNextWidth), rendering a point per pixel into a 1-pixel-high image and
// validating the read-back pixels with |checkResult|.
//
// geometryShaderStageCreateFlags / requiredSubgroupSize apply to the geometry
// stage only; a requiredSubgroupSize of 0 means "do not force a subgroup size".
tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTestRequiredSubgroupSize(
    Context &context, VkFormat format, const SSBOData *extraData, uint32_t extraDataCount, const void *internalData,
    subgroups::CheckResult checkResult, const uint32_t geometryShaderStageCreateFlags,
    const uint32_t requiredSubgroupSize)
{
    const DeviceInterface &vk = context.getDeviceInterface();
    const VkDevice device = context.getDevice();
    const uint32_t maxWidth = getMaxWidth();
    vector<de::SharedPtr<BufferOrImage>> inputBuffers(extraDataCount);
    DescriptorSetLayoutBuilder layoutBuilder;
    DescriptorPoolBuilder poolBuilder;
    DescriptorSetUpdateBuilder updateBuilder;
    Move<VkDescriptorPool> descriptorPool;
    Move<VkDescriptorSet> descriptorSet;
    // Shader modules are looked up by the fixed names used by the shader setup helpers.
    const Unique<VkShaderModule> vertexShaderModule(
        createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
    const Unique<VkShaderModule> geometryShaderModule(
        createShaderModule(vk, device, context.getBinaryCollection().get("geometry"), 0u));
    const Unique<VkShaderModule> fragmentShaderModule(
        createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
    const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
    // One vec4 position per vertex, tightly packed.
    const VkVertexInputBindingDescription vertexInputBinding = {
        0u,                                      // uint32_t binding;
        static_cast<uint32_t>(sizeof(tcu::Vec4)), // uint32_t stride;
        VK_VERTEX_INPUT_RATE_VERTEX              // VkVertexInputRate inputRate;
    };
    const VkVertexInputAttributeDescription vertexInputAttribute = {
        0u,                            // uint32_t location;
        0u,                            // uint32_t binding;
        VK_FORMAT_R32G32B32A32_SFLOAT, // VkFormat format;
        0u                             // uint32_t offset;
    };

    // Create and initialize the extra inputs (UBOs or sampled images).
    for (uint32_t i = 0u; i < extraDataCount; i++)
    {
        if (extraData[i].isImage())
        {
            inputBuffers[i] = de::SharedPtr<BufferOrImage>(
                new Image(context, static_cast<uint32_t>(extraData[i].numElements), 1u, extraData[i].format));
        }
        else
        {
            DE_ASSERT(extraData[i].isUBO());
            vk::VkDeviceSize size =
                getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
            inputBuffers[i] =
                de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
        }
        const Allocation &alloc = inputBuffers[i]->getAllocation();
        initializeMemory(context, alloc, extraData[i]);
    }

    // All extra inputs are visible to the geometry stage only.
    for (uint32_t ndx = 0u; ndx < extraDataCount; ndx++)
        layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_GEOMETRY_BIT, nullptr);

    const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));

    const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));

    // One entry per pipeline stage slot; index 3 corresponds to the geometry stage.
    const uint32_t requiredSubgroupSizes[5] = {0u, 0u, 0u, requiredSubgroupSize, 0u};

    const Unique<VkPipeline> pipeline(makeGraphicsPipeline(
        context, *pipelineLayout,
        VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_GEOMETRY_BIT, *vertexShaderModule,
        *fragmentShaderModule, *geometryShaderModule, VK_NULL_HANDLE, VK_NULL_HANDLE, *renderPass,
        VK_PRIMITIVE_TOPOLOGY_POINT_LIST, &vertexInputBinding, &vertexInputAttribute, true, format, 0u, 0u, 0u,
        geometryShaderStageCreateFlags, 0u, requiredSubgroupSize != 0u ? requiredSubgroupSizes : nullptr));

    for (uint32_t ndx = 0u; ndx < extraDataCount; ndx++)
        poolBuilder.addType(inputBuffers[ndx]->getType());

    // The descriptor pool/set are only needed when there is at least one input.
    if (extraDataCount > 0)
    {
        descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
        descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
    }

    // Point each binding at its image view or buffer range.
    for (uint32_t buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
    {
        if (inputBuffers[buffersNdx]->isImage())
        {
            VkDescriptorImageInfo info = makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
                                                                 inputBuffers[buffersNdx]->getAsImage()->getImageView(),
                                                                 VK_IMAGE_LAYOUT_GENERAL);

            updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
                                      inputBuffers[buffersNdx]->getType(), &info);
        }
        else
        {
            VkDescriptorBufferInfo info =
                makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(), 0ull,
                                         inputBuffers[buffersNdx]->getAsBuffer()->getSize());

            updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
                                      inputBuffers[buffersNdx]->getType(), &info);
        }
    }

    updateBuilder.update(vk, device);

    const VkQueue queue = context.getUniversalQueue();
    const uint32_t queueFamilyIndex = context.getUniversalQueueFamilyIndex();
    const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
    const uint32_t subgroupSize = getSubgroupSize(context);
    const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(context, *cmdPool));
    // One point per pixel at the maximum width.
    const vk::VkDeviceSize vertexBufferSize = maxWidth * sizeof(tcu::Vec4);
    Buffer vertexBuffer(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
    unsigned totalIterations = 0u;
    unsigned failedIterations = 0u;
    Image discardableImage(context, maxWidth, 1u, format,
                           VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);

    {
        // Place one point at the center of each pixel across the [-1, 1] range
        // (only the x coordinate varies).
        const Allocation &alloc = vertexBuffer.getAllocation();
        std::vector<tcu::Vec4> data(maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
        const float pixelSize = 2.0f / static_cast<float>(maxWidth);
        float leftHandPosition = -1.0f;

        for (uint32_t ndx = 0u; ndx < maxWidth; ++ndx)
        {
            data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
            leftHandPosition += pixelSize;
        }

        deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
        flushAlloc(vk, device, alloc);
    }

    const Unique<VkFramebuffer> framebuffer(
        makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
    const VkViewport viewport = makeViewport(maxWidth, 1u);
    const VkRect2D scissor = makeRect2D(maxWidth, 1u);
    const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
    Buffer imageBufferResult(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
    const VkDeviceSize vertexBufferOffset = 0u;

    // Record, submit and verify one iteration per tested width.
    for (uint32_t width = 1u; width < maxWidth; width = getNextWidth(width))
    {
        totalIterations++;

        // Unlike the tessellation variant, the extra inputs are re-initialized
        // for every width iteration.
        for (uint32_t ndx = 0u; ndx < inputBuffers.size(); ndx++)
        {
            const Allocation &alloc = inputBuffers[ndx]->getAllocation();
            initializeMemory(context, alloc, extraData[ndx]);
        }

        beginCommandBuffer(vk, *cmdBuffer);
        {
            vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);

            vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);

            beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));

            vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);

            if (extraDataCount > 0)
            {
                vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
                                         &descriptorSet.get(), 0u, nullptr);
            }

            vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);

            // One point per tested pixel.
            vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);

            endRenderPass(vk, *cmdBuffer);

            copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(),
                              tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
                              VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);

            endCommandBuffer(vk, *cmdBuffer);

            submitCommandsAndWait(vk, device, queue, *cmdBuffer);
        }
        context.resetCommandPoolForVKSC(device, *cmdPool);

        {
            const Allocation &allocResult = imageBufferResult.getAllocation();
            invalidateAlloc(vk, device, allocResult);

            std::vector<const void *> datas;
            datas.push_back(allocResult.getHostPtr());
            if (!checkResult(internalData, datas, width, subgroupSize))
                failedIterations++;
        }
    }

    // Report an aggregate pass/fail over all width iterations.
    if (0 < failedIterations)
    {
        unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);

        context.getTestContext().getLog()
            << TestLog::Message << valuesPassed << " / " << totalIterations << " values passed" << TestLog::EndMessage;

        return tcu::TestStatus::fail("Failed!");
    }

    return tcu::TestStatus::pass("OK");
}
3090
getPossibleGraphicsSubgroupStages(Context & context,const vk::VkShaderStageFlags testedStages)3091 vk::VkShaderStageFlags vkt::subgroups::getPossibleGraphicsSubgroupStages(Context &context,
3092 const vk::VkShaderStageFlags testedStages)
3093 {
3094 const VkPhysicalDeviceSubgroupProperties &subgroupProperties = context.getSubgroupProperties();
3095 VkShaderStageFlags stages = testedStages & subgroupProperties.supportedStages;
3096
3097 DE_ASSERT(isAllGraphicsStages(testedStages));
3098
3099 if (VK_SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
3100 {
3101 if ((stages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
3102 TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
3103 else
3104 stages = VK_SHADER_STAGE_FRAGMENT_BIT;
3105 }
3106
3107 if (static_cast<VkShaderStageFlags>(0u) == stages)
3108 TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
3109
3110 return stages;
3111 }
3112
allStages(Context & context,vk::VkFormat format,const SSBOData * extraData,uint32_t extraDataCount,const void * internalData,const VerificationFunctor & checkResult,const vk::VkShaderStageFlags shaderStage)3113 tcu::TestStatus vkt::subgroups::allStages(Context &context, vk::VkFormat format, const SSBOData *extraData,
3114 uint32_t extraDataCount, const void *internalData,
3115 const VerificationFunctor &checkResult,
3116 const vk::VkShaderStageFlags shaderStage)
3117 {
3118 return vkt::subgroups::allStagesRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData,
3119 checkResult, shaderStage, 0u, 0u, 0u, 0u, 0u, nullptr);
3120 }
3121
// Runs a graphics pipeline covering every stage selected in shaderStageTested and validates the
// subgroup-operation results produced by each of those stages.
//
// Each tested non-fragment stage writes its per-invocation results into a dedicated SSBO (bound at
// getResultBinding(stage)); the fragment stage result is read back from the color attachment instead.
// Stages that are only needed to form a complete pipeline (e.g. a vertex shader when only tessellation
// is tested) use the "*_noSubgroup" shader variants so they do not contribute results.
// The draw is repeated for widths 1..maxWidth (stepping via getNextWidth) and checkResult is invoked
// once per tested stage and iteration.
//
// The five *ShaderStageCreateFlags parameters are forwarded to the corresponding
// VkPipelineShaderStageCreateInfo. requiredSubgroupSize, when non-null, holds one entry per stage in
// the same order as the create-flag parameters and is forwarded to pipeline creation.
tcu::TestStatus vkt::subgroups::allStagesRequiredSubgroupSize(
    Context &context, vk::VkFormat format, const SSBOData *extraDatas, uint32_t extraDatasCount,
    const void *internalData, const VerificationFunctor &checkResult, const vk::VkShaderStageFlags shaderStageTested,
    const uint32_t vertexShaderStageCreateFlags, const uint32_t tessellationControlShaderStageCreateFlags,
    const uint32_t tessellationEvalShaderStageCreateFlags, const uint32_t geometryShaderStageCreateFlags,
    const uint32_t fragmentShaderStageCreateFlags, const uint32_t requiredSubgroupSize[5])
{
    const DeviceInterface &vk = context.getDeviceInterface();
    const VkDevice device = context.getDevice();
    const uint32_t maxWidth = getMaxWidth();
    // Stages whose results are checked (in pipeline order, filled below).
    vector<VkShaderStageFlagBits> stagesVector;
    // Extra stages that must be present to form a valid pipeline but are not themselves tested.
    VkShaderStageFlags shaderStageRequired = (VkShaderStageFlags)0ull;

    Move<VkShaderModule> vertexShaderModule;
    Move<VkShaderModule> teCtrlShaderModule;
    Move<VkShaderModule> teEvalShaderModule;
    Move<VkShaderModule> geometryShaderModule;
    Move<VkShaderModule> fragmentShaderModule;

    if (shaderStageTested & VK_SHADER_STAGE_VERTEX_BIT)
    {
        stagesVector.push_back(VK_SHADER_STAGE_VERTEX_BIT);
    }
    if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
    {
        // Tessellation control needs both a vertex shader and a tessellation evaluation shader
        // to complete the pipeline; add whichever are not already being tested.
        stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
        shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ?
                                   (VkShaderStageFlags)0u :
                                   (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
        shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ?
                                   (VkShaderStageFlags)0u :
                                   (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
    }
    if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
    {
        // Tessellation evaluation likewise requires vertex and tessellation control stages.
        stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
        shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ?
                                   (VkShaderStageFlags)0u :
                                   (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
        shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ?
                                   (VkShaderStageFlags)0u :
                                   (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
    }
    if (shaderStageTested & VK_SHADER_STAGE_GEOMETRY_BIT)
    {
        // Geometry requires at least a vertex stage in front of it.
        stagesVector.push_back(VK_SHADER_STAGE_GEOMETRY_BIT);
        const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
        shaderStageRequired |= (shaderStageTested & required) ? (VkShaderStageFlags)0 : required;
    }
    if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
    {
        // Fragment results are read from the color attachment, so it is not added to stagesVector;
        // it still needs a vertex stage to drive it.
        const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
        shaderStageRequired |= (shaderStageTested & required) ? (VkShaderStageFlags)0 : required;
    }

    const uint32_t stagesCount = static_cast<uint32_t>(stagesVector.size());
    // Pipeline-completing (untested) stages use the "*_noSubgroup" shader binaries.
    const string vert = (shaderStageRequired & VK_SHADER_STAGE_VERTEX_BIT) ? "vert_noSubgroup" : "vert";
    const string tesc = (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? "tesc_noSubgroup" : "tesc";
    const string tese =
        (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? "tese_noSubgroup" : "tese";

    // From here on, shaderStageRequired is the full set of stages present in the pipeline.
    shaderStageRequired = shaderStageTested | shaderStageRequired;

    vertexShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(vert), 0u);
    if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
    {
        teCtrlShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tesc), 0u);
        teEvalShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tese), 0u);
    }
    if (shaderStageRequired & VK_SHADER_STAGE_GEOMETRY_BIT)
    {
        if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
        {
            // tessellation shaders output line primitives
            geometryShaderModule =
                createShaderModule(vk, device, context.getBinaryCollection().get("geometry_lines"), 0u);
        }
        else
        {
            // otherwise points are processed by geometry shader
            geometryShaderModule =
                createShaderModule(vk, device, context.getBinaryCollection().get("geometry_points"), 0u);
        }
    }
    if (shaderStageRequired & VK_SHADER_STAGE_FRAGMENT_BIT)
        fragmentShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u);

    // [0, stagesCount): one result SSBO per tested stage; [stagesCount, end): the caller's extra data.
    std::vector<de::SharedPtr<BufferOrImage>> inputBuffers(stagesCount + extraDatasCount);

    DescriptorSetLayoutBuilder layoutBuilder;

    // The implicit result SSBO we use to store our outputs from the shader
    for (uint32_t ndx = 0u; ndx < stagesCount; ++ndx)
    {
        // Tessellation evaluation emits two invocations per input point, hence the doubled size.
        const VkDeviceSize shaderSize =
            (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? maxWidth * 2 : maxWidth;
        const VkDeviceSize size = getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize;
        inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT));

        layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1, stagesVector[ndx],
                                        getResultBinding(stagesVector[ndx]), nullptr);
    }

    // Create and initialize the caller-supplied extra inputs (images, UBOs or SSBOs).
    for (uint32_t ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
    {
        const uint32_t datasNdx = ndx - stagesCount;
        if (extraDatas[datasNdx].isImage())
        {
            inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Image(
                context, static_cast<uint32_t>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
        }
        else
        {
            const auto usage = (extraDatas[datasNdx].isUBO() ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT :
                                                               VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
            const auto size = getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) *
                              extraDatas[datasNdx].numElements;
            inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, usage));
        }

        const Allocation &alloc = inputBuffers[ndx]->getAllocation();
        initializeMemory(context, alloc, extraDatas[datasNdx]);

        layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1, extraDatas[datasNdx].stages,
                                        extraDatas[datasNdx].binding, nullptr);
    }

    const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));

    const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));

    const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
    // Tessellation consumes patches; otherwise each vertex is an independent point.
    const Unique<VkPipeline> pipeline(makeGraphicsPipeline(
        context, *pipelineLayout, shaderStageRequired, *vertexShaderModule, *fragmentShaderModule,
        *geometryShaderModule, *teCtrlShaderModule, *teEvalShaderModule, *renderPass,
        (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST :
                                                                           VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
        nullptr, nullptr, false, VK_FORMAT_R32G32B32A32_SFLOAT, vertexShaderStageCreateFlags,
        tessellationControlShaderStageCreateFlags, tessellationEvalShaderStageCreateFlags,
        geometryShaderStageCreateFlags, fragmentShaderStageCreateFlags, requiredSubgroupSize));

    Move<VkDescriptorPool> descriptorPool;
    Move<VkDescriptorSet> descriptorSet;

    if (inputBuffers.size() > 0)
    {
        DescriptorPoolBuilder poolBuilder;

        for (uint32_t ndx = 0u; ndx < static_cast<uint32_t>(inputBuffers.size()); ndx++)
        {
            poolBuilder.addType(inputBuffers[ndx]->getType());
        }

        descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

        // Create descriptor set
        descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);

        DescriptorSetUpdateBuilder updateBuilder;

        // Result SSBOs use their per-stage result binding; extra data uses the caller's binding.
        for (uint32_t ndx = 0u; ndx < stagesCount + extraDatasCount; ndx++)
        {
            uint32_t binding;
            if (ndx < stagesCount)
                binding = getResultBinding(stagesVector[ndx]);
            else
                binding = extraDatas[ndx - stagesCount].binding;

            if (inputBuffers[ndx]->isImage())
            {
                VkDescriptorImageInfo info =
                    makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
                                            inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);

                updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(binding),
                                          inputBuffers[ndx]->getType(), &info);
            }
            else
            {
                VkDescriptorBufferInfo info = makeDescriptorBufferInfo(
                    inputBuffers[ndx]->getAsBuffer()->getBuffer(), 0ull, inputBuffers[ndx]->getAsBuffer()->getSize());

                updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(binding),
                                          inputBuffers[ndx]->getType(), &info);
            }
        }

        updateBuilder.update(vk, device);
    }

    // Record, submit and verify one draw per tested width.
    {
        const VkQueue queue = context.getUniversalQueue();
        const uint32_t queueFamilyIndex = context.getUniversalQueueFamilyIndex();
        const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
        const uint32_t subgroupSize = getSubgroupSize(context);
        const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(context, *cmdPool));
        unsigned totalIterations = 0u;
        unsigned failedIterations = 0u;
        Image resultImage(context, maxWidth, 1, format,
                          VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
        const Unique<VkFramebuffer> framebuffer(
            makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), maxWidth, 1u));
        const VkViewport viewport = makeViewport(maxWidth, 1u);
        const VkRect2D scissor = makeRect2D(maxWidth, 1u);
        const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
        Buffer imageBufferResult(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
        const VkImageSubresourceRange subresourceRange = {
            VK_IMAGE_ASPECT_COLOR_BIT, //VkImageAspectFlags aspectMask
            0u,                        //uint32_t baseMipLevel
            1u,                        //uint32_t levelCount
            0u,                        //uint32_t baseArrayLayer
            1u                         //uint32_t layerCount
        };

        // Transition the attachment from UNDEFINED once per iteration before rendering.
        const VkImageMemoryBarrier colorAttachmentBarrier =
            makeImageMemoryBarrier((VkAccessFlags)0u, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
                                   VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, resultImage.getImage(), subresourceRange);

        for (uint32_t width = 1u; width < maxWidth; width = getNextWidth(width))
        {
            for (uint32_t ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
            {
                // re-init the data
                const Allocation &alloc = inputBuffers[ndx]->getAllocation();
                initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
            }

            totalIterations++;

            beginCommandBuffer(vk, *cmdBuffer);

            vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                                  VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0, 0u, nullptr, 0u,
                                  nullptr, 1u, &colorAttachmentBarrier);

            vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);

            vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);

            beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));

            vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);

            if (stagesCount + extraDatasCount > 0)
                vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
                                         &descriptorSet.get(), 0u, nullptr);

            vk.cmdDraw(*cmdBuffer, width, 1, 0, 0);

            endRenderPass(vk, *cmdBuffer);

            // Only the first 'width' pixels of the attachment are copied out for fragment checking.
            copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), imageBufferResult.getBuffer(),
                              tcu::IVec2(width, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
                              VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);

            endCommandBuffer(vk, *cmdBuffer);

            submitCommandsAndWait(vk, device, queue, *cmdBuffer);

            // Check the per-stage SSBO results for every tested non-fragment stage.
            for (uint32_t ndx = 0u; ndx < stagesCount; ++ndx)
            {
                std::vector<const void *> datas;
                if (!inputBuffers[ndx]->isImage())
                {
                    const Allocation &resultAlloc = inputBuffers[ndx]->getAllocation();
                    invalidateAlloc(vk, device, resultAlloc);
                    // we always have our result data first
                    datas.push_back(resultAlloc.getHostPtr());
                }

                // Append any extra buffers visible to this stage so checkResult can inspect them too.
                for (uint32_t index = stagesCount; index < stagesCount + extraDatasCount; ++index)
                {
                    const uint32_t datasNdx = index - stagesCount;
                    if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
                    {
                        const Allocation &resultAlloc = inputBuffers[index]->getAllocation();
                        invalidateAlloc(vk, device, resultAlloc);
                        // we always have our result data first
                        datas.push_back(resultAlloc.getHostPtr());
                    }
                }

                // Any stage in the vertex pipeline may be called multiple times per vertex, so we may need >= non-strict comparisons.
                const bool multiCall = (stagesVector[ndx] == VK_SHADER_STAGE_VERTEX_BIT ||
                                        stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT ||
                                        stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT ||
                                        stagesVector[ndx] == VK_SHADER_STAGE_GEOMETRY_BIT);
                // Tessellation evaluation produced two invocations per input point (see buffer sizing above).
                const uint32_t usedWidth =
                    ((stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? width * 2 : width);

                if (!checkResult(internalData, datas, usedWidth, subgroupSize, multiCall))
                    failedIterations++;
            }
            // Fragment results come from the rendered image rather than an SSBO.
            if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
            {
                std::vector<const void *> datas;
                const Allocation &resultAlloc = imageBufferResult.getAllocation();
                invalidateAlloc(vk, device, resultAlloc);

                // we always have our result data first
                datas.push_back(resultAlloc.getHostPtr());

                for (uint32_t index = stagesCount; index < stagesCount + extraDatasCount; ++index)
                {
                    const uint32_t datasNdx = index - stagesCount;
                    if (VK_SHADER_STAGE_FRAGMENT_BIT & extraDatas[datasNdx].stages && (!inputBuffers[index]->isImage()))
                    {
                        const Allocation &alloc = inputBuffers[index]->getAllocation();
                        invalidateAlloc(vk, device, alloc);
                        // we always have our result data first
                        datas.push_back(alloc.getHostPtr());
                    }
                }

                if (!checkResult(internalData, datas, width, subgroupSize, false))
                    failedIterations++;
            }

            context.resetCommandPoolForVKSC(device, *cmdPool);
        }

        if (0 < failedIterations)
        {
            unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);

            context.getTestContext().getLog() << TestLog::Message << valuesPassed << " / " << totalIterations
                                              << " values passed" << TestLog::EndMessage;

            return tcu::TestStatus::fail("Failed!");
        }
    }

    return tcu::TestStatus::pass("OK");
}
3456
makeVertexFrameBufferTest(Context & context,vk::VkFormat format,const SSBOData * extraData,uint32_t extraDataCount,const void * internalData,subgroups::CheckResult checkResult)3457 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTest(Context &context, vk::VkFormat format,
3458 const SSBOData *extraData, uint32_t extraDataCount,
3459 const void *internalData, subgroups::CheckResult checkResult)
3460 {
3461 return makeVertexFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData,
3462 checkResult, 0u, 0u);
3463 }
3464
// Framebuffer-based vertex stage test: draws 'width' points, one vertex per pixel of a 1-pixel-high
// image, and lets checkResult validate the rendered pixels for widths 1..maxWidth.
//
// extraData entries become vertex-stage inputs: sampled images, or UBOs (the non-image path asserts
// isUBO(), so SSBO extra data is not supported here). vertexShaderStageCreateFlags and
// requiredSubgroupSize (applied to the vertex stage only; a value of 0 means "no requirement") are
// forwarded to pipeline creation.
tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTestRequiredSubgroupSize(
    Context &context, vk::VkFormat format, const SSBOData *extraData, uint32_t extraDataCount, const void *internalData,
    subgroups::CheckResult checkResult, const uint32_t vertexShaderStageCreateFlags,
    const uint32_t requiredSubgroupSize)
{
    const DeviceInterface &vk = context.getDeviceInterface();
    const VkDevice device = context.getDevice();
    const VkQueue queue = context.getUniversalQueue();
    const uint32_t maxWidth = getMaxWidth();
    const uint32_t queueFamilyIndex = context.getUniversalQueueFamilyIndex();
    vector<de::SharedPtr<BufferOrImage>> inputBuffers(extraDataCount);
    DescriptorSetLayoutBuilder layoutBuilder;
    const Unique<VkShaderModule> vertexShaderModule(
        createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
    const Unique<VkShaderModule> fragmentShaderModule(
        createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
    const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
    // One vec4 position attribute per vertex, fetched from a vertex buffer filled below.
    const VkVertexInputBindingDescription vertexInputBinding = {
        0u,                                      // binding;
        static_cast<uint32_t>(sizeof(tcu::Vec4)), // stride;
        VK_VERTEX_INPUT_RATE_VERTEX               // inputRate
    };
    const VkVertexInputAttributeDescription vertexInputAttribute = {0u, 0u, VK_FORMAT_R32G32B32A32_SFLOAT, 0u};

    // Create the extra input resources and fill them with their initial contents.
    for (uint32_t i = 0u; i < extraDataCount; i++)
    {
        if (extraData[i].isImage())
        {
            inputBuffers[i] = de::SharedPtr<BufferOrImage>(
                new Image(context, static_cast<uint32_t>(extraData[i].numElements), 1u, extraData[i].format));
        }
        else
        {
            DE_ASSERT(extraData[i].isUBO());
            vk::VkDeviceSize size =
                getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
            inputBuffers[i] =
                de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
        }
        const Allocation &alloc = inputBuffers[i]->getAllocation();
        initializeMemory(context, alloc, extraData[i]);
    }

    for (uint32_t ndx = 0u; ndx < extraDataCount; ndx++)
        layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_VERTEX_BIT, nullptr);

    const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));

    const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));

    // Index 0 corresponds to the vertex stage; the remaining stage slots carry no requirement.
    const uint32_t requiredSubgroupSizes[5] = {requiredSubgroupSize, 0u, 0u, 0u, 0u};
    const Unique<VkPipeline> pipeline(makeGraphicsPipeline(
        context, *pipelineLayout, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, *vertexShaderModule,
        *fragmentShaderModule, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, *renderPass,
        VK_PRIMITIVE_TOPOLOGY_POINT_LIST, &vertexInputBinding, &vertexInputAttribute, true, format,
        vertexShaderStageCreateFlags, 0u, 0u, 0u, 0u, requiredSubgroupSize != 0u ? requiredSubgroupSizes : nullptr));
    DescriptorPoolBuilder poolBuilder;
    DescriptorSetUpdateBuilder updateBuilder;

    for (uint32_t ndx = 0u; ndx < inputBuffers.size(); ndx++)
        poolBuilder.addType(inputBuffers[ndx]->getType());

    Move<VkDescriptorPool> descriptorPool;
    Move<VkDescriptorSet> descriptorSet;

    if (extraDataCount > 0)
    {
        descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
        descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
    }

    // NOTE(review): the extra data was already initialized at creation above and is re-initialized
    // again before every draw; this pass appears redundant but is kept as-is.
    for (uint32_t ndx = 0u; ndx < extraDataCount; ndx++)
    {
        const Allocation &alloc = inputBuffers[ndx]->getAllocation();
        initializeMemory(context, alloc, extraData[ndx]);
    }

    // Bind every extra resource at binding index == its position in inputBuffers.
    for (uint32_t buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
    {
        if (inputBuffers[buffersNdx]->isImage())
        {
            VkDescriptorImageInfo info = makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
                                                                 inputBuffers[buffersNdx]->getAsImage()->getImageView(),
                                                                 VK_IMAGE_LAYOUT_GENERAL);

            updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
                                      inputBuffers[buffersNdx]->getType(), &info);
        }
        else
        {
            VkDescriptorBufferInfo info =
                makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(), 0ull,
                                         inputBuffers[buffersNdx]->getAsBuffer()->getSize());

            updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
                                      inputBuffers[buffersNdx]->getType(), &info);
        }
    }
    updateBuilder.update(vk, device);

    const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));

    const uint32_t subgroupSize = getSubgroupSize(context);

    const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(context, *cmdPool));

    const vk::VkDeviceSize vertexBufferSize = maxWidth * sizeof(tcu::Vec4);
    Buffer vertexBuffer(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);

    unsigned totalIterations = 0u;
    unsigned failedIterations = 0u;

    // The attachment contents are only read back through imageBufferResult, hence "discardable".
    Image discardableImage(context, maxWidth, 1u, format,
                           VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);

    // Fill the vertex buffer with one point per pixel, centered horizontally in NDC [-1, 1].
    {
        const Allocation &alloc = vertexBuffer.getAllocation();
        std::vector<tcu::Vec4> data(maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
        const float pixelSize = 2.0f / static_cast<float>(maxWidth);
        float leftHandPosition = -1.0f;

        for (uint32_t ndx = 0u; ndx < maxWidth; ++ndx)
        {
            data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
            leftHandPosition += pixelSize;
        }

        deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
        flushAlloc(vk, device, alloc);
    }

    const Unique<VkFramebuffer> framebuffer(
        makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
    const VkViewport viewport = makeViewport(maxWidth, 1u);
    const VkRect2D scissor = makeRect2D(maxWidth, 1u);
    const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
    Buffer imageBufferResult(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
    const VkDeviceSize vertexBufferOffset = 0u;

    for (uint32_t width = 1u; width < maxWidth; width = getNextWidth(width))
    {
        totalIterations++;

        // Restore the extra inputs to their initial contents before each draw.
        for (uint32_t ndx = 0u; ndx < inputBuffers.size(); ndx++)
        {
            const Allocation &alloc = inputBuffers[ndx]->getAllocation();
            initializeMemory(context, alloc, extraData[ndx]);
        }

        beginCommandBuffer(vk, *cmdBuffer);
        {
            vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);

            vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);

            beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));

            vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);

            if (extraDataCount > 0)
            {
                vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
                                         &descriptorSet.get(), 0u, nullptr);
            }

            vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);

            vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);

            endRenderPass(vk, *cmdBuffer);

            // The full attachment width is copied out; checkResult is told only 'width' pixels are valid.
            copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(),
                              tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
                              VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);

            endCommandBuffer(vk, *cmdBuffer);

            submitCommandsAndWait(vk, device, queue, *cmdBuffer);
        }
        context.resetCommandPoolForVKSC(device, *cmdPool);

        {
            const Allocation &allocResult = imageBufferResult.getAllocation();
            invalidateAlloc(vk, device, allocResult);

            std::vector<const void *> datas;
            datas.push_back(allocResult.getHostPtr());
            if (!checkResult(internalData, datas, width, subgroupSize))
                failedIterations++;
        }
    }

    if (0 < failedIterations)
    {
        unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);

        context.getTestContext().getLog()
            << TestLog::Message << valuesPassed << " / " << totalIterations << " values passed" << TestLog::EndMessage;

        return tcu::TestStatus::fail("Failed!");
    }

    return tcu::TestStatus::pass("OK");
}
3669
makeFragmentFrameBufferTest(Context & context,VkFormat format,const SSBOData * extraDatas,uint32_t extraDatasCount,const void * internalData,CheckResultFragment checkResult)3670 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTest(Context &context, VkFormat format,
3671 const SSBOData *extraDatas, uint32_t extraDatasCount,
3672 const void *internalData, CheckResultFragment checkResult)
3673 {
3674 return makeFragmentFrameBufferTestRequiredSubgroupSize(context, format, extraDatas, extraDatasCount, internalData,
3675 checkResult, 0u, 0u);
3676 }
3677
makeFragmentFrameBufferTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * extraDatas,uint32_t extraDatasCount,const void * internalData,CheckResultFragment checkResult,const uint32_t fragmentShaderStageCreateFlags,const uint32_t requiredSubgroupSize)3678 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTestRequiredSubgroupSize(
3679 Context &context, VkFormat format, const SSBOData *extraDatas, uint32_t extraDatasCount, const void *internalData,
3680 CheckResultFragment checkResult, const uint32_t fragmentShaderStageCreateFlags, const uint32_t requiredSubgroupSize)
3681 {
3682 const DeviceInterface &vk = context.getDeviceInterface();
3683 const VkDevice device = context.getDevice();
3684 const VkQueue queue = context.getUniversalQueue();
3685 const uint32_t queueFamilyIndex = context.getUniversalQueueFamilyIndex();
3686 const Unique<VkShaderModule> vertexShaderModule(
3687 createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
3688 const Unique<VkShaderModule> fragmentShaderModule(
3689 createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
3690 std::vector<de::SharedPtr<BufferOrImage>> inputBuffers(extraDatasCount);
3691
3692 for (uint32_t i = 0; i < extraDatasCount; i++)
3693 {
3694 if (extraDatas[i].isImage())
3695 {
3696 inputBuffers[i] = de::SharedPtr<BufferOrImage>(
3697 new Image(context, static_cast<uint32_t>(extraDatas[i].numElements), 1, extraDatas[i].format));
3698 }
3699 else
3700 {
3701 DE_ASSERT(extraDatas[i].isUBO());
3702
3703 const vk::VkDeviceSize size =
3704 getElementSizeInBytes(extraDatas[i].format, extraDatas[i].layout) * extraDatas[i].numElements;
3705
3706 inputBuffers[i] =
3707 de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
3708 }
3709
3710 const Allocation &alloc = inputBuffers[i]->getAllocation();
3711
3712 initializeMemory(context, alloc, extraDatas[i]);
3713 }
3714
3715 DescriptorSetLayoutBuilder layoutBuilder;
3716
3717 for (uint32_t i = 0; i < extraDatasCount; i++)
3718 {
3719 layoutBuilder.addBinding(inputBuffers[i]->getType(), 1, VK_SHADER_STAGE_FRAGMENT_BIT, nullptr);
3720 }
3721
3722 const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
3723 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
3724 const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
3725 const uint32_t requiredSubgroupSizes[5] = {0u, 0u, 0u, 0u, requiredSubgroupSize};
3726 const Unique<VkPipeline> pipeline(makeGraphicsPipeline(
3727 context, *pipelineLayout, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, *vertexShaderModule,
3728 *fragmentShaderModule, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, *renderPass,
3729 VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, nullptr, nullptr, true, VK_FORMAT_R32G32B32A32_SFLOAT, 0u, 0u, 0u, 0u,
3730 fragmentShaderStageCreateFlags, requiredSubgroupSize != 0u ? requiredSubgroupSizes : nullptr));
3731 DescriptorPoolBuilder poolBuilder;
3732
3733 // To stop validation complaining, always add at least one type to pool.
3734 poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3735 for (uint32_t i = 0; i < extraDatasCount; i++)
3736 {
3737 poolBuilder.addType(inputBuffers[i]->getType());
3738 }
3739
3740 Move<VkDescriptorPool> descriptorPool;
3741 // Create descriptor set
3742 Move<VkDescriptorSet> descriptorSet;
3743
3744 if (extraDatasCount > 0)
3745 {
3746 descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3747
3748 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3749 }
3750
3751 DescriptorSetUpdateBuilder updateBuilder;
3752
3753 for (uint32_t i = 0; i < extraDatasCount; i++)
3754 {
3755 if (inputBuffers[i]->isImage())
3756 {
3757 const VkDescriptorImageInfo info =
3758 makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
3759 inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3760
3761 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i),
3762 inputBuffers[i]->getType(), &info);
3763 }
3764 else
3765 {
3766 const VkDescriptorBufferInfo info = makeDescriptorBufferInfo(
3767 inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, inputBuffers[i]->getAsBuffer()->getSize());
3768
3769 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i),
3770 inputBuffers[i]->getType(), &info);
3771 }
3772 }
3773
3774 if (extraDatasCount > 0)
3775 updateBuilder.update(vk, device);
3776
3777 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
3778 const uint32_t subgroupSize = getSubgroupSize(context);
3779 const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(context, *cmdPool));
3780 unsigned totalIterations = 0;
3781 unsigned failedIterations = 0;
3782
3783 for (uint32_t width = 8; width <= subgroupSize; width *= 2)
3784 {
3785 for (uint32_t height = 8; height <= subgroupSize; height *= 2)
3786 {
3787 totalIterations++;
3788
3789 // re-init the data
3790 for (uint32_t i = 0; i < extraDatasCount; i++)
3791 {
3792 const Allocation &alloc = inputBuffers[i]->getAllocation();
3793
3794 initializeMemory(context, alloc, extraDatas[i]);
3795 }
3796
3797 const VkDeviceSize formatSize = getFormatSizeInBytes(format);
3798 const VkDeviceSize resultImageSizeInBytes = width * height * formatSize;
3799 Image resultImage(context, width, height, format,
3800 VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3801 Buffer resultBuffer(context, resultImageSizeInBytes, VK_IMAGE_USAGE_TRANSFER_DST_BIT);
3802 const Unique<VkFramebuffer> framebuffer(
3803 makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), width, height));
3804 VkViewport viewport = makeViewport(width, height);
3805 VkRect2D scissor = {{0, 0}, {width, height}};
3806
3807 beginCommandBuffer(vk, *cmdBuffer);
3808
3809 vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3810
3811 vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3812
3813 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, width, height),
3814 tcu::Vec4(0.0f));
3815
3816 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3817
3818 if (extraDatasCount > 0)
3819 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3820 &descriptorSet.get(), 0u, nullptr);
3821
3822 vk.cmdDraw(*cmdBuffer, 4, 1, 0, 0);
3823
3824 endRenderPass(vk, *cmdBuffer);
3825
3826 copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), resultBuffer.getBuffer(),
3827 tcu::IVec2(width, height), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
3828 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3829
3830 endCommandBuffer(vk, *cmdBuffer);
3831
3832 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3833
3834 std::vector<const void *> datas;
3835 {
3836 const Allocation &resultAlloc = resultBuffer.getAllocation();
3837 invalidateAlloc(vk, device, resultAlloc);
3838
3839 // we always have our result data first
3840 datas.push_back(resultAlloc.getHostPtr());
3841 }
3842
3843 if (!checkResult(internalData, datas, width, height, subgroupSize))
3844 {
3845 failedIterations++;
3846 }
3847
3848 context.resetCommandPoolForVKSC(device, *cmdPool);
3849 }
3850 }
3851
3852 if (0 < failedIterations)
3853 {
3854 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3855
3856 context.getTestContext().getLog()
3857 << TestLog::Message << valuesPassed << " / " << totalIterations << " values passed" << TestLog::EndMessage;
3858
3859 return tcu::TestStatus::fail("Failed!");
3860 }
3861
3862 return tcu::TestStatus::pass("OK");
3863 }
3864
makeComputePipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderModule shaderModule,const uint32_t pipelineShaderStageFlags,const uint32_t pipelineCreateFlags,VkPipeline basePipelineHandle,uint32_t localSizeX,uint32_t localSizeY,uint32_t localSizeZ,uint32_t requiredSubgroupSize)3865 Move<VkPipeline> makeComputePipeline(Context &context, const VkPipelineLayout pipelineLayout,
3866 const VkShaderModule shaderModule, const uint32_t pipelineShaderStageFlags,
3867 const uint32_t pipelineCreateFlags, VkPipeline basePipelineHandle,
3868 uint32_t localSizeX, uint32_t localSizeY, uint32_t localSizeZ,
3869 uint32_t requiredSubgroupSize)
3870 {
3871 const uint32_t localSize[3] = {localSizeX, localSizeY, localSizeZ};
3872 const vk::VkSpecializationMapEntry entries[3] = {
3873 {0, sizeof(uint32_t) * 0, sizeof(uint32_t)},
3874 {1, sizeof(uint32_t) * 1, sizeof(uint32_t)},
3875 {2, static_cast<uint32_t>(sizeof(uint32_t) * 2), sizeof(uint32_t)},
3876 };
3877 const vk::VkSpecializationInfo info = {/* mapEntryCount = */ 3,
3878 /* pMapEntries = */ entries,
3879 /* dataSize = */ sizeof(localSize),
3880 /* pData = */ localSize};
3881 const vk::VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroupSizeCreateInfo = {
3882 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, // VkStructureType sType;
3883 nullptr, // void* pNext;
3884 requiredSubgroupSize // uint32_t requiredSubgroupSize;
3885 };
3886 const vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams = {
3887 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
3888 (requiredSubgroupSize != 0u ? &subgroupSizeCreateInfo : nullptr), // const void* pNext;
3889 pipelineShaderStageFlags, // VkPipelineShaderStageCreateFlags flags;
3890 VK_SHADER_STAGE_COMPUTE_BIT, // VkShaderStageFlagBits stage;
3891 shaderModule, // VkShaderModule module;
3892 "main", // const char* pName;
3893 &info, // const VkSpecializationInfo* pSpecializationInfo;
3894 };
3895 const vk::VkComputePipelineCreateInfo pipelineCreateInfo = {
3896 VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType sType;
3897 nullptr, // const void* pNext;
3898 pipelineCreateFlags, // VkPipelineCreateFlags flags;
3899 pipelineShaderStageParams, // VkPipelineShaderStageCreateInfo stage;
3900 pipelineLayout, // VkPipelineLayout layout;
3901 #ifndef CTS_USES_VULKANSC
3902 basePipelineHandle, // VkPipeline basePipelineHandle;
3903 -1, // int32_t basePipelineIndex;
3904 #else
3905 VK_NULL_HANDLE, // VkPipeline basePipelineHandle;
3906 0, // int32_t basePipelineIndex;
3907 #endif // CTS_USES_VULKANSC
3908 };
3909 static_cast<void>(basePipelineHandle);
3910
3911 return createComputePipeline(context.getDeviceInterface(), context.getDevice(), VK_NULL_HANDLE,
3912 &pipelineCreateInfo);
3913 }
3914
3915 #ifndef CTS_USES_VULKANSC
makeMeshPipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderModule taskModule,const VkShaderModule meshModule,const uint32_t pipelineShaderStageFlags,const uint32_t pipelineCreateFlags,VkPipeline basePipelineHandle,uint32_t localSizeX,uint32_t localSizeY,uint32_t localSizeZ,uint32_t requiredSubgroupSize,const VkRenderPass renderPass)3916 Move<VkPipeline> makeMeshPipeline(Context &context, const VkPipelineLayout pipelineLayout,
3917 const VkShaderModule taskModule, const VkShaderModule meshModule,
3918 const uint32_t pipelineShaderStageFlags, const uint32_t pipelineCreateFlags,
3919 VkPipeline basePipelineHandle, uint32_t localSizeX, uint32_t localSizeY,
3920 uint32_t localSizeZ, uint32_t requiredSubgroupSize, const VkRenderPass renderPass)
3921 {
3922 const uint32_t localSize[3] = {localSizeX, localSizeY, localSizeZ};
3923 const vk::VkSpecializationMapEntry entries[3] = {
3924 {0, sizeof(uint32_t) * 0, sizeof(uint32_t)},
3925 {1, sizeof(uint32_t) * 1, sizeof(uint32_t)},
3926 {2, static_cast<uint32_t>(sizeof(uint32_t) * 2), sizeof(uint32_t)},
3927 };
3928 const vk::VkSpecializationInfo info = {/* mapEntryCount = */ 3,
3929 /* pMapEntries = */ entries,
3930 /* dataSize = */ sizeof(localSize),
3931 /* pData = */ localSize};
3932 const vk::VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroupSizeCreateInfo = {
3933 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, // VkStructureType sType;
3934 nullptr, // void* pNext;
3935 requiredSubgroupSize // uint32_t requiredSubgroupSize;
3936 };
3937
3938 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *pSubgroupSizeCreateInfo =
3939 ((requiredSubgroupSize != 0u) ? &subgroupSizeCreateInfo : nullptr);
3940
3941 std::vector<VkPipelineShaderStageCreateInfo> shaderStageParams;
3942 vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams = {
3943 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
3944 nullptr, // const void* pNext;
3945 pipelineShaderStageFlags, // VkPipelineShaderStageCreateFlags flags;
3946 VK_SHADER_STAGE_FLAG_BITS_MAX_ENUM, // VkShaderStageFlagBits stage;
3947 VK_NULL_HANDLE, // VkShaderModule module;
3948 "main", // const char* pName;
3949 &info, // const VkSpecializationInfo* pSpecializationInfo;
3950 };
3951
3952 if (taskModule != VK_NULL_HANDLE)
3953 {
3954 pipelineShaderStageParams.module = taskModule;
3955 pipelineShaderStageParams.pNext = pSubgroupSizeCreateInfo;
3956 pipelineShaderStageParams.stage = VK_SHADER_STAGE_TASK_BIT_EXT;
3957 shaderStageParams.push_back(pipelineShaderStageParams);
3958 }
3959
3960 if (meshModule != VK_NULL_HANDLE)
3961 {
3962 pipelineShaderStageParams.module = meshModule;
3963 pipelineShaderStageParams.pNext = ((taskModule == VK_NULL_HANDLE) ? pSubgroupSizeCreateInfo : nullptr);
3964 pipelineShaderStageParams.stage = VK_SHADER_STAGE_MESH_BIT_EXT;
3965 shaderStageParams.push_back(pipelineShaderStageParams);
3966 }
3967
3968 const std::vector<VkViewport> viewports(1u, makeViewport(1u, 1u));
3969 const std::vector<VkRect2D> scissors(1u, makeRect2D(1u, 1u));
3970
3971 return makeGraphicsPipeline(context.getDeviceInterface(), context.getDevice(), basePipelineHandle, pipelineLayout,
3972 pipelineCreateFlags, shaderStageParams, renderPass, viewports, scissors);
3973 }
3974 #endif // CTS_USES_VULKANSC
3975
// Shared implementation for compute and mesh-shading subgroup tests.
//
// Builds one pipeline per entry in localSizesToTest (the last entry is never
// dispatched; it only exists so pipeline creation can be double-buffered),
// dispatches/draws each one with the given numWorkgroups, and feeds the result
// buffer plus any non-image inputs to checkResult. When isRequiredSubgroupSize
// is set, pipelines are created with a required subgroup size of subgroupSize.
// Returns pass when every iteration's checkResult succeeds.
tcu::TestStatus makeComputeOrMeshTestRequiredSubgroupSize(
    ComputeLike testType, Context &context, VkFormat format, const vkt::subgroups::SSBOData *inputs,
    uint32_t inputsCount, const void *internalData, vkt::subgroups::CheckResultCompute checkResult,
    const uint32_t pipelineShaderStageCreateFlags, const uint32_t numWorkgroups[3], const bool isRequiredSubgroupSize,
    const uint32_t subgroupSize, const uint32_t localSizesToTest[][3], const uint32_t localSizesToTestCount)
{
    const DeviceInterface &vk = context.getDeviceInterface();
    const VkDevice device = context.getDevice();
    const VkQueue queue = context.getUniversalQueue();
    const uint32_t queueFamilyIndex = context.getUniversalQueueFamilyIndex();
    const auto &subgroupSizeControlProperties = context.getSubgroupSizeControlProperties();
    const VkDeviceSize elementSize = getFormatSizeInBytes(format);
    // Upper bound on the subgroup size actually used; the result buffer is
    // sized generously (cube of this bound) so it covers every local size below.
    const VkDeviceSize maxSubgroupSize = isRequiredSubgroupSize ? deMax32(subgroupSizeControlProperties.maxSubgroupSize,
                                                                          vkt::subgroups::maxSupportedSubgroupSize()) :
                                                                  vkt::subgroups::maxSupportedSubgroupSize();
    const VkDeviceSize resultBufferSize = maxSubgroupSize * maxSubgroupSize * maxSubgroupSize;
    const VkDeviceSize resultBufferSizeInBytes = resultBufferSize * elementSize;
    Buffer resultBuffer(context, resultBufferSizeInBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
    std::vector<de::SharedPtr<BufferOrImage>> inputBuffers(inputsCount);
    // Stage/bind-point/stage-mask selection depends on whether this runs as a
    // compute test or as a mesh (task+mesh) test. Mesh shading is unavailable
    // in Vulkan SC, hence the preprocessor split.
    const auto shaderStageFlags =
        ((testType == ComputeLike::COMPUTE) ? VK_SHADER_STAGE_COMPUTE_BIT
#ifndef CTS_USES_VULKANSC
                                            :
                                            (VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_TASK_BIT_EXT));
#else
                                            :
                                            0);
#endif // CTS_USES_VULKANSC
    const auto pipelineBindPoint =
        ((testType == ComputeLike::COMPUTE) ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS);
    const auto pipelineStage = ((testType == ComputeLike::COMPUTE) ?
                                    VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT
#ifndef CTS_USES_VULKANSC
                                    :
                                    (VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT | VK_PIPELINE_STAGE_MESH_SHADER_BIT_EXT));
#else
                                    :
                                    0);
#endif // CTS_USES_VULKANSC
    // Mesh tests render into a minimal 1x1 framebuffer; the real output goes
    // to the storage buffer, not the render target.
    const auto renderArea = makeRect2D(1u, 1u);

    std::vector<tcu::UVec3> usedLocalSizes;
    for (uint32_t i = 0; i < localSizesToTestCount; ++i)
    {
        usedLocalSizes.push_back(tcu::UVec3(localSizesToTest[i][0], localSizesToTest[i][1], localSizesToTest[i][2]));
    }

    // Create and initialize one buffer or image per extra input.
    for (uint32_t i = 0; i < inputsCount; i++)
    {
        if (inputs[i].isImage())
        {
            inputBuffers[i] = de::SharedPtr<BufferOrImage>(
                new Image(context, static_cast<uint32_t>(inputs[i].numElements), 1, inputs[i].format));
        }
        else
        {
            const auto usage =
                (inputs[i].isUBO() ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT : VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
            const auto size = getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
            inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, usage));
        }

        const Allocation &alloc = inputBuffers[i]->getAllocation();

        initializeMemory(context, alloc, inputs[i]);
    }

    // Descriptor set layout: binding 0 is the result buffer, bindings 1..N the inputs.
    DescriptorSetLayoutBuilder layoutBuilder;
    layoutBuilder.addBinding(resultBuffer.getType(), 1, shaderStageFlags, nullptr);

    for (uint32_t i = 0; i < inputsCount; i++)
    {
        layoutBuilder.addBinding(inputBuffers[i]->getType(), 1, shaderStageFlags, nullptr);
    }

    const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));

    // Load the shader modules matching the requested pipeline kind; the task
    // shader is optional for mesh tests.
    Move<VkShaderModule> compShader;
    Move<VkShaderModule> meshShader;
    Move<VkShaderModule> taskShader;
    const auto &binaries = context.getBinaryCollection();

    if (testType == ComputeLike::COMPUTE)
    {
        compShader = createShaderModule(vk, device, binaries.get("comp"));
    }
    else if (testType == ComputeLike::MESH)
    {
        meshShader = createShaderModule(vk, device, binaries.get("mesh"));
        if (binaries.contains("task"))
            taskShader = createShaderModule(vk, device, binaries.get("task"));
    }
    else
    {
        DE_ASSERT(false);
    }

    const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));

    DescriptorPoolBuilder poolBuilder;

    poolBuilder.addType(resultBuffer.getType());

    for (uint32_t i = 0; i < inputsCount; i++)
    {
        poolBuilder.addType(inputBuffers[i]->getType());
    }

    const Unique<VkDescriptorPool> descriptorPool(
        poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
    const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
    const VkDescriptorBufferInfo resultDescriptorInfo =
        makeDescriptorBufferInfo(resultBuffer.getBuffer(), 0ull, resultBufferSizeInBytes);
    DescriptorSetUpdateBuilder updateBuilder;

    updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
                              VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);

    // Input i lives at binding i + 1 (binding 0 is the result buffer).
    for (uint32_t i = 0; i < inputsCount; i++)
    {
        if (inputBuffers[i]->isImage())
        {
            const VkDescriptorImageInfo info =
                makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
                                        inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);

            updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i + 1),
                                      inputBuffers[i]->getType(), &info);
        }
        else
        {
            vk::VkDeviceSize size = getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
            VkDescriptorBufferInfo info =
                makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, size);

            updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i + 1),
                                      inputBuffers[i]->getType(), &info);
        }
    }

    updateBuilder.update(vk, device);

    const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, device, queueFamilyIndex));
    unsigned totalIterations = 0;
    unsigned failedIterations = 0;
    const Unique<VkCommandBuffer> cmdBuffer(makeCommandBuffer(context, *cmdPool));
    std::vector<de::SharedPtr<Move<VkPipeline>>> pipelines(localSizesToTestCount);
    // 0 means "no required subgroup size" for the pipeline helpers.
    const auto reqSubgroupSize = (isRequiredSubgroupSize ? subgroupSize : 0u);
    Move<VkRenderPass> renderPass;
    Move<VkFramebuffer> framebuffer;

    // Mesh path needs a (trivial, attachment-less) render pass and framebuffer.
    if (testType == ComputeLike::MESH)
    {
        renderPass = makeRenderPass(vk, device);
        framebuffer = makeFramebuffer(vk, device, renderPass.get(), 0u, nullptr, renderArea.extent.width,
                                      renderArea.extent.height);
    }

    // Pipeline 0 is created first and allows derivatives; the remaining
    // pipelines are created as derivatives of it below. Watchdog touches
    // bracket each (potentially slow) pipeline compilation.
    context.getTestContext().touchWatchdog();
    {
        if (testType == ComputeLike::COMPUTE)
        {
            pipelines[0] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeComputePipeline(
                context, *pipelineLayout, *compShader, pipelineShaderStageCreateFlags,
#ifndef CTS_USES_VULKANSC
                VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT,
#else
                0u,
#endif // CTS_USES_VULKANSC
                VK_NULL_HANDLE, usedLocalSizes[0][0], usedLocalSizes[0][1], usedLocalSizes[0][2], reqSubgroupSize)));
        }
#ifndef CTS_USES_VULKANSC
        else if (testType == ComputeLike::MESH)
        {
            pipelines[0] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeMeshPipeline(
                context, pipelineLayout.get(), taskShader.get(), meshShader.get(), pipelineShaderStageCreateFlags,
                VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT, VK_NULL_HANDLE, usedLocalSizes[0][0], usedLocalSizes[0][1],
                usedLocalSizes[0][2], reqSubgroupSize, renderPass.get())));
        }
#endif // CTS_USES_VULKANSC
        else
        {
            DE_ASSERT(false);
        }
    }
    context.getTestContext().touchWatchdog();

    // Create the remaining pipelines as derivatives of pipeline 0. Note the
    // last localSizesToTest entry is intentionally skipped (it is never run).
    for (uint32_t index = 1; index < (localSizesToTestCount - 1); index++)
    {
        const uint32_t nextX = usedLocalSizes[index][0];
        const uint32_t nextY = usedLocalSizes[index][1];
        const uint32_t nextZ = usedLocalSizes[index][2];

        context.getTestContext().touchWatchdog();
        {
            if (testType == ComputeLike::COMPUTE)
            {
                pipelines[index] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(
                    makeComputePipeline(context, *pipelineLayout, *compShader, pipelineShaderStageCreateFlags,
#ifndef CTS_USES_VULKANSC
                                        VK_PIPELINE_CREATE_DERIVATIVE_BIT,
#else
                                        0u,
#endif // CTS_USES_VULKANSC
                                        **pipelines[0], nextX, nextY, nextZ, reqSubgroupSize)));
            }
#ifndef CTS_USES_VULKANSC
            else if (testType == ComputeLike::MESH)
            {
                pipelines[index] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeMeshPipeline(
                    context, pipelineLayout.get(), taskShader.get(), meshShader.get(), pipelineShaderStageCreateFlags,
                    VK_PIPELINE_CREATE_DERIVATIVE_BIT, pipelines[0].get()->get(), nextX, nextY, nextZ, reqSubgroupSize,
                    renderPass.get())));
            }
#endif // CTS_USES_VULKANSC
            else
            {
                DE_ASSERT(false);
            }
        }
        context.getTestContext().touchWatchdog();
    }

    // Run one iteration per local size (again skipping the unused last entry).
    for (uint32_t index = 0; index < (localSizesToTestCount - 1); index++)
    {
        // we are running one test
        totalIterations++;

        beginCommandBuffer(vk, *cmdBuffer);
        {
            if (testType == ComputeLike::MESH)
                beginRenderPass(vk, *cmdBuffer, renderPass.get(), framebuffer.get(), renderArea);

            vk.cmdBindPipeline(*cmdBuffer, pipelineBindPoint, **pipelines[index]);

            vk.cmdBindDescriptorSets(*cmdBuffer, pipelineBindPoint, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u,
                                     nullptr);

            if (testType == ComputeLike::COMPUTE)
                vk.cmdDispatch(*cmdBuffer, numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
#ifndef CTS_USES_VULKANSC
            else if (testType == ComputeLike::MESH)
                vk.cmdDrawMeshTasksEXT(*cmdBuffer, numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
                //vk.cmdDrawMeshTasksNV(*cmdBuffer, numWorkgroups[0], 0);
#endif // CTS_USES_VULKANSC
            else
                DE_ASSERT(false);

            if (testType == ComputeLike::MESH)
                endRenderPass(vk, *cmdBuffer);
        }

        // Make shader writes available.
        const auto postShaderBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
        vk.cmdPipelineBarrier(*cmdBuffer, pipelineStage, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u, &postShaderBarrier, 0u,
                              nullptr, 0u, nullptr);

        endCommandBuffer(vk, *cmdBuffer);

        submitCommandsAndWait(vk, device, queue, *cmdBuffer);

        // Gather host pointers for checkResult: result buffer first, then all
        // non-image inputs (images are not read back).
        std::vector<const void *> datas;

        {
            const Allocation &resultAlloc = resultBuffer.getAllocation();
            invalidateAlloc(vk, device, resultAlloc);

            // we always have our result data first
            datas.push_back(resultAlloc.getHostPtr());
        }

        for (uint32_t i = 0; i < inputsCount; i++)
        {
            if (!inputBuffers[i]->isImage())
            {
                const Allocation &resultAlloc = inputBuffers[i]->getAllocation();
                invalidateAlloc(vk, device, resultAlloc);

                // we always have our result data first
                datas.push_back(resultAlloc.getHostPtr());
            }
        }

        if (!checkResult(internalData, datas, numWorkgroups, usedLocalSizes[index].getPtr(), subgroupSize))
        {
            failedIterations++;
        }

        context.resetCommandPoolForVKSC(device, *cmdPool);
    }

    // Report pass/fail with a summary of how many iterations succeeded.
    if (0 < failedIterations)
    {
        unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);

        context.getTestContext().getLog()
            << TestLog::Message << valuesPassed << " / " << totalIterations << " values passed" << TestLog::EndMessage;

        return tcu::TestStatus::fail("Failed!");
    }

    return tcu::TestStatus::pass("OK");
}
4279
makeComputeTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * inputs,uint32_t inputsCount,const void * internalData,CheckResultCompute checkResult,const uint32_t pipelineShaderStageCreateFlags,const uint32_t numWorkgroups[3],const bool isRequiredSubgroupSize,const uint32_t subgroupSize,const uint32_t localSizesToTest[][3],const uint32_t localSizesToTestCount)4280 tcu::TestStatus vkt::subgroups::makeComputeTestRequiredSubgroupSize(
4281 Context &context, VkFormat format, const SSBOData *inputs, uint32_t inputsCount, const void *internalData,
4282 CheckResultCompute checkResult, const uint32_t pipelineShaderStageCreateFlags, const uint32_t numWorkgroups[3],
4283 const bool isRequiredSubgroupSize, const uint32_t subgroupSize, const uint32_t localSizesToTest[][3],
4284 const uint32_t localSizesToTestCount)
4285 {
4286 return makeComputeOrMeshTestRequiredSubgroupSize(ComputeLike::COMPUTE, context, format, inputs, inputsCount,
4287 internalData, checkResult, pipelineShaderStageCreateFlags,
4288 numWorkgroups, isRequiredSubgroupSize, subgroupSize,
4289 localSizesToTest, localSizesToTestCount);
4290 }
4291
makeMeshTestRequiredSubgroupSize(Context & context,VkFormat format,const SSBOData * inputs,uint32_t inputsCount,const void * internalData,CheckResultCompute checkResult,const uint32_t pipelineShaderStageCreateFlags,const uint32_t numWorkgroups[3],const bool isRequiredSubgroupSize,const uint32_t subgroupSize,const uint32_t localSizesToTest[][3],const uint32_t localSizesToTestCount)4292 tcu::TestStatus vkt::subgroups::makeMeshTestRequiredSubgroupSize(
4293 Context &context, VkFormat format, const SSBOData *inputs, uint32_t inputsCount, const void *internalData,
4294 CheckResultCompute checkResult, const uint32_t pipelineShaderStageCreateFlags, const uint32_t numWorkgroups[3],
4295 const bool isRequiredSubgroupSize, const uint32_t subgroupSize, const uint32_t localSizesToTest[][3],
4296 const uint32_t localSizesToTestCount)
4297 {
4298 return makeComputeOrMeshTestRequiredSubgroupSize(ComputeLike::MESH, context, format, inputs, inputsCount,
4299 internalData, checkResult, pipelineShaderStageCreateFlags,
4300 numWorkgroups, isRequiredSubgroupSize, subgroupSize,
4301 localSizesToTest, localSizesToTestCount);
4302 }
4303
makeComputeOrMeshTest(ComputeLike testType,Context & context,VkFormat format,const vkt::subgroups::SSBOData * inputs,uint32_t inputsCount,const void * internalData,vkt::subgroups::CheckResultCompute checkResult,uint32_t requiredSubgroupSize,const uint32_t pipelineShaderStageCreateFlags)4304 tcu::TestStatus makeComputeOrMeshTest(ComputeLike testType, Context &context, VkFormat format,
4305 const vkt::subgroups::SSBOData *inputs, uint32_t inputsCount,
4306 const void *internalData, vkt::subgroups::CheckResultCompute checkResult,
4307 uint32_t requiredSubgroupSize, const uint32_t pipelineShaderStageCreateFlags)
4308 {
4309 const uint32_t numWorkgroups[3] = {4, 2, 2};
4310 const bool isRequiredSubgroupSize = (requiredSubgroupSize != 0u);
4311 const uint32_t subgroupSize =
4312 (isRequiredSubgroupSize ? requiredSubgroupSize : vkt::subgroups::getSubgroupSize(context));
4313
4314 const uint32_t localSizesToTestCount = 8;
4315 uint32_t localSizesToTest[localSizesToTestCount][3] = {
4316 {1, 1, 1}, {subgroupSize, 1, 1}, {1, subgroupSize, 1}, {1, 1, subgroupSize}, {32, 4, 1}, {1, 4, 32}, {3, 5, 7},
4317 {1, 1, 1} // Isn't used, just here to make double buffering checks easier
4318 };
4319
4320 if (testType == ComputeLike::COMPUTE)
4321 return makeComputeTestRequiredSubgroupSize(
4322 context, format, inputs, inputsCount, internalData, checkResult, pipelineShaderStageCreateFlags,
4323 numWorkgroups, isRequiredSubgroupSize, subgroupSize, localSizesToTest, localSizesToTestCount);
4324 else
4325 return makeMeshTestRequiredSubgroupSize(context, format, inputs, inputsCount, internalData, checkResult,
4326 pipelineShaderStageCreateFlags, numWorkgroups, isRequiredSubgroupSize,
4327 subgroupSize, localSizesToTest, localSizesToTestCount);
4328 }
4329
makeComputeTest(Context & context,VkFormat format,const SSBOData * inputs,uint32_t inputsCount,const void * internalData,CheckResultCompute checkResult,uint32_t requiredSubgroupSize,const uint32_t pipelineShaderStageCreateFlags)4330 tcu::TestStatus vkt::subgroups::makeComputeTest(Context &context, VkFormat format, const SSBOData *inputs,
4331 uint32_t inputsCount, const void *internalData,
4332 CheckResultCompute checkResult, uint32_t requiredSubgroupSize,
4333 const uint32_t pipelineShaderStageCreateFlags)
4334 {
4335 return makeComputeOrMeshTest(ComputeLike::COMPUTE, context, format, inputs, inputsCount, internalData, checkResult,
4336 requiredSubgroupSize, pipelineShaderStageCreateFlags);
4337 }
4338
makeMeshTest(Context & context,VkFormat format,const SSBOData * inputs,uint32_t inputsCount,const void * internalData,CheckResultCompute checkResult,uint32_t requiredSubgroupSize,const uint32_t pipelineShaderStageCreateFlags)4339 tcu::TestStatus vkt::subgroups::makeMeshTest(Context &context, VkFormat format, const SSBOData *inputs,
4340 uint32_t inputsCount, const void *internalData,
4341 CheckResultCompute checkResult, uint32_t requiredSubgroupSize,
4342 const uint32_t pipelineShaderStageCreateFlags)
4343 {
4344 return makeComputeOrMeshTest(ComputeLike::MESH, context, format, inputs, inputsCount, internalData, checkResult,
4345 requiredSubgroupSize, pipelineShaderStageCreateFlags);
4346 }
4347
checkShaderStageSetValidity(const VkShaderStageFlags shaderStages)4348 static inline void checkShaderStageSetValidity(const VkShaderStageFlags shaderStages)
4349 {
4350 if (shaderStages == 0)
4351 TCU_THROW(InternalError, "Shader stage is not specified");
4352
4353 // It can actually be only 1 or 0.
4354 const uint32_t exclusivePipelinesCount =
4355 (isAllComputeStages(shaderStages) ? 1 : 0) + (isAllGraphicsStages(shaderStages) ? 1 : 0)
4356 #ifndef CTS_USES_VULKANSC
4357 + (isAllRayTracingStages(shaderStages) ? 1 : 0) + (isAllMeshShadingStages(shaderStages) ? 1 : 0)
4358 #endif // CTS_USES_VULKANSC
4359 ;
4360
4361 if (exclusivePipelinesCount != 1)
4362 TCU_THROW(InternalError, "Mix of shaders from different pipelines is detected");
4363 }
4364
supportedCheckShader(Context & context,const VkShaderStageFlags shaderStages)4365 void vkt::subgroups::supportedCheckShader(Context &context, const VkShaderStageFlags shaderStages)
4366 {
4367 checkShaderStageSetValidity(shaderStages);
4368
4369 if ((context.getSubgroupProperties().supportedStages & shaderStages) == 0)
4370 {
4371 if (isAllComputeStages(shaderStages))
4372 TCU_FAIL("Compute shader is required to support subgroup operations");
4373 else
4374 TCU_THROW(NotSupportedError, "Subgroup support is not available for test shader stage(s)");
4375 }
4376
4377 #ifndef CTS_USES_VULKANSC
4378 if ((VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) & shaderStages &&
4379 context.isDeviceFunctionalitySupported("VK_KHR_portability_subset") &&
4380 !context.getPortabilitySubsetFeatures().tessellationIsolines)
4381 {
4382 TCU_THROW(NotSupportedError,
4383 "VK_KHR_portability_subset: Tessellation iso lines are not supported by this implementation");
4384 }
4385 #endif // CTS_USES_VULKANSC
4386 }
4387
namespace vkt
{
namespace subgroups
{
// Convenience alias for a heterogeneous list of test input resources.
typedef std::vector<de::SharedPtr<BufferOrImage>> vectorBufferOrImage;

// Shader group indices used when assembling the ray tracing pipeline and its
// shader binding tables (one group per stage category).
enum ShaderGroups
{
    FIRST_GROUP = 0,
    RAYGEN_GROUP = FIRST_GROUP,
    MISS_GROUP,
    HIT_GROUP,
    CALL_GROUP,
    GROUP_COUNT
};
4403
getAllRayTracingFormats()4404 const std::vector<vk::VkFormat> getAllRayTracingFormats()
4405 {
4406 std::vector<VkFormat> formats;
4407
4408 formats.push_back(VK_FORMAT_R8G8B8_SINT);
4409 formats.push_back(VK_FORMAT_R8_UINT);
4410 formats.push_back(VK_FORMAT_R8G8B8A8_UINT);
4411 formats.push_back(VK_FORMAT_R16G16B16_SINT);
4412 formats.push_back(VK_FORMAT_R16_UINT);
4413 formats.push_back(VK_FORMAT_R16G16B16A16_UINT);
4414 formats.push_back(VK_FORMAT_R32G32B32_SINT);
4415 formats.push_back(VK_FORMAT_R32_UINT);
4416 formats.push_back(VK_FORMAT_R32G32B32A32_UINT);
4417 formats.push_back(VK_FORMAT_R64G64B64_SINT);
4418 formats.push_back(VK_FORMAT_R64_UINT);
4419 formats.push_back(VK_FORMAT_R64G64B64A64_UINT);
4420 formats.push_back(VK_FORMAT_R16G16B16A16_SFLOAT);
4421 formats.push_back(VK_FORMAT_R32_SFLOAT);
4422 formats.push_back(VK_FORMAT_R32G32B32A32_SFLOAT);
4423 formats.push_back(VK_FORMAT_R64_SFLOAT);
4424 formats.push_back(VK_FORMAT_R64G64B64_SFLOAT);
4425 formats.push_back(VK_FORMAT_R64G64B64A64_SFLOAT);
4426 formats.push_back(VK_FORMAT_R8_USCALED);
4427 formats.push_back(VK_FORMAT_R8G8_USCALED);
4428 formats.push_back(VK_FORMAT_R8G8B8_USCALED);
4429 formats.push_back(VK_FORMAT_R8G8B8A8_USCALED);
4430
4431 return formats;
4432 }
4433
// Registers a full set of trivial ray tracing shaders (raygen, any-hit,
// closest-hit, miss, intersection, callable) that do NOT use subgroup
// operations, under "*_noSubgroup" names. Tests use these to fill the stages
// of a ray tracing pipeline that are not under test.
void addRayTracingNoSubgroupShader(SourceCollections &programCollection)
{
    // SPIR-V 1.4 is the minimum for ray tracing shaders.
    const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);

    // Raygen shader: traces one guaranteed hit and one guaranteed miss, then
    // invokes the callable shader, so every other stage gets executed.
    const std::string rgenShaderNoSubgroups =
        "#version 460 core\n"
        "#extension GL_EXT_ray_tracing: require\n"
        "layout(location = 0) rayPayloadEXT uvec4 payload;\n"
        "layout(location = 0) callableDataEXT uvec4 callData;"
        "layout(set = 1, binding = 0) uniform accelerationStructureEXT topLevelAS;\n"
        "\n"
        "void main()\n"
        "{\n"
        "  uint  rayFlags   = 0;\n"
        "  uint  cullMask   = 0xFF;\n"
        "  float tmin       = 0.0;\n"
        "  float tmax       = 9.0;\n"
        "  vec3  origin     = vec3((float(gl_LaunchIDEXT.x) + 0.5f) / float(gl_LaunchSizeEXT.x), "
        "(float(gl_LaunchIDEXT.y) + 0.5f) / float(gl_LaunchSizeEXT.y), 0.0);\n"
        "  vec3  directHit  = vec3(0.0, 0.0, -1.0);\n"
        "  vec3  directMiss = vec3(0.0, 0.0, +1.0);\n"
        "\n"
        "  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directHit,  tmax, 0);\n"
        "  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directMiss, tmax, 0);\n"
        "  executeCallableEXT(0, 0);"
        "}\n";
    // The hit shader body is shared by the any-hit and closest-hit stages.
    const std::string hitShaderNoSubgroups  = "#version 460 core\n"
                                              "#extension GL_EXT_ray_tracing: require\n"
                                              "hitAttributeEXT vec3 attribs;\n"
                                              "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
                                              "\n"
                                              "void main()\n"
                                              "{\n"
                                              "}\n";
    const std::string missShaderNoSubgroups = "#version 460 core\n"
                                              "#extension GL_EXT_ray_tracing: require\n"
                                              "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
                                              "\n"
                                              "void main()\n"
                                              "{\n"
                                              "}\n";
    // Intersection shader always reports a hit so the hit shaders run.
    const std::string sectShaderNoSubgroups = "#version 460 core\n"
                                              "#extension GL_EXT_ray_tracing: require\n"
                                              "hitAttributeEXT vec3 hitAttribute;\n"
                                              "\n"
                                              "void main()\n"
                                              "{\n"
                                              "  reportIntersectionEXT(0.75f, 0x7Eu);\n"
                                              "}\n";
    const std::string callShaderNoSubgroups = "#version 460 core\n"
                                              "#extension GL_EXT_ray_tracing: require\n"
                                              "layout(location = 0) callableDataInEXT float callData;\n"
                                              "\n"
                                              "void main()\n"
                                              "{\n"
                                              "}\n";

    programCollection.glslSources.add("rgen_noSubgroup") << glu::RaygenSource(rgenShaderNoSubgroups) << buildOptions;
    programCollection.glslSources.add("ahit_noSubgroup") << glu::AnyHitSource(hitShaderNoSubgroups) << buildOptions;
    programCollection.glslSources.add("chit_noSubgroup") << glu::ClosestHitSource(hitShaderNoSubgroups) << buildOptions;
    programCollection.glslSources.add("miss_noSubgroup") << glu::MissSource(missShaderNoSubgroups) << buildOptions;
    programCollection.glslSources.add("sect_noSubgroup")
        << glu::IntersectionSource(sectShaderNoSubgroups) << buildOptions;
    programCollection.glslSources.add("call_noSubgroup") << glu::CallableSource(callShaderNoSubgroups) << buildOptions;
}
4499
4500 #ifndef CTS_USES_VULKANSC
4501
enumerateRayTracingShaderStages(const VkShaderStageFlags shaderStage)4502 static vector<VkShaderStageFlagBits> enumerateRayTracingShaderStages(const VkShaderStageFlags shaderStage)
4503 {
4504 vector<VkShaderStageFlagBits> result;
4505 const VkShaderStageFlagBits shaderStageFlags[] = {
4506 VK_SHADER_STAGE_RAYGEN_BIT_KHR, VK_SHADER_STAGE_ANY_HIT_BIT_KHR, VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,
4507 VK_SHADER_STAGE_MISS_BIT_KHR, VK_SHADER_STAGE_INTERSECTION_BIT_KHR, VK_SHADER_STAGE_CALLABLE_BIT_KHR,
4508 };
4509
4510 for (auto shaderStageFlag : shaderStageFlags)
4511 {
4512 if (0 != (shaderStage & shaderStageFlag))
4513 result.push_back(shaderStageFlag);
4514 }
4515
4516 return result;
4517 }
4518
getRayTracingResultBinding(const VkShaderStageFlagBits shaderStage)4519 static uint32_t getRayTracingResultBinding(const VkShaderStageFlagBits shaderStage)
4520 {
4521 const VkShaderStageFlags shaderStageFlags[] = {
4522 VK_SHADER_STAGE_RAYGEN_BIT_KHR, VK_SHADER_STAGE_ANY_HIT_BIT_KHR, VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,
4523 VK_SHADER_STAGE_MISS_BIT_KHR, VK_SHADER_STAGE_INTERSECTION_BIT_KHR, VK_SHADER_STAGE_CALLABLE_BIT_KHR,
4524 };
4525
4526 for (uint32_t shaderStageNdx = 0; shaderStageNdx < DE_LENGTH_OF_ARRAY(shaderStageFlags); ++shaderStageNdx)
4527 {
4528 if (0 != (shaderStage & shaderStageFlags[shaderStageNdx]))
4529 {
4530 DE_ASSERT(0 == (shaderStage & (~shaderStageFlags[shaderStageNdx])));
4531
4532 return shaderStageNdx;
4533 }
4534 }
4535
4536 TCU_THROW(InternalError, "Non-raytracing stage specified or no stage at all");
4537 }
4538
// Creates one storage buffer per tested stage (to hold shader results),
// followed by one buffer or image per extra-data entry, each initialized from
// its SSBOData description. Result buffers come first in the returned vector.
static vectorBufferOrImage makeRayTracingInputBuffers(Context &context, VkFormat format, const SSBOData *extraDatas,
                                                      uint32_t extraDatasCount,
                                                      const vector<VkShaderStageFlagBits> &stagesVector)
{
    const size_t stagesCount = stagesVector.size();
    // One result element per invocation; the tests never trace wider than getMaxWidth().
    const VkDeviceSize shaderSize = getMaxWidth();
    const VkDeviceSize inputBufferSize = getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize;
    vectorBufferOrImage inputBuffers(stagesCount + extraDatasCount);

    // The implicit result SSBO we use to store our outputs from the shader
    for (size_t stageNdx = 0u; stageNdx < stagesCount; ++stageNdx)
        inputBuffers[stageNdx] =
            de::SharedPtr<BufferOrImage>(new Buffer(context, inputBufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT));

    // Extra datas follow the per-stage result buffers; each is an image, a UBO
    // or an SSBO depending on its SSBOData description.
    for (size_t stageNdx = stagesCount; stageNdx < stagesCount + extraDatasCount; ++stageNdx)
    {
        const size_t datasNdx = stageNdx - stagesCount;

        if (extraDatas[datasNdx].isImage())
        {
            inputBuffers[stageNdx] = de::SharedPtr<BufferOrImage>(new Image(
                context, static_cast<uint32_t>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
        }
        else
        {
            const auto usage = (extraDatas[datasNdx].isUBO() ? VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT :
                                                               VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
            const auto size = getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) *
                              extraDatas[datasNdx].numElements;
            inputBuffers[stageNdx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, usage));
        }

        // Upload the caller-provided initial contents into the new allocation.
        initializeMemory(context, inputBuffers[stageNdx]->getAllocation(), extraDatas[datasNdx]);
    }

    return inputBuffers;
}
4576
makeRayTracingDescriptorSetLayout(Context & context,const SSBOData * extraDatas,uint32_t extraDatasCount,const vector<VkShaderStageFlagBits> & stagesVector,const vectorBufferOrImage & inputBuffers)4577 static Move<VkDescriptorSetLayout> makeRayTracingDescriptorSetLayout(Context &context, const SSBOData *extraDatas,
4578 uint32_t extraDatasCount,
4579 const vector<VkShaderStageFlagBits> &stagesVector,
4580 const vectorBufferOrImage &inputBuffers)
4581 {
4582 const DeviceInterface &vkd = context.getDeviceInterface();
4583 const VkDevice device = context.getDevice();
4584 const size_t stagesCount = stagesVector.size();
4585 DescriptorSetLayoutBuilder layoutBuilder;
4586
4587 // The implicit result SSBO we use to store our outputs from the shader
4588 for (size_t stageNdx = 0u; stageNdx < stagesVector.size(); ++stageNdx)
4589 {
4590 const uint32_t stageBinding = getRayTracingResultBinding(stagesVector[stageNdx]);
4591
4592 layoutBuilder.addIndexedBinding(inputBuffers[stageNdx]->getType(), 1, stagesVector[stageNdx], stageBinding,
4593 nullptr);
4594 }
4595
4596 for (size_t stageNdx = stagesCount; stageNdx < stagesCount + extraDatasCount; ++stageNdx)
4597 {
4598 const size_t datasNdx = stageNdx - stagesCount;
4599
4600 layoutBuilder.addIndexedBinding(inputBuffers[stageNdx]->getType(), 1, extraDatas[datasNdx].stages,
4601 extraDatas[datasNdx].binding, nullptr);
4602 }
4603
4604 return layoutBuilder.build(vkd, device);
4605 }
4606
makeRayTracingDescriptorSetLayoutAS(Context & context)4607 static Move<VkDescriptorSetLayout> makeRayTracingDescriptorSetLayoutAS(Context &context)
4608 {
4609 const DeviceInterface &vkd = context.getDeviceInterface();
4610 const VkDevice device = context.getDevice();
4611 DescriptorSetLayoutBuilder layoutBuilder;
4612
4613 layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, VK_SHADER_STAGE_RAYGEN_BIT_KHR);
4614
4615 return layoutBuilder.build(vkd, device);
4616 }
4617
makeRayTracingDescriptorPool(Context & context,const vectorBufferOrImage & inputBuffers)4618 static Move<VkDescriptorPool> makeRayTracingDescriptorPool(Context &context, const vectorBufferOrImage &inputBuffers)
4619 {
4620 const DeviceInterface &vkd = context.getDeviceInterface();
4621 const VkDevice device = context.getDevice();
4622 const uint32_t maxDescriptorSets = 2u;
4623 DescriptorPoolBuilder poolBuilder;
4624 Move<VkDescriptorPool> result;
4625
4626 if (inputBuffers.size() > 0)
4627 {
4628 for (size_t ndx = 0u; ndx < inputBuffers.size(); ndx++)
4629 poolBuilder.addType(inputBuffers[ndx]->getType());
4630 }
4631
4632 poolBuilder.addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR);
4633
4634 result = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, maxDescriptorSets);
4635
4636 return result;
4637 }
4638
makeRayTracingDescriptorSet(Context & context,VkDescriptorPool descriptorPool,VkDescriptorSetLayout descriptorSetLayout,const SSBOData * extraDatas,uint32_t extraDatasCount,const vector<VkShaderStageFlagBits> & stagesVector,const vectorBufferOrImage & inputBuffers)4639 static Move<VkDescriptorSet> makeRayTracingDescriptorSet(Context &context, VkDescriptorPool descriptorPool,
4640 VkDescriptorSetLayout descriptorSetLayout,
4641 const SSBOData *extraDatas, uint32_t extraDatasCount,
4642 const vector<VkShaderStageFlagBits> &stagesVector,
4643 const vectorBufferOrImage &inputBuffers)
4644 {
4645 const DeviceInterface &vkd = context.getDeviceInterface();
4646 const VkDevice device = context.getDevice();
4647 const size_t stagesCount = stagesVector.size();
4648 Move<VkDescriptorSet> descriptorSet;
4649
4650 if (inputBuffers.size() > 0)
4651 {
4652 DescriptorSetUpdateBuilder updateBuilder;
4653
4654 // Create descriptor set
4655 descriptorSet = makeDescriptorSet(vkd, device, descriptorPool, descriptorSetLayout);
4656
4657 for (size_t ndx = 0u; ndx < stagesCount + extraDatasCount; ndx++)
4658 {
4659 const uint32_t binding = (ndx < stagesCount) ? getRayTracingResultBinding(stagesVector[ndx]) :
4660 extraDatas[ndx - stagesCount].binding;
4661
4662 if (inputBuffers[ndx]->isImage())
4663 {
4664 const VkDescriptorImageInfo info =
4665 makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
4666 inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
4667
4668 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(binding),
4669 inputBuffers[ndx]->getType(), &info);
4670 }
4671 else
4672 {
4673 const VkDescriptorBufferInfo info = makeDescriptorBufferInfo(
4674 inputBuffers[ndx]->getAsBuffer()->getBuffer(), 0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
4675
4676 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(binding),
4677 inputBuffers[ndx]->getType(), &info);
4678 }
4679 }
4680
4681 updateBuilder.update(vkd, device);
4682 }
4683
4684 return descriptorSet;
4685 }
4686
makeRayTracingDescriptorSetAS(Context & context,VkDescriptorPool descriptorPool,VkDescriptorSetLayout descriptorSetLayout,de::MovePtr<TopLevelAccelerationStructure> & topLevelAccelerationStructure)4687 static Move<VkDescriptorSet> makeRayTracingDescriptorSetAS(
4688 Context &context, VkDescriptorPool descriptorPool, VkDescriptorSetLayout descriptorSetLayout,
4689 de::MovePtr<TopLevelAccelerationStructure> &topLevelAccelerationStructure)
4690 {
4691 const DeviceInterface &vkd = context.getDeviceInterface();
4692 const VkDevice device = context.getDevice();
4693 const TopLevelAccelerationStructure *topLevelAccelerationStructurePtr = topLevelAccelerationStructure.get();
4694 const VkWriteDescriptorSetAccelerationStructureKHR accelerationStructureWriteDescriptorSet = {
4695 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, // VkStructureType sType;
4696 nullptr, // const void* pNext;
4697 1u, // uint32_t accelerationStructureCount;
4698 topLevelAccelerationStructurePtr->getPtr(), // const VkAccelerationStructureKHR* pAccelerationStructures;
4699 };
4700 Move<VkDescriptorSet> descriptorSet = makeDescriptorSet(vkd, device, descriptorPool, descriptorSetLayout);
4701
4702 DescriptorSetUpdateBuilder()
4703 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
4704 VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &accelerationStructureWriteDescriptorSet)
4705 .update(vkd, device);
4706
4707 return descriptorSet;
4708 }
4709
makeRayTracingPipelineLayout(Context & context,const VkDescriptorSetLayout descriptorSetLayout0,const VkDescriptorSetLayout descriptorSetLayout1)4710 static Move<VkPipelineLayout> makeRayTracingPipelineLayout(Context &context,
4711 const VkDescriptorSetLayout descriptorSetLayout0,
4712 const VkDescriptorSetLayout descriptorSetLayout1)
4713 {
4714 const DeviceInterface &vkd = context.getDeviceInterface();
4715 const VkDevice device = context.getDevice();
4716 const std::vector<VkDescriptorSetLayout> descriptorSetLayouts{descriptorSetLayout0, descriptorSetLayout1};
4717 const uint32_t descriptorSetLayoutsSize = static_cast<uint32_t>(descriptorSetLayouts.size());
4718
4719 return makePipelineLayout(vkd, device, descriptorSetLayoutsSize, descriptorSetLayouts.data());
4720 }
4721
createTopAccelerationStructure(Context & context,de::SharedPtr<BottomLevelAccelerationStructure> bottomLevelAccelerationStructure)4722 static de::MovePtr<TopLevelAccelerationStructure> createTopAccelerationStructure(
4723 Context &context, de::SharedPtr<BottomLevelAccelerationStructure> bottomLevelAccelerationStructure)
4724 {
4725 const DeviceInterface &vkd = context.getDeviceInterface();
4726 const VkDevice device = context.getDevice();
4727 Allocator &allocator = context.getDefaultAllocator();
4728 de::MovePtr<TopLevelAccelerationStructure> result = makeTopLevelAccelerationStructure();
4729
4730 result->setInstanceCount(1);
4731 result->addInstance(bottomLevelAccelerationStructure);
4732 result->create(vkd, device, allocator);
4733
4734 return result;
4735 }
4736
createBottomAccelerationStructure(Context & context)4737 static de::SharedPtr<BottomLevelAccelerationStructure> createBottomAccelerationStructure(Context &context)
4738 {
4739 const DeviceInterface &vkd = context.getDeviceInterface();
4740 const VkDevice device = context.getDevice();
4741 Allocator &allocator = context.getDefaultAllocator();
4742 de::MovePtr<BottomLevelAccelerationStructure> result = makeBottomLevelAccelerationStructure();
4743 const std::vector<tcu::Vec3> geometryData{tcu::Vec3(-1.0f, -1.0f, -2.0f), tcu::Vec3(+1.0f, +1.0f, -1.0f)};
4744
4745 result->setGeometryCount(1u);
4746 result->addGeometry(geometryData, false);
4747 result->create(vkd, device, allocator, 0u);
4748
4749 return de::SharedPtr<BottomLevelAccelerationStructure>(result.release());
4750 }
4751
// Builds a ray tracing pipeline containing all six stage kinds. Stages whose
// bit is set in shaderStageTested use the subgroup-testing shader variant;
// all other stages fall back to the "*_noSubgroup" passthrough variant.
// shaderStageCreateFlags / requiredSubgroupSize are indexed in the fixed
// order: rgen, ahit, chit, miss, sect, call; either array may be nullptr.
// The created VkPipeline is returned through pipelineOut; the returned
// RayTracingPipeline object must outlive it (it owns the SBT group layout).
static de::MovePtr<RayTracingPipeline> makeRayTracingPipeline(
    Context &context, const VkShaderStageFlags shaderStageTested, const VkPipelineLayout pipelineLayout,
    const uint32_t shaderStageCreateFlags[6], const uint32_t requiredSubgroupSize[6], Move<VkPipeline> &pipelineOut)
{
    const DeviceInterface &vkd = context.getDeviceInterface();
    const VkDevice device = context.getDevice();
    BinaryCollection &collection = context.getBinaryCollection();
    // Pick the tested or passthrough shader binary per stage.
    const char *shaderRgenName =
        (0 != (shaderStageTested & VK_SHADER_STAGE_RAYGEN_BIT_KHR)) ? "rgen" : "rgen_noSubgroup";
    const char *shaderAhitName =
        (0 != (shaderStageTested & VK_SHADER_STAGE_ANY_HIT_BIT_KHR)) ? "ahit" : "ahit_noSubgroup";
    const char *shaderChitName =
        (0 != (shaderStageTested & VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR)) ? "chit" : "chit_noSubgroup";
    const char *shaderMissName = (0 != (shaderStageTested & VK_SHADER_STAGE_MISS_BIT_KHR)) ? "miss" : "miss_noSubgroup";
    const char *shaderSectName =
        (0 != (shaderStageTested & VK_SHADER_STAGE_INTERSECTION_BIT_KHR)) ? "sect" : "sect_noSubgroup";
    const char *shaderCallName =
        (0 != (shaderStageTested & VK_SHADER_STAGE_CALLABLE_BIT_KHR)) ? "call" : "call_noSubgroup";
    const VkShaderModuleCreateFlags noShaderModuleCreateFlags = static_cast<VkShaderModuleCreateFlags>(0);
    Move<VkShaderModule> rgenShaderModule =
        createShaderModule(vkd, device, collection.get(shaderRgenName), noShaderModuleCreateFlags);
    Move<VkShaderModule> ahitShaderModule =
        createShaderModule(vkd, device, collection.get(shaderAhitName), noShaderModuleCreateFlags);
    Move<VkShaderModule> chitShaderModule =
        createShaderModule(vkd, device, collection.get(shaderChitName), noShaderModuleCreateFlags);
    Move<VkShaderModule> missShaderModule =
        createShaderModule(vkd, device, collection.get(shaderMissName), noShaderModuleCreateFlags);
    Move<VkShaderModule> sectShaderModule =
        createShaderModule(vkd, device, collection.get(shaderSectName), noShaderModuleCreateFlags);
    Move<VkShaderModule> callShaderModule =
        createShaderModule(vkd, device, collection.get(shaderCallName), noShaderModuleCreateFlags);
    // Per-stage pipeline shader stage create flags (0 when not provided).
    const VkPipelineShaderStageCreateFlags noPipelineShaderStageCreateFlags =
        static_cast<VkPipelineShaderStageCreateFlags>(0);
    const VkPipelineShaderStageCreateFlags rgenPipelineShaderStageCreateFlags =
        (shaderStageCreateFlags == nullptr) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[0];
    const VkPipelineShaderStageCreateFlags ahitPipelineShaderStageCreateFlags =
        (shaderStageCreateFlags == nullptr) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[1];
    const VkPipelineShaderStageCreateFlags chitPipelineShaderStageCreateFlags =
        (shaderStageCreateFlags == nullptr) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[2];
    const VkPipelineShaderStageCreateFlags missPipelineShaderStageCreateFlags =
        (shaderStageCreateFlags == nullptr) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[3];
    const VkPipelineShaderStageCreateFlags sectPipelineShaderStageCreateFlags =
        (shaderStageCreateFlags == nullptr) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[4];
    const VkPipelineShaderStageCreateFlags callPipelineShaderStageCreateFlags =
        (shaderStageCreateFlags == nullptr) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[5];
    // Required-subgroup-size structures per stage; a size of 0 means "no
    // requirement", expressed below by passing a null pointer instead.
    const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT requiredSubgroupSizeCreateInfo[6] = {
        {
            VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
            nullptr,
            requiredSubgroupSize != nullptr ? requiredSubgroupSize[0] : 0u,
        },
        {
            VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
            nullptr,
            requiredSubgroupSize != nullptr ? requiredSubgroupSize[1] : 0u,
        },
        {
            VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
            nullptr,
            requiredSubgroupSize != nullptr ? requiredSubgroupSize[2] : 0u,
        },
        {
            VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
            nullptr,
            requiredSubgroupSize != nullptr ? requiredSubgroupSize[3] : 0u,
        },
        {
            VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
            nullptr,
            requiredSubgroupSize != nullptr ? requiredSubgroupSize[4] : 0u,
        },
        {
            VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
            nullptr,
            requiredSubgroupSize != nullptr ? requiredSubgroupSize[5] : 0u,
        },
    };
    const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rgenRequiredSubgroupSizeCreateInfo =
        (requiredSubgroupSizeCreateInfo[0].requiredSubgroupSize == 0) ? nullptr : &requiredSubgroupSizeCreateInfo[0];
    const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *ahitRequiredSubgroupSizeCreateInfo =
        (requiredSubgroupSizeCreateInfo[1].requiredSubgroupSize == 0) ? nullptr : &requiredSubgroupSizeCreateInfo[1];
    const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *chitRequiredSubgroupSizeCreateInfo =
        (requiredSubgroupSizeCreateInfo[2].requiredSubgroupSize == 0) ? nullptr : &requiredSubgroupSizeCreateInfo[2];
    const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *missRequiredSubgroupSizeCreateInfo =
        (requiredSubgroupSizeCreateInfo[3].requiredSubgroupSize == 0) ? nullptr : &requiredSubgroupSizeCreateInfo[3];
    const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *sectRequiredSubgroupSizeCreateInfo =
        (requiredSubgroupSizeCreateInfo[4].requiredSubgroupSize == 0) ? nullptr : &requiredSubgroupSizeCreateInfo[4];
    const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *callRequiredSubgroupSizeCreateInfo =
        (requiredSubgroupSizeCreateInfo[5].requiredSubgroupSize == 0) ? nullptr : &requiredSubgroupSizeCreateInfo[5];
    de::MovePtr<RayTracingPipeline> rayTracingPipeline = de::newMovePtr<RayTracingPipeline>();

    // Register all six stages; ahit/chit/sect share the single hit group.
    rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR, rgenShaderModule, RAYGEN_GROUP, nullptr,
                                  rgenPipelineShaderStageCreateFlags, rgenRequiredSubgroupSizeCreateInfo);
    rayTracingPipeline->addShader(VK_SHADER_STAGE_ANY_HIT_BIT_KHR, ahitShaderModule, HIT_GROUP, nullptr,
                                  ahitPipelineShaderStageCreateFlags, ahitRequiredSubgroupSizeCreateInfo);
    rayTracingPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, chitShaderModule, HIT_GROUP, nullptr,
                                  chitPipelineShaderStageCreateFlags, chitRequiredSubgroupSizeCreateInfo);
    rayTracingPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR, missShaderModule, MISS_GROUP, nullptr,
                                  missPipelineShaderStageCreateFlags, missRequiredSubgroupSizeCreateInfo);
    rayTracingPipeline->addShader(VK_SHADER_STAGE_INTERSECTION_BIT_KHR, sectShaderModule, HIT_GROUP, nullptr,
                                  sectPipelineShaderStageCreateFlags, sectRequiredSubgroupSizeCreateInfo);
    rayTracingPipeline->addShader(VK_SHADER_STAGE_CALLABLE_BIT_KHR, callShaderModule, CALL_GROUP, nullptr,
                                  callPipelineShaderStageCreateFlags, callRequiredSubgroupSizeCreateInfo);

    // Must execute createPipeline here, due to pNext pointers in calls to addShader are local
    pipelineOut = rayTracingPipeline->createPipeline(vkd, device, pipelineLayout);

    return rayTracingPipeline;
}
4861
getPossibleRayTracingSubgroupStages(Context & context,const VkShaderStageFlags testedStages)4862 VkShaderStageFlags getPossibleRayTracingSubgroupStages(Context &context, const VkShaderStageFlags testedStages)
4863 {
4864 const VkPhysicalDeviceSubgroupProperties &subgroupProperties = context.getSubgroupProperties();
4865 const VkShaderStageFlags stages = testedStages & subgroupProperties.supportedStages;
4866
4867 DE_ASSERT(isAllRayTracingStages(testedStages));
4868
4869 return stages;
4870 }
4871
allRayTracingStages(Context & context,VkFormat format,const SSBOData * extraDatas,uint32_t extraDataCount,const void * internalData,const VerificationFunctor & checkResult,const VkShaderStageFlags shaderStage)4872 tcu::TestStatus allRayTracingStages(Context &context, VkFormat format, const SSBOData *extraDatas,
4873 uint32_t extraDataCount, const void *internalData,
4874 const VerificationFunctor &checkResult, const VkShaderStageFlags shaderStage)
4875 {
4876 return vkt::subgroups::allRayTracingStagesRequiredSubgroupSize(
4877 context, format, extraDatas, extraDataCount, internalData, checkResult, shaderStage, nullptr, nullptr);
4878 }
4879
// Main ray tracing subgroup test driver. Builds the acceleration structures,
// pipeline, SBTs and descriptor sets once, then traces rays at increasing
// widths (see getNextWidth), re-initializing the extra-data resources and
// validating the per-stage result buffers with checkResult on every pass.
// shaderStageCreateFlags / requiredSubgroupSize are forwarded per stage in the
// fixed rgen/ahit/chit/miss/sect/call order; either may be nullptr.
tcu::TestStatus allRayTracingStagesRequiredSubgroupSize(Context &context, VkFormat format, const SSBOData *extraDatas,
                                                        uint32_t extraDatasCount, const void *internalData,
                                                        const VerificationFunctor &checkResult,
                                                        const VkShaderStageFlags shaderStageTested,
                                                        const uint32_t shaderStageCreateFlags[6],
                                                        const uint32_t requiredSubgroupSize[6])
{
    const DeviceInterface &vkd = context.getDeviceInterface();
    const VkDevice device = context.getDevice();
    const VkQueue queue = context.getUniversalQueue();
    const uint32_t queueFamilyIndex = context.getUniversalQueueFamilyIndex();
    Allocator &allocator = context.getDefaultAllocator();
    const uint32_t subgroupSize = getSubgroupSize(context);
    const uint32_t maxWidth = getMaxWidth();
    const vector<VkShaderStageFlagBits> stagesVector = enumerateRayTracingShaderStages(shaderStageTested);
    const uint32_t stagesCount = static_cast<uint32_t>(stagesVector.size());
    de::SharedPtr<BottomLevelAccelerationStructure> bottomLevelAccelerationStructure =
        createBottomAccelerationStructure(context);
    de::MovePtr<TopLevelAccelerationStructure> topLevelAccelerationStructure =
        createTopAccelerationStructure(context, bottomLevelAccelerationStructure);
    // Per-stage result buffers first, then the extra-data resources.
    vectorBufferOrImage inputBuffers =
        makeRayTracingInputBuffers(context, format, extraDatas, extraDatasCount, stagesVector);
    const Move<VkDescriptorSetLayout> descriptorSetLayout =
        makeRayTracingDescriptorSetLayout(context, extraDatas, extraDatasCount, stagesVector, inputBuffers);
    const Move<VkDescriptorSetLayout> descriptorSetLayoutAS = makeRayTracingDescriptorSetLayoutAS(context);
    const Move<VkPipelineLayout> pipelineLayout =
        makeRayTracingPipelineLayout(context, *descriptorSetLayout, *descriptorSetLayoutAS);
    Move<VkPipeline> pipeline = Move<VkPipeline>();
    const de::MovePtr<RayTracingPipeline> rayTracingPipeline = makeRayTracingPipeline(
        context, shaderStageTested, *pipelineLayout, shaderStageCreateFlags, requiredSubgroupSize, pipeline);
    // One shader binding table per group kind (raygen/miss/hit/callable).
    const uint32_t shaderGroupHandleSize = context.getRayTracingPipelineProperties().shaderGroupHandleSize;
    const uint32_t shaderGroupBaseAlignment = context.getRayTracingPipelineProperties().shaderGroupBaseAlignment;
    de::MovePtr<BufferWithMemory> rgenShaderBindingTable = rayTracingPipeline->createShaderBindingTable(
        vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, RAYGEN_GROUP, 1u);
    de::MovePtr<BufferWithMemory> missShaderBindingTable = rayTracingPipeline->createShaderBindingTable(
        vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, MISS_GROUP, 1u);
    de::MovePtr<BufferWithMemory> hitsShaderBindingTable = rayTracingPipeline->createShaderBindingTable(
        vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, HIT_GROUP, 1u);
    de::MovePtr<BufferWithMemory> callShaderBindingTable = rayTracingPipeline->createShaderBindingTable(
        vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, CALL_GROUP, 1u);
    const VkStridedDeviceAddressRegionKHR rgenShaderBindingTableRegion =
        makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, rgenShaderBindingTable->get(), 0),
                                          shaderGroupHandleSize, shaderGroupHandleSize);
    const VkStridedDeviceAddressRegionKHR missShaderBindingTableRegion =
        makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, missShaderBindingTable->get(), 0),
                                          shaderGroupHandleSize, shaderGroupHandleSize);
    const VkStridedDeviceAddressRegionKHR hitsShaderBindingTableRegion =
        makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, hitsShaderBindingTable->get(), 0),
                                          shaderGroupHandleSize, shaderGroupHandleSize);
    const VkStridedDeviceAddressRegionKHR callShaderBindingTableRegion =
        makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, callShaderBindingTable->get(), 0),
                                          shaderGroupHandleSize, shaderGroupHandleSize);
    const Move<VkDescriptorPool> descriptorPool = makeRayTracingDescriptorPool(context, inputBuffers);
    const Move<VkDescriptorSet> descriptorSet = makeRayTracingDescriptorSet(
        context, *descriptorPool, *descriptorSetLayout, extraDatas, extraDatasCount, stagesVector, inputBuffers);
    const Move<VkDescriptorSet> descriptorSetAS =
        makeRayTracingDescriptorSetAS(context, *descriptorPool, *descriptorSetLayoutAS, topLevelAccelerationStructure);
    const Move<VkCommandPool> cmdPool = makeCommandPool(vkd, device, queueFamilyIndex);
    const Move<VkCommandBuffer> cmdBuffer = makeCommandBuffer(context, *cmdPool);
    uint32_t passIterations = 0u;
    uint32_t failIterations = 0u;

    DE_ASSERT(shaderStageTested != 0);

    for (uint32_t width = 1u; width < maxWidth; width = getNextWidth(width))
    {

        for (uint32_t ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
        {
            // re-init the data
            const Allocation &alloc = inputBuffers[ndx]->getAllocation();

            initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
        }

        beginCommandBuffer(vkd, *cmdBuffer);
        {
            vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);

            // Acceleration structures are (re)built on the device each iteration.
            bottomLevelAccelerationStructure->build(vkd, device, *cmdBuffer);
            topLevelAccelerationStructure->build(vkd, device, *cmdBuffer);

            // Set 1: acceleration structure. Set 0: results and extra datas.
            vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 1u, 1u,
                                      &descriptorSetAS.get(), 0u, nullptr);

            if (stagesCount + extraDatasCount > 0)
                vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0u, 1u,
                                          &descriptorSet.get(), 0u, nullptr);

            // One ray per invocation along x; width grows each iteration.
            cmdTraceRays(vkd, *cmdBuffer, &rgenShaderBindingTableRegion, &missShaderBindingTableRegion,
                         &hitsShaderBindingTableRegion, &callShaderBindingTableRegion, width, 1, 1);

            // Make shader writes visible to the host reads below.
            const VkMemoryBarrier postTraceMemoryBarrier =
                makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
            cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR,
                                     VK_PIPELINE_STAGE_HOST_BIT, &postTraceMemoryBarrier);
        }
        endCommandBuffer(vkd, *cmdBuffer);

        submitCommandsAndWait(vkd, device, queue, *cmdBuffer);

        // Validate each tested stage's result buffer, together with any
        // extra-data buffers visible to that stage.
        for (uint32_t ndx = 0u; ndx < stagesCount; ++ndx)
        {
            std::vector<const void *> datas;

            if (!inputBuffers[ndx]->isImage())
            {
                const Allocation &resultAlloc = inputBuffers[ndx]->getAllocation();

                invalidateAlloc(vkd, device, resultAlloc);

                // we always have our result data first
                datas.push_back(resultAlloc.getHostPtr());
            }

            for (uint32_t index = stagesCount; index < stagesCount + extraDatasCount; ++index)
            {
                const uint32_t datasNdx = index - stagesCount;

                if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
                {
                    const Allocation &resultAlloc = inputBuffers[index]->getAllocation();

                    invalidateAlloc(vkd, device, resultAlloc);

                    // we always have our result data first
                    datas.push_back(resultAlloc.getHostPtr());
                }
            }

            if (!checkResult(internalData, datas, width, subgroupSize, false))
                failIterations++;
            else
                passIterations++;
        }

        context.resetCommandPoolForVKSC(device, *cmdPool);
    }

    if (failIterations > 0 || passIterations == 0)
        return tcu::TestStatus::fail("Failed " + de::toString(failIterations) + " out of " +
                                     de::toString(failIterations + passIterations) + " iterations.");
    else
        return tcu::TestStatus::pass("OK");
}
5025 #endif // CTS_USES_VULKANSC
5026
5027 } // namespace subgroups
5028 } // namespace vkt
5029