1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2019 The Khronos Group Inc.
6 * Copyright (c) 2019 Google Inc.
7 * Copyright (c) 2017 Codeplay Software Ltd.
8 *
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
20 *
21 */ /*!
22 * \file
23 * \brief Subgroups Tests Utils
24 */ /*--------------------------------------------------------------------*/
25
26 #include "vktSubgroupsTestsUtils.hpp"
27 #include "vkRayTracingUtil.hpp"
28 #include "deFloat16.h"
29 #include "deRandom.hpp"
30 #include "tcuCommandLine.hpp"
31 #include "tcuStringTemplate.hpp"
32 #include "vkBarrierUtil.hpp"
33 #include "vkImageUtil.hpp"
34 #include "vkTypeUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkObjUtil.hpp"
37
38 using namespace tcu;
39 using namespace std;
40 using namespace vk;
41 using namespace vkt;
42
43 namespace
44 {
45
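// Width sweep used by the tests: every value up to 128 (the maximum subgroup size)
// is exercised, then only powers of two up to getMaxWidth().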
46 deUint32 getMaxWidth ()
47 {
48 return 1024u;
49 }
50
51 deUint32 getNextWidth (const deUint32 width)
52 {
53 if (width < 128)
54 {
55 // This ensures we test every value up to 128 (the max subgroup size).
56 return width + 1;
57 }
58 else
59 {
60 // Once we hit 128, step only through powers of two to reduce testing time.
61 return width * 2;
62 }
63 }
64
65 deUint32 getFormatSizeInBytes (const VkFormat format)
66 {
67 switch (format)
68 {
69 default:
70 DE_FATAL("Unhandled format!");
71 return 0;
72 case VK_FORMAT_R8_SINT:
73 case VK_FORMAT_R8_UINT:
74 return static_cast<deUint32>(sizeof(deInt8));
75 case VK_FORMAT_R8G8_SINT:
76 case VK_FORMAT_R8G8_UINT:
77 return static_cast<deUint32>(sizeof(deInt8) * 2);
78 case VK_FORMAT_R8G8B8_SINT:
79 case VK_FORMAT_R8G8B8_UINT:
80 case VK_FORMAT_R8G8B8A8_SINT:
81 case VK_FORMAT_R8G8B8A8_UINT:
82 return static_cast<deUint32>(sizeof(deInt8) * 4);
83 case VK_FORMAT_R16_SINT:
84 case VK_FORMAT_R16_UINT:
85 case VK_FORMAT_R16_SFLOAT:
86 return static_cast<deUint32>(sizeof(deInt16));
87 case VK_FORMAT_R16G16_SINT:
88 case VK_FORMAT_R16G16_UINT:
89 case VK_FORMAT_R16G16_SFLOAT:
90 return static_cast<deUint32>(sizeof(deInt16) * 2);
91 case VK_FORMAT_R16G16B16_UINT:
92 case VK_FORMAT_R16G16B16_SINT:
93 case VK_FORMAT_R16G16B16_SFLOAT:
94 case VK_FORMAT_R16G16B16A16_SINT:
95 case VK_FORMAT_R16G16B16A16_UINT:
96 case VK_FORMAT_R16G16B16A16_SFLOAT:
97 return static_cast<deUint32>(sizeof(deInt16) * 4);
98 case VK_FORMAT_R32_SINT:
99 case VK_FORMAT_R32_UINT:
100 case VK_FORMAT_R32_SFLOAT:
101 return static_cast<deUint32>(sizeof(deInt32));
102 case VK_FORMAT_R32G32_SINT:
103 case VK_FORMAT_R32G32_UINT:
104 case VK_FORMAT_R32G32_SFLOAT:
105 return static_cast<deUint32>(sizeof(deInt32) * 2);
106 case VK_FORMAT_R32G32B32_SINT:
107 case VK_FORMAT_R32G32B32_UINT:
108 case VK_FORMAT_R32G32B32_SFLOAT:
109 case VK_FORMAT_R32G32B32A32_SINT:
110 case VK_FORMAT_R32G32B32A32_UINT:
111 case VK_FORMAT_R32G32B32A32_SFLOAT:
112 return static_cast<deUint32>(sizeof(deInt32) * 4);
113 case VK_FORMAT_R64_SINT:
114 case VK_FORMAT_R64_UINT:
115 case VK_FORMAT_R64_SFLOAT:
116 return static_cast<deUint32>(sizeof(deInt64));
117 case VK_FORMAT_R64G64_SINT:
118 case VK_FORMAT_R64G64_UINT:
119 case VK_FORMAT_R64G64_SFLOAT:
120 return static_cast<deUint32>(sizeof(deInt64) * 2);
121 case VK_FORMAT_R64G64B64_SINT:
122 case VK_FORMAT_R64G64B64_UINT:
123 case VK_FORMAT_R64G64B64_SFLOAT:
124 case VK_FORMAT_R64G64B64A64_SINT:
125 case VK_FORMAT_R64G64B64A64_UINT:
126 case VK_FORMAT_R64G64B64A64_SFLOAT:
127 return static_cast<deUint32>(sizeof(deInt64) * 4);
128 // The below formats are used to represent bool and bvec* types. These
129 // types are passed to the shader as int and ivec* types, before the
130 // calculations are done as booleans. We need a distinct type here so
131 // that the shader generators can switch on it and generate the correct
132 // shader source for testing.
133 case VK_FORMAT_R8_USCALED:
134 return static_cast<deUint32>(sizeof(deInt32));
135 case VK_FORMAT_R8G8_USCALED:
136 return static_cast<deUint32>(sizeof(deInt32) * 2);
137 case VK_FORMAT_R8G8B8_USCALED:
138 case VK_FORMAT_R8G8B8A8_USCALED:
139 return static_cast<deUint32>(sizeof(deInt32) * 4);
140 }
141 }
142
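// Per-element stride of the input data: std140 layouts pad array elements up to at
// least 16 bytes, otherwise the tight format size is used.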
143 deUint32 getElementSizeInBytes (const VkFormat format,
144 const subgroups::SSBOData::InputDataLayoutType layout)
145 {
146 const deUint32 bytes = getFormatSizeInBytes(format);
147
148 if (layout == subgroups::SSBOData::LayoutStd140)
149 return bytes < 16 ? 16 : bytes;
150 else
151 return bytes;
152 }
153
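// Single-subpass render pass with one color attachment that is cleared on load and
// left in VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL for readback.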
154 Move<VkRenderPass> makeRenderPass (Context& context, VkFormat format)
155 {
156 const VkAttachmentReference colorReference =
157 {
158 0,
159 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
160 };
161 const VkSubpassDescription subpassDescription =
162 {
163 0u, // VkSubpassDescriptionFlags flags;
164 VK_PIPELINE_BIND_POINT_GRAPHICS, // VkPipelineBindPoint pipelineBindPoint;
165 0, // deUint32 inputAttachmentCount;
166 DE_NULL, // const VkAttachmentReference* pInputAttachments;
167 1, // deUint32 colorAttachmentCount;
168 &colorReference, // const VkAttachmentReference* pColorAttachments;
169 DE_NULL, // const VkAttachmentReference* pResolveAttachments;
170 DE_NULL, // const VkAttachmentReference* pDepthStencilAttachment;
171 0, // deUint32 preserveAttachmentCount;
172 DE_NULL // const deUint32* pPreserveAttachments;
173 };
174 const VkSubpassDependency subpassDependencies[2] =
175 {
176 {
177 VK_SUBPASS_EXTERNAL, // deUint32 srcSubpass;
178 0u, // deUint32 dstSubpass;
179 VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, // VkPipelineStageFlags srcStageMask;
180 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, // VkPipelineStageFlags dstStageMask;
181 VK_ACCESS_MEMORY_READ_BIT, // VkAccessFlags srcAccessMask;
182 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, // VkAccessFlags dstAccessMask;
183 VK_DEPENDENCY_BY_REGION_BIT // VkDependencyFlags dependencyFlags;
184 },
185 {
186 0u, // deUint32 srcSubpass;
187 VK_SUBPASS_EXTERNAL, // deUint32 dstSubpass;
188 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, // VkPipelineStageFlags srcStageMask;
189 VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, // VkPipelineStageFlags dstStageMask;
190 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, // VkAccessFlags srcAccessMask;
191 VK_ACCESS_MEMORY_READ_BIT, // VkAccessFlags dstAccessMask;
192 VK_DEPENDENCY_BY_REGION_BIT // VkDependencyFlags dependencyFlags;
193 },
194 };
195 const VkAttachmentDescription attachmentDescription =
196 {
197 0u, // VkAttachmentDescriptionFlags flags;
198 format, // VkFormat format;
199 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
200 VK_ATTACHMENT_LOAD_OP_CLEAR, // VkAttachmentLoadOp loadOp;
201 VK_ATTACHMENT_STORE_OP_STORE, // VkAttachmentStoreOp storeOp;
202 VK_ATTACHMENT_LOAD_OP_DONT_CARE, // VkAttachmentLoadOp stencilLoadOp;
203 VK_ATTACHMENT_STORE_OP_DONT_CARE, // VkAttachmentStoreOp stencilStoreOp;
204 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
205 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL // VkImageLayout finalLayout;
206 };
207 const VkRenderPassCreateInfo renderPassCreateInfo =
208 {
209 VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // VkStructureType sType;
210 DE_NULL, // const void* pNext;
211 0u, // VkRenderPassCreateFlags flags;
212 1, // deUint32 attachmentCount;
213 &attachmentDescription, // const VkAttachmentDescription* pAttachments;
214 1, // deUint32 subpassCount;
215 &subpassDescription, // const VkSubpassDescription* pSubpasses;
216 2, // deUint32 dependencyCount;
217 subpassDependencies // const VkSubpassDependency* pDependencies;
218 };
219
220 return createRenderPass(context.getDeviceInterface(), context.getDevice(), &renderPassCreateInfo);
221 }
222
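// Low-level graphics pipeline builder. requiredSubgroupSize[5] holds per-stage required
// subgroup sizes in the order vertex, tess control, tess eval, geometry, fragment; a null
// pointer or a zero entry leaves that stage's subgroup size at the implementation default.
// Rasterization is discarded when no fragment shader module is supplied.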
223 Move<VkPipeline> makeGraphicsPipeline (const DeviceInterface& vk,
224 const VkDevice device,
225 const VkPipelineLayout pipelineLayout,
226 const VkShaderModule vertexShaderModule,
227 const VkShaderModule tessellationControlShaderModule,
228 const VkShaderModule tessellationEvalShaderModule,
229 const VkShaderModule geometryShaderModule,
230 const VkShaderModule fragmentShaderModule,
231 const VkRenderPass renderPass,
232 const std::vector<VkViewport>& viewports,
233 const std::vector<VkRect2D>& scissors,
234 const VkPrimitiveTopology topology,
235 const deUint32 subpass,
236 const deUint32 patchControlPoints,
237 const VkPipelineVertexInputStateCreateInfo* vertexInputStateCreateInfo,
238 const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo,
239 const VkPipelineMultisampleStateCreateInfo* multisampleStateCreateInfo,
240 const VkPipelineDepthStencilStateCreateInfo* depthStencilStateCreateInfo,
241 const VkPipelineColorBlendStateCreateInfo* colorBlendStateCreateInfo,
242 const VkPipelineDynamicStateCreateInfo* dynamicStateCreateInfo,
243 const deUint32 vertexShaderStageCreateFlags,
244 const deUint32 tessellationControlShaderStageCreateFlags,
245 const deUint32 tessellationEvalShaderStageCreateFlags,
246 const deUint32 geometryShaderStageCreateFlags,
247 const deUint32 fragmentShaderStageCreateFlags,
248 const deUint32 requiredSubgroupSize[5])
249 {
250 const VkBool32 disableRasterization = (fragmentShaderModule == DE_NULL);
251 const bool hasTessellation = (tessellationControlShaderModule != DE_NULL || tessellationEvalShaderModule != DE_NULL);
252
253 VkPipelineShaderStageCreateInfo stageCreateInfo =
254 {
255 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType
256 DE_NULL, // const void* pNext
257 0u, // VkPipelineShaderStageCreateFlags flags
258 VK_SHADER_STAGE_VERTEX_BIT, // VkShaderStageFlagBits stage
259 DE_NULL, // VkShaderModule module
260 "main", // const char* pName
261 DE_NULL // const VkSpecializationInfo* pSpecializationInfo
262 };
263
264 std::vector<VkPipelineShaderStageCreateInfo> pipelineShaderStageParams;
265
266 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT requiredSubgroupSizeCreateInfo[5] =
267 {
268 {
269 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
270 DE_NULL,
271 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[0] : 0u,
272 },
273 {
274 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
275 DE_NULL,
276 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[1] : 0u,
277 },
278 {
279 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
280 DE_NULL,
281 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[2] : 0u,
282 },
283 {
284 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
285 DE_NULL,
286 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[3] : 0u,
287 },
288 {
289 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
290 DE_NULL,
291 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[4] : 0u,
292 },
293 };
294
295 {
296 stageCreateInfo.pNext = (requiredSubgroupSizeCreateInfo[0].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[0] : DE_NULL;
297 stageCreateInfo.flags = vertexShaderStageCreateFlags;
298 stageCreateInfo.stage = VK_SHADER_STAGE_VERTEX_BIT;
299 stageCreateInfo.module = vertexShaderModule;
300 pipelineShaderStageParams.push_back(stageCreateInfo);
301 }
302
303 if (tessellationControlShaderModule != DE_NULL)
304 {
305 stageCreateInfo.pNext = (requiredSubgroupSizeCreateInfo[1].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[1] : DE_NULL;
306 stageCreateInfo.flags = tessellationControlShaderStageCreateFlags;
307 stageCreateInfo.stage = VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
308 stageCreateInfo.module = tessellationControlShaderModule;
309 pipelineShaderStageParams.push_back(stageCreateInfo);
310 }
311
312 if (tessellationEvalShaderModule != DE_NULL)
313 {
314 stageCreateInfo.pNext = (requiredSubgroupSize != DE_NULL && requiredSubgroupSizeCreateInfo[2].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[2] : DE_NULL;
315 stageCreateInfo.flags = tessellationEvalShaderStageCreateFlags;
316 stageCreateInfo.stage = VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
317 stageCreateInfo.module = tessellationEvalShaderModule;
318 pipelineShaderStageParams.push_back(stageCreateInfo);
319 }
320
321 if (geometryShaderModule != DE_NULL)
322 {
323 stageCreateInfo.pNext = (requiredSubgroupSizeCreateInfo[3].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[3] : DE_NULL;
324 stageCreateInfo.flags = geometryShaderStageCreateFlags;
325 stageCreateInfo.stage = VK_SHADER_STAGE_GEOMETRY_BIT;
326 stageCreateInfo.module = geometryShaderModule;
327 pipelineShaderStageParams.push_back(stageCreateInfo);
328 }
329
330 if (fragmentShaderModule != DE_NULL)
331 {
332 stageCreateInfo.pNext = (requiredSubgroupSizeCreateInfo[4].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[4] : DE_NULL;
333 stageCreateInfo.flags = fragmentShaderStageCreateFlags;
334 stageCreateInfo.stage = VK_SHADER_STAGE_FRAGMENT_BIT;
335 stageCreateInfo.module = fragmentShaderModule;
336 pipelineShaderStageParams.push_back(stageCreateInfo);
337 }
338
339 const VkVertexInputBindingDescription vertexInputBindingDescription =
340 {
341 0u, // deUint32 binding
342 sizeof(tcu::Vec4), // deUint32 stride
343 VK_VERTEX_INPUT_RATE_VERTEX, // VkVertexInputRate inputRate
344 };
345
346 const VkVertexInputAttributeDescription vertexInputAttributeDescription =
347 {
348 0u, // deUint32 location
349 0u, // deUint32 binding
350 VK_FORMAT_R32G32B32A32_SFLOAT, // VkFormat format
351 0u // deUint32 offset
352 };
353
354 const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfoDefault =
355 {
356 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType
357 DE_NULL, // const void* pNext
358 (VkPipelineVertexInputStateCreateFlags)0, // VkPipelineVertexInputStateCreateFlags flags
359 1u, // deUint32 vertexBindingDescriptionCount
360 &vertexInputBindingDescription, // const VkVertexInputBindingDescription* pVertexBindingDescriptions
361 1u, // deUint32 vertexAttributeDescriptionCount
362 &vertexInputAttributeDescription // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions
363 };
364
365 const VkPipelineInputAssemblyStateCreateInfo inputAssemblyStateCreateInfo =
366 {
367 VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, // VkStructureType sType
368 DE_NULL, // const void* pNext
369 0u, // VkPipelineInputAssemblyStateCreateFlags flags
370 topology, // VkPrimitiveTopology topology
371 VK_FALSE // VkBool32 primitiveRestartEnable
372 };
373
374 const VkPipelineTessellationStateCreateInfo tessStateCreateInfo =
375 {
376 VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, // VkStructureType sType
377 DE_NULL, // const void* pNext
378 0u, // VkPipelineTessellationStateCreateFlags flags
379 patchControlPoints // deUint32 patchControlPoints
380 };
381
382 const VkPipelineViewportStateCreateInfo viewportStateCreateInfo =
383 {
384 VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, // VkStructureType sType
385 DE_NULL, // const void* pNext
386 (VkPipelineViewportStateCreateFlags)0, // VkPipelineViewportStateCreateFlags flags
387 viewports.empty() ? 1u : (deUint32)viewports.size(), // deUint32 viewportCount
388 viewports.empty() ? DE_NULL : &viewports[0], // const VkViewport* pViewports
389 viewports.empty() ? 1u : (deUint32)scissors.size(), // deUint32 scissorCount
390 scissors.empty() ? DE_NULL : &scissors[0] // const VkRect2D* pScissors
391 };
392
393 const VkPipelineRasterizationStateCreateInfo rasterizationStateCreateInfoDefault =
394 {
395 VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, // VkStructureType sType
396 DE_NULL, // const void* pNext
397 0u, // VkPipelineRasterizationStateCreateFlags flags
398 VK_FALSE, // VkBool32 depthClampEnable
399 disableRasterization, // VkBool32 rasterizerDiscardEnable
400 VK_POLYGON_MODE_FILL, // VkPolygonMode polygonMode
401 VK_CULL_MODE_NONE, // VkCullModeFlags cullMode
402 VK_FRONT_FACE_COUNTER_CLOCKWISE, // VkFrontFace frontFace
403 VK_FALSE, // VkBool32 depthBiasEnable
404 0.0f, // float depthBiasConstantFactor
405 0.0f, // float depthBiasClamp
406 0.0f, // float depthBiasSlopeFactor
407 1.0f // float lineWidth
408 };
409
410 const VkPipelineMultisampleStateCreateInfo multisampleStateCreateInfoDefault =
411 {
412 VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, // VkStructureType sType
413 DE_NULL, // const void* pNext
414 0u, // VkPipelineMultisampleStateCreateFlags flags
415 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits rasterizationSamples
416 VK_FALSE, // VkBool32 sampleShadingEnable
417 1.0f, // float minSampleShading
418 DE_NULL, // const VkSampleMask* pSampleMask
419 VK_FALSE, // VkBool32 alphaToCoverageEnable
420 VK_FALSE // VkBool32 alphaToOneEnable
421 };
422
423 const VkStencilOpState stencilOpState =
424 {
425 VK_STENCIL_OP_KEEP, // VkStencilOp failOp
426 VK_STENCIL_OP_KEEP, // VkStencilOp passOp
427 VK_STENCIL_OP_KEEP, // VkStencilOp depthFailOp
428 VK_COMPARE_OP_NEVER, // VkCompareOp compareOp
429 0, // deUint32 compareMask
430 0, // deUint32 writeMask
431 0 // deUint32 reference
432 };
433
434 const VkPipelineDepthStencilStateCreateInfo depthStencilStateCreateInfoDefault =
435 {
436 VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, // VkStructureType sType
437 DE_NULL, // const void* pNext
438 0u, // VkPipelineDepthStencilStateCreateFlags flags
439 VK_FALSE, // VkBool32 depthTestEnable
440 VK_FALSE, // VkBool32 depthWriteEnable
441 VK_COMPARE_OP_LESS_OR_EQUAL, // VkCompareOp depthCompareOp
442 VK_FALSE, // VkBool32 depthBoundsTestEnable
443 VK_FALSE, // VkBool32 stencilTestEnable
444 stencilOpState, // VkStencilOpState front
445 stencilOpState, // VkStencilOpState back
446 0.0f, // float minDepthBounds
447 1.0f, // float maxDepthBounds
448 };
449
450 const VkPipelineColorBlendAttachmentState colorBlendAttachmentState =
451 {
452 VK_FALSE, // VkBool32 blendEnable
453 VK_BLEND_FACTOR_ZERO, // VkBlendFactor srcColorBlendFactor
454 VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstColorBlendFactor
455 VK_BLEND_OP_ADD, // VkBlendOp colorBlendOp
456 VK_BLEND_FACTOR_ZERO, // VkBlendFactor srcAlphaBlendFactor
457 VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstAlphaBlendFactor
458 VK_BLEND_OP_ADD, // VkBlendOp alphaBlendOp
459 VK_COLOR_COMPONENT_R_BIT // VkColorComponentFlags colorWriteMask
460 | VK_COLOR_COMPONENT_G_BIT
461 | VK_COLOR_COMPONENT_B_BIT
462 | VK_COLOR_COMPONENT_A_BIT
463 };
464
465 const VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfoDefault =
466 {
467 VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType sType
468 DE_NULL, // const void* pNext
469 0u, // VkPipelineColorBlendStateCreateFlags flags
470 VK_FALSE, // VkBool32 logicOpEnable
471 VK_LOGIC_OP_CLEAR, // VkLogicOp logicOp
472 1u, // deUint32 attachmentCount
473 &colorBlendAttachmentState, // const VkPipelineColorBlendAttachmentState* pAttachments
474 { 0.0f, 0.0f, 0.0f, 0.0f } // float blendConstants[4]
475 };
476
477 std::vector<VkDynamicState> dynamicStates;
478
479 if (viewports.empty())
480 dynamicStates.push_back(VK_DYNAMIC_STATE_VIEWPORT);
481 if (scissors.empty())
482 dynamicStates.push_back(VK_DYNAMIC_STATE_SCISSOR);
483
484 const VkPipelineDynamicStateCreateInfo dynamicStateCreateInfoDefault =
485 {
486 VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, // VkStructureType sType
487 DE_NULL, // const void* pNext
488 0u, // VkPipelineDynamicStateCreateFlags flags
489 (deUint32)dynamicStates.size(), // deUint32 dynamicStateCount
490 dynamicStates.empty() ? DE_NULL : &dynamicStates[0] // const VkDynamicState* pDynamicStates
491 };
492
493 const VkPipelineDynamicStateCreateInfo* dynamicStateCreateInfoDefaultPtr = dynamicStates.empty() ? DE_NULL : &dynamicStateCreateInfoDefault;
494
495 const VkGraphicsPipelineCreateInfo pipelineCreateInfo =
496 {
497 VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, // VkStructureType sType
498 DE_NULL, // const void* pNext
499 0u, // VkPipelineCreateFlags flags
500 (deUint32)pipelineShaderStageParams.size(), // deUint32 stageCount
501 &pipelineShaderStageParams[0], // const VkPipelineShaderStageCreateInfo* pStages
502 vertexInputStateCreateInfo ? vertexInputStateCreateInfo : &vertexInputStateCreateInfoDefault, // const VkPipelineVertexInputStateCreateInfo* pVertexInputState
503 &inputAssemblyStateCreateInfo, // const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState
504 hasTessellation ? &tessStateCreateInfo : DE_NULL, // const VkPipelineTessellationStateCreateInfo* pTessellationState
505 &viewportStateCreateInfo, // const VkPipelineViewportStateCreateInfo* pViewportState
506 rasterizationStateCreateInfo ? rasterizationStateCreateInfo : &rasterizationStateCreateInfoDefault, // const VkPipelineRasterizationStateCreateInfo* pRasterizationState
507 multisampleStateCreateInfo ? multisampleStateCreateInfo: &multisampleStateCreateInfoDefault, // const VkPipelineMultisampleStateCreateInfo* pMultisampleState
508 depthStencilStateCreateInfo ? depthStencilStateCreateInfo : &depthStencilStateCreateInfoDefault, // const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState
509 colorBlendStateCreateInfo ? colorBlendStateCreateInfo : &colorBlendStateCreateInfoDefault, // const VkPipelineColorBlendStateCreateInfo* pColorBlendState
510 dynamicStateCreateInfo ? dynamicStateCreateInfo : dynamicStateCreateInfoDefaultPtr, // const VkPipelineDynamicStateCreateInfo* pDynamicState
511 pipelineLayout, // VkPipelineLayout layout
512 renderPass, // VkRenderPass renderPass
513 subpass, // deUint32 subpass
514 DE_NULL, // VkPipeline basePipelineHandle
515 0 // deInt32 basePipelineIndex;
516 };
517
518 return createGraphicsPipeline(vk, device, DE_NULL, &pipelineCreateInfo);
519 }
520
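// Convenience overload used by the tests: no fixed viewports/scissors (the builder above
// then makes them dynamic state), a single-attachment blend state whose write mask matches
// attachmentFormat, and optional vertex input. Typical call (a sketch, handle names hypothetical):
//   Move<VkPipeline> pipeline = makeGraphicsPipeline(context, *pipelineLayout,
//                                                    VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
//                                                    *vertModule, *fragModule, DE_NULL, DE_NULL, DE_NULL, *renderPass);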
521 Move<VkPipeline> makeGraphicsPipeline (Context& context,
522 const VkPipelineLayout pipelineLayout,
523 const VkShaderStageFlags stages,
524 const VkShaderModule vertexShaderModule,
525 const VkShaderModule fragmentShaderModule,
526 const VkShaderModule geometryShaderModule,
527 const VkShaderModule tessellationControlModule,
528 const VkShaderModule tessellationEvaluationModule,
529 const VkRenderPass renderPass,
530 const VkPrimitiveTopology topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
531 const VkVertexInputBindingDescription* vertexInputBindingDescription = DE_NULL,
532 const VkVertexInputAttributeDescription* vertexInputAttributeDescriptions = DE_NULL,
533 const bool frameBufferTests = false,
534 const vk::VkFormat attachmentFormat = VK_FORMAT_R32G32B32A32_SFLOAT,
535 const deUint32 vertexShaderStageCreateFlags = 0u,
536 const deUint32 tessellationControlShaderStageCreateFlags = 0u,
537 const deUint32 tessellationEvalShaderStageCreateFlags = 0u,
538 const deUint32 geometryShaderStageCreateFlags = 0u,
539 const deUint32 fragmentShaderStageCreateFlags = 0u,
540 const deUint32 requiredSubgroupSize[5] = DE_NULL)
541 {
542 const std::vector<VkViewport> noViewports;
543 const std::vector<VkRect2D> noScissors;
544 const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo =
545 {
546 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
547 DE_NULL, // const void* pNext;
548 0u, // VkPipelineVertexInputStateCreateFlags flags;
549 vertexInputBindingDescription == DE_NULL ? 0u : 1u, // deUint32 vertexBindingDescriptionCount;
550 vertexInputBindingDescription, // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
551 vertexInputAttributeDescriptions == DE_NULL ? 0u : 1u, // deUint32 vertexAttributeDescriptionCount;
552 vertexInputAttributeDescriptions, // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
553 };
554 const deUint32 numChannels = getNumUsedChannels(mapVkFormat(attachmentFormat).order);
555 const VkColorComponentFlags colorComponent = numChannels == 1 ? VK_COLOR_COMPONENT_R_BIT :
556 numChannels == 2 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT :
557 numChannels == 3 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT :
558 VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
559 const VkPipelineColorBlendAttachmentState colorBlendAttachmentState =
560 {
561 VK_FALSE, // VkBool32 blendEnable;
562 VK_BLEND_FACTOR_ZERO, // VkBlendFactor srcColorBlendFactor;
563 VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstColorBlendFactor;
564 VK_BLEND_OP_ADD, // VkBlendOp colorBlendOp;
565 VK_BLEND_FACTOR_ZERO, // VkBlendFactor srcAlphaBlendFactor;
566 VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstAlphaBlendFactor;
567 VK_BLEND_OP_ADD, // VkBlendOp alphaBlendOp;
568 colorComponent // VkColorComponentFlags colorWriteMask;
569 };
570 const VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfo =
571 {
572 VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType sType;
573 DE_NULL, // const void* pNext;
574 0u, // VkPipelineColorBlendStateCreateFlags flags;
575 VK_FALSE, // VkBool32 logicOpEnable;
576 VK_LOGIC_OP_CLEAR, // VkLogicOp logicOp;
577 1, // deUint32 attachmentCount;
578 &colorBlendAttachmentState, // const VkPipelineColorBlendAttachmentState* pAttachments;
579 { 0.0f, 0.0f, 0.0f, 0.0f } // float blendConstants[4];
580 };
581 const deUint32 patchControlPoints = (VK_SHADER_STAGE_FRAGMENT_BIT & stages && frameBufferTests) ? 2u : 1u;
582
583 return makeGraphicsPipeline(context.getDeviceInterface(), // const DeviceInterface& vk
584 context.getDevice(), // const VkDevice device
585 pipelineLayout, // const VkPipelineLayout pipelineLayout
586 vertexShaderModule, // const VkShaderModule vertexShaderModule
587 tessellationControlModule, // const VkShaderModule tessellationControlShaderModule
588 tessellationEvaluationModule, // const VkShaderModule tessellationEvalShaderModule
589 geometryShaderModule, // const VkShaderModule geometryShaderModule
590 fragmentShaderModule, // const VkShaderModule fragmentShaderModule
591 renderPass, // const VkRenderPass renderPass
592 noViewports, // const std::vector<VkViewport>& viewports
593 noScissors, // const std::vector<VkRect2D>& scissors
594 topology, // const VkPrimitiveTopology topology
595 0u, // const deUint32 subpass
596 patchControlPoints, // const deUint32 patchControlPoints
597 &vertexInputStateCreateInfo, // const VkPipelineVertexInputStateCreateInfo* vertexInputStateCreateInfo
598 DE_NULL, // const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
599 DE_NULL, // const VkPipelineMultisampleStateCreateInfo* multisampleStateCreateInfo
600 DE_NULL, // const VkPipelineDepthStencilStateCreateInfo* depthStencilStateCreateInfo
601 &colorBlendStateCreateInfo, // const VkPipelineColorBlendStateCreateInfo* colorBlendStateCreateInfo
602 DE_NULL, // const VkPipelineDynamicStateCreateInfo*
603 vertexShaderStageCreateFlags, // const deUint32 vertexShaderStageCreateFlags,
604 tessellationControlShaderStageCreateFlags, // const deUint32 tessellationControlShaderStageCreateFlags
605 tessellationEvalShaderStageCreateFlags, // const deUint32 tessellationEvalShaderStageCreateFlags
606 geometryShaderStageCreateFlags, // const deUint32 geometryShaderStageCreateFlags
607 fragmentShaderStageCreateFlags, // const deUint32 fragmentShaderStageCreateFlags
608 requiredSubgroupSize); // const deUint32 requiredSubgroupSize[5]
609 }
610
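// Allocates a single primary command buffer from the given pool on the context's device.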
611 Move<VkCommandBuffer> makeCommandBuffer (Context& context, const VkCommandPool commandPool)
612 {
613 const VkCommandBufferAllocateInfo bufferAllocateParams =
614 {
615 VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, // VkStructureType sType;
616 DE_NULL, // const void* pNext;
617 commandPool, // VkCommandPool commandPool;
618 VK_COMMAND_BUFFER_LEVEL_PRIMARY, // VkCommandBufferLevel level;
619 1u, // deUint32 commandBufferCount;
620 };
621 return allocateCommandBuffer(context.getDeviceInterface(),
622 context.getDevice(), &bufferAllocateParams);
623 }
624
625 struct Buffer;
626 struct Image;
627
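// Common base for the descriptor resources below: records whether the object is a storage
// image or a (storage/uniform) buffer and owns its memory allocation.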
628 struct BufferOrImage
629 {
630 bool isImage() const
631 {
632 return m_isImage;
633 }
634
635 Buffer* getAsBuffer()
636 {
637 if (m_isImage) DE_FATAL("Trying to get a buffer as an image!");
638 return reinterpret_cast<Buffer* >(this);
639 }
640
641 Image* getAsImage()
642 {
643 if (!m_isImage) DE_FATAL("Trying to get an image as a buffer!");
644 return reinterpret_cast<Image*>(this);
645 }
646
647 virtual VkDescriptorType getType() const
648 {
649 if (m_isImage)
650 {
651 return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
652 }
653 else
654 {
655 return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
656 }
657 }
658
659 Allocation& getAllocation() const
660 {
661 return *m_allocation;
662 }
663
664 virtual ~BufferOrImage() {}
665
666 protected:
667 explicit BufferOrImage(bool image) : m_isImage(image) {}
668
669 bool m_isImage;
670 de::details::MovePtr<Allocation> m_allocation;
671 };
672
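// Host-visible buffer wrapper; getType() reports VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER when the
// buffer was created with uniform usage, and a storage buffer otherwise.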
673 struct Buffer : public BufferOrImage
674 {
675 explicit Buffer (Context& context, VkDeviceSize sizeInBytes, VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT)
676 : BufferOrImage (false)
677 , m_sizeInBytes (sizeInBytes)
678 , m_usage (usage)
679 {
680 const DeviceInterface& vkd = context.getDeviceInterface();
681 const VkDevice device = context.getDevice();
682
683 const vk::VkBufferCreateInfo bufferCreateInfo =
684 {
685 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
686 DE_NULL,
687 0u,
688 m_sizeInBytes,
689 m_usage,
690 VK_SHARING_MODE_EXCLUSIVE,
691 0u,
692 DE_NULL,
693 };
694 m_buffer = createBuffer(vkd, device, &bufferCreateInfo);
695
696 VkMemoryRequirements req = getBufferMemoryRequirements(vkd, device, *m_buffer);
697
698 m_allocation = context.getDefaultAllocator().allocate(req, MemoryRequirement::HostVisible);
699 VK_CHECK(vkd.bindBufferMemory(device, *m_buffer, m_allocation->getMemory(), m_allocation->getOffset()));
700 }
701
702 virtual VkDescriptorType getType() const
703 {
704 if (VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT == m_usage)
705 {
706 return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
707 }
708 return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
709 }
710
711 VkBuffer getBuffer () const
712 {
713 return *m_buffer;
714 }
715
716 const VkBuffer* getBufferPtr () const
717 {
718 return &(*m_buffer);
719 }
720
721 VkDeviceSize getSize () const
722 {
723 return m_sizeInBytes;
724 }
725
726 private:
727 Move<VkBuffer> m_buffer;
728 VkDeviceSize m_sizeInBytes;
729 const VkBufferUsageFlags m_usage;
730 };
731
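// 2D image plus matching image view and nearest-filtering sampler; the constructor binds
// memory and transitions the image from UNDEFINED to VK_IMAGE_LAYOUT_GENERAL.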
732 struct Image : public BufferOrImage
733 {
734 explicit Image (Context& context, deUint32 width, deUint32 height, VkFormat format, VkImageUsageFlags usage = VK_IMAGE_USAGE_STORAGE_BIT)
735 : BufferOrImage(true)
736 {
737 const DeviceInterface& vk = context.getDeviceInterface();
738 const VkDevice device = context.getDevice();
739 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
740
741 const VkImageCreateInfo imageCreateInfo =
742 {
743 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
744 DE_NULL, // const void* pNext;
745 0, // VkImageCreateFlags flags;
746 VK_IMAGE_TYPE_2D, // VkImageType imageType;
747 format, // VkFormat format;
748 {width, height, 1}, // VkExtent3D extent;
749 1, // deUint32 mipLevels;
750 1, // deUint32 arrayLayers;
751 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
752 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
753 usage, // VkImageUsageFlags usage;
754 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
755 0u, // deUint32 queueFamilyIndexCount;
756 DE_NULL, // const deUint32* pQueueFamilyIndices;
757 VK_IMAGE_LAYOUT_UNDEFINED // VkImageLayout initialLayout;
758 };
759
760 const VkComponentMapping componentMapping =
761 {
762 VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
763 VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY
764 };
765
766 const VkImageSubresourceRange subresourceRange =
767 {
768 VK_IMAGE_ASPECT_COLOR_BIT, //VkImageAspectFlags aspectMask
769 0u, //deUint32 baseMipLevel
770 1u, //deUint32 levelCount
771 0u, //deUint32 baseArrayLayer
772 1u //deUint32 layerCount
773 };
774
775 const VkSamplerCreateInfo samplerCreateInfo =
776 {
777 VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, // VkStructureType sType;
778 DE_NULL, // const void* pNext;
779 0u, // VkSamplerCreateFlags flags;
780 VK_FILTER_NEAREST, // VkFilter magFilter;
781 VK_FILTER_NEAREST, // VkFilter minFilter;
782 VK_SAMPLER_MIPMAP_MODE_NEAREST, // VkSamplerMipmapMode mipmapMode;
783 VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeU;
784 VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeV;
785 VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeW;
786 0.0f, // float mipLodBias;
787 VK_FALSE, // VkBool32 anisotropyEnable;
788 1.0f, // float maxAnisotropy;
789 DE_FALSE, // VkBool32 compareEnable;
790 VK_COMPARE_OP_ALWAYS, // VkCompareOp compareOp;
791 0.0f, // float minLod;
792 0.0f, // float maxLod;
793 VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK, // VkBorderColor borderColor;
794 VK_FALSE, // VkBool32 unnormalizedCoordinates;
795 };
796
797 m_image = createImage(vk, device, &imageCreateInfo);
798
799 VkMemoryRequirements req = getImageMemoryRequirements(vk, device, *m_image);
800
801 req.size *= 2;
802 m_allocation = context.getDefaultAllocator().allocate(req, MemoryRequirement::Any);
803
804 VK_CHECK(vk.bindImageMemory(device, *m_image, m_allocation->getMemory(), m_allocation->getOffset()));
805
806 const VkImageViewCreateInfo imageViewCreateInfo =
807 {
808 VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // VkStructureType sType;
809 DE_NULL, // const void* pNext;
810 0, // VkImageViewCreateFlags flags;
811 *m_image, // VkImage image;
812 VK_IMAGE_VIEW_TYPE_2D, // VkImageViewType viewType;
813 imageCreateInfo.format, // VkFormat format;
814 componentMapping, // VkComponentMapping components;
815 subresourceRange // VkImageSubresourceRange subresourceRange;
816 };
817
818 m_imageView = createImageView(vk, device, &imageViewCreateInfo);
819 m_sampler = createSampler(vk, device, &samplerCreateInfo);
820
821 // Transition input image layouts
822 {
823 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
824 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
825
826 beginCommandBuffer(vk, *cmdBuffer);
827
828 const VkImageMemoryBarrier imageBarrier = makeImageMemoryBarrier((VkAccessFlags)0u, VK_ACCESS_TRANSFER_WRITE_BIT,
829 VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, *m_image, subresourceRange);
830
831 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
832 (VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &imageBarrier);
833
834 endCommandBuffer(vk, *cmdBuffer);
835 submitCommandsAndWait(vk, device, context.getUniversalQueue(), *cmdBuffer);
836 }
837 }
838
839 VkImage getImage () const
840 {
841 return *m_image;
842 }
843
844 VkImageView getImageView () const
845 {
846 return *m_imageView;
847 }
848
849 VkSampler getSampler () const
850 {
851 return *m_sampler;
852 }
853
854 private:
855 Move<VkImage> m_image;
856 Move<VkImageView> m_imageView;
857 Move<VkSampler> m_sampler;
858 };
859 }
860
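// Number of shader stages a test exercises for the given stage mask: 6 for the ray tracing
// stages, 4 for graphics (vertex/tessellation/geometry; the fragment stage is handled
// separately by the buffer declarations below), and 1 for compute.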
861 deUint32 vkt::subgroups::getStagesCount (const VkShaderStageFlags shaderStages)
862 {
863 const deUint32 stageCount = isAllRayTracingStages(shaderStages) ? 6
864 : isAllGraphicsStages(shaderStages) ? 4
865 : isAllComputeStages(shaderStages) ? 1
866 : 0;
867
868 DE_ASSERT(stageCount != 0);
869
870 return stageCount;
871 }
872
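// Returns GLSL for a sharedMemoryBallot(bool) helper that emulates a ballot with shared
// memory: one elected invocation per subgroup clears a shared uvec4, then every voting
// invocation ORs its bit in via atomicOr (32 invocations per component, up to 128 total).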
873 std::string vkt::subgroups::getSharedMemoryBallotHelper ()
874 {
875 return "shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
876 "uvec4 sharedMemoryBallot(bool vote)\n"
877 "{\n"
878 " uint groupOffset = gl_SubgroupID;\n"
879 "  // One invocation in the group zeroes the whole group's data\n"
880 " if (subgroupElect())\n"
881 " {\n"
882 " superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
883 " }\n"
884 " subgroupMemoryBarrierShared();\n"
885 " if (vote)\n"
886 " {\n"
887 " const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
888 " const highp uint bitToSet = 1u << invocationId;\n"
889 " switch (gl_SubgroupInvocationID / 32)\n"
890 " {\n"
891 " case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
892 " case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
893 " case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
894 " case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
895 " }\n"
896 " }\n"
897 " subgroupMemoryBarrierShared();\n"
898 " return superSecretComputeShaderHelper[groupOffset];\n"
899 "}\n";
900 }
901
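// Variant of the helper above returning a packed 64-bit ballot (uint64_t) built from the
// lower two 32-bit words, for the ARB-style interface; only the first 64 invocations are
// represented in the packed result.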
902 std::string vkt::subgroups::getSharedMemoryBallotHelperARB ()
903 {
904 return "shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
905 "uint64_t sharedMemoryBallot(bool vote)\n"
906 "{\n"
907 " uint groupOffset = gl_SubgroupID;\n"
908 "  // One invocation in the group zeroes the whole group's data\n"
909 " if (subgroupElect())\n"
910 " {\n"
911 " superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
912 " }\n"
913 " subgroupMemoryBarrierShared();\n"
914 " if (vote)\n"
915 " {\n"
916 " const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
917 " const highp uint bitToSet = 1u << invocationId;\n"
918 " switch (gl_SubgroupInvocationID / 32)\n"
919 " {\n"
920 " case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
921 " case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
922 " case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
923 " case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
924 " }\n"
925 " }\n"
926 " subgroupMemoryBarrierShared();\n"
927 " return packUint2x32(superSecretComputeShaderHelper[groupOffset].xy);\n"
928 "}\n";
929 }
930
931 deUint32 vkt::subgroups::getSubgroupSize (Context& context)
932 {
933 return context.getSubgroupProperties().subgroupSize;
934 }
935
936 deUint32 vkt::subgroups::maxSupportedSubgroupSize ()
937 {
938 return 128u;
939 }
940
941 std::string vkt::subgroups::getShaderStageName (VkShaderStageFlags stage)
942 {
943 switch (stage)
944 {
945 case VK_SHADER_STAGE_COMPUTE_BIT: return "compute";
946 case VK_SHADER_STAGE_FRAGMENT_BIT: return "fragment";
947 case VK_SHADER_STAGE_VERTEX_BIT: return "vertex";
948 case VK_SHADER_STAGE_GEOMETRY_BIT: return "geometry";
949 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: return "tess_control";
950 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: return "tess_eval";
951 case VK_SHADER_STAGE_RAYGEN_BIT_KHR: return "rgen";
952 case VK_SHADER_STAGE_ANY_HIT_BIT_KHR: return "ahit";
953 case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR: return "chit";
954 case VK_SHADER_STAGE_MISS_BIT_KHR: return "miss";
955 case VK_SHADER_STAGE_INTERSECTION_BIT_KHR: return "sect";
956 case VK_SHADER_STAGE_CALLABLE_BIT_KHR: return "call";
957 default: TCU_THROW(InternalError, "Unhandled stage");
958 }
959 }
960
961 std::string vkt::subgroups::getSubgroupFeatureName (vk::VkSubgroupFeatureFlagBits bit)
962 {
963 switch (bit)
964 {
965 case VK_SUBGROUP_FEATURE_BASIC_BIT: return "VK_SUBGROUP_FEATURE_BASIC_BIT";
966 case VK_SUBGROUP_FEATURE_VOTE_BIT: return "VK_SUBGROUP_FEATURE_VOTE_BIT";
967 case VK_SUBGROUP_FEATURE_ARITHMETIC_BIT: return "VK_SUBGROUP_FEATURE_ARITHMETIC_BIT";
968 case VK_SUBGROUP_FEATURE_BALLOT_BIT: return "VK_SUBGROUP_FEATURE_BALLOT_BIT";
969 case VK_SUBGROUP_FEATURE_SHUFFLE_BIT: return "VK_SUBGROUP_FEATURE_SHUFFLE_BIT";
970 case VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT: return "VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT";
971 case VK_SUBGROUP_FEATURE_CLUSTERED_BIT: return "VK_SUBGROUP_FEATURE_CLUSTERED_BIT";
972 case VK_SUBGROUP_FEATURE_QUAD_BIT: return "VK_SUBGROUP_FEATURE_QUAD_BIT";
973 default: TCU_THROW(InternalError, "Unknown subgroup feature category");
974 }
975 }
976
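// Registers plain pass-through vertex and tessellation shaders (pre-assembled SPIR-V, with
// the equivalent GLSL shown above each module) for pipeline stages that do not take part in
// the subgroup operations under test.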
977 void vkt::subgroups::addNoSubgroupShader (SourceCollections& programCollection)
978 {
979 {
980 /*
981 "#version 450\n"
982 "void main (void)\n"
983 "{\n"
984 " float pixelSize = 2.0f/1024.0f;\n"
985 " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
986 " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
987 " gl_PointSize = 1.0f;\n"
988 "}\n"
989 */
990 const std::string vertNoSubgroup =
991 "; SPIR-V\n"
992 "; Version: 1.3\n"
993 "; Generator: Khronos Glslang Reference Front End; 1\n"
994 "; Bound: 37\n"
995 "; Schema: 0\n"
996 "OpCapability Shader\n"
997 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
998 "OpMemoryModel Logical GLSL450\n"
999 "OpEntryPoint Vertex %4 \"main\" %22 %26\n"
1000 "OpMemberDecorate %20 0 BuiltIn Position\n"
1001 "OpMemberDecorate %20 1 BuiltIn PointSize\n"
1002 "OpMemberDecorate %20 2 BuiltIn ClipDistance\n"
1003 "OpMemberDecorate %20 3 BuiltIn CullDistance\n"
1004 "OpDecorate %20 Block\n"
1005 "OpDecorate %26 BuiltIn VertexIndex\n"
1006 "%2 = OpTypeVoid\n"
1007 "%3 = OpTypeFunction %2\n"
1008 "%6 = OpTypeFloat 32\n"
1009 "%7 = OpTypePointer Function %6\n"
1010 "%9 = OpConstant %6 0.00195313\n"
1011 "%12 = OpConstant %6 2\n"
1012 "%14 = OpConstant %6 1\n"
1013 "%16 = OpTypeVector %6 4\n"
1014 "%17 = OpTypeInt 32 0\n"
1015 "%18 = OpConstant %17 1\n"
1016 "%19 = OpTypeArray %6 %18\n"
1017 "%20 = OpTypeStruct %16 %6 %19 %19\n"
1018 "%21 = OpTypePointer Output %20\n"
1019 "%22 = OpVariable %21 Output\n"
1020 "%23 = OpTypeInt 32 1\n"
1021 "%24 = OpConstant %23 0\n"
1022 "%25 = OpTypePointer Input %23\n"
1023 "%26 = OpVariable %25 Input\n"
1024 "%33 = OpConstant %6 0\n"
1025 "%35 = OpTypePointer Output %16\n"
1026 "%37 = OpConstant %23 1\n"
1027 "%38 = OpTypePointer Output %6\n"
1028 "%4 = OpFunction %2 None %3\n"
1029 "%5 = OpLabel\n"
1030 "%8 = OpVariable %7 Function\n"
1031 "%10 = OpVariable %7 Function\n"
1032 "OpStore %8 %9\n"
1033 "%11 = OpLoad %6 %8\n"
1034 "%13 = OpFDiv %6 %11 %12\n"
1035 "%15 = OpFSub %6 %13 %14\n"
1036 "OpStore %10 %15\n"
1037 "%27 = OpLoad %23 %26\n"
1038 "%28 = OpConvertSToF %6 %27\n"
1039 "%29 = OpLoad %6 %8\n"
1040 "%30 = OpFMul %6 %28 %29\n"
1041 "%31 = OpLoad %6 %10\n"
1042 "%32 = OpFAdd %6 %30 %31\n"
1043 "%34 = OpCompositeConstruct %16 %32 %33 %33 %14\n"
1044 "%36 = OpAccessChain %35 %22 %24\n"
1045 "OpStore %36 %34\n"
1046 "%39 = OpAccessChain %38 %22 %37\n"
1047 "OpStore %39 %14\n"
1048 "OpReturn\n"
1049 "OpFunctionEnd\n";
1050 programCollection.spirvAsmSources.add("vert_noSubgroup") << vertNoSubgroup;
1051 }
1052
1053 {
1054 /*
1055 "#version 450\n"
1056 "layout(vertices=1) out;\n"
1057 "\n"
1058 "void main (void)\n"
1059 "{\n"
1060 " if (gl_InvocationID == 0)\n"
1061 " {\n"
1062 " gl_TessLevelOuter[0] = 1.0f;\n"
1063 " gl_TessLevelOuter[1] = 1.0f;\n"
1064 " }\n"
1065 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1066 "}\n"
1067 */
1068 const std::string tescNoSubgroup =
1069 "; SPIR-V\n"
1070 "; Version: 1.3\n"
1071 "; Generator: Khronos Glslang Reference Front End; 1\n"
1072 "; Bound: 45\n"
1073 "; Schema: 0\n"
1074 "OpCapability Tessellation\n"
1075 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1076 "OpMemoryModel Logical GLSL450\n"
1077 "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %32 %38\n"
1078 "OpExecutionMode %4 OutputVertices 1\n"
1079 "OpDecorate %8 BuiltIn InvocationId\n"
1080 "OpDecorate %20 Patch\n"
1081 "OpDecorate %20 BuiltIn TessLevelOuter\n"
1082 "OpMemberDecorate %29 0 BuiltIn Position\n"
1083 "OpMemberDecorate %29 1 BuiltIn PointSize\n"
1084 "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
1085 "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
1086 "OpDecorate %29 Block\n"
1087 "OpMemberDecorate %34 0 BuiltIn Position\n"
1088 "OpMemberDecorate %34 1 BuiltIn PointSize\n"
1089 "OpMemberDecorate %34 2 BuiltIn ClipDistance\n"
1090 "OpMemberDecorate %34 3 BuiltIn CullDistance\n"
1091 "OpDecorate %34 Block\n"
1092 "%2 = OpTypeVoid\n"
1093 "%3 = OpTypeFunction %2\n"
1094 "%6 = OpTypeInt 32 1\n"
1095 "%7 = OpTypePointer Input %6\n"
1096 "%8 = OpVariable %7 Input\n"
1097 "%10 = OpConstant %6 0\n"
1098 "%11 = OpTypeBool\n"
1099 "%15 = OpTypeFloat 32\n"
1100 "%16 = OpTypeInt 32 0\n"
1101 "%17 = OpConstant %16 4\n"
1102 "%18 = OpTypeArray %15 %17\n"
1103 "%19 = OpTypePointer Output %18\n"
1104 "%20 = OpVariable %19 Output\n"
1105 "%21 = OpConstant %15 1\n"
1106 "%22 = OpTypePointer Output %15\n"
1107 "%24 = OpConstant %6 1\n"
1108 "%26 = OpTypeVector %15 4\n"
1109 "%27 = OpConstant %16 1\n"
1110 "%28 = OpTypeArray %15 %27\n"
1111 "%29 = OpTypeStruct %26 %15 %28 %28\n"
1112 "%30 = OpTypeArray %29 %27\n"
1113 "%31 = OpTypePointer Output %30\n"
1114 "%32 = OpVariable %31 Output\n"
1115 "%34 = OpTypeStruct %26 %15 %28 %28\n"
1116 "%35 = OpConstant %16 32\n"
1117 "%36 = OpTypeArray %34 %35\n"
1118 "%37 = OpTypePointer Input %36\n"
1119 "%38 = OpVariable %37 Input\n"
1120 "%40 = OpTypePointer Input %26\n"
1121 "%43 = OpTypePointer Output %26\n"
1122 "%4 = OpFunction %2 None %3\n"
1123 "%5 = OpLabel\n"
1124 "%9 = OpLoad %6 %8\n"
1125 "%12 = OpIEqual %11 %9 %10\n"
1126 "OpSelectionMerge %14 None\n"
1127 "OpBranchConditional %12 %13 %14\n"
1128 "%13 = OpLabel\n"
1129 "%23 = OpAccessChain %22 %20 %10\n"
1130 "OpStore %23 %21\n"
1131 "%25 = OpAccessChain %22 %20 %24\n"
1132 "OpStore %25 %21\n"
1133 "OpBranch %14\n"
1134 "%14 = OpLabel\n"
1135 "%33 = OpLoad %6 %8\n"
1136 "%39 = OpLoad %6 %8\n"
1137 "%41 = OpAccessChain %40 %38 %39 %10\n"
1138 "%42 = OpLoad %26 %41\n"
1139 "%44 = OpAccessChain %43 %32 %33 %10\n"
1140 "OpStore %44 %42\n"
1141 "OpReturn\n"
1142 "OpFunctionEnd\n";
1143 programCollection.spirvAsmSources.add("tesc_noSubgroup") << tescNoSubgroup;
1144 }
1145
1146 {
1147 /*
1148 "#version 450\n"
1149 "layout(isolines) in;\n"
1150 "\n"
1151 "void main (void)\n"
1152 "{\n"
1153 " float pixelSize = 2.0f/1024.0f;\n"
1154 " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
1155 "}\n";
1156 */
1157 const std::string teseNoSubgroup =
1158 "; SPIR-V\n"
1159 "; Version: 1.3\n"
1160 "; Generator: Khronos Glslang Reference Front End; 2\n"
1161 "; Bound: 42\n"
1162 "; Schema: 0\n"
1163 "OpCapability Tessellation\n"
1164 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1165 "OpMemoryModel Logical GLSL450\n"
1166 "OpEntryPoint TessellationEvaluation %4 \"main\" %16 %23 %29\n"
1167 "OpExecutionMode %4 Isolines\n"
1168 "OpExecutionMode %4 SpacingEqual\n"
1169 "OpExecutionMode %4 VertexOrderCcw\n"
1170 "OpMemberDecorate %14 0 BuiltIn Position\n"
1171 "OpMemberDecorate %14 1 BuiltIn PointSize\n"
1172 "OpMemberDecorate %14 2 BuiltIn ClipDistance\n"
1173 "OpMemberDecorate %14 3 BuiltIn CullDistance\n"
1174 "OpDecorate %14 Block\n"
1175 "OpMemberDecorate %19 0 BuiltIn Position\n"
1176 "OpMemberDecorate %19 1 BuiltIn PointSize\n"
1177 "OpMemberDecorate %19 2 BuiltIn ClipDistance\n"
1178 "OpMemberDecorate %19 3 BuiltIn CullDistance\n"
1179 "OpDecorate %19 Block\n"
1180 "OpDecorate %29 BuiltIn TessCoord\n"
1181 "%2 = OpTypeVoid\n"
1182 "%3 = OpTypeFunction %2\n"
1183 "%6 = OpTypeFloat 32\n"
1184 "%7 = OpTypePointer Function %6\n"
1185 "%9 = OpConstant %6 0.00195313\n"
1186 "%10 = OpTypeVector %6 4\n"
1187 "%11 = OpTypeInt 32 0\n"
1188 "%12 = OpConstant %11 1\n"
1189 "%13 = OpTypeArray %6 %12\n"
1190 "%14 = OpTypeStruct %10 %6 %13 %13\n"
1191 "%15 = OpTypePointer Output %14\n"
1192 "%16 = OpVariable %15 Output\n"
1193 "%17 = OpTypeInt 32 1\n"
1194 "%18 = OpConstant %17 0\n"
1195 "%19 = OpTypeStruct %10 %6 %13 %13\n"
1196 "%20 = OpConstant %11 32\n"
1197 "%21 = OpTypeArray %19 %20\n"
1198 "%22 = OpTypePointer Input %21\n"
1199 "%23 = OpVariable %22 Input\n"
1200 "%24 = OpTypePointer Input %10\n"
1201 "%27 = OpTypeVector %6 3\n"
1202 "%28 = OpTypePointer Input %27\n"
1203 "%29 = OpVariable %28 Input\n"
1204 "%30 = OpConstant %11 0\n"
1205 "%31 = OpTypePointer Input %6\n"
1206 "%36 = OpConstant %6 2\n"
1207 "%40 = OpTypePointer Output %10\n"
1208 "%4 = OpFunction %2 None %3\n"
1209 "%5 = OpLabel\n"
1210 "%8 = OpVariable %7 Function\n"
1211 "OpStore %8 %9\n"
1212 "%25 = OpAccessChain %24 %23 %18 %18\n"
1213 "%26 = OpLoad %10 %25\n"
1214 "%32 = OpAccessChain %31 %29 %30\n"
1215 "%33 = OpLoad %6 %32\n"
1216 "%34 = OpLoad %6 %8\n"
1217 "%35 = OpFMul %6 %33 %34\n"
1218 "%37 = OpFDiv %6 %35 %36\n"
1219 "%38 = OpCompositeConstruct %10 %37 %37 %37 %37\n"
1220 "%39 = OpFAdd %10 %26 %38\n"
1221 "%41 = OpAccessChain %40 %16 %18\n"
1222 "OpStore %41 %39\n"
1223 "OpReturn\n"
1224 "OpFunctionEnd\n";
1225 programCollection.spirvAsmSources.add("tese_noSubgroup") << teseNoSubgroup;
1226 }
1227
1228 }
1229
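// Default I/O declarations for the framebuffer tests: a float color output ("result" for the
// vertex stage, "out_color" elsewhere, an array for tess control) plus a uniform buffer with
// one element per possible subgroup invocation. Caller-supplied declarations take precedence.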
1230 static std::string getFramebufferBufferDeclarations (const VkFormat& format,
1231 const std::vector<std::string>& declarations,
1232 const deUint32 stage)
1233 {
1234 if (declarations.empty())
1235 {
1236 const std::string name = (stage == 0) ? "result" : "out_color";
1237 const std::string suffix = (stage == 2) ? "[]" : "";
1238 const std::string result =
1239 "layout(location = 0) out float " + name + suffix + ";\n"
1240 "layout(set = 0, binding = 0) uniform Buffer1\n"
1241 "{\n"
1242 " " + de::toString(subgroups::getFormatNameForGLSL(format)) + " data[" + de::toString(subgroups::maxSupportedSubgroupSize()) + "];\n"
1243 "};\n";
1244
1245 return result;
1246 }
1247 else
1248 {
1249 return declarations[stage];
1250 }
1251 }
1252
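// Builds the framebuffer-variant test shaders: the stage under test combines extHeader, the
// declarations above, helperStr and testSrc and writes float(tempRes) to its color output,
// while the remaining stages get simple pass-through shaders.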
1253 void vkt::subgroups::initStdFrameBufferPrograms (SourceCollections& programCollection,
1254 const vk::ShaderBuildOptions& buildOptions,
1255 VkShaderStageFlags shaderStage,
1256 VkFormat format,
1257 bool gsPointSize,
1258 const std::string& extHeader,
1259 const std::string& testSrc,
1260 const std::string& helperStr,
1261 const std::vector<std::string>& declarations)
1262 {
1263 subgroups::setFragmentShaderFrameBuffer(programCollection);
1264
1265 if (shaderStage != VK_SHADER_STAGE_VERTEX_BIT)
1266 subgroups::setVertexShaderFrameBuffer(programCollection);
1267
1268 if (shaderStage == VK_SHADER_STAGE_VERTEX_BIT)
1269 {
1270 std::ostringstream vertex;
1271
1272 vertex << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1273 << extHeader
1274 << "layout(location = 0) in highp vec4 in_position;\n"
1275 << getFramebufferBufferDeclarations(format, declarations, 0)
1276 << "\n"
1277 << helperStr
1278 << "void main (void)\n"
1279 << "{\n"
1280 << " uint tempRes;\n"
1281 << testSrc
1282 << " result = float(tempRes);\n"
1283 << " gl_Position = in_position;\n"
1284 << " gl_PointSize = 1.0f;\n"
1285 << "}\n";
1286
1287 programCollection.glslSources.add("vert") << glu::VertexSource(vertex.str()) << buildOptions;
1288 }
1289 else if (shaderStage == VK_SHADER_STAGE_GEOMETRY_BIT)
1290 {
1291 std::ostringstream geometry;
1292
1293 geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1294 << extHeader
1295 << "layout(points) in;\n"
1296 << "layout(points, max_vertices = 1) out;\n"
1297 << getFramebufferBufferDeclarations(format, declarations, 1)
1298 << "\n"
1299 << helperStr
1300 << "void main (void)\n"
1301 << "{\n"
1302 << " uint tempRes;\n"
1303 << testSrc
1304 << " out_color = float(tempRes);\n"
1305 << " gl_Position = gl_in[0].gl_Position;\n"
1306 << (gsPointSize ? " gl_PointSize = gl_in[0].gl_PointSize;\n" : "")
1307 << " EmitVertex();\n"
1308 << " EndPrimitive();\n"
1309 << "}\n";
1310
1311 programCollection.glslSources.add("geometry") << glu::GeometrySource(geometry.str()) << buildOptions;
1312 }
1313 else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
1314 {
1315 std::ostringstream controlSource;
1316
1317 controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1318 << extHeader
1319 << "layout(vertices = 2) out;\n"
1320 << getFramebufferBufferDeclarations(format, declarations, 2)
1321 << "\n"
1322 << helperStr
1323 << "void main (void)\n"
1324 << "{\n"
1325 << " if (gl_InvocationID == 0)\n"
1326 << " {\n"
1327 << " gl_TessLevelOuter[0] = 1.0f;\n"
1328 << " gl_TessLevelOuter[1] = 1.0f;\n"
1329 << " }\n"
1330 << " uint tempRes;\n"
1331 << testSrc
1332 << " out_color[gl_InvocationID] = float(tempRes);\n"
1333 << " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1334 << (gsPointSize ? " gl_out[gl_InvocationID].gl_PointSize = gl_in[gl_InvocationID].gl_PointSize;\n" : "")
1335 << "}\n";
1336
1337 programCollection.glslSources.add("tesc") << glu::TessellationControlSource(controlSource.str()) << buildOptions;
1338 subgroups::setTesEvalShaderFrameBuffer(programCollection);
1339 }
1340 else if (shaderStage == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
1341 {
1342 ostringstream evaluationSource;
1343
1344 evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
1345 << extHeader
1346 << "layout(isolines, equal_spacing, ccw ) in;\n"
1347 << getFramebufferBufferDeclarations(format, declarations, 3)
1348 << "\n"
1349 << helperStr
1350 << "void main (void)\n"
1351 << "{\n"
1352 << " uint tempRes;\n"
1353 << testSrc
1354 << " out_color = float(tempRes);\n"
1355 << " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
1356 << (gsPointSize ? " gl_PointSize = gl_in[0].gl_PointSize;\n" : "")
1357 << "}\n";
1358
1359 subgroups::setTesCtrlShaderFrameBuffer(programCollection);
1360 programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
1361 }
1362 else
1363 {
1364 DE_FATAL("Unsupported shader stage");
1365 }
1366 }
1367
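// When the caller does not supply explicit declarations, generate the default interface for the given stage:
// the result output (an SSBO, or a plain fragment output for the fragment stage) plus the input data buffer.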
1368 static std::string getBufferDeclarations (vk::VkShaderStageFlags shaderStage,
1369 const std::string& formatName,
1370 const std::vector<std::string>& declarations,
1371 const deUint32 stage)
1372 {
1373 if (declarations.empty())
1374 {
1375 const deUint32 stageCount = vkt::subgroups::getStagesCount(shaderStage);
1376 const deUint32 binding0 = stage;
1377 const deUint32 binding1 = stageCount;
1378 const bool fragment = (shaderStage & VK_SHADER_STAGE_FRAGMENT_BIT) && (stage == stageCount);
1379 const string buffer1 = fragment
1380 ? "layout(location = 0) out uint result;\n"
1381 : "layout(set = 0, binding = " + de::toString(binding0) + ", std430) buffer Buffer1\n"
1382 "{\n"
1383 " uint result[];\n"
1384 "};\n";
1385 // TODO (boza): presumably this could simply be "layout(set = 0, binding = " + de::toString(binding1) + ", std430) readonly buffer Buffer2\n"
1386 const string buffer2 = "layout(set = 0, binding = " + de::toString(binding1) + ", std430)" + (stageCount == 1 ? "" : " readonly") + " buffer Buffer" + (fragment ? "1" : "2") + "\n"
1387 "{\n"
1388 " " + formatName + " data[];\n"
1389 "};\n";
1390
1391 return buffer1 + buffer2;
1392 }
1393 else
1394 {
1395 return declarations[stage];
1396 }
1397 }
1398
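// Builds the standard shader set for the non-framebuffer test variants: a single compute shader, the full graphics
// pipeline (vertex, tessellation, geometry, fragment) or the full set of ray tracing stages, depending on shaderStage.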
1399 void vkt::subgroups::initStdPrograms (vk::SourceCollections& programCollection,
1400 const vk::ShaderBuildOptions& buildOptions,
1401 vk::VkShaderStageFlags shaderStage,
1402 vk::VkFormat format,
1403 bool gsPointSize,
1404 const std::string& extHeader,
1405 const std::string& testSrc,
1406 const std::string& helperStr,
1407 const std::vector<std::string>& declarations,
1408 const bool avoidHelperInvocations,
1409 const std::string& tempRes)
1410 {
1411 const std::string formatName = subgroups::getFormatNameForGLSL(format);
1412
1413 if (isAllComputeStages(shaderStage))
1414 {
1415 std::ostringstream src;
1416
1417 src << "#version 450\n"
1418 << extHeader
1419 << "layout (local_size_x_id = 0, local_size_y_id = 1, "
1420 "local_size_z_id = 2) in;\n"
1421 << getBufferDeclarations(shaderStage, formatName, declarations, 0)
1422 << "\n"
1423 << helperStr
1424 << "void main (void)\n"
1425 << "{\n"
1426 << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1427 << " highp uint offset = globalSize.x * ((globalSize.y * "
1428 "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
1429 "gl_GlobalInvocationID.x;\n"
1430 << tempRes
1431 << testSrc
1432 << " result[offset] = tempRes;\n"
1433 << "}\n";
1434
1435 programCollection.glslSources.add("comp") << glu::ComputeSource(src.str()) << buildOptions;
1436 }
1437 else if (isAllGraphicsStages(shaderStage))
1438 {
1439 const string vertex =
1440 "#version 450\n"
1441 + extHeader
1442 + getBufferDeclarations(shaderStage, formatName, declarations, 0) +
1443 "\n"
1444 + helperStr +
1445 "void main (void)\n"
1446 "{\n"
1447 " uint tempRes;\n"
1448 + testSrc +
1449 " result[gl_VertexIndex] = tempRes;\n"
1450 " float pixelSize = 2.0f/1024.0f;\n"
1451 " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
1452 " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
1453 " gl_PointSize = 1.0f;\n"
1454 "}\n";
1455
1456 const string tesc =
1457 "#version 450\n"
1458 + extHeader +
1459 "layout(vertices=1) out;\n"
1460 + getBufferDeclarations(shaderStage, formatName, declarations, 1) +
1461 "\n"
1462 + helperStr +
1463 "void main (void)\n"
1464 "{\n"
1465 + tempRes
1466 + testSrc +
1467 " result[gl_PrimitiveID] = tempRes;\n"
1468 " if (gl_InvocationID == 0)\n"
1469 " {\n"
1470 " gl_TessLevelOuter[0] = 1.0f;\n"
1471 " gl_TessLevelOuter[1] = 1.0f;\n"
1472 " }\n"
1473 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1474 + (gsPointSize ? " gl_out[gl_InvocationID].gl_PointSize = gl_in[gl_InvocationID].gl_PointSize;\n" : "") +
1475 "}\n";
1476
1477 const string tese =
1478 "#version 450\n"
1479 + extHeader +
1480 "layout(isolines) in;\n"
1481 + getBufferDeclarations(shaderStage, formatName, declarations, 2) +
1482 "\n"
1483 + helperStr +
1484 "void main (void)\n"
1485 "{\n"
1486 + tempRes
1487 + testSrc +
1488 " result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempRes;\n"
1489 " float pixelSize = 2.0f/1024.0f;\n"
1490 " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
1491 + (gsPointSize ? " gl_PointSize = gl_in[0].gl_PointSize;\n" : "") +
1492 "}\n";
1493
1494 const string geometry =
1495 "#version 450\n"
1496 + extHeader +
1497 "layout(${TOPOLOGY}) in;\n"
1498 "layout(points, max_vertices = 1) out;\n"
1499 + getBufferDeclarations(shaderStage, formatName, declarations, 3) +
1500 "\n"
1501 + helperStr +
1502 "void main (void)\n"
1503 "{\n"
1504 + tempRes
1505 + testSrc +
1506 " result[gl_PrimitiveIDIn] = tempRes;\n"
1507 " gl_Position = gl_in[0].gl_Position;\n"
1508 + (gsPointSize ? " gl_PointSize = gl_in[0].gl_PointSize;\n" : "") +
1509 " EmitVertex();\n"
1510 " EndPrimitive();\n"
1511 "}\n";
1512
1513 const string fragment =
1514 "#version 450\n"
1515 + extHeader
1516 + getBufferDeclarations(shaderStage, formatName, declarations, 4)
1517 + helperStr +
1518 "void main (void)\n"
1519 "{\n"
1520 + (avoidHelperInvocations ? " if (gl_HelperInvocation) return;\n" : "")
1521 + tempRes
1522 + testSrc +
1523 " result = tempRes;\n"
1524 "}\n";
1525
1526 subgroups::addNoSubgroupShader(programCollection);
1527
1528 programCollection.glslSources.add("vert") << glu::VertexSource(vertex) << buildOptions;
1529 programCollection.glslSources.add("tesc") << glu::TessellationControlSource(tesc) << buildOptions;
1530 programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(tese) << buildOptions;
1531 subgroups::addGeometryShadersFromTemplate(geometry, buildOptions, programCollection.glslSources);
1532 programCollection.glslSources.add("fragment") << glu::FragmentSource(fragment)<< buildOptions;
1533 }
1534 else if (isAllRayTracingStages(shaderStage))
1535 {
1536 const std::string rgenShader =
1537 "#version 460 core\n"
1538 "#extension GL_EXT_ray_tracing: require\n"
1539 + extHeader +
1540 "layout(location = 0) rayPayloadEXT uvec4 payload;\n"
1541 "layout(location = 0) callableDataEXT uvec4 callData;"
1542 "layout(set = 1, binding = 0) uniform accelerationStructureEXT topLevelAS;\n"
1543 + getBufferDeclarations(shaderStage, formatName, declarations, 0) +
1544 "\n"
1545 + helperStr +
1546 "void main()\n"
1547 "{\n"
1548 + tempRes
1549 + testSrc +
1550 " uint rayFlags = 0;\n"
1551 " uint cullMask = 0xFF;\n"
1552 " float tmin = 0.0;\n"
1553 " float tmax = 9.0;\n"
1554 " vec3 origin = vec3((float(gl_LaunchIDEXT.x) + 0.5f) / float(gl_LaunchSizeEXT.x), (float(gl_LaunchIDEXT.y) + 0.5f) / float(gl_LaunchSizeEXT.y), 0.0);\n"
1555 " vec3 directHit = vec3(0.0, 0.0, -1.0);\n"
1556 " vec3 directMiss = vec3(0.0, 0.0, +1.0);\n"
1557 "\n"
1558 " traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directHit, tmax, 0);\n"
1559 " traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directMiss, tmax, 0);\n"
1560 " executeCallableEXT(0, 0);"
1561 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1562 "}\n";
1563 const std::string ahitShader =
1564 "#version 460 core\n"
1565 "#extension GL_EXT_ray_tracing: require\n"
1566 + extHeader +
1567 "hitAttributeEXT vec3 attribs;\n"
1568 "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
1569 + getBufferDeclarations(shaderStage, formatName, declarations, 1) +
1570 "\n"
1571 + helperStr +
1572 "void main()\n"
1573 "{\n"
1574 + tempRes
1575 + testSrc +
1576 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1577 "}\n";
1578 const std::string chitShader =
1579 "#version 460 core\n"
1580 "#extension GL_EXT_ray_tracing: require\n"
1581 + extHeader +
1582 "hitAttributeEXT vec3 attribs;\n"
1583 "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
1584 + getBufferDeclarations(shaderStage, formatName, declarations, 2) +
1585 "\n"
1586 + helperStr +
1587 "void main()\n"
1588 "{\n"
1589 + tempRes
1590 + testSrc +
1591 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1592 "}\n";
1593 const std::string missShader =
1594 "#version 460 core\n"
1595 "#extension GL_EXT_ray_tracing: require\n"
1596 + extHeader +
1597 "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
1598 + getBufferDeclarations(shaderStage, formatName, declarations, 3) +
1599 "\n"
1600 + helperStr +
1601 "void main()\n"
1602 "{\n"
1603 + tempRes
1604 + testSrc +
1605 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1606 "}\n";
1607 const std::string sectShader =
1608 "#version 460 core\n"
1609 "#extension GL_EXT_ray_tracing: require\n"
1610 + extHeader +
1611 "hitAttributeEXT vec3 hitAttribute;\n"
1612 + getBufferDeclarations(shaderStage, formatName, declarations, 4) +
1613 "\n"
1614 + helperStr +
1615 "void main()\n"
1616 "{\n"
1617 + tempRes
1618 + testSrc +
1619 " reportIntersectionEXT(0.75f, gl_HitKindFrontFacingTriangleEXT);\n"
1620 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1621 "}\n";
1622 const std::string callShader =
1623 "#version 460 core\n"
1624 "#extension GL_EXT_ray_tracing: require\n"
1625 + extHeader +
1626 "layout(location = 0) callableDataInEXT float callData;\n"
1627 + getBufferDeclarations(shaderStage, formatName, declarations, 5) +
1628 "\n"
1629 + helperStr +
1630 "void main()\n"
1631 "{\n"
1632 + tempRes
1633 + testSrc +
1634 " result[gl_LaunchIDEXT.x] = tempRes;\n"
1635 "}\n";
1636
1637 programCollection.glslSources.add("rgen") << glu::RaygenSource (rgenShader) << buildOptions;
1638 programCollection.glslSources.add("ahit") << glu::AnyHitSource (ahitShader) << buildOptions;
1639 programCollection.glslSources.add("chit") << glu::ClosestHitSource (chitShader) << buildOptions;
1640 programCollection.glslSources.add("miss") << glu::MissSource (missShader) << buildOptions;
1641 programCollection.glslSources.add("sect") << glu::IntersectionSource(sectShader) << buildOptions;
1642 programCollection.glslSources.add("call") << glu::CallableSource (callShader) << buildOptions;
1643
1644 subgroups::addRayTracingNoSubgroupShader(programCollection);
1645 }
1646 else
1647 TCU_THROW(InternalError, "Unknown stage or invalid stage set");
1648
1649 }
1650
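// Subgroup operations require a Vulkan 1.1 context.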
1651 bool vkt::subgroups::isSubgroupSupported (Context& context)
1652 {
1653 return context.contextSupports(vk::ApiVersion(1, 1, 0));
1654 }
1655
1656 bool vkt::subgroups::areSubgroupOperationsSupportedForStage (Context& context, const VkShaderStageFlags stage)
1657 {
1658 return (stage & (context.getSubgroupProperties().supportedStages)) ? true : false;
1659 }
1660
1661 bool vkt::subgroups::isSubgroupFeatureSupportedForDevice (Context& context, VkSubgroupFeatureFlagBits bit)
1662 {
1663 return (bit & (context.getSubgroupProperties().supportedOperations)) ? true : false;
1664 }
1665
1666 bool vkt::subgroups::isFragmentSSBOSupportedForDevice (Context& context)
1667 {
1668 return context.getDeviceFeatures().fragmentStoresAndAtomics ? true : false;
1669 }
1670
1671 bool vkt::subgroups::isVertexSSBOSupportedForDevice (Context& context)
1672 {
1673 return context.getDeviceFeatures().vertexPipelineStoresAndAtomics ? true : false;
1674 }
1675
1676 bool vkt::subgroups::isInt64SupportedForDevice (Context& context)
1677 {
1678 return context.getDeviceFeatures().shaderInt64 ? true : false;
1679 }
1680
1681 bool vkt::subgroups::isTessellationAndGeometryPointSizeSupported (Context& context)
1682 {
1683 return context.getDeviceFeatures().shaderTessellationAndGeometryPointSize ? true : false;
1684 }
1685
1686 bool vkt::subgroups::is16BitUBOStorageSupported (Context& context)
1687 {
1688 return context.get16BitStorageFeatures().uniformAndStorageBuffer16BitAccess ? true : false;
1689 }
1690
1691 bool vkt::subgroups::is8BitUBOStorageSupported (Context& context)
1692 {
1693 return context.get8BitStorageFeatures().uniformAndStorageBuffer8BitAccess ? true : false;
1694 }
1695
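// Checks the device features needed to use the given format in the test shaders (extended subgroup types,
// 8/16-bit storage, 16/64-bit arithmetic types). Formats not listed below are always usable.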
1696 bool vkt::subgroups::isFormatSupportedForDevice (Context& context, vk::VkFormat format)
1697 {
1698 const VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures& subgroupExtendedTypesFeatures = context.getShaderSubgroupExtendedTypesFeatures();
1699 const VkPhysicalDeviceShaderFloat16Int8Features& float16Int8Features = context.getShaderFloat16Int8Features();
1700 const VkPhysicalDevice16BitStorageFeatures& storage16bit = context.get16BitStorageFeatures();
1701 const VkPhysicalDevice8BitStorageFeatures& storage8bit = context.get8BitStorageFeatures();
1702 const VkPhysicalDeviceFeatures& features = context.getDeviceFeatures();
1703 bool shaderFloat64 = features.shaderFloat64 ? true : false;
1704 bool shaderInt16 = features.shaderInt16 ? true : false;
1705 bool shaderInt64 = features.shaderInt64 ? true : false;
1706 bool shaderSubgroupExtendedTypes = false;
1707 bool shaderFloat16 = false;
1708 bool shaderInt8 = false;
1709 bool storageBuffer16BitAccess = false;
1710 bool storageBuffer8BitAccess = false;
1711
1712 if (context.isDeviceFunctionalitySupported("VK_KHR_shader_subgroup_extended_types") &&
1713 context.isDeviceFunctionalitySupported("VK_KHR_shader_float16_int8"))
1714 {
1715 shaderSubgroupExtendedTypes = subgroupExtendedTypesFeatures.shaderSubgroupExtendedTypes ? true : false;
1716 shaderFloat16 = float16Int8Features.shaderFloat16 ? true : false;
1717 shaderInt8 = float16Int8Features.shaderInt8 ? true : false;
1718
1719 if ( context.isDeviceFunctionalitySupported("VK_KHR_16bit_storage") )
1720 storageBuffer16BitAccess = storage16bit.storageBuffer16BitAccess ? true : false;
1721
1722 if (context.isDeviceFunctionalitySupported("VK_KHR_8bit_storage"))
1723 storageBuffer8BitAccess = storage8bit.storageBuffer8BitAccess ? true : false;
1724 }
1725
1726 switch (format)
1727 {
1728 default:
1729 return true;
1730 case VK_FORMAT_R16_SFLOAT:
1731 case VK_FORMAT_R16G16_SFLOAT:
1732 case VK_FORMAT_R16G16B16_SFLOAT:
1733 case VK_FORMAT_R16G16B16A16_SFLOAT:
1734 return shaderSubgroupExtendedTypes && shaderFloat16 && storageBuffer16BitAccess;
1735 case VK_FORMAT_R64_SFLOAT:
1736 case VK_FORMAT_R64G64_SFLOAT:
1737 case VK_FORMAT_R64G64B64_SFLOAT:
1738 case VK_FORMAT_R64G64B64A64_SFLOAT:
1739 return shaderFloat64;
1740 case VK_FORMAT_R8_SINT:
1741 case VK_FORMAT_R8G8_SINT:
1742 case VK_FORMAT_R8G8B8_SINT:
1743 case VK_FORMAT_R8G8B8A8_SINT:
1744 case VK_FORMAT_R8_UINT:
1745 case VK_FORMAT_R8G8_UINT:
1746 case VK_FORMAT_R8G8B8_UINT:
1747 case VK_FORMAT_R8G8B8A8_UINT:
1748 return shaderSubgroupExtendedTypes && shaderInt8 && storageBuffer8BitAccess;
1749 case VK_FORMAT_R16_SINT:
1750 case VK_FORMAT_R16G16_SINT:
1751 case VK_FORMAT_R16G16B16_SINT:
1752 case VK_FORMAT_R16G16B16A16_SINT:
1753 case VK_FORMAT_R16_UINT:
1754 case VK_FORMAT_R16G16_UINT:
1755 case VK_FORMAT_R16G16B16_UINT:
1756 case VK_FORMAT_R16G16B16A16_UINT:
1757 return shaderSubgroupExtendedTypes && shaderInt16 && storageBuffer16BitAccess;
1758 case VK_FORMAT_R64_SINT:
1759 case VK_FORMAT_R64G64_SINT:
1760 case VK_FORMAT_R64G64B64_SINT:
1761 case VK_FORMAT_R64G64B64A64_SINT:
1762 case VK_FORMAT_R64_UINT:
1763 case VK_FORMAT_R64G64_UINT:
1764 case VK_FORMAT_R64G64B64_UINT:
1765 case VK_FORMAT_R64G64B64A64_UINT:
1766 return shaderSubgroupExtendedTypes && shaderInt64;
1767 }
1768 }
1769
1770 bool vkt::subgroups::isSubgroupBroadcastDynamicIdSupported (Context& context)
1771 {
1772 return context.contextSupports(vk::ApiVersion(1, 2, 0)) && context.getDeviceVulkan12Features().subgroupBroadcastDynamicId;
1773 }
1774
1775 std::string vkt::subgroups::getFormatNameForGLSL (VkFormat format)
1776 {
1777 switch (format)
1778 {
1779 case VK_FORMAT_R8_SINT: return "int8_t";
1780 case VK_FORMAT_R8G8_SINT: return "i8vec2";
1781 case VK_FORMAT_R8G8B8_SINT: return "i8vec3";
1782 case VK_FORMAT_R8G8B8A8_SINT: return "i8vec4";
1783 case VK_FORMAT_R8_UINT: return "uint8_t";
1784 case VK_FORMAT_R8G8_UINT: return "u8vec2";
1785 case VK_FORMAT_R8G8B8_UINT: return "u8vec3";
1786 case VK_FORMAT_R8G8B8A8_UINT: return "u8vec4";
1787 case VK_FORMAT_R16_SINT: return "int16_t";
1788 case VK_FORMAT_R16G16_SINT: return "i16vec2";
1789 case VK_FORMAT_R16G16B16_SINT: return "i16vec3";
1790 case VK_FORMAT_R16G16B16A16_SINT: return "i16vec4";
1791 case VK_FORMAT_R16_UINT: return "uint16_t";
1792 case VK_FORMAT_R16G16_UINT: return "u16vec2";
1793 case VK_FORMAT_R16G16B16_UINT: return "u16vec3";
1794 case VK_FORMAT_R16G16B16A16_UINT: return "u16vec4";
1795 case VK_FORMAT_R32_SINT: return "int";
1796 case VK_FORMAT_R32G32_SINT: return "ivec2";
1797 case VK_FORMAT_R32G32B32_SINT: return "ivec3";
1798 case VK_FORMAT_R32G32B32A32_SINT: return "ivec4";
1799 case VK_FORMAT_R32_UINT: return "uint";
1800 case VK_FORMAT_R32G32_UINT: return "uvec2";
1801 case VK_FORMAT_R32G32B32_UINT: return "uvec3";
1802 case VK_FORMAT_R32G32B32A32_UINT: return "uvec4";
1803 case VK_FORMAT_R64_SINT: return "int64_t";
1804 case VK_FORMAT_R64G64_SINT: return "i64vec2";
1805 case VK_FORMAT_R64G64B64_SINT: return "i64vec3";
1806 case VK_FORMAT_R64G64B64A64_SINT: return "i64vec4";
1807 case VK_FORMAT_R64_UINT: return "uint64_t";
1808 case VK_FORMAT_R64G64_UINT: return "u64vec2";
1809 case VK_FORMAT_R64G64B64_UINT: return "u64vec3";
1810 case VK_FORMAT_R64G64B64A64_UINT: return "u64vec4";
1811 case VK_FORMAT_R16_SFLOAT: return "float16_t";
1812 case VK_FORMAT_R16G16_SFLOAT: return "f16vec2";
1813 case VK_FORMAT_R16G16B16_SFLOAT: return "f16vec3";
1814 case VK_FORMAT_R16G16B16A16_SFLOAT: return "f16vec4";
1815 case VK_FORMAT_R32_SFLOAT: return "float";
1816 case VK_FORMAT_R32G32_SFLOAT: return "vec2";
1817 case VK_FORMAT_R32G32B32_SFLOAT: return "vec3";
1818 case VK_FORMAT_R32G32B32A32_SFLOAT: return "vec4";
1819 case VK_FORMAT_R64_SFLOAT: return "double";
1820 case VK_FORMAT_R64G64_SFLOAT: return "dvec2";
1821 case VK_FORMAT_R64G64B64_SFLOAT: return "dvec3";
1822 case VK_FORMAT_R64G64B64A64_SFLOAT: return "dvec4";
1823 case VK_FORMAT_R8_USCALED: return "bool";
1824 case VK_FORMAT_R8G8_USCALED: return "bvec2";
1825 case VK_FORMAT_R8G8B8_USCALED: return "bvec3";
1826 case VK_FORMAT_R8G8B8A8_USCALED: return "bvec4";
1827 default: TCU_THROW(InternalError, "Unhandled format");
1828 }
1829 }
1830
1831 std::string vkt::subgroups::getAdditionalExtensionForFormat (vk::VkFormat format)
1832 {
1833 switch (format)
1834 {
1835 default:
1836 return "";
1837 case VK_FORMAT_R8_SINT:
1838 case VK_FORMAT_R8G8_SINT:
1839 case VK_FORMAT_R8G8B8_SINT:
1840 case VK_FORMAT_R8G8B8A8_SINT:
1841 case VK_FORMAT_R8_UINT:
1842 case VK_FORMAT_R8G8_UINT:
1843 case VK_FORMAT_R8G8B8_UINT:
1844 case VK_FORMAT_R8G8B8A8_UINT:
1845 return "#extension GL_EXT_shader_subgroup_extended_types_int8 : enable\n";
1846 case VK_FORMAT_R16_SINT:
1847 case VK_FORMAT_R16G16_SINT:
1848 case VK_FORMAT_R16G16B16_SINT:
1849 case VK_FORMAT_R16G16B16A16_SINT:
1850 case VK_FORMAT_R16_UINT:
1851 case VK_FORMAT_R16G16_UINT:
1852 case VK_FORMAT_R16G16B16_UINT:
1853 case VK_FORMAT_R16G16B16A16_UINT:
1854 return "#extension GL_EXT_shader_subgroup_extended_types_int16 : enable\n";
1855 case VK_FORMAT_R64_SINT:
1856 case VK_FORMAT_R64G64_SINT:
1857 case VK_FORMAT_R64G64B64_SINT:
1858 case VK_FORMAT_R64G64B64A64_SINT:
1859 case VK_FORMAT_R64_UINT:
1860 case VK_FORMAT_R64G64_UINT:
1861 case VK_FORMAT_R64G64B64_UINT:
1862 case VK_FORMAT_R64G64B64A64_UINT:
1863 return "#extension GL_EXT_shader_subgroup_extended_types_int64 : enable\n";
1864 case VK_FORMAT_R16_SFLOAT:
1865 case VK_FORMAT_R16G16_SFLOAT:
1866 case VK_FORMAT_R16G16B16_SFLOAT:
1867 case VK_FORMAT_R16G16B16A16_SFLOAT:
1868 return "#extension GL_EXT_shader_subgroup_extended_types_float16 : enable\n";
1869 }
1870 }
1871
1872 const std::vector<vk::VkFormat> vkt::subgroups::getAllFormats ()
1873 {
1874 std::vector<VkFormat> formats;
1875
1876 formats.push_back(VK_FORMAT_R8_SINT);
1877 formats.push_back(VK_FORMAT_R8G8_SINT);
1878 formats.push_back(VK_FORMAT_R8G8B8_SINT);
1879 formats.push_back(VK_FORMAT_R8G8B8A8_SINT);
1880 formats.push_back(VK_FORMAT_R8_UINT);
1881 formats.push_back(VK_FORMAT_R8G8_UINT);
1882 formats.push_back(VK_FORMAT_R8G8B8_UINT);
1883 formats.push_back(VK_FORMAT_R8G8B8A8_UINT);
1884 formats.push_back(VK_FORMAT_R16_SINT);
1885 formats.push_back(VK_FORMAT_R16G16_SINT);
1886 formats.push_back(VK_FORMAT_R16G16B16_SINT);
1887 formats.push_back(VK_FORMAT_R16G16B16A16_SINT);
1888 formats.push_back(VK_FORMAT_R16_UINT);
1889 formats.push_back(VK_FORMAT_R16G16_UINT);
1890 formats.push_back(VK_FORMAT_R16G16B16_UINT);
1891 formats.push_back(VK_FORMAT_R16G16B16A16_UINT);
1892 formats.push_back(VK_FORMAT_R32_SINT);
1893 formats.push_back(VK_FORMAT_R32G32_SINT);
1894 formats.push_back(VK_FORMAT_R32G32B32_SINT);
1895 formats.push_back(VK_FORMAT_R32G32B32A32_SINT);
1896 formats.push_back(VK_FORMAT_R32_UINT);
1897 formats.push_back(VK_FORMAT_R32G32_UINT);
1898 formats.push_back(VK_FORMAT_R32G32B32_UINT);
1899 formats.push_back(VK_FORMAT_R32G32B32A32_UINT);
1900 formats.push_back(VK_FORMAT_R64_SINT);
1901 formats.push_back(VK_FORMAT_R64G64_SINT);
1902 formats.push_back(VK_FORMAT_R64G64B64_SINT);
1903 formats.push_back(VK_FORMAT_R64G64B64A64_SINT);
1904 formats.push_back(VK_FORMAT_R64_UINT);
1905 formats.push_back(VK_FORMAT_R64G64_UINT);
1906 formats.push_back(VK_FORMAT_R64G64B64_UINT);
1907 formats.push_back(VK_FORMAT_R64G64B64A64_UINT);
1908 formats.push_back(VK_FORMAT_R16_SFLOAT);
1909 formats.push_back(VK_FORMAT_R16G16_SFLOAT);
1910 formats.push_back(VK_FORMAT_R16G16B16_SFLOAT);
1911 formats.push_back(VK_FORMAT_R16G16B16A16_SFLOAT);
1912 formats.push_back(VK_FORMAT_R32_SFLOAT);
1913 formats.push_back(VK_FORMAT_R32G32_SFLOAT);
1914 formats.push_back(VK_FORMAT_R32G32B32_SFLOAT);
1915 formats.push_back(VK_FORMAT_R32G32B32A32_SFLOAT);
1916 formats.push_back(VK_FORMAT_R64_SFLOAT);
1917 formats.push_back(VK_FORMAT_R64G64_SFLOAT);
1918 formats.push_back(VK_FORMAT_R64G64B64_SFLOAT);
1919 formats.push_back(VK_FORMAT_R64G64B64A64_SFLOAT);
1920 formats.push_back(VK_FORMAT_R8_USCALED);
1921 formats.push_back(VK_FORMAT_R8G8_USCALED);
1922 formats.push_back(VK_FORMAT_R8G8B8_USCALED);
1923 formats.push_back(VK_FORMAT_R8G8B8A8_USCALED);
1924
1925 return formats;
1926 }
1927
1928 bool vkt::subgroups::isFormatSigned (VkFormat format)
1929 {
1930 switch (format)
1931 {
1932 default:
1933 return false;
1934 case VK_FORMAT_R8_SINT:
1935 case VK_FORMAT_R8G8_SINT:
1936 case VK_FORMAT_R8G8B8_SINT:
1937 case VK_FORMAT_R8G8B8A8_SINT:
1938 case VK_FORMAT_R16_SINT:
1939 case VK_FORMAT_R16G16_SINT:
1940 case VK_FORMAT_R16G16B16_SINT:
1941 case VK_FORMAT_R16G16B16A16_SINT:
1942 case VK_FORMAT_R32_SINT:
1943 case VK_FORMAT_R32G32_SINT:
1944 case VK_FORMAT_R32G32B32_SINT:
1945 case VK_FORMAT_R32G32B32A32_SINT:
1946 case VK_FORMAT_R64_SINT:
1947 case VK_FORMAT_R64G64_SINT:
1948 case VK_FORMAT_R64G64B64_SINT:
1949 case VK_FORMAT_R64G64B64A64_SINT:
1950 return true;
1951 }
1952 }
1953
1954 bool vkt::subgroups::isFormatUnsigned (VkFormat format)
1955 {
1956 switch (format)
1957 {
1958 default:
1959 return false;
1960 case VK_FORMAT_R8_UINT:
1961 case VK_FORMAT_R8G8_UINT:
1962 case VK_FORMAT_R8G8B8_UINT:
1963 case VK_FORMAT_R8G8B8A8_UINT:
1964 case VK_FORMAT_R16_UINT:
1965 case VK_FORMAT_R16G16_UINT:
1966 case VK_FORMAT_R16G16B16_UINT:
1967 case VK_FORMAT_R16G16B16A16_UINT:
1968 case VK_FORMAT_R32_UINT:
1969 case VK_FORMAT_R32G32_UINT:
1970 case VK_FORMAT_R32G32B32_UINT:
1971 case VK_FORMAT_R32G32B32A32_UINT:
1972 case VK_FORMAT_R64_UINT:
1973 case VK_FORMAT_R64G64_UINT:
1974 case VK_FORMAT_R64G64B64_UINT:
1975 case VK_FORMAT_R64G64B64A64_UINT:
1976 return true;
1977 }
1978 }
1979
1980 bool vkt::subgroups::isFormatFloat (VkFormat format)
1981 {
1982 switch (format)
1983 {
1984 default:
1985 return false;
1986 case VK_FORMAT_R16_SFLOAT:
1987 case VK_FORMAT_R16G16_SFLOAT:
1988 case VK_FORMAT_R16G16B16_SFLOAT:
1989 case VK_FORMAT_R16G16B16A16_SFLOAT:
1990 case VK_FORMAT_R32_SFLOAT:
1991 case VK_FORMAT_R32G32_SFLOAT:
1992 case VK_FORMAT_R32G32B32_SFLOAT:
1993 case VK_FORMAT_R32G32B32A32_SFLOAT:
1994 case VK_FORMAT_R64_SFLOAT:
1995 case VK_FORMAT_R64G64_SFLOAT:
1996 case VK_FORMAT_R64G64B64_SFLOAT:
1997 case VK_FORMAT_R64G64B64A64_SFLOAT:
1998 return true;
1999 }
2000 }
2001
2002 bool vkt::subgroups::isFormatBool (VkFormat format)
2003 {
2004 switch (format)
2005 {
2006 default:
2007 return false;
2008 case VK_FORMAT_R8_USCALED:
2009 case VK_FORMAT_R8G8_USCALED:
2010 case VK_FORMAT_R8G8B8_USCALED:
2011 case VK_FORMAT_R8G8B8A8_USCALED:
2012 return true;
2013 }
2014 }
2015
2016 bool vkt::subgroups::isFormat8bitTy (VkFormat format)
2017 {
2018 switch (format)
2019 {
2020 default:
2021 return false;
2022 case VK_FORMAT_R8_SINT:
2023 case VK_FORMAT_R8G8_SINT:
2024 case VK_FORMAT_R8G8B8_SINT:
2025 case VK_FORMAT_R8G8B8A8_SINT:
2026 case VK_FORMAT_R8_UINT:
2027 case VK_FORMAT_R8G8_UINT:
2028 case VK_FORMAT_R8G8B8_UINT:
2029 case VK_FORMAT_R8G8B8A8_UINT:
2030 return true;
2031 }
2032 }
2033
2034 bool vkt::subgroups::isFormat16BitTy (VkFormat format)
2035 {
2036 switch (format)
2037 {
2038 default:
2039 return false;
2040 case VK_FORMAT_R16_SFLOAT:
2041 case VK_FORMAT_R16G16_SFLOAT:
2042 case VK_FORMAT_R16G16B16_SFLOAT:
2043 case VK_FORMAT_R16G16B16A16_SFLOAT:
2044 case VK_FORMAT_R16_SINT:
2045 case VK_FORMAT_R16G16_SINT:
2046 case VK_FORMAT_R16G16B16_SINT:
2047 case VK_FORMAT_R16G16B16A16_SINT:
2048 case VK_FORMAT_R16_UINT:
2049 case VK_FORMAT_R16G16_UINT:
2050 case VK_FORMAT_R16G16B16_UINT:
2051 case VK_FORMAT_R16G16B16A16_UINT:
2052 return true;
2053 }
2054 }
2055
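// Pass-through vertex shader (precompiled SPIR-V; GLSL equivalent in the comment below) used by the framebuffer tests
// when another stage is under test.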
2056 void vkt::subgroups::setVertexShaderFrameBuffer (SourceCollections& programCollection)
2057 {
2058 /*
2059 "layout(location = 0) in highp vec4 in_position;\n"
2060 "void main (void)\n"
2061 "{\n"
2062 " gl_Position = in_position;\n"
2063 " gl_PointSize = 1.0f;\n"
2064 "}\n";
2065 */
2066 programCollection.spirvAsmSources.add("vert") <<
2067 "; SPIR-V\n"
2068 "; Version: 1.3\n"
2069 "; Generator: Khronos Glslang Reference Front End; 7\n"
2070 "; Bound: 25\n"
2071 "; Schema: 0\n"
2072 "OpCapability Shader\n"
2073 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
2074 "OpMemoryModel Logical GLSL450\n"
2075 "OpEntryPoint Vertex %4 \"main\" %13 %17\n"
2076 "OpMemberDecorate %11 0 BuiltIn Position\n"
2077 "OpMemberDecorate %11 1 BuiltIn PointSize\n"
2078 "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
2079 "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
2080 "OpDecorate %11 Block\n"
2081 "OpDecorate %17 Location 0\n"
2082 "%2 = OpTypeVoid\n"
2083 "%3 = OpTypeFunction %2\n"
2084 "%6 = OpTypeFloat 32\n"
2085 "%7 = OpTypeVector %6 4\n"
2086 "%8 = OpTypeInt 32 0\n"
2087 "%9 = OpConstant %8 1\n"
2088 "%10 = OpTypeArray %6 %9\n"
2089 "%11 = OpTypeStruct %7 %6 %10 %10\n"
2090 "%12 = OpTypePointer Output %11\n"
2091 "%13 = OpVariable %12 Output\n"
2092 "%14 = OpTypeInt 32 1\n"
2093 "%15 = OpConstant %14 0\n"
2094 "%16 = OpTypePointer Input %7\n"
2095 "%17 = OpVariable %16 Input\n"
2096 "%19 = OpTypePointer Output %7\n"
2097 "%21 = OpConstant %14 1\n"
2098 "%22 = OpConstant %6 1\n"
2099 "%23 = OpTypePointer Output %6\n"
2100 "%4 = OpFunction %2 None %3\n"
2101 "%5 = OpLabel\n"
2102 "%18 = OpLoad %7 %17\n"
2103 "%20 = OpAccessChain %19 %13 %15\n"
2104 "OpStore %20 %18\n"
2105 "%24 = OpAccessChain %23 %13 %21\n"
2106 "OpStore %24 %22\n"
2107 "OpReturn\n"
2108 "OpFunctionEnd\n";
2109 }
2110
2111 void vkt::subgroups::setFragmentShaderFrameBuffer (vk::SourceCollections& programCollection)
2112 {
2113 /*
2114 "layout(location = 0) in float in_color;\n"
2115 "layout(location = 0) out uint out_color;\n"
2116 "void main()\n"
2117 "{\n"
2118 " out_color = uint(in_color);\n"
2119 "}\n";
2120 */
2121 programCollection.spirvAsmSources.add("fragment") <<
2122 "; SPIR-V\n"
2123 "; Version: 1.3\n"
2124 "; Generator: Khronos Glslang Reference Front End; 2\n"
2125 "; Bound: 14\n"
2126 "; Schema: 0\n"
2127 "OpCapability Shader\n"
2128 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
2129 "OpMemoryModel Logical GLSL450\n"
2130 "OpEntryPoint Fragment %4 \"main\" %8 %11\n"
2131 "OpExecutionMode %4 OriginUpperLeft\n"
2132 "OpDecorate %8 Location 0\n"
2133 "OpDecorate %11 Location 0\n"
2134 "%2 = OpTypeVoid\n"
2135 "%3 = OpTypeFunction %2\n"
2136 "%6 = OpTypeInt 32 0\n"
2137 "%7 = OpTypePointer Output %6\n"
2138 "%8 = OpVariable %7 Output\n"
2139 "%9 = OpTypeFloat 32\n"
2140 "%10 = OpTypePointer Input %9\n"
2141 "%11 = OpVariable %10 Input\n"
2142 "%4 = OpFunction %2 None %3\n"
2143 "%5 = OpLabel\n"
2144 "%12 = OpLoad %9 %11\n"
2145 "%13 = OpConvertFToU %6 %12\n"
2146 "OpStore %8 %13\n"
2147 "OpReturn\n"
2148 "OpFunctionEnd\n";
2149 }
2150
2151 void vkt::subgroups::setTesCtrlShaderFrameBuffer (vk::SourceCollections& programCollection)
2152 {
2153 /*
2154 "#extension GL_KHR_shader_subgroup_basic: enable\n"
2155 "#extension GL_EXT_tessellation_shader : require\n"
2156 "layout(vertices = 2) out;\n"
2157 "void main (void)\n"
2158 "{\n"
2159 " if (gl_InvocationID == 0)\n"
2160 " {\n"
2161 " gl_TessLevelOuter[0] = 1.0f;\n"
2162 " gl_TessLevelOuter[1] = 1.0f;\n"
2163 " }\n"
2164 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
2165 "}\n";
2166 */
2167 programCollection.spirvAsmSources.add("tesc") <<
2168 "; SPIR-V\n"
2169 "; Version: 1.3\n"
2170 "; Generator: Khronos Glslang Reference Front End; 2\n"
2171 "; Bound: 46\n"
2172 "; Schema: 0\n"
2173 "OpCapability Tessellation\n"
2174 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
2175 "OpMemoryModel Logical GLSL450\n"
2176 "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %33 %39\n"
2177 "OpExecutionMode %4 OutputVertices 2\n"
2178 "OpDecorate %8 BuiltIn InvocationId\n"
2179 "OpDecorate %20 Patch\n"
2180 "OpDecorate %20 BuiltIn TessLevelOuter\n"
2181 "OpMemberDecorate %29 0 BuiltIn Position\n"
2182 "OpMemberDecorate %29 1 BuiltIn PointSize\n"
2183 "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
2184 "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
2185 "OpDecorate %29 Block\n"
2186 "OpMemberDecorate %35 0 BuiltIn Position\n"
2187 "OpMemberDecorate %35 1 BuiltIn PointSize\n"
2188 "OpMemberDecorate %35 2 BuiltIn ClipDistance\n"
2189 "OpMemberDecorate %35 3 BuiltIn CullDistance\n"
2190 "OpDecorate %35 Block\n"
2191 "%2 = OpTypeVoid\n"
2192 "%3 = OpTypeFunction %2\n"
2193 "%6 = OpTypeInt 32 1\n"
2194 "%7 = OpTypePointer Input %6\n"
2195 "%8 = OpVariable %7 Input\n"
2196 "%10 = OpConstant %6 0\n"
2197 "%11 = OpTypeBool\n"
2198 "%15 = OpTypeFloat 32\n"
2199 "%16 = OpTypeInt 32 0\n"
2200 "%17 = OpConstant %16 4\n"
2201 "%18 = OpTypeArray %15 %17\n"
2202 "%19 = OpTypePointer Output %18\n"
2203 "%20 = OpVariable %19 Output\n"
2204 "%21 = OpConstant %15 1\n"
2205 "%22 = OpTypePointer Output %15\n"
2206 "%24 = OpConstant %6 1\n"
2207 "%26 = OpTypeVector %15 4\n"
2208 "%27 = OpConstant %16 1\n"
2209 "%28 = OpTypeArray %15 %27\n"
2210 "%29 = OpTypeStruct %26 %15 %28 %28\n"
2211 "%30 = OpConstant %16 2\n"
2212 "%31 = OpTypeArray %29 %30\n"
2213 "%32 = OpTypePointer Output %31\n"
2214 "%33 = OpVariable %32 Output\n"
2215 "%35 = OpTypeStruct %26 %15 %28 %28\n"
2216 "%36 = OpConstant %16 32\n"
2217 "%37 = OpTypeArray %35 %36\n"
2218 "%38 = OpTypePointer Input %37\n"
2219 "%39 = OpVariable %38 Input\n"
2220 "%41 = OpTypePointer Input %26\n"
2221 "%44 = OpTypePointer Output %26\n"
2222 "%4 = OpFunction %2 None %3\n"
2223 "%5 = OpLabel\n"
2224 "%9 = OpLoad %6 %8\n"
2225 "%12 = OpIEqual %11 %9 %10\n"
2226 "OpSelectionMerge %14 None\n"
2227 "OpBranchConditional %12 %13 %14\n"
2228 "%13 = OpLabel\n"
2229 "%23 = OpAccessChain %22 %20 %10\n"
2230 "OpStore %23 %21\n"
2231 "%25 = OpAccessChain %22 %20 %24\n"
2232 "OpStore %25 %21\n"
2233 "OpBranch %14\n"
2234 "%14 = OpLabel\n"
2235 "%34 = OpLoad %6 %8\n"
2236 "%40 = OpLoad %6 %8\n"
2237 "%42 = OpAccessChain %41 %39 %40 %10\n"
2238 "%43 = OpLoad %26 %42\n"
2239 "%45 = OpAccessChain %44 %33 %34 %10\n"
2240 "OpStore %45 %43\n"
2241 "OpReturn\n"
2242 "OpFunctionEnd\n";
2243 }
2244
2245 void vkt::subgroups::setTesEvalShaderFrameBuffer (vk::SourceCollections& programCollection)
2246 {
2247 /*
2248 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
2249 "#extension GL_EXT_tessellation_shader : require\n"
2250 "layout(isolines, equal_spacing, ccw ) in;\n"
2251 "layout(location = 0) in float in_color[];\n"
2252 "layout(location = 0) out float out_color;\n"
2253 "\n"
2254 "void main (void)\n"
2255 "{\n"
2256 " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
2257 " out_color = in_color[0];\n"
2258 "}\n";
2259 */
2260 programCollection.spirvAsmSources.add("tese") <<
2261 "; SPIR-V\n"
2262 "; Version: 1.3\n"
2263 "; Generator: Khronos Glslang Reference Front End; 2\n"
2264 "; Bound: 45\n"
2265 "; Schema: 0\n"
2266 "OpCapability Tessellation\n"
2267 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
2268 "OpMemoryModel Logical GLSL450\n"
2269 "OpEntryPoint TessellationEvaluation %4 \"main\" %13 %20 %29 %39 %42\n"
2270 "OpExecutionMode %4 Isolines\n"
2271 "OpExecutionMode %4 SpacingEqual\n"
2272 "OpExecutionMode %4 VertexOrderCcw\n"
2273 "OpMemberDecorate %11 0 BuiltIn Position\n"
2274 "OpMemberDecorate %11 1 BuiltIn PointSize\n"
2275 "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
2276 "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
2277 "OpDecorate %11 Block\n"
2278 "OpMemberDecorate %16 0 BuiltIn Position\n"
2279 "OpMemberDecorate %16 1 BuiltIn PointSize\n"
2280 "OpMemberDecorate %16 2 BuiltIn ClipDistance\n"
2281 "OpMemberDecorate %16 3 BuiltIn CullDistance\n"
2282 "OpDecorate %16 Block\n"
2283 "OpDecorate %29 BuiltIn TessCoord\n"
2284 "OpDecorate %39 Location 0\n"
2285 "OpDecorate %42 Location 0\n"
2286 "%2 = OpTypeVoid\n"
2287 "%3 = OpTypeFunction %2\n"
2288 "%6 = OpTypeFloat 32\n"
2289 "%7 = OpTypeVector %6 4\n"
2290 "%8 = OpTypeInt 32 0\n"
2291 "%9 = OpConstant %8 1\n"
2292 "%10 = OpTypeArray %6 %9\n"
2293 "%11 = OpTypeStruct %7 %6 %10 %10\n"
2294 "%12 = OpTypePointer Output %11\n"
2295 "%13 = OpVariable %12 Output\n"
2296 "%14 = OpTypeInt 32 1\n"
2297 "%15 = OpConstant %14 0\n"
2298 "%16 = OpTypeStruct %7 %6 %10 %10\n"
2299 "%17 = OpConstant %8 32\n"
2300 "%18 = OpTypeArray %16 %17\n"
2301 "%19 = OpTypePointer Input %18\n"
2302 "%20 = OpVariable %19 Input\n"
2303 "%21 = OpTypePointer Input %7\n"
2304 "%24 = OpConstant %14 1\n"
2305 "%27 = OpTypeVector %6 3\n"
2306 "%28 = OpTypePointer Input %27\n"
2307 "%29 = OpVariable %28 Input\n"
2308 "%30 = OpConstant %8 0\n"
2309 "%31 = OpTypePointer Input %6\n"
2310 "%36 = OpTypePointer Output %7\n"
2311 "%38 = OpTypePointer Output %6\n"
2312 "%39 = OpVariable %38 Output\n"
2313 "%40 = OpTypeArray %6 %17\n"
2314 "%41 = OpTypePointer Input %40\n"
2315 "%42 = OpVariable %41 Input\n"
2316 "%4 = OpFunction %2 None %3\n"
2317 "%5 = OpLabel\n"
2318 "%22 = OpAccessChain %21 %20 %15 %15\n"
2319 "%23 = OpLoad %7 %22\n"
2320 "%25 = OpAccessChain %21 %20 %24 %15\n"
2321 "%26 = OpLoad %7 %25\n"
2322 "%32 = OpAccessChain %31 %29 %30\n"
2323 "%33 = OpLoad %6 %32\n"
2324 "%34 = OpCompositeConstruct %7 %33 %33 %33 %33\n"
2325 "%35 = OpExtInst %7 %1 FMix %23 %26 %34\n"
2326 "%37 = OpAccessChain %36 %13 %15\n"
2327 "OpStore %37 %35\n"
2328 "%43 = OpAccessChain %31 %42 %15\n"
2329 "%44 = OpLoad %6 %43\n"
2330 "OpStore %39 %44\n"
2331 "OpReturn\n"
2332 "OpFunctionEnd\n";
2333 }
2334
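// Specializes the ${TOPOLOGY} placeholder for both "lines" and "points" input primitives and adds the two geometry
// shader variants.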
2335 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& glslTemplate, const vk::ShaderBuildOptions& options, vk::GlslSourceCollection& collection)
2336 {
2337 tcu::StringTemplate geometryTemplate(glslTemplate);
2338
2339 map<string, string> linesParams;
2340 linesParams.insert(pair<string, string>("TOPOLOGY", "lines"));
2341
2342 map<string, string> pointsParams;
2343 pointsParams.insert(pair<string, string>("TOPOLOGY", "points"));
2344
2345 collection.add("geometry_lines") << glu::GeometrySource(geometryTemplate.specialize(linesParams)) << options;
2346 collection.add("geometry_points") << glu::GeometrySource(geometryTemplate.specialize(pointsParams)) << options;
2347 }
2348
2349 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& spirvTemplate, const vk::SpirVAsmBuildOptions& options, vk::SpirVAsmCollection& collection)
2350 {
2351 tcu::StringTemplate geometryTemplate(spirvTemplate);
2352
2353 map<string, string> linesParams;
2354 linesParams.insert(pair<string, string>("TOPOLOGY", "InputLines"));
2355
2356 map<string, string> pointsParams;
2357 pointsParams.insert(pair<string, string>("TOPOLOGY", "InputPoints"));
2358
2359 collection.add("geometry_lines") << geometryTemplate.specialize(linesParams) << options;
2360 collection.add("geometry_points") << geometryTemplate.specialize(pointsParams) << options;
2361 }
2362
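// Fills a host-visible allocation with the initial contents requested by the SSBO description (random data, zeros or
// nothing) and flushes it to the device.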
2363 void initializeMemory (Context& context, const Allocation& alloc, const subgroups::SSBOData& data)
2364 {
2365 const vk::VkFormat format = data.format;
2366 const vk::VkDeviceSize size = data.numElements *
2367 (data.isImage ? getFormatSizeInBytes(format) : getElementSizeInBytes(format, data.layout));
2368 if (subgroups::SSBOData::InitializeNonZero == data.initializeType)
2369 {
2370 de::Random rnd(context.getTestContext().getCommandLine().getBaseSeed());
2371
2372 switch (format)
2373 {
2374 default:
2375 DE_FATAL("Illegal buffer format");
2376 break;
2377 case VK_FORMAT_R8_SINT:
2378 case VK_FORMAT_R8G8_SINT:
2379 case VK_FORMAT_R8G8B8_SINT:
2380 case VK_FORMAT_R8G8B8A8_SINT:
2381 case VK_FORMAT_R8_UINT:
2382 case VK_FORMAT_R8G8_UINT:
2383 case VK_FORMAT_R8G8B8_UINT:
2384 case VK_FORMAT_R8G8B8A8_UINT:
2385 {
2386 deUint8* ptr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
2387
2388 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint8)); k++)
2389 {
2390 ptr[k] = rnd.getUint8();
2391 }
2392 }
2393 break;
2394 case VK_FORMAT_R16_SINT:
2395 case VK_FORMAT_R16G16_SINT:
2396 case VK_FORMAT_R16G16B16_SINT:
2397 case VK_FORMAT_R16G16B16A16_SINT:
2398 case VK_FORMAT_R16_UINT:
2399 case VK_FORMAT_R16G16_UINT:
2400 case VK_FORMAT_R16G16B16_UINT:
2401 case VK_FORMAT_R16G16B16A16_UINT:
2402 {
2403 deUint16* ptr = reinterpret_cast<deUint16*>(alloc.getHostPtr());
2404
2405 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint16)); k++)
2406 {
2407 ptr[k] = rnd.getUint16();
2408 }
2409 }
2410 break;
2411 case VK_FORMAT_R8_USCALED:
2412 case VK_FORMAT_R8G8_USCALED:
2413 case VK_FORMAT_R8G8B8_USCALED:
2414 case VK_FORMAT_R8G8B8A8_USCALED:
2415 {
2416 deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2417
2418 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
2419 {
2420 deUint32 r = rnd.getUint32();
2421 ptr[k] = (r & 1) ? r : 0;
2422 }
2423 }
2424 break;
2425 case VK_FORMAT_R32_SINT:
2426 case VK_FORMAT_R32G32_SINT:
2427 case VK_FORMAT_R32G32B32_SINT:
2428 case VK_FORMAT_R32G32B32A32_SINT:
2429 case VK_FORMAT_R32_UINT:
2430 case VK_FORMAT_R32G32_UINT:
2431 case VK_FORMAT_R32G32B32_UINT:
2432 case VK_FORMAT_R32G32B32A32_UINT:
2433 {
2434 deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2435
2436 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
2437 {
2438 ptr[k] = rnd.getUint32();
2439 }
2440 }
2441 break;
2442 case VK_FORMAT_R64_SINT:
2443 case VK_FORMAT_R64G64_SINT:
2444 case VK_FORMAT_R64G64B64_SINT:
2445 case VK_FORMAT_R64G64B64A64_SINT:
2446 case VK_FORMAT_R64_UINT:
2447 case VK_FORMAT_R64G64_UINT:
2448 case VK_FORMAT_R64G64B64_UINT:
2449 case VK_FORMAT_R64G64B64A64_UINT:
2450 {
2451 deUint64* ptr = reinterpret_cast<deUint64*>(alloc.getHostPtr());
2452
2453 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint64)); k++)
2454 {
2455 ptr[k] = rnd.getUint64();
2456 }
2457 }
2458 break;
2459 case VK_FORMAT_R16_SFLOAT:
2460 case VK_FORMAT_R16G16_SFLOAT:
2461 case VK_FORMAT_R16G16B16_SFLOAT:
2462 case VK_FORMAT_R16G16B16A16_SFLOAT:
2463 {
2464 deFloat16* ptr = reinterpret_cast<deFloat16*>(alloc.getHostPtr());
2465
2466 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deFloat16)); k++)
2467 {
2468 ptr[k] = deFloat32To16(rnd.getFloat());
2469 }
2470 }
2471 break;
2472 case VK_FORMAT_R32_SFLOAT:
2473 case VK_FORMAT_R32G32_SFLOAT:
2474 case VK_FORMAT_R32G32B32_SFLOAT:
2475 case VK_FORMAT_R32G32B32A32_SFLOAT:
2476 {
2477 float* ptr = reinterpret_cast<float*>(alloc.getHostPtr());
2478
2479 for (vk::VkDeviceSize k = 0; k < (size / sizeof(float)); k++)
2480 {
2481 ptr[k] = rnd.getFloat();
2482 }
2483 }
2484 break;
2485 case VK_FORMAT_R64_SFLOAT:
2486 case VK_FORMAT_R64G64_SFLOAT:
2487 case VK_FORMAT_R64G64B64_SFLOAT:
2488 case VK_FORMAT_R64G64B64A64_SFLOAT:
2489 {
2490 double* ptr = reinterpret_cast<double*>(alloc.getHostPtr());
2491
2492 for (vk::VkDeviceSize k = 0; k < (size / sizeof(double)); k++)
2493 {
2494 ptr[k] = rnd.getDouble();
2495 }
2496 }
2497 break;
2498 }
2499 }
2500 else if (subgroups::SSBOData::InitializeZero == data.initializeType)
2501 {
2502 deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
2503
2504 for (vk::VkDeviceSize k = 0; k < size / 4; k++)
2505 {
2506 ptr[k] = 0;
2507 }
2508 }
2509
2510 if (subgroups::SSBOData::InitializeNone != data.initializeType)
2511 {
2512 flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
2513 }
2514 }
2515
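// Binding index of the per-stage result SSBO used by the all-graphics-stages tests.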
2516 deUint32 getResultBinding (const VkShaderStageFlagBits shaderStage)
2517 {
2518 switch(shaderStage)
2519 {
2520 case VK_SHADER_STAGE_VERTEX_BIT:
2521 return 0u;
2522 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
2523 return 1u;
2524 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
2525 return 2u;
2526 case VK_SHADER_STAGE_GEOMETRY_BIT:
2527 return 3u;
2528 default:
2529 DE_ASSERT(0);
2530 return -1;
2531 }
2532 DE_ASSERT(0);
2533 return -1;
2534 }
2535
2536 tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTest (Context& context,
2537 VkFormat format,
2538 const SSBOData* extraData,
2539 deUint32 extraDataCount,
2540 const void* internalData,
2541 subgroups::CheckResult checkResult,
2542 const VkShaderStageFlags shaderStage)
2543 {
2544 return makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, shaderStage, 0u, 0u);
2545 }
2546
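// Renders an increasing number of isoline patches into a 1-pixel-high framebuffer and verifies each rendered width
// with checkResult; a required subgroup size can optionally be forced on the tessellation stages.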
2547 tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize (Context& context,
2548 VkFormat format,
2549 const SSBOData* extraData,
2550 deUint32 extraDataCount,
2551 const void* internalData,
2552 subgroups::CheckResult checkResult,
2553 const VkShaderStageFlags shaderStage,
2554 const deUint32 tessShaderStageCreateFlags,
2555 const deUint32 requiredSubgroupSize)
2556 {
2557 const DeviceInterface& vk = context.getDeviceInterface();
2558 const VkDevice device = context.getDevice();
2559 const deUint32 maxWidth = getMaxWidth();
2560 vector<de::SharedPtr<BufferOrImage> > inputBuffers (extraDataCount);
2561 DescriptorSetLayoutBuilder layoutBuilder;
2562 DescriptorPoolBuilder poolBuilder;
2563 DescriptorSetUpdateBuilder updateBuilder;
2564 Move <VkDescriptorPool> descriptorPool;
2565 Move <VkDescriptorSet> descriptorSet;
2566 const Unique<VkShaderModule> vertexShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
2567 const Unique<VkShaderModule> teCtrlShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("tesc"), 0u));
2568 const Unique<VkShaderModule> teEvalShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("tese"), 0u));
2569 const Unique<VkShaderModule> fragmentShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
2570 const Unique<VkRenderPass> renderPass (makeRenderPass(context, format));
2571 const VkVertexInputBindingDescription vertexInputBinding =
2572 {
2573 0u, // deUint32 binding;
2574 static_cast<deUint32>(sizeof(tcu::Vec4)), // deUint32 stride;
2575 VK_VERTEX_INPUT_RATE_VERTEX // VkVertexInputRate inputRate;
2576 };
2577 const VkVertexInputAttributeDescription vertexInputAttribute =
2578 {
2579 0u, // deUint32 location;
2580 0u, // deUint32 binding;
2581 VK_FORMAT_R32G32B32A32_SFLOAT, // VkFormat format;
2582 0u // deUint32 offset;
2583 };
2584
2585 for (deUint32 i = 0u; i < extraDataCount; i++)
2586 {
2587 if (extraData[i].isImage)
2588 {
2589 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
2590 }
2591 else
2592 {
2593 vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
2594 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2595 }
2596 const Allocation& alloc = inputBuffers[i]->getAllocation();
2597 initializeMemory(context, alloc, extraData[i]);
2598 }
2599
2600 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2601 layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, shaderStage, DE_NULL);
2602
2603 const Unique<VkDescriptorSetLayout> descriptorSetLayout (layoutBuilder.build(vk, device));
2604
2605 const Unique<VkPipelineLayout> pipelineLayout (makePipelineLayout(vk, device, *descriptorSetLayout));
2606
2607 const deUint32 requiredSubgroupSizes[5] = {0u,
2608 ((shaderStage & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? requiredSubgroupSize : 0u),
2609 ((shaderStage & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? requiredSubgroupSize : 0u),
2610 0u,
2611 0u};
2612
2613 const Unique<VkPipeline> pipeline (makeGraphicsPipeline(context, *pipelineLayout,
2614 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT |
2615 VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
2616 *vertexShaderModule, *fragmentShaderModule, DE_NULL, *teCtrlShaderModule, *teEvalShaderModule,
2617 *renderPass, VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, &vertexInputBinding, &vertexInputAttribute, true, format,
2618 0u, ((shaderStage & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? tessShaderStageCreateFlags : 0u),
2619 ((shaderStage & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? tessShaderStageCreateFlags : 0u),
2620 0u, 0u, requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
2621
2622 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2623 poolBuilder.addType(inputBuffers[ndx]->getType());
2624
2625 if (extraDataCount > 0)
2626 {
2627 descriptorPool = poolBuilder.build(vk, device,
2628 VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2629 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
2630 }
2631
2632 for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
2633 {
2634 if (inputBuffers[buffersNdx]->isImage())
2635 {
2636 VkDescriptorImageInfo info =
2637 makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
2638 inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2639
2640 updateBuilder.writeSingle(*descriptorSet,
2641 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2642 inputBuffers[buffersNdx]->getType(), &info);
2643 }
2644 else
2645 {
2646 VkDescriptorBufferInfo info =
2647 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
2648 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
2649
2650 updateBuilder.writeSingle(*descriptorSet,
2651 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2652 inputBuffers[buffersNdx]->getType(), &info);
2653 }
2654 }
2655
2656 updateBuilder.update(vk, device);
2657
2658 const VkQueue queue = context.getUniversalQueue();
2659 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
2660 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
2661 const deUint32 subgroupSize = getSubgroupSize(context);
2662 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
2663 const vk::VkDeviceSize vertexBufferSize = 2ull * maxWidth * sizeof(tcu::Vec4);
2664 Buffer vertexBuffer (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
2665 unsigned totalIterations = 0u;
2666 unsigned failedIterations = 0u;
2667 Image discardableImage (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2668
2669 {
2670 const Allocation& alloc = vertexBuffer.getAllocation();
2671 std::vector<tcu::Vec4> data (2u * maxWidth, Vec4(1.0f, 0.0f, 1.0f, 1.0f));
2672 const float pixelSize = 2.0f / static_cast<float>(maxWidth);
2673 float leftHandPosition = -1.0f;
2674
2675 for(deUint32 ndx = 0u; ndx < data.size(); ndx+=2u)
2676 {
2677 data[ndx][0] = leftHandPosition;
2678 leftHandPosition += pixelSize;
2679 data[ndx+1][0] = leftHandPosition;
2680 }
2681
2682 deMemcpy(alloc.getHostPtr(), &data[0], data.size() * sizeof(tcu::Vec4));
2683 flushAlloc(vk, device, alloc);
2684 }
2685
2686 const Unique<VkFramebuffer> framebuffer (makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
2687 const VkViewport viewport = makeViewport(maxWidth, 1u);
2688 const VkRect2D scissor = makeRect2D(maxWidth, 1u);
2689 const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
2690 Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2691 const VkDeviceSize vertexBufferOffset = 0u;
2692
2693 for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
2694 {
2695 totalIterations++;
2696
2697 beginCommandBuffer(vk, *cmdBuffer);
2698 {
2699
2700 vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
2701 vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
2702
2703 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
2704
2705 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
2706
2707 if (extraDataCount > 0)
2708 {
2709 vk.cmdBindDescriptorSets(*cmdBuffer,
2710 VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2711 &descriptorSet.get(), 0u, DE_NULL);
2712 }
2713
2714 vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
2715 vk.cmdDraw(*cmdBuffer, 2 * width, 1, 0, 0);
2716
2717 endRenderPass(vk, *cmdBuffer);
2718
2719 copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2720 endCommandBuffer(vk, *cmdBuffer);
2721
2722 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2723 }
2724
2725 {
2726 const Allocation& allocResult = imageBufferResult.getAllocation();
2727 invalidateAlloc(vk, device, allocResult);
2728
2729 std::vector<const void*> datas;
2730 datas.push_back(allocResult.getHostPtr());
2731 if (!checkResult(internalData, datas, width/2u, subgroupSize))
2732 failedIterations++;
2733 }
2734 }
2735
2736 if (0 < failedIterations)
2737 {
2738 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
2739
2740 context.getTestContext().getLog()
2741 << TestLog::Message << valuesPassed << " / "
2742 << totalIterations << " values passed" << TestLog::EndMessage;
2743 return tcu::TestStatus::fail("Failed!");
2744 }
2745
2746 return tcu::TestStatus::pass("OK");
2747 }
2748
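// Returns true if every element of the first result buffer equals the reference value.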
2749 bool vkt::subgroups::check (std::vector<const void*> datas, deUint32 width, deUint32 ref)
2750 {
2751 const deUint32* data = reinterpret_cast<const deUint32*>(datas[0]);
2752
2753 for (deUint32 n = 0; n < width; ++n)
2754 {
2755 if (data[n] != ref)
2756 {
2757 return false;
2758 }
2759 }
2760
2761 return true;
2762 }
2763
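// Derives the total invocation count from the dispatch dimensions and forwards to check().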
2764 bool vkt::subgroups::checkCompute (std::vector<const void*> datas,
2765 const deUint32 numWorkgroups[3],
2766 const deUint32 localSize[3],
2767 deUint32 ref)
2768 {
2769 const deUint32 globalSizeX = numWorkgroups[0] * localSize[0];
2770 const deUint32 globalSizeY = numWorkgroups[1] * localSize[1];
2771 const deUint32 globalSizeZ = numWorkgroups[2] * localSize[2];
2772
2773 return check(datas, globalSizeX * globalSizeY * globalSizeZ, ref);
2774 }
2775
2776 tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTest (Context& context,
2777 VkFormat format,
2778 const SSBOData* extraData,
2779 deUint32 extraDataCount,
2780 const void* internalData,
2781 subgroups::CheckResult checkResult)
2782 {
2783 return makeGeometryFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, 0u, 0u);
2784 }
2785
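// Same pattern as the tessellation framebuffer test, but drawing a point list with the geometry shader as the stage
// under test.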
2786 tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTestRequiredSubgroupSize (Context& context,
2787 VkFormat format,
2788 const SSBOData* extraData,
2789 deUint32 extraDataCount,
2790 const void* internalData,
2791 subgroups::CheckResult checkResult,
2792 const deUint32 geometryShaderStageCreateFlags,
2793 const deUint32 requiredSubgroupSize)
2794 {
2795 const DeviceInterface& vk = context.getDeviceInterface();
2796 const VkDevice device = context.getDevice();
2797 const deUint32 maxWidth = getMaxWidth();
2798 vector<de::SharedPtr<BufferOrImage> > inputBuffers (extraDataCount);
2799 DescriptorSetLayoutBuilder layoutBuilder;
2800 DescriptorPoolBuilder poolBuilder;
2801 DescriptorSetUpdateBuilder updateBuilder;
2802 Move <VkDescriptorPool> descriptorPool;
2803 Move <VkDescriptorSet> descriptorSet;
2804 const Unique<VkShaderModule> vertexShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
2805 const Unique<VkShaderModule> geometryShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("geometry"), 0u));
2806 const Unique<VkShaderModule> fragmentShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
2807 const Unique<VkRenderPass> renderPass (makeRenderPass(context, format));
2808 const VkVertexInputBindingDescription vertexInputBinding =
2809 {
2810 0u, // deUint32 binding;
2811 static_cast<deUint32>(sizeof(tcu::Vec4)), // deUint32 stride;
2812 VK_VERTEX_INPUT_RATE_VERTEX // VkVertexInputRate inputRate;
2813 };
2814 const VkVertexInputAttributeDescription vertexInputAttribute =
2815 {
2816 0u, // deUint32 location;
2817 0u, // deUint32 binding;
2818 VK_FORMAT_R32G32B32A32_SFLOAT, // VkFormat format;
2819 0u // deUint32 offset;
2820 };
2821
2822 for (deUint32 i = 0u; i < extraDataCount; i++)
2823 {
2824 if (extraData[i].isImage)
2825 {
2826 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
2827 }
2828 else
2829 {
2830 vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
2831 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2832 }
2833 const Allocation& alloc = inputBuffers[i]->getAllocation();
2834 initializeMemory(context, alloc, extraData[i]);
2835 }
2836
2837 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2838 layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_GEOMETRY_BIT, DE_NULL);
2839
2840 const Unique<VkDescriptorSetLayout> descriptorSetLayout (layoutBuilder.build(vk, device));
2841
2842 const Unique<VkPipelineLayout> pipelineLayout (makePipelineLayout(vk, device, *descriptorSetLayout));
2843
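// The required-subgroup-size array holds one entry per stage in pipeline order
// (vertex, tessellation control, tessellation evaluation, geometry, fragment); only the
// geometry entry is populated here because that is the stage under test.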
2844 const deUint32 requiredSubgroupSizes[5] = {0u, 0u, 0u, requiredSubgroupSize, 0u};
2845
2846 const Unique<VkPipeline> pipeline (makeGraphicsPipeline(context, *pipelineLayout,
2847 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_GEOMETRY_BIT,
2848 *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, DE_NULL, DE_NULL,
2849 *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST, &vertexInputBinding, &vertexInputAttribute, true, format,
2850 0u, 0u, 0u, geometryShaderStageCreateFlags, 0u,
2851 requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
2852
2853 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2854 poolBuilder.addType(inputBuffers[ndx]->getType());
2855
2856 if (extraDataCount > 0)
2857 {
2858 descriptorPool = poolBuilder.build(vk, device,
2859 VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2860 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
2861 }
2862
2863 for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
2864 {
2865 if (inputBuffers[buffersNdx]->isImage())
2866 {
2867 VkDescriptorImageInfo info =
2868 makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
2869 inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2870
2871 updateBuilder.writeSingle(*descriptorSet,
2872 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2873 inputBuffers[buffersNdx]->getType(), &info);
2874 }
2875 else
2876 {
2877 VkDescriptorBufferInfo info =
2878 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
2879 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
2880
2881 updateBuilder.writeSingle(*descriptorSet,
2882 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2883 inputBuffers[buffersNdx]->getType(), &info);
2884 }
2885 }
2886
2887 updateBuilder.update(vk, device);
2888
2889 const VkQueue queue = context.getUniversalQueue();
2890 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
2891 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
2892 const deUint32 subgroupSize = getSubgroupSize(context);
2893 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
2894 const vk::VkDeviceSize vertexBufferSize = maxWidth * sizeof(tcu::Vec4);
2895 Buffer vertexBuffer (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
2896 unsigned totalIterations = 0u;
2897 unsigned failedIterations = 0u;
2898 Image discardableImage (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2899
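// Fill the vertex buffer with one point per pixel of the single-row framebuffer,
// placed at the pixel centres across the full [-1, 1] clip-space range.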
2900 {
2901 const Allocation& alloc = vertexBuffer.getAllocation();
2902 std::vector<tcu::Vec4> data (maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
2903 const float pixelSize = 2.0f / static_cast<float>(maxWidth);
2904 float leftHandPosition = -1.0f;
2905
2906 for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
2907 {
2908 data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
2909 leftHandPosition += pixelSize;
2910 }
2911
2912 deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
2913 flushAlloc(vk, device, alloc);
2914 }
2915
2916 const Unique<VkFramebuffer> framebuffer (makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
2917 const VkViewport viewport = makeViewport(maxWidth, 1u);
2918 const VkRect2D scissor = makeRect2D(maxWidth, 1u);
2919 const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
2920 Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2921 const VkDeviceSize vertexBufferOffset = 0u;
2922
2923 for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
2924 {
2925 totalIterations++;
2926
2927 for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
2928 {
2929 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
2930 initializeMemory(context, alloc, extraData[ndx]);
2931 }
2932
2933 beginCommandBuffer(vk, *cmdBuffer);
2934 {
2935 vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
2936
2937 vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
2938
2939 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
2940
2941 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
2942
2943 if (extraDataCount > 0)
2944 {
2945 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2946 &descriptorSet.get(), 0u, DE_NULL);
2947 }
2948
2949 vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
2950
2951 vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
2952
2953 endRenderPass(vk, *cmdBuffer);
2954
2955 copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2956
2957 endCommandBuffer(vk, *cmdBuffer);
2958
2959 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
2960 }
2961
2962 {
2963 const Allocation& allocResult = imageBufferResult.getAllocation();
2964 invalidateAlloc(vk, device, allocResult);
2965
2966 std::vector<const void*> datas;
2967 datas.push_back(allocResult.getHostPtr());
2968 if (!checkResult(internalData, datas, width, subgroupSize))
2969 failedIterations++;
2970 }
2971 }
2972
2973 if (0 < failedIterations)
2974 {
2975 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
2976
2977 context.getTestContext().getLog()
2978 << TestLog::Message << valuesPassed << " / "
2979 << totalIterations << " values passed" << TestLog::EndMessage;
2980
2981 return tcu::TestStatus::fail("Failed!");
2982 }
2983
2984 return tcu::TestStatus::pass("OK");
2985 }
2986
2987 vk::VkShaderStageFlags vkt::subgroups::getPossibleGraphicsSubgroupStages (Context& context, const vk::VkShaderStageFlags testedStages)
2988 {
2989 const VkPhysicalDeviceSubgroupProperties& subgroupProperties = context.getSubgroupProperties();
2990 VkShaderStageFlags stages = testedStages & subgroupProperties.supportedStages;
2991
2992 DE_ASSERT(isAllGraphicsStages(testedStages));
2993
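// If any stage other than the fragment stage is requested but the device cannot write to
// SSBOs from vertex-pipeline stages, fall back to fragment-only testing, or report the
// case as unsupported when the fragment stage was not requested at all.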
2994 if (VK_SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
2995 {
2996 if ((stages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
2997 TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
2998 else
2999 stages = VK_SHADER_STAGE_FRAGMENT_BIT;
3000 }
3001
3002 if (static_cast<VkShaderStageFlags>(0u) == stages)
3003 TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphics shader stage");
3004
3005 return stages;
3006 }
3007
3008 tcu::TestStatus vkt::subgroups::allStages (Context& context,
3009 vk::VkFormat format,
3010 const SSBOData* extraData,
3011 deUint32 extraDataCount,
3012 const void* internalData,
3013 const VerificationFunctor& checkResult,
3014 const vk::VkShaderStageFlags shaderStage)
3015 {
3016 return vkt::subgroups::allStagesRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, shaderStage,
3017 0u, 0u, 0u, 0u, 0u, DE_NULL);
3018 }
3019
3020 tcu::TestStatus vkt::subgroups::allStagesRequiredSubgroupSize (Context& context,
3021 vk::VkFormat format,
3022 const SSBOData* extraDatas,
3023 deUint32 extraDatasCount,
3024 const void* internalData,
3025 const VerificationFunctor& checkResult,
3026 const vk::VkShaderStageFlags shaderStageTested,
3027 const deUint32 vertexShaderStageCreateFlags,
3028 const deUint32 tessellationControlShaderStageCreateFlags,
3029 const deUint32 tessellationEvalShaderStageCreateFlags,
3030 const deUint32 geometryShaderStageCreateFlags,
3031 const deUint32 fragmentShaderStageCreateFlags,
3032 const deUint32 requiredSubgroupSize[5])
3033 {
3034 const DeviceInterface& vk = context.getDeviceInterface();
3035 const VkDevice device = context.getDevice();
3036 const deUint32 maxWidth = getMaxWidth();
3037 vector<VkShaderStageFlagBits> stagesVector;
3038 VkShaderStageFlags shaderStageRequired = (VkShaderStageFlags)0ull;
3039
3040 Move<VkShaderModule> vertexShaderModule;
3041 Move<VkShaderModule> teCtrlShaderModule;
3042 Move<VkShaderModule> teEvalShaderModule;
3043 Move<VkShaderModule> geometryShaderModule;
3044 Move<VkShaderModule> fragmentShaderModule;
3045
3046 if (shaderStageTested & VK_SHADER_STAGE_VERTEX_BIT)
3047 {
3048 stagesVector.push_back(VK_SHADER_STAGE_VERTEX_BIT);
3049 }
3050 if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
3051 {
3052 stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
3053 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
3054 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
3055 }
3056 if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
3057 {
3058 stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
3059 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
3060 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
3061 }
3062 if (shaderStageTested & VK_SHADER_STAGE_GEOMETRY_BIT)
3063 {
3064 stagesVector.push_back(VK_SHADER_STAGE_GEOMETRY_BIT);
3065 const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
3066 shaderStageRequired |= (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
3067 }
3068 if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
3069 {
3070 const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
3071 shaderStageRequired |= (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
3072 }
3073
3074 const deUint32 stagesCount = static_cast<deUint32>(stagesVector.size());
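// Stages that are only present to complete the pipeline (not under test) use the
// *_noSubgroup shader variants, which perform no subgroup operations of their own.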
3075 const string vert = (shaderStageRequired & VK_SHADER_STAGE_VERTEX_BIT) ? "vert_noSubgroup" : "vert";
3076 const string tesc = (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? "tesc_noSubgroup" : "tesc";
3077 const string tese = (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? "tese_noSubgroup" : "tese";
3078
3079 shaderStageRequired = shaderStageTested | shaderStageRequired;
3080
3081 vertexShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(vert), 0u);
3082 if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
3083 {
3084 teCtrlShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tesc), 0u);
3085 teEvalShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get(tese), 0u);
3086 }
3087 if (shaderStageRequired & VK_SHADER_STAGE_GEOMETRY_BIT)
3088 {
3089 if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
3090 {
3091 // tessellation shaders output line primitives
3092 geometryShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("geometry_lines"), 0u);
3093 }
3094 else
3095 {
3096 // without tessellation, the geometry shader consumes point primitives
3097 geometryShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("geometry_points"), 0u);
3098 }
3099 }
3100 if (shaderStageRequired & VK_SHADER_STAGE_FRAGMENT_BIT)
3101 fragmentShaderModule = createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u);
3102
3103 std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(stagesCount + extraDatasCount);
3104
3105 DescriptorSetLayoutBuilder layoutBuilder;
3106 // The implicit result SSBO we use to store our outputs from the shader
3107 for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
3108 {
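// The tessellation evaluation stage is expected to produce two invocations per drawn
// vertex in these tests, hence the doubled buffer size (see the matching width * 2 below).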
3109 const VkDeviceSize shaderSize = (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? maxWidth * 2 : maxWidth;
3110 const VkDeviceSize size = getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize;
3111 inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
3112
3113 layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1, stagesVector[ndx], getResultBinding(stagesVector[ndx]), DE_NULL);
3114 }
3115
3116 for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
3117 {
3118 const deUint32 datasNdx = ndx - stagesCount;
3119 if (extraDatas[datasNdx].isImage)
3120 {
3121 inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
3122 }
3123 else
3124 {
3125 const vk::VkDeviceSize size = getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) * extraDatas[datasNdx].numElements;
3126 inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
3127 }
3128
3129 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3130 initializeMemory(context, alloc, extraDatas[datasNdx]);
3131
3132 layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1,
3133 extraDatas[datasNdx].stages, extraDatas[datasNdx].binding, DE_NULL);
3134 }
3135
3136 const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
3137
3138 const Unique<VkPipelineLayout> pipelineLayout(
3139 makePipelineLayout(vk, device, *descriptorSetLayout));
3140
3141 const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
3142 const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
3143 shaderStageRequired,
3144 *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, *teCtrlShaderModule, *teEvalShaderModule,
3145 *renderPass,
3146 (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
3147 DE_NULL, DE_NULL, false, VK_FORMAT_R32G32B32A32_SFLOAT,
3148 vertexShaderStageCreateFlags, tessellationControlShaderStageCreateFlags, tessellationEvalShaderStageCreateFlags,
3149 geometryShaderStageCreateFlags, fragmentShaderStageCreateFlags, requiredSubgroupSize));
3150
3151 Move <VkDescriptorPool> descriptorPool;
3152 Move <VkDescriptorSet> descriptorSet;
3153
3154 if (inputBuffers.size() > 0)
3155 {
3156 DescriptorPoolBuilder poolBuilder;
3157
3158 for (deUint32 ndx = 0u; ndx < static_cast<deUint32>(inputBuffers.size()); ndx++)
3159 {
3160 poolBuilder.addType(inputBuffers[ndx]->getType());
3161 }
3162
3163 descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3164
3165 // Create descriptor set
3166 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3167
3168 DescriptorSetUpdateBuilder updateBuilder;
3169
3170 for (deUint32 ndx = 0u; ndx < stagesCount + extraDatasCount; ndx++)
3171 {
3172 deUint32 binding;
3173 if (ndx < stagesCount) binding = getResultBinding(stagesVector[ndx]);
3174 else binding = extraDatas[ndx - stagesCount].binding;
3175
3176 if (inputBuffers[ndx]->isImage())
3177 {
3178 VkDescriptorImageInfo info =
3179 makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
3180 inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3181
3182 updateBuilder.writeSingle( *descriptorSet,
3183 DescriptorSetUpdateBuilder::Location::binding(binding),
3184 inputBuffers[ndx]->getType(), &info);
3185 }
3186 else
3187 {
3188 VkDescriptorBufferInfo info =
3189 makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(),
3190 0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
3191
3192 updateBuilder.writeSingle( *descriptorSet,
3193 DescriptorSetUpdateBuilder::Location::binding(binding),
3194 inputBuffers[ndx]->getType(), &info);
3195 }
3196 }
3197
3198 updateBuilder.update(vk, device);
3199 }
3200
3201 {
3202 const VkQueue queue = context.getUniversalQueue();
3203 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
3204 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
3205 const deUint32 subgroupSize = getSubgroupSize(context);
3206 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
3207 unsigned totalIterations = 0u;
3208 unsigned failedIterations = 0u;
3209 Image resultImage (context, maxWidth, 1, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3210 const Unique<VkFramebuffer> framebuffer (makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), maxWidth, 1u));
3211 const VkViewport viewport = makeViewport(maxWidth, 1u);
3212 const VkRect2D scissor = makeRect2D(maxWidth, 1u);
3213 const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
3214 Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3215 const VkImageSubresourceRange subresourceRange =
3216 {
3217 VK_IMAGE_ASPECT_COLOR_BIT, //VkImageAspectFlags aspectMask
3218 0u, //deUint32 baseMipLevel
3219 1u, //deUint32 levelCount
3220 0u, //deUint32 baseArrayLayer
3221 1u //deUint32 layerCount
3222 };
3223
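// Transition the whole result image to COLOR_ATTACHMENT_OPTIMAL before rendering;
// the previous contents are irrelevant, so the old layout is left as UNDEFINED.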
3224 const VkImageMemoryBarrier colorAttachmentBarrier = makeImageMemoryBarrier(
3225 (VkAccessFlags)0u, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
3226 VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
3227 resultImage.getImage(), subresourceRange);
3228
3229 for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
3230 {
3231 for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
3232 {
3233 // re-init the data
3234 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3235 initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
3236 }
3237
3238 totalIterations++;
3239
3240 beginCommandBuffer(vk, *cmdBuffer);
3241
3242 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &colorAttachmentBarrier);
3243
3244 vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3245
3246 vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3247
3248 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
3249
3250 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3251
3252 if (stagesCount + extraDatasCount > 0)
3253 vk.cmdBindDescriptorSets(*cmdBuffer,
3254 VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3255 &descriptorSet.get(), 0u, DE_NULL);
3256
3257 vk.cmdDraw(*cmdBuffer, width, 1, 0, 0);
3258
3259 endRenderPass(vk, *cmdBuffer);
3260
3261 copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(width, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3262
3263 endCommandBuffer(vk, *cmdBuffer);
3264
3265 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3266
3267 for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
3268 {
3269 std::vector<const void*> datas;
3270 if (!inputBuffers[ndx]->isImage())
3271 {
3272 const Allocation& resultAlloc = inputBuffers[ndx]->getAllocation();
3273 invalidateAlloc(vk, device, resultAlloc);
3274 // we always have our result data first
3275 datas.push_back(resultAlloc.getHostPtr());
3276 }
3277
3278 for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
3279 {
3280 const deUint32 datasNdx = index - stagesCount;
3281 if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
3282 {
3283 const Allocation& resultAlloc = inputBuffers[index]->getAllocation();
3284 invalidateAlloc(vk, device, resultAlloc);
3285 // append the extra data that is visible to this stage
3286 datas.push_back(resultAlloc.getHostPtr());
3287 }
3288 }
3289
3290 // Any stage in the vertex pipeline may be invoked more than once per vertex, so the checker may need non-strict (>=) comparisons.
3291 const bool multiCall = ( stagesVector[ndx] == VK_SHADER_STAGE_VERTEX_BIT ||
3292 stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT ||
3293 stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT ||
3294 stagesVector[ndx] == VK_SHADER_STAGE_GEOMETRY_BIT );
3295 const deUint32 usedWidth = ((stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? width * 2 : width);
3296
3297 if (!checkResult(internalData, datas, usedWidth, subgroupSize, multiCall))
3298 failedIterations++;
3299 }
3300 if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
3301 {
3302 std::vector<const void*> datas;
3303 const Allocation& resultAlloc = imageBufferResult.getAllocation();
3304 invalidateAlloc(vk, device, resultAlloc);
3305
3306 // we always have our result data first
3307 datas.push_back(resultAlloc.getHostPtr());
3308
3309 for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
3310 {
3311 const deUint32 datasNdx = index - stagesCount;
3312 if ((VK_SHADER_STAGE_FRAGMENT_BIT & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
3313 {
3314 const Allocation& alloc = inputBuffers[index]->getAllocation();
3315 invalidateAlloc(vk, device, alloc);
3316 // append the extra data that is visible to the fragment stage
3317 datas.push_back(alloc.getHostPtr());
3318 }
3319 }
3320
3321 if (!checkResult(internalData, datas, width, subgroupSize, false))
3322 failedIterations++;
3323 }
3324
3325 vk.resetCommandBuffer(*cmdBuffer, 0);
3326 }
3327
3328 if (0 < failedIterations)
3329 {
3330 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3331
3332 context.getTestContext().getLog()
3333 << TestLog::Message << valuesPassed << " / "
3334 << totalIterations << " values passed" << TestLog::EndMessage;
3335
3336 return tcu::TestStatus::fail("Failed!");
3337 }
3338 }
3339
3340 return tcu::TestStatus::pass("OK");
3341 }
3342
3343 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTest (Context& context,
3344 vk::VkFormat format,
3345 const SSBOData* extraData,
3346 deUint32 extraDataCount,
3347 const void* internalData,
3348 subgroups::CheckResult checkResult)
3349 {
3350 return makeVertexFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, 0u, 0u);
3351 }
3352
3353 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTestRequiredSubgroupSize (Context& context,
3354 vk::VkFormat format,
3355 const SSBOData* extraData,
3356 deUint32 extraDataCount,
3357 const void* internalData,
3358 subgroups::CheckResult checkResult,
3359 const deUint32 vertexShaderStageCreateFlags,
3360 const deUint32 requiredSubgroupSize)
3361 {
3362 const DeviceInterface& vk = context.getDeviceInterface();
3363 const VkDevice device = context.getDevice();
3364 const VkQueue queue = context.getUniversalQueue();
3365 const deUint32 maxWidth = getMaxWidth();
3366 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
3367 vector<de::SharedPtr<BufferOrImage> > inputBuffers (extraDataCount);
3368 DescriptorSetLayoutBuilder layoutBuilder;
3369 const Unique<VkShaderModule> vertexShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
3370 const Unique<VkShaderModule> fragmentShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
3371 const Unique<VkRenderPass> renderPass (makeRenderPass(context, format));
3372 const VkVertexInputBindingDescription vertexInputBinding =
3373 {
3374 0u, // binding;
3375 static_cast<deUint32>(sizeof(tcu::Vec4)), // stride;
3376 VK_VERTEX_INPUT_RATE_VERTEX // inputRate
3377 };
3378 const VkVertexInputAttributeDescription vertexInputAttribute =
3379 {
3380 0u,
3381 0u,
3382 VK_FORMAT_R32G32B32A32_SFLOAT,
3383 0u
3384 };
3385
3386 for (deUint32 i = 0u; i < extraDataCount; i++)
3387 {
3388 if (extraData[i].isImage)
3389 {
3390 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
3391 }
3392 else
3393 {
3394 vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
3395 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
3396 }
3397 const Allocation& alloc = inputBuffers[i]->getAllocation();
3398 initializeMemory(context, alloc, extraData[i]);
3399 }
3400
3401 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
3402 layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_VERTEX_BIT, DE_NULL);
3403
3404 const Unique<VkDescriptorSetLayout> descriptorSetLayout (layoutBuilder.build(vk, device));
3405
3406 const Unique<VkPipelineLayout> pipelineLayout (makePipelineLayout(vk, device, *descriptorSetLayout));
3407
3408 const deUint32 requiredSubgroupSizes[5] = {requiredSubgroupSize, 0u, 0u, 0u, 0u};
3409 const Unique<VkPipeline> pipeline (makeGraphicsPipeline(context, *pipelineLayout,
3410 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
3411 *vertexShaderModule, *fragmentShaderModule,
3412 DE_NULL, DE_NULL, DE_NULL,
3413 *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
3414 &vertexInputBinding, &vertexInputAttribute, true, format,
3415 vertexShaderStageCreateFlags, 0u, 0u, 0u, 0u,
3416 requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
3417 DescriptorPoolBuilder poolBuilder;
3418 DescriptorSetUpdateBuilder updateBuilder;
3419
3420
3421 for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
3422 poolBuilder.addType(inputBuffers[ndx]->getType());
3423
3424 Move <VkDescriptorPool> descriptorPool;
3425 Move <VkDescriptorSet> descriptorSet;
3426
3427 if (extraDataCount > 0)
3428 {
3429 descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3430 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3431 }
3432
3433 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
3434 {
3435 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3436 initializeMemory(context, alloc, extraData[ndx]);
3437 }
3438
3439 for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
3440 {
3441 if (inputBuffers[buffersNdx]->isImage())
3442 {
3443 VkDescriptorImageInfo info =
3444 makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
3445 inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3446
3447 updateBuilder.writeSingle(*descriptorSet,
3448 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
3449 inputBuffers[buffersNdx]->getType(), &info);
3450 }
3451 else
3452 {
3453 VkDescriptorBufferInfo info =
3454 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
3455 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
3456
3457 updateBuilder.writeSingle(*descriptorSet,
3458 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
3459 inputBuffers[buffersNdx]->getType(), &info);
3460 }
3461 }
3462 updateBuilder.update(vk, device);
3463
3464 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
3465
3466 const deUint32 subgroupSize = getSubgroupSize(context);
3467
3468 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
3469
3470 const vk::VkDeviceSize vertexBufferSize = maxWidth * sizeof(tcu::Vec4);
3471 Buffer vertexBuffer (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
3472
3473 unsigned totalIterations = 0u;
3474 unsigned failedIterations = 0u;
3475
3476 Image discardableImage (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3477
3478 {
3479 const Allocation& alloc = vertexBuffer.getAllocation();
3480 std::vector<tcu::Vec4> data (maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
3481 const float pixelSize = 2.0f / static_cast<float>(maxWidth);
3482 float leftHandPosition = -1.0f;
3483
3484 for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
3485 {
3486 data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
3487 leftHandPosition += pixelSize;
3488 }
3489
3490 deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
3491 flushAlloc(vk, device, alloc);
3492 }
3493
3494 const Unique<VkFramebuffer> framebuffer (makeFramebuffer(vk, device, *renderPass, discardableImage.getImageView(), maxWidth, 1u));
3495 const VkViewport viewport = makeViewport(maxWidth, 1u);
3496 const VkRect2D scissor = makeRect2D(maxWidth, 1u);
3497 const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
3498 Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3499 const VkDeviceSize vertexBufferOffset = 0u;
3500
3501 for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
3502 {
3503 totalIterations++;
3504
3505 for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
3506 {
3507 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
3508 initializeMemory(context, alloc, extraData[ndx]);
3509 }
3510
3511 beginCommandBuffer(vk, *cmdBuffer);
3512 {
3513 vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3514
3515 vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3516
3517 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
3518
3519 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3520
3521 if (extraDataCount > 0)
3522 {
3523 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
3524 &descriptorSet.get(), 0u, DE_NULL);
3525 }
3526
3527 vk.cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
3528
3529 vk.cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
3530
3531 endRenderPass(vk, *cmdBuffer);
3532
3533 copyImageToBuffer(vk, *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3534
3535 endCommandBuffer(vk, *cmdBuffer);
3536
3537 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3538 }
3539
3540 {
3541 const Allocation& allocResult = imageBufferResult.getAllocation();
3542 invalidateAlloc(vk, device, allocResult);
3543
3544 std::vector<const void*> datas;
3545 datas.push_back(allocResult.getHostPtr());
3546 if (!checkResult(internalData, datas, width, subgroupSize))
3547 failedIterations++;
3548 }
3549 }
3550
3551 if (0 < failedIterations)
3552 {
3553 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3554
3555 context.getTestContext().getLog()
3556 << TestLog::Message << valuesPassed << " / "
3557 << totalIterations << " values passed" << TestLog::EndMessage;
3558
3559 return tcu::TestStatus::fail("Failed!");
3560 }
3561
3562 return tcu::TestStatus::pass("OK");
3563 }
3564
3565 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTest (Context& context,
3566 VkFormat format,
3567 const SSBOData* extraDatas,
3568 deUint32 extraDatasCount,
3569 const void* internalData,
3570 CheckResultFragment checkResult)
3571 {
3572 return makeFragmentFrameBufferTestRequiredSubgroupSize(context, format, extraDatas, extraDatasCount, internalData, checkResult, 0u, 0u);
3573 }
3574
3575 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTestRequiredSubgroupSize (Context& context,
3576 VkFormat format,
3577 const SSBOData* extraDatas,
3578 deUint32 extraDatasCount,
3579 const void* internalData,
3580 CheckResultFragment checkResult,
3581 const deUint32 fragmentShaderStageCreateFlags,
3582 const deUint32 requiredSubgroupSize)
3583 {
3584 const DeviceInterface& vk = context.getDeviceInterface();
3585 const VkDevice device = context.getDevice();
3586 const VkQueue queue = context.getUniversalQueue();
3587 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
3588 const Unique<VkShaderModule> vertexShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("vert"), 0u));
3589 const Unique<VkShaderModule> fragmentShaderModule (createShaderModule(vk, device, context.getBinaryCollection().get("fragment"), 0u));
3590 std::vector< de::SharedPtr<BufferOrImage> > inputBuffers (extraDatasCount);
3591
3592 for (deUint32 i = 0; i < extraDatasCount; i++)
3593 {
3594 if (extraDatas[i].isImage)
3595 {
3596 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[i].numElements), 1, extraDatas[i].format));
3597 }
3598 else
3599 {
3600 const vk::VkDeviceSize size = getElementSizeInBytes(extraDatas[i].format, extraDatas[i].layout) * extraDatas[i].numElements;
3601
3602 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
3603 }
3604
3605 const Allocation& alloc = inputBuffers[i]->getAllocation();
3606
3607 initializeMemory(context, alloc, extraDatas[i]);
3608 }
3609
3610 DescriptorSetLayoutBuilder layoutBuilder;
3611
3612 for (deUint32 i = 0; i < extraDatasCount; i++)
3613 {
3614 layoutBuilder.addBinding(inputBuffers[i]->getType(), 1, VK_SHADER_STAGE_FRAGMENT_BIT, DE_NULL);
3615 }
3616
3617 const Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
3618 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, device, *descriptorSetLayout));
3619 const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
3620 const deUint32 requiredSubgroupSizes[5] = {0u, 0u, 0u, 0u, requiredSubgroupSize};
3621 const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context,
3622 *pipelineLayout,
3623 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
3624 *vertexShaderModule,
3625 *fragmentShaderModule,
3626 DE_NULL,
3627 DE_NULL,
3628 DE_NULL,
3629 *renderPass,
3630 VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
3631 DE_NULL,
3632 DE_NULL,
3633 true,
3634 VK_FORMAT_R32G32B32A32_SFLOAT,
3635 0u,
3636 0u,
3637 0u,
3638 0u,
3639 fragmentShaderStageCreateFlags,
3640 requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
3641 DescriptorPoolBuilder poolBuilder;
3642
3643 // To keep the validation layers from complaining, always add at least one type to the pool.
3644 poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3645 for (deUint32 i = 0; i < extraDatasCount; i++)
3646 {
3647 poolBuilder.addType(inputBuffers[i]->getType());
3648 }
3649
3650 Move<VkDescriptorPool> descriptorPool;
3651 // Create descriptor set
3652 Move<VkDescriptorSet> descriptorSet;
3653
3654 if (extraDatasCount > 0)
3655 {
3656 descriptorPool = poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3657
3658 descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);
3659 }
3660
3661 DescriptorSetUpdateBuilder updateBuilder;
3662
3663 for (deUint32 i = 0; i < extraDatasCount; i++)
3664 {
3665 if (inputBuffers[i]->isImage())
3666 {
3667 const VkDescriptorImageInfo info = makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(), inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3668
3669 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i), inputBuffers[i]->getType(), &info);
3670 }
3671 else
3672 {
3673 const VkDescriptorBufferInfo info = makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, inputBuffers[i]->getAsBuffer()->getSize());
3674
3675 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i), inputBuffers[i]->getType(), &info);
3676 }
3677 }
3678
3679 if (extraDatasCount > 0)
3680 updateBuilder.update(vk, device);
3681
3682 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
3683 const deUint32 subgroupSize = getSubgroupSize(context);
3684 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
3685 unsigned totalIterations = 0;
3686 unsigned failedIterations = 0;
3687
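// Render framebuffers of every power-of-two size from 8x8 up to subgroupSize x subgroupSize.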
3688 for (deUint32 width = 8; width <= subgroupSize; width *= 2)
3689 {
3690 for (deUint32 height = 8; height <= subgroupSize; height *= 2)
3691 {
3692 totalIterations++;
3693
3694 // re-init the data
3695 for (deUint32 i = 0; i < extraDatasCount; i++)
3696 {
3697 const Allocation& alloc = inputBuffers[i]->getAllocation();
3698
3699 initializeMemory(context, alloc, extraDatas[i]);
3700 }
3701
3702 const VkDeviceSize formatSize = getFormatSizeInBytes(format);
3703 const VkDeviceSize resultImageSizeInBytes = width * height * formatSize;
3704 Image resultImage (context, width, height, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
3705 Buffer resultBuffer (context, resultImageSizeInBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3706 const Unique<VkFramebuffer> framebuffer (makeFramebuffer(vk, device, *renderPass, resultImage.getImageView(), width, height));
3707 VkViewport viewport = makeViewport(width, height);
3708 VkRect2D scissor = {{0, 0}, {width, height}};
3709
3710 beginCommandBuffer(vk, *cmdBuffer);
3711
3712 vk.cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
3713
3714 vk.cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
3715
3716 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, width, height), tcu::Vec4(0.0f));
3717
3718 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
3719
3720 if (extraDatasCount > 0)
3721 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
3722
3723 vk.cmdDraw(*cmdBuffer, 4, 1, 0, 0);
3724
3725 endRenderPass(vk, *cmdBuffer);
3726
3727 copyImageToBuffer(vk, *cmdBuffer, resultImage.getImage(), resultBuffer.getBuffer(), tcu::IVec2(width, height), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
3728
3729 endCommandBuffer(vk, *cmdBuffer);
3730
3731 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3732
3733 std::vector<const void*> datas;
3734 {
3735 const Allocation& resultAlloc = resultBuffer.getAllocation();
3736 invalidateAlloc(vk, device, resultAlloc);
3737
3738 // we always have our result data first
3739 datas.push_back(resultAlloc.getHostPtr());
3740 }
3741
3742 if (!checkResult(internalData, datas, width, height, subgroupSize))
3743 {
3744 failedIterations++;
3745 }
3746
3747 vk.resetCommandBuffer(*cmdBuffer, 0);
3748 }
3749 }
3750
3751 if (0 < failedIterations)
3752 {
3753 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
3754
3755 context.getTestContext().getLog()
3756 << TestLog::Message << valuesPassed << " / "
3757 << totalIterations << " values passed" << TestLog::EndMessage;
3758
3759 return tcu::TestStatus::fail("Failed!");
3760 }
3761
3762 return tcu::TestStatus::pass("OK");
3763 }
3764
3765 Move<VkPipeline> makeComputePipeline (Context& context,
3766 const VkPipelineLayout pipelineLayout,
3767 const VkShaderModule shaderModule,
3768 const deUint32 pipelineShaderStageFlags,
3769 const deUint32 pipelineCreateFlags,
3770 VkPipeline basePipelineHandle,
3771 deUint32 localSizeX,
3772 deUint32 localSizeY,
3773 deUint32 localSizeZ,
3774 deUint32 requiredSubgroupSize)
3775 {
3776 const deUint32 localSize[3] = {localSizeX, localSizeY, localSizeZ};
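// The local size is passed through specialization constants with IDs 0, 1 and 2; the
// compute shader is expected to declare matching IDs, e.g.
// layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;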
3777 const vk::VkSpecializationMapEntry entries[3] =
3778 {
3779 {0, sizeof(deUint32) * 0, sizeof(deUint32)},
3780 {1, sizeof(deUint32) * 1, sizeof(deUint32)},
3781 {2, static_cast<deUint32>(sizeof(deUint32) * 2), sizeof(deUint32)},
3782 };
3783 const vk::VkSpecializationInfo info =
3784 {
3785 /* mapEntryCount = */ 3,
3786 /* pMapEntries = */ entries,
3787 /* dataSize = */ sizeof(localSize),
3788 /* pData = */ localSize
3789 };
3790 const vk::VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroupSizeCreateInfo =
3791 {
3792 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, // VkStructureType sType;
3793 DE_NULL, // void* pNext;
3794 requiredSubgroupSize // uint32_t requiredSubgroupSize;
3795 };
3796 const vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
3797 {
3798 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
3799 (requiredSubgroupSize != 0u ? &subgroupSizeCreateInfo : DE_NULL), // const void* pNext;
3800 pipelineShaderStageFlags, // VkPipelineShaderStageCreateFlags flags;
3801 VK_SHADER_STAGE_COMPUTE_BIT, // VkShaderStageFlagBits stage;
3802 shaderModule, // VkShaderModule module;
3803 "main", // const char* pName;
3804 &info, // const VkSpecializationInfo* pSpecializationInfo;
3805 };
3806 const vk::VkComputePipelineCreateInfo pipelineCreateInfo =
3807 {
3808 VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType sType;
3809 DE_NULL, // const void* pNext;
3810 pipelineCreateFlags, // VkPipelineCreateFlags flags;
3811 pipelineShaderStageParams, // VkPipelineShaderStageCreateInfo stage;
3812 pipelineLayout, // VkPipelineLayout layout;
3813 basePipelineHandle, // VkPipeline basePipelineHandle;
3814 -1, // deInt32 basePipelineIndex;
3815 };
3816
3817 return createComputePipeline(context.getDeviceInterface(), context.getDevice(), DE_NULL, &pipelineCreateInfo);
3818 }
3819
3820 tcu::TestStatus vkt::subgroups::makeComputeTestRequiredSubgroupSize (Context& context,
3821 VkFormat format,
3822 const SSBOData* inputs,
3823 deUint32 inputsCount,
3824 const void* internalData,
3825 CheckResultCompute checkResult,
3826 const deUint32 pipelineShaderStageCreateFlags,
3827 const deUint32 numWorkgroups[3],
3828 const deBool isRequiredSubgroupSize,
3829 const deUint32 subgroupSize,
3830 const deUint32 localSizesToTest[][3],
3831 const deUint32 localSizesToTestCount)
3832 {
3833 const DeviceInterface& vk = context.getDeviceInterface();
3834 const VkDevice device = context.getDevice();
3835 const VkQueue queue = context.getUniversalQueue();
3836 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
3837 const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& subgroupSizeControlProperties = context.getSubgroupSizeControlPropertiesEXT();
3838 const VkDeviceSize elementSize = getFormatSizeInBytes(format);
3839 const VkDeviceSize maxSubgroupSize = isRequiredSubgroupSize
3840 ? deMax32(subgroupSizeControlProperties.maxSubgroupSize, maxSupportedSubgroupSize())
3841 : maxSupportedSubgroupSize();
3842 const VkDeviceSize resultBufferSize = maxSubgroupSize * maxSubgroupSize * maxSubgroupSize;
3843 const VkDeviceSize resultBufferSizeInBytes = resultBufferSize * elementSize;
3844 Buffer resultBuffer (context, resultBufferSizeInBytes);
3845 std::vector< de::SharedPtr<BufferOrImage> > inputBuffers (inputsCount);
3846
3847 for (deUint32 i = 0; i < inputsCount; i++)
3848 {
3849 if (inputs[i].isImage)
3850 {
3851 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(inputs[i].numElements), 1, inputs[i].format));
3852 }
3853 else
3854 {
3855 const vk::VkDeviceSize size = getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
3856
3857 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
3858 }
3859
3860 const Allocation& alloc = inputBuffers[i]->getAllocation();
3861
3862 initializeMemory(context, alloc, inputs[i]);
3863 }
3864
3865 DescriptorSetLayoutBuilder layoutBuilder;
3866 layoutBuilder.addBinding(
3867 resultBuffer.getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);
3868
3869 for (deUint32 i = 0; i < inputsCount; i++)
3870 {
3871 layoutBuilder.addBinding(
3872 inputBuffers[i]->getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);
3873 }
3874
3875 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
3876 layoutBuilder.build(vk, device));
3877
3878 const Unique<VkShaderModule> shaderModule(
3879 createShaderModule(vk, device,
3880 context.getBinaryCollection().get("comp"), 0u));
3881 const Unique<VkPipelineLayout> pipelineLayout(
3882 makePipelineLayout(vk, device, *descriptorSetLayout));
3883
3884 DescriptorPoolBuilder poolBuilder;
3885
3886 poolBuilder.addType(resultBuffer.getType());
3887
3888 for (deUint32 i = 0; i < inputsCount; i++)
3889 {
3890 poolBuilder.addType(inputBuffers[i]->getType());
3891 }
3892
3893 const Unique<VkDescriptorPool> descriptorPool (poolBuilder.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
3894 const Unique<VkDescriptorSet> descriptorSet (makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
3895 const VkDescriptorBufferInfo resultDescriptorInfo = makeDescriptorBufferInfo(resultBuffer.getBuffer(), 0ull, resultBufferSizeInBytes);
3896 DescriptorSetUpdateBuilder updateBuilder;
3897
3898 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
3899
3900 for (deUint32 i = 0; i < inputsCount; i++)
3901 {
3902 if (inputBuffers[i]->isImage())
3903 {
3904 const VkDescriptorImageInfo info = makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(), inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
3905
3906 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i + 1), inputBuffers[i]->getType(), &info);
3907 }
3908 else
3909 {
3910 vk::VkDeviceSize size = getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
3911 VkDescriptorBufferInfo info = makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, size);
3912
3913 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i + 1), inputBuffers[i]->getType(), &info);
3914 }
3915 }
3916
3917 updateBuilder.update(vk, device);
3918
3919 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, device, queueFamilyIndex));
3920 unsigned totalIterations = 0;
3921 unsigned failedIterations = 0;
3922 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
3923 std::vector<de::SharedPtr<Move<VkPipeline>>> pipelines (localSizesToTestCount);
3924
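// The first pipeline is created with ALLOW_DERIVATIVES and every remaining local size is
// compiled as a derivative of it, which may reduce pipeline creation cost.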
3925 context.getTestContext().touchWatchdog();
3926 {
3927 pipelines[0] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeComputePipeline(context,
3928 *pipelineLayout,
3929 *shaderModule,
3930 pipelineShaderStageCreateFlags,
3931 VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT,
3932 (VkPipeline) DE_NULL,
3933 localSizesToTest[0][0],
3934 localSizesToTest[0][1],
3935 localSizesToTest[0][2],
3936 isRequiredSubgroupSize ? subgroupSize : 0u)));
3937 }
3938 context.getTestContext().touchWatchdog();
3939
3940 for (deUint32 index = 1; index < (localSizesToTestCount - 1); index++)
3941 {
3942 const deUint32 nextX = localSizesToTest[index][0];
3943 const deUint32 nextY = localSizesToTest[index][1];
3944 const deUint32 nextZ = localSizesToTest[index][2];
3945
3946 context.getTestContext().touchWatchdog();
3947 {
3948 pipelines[index] = de::SharedPtr<Move<VkPipeline>>(new Move<VkPipeline>(makeComputePipeline(context,
3949 *pipelineLayout,
3950 *shaderModule,
3951 pipelineShaderStageCreateFlags,
3952 VK_PIPELINE_CREATE_DERIVATIVE_BIT,
3953 **pipelines[0],
3954 nextX,
3955 nextY,
3956 nextZ,
3957 isRequiredSubgroupSize ? subgroupSize : 0u)));
3958 }
3959 context.getTestContext().touchWatchdog();
3960 }
3961
3962 for (deUint32 index = 0; index < (localSizesToTestCount - 1); index++)
3963 {
3964 // each local size dispatched below counts as one test iteration
3965 totalIterations++;
3966
3967 beginCommandBuffer(vk, *cmdBuffer);
3968 {
3969 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, **pipelines[index]);
3970
3971 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
3972
3973 vk.cmdDispatch(*cmdBuffer,numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
3974 }
3975 endCommandBuffer(vk, *cmdBuffer);
3976
3977 submitCommandsAndWait(vk, device, queue, *cmdBuffer);
3978
3979 std::vector<const void*> datas;
3980
3981 {
3982 const Allocation& resultAlloc = resultBuffer.getAllocation();
3983 invalidateAlloc(vk, device, resultAlloc);
3984
3985 // we always have our result data first
3986 datas.push_back(resultAlloc.getHostPtr());
3987 }
3988
3989 for (deUint32 i = 0; i < inputsCount; i++)
3990 {
3991 if (!inputBuffers[i]->isImage())
3992 {
3993 const Allocation& resultAlloc = inputBuffers[i]->getAllocation();
3994 invalidateAlloc(vk, device, resultAlloc);
3995
3996 // append the input buffer contents after the result data
3997 datas.push_back(resultAlloc.getHostPtr());
3998 }
3999 }
4000
4001 if (!checkResult(internalData, datas, numWorkgroups, localSizesToTest[index], subgroupSize))
4002 {
4003 failedIterations++;
4004 }
4005
4006 vk.resetCommandBuffer(*cmdBuffer, 0);
4007 }
4008
4009 if (0 < failedIterations)
4010 {
4011 unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
4012
4013 context.getTestContext().getLog()
4014 << TestLog::Message << valuesPassed << " / "
4015 << totalIterations << " values passed" << TestLog::EndMessage;
4016
4017 return tcu::TestStatus::fail("Failed!");
4018 }
4019
4020 return tcu::TestStatus::pass("OK");
4021 }
4022
4023 tcu::TestStatus vkt::subgroups::makeComputeTest (Context& context,
4024 VkFormat format,
4025 const SSBOData* inputs,
4026 deUint32 inputsCount,
4027 const void* internalData,
4028 CheckResultCompute checkResult,
4029 deUint32 requiredSubgroupSize,
4030 const deUint32 pipelineShaderStageCreateFlags)
4031 {
4032 const deUint32 numWorkgroups[3] = {4, 2, 2};
4033 deUint32 subgroupSize = requiredSubgroupSize;
4034
4035 if(requiredSubgroupSize == 0)
4036 subgroupSize = vkt::subgroups::getSubgroupSize(context);
4037
4038 const deUint32 localSizesToTestCount = 8;
4039 deUint32 localSizesToTest[localSizesToTestCount][3] =
4040 {
4041 {1, 1, 1},
4042 {subgroupSize, 1, 1},
4043 {1, subgroupSize, 1},
4044 {1, 1, subgroupSize},
4045 {32, 4, 1},
4046 {1, 4, 32},
4047 {3, 5, 7},
4048 {1, 1, 1} // Isn't used, just here to make double buffering checks easier
4049 };
4050
4051 return makeComputeTestRequiredSubgroupSize(context, format, inputs, inputsCount, internalData, checkResult, pipelineShaderStageCreateFlags,
4052 numWorkgroups, requiredSubgroupSize != 0u, subgroupSize, localSizesToTest, localSizesToTestCount);
4053 }
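
// Illustrative sketch only (hypothetical checker name): a typical caller looks like
//
//   return vkt::subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0u,
//                                          DE_NULL, myCheckCompute, 0u, 0u);
//
// where myCheckCompute has the CheckResultCompute signature and usually forwards to
// vkt::subgroups::checkCompute() with the expected reference value.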
4054
4055 static inline void checkShaderStageSetValidity (const VkShaderStageFlags shaderStages)
4056 {
4057 if (shaderStages == 0)
4058 TCU_THROW(InternalError, "Shader stage is not specified");
4059
4060 // Exactly one pipeline family must match, so the count below can only be 0 or 1.
4061 const deUint32 exclusivePipelinesCount = (isAllComputeStages(shaderStages) ? 1 :0)
4062 + (isAllGraphicsStages(shaderStages) ? 1 :0)
4063 + (isAllRayTracingStages(shaderStages) ? 1 :0);
4064
4065 if (exclusivePipelinesCount != 1)
4066 TCU_THROW(InternalError, "Mix of shaders from different pipelines is detected");
4067 }
4068
4069 void vkt::subgroups::supportedCheckShader (Context& context, const VkShaderStageFlags shaderStages)
4070 {
4071 checkShaderStageSetValidity(shaderStages);
4072
4073 if ((context.getSubgroupProperties().supportedStages & shaderStages) == 0)
4074 {
4075 if (isAllComputeStages(shaderStages))
4076 TCU_FAIL("Compute shader is required to support subgroup operations");
4077 else
4078 TCU_THROW(NotSupportedError, "Subgroup support is not available for test shader stage(s)");
4079 }
4080
4081 if ((VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) & shaderStages &&
4082 context.isDeviceFunctionalitySupported("VK_KHR_portability_subset") &&
4083 !context.getPortabilitySubsetFeatures().tessellationIsolines)
4084 {
4085 TCU_THROW(NotSupportedError, "VK_KHR_portability_subset: Tessellation iso lines are not supported by this implementation");
4086 }
4087 }
4088
4089
4090 namespace vkt
4091 {
4092 namespace subgroups
4093 {
4094 typedef std::vector< de::SharedPtr<BufferOrImage> > vectorBufferOrImage;
4095
4096 enum ShaderGroups
4097 {
4098 FIRST_GROUP = 0,
4099 RAYGEN_GROUP = FIRST_GROUP,
4100 MISS_GROUP,
4101 HIT_GROUP,
4102 CALL_GROUP,
4103 GROUP_COUNT
4104 };
4105
4106 const std::vector<vk::VkFormat> getAllRayTracingFormats()
4107 {
4108 std::vector<VkFormat> formats;
4109
4110 formats.push_back(VK_FORMAT_R8G8B8_SINT);
4111 formats.push_back(VK_FORMAT_R8_UINT);
4112 formats.push_back(VK_FORMAT_R8G8B8A8_UINT);
4113 formats.push_back(VK_FORMAT_R16G16B16_SINT);
4114 formats.push_back(VK_FORMAT_R16_UINT);
4115 formats.push_back(VK_FORMAT_R16G16B16A16_UINT);
4116 formats.push_back(VK_FORMAT_R32G32B32_SINT);
4117 formats.push_back(VK_FORMAT_R32_UINT);
4118 formats.push_back(VK_FORMAT_R32G32B32A32_UINT);
4119 formats.push_back(VK_FORMAT_R64G64B64_SINT);
4120 formats.push_back(VK_FORMAT_R64_UINT);
4121 formats.push_back(VK_FORMAT_R64G64B64A64_UINT);
4122 formats.push_back(VK_FORMAT_R16G16B16A16_SFLOAT);
4123 formats.push_back(VK_FORMAT_R32_SFLOAT);
4124 formats.push_back(VK_FORMAT_R32G32B32A32_SFLOAT);
4125 formats.push_back(VK_FORMAT_R64_SFLOAT);
4126 formats.push_back(VK_FORMAT_R64G64B64_SFLOAT);
4127 formats.push_back(VK_FORMAT_R64G64B64A64_SFLOAT);
4128 formats.push_back(VK_FORMAT_R8_USCALED);
4129 formats.push_back(VK_FORMAT_R8G8_USCALED);
4130 formats.push_back(VK_FORMAT_R8G8B8_USCALED);
4131 formats.push_back(VK_FORMAT_R8G8B8A8_USCALED);
4132
4133 return formats;
4134 }
4135
4136 void addRayTracingNoSubgroupShader (SourceCollections& programCollection)
4137 {
4138 const vk::ShaderBuildOptions buildOptions (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
4139
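// Minimal pass-through shaders for the stages that are not under test, so a complete
// ray tracing pipeline can always be assembled.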
4140 const std::string rgenShaderNoSubgroups =
4141 "#version 460 core\n"
4142 "#extension GL_EXT_ray_tracing: require\n"
4143 "layout(location = 0) rayPayloadEXT uvec4 payload;\n"
4144 "layout(location = 0) callableDataEXT uvec4 callData;"
4145 "layout(set = 1, binding = 0) uniform accelerationStructureEXT topLevelAS;\n"
4146 "\n"
4147 "void main()\n"
4148 "{\n"
4149 " uint rayFlags = 0;\n"
4150 " uint cullMask = 0xFF;\n"
4151 " float tmin = 0.0;\n"
4152 " float tmax = 9.0;\n"
4153 " vec3 origin = vec3((float(gl_LaunchIDEXT.x) + 0.5f) / float(gl_LaunchSizeEXT.x), (float(gl_LaunchIDEXT.y) + 0.5f) / float(gl_LaunchSizeEXT.y), 0.0);\n"
4154 " vec3 directHit = vec3(0.0, 0.0, -1.0);\n"
4155 " vec3 directMiss = vec3(0.0, 0.0, +1.0);\n"
4156 "\n"
4157 " traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directHit, tmax, 0);\n"
4158 " traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, directMiss, tmax, 0);\n"
4159 " executeCallableEXT(0, 0);"
4160 "}\n";
4161 const std::string hitShaderNoSubgroups =
4162 "#version 460 core\n"
4163 "#extension GL_EXT_ray_tracing: require\n"
4164 "hitAttributeEXT vec3 attribs;\n"
4165 "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
4166 "\n"
4167 "void main()\n"
4168 "{\n"
4169 "}\n";
4170 const std::string missShaderNoSubgroups =
4171 "#version 460 core\n"
4172 "#extension GL_EXT_ray_tracing: require\n"
4173 "layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
4174 "\n"
4175 "void main()\n"
4176 "{\n"
4177 "}\n";
4178 const std::string sectShaderNoSubgroups =
4179 "#version 460 core\n"
4180 "#extension GL_EXT_ray_tracing: require\n"
4181 "hitAttributeEXT vec3 hitAttribute;\n"
4182 "\n"
4183 "void main()\n"
4184 "{\n"
4185 " reportIntersectionEXT(0.75f, gl_HitKindFrontFacingTriangleEXT);\n"
4186 "}\n";
4187 const std::string callShaderNoSubgroups =
4188 "#version 460 core\n"
4189 "#extension GL_EXT_ray_tracing: require\n"
4190 "layout(location = 0) callableDataInEXT float callData;\n"
4191 "\n"
4192 "void main()\n"
4193 "{\n"
4194 "}\n";
4195
4196 programCollection.glslSources.add("rgen_noSubgroup") << glu::RaygenSource (rgenShaderNoSubgroups) << buildOptions;
4197 programCollection.glslSources.add("ahit_noSubgroup") << glu::AnyHitSource (hitShaderNoSubgroups) << buildOptions;
4198 programCollection.glslSources.add("chit_noSubgroup") << glu::ClosestHitSource (hitShaderNoSubgroups) << buildOptions;
4199 programCollection.glslSources.add("miss_noSubgroup") << glu::MissSource (missShaderNoSubgroups) << buildOptions;
4200 programCollection.glslSources.add("sect_noSubgroup") << glu::IntersectionSource (sectShaderNoSubgroups) << buildOptions;
4201 programCollection.glslSources.add("call_noSubgroup") << glu::CallableSource (callShaderNoSubgroups) << buildOptions;
4202 }
4203
4204 static vector<VkShaderStageFlagBits> enumerateRayTracingShaderStages (const VkShaderStageFlags shaderStage)
4205 {
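// Expand the combined stage mask into the individual ray tracing stage bits, in a fixed canonical order.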
4206 vector<VkShaderStageFlagBits> result;
4207 const VkShaderStageFlagBits shaderStageFlags[] =
4208 {
4209 VK_SHADER_STAGE_RAYGEN_BIT_KHR,
4210 VK_SHADER_STAGE_ANY_HIT_BIT_KHR,
4211 VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,
4212 VK_SHADER_STAGE_MISS_BIT_KHR,
4213 VK_SHADER_STAGE_INTERSECTION_BIT_KHR,
4214 VK_SHADER_STAGE_CALLABLE_BIT_KHR,
4215 };
4216
4217 for (auto shaderStageFlag: shaderStageFlags)
4218 {
4219 if (0 != (shaderStage & shaderStageFlag))
4220 result.push_back(shaderStageFlag);
4221 }
4222
4223 return result;
4224 }
4225
4226 static deUint32 getRayTracingResultBinding (const VkShaderStageFlagBits shaderStage)
4227 {
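// Each tested stage writes results to its own SSBO; the binding index is the stage's
// position in the canonical stage order used above.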
4228 const VkShaderStageFlags shaderStageFlags[] =
4229 {
4230 VK_SHADER_STAGE_RAYGEN_BIT_KHR,
4231 VK_SHADER_STAGE_ANY_HIT_BIT_KHR,
4232 VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,
4233 VK_SHADER_STAGE_MISS_BIT_KHR,
4234 VK_SHADER_STAGE_INTERSECTION_BIT_KHR,
4235 VK_SHADER_STAGE_CALLABLE_BIT_KHR,
4236 };
4237
4238 for (deUint32 shaderStageNdx = 0; shaderStageNdx < DE_LENGTH_OF_ARRAY(shaderStageFlags); ++shaderStageNdx)
4239 {
4240 if (0 != (shaderStage & shaderStageFlags[shaderStageNdx]))
4241 {
4242 DE_ASSERT(0 == (shaderStage & (~shaderStageFlags[shaderStageNdx])));
4243
4244 return shaderStageNdx;
4245 }
4246 }
4247
4248 TCU_THROW(InternalError, "Non-raytracing stage specified or no stage at all");
4249 }
4250
4251 static vectorBufferOrImage makeRayTracingInputBuffers (Context& context,
4252 VkFormat format,
4253 const SSBOData* extraDatas,
4254 deUint32 extraDatasCount,
4255 const vector<VkShaderStageFlagBits>& stagesVector)
4256 {
4257 const size_t stagesCount = stagesVector.size();
4258 const VkDeviceSize shaderSize = getMaxWidth();
4259 const VkDeviceSize inputBufferSize = getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize;
4260 vectorBufferOrImage inputBuffers (stagesCount + extraDatasCount);
4261
4262 // The implicit result SSBO we use to store our outputs from the shader
4263 for (size_t stageNdx = 0u; stageNdx < stagesCount; ++stageNdx)
4264 inputBuffers[stageNdx] = de::SharedPtr<BufferOrImage>(new Buffer(context, inputBufferSize));
4265
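// Additional inputs declared by the test follow the per-stage result buffers; they may be
// images or buffers and are initialized from the provided data.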
4266 for (size_t stageNdx = stagesCount; stageNdx < stagesCount + extraDatasCount; ++stageNdx)
4267 {
4268 const size_t datasNdx = stageNdx - stagesCount;
4269
4270 if (extraDatas[datasNdx].isImage)
4271 {
4272 inputBuffers[stageNdx] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
4273 }
4274 else
4275 {
4276 const VkDeviceSize size = getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) * extraDatas[datasNdx].numElements;
4277
4278 inputBuffers[stageNdx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
4279 }
4280
4281 initializeMemory(context, inputBuffers[stageNdx]->getAllocation(), extraDatas[datasNdx]);
4282 }
4283
4284 return inputBuffers;
4285 }
4286
4287 static Move<VkDescriptorSetLayout> makeRayTracingDescriptorSetLayout (Context& context,
4288 const SSBOData* extraDatas,
4289 deUint32 extraDatasCount,
4290 const vector<VkShaderStageFlagBits>& stagesVector,
4291 const vectorBufferOrImage& inputBuffers)
4292 {
4293 const DeviceInterface& vkd = context.getDeviceInterface();
4294 const VkDevice device = context.getDevice();
4295 const size_t stagesCount = stagesVector.size();
4296 DescriptorSetLayoutBuilder layoutBuilder;
4297
4298 // The implicit result SSBO we use to store our outputs from the shader
4299 for (size_t stageNdx = 0u; stageNdx < stagesVector.size(); ++stageNdx)
4300 {
4301 const deUint32 stageBinding = getRayTracingResultBinding(stagesVector[stageNdx]);
4302
4303 layoutBuilder.addIndexedBinding(inputBuffers[stageNdx]->getType(), 1, stagesVector[stageNdx], stageBinding, DE_NULL);
4304 }
4305
4306 for (size_t stageNdx = stagesCount; stageNdx < stagesCount + extraDatasCount; ++stageNdx)
4307 {
4308 const size_t datasNdx = stageNdx - stagesCount;
4309
4310 layoutBuilder.addIndexedBinding(inputBuffers[stageNdx]->getType(), 1, extraDatas[datasNdx].stages, extraDatas[datasNdx].binding, DE_NULL);
4311 }
4312
4313 return layoutBuilder.build(vkd, device);
4314 }
4315
4316 static Move<VkDescriptorSetLayout> makeRayTracingDescriptorSetLayoutAS (Context& context)
4317 {
4318 const DeviceInterface& vkd = context.getDeviceInterface();
4319 const VkDevice device = context.getDevice();
4320 DescriptorSetLayoutBuilder layoutBuilder;
4321
4322 layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, VK_SHADER_STAGE_RAYGEN_BIT_KHR);
4323
4324 return layoutBuilder.build(vkd, device);
4325 }
4326
4327 static Move<VkDescriptorPool> makeRayTracingDescriptorPool (Context& context,
4328 const vectorBufferOrImage& inputBuffers)
4329 {
4330 const DeviceInterface& vkd = context.getDeviceInterface();
4331 const VkDevice device = context.getDevice();
4332 const deUint32 maxDescriptorSets = 2u;
4333 DescriptorPoolBuilder poolBuilder;
4334 Move<VkDescriptorPool> result;
4335
4336 if (inputBuffers.size() > 0)
4337 {
4338 for (size_t ndx = 0u; ndx < inputBuffers.size(); ndx++)
4339 poolBuilder.addType(inputBuffers[ndx]->getType());
4340 }
4341
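// Reserve one more slot for the top level acceleration structure, which lives in its own descriptor set.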
4342 poolBuilder.addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR);
4343
4344 result = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, maxDescriptorSets);
4345
4346 return result;
4347 }
4348
4349 static Move<VkDescriptorSet> makeRayTracingDescriptorSet (Context& context,
4350 VkDescriptorPool descriptorPool,
4351 VkDescriptorSetLayout descriptorSetLayout,
4352 const SSBOData* extraDatas,
4353 deUint32 extraDatasCount,
4354 const vector<VkShaderStageFlagBits>& stagesVector,
4355 const vectorBufferOrImage& inputBuffers)
4356 {
4357 const DeviceInterface& vkd = context.getDeviceInterface();
4358 const VkDevice device = context.getDevice();
4359 const size_t stagesCount = stagesVector.size();
4360 Move<VkDescriptorSet> descriptorSet;
4361
4362 if (inputBuffers.size() > 0)
4363 {
4364 DescriptorSetUpdateBuilder updateBuilder;
4365
4366 // Create descriptor set
4367 descriptorSet = makeDescriptorSet(vkd, device, descriptorPool, descriptorSetLayout);
4368
4369 for (size_t ndx = 0u; ndx < stagesCount + extraDatasCount; ndx++)
4370 {
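// Result buffers use the per-stage result binding; extra inputs use the binding requested by the test.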
4371 const deUint32 binding = (ndx < stagesCount)
4372 ? getRayTracingResultBinding(stagesVector[ndx])
4373 : extraDatas[ndx - stagesCount].binding;
4374
4375 if (inputBuffers[ndx]->isImage())
4376 {
4377 const VkDescriptorImageInfo info = makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(), inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
4378
4379 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(binding), inputBuffers[ndx]->getType(), &info);
4380 }
4381 else
4382 {
4383 const VkDescriptorBufferInfo info = makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(), 0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
4384
4385 updateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(binding), inputBuffers[ndx]->getType(), &info);
4386 }
4387 }
4388
4389 updateBuilder.update(vkd, device);
4390 }
4391
4392 return descriptorSet;
4393 }
4394
4395 static Move<VkDescriptorSet> makeRayTracingDescriptorSetAS (Context& context,
4396 VkDescriptorPool descriptorPool,
4397 VkDescriptorSetLayout descriptorSetLayout,
4398 de::MovePtr<TopLevelAccelerationStructure>& topLevelAccelerationStructure)
4399 {
4400 const DeviceInterface& vkd = context.getDeviceInterface();
4401 const VkDevice device = context.getDevice();
4402 const TopLevelAccelerationStructure* topLevelAccelerationStructurePtr = topLevelAccelerationStructure.get();
4403 const VkWriteDescriptorSetAccelerationStructureKHR accelerationStructureWriteDescriptorSet =
4404 {
4405 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, // VkStructureType sType;
4406 DE_NULL, // const void* pNext;
4407 1u, // deUint32 accelerationStructureCount;
4408 topLevelAccelerationStructurePtr->getPtr(), // const VkAccelerationStructureKHR* pAccelerationStructures;
4409 };
4410 Move<VkDescriptorSet> descriptorSet = makeDescriptorSet(vkd, device, descriptorPool, descriptorSetLayout);
4411
4412 DescriptorSetUpdateBuilder()
4413 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &accelerationStructureWriteDescriptorSet)
4414 .update(vkd, device);
4415
4416 return descriptorSet;
4417 }
4418
4419 static Move<VkPipelineLayout> makeRayTracingPipelineLayout (Context& context,
4420 const VkDescriptorSetLayout descriptorSetLayout0,
4421 const VkDescriptorSetLayout descriptorSetLayout1)
4422 {
4423 const DeviceInterface& vkd = context.getDeviceInterface();
4424 const VkDevice device = context.getDevice();
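// Set 0 holds the result and extra-data bindings, set 1 holds the acceleration structure.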
4425 const std::vector<VkDescriptorSetLayout> descriptorSetLayouts { descriptorSetLayout0, descriptorSetLayout1 };
4426 const deUint32 descriptorSetLayoutsSize = static_cast<deUint32>(descriptorSetLayouts.size());
4427
4428 return makePipelineLayout(vkd, device, descriptorSetLayoutsSize, descriptorSetLayouts.data());
4429 }
4430
4431 static de::MovePtr<TopLevelAccelerationStructure> createTopAccelerationStructure (Context& context,
4432 de::SharedPtr<BottomLevelAccelerationStructure> bottomLevelAccelerationStructure)
4433 {
4434 const DeviceInterface& vkd = context.getDeviceInterface();
4435 const VkDevice device = context.getDevice();
4436 Allocator& allocator = context.getDefaultAllocator();
4437 de::MovePtr<TopLevelAccelerationStructure> result = makeTopLevelAccelerationStructure();
4438
4439 result->setInstanceCount(1);
4440 result->addInstance(bottomLevelAccelerationStructure);
4441 result->create(vkd, device, allocator);
4442
4443 return result;
4444 }
4445
4446 static de::SharedPtr<BottomLevelAccelerationStructure> createBottomAccelerationStructure (Context& context)
4447 {
4448 const DeviceInterface& vkd = context.getDeviceInterface();
4449 const VkDevice device = context.getDevice();
4450 Allocator& allocator = context.getDefaultAllocator();
4451 de::MovePtr<BottomLevelAccelerationStructure> result = makeBottomLevelAccelerationStructure();
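// With 'triangles' set to false below, the two points are treated as the min/max corners of a
// single AABB (z in [-2, -1]), which rays traced toward -z from the launch grid will hit.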
4452 const std::vector<tcu::Vec3> geometryData { tcu::Vec3(-1.0f, -1.0f, -2.0f), tcu::Vec3(+1.0f, +1.0f, -1.0f) };
4453
4454 result->setGeometryCount(1u);
4455 result->addGeometry(geometryData, false);
4456 result->create(vkd, device, allocator, 0u);
4457
4458 return de::SharedPtr<BottomLevelAccelerationStructure>(result.release());
4459 }
4460
4461 static de::MovePtr<RayTracingPipeline> makeRayTracingPipeline (Context& context,
4462 const VkShaderStageFlags shaderStageTested,
4463 const VkPipelineLayout pipelineLayout,
4464 const deUint32 shaderStageCreateFlags[6],
4465 const deUint32 requiredSubgroupSize[6],
4466 Move<VkPipeline>& pipelineOut)
4467 {
4468 const DeviceInterface& vkd = context.getDeviceInterface();
4469 const VkDevice device = context.getDevice();
4470 BinaryCollection& collection = context.getBinaryCollection();
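// For stages that are not being tested, fall back to the *_noSubgroup variants so that
// every pipeline stage still has a valid shader module.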
4471 const char* shaderRgenName = (0 != (shaderStageTested & VK_SHADER_STAGE_RAYGEN_BIT_KHR)) ? "rgen" : "rgen_noSubgroup";
4472 const char* shaderAhitName = (0 != (shaderStageTested & VK_SHADER_STAGE_ANY_HIT_BIT_KHR)) ? "ahit" : "ahit_noSubgroup";
4473 const char* shaderChitName = (0 != (shaderStageTested & VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR)) ? "chit" : "chit_noSubgroup";
4474 const char* shaderMissName = (0 != (shaderStageTested & VK_SHADER_STAGE_MISS_BIT_KHR)) ? "miss" : "miss_noSubgroup";
4475 const char* shaderSectName = (0 != (shaderStageTested & VK_SHADER_STAGE_INTERSECTION_BIT_KHR)) ? "sect" : "sect_noSubgroup";
4476 const char* shaderCallName = (0 != (shaderStageTested & VK_SHADER_STAGE_CALLABLE_BIT_KHR)) ? "call" : "call_noSubgroup";
4477 const VkShaderModuleCreateFlags noShaderModuleCreateFlags = static_cast<VkShaderModuleCreateFlags>(0);
4478 Move<VkShaderModule> rgenShaderModule = createShaderModule(vkd, device, collection.get(shaderRgenName), noShaderModuleCreateFlags);
4479 Move<VkShaderModule> ahitShaderModule = createShaderModule(vkd, device, collection.get(shaderAhitName), noShaderModuleCreateFlags);
4480 Move<VkShaderModule> chitShaderModule = createShaderModule(vkd, device, collection.get(shaderChitName), noShaderModuleCreateFlags);
4481 Move<VkShaderModule> missShaderModule = createShaderModule(vkd, device, collection.get(shaderMissName), noShaderModuleCreateFlags);
4482 Move<VkShaderModule> sectShaderModule = createShaderModule(vkd, device, collection.get(shaderSectName), noShaderModuleCreateFlags);
4483 Move<VkShaderModule> callShaderModule = createShaderModule(vkd, device, collection.get(shaderCallName), noShaderModuleCreateFlags);
4484 const VkPipelineShaderStageCreateFlags noPipelineShaderStageCreateFlags = static_cast<VkPipelineShaderStageCreateFlags>(0);
4485 const VkPipelineShaderStageCreateFlags rgenPipelineShaderStageCreateFlags = (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[0];
4486 const VkPipelineShaderStageCreateFlags ahitPipelineShaderStageCreateFlags = (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[1];
4487 const VkPipelineShaderStageCreateFlags chitPipelineShaderStageCreateFlags = (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[2];
4488 const VkPipelineShaderStageCreateFlags missPipelineShaderStageCreateFlags = (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[3];
4489 const VkPipelineShaderStageCreateFlags sectPipelineShaderStageCreateFlags = (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[4];
4490 const VkPipelineShaderStageCreateFlags callPipelineShaderStageCreateFlags = (shaderStageCreateFlags == DE_NULL) ? noPipelineShaderStageCreateFlags : shaderStageCreateFlags[5];
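// Optional VK_EXT_subgroup_size_control structures, one per stage; a requiredSubgroupSize of
// zero means no explicit requirement and the structure is not chained into pNext.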
4491 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT requiredSubgroupSizeCreateInfo[6] =
4492 {
4493 {
4494 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4495 DE_NULL,
4496 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[0] : 0u,
4497 },
4498 {
4499 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4500 DE_NULL,
4501 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[1] : 0u,
4502 },
4503 {
4504 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4505 DE_NULL,
4506 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[2] : 0u,
4507 },
4508 {
4509 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4510 DE_NULL,
4511 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[3] : 0u,
4512 },
4513 {
4514 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4515 DE_NULL,
4516 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[4] : 0u,
4517 },
4518 {
4519 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
4520 DE_NULL,
4521 requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[5] : 0u,
4522 },
4523 };
4524 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT* rgenRequiredSubgroupSizeCreateInfo = (requiredSubgroupSizeCreateInfo[0].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[0];
4525 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT* ahitRequiredSubgroupSizeCreateInfo = (requiredSubgroupSizeCreateInfo[1].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[1];
4526 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT* chitRequiredSubgroupSizeCreateInfo = (requiredSubgroupSizeCreateInfo[2].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[2];
4527 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT* missRequiredSubgroupSizeCreateInfo = (requiredSubgroupSizeCreateInfo[3].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[3];
4528 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT* sectRequiredSubgroupSizeCreateInfo = (requiredSubgroupSizeCreateInfo[4].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[4];
4529 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT* callRequiredSubgroupSizeCreateInfo = (requiredSubgroupSizeCreateInfo[5].requiredSubgroupSize == 0) ? DE_NULL : &requiredSubgroupSizeCreateInfo[5];
4530 de::MovePtr<RayTracingPipeline> rayTracingPipeline = de::newMovePtr<RayTracingPipeline>();
4531
4532 rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR , rgenShaderModule, RAYGEN_GROUP, DE_NULL, rgenPipelineShaderStageCreateFlags, rgenRequiredSubgroupSizeCreateInfo);
4533 rayTracingPipeline->addShader(VK_SHADER_STAGE_ANY_HIT_BIT_KHR , ahitShaderModule, HIT_GROUP, DE_NULL, ahitPipelineShaderStageCreateFlags, ahitRequiredSubgroupSizeCreateInfo);
4534 rayTracingPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR , chitShaderModule, HIT_GROUP, DE_NULL, chitPipelineShaderStageCreateFlags, chitRequiredSubgroupSizeCreateInfo);
4535 rayTracingPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR , missShaderModule, MISS_GROUP, DE_NULL, missPipelineShaderStageCreateFlags, missRequiredSubgroupSizeCreateInfo);
4536 rayTracingPipeline->addShader(VK_SHADER_STAGE_INTERSECTION_BIT_KHR , sectShaderModule, HIT_GROUP, DE_NULL, sectPipelineShaderStageCreateFlags, sectRequiredSubgroupSizeCreateInfo);
4537 rayTracingPipeline->addShader(VK_SHADER_STAGE_CALLABLE_BIT_KHR , callShaderModule, CALL_GROUP, DE_NULL, callPipelineShaderStageCreateFlags, callRequiredSubgroupSizeCreateInfo);
4538
4539 // createPipeline must be called here, while the pNext pointers passed to addShader still point at live locals above
4540 pipelineOut = rayTracingPipeline->createPipeline(vkd, device, pipelineLayout);
4541
4542 return rayTracingPipeline;
4543 }
4544
4545 VkShaderStageFlags getPossibleRayTracingSubgroupStages (Context& context, const VkShaderStageFlags testedStages)
4546 {
4547 const VkPhysicalDeviceSubgroupProperties& subgroupProperties = context.getSubgroupProperties();
4548 const VkShaderStageFlags stages = testedStages & subgroupProperties.supportedStages;
4549
4550 DE_ASSERT(isAllRayTracingStages(testedStages));
4551
4552 return stages;
4553 }
4554
4555 tcu::TestStatus allRayTracingStages (Context& context,
4556 VkFormat format,
4557 const SSBOData* extraDatas,
4558 deUint32 extraDataCount,
4559 const void* internalData,
4560 const VerificationFunctor& checkResult,
4561 const VkShaderStageFlags shaderStage)
4562 {
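// Convenience wrapper: no pipeline shader stage create flags and no required subgroup sizes.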
4563 return vkt::subgroups::allRayTracingStagesRequiredSubgroupSize(context,
4564 format,
4565 extraDatas,
4566 extraDataCount,
4567 internalData,
4568 checkResult,
4569 shaderStage,
4570 DE_NULL,
4571 DE_NULL);
4572 }
4573
4574 tcu::TestStatus allRayTracingStagesRequiredSubgroupSize (Context& context,
4575 VkFormat format,
4576 const SSBOData* extraDatas,
4577 deUint32 extraDatasCount,
4578 const void* internalData,
4579 const VerificationFunctor& checkResult,
4580 const VkShaderStageFlags shaderStageTested,
4581 const deUint32 shaderStageCreateFlags[6],
4582 const deUint32 requiredSubgroupSize[6])
4583 {
4584 const DeviceInterface& vkd = context.getDeviceInterface();
4585 const VkDevice device = context.getDevice();
4586 const VkQueue queue = context.getUniversalQueue();
4587 const deUint32 queueFamilyIndex = context.getUniversalQueueFamilyIndex();
4588 Allocator& allocator = context.getDefaultAllocator();
4589 const deUint32 subgroupSize = getSubgroupSize(context);
4590 const deUint32 maxWidth = getMaxWidth();
4591 const vector<VkShaderStageFlagBits> stagesVector = enumerateRayTracingShaderStages(shaderStageTested);
4592 const deUint32 stagesCount = static_cast<deUint32>(stagesVector.size());
4593 de::SharedPtr<BottomLevelAccelerationStructure> bottomLevelAccelerationStructure = createBottomAccelerationStructure(context);
4594 de::MovePtr<TopLevelAccelerationStructure> topLevelAccelerationStructure = createTopAccelerationStructure(context, bottomLevelAccelerationStructure);
4595 vectorBufferOrImage inputBuffers = makeRayTracingInputBuffers(context, format, extraDatas, extraDatasCount, stagesVector);
4596 const Move<VkDescriptorSetLayout> descriptorSetLayout = makeRayTracingDescriptorSetLayout(context, extraDatas, extraDatasCount, stagesVector, inputBuffers);
4597 const Move<VkDescriptorSetLayout> descriptorSetLayoutAS = makeRayTracingDescriptorSetLayoutAS(context);
4598 const Move<VkPipelineLayout> pipelineLayout = makeRayTracingPipelineLayout(context, *descriptorSetLayout, *descriptorSetLayoutAS);
4599 Move<VkPipeline> pipeline = Move<VkPipeline>();
4600 const de::MovePtr<RayTracingPipeline> rayTracingPipeline = makeRayTracingPipeline(context, shaderStageTested, *pipelineLayout, shaderStageCreateFlags, requiredSubgroupSize, pipeline);
4601 const deUint32 shaderGroupHandleSize = context.getRayTracingPipelineProperties().shaderGroupHandleSize;
4602 const deUint32 shaderGroupBaseAlignment = context.getRayTracingPipelineProperties().shaderGroupBaseAlignment;
4603 de::MovePtr<BufferWithMemory> rgenShaderBindingTable = rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, RAYGEN_GROUP, 1u);
4604 de::MovePtr<BufferWithMemory> missShaderBindingTable = rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, MISS_GROUP, 1u);
4605 de::MovePtr<BufferWithMemory> hitsShaderBindingTable = rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, HIT_GROUP, 1u);
4606 de::MovePtr<BufferWithMemory> callShaderBindingTable = rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, CALL_GROUP, 1u);
4607 const VkStridedDeviceAddressRegionKHR rgenShaderBindingTableRegion = makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, rgenShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
4608 const VkStridedDeviceAddressRegionKHR missShaderBindingTableRegion = makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, missShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
4609 const VkStridedDeviceAddressRegionKHR hitsShaderBindingTableRegion = makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, hitsShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
4610 const VkStridedDeviceAddressRegionKHR callShaderBindingTableRegion = makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, callShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
4611 const Move<VkDescriptorPool> descriptorPool = makeRayTracingDescriptorPool(context, inputBuffers);
4612 const Move<VkDescriptorSet> descriptorSet = makeRayTracingDescriptorSet(context, *descriptorPool, *descriptorSetLayout, extraDatas, extraDatasCount, stagesVector, inputBuffers);
4613 const Move<VkDescriptorSet> descriptorSetAS = makeRayTracingDescriptorSetAS(context, *descriptorPool, *descriptorSetLayoutAS, topLevelAccelerationStructure);
4614 const Move<VkCommandPool> cmdPool = makeCommandPool(vkd, device, queueFamilyIndex);
4615 const Move<VkCommandBuffer> cmdBuffer = makeCommandBuffer(context, *cmdPool);
4616 deUint32 passIterations = 0u;
4617 deUint32 failIterations = 0u;
4618
4619 DE_ASSERT(shaderStageTested != 0);
4620
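// Exercise every launch width up to the maximum: densely below 128 and in powers of two above (see getNextWidth).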
4621 for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
4622 {
4623
4624 for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
4625 {
4626 // Re-initialize the extra input data for this iteration
4627 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
4628
4629 initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
4630 }
4631
4632 beginCommandBuffer(vkd, *cmdBuffer);
4633 {
4634 vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);
4635
4636 bottomLevelAccelerationStructure->build(vkd, device, *cmdBuffer);
4637 topLevelAccelerationStructure->build(vkd, device, *cmdBuffer);
4638
4639 vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 1u, 1u, &descriptorSetAS.get(), 0u, DE_NULL);
4640
4641 if (stagesCount + extraDatasCount > 0)
4642 vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
4643
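// Launch a single row of 'width' rays; the tested stages write their per-invocation results
// into the result SSBOs checked below.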
4644 cmdTraceRays(vkd,
4645 *cmdBuffer,
4646 &rgenShaderBindingTableRegion,
4647 &missShaderBindingTableRegion,
4648 &hitsShaderBindingTableRegion,
4649 &callShaderBindingTableRegion,
4650 width, 1, 1);
4651
4652 const VkMemoryBarrier postTraceMemoryBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
4653 cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, VK_PIPELINE_STAGE_HOST_BIT, &postTraceMemoryBarrier);
4654 }
4655 endCommandBuffer(vkd, *cmdBuffer);
4656
4657 submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
4658
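// Read back and verify the results written by each tested stage.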
4659 for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
4660 {
4661 std::vector<const void*> datas;
4662
4663 if (!inputBuffers[ndx]->isImage())
4664 {
4665 const Allocation& resultAlloc = inputBuffers[ndx]->getAllocation();
4666
4667 invalidateAlloc(vkd, device, resultAlloc);
4668
4669 // we always have our result data first
4670 datas.push_back(resultAlloc.getHostPtr());
4671 }
4672
4673 for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
4674 {
4675 const deUint32 datasNdx = index - stagesCount;
4676
4677 if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
4678 {
4679 const Allocation& resultAlloc = inputBuffers[index]->getAllocation();
4680
4681 invalidateAlloc(vkd, device, resultAlloc);
4682
4683 // Followed by any extra input data that is visible to this stage
4684 datas.push_back(resultAlloc.getHostPtr());
4685 }
4686 }
4687
4688 if (!checkResult(internalData, datas, width, subgroupSize, false))
4689 failIterations++;
4690 else
4691 passIterations++;
4692 }
4693
4694 vkd.resetCommandBuffer(*cmdBuffer, 0);
4695 }
4696
4697 if (failIterations > 0 || passIterations == 0)
4698 return tcu::TestStatus::fail("Failed " + de::toString(failIterations) + " out of " + de::toString(failIterations + passIterations) + " iterations.");
4699 else
4700 return tcu::TestStatus::pass("OK");
4701 }
4702 } // namespace subgroups
4703 } // namespace vkt
4704