/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2021 Google LLC.
 *
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Tests that compute shaders have a subgroup size that is uniform in
 *        command scope.
 *//*--------------------------------------------------------------------*/

#include "deUniquePtr.hpp"

#include "vkRef.hpp"
#include "vkRefUtil.hpp"
#include "vkPrograms.hpp"
#include "vkMemUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkImageWithMemory.hpp"
#include "vkBarrierUtil.hpp"

#include "vktTestCaseUtil.hpp"

using namespace vk;

namespace vkt
{
namespace subgroups
{
namespace
{
using std::vector;
using de::MovePtr;

// Runs two back-to-back compute dispatches (with differing, specialization-
// constant-driven local sizes) in a single command buffer, records the
// subgroup size seen by every subgroup, and verifies all recorded sizes agree.
class MultipleDispatchesUniformSubgroupSizeInstance : public TestInstance
{
public:
					MultipleDispatchesUniformSubgroupSizeInstance	(Context& context);
	tcu::TestStatus	iterate											(void);
};

MultipleDispatchesUniformSubgroupSizeInstance::MultipleDispatchesUniformSubgroupSizeInstance (Context& context)
	: TestInstance (context)
{
}

tcu::TestStatus MultipleDispatchesUniformSubgroupSizeInstance::iterate (void)
{
	const DeviceInterface&		vk					= m_context.getDeviceInterface();
	const VkDevice				device				= m_context.getDevice();
	Allocator&					allocator			= m_context.getDefaultAllocator();
	const VkQueue				queue				= m_context.getUniversalQueue();
	const deUint32				queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();

	const Move<VkCommandPool>	cmdPool				= createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex);
	const Move<VkCommandBuffer>	cmdBuffer			= allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

	Move<VkShaderModule>		computeShader		= createShaderModule(vk, device, m_context.getBinaryCollection().get("comp"), 0u);

	// The number of invocations in a workgroup.
	const deUint32				maxLocalSize		= m_context.getDeviceProperties().limits.maxComputeWorkGroupSize[0];

	// Create a storage buffer to hold the sizes of subgroups. The first and
	// second dispatch each write one half of it (selected via a dynamic
	// descriptor offset below).
	const VkDeviceSize			bufferSize			= maxLocalSize * 2 * sizeof(deUint32);

	const VkBufferCreateInfo	resultBufferCreateInfo	= makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
	Move<VkBuffer>				resultBuffer			= createBuffer(vk, device, &resultBufferCreateInfo);
	MovePtr<Allocation>			resultBufferMemory		= allocator.allocate(getBufferMemoryRequirements(vk, device, *resultBuffer), MemoryRequirement::HostVisible);

	VK_CHECK(vk.bindBufferMemory(device, *resultBuffer, resultBufferMemory->getMemory(), resultBufferMemory->getOffset()));

	// Build descriptors for the storage buffer.
	const Unique<VkDescriptorPool>	descriptorPool			(DescriptorPoolBuilder().addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
															 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
	const auto						descriptorSetLayout1	(DescriptorSetLayoutBuilder().addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, VK_SHADER_STAGE_COMPUTE_BIT)
															 .build(vk, device));
	// Descriptor range covers one half of the buffer; the dynamic offset moves it.
	const VkDescriptorBufferInfo	resultInfo				= makeDescriptorBufferInfo(*resultBuffer, 0u, (VkDeviceSize) bufferSize - maxLocalSize * sizeof(deUint32));

	const VkDescriptorSetAllocateInfo	allocInfo			=
	{
		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,	// sType
		DE_NULL,										// pNext
		*descriptorPool,								// descriptorPool
		1u,												// descriptorSetCount
		&(*descriptorSetLayout1)						// pSetLayouts
	};

	Move<VkDescriptorSet>		descriptorSet	= allocateDescriptorSet(vk, device, &allocInfo);
	DescriptorSetUpdateBuilder	builder;

	builder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, &resultInfo);
	builder.update(vk, device);

	// Compute pipeline
	const Move<VkPipelineLayout>	computePipelineLayout	= makePipelineLayout(vk, device, *descriptorSetLayout1);

	for (deUint32 localSize1 = 8; localSize1 < maxLocalSize + 1; localSize1 *= 2)
	{
		for (deUint32 localSize2 = 8; localSize2 < maxLocalSize + 1; localSize2 *= 2)
		{
			// On each iteration, change the number of invocations which might affect
			// the subgroup size if the driver doesn't behave as expected.
			const VkSpecializationMapEntry			entries					=
			{
				0u,					// deUint32	constantID;
				0u,					// deUint32	offset;
				sizeof(localSize1)	// size_t	size;
			};
			const VkSpecializationInfo				specInfo				=
			{
				1,					// mapEntryCount
				&entries,			// pMapEntries
				sizeof(localSize1),	// dataSize
				&localSize1			// pData
			};
			const VkSpecializationInfo				specInfo2				=
			{
				1,					// mapEntryCount
				&entries,			// pMapEntries
				sizeof(localSize2),	// dataSize
				&localSize2			// pData
			};

			const VkPipelineShaderStageCreateInfo	shaderStageCreateInfo	=
			{
				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,					// sType
				DE_NULL,																// pNext
				VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT,	// flags
				VK_SHADER_STAGE_COMPUTE_BIT,											// stage
				*computeShader,															// module
				"main",																	// pName
				&specInfo,																// pSpecializationInfo
			};

			const VkPipelineShaderStageCreateInfo	shaderStageCreateInfo2	=
			{
				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,					// sType
				DE_NULL,																// pNext
				VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT,	// flags
				VK_SHADER_STAGE_COMPUTE_BIT,											// stage
				*computeShader,															// module
				"main",																	// pName
				&specInfo2,																// pSpecializationInfo
			};

			const VkComputePipelineCreateInfo		pipelineCreateInfo		=
			{
				VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,	// sType
				DE_NULL,										// pNext
				0u,												// flags
				shaderStageCreateInfo,							// stage
				*computePipelineLayout,							// layout
				(VkPipeline) 0,									// basePipelineHandle
				0u,												// basePipelineIndex
			};

			const VkComputePipelineCreateInfo		pipelineCreateInfo2		=
			{
				VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,	// sType
				DE_NULL,										// pNext
				0u,												// flags
				shaderStageCreateInfo2,							// stage
				*computePipelineLayout,							// layout
				(VkPipeline) 0,									// basePipelineHandle
				0u,												// basePipelineIndex
			};

			Move<VkPipeline>	computePipeline		= createComputePipeline(vk, device, (VkPipelineCache) 0u, &pipelineCreateInfo);
			Move<VkPipeline>	computePipeline2	= createComputePipeline(vk, device, (VkPipelineCache) 0u, &pipelineCreateInfo2);

			beginCommandBuffer(vk, *cmdBuffer);

			// Clears the values written on the previous iteration.
			// NOTE(review): there is no TRANSFER->COMPUTE barrier between this fill
			// and the first dispatch — confirm whether one is required by the
			// synchronization rules for this access pattern.
			vk.cmdFillBuffer(*cmdBuffer, *resultBuffer, 0u, VK_WHOLE_SIZE, 0);

			// First dispatch writes the lower half of the buffer (dynamic offset 0).
			const deUint32 zero = 0u;
			vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipelineLayout, 0u, 1u, &descriptorSet.get(), 1, &zero);
			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
			vk.cmdDispatch(*cmdBuffer, 1, 1, 1);

			// Order the two dispatches' writes to the shared result buffer.
			const auto barrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_WRITE_BIT, *resultBuffer, 0ull, bufferSize);
			vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags) 0,
								  0, (const VkMemoryBarrier *) DE_NULL, 1, &barrier, 0, (const VkImageMemoryBarrier *) DE_NULL);

			// Second dispatch writes the upper half via the dynamic offset.
			const deUint32 offset = static_cast<deUint32>(maxLocalSize * sizeof(deUint32));
			vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipelineLayout, 0u, 1u, &descriptorSet.get(), 1u, &offset);
			vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline2);
			vk.cmdDispatch(*cmdBuffer, 1, 1, 1);

			endCommandBuffer(vk, *cmdBuffer);
			submitCommandsAndWait(vk, device, queue, *cmdBuffer);

			invalidateAlloc(vk, device, *resultBufferMemory);

			const deUint32	*res	= static_cast<const deUint32 *>(resultBufferMemory->getHostPtr());
			deUint32		size	= 0;

			// Search for the first nonzero size. Then go through the data of both pipelines and check that
			// the first nonzero size matches with other nonzero values.
			for (deUint32 i = 0; i < maxLocalSize; i++)
			{
				if (res[i] != 0)
				{
					size = res[i];
					break;
				}
			}

			// Subgroup size is guaranteed to be at least 1.
			DE_ASSERT(size > 0);

			for (deUint32 i = 0; i < maxLocalSize * 2; i++)
			{
				if (size != res[i] && res[i] != 0)
					return tcu::TestStatus::fail("Subgroup size not uniform in command scope. " + std::to_string(res[i]) + " != " + std::to_string(size));
			}
		}
	}
	return tcu::TestStatus::pass("pass");
}

// Test case wrapper: support check, shader generation and instance creation.
class MultipleDispatchesUniformSubgroupSize : public TestCase
{
public:
						MultipleDispatchesUniformSubgroupSize	(tcu::TestContext&	testCtx,
																 const std::string&	name,
																 const std::string&	description);

	void				initPrograms							(SourceCollections&	programCollection) const;
	TestInstance*		createInstance							(Context&			context) const;
	virtual void		checkSupport							(Context&			context) const;
};

MultipleDispatchesUniformSubgroupSize::MultipleDispatchesUniformSubgroupSize (tcu::TestContext&	testCtx,
																			  const std::string&	name,
																			  const std::string&	description)
	: TestCase (testCtx, name, description)
{
}

void MultipleDispatchesUniformSubgroupSize::checkSupport (Context& context) const
{
	// The test relies on VK_EXT_subgroup_size_control's subgroupSizeControl feature.
	const VkPhysicalDeviceSubgroupSizeControlFeaturesEXT&	subgroupSizeControlFeatures	= context.getSubgroupSizeControlFeaturesEXT();

	if (subgroupSizeControlFeatures.subgroupSizeControl == DE_FALSE)
		TCU_THROW(NotSupportedError, "Device does not support varying subgroup sizes");
}

void MultipleDispatchesUniformSubgroupSize::initPrograms (SourceCollections& programCollection) const
{
	// One invocation per subgroup (subgroupElect) records gl_SubgroupSize into
	// the output buffer, indexed by workgroup and subgroup ID.
	std::ostringstream computeSrc;
	computeSrc
		<< glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
		<< "#extension GL_KHR_shader_subgroup_basic : enable\n"
		<< "#extension GL_KHR_shader_subgroup_vote : enable\n"
		<< "#extension GL_KHR_shader_subgroup_ballot : enable\n"
		<< "layout(std430, binding = 0) buffer Outputs { uint sizes[]; };\n"

		<< "layout(local_size_x_id = 0) in;\n"

		<< "void main()\n"
		<< "{\n"
		<< "    if (subgroupElect())\n"
		<< "    {\n"
		<< "        sizes[gl_WorkGroupID.x * gl_NumSubgroups + gl_SubgroupID] = gl_SubgroupSize;\n"
		<< "    }\n"
		<< "}\n";

	programCollection.glslSources.add("comp")
		<< glu::ComputeSource(computeSrc.str())
		<< ShaderBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3, 0u);
}

TestInstance* MultipleDispatchesUniformSubgroupSize::createInstance (Context& context) const
{
	return new MultipleDispatchesUniformSubgroupSizeInstance(context);
}

} // anonymous ns

tcu::TestCaseGroup* createMultipleDispatchesUniformSubgroupSizeTests (tcu::TestContext& testCtx)
{
	de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "multiple_dispatches", "Multiple dispatches uniform subgroup size tests"));
	testGroup->addChild(new MultipleDispatchesUniformSubgroupSize(testCtx, "uniform_subgroup_size", ""));

	return testGroup.release();
}

} // subgroups
} // vkt