1// Copyright (c) 2020-2023 Huawei Technologies Co. Ltd. 2// 3// SPDX-License-Identifier: CC-BY-4.0 4 5include::{generated}/meta/{refprefix}VK_HUAWEI_cluster_culling_shader.adoc[] 6 7=== Other Extension Metadata 8 9*Last Modified Date*:: 10 2023-08-16 11*Interactions and External Dependencies*:: 12 - This extension provides API support for 13 {GLSLregistry}/huawei/GLSL_HUAWEI_cluster_culling_shader.txt[`GL_HUAWEI_cluster_culling_shader`]. 14*Contributors*:: 15 - Yuchang Wang, Huawei 16 - Juntao Li, Huawei 17 - Pan Gao, Huawei 18 - Jie Cao, Huawei 19 - Yunjin Zhang, Huawei 20 - Shujie Zhou, Huawei 21 - Chaojun Wang, Huawei 22 - Jiajun Hu, Huawei 23 - Cong Zhang, Huawei 24 25=== Description 26 27Cluster Culling Shaders (CCS) are similar to the existing compute shaders. 28Their main purpose is to provide an execution environment in order to 29perform coarse-level geometry culling and LOD selection more efficiently on 30the GPU. 31 32The traditional 2-pass GPU culling solution using a compute shader sometimes 33needs a pipeline barrier between compute and graphics pipeline to optimize 34performance. 35An additional compaction process may also be required. 36This extension addresses these shortcomings, allowing compute shaders to 37directly emit visible clusters to the following graphics pipeline. 38 39A set of new built-in output variables is used to express a visible 40cluster, including per-cluster shading rate. 41In addition, a new built-in function is used to emit these variables from 42CCS to the Input Assembly (IA) stage. 43The IA stage can use these variables to fetch vertices of a visible 44cluster and drive vertex shaders to shade these vertices. 45 46Note that CCS do not work with geometry or tessellation shaders, but both IA 47and vertex shaders are preserved. 48Vertex shaders are still used for vertex position shading, instead of 49directly outputting transformed vertices from the compute shader. 50This makes CCS more suitable for mobile GPUs. 
51 52include::{generated}/interfaces/VK_HUAWEI_cluster_culling_shader.adoc[] 53 54=== New Built-In Variables 55 56 * <<interfaces-builtin-variables-indexcounthuawei,IndexCountHUAWEI>> 57 * <<interfaces-builtin-variables-vertexcounthuawei,VertexCountHUAWEI>> 58 * <<interfaces-builtin-variables-instancecounthuawei,InstanceCountHUAWEI>> 59 * <<interfaces-builtin-variables-firstindexhuawei,FirstIndexHUAWEI>> 60 * <<interfaces-builtin-variables-firstvertexhuawei,FirstVertexHUAWEI>> 61 * <<interfaces-builtin-variables-vertexoffsethuawei,VertexOffsetHUAWEI>> 62 * <<interfaces-builtin-variables-firstinstancehuawei,FirstInstanceHUAWEI>> 63 * <<interfaces-builtin-variables-clusteridhuawei,ClusterIDHUAWEI>> 64 * <<interfaces-builtin-variables-clustershadingratehuawei,ClusterShadingRateHUAWEI>> 65 66=== New SPIR-V Capability 67 68 * <<spirvenv-capabilities-table-ClusterCullingShadingHUAWEI, 69 code:ClusterCullingShadingHUAWEI>> 70 71=== Sample Code 72 73Example of cluster culling in a GLSL shader 74 75[source,c] 76---- 77#extension GL_HUAWEI_cluster_culling_shader: enable 78 79#define GPU_WARP_SIZE 32 80#define GPU_GROUP_SIZE GPU_WARP_SIZE 81 82#define GPU_CLUSTER_PER_INVOCATION 1 83#define GPU_CLUSTER_PER_WORKGROUP (GPU_GROUP_SIZE * GPU_CLUSTER_PER_INVOCATION) 84 85// Number of threads per workgroup 86// - 1D only 87// - warpsize = 32 88layout(local_size_x=GPU_GROUP_SIZE, local_size_y=1, local_size_z=1) in; 89 90#define GPU_DRAW_BUFFER_BINDING 0 91#define GPU_INSTANCE_DESCRIPTOR_BINDING 1 92 93struct BoundingSphere 94{ 95 vec3 center; 96 float radius; 97}; 98 99struct InstanceData 100{ 101 mat4 mvp_matrix; // mvp matrix. 102 vec4 frustum_planes[6]; // six frustum planes 103 mat4 model_matrix_transpose_inverse; // inverse transpose of model matrix. 
104 vec3 view_origin; // view origin 105}; 106 107struct InstanceDescriptor 108{ 109 uint begin; 110 uint end; 111 uint cluster_count; 112 uint debug; 113 BoundingSphere sphere; 114 InstanceData instance_data; 115}; 116 117struct DrawElementsCommand{ 118 uint indexcount; 119 uint instanceCount; 120 uint firstIndex; 121 int vertexoffset; 122 uint firstInstance; 123 uint cluster_id; 124}; 125 126// indexed mode 127out gl_PerClusterHUAWEI{ 128 uint gl_IndexCountHUAWEI; 129 uint gl_InstanceCountHUAWEI; 130 uint gl_FirstIndexHUAWEI; 131 int gl_VertexOffsetHUAWEI; 132 uint gl_FirstInstanceHUAWEI; 133 uint gl_ClusterIDHUAWEI; 134 uint gl_ClusterShadingRateHUAWEI; 135}; 136 137layout(binding = GPU_DRAW_BUFFER_BINDING, std430) buffer draw_indirect_ssbo 138{ 139 DrawElementsCommand draw_commands[]; 140}; 141 142layout(binding = GPU_INSTANCE_DESCRIPTOR_BINDING, std430) buffer instance_descriptor_ssbo 143{ 144 InstanceDescriptor instance_descriptors[]; 145}; 146 147 148float Distance(uint instance_id) 149{ 150 vec3 v = normalize(instance_descriptors[instance_id].sphere.center - 151 instance_descriptors[instance_id].instance_data.view_origin); 152 float dist = sqrt(dot(v,v)); 153 154 return dist; 155} 156 157bool isSphereOutsideFrustum( vec3 sphere_center, float sphere_radius ) 158{ 159 bool isInside = false; 160 161 for(int i = 0; i < 6; i++) 162 { 163 isInside = isInside || 164 (dot(instance_descriptors[instance_id].instance_data.frustum_planes[i].xyz, 165 sphere_center) + instance_descriptors[instance_id].instance_data.frustum_planes[i].w < 166 sphere_radius); 167 } 168 return isInside; 169} 170 171 172void main() 173{ 174 // get instance description 175 instance_id = gl_GlobalInvocationID.x; 176 InstanceDescriptor inst_desc = instance_descriptors[instance_id]; 177 178 //instance based culling 179 bool render = !isSphereOutsideFrustum(inst_desc.sphere.center, inst_desc.sphere.radius); 180 181 if (render) 182 { 183 // calculate distance 184 float distance = 
Distance(instance_id); 185 186 // update shading rate built-in variable 187 if(distance > 0.7) 188 gl_ClusterShadingRateHUAWEI = 189 gl_ShadingRateFlag4VerticalPixelsEXT | gl_ShadingRateFlag4HorizontalPixelsEXT; 190 else if(distance > 0.3) 191 gl_ClusterShadingRateHUAWEI = 192 gl_ShadingRateFlag2VerticalPixelsEXT | gl_ShadingRateFlag2HorizontalPixelsEXT; 193 else 194 gl_ClusterShadingRateHUAWEI = 0; 195 196 // this is a visible cluster, update built-in output variables. 197 // in case of indexed mode: 198 gl_IndexCountHUAWEI = draw_commands[cluster_id].indexcount; 199 gl_InstanceCountHUAWEI = draw_commands[cluster_id].instanceCount; 200 gl_FirstIndexHUAWEI = draw_commands[cluster_id].firstIndex; 201 gl_VertexOffsetHUAWEI = draw_commands[cluster_id].vertexoffset; 202 gl_FirstInstanceHUAWEI = draw_commands[cluster_id].firstInstance; 203 gl_ClusterIDHUAWEI = draw_commands[cluster_id].cluster_id; 204 205 // emit built-in output variables as a drawing command to the subsequent 206 // rendering pipeline. 207 dispatchClusterHUAWEI(); 208 } 209} 210---- 211 212Example of graphics pipeline creation with cluster culling shader 213 214[source,c] 215---- 216// create a cluster culling shader stage info structure. 217VkPipelineShaderStageCreateInfo ccsStageInfo{}; 218ccsStageInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; 219ccsStageInfo.stage = VK_SHADER_STAGE_CLUSTER_CULLING_BIT_HUAWEI; 220ccsStageInfo.module = clustercullingshaderModule; 221ccsStageInfo.pName = "main"; 222 223// pipeline shader stage creation 224VkPipelineShaderStageCreateInfo shaderStages[] = { ccsStageInfo, vertexShaderStageInfo, fragmentShaderStageInfo }; 225 226// create graphics pipeline 227VkGraphicsPipelineCreateInfo pipelineInfo{}; 228pipelineInfo.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; 229pipelineInfo.stageCount = 3; 230pipelineInfo.pStages = shaderStages; 231pipelineInfo.pVertexInputState = &vertexInputInfo; 232// ... 
233VkPipeline graphicsPipeline; 234vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &pipelineInfo, nullptr, &graphicsPipeline); 235---- 236 237 238Example of launching the execution of a cluster culling shader 239 240[source,c] 241---- 242vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, graphicsPipeline); 243vkCmdDrawClusterHUAWEI(commandBuffer, groupCountX, 1, 1); 244vkCmdEndRenderPass(commandBuffer); 245---- 246 247=== Version History 248 249 * Revision 1, 2022-11-18 (YuChang Wang) 250 ** Internal revisions 251 * Revision 2, 2023-04-02 (Jon Leech) 252 ** Grammar edits. 253 * Revision 3, 2023-08-21 (YuChang Wang) 254 ** Add per-cluster shading rate. 255 