• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright (c) 2020-2023 Huawei Technologies Co. Ltd.
2//
3// SPDX-License-Identifier: CC-BY-4.0
4
5include::{generated}/meta/{refprefix}VK_HUAWEI_cluster_culling_shader.adoc[]
6
7=== Other Extension Metadata
8
9*Last Modified Date*::
10    2023-08-16
11*Interactions and External Dependencies*::
12  - This extension provides API support for
13    {GLSLregistry}/huawei/GLSL_HUAWEI_cluster_culling_shader.txt[`GL_HUAWEI_cluster_culling_shader`].
14*Contributors*::
15  - Yuchang Wang, Huawei
16  - Juntao Li, Huawei
17  - Pan Gao, Huawei
18  - Jie Cao, Huawei
19  - Yunjin Zhang, Huawei
20  - Shujie Zhou, Huawei
21  - Chaojun Wang, Huawei
22  - Jiajun Hu, Huawei
23  - Cong Zhang, Huawei
24
25=== Description
26
27Cluster Culling Shaders (CCS) are similar to the existing compute shaders.
28Their main purpose is to provide an execution environment in order to
29perform coarse-level geometry culling and LOD selection more efficiently on
30the GPU.
31
32The traditional 2-pass GPU culling solution using a compute shader sometimes
33needs a pipeline barrier between compute and graphics pipeline to optimize
34performance.
35An additional compaction process may also be required.
36This extension addresses these shortcomings, allowing compute shaders to
37directly emit visible clusters to the following graphics pipeline.
38
39A set of new built-in output variables are used to express a visible
40cluster, including per-cluster shading rate.
41In addition, a new built-in function is used to emit these variables from
42CCS to the IA stage.
43The IA stage can use these variables to fetch the vertices of a visible
44cluster and drive vertex shaders to shade these vertices.
45
46Note that CCS do not work with geometry or tessellation shaders, but both IA
47and vertex shaders are preserved.
48Vertex shaders are still used for vertex position shading, instead of
49directly outputting transformed vertices from the compute shader.
50This makes CCS more suitable for mobile GPUs.
51
52include::{generated}/interfaces/VK_HUAWEI_cluster_culling_shader.adoc[]
53
54=== New Built-In Variables
55
56  * <<interfaces-builtin-variables-indexcounthuawei,IndexCountHUAWEI>>
57  * <<interfaces-builtin-variables-vertexcounthuawei,VertexCountHUAWEI>>
58  * <<interfaces-builtin-variables-instancecounthuawei,InstanceCountHUAWEI>>
59  * <<interfaces-builtin-variables-firstindexhuawei,FirstIndexHUAWEI>>
60  * <<interfaces-builtin-variables-firstvertexhuawei,FirstVertexHUAWEI>>
61  * <<interfaces-builtin-variables-vertexoffsethuawei,VertexOffsetHUAWEI>>
62  * <<interfaces-builtin-variables-firstinstancehuawei,FirstInstanceHUAWEI>>
63  * <<interfaces-builtin-variables-clusteridhuawei,ClusterIDHUAWEI>>
64  * <<interfaces-builtin-variables-clustershadingratehuawei,ClusterShadingRateHUAWEI>>
65
66=== New SPIR-V Capability
67
68  * <<spirvenv-capabilities-table-ClusterCullingShadingHUAWEI,
69    code:ClusterCullingShadingHUAWEI>>
70
71=== Sample Code
72
73Example of cluster culling in a GLSL shader
74
75[source,c]
76----
77#extension GL_HUAWEI_cluster_culling_shader: enable
78
79#define GPU_WARP_SIZE                   32
80#define GPU_GROUP_SIZE                  GPU_WARP_SIZE
81
82#define GPU_CLUSTER_PER_INVOCATION      1
83#define GPU_CLUSTER_PER_WORKGROUP       (GPU_GROUP_SIZE * GPU_CLUSTER_PER_INVOCATION)
84
85// Workgroup size (number of invocations per workgroup)
86// - 1D only: local_size_y and local_size_z are 1
87// - local_size_x = GPU_GROUP_SIZE = GPU_WARP_SIZE = 32
88layout(local_size_x=GPU_GROUP_SIZE, local_size_y=1, local_size_z=1) in;
89
90#define GPU_DRAW_BUFFER_BINDING             0   // binding of the draw_indirect_ssbo buffer block
91#define GPU_INSTANCE_DESCRIPTOR_BINDING     1   // binding of the instance_descriptor_ssbo buffer block
92
93struct BoundingSphere  // sphere that main() passes to isSphereOutsideFrustum()
94{
95  vec3 center;   // sphere center
96  float radius;  // sphere radius
97};
98
99struct InstanceData
100{
101  mat4 mvp_matrix;                      // MVP matrix.
102  vec4 frustum_planes[6];               // six frustum planes; isSphereOutsideFrustum() reads .xyz and .w of each
103  mat4 model_matrix_transpose_inverse;  // inverse transpose of the model matrix.
104  vec3 view_origin;                     // view origin; Distance() subtracts it from the sphere center
105};
106
107struct InstanceDescriptor  // element type of the instance_descriptors[] storage buffer array
108{
109  uint begin;                  // not read individually by the shader code in this listing
110  uint end;                    // not read individually by the shader code in this listing
111  uint cluster_count;          // not read individually by the shader code in this listing
112  uint debug;                  // not read individually by the shader code in this listing
113  BoundingSphere sphere;       // bounding sphere used for frustum culling and by Distance()
114  InstanceData instance_data;  // frustum planes and view origin read by the helper functions
115};
116
117struct DrawElementsCommand{  // element type of draw_commands[]; main() copies each field to a built-in output
118  uint indexcount;     // copied to gl_IndexCountHUAWEI
119  uint instanceCount;  // copied to gl_InstanceCountHUAWEI
120  uint firstIndex;     // copied to gl_FirstIndexHUAWEI
121  int  vertexoffset;   // copied to gl_VertexOffsetHUAWEI
122  uint firstInstance;  // copied to gl_FirstInstanceHUAWEI
123  uint cluster_id;     // copied to gl_ClusterIDHUAWEI
124};
125
126// indexed mode: per-cluster built-in outputs that main() writes before calling dispatchClusterHUAWEI()
127out gl_PerClusterHUAWEI{
128  uint gl_IndexCountHUAWEI;
129  uint gl_InstanceCountHUAWEI;
130  uint gl_FirstIndexHUAWEI;
131  int  gl_VertexOffsetHUAWEI;
132  uint gl_FirstInstanceHUAWEI;
133  uint gl_ClusterIDHUAWEI;
134  uint gl_ClusterShadingRateHUAWEI;  // main() stores 0 or a combination of gl_ShadingRateFlag*EXT values here
135};
136
137layout(binding = GPU_DRAW_BUFFER_BINDING, std430) buffer draw_indirect_ssbo
138{
139        DrawElementsCommand draw_commands[];  // draw parameters; main() indexes this array with cluster_id
140};
141
142layout(binding = GPU_INSTANCE_DESCRIPTOR_BINDING, std430) buffer instance_descriptor_ssbo
143{
144        InstanceDescriptor instance_descriptors[];  // per-instance data; indexed with instance_id
145};
146
147
148float Distance(uint instance_id)  // distance from the view origin to the bounding-sphere center of the instance
149{
150    // The difference vector must not be normalized: a normalized vector always
151    // has length 1.0, which would make the thresholds tested in main() useless.
152    vec3 v = instance_descriptors[instance_id].sphere.center -
153             instance_descriptors[instance_id].instance_data.view_origin;
154    return length(v);
155}
156
157uint instance_id;  // index of the instance handled by this invocation; assigned in main()
158
159// Returns true when the sphere lies entirely behind at least one of the six frustum planes of instance_descriptors[instance_id].
160bool isSphereOutsideFrustum( vec3 sphere_center, float sphere_radius )
161{
162  for(int i = 0; i < 6; i++)
163  {
164      vec4 plane = instance_descriptors[instance_id].instance_data.frustum_planes[i];
165      // A signed distance below -radius means fully outside (assumes normalized planes with inward-facing normals).
166      if (dot(plane.xyz, sphere_center) + plane.w < -sphere_radius) return true;
167  }
168  return false;
169}
170
171
172void main()
173{
174    // Entry point: cull one instance per invocation and, if it is visible, emit one cluster draw.
175    instance_id = gl_GlobalInvocationID.x;  // not declared in this function; isSphereOutsideFrustum() reads the same variable
176    InstanceDescriptor inst_desc = instance_descriptors[instance_id];
177
178    // instance based culling
179    bool render = !isSphereOutsideFrustum(inst_desc.sphere.center, inst_desc.sphere.radius);
180
181    if (render)
182    {
183        // calculate distance (named dist so the built-in distance() is not hidden)
184        float dist = Distance(instance_id);
185
186        // update shading rate built-in variable: larger distance selects a coarser rate
187        if(dist > 0.7)
188            gl_ClusterShadingRateHUAWEI =
189                gl_ShadingRateFlag4VerticalPixelsEXT | gl_ShadingRateFlag4HorizontalPixelsEXT;
190        else if(dist > 0.3)
191            gl_ClusterShadingRateHUAWEI =
192                gl_ShadingRateFlag2VerticalPixelsEXT | gl_ShadingRateFlag2HorizontalPixelsEXT;
193        else
194            gl_ClusterShadingRateHUAWEI = 0;
195
196        // this is a visible cluster, update built-in output variables (indexed mode).
197        uint cluster_id = instance_id;  // assumption: one cluster per invocation (GPU_CLUSTER_PER_INVOCATION is 1) - confirm
198        gl_IndexCountHUAWEI     = draw_commands[cluster_id].indexcount;
199        gl_InstanceCountHUAWEI  = draw_commands[cluster_id].instanceCount;
200        gl_FirstIndexHUAWEI     = draw_commands[cluster_id].firstIndex;
201        gl_VertexOffsetHUAWEI   = draw_commands[cluster_id].vertexoffset;
202        gl_FirstInstanceHUAWEI  = draw_commands[cluster_id].firstInstance;
203        gl_ClusterIDHUAWEI      = draw_commands[cluster_id].cluster_id;
204
205        // emit built-in output variables as a drawing command to subsequent
206        // rendering pipeline.
207        dispatchClusterHUAWEI();
208    }
209}
210----
211
212Example of graphics pipeline creation with cluster culling shader
213
214[source,c]
215----
216// create a cluster culling shader stage info structure.
217VkPipelineShaderStageCreateInfo ccsStageInfo{};
218ccsStageInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
219ccsStageInfo.stage = VK_SHADER_STAGE_CLUSTER_CULLING_BIT_HUAWEI;
220ccsStageInfo.module = clustercullingshaderModule;
221ccsStageInfo.pName =  "main";
222
223// pipeline shader stages: cluster culling, vertex and fragment
224VkPipelineShaderStageCreateInfo shaderStages[] = { ccsStageInfo, vertexShaderStageInfo, fragmentShaderStageInfo };
225
226// create graphics pipeline
227VkGraphicsPipelineCreateInfo pipelineInfo{};
228pipelineInfo.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
229pipelineInfo.stageCount = 3;  // number of entries in shaderStages
230pipelineInfo.pStages = shaderStages;  // the member is pStages (plural)
231pipelineInfo.pVertexInputState = &vertexInputInfo;
232// ...
233VkPipeline graphicsPipeline;
234vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &pipelineInfo, nullptr, &graphicsPipeline);
235----
236
237
238Example of launching the execution of cluster culling shader
239
240[source,c]
241----
242vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, graphicsPipeline);  // bind the pipeline at the graphics bind point
243vkCmdDrawClusterHUAWEI(commandBuffer, groupCountX, 1, 1);  // launch the cluster culling shader; presumably groupCountX x 1 x 1 workgroups
244vkCmdEndRenderPass(commandBuffer);
245----
246
247=== Version History
248
249  * Revision 1, 2022-11-18 (YuChang Wang)
250  ** Internal revisions
251  * Revision 2, 2023-04-02 (Jon Leech)
252  ** Grammar edits.
253  * Revision 3, 2023-08-21 (YuChang Wang)
254  ** Add per-cluster shading rate.
255