• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2020 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #ifndef BRW_RT_H
25 #define BRW_RT_H
26 
27 #ifdef __cplusplus
28 extern "C" {
29 #endif
30 
/** Size of one SBT handle; Vulkan defines shaderGroupHandleSize = 32 */
#define BRW_RT_SBT_HANDLE_SIZE 32

/** Offset, past the RT dispatch globals, at which "push" constants live */
#define BRW_RT_PUSH_CONST_OFFSET 128

/** Stride of the resume SBT */
#define BRW_BTD_RESUME_SBT_STRIDE 8
39 
/* Vulkan always uses exactly two levels of BVH.  Here they are called world
 * and object; at the API level, the same two levels are referred to as top
 * and bottom respectively.
 */
enum brw_rt_bvh_level {
   BRW_RT_BVH_LEVEL_WORLD  = 0,
   BRW_RT_BVH_LEVEL_OBJECT = 1,
};

/** Number of BVH levels enumerated by brw_rt_bvh_level */
#define BRW_RT_MAX_BVH_LEVELS 2
48 
/** BVH node types
 *
 * The numbering is not contiguous: no enumerant is defined for value 2.
 */
enum brw_rt_bvh_node_type {
   BRW_RT_BVH_NODE_TYPE_INTERNAL   = 0,
   BRW_RT_BVH_NODE_TYPE_INSTANCE   = 1,
   BRW_RT_BVH_NODE_TYPE_PROCEDURAL = 3,
   BRW_RT_BVH_NODE_TYPE_QUAD       = 4,
};
55 
/** HitKind values reported for triangle geometry
 *
 * The values here must stay in sync with the SPIR-V enum.
 */
enum brw_rt_hit_kind {
   BRW_RT_HIT_KIND_FRONT_FACE = 0xFE,
   BRW_RT_HIT_KIND_BACK_FACE  = 0xFF,
};
64 
/** Ray flags
 *
 * Each flag occupies a single bit.  The values must stay in sync with the
 * SPIR-V RayFlags enum.
 */
enum brw_rt_ray_flags {
   BRW_RT_RAY_FLAG_FORCE_OPAQUE                = 1 << 0,
   BRW_RT_RAY_FLAG_FORCE_NON_OPAQUE            = 1 << 1,
   BRW_RT_RAY_FLAG_TERMINATE_ON_FIRST_HIT      = 1 << 2,
   BRW_RT_RAY_FLAG_SKIP_CLOSEST_HIT_SHADER     = 1 << 3,
   BRW_RT_RAY_FLAG_CULL_BACK_FACING_TRIANGLES  = 1 << 4,
   BRW_RT_RAY_FLAG_CULL_FRONT_FACING_TRIANGLES = 1 << 5,
   BRW_RT_RAY_FLAG_CULL_OPAQUE                 = 1 << 6,
   BRW_RT_RAY_FLAG_CULL_NON_OPAQUE             = 1 << 7,
   BRW_RT_RAY_FLAG_SKIP_TRIANGLES              = 1 << 8,
   BRW_RT_RAY_FLAG_SKIP_AABBS                  = 1 << 9,
};
81 
/** Layout of the RT scratch memory area
 *
 * All offsets, strides and sizes are expressed in bytes.
 */
struct brw_rt_scratch_layout {
   /** How many stack IDs each DSS has */
   uint32_t stack_ids_per_dss;

   /** Byte offset at which the hardware MemRay stack begins */
   uint32_t ray_stack_start;

   /** Byte stride of the hardware MemRay stack */
   uint32_t ray_stack_stride;

   /** Byte offset at which the SW stacks begin */
   uint64_t sw_stack_start;

   /** Byte size of the SW stack belonging to one shader invocation */
   uint32_t sw_stack_size;

   /** Byte size of the entire RT scratch memory area */
   uint64_t total_size;
};
101 
/** Parameters handed to the raygen trampoline shader
 *
 * This struct is carefully constructed to be exactly 32B and must be passed
 * to the raygen trampoline shader as inline constant data.
 */
struct brw_rt_raygen_trampoline_params {
   /** GPU address of the RT_DISPATCH_GLOBALS */
   uint64_t rt_disp_globals_addr;

   /** GPU address of the raygen shader's BINDLESS_SHADER_RECORD */
   uint64_t raygen_bsr_addr;

   /** 1 for an indirect dispatch, 0 otherwise */
   uint8_t is_indirect;

   /** Integer log2 of the local group size (three components)
    *
    * Ray-tracing shaders have no notion of local vs. global workgroup size;
    * all they have is a single 3D launch size.  The raygen trampoline shader
    * is always dispatched with a local workgroup size equal to the SIMD
    * width, but the shape of that local workgroup is chosen at dispatch
    * time from the shape of the launch and handed to the trampoline through
    * this field.  (A Z dimension on the local workgroup would be pointless
    * for a 2D launch.)
    *
    * The integer log2 is stored, rather than the size itself, because
    * non-power-of-two sizes serve no purpose and shifts are cheaper than
    * division.
    */
   uint8_t local_group_size_log2[3];

   /** Padding that brings the struct up to 32B */
   uint32_t pad[3];
};
134 
/** Size (in bytes) of the "hot zone"
 *
 * The hot zone is a data structure defined by software.  It is a single
 * uvec4 carrying two pieces of information:
 *
 *  - hotzone.x: Stack offset (in bytes)
 *
 *    Offset (in bytes) into the per-thread scratch space at which the stack
 *    of the current shader starts.  The calling shader increments it before
 *    any shader call type instruction, and the resume shader decrements it
 *    again as part of completing the return operation.
 *
 *  - hotzone.yzw: The launch ID associated with the current thread
 *
 *    Inside a bindless shader, the only information available is the DSS ID
 *    from the hardware EU and a per-DSS stack ID.  In particular, the
 *    three-dimensional launch ID is lost the moment we leave the raygen
 *    trampoline.
 */
#define BRW_RT_SIZEOF_HOTZONE 16
155 
/* Sizes (in bytes) of the hardware ray-tracing data structures.  Quoting the
 * BSpec section "Address Computation for Memory Based Data Structures: Ray
 * and TraversalStack (Async Ray Tracing)":
 *
 *    sizeof(Ray) = 64B, sizeof(HitInfo) = 32B, sizeof(TravStack) = 32B.
 */
#define BRW_RT_SIZEOF_RAY        64
#define BRW_RT_SIZEOF_HIT_INFO   32
#define BRW_RT_SIZEOF_TRAV_STACK 32
164 
/* Size (in bytes) of a ray query, i.e. the BSpec's syncStackSize:
 *
 *    syncStackSize = (maxBVHLevels % 2 == 1) ?
 *       (sizeof(HitInfo) * 2 +
 *          (sizeof(Ray) + sizeof(TravStack)) * maxBVHLevels + 32B) :
 *       (sizeof(HitInfo) * 2 +
 *          (sizeof(Ray) + sizeof(TravStack)) * maxBVHLevels);
 *
 * The select only exists to keep the result aligned to 64B: an odd number of
 * levels gets an extra 32B, an even number gets nothing.
 */
#define BRW_RT_SIZEOF_RAY_QUERY \
   (2 * BRW_RT_SIZEOF_HIT_INFO + \
    BRW_RT_MAX_BVH_LEVELS * (BRW_RT_SIZEOF_RAY + BRW_RT_SIZEOF_TRAV_STACK) + \
    ((BRW_RT_MAX_BVH_LEVELS % 2) != 0 ? 32 : 0))
179 
/** Size (in bytes) of the HW stack: two HitInfos followed by one Ray and one
 * TravStack for every BVH level.
 */
#define BRW_RT_SIZEOF_HW_STACK \
   (2 * BRW_RT_SIZEOF_HIT_INFO + \
    BRW_RT_MAX_BVH_LEVELS * BRW_RT_SIZEOF_RAY + \
    BRW_RT_MAX_BVH_LEVELS * BRW_RT_SIZEOF_TRAV_STACK)
184 
/* Hit attribute data lives in a mesa-defined region that is placed directly
 * after the HW stack.
 */
#define BRW_RT_SIZEOF_HIT_ATTRIB_DATA 64
#define BRW_RT_OFFSETOF_HIT_ATTRIB_DATA BRW_RT_SIZEOF_HW_STACK

/** Stride of one async stack: the HW stack plus the hit attribute region,
 * aligned to 64B.
 */
#define BRW_RT_ASYNC_STACK_STRIDE \
   ALIGN(BRW_RT_OFFSETOF_HIT_ATTRIB_DATA + BRW_RT_SIZEOF_HIT_ATTRIB_DATA, 64)
192 
193 static inline void
brw_rt_compute_scratch_layout(struct brw_rt_scratch_layout * layout,const struct intel_device_info * devinfo,uint32_t stack_ids_per_dss,uint32_t sw_stack_size)194 brw_rt_compute_scratch_layout(struct brw_rt_scratch_layout *layout,
195                               const struct intel_device_info *devinfo,
196                               uint32_t stack_ids_per_dss,
197                               uint32_t sw_stack_size)
198 {
199    layout->stack_ids_per_dss = stack_ids_per_dss;
200 
201    const uint32_t dss_count = intel_device_info_num_dual_subslices(devinfo);
202    const uint32_t num_stack_ids = dss_count * stack_ids_per_dss;
203 
204    uint64_t size = 0;
205 
206    /* The first thing in our scratch area is an array of "hot zones" which
207     * store the stack offset as well as the launch IDs for each active
208     * invocation.
209     */
210    size += BRW_RT_SIZEOF_HOTZONE * num_stack_ids;
211 
212    /* Next, we place the HW ray stacks */
213    assert(size % 64 == 0); /* Cache-line aligned */
214    assert(size < UINT32_MAX);
215    layout->ray_stack_start = size;
216    layout->ray_stack_stride = BRW_RT_ASYNC_STACK_STRIDE;
217    size += num_stack_ids * layout->ray_stack_stride;
218 
219    /* Finally, we place the SW stacks for the individual ray-tracing shader
220     * invocations.  We align these to 64B to ensure that we don't have any
221     * shared cache lines which could hurt performance.
222     */
223    assert(size % 64 == 0);
224    layout->sw_stack_start = size;
225    layout->sw_stack_size = ALIGN(sw_stack_size, 64);
226    size += num_stack_ids * layout->sw_stack_size;
227 
228    layout->total_size = size;
229 }
230 
231 #ifdef __cplusplus
232 }
233 #endif
234 
235 #endif /* BRW_RT_H */
236