1 /*
2 * Copyright 2024 Valve Corporation
3 * Copyright 2024 Alyssa Rosenzweig
4 * Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
5 * SPDX-License-Identifier: MIT
6 */
7
8 #pragma once
9
10 #include "asahi/compiler/agx_compile.h"
11 #include "util/macros.h"
12 #include "agx_linker.h"
13 #include "agx_nir_lower_vbo.h"
14 #include "agx_pack.h"
15 #include "agx_usc.h"
16 #include "agx_uvs.h"
17
18 #include "hk_device.h"
19 #include "hk_device_memory.h"
20 #include "hk_private.h"
21
22 #include "nir_xfb_info.h"
23 #include "shader_enums.h"
24 #include "vk_pipeline_cache.h"
25
26 #include "nir.h"
27
28 #include "vk_shader.h"
29
/* Forward declarations keep this header's include list minimal; only
 * pointers to these types appear below.
 */
struct hk_physical_device;
struct hk_pipeline_compilation_ctx;
struct vk_descriptor_set_layout;
struct vk_graphics_pipeline_state;
struct vk_pipeline_cache;
struct vk_pipeline_layout;
struct vk_pipeline_robustness_state;
struct vk_shader_module;

/* Fixed uniform slots reserved by the driver. Presumably these index into
 * the shader's uniform register file for the root descriptor table and the
 * image heap respectively — confirm against the USC packing code.
 */
/* TODO: Make dynamic */
#define HK_ROOT_UNIFORM 104
#define HK_IMAGE_HEAP_UNIFORM 108
42
/* Tessellation state contributed by the tessellation stages. The struct is
 * exactly 4 bytes (asserted below) so that two infos can be merged by
 * OR'ing their raw 32-bit representations; "unspecified" must therefore be
 * the all-zero encoding for every field.
 */
struct hk_tess_info {
   /* Output primitive mode, 8-bit bitfield */
   enum tess_primitive_mode mode : 8;

   /* Edge spacing, 8-bit bitfield */
   enum gl_tess_spacing spacing : 8;

   /* Point mode */
   bool points;

   /* Counter-clockwise winding */
   bool ccw;
};
static_assert(sizeof(struct hk_tess_info) == 4, "packed");
50
51 static struct hk_tess_info
hk_tess_info_merge(struct hk_tess_info a,struct hk_tess_info b)52 hk_tess_info_merge(struct hk_tess_info a, struct hk_tess_info b)
53 {
54 static_assert(TESS_PRIMITIVE_UNSPECIFIED == 0, "zero state");
55 static_assert(TESS_SPACING_UNSPECIFIED == 0, "zero state");
56
57 /* Just merge by OR'ing the raw bits */
58 uint32_t x, y;
59 memcpy(&x, &a, sizeof(x));
60 memcpy(&y, &b, sizeof(y));
61
62 x |= y;
63
64 struct hk_tess_info out;
65 memcpy(&out, &x, sizeof(out));
66 return out;
67 }
68
/* Per-shader metadata gathered at compile time. The leading anonymous union
 * holds stage-specific state; the active member is selected by ::stage. The
 * explicit _pad arrays keep the union a fixed 32 bytes across stages.
 */
struct hk_shader_info {
   union {
      struct {
         /* Bitmask of vertex attribute slots read */
         uint32_t attribs_read;

         /* Bitset of individual attribute components read, 4 per attrib */
         BITSET_DECLARE(attrib_components_read, AGX_MAX_ATTRIBS * 4);

         uint8_t cull_distance_array_size;
         uint8_t _pad[7];
      } vs;

      struct {
         struct agx_interp_info interp;
         struct agx_fs_epilog_link_info epilog_key;

         bool reads_sample_mask;
         bool post_depth_coverage;
         bool uses_sample_shading;
         bool early_fragment_tests;
         bool writes_memory;

         uint8_t _pad[7];
      } fs;

      struct {
         /* Mask of per-vertex outputs written by the TCS */
         uint64_t tcs_per_vertex_outputs;

         uint32_t tcs_output_stride;
         uint8_t tcs_output_patch_size;
         uint8_t tcs_nr_patch_outputs;

         /* Merged tessellation state, see hk_tess_info_merge */
         struct hk_tess_info info;
      } tess;

      struct {
         unsigned count_words;
         enum mesa_prim out_prim;
         uint8_t _pad[27];
      } gs;

      /* Used to initialize the union for other stages */
      uint8_t _pad[32];
   };

   /* Unlinked layout of the unified vertex store outputs */
   struct agx_unlinked_uvs_layout uvs;

   /* Transform feedback buffer strides */
   uint8_t xfb_stride[MAX_XFB_BUFFERS];

   gl_shader_stage stage : 8;
   uint8_t clip_distance_array_size;
   uint8_t cull_distance_array_size;
   uint8_t _pad0[1];

   /* XXX: is there a less goofy way to do this? I really don't want dynamic
    * allocation here.
    */
   nir_xfb_info xfb_info;
   nir_xfb_output_info xfb_outputs[64];
};
126
127 /*
128 * Hash table keys for fast-linked shader variants. These contain the entire
129 * prolog/epilog key so we only do 1 hash table lookup instead of 2 in the
130 * general case where the linked shader is already ready.
131 */
132 struct hk_fast_link_key_vs {
133 struct agx_vs_prolog_key prolog;
134 };
135
/* Fast-link key for fragment shaders: prolog + epilog keys plus the sample
 * count, which together determine the linked variant.
 */
struct hk_fast_link_key_fs {
   unsigned nr_samples_shaded;
   struct agx_fs_prolog_key prolog;
   struct agx_fs_epilog_key epilog;
};
141
/* A single compiled hardware shader variant. One or more hk_shaders make up
 * an hk_api_shader (the VkShader object).
 */
struct hk_shader {
   /* Compiled shader part; must be first for downcasting */
   struct agx_shader_part b;

   struct hk_shader_info info;
   struct agx_fragment_face_2_packed frag_face;
   struct agx_counts_packed counts;

   /* Compiled machine code, owned by this shader */
   const void *code_ptr;
   uint32_t code_size;

   /* Read-only data section contents */
   const void *data_ptr;
   uint32_t data_size;

   /* BO for any uploaded shader part */
   struct agx_bo *bo;

   /* Cache of fast linked variants, keyed by hk_fast_link_key_*; lock
    * guards the hash table.
    */
   struct {
      simple_mtx_t lock;
      struct hash_table *ht;
   } linked;

   /* If there's only a single possibly linked variant, direct pointer. TODO:
    * Union with the cache to save some space?
    */
   struct hk_linked_shader *only_linked;

   /* Address to the uploaded preamble section. Preambles are uploaded
    * separately from fast-linked main shaders.
    */
   uint64_t preamble_addr;

   /* Address of the start of the shader data section */
   uint64_t data_addr;
};
177
/* Variants compiled for a vertex (or tess eval) shader. Which one runs
 * depends on the next pipeline stage.
 */
enum hk_vs_variant {
   /* Hardware vertex shader, when next stage is fragment */
   HK_VS_VARIANT_HW,

   /* Hardware compute shader, when next is geometry/tessellation */
   HK_VS_VARIANT_SW,

   /* Number of variants, for array sizing */
   HK_VS_VARIANTS,
};
187
/* Variants compiled for a geometry shader. Each compute variant comes in a
 * normal and a rasterizer-discard flavour; the *_NO_RAST value directly
 * follows its base so helpers can index with (base + rast_disc).
 */
enum hk_gs_variant {
   /* Hardware vertex shader used for rasterization */
   HK_GS_VARIANT_RAST,

   /* Main compute shader */
   HK_GS_VARIANT_MAIN,
   HK_GS_VARIANT_MAIN_NO_RAST,

   /* Count compute shader */
   HK_GS_VARIANT_COUNT,
   HK_GS_VARIANT_COUNT_NO_RAST,

   /* Pre-GS compute shader */
   HK_GS_VARIANT_PRE,
   HK_GS_VARIANT_PRE_NO_RAST,

   /* Number of variants, for array sizing */
   HK_GS_VARIANTS,
};
206
207 /* clang-format off */
208 static const char *hk_gs_variant_name[] = {
209 [HK_GS_VARIANT_RAST] = "Rasterization",
210 [HK_GS_VARIANT_MAIN] = "Main",
211 [HK_GS_VARIANT_MAIN_NO_RAST] = "Main (rast. discard)",
212 [HK_GS_VARIANT_COUNT] = "Count",
213 [HK_GS_VARIANT_COUNT_NO_RAST] = "Count (rast. discard)",
214 [HK_GS_VARIANT_PRE] = "Pre-GS",
215 [HK_GS_VARIANT_PRE_NO_RAST] = "Pre-GS (rast. discard)",
216 };
217 /* clang-format on */
218
219 static inline unsigned
hk_num_variants(gl_shader_stage stage)220 hk_num_variants(gl_shader_stage stage)
221 {
222 switch (stage) {
223 case MESA_SHADER_VERTEX:
224 case MESA_SHADER_TESS_EVAL:
225 return HK_VS_VARIANTS;
226
227 case MESA_SHADER_GEOMETRY:
228 return HK_GS_VARIANTS;
229
230 default:
231 return 1;
232 }
233 }
234
235 /*
236 * An hk_api shader maps 1:1 to a VkShader object. An hk_api_shader may contain
237 * multiple hardware hk_shader's, built at shader compile time. This complexity
238 * is required to efficiently implement the legacy geometry pipeline.
239 */
240 struct hk_api_shader {
241 struct vk_shader vk;
242
243 /* Is this an internal passthrough geometry shader? */
244 bool is_passthrough;
245
246 struct hk_shader variants[];
247 };
248
/* Iterate `var` over every variant of an hk_api_shader */
#define hk_foreach_variant(api_shader, var)                                    \
   for (struct hk_shader *var = api_shader->variants;                          \
        var < api_shader->variants + hk_num_variants(api_shader->vk.stage);    \
        ++var)
253
254 static const char *
hk_variant_name(struct hk_api_shader * obj,struct hk_shader * variant)255 hk_variant_name(struct hk_api_shader *obj, struct hk_shader *variant)
256 {
257 unsigned i = variant - obj->variants;
258 assert(i < hk_num_variants(obj->vk.stage));
259
260 if (hk_num_variants(obj->vk.stage) == 1) {
261 return NULL;
262 } else if (obj->vk.stage == MESA_SHADER_GEOMETRY) {
263 assert(i < ARRAY_SIZE(hk_gs_variant_name));
264 return hk_gs_variant_name[i];
265 } else {
266 assert(i < 2);
267 return i == HK_VS_VARIANT_SW ? "Software" : "Hardware";
268 }
269 }
270
271 static struct hk_shader *
hk_only_variant(struct hk_api_shader * obj)272 hk_only_variant(struct hk_api_shader *obj)
273 {
274 if (!obj)
275 return NULL;
276
277 assert(hk_num_variants(obj->vk.stage) == 1);
278 return &obj->variants[0];
279 }
280
281 static struct hk_shader *
hk_any_variant(struct hk_api_shader * obj)282 hk_any_variant(struct hk_api_shader *obj)
283 {
284 if (!obj)
285 return NULL;
286
287 return &obj->variants[0];
288 }
289
290 static struct hk_shader *
hk_main_gs_variant(struct hk_api_shader * obj,bool rast_disc)291 hk_main_gs_variant(struct hk_api_shader *obj, bool rast_disc)
292 {
293 return &obj->variants[HK_GS_VARIANT_MAIN + rast_disc];
294 }
295
296 static struct hk_shader *
hk_count_gs_variant(struct hk_api_shader * obj,bool rast_disc)297 hk_count_gs_variant(struct hk_api_shader *obj, bool rast_disc)
298 {
299 return &obj->variants[HK_GS_VARIANT_COUNT + rast_disc];
300 }
301
302 static struct hk_shader *
hk_pre_gs_variant(struct hk_api_shader * obj,bool rast_disc)303 hk_pre_gs_variant(struct hk_api_shader *obj, bool rast_disc)
304 {
305 return &obj->variants[HK_GS_VARIANT_PRE + rast_disc];
306 }
307
/* Upper bound on the size of the baked USC words for a linked shader: one of
 * each fixed USC record plus up to 9 uniform records.
 */
#define HK_MAX_LINKED_USC_SIZE                                                 \
   (AGX_USC_PRESHADER_LENGTH + AGX_USC_FRAGMENT_PROPERTIES_LENGTH +            \
    AGX_USC_REGISTERS_LENGTH + AGX_USC_SHADER_LENGTH + AGX_USC_SHARED_LENGTH + \
    AGX_USC_SAMPLER_LENGTH + (AGX_USC_UNIFORM_LENGTH * 9))
312
/* A fully fast-linked shader (main + prolog/epilog), ready to bind */
struct hk_linked_shader {
   /* Linked shader; must be first for downcasting */
   struct agx_linked_shader b;

   /* True if the VS prolog uses software indexing, either for geom/tess or
    * adjacency primitives.
    */
   bool sw_indexing;

   /* Distinct from hk_shader::counts due to addition of cf_binding_count, which
    * is delayed since it depends on cull distance.
    */
   struct agx_fragment_shader_word_0_packed fs_counts;

   /* Baked USC words to bind this linked shader */
   struct {
      uint8_t data[HK_MAX_LINKED_USC_SIZE];
      size_t size;
   } usc;
};
332
/* Link a main shader with optional prolog/epilog parts into an
 * hk_linked_shader. Caller owns the returned object.
 */
struct hk_linked_shader *hk_fast_link(struct hk_device *dev, bool fragment,
                                      struct hk_shader *main,
                                      struct agx_shader_part *prolog,
                                      struct agx_shader_part *epilog,
                                      unsigned nr_samples_shaded);

/* vk_device_shader_ops implementation for this driver */
extern const struct vk_device_shader_ops hk_device_shader_ops;

uint64_t
hk_physical_device_compiler_flags(const struct hk_physical_device *pdev);
343
344 static inline nir_address_format
hk_buffer_addr_format(VkPipelineRobustnessBufferBehaviorEXT robustness)345 hk_buffer_addr_format(VkPipelineRobustnessBufferBehaviorEXT robustness)
346 {
347 switch (robustness) {
348 case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT:
349 return nir_address_format_64bit_global_32bit_offset;
350 case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT:
351 case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT:
352 return nir_address_format_64bit_bounded_global;
353 default:
354 unreachable("Invalid robust buffer access behavior");
355 }
356 }
357
/* NIR pass lowering UVS index intrinsics given the VS uniform base */
bool hk_lower_uvs_index(nir_shader *s, unsigned vs_uniform_base);

/* NIR pass lowering descriptor access according to the robustness state and
 * bound set layouts.
 */
bool
hk_nir_lower_descriptors(nir_shader *nir,
                         const struct vk_pipeline_robustness_state *rs,
                         uint32_t set_layout_count,
                         struct vk_descriptor_set_layout *const *set_layouts);

/* Run the driver's common NIR lowering on a shader */
void hk_lower_nir(struct hk_device *dev, nir_shader *nir,
                  const struct vk_pipeline_robustness_state *rs,
                  bool is_multiview, uint32_t set_layout_count,
                  struct vk_descriptor_set_layout *const *set_layouts);

/* Compile NIR into an hk_api_shader; on success *shader_out is owned by the
 * caller and freed with hk_api_shader_destroy.
 */
VkResult hk_compile_shader(struct hk_device *dev,
                           struct vk_shader_compile_info *info,
                           const struct vk_graphics_pipeline_state *state,
                           const VkAllocationCallbacks *pAllocator,
                           struct hk_api_shader **shader_out);

void hk_preprocess_nir_internal(struct vk_physical_device *vk_pdev,
                                nir_shader *nir);

void hk_api_shader_destroy(struct vk_device *vk_dev,
                           struct vk_shader *vk_shader,
                           const VkAllocationCallbacks *pAllocator);

const nir_shader_compiler_options *
hk_get_nir_options(struct vk_physical_device *vk_pdev, gl_shader_stage stage,
                   UNUSED const struct vk_pipeline_robustness_state *rs);

/* Build (or fetch from cache) an internal meta shader from a NIR builder
 * callback keyed by (builder, data).
 */
struct hk_api_shader *hk_meta_shader(struct hk_device *dev,
                                     hk_internal_builder_t builder, void *data,
                                     size_t data_size);
390
/* Key describing an internal passthrough geometry shader, used when the
 * pipeline needs a GS (e.g. for transform feedback) but the app did not
 * supply one.
 */
struct hk_passthrough_gs_key {
   /* Bit mask of outputs written by the VS/TES, to be passed through */
   uint64_t outputs;

   /* Clip/cull sizes, implies clip/cull written in output */
   uint8_t clip_distance_array_size;
   uint8_t cull_distance_array_size;

   /* Transform feedback buffer strides */
   uint8_t xfb_stride[MAX_XFB_BUFFERS];

   /* Decomposed primitive */
   enum mesa_prim prim;

   /* Transform feedback info. Must add nir_xfb_info_size to get the key size */
   nir_xfb_info xfb_info;
};

/* NIR builder callback emitting a passthrough GS for an
 * hk_passthrough_gs_key (passed as key_).
 */
void hk_nir_passthrough_gs(struct nir_builder *b, const void *key_);
410