• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2024 Valve Corporation
3  * Copyright 2024 Alyssa Rosenzweig
4  * Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
5  * SPDX-License-Identifier: MIT
6  */
7 
8 #pragma once
9 
10 #include "asahi/compiler/agx_compile.h"
11 #include "util/macros.h"
12 #include "agx_linker.h"
13 #include "agx_nir_lower_vbo.h"
14 #include "agx_pack.h"
15 #include "agx_usc.h"
16 #include "agx_uvs.h"
17 
18 #include "hk_device.h"
19 #include "hk_device_memory.h"
20 #include "hk_private.h"
21 
22 #include "nir_xfb_info.h"
23 #include "shader_enums.h"
24 #include "vk_pipeline_cache.h"
25 
26 #include "nir.h"
27 
28 #include "vk_shader.h"
29 
30 struct hk_physical_device;
31 struct hk_pipeline_compilation_ctx;
32 struct vk_descriptor_set_layout;
33 struct vk_graphics_pipeline_state;
34 struct vk_pipeline_cache;
35 struct vk_pipeline_layout;
36 struct vk_pipeline_robustness_state;
37 struct vk_shader_module;
38 
39 /* TODO: Make dynamic */
40 #define HK_ROOT_UNIFORM       104
41 #define HK_IMAGE_HEAP_UNIFORM 108
42 
/* Tessellation state shared between TCS and TES. Deliberately packed into
 * exactly 32 bits so two infos can be combined with a bitwise OR of the raw
 * representation (see hk_tess_info_merge below).
 */
struct hk_tess_info {
   /* Output primitive mode (triangles/quads/isolines) */
   enum tess_primitive_mode mode : 8;
   /* Edge spacing (equal / fractional-even / fractional-odd) */
   enum gl_tess_spacing spacing  : 8;
   /* Point mode: emit points instead of the tessellated primitive */
   bool points;
   /* Winding of generated triangles is counter-clockwise */
   bool ccw;
};
/* The memcpy-based merge below relies on this exact size */
static_assert(sizeof(struct hk_tess_info) == 4, "packed");
50 
51 static struct hk_tess_info
hk_tess_info_merge(struct hk_tess_info a,struct hk_tess_info b)52 hk_tess_info_merge(struct hk_tess_info a, struct hk_tess_info b)
53 {
54    static_assert(TESS_PRIMITIVE_UNSPECIFIED == 0, "zero state");
55    static_assert(TESS_SPACING_UNSPECIFIED == 0, "zero state");
56 
57    /* Just merge by OR'ing the raw bits */
58    uint32_t x, y;
59    memcpy(&x, &a, sizeof(x));
60    memcpy(&y, &b, sizeof(y));
61 
62    x |= y;
63 
64    struct hk_tess_info out;
65    memcpy(&out, &x, sizeof(out));
66    return out;
67 }
68 
/* Per-stage metadata recorded at compile time. The anonymous union holds
 * stage-specific state; each arm carries explicit padding so all arms share
 * a common size with the generic _pad[32] arm used for other stages.
 */
struct hk_shader_info {
   union {
      struct {
         /* Bitmask of vertex attribute slots read */
         uint32_t attribs_read;
         /* Per-component read mask, 4 components per attribute */
         BITSET_DECLARE(attrib_components_read, AGX_MAX_ATTRIBS * 4);
         uint8_t cull_distance_array_size;
         uint8_t _pad[7];
      } vs;

      struct {
         /* Varying interpolation qualifiers */
         struct agx_interp_info interp;
         /* Key used to select/link a matching fragment epilog */
         struct agx_fs_epilog_link_info epilog_key;

         bool reads_sample_mask;
         bool post_depth_coverage;
         bool uses_sample_shading;
         bool early_fragment_tests;
         bool writes_memory;

         uint8_t _pad[7];
      } fs;

      struct {
         /* Bitmask of per-vertex TCS outputs written */
         uint64_t tcs_per_vertex_outputs;
         /* NOTE(review): presumably the byte stride of one patch's TCS
          * output block — confirm against the tessellation lowering code.
          */
         uint32_t tcs_output_stride;
         uint8_t tcs_output_patch_size;
         uint8_t tcs_nr_patch_outputs;

         /* Packed mode/spacing/winding state (see hk_tess_info_merge) */
         struct hk_tess_info info;
      } tess;

      struct {
         /* NOTE(review): presumably words per primitive in the GS count
          * buffer — confirm against the geometry lowering code.
          */
         unsigned count_words;
         /* Primitive type produced by the rasterization variant */
         enum mesa_prim out_prim;
         uint8_t _pad[27];
      } gs;

      /* Used to initialize the union for other stages */
      uint8_t _pad[32];
   };

   /* Unlinked UVS (varying store) layout, finalized at link time */
   struct agx_unlinked_uvs_layout uvs;

   /* Transform feedback buffer strides */
   uint8_t xfb_stride[MAX_XFB_BUFFERS];

   gl_shader_stage stage : 8;
   uint8_t clip_distance_array_size;
   uint8_t cull_distance_array_size;
   uint8_t _pad0[1];

   /* XXX: is there a less goofy way to do this? I really don't want dynamic
    * allocation here.
    */
   nir_xfb_info xfb_info;
   nir_xfb_output_info xfb_outputs[64];
};
126 
127 /*
128  * Hash table keys for fast-linked shader variants. These contain the entire
129  * prolog/epilog key so we only do 1 hash table lookup instead of 2 in the
130  * general case where the linked shader is already ready.
131  */
/* Key for a fast-linked vertex shader variant: just the prolog key */
struct hk_fast_link_key_vs {
   struct agx_vs_prolog_key prolog;
};

/* Key for a fast-linked fragment shader variant: prolog + epilog keys plus
 * the forced sample-shading rate.
 */
struct hk_fast_link_key_fs {
   /* NOTE(review): presumably the sample count when sample shading is
    * active — confirm against hk_fast_link callers.
    */
   unsigned nr_samples_shaded;
   struct agx_fs_prolog_key prolog;
   struct agx_fs_epilog_key epilog;
};
141 
/* A single compiled shader binary plus everything needed to upload, bind and
 * fast-link it at draw time.
 */
struct hk_shader {
   /* Compiler output for the main shader part */
   struct agx_shader_part b;

   struct hk_shader_info info;
   struct agx_fragment_face_2_packed frag_face;
   struct agx_counts_packed counts;

   /* Machine code for the shader (CPU copy) */
   const void *code_ptr;
   uint32_t code_size;

   /* Constant data section for the shader (CPU copy) */
   const void *data_ptr;
   uint32_t data_size;

   /* BO for any uploaded shader part */
   struct agx_bo *bo;

   /* Cache of fast linked variants, keyed by hk_fast_link_key_*; lock
    * guards concurrent lookups/inserts.
    */
   struct {
      simple_mtx_t lock;
      struct hash_table *ht;
   } linked;

   /* If there's only a single possibly linked variant, direct pointer. TODO:
    * Union with the cache to save some space?
    */
   struct hk_linked_shader *only_linked;

   /* Address to the uploaded preamble section. Preambles are uploaded
    * separately from fast-linked main shaders.
    */
   uint64_t preamble_addr;

   /* Address of the start of the shader data section */
   uint64_t data_addr;
};
177 
/* Variants compiled for an API vertex (or tess eval) shader; which one runs
 * depends on the next pipeline stage.
 */
enum hk_vs_variant {
   /* Hardware vertex shader, when next stage is fragment */
   HK_VS_VARIANT_HW,

   /* Hardware compute shader, when next is geometry/tessellation */
   HK_VS_VARIANT_SW,

   HK_VS_VARIANTS,
};

/* Variants compiled for an API geometry shader. Each compute variant's
 * rasterization-discard twin sits at the immediately following value — the
 * hk_*_gs_variant accessors index with "+ rast_disc" and rely on this.
 */
enum hk_gs_variant {
   /* Hardware vertex shader used for rasterization */
   HK_GS_VARIANT_RAST,

   /* Main compute shader */
   HK_GS_VARIANT_MAIN,
   HK_GS_VARIANT_MAIN_NO_RAST,

   /* Count compute shader */
   HK_GS_VARIANT_COUNT,
   HK_GS_VARIANT_COUNT_NO_RAST,

   /* Pre-GS compute shader */
   HK_GS_VARIANT_PRE,
   HK_GS_VARIANT_PRE_NO_RAST,

   HK_GS_VARIANTS,
};
206 
207 /* clang-format off */
208 static const char *hk_gs_variant_name[] = {
209    [HK_GS_VARIANT_RAST] = "Rasterization",
210    [HK_GS_VARIANT_MAIN] = "Main",
211    [HK_GS_VARIANT_MAIN_NO_RAST] = "Main (rast. discard)",
212    [HK_GS_VARIANT_COUNT] = "Count",
213    [HK_GS_VARIANT_COUNT_NO_RAST] = "Count (rast. discard)",
214    [HK_GS_VARIANT_PRE] = "Pre-GS",
215    [HK_GS_VARIANT_PRE_NO_RAST] = "Pre-GS (rast. discard)",
216 };
217 /* clang-format on */
218 
219 static inline unsigned
hk_num_variants(gl_shader_stage stage)220 hk_num_variants(gl_shader_stage stage)
221 {
222    switch (stage) {
223    case MESA_SHADER_VERTEX:
224    case MESA_SHADER_TESS_EVAL:
225       return HK_VS_VARIANTS;
226 
227    case MESA_SHADER_GEOMETRY:
228       return HK_GS_VARIANTS;
229 
230    default:
231       return 1;
232    }
233 }
234 
235 /*
236  * An hk_api shader maps 1:1 to a VkShader object. An hk_api_shader may contain
237  * multiple hardware hk_shader's, built at shader compile time. This complexity
238  * is required to efficiently implement the legacy geometry pipeline.
239  */
struct hk_api_shader {
   struct vk_shader vk;

   /* Is this an internal passthrough geometry shader? */
   bool is_passthrough;

   /* One entry per variant; length is hk_num_variants(vk.stage) */
   struct hk_shader variants[];
};
248 
/* Iterate `var` over every hk_shader variant of `api_shader`; the variant
 * count depends on the stage (see hk_num_variants).
 */
#define hk_foreach_variant(api_shader, var)                                    \
   for (struct hk_shader *var = api_shader->variants;                          \
        var < api_shader->variants + hk_num_variants(api_shader->vk.stage);    \
        ++var)
253 
254 static const char *
hk_variant_name(struct hk_api_shader * obj,struct hk_shader * variant)255 hk_variant_name(struct hk_api_shader *obj, struct hk_shader *variant)
256 {
257    unsigned i = variant - obj->variants;
258    assert(i < hk_num_variants(obj->vk.stage));
259 
260    if (hk_num_variants(obj->vk.stage) == 1) {
261       return NULL;
262    } else if (obj->vk.stage == MESA_SHADER_GEOMETRY) {
263       assert(i < ARRAY_SIZE(hk_gs_variant_name));
264       return hk_gs_variant_name[i];
265    } else {
266       assert(i < 2);
267       return i == HK_VS_VARIANT_SW ? "Software" : "Hardware";
268    }
269 }
270 
271 static struct hk_shader *
hk_only_variant(struct hk_api_shader * obj)272 hk_only_variant(struct hk_api_shader *obj)
273 {
274    if (!obj)
275       return NULL;
276 
277    assert(hk_num_variants(obj->vk.stage) == 1);
278    return &obj->variants[0];
279 }
280 
281 static struct hk_shader *
hk_any_variant(struct hk_api_shader * obj)282 hk_any_variant(struct hk_api_shader *obj)
283 {
284    if (!obj)
285       return NULL;
286 
287    return &obj->variants[0];
288 }
289 
290 static struct hk_shader *
hk_main_gs_variant(struct hk_api_shader * obj,bool rast_disc)291 hk_main_gs_variant(struct hk_api_shader *obj, bool rast_disc)
292 {
293    return &obj->variants[HK_GS_VARIANT_MAIN + rast_disc];
294 }
295 
296 static struct hk_shader *
hk_count_gs_variant(struct hk_api_shader * obj,bool rast_disc)297 hk_count_gs_variant(struct hk_api_shader *obj, bool rast_disc)
298 {
299    return &obj->variants[HK_GS_VARIANT_COUNT + rast_disc];
300 }
301 
302 static struct hk_shader *
hk_pre_gs_variant(struct hk_api_shader * obj,bool rast_disc)303 hk_pre_gs_variant(struct hk_api_shader *obj, bool rast_disc)
304 {
305    return &obj->variants[HK_GS_VARIANT_PRE + rast_disc];
306 }
307 
/* Upper bound on the baked USC words for one linked shader. NOTE(review):
 * presumably the sum of every USC control structure hk_fast_link can emit,
 * with room for up to 9 uniform entries — confirm against hk_fast_link.
 */
#define HK_MAX_LINKED_USC_SIZE                                                 \
   (AGX_USC_PRESHADER_LENGTH + AGX_USC_FRAGMENT_PROPERTIES_LENGTH +            \
    AGX_USC_REGISTERS_LENGTH + AGX_USC_SHADER_LENGTH + AGX_USC_SHARED_LENGTH + \
    AGX_USC_SAMPLER_LENGTH + (AGX_USC_UNIFORM_LENGTH * 9))

/* A fully linked shader (main part joined with prolog/epilog), ready to be
 * bound via its pre-baked USC words.
 */
struct hk_linked_shader {
   struct agx_linked_shader b;

   /* True if the VS prolog uses software indexing, either for geom/tess or
    * adjacency primitives.
    */
   bool sw_indexing;

   /* Distinct from hk_shader::counts due to addition of cf_binding_count, which
    * is delayed since it depends on cull distance.
    */
   struct agx_fragment_shader_word_0_packed fs_counts;

   /* Baked USC words to bind this linked shader */
   struct {
      uint8_t data[HK_MAX_LINKED_USC_SIZE];
      size_t size;
   } usc;
};
332 
333 struct hk_linked_shader *hk_fast_link(struct hk_device *dev, bool fragment,
334                                       struct hk_shader *main,
335                                       struct agx_shader_part *prolog,
336                                       struct agx_shader_part *epilog,
337                                       unsigned nr_samples_shaded);
338 
339 extern const struct vk_device_shader_ops hk_device_shader_ops;
340 
341 uint64_t
342 hk_physical_device_compiler_flags(const struct hk_physical_device *pdev);
343 
344 static inline nir_address_format
hk_buffer_addr_format(VkPipelineRobustnessBufferBehaviorEXT robustness)345 hk_buffer_addr_format(VkPipelineRobustnessBufferBehaviorEXT robustness)
346 {
347    switch (robustness) {
348    case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT:
349       return nir_address_format_64bit_global_32bit_offset;
350    case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT:
351    case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT:
352       return nir_address_format_64bit_bounded_global;
353    default:
354       unreachable("Invalid robust buffer access behavior");
355    }
356 }
357 
358 bool hk_lower_uvs_index(nir_shader *s, unsigned vs_uniform_base);
359 
360 bool
361 hk_nir_lower_descriptors(nir_shader *nir,
362                          const struct vk_pipeline_robustness_state *rs,
363                          uint32_t set_layout_count,
364                          struct vk_descriptor_set_layout *const *set_layouts);
365 void hk_lower_nir(struct hk_device *dev, nir_shader *nir,
366                   const struct vk_pipeline_robustness_state *rs,
367                   bool is_multiview, uint32_t set_layout_count,
368                   struct vk_descriptor_set_layout *const *set_layouts);
369 
370 VkResult hk_compile_shader(struct hk_device *dev,
371                            struct vk_shader_compile_info *info,
372                            const struct vk_graphics_pipeline_state *state,
373                            const VkAllocationCallbacks *pAllocator,
374                            struct hk_api_shader **shader_out);
375 
376 void hk_preprocess_nir_internal(struct vk_physical_device *vk_pdev,
377                                 nir_shader *nir);
378 
379 void hk_api_shader_destroy(struct vk_device *vk_dev,
380                            struct vk_shader *vk_shader,
381                            const VkAllocationCallbacks *pAllocator);
382 
383 const nir_shader_compiler_options *
384 hk_get_nir_options(struct vk_physical_device *vk_pdev, gl_shader_stage stage,
385                    UNUSED const struct vk_pipeline_robustness_state *rs);
386 
387 struct hk_api_shader *hk_meta_shader(struct hk_device *dev,
388                                      hk_internal_builder_t builder, void *data,
389                                      size_t data_size);
390 
/* Key describing an internal passthrough geometry shader to be built by
 * hk_nir_passthrough_gs.
 */
struct hk_passthrough_gs_key {
   /* Bit mask of outputs written by the VS/TES, to be passed through */
   uint64_t outputs;

   /* Clip/cull sizes, implies clip/cull written in output */
   uint8_t clip_distance_array_size;
   uint8_t cull_distance_array_size;

   /* Transform feedback buffer strides */
   uint8_t xfb_stride[MAX_XFB_BUFFERS];

   /* Decomposed primitive */
   enum mesa_prim prim;

   /* Transform feedback info. Must add nir_xfb_info_size to get the key size */
   nir_xfb_info xfb_info;
};
408 
409 void hk_nir_passthrough_gs(struct nir_builder *b, const void *key_);
410