/*
 * Copyright 2021 Alyssa Rosenzweig
 * Copyright 2019-2021 Collabora, Ltd.
 * SPDX-License-Identifier: MIT
 */

#pragma once

#include "asahi/compiler/agx_compile.h"
#include "asahi/genxml/agx_pack.h"
#include "asahi/layout/layout.h"
#include "asahi/lib/agx_bo.h"
#include "asahi/lib/agx_device.h"
#include "asahi/lib/agx_nir_lower_vbo.h"
#include "asahi/lib/agx_scratch.h"
#include "asahi/lib/agx_tilebuffer.h"
#include "asahi/lib/pool.h"
#include "asahi/lib/shaders/geometry.h"
#include "compiler/nir/nir_lower_blend.h"
#include "compiler/shader_enums.h"
#include "gallium/auxiliary/util/u_blitter.h"
#include "gallium/include/pipe/p_context.h"
#include "gallium/include/pipe/p_screen.h"
#include "gallium/include/pipe/p_state.h"
#include "pipe/p_defines.h"
#include "util/bitset.h"
#include "util/disk_cache.h"
#include "util/hash_table.h"
#include "util/u_range.h"
#include "agx_helpers.h"
#include "agx_meta.h"

#ifdef __GLIBC__
#include <errno.h>
#define agx_msg(fmt, ...)                                                      \
   fprintf(stderr, "[%s] " fmt, program_invocation_short_name, ##__VA_ARGS__)
#else
#define agx_msg(...) fprintf(stderr, __VA_ARGS__)
#endif
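
/* Illustrative use (a sketch, not a call site taken from this driver):
 *
 *    agx_msg("unsupported query type %u\n", query_type);
 *
 * On glibc this prefixes the message with the program name, e.g.
 * "[glmark2] unsupported query type 5".
 */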

#define AGX_NUM_TEXTURE_STATE_REGS 16

struct agx_streamout_target {
   struct pipe_stream_output_target base;
   struct pipe_resource *offset;

   /* Current stride (bytes per vertex) */
   uint32_t stride;
};

static inline struct agx_streamout_target *
agx_so_target(struct pipe_stream_output_target *target)
{
   return (struct agx_streamout_target *)target;
}

struct agx_streamout {
   struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
   unsigned num_targets;
};

/* Shaders can access fixed-function state through system values.
 * It is convenient to stash all of this information into a single "root"
 * descriptor, then push individual parts as needed.
 *
 * In the future, we could optimize this to reduce CPU overhead, e.g. splitting
 * into multiple descriptors for finer dirty tracking. This is not ABI with the
 * compiler. The layout is up to us and handled by our code lowering system
 * values to uniforms.
 */
enum agx_sysval_table {
   AGX_SYSVAL_TABLE_ROOT,
   AGX_SYSVAL_TABLE_PARAMS,
   AGX_SYSVAL_TABLE_GRID,
   AGX_SYSVAL_TABLE_VS,
   AGX_SYSVAL_TABLE_TCS,
   AGX_SYSVAL_TABLE_TES,
   AGX_SYSVAL_TABLE_GS,
   AGX_SYSVAL_TABLE_FS,
   AGX_SYSVAL_TABLE_CS,
   AGX_NUM_SYSVAL_TABLES
};

#define AGX_SYSVAL_STAGE(stage) (AGX_SYSVAL_TABLE_VS + (stage))

static_assert(AGX_SYSVAL_STAGE(PIPE_SHADER_VERTEX) == AGX_SYSVAL_TABLE_VS,
              "fixed enum orderings");
static_assert(AGX_SYSVAL_STAGE(PIPE_SHADER_TESS_CTRL) == AGX_SYSVAL_TABLE_TCS,
              "fixed enum orderings");
static_assert(AGX_SYSVAL_STAGE(PIPE_SHADER_TESS_EVAL) == AGX_SYSVAL_TABLE_TES,
              "fixed enum orderings");
static_assert(AGX_SYSVAL_STAGE(PIPE_SHADER_GEOMETRY) == AGX_SYSVAL_TABLE_GS,
              "fixed enum orderings");
static_assert(AGX_SYSVAL_STAGE(PIPE_SHADER_FRAGMENT) == AGX_SYSVAL_TABLE_FS,
              "fixed enum orderings");
static_assert(AGX_SYSVAL_STAGE(PIPE_SHADER_COMPUTE) == AGX_SYSVAL_TABLE_CS,
              "fixed enum orderings");

/* Root system value table */
struct PACKED agx_draw_uniforms {
   /* Pointers to the system value tables themselves (for indirection) */
   uint64_t tables[AGX_NUM_SYSVAL_TABLES];

   /* Vertex buffer object bases, if present. If vertex robustness is disabled,
    * attrib_base maps VBOs directly and attrib_clamp is undefined. If vertex
    * robustness is enabled, attrib_base maps attributes and attrib_clamp is an
    * inclusive clamp on vertex/divided instance indices.
    */
   uint64_t attrib_base[PIPE_MAX_ATTRIBS];
   uint32_t attrib_clamp[PIPE_MAX_ATTRIBS];

   /* Addresses for the results of pipeline statistics queries */
   uint64_t pipeline_statistics[PIPE_STAT_QUERY_MS_INVOCATIONS];

   /* Address of input assembly buffer if geom/tess is used, else 0 */
   uint64_t input_assembly;

   /* Address of tessellation param buffer if tessellation is used, else 0 */
   uint64_t tess_params;

   /* Address of geometry param buffer if geometry shaders are used, else 0 */
   uint64_t geometry_params;

   /* Address of polygon stipple mask if used */
   uint64_t polygon_stipple;

   /* Blend constant if any */
   float blend_constant[4];

   /* glPointSize value */
   float fixed_point_size;

   /* Value of the multisample control register, containing sample positions in
    * each byte (x in low nibble, y in high nibble).
    */
   uint32_t ppp_multisamplectl;

   /* gl_DrawID for a direct multidraw */
   uint32_t draw_id;

   /* Sprite coord replacement mask */
   uint16_t sprite_mask;

   /* glSampleMask */
   uint16_t sample_mask;

   /* Nonzero if the last vertex stage writes the layer ID, zero otherwise */
   uint16_t layer_id_written;

   /* Nonzero for indexed draws, zero otherwise */
   uint16_t is_indexed_draw;

   /* Zero for [0, 1] clipping, 0.5 for [-1, 1] clipping. */
   uint16_t clip_z_coeff;
};

struct PACKED agx_stage_uniforms {
   /* Pointer to binding table for texture descriptor, or 0 if none. This must
    * be first so that u0_u1 is always available for lowering binding
    * tables to bindless access.
    */
   uint64_t texture_base;

   /* Uniform buffer objects */
   uint64_t ubo_base[PIPE_MAX_CONSTANT_BUFFERS];
   uint32_t ubo_size[PIPE_MAX_CONSTANT_BUFFERS];

   /* Shader storage buffer objects */
   uint64_t ssbo_base[PIPE_MAX_SHADER_BUFFERS];
   uint32_t ssbo_size[PIPE_MAX_SHADER_BUFFERS];

   /* If lowered to bindless, sampler index in the heap */
   uint16_t sampler_handle[PIPE_MAX_SAMPLERS];

   /* LOD bias as float16 */
   uint16_t lod_bias[PIPE_MAX_SAMPLERS];
};

/* The architecture has 512 uniform registers, each 16 bits wide. In a
 * theoretical worst case, we could push to all of them. We use this worst-case
 * maximum because the expression for a tight upper bound is messy and would
 * easily fall out of sync with the code.
 */
#define AGX_MAX_PUSH_RANGES (512)

struct agx_push_range {
   /* Base 16-bit uniform to push to */
   uint16_t uniform;

   /* Offset into the table to push in bytes */
   uint16_t offset;

   /* Which table to push from */
   uint8_t table;

   /* Number of consecutive 16-bit uniforms to push */
   uint8_t length;
};
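
/* Illustrative sketch of how a push range is meant to be applied (not the
 * driver's actual upload path; table_cpu and upload_uniforms are hypothetical
 * helpers): each range copies `length` consecutive 16-bit words starting at
 * byte `offset` of sysval table `table` into uniform registers starting at
 * `uniform`.
 *
 *    for (unsigned i = 0; i < cs->push_range_count; ++i) {
 *       const struct agx_push_range *r = &cs->push[i];
 *
 *       upload_uniforms(r->uniform, table_cpu[r->table] + r->offset,
 *                       r->length * sizeof(uint16_t));
 *    }
 */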

struct agx_compiled_shader {
   /* Uncompiled shader that we belong to */
   const struct agx_uncompiled_shader *so;

   /* Mapped executable memory */
   struct agx_bo *bo;

   /* Metadata returned from the compiler */
   struct agx_shader_info info;

   /* Uniforms the driver must push */
   unsigned push_range_count;
   struct agx_push_range push[AGX_MAX_PUSH_RANGES];

   /* Auxiliary programs, or NULL if not used */
   struct agx_compiled_shader *gs_count, *pre_gs;
   struct agx_compiled_shader *gs_copy;

   /* Output primitive mode for geometry shaders */
   enum mesa_prim gs_output_mode;

   /* Number of words per primitive in the count buffer */
   unsigned gs_count_words;

   /* Logical shader stage used for descriptor access. This may differ from the
    * physical shader stage of the compiled shader, for example when executing a
    * tessellation eval shader as a vertex shader.
    */
   enum pipe_shader_type stage;
};

struct agx_uncompiled_shader {
   struct pipe_shader_state base;
   enum pipe_shader_type type;
   struct blob early_serialized_nir;
   struct blob serialized_nir;
   uint8_t nir_sha1[20];
   struct agx_uncompiled_shader_info info;
   struct hash_table *variants;
   struct agx_uncompiled_shader *passthrough_progs[MESA_PRIM_COUNT][3][2];
   struct agx_uncompiled_shader *passthrough_tcs[32];

   uint32_t xfb_strides[4];
   bool has_xfb_info;
   bool is_xfb_passthrough;

   enum mesa_prim gs_mode;

   /* Whether the shader accesses indexed samplers via the bindless heap */
   bool uses_bindless_samplers;

   /* Set on VS, passed to FS for linkage */
   unsigned base_varying;

   /* Tessellation info */
   struct {
      uint64_t per_vertex_outputs;
      uint32_t output_stride;
      enum gl_tess_spacing spacing;
      enum tess_primitive_mode primitive;
      uint8_t output_patch_size;
      uint8_t nr_patch_outputs;
      bool ccw;
      bool point_mode;
   } tess;
};

enum agx_stage_dirty {
   AGX_STAGE_DIRTY_CONST = BITFIELD_BIT(0),
   AGX_STAGE_DIRTY_SSBO = BITFIELD_BIT(1),
   AGX_STAGE_DIRTY_IMAGE = BITFIELD_BIT(2),
   AGX_STAGE_DIRTY_SAMPLER = BITFIELD_BIT(3),
};

struct agx_stage {
   struct agx_uncompiled_shader *shader;
   uint32_t dirty;

   struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS];
   uint32_t cb_mask;

   struct pipe_shader_buffer ssbo[PIPE_MAX_SHADER_BUFFERS];
   uint32_t ssbo_writable_mask;
   uint32_t ssbo_mask;

   struct pipe_image_view images[PIPE_MAX_SHADER_IMAGES];
   uint32_t image_mask;

   /* Need full CSOs for u_blitter */
   struct agx_sampler_state *samplers[PIPE_MAX_SAMPLERS];
   struct agx_sampler_view *textures[PIPE_MAX_SHADER_SAMPLER_VIEWS];

   /* Does any bound sampler require custom border colours? */
   bool custom_borders;

   unsigned sampler_count, texture_count;
   uint32_t valid_samplers;
};

union agx_batch_result {
};

/* This is a firmware limit. It should be possible to raise to 2048 in the
 * future... still not good enough for VK though :-(
 */
#define AGX_SAMPLER_HEAP_SIZE (1024)

struct agx_sampler_heap {
   struct agx_bo *bo;
   uint16_t count;
};

uint16_t agx_sampler_heap_add(struct agx_device *dev,
                              struct agx_sampler_heap *heap,
                              struct agx_sampler_packed *sampler);
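
/* Sketch of the intended flow (an assumption based on the declarations above,
 * not necessarily the exact call site): when a shader is lowered to bindless
 * samplers, each bound sampler's packed descriptor is added to the heap and
 * the returned index is what lands in agx_stage_uniforms::sampler_handle.
 *
 *    uint16_t handle =
 *       agx_sampler_heap_add(dev, &batch->sampler_heap, &sampler->desc);
 *    batch->stage_uniforms[stage].sampler_handle[i] = handle;
 */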

struct agx_encoder {
   struct agx_bo *bo;
   uint8_t *current;
   uint8_t *end;
};

struct agx_batch {
   struct agx_context *ctx;
   struct pipe_framebuffer_state key;
   uint64_t seqnum;
   uint32_t syncobj;
   uint32_t draws;

   struct agx_tilebuffer_layout tilebuffer_layout;

   /* PIPE_CLEAR_* bitmask */
   uint32_t clear, draw, load, resolve;
   bool initialized;

   uint64_t uploaded_clear_color[PIPE_MAX_COLOR_BUFS];
   double clear_depth;
   unsigned clear_stencil;

   /* Whether we're drawing points, lines, or triangles */
   enum mesa_prim reduced_prim;

   /* Whether the bound FS needs a primitive ID that is not supplied by the
    * bound hardware VS (software GS)
    */
   bool generate_primitive_id;

   /* Current varyings linkage structures */
   uint32_t varyings;

   struct agx_draw_uniforms uniforms;
   struct agx_stage_uniforms stage_uniforms[PIPE_SHADER_TYPES];

   /* Indirect buffer allocated for geometry shader */
   uint64_t geom_indirect;
   struct agx_bo *geom_indirect_bo;

   /* Geometry state buffer if geometry/etc shaders are used */
   uint64_t geometry_state;

   /* Uploaded descriptors */
   uint32_t texture_count[PIPE_SHADER_TYPES];

   uint64_t samplers[PIPE_SHADER_TYPES];
   uint32_t sampler_count[PIPE_SHADER_TYPES];

   struct agx_sampler_heap sampler_heap;

   /* Resource list requirements, represented as a bit set indexed by BO
    * handles (GEM handles on Linux, or IOGPU's equivalent on macOS)
    */
   struct {
      BITSET_WORD *set;
      unsigned bit_count;
   } bo_list;

   /* If true, this batch contains a shader with a potentially incoherent write
    * (e.g. image_write), needing a barrier later to access.
    */
   bool incoherent_writes;

   struct agx_pool pool, pipeline_pool;

   /* We may enqueue both CDM and VDM work, possibly to the same batch for
    * geometry/tessellation.
    */
   struct agx_encoder vdm;
   struct agx_encoder cdm;

   /* Scissor and depth-bias descriptors, uploaded at GPU time */
   struct util_dynarray scissor, depth_bias;

   /* Arrays of GPU pointers that should be written with the batch timestamps */
   struct util_dynarray timestamps;

   /* Result buffer where the kernel places command execution information */
   union agx_batch_result *result;
   size_t result_off;

   /* Actual pointer in a uniform */
   struct agx_bo *geom_params_bo;

   /* Whether each stage uses scratch */
   bool vs_scratch;
   bool fs_scratch;
   bool cs_scratch;

   /* Whether each stage has a preamble using scratch, and if so which bucket.
    * Only zero versus nonzero matters for correctness; the magnitude in
    * buckets is tracked for statistics.
    */
   unsigned vs_preamble_scratch;
   unsigned fs_preamble_scratch;
   unsigned cs_preamble_scratch;
};

struct agx_zsa {
   struct pipe_depth_stencil_alpha_state base;
   struct agx_fragment_face_packed depth;
   struct agx_fragment_stencil_packed front_stencil, back_stencil;

   /* PIPE_CLEAR_* bitmask corresponding to this depth/stencil state */
   uint32_t load, store;
};

struct agx_blend_key {
   nir_lower_blend_rt rt[8];
   unsigned logicop_func;
   bool alpha_to_coverage, alpha_to_one;
};

struct agx_blend {
   struct agx_blend_key key;

   /* PIPE_CLEAR_* bitmask corresponding to this blend state */
   uint32_t store;
};

/* These parts of the vertex element affect the generated code */
struct agx_velem_key {
   uint32_t divisor;
   uint16_t stride;
   uint8_t format;
   uint8_t pad;
};

enum asahi_vs_next_stage {
   ASAHI_VS_FS,
   ASAHI_VS_GS,
   ASAHI_VS_TCS,
};

struct asahi_vs_shader_key {
   struct agx_velem_key attribs[AGX_MAX_VBUFS];
   enum asahi_vs_next_stage next_stage;

   union {
      struct {
         uint8_t index_size_B;
      } gs;

      struct {
         bool fixed_point_size;
         uint64_t outputs_flat_shaded;
         uint64_t outputs_linear_shaded;
      } fs;
   } next;
};

struct agx_vertex_elements {
   unsigned num_attribs;
   struct agx_velem_key key[PIPE_MAX_ATTRIBS];

   /* These parts do not affect the generated code so are not in the key */
   uint16_t src_offsets[PIPE_MAX_ATTRIBS];
   uint16_t buffers[PIPE_MAX_ATTRIBS];
};

struct asahi_fs_shader_key {
   struct agx_blend_key blend;

   /* Need to count FRAGMENT_SHADER_INVOCATIONS */
   bool statistics;

   /* Set if glSampleMask() is used with a mask other than all-1s. If not, we
    * don't want to emit lowering code for it, since it would disable early-Z.
    */
   bool api_sample_mask;
   bool polygon_stipple;

   uint8_t cull_distance_size;
   uint8_t nr_samples;
   enum pipe_format rt_formats[PIPE_MAX_COLOR_BUFS];
};

struct asahi_tcs_shader_key {
   /* Input assembly key. Simplified because we know we're operating on patches.
    */
   uint8_t index_size_B;

   /* Vertex shader key */
   struct agx_velem_key attribs[AGX_MAX_VBUFS];

   /* Tessellation control shaders must be linked with a vertex shader. */
   uint8_t input_nir_sha1[20];
};

struct asahi_gs_shader_key {
   /* Rasterizer shader key */
   uint64_t outputs_flat_shaded;
   uint64_t outputs_linear_shaded;
   bool fixed_point_size;

   /* If true, this GS is run only for its side effects (including XFB) */
   bool rasterizer_discard;
   bool padding[6];
};
static_assert(sizeof(struct asahi_gs_shader_key) == 24, "no holes");

union asahi_shader_key {
   struct asahi_vs_shader_key vs;
   struct asahi_tcs_shader_key tcs;
   struct asahi_gs_shader_key gs;
   struct asahi_fs_shader_key fs;
};

enum agx_dirty {
   AGX_DIRTY_VERTEX = BITFIELD_BIT(0),
   AGX_DIRTY_VIEWPORT = BITFIELD_BIT(1),
   AGX_DIRTY_SCISSOR_ZBIAS = BITFIELD_BIT(2),
   AGX_DIRTY_ZS = BITFIELD_BIT(3),
   AGX_DIRTY_STENCIL_REF = BITFIELD_BIT(4),
   AGX_DIRTY_RS = BITFIELD_BIT(5),
   AGX_DIRTY_SPRITE_COORD_MODE = BITFIELD_BIT(6),
   AGX_DIRTY_PRIM = BITFIELD_BIT(7),

   /* Vertex/fragment pipelines, including uniforms and textures */
   AGX_DIRTY_VS = BITFIELD_BIT(8),
   AGX_DIRTY_FS = BITFIELD_BIT(9),

   /* Just the progs themselves */
   AGX_DIRTY_VS_PROG = BITFIELD_BIT(10),
   AGX_DIRTY_FS_PROG = BITFIELD_BIT(11),

   AGX_DIRTY_BLEND = BITFIELD_BIT(12),
   AGX_DIRTY_QUERY = BITFIELD_BIT(13),
   AGX_DIRTY_XFB = BITFIELD_BIT(14),
   AGX_DIRTY_SAMPLE_MASK = BITFIELD_BIT(15),
   AGX_DIRTY_BLEND_COLOR = BITFIELD_BIT(16),
   AGX_DIRTY_POLY_STIPPLE = BITFIELD_BIT(17),
};

/* Maximum number of in-progress + under-construction GPU batches.
 * Must be large enough for silly workloads that do things like
 * glGenerateMipmap on every frame, otherwise we end up losing performance.
 */
#define AGX_MAX_BATCHES (128)

static_assert(PIPE_TEX_FILTER_NEAREST < 2, "known order");
static_assert(PIPE_TEX_FILTER_LINEAR < 2, "known order");

enum asahi_blit_clamp {
   ASAHI_BLIT_CLAMP_NONE,
   ASAHI_BLIT_CLAMP_UINT_TO_SINT,
   ASAHI_BLIT_CLAMP_SINT_TO_UINT,

   /* keep last */
   ASAHI_BLIT_CLAMP_COUNT,
};

struct asahi_blitter {
   bool active;

   /* [clamp_type][is_array] */
   void *blit_cs[ASAHI_BLIT_CLAMP_COUNT][2];

   /* [filter] */
   void *sampler[2];

   struct pipe_constant_buffer saved_cb;

   bool has_saved_image;
   struct pipe_image_view saved_image;

   unsigned saved_num_sampler_states;
   void *saved_sampler_states[PIPE_MAX_SAMPLERS];

   struct pipe_sampler_view *saved_sampler_view;

   void *saved_cs;
};

struct agx_oq_heap;

struct agx_context {
   struct pipe_context base;
   struct agx_compiled_shader *vs, *fs, *gs, *tcs, *tes;
   uint32_t dirty;

   /* Heap for dynamic memory allocation for geometry/tessellation shaders */
   struct pipe_resource *heap;

   /* Occlusion query heap */
   struct agx_oq_heap *oq;

   /* Acts as a context-level shader key */
   bool support_lod_bias;
   bool robust;

   /* Set of batches. When full, the LRU entry (the batch with the smallest
    * seqnum) is flushed to free a slot.
    */
   struct {
      uint64_t seqnum;
      struct agx_batch slots[AGX_MAX_BATCHES];

      /** Set of active batches for faster traversal */
      BITSET_DECLARE(active, AGX_MAX_BATCHES);

      /** Set of submitted batches for faster traversal */
      BITSET_DECLARE(submitted, AGX_MAX_BATCHES);

      /* Monotonic counter for each batch incremented when resetting a batch to
       * invalidate all associated queries. Compared to
       * agx_query::writer_generation.
       */
      uint64_t generation[AGX_MAX_BATCHES];
   } batches;

   struct agx_batch *batch;
   struct agx_bo *result_buf;

   struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
   uint32_t vb_mask;

   unsigned patch_vertices;
   float default_outer_level[4];
   float default_inner_level[2];

   struct agx_stage stage[PIPE_SHADER_TYPES];
   struct agx_vertex_elements *attributes;
   struct agx_rasterizer *rast;
   struct agx_zsa *zs;
   struct agx_blend *blend;
   struct pipe_blend_color blend_color;
   struct pipe_viewport_state viewport[AGX_MAX_VIEWPORTS];
   struct pipe_scissor_state scissor[AGX_MAX_VIEWPORTS];
   struct pipe_stencil_ref stencil_ref;
   struct agx_streamout streamout;
   uint16_t sample_mask;
   struct pipe_framebuffer_state framebuffer;

   uint32_t poly_stipple[32];

   struct pipe_query *cond_query;
   bool cond_cond;
   enum pipe_render_cond_flag cond_mode;

   struct agx_query *occlusion_query;
   struct agx_query *prims_generated[4];
   struct agx_query *tf_prims_generated[4];
   struct agx_query *tf_overflow[4];
   struct agx_query *tf_any_overflow;
   struct agx_query *pipeline_statistics[PIPE_STAT_QUERY_TS_INVOCATIONS];
   struct agx_query *time_elapsed;
   bool active_queries;
   bool active_draw_without_restart;

   struct util_debug_callback debug;
   bool is_noop;

   bool in_tess;

   struct blitter_context *blitter;
   struct asahi_blitter compute_blitter;

   /* Map of GEM handle to (batch index + 1) that (conservatively) writes that
    * BO, or 0 if no writer.
    */
   struct util_dynarray writer;

   /* Bound CL global buffers */
   struct util_dynarray global_buffers;

   struct hash_table *generic_meta;
   struct agx_meta_cache meta;

   bool any_faults;

   uint32_t syncobj;
   uint32_t dummy_syncobj;
   int in_sync_fd;
   uint32_t in_sync_obj;

   struct agx_scratch scratch_vs;
   struct agx_scratch scratch_fs;
   struct agx_scratch scratch_cs;
};

static inline unsigned
agx_batch_idx(struct agx_batch *batch)
{
   return batch - batch->ctx->batches.slots;
}

static void
agx_writer_add(struct agx_context *ctx, uint8_t batch_index, unsigned handle)
{
   assert(batch_index < AGX_MAX_BATCHES && "invariant");
   static_assert(AGX_MAX_BATCHES < 0xFF, "no overflow on addition");

   /* If we need to grow, double the capacity so insertion is amortized O(1). */
   if (unlikely(handle >= ctx->writer.size)) {
      unsigned new_size =
         MAX2(ctx->writer.capacity * 2, util_next_power_of_two(handle + 1));
      unsigned grow = new_size - ctx->writer.size;

      memset(util_dynarray_grow(&ctx->writer, uint8_t, grow), 0,
             grow * sizeof(uint8_t));
   }

   /* There is now room */
   uint8_t *value = util_dynarray_element(&ctx->writer, uint8_t, handle);
   assert((*value) == 0 && "there should be no existing writer");
   *value = batch_index + 1;
}

static struct agx_batch *
agx_writer_get(struct agx_context *ctx, unsigned handle)
{
   if (handle >= ctx->writer.size)
      return NULL;

   uint8_t value = *util_dynarray_element(&ctx->writer, uint8_t, handle);

   if (value > 0)
      return &ctx->batches.slots[value - 1];
   else
      return NULL;
}

static void
agx_writer_remove(struct agx_context *ctx, unsigned handle)
{
   if (handle >= ctx->writer.size)
      return;

   uint8_t *value = util_dynarray_element(&ctx->writer, uint8_t, handle);
   *value = 0;
}
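
/* Sketch of how the writer map is intended to be used when recording a write
 * to a BO (a hypothetical call site; the real tracking lives in the batch and
 * resource code):
 *
 *    struct agx_batch *writer = agx_writer_get(ctx, bo->handle);
 *    if (writer && writer != batch)
 *       agx_flush_batch_for_reason(ctx, writer, "Write-after-write hazard");
 *
 *    if (!agx_writer_get(ctx, bo->handle))
 *       agx_writer_add(ctx, agx_batch_idx(batch), bo->handle);
 */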

static inline struct agx_context *
agx_context(struct pipe_context *pctx)
{
   return (struct agx_context *)pctx;
}

void agx_launch(struct agx_batch *batch, const struct pipe_grid_info *info,
                struct agx_compiled_shader *cs, enum pipe_shader_type stage);

void agx_init_query_functions(struct pipe_context *ctx);

void
agx_primitives_update_direct(struct agx_context *ctx,
                             const struct pipe_draw_info *info,
                             const struct pipe_draw_start_count_bias *draw);

void agx_draw_vbo_from_xfb(struct pipe_context *pctx,
                           const struct pipe_draw_info *info,
                           unsigned drawid_offset,
                           const struct pipe_draw_indirect_info *indirect);

uint64_t agx_batch_get_so_address(struct agx_batch *batch, unsigned buffer,
                                  uint32_t *size);

void agx_init_streamout_functions(struct pipe_context *ctx);

static inline void
agx_dirty_all(struct agx_context *ctx)
{
   ctx->dirty = ~0;

   for (unsigned i = 0; i < ARRAY_SIZE(ctx->stage); ++i)
      ctx->stage[i].dirty = ~0;
}

static inline void
agx_dirty_reset_graphics(struct agx_context *ctx)
{
   ctx->dirty = 0;

   for (unsigned i = 0; i < ARRAY_SIZE(ctx->stage); ++i) {
      if (i != PIPE_SHADER_COMPUTE)
         ctx->stage[i].dirty = 0;
   }
}

struct agx_rasterizer {
   struct pipe_rasterizer_state base;
   uint8_t cull[AGX_CULL_LENGTH];
   uint8_t line_width;
   uint8_t polygon_mode;
};

struct agx_query {
   unsigned type;
   unsigned index;

   uint64_t writer_generation[AGX_MAX_BATCHES];
   struct agx_bo *bo;
   struct agx_ptr ptr;
};

struct agx_sampler_state {
   struct pipe_sampler_state base;

   /* Prepared descriptor */
   struct agx_sampler_packed desc, desc_without_custom_border;

   /* Whether a custom border colour is required */
   bool uses_custom_border;

   /* Packed custom border colour, or zero if none is required */
   struct agx_border_packed border;

   /* LOD bias packed as fp16, the form we'll pass to the shader */
   uint16_t lod_bias_as_fp16;
};

struct agx_sampler_view {
   struct pipe_sampler_view base;

   /* Resource/format, may differ from base in case of separate stencil */
   struct agx_resource *rsrc;
   enum pipe_format format;

   /* Prepared descriptor */
   struct agx_texture_packed desc;
};

struct agx_screen {
   struct pipe_screen pscreen;
   struct agx_device dev;
   struct disk_cache *disk_cache;
   /* Queue handle */
   uint32_t queue_id;
};

static inline struct agx_screen *
agx_screen(struct pipe_screen *p)
{
   return (struct agx_screen *)p;
}

static inline struct agx_device *
agx_device(struct pipe_screen *p)
{
   return &(agx_screen(p)->dev);
}

#define perf_debug(dev, ...)                                                   \
   do {                                                                        \
      if (unlikely((dev)->debug & AGX_DBG_PERF))                               \
         mesa_logw(__VA_ARGS__);                                               \
   } while (0)

#define perf_debug_ctx(ctx, ...)                                               \
   perf_debug(agx_device((ctx)->base.screen), __VA_ARGS__)
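
/* Example (illustrative, not an existing call site): log a performance warning
 * only when the AGX_DBG_PERF debug flag is set on the device.
 *
 *    perf_debug_ctx(ctx, "Flushing writer due to %s", reason);
 */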

struct agx_resource {
   struct pipe_resource base;
   uint64_t modifier;

   /* Should probably be part of the modifier. Affects the tiling algorithm, or
    * something like that.
    */
   bool mipmapped;

   /* Hardware backing */
   struct agx_bo *bo;

   struct renderonly_scanout *scanout;

   BITSET_DECLARE(data_valid, PIPE_MAX_TEXTURE_LEVELS);

   struct ail_layout layout;

   /* Metal does not support packed depth/stencil formats; presumably AGX does
    * not either. Instead, we create separate depth and stencil resources,
    * managed by u_transfer_helper. We provide the illusion of packed
    * resources.
    */
   struct agx_resource *separate_stencil;

   /* Valid buffer range tracking, to optimize buffer appends */
   struct util_range valid_buffer_range;

   /* Cumulative number of bytes shadowed for this resource, i.e. the number of
    * times the resource has been shadowed multiplied by the resource size.
    */
   size_t shadowed_bytes;
};

static inline struct agx_resource *
agx_resource(struct pipe_resource *pctx)
{
   return (struct agx_resource *)pctx;
}

static inline bool
agx_resource_valid(struct agx_resource *rsrc, int level)
{
   /* Shared BOs can always be potentially valid */
   if (rsrc->bo && rsrc->bo->flags & AGX_BO_SHARED) {
      assert(level == 0);
      return true;
   }

   return BITSET_TEST(rsrc->data_valid, level);
}

static inline void *
agx_map_texture_cpu(struct agx_resource *rsrc, unsigned level, unsigned z)
{
   return ((uint8_t *)rsrc->bo->ptr.cpu) +
          ail_get_layer_level_B(&rsrc->layout, z, level);
}

static inline uint64_t
agx_map_texture_gpu(struct agx_resource *rsrc, unsigned z)
{
   return rsrc->bo->ptr.gpu +
          (uint64_t)ail_get_layer_offset_B(&rsrc->layout, z);
}

void agx_decompress(struct agx_context *ctx, struct agx_resource *rsrc,
                    const char *reason);

void agx_legalize_compression(struct agx_context *ctx,
                              struct agx_resource *rsrc,
                              enum pipe_format format);

struct agx_transfer {
   struct pipe_transfer base;
   void *map;
   struct {
      struct pipe_resource *rsrc;
      struct pipe_box box;
   } staging;
};

static inline struct agx_transfer *
agx_transfer(struct pipe_transfer *p)
{
   return (struct agx_transfer *)p;
}

void agx_upload_vbos(struct agx_batch *batch);
void agx_upload_uniforms(struct agx_batch *batch);

void agx_set_sampler_uniforms(struct agx_batch *batch,
                              enum pipe_shader_type stage);

void agx_set_cbuf_uniforms(struct agx_batch *batch,
                           enum pipe_shader_type stage);

void agx_set_ssbo_uniforms(struct agx_batch *batch,
                           enum pipe_shader_type stage);

bool agx_nir_lower_point_size(nir_shader *nir, bool fixed_point_size);

bool agx_nir_lower_sysvals(nir_shader *shader, enum pipe_shader_type desc_stage,
                           bool lower_draw_params);

bool agx_nir_layout_uniforms(nir_shader *shader,
                             struct agx_compiled_shader *compiled,
                             unsigned *push_size);

bool agx_nir_lower_bindings(nir_shader *shader, bool *uses_bindless_samplers);

bool agx_batch_is_active(struct agx_batch *batch);
bool agx_batch_is_submitted(struct agx_batch *batch);

/* Add a BO to a batch. This needs to be amortized O(1) since it's called in
 * hot paths. To achieve this we model BO lists by bit sets */

static bool
agx_batch_uses_bo(struct agx_batch *batch, struct agx_bo *bo)
{
   if (bo->handle < batch->bo_list.bit_count)
      return BITSET_TEST(batch->bo_list.set, bo->handle);
   else
      return false;
}

static inline void
agx_batch_add_bo(struct agx_batch *batch, struct agx_bo *bo)
{
   /* Double the size of the BO list if we run out, this is amortized O(1) */
   if (unlikely(bo->handle >= batch->bo_list.bit_count)) {
      const unsigned bits_per_word = sizeof(BITSET_WORD) * 8;

      unsigned bit_count =
         MAX2(batch->bo_list.bit_count * 2,
              util_next_power_of_two(ALIGN_POT(bo->handle + 1, bits_per_word)));

      batch->bo_list.set = rerzalloc(
         batch->ctx, batch->bo_list.set, BITSET_WORD,
         batch->bo_list.bit_count / bits_per_word, bit_count / bits_per_word);
      batch->bo_list.bit_count = bit_count;
   }

   if (BITSET_TEST(batch->bo_list.set, bo->handle))
      return;

   /* The batch holds a single reference to each BO in the batch, released when
    * the batch finishes execution.
    */
   agx_bo_reference(bo);
   BITSET_SET(batch->bo_list.set, bo->handle);
}

#define AGX_BATCH_FOREACH_BO_HANDLE(batch, handle)                             \
   BITSET_FOREACH_SET(handle, (batch)->bo_list.set, batch->bo_list.bit_count)
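
/* Example (illustrative) of walking every BO handle referenced by a batch,
 * where do_something_with is a placeholder:
 *
 *    unsigned handle;
 *    AGX_BATCH_FOREACH_BO_HANDLE(batch, handle)
 *       do_something_with(handle);
 */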

void agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch,
                      uint32_t barriers, enum drm_asahi_cmd_type cmd_type,
                      void *cmdbuf);

void agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch);
void agx_flush_batch_for_reason(struct agx_context *ctx,
                                struct agx_batch *batch, const char *reason);
void agx_flush_all(struct agx_context *ctx, const char *reason);
void agx_flush_readers(struct agx_context *ctx, struct agx_resource *rsrc,
                       const char *reason);
void agx_flush_writer(struct agx_context *ctx, struct agx_resource *rsrc,
                      const char *reason);

void agx_sync_writer(struct agx_context *ctx, struct agx_resource *rsrc,
                     const char *reason);
void agx_sync_readers(struct agx_context *ctx, struct agx_resource *rsrc,
                      const char *reason);
void agx_sync_batch(struct agx_context *ctx, struct agx_batch *batch);
void agx_sync_all(struct agx_context *ctx, const char *reason);
void agx_sync_batch_for_reason(struct agx_context *ctx, struct agx_batch *batch,
                               const char *reason);
void agx_memory_barrier(struct pipe_context *pctx, unsigned flags);

/* Use these instead of batch_add_bo for proper resource tracking */
void agx_batch_reads(struct agx_batch *batch, struct agx_resource *rsrc);
void agx_batch_writes(struct agx_batch *batch, struct agx_resource *rsrc,
                      unsigned level);
void agx_batch_writes_range(struct agx_batch *batch, struct agx_resource *rsrc,
                            unsigned offset, unsigned size);
void agx_batch_track_image(struct agx_batch *batch,
                           struct pipe_image_view *image);

bool agx_any_batch_uses_resource(struct agx_context *ctx,
                                 struct agx_resource *rsrc);

/* 16384 is the maximum framebuffer dimension, so we use a larger width (the
 * maximum uint16_t) as a sentinel to identify the compute batch. This ensures
 * compute batches don't mix with graphics. This is a bit of a hack but it
 * works.
 */
#define AGX_COMPUTE_BATCH_WIDTH 0xFFFF

static inline bool
agx_batch_is_compute(struct agx_batch *batch)
{
   return batch->key.width == AGX_COMPUTE_BATCH_WIDTH;
}

struct agx_batch *agx_get_batch(struct agx_context *ctx);
struct agx_batch *agx_get_compute_batch(struct agx_context *ctx);
void agx_batch_reset(struct agx_context *ctx, struct agx_batch *batch);
int agx_cleanup_batches(struct agx_context *ctx);

void agx_batch_add_timestamp_query(struct agx_batch *batch,
                                   struct agx_query *q);
void agx_add_timestamp_end_query(struct agx_context *ctx, struct agx_query *q);

void agx_query_increment_cpu(struct agx_context *ctx, struct agx_query *query,
                             uint64_t increment);

/* Blit shaders */
void agx_blitter_save(struct agx_context *ctx, struct blitter_context *blitter,
                      bool render_cond);

void agx_blit(struct pipe_context *pipe, const struct pipe_blit_info *info);

void agx_resource_copy_region(struct pipe_context *pctx,
                              struct pipe_resource *dst, unsigned dst_level,
                              unsigned dstx, unsigned dsty, unsigned dstz,
                              struct pipe_resource *src, unsigned src_level,
                              const struct pipe_box *src_box);

/* Batch logic */

struct agx_encoder agx_encoder_allocate(struct agx_batch *batch,
                                        struct agx_device *dev);

void agx_batch_init_state(struct agx_batch *batch);

uint64_t agx_build_meta(struct agx_batch *batch, bool store,
                        bool partial_render);

/* Query management */
uint16_t agx_get_oq_index(struct agx_batch *batch, struct agx_query *query);
uint64_t agx_get_query_address(struct agx_batch *batch,
                               struct agx_query *query);
uint64_t agx_get_occlusion_heap(struct agx_batch *batch);

void agx_finish_batch_queries(struct agx_batch *batch, uint64_t begin_ts,
                              uint64_t end_ts);

bool agx_render_condition_check_inner(struct agx_context *ctx);

static inline bool
agx_render_condition_check(struct agx_context *ctx)
{
   if (likely(!ctx->cond_query))
      return true;
   else
      return agx_render_condition_check_inner(ctx);
}
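
/* Sketch of the intended use (a hypothetical call site): draws and clears are
 * skipped when conditional rendering says so.
 *
 *    if (!agx_render_condition_check(ctx))
 *       return;
 *
 * after which the draw proceeds as usual.
 */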

/* Texel buffers lowered to (at most) 1024x16384 2D textures */
#define AGX_TEXTURE_BUFFER_WIDTH      1024
#define AGX_TEXTURE_BUFFER_MAX_HEIGHT 16384
#define AGX_TEXTURE_BUFFER_MAX_SIZE                                            \
   (AGX_TEXTURE_BUFFER_WIDTH * AGX_TEXTURE_BUFFER_MAX_HEIGHT)

static inline uint32_t
agx_texture_buffer_size_el(enum pipe_format format, uint32_t size)
{
   unsigned blocksize = util_format_get_blocksize(format);

   return MIN2(AGX_TEXTURE_BUFFER_MAX_SIZE, size / blocksize);
}
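
/* Sketch of the implied 2D shape for size_el texels (an assumption following
 * from the limits above, not a statement of the exact lowering):
 *
 *    uint32_t width  = MIN2(size_el, AGX_TEXTURE_BUFFER_WIDTH);
 *    uint32_t height = DIV_ROUND_UP(size_el, AGX_TEXTURE_BUFFER_WIDTH);
 */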

typedef void (*meta_shader_builder_t)(struct nir_builder *b, const void *key);

void agx_init_meta_shaders(struct agx_context *ctx);

void agx_destroy_meta_shaders(struct agx_context *ctx);
1146