1 /*
2  * Copyright 2023 Alyssa Rosenzweig
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #pragma once
7 
8 #include <stdbool.h>
9 #include "asahi/compiler/agx_compile.h"
10 #include "asahi/layout/layout.h"
11 #include "agx_pack.h"
12 #include "agx_ppp.h"
13 #include "libagx_shaders.h"
14 
/* Upper bound on the number of occlusion queries. NOTE(review): where this
 * limit comes from is not visible in this header -- confirm with its users.
 */
#define AGX_MAX_OCCLUSION_QUERIES (32768)
/* Upper bound on the number of viewports. NOTE(review): origin of the value
 * is not visible in this header -- confirm with its users.
 */
#define AGX_MAX_VIEWPORTS         (16)
17 
18 static inline enum agx_sampler_states
agx_translate_sampler_state_count(unsigned count,bool extended)19 agx_translate_sampler_state_count(unsigned count, bool extended)
20 {
21    assert(count <= 17 && "max 17 sampler state registers supported");
22 
23    if (count == 0) {
24       return AGX_SAMPLER_STATES_0;
25    } else if (extended) {
26       if (count <= 8)
27          return AGX_SAMPLER_STATES_8_EXTENDED;
28       else
29          return AGX_SAMPLER_STATES_16_EXTENDED;
30    } else {
31       if (count <= 4)
32          return AGX_SAMPLER_STATES_4_COMPACT;
33       else if (count <= 8)
34          return AGX_SAMPLER_STATES_8_COMPACT;
35       else if (count <= 12)
36          return AGX_SAMPLER_STATES_12_COMPACT;
37       else
38          return AGX_SAMPLER_STATES_16_COMPACT;
39    }
40 }
41 
42 static void
agx_pack_txf_sampler(struct agx_sampler_packed * out)43 agx_pack_txf_sampler(struct agx_sampler_packed *out)
44 {
45    agx_pack(out, SAMPLER, cfg) {
46       /* Allow mipmapping. This is respected by txf, weirdly. */
47       cfg.minimum_lod = 0.0;
48       cfg.maximum_lod = INFINITY;
49       cfg.mip_filter = AGX_MIP_FILTER_NEAREST;
50 
51       /* Out-of-bounds reads must return 0 */
52       cfg.wrap_s = AGX_WRAP_CLAMP_TO_BORDER;
53       cfg.wrap_t = AGX_WRAP_CLAMP_TO_BORDER;
54       cfg.wrap_r = AGX_WRAP_CLAMP_TO_BORDER;
55       cfg.border_colour = AGX_BORDER_COLOUR_TRANSPARENT_BLACK;
56    }
57 }
58 
/* The R/G/B/A channel encodings agree between pipe_swizzle and agx_channel,
 * but the constant 0/1 swizzles are encoded differently and need translating.
 */
60 
61 static inline enum agx_channel
agx_channel_from_pipe(enum pipe_swizzle in)62 agx_channel_from_pipe(enum pipe_swizzle in)
63 {
64    STATIC_ASSERT((enum agx_channel)PIPE_SWIZZLE_X == AGX_CHANNEL_R);
65    STATIC_ASSERT((enum agx_channel)PIPE_SWIZZLE_Y == AGX_CHANNEL_G);
66    STATIC_ASSERT((enum agx_channel)PIPE_SWIZZLE_Z == AGX_CHANNEL_B);
67    STATIC_ASSERT((enum agx_channel)PIPE_SWIZZLE_W == AGX_CHANNEL_A);
68    STATIC_ASSERT((enum agx_channel)PIPE_SWIZZLE_0 & 0x4);
69    STATIC_ASSERT((enum agx_channel)PIPE_SWIZZLE_1 & 0x4);
70    STATIC_ASSERT((enum agx_channel)PIPE_SWIZZLE_NONE & 0x4);
71 
72    if ((in & 0x4) == 0)
73       return (enum agx_channel)in;
74    else if (in == PIPE_SWIZZLE_1)
75       return AGX_CHANNEL_1;
76    else
77       return AGX_CHANNEL_0;
78 }
79 
80 static inline enum agx_layout
agx_translate_layout(enum ail_tiling tiling)81 agx_translate_layout(enum ail_tiling tiling)
82 {
83    switch (tiling) {
84    case AIL_TILING_TWIDDLED:
85    case AIL_TILING_TWIDDLED_COMPRESSED:
86       return AGX_LAYOUT_TWIDDLED;
87    case AIL_TILING_LINEAR:
88       return AGX_LAYOUT_LINEAR;
89    }
90 
91    unreachable("Invalid tiling");
92 }
93 
94 static enum agx_sample_count
agx_translate_sample_count(unsigned samples)95 agx_translate_sample_count(unsigned samples)
96 {
97    switch (samples) {
98    case 2:
99       return AGX_SAMPLE_COUNT_2;
100    case 4:
101       return AGX_SAMPLE_COUNT_4;
102    default:
103       unreachable("Invalid sample count");
104    }
105 }
106 
107 static enum agx_conservative_depth
agx_translate_depth_layout(enum gl_frag_depth_layout layout)108 agx_translate_depth_layout(enum gl_frag_depth_layout layout)
109 {
110    switch (layout) {
111    case FRAG_DEPTH_LAYOUT_ANY:
112       return AGX_CONSERVATIVE_DEPTH_ANY;
113    case FRAG_DEPTH_LAYOUT_LESS:
114       return AGX_CONSERVATIVE_DEPTH_LESS;
115    case FRAG_DEPTH_LAYOUT_GREATER:
116       return AGX_CONSERVATIVE_DEPTH_GREATER;
117    case FRAG_DEPTH_LAYOUT_UNCHANGED:
118       return AGX_CONSERVATIVE_DEPTH_UNCHANGED;
119    default:
120       unreachable("depth layout should have been canonicalized");
121    }
122 }
123 
124 static void
agx_pack_fragment_face_2(struct agx_fragment_face_2_packed * out,enum agx_object_type object_type,struct agx_shader_info * info)125 agx_pack_fragment_face_2(struct agx_fragment_face_2_packed *out,
126                          enum agx_object_type object_type,
127                          struct agx_shader_info *info)
128 {
129    agx_pack(out, FRAGMENT_FACE_2, cfg) {
130       /* These act like disables, ANDed in the hardware. Setting them like this
131        * means the draw-time flag is used.
132        */
133       cfg.disable_depth_write = true;
134       cfg.depth_function = AGX_ZS_FUNC_ALWAYS;
135 
136       cfg.object_type = object_type;
137       cfg.conservative_depth =
138          info ? agx_translate_depth_layout(info->depth_layout)
139               : AGX_CONSERVATIVE_DEPTH_UNCHANGED;
140    }
141 }
142 
143 static void
agx_ppp_fragment_face_2(struct agx_ppp_update * ppp,enum agx_object_type object_type,struct agx_shader_info * info)144 agx_ppp_fragment_face_2(struct agx_ppp_update *ppp,
145                         enum agx_object_type object_type,
146                         struct agx_shader_info *info)
147 {
148    struct agx_fragment_face_2_packed packed;
149    agx_pack_fragment_face_2(&packed, object_type, info);
150    agx_ppp_push_packed(ppp, &packed, FRAGMENT_FACE_2);
151 }
152 
/*
 * Pack a line width into the 8-bit 4:4 fixed-point hardware encoding.
 *
 * The encoded value is (line_width * 16) - 1, truncated towards zero, so
 * 0x00 stands for a width of 1/16 and 0xFF for a width of 16.
 *
 * The result saturates at both ends: widths below 1/16 (including zero,
 * negative values and NaN) pack to 0x00, and widths of 16 or more pack to
 * 0xFF. Saturation is done on the float so that the float-to-integer
 * conversion never sees an out-of-range value and the "- 1" bias can never
 * wrap a zero around to the maximum width.
 */
static inline uint32_t
agx_pack_line_width(float line_width)
{
   const float scaled = line_width * 16.0f;

   /* Written as a negated >= so that NaN also takes this branch */
   if (!(scaled >= 1.0f))
      return 0;

   /* Clamp to maximum line width */
   if (scaled >= 256.0f)
      return 0xFF;

   /* scaled is in [1, 256), so the conversion and the bias are both safe */
   return (uint32_t)scaled - 1;
}
162 
163 /*
164  * Despite having both a layout *and* a flag that I only see Metal use with null
165  * textures, AGX doesn't seem to have "real" null textures. Instead we need to
166  * bind an arbitrary address and throw away the results to read all 0's.
167  * Accordingly, the caller must pass some address that lives at least as long as
168  * the texture descriptor itself.
169  */
170 static void
agx_set_null_texture(struct agx_texture_packed * tex,uint64_t valid_address)171 agx_set_null_texture(struct agx_texture_packed *tex, uint64_t valid_address)
172 {
173    agx_pack(tex, TEXTURE, cfg) {
174       cfg.layout = AGX_LAYOUT_NULL;
175       cfg.channels = AGX_CHANNELS_R8;
176       cfg.type = AGX_TEXTURE_TYPE_UNORM /* don't care */;
177       cfg.swizzle_r = AGX_CHANNEL_0;
178       cfg.swizzle_g = AGX_CHANNEL_0;
179       cfg.swizzle_b = AGX_CHANNEL_0;
180       cfg.swizzle_a = AGX_CHANNEL_0;
181       cfg.address = valid_address;
182       cfg.null = true;
183    }
184 }
185 
186 static void
agx_set_null_pbe(struct agx_pbe_packed * pbe,uint64_t sink)187 agx_set_null_pbe(struct agx_pbe_packed *pbe, uint64_t sink)
188 {
189    agx_pack(pbe, PBE, cfg) {
190       cfg.width = 1;
191       cfg.height = 1;
192       cfg.levels = 1;
193       cfg.layout = AGX_LAYOUT_NULL;
194       cfg.channels = AGX_CHANNELS_R8;
195       cfg.type = AGX_TEXTURE_TYPE_UNORM /* don't care */;
196       cfg.swizzle_r = AGX_CHANNEL_R;
197       cfg.swizzle_g = AGX_CHANNEL_R;
198       cfg.swizzle_b = AGX_CHANNEL_R;
199       cfg.swizzle_a = AGX_CHANNEL_R;
200       cfg.buffer = sink;
201    }
202 }
203 
204 /*
205  * Determine the maximum vertex/divided instance index.  For robustness,
206  * the index will be clamped to this before reading (if soft fault is
207  * disabled).
208  *
209  * Index i accesses up to (exclusive) offset:
210  *
211  *    src_offset + (i * stride) + elsize_B
212  *
213  * so we require
214  *
215  *    src_offset + (i * stride) + elsize_B <= size
216  *
217  * <==>
218  *
219  *    i <= floor((size - src_offset - elsize_B) / stride)
220  */
221 static inline uint32_t
agx_calculate_vbo_clamp(uint64_t vbuf,uint64_t sink,enum pipe_format format,uint32_t size_B,uint32_t stride_B,uint32_t offset_B,uint64_t * vbuf_out)222 agx_calculate_vbo_clamp(uint64_t vbuf, uint64_t sink, enum pipe_format format,
223                         uint32_t size_B, uint32_t stride_B, uint32_t offset_B,
224                         uint64_t *vbuf_out)
225 {
226    unsigned elsize_B = util_format_get_blocksize(format);
227    unsigned subtracted_B = offset_B + elsize_B;
228 
229    /* If at least one index is valid, determine the max. Otherwise, direct reads
230     * to zero.
231     */
232    if (size_B >= subtracted_B) {
233       *vbuf_out = vbuf + offset_B;
234 
235       /* If stride is zero, do not clamp, everything is valid. */
236       if (stride_B)
237          return ((size_B - subtracted_B) / stride_B);
238       else
239          return UINT32_MAX;
240    } else {
241       *vbuf_out = sink;
242       return 0;
243    }
244 }
245 
246 static struct libagx_decompress_args
agx_fill_decompress_args(struct ail_layout * layout,unsigned layer,unsigned level,uint64_t ptr,uint64_t images)247 agx_fill_decompress_args(struct ail_layout *layout, unsigned layer,
248                          unsigned level, uint64_t ptr, uint64_t images)
249 {
250    return (struct libagx_decompress_args){
251       .images = images,
252       .tile_uncompressed = ail_tile_mode_uncompressed(layout->format),
253       .metadata = ptr + layout->metadata_offset_B +
254                   layout->level_offsets_compressed_B[level] +
255                   (layer * layout->compression_layer_stride_B),
256       .metadata_layer_stride_tl = layout->compression_layer_stride_B / 8,
257       .metadata_width_tl = ail_metadata_width_tl(layout, level),
258       .metadata_height_tl = ail_metadata_height_tl(layout, level),
259    };
260 }
261 
/*
 * Replace any earlier libagx_decompress definition with a wrapper that
 * builds the argument structure from an ail_layout and calls
 * libagx_decompress_struct, passing log2 of the layout's sample count as the
 * final argument.
 *
 * `layout` is expanded twice, so it must not have side effects. It is
 * parenthesised before being dereferenced so that any pointer-valued
 * expression works as an argument.
 */
#undef libagx_decompress
#define libagx_decompress(context, grid, barrier, layout, layer, level, ptr,   \
                          images)                                              \
   libagx_decompress_struct(                                                   \
      context, grid, barrier,                                                  \
      agx_fill_decompress_args(layout, layer, level, ptr, images),             \
      util_logbase2((layout)->sample_count_sa))
269 
/*
 * Dispatch to the tessellation routine matching `prim`: quads, triangles,
 * or (asserted) isolines. Note that `state` and `mode` are passed to the
 * routines in the opposite order from this macro's parameter list.
 *
 * The body is wrapped in do { } while (0) so the macro expands to a single
 * statement and is safe inside unbraced if/else; invoke it with a trailing
 * semicolon. `prim` is expanded more than once, so it must not have side
 * effects.
 */
#define libagx_tessellate(context, grid, barrier, prim, mode, state)           \
   do {                                                                        \
      if ((prim) == TESS_PRIMITIVE_QUADS) {                                    \
         libagx_tess_quad(context, grid, barrier, state, mode);                \
      } else if ((prim) == TESS_PRIMITIVE_TRIANGLES) {                         \
         libagx_tess_tri(context, grid, barrier, state, mode);                 \
      } else {                                                                 \
         assert((prim) == TESS_PRIMITIVE_ISOLINES);                            \
         libagx_tess_isoline(context, grid, barrier, state, mode);             \
      }                                                                        \
   } while (0)
279 
/* Forward declaration: the prototype below only needs a pointer to it. */
struct agx_border_packed;

/*
 * Defined out of line. NOTE(review): presumably packs the four 32-bit border
 * colour components in `in`, interpreted according to `format`, into `out`
 * -- confirm against the definition.
 */
void agx_pack_border(struct agx_border_packed *out, const uint32_t in[4],
                     enum pipe_format format);
284