/*
 * Copyright 2021 Alyssa Rosenzweig
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include <stdint.h>
#include "agx_state.h"
#include "magic.h"

/* The structures managed in this file appear to be software defined (either in
 * the macOS kernel driver or in the AGX firmware) */

/* Odd pattern: a buffer filled with an ascending (index + 1) sequence. Its
 * purpose is unknown. */
static uint64_t
demo_unk6(struct agx_pool *pool)
{
   struct agx_ptr ptr = agx_pool_alloc_aligned(pool, 0x4000 * sizeof(uint64_t), 64);
   uint64_t *buf = ptr.cpu;

   /* Zero the whole allocation, then write the pattern over the start */
   memset(buf, 0, 0x4000 * sizeof(uint64_t));

   for (unsigned i = 1; i < 0x3ff; ++i)
      buf[i] = (i + 1);

   return ptr.gpu;
}

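/* Allocate and zero `count` bytes from the pool, returning the GPU address */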
static uint64_t
demo_zero(struct agx_pool *pool, unsigned count)
{
   struct agx_ptr ptr = agx_pool_alloc_aligned(pool, count, 64);
   memset(ptr.cpu, 0, count);
   return ptr.gpu;
}

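/* Size in bytes of one level of a resource, including the separate stencil
 * resource if there is one */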
static size_t
asahi_size_resource(struct pipe_resource *prsrc, unsigned level)
{
   struct agx_resource *rsrc = agx_resource(prsrc);
   size_t size = rsrc->slices[level].size;

   if (rsrc->separate_stencil)
      size += asahi_size_resource(&rsrc->separate_stencil->base, level);

   return size;
}

static size_t
asahi_size_surface(struct pipe_surface *surf)
{
   return asahi_size_resource(surf->texture, surf->u.tex.level);
}

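/* Sum the sizes of all framebuffer attachments, used below to express each
 * attachment's size as a percentage of the total */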
static size_t
asahi_size_attachments(struct pipe_framebuffer_state *framebuffer)
{
   size_t sum = 0;

   for (unsigned i = 0; i < framebuffer->nr_cbufs; ++i)
      sum += asahi_size_surface(framebuffer->cbufs[i]);

   if (framebuffer->zsbuf)
      sum += asahi_size_surface(framebuffer->zsbuf);

   return sum;
}

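/* Choose the IOGPU attachment type from the surface format */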
static enum agx_iogpu_attachment_type
asahi_classify_attachment(enum pipe_format format)
{
   const struct util_format_description *desc = util_format_description(format);

   if (util_format_has_depth(desc))
      return AGX_IOGPU_ATTACHMENT_TYPE_DEPTH;
   else if (util_format_has_stencil(desc))
      return AGX_IOGPU_ATTACHMENT_TYPE_STENCIL;
   else
      return AGX_IOGPU_ATTACHMENT_TYPE_COLOUR;
}

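/* GPU address of the selected mip level / first layer of a surface's backing
 * resource */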
static uint64_t
agx_map_surface_resource(struct pipe_surface *surf, struct agx_resource *rsrc)
{
   return agx_map_texture_gpu(rsrc, surf->u.tex.level, surf->u.tex.first_layer);
}

static uint64_t
agx_map_surface(struct pipe_surface *surf)
{
   return agx_map_surface_resource(surf, agx_resource(surf->texture));
}

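/* Pack a single IOGPU attachment descriptor. The "percent" field appears to
 * be this attachment's share of the total attachment footprint. */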
static void
asahi_pack_iogpu_attachment(void *out, struct agx_resource *rsrc,
                            struct pipe_surface *surf,
                            unsigned total_size)
{
   /* We don't support layered rendering yet */
   assert(surf->u.tex.first_layer == surf->u.tex.last_layer);

   agx_pack(out, IOGPU_ATTACHMENT, cfg) {
      cfg.type = asahi_classify_attachment(rsrc->base.format);
      cfg.address = agx_map_surface_resource(surf, rsrc);
      cfg.size = rsrc->slices[surf->u.tex.level].size;
      cfg.percent = (100 * cfg.size) / total_size;
   }
}

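/* Pack one descriptor per colour buffer, one for depth/stencil if present,
 * and an extra one when stencil lives in a separate resource. Returns the
 * number of descriptors packed. */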
static unsigned
asahi_pack_iogpu_attachments(void *out, struct pipe_framebuffer_state *framebuffer)
{
   unsigned total_attachment_size = asahi_size_attachments(framebuffer);
   struct agx_iogpu_attachment_packed *attachments = out;
   unsigned nr = 0;

   for (unsigned i = 0; i < framebuffer->nr_cbufs; ++i) {
      asahi_pack_iogpu_attachment(attachments + (nr++),
                                  agx_resource(framebuffer->cbufs[i]->texture),
                                  framebuffer->cbufs[i],
                                  total_attachment_size);
   }

   if (framebuffer->zsbuf) {
      struct agx_resource *rsrc = agx_resource(framebuffer->zsbuf->texture);

      asahi_pack_iogpu_attachment(attachments + (nr++),
                                  rsrc, framebuffer->zsbuf,
                                  total_attachment_size);

      if (rsrc->separate_stencil) {
         asahi_pack_iogpu_attachment(attachments + (nr++),
                                     rsrc->separate_stencil,
                                     framebuffer->zsbuf,
                                     total_attachment_size);
      }
   }

   return nr;
}

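/* Construct the IOGPU command buffer. The offsets below were found
 * empirically; the layout appears to be (in 32-bit words):
 *
 *      0  IOGPU_HEADER
 *    160  IOGPU_INTERNAL_PIPELINES
 *    228  IOGPU_AUX_FRAMEBUFFER
 *    292  IOGPU_CLEAR_Z_S
 *    356  IOGPU_MISC
 *    484  unknown region
 *    496  attachment area (count at word 499, descriptors from word 500)
 *
 * Returns the total size in bytes of the packed command buffer. */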
unsigned
demo_cmdbuf(uint64_t *buf, size_t size,
            struct agx_pool *pool,
            struct pipe_framebuffer_state *framebuffer,
            uint64_t encoder_ptr,
            uint64_t encoder_id,
            uint64_t scissor_ptr,
            uint64_t depth_bias_ptr,
            uint32_t pipeline_clear,
            uint32_t pipeline_load,
            uint32_t pipeline_store,
            bool clear_pipeline_textures,
            double clear_depth,
            unsigned clear_stencil)
{
   uint32_t *map = (uint32_t *) buf;
   memset(map, 0, 518 * 4);

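   /* Scratch allocations of unknown purpose; the "deflake" name presumably
    * reflects that they were needed to fix flaky rendering */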
   uint64_t deflake_buffer = demo_zero(pool, 0x7e0);
   uint64_t deflake_1 = deflake_buffer + 0x2a0;
   uint64_t deflake_2 = deflake_buffer + 0x20;

   uint64_t unk_buffer_2 = demo_zero(pool, 0x8000);

   uint64_t depth_buffer = 0;
   uint64_t stencil_buffer = 0;

   agx_pack(map + 160, IOGPU_INTERNAL_PIPELINES, cfg) {
      cfg.clear_pipeline_bind = 0xffff8002 | (clear_pipeline_textures ? 0x210 : 0);
      cfg.clear_pipeline = pipeline_clear;

      /* Store pipeline used when the entire frame completes */
      cfg.store_pipeline_bind = 0x12;
      cfg.store_pipeline = pipeline_store;
      cfg.scissor_array = scissor_ptr;
      cfg.depth_bias_array = depth_bias_ptr;

      if (framebuffer->zsbuf) {
         struct pipe_surface *zsbuf = framebuffer->zsbuf;
         const struct util_format_description *desc =
            util_format_description(zsbuf->texture->format);

         /* Note: setting the 0x4 bit here breaks partial renders with depth */
         cfg.depth_flags = 0x80000; /* no compression, clear */

         cfg.depth_width = framebuffer->width;
         cfg.depth_height = framebuffer->height;

         if (util_format_has_depth(desc)) {
            depth_buffer = agx_map_surface(zsbuf);
         } else {
            stencil_buffer = agx_map_surface(zsbuf);
         }

         if (agx_resource(zsbuf->texture)->separate_stencil) {
            stencil_buffer = agx_map_surface_resource(zsbuf,
                  agx_resource(zsbuf->texture)->separate_stencil);
         }

         cfg.stencil_buffer = stencil_buffer;
         cfg.stencil_buffer_2 = stencil_buffer;

         cfg.depth_buffer = depth_buffer;
         cfg.depth_buffer_if_clearing = depth_buffer;
      }
   }

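   /* Descriptor for an auxiliary framebuffer of unknown purpose, backed by
    * the zeroed 0x8000-byte buffer allocated above */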
   agx_pack(map + 228, IOGPU_AUX_FRAMEBUFFER, cfg) {
      cfg.width = framebuffer->width;
      cfg.height = framebuffer->height;
      cfg.pointer = unk_buffer_2;
   }

   agx_pack(map + 292, IOGPU_CLEAR_Z_S, cfg) {
      cfg.set_when_reloading_z_1 = clear_pipeline_textures;

      cfg.depth_clear_value = fui(clear_depth);
      cfg.stencil_clear_value = clear_stencil;

      cfg.partial_reload_pipeline_bind = 0xffff8212;
      cfg.partial_reload_pipeline = pipeline_load;

      cfg.partial_store_pipeline_bind = 0x12;
      cfg.partial_store_pipeline = pipeline_store;
   }

   agx_pack(map + 356, IOGPU_MISC, cfg) {
      cfg.depth_buffer = depth_buffer;
      cfg.stencil_buffer = stencil_buffer;
      cfg.encoder_id = encoder_id;
      cfg.unknown_buffer = demo_unk6(pool);
      cfg.width = framebuffer->width;
      cfg.height = framebuffer->height;
      cfg.unk_80 = clear_pipeline_textures ? 0x0 : 0x1;
   }

   unsigned offset_unk = (484 * 4);
   unsigned offset_attachments = (496 * 4);

   unsigned nr_attachments =
      asahi_pack_iogpu_attachments(map + (offset_attachments / 4) + 4,
                                   framebuffer);

   map[(offset_attachments / 4) + 3] = nr_attachments;

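   /* The 16 extra bytes cover the four words preceding the attachment
    * descriptors (words 496-499), including the count written above */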
   unsigned total_size = offset_attachments + (AGX_IOGPU_ATTACHMENT_LENGTH * nr_attachments) + 16;

   agx_pack(map, IOGPU_HEADER, cfg) {
      cfg.total_size = total_size;
      cfg.attachment_offset = offset_attachments;
      cfg.attachment_length = nr_attachments * AGX_IOGPU_ATTACHMENT_LENGTH;
      cfg.unknown_offset = offset_unk;
      cfg.encoder = encoder_ptr;

      cfg.deflake_1 = deflake_1;
      cfg.deflake_2 = deflake_2;
      cfg.deflake_3 = deflake_buffer;
   }

   return total_size;
}

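/* Fill out the header of the memory map (the list of BOs referenced by the
 * command buffer) */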
static struct agx_map_header
demo_map_header(uint64_t cmdbuf_id, uint64_t encoder_id, unsigned cmdbuf_size, unsigned count)
{
   /* Structure: header followed by resource groups. For now, we use a
    * separate resource group for each resource. This could be optimized.
    */
   unsigned length = sizeof(struct agx_map_header);
   length += count * sizeof(struct agx_map_entry);
   assert(length < 0x10000);

   return (struct agx_map_header) {
      .cmdbuf_id = cmdbuf_id,
      .segment_count = 1,
      .length = length,
      .encoder_id = encoder_id,
      .kernel_commands_start_offset = 0,
      .kernel_commands_end_offset = cmdbuf_size,
      .total_resources = count,
      .resource_group_count = count,
      .unk = 0x8000,
   };
}

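/* Write the full memory map: a header followed by one entry per BO handle */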
void
demo_mem_map(void *map, size_t size, unsigned *handles, unsigned count,
             uint64_t cmdbuf_id, uint64_t encoder_id, unsigned cmdbuf_size)
{
   struct agx_map_header *header = map;
   struct agx_map_entry *entries = (struct agx_map_entry *) (((uint8_t *) map) + sizeof(*header));
   struct agx_map_entry *end = (struct agx_map_entry *) (((uint8_t *) map) + size);

   /* The header precedes the entries */
   *header = demo_map_header(cmdbuf_id, encoder_id, cmdbuf_size, count);

   /* Add an entry for each BO mapped */
   for (unsigned i = 0; i < count; ++i) {
      assert((entries + i) < end);
      entries[i] = (struct agx_map_entry) {
         .resource_id = { handles[i] },
         .resource_unk = { 0x20 },
         .resource_flags = { 0x1 },
         .resource_count = 1
      };
   }
}