/*
 * Copyright 2021 Alyssa Rosenzweig
 * Copyright 2019 Collabora, Ltd.
 * Copyright 2020 Igalia S.L.
 * SPDX-License-Identifier: MIT
 */

#include "agx_device.h"
#include <inttypes.h>
#include "clc/asahi_clc.h"
#include "util/macros.h"
#include "util/ralloc.h"
#include "util/timespec.h"
#include "agx_bo.h"
#include "agx_compile.h"
#include "agx_device_virtio.h"
#include "agx_scratch.h"
#include "decode.h"
#include "glsl_types.h"
#include "libagx_dgc.h"
#include "libagx_shaders.h"

#include <fcntl.h>
#include <xf86drm.h>
#include "drm-uapi/dma-buf.h"
#include "util/blob.h"
#include "util/log.h"
#include "util/mesa-sha1.h"
#include "util/os_file.h"
#include "util/os_mman.h"
#include "util/os_time.h"
#include "util/simple_mtx.h"
#include "util/u_printf.h"
#include "git_sha1.h"
#include "nir_serialize.h"
#include "unstable_asahi_drm.h"
#include "vdrm.h"

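/* Dispatch an ioctl to the kernel driver. For the native asahi DRM device
 * this is a plain drmIoctl(); when running as a virtio-gpu native context,
 * the request is routed through the virtio path via agx_virtio_simple_ioctl().
 */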
static inline int
asahi_simple_ioctl(struct agx_device *dev, unsigned cmd, void *req)
{
   if (dev->is_virtio) {
      return agx_virtio_simple_ioctl(dev, cmd, req);
   } else {
      return drmIoctl(dev->fd, cmd, req);
   }
}

/* clang-format off */
static const struct debug_named_value agx_debug_options[] = {
   {"trace",     AGX_DBG_TRACE,    "Trace the command stream"},
   {"bodump",    AGX_DBG_BODUMP,   "Periodically dump live BOs"},
   {"no16",      AGX_DBG_NO16,     "Disable 16-bit support"},
   {"perf",      AGX_DBG_PERF,     "Print performance warnings"},
#ifndef NDEBUG
   {"dirty",     AGX_DBG_DIRTY,    "Disable dirty tracking"},
#endif
   {"precompile",AGX_DBG_PRECOMPILE,"Precompile shaders for shader-db"},
   {"nocompress",AGX_DBG_NOCOMPRESS,"Disable lossless compression"},
   {"nocluster", AGX_DBG_NOCLUSTER,"Disable vertex clustering"},
   {"sync",      AGX_DBG_SYNC,     "Synchronously wait for all submissions"},
   {"stats",     AGX_DBG_STATS,    "Show command execution statistics"},
   {"resource",  AGX_DBG_RESOURCE, "Log resource operations"},
   {"batch",     AGX_DBG_BATCH,    "Log batches"},
   {"nowc",      AGX_DBG_NOWC,     "Disable write-combining"},
   {"synctvb",   AGX_DBG_SYNCTVB,  "Synchronous TVB growth"},
   {"smalltile", AGX_DBG_SMALLTILE,"Force 16x16 tiles"},
   {"feedback",  AGX_DBG_FEEDBACK, "Debug feedback loops"},
   {"nomsaa",    AGX_DBG_NOMSAA,   "Force disable MSAA"},
   {"noshadow",  AGX_DBG_NOSHADOW, "Force disable resource shadowing"},
   {"scratch",   AGX_DBG_SCRATCH,  "Debug scratch memory usage"},
   {"1queue",    AGX_DBG_1QUEUE,   "Force usage of a single queue for multiple contexts"},
   {"nosoft",    AGX_DBG_NOSOFT,   "Disable soft fault optimizations"},
   {"bodumpverbose", AGX_DBG_BODUMPVERBOSE, "Include extra info with dumps"},
   DEBUG_NAMED_VALUE_END
};
/* clang-format on */

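/* Release a BO's CPU mapping, its GPU VA range, and its GEM handle. The
 * kernel tears down any remaining VM bindings when the handle is closed, so
 * no explicit unbind is issued here.
 */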
void
agx_bo_free(struct agx_device *dev, struct agx_bo *bo)
{
   const uint64_t handle = bo->handle;

   if (bo->_map)
      munmap(bo->_map, bo->size);

   /* Free the VA. No need to unmap the BO, as the kernel will take care of that
    * when we close it.
    */
   agx_va_free(dev, bo->va);

   if (bo->prime_fd != -1)
      close(bo->prime_fd);

   /* Reset the handle. This has to happen before the GEM close to avoid a race.
    */
   memset(bo, 0, sizeof(*bo));
   __sync_synchronize();

   struct drm_gem_close args = {.handle = handle};
   drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &args);
}

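/* Bind (or unbind) a range of a BO into the device VM at the given address by
 * wrapping DRM_IOCTL_ASAHI_GEM_BIND. offset_B/size_B select the range within
 * the BO, flags carries ASAHI_BIND_READ/WRITE permissions, and unbind selects
 * between the bind and unbind ops.
 */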
static int
agx_bo_bind(struct agx_device *dev, struct agx_bo *bo, uint64_t addr,
            size_t size_B, uint64_t offset_B, uint32_t flags, bool unbind)
{
   struct drm_asahi_gem_bind gem_bind = {
      .op = unbind ? ASAHI_BIND_OP_UNBIND : ASAHI_BIND_OP_BIND,
      .flags = flags,
      .handle = bo->handle,
      .vm_id = dev->vm_id,
      .offset = offset_B,
      .range = size_B,
      .addr = addr,
   };

   int ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_GEM_BIND, &gem_bind);
   if (ret) {
      fprintf(stderr, "DRM_IOCTL_ASAHI_GEM_BIND failed: %m (handle=%d)\n",
              bo->handle);
   }

   return ret;
}

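/* Allocate a new BO: create a GEM object (VM-private unless it may be
 * shared), reserve a GPU VA range (from the USC heap for low-VA/executable
 * BOs), and bind the object there, read-only if AGX_BO_READONLY is set.
 * Returns NULL on failure.
 */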
static struct agx_bo *
agx_bo_alloc(struct agx_device *dev, size_t size, size_t align,
             enum agx_bo_flags flags)
{
   struct agx_bo *bo;
   unsigned handle = 0;

   /* executable implies low va */
   assert(!(flags & AGX_BO_EXEC) || (flags & AGX_BO_LOW_VA));

   struct drm_asahi_gem_create gem_create = {.size = size};

   if (flags & AGX_BO_WRITEBACK)
      gem_create.flags |= ASAHI_GEM_WRITEBACK;

   if (!(flags & (AGX_BO_SHARED | AGX_BO_SHAREABLE))) {
      gem_create.flags |= ASAHI_GEM_VM_PRIVATE;
      gem_create.vm_id = dev->vm_id;
   }

   int ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_GEM_CREATE, &gem_create);
   if (ret) {
      fprintf(stderr, "DRM_IOCTL_ASAHI_GEM_CREATE failed: %m\n");
      return NULL;
   }

   handle = gem_create.handle;

   pthread_mutex_lock(&dev->bo_map_lock);
   bo = agx_lookup_bo(dev, handle);
   dev->max_handle = MAX2(dev->max_handle, handle);
   pthread_mutex_unlock(&dev->bo_map_lock);

   /* Fresh handle */
   assert(!memcmp(bo, &((struct agx_bo){}), sizeof(*bo)));

   bo->dev = dev;
   bo->size = gem_create.size;
   bo->align = align;
   bo->flags = flags;
   bo->handle = handle;
   bo->prime_fd = -1;

   enum agx_va_flags va_flags = flags & AGX_BO_LOW_VA ? AGX_VA_USC : 0;
   bo->va = agx_va_alloc(dev, size, bo->align, va_flags, 0);
   if (!bo->va) {
      fprintf(stderr, "Failed to allocate BO VMA\n");
      agx_bo_free(dev, bo);
      return NULL;
   }

   uint32_t bind = ASAHI_BIND_READ;
   if (!(flags & AGX_BO_READONLY)) {
      bind |= ASAHI_BIND_WRITE;
   }

   ret = dev->ops.bo_bind(dev, bo, bo->va->addr, bo->size, 0, bind, false);
   if (ret) {
      agx_bo_free(dev, bo);
      return NULL;
   }

   return bo;
}

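/* Map a BO into the CPU address space. The kernel hands back an mmap offset
 * via DRM_IOCTL_ASAHI_GEM_MMAP_OFFSET, which is then passed to mmap() on the
 * DRM fd. On failure, bo->_map is left NULL.
 */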
static void
agx_bo_mmap(struct agx_device *dev, struct agx_bo *bo)
{
   assert(bo->_map == NULL && "not double mapped");

   struct drm_asahi_gem_mmap_offset gem_mmap_offset = {.handle = bo->handle};
   int ret;

   ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_GEM_MMAP_OFFSET, &gem_mmap_offset);
   if (ret) {
      fprintf(stderr, "DRM_IOCTL_ASAHI_GEM_MMAP_OFFSET failed: %m\n");
      assert(0);
   }

   bo->_map = os_mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
                      dev->fd, gem_mmap_offset.offset);
   if (bo->_map == MAP_FAILED) {
      bo->_map = NULL;
      fprintf(stderr,
              "mmap failed: result=%p size=0x%llx fd=%i offset=0x%llx %m\n",
              bo->_map, (long long)bo->size, dev->fd,
              (long long)gem_mmap_offset.offset);
   }
}

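/* Import a dma-buf fd as an agx_bo. If the underlying GEM handle is new, the
 * BO is sized via lseek(SEEK_END), given a VA allocation, and bound
 * read/write; if the handle is already tracked, the existing BO is
 * reference-counted instead (taking care with a concurrent release, see the
 * comment below). Returns NULL on failure.
 */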
struct agx_bo *
agx_bo_import(struct agx_device *dev, int fd)
{
   struct agx_bo *bo;
   ASSERTED int ret;
   unsigned gem_handle;

   pthread_mutex_lock(&dev->bo_map_lock);

   ret = drmPrimeFDToHandle(dev->fd, fd, &gem_handle);
   if (ret) {
      fprintf(stderr, "import failed: Could not map fd %d to handle\n", fd);
      pthread_mutex_unlock(&dev->bo_map_lock);
      return NULL;
   }

   bo = agx_lookup_bo(dev, gem_handle);
   dev->max_handle = MAX2(dev->max_handle, gem_handle);

   if (!bo->size) {
      bo->dev = dev;
      bo->size = lseek(fd, 0, SEEK_END);
      bo->align = dev->params.vm_page_size;

      /* lseek can fail and return -1, and a size of -1 (or 0) is not
       * something we can sensibly mmap later. Be robust and fail the import
       * cleanly in both cases.
       */
      if ((bo->size == 0) || (bo->size == (size_t)-1)) {
         pthread_mutex_unlock(&dev->bo_map_lock);
         return NULL;
      }
      if (bo->size & (dev->params.vm_page_size - 1)) {
         fprintf(
            stderr,
            "import failed: BO is not a multiple of the page size (0x%llx bytes)\n",
            (long long)bo->size);
         goto error;
      }

      bo->flags = AGX_BO_SHARED | AGX_BO_SHAREABLE;
      bo->handle = gem_handle;
      bo->prime_fd = os_dupfd_cloexec(fd);
      bo->label = "Imported BO";
      assert(bo->prime_fd >= 0);

      p_atomic_set(&bo->refcnt, 1);
      bo->va = agx_va_alloc(dev, bo->size, bo->align, 0, 0);

      if (!bo->va) {
         fprintf(
            stderr,
            "import failed: Could not allocate from VMA heap (0x%llx bytes)\n",
            (long long)bo->size);
         abort();
      }

      if (dev->is_virtio) {
         bo->vbo_res_id = vdrm_handle_to_res_id(dev->vdrm, bo->handle);
      }

      ret = dev->ops.bo_bind(dev, bo, bo->va->addr, bo->size, 0,
                             ASAHI_BIND_READ | ASAHI_BIND_WRITE, false);
      if (ret) {
         fprintf(stderr, "import failed: Could not bind BO at 0x%llx\n",
                 (long long)bo->va->addr);
         abort();
      }
   } else {
      /* bo->refcnt == 0 can happen if the BO was being released but
       * agx_bo_import() acquired the lock before agx_bo_unreference(). In
       * that case we cannot use agx_bo_reference() directly; we have to
       * re-initialize the refcnt to 1. Note that agx_bo_unreference()
       * re-checks the refcnt just after acquiring the lock, so it will not
       * free the object if agx_bo_import() claimed it in the meantime.
       */
      if (p_atomic_read(&bo->refcnt) == 0)
         p_atomic_set(&bo->refcnt, 1);
      else
         agx_bo_reference(bo);
   }
   pthread_mutex_unlock(&dev->bo_map_lock);

   assert(bo->dev != NULL && "post-condition");

   if (dev->debug & AGX_DBG_TRACE) {
      agx_bo_map(bo);
      agxdecode_track_alloc(dev->agxdecode, bo);
   }

   return bo;

error:
   memset(bo, 0, sizeof(*bo));
   pthread_mutex_unlock(&dev->bo_map_lock);
   return NULL;
}

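/* Export a BO as a dma-buf fd. The first export marks the BO as shared and
 * stashes a duplicate of the prime fd; if a GPU writer is still pending, its
 * syncobj is exported as a sync file and attached to the dma-buf so other
 * processes see the write through implicit sync. Returns the fd, or -1 on
 * failure.
 */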
int
agx_bo_export(struct agx_device *dev, struct agx_bo *bo)
{
   int fd;

   assert(bo->flags & AGX_BO_SHAREABLE);

   if (drmPrimeHandleToFD(dev->fd, bo->handle, DRM_CLOEXEC, &fd))
      return -1;

   if (!(bo->flags & AGX_BO_SHARED)) {
      bo->flags |= AGX_BO_SHARED;
      assert(bo->prime_fd == -1);
      bo->prime_fd = os_dupfd_cloexec(fd);

      /* If there is a pending writer to this BO, import it into the buffer
       * for implicit sync.
       */
      uint64_t writer = p_atomic_read_relaxed(&bo->writer);
      if (writer) {
         int out_sync_fd = -1;
         int ret = drmSyncobjExportSyncFile(
            dev->fd, agx_bo_writer_syncobj(writer), &out_sync_fd);
         assert(ret >= 0);
         assert(out_sync_fd >= 0);

         ret = agx_import_sync_file(dev, bo, out_sync_fd);
         assert(ret >= 0);
         close(out_sync_fd);
      }
   }

   assert(bo->prime_fd >= 0);
   return fd;
}

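/* Bind a range of a BO as a kernel "object" via
 * DRM_IOCTL_ASAHI_GEM_BIND_OBJECT, returning the new object handle through
 * object_handle. (The object-bind path seems intended for buffers the
 * firmware refers to by handle rather than by VA, e.g. user timestamp
 * buffers; that reading is an assumption and is not spelled out in this
 * file.)
 */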
static int
agx_bo_bind_object(struct agx_device *dev, struct agx_bo *bo,
                   uint32_t *object_handle, size_t size_B, uint64_t offset_B,
                   uint32_t flags)
{
   struct drm_asahi_gem_bind_object gem_bind = {
      .op = ASAHI_BIND_OBJECT_OP_BIND,
      .flags = flags,
      .handle = bo->handle,
      .vm_id = 0,
      .offset = offset_B,
      .range = size_B,
   };

   int ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_GEM_BIND_OBJECT, &gem_bind);
   if (ret) {
      fprintf(stderr,
              "DRM_IOCTL_ASAHI_GEM_BIND_OBJECT failed: %m (handle=%d)\n",
              bo->handle);
   }

   *object_handle = gem_bind.object_handle;

   return ret;
}

static int
agx_bo_unbind_object(struct agx_device *dev, uint32_t object_handle,
                     uint32_t flags)
{
   struct drm_asahi_gem_bind_object gem_bind = {
      .op = ASAHI_BIND_OBJECT_OP_UNBIND,
      .flags = flags,
      .object_handle = object_handle,
   };

   int ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_GEM_BIND_OBJECT, &gem_bind);
   if (ret) {
      fprintf(stderr,
              "DRM_IOCTL_ASAHI_GEM_BIND_OBJECT failed: %m (object_handle=%d)\n",
              object_handle);
   }

   return ret;
}

static void
agx_get_global_ids(struct agx_device *dev)
{
   dev->next_global_id = 0;
   dev->last_global_id = 0x1000000;
}

uint64_t
agx_get_global_id(struct agx_device *dev)
{
   if (unlikely(dev->next_global_id >= dev->last_global_id)) {
      agx_get_global_ids(dev);
   }

   return dev->next_global_id++;
}

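/* Query device parameters (param_group 0) from the kernel into the supplied
 * buffer. Returns the number of bytes the kernel reports, or -EINVAL if the
 * ioctl fails.
 */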
static ssize_t
agx_get_params(struct agx_device *dev, void *buf, size_t size)
{
   struct drm_asahi_get_params get_param = {
      .param_group = 0,
      .pointer = (uint64_t)(uintptr_t)buf,
      .size = size,
   };

   memset(buf, 0, size);

   int ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_GET_PARAMS, &get_param);
   if (ret) {
      fprintf(stderr, "DRM_IOCTL_ASAHI_GET_PARAMS failed: %m\n");
      return -EINVAL;
   }

   return get_param.size;
}

static int
agx_submit(struct agx_device *dev, struct drm_asahi_submit *submit,
           struct agx_submit_virt *virt)
{
   return drmIoctl(dev->fd, DRM_IOCTL_ASAHI_SUBMIT, submit);
}

const agx_device_ops_t agx_device_drm_ops = {
   .bo_alloc = agx_bo_alloc,
   .bo_bind = agx_bo_bind,
   .bo_mmap = agx_bo_mmap,
   .get_params = agx_get_params,
   .submit = agx_submit,
   .bo_bind_object = agx_bo_bind_object,
   .bo_unbind_object = agx_bo_unbind_object,
};

static uint64_t
gcd(uint64_t n, uint64_t m)
{
   while (n != 0) {
      uint64_t remainder = m % n;
      m = n;
      n = remainder;
   }

   return m;
}

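/* Precompute rational factors for converting GPU/user timestamp ticks to
 * nanoseconds: ns = ticks * num / den, with num/den reduced by the GCD of the
 * tick frequency and NSEC_PER_SEC so intermediate products stay small. As a
 * worked example (assuming a 24 MHz timer, which is typical of Apple silicon
 * but not guaranteed by this code): gcd(24000000, 1000000000) = 8000000, so
 * num = 125 and den = 3, and a tick count t converts as t * 125 / 3 ns.
 */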
static void
agx_init_timestamps(struct agx_device *dev)
{
   uint64_t ts_gcd = gcd(dev->params.timer_frequency_hz, NSEC_PER_SEC);

   dev->timestamp_to_ns.num = NSEC_PER_SEC / ts_gcd;
   dev->timestamp_to_ns.den = dev->params.timer_frequency_hz / ts_gcd;

   uint64_t user_ts_gcd =
      gcd(dev->params.user_timestamp_frequency_hz, NSEC_PER_SEC);

   dev->user_timestamp_to_ns.num = NSEC_PER_SEC / user_ts_gcd;
   dev->user_timestamp_to_ns.den =
      dev->params.user_timestamp_frequency_hz / user_ts_gcd;
}

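/* Open and initialize an agx_device whose DRM fd is already set. This probes
 * whether we are talking to the native asahi driver or a virtio-gpu native
 * context, fetches device parameters, validates the (unstable) UABI version
 * and incompat feature mask, lays out the USC/user/kernel VA ranges, creates
 * the VM and VMA heaps, deserializes the libagx NIR library, and sets up the
 * shared printf/abort buffer at its fixed address. Returns false on failure.
 */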
bool
agx_open_device(void *memctx, struct agx_device *dev)
{
   dev->debug =
      debug_get_flags_option("ASAHI_MESA_DEBUG", agx_debug_options, 0);

   dev->ops = agx_device_drm_ops;

   ssize_t params_size = -1;

   /* DRM version check */
   {
      drmVersionPtr version = drmGetVersion(dev->fd);
      if (!version) {
         fprintf(stderr, "cannot get version: %s\n", strerror(errno));
         return false;
      }

      if (!strcmp(version->name, "asahi")) {
         dev->is_virtio = false;
         dev->ops = agx_device_drm_ops;
      } else if (!strcmp(version->name, "virtio_gpu")) {
         dev->is_virtio = true;
         if (!agx_virtio_open_device(dev)) {
            fprintf(
               stderr,
               "Error opening virtio-gpu device for Asahi native context\n");
            return false;
         }
      } else {
         return false;
      }

      drmFreeVersion(version);
   }

   params_size = dev->ops.get_params(dev, &dev->params, sizeof(dev->params));
   if (params_size <= 0) {
      assert(0);
      return false;
   }
   assert(params_size >= sizeof(dev->params));

   /* Refuse to probe. */
   if (dev->params.unstable_uabi_version != DRM_ASAHI_UNSTABLE_UABI_VERSION) {
      fprintf(
         stderr,
         "You are attempting to use upstream Mesa with a downstream kernel!\n"
         "This WILL NOT work.\n"
         "The Asahi UABI is unstable and NOT SUPPORTED in upstream Mesa.\n"
         "UABI related code in upstream Mesa is not for use!\n"
         "\n"
         "Do NOT attempt to patch out checks, you WILL break your system.\n"
         "Do NOT report bugs.\n"
         "Do NOT ask Mesa developers for support.\n"
         "Do NOT write guides about how to patch out these checks.\n"
         "Do NOT package patches to Mesa to bypass this.\n"
         "\n"
         "~~~\n"
         "This is not a place of honor.\n"
         "No highly esteemed deed is commemorated here.\n"
         "Nothing valued is here.\n"
         "\n"
         "What is here was dangerous and repulsive to us.\n"
         "This message is a warning about danger.\n"
         "\n"
         "The danger is still present, in your time, as it was in ours.\n"
         "The danger is unleashed only if you substantially disturb this place physically.\n"
         "This place is best shunned and left uninhabited.\n"
         "~~~\n"
         "\n"
         "THIS IS NOT A BUG. THIS IS YOU DOING SOMETHING BROKEN!\n");
      abort();
   }

   uint64_t incompat =
      dev->params.feat_incompat & (~AGX_SUPPORTED_INCOMPAT_FEATURES);
   if (incompat) {
      fprintf(stderr, "Missing GPU incompat features: 0x%" PRIx64 "\n",
              incompat);
      assert(0);
      return false;
   }

   assert(dev->params.gpu_generation >= 13);
   const char *variant = " Unknown";
   switch (dev->params.gpu_variant) {
   case 'G':
      variant = "";
      break;
   case 'S':
      variant = " Pro";
      break;
   case 'C':
      variant = " Max";
      break;
   case 'D':
      variant = " Ultra";
      break;
   }
   snprintf(dev->name, sizeof(dev->name), "Apple M%d%s (G%d%c %02X)",
            dev->params.gpu_generation - 12, variant,
            dev->params.gpu_generation, dev->params.gpu_variant,
            dev->params.gpu_revision + 0xA0);

   /* We need a large chunk of VA space carved out for robustness. Hardware
    * loads can shift an i32 by up to 2, for a total shift of 4. If the base
    * address is zero, 36-bits is therefore enough to trap any zero-extended
    * 32-bit index. For more generality we would need a larger carveout, but
    * this is already optimal for VBOs.
    *
    * TODO: Maybe this should be on top instead? Might be ok.
    */
   uint64_t reservation = (1ull << 36);

   /* Also reserve VA space for the printf buffer at a stable address, avoiding
    * the need for relocs in precompiled shaders.
    */
   assert(reservation == LIBAGX_PRINTF_BUFFER_ADDRESS);
   reservation += LIBAGX_PRINTF_BUFFER_SIZE;

   dev->guard_size = dev->params.vm_page_size;
   if (dev->params.vm_usc_start) {
      dev->shader_base = dev->params.vm_usc_start;
   } else {
      // Put the USC heap at the bottom of the user address space, 4GiB aligned
      dev->shader_base = ALIGN_POT(MAX2(dev->params.vm_user_start, reservation),
                                   0x100000000ull);
   }

   if (dev->shader_base < reservation) {
      /* Our robustness implementation requires the bottom unmapped */
      fprintf(stderr, "Unexpected address layout, can't cope\n");
      assert(0);
      return false;
   }

   uint64_t shader_size = 0x100000000ull;
   // Put the user heap after the USC heap
   uint64_t user_start = dev->shader_base + shader_size;

   assert(dev->shader_base >= dev->params.vm_user_start);
   assert(user_start < dev->params.vm_user_end);

   dev->agxdecode = agxdecode_new_context(dev->shader_base);

   agx_init_timestamps(dev);

   util_sparse_array_init(&dev->bo_map, sizeof(struct agx_bo), 512);
   pthread_mutex_init(&dev->bo_map_lock, NULL);

   simple_mtx_init(&dev->bo_cache.lock, mtx_plain);
   list_inithead(&dev->bo_cache.lru);

   for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i)
      list_inithead(&dev->bo_cache.buckets[i]);

   // Put the kernel heap at the top of the address space.
   // Give it 32GB of address space, should be more than enough for any
   // reasonable use case.
   uint64_t kernel_size = MAX2(dev->params.vm_kernel_min_size, 32ull << 30);
   struct drm_asahi_vm_create vm_create = {
      .kernel_start = dev->params.vm_user_end - kernel_size,
      .kernel_end = dev->params.vm_user_end,
   };

   uint64_t user_size = vm_create.kernel_start - user_start;

   int ret = asahi_simple_ioctl(dev, DRM_IOCTL_ASAHI_VM_CREATE, &vm_create);
   if (ret) {
      fprintf(stderr, "DRM_IOCTL_ASAHI_VM_CREATE failed: %m\n");
      assert(0);
      return false;
   }

   simple_mtx_init(&dev->vma_lock, mtx_plain);
   util_vma_heap_init(&dev->main_heap, user_start, user_size);
   util_vma_heap_init(&dev->usc_heap, dev->shader_base, shader_size);

   dev->vm_id = vm_create.vm_id;

   agx_get_global_ids(dev);

   glsl_type_singleton_init_or_ref();
   struct blob_reader blob;
   blob_reader_init(&blob, (void *)libagx_0_nir, sizeof(libagx_0_nir));
   dev->libagx = nir_deserialize(memctx, &agx_nir_options, &blob);

   if (agx_gather_device_key(dev).needs_g13x_coherency == U_TRISTATE_YES) {
      dev->libagx_programs = libagx_g13x;
   } else {
      dev->libagx_programs = libagx_g13g;
   }

   if (dev->params.gpu_generation >= 14 && dev->params.num_clusters_total > 1) {
      dev->chip = AGX_CHIP_G14X;
   } else if (dev->params.gpu_generation >= 14) {
      dev->chip = AGX_CHIP_G14G;
   } else if (dev->params.gpu_generation >= 13 &&
              dev->params.num_clusters_total > 1) {
      dev->chip = AGX_CHIP_G13X;
   } else {
      dev->chip = AGX_CHIP_G13G;
   }

   struct agx_bo *bo = agx_bo_create(dev, LIBAGX_PRINTF_BUFFER_SIZE, 0,
                                     AGX_BO_WRITEBACK, "Printf/abort");
   if (!bo) {
      fprintf(stderr, "Failed to allocate printf buffer\n");
      return false;
   }

   ret = dev->ops.bo_bind(dev, bo, LIBAGX_PRINTF_BUFFER_ADDRESS,
                          LIBAGX_PRINTF_BUFFER_SIZE, 0,
                          ASAHI_BIND_READ | ASAHI_BIND_WRITE, false);
   if (ret) {
      fprintf(stderr, "Failed to bind printf buffer\n");
      return false;
   }

   u_printf_init(&dev->printf, bo, agx_bo_map(bo));
   u_printf_singleton_init_or_ref();
   u_printf_singleton_add(dev->libagx->printf_info,
                          dev->libagx->printf_info_count);
   return true;
}

void
agx_close_device(struct agx_device *dev)
{
   agx_bo_unreference(dev, dev->printf.bo);
   u_printf_destroy(&dev->printf);
   ralloc_free((void *)dev->libagx);
   agx_bo_cache_evict_all(dev);
   util_sparse_array_finish(&dev->bo_map);
   agxdecode_destroy_context(dev->agxdecode);

   util_vma_heap_finish(&dev->main_heap);
   util_vma_heap_finish(&dev->usc_heap);
   glsl_type_singleton_decref();
   u_printf_singleton_decref();

   close(dev->fd);
}

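/* Create a hardware command queue with the given capability mask and
 * priority, returning its queue ID. Under ASAHI_MESA_DEBUG=1queue, a single
 * queue is lazily created and shared by every context (reusing vma_lock to
 * guard the cached ID, since this is a debug-only path). A minimal usage
 * sketch, with error handling omitted:
 *
 *    uint32_t q = agx_create_command_queue(dev, caps, priority);
 *    ...submit work against q...
 *    agx_destroy_command_queue(dev, q);
 */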
uint32_t
agx_create_command_queue(struct agx_device *dev, uint32_t caps,
                         uint32_t priority)
{
   if (dev->debug & AGX_DBG_1QUEUE) {
      // Abuse this lock for this, it's debug only anyway
      simple_mtx_lock(&dev->vma_lock);
      if (dev->queue_id) {
         simple_mtx_unlock(&dev->vma_lock);
         return dev->queue_id;
      }
   }

   struct drm_asahi_queue_create queue_create = {
      .vm_id = dev->vm_id,
      .queue_caps = caps,
      .priority = priority,
      .flags = 0,
   };

   int ret =
      asahi_simple_ioctl(dev, DRM_IOCTL_ASAHI_QUEUE_CREATE, &queue_create);
   if (ret) {
      fprintf(stderr, "DRM_IOCTL_ASAHI_QUEUE_CREATE failed: %m\n");
      assert(0);
   }

   if (dev->debug & AGX_DBG_1QUEUE) {
      dev->queue_id = queue_create.queue_id;
      simple_mtx_unlock(&dev->vma_lock);
   }

   return queue_create.queue_id;
}

int
agx_destroy_command_queue(struct agx_device *dev, uint32_t queue_id)
{
   if (dev->debug & AGX_DBG_1QUEUE)
      return 0;

   struct drm_asahi_queue_destroy queue_destroy = {
      .queue_id = queue_id,
   };

   return asahi_simple_ioctl(dev, DRM_IOCTL_ASAHI_QUEUE_DESTROY,
                             &queue_destroy);
}

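/* Attach a sync file fd to a BO's dma-buf as a write fence
 * (DMA_BUF_IOCTL_IMPORT_SYNC_FILE), so that other users of the buffer wait on
 * our pending GPU work through implicit sync. The companion
 * agx_export_sync_file() goes the other way, extracting the buffer's current
 * fences as a sync file fd.
 */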
int
agx_import_sync_file(struct agx_device *dev, struct agx_bo *bo, int fd)
{
   struct dma_buf_import_sync_file import_sync_file_ioctl = {
      .flags = DMA_BUF_SYNC_WRITE,
      .fd = fd,
   };

   assert(fd >= 0);
   assert(bo->prime_fd != -1);

   int ret = drmIoctl(bo->prime_fd, DMA_BUF_IOCTL_IMPORT_SYNC_FILE,
                      &import_sync_file_ioctl);
   assert(ret >= 0);

   return ret;
}

int
agx_export_sync_file(struct agx_device *dev, struct agx_bo *bo)
{
   struct dma_buf_export_sync_file export_sync_file_ioctl = {
      .flags = DMA_BUF_SYNC_RW,
      .fd = -1,
   };

   assert(bo->prime_fd != -1);

   int ret = drmIoctl(bo->prime_fd, DMA_BUF_IOCTL_EXPORT_SYNC_FILE,
                      &export_sync_file_ioctl);
   assert(ret >= 0);
   assert(export_sync_file_ioctl.fd >= 0);

   return ret >= 0 ? export_sync_file_ioctl.fd : ret;
}

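/* Given a faulting GPU address, scan the BO map for the closest BO at or
 * below it and log whether the address lies inside that BO or how far past
 * its end it is (capped at 1 GiB as a sanity check). Intended purely as a
 * debugging aid after a GPU fault.
 */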
void
agx_debug_fault(struct agx_device *dev, uint64_t addr)
{
   pthread_mutex_lock(&dev->bo_map_lock);

   struct agx_bo *best = NULL;

   for (uint32_t handle = 0; handle <= dev->max_handle; handle++) {
      struct agx_bo *bo = agx_lookup_bo(dev, handle);
      if (!bo->va)
         continue;

      uint64_t bo_addr = bo->va->addr;
      if (bo->flags & AGX_BO_LOW_VA)
         bo_addr += dev->shader_base;

      if (!bo->size || bo_addr > addr)
         continue;

      if (!best || bo_addr > best->va->addr)
         best = bo;
   }

   if (!best) {
      mesa_logw("Address 0x%" PRIx64 " is unknown\n", addr);
   } else {
      uint64_t start = best->va->addr;
      uint64_t end = best->va->addr + best->size;
      if (addr > (end + 1024 * 1024 * 1024)) {
         /* 1GiB max as a sanity check */
         mesa_logw("Address 0x%" PRIx64 " is unknown\n", addr);
      } else if (addr > end) {
         mesa_logw("Address 0x%" PRIx64 " is 0x%" PRIx64
                   " bytes beyond an object at 0x%" PRIx64 "..0x%" PRIx64
                   " (%s)\n",
                   addr, addr - end, start, end - 1, best->label);
      } else {
         mesa_logw("Address 0x%" PRIx64 " is 0x%" PRIx64
                   " bytes into an object at 0x%" PRIx64 "..0x%" PRIx64
                   " (%s)\n",
                   addr, addr - start, start, end - 1, best->label);
      }
   }

   pthread_mutex_unlock(&dev->bo_map_lock);
}

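/* Read the current GPU timestamp. If the kernel advertises
 * DRM_ASAHI_FEAT_GETTIME, use DRM_IOCTL_ASAHI_GET_TIME; otherwise fall back
 * to the architectural counter (cntvct_el0 on arm64, or rdtsc on x86, which
 * FEX maps to the same counter when emulating without thunking).
 */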
uint64_t
agx_get_gpu_timestamp(struct agx_device *dev)
{
   if (dev->params.feat_compat & DRM_ASAHI_FEAT_GETTIME) {
      struct drm_asahi_get_time get_time = {.flags = 0, .extensions = 0};

      int ret = asahi_simple_ioctl(dev, DRM_IOCTL_ASAHI_GET_TIME, &get_time);
      if (ret) {
         fprintf(stderr, "DRM_IOCTL_ASAHI_GET_TIME failed: %m\n");
      } else {
         return get_time.gpu_timestamp;
      }
   }
#if DETECT_ARCH_AARCH64
   uint64_t ret;
   __asm__ volatile("mrs \t%0, cntvct_el0" : "=r"(ret));
   return ret;
#elif DETECT_ARCH_X86 || DETECT_ARCH_X86_64
   /* Maps to the above when run under FEX without thunking */
   uint32_t high, low;
   __asm__ volatile("rdtsc" : "=a"(low), "=d"(high));
   return (uint64_t)low | ((uint64_t)high << 32);
#else
#error "invalid architecture for asahi"
#endif
}

/* (Re)define UUID_SIZE to avoid including vulkan.h (or p_defines.h) here. */
#define UUID_SIZE 16

void
agx_get_device_uuid(const struct agx_device *dev, void *uuid)
{
   struct mesa_sha1 sha1_ctx;
   _mesa_sha1_init(&sha1_ctx);

   /* The device UUID uniquely identifies the given device within the machine.
    * Since we never have more than one device, this doesn't need to be a real
    * UUID, so we use SHA1("agx" + gpu_generation + gpu_variant + gpu_revision).
    */
   static const char *device_name = "agx";
   _mesa_sha1_update(&sha1_ctx, device_name, strlen(device_name));

   _mesa_sha1_update(&sha1_ctx, &dev->params.gpu_generation,
                     sizeof(dev->params.gpu_generation));
   _mesa_sha1_update(&sha1_ctx, &dev->params.gpu_variant,
                     sizeof(dev->params.gpu_variant));
   _mesa_sha1_update(&sha1_ctx, &dev->params.gpu_revision,
                     sizeof(dev->params.gpu_revision));

   uint8_t sha1[SHA1_DIGEST_LENGTH];
   _mesa_sha1_final(&sha1_ctx, sha1);

   assert(SHA1_DIGEST_LENGTH >= UUID_SIZE);
   memcpy(uuid, sha1, UUID_SIZE);
}

void
agx_get_driver_uuid(void *uuid)
{
   const char *driver_id = PACKAGE_VERSION MESA_GIT_SHA1;

   /* The driver UUID is used to determine sharability of images and memory
    * between two Vulkan instances in separate processes, and also between the
    * Vulkan and OpenGL drivers. Anyone who wants to share memory needs to
    * check the device UUID as well.
    */
   struct mesa_sha1 sha1_ctx;
   _mesa_sha1_init(&sha1_ctx);

   _mesa_sha1_update(&sha1_ctx, driver_id, strlen(driver_id));

   uint8_t sha1[SHA1_DIGEST_LENGTH];
   _mesa_sha1_final(&sha1_ctx, sha1);

   assert(SHA1_DIGEST_LENGTH >= UUID_SIZE);
   memcpy(uuid, sha1, UUID_SIZE);
}

unsigned
agx_get_num_cores(const struct agx_device *dev)
{
   unsigned n = 0;

   for (unsigned cl = 0; cl < dev->params.num_clusters_total; cl++) {
      n += util_bitcount(dev->params.core_masks[cl]);
   }

   return n;
}

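/* Gather the device-dependent shader key. needs_g13x_coherency is set for
 * multi-cluster G13 parts and multi-die configurations; soft_fault mirrors
 * agx_has_soft_fault(), which gates the soft-fault optimizations that
 * AGX_DBG_NOSOFT disables.
 */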
struct agx_device_key
agx_gather_device_key(struct agx_device *dev)
{
   bool g13x_coh = (dev->params.gpu_generation == 13 &&
                    dev->params.num_clusters_total > 1) ||
                   dev->params.num_dies > 1;

   return (struct agx_device_key){
      .needs_g13x_coherency = u_tristate_make(g13x_coh),
      .soft_fault = agx_has_soft_fault(dev),
   };
}