/*
 * Copyright 2021 Alyssa Rosenzweig
 * Copyright 2019 Collabora, Ltd.
 * Copyright 2020 Igalia S.L.
 * SPDX-License-Identifier: MIT
 */

#include "agx_device.h"
#include <inttypes.h>
#include "clc/asahi_clc.h"
#include "util/macros.h"
#include "util/ralloc.h"
#include "util/timespec.h"
#include "agx_bo.h"
#include "agx_compile.h"
#include "agx_device_virtio.h"
#include "agx_scratch.h"
#include "decode.h"
#include "glsl_types.h"
#include "libagx_dgc.h"
#include "libagx_shaders.h"

#include <fcntl.h>
#include <xf86drm.h>
#include "drm-uapi/dma-buf.h"
#include "util/blob.h"
#include "util/log.h"
#include "util/mesa-sha1.h"
#include "util/os_file.h"
#include "util/os_mman.h"
#include "util/os_time.h"
#include "util/simple_mtx.h"
#include "util/u_printf.h"
#include "git_sha1.h"
#include "nir_serialize.h"
#include "unstable_asahi_drm.h"
#include "vdrm.h"

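/* Dispatch a simple, synchronous ioctl either directly to the DRM fd or
 * through the virtio-gpu transport, depending on how the device was opened.
 */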
static inline int
asahi_simple_ioctl(struct agx_device *dev, unsigned cmd, void *req)
{
   if (dev->is_virtio) {
      return agx_virtio_simple_ioctl(dev, cmd, req);
   } else {
      return drmIoctl(dev->fd, cmd, req);
   }
}

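/* Debug flags parsed from the ASAHI_MESA_DEBUG environment variable in
 * agx_open_device() via debug_get_flags_option().
 */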
/* clang-format off */
static const struct debug_named_value agx_debug_options[] = {
   {"trace",     AGX_DBG_TRACE,    "Trace the command stream"},
   {"bodump",    AGX_DBG_BODUMP,   "Periodically dump live BOs"},
   {"no16",      AGX_DBG_NO16,     "Disable 16-bit support"},
   {"perf",      AGX_DBG_PERF,     "Print performance warnings"},
#ifndef NDEBUG
   {"dirty",     AGX_DBG_DIRTY,    "Disable dirty tracking"},
#endif
   {"precompile",AGX_DBG_PRECOMPILE,"Precompile shaders for shader-db"},
   {"nocompress",AGX_DBG_NOCOMPRESS,"Disable lossless compression"},
   {"nocluster", AGX_DBG_NOCLUSTER,"Disable vertex clustering"},
   {"sync",      AGX_DBG_SYNC,     "Synchronously wait for all submissions"},
   {"stats",     AGX_DBG_STATS,    "Show command execution statistics"},
   {"resource",  AGX_DBG_RESOURCE, "Log resource operations"},
   {"batch",     AGX_DBG_BATCH,    "Log batches"},
   {"nowc",      AGX_DBG_NOWC,     "Disable write-combining"},
   {"synctvb",   AGX_DBG_SYNCTVB,  "Synchronous TVB growth"},
   {"smalltile", AGX_DBG_SMALLTILE,"Force 16x16 tiles"},
   {"feedback",  AGX_DBG_FEEDBACK, "Debug feedback loops"},
   {"nomsaa",    AGX_DBG_NOMSAA,   "Force disable MSAA"},
   {"noshadow",  AGX_DBG_NOSHADOW, "Force disable resource shadowing"},
   {"scratch",   AGX_DBG_SCRATCH,  "Debug scratch memory usage"},
   {"1queue",    AGX_DBG_1QUEUE,   "Force usage of a single queue for multiple contexts"},
   {"nosoft",    AGX_DBG_NOSOFT,   "Disable soft fault optimizations"},
   {"bodumpverbose", AGX_DBG_BODUMPVERBOSE,   "Include extra info with dumps"},
   DEBUG_NAMED_VALUE_END
};
/* clang-format on */

void
agx_bo_free(struct agx_device *dev, struct agx_bo *bo)
{
   const uint64_t handle = bo->handle;

   if (bo->_map)
      munmap(bo->_map, bo->size);

   /* Free the VA. No need to unmap the BO, as the kernel will take care of that
    * when we close it.
    */
   agx_va_free(dev, bo->va);

   if (bo->prime_fd != -1)
      close(bo->prime_fd);

   /* Reset the handle. This has to happen before the GEM close to avoid a race.
    */
   memset(bo, 0, sizeof(*bo));
   __sync_synchronize();

   struct drm_gem_close args = {.handle = handle};
   drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &args);
}

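/* Map (or unmap) a GEM object into the device VM at the given address. This is
 * the low-level binding primitive used (through dev->ops.bo_bind) by
 * agx_bo_alloc() and agx_bo_import().
 */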
static int
agx_bo_bind(struct agx_device *dev, struct agx_bo *bo, uint64_t addr,
            size_t size_B, uint64_t offset_B, uint32_t flags, bool unbind)
{
   struct drm_asahi_gem_bind gem_bind = {
      .op = unbind ? ASAHI_BIND_OP_UNBIND : ASAHI_BIND_OP_BIND,
      .flags = flags,
      .handle = bo->handle,
      .vm_id = dev->vm_id,
      .offset = offset_B,
      .range = size_B,
      .addr = addr,
   };

   int ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_GEM_BIND, &gem_bind);
   if (ret) {
      fprintf(stderr, "DRM_IOCTL_ASAHI_GEM_BIND failed: %m (handle=%d)\n",
              bo->handle);
   }

   return ret;
}

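/* Allocate a new BO: create the GEM object, carve out a VA range from the
 * appropriate heap (the USC heap for low-VA/executable BOs), and bind it.
 * Returns NULL on failure.
 */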
static struct agx_bo *
agx_bo_alloc(struct agx_device *dev, size_t size, size_t align,
             enum agx_bo_flags flags)
{
   struct agx_bo *bo;
   unsigned handle = 0;

   /* executable implies low va */
   assert(!(flags & AGX_BO_EXEC) || (flags & AGX_BO_LOW_VA));

   struct drm_asahi_gem_create gem_create = {.size = size};

   if (flags & AGX_BO_WRITEBACK)
      gem_create.flags |= ASAHI_GEM_WRITEBACK;

   if (!(flags & (AGX_BO_SHARED | AGX_BO_SHAREABLE))) {
      gem_create.flags |= ASAHI_GEM_VM_PRIVATE;
      gem_create.vm_id = dev->vm_id;
   }

   int ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_GEM_CREATE, &gem_create);
   if (ret) {
      fprintf(stderr, "DRM_IOCTL_ASAHI_GEM_CREATE failed: %m\n");
      return NULL;
   }

   handle = gem_create.handle;

   pthread_mutex_lock(&dev->bo_map_lock);
   bo = agx_lookup_bo(dev, handle);
   dev->max_handle = MAX2(dev->max_handle, handle);
   pthread_mutex_unlock(&dev->bo_map_lock);

   /* Fresh handle */
   assert(!memcmp(bo, &((struct agx_bo){}), sizeof(*bo)));

   bo->dev = dev;
   bo->size = gem_create.size;
   bo->align = align;
   bo->flags = flags;
   bo->handle = handle;
   bo->prime_fd = -1;

   enum agx_va_flags va_flags = flags & AGX_BO_LOW_VA ? AGX_VA_USC : 0;
   bo->va = agx_va_alloc(dev, size, bo->align, va_flags, 0);
   if (!bo->va) {
      fprintf(stderr, "Failed to allocate BO VMA\n");
      agx_bo_free(dev, bo);
      return NULL;
   }

   uint32_t bind = ASAHI_BIND_READ;
   if (!(flags & AGX_BO_READONLY)) {
      bind |= ASAHI_BIND_WRITE;
   }

   ret = dev->ops.bo_bind(dev, bo, bo->va->addr, bo->size, 0, bind, false);
   if (ret) {
      agx_bo_free(dev, bo);
      return NULL;
   }

   return bo;
}

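/* CPU-map a BO through the DRM mmap-offset mechanism. The mapping is cached in
 * bo->_map and torn down in agx_bo_free().
 */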
static void
agx_bo_mmap(struct agx_device *dev, struct agx_bo *bo)
{
   assert(bo->_map == NULL && "not double mapped");

   struct drm_asahi_gem_mmap_offset gem_mmap_offset = {.handle = bo->handle};
   int ret;

   ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_GEM_MMAP_OFFSET, &gem_mmap_offset);
   if (ret) {
      fprintf(stderr, "DRM_IOCTL_ASAHI_GEM_MMAP_OFFSET failed: %m\n");
      assert(0);
   }

   bo->_map = os_mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
                      dev->fd, gem_mmap_offset.offset);
   if (bo->_map == MAP_FAILED) {
      bo->_map = NULL;
      fprintf(stderr,
              "mmap failed: result=%p size=0x%llx fd=%i offset=0x%llx %m\n",
              bo->_map, (long long)bo->size, dev->fd,
              (long long)gem_mmap_offset.offset);
   }
}

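/* Import a dma-buf as an agx_bo. The handle table entry may already be
 * populated if this process previously imported or exported the same buffer,
 * in which case only a reference is taken.
 */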
struct agx_bo *
agx_bo_import(struct agx_device *dev, int fd)
{
   struct agx_bo *bo;
   ASSERTED int ret;
   unsigned gem_handle;

   pthread_mutex_lock(&dev->bo_map_lock);

   ret = drmPrimeFDToHandle(dev->fd, fd, &gem_handle);
   if (ret) {
      fprintf(stderr, "import failed: Could not map fd %d to handle\n", fd);
      pthread_mutex_unlock(&dev->bo_map_lock);
      return NULL;
   }

   bo = agx_lookup_bo(dev, gem_handle);
   dev->max_handle = MAX2(dev->max_handle, gem_handle);

   if (!bo->size) {
      bo->dev = dev;
      bo->size = lseek(fd, 0, SEEK_END);
      bo->align = dev->params.vm_page_size;

      /* lseek() can fail and return -1, and a size of -1 (or 0) is not
       * something we want to hand to mmap. Reject zero-sized and invalid
       * imports up front.
       */
      if ((bo->size == 0) || (bo->size == (size_t)-1)) {
         pthread_mutex_unlock(&dev->bo_map_lock);
         return NULL;
      }
      if (bo->size & (dev->params.vm_page_size - 1)) {
         fprintf(
            stderr,
            "import failed: BO is not a multiple of the page size (0x%llx bytes)\n",
            (long long)bo->size);
         goto error;
      }

      bo->flags = AGX_BO_SHARED | AGX_BO_SHAREABLE;
      bo->handle = gem_handle;
      bo->prime_fd = os_dupfd_cloexec(fd);
      bo->label = "Imported BO";
      assert(bo->prime_fd >= 0);

      p_atomic_set(&bo->refcnt, 1);
      bo->va = agx_va_alloc(dev, bo->size, bo->align, 0, 0);

      if (!bo->va) {
         fprintf(
            stderr,
            "import failed: Could not allocate from VMA heap (0x%llx bytes)\n",
            (long long)bo->size);
         abort();
      }

      if (dev->is_virtio) {
         bo->vbo_res_id = vdrm_handle_to_res_id(dev->vdrm, bo->handle);
      }

      ret = dev->ops.bo_bind(dev, bo, bo->va->addr, bo->size, 0,
                             ASAHI_BIND_READ | ASAHI_BIND_WRITE, false);
      if (ret) {
         fprintf(stderr, "import failed: Could not bind BO at 0x%llx\n",
                 (long long)bo->va->addr);
         abort();
      }
   } else {
      /* bo->refcnt == 0 can happen if the BO was being released but
       * agx_bo_import() acquired the lock before agx_bo_unreference(). In that
       * case we cannot use agx_bo_reference() directly; we have to
       * re-initialize the refcnt to 1. Note that agx_bo_unreference()
       * re-checks the refcnt just after acquiring the lock, so the object is
       * not freed if agx_bo_import() grabbed it in the meantime.
       */
      if (p_atomic_read(&bo->refcnt) == 0)
         p_atomic_set(&bo->refcnt, 1);
      else
         agx_bo_reference(bo);
   }
   pthread_mutex_unlock(&dev->bo_map_lock);

   assert(bo->dev != NULL && "post-condition");

   if (dev->debug & AGX_DBG_TRACE) {
      agx_bo_map(bo);
      agxdecode_track_alloc(dev->agxdecode, bo);
   }

   return bo;

error:
   memset(bo, 0, sizeof(*bo));
   pthread_mutex_unlock(&dev->bo_map_lock);
   return NULL;
}

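/* Export a BO as a dma-buf fd. On first export, the BO is marked shared, its
 * prime fd is cached, and any pending writer is attached to the dma-buf for
 * implicit synchronization.
 */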
int
agx_bo_export(struct agx_device *dev, struct agx_bo *bo)
{
   int fd;

   assert(bo->flags & AGX_BO_SHAREABLE);

   if (drmPrimeHandleToFD(dev->fd, bo->handle, DRM_CLOEXEC, &fd))
      return -1;

   if (!(bo->flags & AGX_BO_SHARED)) {
      bo->flags |= AGX_BO_SHARED;
      assert(bo->prime_fd == -1);
      bo->prime_fd = os_dupfd_cloexec(fd);

      /* If there is a pending writer to this BO, import it into the buffer
       * for implicit sync.
       */
      uint64_t writer = p_atomic_read_relaxed(&bo->writer);
      if (writer) {
         int out_sync_fd = -1;
         int ret = drmSyncobjExportSyncFile(
            dev->fd, agx_bo_writer_syncobj(writer), &out_sync_fd);
         assert(ret >= 0);
         assert(out_sync_fd >= 0);

         ret = agx_import_sync_file(dev, bo, out_sync_fd);
         assert(ret >= 0);
         close(out_sync_fd);
      }
   }

   assert(bo->prime_fd >= 0);
   return fd;
}

static int
agx_bo_bind_object(struct agx_device *dev, struct agx_bo *bo,
                   uint32_t *object_handle, size_t size_B, uint64_t offset_B,
                   uint32_t flags)
{
   struct drm_asahi_gem_bind_object gem_bind = {
      .op = ASAHI_BIND_OBJECT_OP_BIND,
      .flags = flags,
      .handle = bo->handle,
      .vm_id = 0,
      .offset = offset_B,
      .range = size_B,
   };

   int ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_GEM_BIND_OBJECT, &gem_bind);
   if (ret) {
      fprintf(stderr,
              "DRM_IOCTL_ASAHI_GEM_BIND_OBJECT failed: %m (handle=%d)\n",
              bo->handle);
   }

   *object_handle = gem_bind.object_handle;

   return ret;
}

static int
agx_bo_unbind_object(struct agx_device *dev, uint32_t object_handle,
                     uint32_t flags)
{
   struct drm_asahi_gem_bind_object gem_bind = {
      .op = ASAHI_BIND_OBJECT_OP_UNBIND,
      .flags = flags,
      .object_handle = object_handle,
   };

   int ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_GEM_BIND_OBJECT, &gem_bind);
   if (ret) {
      fprintf(stderr,
              "DRM_IOCTL_ASAHI_GEM_BIND_OBJECT failed: %m (object_handle=%d)\n",
              object_handle);
   }

   return ret;
}

static void
agx_get_global_ids(struct agx_device *dev)
{
   dev->next_global_id = 0;
   dev->last_global_id = 0x1000000;
}

uint64_t
agx_get_global_id(struct agx_device *dev)
{
   if (unlikely(dev->next_global_id >= dev->last_global_id)) {
      agx_get_global_ids(dev);
   }

   return dev->next_global_id++;
}

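/* Query the driver parameter block (param_group 0) into the caller's buffer.
 * Returns get_param.size on success, or -EINVAL on failure.
 */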
static ssize_t
agx_get_params(struct agx_device *dev, void *buf, size_t size)
{
   struct drm_asahi_get_params get_param = {
      .param_group = 0,
      .pointer = (uint64_t)(uintptr_t)buf,
      .size = size,
   };

   memset(buf, 0, size);

   int ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_GET_PARAMS, &get_param);
   if (ret) {
      fprintf(stderr, "DRM_IOCTL_ASAHI_GET_PARAMS failed: %m\n");
      return -EINVAL;
   }

   return get_param.size;
}

static int
agx_submit(struct agx_device *dev, struct drm_asahi_submit *submit,
           struct agx_submit_virt *virt)
{
   return drmIoctl(dev->fd, DRM_IOCTL_ASAHI_SUBMIT, submit);
}

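/* Ops table for the native DRM backend. When the device is opened through
 * virtio-gpu, agx_virtio_open_device() is expected to install its own table
 * (see agx_open_device() below).
 */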
const agx_device_ops_t agx_device_drm_ops = {
   .bo_alloc = agx_bo_alloc,
   .bo_bind = agx_bo_bind,
   .bo_mmap = agx_bo_mmap,
   .get_params = agx_get_params,
   .submit = agx_submit,
   .bo_bind_object = agx_bo_bind_object,
   .bo_unbind_object = agx_bo_unbind_object,
};

static uint64_t
gcd(uint64_t n, uint64_t m)
{
   while (n != 0) {
      uint64_t remainder = m % n;
      m = n;
      n = remainder;
   }

   return m;
}

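/* Reduce the nanoseconds-per-tick ratios (NSEC_PER_SEC / frequency) to lowest
 * terms for both the GPU timer and the user timestamp counter.
 */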
static void
agx_init_timestamps(struct agx_device *dev)
{
   uint64_t ts_gcd = gcd(dev->params.timer_frequency_hz, NSEC_PER_SEC);

   dev->timestamp_to_ns.num = NSEC_PER_SEC / ts_gcd;
   dev->timestamp_to_ns.den = dev->params.timer_frequency_hz / ts_gcd;

   uint64_t user_ts_gcd =
      gcd(dev->params.user_timestamp_frequency_hz, NSEC_PER_SEC);

   dev->user_timestamp_to_ns.num = NSEC_PER_SEC / user_ts_gcd;
   dev->user_timestamp_to_ns.den =
      dev->params.user_timestamp_frequency_hz / user_ts_gcd;
}

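/* Open and initialize the device: probe the kernel driver (native asahi or
 * virtio-gpu native context), validate the unstable UABI version, lay out the
 * VA space, create the VM, and load the precompiled libagx support code.
 */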
bool
agx_open_device(void *memctx, struct agx_device *dev)
{
   dev->debug =
      debug_get_flags_option("ASAHI_MESA_DEBUG", agx_debug_options, 0);

   dev->ops = agx_device_drm_ops;

   ssize_t params_size = -1;

   /* DRM version check */
   {
      drmVersionPtr version = drmGetVersion(dev->fd);
      if (!version) {
         fprintf(stderr, "cannot get version: %s", strerror(errno));
         return false;
      }

      if (!strcmp(version->name, "asahi")) {
         dev->is_virtio = false;
         dev->ops = agx_device_drm_ops;
      } else if (!strcmp(version->name, "virtio_gpu")) {
         dev->is_virtio = true;
         if (!agx_virtio_open_device(dev)) {
            fprintf(
               stderr,
               "Error opening virtio-gpu device for Asahi native context\n");
            return false;
         }
      } else {
         return false;
      }

      drmFreeVersion(version);
   }

   params_size = dev->ops.get_params(dev, &dev->params, sizeof(dev->params));
   if (params_size <= 0) {
      assert(0);
      return false;
   }
   assert(params_size >= sizeof(dev->params));

   /* Refuse to probe. */
   if (dev->params.unstable_uabi_version != DRM_ASAHI_UNSTABLE_UABI_VERSION) {
      fprintf(
         stderr,
         "You are attempting to use upstream Mesa with a downstream kernel!\n"
         "This WILL NOT work.\n"
         "The Asahi UABI is unstable and NOT SUPPORTED in upstream Mesa.\n"
         "UABI related code in upstream Mesa is not for use!\n"
         "\n"
         "Do NOT attempt to patch out checks, you WILL break your system.\n"
         "Do NOT report bugs.\n"
         "Do NOT ask Mesa developers for support.\n"
         "Do NOT write guides about how to patch out these checks.\n"
         "Do NOT package patches to Mesa to bypass this.\n"
         "\n"
         "~~~\n"
         "This is not a place of honor.\n"
         "No highly esteemed deed is commemorated here.\n"
         "Nothing valued is here.\n"
         "\n"
         "What is here was dangerous and repulsive to us.\n"
         "This message is a warning about danger.\n"
         "\n"
         "The danger is still present, in your time, as it was in ours.\n"
         "The danger is unleashed only if you substantially disturb this place physically.\n"
         "This place is best shunned and left uninhabited.\n"
         "~~~\n"
         "\n"
         "THIS IS NOT A BUG. THIS IS YOU DOING SOMETHING BROKEN!\n");
      abort();
   }

   uint64_t incompat =
      dev->params.feat_incompat & (~AGX_SUPPORTED_INCOMPAT_FEATURES);
   if (incompat) {
      fprintf(stderr, "Missing GPU incompat features: 0x%" PRIx64 "\n",
              incompat);
      assert(0);
      return false;
   }

   assert(dev->params.gpu_generation >= 13);
   const char *variant = " Unknown";
   switch (dev->params.gpu_variant) {
   case 'G':
      variant = "";
      break;
   case 'S':
      variant = " Pro";
      break;
   case 'C':
      variant = " Max";
      break;
   case 'D':
      variant = " Ultra";
      break;
   }
   snprintf(dev->name, sizeof(dev->name), "Apple M%d%s (G%d%c %02X)",
            dev->params.gpu_generation - 12, variant,
            dev->params.gpu_generation, dev->params.gpu_variant,
            dev->params.gpu_revision + 0xA0);

   /* We need a large chunk of VA space carved out for robustness. Hardware
    * loads can shift an i32 by up to 2, for a total shift of 4. If the base
    * address is zero, 36-bits is therefore enough to trap any zero-extended
    * 32-bit index. For more generality we would need a larger carveout, but
    * this is already optimal for VBOs.
    *
    * TODO: Maybe this should be on top instead? Might be ok.
    */
   uint64_t reservation = (1ull << 36);

   /* Also reserve VA space for the printf buffer at a stable address, avoiding
    * the need for relocs in precompiled shaders.
    */
   assert(reservation == LIBAGX_PRINTF_BUFFER_ADDRESS);
   reservation += LIBAGX_PRINTF_BUFFER_SIZE;

   dev->guard_size = dev->params.vm_page_size;
   if (dev->params.vm_usc_start) {
      dev->shader_base = dev->params.vm_usc_start;
   } else {
      // Put the USC heap at the bottom of the user address space, 4GiB aligned
      dev->shader_base = ALIGN_POT(MAX2(dev->params.vm_user_start, reservation),
                                   0x100000000ull);
   }

   if (dev->shader_base < reservation) {
      /* Our robustness implementation requires the bottom unmapped */
      fprintf(stderr, "Unexpected address layout, can't cope\n");
      assert(0);
      return false;
   }

   uint64_t shader_size = 0x100000000ull;
   // Put the user heap after the USC heap
   uint64_t user_start = dev->shader_base + shader_size;

   assert(dev->shader_base >= dev->params.vm_user_start);
   assert(user_start < dev->params.vm_user_end);

   dev->agxdecode = agxdecode_new_context(dev->shader_base);

   agx_init_timestamps(dev);

   util_sparse_array_init(&dev->bo_map, sizeof(struct agx_bo), 512);
   pthread_mutex_init(&dev->bo_map_lock, NULL);

   simple_mtx_init(&dev->bo_cache.lock, mtx_plain);
   list_inithead(&dev->bo_cache.lru);

   for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i)
      list_inithead(&dev->bo_cache.buckets[i]);

   // Put the kernel heap at the top of the address space.
   // Give it 32GB of address space, should be more than enough for any
   // reasonable use case.
   uint64_t kernel_size = MAX2(dev->params.vm_kernel_min_size, 32ull << 30);
   struct drm_asahi_vm_create vm_create = {
      .kernel_start = dev->params.vm_user_end - kernel_size,
      .kernel_end = dev->params.vm_user_end,
   };

   uint64_t user_size = vm_create.kernel_start - user_start;

   int ret = asahi_simple_ioctl(dev, DRM_IOCTL_ASAHI_VM_CREATE, &vm_create);
   if (ret) {
      fprintf(stderr, "DRM_IOCTL_ASAHI_VM_CREATE failed: %m\n");
      assert(0);
      return false;
   }

   simple_mtx_init(&dev->vma_lock, mtx_plain);
   util_vma_heap_init(&dev->main_heap, user_start, user_size);
   util_vma_heap_init(&dev->usc_heap, dev->shader_base, shader_size);

   dev->vm_id = vm_create.vm_id;

   agx_get_global_ids(dev);

   glsl_type_singleton_init_or_ref();
   struct blob_reader blob;
   blob_reader_init(&blob, (void *)libagx_0_nir, sizeof(libagx_0_nir));
   dev->libagx = nir_deserialize(memctx, &agx_nir_options, &blob);

   if (agx_gather_device_key(dev).needs_g13x_coherency == U_TRISTATE_YES) {
      dev->libagx_programs = libagx_g13x;
   } else {
      dev->libagx_programs = libagx_g13g;
   }

   if (dev->params.gpu_generation >= 14 && dev->params.num_clusters_total > 1) {
      dev->chip = AGX_CHIP_G14X;
   } else if (dev->params.gpu_generation >= 14) {
      dev->chip = AGX_CHIP_G14G;
   } else if (dev->params.gpu_generation >= 13 &&
              dev->params.num_clusters_total > 1) {
      dev->chip = AGX_CHIP_G13X;
   } else {
      dev->chip = AGX_CHIP_G13G;
   }

   struct agx_bo *bo = agx_bo_create(dev, LIBAGX_PRINTF_BUFFER_SIZE, 0,
                                     AGX_BO_WRITEBACK, "Printf/abort");

   ret = dev->ops.bo_bind(dev, bo, LIBAGX_PRINTF_BUFFER_ADDRESS,
                          LIBAGX_PRINTF_BUFFER_SIZE, 0,
                          ASAHI_BIND_READ | ASAHI_BIND_WRITE, false);
   if (ret) {
      fprintf(stderr, "Failed to bind printf buffer\n");
      return false;
   }

   u_printf_init(&dev->printf, bo, agx_bo_map(bo));
   u_printf_singleton_init_or_ref();
   u_printf_singleton_add(dev->libagx->printf_info,
                          dev->libagx->printf_info_count);
   return true;
}

void
agx_close_device(struct agx_device *dev)
{
   agx_bo_unreference(dev, dev->printf.bo);
   u_printf_destroy(&dev->printf);
   ralloc_free((void *)dev->libagx);
   agx_bo_cache_evict_all(dev);
   util_sparse_array_finish(&dev->bo_map);
   agxdecode_destroy_context(dev->agxdecode);

   util_vma_heap_finish(&dev->main_heap);
   util_vma_heap_finish(&dev->usc_heap);
   glsl_type_singleton_decref();
   u_printf_singleton_decref();

   close(dev->fd);
}

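/* Create a hardware command queue in the device VM. With AGX_DBG_1QUEUE, a
 * single queue is created lazily and shared by all contexts.
 */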
uint32_t
agx_create_command_queue(struct agx_device *dev, uint32_t caps,
                         uint32_t priority)
{
   if (dev->debug & AGX_DBG_1QUEUE) {
      // Abuse this lock for this, it's debug only anyway
      simple_mtx_lock(&dev->vma_lock);
      if (dev->queue_id) {
         simple_mtx_unlock(&dev->vma_lock);
         return dev->queue_id;
      }
   }

   struct drm_asahi_queue_create queue_create = {
      .vm_id = dev->vm_id,
      .queue_caps = caps,
      .priority = priority,
      .flags = 0,
   };

   int ret =
      asahi_simple_ioctl(dev, DRM_IOCTL_ASAHI_QUEUE_CREATE, &queue_create);
   if (ret) {
      fprintf(stderr, "DRM_IOCTL_ASAHI_QUEUE_CREATE failed: %m\n");
      assert(0);
   }

   if (dev->debug & AGX_DBG_1QUEUE) {
      dev->queue_id = queue_create.queue_id;
      simple_mtx_unlock(&dev->vma_lock);
   }

   return queue_create.queue_id;
}

int
agx_destroy_command_queue(struct agx_device *dev, uint32_t queue_id)
{
   if (dev->debug & AGX_DBG_1QUEUE)
      return 0;

   struct drm_asahi_queue_destroy queue_destroy = {
      .queue_id = queue_id,
   };

   return asahi_simple_ioctl(dev, DRM_IOCTL_ASAHI_QUEUE_DESTROY,
                             &queue_destroy);
}

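/* Implicit-sync helpers: attach a sync file to a BO's dma-buf as a pending
 * write, and export the dma-buf's current fences as a sync file.
 */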
int
agx_import_sync_file(struct agx_device *dev, struct agx_bo *bo, int fd)
{
   struct dma_buf_import_sync_file import_sync_file_ioctl = {
      .flags = DMA_BUF_SYNC_WRITE,
      .fd = fd,
   };

   assert(fd >= 0);
   assert(bo->prime_fd != -1);

   int ret = drmIoctl(bo->prime_fd, DMA_BUF_IOCTL_IMPORT_SYNC_FILE,
                      &import_sync_file_ioctl);
   assert(ret >= 0);

   return ret;
}

int
agx_export_sync_file(struct agx_device *dev, struct agx_bo *bo)
{
   struct dma_buf_export_sync_file export_sync_file_ioctl = {
      .flags = DMA_BUF_SYNC_RW,
      .fd = -1,
   };

   assert(bo->prime_fd != -1);

   int ret = drmIoctl(bo->prime_fd, DMA_BUF_IOCTL_EXPORT_SYNC_FILE,
                      &export_sync_file_ioctl);
   assert(ret >= 0);
   assert(export_sync_file_ioctl.fd >= 0);

   return ret >= 0 ? export_sync_file_ioctl.fd : ret;
}

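/* Given a faulting GPU address, find the closest BO at or below it and log
 * whether the address lies inside the BO or how far past its end it falls.
 */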
void
agx_debug_fault(struct agx_device *dev, uint64_t addr)
{
   pthread_mutex_lock(&dev->bo_map_lock);

   struct agx_bo *best = NULL;

   for (uint32_t handle = 0; handle < dev->max_handle; handle++) {
      struct agx_bo *bo = agx_lookup_bo(dev, handle);
      if (!bo->va)
         continue;

      uint64_t bo_addr = bo->va->addr;
      if (bo->flags & AGX_BO_LOW_VA)
         bo_addr += dev->shader_base;

      if (!bo->size || bo_addr > addr)
         continue;

      if (!best || bo_addr > best->va->addr)
         best = bo;
   }

   if (!best) {
      mesa_logw("Address 0x%" PRIx64 " is unknown\n", addr);
   } else {
      uint64_t start = best->va->addr;
      uint64_t end = best->va->addr + best->size;
      if (addr > (end + 1024 * 1024 * 1024)) {
         /* 1GiB max as a sanity check */
         mesa_logw("Address 0x%" PRIx64 " is unknown\n", addr);
      } else if (addr > end) {
         mesa_logw("Address 0x%" PRIx64 " is 0x%" PRIx64
                   " bytes beyond an object at 0x%" PRIx64 "..0x%" PRIx64
                   " (%s)\n",
                   addr, addr - end, start, end - 1, best->label);
      } else {
         mesa_logw("Address 0x%" PRIx64 " is 0x%" PRIx64
                   " bytes into an object at 0x%" PRIx64 "..0x%" PRIx64
                   " (%s)\n",
                   addr, addr - start, start, end - 1, best->label);
      }
   }

   pthread_mutex_unlock(&dev->bo_map_lock);
}

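/* Sample the GPU timestamp. Prefer the DRM_IOCTL_ASAHI_GET_TIME ioctl when the
 * kernel advertises it; otherwise fall back to the architectural counter
 * (cntvct_el0 on arm64, or rdtsc when running x86 binaries under FEX).
 */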
uint64_t
agx_get_gpu_timestamp(struct agx_device *dev)
{
   if (dev->params.feat_compat & DRM_ASAHI_FEAT_GETTIME) {
      struct drm_asahi_get_time get_time = {.flags = 0, .extensions = 0};

      int ret = asahi_simple_ioctl(dev, DRM_IOCTL_ASAHI_GET_TIME, &get_time);
      if (ret) {
         fprintf(stderr, "DRM_IOCTL_ASAHI_GET_TIME failed: %m\n");
      } else {
         return get_time.gpu_timestamp;
      }
   }
#if DETECT_ARCH_AARCH64
   uint64_t ret;
   __asm__ volatile("mrs \t%0, cntvct_el0" : "=r"(ret));
   return ret;
#elif DETECT_ARCH_X86 || DETECT_ARCH_X86_64
   /* Maps to the above when run under FEX without thunking */
   uint32_t high, low;
   __asm__ volatile("rdtsc" : "=a"(low), "=d"(high));
   return (uint64_t)low | ((uint64_t)high << 32);
#else
#error "invalid architecture for asahi"
#endif
}

/* (Re)define UUID_SIZE to avoid including vulkan.h (or p_defines.h) here. */
#define UUID_SIZE 16

void
agx_get_device_uuid(const struct agx_device *dev, void *uuid)
{
   struct mesa_sha1 sha1_ctx;
   _mesa_sha1_init(&sha1_ctx);

   /* The device UUID uniquely identifies the given device within the machine.
    * Since we never have more than one device, this doesn't need to be a real
    * UUID, so we use SHA1("agx" + gpu_generation + gpu_variant + gpu_revision).
    */
   static const char *device_name = "agx";
   _mesa_sha1_update(&sha1_ctx, device_name, strlen(device_name));

   _mesa_sha1_update(&sha1_ctx, &dev->params.gpu_generation,
                     sizeof(dev->params.gpu_generation));
   _mesa_sha1_update(&sha1_ctx, &dev->params.gpu_variant,
                     sizeof(dev->params.gpu_variant));
   _mesa_sha1_update(&sha1_ctx, &dev->params.gpu_revision,
                     sizeof(dev->params.gpu_revision));

   uint8_t sha1[SHA1_DIGEST_LENGTH];
   _mesa_sha1_final(&sha1_ctx, sha1);

   assert(SHA1_DIGEST_LENGTH >= UUID_SIZE);
   memcpy(uuid, sha1, UUID_SIZE);
}

void
agx_get_driver_uuid(void *uuid)
{
   const char *driver_id = PACKAGE_VERSION MESA_GIT_SHA1;

   /* The driver UUID is used for determining sharability of images and memory
    * between two Vulkan instances in separate processes, but also for
    * determining sharability of memory objects between the Vulkan and OpenGL
    * drivers. People who want to share memory need to also check the device
    * UUID.
    */
   struct mesa_sha1 sha1_ctx;
   _mesa_sha1_init(&sha1_ctx);

   _mesa_sha1_update(&sha1_ctx, driver_id, strlen(driver_id));

   uint8_t sha1[SHA1_DIGEST_LENGTH];
   _mesa_sha1_final(&sha1_ctx, sha1);

   assert(SHA1_DIGEST_LENGTH >= UUID_SIZE);
   memcpy(uuid, sha1, UUID_SIZE);
}

unsigned
agx_get_num_cores(const struct agx_device *dev)
{
   unsigned n = 0;

   for (unsigned cl = 0; cl < dev->params.num_clusters_total; cl++) {
      n += util_bitcount(dev->params.core_masks[cl]);
   }

   return n;
}

struct agx_device_key
agx_gather_device_key(struct agx_device *dev)
{
   bool g13x_coh = (dev->params.gpu_generation == 13 &&
                    dev->params.num_clusters_total > 1) ||
                   dev->params.num_dies > 1;

   return (struct agx_device_key){
      .needs_g13x_coherency = u_tristate_make(g13x_coh),
      .soft_fault = agx_has_soft_fault(dev),
   };
}