/*
 * Copyright 2021 Alyssa Rosenzweig
 * Copyright 2019 Collabora, Ltd.
 * SPDX-License-Identifier: MIT
 */

#include "agx_device.h"
#include <inttypes.h>
#include "util/timespec.h"
#include "agx_bo.h"
#include "agx_compile.h"
#include "agx_scratch.h"
#include "decode.h"
#include "glsl_types.h"
#include "libagx_shaders.h"

#include <fcntl.h>
#include <xf86drm.h>
#include "drm-uapi/dma-buf.h"
#include "util/blob.h"
#include "util/log.h"
#include "util/os_file.h"
#include "util/os_mman.h"
#include "util/os_time.h"
#include "util/simple_mtx.h"
#include "git_sha1.h"
#include "nir_serialize.h"

/* TODO: Linux UAPI. Dummy defines to get some things to compile. */
#define ASAHI_BIND_READ  0
#define ASAHI_BIND_WRITE 0

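/*
 * Release a BO: unmap its CPU mapping, return its VA range (including the
 * guard region) to the owning heap, close its prime fd if it has one, and
 * finally close the GEM handle. The struct is zeroed before the GEM close so
 * a concurrent lookup never observes a stale handle.
 */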
void
agx_bo_free(struct agx_device *dev, struct agx_bo *bo)
{
   const uint64_t handle = bo->handle;

   if (bo->ptr.cpu)
      munmap(bo->ptr.cpu, bo->size);

   if (bo->ptr.gpu) {
      struct util_vma_heap *heap;
      uint64_t bo_addr = bo->ptr.gpu;

      if (bo->flags & AGX_BO_LOW_VA) {
         heap = &dev->usc_heap;
         bo_addr += dev->shader_base;
      } else {
         heap = &dev->main_heap;
      }

      simple_mtx_lock(&dev->vma_lock);
      util_vma_heap_free(heap, bo_addr, bo->size + dev->guard_size);
      simple_mtx_unlock(&dev->vma_lock);

      /* No need to unmap the BO, as the kernel will take care of that when we
       * close it. */
   }

   if (bo->prime_fd != -1)
      close(bo->prime_fd);

   /* Reset the handle. This has to happen before the GEM close to avoid a race.
    */
   memset(bo, 0, sizeof(*bo));
   __sync_synchronize();

   struct drm_gem_close args = {.handle = handle};
   drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &args);
}

static int
agx_bo_bind(struct agx_device *dev, struct agx_bo *bo, uint64_t addr,
            uint32_t flags)
{
   unreachable("Linux UAPI not yet upstream");
}

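/*
 * Allocate a new BO. The size is rounded up to the VM page size and a VA
 * range (plus guard region) is carved out of the USC heap for low-VA BOs or
 * the main heap otherwise; the BO is then bound and CPU-mapped. Returns NULL
 * on failure. Creating the underlying GEM object still depends on the
 * not-yet-upstream Linux UAPI.
 */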
struct agx_bo *
agx_bo_alloc(struct agx_device *dev, size_t size, size_t align,
             enum agx_bo_flags flags)
{
   struct agx_bo *bo;
   unsigned handle = 0;

   size = ALIGN_POT(size, dev->params.vm_page_size);

   /* executable implies low va */
   assert(!(flags & AGX_BO_EXEC) || (flags & AGX_BO_LOW_VA));

   unreachable("Linux UAPI not yet upstream");

   pthread_mutex_lock(&dev->bo_map_lock);
   bo = agx_lookup_bo(dev, handle);
   dev->max_handle = MAX2(dev->max_handle, handle);
   pthread_mutex_unlock(&dev->bo_map_lock);

   /* Fresh handle */
   assert(!memcmp(bo, &((struct agx_bo){}), sizeof(*bo)));

   bo->type = AGX_ALLOC_REGULAR;
   bo->size = size; /* TODO: gem_create.size */
   bo->align = MAX2(dev->params.vm_page_size, align);
   bo->flags = flags;
   bo->dev = dev;
   bo->handle = handle;
   bo->prime_fd = -1;

   ASSERTED bool lo = (flags & AGX_BO_LOW_VA);

   struct util_vma_heap *heap;
   if (lo)
      heap = &dev->usc_heap;
   else
      heap = &dev->main_heap;

   simple_mtx_lock(&dev->vma_lock);
   bo->ptr.gpu = util_vma_heap_alloc(heap, size + dev->guard_size, bo->align);
   simple_mtx_unlock(&dev->vma_lock);
   if (!bo->ptr.gpu) {
      fprintf(stderr, "Failed to allocate BO VMA\n");
      agx_bo_free(dev, bo);
      return NULL;
   }

   bo->guid = bo->handle; /* TODO: We don't care about guids */

   uint32_t bind = ASAHI_BIND_READ;
   if (!(flags & AGX_BO_READONLY)) {
      bind |= ASAHI_BIND_WRITE;
   }

   int ret = agx_bo_bind(dev, bo, bo->ptr.gpu, bind);
   if (ret) {
      agx_bo_free(dev, bo);
      return NULL;
   }

   agx_bo_mmap(bo);

   if (flags & AGX_BO_LOW_VA)
      bo->ptr.gpu -= dev->shader_base;

   assert(bo->ptr.gpu < (1ull << (lo ? 32 : 40)));

   return bo;
}

void
agx_bo_mmap(struct agx_bo *bo)
{
   unreachable("Linux UAPI not yet upstream");
}

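/*
 * Import a BO from a dma-buf fd. For a handle we have not seen before, the
 * size is validated, a VA range is allocated from the main heap, and the BO
 * is bound read/write; for a known handle we only take a reference, reviving
 * the refcount if a concurrent free was in flight. Returns NULL on failure.
 */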
struct agx_bo *
agx_bo_import(struct agx_device *dev, int fd)
{
   struct agx_bo *bo;
   ASSERTED int ret;
   unsigned gem_handle;

   pthread_mutex_lock(&dev->bo_map_lock);

   ret = drmPrimeFDToHandle(dev->fd, fd, &gem_handle);
   if (ret) {
      fprintf(stderr, "import failed: Could not map fd %d to handle\n", fd);
      pthread_mutex_unlock(&dev->bo_map_lock);
      return NULL;
   }

   bo = agx_lookup_bo(dev, gem_handle);
   dev->max_handle = MAX2(dev->max_handle, gem_handle);

   if (!bo->dev) {
      bo->dev = dev;
      bo->size = lseek(fd, 0, SEEK_END);

      /* lseek() can fail and return -1, and a size of -1 is not something we
       * want to hand to mmap(). Also reject zero-sized imports so we fail
       * cleanly instead of trying to map them.
       */
      if ((bo->size == 0) || (bo->size == (size_t)-1)) {
         pthread_mutex_unlock(&dev->bo_map_lock);
         return NULL;
      }
      if (bo->size & (dev->params.vm_page_size - 1)) {
         fprintf(
            stderr,
            "import failed: BO is not a multiple of the page size (0x%llx bytes)\n",
            (long long)bo->size);
         goto error;
      }

      bo->flags = AGX_BO_SHARED | AGX_BO_SHAREABLE;
      bo->handle = gem_handle;
      bo->prime_fd = os_dupfd_cloexec(fd);
      bo->label = "Imported BO";
      assert(bo->prime_fd >= 0);

      p_atomic_set(&bo->refcnt, 1);

      simple_mtx_lock(&dev->vma_lock);
      bo->ptr.gpu = util_vma_heap_alloc(
         &dev->main_heap, bo->size + dev->guard_size, dev->params.vm_page_size);
      simple_mtx_unlock(&dev->vma_lock);

      if (!bo->ptr.gpu) {
         fprintf(
            stderr,
            "import failed: Could not allocate from VMA heap (0x%llx bytes)\n",
            (long long)bo->size);
         abort();
      }

      ret =
         agx_bo_bind(dev, bo, bo->ptr.gpu, ASAHI_BIND_READ | ASAHI_BIND_WRITE);
      if (ret) {
         fprintf(stderr, "import failed: Could not bind BO at 0x%llx\n",
                 (long long)bo->ptr.gpu);
         abort();
      }
   } else {
      /* bo->refcnt == 0 can happen if the BO was being released but
       * agx_bo_import() acquired the lock before agx_bo_unreference(). In
       * that case we cannot use agx_bo_reference() directly; we have to
       * re-initialize the refcount instead. Note that agx_bo_unreference()
       * re-checks the refcount just after acquiring the lock, so the object
       * is not freed if agx_bo_import() acquired it in the meantime.
       */
      if (p_atomic_read(&bo->refcnt) == 0)
         p_atomic_set(&bo->refcnt, 1);
      else
         agx_bo_reference(bo);
   }
   pthread_mutex_unlock(&dev->bo_map_lock);

   return bo;

error:
   memset(bo, 0, sizeof(*bo));
   pthread_mutex_unlock(&dev->bo_map_lock);
   return NULL;
}

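/*
 * Export a BO as a dma-buf fd. On first export the BO is marked shared, a
 * prime fd is kept around for implicit sync, and any pending writer syncobj
 * is attached to the dma-buf so other users wait on our work.
 */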
int
agx_bo_export(struct agx_bo *bo)
{
   int fd;

   assert(bo->flags & AGX_BO_SHAREABLE);

   if (drmPrimeHandleToFD(bo->dev->fd, bo->handle, DRM_CLOEXEC, &fd))
      return -1;

   if (!(bo->flags & AGX_BO_SHARED)) {
      bo->flags |= AGX_BO_SHARED;
      assert(bo->prime_fd == -1);
      bo->prime_fd = os_dupfd_cloexec(fd);

      /* If there is a pending writer to this BO, import it into the buffer
       * for implicit sync.
       */
      uint32_t writer_syncobj = p_atomic_read_relaxed(&bo->writer_syncobj);
      if (writer_syncobj) {
         int out_sync_fd = -1;
         int ret =
            drmSyncobjExportSyncFile(bo->dev->fd, writer_syncobj, &out_sync_fd);
         assert(ret >= 0);
         assert(out_sync_fd >= 0);

         ret = agx_import_sync_file(bo->dev, bo, out_sync_fd);
         assert(ret >= 0);
         close(out_sync_fd);
      }
   }

   assert(bo->prime_fd >= 0);
   return fd;
}

static void
agx_get_global_ids(struct agx_device *dev)
{
   dev->next_global_id = 0;
   dev->last_global_id = 0x1000000;
}

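/*
 * Hand out monotonically increasing global IDs, refreshing the range when it
 * is exhausted. For now the range is a fixed placeholder (0 .. 0x1000000).
 */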
uint64_t
agx_get_global_id(struct agx_device *dev)
{
   if (unlikely(dev->next_global_id >= dev->last_global_id)) {
      agx_get_global_ids(dev);
   }

   return dev->next_global_id++;
}

static ssize_t
agx_get_params(struct agx_device *dev, void *buf, size_t size)
{
   /* TODO: Linux UAPI */
   unreachable("Linux UAPI not yet upstream");
}

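/*
 * Open and initialize a device: query device parameters, set up the BO map
 * and BO cache, the main and shader (USC) VMA heaps, global IDs, the
 * deserialized libagx shader library, and the helper program. Until the
 * Linux UAPI lands, this bails out early and returns false.
 */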
bool
agx_open_device(void *memctx, struct agx_device *dev)
{
   ssize_t params_size = -1;

   /* TODO: Linux UAPI */
   return false;

   params_size = agx_get_params(dev, &dev->params, sizeof(dev->params));
   if (params_size <= 0) {
      assert(0);
      return false;
   }
   assert(params_size >= sizeof(dev->params));

   /* TODO: Linux UAPI: Params */
   unreachable("Linux UAPI not yet upstream");

   util_sparse_array_init(&dev->bo_map, sizeof(struct agx_bo), 512);
   pthread_mutex_init(&dev->bo_map_lock, NULL);

   simple_mtx_init(&dev->bo_cache.lock, mtx_plain);
   list_inithead(&dev->bo_cache.lru);

   for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i)
      list_inithead(&dev->bo_cache.buckets[i]);

   /* TODO: Linux UAPI: Create VM */

   simple_mtx_init(&dev->vma_lock, mtx_plain);
   util_vma_heap_init(&dev->main_heap, dev->params.vm_user_start,
                      dev->params.vm_user_end - dev->params.vm_user_start + 1);
   util_vma_heap_init(
      &dev->usc_heap, dev->params.vm_shader_start,
      dev->params.vm_shader_end - dev->params.vm_shader_start + 1);

   agx_get_global_ids(dev);

   glsl_type_singleton_init_or_ref();
   struct blob_reader blob;
   blob_reader_init(&blob, (void *)libagx_shaders_nir,
                    sizeof(libagx_shaders_nir));
   dev->libagx = nir_deserialize(memctx, &agx_nir_options, &blob);

   dev->helper = agx_build_helper(dev);

   return true;
}

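/*
 * Tear down a device opened with agx_open_device: drop the helper program,
 * evict the BO cache, and release the BO map, the VMA heaps, the GLSL type
 * singleton, and the DRM fd.
 */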
void
agx_close_device(struct agx_device *dev)
{
   if (dev->helper)
      agx_bo_unreference(dev->helper);

   agx_bo_cache_evict_all(dev);
   util_sparse_array_finish(&dev->bo_map);

   util_vma_heap_finish(&dev->main_heap);
   util_vma_heap_finish(&dev->usc_heap);
   glsl_type_singleton_decref();

   close(dev->fd);
}

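/* Stub: command queue creation requires the not-yet-upstream Linux UAPI. */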
uint32_t
agx_create_command_queue(struct agx_device *dev, uint32_t caps)
{
   unreachable("Linux UAPI not yet upstream");
}

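/*
 * Attach a sync file fd to a shared BO's dma-buf as a write fence, so that
 * implicit-sync consumers of the dma-buf wait on our rendering. The BO must
 * already have a prime fd (i.e. it was imported or exported).
 */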
int
agx_import_sync_file(struct agx_device *dev, struct agx_bo *bo, int fd)
{
   struct dma_buf_import_sync_file import_sync_file_ioctl = {
      .flags = DMA_BUF_SYNC_WRITE,
      .fd = fd,
   };

   assert(fd >= 0);
   assert(bo->prime_fd != -1);

   int ret = drmIoctl(bo->prime_fd, DMA_BUF_IOCTL_IMPORT_SYNC_FILE,
                      &import_sync_file_ioctl);
   assert(ret >= 0);

   return ret;
}

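/*
 * Export a sync file fd covering all pending access (reads and writes) on a
 * shared BO's dma-buf, converting implicit sync into something we can wait on
 * explicitly. Returns the fd on success or a negative value on failure.
 */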
int
agx_export_sync_file(struct agx_device *dev, struct agx_bo *bo)
{
   struct dma_buf_export_sync_file export_sync_file_ioctl = {
      .flags = DMA_BUF_SYNC_RW,
      .fd = -1,
   };

   assert(bo->prime_fd != -1);

   int ret = drmIoctl(bo->prime_fd, DMA_BUF_IOCTL_EXPORT_SYNC_FILE,
                      &export_sync_file_ioctl);
   assert(ret >= 0);
   assert(export_sync_file_ioctl.fd >= 0);

   return ret >= 0 ? export_sync_file_ioctl.fd : ret;
}

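/*
 * Log diagnostics for a faulting GPU address: find the BO with the highest
 * base address at or below the fault and report whether the fault landed
 * inside it or how far past its end (capped at 1 GiB as a sanity check).
 */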
void
agx_debug_fault(struct agx_device *dev, uint64_t addr)
{
   pthread_mutex_lock(&dev->bo_map_lock);

   struct agx_bo *best = NULL;

   for (uint32_t handle = 0; handle < dev->max_handle; handle++) {
      struct agx_bo *bo = agx_lookup_bo(dev, handle);
      uint64_t bo_addr = bo->ptr.gpu;
      if (bo->flags & AGX_BO_LOW_VA)
         bo_addr += dev->shader_base;

      if (!bo->dev || bo_addr > addr)
         continue;

      if (!best || bo_addr > best->ptr.gpu)
         best = bo;
   }

   if (!best) {
      mesa_logw("Address 0x%" PRIx64 " is unknown\n", addr);
   } else {
      uint64_t start = best->ptr.gpu;
      uint64_t end = best->ptr.gpu + best->size;
      if (addr > (end + 1024 * 1024 * 1024)) {
         /* 1GiB max as a sanity check */
         mesa_logw("Address 0x%" PRIx64 " is unknown\n", addr);
      } else if (addr > end) {
         mesa_logw("Address 0x%" PRIx64 " is 0x%" PRIx64
                   " bytes beyond an object at 0x%" PRIx64 "..0x%" PRIx64
                   " (%s)\n",
                   addr, addr - end, start, end - 1, best->label);
      } else {
         mesa_logw("Address 0x%" PRIx64 " is 0x%" PRIx64
                   " bytes into an object at 0x%" PRIx64 "..0x%" PRIx64
                   " (%s)\n",
                   addr, addr - start, start, end - 1, best->label);
      }
   }

   pthread_mutex_unlock(&dev->bo_map_lock);
}

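/*
 * Sample a CPU-visible timestamp source for GPU timekeeping: the ARM generic
 * timer (CNTVCT_EL0) on arm64, or rdtsc on x86, which FEX maps to the same
 * counter when not thunking.
 */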
uint64_t
agx_get_gpu_timestamp(struct agx_device *dev)
{
#if DETECT_ARCH_AARCH64
   uint64_t ret;
   __asm__ volatile("mrs \t%0, cntvct_el0" : "=r"(ret));
   return ret;
#elif DETECT_ARCH_X86 || DETECT_ARCH_X86_64
   /* Maps to the above when run under FEX without thunking */
   uint32_t high, low;
   __asm__ volatile("rdtsc" : "=a"(low), "=d"(high));
   return (uint64_t)low | ((uint64_t)high << 32);
#else
#error "invalid architecture for asahi"
#endif
}