/*
 * Copyright 2021 Alyssa Rosenzweig
 * Copyright 2019 Collabora, Ltd.
 * SPDX-License-Identifier: MIT
 */

#include "agx_device.h"
#include <inttypes.h>
#include "util/timespec.h"
#include "agx_bo.h"
#include "agx_compile.h"
#include "agx_scratch.h"
#include "decode.h"
#include "glsl_types.h"
#include "libagx_shaders.h"

#include <fcntl.h>
#include <xf86drm.h>
#include "drm-uapi/dma-buf.h"
#include "util/blob.h"
#include "util/log.h"
#include "util/os_file.h"
#include "util/os_mman.h"
#include "util/os_time.h"
#include "util/simple_mtx.h"
#include "git_sha1.h"
#include "nir_serialize.h"

/* TODO: Linux UAPI. Dummy defines to get some things to compile. */
#define ASAHI_BIND_READ 0
#define ASAHI_BIND_WRITE 0

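/* Release a BO: tear down its CPU mapping, return its VA range to the
 * appropriate heap (USC heap for low-VA shader BOs, main heap otherwise),
 * close any prime fd, and finally close the GEM handle. The struct is
 * cleared before the GEM close so a racing lookup never sees a stale BO.
 */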
void
agx_bo_free(struct agx_device *dev, struct agx_bo *bo)
{
   const uint64_t handle = bo->handle;

   if (bo->ptr.cpu)
      munmap(bo->ptr.cpu, bo->size);

   if (bo->ptr.gpu) {
      struct util_vma_heap *heap;
      uint64_t bo_addr = bo->ptr.gpu;

      if (bo->flags & AGX_BO_LOW_VA) {
         heap = &dev->usc_heap;
         bo_addr += dev->shader_base;
      } else {
         heap = &dev->main_heap;
      }

      simple_mtx_lock(&dev->vma_lock);
      util_vma_heap_free(heap, bo_addr, bo->size + dev->guard_size);
      simple_mtx_unlock(&dev->vma_lock);

      /* No need to unmap the BO, as the kernel will take care of that when we
       * close it. */
   }

   if (bo->prime_fd != -1)
      close(bo->prime_fd);

   /* Reset the handle. This has to happen before the GEM close to avoid a race.
    */
   memset(bo, 0, sizeof(*bo));
   __sync_synchronize();

   struct drm_gem_close args = {.handle = handle};
   drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &args);
}

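/* Placeholder until the Linux UAPI lands: this is expected to bind the BO
 * into the device VM at the given address with the given access flags.
 */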
static int
agx_bo_bind(struct agx_device *dev, struct agx_bo *bo, uint64_t addr,
            uint32_t flags)
{
   unreachable("Linux UAPI not yet upstream");
}

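/* Allocate a new BO: round the size up to the VM page size, carve a VA range
 * out of the USC heap (low-VA/executable BOs) or the main heap, bind it, and
 * map it for CPU access. Low-VA BOs return an address relative to the shader
 * base so that it fits in 32 bits.
 */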
struct agx_bo *
agx_bo_alloc(struct agx_device *dev, size_t size, size_t align,
             enum agx_bo_flags flags)
{
   struct agx_bo *bo;
   unsigned handle = 0;

   size = ALIGN_POT(size, dev->params.vm_page_size);

   /* executable implies low va */
   assert(!(flags & AGX_BO_EXEC) || (flags & AGX_BO_LOW_VA));

   unreachable("Linux UAPI not yet upstream");

   pthread_mutex_lock(&dev->bo_map_lock);
   bo = agx_lookup_bo(dev, handle);
   dev->max_handle = MAX2(dev->max_handle, handle);
   pthread_mutex_unlock(&dev->bo_map_lock);

   /* Fresh handle */
   assert(!memcmp(bo, &((struct agx_bo){}), sizeof(*bo)));

   bo->type = AGX_ALLOC_REGULAR;
   bo->size = size; /* TODO: gem_create.size */
   bo->align = MAX2(dev->params.vm_page_size, align);
   bo->flags = flags;
   bo->dev = dev;
   bo->handle = handle;
   bo->prime_fd = -1;

   ASSERTED bool lo = (flags & AGX_BO_LOW_VA);

   struct util_vma_heap *heap;
   if (lo)
      heap = &dev->usc_heap;
   else
      heap = &dev->main_heap;

   simple_mtx_lock(&dev->vma_lock);
   bo->ptr.gpu = util_vma_heap_alloc(heap, size + dev->guard_size, bo->align);
   simple_mtx_unlock(&dev->vma_lock);
   if (!bo->ptr.gpu) {
      fprintf(stderr, "Failed to allocate BO VMA\n");
      agx_bo_free(dev, bo);
      return NULL;
   }

   bo->guid = bo->handle; /* TODO: We don't care about guids */

   uint32_t bind = ASAHI_BIND_READ;
   if (!(flags & AGX_BO_READONLY)) {
      bind |= ASAHI_BIND_WRITE;
   }

   int ret = agx_bo_bind(dev, bo, bo->ptr.gpu, bind);
   if (ret) {
      agx_bo_free(dev, bo);
      return NULL;
   }

   agx_bo_mmap(bo);

   if (flags & AGX_BO_LOW_VA)
      bo->ptr.gpu -= dev->shader_base;

   assert(bo->ptr.gpu < (1ull << (lo ? 32 : 40)));

   return bo;
}

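/* Placeholder until the Linux UAPI lands: this is expected to set up the
 * BO's CPU mapping (bo->ptr.cpu).
 */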
void
agx_bo_mmap(struct agx_bo *bo)
{
   unreachable("Linux UAPI not yet upstream");
}

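/* Import a BO from a dma-buf fd. If the resulting GEM handle is new to this
 * device, the BO is sized via lseek, given a VA range from the main heap and
 * bound; otherwise the existing BO is referenced, reviving a zero refcount if
 * the import raced with a release.
 */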
struct agx_bo *
agx_bo_import(struct agx_device *dev, int fd)
{
   struct agx_bo *bo;
   ASSERTED int ret;
   unsigned gem_handle;

   pthread_mutex_lock(&dev->bo_map_lock);

   ret = drmPrimeFDToHandle(dev->fd, fd, &gem_handle);
   if (ret) {
      fprintf(stderr, "import failed: Could not map fd %d to handle\n", fd);
      pthread_mutex_unlock(&dev->bo_map_lock);
      return NULL;
   }

   bo = agx_lookup_bo(dev, gem_handle);
   dev->max_handle = MAX2(dev->max_handle, gem_handle);

   if (!bo->dev) {
      bo->dev = dev;
      bo->size = lseek(fd, 0, SEEK_END);

      /* lseek can fail and return -1, which is not a size we can sensibly
       * mmap. Also reject zero-sized imports and fail gracefully.
       */
      if ((bo->size == 0) || (bo->size == (size_t)-1)) {
         pthread_mutex_unlock(&dev->bo_map_lock);
         return NULL;
      }
      if (bo->size & (dev->params.vm_page_size - 1)) {
         fprintf(
            stderr,
            "import failed: BO is not a multiple of the page size (0x%llx bytes)\n",
            (long long)bo->size);
         goto error;
      }

      bo->flags = AGX_BO_SHARED | AGX_BO_SHAREABLE;
      bo->handle = gem_handle;
      bo->prime_fd = os_dupfd_cloexec(fd);
      bo->label = "Imported BO";
      assert(bo->prime_fd >= 0);

      p_atomic_set(&bo->refcnt, 1);

      simple_mtx_lock(&dev->vma_lock);
      bo->ptr.gpu = util_vma_heap_alloc(
         &dev->main_heap, bo->size + dev->guard_size, dev->params.vm_page_size);
      simple_mtx_unlock(&dev->vma_lock);

      if (!bo->ptr.gpu) {
         fprintf(
            stderr,
            "import failed: Could not allocate from VMA heap (0x%llx bytes)\n",
            (long long)bo->size);
         abort();
      }

      ret =
         agx_bo_bind(dev, bo, bo->ptr.gpu, ASAHI_BIND_READ | ASAHI_BIND_WRITE);
      if (ret) {
         fprintf(stderr, "import failed: Could not bind BO at 0x%llx\n",
                 (long long)bo->ptr.gpu);
         abort();
      }
   } else {
      /* bo->refcnt == 0 can happen if the BO was being released but
       * agx_bo_import() acquired the lock before agx_bo_unreference(). In
       * that case, refcnt is 0 and we can't use agx_bo_reference() directly:
       * we have to re-initialize the refcnt.
       * Note that agx_bo_unreference() checks the refcnt value just after
       * acquiring the lock, to make sure the object is not freed if
       * agx_bo_import() acquired it in the meantime.
       */
      if (p_atomic_read(&bo->refcnt) == 0)
         p_atomic_set(&bo->refcnt, 1);
      else
         agx_bo_reference(bo);
   }
   pthread_mutex_unlock(&dev->bo_map_lock);

   return bo;

error:
   memset(bo, 0, sizeof(*bo));
   pthread_mutex_unlock(&dev->bo_map_lock);
   return NULL;
}

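/* Export a BO as a dma-buf fd. On first export, mark the BO shared, keep a
 * duplicated prime fd, and attach any pending writer's syncobj to the dma-buf
 * so that other processes pick up the dependency through implicit sync.
 */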
int
agx_bo_export(struct agx_bo *bo)
{
   int fd;

   assert(bo->flags & AGX_BO_SHAREABLE);

   if (drmPrimeHandleToFD(bo->dev->fd, bo->handle, DRM_CLOEXEC, &fd))
      return -1;

   if (!(bo->flags & AGX_BO_SHARED)) {
      bo->flags |= AGX_BO_SHARED;
      assert(bo->prime_fd == -1);
      bo->prime_fd = os_dupfd_cloexec(fd);

      /* If there is a pending writer to this BO, import it into the buffer
       * for implicit sync.
       */
      uint32_t writer_syncobj = p_atomic_read_relaxed(&bo->writer_syncobj);
      if (writer_syncobj) {
         int out_sync_fd = -1;
         int ret =
            drmSyncobjExportSyncFile(bo->dev->fd, writer_syncobj, &out_sync_fd);
         assert(ret >= 0);
         assert(out_sync_fd >= 0);

         ret = agx_import_sync_file(bo->dev, bo, out_sync_fd);
         assert(ret >= 0);
         close(out_sync_fd);
      }
   }

   assert(bo->prime_fd >= 0);
   return fd;
}

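/* Global IDs come from a simple counter that is re-armed (and so wraps back
 * to zero) after 0x1000000 allocations.
 */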
static void
agx_get_global_ids(struct agx_device *dev)
{
   dev->next_global_id = 0;
   dev->last_global_id = 0x1000000;
}

uint64_t
agx_get_global_id(struct agx_device *dev)
{
   if (unlikely(dev->next_global_id >= dev->last_global_id)) {
      agx_get_global_ids(dev);
   }

   return dev->next_global_id++;
}

static ssize_t
agx_get_params(struct agx_device *dev, void *buf, size_t size)
{
   /* TODO: Linux UAPI */
   unreachable("Linux UAPI not yet upstream");
}

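/* Open the device: query device parameters, set up the BO map and BO cache,
 * initialize the main and USC VMA heaps from the reported VM ranges,
 * deserialize the precompiled libagx NIR library, and build the helper
 * program. Bails out early for now, until the Linux UAPI is upstream.
 */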
bool
agx_open_device(void *memctx, struct agx_device *dev)
{
   ssize_t params_size = -1;

   /* TODO: Linux UAPI */
   return false;

   params_size = agx_get_params(dev, &dev->params, sizeof(dev->params));
   if (params_size <= 0) {
      assert(0);
      return false;
   }
   assert(params_size >= sizeof(dev->params));

   /* TODO: Linux UAPI: Params */
   unreachable("Linux UAPI not yet upstream");

   util_sparse_array_init(&dev->bo_map, sizeof(struct agx_bo), 512);
   pthread_mutex_init(&dev->bo_map_lock, NULL);

   simple_mtx_init(&dev->bo_cache.lock, mtx_plain);
   list_inithead(&dev->bo_cache.lru);

   for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i)
      list_inithead(&dev->bo_cache.buckets[i]);

   /* TODO: Linux UAPI: Create VM */

   simple_mtx_init(&dev->vma_lock, mtx_plain);
   util_vma_heap_init(&dev->main_heap, dev->params.vm_user_start,
                      dev->params.vm_user_end - dev->params.vm_user_start + 1);
   util_vma_heap_init(
      &dev->usc_heap, dev->params.vm_shader_start,
      dev->params.vm_shader_end - dev->params.vm_shader_start + 1);

   agx_get_global_ids(dev);

   glsl_type_singleton_init_or_ref();
   struct blob_reader blob;
   blob_reader_init(&blob, (void *)libagx_shaders_nir,
                    sizeof(libagx_shaders_nir));
   dev->libagx = nir_deserialize(memctx, &agx_nir_options, &blob);

   dev->helper = agx_build_helper(dev);

   return true;
}

void
agx_close_device(struct agx_device *dev)
{
   if (dev->helper)
      agx_bo_unreference(dev->helper);

   agx_bo_cache_evict_all(dev);
   util_sparse_array_finish(&dev->bo_map);

   util_vma_heap_finish(&dev->main_heap);
   util_vma_heap_finish(&dev->usc_heap);
   glsl_type_singleton_decref();

   close(dev->fd);
}

uint32_t
agx_create_command_queue(struct agx_device *dev, uint32_t caps)
{
   unreachable("Linux UAPI not yet upstream");
}

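/* Attach a sync file to the BO's dma-buf as a write fence, so that later
 * implicit-sync consumers wait on it.
 */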
int
agx_import_sync_file(struct agx_device *dev, struct agx_bo *bo, int fd)
{
   struct dma_buf_import_sync_file import_sync_file_ioctl = {
      .flags = DMA_BUF_SYNC_WRITE,
      .fd = fd,
   };

   assert(fd >= 0);
   assert(bo->prime_fd != -1);

   int ret = drmIoctl(bo->prime_fd, DMA_BUF_IOCTL_IMPORT_SYNC_FILE,
                      &import_sync_file_ioctl);
   assert(ret >= 0);

   return ret;
}

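/* Export the BO's current fences (read and write) from its dma-buf as a sync
 * file, returning the sync file fd on success or a negative error.
 */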
int
agx_export_sync_file(struct agx_device *dev, struct agx_bo *bo)
{
   struct dma_buf_export_sync_file export_sync_file_ioctl = {
      .flags = DMA_BUF_SYNC_RW,
      .fd = -1,
   };

   assert(bo->prime_fd != -1);

   int ret = drmIoctl(bo->prime_fd, DMA_BUF_IOCTL_EXPORT_SYNC_FILE,
                      &export_sync_file_ioctl);
   assert(ret >= 0);
   assert(export_sync_file_ioctl.fd >= 0);

   return ret >= 0 ? export_sync_file_ioctl.fd : ret;
}

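/* On a GPU fault, walk the BO map for the highest BO that starts at or below
 * the faulting address and log whether the fault landed inside it or just
 * past its end, which helps identify overruns.
 */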
void
agx_debug_fault(struct agx_device *dev, uint64_t addr)
{
   pthread_mutex_lock(&dev->bo_map_lock);

   struct agx_bo *best = NULL;

   for (uint32_t handle = 0; handle < dev->max_handle; handle++) {
      struct agx_bo *bo = agx_lookup_bo(dev, handle);
      uint64_t bo_addr = bo->ptr.gpu;
      if (bo->flags & AGX_BO_LOW_VA)
         bo_addr += dev->shader_base;

      if (!bo->dev || bo_addr > addr)
         continue;

      if (!best || bo_addr > best->ptr.gpu)
         best = bo;
   }

   if (!best) {
      mesa_logw("Address 0x%" PRIx64 " is unknown\n", addr);
   } else {
      uint64_t start = best->ptr.gpu;
      uint64_t end = best->ptr.gpu + best->size;
      if (addr > (end + 1024 * 1024 * 1024)) {
         /* 1GiB max as a sanity check */
         mesa_logw("Address 0x%" PRIx64 " is unknown\n", addr);
      } else if (addr > end) {
         mesa_logw("Address 0x%" PRIx64 " is 0x%" PRIx64
                   " bytes beyond an object at 0x%" PRIx64 "..0x%" PRIx64
                   " (%s)\n",
                   addr, addr - end, start, end - 1, best->label);
      } else {
         mesa_logw("Address 0x%" PRIx64 " is 0x%" PRIx64
                   " bytes into an object at 0x%" PRIx64 "..0x%" PRIx64
                   " (%s)\n",
                   addr, addr - start, start, end - 1, best->label);
      }
   }

   pthread_mutex_unlock(&dev->bo_map_lock);
}

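/* Read the AArch64 virtual counter (CNTVCT_EL0) as the timestamp source. On
 * x86, RDTSC is used instead; under FEX it maps to the same counter.
 */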
uint64_t
agx_get_gpu_timestamp(struct agx_device *dev)
{
#if DETECT_ARCH_AARCH64
   uint64_t ret;
   __asm__ volatile("mrs \t%0, cntvct_el0" : "=r"(ret));
   return ret;
#elif DETECT_ARCH_X86 || DETECT_ARCH_X86_64
   /* Maps to the above when run under FEX without thunking */
   uint32_t high, low;
   __asm__ volatile("rdtsc" : "=a"(low), "=d"(high));
   return (uint64_t)low | ((uint64_t)high << 32);
#else
#error "invalid architecture for asahi"
#endif
}