1 /*
2 * Copyright © 2019 Raspberry Pi Ltd
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "v3dv_private.h"
25
26 #include <errno.h>
27 #include <sys/mman.h>
28
29 #include "drm-uapi/v3d_drm.h"
30 #include "util/u_memory.h"
31
/* Default upper bound for the BO cache, expressed in MB.
 *
 * FIXME: this value was picked while testing some applications on an rpi4
 * with 4GB of RAM. Ideally it would scale with the total amount of RAM, but
 * choosing a formula requires testing on real hardware with different memory
 * sizes, so a fixed value is used for now.
 */
#define DEFAULT_MAX_BO_CACHE_SIZE 512
39
/* Compile-time switch for the BO statistics dumps in this file.
 *
 * Using a V3D_DEBUG option for this was discarded, as it would mean adding a
 * run-time check to most of the calls.
 */
static const bool dump_stats = false;
44
45 static void
bo_dump_stats(struct v3dv_device * device)46 bo_dump_stats(struct v3dv_device *device)
47 {
48 struct v3dv_bo_cache *cache = &device->bo_cache;
49
50 fprintf(stderr, " BOs allocated: %d\n", device->bo_count);
51 fprintf(stderr, " BOs size: %dkb\n", device->bo_size / 1024);
52 fprintf(stderr, " BOs cached: %d\n", cache->cache_count);
53 fprintf(stderr, " BOs cached size: %dkb\n", cache->cache_size / 1024);
54
55 if (!list_is_empty(&cache->time_list)) {
56 struct v3dv_bo *first = list_first_entry(&cache->time_list,
57 struct v3dv_bo,
58 time_list);
59 struct v3dv_bo *last = list_last_entry(&cache->time_list,
60 struct v3dv_bo,
61 time_list);
62
63 fprintf(stderr, " oldest cache time: %ld\n",
64 (long)first->free_time);
65 fprintf(stderr, " newest cache time: %ld\n",
66 (long)last->free_time);
67
68 struct timespec time;
69 clock_gettime(CLOCK_MONOTONIC, &time);
70 fprintf(stderr, " now: %lld\n",
71 (long long)time.tv_sec);
72 }
73
74 if (cache->size_list_size) {
75 uint32_t empty_size_list = 0;
76 for (uint32_t i = 0; i < cache->size_list_size; i++) {
77 if (list_is_empty(&cache->size_list[i]))
78 empty_size_list++;
79 }
80 fprintf(stderr, " Empty size_list lists: %d\n", empty_size_list);
81 }
82 }
83
84 static void
bo_remove_from_cache(struct v3dv_bo_cache * cache,struct v3dv_bo * bo)85 bo_remove_from_cache(struct v3dv_bo_cache *cache, struct v3dv_bo *bo)
86 {
87 list_del(&bo->time_list);
88 list_del(&bo->size_list);
89
90 cache->cache_count--;
91 cache->cache_size -= bo->size;
92 }
93
94 static struct v3dv_bo *
bo_from_cache(struct v3dv_device * device,uint32_t size,const char * name)95 bo_from_cache(struct v3dv_device *device, uint32_t size, const char *name)
96 {
97 struct v3dv_bo_cache *cache = &device->bo_cache;
98 uint32_t page_index = size / 4096 - 1;
99
100 if (cache->size_list_size <= page_index)
101 return NULL;
102
103 struct v3dv_bo *bo = NULL;
104
105 mtx_lock(&cache->lock);
106 if (!list_is_empty(&cache->size_list[page_index])) {
107 bo = list_first_entry(&cache->size_list[page_index],
108 struct v3dv_bo, size_list);
109
110 /* Check that the BO has gone idle. If not, then we want to
111 * allocate something new instead, since we assume that the
112 * user will proceed to CPU map it and fill it with stuff.
113 */
114 if (!v3dv_bo_wait(device, bo, 0)) {
115 mtx_unlock(&cache->lock);
116 return NULL;
117 }
118
119 bo_remove_from_cache(cache, bo);
120 bo->name = name;
121 p_atomic_set(&bo->refcnt, 1);
122 }
123 mtx_unlock(&cache->lock);
124 return bo;
125 }
126
127 static bool
bo_free(struct v3dv_device * device,struct v3dv_bo * bo)128 bo_free(struct v3dv_device *device,
129 struct v3dv_bo *bo)
130 {
131 if (!bo)
132 return true;
133
134 assert(p_atomic_read(&bo->refcnt) == 0);
135 assert(bo->map == NULL);
136
137 struct drm_gem_close c;
138 memset(&c, 0, sizeof(c));
139 c.handle = bo->handle;
140 int ret = v3dv_ioctl(device->pdevice->render_fd, DRM_IOCTL_GEM_CLOSE, &c);
141 if (ret != 0)
142 fprintf(stderr, "close object %d: %s\n", bo->handle, strerror(errno));
143
144 device->bo_count--;
145 device->bo_size -= bo->size;
146
147 if (dump_stats) {
148 fprintf(stderr, "Freed %s%s%dkb:\n",
149 bo->name ? bo->name : "",
150 bo->name ? " " : "",
151 bo->size / 1024);
152 bo_dump_stats(device);
153 }
154
155 /* Our BO structs are stored in a sparse array in the physical device,
156 * so we don't want to free the BO pointer, instead we want to reset it
157 * to 0, to signal that array entry as being free.
158 */
159 memset(bo, 0, sizeof(*bo));
160
161 return ret == 0;
162 }
163
164 static void
bo_cache_free_all(struct v3dv_device * device,bool with_lock)165 bo_cache_free_all(struct v3dv_device *device,
166 bool with_lock)
167 {
168 struct v3dv_bo_cache *cache = &device->bo_cache;
169
170 if (with_lock)
171 mtx_lock(&cache->lock);
172 list_for_each_entry_safe(struct v3dv_bo, bo, &cache->time_list,
173 time_list) {
174 bo_remove_from_cache(cache, bo);
175 bo_free(device, bo);
176 }
177 if (with_lock)
178 mtx_unlock(&cache->lock);
179
180 }
181
182 void
v3dv_bo_init(struct v3dv_bo * bo,uint32_t handle,uint32_t size,uint32_t offset,const char * name,bool private)183 v3dv_bo_init(struct v3dv_bo *bo,
184 uint32_t handle,
185 uint32_t size,
186 uint32_t offset,
187 const char *name,
188 bool private)
189 {
190 p_atomic_set(&bo->refcnt, 1);
191 bo->handle = handle;
192 bo->handle_bit = 1ull << (handle % 64);
193 bo->size = size;
194 bo->offset = offset;
195 bo->map = NULL;
196 bo->map_size = 0;
197 bo->name = name;
198 bo->private = private;
199 bo->dumb_handle = -1;
200 list_inithead(&bo->list_link);
201 }
202
203 struct v3dv_bo *
v3dv_bo_alloc(struct v3dv_device * device,uint32_t size,const char * name,bool private)204 v3dv_bo_alloc(struct v3dv_device *device,
205 uint32_t size,
206 const char *name,
207 bool private)
208 {
209 struct v3dv_bo *bo;
210
211 const uint32_t page_align = 4096; /* Always allocate full pages */
212 size = align(size, page_align);
213
214 if (private) {
215 bo = bo_from_cache(device, size, name);
216 if (bo) {
217 if (dump_stats) {
218 fprintf(stderr, "Allocated %s %dkb from cache:\n",
219 name, size / 1024);
220 bo_dump_stats(device);
221 }
222 return bo;
223 }
224 }
225
226 retry:
227 ;
228
229 bool cleared_and_retried = false;
230 struct drm_v3d_create_bo create = {
231 .size = size
232 };
233
234 int ret = v3dv_ioctl(device->pdevice->render_fd,
235 DRM_IOCTL_V3D_CREATE_BO, &create);
236 if (ret != 0) {
237 if (!list_is_empty(&device->bo_cache.time_list) &&
238 !cleared_and_retried) {
239 cleared_and_retried = true;
240 bo_cache_free_all(device, true);
241 goto retry;
242 }
243
244 fprintf(stderr, "Failed to allocate device memory for BO\n");
245 return NULL;
246 }
247
248 assert(create.offset % page_align == 0);
249 assert((create.offset & 0xffffffff) == create.offset);
250
251 bo = v3dv_device_lookup_bo(device->pdevice, create.handle);
252 assert(bo && bo->handle == 0);
253
254 v3dv_bo_init(bo, create.handle, size, create.offset, name, private);
255
256 device->bo_count++;
257 device->bo_size += bo->size;
258 if (dump_stats) {
259 fprintf(stderr, "Allocated %s %dkb:\n", name, size / 1024);
260 bo_dump_stats(device);
261 }
262
263 return bo;
264 }
265
266 bool
v3dv_bo_map_unsynchronized(struct v3dv_device * device,struct v3dv_bo * bo,uint32_t size)267 v3dv_bo_map_unsynchronized(struct v3dv_device *device,
268 struct v3dv_bo *bo,
269 uint32_t size)
270 {
271 assert(bo != NULL && size <= bo->size);
272
273 if (bo->map)
274 return bo->map;
275
276 struct drm_v3d_mmap_bo map;
277 memset(&map, 0, sizeof(map));
278 map.handle = bo->handle;
279 int ret = v3dv_ioctl(device->pdevice->render_fd,
280 DRM_IOCTL_V3D_MMAP_BO, &map);
281 if (ret != 0) {
282 fprintf(stderr, "map ioctl failure\n");
283 return false;
284 }
285
286 bo->map = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
287 device->pdevice->render_fd, map.offset);
288 if (bo->map == MAP_FAILED) {
289 fprintf(stderr, "mmap of bo %d (offset 0x%016llx, size %d) failed\n",
290 bo->handle, (long long)map.offset, (uint32_t)bo->size);
291 return false;
292 }
293 VG(VALGRIND_MALLOCLIKE_BLOCK(bo->map, bo->size, 0, false));
294
295 bo->map_size = size;
296
297 return true;
298 }
299
300 bool
v3dv_bo_wait(struct v3dv_device * device,struct v3dv_bo * bo,uint64_t timeout_ns)301 v3dv_bo_wait(struct v3dv_device *device,
302 struct v3dv_bo *bo,
303 uint64_t timeout_ns)
304 {
305 struct drm_v3d_wait_bo wait = {
306 .handle = bo->handle,
307 .timeout_ns = timeout_ns,
308 };
309 return v3dv_ioctl(device->pdevice->render_fd,
310 DRM_IOCTL_V3D_WAIT_BO, &wait) == 0;
311 }
312
313 bool
v3dv_bo_map(struct v3dv_device * device,struct v3dv_bo * bo,uint32_t size)314 v3dv_bo_map(struct v3dv_device *device, struct v3dv_bo *bo, uint32_t size)
315 {
316 assert(bo && size <= bo->size);
317
318 bool ok = v3dv_bo_map_unsynchronized(device, bo, size);
319 if (!ok)
320 return false;
321
322 ok = v3dv_bo_wait(device, bo, PIPE_TIMEOUT_INFINITE);
323 if (!ok) {
324 fprintf(stderr, "memory wait for map failed\n");
325 return false;
326 }
327
328 return true;
329 }
330
331 void
v3dv_bo_unmap(struct v3dv_device * device,struct v3dv_bo * bo)332 v3dv_bo_unmap(struct v3dv_device *device, struct v3dv_bo *bo)
333 {
334 assert(bo && bo->map && bo->map_size > 0);
335
336 munmap(bo->map, bo->map_size);
337 VG(VALGRIND_FREELIKE_BLOCK(bo->map, 0));
338 bo->map = NULL;
339 bo->map_size = 0;
340 }
341
342 static boolean
reallocate_size_list(struct v3dv_bo_cache * cache,struct v3dv_device * device,uint32_t size)343 reallocate_size_list(struct v3dv_bo_cache *cache,
344 struct v3dv_device *device,
345 uint32_t size)
346 {
347 struct list_head *new_list =
348 vk_alloc(&device->vk.alloc, sizeof(struct list_head) * size, 8,
349 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
350
351 if (!new_list) {
352 fprintf(stderr, "Failed to allocate host memory for cache bo list\n");
353 return false;
354 }
355 struct list_head *old_list = cache->size_list;
356
357 /* Move old list contents over (since the array has moved, and
358 * therefore the pointers to the list heads have to change).
359 */
360 for (int i = 0; i < cache->size_list_size; i++) {
361 struct list_head *old_head = &cache->size_list[i];
362 if (list_is_empty(old_head)) {
363 list_inithead(&new_list[i]);
364 } else {
365 new_list[i].next = old_head->next;
366 new_list[i].prev = old_head->prev;
367 new_list[i].next->prev = &new_list[i];
368 new_list[i].prev->next = &new_list[i];
369 }
370 }
371 for (int i = cache->size_list_size; i < size; i++)
372 list_inithead(&new_list[i]);
373
374 cache->size_list = new_list;
375 cache->size_list_size = size;
376 vk_free(&device->vk.alloc, old_list);
377
378 return true;
379 }
380
381 void
v3dv_bo_cache_init(struct v3dv_device * device)382 v3dv_bo_cache_init(struct v3dv_device *device)
383 {
384 device->bo_size = 0;
385 device->bo_count = 0;
386 list_inithead(&device->bo_cache.time_list);
387 /* FIXME: perhaps set a initial size for the size-list, to avoid run-time
388 * reallocations
389 */
390 device->bo_cache.size_list_size = 0;
391
392 const char *max_cache_size_str = getenv("V3DV_MAX_BO_CACHE_SIZE");
393 if (max_cache_size_str == NULL)
394 device->bo_cache.max_cache_size = DEFAULT_MAX_BO_CACHE_SIZE;
395 else
396 device->bo_cache.max_cache_size = atoll(max_cache_size_str);
397
398 if (dump_stats) {
399 fprintf(stderr, "MAX BO CACHE SIZE: %iMB\n", device->bo_cache.max_cache_size);
400 }
401
402 device->bo_cache.max_cache_size *= 1024 * 1024;
403 device->bo_cache.cache_count = 0;
404 device->bo_cache.cache_size = 0;
405 }
406
407 void
v3dv_bo_cache_destroy(struct v3dv_device * device)408 v3dv_bo_cache_destroy(struct v3dv_device *device)
409 {
410 bo_cache_free_all(device, true);
411 vk_free(&device->vk.alloc, device->bo_cache.size_list);
412
413 if (dump_stats) {
414 fprintf(stderr, "BO stats after screen destroy:\n");
415 bo_dump_stats(device);
416 }
417 }
418
419
420 static void
free_stale_bos(struct v3dv_device * device,time_t time)421 free_stale_bos(struct v3dv_device *device,
422 time_t time)
423 {
424 struct v3dv_bo_cache *cache = &device->bo_cache;
425 bool freed_any = false;
426
427 list_for_each_entry_safe(struct v3dv_bo, bo, &cache->time_list,
428 time_list) {
429 /* If it's more than a second old, free it. */
430 if (time - bo->free_time > 2) {
431 if (dump_stats && !freed_any) {
432 fprintf(stderr, "Freeing stale BOs:\n");
433 bo_dump_stats(device);
434 freed_any = true;
435 }
436
437 bo_remove_from_cache(cache, bo);
438 bo_free(device, bo);
439 } else {
440 break;
441 }
442 }
443
444 if (dump_stats && freed_any) {
445 fprintf(stderr, "Freed stale BOs:\n");
446 bo_dump_stats(device);
447 }
448 }
449
450 bool
v3dv_bo_free(struct v3dv_device * device,struct v3dv_bo * bo)451 v3dv_bo_free(struct v3dv_device *device,
452 struct v3dv_bo *bo)
453 {
454 if (!bo)
455 return true;
456
457 if (!p_atomic_dec_zero(&bo->refcnt))
458 return true;
459
460 if (bo->map)
461 v3dv_bo_unmap(device, bo);
462
463 struct timespec time;
464 struct v3dv_bo_cache *cache = &device->bo_cache;
465 uint32_t page_index = bo->size / 4096 - 1;
466
467 if (bo->private &&
468 bo->size > cache->max_cache_size - cache->cache_size) {
469 clock_gettime(CLOCK_MONOTONIC, &time);
470 mtx_lock(&cache->lock);
471 free_stale_bos(device, time.tv_sec);
472 mtx_unlock(&cache->lock);
473 }
474
475 if (!bo->private ||
476 bo->size > cache->max_cache_size - cache->cache_size) {
477 return bo_free(device, bo);
478 }
479
480 clock_gettime(CLOCK_MONOTONIC, &time);
481 mtx_lock(&cache->lock);
482
483 if (cache->size_list_size <= page_index) {
484 if (!reallocate_size_list(cache, device, page_index + 1)) {
485 bool outcome = bo_free(device, bo);
486 /* If the reallocation failed, it usually means that we are out of
487 * memory, so we also free all the bo cache. We need to call it to
488 * not use the cache lock, as we are already under it.
489 */
490 bo_cache_free_all(device, false);
491 mtx_unlock(&cache->lock);
492 return outcome;
493 }
494 }
495
496 bo->free_time = time.tv_sec;
497 list_addtail(&bo->size_list, &cache->size_list[page_index]);
498 list_addtail(&bo->time_list, &cache->time_list);
499
500 cache->cache_count++;
501 cache->cache_size += bo->size;
502
503 if (dump_stats) {
504 fprintf(stderr, "Freed %s %dkb to cache:\n",
505 bo->name, bo->size / 1024);
506 bo_dump_stats(device);
507 }
508 bo->name = NULL;
509
510 free_stale_bos(device, time.tv_sec);
511
512 mtx_unlock(&cache->lock);
513
514 return true;
515 }
516