/*
 * Copyright © 2019 Raspberry Pi Ltd
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"

#include <errno.h>
#include <sys/mman.h>

#include "drm-uapi/v3d_drm.h"
#include "util/perf/cpu_trace.h"
#include "util/u_memory.h"

/* Default max size of the BO cache, in MB.
 *
 * This value comes from testing different Vulkan applications. Larger values
 * didn't bring any further performance benefit. It may look somewhat small,
 * but in the applications tested the main consumers of the BO cache are the
 * BOs used for the CLs, which are usually small.
 */
#define DEFAULT_MAX_BO_CACHE_SIZE 64

/* We decided against using a V3D_DEBUG flag for this, as it would mean
 * adding a run-time check to most of the calls.
 */
static const bool dump_stats = false;

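/* Logs a snapshot of the device BO counters and the current contents of the
 * BO cache. Only used when dump_stats is enabled at build time.
 */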
static void
bo_dump_stats(struct v3dv_device *device)
{
   struct v3dv_bo_cache *cache = &device->bo_cache;

   mesa_logi("  BOs allocated: %d\n", device->bo_count);
   mesa_logi("  BOs size: %dkb\n", device->bo_size / 1024);
   mesa_logi("  BOs cached: %d\n", cache->cache_count);
   mesa_logi("  BOs cached size: %dkb\n", cache->cache_size / 1024);

   if (!list_is_empty(&cache->time_list)) {
      struct v3dv_bo *first = list_first_entry(&cache->time_list,
                                               struct v3dv_bo,
                                               time_list);
      struct v3dv_bo *last = list_last_entry(&cache->time_list,
                                             struct v3dv_bo,
                                             time_list);

      mesa_logi("  oldest cache time: %ld\n", (long)first->free_time);
      mesa_logi("  newest cache time: %ld\n", (long)last->free_time);

      struct timespec time;
      clock_gettime(CLOCK_MONOTONIC, &time);
      mesa_logi("  now: %lld\n", (long long)time.tv_sec);
   }

   if (cache->size_list_size) {
      uint32_t empty_size_list = 0;
      for (uint32_t i = 0; i < cache->size_list_size; i++) {
         if (list_is_empty(&cache->size_list[i]))
            empty_size_list++;
      }
      mesa_logi("  Empty size_list lists: %d\n", empty_size_list);
   }
}

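/* Unlinks a BO from both the time-ordered and size-bucketed cache lists and
 * updates the cache counters. The caller must hold the cache lock.
 */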
static void
bo_remove_from_cache(struct v3dv_bo_cache *cache, struct v3dv_bo *bo)
{
   list_del(&bo->time_list);
   list_del(&bo->size_list);

   cache->cache_count--;
   cache->cache_size -= bo->size;
}

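/* Tries to reuse a BO of exactly the requested (page-aligned) size from the
 * cache. Returns NULL if there is no bucket for that size, the bucket is
 * empty, or the first cached BO in the bucket is still busy on the GPU.
 */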
static struct v3dv_bo *
bo_from_cache(struct v3dv_device *device, uint32_t size, const char *name)
{
   struct v3dv_bo_cache *cache = &device->bo_cache;
   uint32_t page_index = size / 4096 - 1;

   if (cache->size_list_size <= page_index)
      return NULL;

   struct v3dv_bo *bo = NULL;

   mtx_lock(&cache->lock);
   if (!list_is_empty(&cache->size_list[page_index])) {
      bo = list_first_entry(&cache->size_list[page_index],
                            struct v3dv_bo, size_list);

      /* Check that the BO has gone idle. If not, then we want to
       * allocate something new instead, since we assume that the
       * user will proceed to CPU map it and fill it with stuff.
       */
      if (!v3dv_bo_wait(device, bo, 0)) {
         mtx_unlock(&cache->lock);
         return NULL;
      }

      bo_remove_from_cache(cache, bo);
      bo->name = name;
      p_atomic_set(&bo->refcnt, 1);
   }
   mtx_unlock(&cache->lock);
   return bo;
}

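/* Releases a BO back to the kernel with DRM_IOCTL_GEM_CLOSE. The BO struct
 * itself lives in the physical device's sparse array, so it is zeroed here
 * rather than freed.
 */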
static bool
bo_free(struct v3dv_device *device,
        struct v3dv_bo *bo)
{
   if (!bo)
      return true;

   assert(p_atomic_read(&bo->refcnt) == 0);
   assert(bo->map == NULL);

   if (!bo->is_import) {
      device->bo_count--;
      device->bo_size -= bo->size;

      if (dump_stats) {
         mesa_logi("Freed %s%s%dkb:\n", bo->name ? bo->name : "",
                   bo->name ? " " : "", bo->size / 1024);
         bo_dump_stats(device);
      }
   }

   uint32_t handle = bo->handle;
   /* Our BO structs are stored in a sparse array in the physical device,
    * so we don't want to free the BO pointer; instead we reset it to 0,
    * to mark that array entry as free.
    *
    * We must do the reset before we actually free the BO in the kernel, since
    * otherwise there is a chance the application creates another BO in a
    * different thread and gets the same array entry, causing a race.
    */
   memset(bo, 0, sizeof(*bo));

   struct drm_gem_close c;
   memset(&c, 0, sizeof(c));
   c.handle = handle;
   int ret = v3d_ioctl(device->pdevice->render_fd, DRM_IOCTL_GEM_CLOSE, &c);
   if (ret != 0)
      mesa_loge("close object %d: %s\n", handle, strerror(errno));

   return ret == 0;
}

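/* Frees every BO currently held in the cache. When with_lock is false the
 * caller must already be holding the cache lock.
 */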
static void
bo_cache_free_all(struct v3dv_device *device,
                  bool with_lock)
{
   struct v3dv_bo_cache *cache = &device->bo_cache;

   if (with_lock)
      mtx_lock(&cache->lock);
   list_for_each_entry_safe(struct v3dv_bo, bo, &cache->time_list,
                            time_list) {
      bo_remove_from_cache(cache, bo);
      bo_free(device, bo);
   }
   if (with_lock)
      mtx_unlock(&cache->lock);
}

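/* Initializes a BO struct for a freshly created GEM object, with a single
 * reference and no CPU mapping.
 */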
void
v3dv_bo_init(struct v3dv_bo *bo,
             uint32_t handle,
             uint32_t size,
             uint32_t offset,
             const char *name,
             bool private)
{
   p_atomic_set(&bo->refcnt, 1);
   bo->handle = handle;
   bo->handle_bit = 1ull << (handle % 64);
   bo->size = size;
   bo->offset = offset;
   bo->map = NULL;
   bo->map_size = 0;
   bo->name = name;
   bo->private = private;
   bo->dumb_handle = -1;
   bo->is_import = false;
   bo->cl_branch_offset = 0xffffffff;
   list_inithead(&bo->list_link);
}

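/* Same as v3dv_bo_init(), but marks the BO as an import so it is excluded
 * from the device's allocation accounting when freed.
 */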
void
v3dv_bo_init_import(struct v3dv_bo *bo,
                    uint32_t handle,
                    uint32_t size,
                    uint32_t offset,
                    bool private)
{
   v3dv_bo_init(bo, handle, size, offset, "import", private);
   bo->is_import = true;
}

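/* Allocates a BO of at least the requested size, rounded up to whole 4k
 * pages. Private BOs are first looked up in the BO cache; on a cache miss
 * (or for non-private BOs) a new GEM object is created. If the kernel
 * allocation fails, the whole cache is flushed and the allocation is retried
 * before giving up.
 */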
struct v3dv_bo *
v3dv_bo_alloc(struct v3dv_device *device,
              uint32_t size,
              const char *name,
              bool private)
{
   struct v3dv_bo *bo;

   const uint32_t page_align = 4096; /* Always allocate full pages */
   size = align(size, page_align);

   if (private) {
      bo = bo_from_cache(device, size, name);
      if (bo) {
         if (dump_stats) {
            mesa_logi("Allocated %s %dkb from cache:\n", name, size / 1024);
            bo_dump_stats(device);
         }
         return bo;
      }
   }

   struct drm_v3d_create_bo create = {
      .size = size
   };

   int ret;
retry:
   ret = v3d_ioctl(device->pdevice->render_fd,
                   DRM_IOCTL_V3D_CREATE_BO, &create);
   if (ret != 0) {
      if (!list_is_empty(&device->bo_cache.time_list)) {
         bo_cache_free_all(device, true);
         goto retry;
      }

      mesa_loge("Failed to allocate device memory for BO\n");
      return NULL;
   }

   assert(create.offset % page_align == 0);
   assert((create.offset & 0xffffffff) == create.offset);

   bo = v3dv_device_lookup_bo(device->pdevice, create.handle);
   assert(bo && bo->handle == 0);

   v3dv_bo_init(bo, create.handle, size, create.offset, name, private);

   device->bo_count++;
   device->bo_size += bo->size;
   if (dump_stats) {
      mesa_logi("Allocated %s %dkb:\n", name, size / 1024);
      bo_dump_stats(device);
   }

   return bo;
}

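/* CPU-maps the first 'size' bytes of the BO without waiting for pending GPU
 * work, using DRM_IOCTL_V3D_MMAP_BO to obtain the mmap offset. Returns true
 * on success, including when the BO was already mapped.
 */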
bool
v3dv_bo_map_unsynchronized(struct v3dv_device *device,
                           struct v3dv_bo *bo,
                           uint32_t size)
{
   assert(bo != NULL && size <= bo->size);

   if (bo->map)
      return true;

   struct drm_v3d_mmap_bo map;
   memset(&map, 0, sizeof(map));
   map.handle = bo->handle;
   int ret = v3d_ioctl(device->pdevice->render_fd,
                       DRM_IOCTL_V3D_MMAP_BO, &map);
   if (ret != 0) {
      mesa_loge("map ioctl failure\n");
      return false;
   }

   bo->map = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
                  device->pdevice->render_fd, map.offset);
   if (bo->map == MAP_FAILED) {
      mesa_loge("mmap of bo %d (offset 0x%016llx, size %d) failed\n",
                bo->handle, (long long)map.offset, (uint32_t)bo->size);
      return false;
   }
   VG(VALGRIND_MALLOCLIKE_BLOCK(bo->map, bo->size, 0, false));

   bo->map_size = size;

   return true;
}

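/* Waits until the BO is idle or the timeout (in nanoseconds) expires.
 * Returns true if the BO is idle; a timeout of 0 simply polls its status.
 */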
bool
v3dv_bo_wait(struct v3dv_device *device,
             struct v3dv_bo *bo,
             uint64_t timeout_ns)
{
   MESA_TRACE_FUNC();
   struct drm_v3d_wait_bo wait = {
      .handle = bo->handle,
      .timeout_ns = timeout_ns,
   };
   return v3d_ioctl(device->pdevice->render_fd,
                    DRM_IOCTL_V3D_WAIT_BO, &wait) == 0;
}

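/* Synchronized CPU mapping: maps the BO and then waits for any pending GPU
 * work on it to complete before returning.
 *
 * Roughly, the typical lifecycle of a CPU-visible private BO built from the
 * helpers in this file looks like the following sketch (error handling
 * omitted, the BO name is just an example):
 *
 *    struct v3dv_bo *bo = v3dv_bo_alloc(device, size, "my-data", true);
 *    if (v3dv_bo_map(device, bo, bo->size)) {
 *       memcpy(bo->map, data, size);
 *       v3dv_bo_unmap(device, bo);
 *    }
 *    v3dv_bo_free(device, bo); // may just return the BO to the cache
 */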
bool
v3dv_bo_map(struct v3dv_device *device, struct v3dv_bo *bo, uint32_t size)
{
   assert(bo && size <= bo->size);

   bool ok = v3dv_bo_map_unsynchronized(device, bo, size);
   if (!ok)
      return false;

   ok = v3dv_bo_wait(device, bo, OS_TIMEOUT_INFINITE);
   if (!ok) {
      mesa_loge("memory wait for map failed\n");
      return false;
   }

   return true;
}

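/* Releases the CPU mapping created by v3dv_bo_map() or
 * v3dv_bo_map_unsynchronized().
 */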
void
v3dv_bo_unmap(struct v3dv_device *device, struct v3dv_bo *bo)
{
   assert(bo && bo->map && bo->map_size > 0);

   munmap(bo->map, bo->map_size);
   VG(VALGRIND_FREELIKE_BLOCK(bo->map, 0));
   bo->map = NULL;
   bo->map_size = 0;
}

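/* Grows the array of per-size cache buckets to 'size' entries. Because the
 * array is reallocated, the embedded list heads move, so every non-empty
 * list has to be relinked to its new head. The caller must hold the cache
 * lock.
 */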
static bool
reallocate_size_list(struct v3dv_bo_cache *cache,
                     struct v3dv_device *device,
                     uint32_t size)
{
   struct list_head *new_list =
      vk_alloc(&device->vk.alloc, sizeof(struct list_head) * size, 8,
               VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   if (!new_list) {
      mesa_loge("Failed to allocate host memory for cache bo list\n");
      return false;
   }
   struct list_head *old_list = cache->size_list;

   /* Move old list contents over (since the array has moved, and
    * therefore the pointers to the list heads have to change).
    */
   for (int i = 0; i < cache->size_list_size; i++) {
      struct list_head *old_head = &cache->size_list[i];
      if (list_is_empty(old_head)) {
         list_inithead(&new_list[i]);
      } else {
         new_list[i].next = old_head->next;
         new_list[i].prev = old_head->prev;
         new_list[i].next->prev = &new_list[i];
         new_list[i].prev->next = &new_list[i];
      }
   }
   for (int i = cache->size_list_size; i < size; i++)
      list_inithead(&new_list[i]);

   cache->size_list = new_list;
   cache->size_list_size = size;
   vk_free(&device->vk.alloc, old_list);

   return true;
}

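/* Initializes the device BO cache. The maximum cache size defaults to
 * DEFAULT_MAX_BO_CACHE_SIZE MB and can be overridden with the
 * V3DV_MAX_BO_CACHE_SIZE environment variable (also in MB).
 */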
void
v3dv_bo_cache_init(struct v3dv_device *device)
{
   device->bo_size = 0;
   device->bo_count = 0;
   list_inithead(&device->bo_cache.time_list);
   /* FIXME: perhaps set an initial size for the size-list, to avoid run-time
    * reallocations.
    */
   device->bo_cache.size_list_size = 0;

   const char *max_cache_size_str = getenv("V3DV_MAX_BO_CACHE_SIZE");
   if (max_cache_size_str == NULL)
      device->bo_cache.max_cache_size = DEFAULT_MAX_BO_CACHE_SIZE;
   else
      device->bo_cache.max_cache_size = atoll(max_cache_size_str);

   if (dump_stats) {
      mesa_logi("MAX BO CACHE SIZE: %iMB\n", device->bo_cache.max_cache_size);
   }

   mtx_lock(&device->bo_cache.lock);
   device->bo_cache.max_cache_size *= 1024 * 1024;
   device->bo_cache.cache_count = 0;
   device->bo_cache.cache_size = 0;
   mtx_unlock(&device->bo_cache.lock);
}

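/* Frees all cached BOs and the size-bucket array when the device's BO cache
 * is torn down.
 */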
void
v3dv_bo_cache_destroy(struct v3dv_device *device)
{
   bo_cache_free_all(device, true);
   vk_free(&device->vk.alloc, device->bo_cache.size_list);

   if (dump_stats) {
      mesa_loge("BO stats after screen destroy:\n");
      bo_dump_stats(device);
   }
}

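/* Walks the time-ordered cache list and frees every BO that has sat in the
 * cache for more than a couple of seconds. The list is ordered oldest first,
 * so we can stop at the first entry that is still fresh. The caller must
 * hold the cache lock.
 */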
static void
free_stale_bos(struct v3dv_device *device,
               time_t time)
{
   struct v3dv_bo_cache *cache = &device->bo_cache;
   bool freed_any = false;

   list_for_each_entry_safe(struct v3dv_bo, bo, &cache->time_list,
                            time_list) {
      /* If it's more than a couple of seconds old, free it. */
      if (time - bo->free_time > 2) {
         if (dump_stats && !freed_any) {
            mesa_logi("Freeing stale BOs:\n");
            bo_dump_stats(device);
            freed_any = true;
         }

         bo_remove_from_cache(cache, bo);
         bo_free(device, bo);
      } else {
         break;
      }
   }

   if (dump_stats && freed_any) {
      mesa_logi("Freed stale BOs:\n");
      bo_dump_stats(device);
   }
}

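/* Drops one reference to the BO. When the last reference goes away the BO is
 * unmapped and then either returned to the cache (private BOs that fit
 * within the configured maximum cache size) or freed back to the kernel.
 * Returning a BO to the cache also triggers a pass that frees stale cached
 * BOs.
 */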
bool
v3dv_bo_free(struct v3dv_device *device,
             struct v3dv_bo *bo)
{
   if (!bo)
      return true;

   if (!p_atomic_dec_zero(&bo->refcnt))
      return true;

   if (bo->map)
      v3dv_bo_unmap(device, bo);

   struct timespec time;
   struct v3dv_bo_cache *cache = &device->bo_cache;
   uint32_t page_index = bo->size / 4096 - 1;

   if (bo->private &&
       bo->size > cache->max_cache_size - cache->cache_size) {
      clock_gettime(CLOCK_MONOTONIC, &time);
      mtx_lock(&cache->lock);
      free_stale_bos(device, time.tv_sec);
      mtx_unlock(&cache->lock);
   }

   if (!bo->private ||
       bo->size > cache->max_cache_size - cache->cache_size) {
      return bo_free(device, bo);
   }

   clock_gettime(CLOCK_MONOTONIC, &time);
   mtx_lock(&cache->lock);

   if (cache->size_list_size <= page_index) {
      if (!reallocate_size_list(cache, device, page_index + 1)) {
         bool outcome = bo_free(device, bo);
         /* If the reallocation failed, it usually means that we are out of
          * memory, so we also free the whole BO cache. We call it without
          * taking the cache lock, since we are already holding it.
          */
         bo_cache_free_all(device, false);
         mtx_unlock(&cache->lock);
         return outcome;
      }
   }

   bo->free_time = time.tv_sec;
   list_addtail(&bo->size_list, &cache->size_list[page_index]);
   list_addtail(&bo->time_list, &cache->time_list);

   cache->cache_count++;
   cache->cache_size += bo->size;

   if (dump_stats) {
      mesa_logi("Freed %s %dkb to cache:\n", bo->name, bo->size / 1024);
      bo_dump_stats(device);
   }
   bo->name = NULL;

   free_stale_bos(device, time.tv_sec);

   mtx_unlock(&cache->lock);

   return true;
}