• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2019 Raspberry Pi Ltd
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "v3dv_private.h"
25 
26 #include <errno.h>
27 #include <sys/mman.h>
28 
29 #include "drm-uapi/v3d_drm.h"
30 #include "util/perf/cpu_trace.h"
31 #include "util/u_memory.h"
32 
/* Default upper bound for the size of the BO cache, expressed in MB.
 *
 * The figure comes from testing several Vulkan applications: larger values
 * brought no further performance benefit. It may look somewhat small, but in
 * those tests the main consumers of the BO cache were the BOs used for the
 * CLs, which are usually small.
 */
#define DEFAULT_MAX_BO_CACHE_SIZE 64

/* Compile-time switch for the BO statistics dumps. Using a V3D_DEBUG option
 * for this was discarded, as it would mean adding a run-time check to most
 * of the calls.
 */
static const bool dump_stats = false;
46 
47 static void
bo_dump_stats(struct v3dv_device * device)48 bo_dump_stats(struct v3dv_device *device)
49 {
50    struct v3dv_bo_cache *cache = &device->bo_cache;
51 
52    mesa_logi("  BOs allocated:   %d\n", device->bo_count);
53    mesa_logi("  BOs size:        %dkb\n", device->bo_size / 1024);
54    mesa_logi("  BOs cached:      %d\n", cache->cache_count);
55    mesa_logi("  BOs cached size: %dkb\n", cache->cache_size / 1024);
56 
57    if (!list_is_empty(&cache->time_list)) {
58       struct v3dv_bo *first = list_first_entry(&cache->time_list,
59                                               struct v3dv_bo,
60                                               time_list);
61       struct v3dv_bo *last = list_last_entry(&cache->time_list,
62                                             struct v3dv_bo,
63                                             time_list);
64 
65       mesa_logi("  oldest cache time: %ld\n", (long)first->free_time);
66       mesa_logi("  newest cache time: %ld\n", (long)last->free_time);
67 
68       struct timespec time;
69       clock_gettime(CLOCK_MONOTONIC, &time);
70       mesa_logi("  now:               %lld\n", (long long)time.tv_sec);
71    }
72 
73    if (cache->size_list_size) {
74       uint32_t empty_size_list = 0;
75       for (uint32_t i = 0; i < cache->size_list_size; i++) {
76          if (list_is_empty(&cache->size_list[i]))
77             empty_size_list++;
78       }
79       mesa_logi("  Empty size_list lists: %d\n", empty_size_list);
80    }
81 }
82 
83 static void
bo_remove_from_cache(struct v3dv_bo_cache * cache,struct v3dv_bo * bo)84 bo_remove_from_cache(struct v3dv_bo_cache *cache, struct v3dv_bo *bo)
85 {
86    list_del(&bo->time_list);
87    list_del(&bo->size_list);
88 
89    cache->cache_count--;
90    cache->cache_size -= bo->size;
91 }
92 
93 static struct v3dv_bo *
bo_from_cache(struct v3dv_device * device,uint32_t size,const char * name)94 bo_from_cache(struct v3dv_device *device, uint32_t size, const char *name)
95 {
96    struct v3dv_bo_cache *cache = &device->bo_cache;
97    uint32_t page_index = size / 4096 - 1;
98 
99    if (cache->size_list_size <= page_index)
100       return NULL;
101 
102    struct v3dv_bo *bo = NULL;
103 
104    mtx_lock(&cache->lock);
105    if (!list_is_empty(&cache->size_list[page_index])) {
106       bo = list_first_entry(&cache->size_list[page_index],
107                             struct v3dv_bo, size_list);
108 
109       /* Check that the BO has gone idle.  If not, then we want to
110        * allocate something new instead, since we assume that the
111        * user will proceed to CPU map it and fill it with stuff.
112        */
113       if (!v3dv_bo_wait(device, bo, 0)) {
114          mtx_unlock(&cache->lock);
115          return NULL;
116       }
117 
118       bo_remove_from_cache(cache, bo);
119       bo->name = name;
120       p_atomic_set(&bo->refcnt, 1);
121    }
122    mtx_unlock(&cache->lock);
123    return bo;
124 }
125 
126 static bool
bo_free(struct v3dv_device * device,struct v3dv_bo * bo)127 bo_free(struct v3dv_device *device,
128         struct v3dv_bo *bo)
129 {
130    if (!bo)
131       return true;
132 
133    assert(p_atomic_read(&bo->refcnt) == 0);
134    assert(bo->map == NULL);
135 
136    if (!bo->is_import) {
137       device->bo_count--;
138       device->bo_size -= bo->size;
139 
140       if (dump_stats) {
141          mesa_logi("Freed %s%s%dkb:\n", bo->name ? bo->name : "",
142                    bo->name ? " " : "", bo->size / 1024);
143          bo_dump_stats(device);
144       }
145    }
146 
147    uint32_t handle = bo->handle;
148    /* Our BO structs are stored in a sparse array in the physical device,
149     * so we don't want to free the BO pointer, instead we want to reset it
150     * to 0, to signal that array entry as being free.
151     *
152     * We must do the reset before we actually free the BO in the kernel, since
153     * otherwise there is a chance the application creates another BO in a
154     * different thread and gets the same array entry, causing a race.
155     */
156    memset(bo, 0, sizeof(*bo));
157 
158    struct drm_gem_close c;
159    memset(&c, 0, sizeof(c));
160    c.handle = handle;
161    int ret = v3d_ioctl(device->pdevice->render_fd, DRM_IOCTL_GEM_CLOSE, &c);
162    if (ret != 0)
163       mesa_loge("close object %d: %s\n", handle, strerror(errno));
164 
165    return ret == 0;
166 }
167 
168 static void
bo_cache_free_all(struct v3dv_device * device,bool with_lock)169 bo_cache_free_all(struct v3dv_device *device,
170                        bool with_lock)
171 {
172    struct v3dv_bo_cache *cache = &device->bo_cache;
173 
174    if (with_lock)
175       mtx_lock(&cache->lock);
176    list_for_each_entry_safe(struct v3dv_bo, bo, &cache->time_list,
177                             time_list) {
178       bo_remove_from_cache(cache, bo);
179       bo_free(device, bo);
180    }
181    if (with_lock)
182       mtx_unlock(&cache->lock);
183 
184 }
185 
186 void
v3dv_bo_init(struct v3dv_bo * bo,uint32_t handle,uint32_t size,uint32_t offset,const char * name,bool private)187 v3dv_bo_init(struct v3dv_bo *bo,
188              uint32_t handle,
189              uint32_t size,
190              uint32_t offset,
191              const char *name,
192              bool private)
193 {
194    p_atomic_set(&bo->refcnt, 1);
195    bo->handle = handle;
196    bo->handle_bit = 1ull << (handle % 64);
197    bo->size = size;
198    bo->offset = offset;
199    bo->map = NULL;
200    bo->map_size = 0;
201    bo->name = name;
202    bo->private = private;
203    bo->dumb_handle = -1;
204    bo->is_import = false;
205    bo->cl_branch_offset = 0xffffffff;
206    list_inithead(&bo->list_link);
207 }
208 
209 void
v3dv_bo_init_import(struct v3dv_bo * bo,uint32_t handle,uint32_t size,uint32_t offset,bool private)210 v3dv_bo_init_import(struct v3dv_bo *bo,
211                     uint32_t handle,
212                     uint32_t size,
213                     uint32_t offset,
214                     bool private)
215 {
216    v3dv_bo_init(bo, handle, size, offset, "import", private);
217    bo->is_import = true;
218 }
219 
220 struct v3dv_bo *
v3dv_bo_alloc(struct v3dv_device * device,uint32_t size,const char * name,bool private)221 v3dv_bo_alloc(struct v3dv_device *device,
222               uint32_t size,
223               const char *name,
224               bool private)
225 {
226    struct v3dv_bo *bo;
227 
228    const uint32_t page_align = 4096; /* Always allocate full pages */
229    size = align(size, page_align);
230 
231    if (private) {
232       bo = bo_from_cache(device, size, name);
233       if (bo) {
234          if (dump_stats) {
235             mesa_logi("Allocated %s %dkb from cache:\n", name, size / 1024);
236             bo_dump_stats(device);
237          }
238          return bo;
239       }
240    }
241 
242    struct drm_v3d_create_bo create = {
243       .size = size
244    };
245 
246    int ret;
247 retry:
248    ret = v3d_ioctl(device->pdevice->render_fd,
249                    DRM_IOCTL_V3D_CREATE_BO, &create);
250    if (ret != 0) {
251       if (!list_is_empty(&device->bo_cache.time_list)) {
252          bo_cache_free_all(device, true);
253          goto retry;
254       }
255 
256       mesa_loge("Failed to allocate device memory for BO\n");
257       return NULL;
258    }
259 
260    assert(create.offset % page_align == 0);
261    assert((create.offset & 0xffffffff) == create.offset);
262 
263    bo = v3dv_device_lookup_bo(device->pdevice, create.handle);
264    assert(bo && bo->handle == 0);
265 
266    v3dv_bo_init(bo, create.handle, size, create.offset, name, private);
267 
268    device->bo_count++;
269    device->bo_size += bo->size;
270    if (dump_stats) {
271       mesa_logi("Allocated %s %dkb:\n", name, size / 1024);
272       bo_dump_stats(device);
273    }
274 
275    return bo;
276 }
277 
278 bool
v3dv_bo_map_unsynchronized(struct v3dv_device * device,struct v3dv_bo * bo,uint32_t size)279 v3dv_bo_map_unsynchronized(struct v3dv_device *device,
280                            struct v3dv_bo *bo,
281                            uint32_t size)
282 {
283    assert(bo != NULL && size <= bo->size);
284 
285    if (bo->map)
286       return bo->map;
287 
288    struct drm_v3d_mmap_bo map;
289    memset(&map, 0, sizeof(map));
290    map.handle = bo->handle;
291    int ret = v3d_ioctl(device->pdevice->render_fd,
292                        DRM_IOCTL_V3D_MMAP_BO, &map);
293    if (ret != 0) {
294       mesa_loge("map ioctl failure\n");
295       return false;
296    }
297 
298    bo->map = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
299                   device->pdevice->render_fd, map.offset);
300    if (bo->map == MAP_FAILED) {
301       mesa_loge("mmap of bo %d (offset 0x%016llx, size %d) failed\n",
302                 bo->handle, (long long)map.offset, (uint32_t)bo->size);
303       return false;
304    }
305    VG(VALGRIND_MALLOCLIKE_BLOCK(bo->map, bo->size, 0, false));
306 
307    bo->map_size = size;
308 
309    return true;
310 }
311 
312 bool
v3dv_bo_wait(struct v3dv_device * device,struct v3dv_bo * bo,uint64_t timeout_ns)313 v3dv_bo_wait(struct v3dv_device *device,
314              struct v3dv_bo *bo,
315              uint64_t timeout_ns)
316 {
317    MESA_TRACE_FUNC();
318    struct drm_v3d_wait_bo wait = {
319       .handle = bo->handle,
320       .timeout_ns = timeout_ns,
321    };
322    return v3d_ioctl(device->pdevice->render_fd,
323                     DRM_IOCTL_V3D_WAIT_BO, &wait) == 0;
324 }
325 
326 bool
v3dv_bo_map(struct v3dv_device * device,struct v3dv_bo * bo,uint32_t size)327 v3dv_bo_map(struct v3dv_device *device, struct v3dv_bo *bo, uint32_t size)
328 {
329    assert(bo && size <= bo->size);
330 
331    bool ok = v3dv_bo_map_unsynchronized(device, bo, size);
332    if (!ok)
333       return false;
334 
335    ok = v3dv_bo_wait(device, bo, OS_TIMEOUT_INFINITE);
336    if (!ok) {
337       mesa_loge("memory wait for map failed\n");
338       return false;
339    }
340 
341    return true;
342 }
343 
344 void
v3dv_bo_unmap(struct v3dv_device * device,struct v3dv_bo * bo)345 v3dv_bo_unmap(struct v3dv_device *device, struct v3dv_bo *bo)
346 {
347    assert(bo && bo->map && bo->map_size > 0);
348 
349    munmap(bo->map, bo->map_size);
350    VG(VALGRIND_FREELIKE_BLOCK(bo->map, 0));
351    bo->map = NULL;
352    bo->map_size = 0;
353 }
354 
355 static bool
reallocate_size_list(struct v3dv_bo_cache * cache,struct v3dv_device * device,uint32_t size)356 reallocate_size_list(struct v3dv_bo_cache *cache,
357                      struct v3dv_device *device,
358                      uint32_t size)
359 {
360    struct list_head *new_list =
361       vk_alloc(&device->vk.alloc, sizeof(struct list_head) * size, 8,
362                VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
363 
364    if (!new_list) {
365       mesa_loge("Failed to allocate host memory for cache bo list\n");
366       return false;
367    }
368    struct list_head *old_list = cache->size_list;
369 
370    /* Move old list contents over (since the array has moved, and
371     * therefore the pointers to the list heads have to change).
372     */
373    for (int i = 0; i < cache->size_list_size; i++) {
374       struct list_head *old_head = &cache->size_list[i];
375       if (list_is_empty(old_head)) {
376          list_inithead(&new_list[i]);
377       } else {
378          new_list[i].next = old_head->next;
379          new_list[i].prev = old_head->prev;
380          new_list[i].next->prev = &new_list[i];
381          new_list[i].prev->next = &new_list[i];
382       }
383    }
384    for (int i = cache->size_list_size; i < size; i++)
385       list_inithead(&new_list[i]);
386 
387    cache->size_list = new_list;
388    cache->size_list_size = size;
389    vk_free(&device->vk.alloc, old_list);
390 
391    return true;
392 }
393 
394 void
v3dv_bo_cache_init(struct v3dv_device * device)395 v3dv_bo_cache_init(struct v3dv_device *device)
396 {
397    device->bo_size = 0;
398    device->bo_count = 0;
399    list_inithead(&device->bo_cache.time_list);
400    /* FIXME: perhaps set a initial size for the size-list, to avoid run-time
401     * reallocations
402     */
403    device->bo_cache.size_list_size = 0;
404 
405    const char *max_cache_size_str = getenv("V3DV_MAX_BO_CACHE_SIZE");
406    if (max_cache_size_str == NULL)
407       device->bo_cache.max_cache_size = DEFAULT_MAX_BO_CACHE_SIZE;
408    else
409       device->bo_cache.max_cache_size = atoll(max_cache_size_str);
410 
411    if (dump_stats) {
412       mesa_logi("MAX BO CACHE SIZE: %iMB\n", device->bo_cache.max_cache_size);
413    }
414 
415    mtx_lock(&device->bo_cache.lock);
416    device->bo_cache.max_cache_size *= 1024 * 1024;
417    device->bo_cache.cache_count = 0;
418    device->bo_cache.cache_size = 0;
419    mtx_unlock(&device->bo_cache.lock);
420 }
421 
422 void
v3dv_bo_cache_destroy(struct v3dv_device * device)423 v3dv_bo_cache_destroy(struct v3dv_device *device)
424 {
425    bo_cache_free_all(device, true);
426    vk_free(&device->vk.alloc, device->bo_cache.size_list);
427 
428    if (dump_stats) {
429       mesa_loge("BO stats after screen destroy:\n");
430       bo_dump_stats(device);
431    }
432 }
433 
434 
435 static void
free_stale_bos(struct v3dv_device * device,time_t time)436 free_stale_bos(struct v3dv_device *device,
437                time_t time)
438 {
439    struct v3dv_bo_cache *cache = &device->bo_cache;
440    bool freed_any = false;
441 
442    list_for_each_entry_safe(struct v3dv_bo, bo, &cache->time_list,
443                             time_list) {
444       /* If it's more than a second old, free it. */
445       if (time - bo->free_time > 2) {
446          if (dump_stats && !freed_any) {
447             mesa_logi("Freeing stale BOs:\n");
448             bo_dump_stats(device);
449             freed_any = true;
450          }
451 
452          bo_remove_from_cache(cache, bo);
453          bo_free(device, bo);
454       } else {
455          break;
456       }
457    }
458 
459    if (dump_stats && freed_any) {
460       mesa_logi("Freed stale BOs:\n");
461       bo_dump_stats(device);
462    }
463 }
464 
465 bool
v3dv_bo_free(struct v3dv_device * device,struct v3dv_bo * bo)466 v3dv_bo_free(struct v3dv_device *device,
467              struct v3dv_bo *bo)
468 {
469    if (!bo)
470       return true;
471 
472    if (!p_atomic_dec_zero(&bo->refcnt))
473       return true;
474 
475    if (bo->map)
476       v3dv_bo_unmap(device, bo);
477 
478    struct timespec time;
479    struct v3dv_bo_cache *cache = &device->bo_cache;
480    uint32_t page_index = bo->size / 4096 - 1;
481 
482    if (bo->private &&
483        bo->size > cache->max_cache_size - cache->cache_size) {
484       clock_gettime(CLOCK_MONOTONIC, &time);
485       mtx_lock(&cache->lock);
486       free_stale_bos(device, time.tv_sec);
487       mtx_unlock(&cache->lock);
488    }
489 
490    if (!bo->private ||
491        bo->size > cache->max_cache_size - cache->cache_size) {
492       return bo_free(device, bo);
493    }
494 
495    clock_gettime(CLOCK_MONOTONIC, &time);
496    mtx_lock(&cache->lock);
497 
498    if (cache->size_list_size <= page_index) {
499       if (!reallocate_size_list(cache, device, page_index + 1)) {
500          bool outcome = bo_free(device, bo);
501          /* If the reallocation failed, it usually means that we are out of
502           * memory, so we also free all the bo cache. We need to call it to
503           * not use the cache lock, as we are already under it.
504           */
505          bo_cache_free_all(device, false);
506          mtx_unlock(&cache->lock);
507          return outcome;
508       }
509    }
510 
511    bo->free_time = time.tv_sec;
512    list_addtail(&bo->size_list, &cache->size_list[page_index]);
513    list_addtail(&bo->time_list, &cache->time_list);
514 
515    cache->cache_count++;
516    cache->cache_size += bo->size;
517 
518    if (dump_stats) {
519       mesa_logi("Freed %s %dkb to cache:\n", bo->name, bo->size / 1024);
520       bo_dump_stats(device);
521    }
522    bo->name = NULL;
523 
524    free_stale_bos(device, time.tv_sec);
525 
526    mtx_unlock(&cache->lock);
527 
528    return true;
529 }
530