/*
 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
#include "nvk_heap.h"

#include "nvk_device.h"
#include "nvk_physical_device.h"
#include "nvk_queue.h"

#include "util/macros.h"

#include "nv_push.h"
#include "nvk_cl90b5.h"

VkResult
nvk_heap_init(struct nvk_device *dev, struct nvk_heap *heap,
              enum nouveau_ws_bo_flags bo_flags,
              enum nouveau_ws_bo_map_flags map_flags,
              uint32_t overalloc, bool contiguous)
{
   memset(heap, 0, sizeof(*heap));

   heap->bo_flags = bo_flags;
   if (map_flags)
      heap->bo_flags |= NOUVEAU_WS_BO_MAP;
   heap->map_flags = map_flags;
   heap->overalloc = overalloc;

   if (contiguous) {
      heap->base_addr = nouveau_ws_alloc_vma(dev->ws_dev, 0,
                                             NVK_HEAP_MAX_SIZE,
                                             0, false /* bda */,
                                             false /* sparse */);
      if (heap->base_addr == 0) {
         return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
                          "Failed to allocate VMA for heap");
      }
   }

   simple_mtx_init(&heap->mutex, mtx_plain);
   util_vma_heap_init(&heap->heap, 0, 0);

   heap->total_size = 0;
   heap->bo_count = 0;

   return VK_SUCCESS;
}

void
nvk_heap_finish(struct nvk_device *dev, struct nvk_heap *heap)
{
   for (uint32_t bo_idx = 0; bo_idx < heap->bo_count; bo_idx++) {
      if (heap->base_addr != 0) {
         nouveau_ws_bo_unbind_vma(dev->ws_dev, heap->bos[bo_idx].addr,
                                  heap->bos[bo_idx].bo->size);
      }
      nouveau_ws_bo_unmap(heap->bos[bo_idx].bo, heap->bos[bo_idx].map);
      nouveau_ws_bo_destroy(heap->bos[bo_idx].bo);
   }

   util_vma_heap_finish(&heap->heap);
   simple_mtx_destroy(&heap->mutex);

   if (heap->base_addr != 0) {
      nouveau_ws_free_vma(dev->ws_dev, heap->base_addr, NVK_HEAP_MAX_SIZE,
                          false /* bda */, false /* sparse */);
   }
}

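/* The heap tracks all of its ranges in a single util_vma_heap, so each BO's
 * space is packed into one 64-bit value: the BO index plus one in the top
 * 16 bits and the byte offset within that BO in the low 48 bits.  The +1
 * keeps every valid encoding nonzero, since util_vma_heap_alloc() returns 0
 * on failure.
 */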
static uint64_t
encode_vma(uint32_t bo_idx, uint64_t bo_offset)
{
   assert(bo_idx < UINT16_MAX - 1);
   assert(bo_offset < (1ull << 48));
   return ((uint64_t)(bo_idx + 1) << 48) | bo_offset;
}

static uint32_t
vma_bo_idx(uint64_t offset)
{
   offset = offset >> 48;
   assert(offset > 0);
   return offset - 1;
}

static uint64_t
vma_bo_offset(uint64_t offset)
{
   return offset & BITFIELD64_MASK(48);
}

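/* Appends one more BO to the heap and hands its usable range (minus any
 * requested overallocation padding) to the VMA allocator.  Must be called
 * with the heap mutex held.
 */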
static VkResult
nvk_heap_grow_locked(struct nvk_device *dev, struct nvk_heap *heap)
{
   if (heap->bo_count >= NVK_HEAP_MAX_BO_COUNT) {
      return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
                       "Heap has already hit its maximum size");
   }

   /* First two BOs are MIN_SIZE, double after that */
   const uint64_t new_bo_size =
      NVK_HEAP_MIN_SIZE << (MAX2(heap->bo_count, 1) - 1);

   struct nouveau_ws_bo *bo =
      nouveau_ws_bo_new(dev->ws_dev, new_bo_size, 0, heap->bo_flags);
   if (bo == NULL) {
      return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
                       "Failed to allocate a heap BO: %m");
   }

   void *map = NULL;
   if (heap->map_flags) {
      map = nouveau_ws_bo_map(bo, heap->map_flags, NULL);
      if (map == NULL) {
         nouveau_ws_bo_destroy(bo);
         return vk_errorf(dev, VK_ERROR_OUT_OF_HOST_MEMORY,
                          "Failed to map a heap BO: %m");
      }
   }

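   /* By default, use the BO's own GPU address.  For contiguous heaps, bind
    * the new BO at the end of the reserved VMA range instead so the whole
    * heap remains one contiguous address range.
    */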
   uint64_t addr = bo->offset;
   if (heap->base_addr != 0) {
      addr = heap->base_addr + heap->total_size;
      nouveau_ws_bo_bind_vma(dev->ws_dev, bo, addr, new_bo_size, 0, 0);

      /* For contiguous heaps, we can now free the padding from the previous
       * BO because the BO we just added will provide the needed padding. For
       * non-contiguous heaps, we have to leave each BO padded individually.
       */
      if (heap->bo_count > 0) {
         struct nouveau_ws_bo *prev_bo = heap->bos[heap->bo_count - 1].bo;
         assert(heap->overalloc < prev_bo->size);
         const uint64_t pad_vma =
            encode_vma(heap->bo_count - 1, prev_bo->size - heap->overalloc);
         util_vma_heap_free(&heap->heap, pad_vma, heap->overalloc);
      }
   }

   uint64_t vma = encode_vma(heap->bo_count, 0);
   assert(heap->overalloc < new_bo_size);
   util_vma_heap_free(&heap->heap, vma, new_bo_size - heap->overalloc);

   heap->bos[heap->bo_count++] = (struct nvk_heap_bo) {
      .bo = bo,
      .map = map,
      .addr = addr,
   };
   heap->total_size += new_bo_size;

   return VK_SUCCESS;
}

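/* Tries to carve `size` bytes out of the existing BOs; if the VMA allocator
 * has no suitable free range, grows the heap by one BO and retries until
 * either the allocation succeeds or growth fails.
 */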
static VkResult
nvk_heap_alloc_locked(struct nvk_device *dev, struct nvk_heap *heap,
                      uint64_t size, uint32_t alignment,
                      uint64_t *addr_out, void **map_out)
{
   while (1) {
      uint64_t vma = util_vma_heap_alloc(&heap->heap, size, alignment);
      if (vma != 0) {
         uint32_t bo_idx = vma_bo_idx(vma);
         uint64_t bo_offset = vma_bo_offset(vma);

         assert(bo_idx < heap->bo_count);
         assert(heap->bos[bo_idx].bo != NULL);
         assert(bo_offset + size <= heap->bos[bo_idx].bo->size);

         *addr_out = heap->bos[bo_idx].addr + bo_offset;
         if (map_out != NULL) {
            if (heap->bos[bo_idx].map != NULL)
               *map_out = (char *)heap->bos[bo_idx].map + bo_offset;
            else
               *map_out = NULL;
         }

         return VK_SUCCESS;
      }

      VkResult result = nvk_heap_grow_locked(dev, heap);
      if (result != VK_SUCCESS)
         return result;
   }
}

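/* Walks the BO list to find which BO contains `addr`, re-encodes the range
 * as a packed VMA, and returns it to the allocator.
 */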
static void
nvk_heap_free_locked(struct nvk_device *dev, struct nvk_heap *heap,
                     uint64_t addr, uint64_t size)
{
   assert(addr + size > addr);

   for (uint32_t bo_idx = 0; bo_idx < heap->bo_count; bo_idx++) {
      if (addr < heap->bos[bo_idx].addr)
         continue;

      uint64_t bo_offset = addr - heap->bos[bo_idx].addr;
      if (bo_offset >= heap->bos[bo_idx].bo->size)
         continue;

      assert(bo_offset + size <= heap->bos[bo_idx].bo->size);
      uint64_t vma = encode_vma(bo_idx, bo_offset);

      util_vma_heap_free(&heap->heap, vma, size);
      return;
   }
   assert(!"Failed to find heap BO");
}

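/* Thread-safe wrapper: takes the heap mutex around nvk_heap_alloc_locked(). */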
VkResult
nvk_heap_alloc(struct nvk_device *dev, struct nvk_heap *heap,
               uint64_t size, uint32_t alignment,
               uint64_t *addr_out, void **map_out)
{
   simple_mtx_lock(&heap->mutex);
   VkResult result = nvk_heap_alloc_locked(dev, heap, size, alignment,
                                           addr_out, map_out);
   simple_mtx_unlock(&heap->mutex);

   return result;
}

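/* Allocates space in the heap and fills it with `data`, either directly
 * through a CPU mapping when the heap is mapped for write or via the
 * device's upload queue otherwise.
 */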
VkResult
nvk_heap_upload(struct nvk_device *dev, struct nvk_heap *heap,
                const void *data, size_t size, uint32_t alignment,
                uint64_t *addr_out)
{
   simple_mtx_lock(&heap->mutex);
   void *map = NULL;
   VkResult result = nvk_heap_alloc_locked(dev, heap, size, alignment,
                                           addr_out, &map);
   simple_mtx_unlock(&heap->mutex);

   if (result != VK_SUCCESS)
      return result;

   if (map != NULL && (heap->map_flags & NOUVEAU_WS_BO_WR)) {
      /* If we have a map, copy directly with memcpy */
      memcpy(map, data, size);
   } else {
      /* Otherwise, kick off an upload with the upload queue.
       *
       * This is a queued operation that the driver ensures happens before
       * any more client work via semaphores.  Because this is asynchronous
       * and heap allocations are synchronous, we have to be a bit careful
       * here.  The heap only ever tracks the current known CPU state of
       * everything while the upload queue makes that state valid at some
       * point in the future.
       *
       * This can be especially tricky for very fast upload/free cycles,
       * such as if the client compiles a shader, throws it away without
       * using it, and then compiles another shader that ends up at the same
       * address.  What makes this all correct is the fact that everything
       * on the upload queue happens in a well-defined device-wide order.
       * In this case the first shader will get uploaded and then the second
       * will get uploaded over top of it.  As long as we don't free the
       * memory out from under the upload queue, everything will end up in
       * the correct state by the time the client's shaders actually
       * execute.
       */
      result = nvk_upload_queue_upload(dev, &dev->upload, *addr_out, data, size);
      if (result != VK_SUCCESS) {
         nvk_heap_free(dev, heap, *addr_out, size);
         return result;
      }
   }

   return VK_SUCCESS;
}

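/* Thread-safe wrapper: takes the heap mutex around nvk_heap_free_locked(). */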
void
nvk_heap_free(struct nvk_device *dev, struct nvk_heap *heap,
              uint64_t addr, uint64_t size)
{
   simple_mtx_lock(&heap->mutex);
   nvk_heap_free_locked(dev, heap, addr, size);
   simple_mtx_unlock(&heap->mutex);
}