#include <inttypes.h>

#include "util/simple_mtx.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/list.h"

#include "nouveau_winsys.h"
#include "nouveau_screen.h"
#include "nouveau_mm.h"

/* TODO: Higher orders can waste a lot of space for npot size buffers, should
 * add an extra cache for such buffer objects.
 *
 * HACK: Max order == 21 to accommodate TF2's 1.5 MiB, frequently reallocated
 * vertex buffer (VM flush (?) decreases performance dramatically).
 */

#define MM_MIN_ORDER 7 /* >= 6 to not violate ARB_map_buffer_alignment */
#define MM_MAX_ORDER 21

#define MM_NUM_BUCKETS (MM_MAX_ORDER - MM_MIN_ORDER + 1)

#define MM_MIN_SIZE (1 << MM_MIN_ORDER)
#define MM_MAX_SIZE (1 << MM_MAX_ORDER)

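/* Per-order bucket: slabs are kept on one of three lists depending on
 * whether all, some, or none of their chunks are still free. */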
struct mm_bucket {
   struct list_head free;
   struct list_head used;
   struct list_head full;
   int num_free;
   simple_mtx_t lock;
};

struct nouveau_mman {
   struct nouveau_device *dev;
   struct mm_bucket bucket[MM_NUM_BUCKETS];
   uint32_t domain;
   union nouveau_bo_config config;
   uint64_t allocated;
};

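/* A slab suballocates one backing bo into chunks of (1 << order) bytes;
 * bits[] holds one bit per chunk, set while the chunk is free. */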
struct mm_slab {
   struct list_head head;
   struct nouveau_bo *bo;
   struct nouveau_mman *cache;
   int order;
   int count;
   int free;
   uint32_t bits[0];
};

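/* Claim the first free chunk in the slab: scan the bitmap for a set bit,
 * clear it and return the chunk index, or -1 if the slab is full. */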
static int
mm_slab_alloc(struct mm_slab *slab)
{
   int i, n, b;

   if (slab->free == 0)
      return -1;

   for (i = 0; i < (slab->count + 31) / 32; ++i) {
      b = ffs(slab->bits[i]) - 1;
      if (b >= 0) {
         n = i * 32 + b;
         assert(n < slab->count);
         slab->free--;
         slab->bits[i] &= ~(1 << b);
         return n;
      }
   }
   return -1;
}

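/* Return chunk i to the slab by setting its bit in the free bitmap. */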
static inline void
mm_slab_free(struct mm_slab *slab, int i)
{
   assert(i < slab->count);
   slab->bits[i / 32] |= 1 << (i % 32);
   slab->free++;
   assert(slab->free <= slab->count);
}

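/* Smallest order such that (1 << order) >= size, i.e. ceil(log2(size)). */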
static inline int
mm_get_order(uint32_t size)
{
   int s = __builtin_clz(size) ^ 31;

   if (size > (1 << s))
      s += 1;
   return s;
}

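/* Map a chunk order to its bucket; orders below MM_MIN_ORDER share the
 * smallest bucket, orders above MM_MAX_ORDER are not cached at all. */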
static struct mm_bucket *
mm_bucket_by_order(struct nouveau_mman *cache, int order)
{
   if (order > MM_MAX_ORDER)
      return NULL;
   return &cache->bucket[MAX2(order, MM_MIN_ORDER) - MM_MIN_ORDER];
}

static struct mm_bucket *
mm_bucket_by_size(struct nouveau_mman *cache, unsigned size)
{
   return mm_bucket_by_order(cache, mm_get_order(size));
}

/* size of bo allocation for slab with chunks of (1 << chunk_order) bytes */
static inline uint32_t
mm_default_slab_size(unsigned chunk_order)
{
   static const int8_t slab_order[MM_MAX_ORDER - MM_MIN_ORDER + 1] =
   {
      12, 12, 13, 14, 14, 17, 17, 17, 17, 19, 19, 20, 21, 22, 22
   };

   assert(chunk_order <= MM_MAX_ORDER && chunk_order >= MM_MIN_ORDER);

   return 1 << slab_order[chunk_order - MM_MIN_ORDER];
}

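/* Create a new slab for this bucket: allocate the backing bo, mark every
 * chunk free and put the slab on the bucket's free list. The bucket lock
 * must be held. */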
static int
mm_slab_new(struct nouveau_mman *cache, struct mm_bucket *bucket, int chunk_order)
{
   struct mm_slab *slab;
   int words, ret;
   const uint32_t size = mm_default_slab_size(chunk_order);

   simple_mtx_assert_locked(&bucket->lock);

   words = ((size >> chunk_order) + 31) / 32;
   assert(words);

   slab = MALLOC(sizeof(struct mm_slab) + words * 4);
   if (!slab)
      return PIPE_ERROR_OUT_OF_MEMORY;

   memset(&slab->bits[0], ~0, words * 4);

   slab->bo = NULL;

   ret = nouveau_bo_new(cache->dev, cache->domain, 0, size, &cache->config,
                        &slab->bo);
   if (ret) {
      FREE(slab);
      return PIPE_ERROR_OUT_OF_MEMORY;
   }

   list_inithead(&slab->head);

   slab->cache = cache;
   slab->order = chunk_order;
   slab->count = slab->free = size >> chunk_order;

   assert(bucket == mm_bucket_by_order(cache, chunk_order));
   list_add(&slab->head, &bucket->free);

   p_atomic_add(&cache->allocated, size);

   if (nouveau_mesa_debug)
      debug_printf("MM: new slab, total memory = %"PRIu64" KiB\n",
                   cache->allocated / 1024);

   return PIPE_OK;
}

/* @return token to identify slab or NULL if we just allocated a new bo
 * (also NULL on allocation failure) */
struct nouveau_mm_allocation *
nouveau_mm_allocate(struct nouveau_mman *cache,
                    uint32_t size, struct nouveau_bo **bo, uint32_t *offset)
{
   struct mm_bucket *bucket;
   struct mm_slab *slab;
   struct nouveau_mm_allocation *alloc;
   int ret;

   bucket = mm_bucket_by_size(cache, size);
   if (!bucket) {
      ret = nouveau_bo_new(cache->dev, cache->domain, 0, size, &cache->config,
                           bo);
      if (ret)
         debug_printf("bo_new(%x, %x): %i\n",
                      size, cache->config.nv50.memtype, ret);

      *offset = 0;
      return NULL;
   }

   alloc = MALLOC_STRUCT(nouveau_mm_allocation);
   if (!alloc)
      return NULL;

   simple_mtx_lock(&bucket->lock);
   if (!list_is_empty(&bucket->used)) {
      slab = list_entry(bucket->used.next, struct mm_slab, head);
   } else {
      if (list_is_empty(&bucket->free)) {
         ret = mm_slab_new(cache, bucket, MAX2(mm_get_order(size), MM_MIN_ORDER));
         if (ret) {
            /* could not allocate a new slab, bail out */
            simple_mtx_unlock(&bucket->lock);
            FREE(alloc);
            return NULL;
         }
      }
      slab = list_entry(bucket->free.next, struct mm_slab, head);

      list_del(&slab->head);
      list_add(&slab->head, &bucket->used);
   }

   *offset = mm_slab_alloc(slab) << slab->order;

   nouveau_bo_ref(slab->bo, bo);

   if (slab->free == 0) {
      list_del(&slab->head);
      list_add(&slab->head, &bucket->full);
   }
   simple_mtx_unlock(&bucket->lock);

   alloc->offset = *offset;
   alloc->priv = (void *)slab;

   return alloc;
}

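/* Return a suballocation to its slab. Moves the slab to the bucket's free
 * list when its last chunk is released, or back to the used list if the
 * slab was previously full. */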
void
nouveau_mm_free(struct nouveau_mm_allocation *alloc)
{
   struct mm_slab *slab = (struct mm_slab *)alloc->priv;
   struct mm_bucket *bucket = mm_bucket_by_order(slab->cache, slab->order);

   simple_mtx_lock(&bucket->lock);
   mm_slab_free(slab, alloc->offset >> slab->order);

   if (slab->free == slab->count) {
      list_del(&slab->head);
      list_addtail(&slab->head, &bucket->free);
   } else
   if (slab->free == 1) {
      list_del(&slab->head);
      list_addtail(&slab->head, &bucket->used);
   }
   simple_mtx_unlock(&bucket->lock);

   FREE(alloc);
}

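/* void (*)(void *) wrapper around nouveau_mm_free, usable as a deferred
 * (e.g. fence) work callback. */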
void
nouveau_mm_free_work(void *data)
{
   nouveau_mm_free(data);
}

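/* Create a suballocator for the given device, memory domain and bo config. */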
struct nouveau_mman *
nouveau_mm_create(struct nouveau_device *dev, uint32_t domain,
                  union nouveau_bo_config *config)
{
   struct nouveau_mman *cache = MALLOC_STRUCT(nouveau_mman);
   int i;

   if (!cache)
      return NULL;

   cache->dev = dev;
   cache->domain = domain;
   cache->config = *config;
   cache->allocated = 0;

   for (i = 0; i < MM_NUM_BUCKETS; ++i) {
      list_inithead(&cache->bucket[i].free);
      list_inithead(&cache->bucket[i].used);
      list_inithead(&cache->bucket[i].full);
      simple_mtx_init(&cache->bucket[i].lock, mtx_plain);
   }

   return cache;
}

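/* Unlink every slab on the list, drop its bo reference and free it. */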
static inline void
nouveau_mm_free_slabs(struct list_head *head)
{
   struct mm_slab *slab, *next;

   LIST_FOR_EACH_ENTRY_SAFE(slab, next, head, head) {
      list_del(&slab->head);
      nouveau_bo_ref(NULL, &slab->bo);
      FREE(slab);
   }
}

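/* Tear down the whole cache; warns if some suballocations are still in use. */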
void
nouveau_mm_destroy(struct nouveau_mman *cache)
{
   int i;

   if (!cache)
      return;

   for (i = 0; i < MM_NUM_BUCKETS; ++i) {
      if (!list_is_empty(&cache->bucket[i].used) ||
          !list_is_empty(&cache->bucket[i].full))
         debug_printf("WARNING: destroying GPU memory cache "
                      "with some buffers still in use\n");

      nouveau_mm_free_slabs(&cache->bucket[i].free);
      nouveau_mm_free_slabs(&cache->bucket[i].used);
      nouveau_mm_free_slabs(&cache->bucket[i].full);
      simple_mtx_destroy(&cache->bucket[i].lock);
   }

   FREE(cache);
}