/*
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 * Copyright © 2021 Valve Corporation
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * Authors:
 *    Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
 */

#include "zink_bo.h"
#include "zink_resource.h"
#include "zink_screen.h"
#include "util/u_hash_table.h"

struct zink_bo;

struct zink_sparse_backing_chunk {
   uint32_t begin, end;
};


/*
 * Sub-allocation information for a real buffer used as backing memory of a
 * sparse buffer.
 */
struct zink_sparse_backing {
   struct list_head list;

   struct zink_bo *bo;

   /* Sorted list of free chunks. */
   struct zink_sparse_backing_chunk *chunks;
   uint32_t max_chunks;
   uint32_t num_chunks;
};

struct zink_sparse_commitment {
   struct zink_sparse_backing *backing;
   uint32_t page;
};

struct zink_slab {
   struct pb_slab base;
   unsigned entry_size;
   struct zink_bo *buffer;
   struct zink_bo *entries;
};


ALWAYS_INLINE static struct zink_slab *
zink_slab(struct pb_slab *pslab)
{
   return (struct zink_slab*)pslab;
}

static struct pb_slabs *
get_slabs(struct zink_screen *screen, uint64_t size, enum zink_alloc_flag flags)
{
   //struct pb_slabs *bo_slabs = ((flags & RADEON_FLAG_ENCRYPTED) && screen->info.has_tmz_support) ?
      //screen->bo_slabs_encrypted : screen->bo_slabs;

   struct pb_slabs *bo_slabs = screen->pb.bo_slabs;
   /* Find the correct slab allocator for the given size. */
   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
      struct pb_slabs *slabs = &bo_slabs[i];

      if (size <= 1ULL << (slabs->min_order + slabs->num_orders - 1))
         return slabs;
   }

   assert(0);
   return NULL;
}

/* Return the power of two size of a slab entry matching the input size. */
static unsigned
get_slab_pot_entry_size(struct zink_screen *screen, unsigned size)
{
   unsigned entry_size = util_next_power_of_two(size);
   unsigned min_entry_size = 1 << screen->pb.bo_slabs[0].min_order;

   return MAX2(entry_size, min_entry_size);
}

/* Return the slab entry alignment. */
static unsigned get_slab_entry_alignment(struct zink_screen *screen, unsigned size)
{
   unsigned entry_size = get_slab_pot_entry_size(screen, size);

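   /* Requests that fit in 3/4 of the power-of-two bucket may be packed into
    * 3/4-sized slab entries, whose offsets only guarantee a quarter of the
    * power-of-two alignment (see the 3/4 handling in bo_slab_alloc).
    */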
   if (size <= entry_size * 3 / 4)
      return entry_size / 4;

   return entry_size;
}

static void
bo_destroy(struct zink_screen *screen, struct pb_buffer *pbuf)
{
   struct zink_bo *bo = zink_bo(pbuf);

   simple_mtx_lock(&screen->pb.bo_export_table_lock);
   _mesa_hash_table_remove_key(screen->pb.bo_export_table, bo);
   simple_mtx_unlock(&screen->pb.bo_export_table_lock);

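   /* Force-release any leftover persistent mapping: reset the map count so
    * the unmap below actually unmaps the memory before it is freed.
    */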
   if (!bo->u.real.is_user_ptr && bo->u.real.cpu_ptr) {
      bo->u.real.map_count = 1;
      bo->u.real.cpu_ptr = NULL;
      zink_bo_unmap(screen, bo);
   }

   VKSCR(FreeMemory)(screen->dev, bo->mem, NULL);

   simple_mtx_destroy(&bo->lock);
   FREE(bo);
}

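/* A buffer can only be returned to the cache or reclaimed from a slab once
 * the GPU has finished both its pending reads and writes.
 */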
static bool
bo_can_reclaim(struct zink_screen *screen, struct pb_buffer *pbuf)
{
   struct zink_bo *bo = zink_bo(pbuf);

   return zink_screen_usage_check_completion(screen, bo->reads) && zink_screen_usage_check_completion(screen, bo->writes);
}

static bool
bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
{
   struct zink_bo *bo = container_of(entry, struct zink_bo, u.slab.entry);

   return bo_can_reclaim(priv, &bo->base);
}

static void
bo_slab_free(struct zink_screen *screen, struct pb_slab *pslab)
{
   struct zink_slab *slab = zink_slab(pslab);
   ASSERTED unsigned slab_size = slab->buffer->base.size;

   assert(slab->base.num_entries * slab->entry_size <= slab_size);
   FREE(slab->entries);
   zink_bo_unref(screen, slab->buffer);
   FREE(slab);
}

static void
bo_slab_destroy(struct zink_screen *screen, struct pb_buffer *pbuf)
{
   struct zink_bo *bo = zink_bo(pbuf);

   assert(!bo->mem);

   //if (bo->base.usage & RADEON_FLAG_ENCRYPTED)
      //pb_slab_free(get_slabs(screen, bo->base.size, RADEON_FLAG_ENCRYPTED), &bo->u.slab.entry);
   //else
      pb_slab_free(get_slabs(screen, bo->base.size, 0), &bo->u.slab.entry);
}

static void
clean_up_buffer_managers(struct zink_screen *screen)
{
   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
      pb_slabs_reclaim(&screen->pb.bo_slabs[i]);
      //if (screen->info.has_tmz_support)
         //pb_slabs_reclaim(&screen->bo_slabs_encrypted[i]);
   }

   pb_cache_release_all_buffers(&screen->pb.bo_cache);
}

static unsigned
get_optimal_alignment(struct zink_screen *screen, uint64_t size, unsigned alignment)
{
   /* Increase the alignment for faster address translation and better memory
    * access pattern.
    */
   if (size >= 4096) {
      alignment = MAX2(alignment, 4096);
   } else if (size) {
      unsigned msb = util_last_bit(size);

      alignment = MAX2(alignment, 1u << (msb - 1));
   }
   return alignment;
}

static void
bo_destroy_or_cache(struct zink_screen *screen, struct pb_buffer *pbuf)
{
   struct zink_bo *bo = zink_bo(pbuf);

   assert(bo->mem); /* slab buffers have a separate vtbl */
   bo->reads = NULL;
   bo->writes = NULL;

   if (bo->u.real.use_reusable_pool)
      pb_cache_add_buffer(bo->cache_entry);
   else
      bo_destroy(screen, pbuf);
}

static const struct pb_vtbl bo_vtbl = {
   /* Cast to void* because one of the function parameters is a struct pointer instead of void*. */
   (void*)bo_destroy_or_cache
   /* other functions are never called */
};

static struct zink_bo *
bo_create_internal(struct zink_screen *screen,
                   uint64_t size,
                   unsigned alignment,
                   enum zink_heap heap,
                   unsigned flags,
                   const void *pNext)
{
   struct zink_bo *bo;
   bool init_pb_cache;

   /* too big for vk alloc */
   if (size > UINT32_MAX)
      return NULL;

   alignment = get_optimal_alignment(screen, size, alignment);

   VkMemoryAllocateInfo mai;
   mai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
   mai.pNext = pNext;
   mai.allocationSize = size;
   mai.memoryTypeIndex = screen->heap_map[heap];
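   /* Host-visible memory is rounded up to minMemoryMapAlignment so the whole
    * allocation can be mapped with a legally aligned pointer and size.
    */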
   if (screen->info.mem_props.memoryTypes[mai.memoryTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
      alignment = MAX2(alignment, screen->info.props.limits.minMemoryMapAlignment);
      mai.allocationSize = align64(mai.allocationSize, screen->info.props.limits.minMemoryMapAlignment);
   }
   unsigned heap_idx = screen->info.mem_props.memoryTypes[screen->heap_map[heap]].heapIndex;
   if (mai.allocationSize > screen->info.mem_props.memoryHeaps[heap_idx].size) {
      mesa_loge("zink: can't allocate %"PRIu64" bytes from heap that's only %"PRIu64" bytes!\n", mai.allocationSize, screen->info.mem_props.memoryHeaps[heap_idx].size);
      return NULL;
   }

   /* all non-suballocated bo can cache */
   init_pb_cache = !pNext;

   bo = CALLOC(1, sizeof(struct zink_bo) + init_pb_cache * sizeof(struct pb_cache_entry));
   if (!bo) {
      return NULL;
   }

   if (init_pb_cache) {
      bo->u.real.use_reusable_pool = true;
      pb_cache_init_entry(&screen->pb.bo_cache, bo->cache_entry, &bo->base, heap);
   }

   VkResult ret = VKSCR(AllocateMemory)(screen->dev, &mai, NULL, &bo->mem);
   if (!zink_screen_handle_vkresult(screen, ret))
      goto fail;

   simple_mtx_init(&bo->lock, mtx_plain);
   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment_log2 = util_logbase2(alignment);
   bo->base.size = mai.allocationSize;
   bo->base.vtbl = &bo_vtbl;
   bo->base.placement = vk_domain_from_heap(heap);
   bo->base.usage = flags;
   bo->unique_id = p_atomic_inc_return(&screen->pb.next_bo_unique_id);

   return bo;

fail:
   bo_destroy(screen, (void*)bo);
   return NULL;
}

/*
 * Attempt to allocate the given number of backing pages. Fewer pages may be
 * allocated (depending on the fragmentation of existing backing buffers),
 * which will be reflected by a change to *pnum_pages.
 */
static struct zink_sparse_backing *
sparse_backing_alloc(struct zink_screen *screen, struct zink_bo *bo,
                     uint32_t *pstart_page, uint32_t *pnum_pages)
{
   struct zink_sparse_backing *best_backing;
   unsigned best_idx;
   uint32_t best_num_pages;

   best_backing = NULL;
   best_idx = 0;
   best_num_pages = 0;

   /* This is a very simple and inefficient best-fit algorithm. */
   list_for_each_entry(struct zink_sparse_backing, backing, &bo->u.sparse.backing, list) {
      for (unsigned idx = 0; idx < backing->num_chunks; ++idx) {
         uint32_t cur_num_pages = backing->chunks[idx].end - backing->chunks[idx].begin;
         if ((best_num_pages < *pnum_pages && cur_num_pages > best_num_pages) ||
             (best_num_pages > *pnum_pages && cur_num_pages < best_num_pages)) {
            best_backing = backing;
            best_idx = idx;
            best_num_pages = cur_num_pages;
         }
      }
   }

   /* Allocate a new backing buffer if necessary. */
   if (!best_backing) {
      struct pb_buffer *buf;
      uint64_t size;
      uint32_t pages;

      best_backing = CALLOC_STRUCT(zink_sparse_backing);
      if (!best_backing)
         return NULL;

      best_backing->max_chunks = 4;
      best_backing->chunks = CALLOC(best_backing->max_chunks,
                                    sizeof(*best_backing->chunks));
      if (!best_backing->chunks) {
         FREE(best_backing);
         return NULL;
      }

      assert(bo->u.sparse.num_backing_pages < DIV_ROUND_UP(bo->base.size, ZINK_SPARSE_BUFFER_PAGE_SIZE));

      size = MIN3(bo->base.size / 16,
                  8 * 1024 * 1024,
                  bo->base.size - (uint64_t)bo->u.sparse.num_backing_pages * ZINK_SPARSE_BUFFER_PAGE_SIZE);
      size = MAX2(size, ZINK_SPARSE_BUFFER_PAGE_SIZE);

      buf = zink_bo_create(screen, size, ZINK_SPARSE_BUFFER_PAGE_SIZE,
                           bo->base.placement, ZINK_ALLOC_NO_SUBALLOC, NULL);
      if (!buf) {
         FREE(best_backing->chunks);
         FREE(best_backing);
         return NULL;
      }

      /* We might have gotten a bigger buffer than requested via caching. */
      pages = buf->size / ZINK_SPARSE_BUFFER_PAGE_SIZE;

      best_backing->bo = zink_bo(buf);
      best_backing->num_chunks = 1;
      best_backing->chunks[0].begin = 0;
      best_backing->chunks[0].end = pages;

      list_add(&best_backing->list, &bo->u.sparse.backing);
      bo->u.sparse.num_backing_pages += pages;

      best_idx = 0;
      best_num_pages = pages;
   }

   *pnum_pages = MIN2(*pnum_pages, best_num_pages);
   *pstart_page = best_backing->chunks[best_idx].begin;
   best_backing->chunks[best_idx].begin += *pnum_pages;

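   /* The chunk was fully consumed; remove it from the sorted free list. */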
   if (best_backing->chunks[best_idx].begin >= best_backing->chunks[best_idx].end) {
      memmove(&best_backing->chunks[best_idx], &best_backing->chunks[best_idx + 1],
              sizeof(*best_backing->chunks) * (best_backing->num_chunks - best_idx - 1));
      best_backing->num_chunks--;
   }

   return best_backing;
}

static void
sparse_free_backing_buffer(struct zink_screen *screen, struct zink_bo *bo,
                           struct zink_sparse_backing *backing)
{
   bo->u.sparse.num_backing_pages -= backing->bo->base.size / ZINK_SPARSE_BUFFER_PAGE_SIZE;

   list_del(&backing->list);
   zink_bo_unref(screen, backing->bo);
   FREE(backing->chunks);
   FREE(backing);
}

/*
 * Return a range of pages from the given backing buffer back into the
 * free structure.
 */
static bool
sparse_backing_free(struct zink_screen *screen, struct zink_bo *bo,
                    struct zink_sparse_backing *backing,
                    uint32_t start_page, uint32_t num_pages)
{
   uint32_t end_page = start_page + num_pages;
   unsigned low = 0;
   unsigned high = backing->num_chunks;

   /* Find the first chunk with begin >= start_page. */
   while (low < high) {
      unsigned mid = low + (high - low) / 2;

      if (backing->chunks[mid].begin >= start_page)
         high = mid;
      else
         low = mid + 1;
   }

   assert(low >= backing->num_chunks || end_page <= backing->chunks[low].begin);
   assert(low == 0 || backing->chunks[low - 1].end <= start_page);

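   /* Either extend the previous free chunk (possibly fusing it with the next
    * one), extend the next free chunk downwards, or insert a brand new chunk.
    */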
   if (low > 0 && backing->chunks[low - 1].end == start_page) {
      backing->chunks[low - 1].end = end_page;

      if (low < backing->num_chunks && end_page == backing->chunks[low].begin) {
         backing->chunks[low - 1].end = backing->chunks[low].end;
         memmove(&backing->chunks[low], &backing->chunks[low + 1],
                 sizeof(*backing->chunks) * (backing->num_chunks - low - 1));
         backing->num_chunks--;
      }
   } else if (low < backing->num_chunks && end_page == backing->chunks[low].begin) {
      backing->chunks[low].begin = start_page;
   } else {
      if (backing->num_chunks >= backing->max_chunks) {
         unsigned new_max_chunks = 2 * backing->max_chunks;
         struct zink_sparse_backing_chunk *new_chunks =
            REALLOC(backing->chunks,
                    sizeof(*backing->chunks) * backing->max_chunks,
                    sizeof(*backing->chunks) * new_max_chunks);
         if (!new_chunks)
            return false;

         backing->max_chunks = new_max_chunks;
         backing->chunks = new_chunks;
      }

      memmove(&backing->chunks[low + 1], &backing->chunks[low],
              sizeof(*backing->chunks) * (backing->num_chunks - low));
      backing->chunks[low].begin = start_page;
      backing->chunks[low].end = end_page;
      backing->num_chunks++;
   }

   if (backing->num_chunks == 1 && backing->chunks[0].begin == 0 &&
       backing->chunks[0].end == backing->bo->base.size / ZINK_SPARSE_BUFFER_PAGE_SIZE)
      sparse_free_backing_buffer(screen, bo, backing);

   return true;
}

static void
bo_sparse_destroy(struct zink_screen *screen, struct pb_buffer *pbuf)
{
   struct zink_bo *bo = zink_bo(pbuf);

   assert(!bo->mem && bo->base.usage & ZINK_ALLOC_SPARSE);

   while (!list_is_empty(&bo->u.sparse.backing)) {
      sparse_free_backing_buffer(screen, bo,
                                 container_of(bo->u.sparse.backing.next,
                                              struct zink_sparse_backing, list));
   }

   FREE(bo->u.sparse.commitments);
   simple_mtx_destroy(&bo->lock);
   FREE(bo);
}

static const struct pb_vtbl bo_sparse_vtbl = {
   /* Cast to void* because one of the function parameters is a struct pointer instead of void*. */
   (void*)bo_sparse_destroy
   /* other functions are never called */
};

static struct pb_buffer *
bo_sparse_create(struct zink_screen *screen, uint64_t size)
{
   struct zink_bo *bo;

   /* We use 32-bit page numbers; refuse to attempt allocating sparse buffers
    * that exceed this limit. This is not really a restriction: we don't have
    * that much virtual address space anyway.
    */
   if (size > (uint64_t)INT32_MAX * ZINK_SPARSE_BUFFER_PAGE_SIZE)
      return NULL;

   bo = CALLOC_STRUCT(zink_bo);
   if (!bo)
      return NULL;

   simple_mtx_init(&bo->lock, mtx_plain);
   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment_log2 = util_logbase2(ZINK_SPARSE_BUFFER_PAGE_SIZE);
   bo->base.size = size;
   bo->base.vtbl = &bo_sparse_vtbl;
   bo->base.placement = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
   bo->unique_id = p_atomic_inc_return(&screen->pb.next_bo_unique_id);
   bo->base.usage = ZINK_ALLOC_SPARSE;

   bo->u.sparse.num_va_pages = DIV_ROUND_UP(size, ZINK_SPARSE_BUFFER_PAGE_SIZE);
   bo->u.sparse.commitments = CALLOC(bo->u.sparse.num_va_pages,
                                     sizeof(*bo->u.sparse.commitments));
   if (!bo->u.sparse.commitments)
      goto error_alloc_commitments;

   list_inithead(&bo->u.sparse.backing);

   return &bo->base;

error_alloc_commitments:
   simple_mtx_destroy(&bo->lock);
   FREE(bo);
   return NULL;
}

struct pb_buffer *
zink_bo_create(struct zink_screen *screen, uint64_t size, unsigned alignment, enum zink_heap heap, enum zink_alloc_flag flags, const void *pNext)
{
   struct zink_bo *bo;
   /* pull in sparse flag */
   flags |= zink_alloc_flags_from_heap(heap);

   //struct pb_slabs *slabs = ((flags & RADEON_FLAG_ENCRYPTED) && screen->info.has_tmz_support) ?
      //screen->bo_slabs_encrypted : screen->bo_slabs;
   struct pb_slabs *slabs = screen->pb.bo_slabs;

   struct pb_slabs *last_slab = &slabs[NUM_SLAB_ALLOCATORS - 1];
   unsigned max_slab_entry_size = 1 << (last_slab->min_order + last_slab->num_orders - 1);

   /* Sub-allocate small buffers from slabs. */
   if (!(flags & (ZINK_ALLOC_NO_SUBALLOC | ZINK_ALLOC_SPARSE)) &&
       size <= max_slab_entry_size) {
      struct pb_slab_entry *entry;

      if (heap < 0 || heap >= ZINK_HEAP_MAX)
         goto no_slab;

      unsigned alloc_size = size;

      /* Always use slabs for sizes less than 4 KB because the kernel aligns
       * everything to 4 KB.
       */
      if (size < alignment && alignment <= 4 * 1024)
         alloc_size = alignment;

      if (alignment > get_slab_entry_alignment(screen, alloc_size)) {
         /* 3/4 allocations can return too small alignment. Try again with a power of two
          * allocation size.
          */
         unsigned pot_size = get_slab_pot_entry_size(screen, alloc_size);

         if (alignment <= pot_size) {
            /* This size works but wastes some memory to fulfil the alignment. */
            alloc_size = pot_size;
         } else {
            goto no_slab; /* can't fulfil alignment requirements */
         }
      }

      struct pb_slabs *slabs = get_slabs(screen, alloc_size, flags);
      entry = pb_slab_alloc(slabs, alloc_size, heap);
      if (!entry) {
         /* Clean up buffer managers and try again. */
         clean_up_buffer_managers(screen);

         entry = pb_slab_alloc(slabs, alloc_size, heap);
      }
      if (!entry)
         return NULL;

      bo = container_of(entry, struct zink_bo, u.slab.entry);
      pipe_reference_init(&bo->base.reference, 1);
      bo->base.size = size;
      assert(alignment <= 1 << bo->base.alignment_log2);

      return &bo->base;
   }
no_slab:

   if (flags & ZINK_ALLOC_SPARSE) {
      assert(ZINK_SPARSE_BUFFER_PAGE_SIZE % alignment == 0);

      return bo_sparse_create(screen, size);
   }

   /* Align size to page size. This is the minimum alignment for normal
    * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
    * like constant/uniform buffers, can benefit from better and more reuse.
    */
   if (heap == ZINK_HEAP_DEVICE_LOCAL_VISIBLE) {
      size = align64(size, screen->info.props.limits.minMemoryMapAlignment);
      alignment = align(alignment, screen->info.props.limits.minMemoryMapAlignment);
   }

   bool use_reusable_pool = !(flags & ZINK_ALLOC_NO_SUBALLOC);

   if (use_reusable_pool) {
      /* Get a buffer from the cache. */
      bo = (struct zink_bo*)
           pb_cache_reclaim_buffer(&screen->pb.bo_cache, size, alignment, 0, heap);
      if (bo)
         return &bo->base;
   }

   /* Create a new one. */
   bo = bo_create_internal(screen, size, alignment, heap, flags, pNext);
   if (!bo) {
      /* Clean up buffer managers and try again. */
      clean_up_buffer_managers(screen);

      bo = bo_create_internal(screen, size, alignment, heap, flags, pNext);
      if (!bo)
         return NULL;
   }

   return &bo->base;
}

void *
zink_bo_map(struct zink_screen *screen, struct zink_bo *bo)
{
   void *cpu = NULL;
   uint64_t offset = 0;
   struct zink_bo *real;

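   /* Slab entries have no VkDeviceMemory of their own; map through the parent
    * (real) BO and add the entry's offset within it.
    */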
   if (bo->mem) {
      real = bo;
   } else {
      real = bo->u.slab.real;
      offset = bo->offset - real->offset;
   }

   cpu = p_atomic_read(&real->u.real.cpu_ptr);
   if (!cpu) {
      simple_mtx_lock(&real->lock);
      /* Must re-check due to the possibility of a race. Re-check need not
       * be atomic thanks to the lock. */
      cpu = real->u.real.cpu_ptr;
      if (!cpu) {
         VkResult result = VKSCR(MapMemory)(screen->dev, real->mem, 0, real->base.size, 0, &cpu);
         if (result != VK_SUCCESS) {
            simple_mtx_unlock(&real->lock);
            return NULL;
         }
         p_atomic_set(&real->u.real.cpu_ptr, cpu);
      }
      simple_mtx_unlock(&real->lock);
   }
   p_atomic_inc(&real->u.real.map_count);

   return (uint8_t*)cpu + offset;
}

void
zink_bo_unmap(struct zink_screen *screen, struct zink_bo *bo)
{
   struct zink_bo *real = bo->mem ? bo : bo->u.slab.real;

   assert(real->u.real.map_count != 0 && "too many unmaps");

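   /* Only the last unmap of the real BO drops the CPU pointer and actually
    * calls vkUnmapMemory.
    */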
   if (p_atomic_dec_zero(&real->u.real.map_count)) {
      p_atomic_set(&real->u.real.cpu_ptr, NULL);
      VKSCR(UnmapMemory)(screen->dev, real->mem);
   }
}

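/* Bind or unbind one contiguous range of a sparse buffer via vkQueueBindSparse:
 * when commit is false, the memory handle is VK_NULL_HANDLE and the range is
 * unbound.
 */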
static bool
do_commit_single(struct zink_screen *screen, struct zink_resource *res, struct zink_bo *bo, uint32_t offset, uint32_t size, bool commit)
{
   VkBindSparseInfo sparse = {0};
   sparse.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO;
   sparse.bufferBindCount = 1;

   VkSparseBufferMemoryBindInfo sparse_bind;
   sparse_bind.buffer = res->obj->buffer;
   sparse_bind.bindCount = 1;
   sparse.pBufferBinds = &sparse_bind;

   VkSparseMemoryBind mem_bind;
   mem_bind.resourceOffset = offset;
   mem_bind.size = MIN2(res->base.b.width0 - offset, size);
   mem_bind.memory = commit ? bo->mem : VK_NULL_HANDLE;
   mem_bind.memoryOffset = 0;
   mem_bind.flags = 0;
   sparse_bind.pBinds = &mem_bind;

   VkQueue queue = screen->threaded ? screen->thread_queue : screen->queue;

   simple_mtx_lock(&screen->queue_lock);
   VkResult ret = VKSCR(QueueBindSparse)(queue, 1, &sparse, VK_NULL_HANDLE);
   simple_mtx_unlock(&screen->queue_lock);
   return zink_screen_handle_vkresult(screen, ret);
}

bool
zink_bo_commit(struct zink_screen *screen, struct zink_resource *res, uint32_t offset, uint32_t size, bool commit)
{
   bool ok = true;
   struct zink_bo *bo = res->obj->bo;
   assert(offset % ZINK_SPARSE_BUFFER_PAGE_SIZE == 0);
   assert(offset <= bo->base.size);
   assert(size <= bo->base.size - offset);
   assert(size % ZINK_SPARSE_BUFFER_PAGE_SIZE == 0 || offset + size == bo->base.size);

   struct zink_sparse_commitment *comm = bo->u.sparse.commitments;

   uint32_t va_page = offset / ZINK_SPARSE_BUFFER_PAGE_SIZE;
   uint32_t end_va_page = va_page + DIV_ROUND_UP(size, ZINK_SPARSE_BUFFER_PAGE_SIZE);

   simple_mtx_lock(&bo->lock);

   if (commit) {
      while (va_page < end_va_page) {
         uint32_t span_va_page;

         /* Skip pages that are already committed. */
         if (comm[va_page].backing) {
            va_page++;
            continue;
         }

         /* Determine length of uncommitted span. */
         span_va_page = va_page;
         while (va_page < end_va_page && !comm[va_page].backing)
            va_page++;

         /* Fill the uncommitted span with chunks of backing memory. */
         while (span_va_page < va_page) {
            struct zink_sparse_backing *backing;
            uint32_t backing_start, backing_size;

            backing_size = va_page - span_va_page;
            backing = sparse_backing_alloc(screen, bo, &backing_start, &backing_size);
            if (!backing) {
               ok = false;
               goto out;
            }
            if (!do_commit_single(screen, res, backing->bo,
                                  (uint64_t)span_va_page * ZINK_SPARSE_BUFFER_PAGE_SIZE,
                                  (uint64_t)backing_size * ZINK_SPARSE_BUFFER_PAGE_SIZE, true)) {

               ok = sparse_backing_free(screen, bo, backing, backing_start, backing_size);
               assert(ok && "sufficient memory should already be allocated");

               ok = false;
               goto out;
            }

            while (backing_size) {
               comm[span_va_page].backing = backing;
               comm[span_va_page].page = backing_start;
               span_va_page++;
               backing_start++;
               backing_size--;
            }
         }
      }
   } else {
      if (!do_commit_single(screen, res, NULL,
                            (uint64_t)va_page * ZINK_SPARSE_BUFFER_PAGE_SIZE,
                            (uint64_t)(end_va_page - va_page) * ZINK_SPARSE_BUFFER_PAGE_SIZE, false)) {
         ok = false;
         goto out;
      }

      while (va_page < end_va_page) {
         struct zink_sparse_backing *backing;
         uint32_t backing_start;
         uint32_t span_pages;

         /* Skip pages that are already uncommitted. */
         if (!comm[va_page].backing) {
            va_page++;
            continue;
         }

         /* Group contiguous spans of pages. */
         backing = comm[va_page].backing;
         backing_start = comm[va_page].page;
         comm[va_page].backing = NULL;

         span_pages = 1;
         va_page++;

         while (va_page < end_va_page &&
                comm[va_page].backing == backing &&
                comm[va_page].page == backing_start + span_pages) {
            comm[va_page].backing = NULL;
            va_page++;
            span_pages++;
         }

         if (!sparse_backing_free(screen, bo, backing, backing_start, span_pages)) {
            /* Couldn't allocate tracking data structures, so we have to leak */
            fprintf(stderr, "zink: leaking sparse backing memory\n");
            ok = false;
         }
      }
   }
out:

   simple_mtx_unlock(&bo->lock);
   return ok;
}

static const struct pb_vtbl bo_slab_vtbl = {
   /* Cast to void* because one of the function parameters is a struct pointer instead of void*. */
   (void*)bo_slab_destroy
   /* other functions are never called */
};

static struct pb_slab *
bo_slab_alloc(void *priv, unsigned heap, unsigned entry_size, unsigned group_index, bool encrypted)
{
   struct zink_screen *screen = priv;
   VkMemoryPropertyFlags domains = vk_domain_from_heap(heap);
   uint32_t base_id;
   unsigned slab_size = 0;
   struct zink_slab *slab = CALLOC_STRUCT(zink_slab);

   if (!slab)
      return NULL;

   //struct pb_slabs *slabs = ((flags & RADEON_FLAG_ENCRYPTED) && screen->info.has_tmz_support) ?
      //screen->bo_slabs_encrypted : screen->bo_slabs;
   struct pb_slabs *slabs = screen->pb.bo_slabs;

   /* Determine the slab buffer size. */
   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
      unsigned max_entry_size = 1 << (slabs[i].min_order + slabs[i].num_orders - 1);

      if (entry_size <= max_entry_size) {
         /* The slab size is twice the size of the largest possible entry. */
         slab_size = max_entry_size * 2;

         if (!util_is_power_of_two_nonzero(entry_size)) {
            assert(util_is_power_of_two_nonzero(entry_size * 4 / 3));

            /* If the entry size is 3/4 of a power of two, we would waste space and not gain
             * anything if we allocated only twice the power of two for the backing buffer:
             *   2 * 3/4 = 1.5 usable with buffer size 2
             *
             * Allocating 5 times the entry size leads us to the next power of two and results
             * in a much better memory utilization:
             *   5 * 3/4 = 3.75 usable with buffer size 4
             */
            if (entry_size * 5 > slab_size)
               slab_size = util_next_power_of_two(entry_size * 5);
         }

         break;
      }
   }
   assert(slab_size != 0);

   slab->buffer = zink_bo(zink_bo_create(screen, slab_size, slab_size, heap, 0, NULL));
   if (!slab->buffer)
      goto fail;

   slab_size = slab->buffer->base.size;

   slab->base.num_entries = slab_size / entry_size;
   slab->base.num_free = slab->base.num_entries;
   slab->entry_size = entry_size;
   slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
   if (!slab->entries)
      goto fail_buffer;

   list_inithead(&slab->base.free);

#ifdef _MSC_VER
   /* C11 too hard for msvc, no __sync_fetch_and_add */
   base_id = p_atomic_add_return(&screen->pb.next_bo_unique_id, slab->base.num_entries) - slab->base.num_entries;
#else
   base_id = __sync_fetch_and_add(&screen->pb.next_bo_unique_id, slab->base.num_entries);
#endif
   for (unsigned i = 0; i < slab->base.num_entries; ++i) {
      struct zink_bo *bo = &slab->entries[i];

      simple_mtx_init(&bo->lock, mtx_plain);
      bo->base.alignment_log2 = util_logbase2(get_slab_entry_alignment(screen, entry_size));
      bo->base.size = entry_size;
      bo->base.vtbl = &bo_slab_vtbl;
      bo->offset = slab->buffer->offset + i * entry_size;
      bo->base.placement = domains;
      bo->unique_id = base_id + i;
      bo->u.slab.entry.slab = &slab->base;
      bo->u.slab.entry.group_index = group_index;
      bo->u.slab.entry.entry_size = entry_size;

      if (slab->buffer->mem) {
         /* The slab is not suballocated. */
         bo->u.slab.real = slab->buffer;
      } else {
         /* The slab is allocated out of a bigger slab. */
         bo->u.slab.real = slab->buffer->u.slab.real;
         assert(bo->u.slab.real->mem);
      }

      list_addtail(&bo->u.slab.entry.head, &slab->base.free);
   }

   /* Wasted alignment due to slabs with 3/4 allocations being aligned to a power of two. */
   assert(slab->base.num_entries * entry_size <= slab_size);

   return &slab->base;

fail_buffer:
   zink_bo_unref(screen, slab->buffer);
fail:
   FREE(slab);
   return NULL;
}

static struct pb_slab *
bo_slab_alloc_normal(void *priv, unsigned heap, unsigned entry_size, unsigned group_index)
{
   return bo_slab_alloc(priv, heap, entry_size, group_index, false);
}

bool
zink_bo_init(struct zink_screen *screen)
{
   uint64_t total_mem = 0;
   for (uint32_t i = 0; i < screen->info.mem_props.memoryHeapCount; ++i)
      total_mem += screen->info.mem_props.memoryHeaps[i].size;
   /* Create managers. */
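   /* Cache parameters: ZINK_HEAP_MAX buckets, 500000 us maximum buffer age,
    * 2.0 size factor for reuse, no bypass usage, and a cache cap of 1/8 of
    * total device memory.
    */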
   pb_cache_init(&screen->pb.bo_cache, ZINK_HEAP_MAX,
                 500000, 2.0f, 0,
                 total_mem / 8, screen,
                 (void*)bo_destroy, (void*)bo_can_reclaim);

   unsigned min_slab_order = 8;  /* 256 bytes */
   unsigned max_slab_order = 20; /* 1 MB (slab size = 2 MB) */
   unsigned num_slab_orders_per_allocator = (max_slab_order - min_slab_order) /
                                            NUM_SLAB_ALLOCATORS;

   /* Divide the size order range among slab managers. */
   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
      unsigned min_order = min_slab_order;
      unsigned max_order = MIN2(min_order + num_slab_orders_per_allocator,
                                max_slab_order);

      if (!pb_slabs_init(&screen->pb.bo_slabs[i],
                         min_order, max_order,
                         ZINK_HEAP_MAX, true,
                         screen,
                         bo_can_reclaim_slab,
                         bo_slab_alloc_normal,
                         (void*)bo_slab_free)) {
         return false;
      }
      min_slab_order = max_order + 1;
   }
   screen->pb.min_alloc_size = 1 << screen->pb.bo_slabs[0].min_order;
   screen->pb.bo_export_table = util_hash_table_create_ptr_keys();
   simple_mtx_init(&screen->pb.bo_export_table_lock, mtx_plain);
   return true;
}

void
zink_bo_deinit(struct zink_screen *screen)
{
   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
      if (screen->pb.bo_slabs[i].groups)
         pb_slabs_deinit(&screen->pb.bo_slabs[i]);
   }
   pb_cache_deinit(&screen->pb.bo_cache);
   _mesa_hash_table_destroy(screen->pb.bo_export_table, NULL);
   simple_mtx_destroy(&screen->pb.bo_export_table_lock);
}