/*
 * Copyright © 2009 Corbin Simpson <MostAwesomeDude@gmail.com>
 * Copyright © 2009 Joakim Sindholt <opensource@zhasha.com>
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 *
 * SPDX-License-Identifier: MIT
 */

#include "amdgpu_cs.h"

#include "util/os_file.h"
#include "util/os_misc.h"
#include "util/u_cpu_detect.h"
#include "util/u_hash_table.h"
#include "util/hash_table.h"
#include "util/thread_sched.h"
#include "util/xmlconfig.h"
#include "drm-uapi/amdgpu_drm.h"
#include <xf86drm.h>
#include <stdio.h>
#include <sys/stat.h>
#include <fcntl.h>
#include "sid.h"

static struct hash_table *dev_tab = NULL;
static simple_mtx_t dev_tab_mutex = SIMPLE_MTX_INITIALIZER;

#if DEBUG
DEBUG_GET_ONCE_BOOL_OPTION(all_bos, "RADEON_ALL_BOS", false)
#endif

/* Helper function to do the ioctls needed for setup and init. */
static bool do_winsys_init(struct amdgpu_winsys *ws,
                           const struct pipe_screen_config *config,
                           int fd)
{
   if (!ac_query_gpu_info(fd, ws->dev, &ws->info, false))
      goto fail;

   /* TODO: Enable this once the kernel handles it efficiently. */
   if (ws->info.has_dedicated_vram)
      ws->info.has_local_buffers = false;

   ws->addrlib = ac_addrlib_create(&ws->info, &ws->info.max_alignment);
   if (!ws->addrlib) {
      fprintf(stderr, "amdgpu: Cannot create addrlib.\n");
      goto fail;
   }

   ws->check_vm = strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != NULL ||
                  strstr(debug_get_option("AMD_DEBUG", ""), "check_vm") != NULL;
   ws->noop_cs = ws->info.family_overridden || debug_get_bool_option("RADEON_NOOP", false);
#if DEBUG
   ws->debug_all_bos = debug_get_option_all_bos();
#endif
   ws->reserve_vmid = strstr(debug_get_option("R600_DEBUG", ""), "reserve_vmid") != NULL ||
                      strstr(debug_get_option("AMD_DEBUG", ""), "reserve_vmid") != NULL ||
                      strstr(debug_get_option("AMD_DEBUG", ""), "sqtt") != NULL;
   ws->zero_all_vram_allocs = strstr(debug_get_option("R600_DEBUG", ""), "zerovram") != NULL ||
                              driQueryOptionb(config->options, "radeonsi_zerovram");

   return true;

fail:
   amdgpu_device_deinitialize(ws->dev);
   ws->dev = NULL;
   return false;
}

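/* Undo do_winsys_init and amdgpu_winsys_create: drop the per-queue fences and
 * contexts, destroy the CS submission queue, the slab and cache buffer
 * managers and their locks, and finally the addrlib and device handles. */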
static void do_winsys_deinit(struct amdgpu_winsys *ws)
{
   if (ws->reserve_vmid)
      amdgpu_vm_unreserve_vmid(ws->dev, 0);

   for (unsigned i = 0; i < ARRAY_SIZE(ws->queues); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(ws->queues[i].fences); j++)
         amdgpu_fence_reference(&ws->queues[i].fences[j], NULL);

      amdgpu_ctx_reference(&ws->queues[i].last_ctx, NULL);
   }

   if (util_queue_is_initialized(&ws->cs_queue))
      util_queue_destroy(&ws->cs_queue);

   simple_mtx_destroy(&ws->bo_fence_lock);
   if (ws->bo_slabs.groups)
      pb_slabs_deinit(&ws->bo_slabs);
   pb_cache_deinit(&ws->bo_cache);
   _mesa_hash_table_destroy(ws->bo_export_table, NULL);
   simple_mtx_destroy(&ws->sws_list_lock);
#if DEBUG
   simple_mtx_destroy(&ws->global_bo_list_lock);
#endif
   simple_mtx_destroy(&ws->bo_export_table_lock);

   ac_addrlib_destroy(ws->addrlib);
   amdgpu_device_deinitialize(ws->dev);
   FREE(ws);
}

static void amdgpu_winsys_destroy_locked(struct radeon_winsys *rws, bool locked)
{
   struct amdgpu_screen_winsys *sws = amdgpu_screen_winsys(rws);
   struct amdgpu_winsys *ws = sws->aws;
   bool destroy;

   /* When the reference counter drops to zero, remove the device pointer
    * from the table.
    * This must happen while the mutex is locked, so that
    * amdgpu_winsys_create in another thread doesn't get the winsys
    * from the table when the counter drops to 0.
    */
   if (!locked)
      simple_mtx_lock(&dev_tab_mutex);

   destroy = pipe_reference(&ws->reference, NULL);
   if (destroy && dev_tab) {
      _mesa_hash_table_remove_key(dev_tab, ws->dev);
      if (_mesa_hash_table_num_entries(dev_tab) == 0) {
         _mesa_hash_table_destroy(dev_tab, NULL);
         dev_tab = NULL;
      }
   }

   if (!locked)
      simple_mtx_unlock(&dev_tab_mutex);

   if (destroy)
      do_winsys_deinit(ws);

   close(sws->fd);
   FREE(rws);
}

static void amdgpu_winsys_destroy(struct radeon_winsys *rws)
{
   amdgpu_winsys_destroy_locked(rws, false);
}

static void amdgpu_winsys_query_info(struct radeon_winsys *rws, struct radeon_info *info)
{
   struct amdgpu_winsys *ws = amdgpu_winsys(rws);

   *info = ws->info;
}

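/* Optional features can't be toggled through the amdgpu winsys, so every
 * request is rejected. */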
static bool amdgpu_cs_request_feature(struct radeon_cmdbuf *rcs,
                                      enum radeon_feature_id fid,
                                      bool enable)
{
   return false;
}

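/* Answer statistics queries. Allocation and IB counters are tracked by the
 * winsys itself; everything else is forwarded to the kernel through
 * amdgpu_query_info, amdgpu_query_heap_info, or amdgpu_query_sensor_info. */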
static uint64_t amdgpu_query_value(struct radeon_winsys *rws,
                                   enum radeon_value_id value)
{
   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
   struct amdgpu_heap_info heap;
   uint64_t retval = 0;

   switch (value) {
   case RADEON_REQUESTED_VRAM_MEMORY:
      return ws->allocated_vram;
   case RADEON_REQUESTED_GTT_MEMORY:
      return ws->allocated_gtt;
   case RADEON_MAPPED_VRAM:
      return ws->mapped_vram;
   case RADEON_MAPPED_GTT:
      return ws->mapped_gtt;
   case RADEON_SLAB_WASTED_VRAM:
      return ws->slab_wasted_vram;
   case RADEON_SLAB_WASTED_GTT:
      return ws->slab_wasted_gtt;
   case RADEON_BUFFER_WAIT_TIME_NS:
      return ws->buffer_wait_time;
   case RADEON_NUM_MAPPED_BUFFERS:
      return ws->num_mapped_buffers;
   case RADEON_TIMESTAMP:
      amdgpu_query_info(ws->dev, AMDGPU_INFO_TIMESTAMP, 8, &retval);
      return retval;
   case RADEON_NUM_GFX_IBS:
      return ws->num_gfx_IBs;
   case RADEON_NUM_SDMA_IBS:
      return ws->num_sdma_IBs;
   case RADEON_GFX_BO_LIST_COUNTER:
      return ws->gfx_bo_list_counter;
   case RADEON_GFX_IB_SIZE_COUNTER:
      return ws->gfx_ib_size_counter;
   case RADEON_NUM_BYTES_MOVED:
      amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_BYTES_MOVED, 8, &retval);
      return retval;
   case RADEON_NUM_EVICTIONS:
      amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_EVICTIONS, 8, &retval);
      return retval;
   case RADEON_NUM_VRAM_CPU_PAGE_FAULTS:
      amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS, 8, &retval);
      return retval;
   case RADEON_VRAM_USAGE:
      amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM, 0, &heap);
      return heap.heap_usage;
   case RADEON_VRAM_VIS_USAGE:
      amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM,
                             AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &heap);
      return heap.heap_usage;
   case RADEON_GTT_USAGE:
      amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_GTT, 0, &heap);
      return heap.heap_usage;
   case RADEON_GPU_TEMPERATURE:
      amdgpu_query_sensor_info(ws->dev, AMDGPU_INFO_SENSOR_GPU_TEMP, 4, &retval);
      return retval;
   case RADEON_CURRENT_SCLK:
      amdgpu_query_sensor_info(ws->dev, AMDGPU_INFO_SENSOR_GFX_SCLK, 4, &retval);
      return retval;
   case RADEON_CURRENT_MCLK:
      amdgpu_query_sensor_info(ws->dev, AMDGPU_INFO_SENSOR_GFX_MCLK, 4, &retval);
      return retval;
   case RADEON_CS_THREAD_TIME:
      return util_queue_get_thread_time_nano(&ws->cs_queue, 0);
   }
   return 0;
}

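/* Read MMIO registers through the kernel. reg_offset is in bytes, while
 * amdgpu_read_mm_registers takes a dword offset, hence the division by 4. */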
static bool amdgpu_read_registers(struct radeon_winsys *rws,
                                  unsigned reg_offset,
                                  unsigned num_registers, uint32_t *out)
{
   struct amdgpu_winsys *ws = amdgpu_winsys(rws);

   return amdgpu_read_mm_registers(ws->dev, reg_offset / 4, num_registers,
                                   0xffffffff, 0, out) == 0;
}

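/* Drop one reference to the screen winsys. Returns true if it was the last
 * reference, after unlinking the screen from the device winsys and closing
 * any KMS handles it still owns. */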
static bool amdgpu_winsys_unref(struct radeon_winsys *rws)
{
   struct amdgpu_screen_winsys *sws = amdgpu_screen_winsys(rws);
   struct amdgpu_winsys *aws = sws->aws;
   bool ret;

   simple_mtx_lock(&aws->sws_list_lock);

   ret = pipe_reference(&sws->reference, NULL);
   if (ret) {
      struct amdgpu_screen_winsys **sws_iter;

      /* Remove this amdgpu_screen_winsys from amdgpu_winsys' list, so that
       * amdgpu_winsys_create can't re-use it anymore
       */
      for (sws_iter = &aws->sws_list; *sws_iter; sws_iter = &(*sws_iter)->next) {
         if (*sws_iter == sws) {
            *sws_iter = sws->next;
            break;
         }
      }
   }

   simple_mtx_unlock(&aws->sws_list_lock);

   if (ret && sws->kms_handles) {
      struct drm_gem_close args;

      hash_table_foreach(sws->kms_handles, entry) {
         args.handle = (uintptr_t)entry->data;
         drmIoctl(sws->fd, DRM_IOCTL_GEM_CLOSE, &args);
      }
      _mesa_hash_table_destroy(sws->kms_handles, NULL);
   }

   return ret;
}

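/* Apply the driver-submit scheduling policy to the CS thread, so that command
 * submission runs close to (sharing an L3 cache with) the app thread on the
 * given CPU. */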
static void amdgpu_pin_threads_to_L3_cache(struct radeon_winsys *rws,
                                           unsigned cpu)
{
   struct amdgpu_winsys *ws = amdgpu_winsys(rws);

   util_thread_sched_apply_policy(ws->cs_queue.threads[0],
                                  UTIL_THREAD_DRIVER_SUBMIT, cpu, NULL);
}

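/* Callbacks for the kms_handles tables: keys are amdgpu_bo_real pointers,
 * hashed by their KMS handle and compared by pointer identity. */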
static uint32_t kms_handle_hash(const void *key)
{
   const struct amdgpu_bo_real *bo = key;

   return bo->kms_handle;
}

static bool kms_handle_equals(const void *a, const void *b)
{
   return a == b;
}

static bool amdgpu_cs_is_secure(struct radeon_cmdbuf *rcs)
{
   struct amdgpu_cs *cs = amdgpu_cs(rcs);
   return cs->csc->secure;
}

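/* Translate the winsys pstate enum into the corresponding amdgpu kernel
 * UAPI value. */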
static uint32_t
radeon_to_amdgpu_pstate(enum radeon_ctx_pstate pstate)
{
   switch (pstate) {
   case RADEON_CTX_PSTATE_NONE:
      return AMDGPU_CTX_STABLE_PSTATE_NONE;
   case RADEON_CTX_PSTATE_STANDARD:
      return AMDGPU_CTX_STABLE_PSTATE_STANDARD;
   case RADEON_CTX_PSTATE_MIN_SCLK:
      return AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK;
   case RADEON_CTX_PSTATE_MIN_MCLK:
      return AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK;
   case RADEON_CTX_PSTATE_PEAK:
      return AMDGPU_CTX_STABLE_PSTATE_PEAK;
   default:
      unreachable("Invalid pstate");
   }
}

static bool
amdgpu_cs_set_pstate(struct radeon_cmdbuf *rcs, enum radeon_ctx_pstate pstate)
{
   struct amdgpu_cs *cs = amdgpu_cs(rcs);
   uint32_t amdgpu_pstate = radeon_to_amdgpu_pstate(pstate);
   return amdgpu_cs_ctx_stable_pstate(cs->ctx->ctx,
             AMDGPU_CTX_OP_SET_STABLE_PSTATE, amdgpu_pstate, NULL) == 0;
}

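/* os_same_file_description returns 0 if the two fds reference the same file
 * description, a positive value if they don't, and a negative value if it
 * can't tell. Treat "can't tell" as "different", but warn about it once. */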
static bool
are_file_descriptions_equal(int fd1, int fd2)
{
   int r = os_same_file_description(fd1, fd2);

   if (r == 0)
      return true;

   if (r < 0) {
      static bool logged;

      if (!logged) {
         os_log_message("amdgpu: os_same_file_description couldn't "
                        "determine if two DRM fds reference the same "
                        "file description.\n"
                        "If they do, bad things may happen!\n");
         logged = true;
      }
   }
   return false;
}

static int
amdgpu_drm_winsys_get_fd(struct radeon_winsys *rws)
{
   struct amdgpu_screen_winsys *sws = amdgpu_screen_winsys(rws);

   return sws->fd;
}

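/* Create a winsys for a DRM fd, or take a reference on an existing one.
 * All screens opened on the same device share one amdgpu_winsys; each
 * distinct file description additionally gets its own amdgpu_screen_winsys
 * linked into the device winsys' sws_list. */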
PUBLIC struct radeon_winsys *
amdgpu_winsys_create(int fd, const struct pipe_screen_config *config,
                     radeon_screen_create_t screen_create)
{
   struct amdgpu_screen_winsys *ws;
   struct amdgpu_winsys *aws;
   amdgpu_device_handle dev;
   uint32_t drm_major, drm_minor;
   int r;

   ws = CALLOC_STRUCT(amdgpu_screen_winsys);
   if (!ws)
      return NULL;

   pipe_reference_init(&ws->reference, 1);
   ws->fd = os_dupfd_cloexec(fd);

   /* Look up the winsys from the dev table. */
   simple_mtx_lock(&dev_tab_mutex);
   if (!dev_tab)
      dev_tab = util_hash_table_create_ptr_keys();

   /* Initialize the amdgpu device. This should always return the same pointer
    * for the same fd. */
   r = amdgpu_device_initialize(ws->fd, &drm_major, &drm_minor, &dev);
   if (r) {
      fprintf(stderr, "amdgpu: amdgpu_device_initialize failed.\n");
      goto fail;
   }

   /* Look up a winsys if we have already created one for this device. */
   aws = util_hash_table_get(dev_tab, dev);
   if (aws) {
      struct amdgpu_screen_winsys *sws_iter;

      /* Release the device handle, because we don't need it anymore.
       * This function is returning an existing winsys instance, which
       * has its own device handle.
       */
      amdgpu_device_deinitialize(dev);

      simple_mtx_lock(&aws->sws_list_lock);
      for (sws_iter = aws->sws_list; sws_iter; sws_iter = sws_iter->next) {
         if (are_file_descriptions_equal(sws_iter->fd, ws->fd)) {
            close(ws->fd);
            FREE(ws);
            ws = sws_iter;
            pipe_reference(NULL, &ws->reference);
            simple_mtx_unlock(&aws->sws_list_lock);
            goto unlock;
         }
      }
      simple_mtx_unlock(&aws->sws_list_lock);

      ws->kms_handles = _mesa_hash_table_create(NULL, kms_handle_hash,
                                                kms_handle_equals);
      if (!ws->kms_handles)
         goto fail;

      pipe_reference(NULL, &aws->reference);
   } else {
      /* Create a new winsys. */
      aws = CALLOC_STRUCT(amdgpu_winsys);
      if (!aws)
         goto fail;

      aws->dev = dev;
      /* The device fd might be different from the one we passed because of
       * libdrm_amdgpu device dedup logic. This can happen if radv is
       * initialized first.
       * Get the correct fd or the buffer sharing will not work (see #3424).
       */
      int device_fd = amdgpu_device_get_fd(dev);
      if (!are_file_descriptions_equal(device_fd, fd)) {
         ws->kms_handles = _mesa_hash_table_create(NULL, kms_handle_hash,
                                                   kms_handle_equals);
         if (!ws->kms_handles)
            goto fail;
         /* We could avoid storing the fd and use amdgpu_device_get_fd() where
          * we need it but we'd have to use os_same_file_description() to
          * compare the fds.
          */
         aws->fd = device_fd;
      } else {
         aws->fd = ws->fd;
      }
      aws->info.drm_major = drm_major;
      aws->info.drm_minor = drm_minor;

      /* Only aws and buffer functions are used. */
      aws->dummy_ws.aws = aws;
      amdgpu_bo_init_functions(&aws->dummy_ws);

      if (!do_winsys_init(aws, config, fd))
         goto fail_alloc;

      /* Create managers. */
      pb_cache_init(&aws->bo_cache, RADEON_NUM_HEAPS,
                    500000, aws->check_vm ? 1.0f : 1.5f, 0,
                    ((uint64_t)aws->info.vram_size_kb + aws->info.gart_size_kb) * 1024 / 8,
                    offsetof(struct amdgpu_bo_real_reusable, cache_entry), aws,
                    /* Cast to void* because one of the function parameters
                     * is a struct pointer instead of void*. */
                    (void*)amdgpu_bo_destroy, (void*)amdgpu_bo_can_reclaim);

      if (!pb_slabs_init(&aws->bo_slabs,
                         8,  /* min slab entry size: 256 bytes */
                         20, /* max slab entry size: 1 MB (slab size = 2 MB) */
                         RADEON_NUM_HEAPS, true,
                         aws,
                         amdgpu_bo_can_reclaim_slab,
                         amdgpu_bo_slab_alloc,
                         /* Cast to void* because one of the function parameters
                          * is a struct pointer instead of void*. */
                         (void*)amdgpu_bo_slab_free)) {
         amdgpu_winsys_destroy(&ws->base);
         simple_mtx_unlock(&dev_tab_mutex);
         return NULL;
      }

      aws->info.min_alloc_size = 1 << aws->bo_slabs.min_order;

      /* init reference */
      pipe_reference_init(&aws->reference, 1);
#if DEBUG
      list_inithead(&aws->global_bo_list);
#endif
      aws->bo_export_table = util_hash_table_create_ptr_keys();

      (void) simple_mtx_init(&aws->sws_list_lock, mtx_plain);
#if DEBUG
      (void) simple_mtx_init(&aws->global_bo_list_lock, mtx_plain);
#endif
      (void) simple_mtx_init(&aws->bo_fence_lock, mtx_plain);
      (void) simple_mtx_init(&aws->bo_export_table_lock, mtx_plain);

      if (!util_queue_init(&aws->cs_queue, "cs", 8, 1,
                           UTIL_QUEUE_INIT_RESIZE_IF_FULL, NULL)) {
         amdgpu_winsys_destroy(&ws->base);
         simple_mtx_unlock(&dev_tab_mutex);
         return NULL;
      }

      _mesa_hash_table_insert(dev_tab, dev, aws);

      if (aws->reserve_vmid) {
         r = amdgpu_vm_reserve_vmid(dev, 0);
         if (r) {
            amdgpu_winsys_destroy(&ws->base);
            simple_mtx_unlock(&dev_tab_mutex);
            return NULL;
         }
      }
   }

   ws->aws = aws;

   /* Set functions. */
   ws->base.unref = amdgpu_winsys_unref;
   ws->base.destroy = amdgpu_winsys_destroy;
   ws->base.get_fd = amdgpu_drm_winsys_get_fd;
   ws->base.query_info = amdgpu_winsys_query_info;
   ws->base.cs_request_feature = amdgpu_cs_request_feature;
   ws->base.query_value = amdgpu_query_value;
   ws->base.read_registers = amdgpu_read_registers;
   ws->base.pin_threads_to_L3_cache = amdgpu_pin_threads_to_L3_cache;
   ws->base.cs_is_secure = amdgpu_cs_is_secure;
   ws->base.cs_set_pstate = amdgpu_cs_set_pstate;

   amdgpu_bo_init_functions(ws);
   amdgpu_cs_init_functions(ws);
   amdgpu_surface_init_functions(ws);

   simple_mtx_lock(&aws->sws_list_lock);
   ws->next = aws->sws_list;
   aws->sws_list = ws;
   simple_mtx_unlock(&aws->sws_list_lock);

   /* Create the screen at the end. The winsys must be initialized
    * completely.
    *
    * Alternatively, we could create the screen based on "ws->gen"
    * and link all drivers into one binary blob. */
   ws->base.screen = screen_create(&ws->base, config);
   if (!ws->base.screen) {
      amdgpu_winsys_destroy_locked(&ws->base, true);
      simple_mtx_unlock(&dev_tab_mutex);
      return NULL;
   }

unlock:
   /* Unlock the mutex only once the winsys is fully initialized, so that
    * other threads attempting to create a winsys from the same fd get a
    * fully initialized winsys rather than a half-initialized one. */
   simple_mtx_unlock(&dev_tab_mutex);

   return &ws->base;

fail_alloc:
   FREE(aws);
fail:
   if (ws->kms_handles)
      _mesa_hash_table_destroy(ws->kms_handles, NULL);
   close(ws->fd);
   FREE(ws);
   simple_mtx_unlock(&dev_tab_mutex);
   return NULL;
}