/*
 * Copyright © 2024 Advanced Micro Devices, Inc.
 *
 * SPDX-License-Identifier: MIT
 */

#include "amdgpu_bo.h"
#include "ac_linux_drm.h"

static bool
amdgpu_userq_ring_init(struct amdgpu_winsys *aws, struct amdgpu_userq *userq)
{
   /* Allocate ring and user fence in one buffer. */
   uint32_t gtt_bo_size = AMDGPU_USERQ_RING_SIZE + aws->info.gart_page_size;
   userq->gtt_bo = amdgpu_bo_create(aws, gtt_bo_size, 256, RADEON_DOMAIN_GTT,
                                    RADEON_FLAG_GL2_BYPASS | RADEON_FLAG_NO_INTERPROCESS_SHARING);
   if (!userq->gtt_bo)
      return false;

   userq->gtt_bo_map = amdgpu_bo_map(&aws->dummy_sws.base, userq->gtt_bo, NULL,
                                     PIPE_MAP_READ | PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED);
   if (!userq->gtt_bo_map)
      return false;

   userq->wptr_bo = amdgpu_bo_create(aws, aws->info.gart_page_size, 256, RADEON_DOMAIN_GTT,
                                     RADEON_FLAG_GL2_BYPASS | RADEON_FLAG_NO_SUBALLOC |
                                     RADEON_FLAG_NO_INTERPROCESS_SHARING);
   if (!userq->wptr_bo)
      return false;

   userq->wptr_bo_map = amdgpu_bo_map(&aws->dummy_sws.base, userq->wptr_bo, NULL,
                                      PIPE_MAP_READ | PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED);
   if (!userq->wptr_bo_map)
      return false;

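   /* Layout of gtt_bo: the ring occupies the first AMDGPU_USERQ_RING_SIZE bytes and the
    * 64-bit user fence lives in the GART page that follows it.
    */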
   userq->ring_ptr = (uint32_t*)userq->gtt_bo_map;
   userq->user_fence_ptr = (uint64_t*)(userq->gtt_bo_map + AMDGPU_USERQ_RING_SIZE);
   userq->user_fence_va = amdgpu_bo_get_va(userq->gtt_bo) + AMDGPU_USERQ_RING_SIZE;
   *userq->user_fence_ptr = 0;
   *userq->wptr_bo_map = 0;
   userq->next_wptr = 0;

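   /* Read pointer buffer. RADEON_FLAG_CLEAR_VRAM guarantees it starts out zeroed; the HW
    * presumably updates it as it consumes ring packets.
    */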
   userq->rptr_bo = amdgpu_bo_create(aws, aws->info.gart_page_size, 256, RADEON_DOMAIN_VRAM,
                                     RADEON_FLAG_CLEAR_VRAM | RADEON_FLAG_GL2_BYPASS |
                                     RADEON_FLAG_NO_SUBALLOC |
                                     RADEON_FLAG_NO_INTERPROCESS_SHARING);
   if (!userq->rptr_bo)
      return false;

   return true;
}

void
amdgpu_userq_deinit(struct amdgpu_winsys *aws, struct amdgpu_userq *userq)
{
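   /* Free the kernel queue first (if it was ever created) so the buffers released below
    * are no longer referenced by it.
    */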
   if (userq->userq_handle)
      ac_drm_free_userqueue(aws->dev, userq->userq_handle);

   radeon_bo_reference(&aws->dummy_sws.base, &userq->gtt_bo, NULL);
   radeon_bo_reference(&aws->dummy_sws.base, &userq->wptr_bo, NULL);
   radeon_bo_reference(&aws->dummy_sws.base, &userq->rptr_bo, NULL);
   radeon_bo_reference(&aws->dummy_sws.base, &userq->doorbell_bo, NULL);

   switch (userq->ip_type) {
   case AMD_IP_GFX:
      radeon_bo_reference(&aws->dummy_sws.base, &userq->gfx_data.csa_bo, NULL);
      radeon_bo_reference(&aws->dummy_sws.base, &userq->gfx_data.shadow_bo, NULL);
      break;
   case AMD_IP_COMPUTE:
      radeon_bo_reference(&aws->dummy_sws.base, &userq->compute_data.eop_bo, NULL);
      break;
   case AMD_IP_SDMA:
      radeon_bo_reference(&aws->dummy_sws.base, &userq->sdma_data.csa_bo, NULL);
      break;
   default:
      fprintf(stderr, "amdgpu: userq unsupported for ip = %d\n", userq->ip_type);
   }
}

bool
amdgpu_userq_init(struct amdgpu_winsys *aws, struct amdgpu_userq *userq, enum amd_ip_type ip_type)
{
   int r = -1;
   uint32_t hw_ip_type;
   struct drm_amdgpu_userq_mqd_gfx11 gfx_mqd;
   struct drm_amdgpu_userq_mqd_compute_gfx11 compute_mqd;
   struct drm_amdgpu_userq_mqd_sdma_gfx11 sdma_mqd;
   void *mqd;

   simple_mtx_lock(&userq->lock);

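   /* A non-NULL gtt_bo means a previous call already initialized this queue (the fail path
    * below releases it), so there is nothing left to do.
    */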
   if (userq->gtt_bo) {
      simple_mtx_unlock(&userq->lock);
      return true;
   }

   userq->ip_type = ip_type;
   if (!amdgpu_userq_ring_init(aws, userq))
      goto fail;

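   /* Allocate the per-IP buffers and fill the matching MQD (memory queue descriptor) that
    * the kernel consumes when the queue is created.
    */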
   switch (userq->ip_type) {
   case AMD_IP_GFX:
      hw_ip_type = AMDGPU_HW_IP_GFX;
      userq->gfx_data.csa_bo = amdgpu_bo_create(aws, aws->info.fw_based_mcbp.csa_size,
                                                aws->info.fw_based_mcbp.csa_alignment,
                                                RADEON_DOMAIN_VRAM,
                                                RADEON_FLAG_NO_INTERPROCESS_SHARING);
      if (!userq->gfx_data.csa_bo)
         goto fail;

      userq->gfx_data.shadow_bo = amdgpu_bo_create(aws, aws->info.fw_based_mcbp.shadow_size,
                                                   aws->info.fw_based_mcbp.shadow_alignment,
                                                   RADEON_DOMAIN_VRAM,
                                                   RADEON_FLAG_NO_INTERPROCESS_SHARING);
      if (!userq->gfx_data.shadow_bo)
         goto fail;

      gfx_mqd.shadow_va = amdgpu_bo_get_va(userq->gfx_data.shadow_bo);
      gfx_mqd.csa_va = amdgpu_bo_get_va(userq->gfx_data.csa_bo);
      mqd = &gfx_mqd;
      break;
   case AMD_IP_COMPUTE:
      hw_ip_type = AMDGPU_HW_IP_COMPUTE;
      userq->compute_data.eop_bo = amdgpu_bo_create(aws, aws->info.gart_page_size, 256,
                                                    RADEON_DOMAIN_VRAM,
                                                    RADEON_FLAG_NO_INTERPROCESS_SHARING);
      if (!userq->compute_data.eop_bo)
         goto fail;

      compute_mqd.eop_va = amdgpu_bo_get_va(userq->compute_data.eop_bo);
      mqd = &compute_mqd;
      break;
   case AMD_IP_SDMA:
      hw_ip_type = AMDGPU_HW_IP_DMA;
      userq->sdma_data.csa_bo = amdgpu_bo_create(aws, aws->info.fw_based_mcbp.csa_size,
                                                 aws->info.fw_based_mcbp.csa_alignment,
                                                 RADEON_DOMAIN_VRAM,
                                                 RADEON_FLAG_NO_INTERPROCESS_SHARING);
      if (!userq->sdma_data.csa_bo)
         goto fail;

      sdma_mqd.csa_va = amdgpu_bo_get_va(userq->sdma_data.csa_bo);
      mqd = &sdma_mqd;
      break;
   default:
      fprintf(stderr, "amdgpu: userq unsupported for ip = %d\n", userq->ip_type);
      goto fail;
   }

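   /* Allocate the doorbell page; writing the mapped doorbell is how userspace notifies the
    * HW of write pointer updates once the queue exists.
    */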
   userq->doorbell_bo = amdgpu_bo_create(aws, aws->info.gart_page_size, 256,
                                         RADEON_DOMAIN_DOORBELL,
                                         RADEON_FLAG_NO_INTERPROCESS_SHARING);
   if (!userq->doorbell_bo)
      goto fail;

   /* The doorbell map must be the last map call; its VM timeline point is used below to
    * wait for all mappings to complete before calling amdgpu_create_userqueue().
    */
   userq->doorbell_bo_map = amdgpu_bo_map(&aws->dummy_sws.base, userq->doorbell_bo, NULL,
                                          PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED);
   if (!userq->doorbell_bo_map)
      goto fail;

   /* The VA page table mappings for the ring buffer must be ready before job submission so
    * that the submitted packets can be read by the GPU. The same applies to the rptr and
    * wptr buffers.
    */
   r = ac_drm_cs_syncobj_timeline_wait(aws->fd, &aws->vm_timeline_syncobj,
                                       &get_real_bo(amdgpu_winsys_bo(userq->doorbell_bo))
                                          ->vm_timeline_point,
                                       1, INT64_MAX, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
                                       DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, NULL);
   if (r) {
      fprintf(stderr, "amdgpu: waiting for vm fences failed\n");
      goto fail;
   }

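   /* All mappings are in place: ask the kernel to create the user queue, handing it the
    * doorbell, ring, wptr/rptr buffers, and the per-IP MQD.
    */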
   uint64_t ring_va = amdgpu_bo_get_va(userq->gtt_bo);
   r = ac_drm_create_userqueue(aws->dev, hw_ip_type,
                               get_real_bo(amdgpu_winsys_bo(userq->doorbell_bo))->kms_handle,
                               AMDGPU_USERQ_DOORBELL_INDEX, ring_va, AMDGPU_USERQ_RING_SIZE,
                               amdgpu_bo_get_va(userq->wptr_bo), amdgpu_bo_get_va(userq->rptr_bo),
                               mqd, &userq->userq_handle);
   if (r) {
      fprintf(stderr, "amdgpu: failed to create userq\n");
      goto fail;
   }

   simple_mtx_unlock(&userq->lock);
   return true;
fail:
   amdgpu_userq_deinit(aws, userq);
   simple_mtx_unlock(&userq->lock);
   return false;
}