/*
 * Copyright © 2024 Advanced Micro Devices, Inc.
 *
 * SPDX-License-Identifier: MIT
 */

#include "amdgpu_bo.h"
#include "ac_linux_drm.h"

static bool
amdgpu_userq_ring_init(struct amdgpu_winsys *aws, struct amdgpu_userq *userq)
{
   /* Allocate ring and user fence in one buffer. */
   uint32_t gtt_bo_size = AMDGPU_USERQ_RING_SIZE + aws->info.gart_page_size;
   userq->gtt_bo = amdgpu_bo_create(aws, gtt_bo_size, 256, RADEON_DOMAIN_GTT,
                                    RADEON_FLAG_GL2_BYPASS | RADEON_FLAG_NO_INTERPROCESS_SHARING);
   if (!userq->gtt_bo)
      return false;

   userq->gtt_bo_map = amdgpu_bo_map(&aws->dummy_sws.base, userq->gtt_bo, NULL,
                                     PIPE_MAP_READ | PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED);
   if (!userq->gtt_bo_map)
      return false;

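   /* The write pointer gets its own GTT page so the CPU can update it in place. */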
   userq->wptr_bo = amdgpu_bo_create(aws, aws->info.gart_page_size, 256, RADEON_DOMAIN_GTT,
                                     RADEON_FLAG_GL2_BYPASS | RADEON_FLAG_NO_SUBALLOC |
                                        RADEON_FLAG_NO_INTERPROCESS_SHARING);
   if (!userq->wptr_bo)
      return false;

   userq->wptr_bo_map = amdgpu_bo_map(&aws->dummy_sws.base, userq->wptr_bo, NULL,
                                      PIPE_MAP_READ | PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED);
   if (!userq->wptr_bo_map)
      return false;

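   /* Ring at the start of the shared GTT buffer, user fence right after it; start with everything zeroed. */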
   userq->ring_ptr = (uint32_t*)userq->gtt_bo_map;
   userq->user_fence_ptr = (uint64_t*)(userq->gtt_bo_map + AMDGPU_USERQ_RING_SIZE);
   userq->user_fence_va = amdgpu_bo_get_va(userq->gtt_bo) + AMDGPU_USERQ_RING_SIZE;
   *userq->user_fence_ptr = 0;
   *userq->wptr_bo_map = 0;
   userq->next_wptr = 0;

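   /* The read pointer buffer is placed in VRAM (cleared at allocation via RADEON_FLAG_CLEAR_VRAM). */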
   userq->rptr_bo = amdgpu_bo_create(aws, aws->info.gart_page_size, 256, RADEON_DOMAIN_VRAM,
                                     RADEON_FLAG_CLEAR_VRAM | RADEON_FLAG_GL2_BYPASS |
                                        RADEON_FLAG_NO_SUBALLOC |
                                        RADEON_FLAG_NO_INTERPROCESS_SHARING);
   if (!userq->rptr_bo)
      return false;

   return true;
}

void
amdgpu_userq_deinit(struct amdgpu_winsys *aws, struct amdgpu_userq *userq)
{
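   /* Free the kernel user queue first, then drop all buffer references. */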
   if (userq->userq_handle)
      ac_drm_free_userqueue(aws->dev, userq->userq_handle);

   radeon_bo_reference(&aws->dummy_sws.base, &userq->gtt_bo, NULL);
   radeon_bo_reference(&aws->dummy_sws.base, &userq->wptr_bo, NULL);
   radeon_bo_reference(&aws->dummy_sws.base, &userq->rptr_bo, NULL);
   radeon_bo_reference(&aws->dummy_sws.base, &userq->doorbell_bo, NULL);

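   /* Release the per-IP buffers allocated in amdgpu_userq_init(). */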
   switch (userq->ip_type) {
   case AMD_IP_GFX:
      radeon_bo_reference(&aws->dummy_sws.base, &userq->gfx_data.csa_bo, NULL);
      radeon_bo_reference(&aws->dummy_sws.base, &userq->gfx_data.shadow_bo, NULL);
      break;
   case AMD_IP_COMPUTE:
      radeon_bo_reference(&aws->dummy_sws.base, &userq->compute_data.eop_bo, NULL);
      break;
   case AMD_IP_SDMA:
      radeon_bo_reference(&aws->dummy_sws.base, &userq->sdma_data.csa_bo, NULL);
      break;
   default:
      fprintf(stderr, "amdgpu: userq unsupported for ip = %d\n", userq->ip_type);
   }
}

bool
amdgpu_userq_init(struct amdgpu_winsys *aws, struct amdgpu_userq *userq, enum amd_ip_type ip_type)
{
   int r = -1;
   uint32_t hw_ip_type;
   struct drm_amdgpu_userq_mqd_gfx11 gfx_mqd;
   struct drm_amdgpu_userq_mqd_compute_gfx11 compute_mqd;
   struct drm_amdgpu_userq_mqd_sdma_gfx11 sdma_mqd;
   void *mqd;

   simple_mtx_lock(&userq->lock);

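   /* Nothing to do if the queue was already initialized. */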
   if (userq->gtt_bo) {
      simple_mtx_unlock(&userq->lock);
      return true;
   }

   userq->ip_type = ip_type;
   if (!amdgpu_userq_ring_init(aws, userq))
      goto fail;

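   /* Per-IP setup: allocate the buffers the firmware needs and fill the matching MQD (memory queue descriptor). */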
   switch (userq->ip_type) {
   case AMD_IP_GFX:
      hw_ip_type = AMDGPU_HW_IP_GFX;
      userq->gfx_data.csa_bo = amdgpu_bo_create(aws, aws->info.fw_based_mcbp.csa_size,
                                                aws->info.fw_based_mcbp.csa_alignment,
                                                RADEON_DOMAIN_VRAM,
                                                RADEON_FLAG_NO_INTERPROCESS_SHARING);
      if (!userq->gfx_data.csa_bo)
         goto fail;

      userq->gfx_data.shadow_bo = amdgpu_bo_create(aws, aws->info.fw_based_mcbp.shadow_size,
                                                   aws->info.fw_based_mcbp.shadow_alignment,
                                                   RADEON_DOMAIN_VRAM,
                                                   RADEON_FLAG_NO_INTERPROCESS_SHARING);
      if (!userq->gfx_data.shadow_bo)
         goto fail;

      gfx_mqd.shadow_va = amdgpu_bo_get_va(userq->gfx_data.shadow_bo);
      gfx_mqd.csa_va = amdgpu_bo_get_va(userq->gfx_data.csa_bo);
      mqd = &gfx_mqd;
      break;
   case AMD_IP_COMPUTE:
      hw_ip_type = AMDGPU_HW_IP_COMPUTE;
      userq->compute_data.eop_bo = amdgpu_bo_create(aws, aws->info.gart_page_size, 256,
                                                    RADEON_DOMAIN_VRAM,
                                                    RADEON_FLAG_NO_INTERPROCESS_SHARING);
      if (!userq->compute_data.eop_bo)
         goto fail;

      compute_mqd.eop_va = amdgpu_bo_get_va(userq->compute_data.eop_bo);
      mqd = &compute_mqd;
      break;
   case AMD_IP_SDMA:
      hw_ip_type = AMDGPU_HW_IP_DMA;
      userq->sdma_data.csa_bo = amdgpu_bo_create(aws, aws->info.fw_based_mcbp.csa_size,
                                                 aws->info.fw_based_mcbp.csa_alignment,
                                                 RADEON_DOMAIN_VRAM,
                                                 RADEON_FLAG_NO_INTERPROCESS_SHARING);
      if (!userq->sdma_data.csa_bo)
         goto fail;

      sdma_mqd.csa_va = amdgpu_bo_get_va(userq->sdma_data.csa_bo);
      mqd = &sdma_mqd;
      break;
   default:
      fprintf(stderr, "amdgpu: userq unsupported for ip = %d\n", userq->ip_type);
      goto fail;
   }

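   /* Doorbell page: written by the CPU to notify the firmware that the write pointer moved. */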
   userq->doorbell_bo = amdgpu_bo_create(aws, aws->info.gart_page_size, 256,
                                         RADEON_DOMAIN_DOORBELL,
                                         RADEON_FLAG_NO_INTERPROCESS_SHARING);
   if (!userq->doorbell_bo)
      goto fail;

   /* The doorbell map should be the last map call; it is used to wait for all mappings before
    * calling amdgpu_create_userqueue().
    */
   userq->doorbell_bo_map = amdgpu_bo_map(&aws->dummy_sws.base, userq->doorbell_bo, NULL,
                                          PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED);
   if (!userq->doorbell_bo_map)
      goto fail;

   /* The VA page table for the ring buffer should be ready before job submission so that the
    * submitted packets can be read by the GPU. The same applies to the rptr and wptr buffers.
    */
   r = ac_drm_cs_syncobj_timeline_wait(aws->fd, &aws->vm_timeline_syncobj,
                                       &get_real_bo(amdgpu_winsys_bo(userq->doorbell_bo))
                                          ->vm_timeline_point,
                                       1, INT64_MAX, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
                                          DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, NULL);
   if (r) {
      fprintf(stderr, "amdgpu: waiting for vm fences failed\n");
      goto fail;
   }

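   /* All mappings are done and the VAs are valid; ask the kernel to create the user queue. */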
   uint64_t ring_va = amdgpu_bo_get_va(userq->gtt_bo);
   r = ac_drm_create_userqueue(aws->dev, hw_ip_type,
                               get_real_bo(amdgpu_winsys_bo(userq->doorbell_bo))->kms_handle,
                               AMDGPU_USERQ_DOORBELL_INDEX, ring_va, AMDGPU_USERQ_RING_SIZE,
                               amdgpu_bo_get_va(userq->wptr_bo), amdgpu_bo_get_va(userq->rptr_bo),
                               mqd, &userq->userq_handle);
   if (r) {
      fprintf(stderr, "amdgpu: failed to create userq\n");
      goto fail;
   }

   simple_mtx_unlock(&userq->lock);
   return true;
fail:
   amdgpu_userq_deinit(aws, userq);
   simple_mtx_unlock(&userq->lock);
   return false;
}