/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <pthread.h>
#include <sched.h>
#include <time.h>
#include <sys/ioctl.h>
#ifdef HAVE_ALLOCA_H
# include <alloca.h>
#endif

#include "xf86drm.h"
#include "amdgpu_drm.h"
#include "amdgpu_internal.h"

/**
 * Create command submission context
 *
 * \param   dev - \c [in] amdgpu device handle
 * \param   context - \c [out] amdgpu context handle
 *
 * \return  0 on success, otherwise a negative POSIX error code
 */
int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
			 amdgpu_context_handle *context)
{
	struct amdgpu_context *gpu_context;
	union drm_amdgpu_ctx args;
	int r;

	if (NULL == dev)
		return -EINVAL;
	if (NULL == context)
		return -EINVAL;

	gpu_context = calloc(1, sizeof(struct amdgpu_context));
	if (NULL == gpu_context)
		return -ENOMEM;

	gpu_context->dev = dev;

	/* Create the context */
	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
	r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_CTX, &args, sizeof(args));
	if (r)
		goto error;

	gpu_context->id = args.out.alloc.ctx_id;
	*context = (amdgpu_context_handle)gpu_context;

	return 0;

error:
	free(gpu_context);
	return r;
}
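
/*
 * A minimal usage sketch (illustrative only, not compiled here): pair every
 * successful amdgpu_cs_ctx_create() with amdgpu_cs_ctx_free().  Assumes a
 * valid dev handle obtained earlier from amdgpu_device_initialize().
 *
 *	amdgpu_context_handle ctx;
 *	int err = amdgpu_cs_ctx_create(dev, &ctx);
 *	if (err)
 *		return err;	// err is a negative POSIX error code
 *	// ... build and submit command streams against ctx ...
 *	err = amdgpu_cs_ctx_free(ctx);
 */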

/**
 * Release command submission context
 *
 * \param   context - \c [in] amdgpu context handle
 *
 * \return  0 on success, otherwise a negative POSIX error code
 */
int amdgpu_cs_ctx_free(amdgpu_context_handle context)
{
	union drm_amdgpu_ctx args;
	int r;

	if (NULL == context)
		return -EINVAL;

	/* now deal with kernel side */
	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_CTX_OP_FREE_CTX;
	args.in.ctx_id = context->id;
	r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CTX,
				&args, sizeof(args));

	free(context);

	return r;
}

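/**
 * Query the reset state of a command submission context
 *
 * \param   context - \c [in] amdgpu context handle
 * \param   state - \c [out] GPU reset status of the context
 * \param   hangs - \c [out] number of hangs detected on this context
 *
 * \return  0 on success, otherwise a negative POSIX error code
 */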
int amdgpu_cs_query_reset_state(amdgpu_context_handle context,
				uint32_t *state, uint32_t *hangs)
{
	union drm_amdgpu_ctx args;
	int r;

	if (!context)
		return -EINVAL;

	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_CTX_OP_QUERY_STATE;
	args.in.ctx_id = context->id;
	r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CTX,
				&args, sizeof(args));
	if (!r) {
		*state = args.out.state.reset_status;
		*hangs = args.out.state.hangs;
	}
	return r;
}
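
/*
 * A minimal usage sketch (illustrative only, not compiled here): after a
 * failed or stalled submission, ask the kernel whether this context was
 * affected by a GPU reset.  The AMDGPU_CTX_*_RESET values come from
 * amdgpu_drm.h.
 *
 *	uint32_t state, hangs;
 *	if (!amdgpu_cs_query_reset_state(ctx, &state, &hangs) &&
 *	    state != AMDGPU_CTX_NO_RESET) {
 *		// the context was reset; its previous work is lost
 *	}
 */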

/**
 * Submit command to kernel DRM
 *
 * \param   context - \c [in]  GPU Context
 * \param   ibs_request - \c [in]  Pointer to submission request; on success,
 *          its \c seq_no field receives the fence sequence number for this
 *          submission
 *
 * \return  0 on success, otherwise a negative POSIX error code
 * \sa amdgpu_cs_submit()
 */
static int amdgpu_cs_submit_one(amdgpu_context_handle context,
				struct amdgpu_cs_request *ibs_request)
{
	union drm_amdgpu_cs cs;
	uint64_t *chunk_array;
	struct drm_amdgpu_cs_chunk *chunks;
	struct drm_amdgpu_cs_chunk_data *chunk_data;
	struct drm_amdgpu_cs_chunk_dep *dependencies = NULL;
	uint32_t i, size;
	bool user_fence;
	int r = 0;

	if (ibs_request->ip_type >= AMDGPU_HW_IP_NUM)
		return -EINVAL;
	if (ibs_request->ring >= AMDGPU_CS_MAX_RINGS)
		return -EINVAL;
	if (ibs_request->number_of_ibs > AMDGPU_CS_MAX_IBS_PER_SUBMIT)
		return -EINVAL;
	user_fence = (ibs_request->fence_info.handle != NULL);

	size = ibs_request->number_of_ibs + (user_fence ? 2 : 1);

	chunk_array = alloca(sizeof(uint64_t) * size);
	chunks = alloca(sizeof(struct drm_amdgpu_cs_chunk) * size);

	size = ibs_request->number_of_ibs + (user_fence ? 1 : 0);

	chunk_data = alloca(sizeof(struct drm_amdgpu_cs_chunk_data) * size);

	memset(&cs, 0, sizeof(cs));
	cs.in.chunks = (uint64_t)(uintptr_t)chunk_array;
	cs.in.ctx_id = context->id;
	if (ibs_request->resources)
		cs.in.bo_list_handle = ibs_request->resources->handle;
	cs.in.num_chunks = ibs_request->number_of_ibs;
	/* IB chunks */
	for (i = 0; i < ibs_request->number_of_ibs; i++) {
		struct amdgpu_cs_ib_info *ib;
		chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
		chunks[i].chunk_id = AMDGPU_CHUNK_ID_IB;
		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
		chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];

		ib = &ibs_request->ibs[i];

		chunk_data[i].ib_data._pad = 0;
		chunk_data[i].ib_data.va_start = ib->ib_mc_address;
		chunk_data[i].ib_data.ib_bytes = ib->size * 4;
		chunk_data[i].ib_data.ip_type = ibs_request->ip_type;
		chunk_data[i].ib_data.ip_instance = ibs_request->ip_instance;
		chunk_data[i].ib_data.ring = ibs_request->ring;
		chunk_data[i].ib_data.flags = ib->flags;
	}

	if (user_fence) {
		i = cs.in.num_chunks++;

		/* fence chunk */
		chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
		chunks[i].chunk_id = AMDGPU_CHUNK_ID_FENCE;
		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_fence) / 4;
		chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];

		/* fence bo handle */
		chunk_data[i].fence_data.handle = ibs_request->fence_info.handle->handle;
		/* offset */
		chunk_data[i].fence_data.offset =
			ibs_request->fence_info.offset * sizeof(uint64_t);
	}

	if (ibs_request->number_of_dependencies) {
		dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_dep) *
			ibs_request->number_of_dependencies);
		if (!dependencies) {
			r = -ENOMEM;
			goto error_unlock;
		}

		for (i = 0; i < ibs_request->number_of_dependencies; ++i) {
			struct amdgpu_cs_fence *info = &ibs_request->dependencies[i];
			struct drm_amdgpu_cs_chunk_dep *dep = &dependencies[i];
			dep->ip_type = info->ip_type;
			dep->ip_instance = info->ip_instance;
			dep->ring = info->ring;
			dep->ctx_id = info->context->id;
			dep->handle = info->fence;
		}

		i = cs.in.num_chunks++;

		/* dependencies chunk */
		chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
		chunks[i].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
		chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_dep) / 4
			* ibs_request->number_of_dependencies;
		chunks[i].chunk_data = (uint64_t)(uintptr_t)dependencies;
	}

	r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CS,
				&cs, sizeof(cs));
	if (r)
		goto error_unlock;

	ibs_request->seq_no = cs.out.handle;

error_unlock:
	free(dependencies);
	return r;
}

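/**
 * Submit command streams to the kernel DRM
 *
 * \param   context - \c [in]  GPU Context
 * \param   flags - \c [in]  Global submission flags (unused here)
 * \param   ibs_request - \c [in]  Array of submission requests
 * \param   number_of_requests - \c [in]  Number of entries in \c ibs_request
 *
 * \return  0 on success, otherwise a negative POSIX error code
 * \sa amdgpu_cs_submit_one()
 */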
int amdgpu_cs_submit(amdgpu_context_handle context,
		     uint64_t flags,
		     struct amdgpu_cs_request *ibs_request,
		     uint32_t number_of_requests)
{
	uint32_t i;
	int r;

	if (NULL == context)
		return -EINVAL;
	if (NULL == ibs_request)
		return -EINVAL;

	r = 0;
	for (i = 0; i < number_of_requests; i++) {
		r = amdgpu_cs_submit_one(context, ibs_request);
		if (r)
			break;
		ibs_request++;
	}

	return r;
}
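
/*
 * A minimal usage sketch (illustrative only, not compiled here): submit a
 * single IB on the GFX ring.  ib_mc_address and ib_size_dw are assumed to
 * describe an already-filled, GPU-mapped indirect buffer, and bo_list an
 * amdgpu_bo_list_handle covering the buffers it references.
 *
 *	struct amdgpu_cs_ib_info ib = {
 *		.ib_mc_address = ib_mc_address,
 *		.size = ib_size_dw,
 *	};
 *	struct amdgpu_cs_request req = {
 *		.ip_type = AMDGPU_HW_IP_GFX,
 *		.resources = bo_list,
 *		.number_of_ibs = 1,
 *		.ibs = &ib,
 *	};
 *	int err = amdgpu_cs_submit(ctx, 0, &req, 1);
 *	// on success, req.seq_no holds the fence sequence number
 */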

/**
 * Calculate absolute timeout.
 *
 * \param   timeout - \c [in] timeout in nanoseconds.
 *
 * \return  absolute timeout in nanoseconds
 */
drm_private uint64_t amdgpu_cs_calculate_timeout(uint64_t timeout)
{
	int r;

	if (timeout != AMDGPU_TIMEOUT_INFINITE) {
		struct timespec current;
		uint64_t current_ns;
		r = clock_gettime(CLOCK_MONOTONIC, &current);
		if (r) {
			fprintf(stderr, "clock_gettime() returned error (%d)!\n",
				errno);
			return AMDGPU_TIMEOUT_INFINITE;
		}

		current_ns = ((uint64_t)current.tv_sec) * 1000000000ull;
		current_ns += current.tv_nsec;
		timeout += current_ns;
		/* unsigned overflow means the deadline is unrepresentable */
		if (timeout < current_ns)
			timeout = AMDGPU_TIMEOUT_INFINITE;
	}
	return timeout;
}

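/* Helper for amdgpu_cs_query_fence_status(): issues the
 * DRM_IOCTL_AMDGPU_WAIT_CS ioctl and reports through *busy whether the
 * submission identified by handle is still executing.  Unless
 * AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE is set in flags, timeout_ns is
 * first converted to an absolute deadline. */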
static int amdgpu_ioctl_wait_cs(amdgpu_context_handle context,
				unsigned ip,
				unsigned ip_instance,
				uint32_t ring,
				uint64_t handle,
				uint64_t timeout_ns,
				uint64_t flags,
				bool *busy)
{
	amdgpu_device_handle dev = context->dev;
	union drm_amdgpu_wait_cs args;
	int r;

	memset(&args, 0, sizeof(args));
	args.in.handle = handle;
	args.in.ip_type = ip;
	args.in.ip_instance = ip_instance;
	args.in.ring = ring;
	args.in.ctx_id = context->id;

	if (flags & AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE)
		args.in.timeout = timeout_ns;
	else
		args.in.timeout = amdgpu_cs_calculate_timeout(timeout_ns);

	r = drmIoctl(dev->fd, DRM_IOCTL_AMDGPU_WAIT_CS, &args);
	if (r)
		return -errno;

	*busy = args.out.status;
	return 0;
}

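/**
 * Query or wait for the status of a command submission fence
 *
 * \param   fence - \c [in] fence identifying the submission
 * \param   timeout_ns - \c [in] how long to wait before giving up
 * \param   flags - \c [in] AMDGPU_QUERY_FENCE_* flags
 * \param   expired - \c [out] set to true if the fence has signaled
 *
 * \return  0 on success, otherwise a negative POSIX error code
 */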
int amdgpu_cs_query_fence_status(struct amdgpu_cs_fence *fence,
				 uint64_t timeout_ns,
				 uint64_t flags,
				 uint32_t *expired)
{
	bool busy = true;
	int r;

	if (NULL == fence)
		return -EINVAL;
	if (NULL == expired)
		return -EINVAL;
	if (NULL == fence->context)
		return -EINVAL;
	if (fence->ip_type >= AMDGPU_HW_IP_NUM)
		return -EINVAL;
	if (fence->ring >= AMDGPU_CS_MAX_RINGS)
		return -EINVAL;

	*expired = false;

	r = amdgpu_ioctl_wait_cs(fence->context, fence->ip_type,
				 fence->ip_instance, fence->ring,
				 fence->fence, timeout_ns, flags, &busy);

	if (!r && !busy)
		*expired = true;

	return r;
}
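
/*
 * A minimal usage sketch (illustrative only, not compiled here): block for
 * up to one second on the fence returned by an earlier submission (req as
 * in the amdgpu_cs_submit() example above).
 *
 *	struct amdgpu_cs_fence fence = {
 *		.context = ctx,
 *		.ip_type = AMDGPU_HW_IP_GFX,
 *		.fence = req.seq_no,
 *	};
 *	uint32_t expired;
 *	int err = amdgpu_cs_query_fence_status(&fence, 1000000000ull, 0,
 *					       &expired);
 *	// err == 0 && expired != 0 means the submission has completed
 */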