• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  */
24 
25 #include <stdlib.h>
26 #include <amdgpu.h>
27 #include <amdgpu_drm.h>
28 #include <assert.h>
29 
30 #include "ac_debug.h"
31 #include "amdgpu_id.h"
32 #include "radv_radeon_winsys.h"
33 #include "radv_amdgpu_cs.h"
34 #include "radv_amdgpu_bo.h"
35 #include "sid.h"
36 
37 struct radv_amdgpu_cs {
38 	struct radeon_winsys_cs base;
39 	struct radv_amdgpu_winsys *ws;
40 
41 	struct amdgpu_cs_ib_info    ib;
42 
43 	struct radeon_winsys_bo     *ib_buffer;
44 	uint8_t                 *ib_mapped;
45 	unsigned                    max_num_buffers;
46 	unsigned                    num_buffers;
47 	amdgpu_bo_handle            *handles;
48 	uint8_t                     *priorities;
49 
50 	struct radeon_winsys_bo     **old_ib_buffers;
51 	unsigned                    num_old_ib_buffers;
52 	unsigned                    max_num_old_ib_buffers;
53 	unsigned                    *ib_size_ptr;
54 	bool                        failed;
55 	bool                        is_chained;
56 
57 	int                         buffer_hash_table[1024];
58 	unsigned                    hw_ip;
59 };
60 
static inline struct radv_amdgpu_cs *
radv_amdgpu_cs(struct radeon_winsys_cs *base)
{
	/* base is the first member of radv_amdgpu_cs, so the cast is safe. */
	return (struct radv_amdgpu_cs *)base;
}
66 
ring_to_hw_ip(enum ring_type ring)67 static int ring_to_hw_ip(enum ring_type ring)
68 {
69 	switch (ring) {
70 	case RING_GFX:
71 		return AMDGPU_HW_IP_GFX;
72 	case RING_DMA:
73 		return AMDGPU_HW_IP_DMA;
74 	case RING_COMPUTE:
75 		return AMDGPU_HW_IP_COMPUTE;
76 	default:
77 		unreachable("unsupported ring");
78 	}
79 }
80 
radv_amdgpu_request_to_fence(struct radv_amdgpu_ctx * ctx,struct amdgpu_cs_fence * fence,struct amdgpu_cs_request * req)81 static void radv_amdgpu_request_to_fence(struct radv_amdgpu_ctx *ctx,
82 					 struct amdgpu_cs_fence *fence,
83 					 struct amdgpu_cs_request *req)
84 {
85 	fence->context = ctx->ctx;
86 	fence->ip_type = req->ip_type;
87 	fence->ip_instance = req->ip_instance;
88 	fence->ring = req->ring;
89 	fence->fence = req->seq_no;
90 }
91 
radv_amdgpu_create_fence()92 static struct radeon_winsys_fence *radv_amdgpu_create_fence()
93 {
94 	struct radv_amdgpu_cs_fence *fence = calloc(1, sizeof(struct amdgpu_cs_fence));
95 	return (struct radeon_winsys_fence*)fence;
96 }
97 
/* Release a fence created by radv_amdgpu_create_fence(). */
static void radv_amdgpu_destroy_fence(struct radeon_winsys_fence *_fence)
{
	/* free(NULL) is a no-op, so no guard is needed. */
	free((struct amdgpu_cs_fence *)_fence);
}
103 
radv_amdgpu_fence_wait(struct radeon_winsys * _ws,struct radeon_winsys_fence * _fence,bool absolute,uint64_t timeout)104 static bool radv_amdgpu_fence_wait(struct radeon_winsys *_ws,
105 			      struct radeon_winsys_fence *_fence,
106 			      bool absolute,
107 			      uint64_t timeout)
108 {
109 	struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
110 	unsigned flags = absolute ? AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE : 0;
111 	int r;
112 	uint32_t expired = 0;
113 
114 	/* Now use the libdrm query. */
115 	r = amdgpu_cs_query_fence_status(fence,
116 					 timeout,
117 					 flags,
118 					 &expired);
119 
120 	if (r) {
121 		fprintf(stderr, "amdgpu: radv_amdgpu_cs_query_fence_status failed.\n");
122 		return false;
123 	}
124 
125 	if (expired)
126 		return true;
127 
128 	return false;
129 }
130 
radv_amdgpu_cs_destroy(struct radeon_winsys_cs * rcs)131 static void radv_amdgpu_cs_destroy(struct radeon_winsys_cs *rcs)
132 {
133 	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(rcs);
134 
135 	if (cs->ib_buffer)
136 		cs->ws->base.buffer_destroy(cs->ib_buffer);
137 	else
138 		free(cs->base.buf);
139 
140 	for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i)
141 		cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]);
142 
143 	free(cs->old_ib_buffers);
144 	free(cs->handles);
145 	free(cs->priorities);
146 	free(cs);
147 }
148 
radv_amdgpu_init_cs(struct radv_amdgpu_cs * cs,enum ring_type ring_type)149 static boolean radv_amdgpu_init_cs(struct radv_amdgpu_cs *cs,
150 				   enum ring_type ring_type)
151 {
152 	for (int i = 0; i < ARRAY_SIZE(cs->buffer_hash_table); ++i)
153 		cs->buffer_hash_table[i] = -1;
154 
155 	cs->hw_ip = ring_to_hw_ip(ring_type);
156 	return true;
157 }
158 
159 static struct radeon_winsys_cs *
radv_amdgpu_cs_create(struct radeon_winsys * ws,enum ring_type ring_type)160 radv_amdgpu_cs_create(struct radeon_winsys *ws,
161 		      enum ring_type ring_type)
162 {
163 	struct radv_amdgpu_cs *cs;
164 	uint32_t ib_size = 20 * 1024 * 4;
165 	cs = calloc(1, sizeof(struct radv_amdgpu_cs));
166 	if (!cs)
167 		return NULL;
168 
169 	cs->ws = radv_amdgpu_winsys(ws);
170 	radv_amdgpu_init_cs(cs, ring_type);
171 
172 	if (cs->ws->use_ib_bos) {
173 		cs->ib_buffer = ws->buffer_create(ws, ib_size, 0,
174 						RADEON_DOMAIN_GTT,
175 						RADEON_FLAG_CPU_ACCESS);
176 		if (!cs->ib_buffer) {
177 			free(cs);
178 			return NULL;
179 		}
180 
181 		cs->ib_mapped = ws->buffer_map(cs->ib_buffer);
182 		if (!cs->ib_mapped) {
183 			ws->buffer_destroy(cs->ib_buffer);
184 			free(cs);
185 			return NULL;
186 		}
187 
188 		cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->va;
189 		cs->base.buf = (uint32_t *)cs->ib_mapped;
190 		cs->base.max_dw = ib_size / 4 - 4;
191 		cs->ib_size_ptr = &cs->ib.size;
192 		cs->ib.size = 0;
193 
194 		ws->cs_add_buffer(&cs->base, cs->ib_buffer, 8);
195 	} else {
196 		cs->base.buf = malloc(16384);
197 		cs->base.max_dw = 4096;
198 		if (!cs->base.buf) {
199 			free(cs);
200 			return NULL;
201 		}
202 	}
203 
204 	return &cs->base;
205 }
206 
radv_amdgpu_cs_grow(struct radeon_winsys_cs * _cs,size_t min_size)207 static void radv_amdgpu_cs_grow(struct radeon_winsys_cs *_cs, size_t min_size)
208 {
209 	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
210 
211 	if (cs->failed) {
212 		cs->base.cdw = 0;
213 		return;
214 	}
215 
216 	if (!cs->ws->use_ib_bos) {
217 		const uint64_t limit_dws = 0xffff8;
218 		uint64_t ib_dws = MAX2(cs->base.cdw + min_size,
219 				       MIN2(cs->base.max_dw * 2, limit_dws));
220 
221 		/* The total ib size cannot exceed limit_dws dwords. */
222 		if (ib_dws > limit_dws)
223 		{
224 			cs->failed = true;
225 			cs->base.cdw = 0;
226 			return;
227 		}
228 
229 		uint32_t *new_buf = realloc(cs->base.buf, ib_dws * 4);
230 		if (new_buf) {
231 			cs->base.buf = new_buf;
232 			cs->base.max_dw = ib_dws;
233 		} else {
234 			cs->failed = true;
235 			cs->base.cdw = 0;
236 		}
237 		return;
238 	}
239 
240 	uint64_t ib_size = MAX2(min_size * 4 + 16, cs->base.max_dw * 4 * 2);
241 
242 	/* max that fits in the chain size field. */
243 	ib_size = MIN2(ib_size, 0xfffff);
244 
245 	while (!cs->base.cdw || (cs->base.cdw & 7) != 4)
246 		cs->base.buf[cs->base.cdw++] = 0xffff1000;
247 
248 	*cs->ib_size_ptr |= cs->base.cdw + 4;
249 
250 	if (cs->num_old_ib_buffers == cs->max_num_old_ib_buffers) {
251 		cs->max_num_old_ib_buffers = MAX2(1, cs->max_num_old_ib_buffers * 2);
252 		cs->old_ib_buffers = realloc(cs->old_ib_buffers,
253 					     cs->max_num_old_ib_buffers * sizeof(void*));
254 	}
255 
256 	cs->old_ib_buffers[cs->num_old_ib_buffers++] = cs->ib_buffer;
257 
258 	cs->ib_buffer = cs->ws->base.buffer_create(&cs->ws->base, ib_size, 0,
259 						   RADEON_DOMAIN_GTT,
260 						   RADEON_FLAG_CPU_ACCESS);
261 
262 	if (!cs->ib_buffer) {
263 		cs->base.cdw = 0;
264 		cs->failed = true;
265 		cs->ib_buffer = cs->old_ib_buffers[--cs->num_old_ib_buffers];
266 	}
267 
268 	cs->ib_mapped = cs->ws->base.buffer_map(cs->ib_buffer);
269 	if (!cs->ib_mapped) {
270 		cs->ws->base.buffer_destroy(cs->ib_buffer);
271 		cs->base.cdw = 0;
272 		cs->failed = true;
273 		cs->ib_buffer = cs->old_ib_buffers[--cs->num_old_ib_buffers];
274 	}
275 
276 	cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer, 8);
277 
278 	cs->base.buf[cs->base.cdw++] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0);
279 	cs->base.buf[cs->base.cdw++] = radv_amdgpu_winsys_bo(cs->ib_buffer)->va;
280 	cs->base.buf[cs->base.cdw++] = radv_amdgpu_winsys_bo(cs->ib_buffer)->va >> 32;
281 	cs->ib_size_ptr = cs->base.buf + cs->base.cdw;
282 	cs->base.buf[cs->base.cdw++] = S_3F2_CHAIN(1) | S_3F2_VALID(1);
283 
284 	cs->base.buf = (uint32_t *)cs->ib_mapped;
285 	cs->base.cdw = 0;
286 	cs->base.max_dw = ib_size / 4 - 4;
287 
288 }
289 
radv_amdgpu_cs_finalize(struct radeon_winsys_cs * _cs)290 static bool radv_amdgpu_cs_finalize(struct radeon_winsys_cs *_cs)
291 {
292 	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
293 
294 	if (cs->ws->use_ib_bos) {
295 		while (!cs->base.cdw || (cs->base.cdw & 7) != 0)
296 			cs->base.buf[cs->base.cdw++] = 0xffff1000;
297 
298 		*cs->ib_size_ptr |= cs->base.cdw;
299 
300 		cs->is_chained = false;
301 	}
302 
303 	return !cs->failed;
304 }
305 
radv_amdgpu_cs_reset(struct radeon_winsys_cs * _cs)306 static void radv_amdgpu_cs_reset(struct radeon_winsys_cs *_cs)
307 {
308 	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
309 	cs->base.cdw = 0;
310 	cs->failed = false;
311 
312 	for (unsigned i = 0; i < cs->num_buffers; ++i) {
313 		unsigned hash = ((uintptr_t)cs->handles[i] >> 6) &
314 		                 (ARRAY_SIZE(cs->buffer_hash_table) - 1);
315 		cs->buffer_hash_table[hash] = -1;
316 	}
317 
318 	cs->num_buffers = 0;
319 
320 	if (cs->ws->use_ib_bos) {
321 		cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer, 8);
322 
323 		for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i)
324 			cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]);
325 
326 		cs->num_old_ib_buffers = 0;
327 		cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->va;
328 		cs->ib_size_ptr = &cs->ib.size;
329 		cs->ib.size = 0;
330 	}
331 }
332 
/* Look up bo in the CS buffer list; returns its index or -1.
 * A direct-mapped cache keyed on the handle pointer avoids the linear
 * scan in the common case. */
static int radv_amdgpu_cs_find_buffer(struct radv_amdgpu_cs *cs,
				      amdgpu_bo_handle bo)
{
	unsigned hash = ((uintptr_t)bo >> 6) & (ARRAY_SIZE(cs->buffer_hash_table) - 1);
	int cached = cs->buffer_hash_table[hash];

	/* An empty slot means nothing with this hash was ever added. */
	if (cached == -1)
		return -1;

	if (cs->handles[cached] == bo)
		return cached;

	/* Cache slot was overwritten by a colliding bo: scan and refresh. */
	for (unsigned i = 0; i < cs->num_buffers; ++i) {
		if (cs->handles[i] == bo) {
			cs->buffer_hash_table[hash] = i;
			return i;
		}
	}

	return -1;
}
354 
/* Track a BO reference in the CS, deduplicating and keeping the highest
 * priority seen for it.
 *
 * Fix over the original: the realloc results were assigned straight into
 * cs->handles / cs->priorities, so an OOM would NULL-deref on the stores
 * below and leak the old arrays.  Use temporaries and mark the CS failed
 * instead, so the eventual submit reports the error. */
static void radv_amdgpu_cs_add_buffer_internal(struct radv_amdgpu_cs *cs,
					       amdgpu_bo_handle bo,
					       uint8_t priority)
{
	unsigned hash;
	int index = radv_amdgpu_cs_find_buffer(cs, bo);

	if (index != -1) {
		/* Already tracked: just raise its priority if needed. */
		cs->priorities[index] = MAX2(cs->priorities[index], priority);
		return;
	}

	if (cs->num_buffers == cs->max_num_buffers) {
		unsigned new_count = MAX2(1, cs->max_num_buffers * 2);

		amdgpu_bo_handle *new_handles =
			realloc(cs->handles, new_count * sizeof(amdgpu_bo_handle));
		if (!new_handles) {
			cs->failed = true;
			return;
		}
		cs->handles = new_handles;

		uint8_t *new_priorities =
			realloc(cs->priorities, new_count * sizeof(uint8_t));
		if (!new_priorities) {
			cs->failed = true;
			return;
		}
		cs->priorities = new_priorities;

		cs->max_num_buffers = new_count;
	}

	cs->handles[cs->num_buffers] = bo;
	cs->priorities[cs->num_buffers] = priority;

	hash = ((uintptr_t)bo >> 6) & (ARRAY_SIZE(cs->buffer_hash_table) - 1);
	cs->buffer_hash_table[hash] = cs->num_buffers;

	++cs->num_buffers;
}
382 
radv_amdgpu_cs_add_buffer(struct radeon_winsys_cs * _cs,struct radeon_winsys_bo * _bo,uint8_t priority)383 static void radv_amdgpu_cs_add_buffer(struct radeon_winsys_cs *_cs,
384 				 struct radeon_winsys_bo *_bo,
385 				 uint8_t priority)
386 {
387 	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
388 	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
389 
390 	radv_amdgpu_cs_add_buffer_internal(cs, bo->bo, priority);
391 }
392 
radv_amdgpu_cs_execute_secondary(struct radeon_winsys_cs * _parent,struct radeon_winsys_cs * _child)393 static void radv_amdgpu_cs_execute_secondary(struct radeon_winsys_cs *_parent,
394 					     struct radeon_winsys_cs *_child)
395 {
396 	struct radv_amdgpu_cs *parent = radv_amdgpu_cs(_parent);
397 	struct radv_amdgpu_cs *child = radv_amdgpu_cs(_child);
398 
399 	for (unsigned i = 0; i < child->num_buffers; ++i) {
400 		radv_amdgpu_cs_add_buffer_internal(parent, child->handles[i],
401 						   child->priorities[i]);
402 	}
403 
404 	if (parent->ws->use_ib_bos) {
405 		if (parent->base.cdw + 4 > parent->base.max_dw)
406 			radv_amdgpu_cs_grow(&parent->base, 4);
407 
408 		parent->base.buf[parent->base.cdw++] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0);
409 		parent->base.buf[parent->base.cdw++] = child->ib.ib_mc_address;
410 		parent->base.buf[parent->base.cdw++] = child->ib.ib_mc_address >> 32;
411 		parent->base.buf[parent->base.cdw++] = child->ib.size;
412 	} else {
413 		if (parent->base.cdw + child->base.cdw > parent->base.max_dw)
414 			radv_amdgpu_cs_grow(&parent->base, child->base.cdw);
415 
416 		memcpy(parent->base.buf + parent->base.cdw, child->base.buf, 4 * child->base.cdw);
417 		parent->base.cdw += child->base.cdw;
418 	}
419 }
420 
/* Build the kernel BO list for a submission from one or more CSes, plus an
 * optional extra BO (used by the sysmem path for the staging IB).
 * Returns 0 or a negative errno. */
static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
				      struct radeon_winsys_cs **cs_array,
				      unsigned count,
				      struct radv_amdgpu_winsys_bo *extra_bo,
				      amdgpu_bo_list_handle *bo_list)
{
	int r;

	if (ws->debug_all_bos) {
		/* Debug mode: reference every BO the winsys knows about. */
		struct radv_amdgpu_winsys_bo *bo;
		amdgpu_bo_handle *handles;
		unsigned num = 0;

		pthread_mutex_lock(&ws->global_bo_list_lock);

		handles = malloc(sizeof(handles[0]) * ws->num_buffers);
		if (!handles) {
			pthread_mutex_unlock(&ws->global_bo_list_lock);
			return -ENOMEM;
		}

		LIST_FOR_EACH_ENTRY(bo, &ws->global_bo_list, global_list_item) {
			assert(num < ws->num_buffers);
			handles[num++] = bo->bo;
		}

		r = amdgpu_bo_list_create(ws->dev, ws->num_buffers,
					  handles, NULL, bo_list);
		free(handles);
		pthread_mutex_unlock(&ws->global_bo_list_lock);
		return r;
	}

	if (count == 1 && !extra_bo) {
		/* Fast path: a single CS already holds a deduplicated list. */
		struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)cs_array[0];
		return amdgpu_bo_list_create(ws->dev, cs->num_buffers, cs->handles,
					     cs->priorities, bo_list);
	}

	/* General case: merge the per-CS lists, deduplicating handles and
	 * keeping the highest priority seen for each. */
	unsigned total_buffer_count = !!extra_bo;
	unsigned unique_bo_count = !!extra_bo;

	for (unsigned i = 0; i < count; ++i)
		total_buffer_count += ((struct radv_amdgpu_cs *)cs_array[i])->num_buffers;

	amdgpu_bo_handle *handles = malloc(sizeof(amdgpu_bo_handle) * total_buffer_count);
	uint8_t *priorities = malloc(sizeof(uint8_t) * total_buffer_count);
	if (!handles || !priorities) {
		free(handles);
		free(priorities);
		return -ENOMEM;
	}

	if (extra_bo) {
		handles[0] = extra_bo->bo;
		priorities[0] = 8;
	}

	for (unsigned i = 0; i < count; ++i) {
		struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)cs_array[i];
		for (unsigned j = 0; j < cs->num_buffers; ++j) {
			bool found = false;
			for (unsigned k = 0; k < unique_bo_count; ++k) {
				if (handles[k] == cs->handles[j]) {
					priorities[k] = MAX2(priorities[k],
							     cs->priorities[j]);
					found = true;
					break;
				}
			}
			if (!found) {
				handles[unique_bo_count] = cs->handles[j];
				priorities[unique_bo_count] = cs->priorities[j];
				++unique_bo_count;
			}
		}
	}

	r = amdgpu_bo_list_create(ws->dev, unique_bo_count, handles,
				  priorities, bo_list);
	free(handles);
	free(priorities);
	return r;
}
504 
radv_assign_last_submit(struct radv_amdgpu_ctx * ctx,struct amdgpu_cs_request * request)505 static void radv_assign_last_submit(struct radv_amdgpu_ctx *ctx,
506 				    struct amdgpu_cs_request *request)
507 {
508 	radv_amdgpu_request_to_fence(ctx,
509 	                             &ctx->last_submission[request->ip_type][request->ring],
510 	                             request);
511 }
512 
radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx * _ctx,int queue_idx,struct radeon_winsys_cs ** cs_array,unsigned cs_count,struct radeon_winsys_fence * _fence)513 static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
514 						int queue_idx,
515 						struct radeon_winsys_cs **cs_array,
516 						unsigned cs_count,
517 						struct radeon_winsys_fence *_fence)
518 {
519 	int r;
520 	struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
521 	struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
522 	struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]);
523 	amdgpu_bo_list_handle bo_list;
524 	struct amdgpu_cs_request request = {0};
525 
526 	for (unsigned i = cs_count; i--;) {
527 		struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);
528 
529 		if (cs->is_chained) {
530 			*cs->ib_size_ptr -= 4;
531 			cs->is_chained = false;
532 		}
533 
534 		if (i + 1 < cs_count) {
535 			struct radv_amdgpu_cs *next = radv_amdgpu_cs(cs_array[i + 1]);
536 			assert(cs->base.cdw + 4 <= cs->base.max_dw);
537 
538 			cs->is_chained = true;
539 			*cs->ib_size_ptr += 4;
540 
541 			cs->base.buf[cs->base.cdw + 0] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0);
542 			cs->base.buf[cs->base.cdw + 1] = next->ib.ib_mc_address;
543 			cs->base.buf[cs->base.cdw + 2] = next->ib.ib_mc_address >> 32;
544 			cs->base.buf[cs->base.cdw + 3] = S_3F2_CHAIN(1) | S_3F2_VALID(1) | next->ib.size;
545 		}
546 	}
547 
548 	r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, &bo_list);
549 	if (r) {
550 		fprintf(stderr, "amdgpu: Failed to created the BO list for submission\n");
551 		return r;
552 	}
553 
554 	request.ip_type = cs0->hw_ip;
555 	request.ring = queue_idx;
556 	request.number_of_ibs = 1;
557 	request.ibs = &cs0->ib;
558 	request.resources = bo_list;
559 
560 	r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1);
561 	if (r) {
562 		if (r == -ENOMEM)
563 			fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
564 		else
565 			fprintf(stderr, "amdgpu: The CS has been rejected, "
566 					"see dmesg for more information.\n");
567 	}
568 
569 	amdgpu_bo_list_destroy(bo_list);
570 
571 	if (fence)
572 		radv_amdgpu_request_to_fence(ctx, fence, &request);
573 
574 	radv_assign_last_submit(ctx, &request);
575 
576 	return r;
577 }
578 
radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx * _ctx,int queue_idx,struct radeon_winsys_cs ** cs_array,unsigned cs_count,struct radeon_winsys_fence * _fence)579 static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
580 						 int queue_idx,
581 						 struct radeon_winsys_cs **cs_array,
582 						 unsigned cs_count,
583 						 struct radeon_winsys_fence *_fence)
584 {
585 	int r;
586 	struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
587 	struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
588 	amdgpu_bo_list_handle bo_list;
589 	struct amdgpu_cs_request request;
590 
591 	assert(cs_count);
592 
593 	for (unsigned i = 0; i < cs_count;) {
594 		struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[i]);
595 		struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT];
596 		unsigned cnt = MIN2(AMDGPU_CS_MAX_IBS_PER_SUBMIT, cs_count - i);
597 
598 		memset(&request, 0, sizeof(request));
599 
600 		r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL, &bo_list);
601 		if (r) {
602 			fprintf(stderr, "amdgpu: Failed to created the BO list for submission\n");
603 			return r;
604 		}
605 
606 		request.ip_type = cs0->hw_ip;
607 		request.ring = queue_idx;
608 		request.resources = bo_list;
609 		request.number_of_ibs = cnt;
610 		request.ibs = ibs;
611 
612 		for (unsigned j = 0; j < cnt; ++j) {
613 			struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]);
614 			ibs[j] = cs->ib;
615 
616 			if (cs->is_chained) {
617 				*cs->ib_size_ptr -= 4;
618 				cs->is_chained = false;
619 			}
620 		}
621 
622 		r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1);
623 		if (r) {
624 			if (r == -ENOMEM)
625 				fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
626 			else
627 				fprintf(stderr, "amdgpu: The CS has been rejected, "
628 						"see dmesg for more information.\n");
629 		}
630 
631 		amdgpu_bo_list_destroy(bo_list);
632 
633 		if (r)
634 			return r;
635 
636 		i += cnt;
637 	}
638 	if (fence)
639 		radv_amdgpu_request_to_fence(ctx, fence, &request);
640 
641 	radv_assign_last_submit(ctx, &request);
642 
643 	return 0;
644 }
645 
radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx * _ctx,int queue_idx,struct radeon_winsys_cs ** cs_array,unsigned cs_count,struct radeon_winsys_fence * _fence)646 static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
647 					       int queue_idx,
648 					       struct radeon_winsys_cs **cs_array,
649 					       unsigned cs_count,
650 					       struct radeon_winsys_fence *_fence)
651 {
652 	int r;
653 	struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
654 	struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
655 	struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]);
656 	struct radeon_winsys *ws = (struct radeon_winsys*)cs0->ws;
657 	amdgpu_bo_list_handle bo_list;
658 	struct amdgpu_cs_request request;
659 	uint32_t pad_word = 0xffff1000U;
660 
661 	if (radv_amdgpu_winsys(ws)->family == FAMILY_SI)
662 		pad_word = 0x80000000;
663 
664 	assert(cs_count);
665 
666 	for (unsigned i = 0; i < cs_count;) {
667 		struct amdgpu_cs_ib_info ib = {0};
668 		struct radeon_winsys_bo *bo = NULL;
669 		uint32_t *ptr;
670 		unsigned cnt = 0;
671 		unsigned size = 0;
672 
673 		while (i + cnt < cs_count && 0xffff8 - size >= radv_amdgpu_cs(cs_array[i + cnt])->base.cdw) {
674 			size += radv_amdgpu_cs(cs_array[i + cnt])->base.cdw;
675 			++cnt;
676 		}
677 
678 		assert(cnt);
679 
680 		bo = ws->buffer_create(ws, 4 * size, 4096, RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS);
681 		ptr = ws->buffer_map(bo);
682 
683 		for (unsigned j = 0; j < cnt; ++j) {
684 			struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]);
685 			memcpy(ptr, cs->base.buf, 4 * cs->base.cdw);
686 			ptr += cs->base.cdw;
687 
688 		}
689 
690 		while(!size || (size & 7)) {
691 			*ptr++ = pad_word;
692 			++size;
693 		}
694 
695 		memset(&request, 0, sizeof(request));
696 
697 
698 		r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt,
699 		                               (struct radv_amdgpu_winsys_bo*)bo, &bo_list);
700 		if (r) {
701 			fprintf(stderr, "amdgpu: Failed to created the BO list for submission\n");
702 			return r;
703 		}
704 
705 		ib.size = size;
706 		ib.ib_mc_address = ws->buffer_get_va(bo);
707 
708 		request.ip_type = cs0->hw_ip;
709 		request.ring = queue_idx;
710 		request.resources = bo_list;
711 		request.number_of_ibs = 1;
712 		request.ibs = &ib;
713 
714 		r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1);
715 		if (r) {
716 			if (r == -ENOMEM)
717 				fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
718 			else
719 				fprintf(stderr, "amdgpu: The CS has been rejected, "
720 						"see dmesg for more information.\n");
721 		}
722 
723 		amdgpu_bo_list_destroy(bo_list);
724 
725 		ws->buffer_destroy(bo);
726 		if (r)
727 			return r;
728 
729 		i += cnt;
730 	}
731 	if (fence)
732 		radv_amdgpu_request_to_fence(ctx, fence, &request);
733 
734 	radv_assign_last_submit(ctx, &request);
735 
736 	return 0;
737 }
738 
radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx * _ctx,int queue_idx,struct radeon_winsys_cs ** cs_array,unsigned cs_count,struct radeon_winsys_sem ** wait_sem,unsigned wait_sem_count,struct radeon_winsys_sem ** signal_sem,unsigned signal_sem_count,bool can_patch,struct radeon_winsys_fence * _fence)739 static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
740 					int queue_idx,
741 					struct radeon_winsys_cs **cs_array,
742 					unsigned cs_count,
743 					struct radeon_winsys_sem **wait_sem,
744 					unsigned wait_sem_count,
745 					struct radeon_winsys_sem **signal_sem,
746 					unsigned signal_sem_count,
747 					bool can_patch,
748 					struct radeon_winsys_fence *_fence)
749 {
750 	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[0]);
751 	struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
752 	int ret;
753 	int i;
754 
755 	for (i = 0; i < wait_sem_count; i++) {
756 		amdgpu_semaphore_handle sem = (amdgpu_semaphore_handle)wait_sem[i];
757 		amdgpu_cs_wait_semaphore(ctx->ctx, cs->hw_ip, 0, queue_idx,
758 					 sem);
759 	}
760 	if (!cs->ws->use_ib_bos) {
761 		ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, cs_array,
762 							   cs_count, _fence);
763 	} else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && false) {
764 		ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, cs_array,
765 							    cs_count, _fence);
766 	} else {
767 		ret = radv_amdgpu_winsys_cs_submit_fallback(_ctx, queue_idx, cs_array,
768 							     cs_count, _fence);
769 	}
770 
771 	for (i = 0; i < signal_sem_count; i++) {
772 		amdgpu_semaphore_handle sem = (amdgpu_semaphore_handle)signal_sem[i];
773 		amdgpu_cs_signal_semaphore(ctx->ctx, cs->hw_ip, 0, queue_idx,
774 					   sem);
775 	}
776 	return ret;
777 }
778 
779 
radv_amdgpu_winsys_get_cpu_addr(void * _cs,uint64_t addr)780 static void *radv_amdgpu_winsys_get_cpu_addr(void *_cs, uint64_t addr)
781 {
782 	struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
783 	void *ret = NULL;
784 	for (unsigned i = 0; i <= cs->num_old_ib_buffers; ++i) {
785 		struct radv_amdgpu_winsys_bo *bo;
786 
787 		bo = (struct radv_amdgpu_winsys_bo*)
788 		       (i == cs->num_old_ib_buffers ? cs->ib_buffer : cs->old_ib_buffers[i]);
789 		if (addr >= bo->va && addr - bo->va < bo->size) {
790 			if (amdgpu_bo_cpu_map(bo->bo, &ret) == 0)
791 				return (char *)ret + (addr - bo->va);
792 		}
793 	}
794 	return ret;
795 }
796 
radv_amdgpu_winsys_cs_dump(struct radeon_winsys_cs * _cs,FILE * file,uint32_t trace_id)797 static void radv_amdgpu_winsys_cs_dump(struct radeon_winsys_cs *_cs,
798                                        FILE* file,
799                                        uint32_t trace_id)
800 {
801 	struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
802 
803 	ac_parse_ib(file,
804 		    radv_amdgpu_winsys_get_cpu_addr(cs, cs->ib.ib_mc_address),
805 		    cs->ib.size, trace_id,  "main IB", cs->ws->info.chip_class,
806 		    radv_amdgpu_winsys_get_cpu_addr, cs);
807 }
808 
radv_amdgpu_ctx_create(struct radeon_winsys * _ws)809 static struct radeon_winsys_ctx *radv_amdgpu_ctx_create(struct radeon_winsys *_ws)
810 {
811 	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
812 	struct radv_amdgpu_ctx *ctx = CALLOC_STRUCT(radv_amdgpu_ctx);
813 	int r;
814 
815 	if (!ctx)
816 		return NULL;
817 	r = amdgpu_cs_ctx_create(ws->dev, &ctx->ctx);
818 	if (r) {
819 		fprintf(stderr, "amdgpu: radv_amdgpu_cs_ctx_create failed. (%i)\n", r);
820 		goto error_create;
821 	}
822 	ctx->ws = ws;
823 	return (struct radeon_winsys_ctx *)ctx;
824 error_create:
825 	FREE(ctx);
826 	return NULL;
827 }
828 
radv_amdgpu_ctx_destroy(struct radeon_winsys_ctx * rwctx)829 static void radv_amdgpu_ctx_destroy(struct radeon_winsys_ctx *rwctx)
830 {
831 	struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;
832 	amdgpu_cs_ctx_free(ctx->ctx);
833 	FREE(ctx);
834 }
835 
radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx * rwctx,enum ring_type ring_type,int ring_index)836 static bool radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx *rwctx,
837                                       enum ring_type ring_type, int ring_index)
838 {
839 	struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;
840 	int ip_type = ring_to_hw_ip(ring_type);
841 
842 	if (ctx->last_submission[ip_type][ring_index].fence) {
843 		uint32_t expired;
844 		int ret = amdgpu_cs_query_fence_status(&ctx->last_submission[ip_type][ring_index],
845 		                                       1000000000ull, 0, &expired);
846 
847 		if (ret || !expired)
848 			return false;
849 	}
850 
851 	return true;
852 }
853 
radv_amdgpu_create_sem(struct radeon_winsys * _ws)854 static struct radeon_winsys_sem *radv_amdgpu_create_sem(struct radeon_winsys *_ws)
855 {
856 	int ret;
857 	amdgpu_semaphore_handle sem;
858 
859 	ret = amdgpu_cs_create_semaphore(&sem);
860 	if (ret)
861 		return NULL;
862 	return (struct radeon_winsys_sem *)sem;
863 }
864 
radv_amdgpu_destroy_sem(struct radeon_winsys_sem * _sem)865 static void radv_amdgpu_destroy_sem(struct radeon_winsys_sem *_sem)
866 {
867 	amdgpu_semaphore_handle sem = (amdgpu_semaphore_handle)_sem;
868 	amdgpu_cs_destroy_semaphore(sem);
869 }
870 
radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys * ws)871 void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws)
872 {
873 	ws->base.ctx_create = radv_amdgpu_ctx_create;
874 	ws->base.ctx_destroy = radv_amdgpu_ctx_destroy;
875 	ws->base.ctx_wait_idle = radv_amdgpu_ctx_wait_idle;
876 	ws->base.cs_create = radv_amdgpu_cs_create;
877 	ws->base.cs_destroy = radv_amdgpu_cs_destroy;
878 	ws->base.cs_grow = radv_amdgpu_cs_grow;
879 	ws->base.cs_finalize = radv_amdgpu_cs_finalize;
880 	ws->base.cs_reset = radv_amdgpu_cs_reset;
881 	ws->base.cs_add_buffer = radv_amdgpu_cs_add_buffer;
882 	ws->base.cs_execute_secondary = radv_amdgpu_cs_execute_secondary;
883 	ws->base.cs_submit = radv_amdgpu_winsys_cs_submit;
884 	ws->base.cs_dump = radv_amdgpu_winsys_cs_dump;
885 	ws->base.create_fence = radv_amdgpu_create_fence;
886 	ws->base.destroy_fence = radv_amdgpu_destroy_fence;
887 	ws->base.create_sem = radv_amdgpu_create_sem;
888 	ws->base.destroy_sem = radv_amdgpu_destroy_sem;
889 	ws->base.fence_wait = radv_amdgpu_fence_wait;
890 }
891