/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdlib.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>
#include <assert.h>

#include "ac_debug.h"
#include "amdgpu_id.h"
#include "radv_radeon_winsys.h"
#include "radv_amdgpu_cs.h"
#include "radv_amdgpu_bo.h"
#include "sid.h"

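/* A winsys command stream is backed either by a GPU-visible IB buffer
 * object (use_ib_bos) or by a plain malloc'ed array that gets copied into
 * a BO at submit time. old_ib_buffers keeps the BOs of earlier, chained
 * IBs alive until the CS is reset, and buffer_hash_table is a small
 * direct-mapped cache over the per-CS buffer list. */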
struct radv_amdgpu_cs {
	struct radeon_winsys_cs base;
	struct radv_amdgpu_winsys *ws;

	struct amdgpu_cs_ib_info ib;

	struct radeon_winsys_bo *ib_buffer;
	uint8_t *ib_mapped;
	unsigned max_num_buffers;
	unsigned num_buffers;
	amdgpu_bo_handle *handles;
	uint8_t *priorities;

	struct radeon_winsys_bo **old_ib_buffers;
	unsigned num_old_ib_buffers;
	unsigned max_num_old_ib_buffers;
	unsigned *ib_size_ptr;
	bool failed;
	bool is_chained;

	int buffer_hash_table[1024];
	unsigned hw_ip;
};

static inline struct radv_amdgpu_cs *
radv_amdgpu_cs(struct radeon_winsys_cs *base)
{
	return (struct radv_amdgpu_cs*)base;
}

static int ring_to_hw_ip(enum ring_type ring)
{
	switch (ring) {
	case RING_GFX:
		return AMDGPU_HW_IP_GFX;
	case RING_DMA:
		return AMDGPU_HW_IP_DMA;
	case RING_COMPUTE:
		return AMDGPU_HW_IP_COMPUTE;
	default:
		unreachable("unsupported ring");
	}
}

static void radv_amdgpu_request_to_fence(struct radv_amdgpu_ctx *ctx,
					 struct amdgpu_cs_fence *fence,
					 struct amdgpu_cs_request *req)
{
	fence->context = ctx->ctx;
	fence->ip_type = req->ip_type;
	fence->ip_instance = req->ip_instance;
	fence->ring = req->ring;
	fence->fence = req->seq_no;
}

static struct radeon_winsys_fence *radv_amdgpu_create_fence(void)
{
	struct amdgpu_cs_fence *fence = calloc(1, sizeof(struct amdgpu_cs_fence));
	return (struct radeon_winsys_fence*)fence;
}

static void radv_amdgpu_destroy_fence(struct radeon_winsys_fence *_fence)
{
	struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
	free(fence);
}

static bool radv_amdgpu_fence_wait(struct radeon_winsys *_ws,
				   struct radeon_winsys_fence *_fence,
				   bool absolute,
				   uint64_t timeout)
{
	struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
	unsigned flags = absolute ? AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE : 0;
	int r;
	uint32_t expired = 0;

	/* Now use the libdrm query. */
	r = amdgpu_cs_query_fence_status(fence,
					 timeout,
					 flags,
					 &expired);

	if (r) {
		fprintf(stderr, "amdgpu: amdgpu_cs_query_fence_status failed.\n");
		return false;
	}

	if (expired)
		return true;

	return false;
}

static void radv_amdgpu_cs_destroy(struct radeon_winsys_cs *rcs)
{
	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(rcs);

	if (cs->ib_buffer)
		cs->ws->base.buffer_destroy(cs->ib_buffer);
	else
		free(cs->base.buf);

	for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i)
		cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]);

	free(cs->old_ib_buffers);
	free(cs->handles);
	free(cs->priorities);
	free(cs);
}

static bool radv_amdgpu_init_cs(struct radv_amdgpu_cs *cs,
				enum ring_type ring_type)
{
	for (int i = 0; i < ARRAY_SIZE(cs->buffer_hash_table); ++i)
		cs->buffer_hash_table[i] = -1;

	cs->hw_ip = ring_to_hw_ip(ring_type);
	return true;
}

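/* Create a command stream. With use_ib_bos the commands are written
 * directly into a CPU-mapped GTT buffer object; otherwise they go into
 * system memory and are uploaded to a fresh BO at submit time. max_dw is
 * set 4 dwords short of the allocation to leave room for a trailing
 * INDIRECT_BUFFER chain packet. */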
static struct radeon_winsys_cs *
radv_amdgpu_cs_create(struct radeon_winsys *ws,
		      enum ring_type ring_type)
{
	struct radv_amdgpu_cs *cs;
	uint32_t ib_size = 20 * 1024 * 4;
	cs = calloc(1, sizeof(struct radv_amdgpu_cs));
	if (!cs)
		return NULL;

	cs->ws = radv_amdgpu_winsys(ws);
	radv_amdgpu_init_cs(cs, ring_type);

	if (cs->ws->use_ib_bos) {
		cs->ib_buffer = ws->buffer_create(ws, ib_size, 0,
						  RADEON_DOMAIN_GTT,
						  RADEON_FLAG_CPU_ACCESS);
		if (!cs->ib_buffer) {
			free(cs);
			return NULL;
		}

		cs->ib_mapped = ws->buffer_map(cs->ib_buffer);
		if (!cs->ib_mapped) {
			ws->buffer_destroy(cs->ib_buffer);
			free(cs);
			return NULL;
		}

		cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->va;
		cs->base.buf = (uint32_t *)cs->ib_mapped;
		cs->base.max_dw = ib_size / 4 - 4;
		cs->ib_size_ptr = &cs->ib.size;
		cs->ib.size = 0;

		ws->cs_add_buffer(&cs->base, cs->ib_buffer, 8);
	} else {
		cs->base.buf = malloc(16384);
		if (!cs->base.buf) {
			free(cs);
			return NULL;
		}
		cs->base.max_dw = 4096;
	}

	return &cs->base;
}

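/* Make room for at least min_size more dwords. In sysmem mode this is a
 * plain realloc, capped at the hardware IB size limit. In IB BO mode the
 * current IB is padded with NOPs to the required alignment, terminated
 * with an INDIRECT_BUFFER packet that chains to a new, larger BO, and
 * recording continues there; the old BO is parked on old_ib_buffers
 * until the CS is reset or destroyed. */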
static void radv_amdgpu_cs_grow(struct radeon_winsys_cs *_cs, size_t min_size)
{
	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);

	if (cs->failed) {
		cs->base.cdw = 0;
		return;
	}

	if (!cs->ws->use_ib_bos) {
		const uint64_t limit_dws = 0xffff8;
		uint64_t ib_dws = MAX2(cs->base.cdw + min_size,
				       MIN2(cs->base.max_dw * 2, limit_dws));

		/* The total ib size cannot exceed limit_dws dwords. */
		if (ib_dws > limit_dws) {
			cs->failed = true;
			cs->base.cdw = 0;
			return;
		}

		uint32_t *new_buf = realloc(cs->base.buf, ib_dws * 4);
		if (new_buf) {
			cs->base.buf = new_buf;
			cs->base.max_dw = ib_dws;
		} else {
			cs->failed = true;
			cs->base.cdw = 0;
		}
		return;
	}

	uint64_t ib_size = MAX2(min_size * 4 + 16, cs->base.max_dw * 4 * 2);

	/* max that fits in the chain size field. */
	ib_size = MIN2(ib_size, 0xfffff);

	/* Pad with NOPs so the 4-dword chain packet written below ends on an
	 * 8-dword boundary. */
	while (!cs->base.cdw || (cs->base.cdw & 7) != 4)
		cs->base.buf[cs->base.cdw++] = 0xffff1000;

	*cs->ib_size_ptr |= cs->base.cdw + 4;

	if (cs->num_old_ib_buffers == cs->max_num_old_ib_buffers) {
		cs->max_num_old_ib_buffers = MAX2(1, cs->max_num_old_ib_buffers * 2);
		cs->old_ib_buffers = realloc(cs->old_ib_buffers,
					     cs->max_num_old_ib_buffers * sizeof(void*));
	}

	cs->old_ib_buffers[cs->num_old_ib_buffers++] = cs->ib_buffer;

	cs->ib_buffer = cs->ws->base.buffer_create(&cs->ws->base, ib_size, 0,
						   RADEON_DOMAIN_GTT,
						   RADEON_FLAG_CPU_ACCESS);

	if (!cs->ib_buffer) {
		cs->base.cdw = 0;
		cs->failed = true;
		cs->ib_buffer = cs->old_ib_buffers[--cs->num_old_ib_buffers];
	}

	cs->ib_mapped = cs->ws->base.buffer_map(cs->ib_buffer);
	if (!cs->ib_mapped) {
		cs->ws->base.buffer_destroy(cs->ib_buffer);
		cs->base.cdw = 0;
		cs->failed = true;
		cs->ib_buffer = cs->old_ib_buffers[--cs->num_old_ib_buffers];
	}

	cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer, 8);

	cs->base.buf[cs->base.cdw++] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0);
	cs->base.buf[cs->base.cdw++] = radv_amdgpu_winsys_bo(cs->ib_buffer)->va;
	cs->base.buf[cs->base.cdw++] = radv_amdgpu_winsys_bo(cs->ib_buffer)->va >> 32;
	cs->ib_size_ptr = cs->base.buf + cs->base.cdw;
	cs->base.buf[cs->base.cdw++] = S_3F2_CHAIN(1) | S_3F2_VALID(1);

	cs->base.buf = (uint32_t *)cs->ib_mapped;
	cs->base.cdw = 0;
	cs->base.max_dw = ib_size / 4 - 4;
}

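/* Finish recording: in IB BO mode, pad the IB with NOPs to a multiple of
 * 8 dwords and OR the final dword count into the slot ib_size_ptr points
 * at (either ib.size or the size field of the last chain packet). */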
static bool radv_amdgpu_cs_finalize(struct radeon_winsys_cs *_cs)
{
	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);

	if (cs->ws->use_ib_bos) {
		while (!cs->base.cdw || (cs->base.cdw & 7) != 0)
			cs->base.buf[cs->base.cdw++] = 0xffff1000;

		*cs->ib_size_ptr |= cs->base.cdw;

		cs->is_chained = false;
	}

	return !cs->failed;
}

static void radv_amdgpu_cs_reset(struct radeon_winsys_cs *_cs)
{
	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
	cs->base.cdw = 0;
	cs->failed = false;

	for (unsigned i = 0; i < cs->num_buffers; ++i) {
		unsigned hash = ((uintptr_t)cs->handles[i] >> 6) &
		                (ARRAY_SIZE(cs->buffer_hash_table) - 1);
		cs->buffer_hash_table[hash] = -1;
	}

	cs->num_buffers = 0;

	if (cs->ws->use_ib_bos) {
		cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer, 8);

		for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i)
			cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]);

		cs->num_old_ib_buffers = 0;
		cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->va;
		cs->ib_size_ptr = &cs->ib.size;
		cs->ib.size = 0;
	}
}

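/* The buffer list uses a direct-mapped hash on the BO handle pointer as a
 * fast path; on a miss the full list is scanned and the hash slot updated,
 * so re-adding a recently used BO stays O(1) in the common case. */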
static int radv_amdgpu_cs_find_buffer(struct radv_amdgpu_cs *cs,
				      amdgpu_bo_handle bo)
{
	unsigned hash = ((uintptr_t)bo >> 6) & (ARRAY_SIZE(cs->buffer_hash_table) - 1);
	int index = cs->buffer_hash_table[hash];

	if (index == -1)
		return -1;

	if (cs->handles[index] == bo)
		return index;

	for (unsigned i = 0; i < cs->num_buffers; ++i) {
		if (cs->handles[i] == bo) {
			cs->buffer_hash_table[hash] = i;
			return i;
		}
	}

	return -1;
}

static void radv_amdgpu_cs_add_buffer_internal(struct radv_amdgpu_cs *cs,
					       amdgpu_bo_handle bo,
					       uint8_t priority)
{
	unsigned hash;
	int index = radv_amdgpu_cs_find_buffer(cs, bo);

	if (index != -1) {
		cs->priorities[index] = MAX2(cs->priorities[index], priority);
		return;
	}

	if (cs->num_buffers == cs->max_num_buffers) {
		unsigned new_count = MAX2(1, cs->max_num_buffers * 2);
		cs->handles = realloc(cs->handles, new_count * sizeof(amdgpu_bo_handle));
		cs->priorities = realloc(cs->priorities, new_count * sizeof(uint8_t));
		cs->max_num_buffers = new_count;
	}

	cs->handles[cs->num_buffers] = bo;
	cs->priorities[cs->num_buffers] = priority;

	hash = ((uintptr_t)bo >> 6) & (ARRAY_SIZE(cs->buffer_hash_table) - 1);
	cs->buffer_hash_table[hash] = cs->num_buffers;

	++cs->num_buffers;
}

static void radv_amdgpu_cs_add_buffer(struct radeon_winsys_cs *_cs,
				      struct radeon_winsys_bo *_bo,
				      uint8_t priority)
{
	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);

	radv_amdgpu_cs_add_buffer_internal(cs, bo->bo, priority);
}

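/* Execute a secondary command stream from a primary: the child's buffer
 * list is merged into the parent's, then the child is either called via
 * an INDIRECT_BUFFER packet (IB BO mode) or memcpy'ed inline (sysmem
 * mode). */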
static void radv_amdgpu_cs_execute_secondary(struct radeon_winsys_cs *_parent,
					     struct radeon_winsys_cs *_child)
{
	struct radv_amdgpu_cs *parent = radv_amdgpu_cs(_parent);
	struct radv_amdgpu_cs *child = radv_amdgpu_cs(_child);

	for (unsigned i = 0; i < child->num_buffers; ++i) {
		radv_amdgpu_cs_add_buffer_internal(parent, child->handles[i],
						   child->priorities[i]);
	}

	if (parent->ws->use_ib_bos) {
		if (parent->base.cdw + 4 > parent->base.max_dw)
			radv_amdgpu_cs_grow(&parent->base, 4);

		parent->base.buf[parent->base.cdw++] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0);
		parent->base.buf[parent->base.cdw++] = child->ib.ib_mc_address;
		parent->base.buf[parent->base.cdw++] = child->ib.ib_mc_address >> 32;
		parent->base.buf[parent->base.cdw++] = child->ib.size;
	} else {
		if (parent->base.cdw + child->base.cdw > parent->base.max_dw)
			radv_amdgpu_cs_grow(&parent->base, child->base.cdw);

		memcpy(parent->base.buf + parent->base.cdw, child->base.buf, 4 * child->base.cdw);
		parent->base.cdw += child->base.cdw;
	}
}

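/* Build the amdgpu BO list for a submission. Three strategies: with
 * debug_all_bos every BO the winsys knows about is included (handy when
 * chasing GPU faults), a single CS with no extra BO reuses its own list
 * directly, and otherwise the per-CS lists are merged with duplicates
 * removed, keeping the highest requested priority per BO. */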
static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
				      struct radeon_winsys_cs **cs_array,
				      unsigned count,
				      struct radv_amdgpu_winsys_bo *extra_bo,
				      amdgpu_bo_list_handle *bo_list)
{
	int r;
	if (ws->debug_all_bos) {
		struct radv_amdgpu_winsys_bo *bo;
		amdgpu_bo_handle *handles;
		unsigned num = 0;

		pthread_mutex_lock(&ws->global_bo_list_lock);

		handles = malloc(sizeof(handles[0]) * ws->num_buffers);
		if (!handles) {
			pthread_mutex_unlock(&ws->global_bo_list_lock);
			return -ENOMEM;
		}

		LIST_FOR_EACH_ENTRY(bo, &ws->global_bo_list, global_list_item) {
			assert(num < ws->num_buffers);
			handles[num++] = bo->bo;
		}

		r = amdgpu_bo_list_create(ws->dev, ws->num_buffers,
					  handles, NULL,
					  bo_list);
		free(handles);
		pthread_mutex_unlock(&ws->global_bo_list_lock);
	} else if (count == 1 && !extra_bo) {
		struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[0];
		r = amdgpu_bo_list_create(ws->dev, cs->num_buffers, cs->handles,
					  cs->priorities, bo_list);
	} else {
		unsigned total_buffer_count = !!extra_bo;
		unsigned unique_bo_count = !!extra_bo;
		for (unsigned i = 0; i < count; ++i) {
			struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[i];
			total_buffer_count += cs->num_buffers;
		}

		amdgpu_bo_handle *handles = malloc(sizeof(amdgpu_bo_handle) * total_buffer_count);
		uint8_t *priorities = malloc(sizeof(uint8_t) * total_buffer_count);
		if (!handles || !priorities) {
			free(handles);
			free(priorities);
			return -ENOMEM;
		}

		if (extra_bo) {
			handles[0] = extra_bo->bo;
			priorities[0] = 8;
		}

		for (unsigned i = 0; i < count; ++i) {
			struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[i];
			for (unsigned j = 0; j < cs->num_buffers; ++j) {
				bool found = false;
				for (unsigned k = 0; k < unique_bo_count; ++k) {
					if (handles[k] == cs->handles[j]) {
						found = true;
						priorities[k] = MAX2(priorities[k],
								     cs->priorities[j]);
						break;
					}
				}
				if (!found) {
					handles[unique_bo_count] = cs->handles[j];
					priorities[unique_bo_count] = cs->priorities[j];
					++unique_bo_count;
				}
			}
		}
		r = amdgpu_bo_list_create(ws->dev, unique_bo_count, handles,
					  priorities, bo_list);

		free(handles);
		free(priorities);
	}

	return r;
}

static void radv_assign_last_submit(struct radv_amdgpu_ctx *ctx,
				    struct amdgpu_cs_request *request)
{
	radv_amdgpu_request_to_fence(ctx,
				     &ctx->last_submission[request->ip_type][request->ring],
				     request);
}

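/* Submit by chaining: every CS except the last gets an INDIRECT_BUFFER
 * packet appended after its finalized end (using the dwords reserved past
 * max_dw) that jumps to the next CS, so only the first IB is handed to
 * the kernel. Chain packets left over from a previous submit are undone
 * first by shrinking the recorded IB size. */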
static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
						int queue_idx,
						struct radeon_winsys_cs **cs_array,
						unsigned cs_count,
						struct radeon_winsys_fence *_fence)
{
	int r;
	struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
	struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
	struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]);
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_request request = {0};

	for (unsigned i = cs_count; i--;) {
		struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);

		if (cs->is_chained) {
			*cs->ib_size_ptr -= 4;
			cs->is_chained = false;
		}

		if (i + 1 < cs_count) {
			struct radv_amdgpu_cs *next = radv_amdgpu_cs(cs_array[i + 1]);
			assert(cs->base.cdw + 4 <= cs->base.max_dw);

			cs->is_chained = true;
			*cs->ib_size_ptr += 4;

			cs->base.buf[cs->base.cdw + 0] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0);
			cs->base.buf[cs->base.cdw + 1] = next->ib.ib_mc_address;
			cs->base.buf[cs->base.cdw + 2] = next->ib.ib_mc_address >> 32;
			cs->base.buf[cs->base.cdw + 3] = S_3F2_CHAIN(1) | S_3F2_VALID(1) | next->ib.size;
		}
	}

	r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, &bo_list);
	if (r) {
		fprintf(stderr, "amdgpu: Failed to create the BO list for submission\n");
		return r;
	}

	request.ip_type = cs0->hw_ip;
	request.ring = queue_idx;
	request.number_of_ibs = 1;
	request.ibs = &cs0->ib;
	request.resources = bo_list;

	r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1);
	if (r) {
		if (r == -ENOMEM)
			fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
		else
			fprintf(stderr, "amdgpu: The CS has been rejected, "
					"see dmesg for more information.\n");
	}

	amdgpu_bo_list_destroy(bo_list);

	if (fence)
		radv_amdgpu_request_to_fence(ctx, fence, &request);

	radv_assign_last_submit(ctx, &request);

	return r;
}

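/* Submit without chaining: the CS array is split into batches of at most
 * AMDGPU_CS_MAX_IBS_PER_SUBMIT IBs, each batch going out as one
 * amdgpu_cs_submit() with its own BO list. */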
static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
						 int queue_idx,
						 struct radeon_winsys_cs **cs_array,
						 unsigned cs_count,
						 struct radeon_winsys_fence *_fence)
{
	int r;
	struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
	struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_request request;

	assert(cs_count);

	for (unsigned i = 0; i < cs_count;) {
		struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[i]);
		struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT];
		unsigned cnt = MIN2(AMDGPU_CS_MAX_IBS_PER_SUBMIT, cs_count - i);

		memset(&request, 0, sizeof(request));

		r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL, &bo_list);
		if (r) {
			fprintf(stderr, "amdgpu: Failed to create the BO list for submission\n");
			return r;
		}

		request.ip_type = cs0->hw_ip;
		request.ring = queue_idx;
		request.resources = bo_list;
		request.number_of_ibs = cnt;
		request.ibs = ibs;

		for (unsigned j = 0; j < cnt; ++j) {
			struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]);
			ibs[j] = cs->ib;

			/* Undo any chain packet left over from a chained submit. */
			if (cs->is_chained) {
				*cs->ib_size_ptr -= 4;
				cs->is_chained = false;
			}
		}

		r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1);
		if (r) {
			if (r == -ENOMEM)
				fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
			else
				fprintf(stderr, "amdgpu: The CS has been rejected, "
						"see dmesg for more information.\n");
		}

		amdgpu_bo_list_destroy(bo_list);

		if (r)
			return r;

		i += cnt;
	}
	if (fence)
		radv_amdgpu_request_to_fence(ctx, fence, &request);

	radv_assign_last_submit(ctx, &request);

	return 0;
}

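/* Submit CSs that were recorded into system memory: consecutive CSs are
 * copied back-to-back into a freshly allocated GTT BO (respecting the IB
 * size limit), padded with NOPs to an 8-dword multiple, and submitted as
 * a single IB. */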
static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
					       int queue_idx,
					       struct radeon_winsys_cs **cs_array,
					       unsigned cs_count,
					       struct radeon_winsys_fence *_fence)
{
	int r;
	struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
	struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
	struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]);
	struct radeon_winsys *ws = (struct radeon_winsys*)cs0->ws;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_request request;
	uint32_t pad_word = 0xffff1000U;

	/* SI is padded with type-2 NOP packets instead of PKT3 NOPs. */
	if (radv_amdgpu_winsys(ws)->family == FAMILY_SI)
		pad_word = 0x80000000;

	assert(cs_count);

	for (unsigned i = 0; i < cs_count;) {
		struct amdgpu_cs_ib_info ib = {0};
		struct radeon_winsys_bo *bo = NULL;
		uint32_t *ptr;
		unsigned cnt = 0;
		unsigned size = 0;

		/* Pack as many consecutive CSs as fit under the IB size limit. */
		while (i + cnt < cs_count && 0xffff8 - size >= radv_amdgpu_cs(cs_array[i + cnt])->base.cdw) {
			size += radv_amdgpu_cs(cs_array[i + cnt])->base.cdw;
			++cnt;
		}

		assert(cnt);

		bo = ws->buffer_create(ws, 4 * size, 4096, RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS);
		ptr = ws->buffer_map(bo);

		for (unsigned j = 0; j < cnt; ++j) {
			struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]);
			memcpy(ptr, cs->base.buf, 4 * cs->base.cdw);
			ptr += cs->base.cdw;
		}

		while (!size || (size & 7)) {
			*ptr++ = pad_word;
			++size;
		}

		memset(&request, 0, sizeof(request));

		r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt,
					       (struct radv_amdgpu_winsys_bo*)bo, &bo_list);
		if (r) {
			fprintf(stderr, "amdgpu: Failed to create the BO list for submission\n");
			return r;
		}

		ib.size = size;
		ib.ib_mc_address = ws->buffer_get_va(bo);

		request.ip_type = cs0->hw_ip;
		request.ring = queue_idx;
		request.resources = bo_list;
		request.number_of_ibs = 1;
		request.ibs = &ib;

		r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1);
		if (r) {
			if (r == -ENOMEM)
				fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
			else
				fprintf(stderr, "amdgpu: The CS has been rejected, "
						"see dmesg for more information.\n");
		}

		amdgpu_bo_list_destroy(bo_list);

		ws->buffer_destroy(bo);
		if (r)
			return r;

		i += cnt;
	}
	if (fence)
		radv_amdgpu_request_to_fence(ctx, fence, &request);

	radv_assign_last_submit(ctx, &request);

	return 0;
}

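/* Top-level submit entry point: waits on the given semaphores, dispatches
 * to the sysmem, chained or fallback path, then signals the semaphores.
 * Note the chained path is deliberately disabled by the "&& false" below. */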
static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
					int queue_idx,
					struct radeon_winsys_cs **cs_array,
					unsigned cs_count,
					struct radeon_winsys_sem **wait_sem,
					unsigned wait_sem_count,
					struct radeon_winsys_sem **signal_sem,
					unsigned signal_sem_count,
					bool can_patch,
					struct radeon_winsys_fence *_fence)
{
	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[0]);
	struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
	int ret;
	int i;

	for (i = 0; i < wait_sem_count; i++) {
		amdgpu_semaphore_handle sem = (amdgpu_semaphore_handle)wait_sem[i];
		amdgpu_cs_wait_semaphore(ctx->ctx, cs->hw_ip, 0, queue_idx,
					 sem);
	}
	if (!cs->ws->use_ib_bos) {
		ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, cs_array,
							  cs_count, _fence);
	} else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && false) {
		ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, cs_array,
							   cs_count, _fence);
	} else {
		ret = radv_amdgpu_winsys_cs_submit_fallback(_ctx, queue_idx, cs_array,
							    cs_count, _fence);
	}

	for (i = 0; i < signal_sem_count; i++) {
		amdgpu_semaphore_handle sem = (amdgpu_semaphore_handle)signal_sem[i];
		amdgpu_cs_signal_semaphore(ctx->ctx, cs->hw_ip, 0, queue_idx,
					   sem);
	}
	return ret;
}

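/* Map a GPU virtual address back to a CPU pointer by searching the old
 * and current IB buffers of this CS; used by the IB parser below to
 * follow chain and call packets across buffers. */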
static void *radv_amdgpu_winsys_get_cpu_addr(void *_cs, uint64_t addr)
{
	struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
	void *ret = NULL;

	/* Check the old IB buffers first, then (i == num_old_ib_buffers) the
	 * current one. */
	for (unsigned i = 0; i <= cs->num_old_ib_buffers; ++i) {
		struct radv_amdgpu_winsys_bo *bo;

		bo = (struct radv_amdgpu_winsys_bo*)
		     (i == cs->num_old_ib_buffers ? cs->ib_buffer : cs->old_ib_buffers[i]);
		if (addr >= bo->va && addr - bo->va < bo->size) {
			if (amdgpu_bo_cpu_map(bo->bo, &ret) == 0)
				return (char *)ret + (addr - bo->va);
		}
	}
	return ret;
}

static void radv_amdgpu_winsys_cs_dump(struct radeon_winsys_cs *_cs,
				       FILE *file,
				       uint32_t trace_id)
{
	struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;

	ac_parse_ib(file,
		    radv_amdgpu_winsys_get_cpu_addr(cs, cs->ib.ib_mc_address),
		    cs->ib.size, trace_id, "main IB", cs->ws->info.chip_class,
		    radv_amdgpu_winsys_get_cpu_addr, cs);
}

static struct radeon_winsys_ctx *radv_amdgpu_ctx_create(struct radeon_winsys *_ws)
{
	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
	struct radv_amdgpu_ctx *ctx = CALLOC_STRUCT(radv_amdgpu_ctx);
	int r;

	if (!ctx)
		return NULL;
	r = amdgpu_cs_ctx_create(ws->dev, &ctx->ctx);
	if (r) {
		fprintf(stderr, "amdgpu: amdgpu_cs_ctx_create failed. (%i)\n", r);
		goto error_create;
	}
	ctx->ws = ws;
	return (struct radeon_winsys_ctx *)ctx;
error_create:
	FREE(ctx);
	return NULL;
}

static void radv_amdgpu_ctx_destroy(struct radeon_winsys_ctx *rwctx)
{
	struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;
	amdgpu_cs_ctx_free(ctx->ctx);
	FREE(ctx);
}

static bool radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx *rwctx,
				      enum ring_type ring_type, int ring_index)
{
	struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;
	int ip_type = ring_to_hw_ip(ring_type);

	if (ctx->last_submission[ip_type][ring_index].fence) {
		uint32_t expired;
		int ret = amdgpu_cs_query_fence_status(&ctx->last_submission[ip_type][ring_index],
						       1000000000ull, 0, &expired);

		if (ret || !expired)
			return false;
	}

	return true;
}

static struct radeon_winsys_sem *radv_amdgpu_create_sem(struct radeon_winsys *_ws)
{
	int ret;
	amdgpu_semaphore_handle sem;

	ret = amdgpu_cs_create_semaphore(&sem);
	if (ret)
		return NULL;
	return (struct radeon_winsys_sem *)sem;
}

static void radv_amdgpu_destroy_sem(struct radeon_winsys_sem *_sem)
{
	amdgpu_semaphore_handle sem = (amdgpu_semaphore_handle)_sem;
	amdgpu_cs_destroy_semaphore(sem);
}

void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws)
{
	ws->base.ctx_create = radv_amdgpu_ctx_create;
	ws->base.ctx_destroy = radv_amdgpu_ctx_destroy;
	ws->base.ctx_wait_idle = radv_amdgpu_ctx_wait_idle;
	ws->base.cs_create = radv_amdgpu_cs_create;
	ws->base.cs_destroy = radv_amdgpu_cs_destroy;
	ws->base.cs_grow = radv_amdgpu_cs_grow;
	ws->base.cs_finalize = radv_amdgpu_cs_finalize;
	ws->base.cs_reset = radv_amdgpu_cs_reset;
	ws->base.cs_add_buffer = radv_amdgpu_cs_add_buffer;
	ws->base.cs_execute_secondary = radv_amdgpu_cs_execute_secondary;
	ws->base.cs_submit = radv_amdgpu_winsys_cs_submit;
	ws->base.cs_dump = radv_amdgpu_winsys_cs_dump;
	ws->base.create_fence = radv_amdgpu_create_fence;
	ws->base.destroy_fence = radv_amdgpu_destroy_fence;
	ws->base.create_sem = radv_amdgpu_create_sem;
	ws->base.destroy_sem = radv_amdgpu_destroy_sem;
	ws->base.fence_wait = radv_amdgpu_fence_wait;
}