/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */
#include <linux/list_sort.h>
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon_reg.h"
#include "radeon.h"
#include "radeon_trace.h"

#define RADEON_CS_MAX_PRIORITY		32u
#define RADEON_CS_NUM_BUCKETS		(RADEON_CS_MAX_PRIORITY + 1)

/* This is based on the bucket sort with O(n) time complexity.
 * An item with priority "i" is added to bucket[i]. The lists are then
 * concatenated in descending order.
 */
struct radeon_cs_buckets {
	struct list_head bucket[RADEON_CS_NUM_BUCKETS];
};

static void radeon_cs_buckets_init(struct radeon_cs_buckets *b)
{
	unsigned i;

	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++)
		INIT_LIST_HEAD(&b->bucket[i]);
}

static void radeon_cs_buckets_add(struct radeon_cs_buckets *b,
				  struct list_head *item, unsigned priority)
{
	/* Since buffers which appear sooner in the relocation list are
	 * likely to be used more often than buffers which appear later
	 * in the list, the sort mustn't change the ordering of buffers
	 * with the same priority, i.e. it must be stable.
	 */
	list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]);
}

static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b,
				       struct list_head *out_list)
{
	unsigned i;

	/* Connect the sorted buckets in the output list. */
	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) {
		list_splice(&b->bucket[i], out_list);
	}
}
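
/* Illustrative sketch of how the three helpers above are meant to be
 * combined (this mirrors what radeon_cs_parser_relocs() does below):
 *
 *	struct radeon_cs_buckets buckets;
 *
 *	radeon_cs_buckets_init(&buckets);
 *	for each BO:
 *		radeon_cs_buckets_add(&buckets, &bo->tv.head, priority);
 *	radeon_cs_buckets_get_list(&buckets, &validated);
 *
 * Splicing the buckets in index order, each at the head of the output
 * list, yields a stable sort in descending priority.
 */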
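/* radeon_cs_parser_relocs() - build and validate the BO list for a CS.
 *
 * Walks the relocation chunk, looks up each GEM handle, derives the
 * placement domains and a scheduling priority per BO, bucket-sorts the
 * BOs into p->validated and finally reserves and validates them.
 */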
static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
{
	struct radeon_cs_chunk *chunk;
	struct radeon_cs_buckets buckets;
	unsigned i;
	bool need_mmap_lock = false;
	int r;

	if (p->chunk_relocs == NULL) {
		return 0;
	}
	chunk = p->chunk_relocs;
	p->dma_reloc_idx = 0;
	/* FIXME: we assume that each reloc uses 4 dwords */
	p->nrelocs = chunk->length_dw / 4;
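	/* A reloc entry is a struct drm_radeon_cs_reloc (radeon_drm.h):
	 * four dwords holding handle, read_domains, write_domain and
	 * flags, which is where the four dwords above come from.
	 */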
	p->relocs = drm_calloc_large(p->nrelocs, sizeof(struct radeon_bo_list));
	if (p->relocs == NULL) {
		return -ENOMEM;
	}

	radeon_cs_buckets_init(&buckets);

	for (i = 0; i < p->nrelocs; i++) {
		struct drm_radeon_cs_reloc *r;
		struct drm_gem_object *gobj;
		unsigned priority;

		r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
		gobj = drm_gem_object_lookup(p->filp, r->handle);
		if (gobj == NULL) {
			DRM_ERROR("gem object lookup failed 0x%x\n",
				  r->handle);
			return -ENOENT;
		}
		p->relocs[i].robj = gem_to_radeon_bo(gobj);

		/* The userspace buffer priorities are from 0 to 15. A higher
		 * number means the buffer is more important.
		 * Also, the buffers used for write have a higher priority than
		 * the buffers used for read only, which doubles the range
		 * to 0 to 31. 32 is reserved for the kernel driver.
		 */
		priority = (r->flags & RADEON_RELOC_PRIO_MASK) * 2
			   + !!r->write_domain;
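		/* e.g. a read-only BO with userspace priority 15 gets
		 * 15 * 2 + 0 = 30; the same BO with a write domain gets 31.
		 */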

		/* The first reloc of an UVD job is the msg and that must be
		 * in VRAM; also put everything into VRAM on AGP cards and
		 * older IGP chips to avoid image corruption.
		 */
		if (p->ring == R600_RING_TYPE_UVD_INDEX &&
		    (i == 0 || drm_pci_device_is_agp(p->rdev->ddev) ||
		     p->rdev->family == CHIP_RS780 ||
		     p->rdev->family == CHIP_RS880)) {

			/* TODO: is this still needed for NI+ ? */
			p->relocs[i].prefered_domains =
				RADEON_GEM_DOMAIN_VRAM;

			p->relocs[i].allowed_domains =
				RADEON_GEM_DOMAIN_VRAM;

			/* prioritize this over any other relocation */
			priority = RADEON_CS_MAX_PRIORITY;
		} else {
			uint32_t domain = r->write_domain ?
				r->write_domain : r->read_domains;

			if (domain & RADEON_GEM_DOMAIN_CPU) {
				DRM_ERROR("RADEON_GEM_DOMAIN_CPU is not valid "
					  "for command submission\n");
				return -EINVAL;
			}

			p->relocs[i].prefered_domains = domain;
			if (domain == RADEON_GEM_DOMAIN_VRAM)
				domain |= RADEON_GEM_DOMAIN_GTT;
			p->relocs[i].allowed_domains = domain;
		}

		if (radeon_ttm_tt_has_userptr(p->relocs[i].robj->tbo.ttm)) {
			uint32_t domain = p->relocs[i].prefered_domains;

			if (!(domain & RADEON_GEM_DOMAIN_GTT)) {
				DRM_ERROR("Only RADEON_GEM_DOMAIN_GTT is "
					  "allowed for userptr BOs\n");
				return -EINVAL;
			}
			need_mmap_lock = true;
			domain = RADEON_GEM_DOMAIN_GTT;
			p->relocs[i].prefered_domains = domain;
			p->relocs[i].allowed_domains = domain;
		}

		p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
		p->relocs[i].tv.shared = !r->write_domain;

		radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
				      priority);
	}

	radeon_cs_buckets_get_list(&buckets, &p->validated);

	if (p->cs_flags & RADEON_CS_USE_VM)
		p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm,
					      &p->validated);
	if (need_mmap_lock)
		down_read(&current->mm->mmap_sem);

	r = radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);

	if (need_mmap_lock)
		up_read(&current->mm->mmap_sem);

	return r;
}

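/* radeon_cs_get_ring() - map the userspace ring id and priority to one
 * of the kernel hardware ring indices, depending on the chip family.
 */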
static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
{
	p->priority = priority;

	switch (ring) {
	default:
		DRM_ERROR("unknown ring id: %d\n", ring);
		return -EINVAL;
	case RADEON_CS_RING_GFX:
		p->ring = RADEON_RING_TYPE_GFX_INDEX;
		break;
	case RADEON_CS_RING_COMPUTE:
		if (p->rdev->family >= CHIP_TAHITI) {
			if (p->priority > 0)
				p->ring = CAYMAN_RING_TYPE_CP1_INDEX;
			else
				p->ring = CAYMAN_RING_TYPE_CP2_INDEX;
		} else
			p->ring = RADEON_RING_TYPE_GFX_INDEX;
		break;
	case RADEON_CS_RING_DMA:
		if (p->rdev->family >= CHIP_CAYMAN) {
			if (p->priority > 0)
				p->ring = R600_RING_TYPE_DMA_INDEX;
			else
				p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
		} else if (p->rdev->family >= CHIP_RV770) {
			p->ring = R600_RING_TYPE_DMA_INDEX;
		} else {
			return -EINVAL;
		}
		break;
	case RADEON_CS_RING_UVD:
		p->ring = R600_RING_TYPE_UVD_INDEX;
		break;
	case RADEON_CS_RING_VCE:
		/* TODO: only use the low priority ring for now */
		p->ring = TN_RING_TYPE_VCE1_INDEX;
		break;
	}
	return 0;
}

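/* radeon_cs_sync_rings() - collect the fences of all validated BOs into
 * the IB sync object, so the submission waits for work on other rings.
 */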
static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
{
	struct radeon_bo_list *reloc;
	int r;

	list_for_each_entry(reloc, &p->validated, tv.head) {
		struct reservation_object *resv;

		resv = reloc->robj->tbo.resv;
		r = radeon_sync_resv(p->rdev, &p->ib.sync, resv,
				     reloc->tv.shared);
		if (r)
			return r;
	}
	return 0;
}

/* XXX: note that this is called from the legacy UMS CS ioctl as well */
int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
{
	struct drm_radeon_cs *cs = data;
	uint64_t *chunk_array_ptr;
	unsigned size, i;
	u32 ring = RADEON_CS_RING_GFX;
	s32 priority = 0;

	INIT_LIST_HEAD(&p->validated);

	if (!cs->num_chunks) {
		return 0;
	}

	/* get chunks */
	p->idx = 0;
	p->ib.sa_bo = NULL;
	p->const_ib.sa_bo = NULL;
	p->chunk_ib = NULL;
	p->chunk_relocs = NULL;
	p->chunk_flags = NULL;
	p->chunk_const_ib = NULL;
	p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
	if (p->chunks_array == NULL) {
		return -ENOMEM;
	}
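	/* cs->chunks is a userspace array of u64 pointers, one per
	 * struct drm_radeon_cs_chunk; fetch the pointer array first,
	 * then each chunk header and payload below.
	 */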
	chunk_array_ptr = (uint64_t *)(unsigned long)(cs->chunks);
	if (copy_from_user(p->chunks_array, chunk_array_ptr,
			   sizeof(uint64_t)*cs->num_chunks)) {
		return -EFAULT;
	}
	p->cs_flags = 0;
	p->nchunks = cs->num_chunks;
	p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
	if (p->chunks == NULL) {
		return -ENOMEM;
	}
	for (i = 0; i < p->nchunks; i++) {
		struct drm_radeon_cs_chunk __user **chunk_ptr = NULL;
		struct drm_radeon_cs_chunk user_chunk;
		uint32_t __user *cdata;

		chunk_ptr = (void __user *)(unsigned long)p->chunks_array[i];
		if (copy_from_user(&user_chunk, chunk_ptr,
				   sizeof(struct drm_radeon_cs_chunk))) {
			return -EFAULT;
		}
		p->chunks[i].length_dw = user_chunk.length_dw;
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_RELOCS) {
			p->chunk_relocs = &p->chunks[i];
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
			p->chunk_ib = &p->chunks[i];
			/* zero length IB isn't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB) {
			p->chunk_const_ib = &p->chunks[i];
			/* zero length CONST IB isn't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
			p->chunk_flags = &p->chunks[i];
			/* zero length flags aren't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}

		size = p->chunks[i].length_dw;
		cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
		p->chunks[i].user_ptr = cdata;
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB)
			continue;

		if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
			if (!p->rdev || !(p->rdev->flags & RADEON_IS_AGP))
				continue;
		}

		p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
		size *= sizeof(uint32_t);
		if (p->chunks[i].kdata == NULL) {
			return -ENOMEM;
		}
		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
			return -EFAULT;
		}
		if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
			p->cs_flags = p->chunks[i].kdata[0];
			if (p->chunks[i].length_dw > 1)
				ring = p->chunks[i].kdata[1];
			if (p->chunks[i].length_dw > 2)
				priority = (s32)p->chunks[i].kdata[2];
		}
	}

	/* these are KMS only */
	if (p->rdev) {
		if ((p->cs_flags & RADEON_CS_USE_VM) &&
		    !p->rdev->vm_manager.enabled) {
			DRM_ERROR("VM not active on asic!\n");
			return -EINVAL;
		}

		if (radeon_cs_get_ring(p, ring, priority))
			return -EINVAL;

		/* we only support VM on some SI+ rings */
		if ((p->cs_flags & RADEON_CS_USE_VM) == 0) {
			if (p->rdev->asic->ring[p->ring]->cs_parse == NULL) {
				DRM_ERROR("Ring %d requires VM!\n", p->ring);
				return -EINVAL;
			}
		} else {
			if (p->rdev->asic->ring[p->ring]->ib_parse == NULL) {
				DRM_ERROR("VM not supported on ring %d!\n",
					  p->ring);
				return -EINVAL;
			}
		}
	}

	return 0;
}

static int cmp_size_smaller_first(void *priv, struct list_head *a,
				  struct list_head *b)
{
	struct radeon_bo_list *la = list_entry(a, struct radeon_bo_list, tv.head);
	struct radeon_bo_list *lb = list_entry(b, struct radeon_bo_list, tv.head);

	/* Sort A before B if A is smaller. */
	return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
}

/**
 * radeon_cs_parser_fini() - clean parser states
 * @parser: parser structure holding parsing context.
 * @error: error number
 * @backoff: indicator to backoff the reservation
 *
 * If error is set, unvalidate the buffers; otherwise just free the memory
 * used by the parsing context.
 **/
static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bool backoff)
{
	unsigned i;

	if (!error) {
		/* Sort the buffer list from the smallest to largest buffer,
		 * which affects the order of buffers in the LRU list.
		 * This assures that the smallest buffers are added first
		 * to the LRU list, so they are likely to be later evicted
		 * first, instead of large buffers whose eviction is more
		 * expensive.
		 *
		 * This slightly lowers the number of bytes moved by TTM
		 * per frame under memory pressure.
		 */
		list_sort(NULL, &parser->validated, cmp_size_smaller_first);

		ttm_eu_fence_buffer_objects(&parser->ticket,
					    &parser->validated,
					    &parser->ib.fence->base);
	} else if (backoff) {
		ttm_eu_backoff_reservation(&parser->ticket,
					   &parser->validated);
	}

	if (parser->relocs != NULL) {
		for (i = 0; i < parser->nrelocs; i++) {
			struct radeon_bo *bo = parser->relocs[i].robj;
			if (bo == NULL)
				continue;

			drm_gem_object_unreference_unlocked(&bo->gem_base);
		}
	}
	kfree(parser->track);
	drm_free_large(parser->relocs);
	drm_free_large(parser->vm_bos);
	for (i = 0; i < parser->nchunks; i++)
		drm_free_large(parser->chunks[i].kdata);
	kfree(parser->chunks);
	kfree(parser->chunks_array);
	radeon_ib_free(parser->rdev, &parser->ib);
	radeon_ib_free(parser->rdev, &parser->const_ib);
}

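/* radeon_cs_ib_chunk() - parse and submit a non-VM IB.
 *
 * Only used when RADEON_CS_USE_VM is not set; the command stream is
 * fully checked by the per-ring cs_parse callback before scheduling.
 */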
static int radeon_cs_ib_chunk(struct radeon_device *rdev,
			      struct radeon_cs_parser *parser)
{
	int r;

	if (parser->chunk_ib == NULL)
		return 0;

	if (parser->cs_flags & RADEON_CS_USE_VM)
		return 0;

	r = radeon_cs_parse(rdev, parser->ring, parser);
	if (r || parser->parser_error) {
		DRM_ERROR("Invalid command stream !\n");
		return r;
	}

	r = radeon_cs_sync_rings(parser);
	if (r) {
		if (r != -ERESTARTSYS)
			DRM_ERROR("Failed to sync rings: %i\n", r);
		return r;
	}

	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
		radeon_uvd_note_usage(rdev);
	else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) ||
		 (parser->ring == TN_RING_TYPE_VCE2_INDEX))
		radeon_vce_note_usage(rdev);

	r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
	if (r) {
		DRM_ERROR("Failed to schedule IB !\n");
	}
	return r;
}

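/* radeon_bo_vm_update_pte() - bring the VM page tables up to date.
 *
 * Updates the page directory, clears freed mappings and refreshes the
 * page table entries of the temporary ring BO and of every relocated
 * BO, making the IB wait for the resulting page table fences.
 */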
static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
				   struct radeon_vm *vm)
{
	struct radeon_device *rdev = p->rdev;
	struct radeon_bo_va *bo_va;
	int i, r;

	r = radeon_vm_update_page_directory(rdev, vm);
	if (r)
		return r;

	r = radeon_vm_clear_freed(rdev, vm);
	if (r)
		return r;

	if (vm->ib_bo_va == NULL) {
		DRM_ERROR("Tmp BO not in VM!\n");
		return -EINVAL;
	}

	r = radeon_vm_bo_update(rdev, vm->ib_bo_va,
				&rdev->ring_tmp_bo.bo->tbo.mem);
	if (r)
		return r;

	for (i = 0; i < p->nrelocs; i++) {
		struct radeon_bo *bo;

		bo = p->relocs[i].robj;
		bo_va = radeon_vm_bo_find(vm, bo);
		if (bo_va == NULL) {
			dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
			return -EINVAL;
		}

		r = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem);
		if (r)
			return r;

		radeon_sync_fence(&p->ib.sync, bo_va->last_pt_update);
	}

	return radeon_vm_clear_invalids(rdev, vm);
}

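/* radeon_cs_ib_vm_chunk() - parse and submit a VM IB.
 *
 * Only used when RADEON_CS_USE_VM is set; the IB (and the optional
 * const IB on SI+) runs inside the process VM, so only a light ib_parse
 * pass is done and the page tables are updated before scheduling.
 */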
static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
				 struct radeon_cs_parser *parser)
{
	struct radeon_fpriv *fpriv = parser->filp->driver_priv;
	struct radeon_vm *vm = &fpriv->vm;
	int r;

	if (parser->chunk_ib == NULL)
		return 0;
	if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
		return 0;

	if (parser->const_ib.length_dw) {
		r = radeon_ring_ib_parse(rdev, parser->ring, &parser->const_ib);
		if (r) {
			return r;
		}
	}

	r = radeon_ring_ib_parse(rdev, parser->ring, &parser->ib);
	if (r) {
		return r;
	}

	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
		radeon_uvd_note_usage(rdev);

	mutex_lock(&vm->mutex);
	r = radeon_bo_vm_update_pte(parser, vm);
	if (r) {
		goto out;
	}

	r = radeon_cs_sync_rings(parser);
	if (r) {
		if (r != -ERESTARTSYS)
			DRM_ERROR("Failed to sync rings: %i\n", r);
		goto out;
	}

	if ((rdev->family >= CHIP_TAHITI) &&
	    (parser->chunk_const_ib != NULL)) {
		r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, true);
	} else {
		r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
	}

out:
	mutex_unlock(&vm->mutex);
	return r;
}

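/* radeon_cs_handle_lockup() - handle a detected GPU lockup.
 *
 * -EDEADLK from the submission path signals a lockup here: try a GPU
 * reset and, if it succeeds, return -EAGAIN so userspace resubmits.
 */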
static int radeon_cs_handle_lockup(struct radeon_device *rdev, int r)
{
	if (r == -EDEADLK) {
		r = radeon_gpu_reset(rdev);
		if (!r)
			r = -EAGAIN;
	}
	return r;
}

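/* radeon_cs_ib_fill() - allocate the IB(s) and copy in the commands.
 *
 * Allocates parser->ib (and parser->const_ib for SI+ VM submissions)
 * and fills them from the chunk kdata or directly from userspace.
 */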
static int radeon_cs_ib_fill(struct radeon_device *rdev, struct radeon_cs_parser *parser)
{
	struct radeon_cs_chunk *ib_chunk;
	struct radeon_vm *vm = NULL;
	int r;

	if (parser->chunk_ib == NULL)
		return 0;

	if (parser->cs_flags & RADEON_CS_USE_VM) {
		struct radeon_fpriv *fpriv = parser->filp->driver_priv;
		vm = &fpriv->vm;

		if ((rdev->family >= CHIP_TAHITI) &&
		    (parser->chunk_const_ib != NULL)) {
			ib_chunk = parser->chunk_const_ib;
			if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
				DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
				return -EINVAL;
			}
			r = radeon_ib_get(rdev, parser->ring, &parser->const_ib,
					  vm, ib_chunk->length_dw * 4);
			if (r) {
				DRM_ERROR("Failed to get const ib !\n");
				return r;
			}
			parser->const_ib.is_const_ib = true;
			parser->const_ib.length_dw = ib_chunk->length_dw;
			if (copy_from_user(parser->const_ib.ptr,
					   ib_chunk->user_ptr,
					   ib_chunk->length_dw * 4))
				return -EFAULT;
		}

		ib_chunk = parser->chunk_ib;
		if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
			DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
			return -EINVAL;
		}
	}
	ib_chunk = parser->chunk_ib;

	r = radeon_ib_get(rdev, parser->ring, &parser->ib,
			  vm, ib_chunk->length_dw * 4);
	if (r) {
		DRM_ERROR("Failed to get ib !\n");
		return r;
	}
	parser->ib.length_dw = ib_chunk->length_dw;
	if (ib_chunk->kdata)
		memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 4);
	else if (copy_from_user(parser->ib.ptr, ib_chunk->user_ptr, ib_chunk->length_dw * 4))
		return -EFAULT;
	return 0;
}

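/* radeon_cs_ioctl() - entry point for command submission from userspace:
 * parser init, IB fill, relocation and validation, then the non-VM and
 * VM submission paths, cleanup and lockup handling.
 */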
int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct radeon_device *rdev = dev->dev_private;
	struct radeon_cs_parser parser;
	int r;

	down_read(&rdev->exclusive_lock);
	if (!rdev->accel_working) {
		up_read(&rdev->exclusive_lock);
		return -EBUSY;
	}
	if (rdev->in_reset) {
		up_read(&rdev->exclusive_lock);
		r = radeon_gpu_reset(rdev);
		if (!r)
			r = -EAGAIN;
		return r;
	}
	/* initialize parser */
	memset(&parser, 0, sizeof(struct radeon_cs_parser));
	parser.filp = filp;
	parser.rdev = rdev;
	parser.dev = rdev->dev;
	parser.family = rdev->family;
	r = radeon_cs_parser_init(&parser, data);
	if (r) {
		DRM_ERROR("Failed to initialize parser !\n");
		radeon_cs_parser_fini(&parser, r, false);
		up_read(&rdev->exclusive_lock);
		r = radeon_cs_handle_lockup(rdev, r);
		return r;
	}

	r = radeon_cs_ib_fill(rdev, &parser);
	if (!r) {
		r = radeon_cs_parser_relocs(&parser);
		if (r && r != -ERESTARTSYS)
			DRM_ERROR("Failed to parse relocation %d!\n", r);
	}

	if (r) {
		radeon_cs_parser_fini(&parser, r, false);
		up_read(&rdev->exclusive_lock);
		r = radeon_cs_handle_lockup(rdev, r);
		return r;
	}

	trace_radeon_cs(&parser);

	r = radeon_cs_ib_chunk(rdev, &parser);
	if (r) {
		goto out;
	}
	r = radeon_cs_ib_vm_chunk(rdev, &parser);
	if (r) {
		goto out;
	}
out:
	radeon_cs_parser_fini(&parser, r, true);
	up_read(&rdev->exclusive_lock);
	r = radeon_cs_handle_lockup(rdev, r);
	return r;
}

/**
 * radeon_cs_packet_parse() - parse cp packet and point ib index to next packet
 * @p: parser structure holding parsing context.
 * @pkt: where to store packet information
 * @idx: packet index within the ib chunk
 *
 * Assume that chunk_ib_index is properly set. Will return -EINVAL if the
 * packet is bigger than the remaining ib size, or if the packet is unknown.
 **/
int radeon_cs_packet_parse(struct radeon_cs_parser *p,
			   struct radeon_cs_packet *pkt,
			   unsigned idx)
{
	struct radeon_cs_chunk *ib_chunk = p->chunk_ib;
	struct radeon_device *rdev = p->rdev;
	uint32_t header;
	int ret = 0, i;

	if (idx >= ib_chunk->length_dw) {
		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
			  idx, ib_chunk->length_dw);
		return -EINVAL;
	}
	header = radeon_get_ib_value(p, idx);
	pkt->idx = idx;
	pkt->type = RADEON_CP_PACKET_GET_TYPE(header);
	pkt->count = RADEON_CP_PACKET_GET_COUNT(header);
	pkt->one_reg_wr = 0;
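	/* The CP packet header encodes the type in bits [31:30] and the
	 * body size minus one in bits [29:16]; the remaining bits are
	 * type specific (register offset for type 0, opcode for type 3).
	 */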
	switch (pkt->type) {
	case RADEON_PACKET_TYPE0:
		if (rdev->family < CHIP_R600) {
			pkt->reg = R100_CP_PACKET0_GET_REG(header);
			pkt->one_reg_wr =
				RADEON_CP_PACKET0_GET_ONE_REG_WR(header);
		} else
			pkt->reg = R600_CP_PACKET0_GET_REG(header);
		break;
	case RADEON_PACKET_TYPE3:
		pkt->opcode = RADEON_CP_PACKET3_GET_OPCODE(header);
		break;
	case RADEON_PACKET_TYPE2:
		pkt->count = -1;
		break;
	default:
		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
		ret = -EINVAL;
		goto dump_ib;
	}
	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
		ret = -EINVAL;
		goto dump_ib;
	}
	return 0;

dump_ib:
	for (i = 0; i < ib_chunk->length_dw; i++) {
		if (i == idx)
			printk("\t0x%08x <---\n", radeon_get_ib_value(p, i));
		else
			printk("\t0x%08x\n", radeon_get_ib_value(p, i));
	}
	return ret;
}

/**
 * radeon_cs_packet_next_is_pkt3_nop() - test if the next packet is P3 NOP
 * @p: structure holding the parser context.
 *
 * Check if the next packet is a relocation packet3 (NOP).
 **/
bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet p3reloc;
	int r;

	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
	if (r)
		return false;
	if (p3reloc.type != RADEON_PACKET_TYPE3)
		return false;
	if (p3reloc.opcode != RADEON_PACKET3_NOP)
		return false;
	return true;
}

/**
 * radeon_cs_dump_packet() - dump raw packet context
 * @p: structure holding the parser context.
 * @pkt: structure holding the packet.
 *
 * Used mostly for debugging and error reporting.
 **/
void radeon_cs_dump_packet(struct radeon_cs_parser *p,
			   struct radeon_cs_packet *pkt)
{
	volatile uint32_t *ib;
	unsigned i;
	unsigned idx;

	ib = p->ib.ptr;
	idx = pkt->idx;
	for (i = 0; i <= (pkt->count + 1); i++, idx++)
		DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
}

/**
 * radeon_cs_packet_next_reloc() - parse next (should be reloc) packet
 * @p: parser structure holding parsing context.
 * @cs_reloc: reloc information
 * @nomm: no memory management for debugging
 *
 * Check if the next packet is a relocation packet3, do bo validation and
 * compute the GPU offset.
 **/
int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
				struct radeon_bo_list **cs_reloc,
				int nomm)
{
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_cs_packet p3reloc;
	unsigned idx;
	int r;

	if (p->chunk_relocs == NULL) {
		DRM_ERROR("No relocation chunk !\n");
		return -EINVAL;
	}
	*cs_reloc = NULL;
	relocs_chunk = p->chunk_relocs;
	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
	if (r)
		return r;
	p->idx += p3reloc.count + 2;
	if (p3reloc.type != RADEON_PACKET_TYPE3 ||
	    p3reloc.opcode != RADEON_PACKET3_NOP) {
		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
			  p3reloc.idx);
		radeon_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	idx = radeon_get_ib_value(p, p3reloc.idx + 1);
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
			  idx, relocs_chunk->length_dw);
		radeon_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	/* FIXME: we assume reloc size is 4 dwords */
	if (nomm) {
		*cs_reloc = p->relocs;
		(*cs_reloc)->gpu_offset =
			(u64)relocs_chunk->kdata[idx + 3] << 32;
		(*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0];
	} else
		*cs_reloc = &p->relocs[(idx / 4)];
	return 0;
}