// SPDX-License-Identifier: GPL-2.0-only
/*
 * Tegra host1x Job
 *
 * Copyright (c) 2010-2015, NVIDIA Corporation.
 */

#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/host1x.h>
#include <linux/iommu.h>
#include <linux/kref.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <trace/events/host1x.h>

#include "channel.h"
#include "dev.h"
#include "job.h"
#include "syncpt.h"

#define HOST1X_WAIT_SYNCPT_OFFSET 0x8

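/*
 * A job describes one submission to a host1x channel: a set of gather
 * buffers (command streams), optional syncpoint waits and the relocations
 * needed to patch buffer addresses into those command streams.
 *
 * Typical usage by a client driver, roughly (illustrative sketch only;
 * the exact flow and error handling depend on the client):
 *
 *	job = host1x_job_alloc(channel, num_cmdbufs, num_relocs, false);
 *	host1x_job_add_gather(job, bo, words, offset);
 *	...
 *	err = host1x_job_pin(job, client->dev);
 *	err = host1x_job_submit(job);
 *	host1x_job_put(job);
 *
 * host1x_job_alloc() places the job and all of its variable-sized arrays
 * (relocations, unpin records, commands and DMA addresses) in a single
 * allocation, which is why the element counts must be known up front.
 */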
struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
				    u32 num_cmdbufs, u32 num_relocs,
				    bool skip_firewall)
{
	struct host1x_job *job = NULL;
	unsigned int num_unpins = num_relocs;
	bool enable_firewall;
	u64 total;
	void *mem;

	enable_firewall = IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && !skip_firewall;

	if (!enable_firewall)
		num_unpins += num_cmdbufs;

	/* Check that we're not going to overflow */
	total = sizeof(struct host1x_job) +
		(u64)num_relocs * sizeof(struct host1x_reloc) +
		(u64)num_unpins * sizeof(struct host1x_job_unpin_data) +
		(u64)num_cmdbufs * sizeof(struct host1x_job_cmd) +
		(u64)num_unpins * sizeof(dma_addr_t) +
		(u64)num_unpins * sizeof(u32 *);
	if (total > ULONG_MAX)
		return NULL;

	mem = job = kzalloc(total, GFP_KERNEL);
	if (!job)
		return NULL;

	job->enable_firewall = enable_firewall;

	kref_init(&job->ref);
	job->channel = ch;

	/* Redistribute memory to the structs */
	mem += sizeof(struct host1x_job);
	job->relocs = num_relocs ? mem : NULL;
	mem += num_relocs * sizeof(struct host1x_reloc);
	job->unpins = num_unpins ? mem : NULL;
	mem += num_unpins * sizeof(struct host1x_job_unpin_data);
	job->cmds = num_cmdbufs ? mem : NULL;
	mem += num_cmdbufs * sizeof(struct host1x_job_cmd);
	job->addr_phys = num_unpins ? mem : NULL;

	job->reloc_addr_phys = job->addr_phys;
	job->gather_addr_phys = &job->addr_phys[num_relocs];

	return job;
}
EXPORT_SYMBOL(host1x_job_alloc);

struct host1x_job *host1x_job_get(struct host1x_job *job)
{
	kref_get(&job->ref);
	return job;
}
EXPORT_SYMBOL(host1x_job_get);

static void job_free(struct kref *ref)
{
	struct host1x_job *job = container_of(ref, struct host1x_job, ref);

	if (job->release)
		job->release(job);

	if (job->waiter)
		host1x_intr_put_ref(job->syncpt->host, job->syncpt->id,
				    job->waiter, false);

	if (job->syncpt)
		host1x_syncpt_put(job->syncpt);

	kfree(job);
}

void host1x_job_put(struct host1x_job *job)
{
	kref_put(&job->ref, job_free);
}
EXPORT_SYMBOL(host1x_job_put);

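/*
 * Append a gather (a command buffer reference) to the job. The caller must
 * have sized the job for enough command slots via host1x_job_alloc(), since
 * no bounds check is performed here.
 */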
void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
			   unsigned int words, unsigned int offset)
{
	struct host1x_job_gather *gather = &job->cmds[job->num_cmds].gather;

	gather->words = words;
	gather->bo = bo;
	gather->offset = offset;

	job->num_cmds++;
}
EXPORT_SYMBOL(host1x_job_add_gather);

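/*
 * Append a syncpoint wait command to the job. The wait is executed between
 * the surrounding gathers; next_class is the class to switch to after the
 * wait has completed.
 */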
void host1x_job_add_wait(struct host1x_job *job, u32 id, u32 thresh,
			 bool relative, u32 next_class)
{
	struct host1x_job_cmd *cmd = &job->cmds[job->num_cmds];

	cmd->is_wait = true;
	cmd->wait.id = id;
	cmd->wait.threshold = thresh;
	cmd->wait.next_class = next_class;
	cmd->wait.relative = relative;

	job->num_cmds++;
}
EXPORT_SYMBOL(host1x_job_add_wait);

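/*
 * Take references to all buffer objects used by the job's relocations and
 * gathers, map them for DMA and record each mapping in job->unpins so that
 * host1x_job_unpin() can undo the work on failure or completion.
 */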
static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
{
	struct host1x_client *client = job->client;
	struct device *dev = client->dev;
	struct host1x_job_gather *g;
	struct iommu_domain *domain;
	struct sg_table *sgt;
	unsigned int i;
	int err;

	domain = iommu_get_domain_for_dev(dev);
	job->num_unpins = 0;

	for (i = 0; i < job->num_relocs; i++) {
		struct host1x_reloc *reloc = &job->relocs[i];
		dma_addr_t phys_addr, *phys;

		reloc->target.bo = host1x_bo_get(reloc->target.bo);
		if (!reloc->target.bo) {
			err = -EINVAL;
			goto unpin;
		}

		/*
		 * If the client device is not attached to an IOMMU, the
		 * physical address of the buffer object can be used.
		 *
		 * Similarly, when an IOMMU domain is shared between all
		 * host1x clients, the IOVA is already available, so no
		 * need to map the buffer object again.
		 *
		 * XXX Note that this isn't always safe to do because it
		 * relies on an assumption that no cache maintenance is
		 * needed on the buffer objects.
		 */
		if (!domain || client->group)
			phys = &phys_addr;
		else
			phys = NULL;

		sgt = host1x_bo_pin(dev, reloc->target.bo, phys);
		if (IS_ERR(sgt)) {
			err = PTR_ERR(sgt);
			goto unpin;
		}

		if (sgt) {
			unsigned long mask = HOST1X_RELOC_READ |
					     HOST1X_RELOC_WRITE;
			enum dma_data_direction dir;

			switch (reloc->flags & mask) {
			case HOST1X_RELOC_READ:
				dir = DMA_TO_DEVICE;
				break;

			case HOST1X_RELOC_WRITE:
				dir = DMA_FROM_DEVICE;
				break;

			case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE:
				dir = DMA_BIDIRECTIONAL;
				break;

			default:
				err = -EINVAL;
				goto unpin;
			}

			err = dma_map_sgtable(dev, sgt, dir, 0);
			if (err)
				goto unpin;

			job->unpins[job->num_unpins].dev = dev;
			job->unpins[job->num_unpins].dir = dir;
			phys_addr = sg_dma_address(sgt->sgl);
		}

		job->addr_phys[job->num_unpins] = phys_addr;
		job->unpins[job->num_unpins].bo = reloc->target.bo;
		job->unpins[job->num_unpins].sgt = sgt;
		job->num_unpins++;
	}

	/*
	 * The contents of the gather BOs will be copied later, so there is
	 * no need to hold and pin them here.
	 */
	if (job->enable_firewall)
		return 0;

	for (i = 0; i < job->num_cmds; i++) {
		size_t gather_size = 0;
		struct scatterlist *sg;
		dma_addr_t phys_addr;
		unsigned long shift;
		struct iova *alloc;
		dma_addr_t *phys;
		unsigned int j;

		if (job->cmds[i].is_wait)
			continue;

		g = &job->cmds[i].gather;

		g->bo = host1x_bo_get(g->bo);
		if (!g->bo) {
			err = -EINVAL;
			goto unpin;
		}

		/*
		 * If the host1x is not attached to an IOMMU, there is no need
		 * to map the buffer object for the host1x, since the physical
		 * address can simply be used.
		 */
		if (!iommu_get_domain_for_dev(host->dev))
			phys = &phys_addr;
		else
			phys = NULL;

		sgt = host1x_bo_pin(host->dev, g->bo, phys);
		if (IS_ERR(sgt)) {
			err = PTR_ERR(sgt);
			goto put;
		}

		if (host->domain) {
			for_each_sgtable_sg(sgt, sg, j)
				gather_size += sg->length;
			gather_size = iova_align(&host->iova, gather_size);

			shift = iova_shift(&host->iova);
			alloc = alloc_iova(&host->iova, gather_size >> shift,
					   host->iova_end >> shift, true);
			if (!alloc) {
				err = -ENOMEM;
				goto put;
			}

			err = iommu_map_sgtable(host->domain,
						iova_dma_addr(&host->iova, alloc),
						sgt, IOMMU_READ);
			if (err == 0) {
				__free_iova(&host->iova, alloc);
				err = -EINVAL;
				goto put;
			}

			job->unpins[job->num_unpins].size = gather_size;
			phys_addr = iova_dma_addr(&host->iova, alloc);
		} else if (sgt) {
			err = dma_map_sgtable(host->dev, sgt, DMA_TO_DEVICE, 0);
			if (err)
				goto put;

			job->unpins[job->num_unpins].dir = DMA_TO_DEVICE;
			job->unpins[job->num_unpins].dev = host->dev;
			phys_addr = sg_dma_address(sgt->sgl);
		}

		job->addr_phys[job->num_unpins] = phys_addr;
		job->gather_addr_phys[i] = phys_addr;

		job->unpins[job->num_unpins].bo = g->bo;
		job->unpins[job->num_unpins].sgt = sgt;
		job->num_unpins++;
	}

	return 0;

put:
	host1x_bo_put(g->bo);
unpin:
	host1x_job_unpin(job);
	return err;
}

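/*
 * Patch the DMA addresses resolved by pin_job() into one gather's command
 * stream. When the firewall is enabled, the patching is done in the gather
 * copy rather than in the original buffer object.
 */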
static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g)
{
	void *cmdbuf_addr = NULL;
	struct host1x_bo *cmdbuf = g->bo;
	unsigned int i;

	/* pin & patch the relocs for one gather */
	for (i = 0; i < job->num_relocs; i++) {
		struct host1x_reloc *reloc = &job->relocs[i];
		u32 reloc_addr = (job->reloc_addr_phys[i] +
				  reloc->target.offset) >> reloc->shift;
		u32 *target;

		/* skip all other gathers */
		if (cmdbuf != reloc->cmdbuf.bo)
			continue;

		if (job->enable_firewall) {
			target = (u32 *)job->gather_copy_mapped +
					reloc->cmdbuf.offset / sizeof(u32) +
					g->offset / sizeof(u32);
			goto patch_reloc;
		}

		if (!cmdbuf_addr) {
			cmdbuf_addr = host1x_bo_mmap(cmdbuf);

			if (unlikely(!cmdbuf_addr)) {
				pr_err("Could not map cmdbuf for relocation\n");
				return -ENOMEM;
			}
		}

		target = cmdbuf_addr + reloc->cmdbuf.offset;
patch_reloc:
		*target = reloc_addr;
	}

	if (cmdbuf_addr)
		host1x_bo_munmap(cmdbuf, cmdbuf_addr);

	return 0;
}

static bool check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf,
			unsigned int offset)
{
	offset *= sizeof(u32);

	if (reloc->cmdbuf.bo != cmdbuf || reloc->cmdbuf.offset != offset)
		return false;

	/* relocation shift value validation isn't implemented yet */
	if (reloc->shift)
		return false;

	return true;
}

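/*
 * State for the software firewall: it walks the copied command streams,
 * decoding opcodes and checking that every write to an address register is
 * backed by a matching relocation entry, so that userspace cannot point the
 * hardware at arbitrary memory.
 */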
struct host1x_firewall {
	struct host1x_job *job;
	struct device *dev;

	unsigned int num_relocs;
	struct host1x_reloc *reloc;

	struct host1x_bo *cmdbuf;
	unsigned int offset;

	u32 words;
	u32 class;
	u32 reg;
	u32 mask;
	u32 count;
};

static int check_register(struct host1x_firewall *fw, unsigned long offset)
{
	if (!fw->job->is_addr_reg)
		return 0;

	if (fw->job->is_addr_reg(fw->dev, fw->class, offset)) {
		if (!fw->num_relocs)
			return -EINVAL;

		if (!check_reloc(fw->reloc, fw->cmdbuf, fw->offset))
			return -EINVAL;

		fw->num_relocs--;
		fw->reloc++;
	}

	return 0;
}

static int check_class(struct host1x_firewall *fw, u32 class)
{
	if (!fw->job->is_valid_class) {
		if (fw->class != class)
			return -EINVAL;
	} else {
		if (!fw->job->is_valid_class(fw->class))
			return -EINVAL;
	}

	return 0;
}

static int check_mask(struct host1x_firewall *fw)
{
	u32 mask = fw->mask;
	u32 reg = fw->reg;
	int ret;

	while (mask) {
		if (fw->words == 0)
			return -EINVAL;

		if (mask & 1) {
			ret = check_register(fw, reg);
			if (ret < 0)
				return ret;

			fw->words--;
			fw->offset++;
		}
		mask >>= 1;
		reg++;
	}

	return 0;
}

static int check_incr(struct host1x_firewall *fw)
{
	u32 count = fw->count;
	u32 reg = fw->reg;
	int ret;

	while (count) {
		if (fw->words == 0)
			return -EINVAL;

		ret = check_register(fw, reg);
		if (ret < 0)
			return ret;

		reg++;
		fw->words--;
		fw->offset++;
		count--;
	}

	return 0;
}

static int check_nonincr(struct host1x_firewall *fw)
{
	u32 count = fw->count;
	int ret;

	while (count) {
		if (fw->words == 0)
			return -EINVAL;

		ret = check_register(fw, fw->reg);
		if (ret < 0)
			return ret;

		fw->words--;
		fw->offset++;
		count--;
	}

	return 0;
}

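/*
 * Walk one copied gather word by word, decoding the host1x opcodes
 * (setclass, incrementing, non-incrementing and masked writes) and running
 * every register write through check_register(). Unknown opcodes fail
 * validation.
 */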
static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g)
{
	u32 *cmdbuf_base = (u32 *)fw->job->gather_copy_mapped +
		(g->offset / sizeof(u32));
	u32 job_class = fw->class;
	int err = 0;

	fw->words = g->words;
	fw->cmdbuf = g->bo;
	fw->offset = 0;

	while (fw->words && !err) {
		u32 word = cmdbuf_base[fw->offset];
		u32 opcode = (word & 0xf0000000) >> 28;

		fw->mask = 0;
		fw->reg = 0;
		fw->count = 0;
		fw->words--;
		fw->offset++;

		switch (opcode) {
		case 0:
			fw->class = word >> 6 & 0x3ff;
			fw->mask = word & 0x3f;
			fw->reg = word >> 16 & 0xfff;
			err = check_class(fw, job_class);
			if (!err)
				err = check_mask(fw);
			if (err)
				goto out;
			break;
		case 1:
			fw->reg = word >> 16 & 0xfff;
			fw->count = word & 0xffff;
			err = check_incr(fw);
			if (err)
				goto out;
			break;

		case 2:
			fw->reg = word >> 16 & 0xfff;
			fw->count = word & 0xffff;
			err = check_nonincr(fw);
			if (err)
				goto out;
			break;

		case 3:
			fw->mask = word & 0xffff;
			fw->reg = word >> 16 & 0xfff;
			err = check_mask(fw);
			if (err)
				goto out;
			break;
		case 4:
		case 14:
			break;
		default:
			err = -EINVAL;
			break;
		}
	}

out:
	return err;
}

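/*
 * Copy the contents of all gathers into a single contiguous write-combined
 * buffer and run the firewall over the copy. Validating the copy, rather
 * than the original buffer objects, prevents userspace from modifying the
 * commands after they have been checked.
 */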
static inline int copy_gathers(struct device *host, struct host1x_job *job,
			       struct device *dev)
{
	struct host1x_firewall fw;
	size_t size = 0;
	size_t offset = 0;
	unsigned int i;

	fw.job = job;
	fw.dev = dev;
	fw.reloc = job->relocs;
	fw.num_relocs = job->num_relocs;
	fw.class = job->class;

	for (i = 0; i < job->num_cmds; i++) {
		struct host1x_job_gather *g;

		if (job->cmds[i].is_wait)
			continue;

		g = &job->cmds[i].gather;

		size += g->words * sizeof(u32);
	}

	/*
	 * Try a non-blocking allocation from the higher-priority pools first,
	 * since waiting for the allocation here is a major performance hit.
	 */
	job->gather_copy_mapped = dma_alloc_wc(host, size, &job->gather_copy,
					       GFP_NOWAIT);

	/* if the non-blocking allocation failed, fall back to a blocking one */
	if (!job->gather_copy_mapped)
		job->gather_copy_mapped = dma_alloc_wc(host, size,
						       &job->gather_copy,
						       GFP_KERNEL);
	if (!job->gather_copy_mapped)
		return -ENOMEM;

	job->gather_copy_size = size;

	for (i = 0; i < job->num_cmds; i++) {
		struct host1x_job_gather *g;
		void *gather;

		if (job->cmds[i].is_wait)
			continue;
		g = &job->cmds[i].gather;

		/* Copy the gather */
		gather = host1x_bo_mmap(g->bo);
		memcpy(job->gather_copy_mapped + offset, gather + g->offset,
		       g->words * sizeof(u32));
		host1x_bo_munmap(g->bo, gather);

		/* Store the location in the buffer */
		g->base = job->gather_copy;
		g->offset = offset;

		/* Validate the job */
		if (validate(&fw, g))
			return -EINVAL;

		offset += g->words * sizeof(u32);
	}

	/* No relocs should remain at this point */
	if (fw.num_relocs)
		return -EINVAL;

	return 0;
}

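/*
 * Prepare a job for submission: pin and map all buffers, optionally copy
 * and validate the gathers through the firewall, and patch the relocation
 * addresses into the command streams.
 */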
int host1x_job_pin(struct host1x_job *job, struct device *dev)
{
	int err;
	unsigned int i, j;
	struct host1x *host = dev_get_drvdata(dev->parent);

	/* pin memory */
	err = pin_job(host, job);
	if (err)
		goto out;

	if (job->enable_firewall) {
		err = copy_gathers(host->dev, job, dev);
		if (err)
			goto out;
	}

	/* patch gathers */
	for (i = 0; i < job->num_cmds; i++) {
		struct host1x_job_gather *g;

		if (job->cmds[i].is_wait)
			continue;
		g = &job->cmds[i].gather;

		/* process each gather mem only once */
		if (g->handled)
			continue;

		/* copy_gathers() sets gathers base if firewall is enabled */
		if (!job->enable_firewall)
			g->base = job->gather_addr_phys[i];

		for (j = i + 1; j < job->num_cmds; j++) {
			if (!job->cmds[j].is_wait &&
			    job->cmds[j].gather.bo == g->bo) {
				job->cmds[j].gather.handled = true;
				job->cmds[j].gather.base = g->base;
			}
		}

		err = do_relocs(job, g);
		if (err)
			break;
	}

out:
	if (err)
		host1x_job_unpin(job);
	wmb();

	return err;
}
EXPORT_SYMBOL(host1x_job_pin);

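/*
 * Undo the work done by pin_job() and copy_gathers(): unmap and release all
 * pinned buffer objects and free the gather copy, if one was allocated.
 */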
void host1x_job_unpin(struct host1x_job *job)
{
	struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
	unsigned int i;

	for (i = 0; i < job->num_unpins; i++) {
		struct host1x_job_unpin_data *unpin = &job->unpins[i];
		struct device *dev = unpin->dev ?: host->dev;
		struct sg_table *sgt = unpin->sgt;

		if (!job->enable_firewall && unpin->size && host->domain) {
			iommu_unmap(host->domain, job->addr_phys[i],
				    unpin->size);
			free_iova(&host->iova,
				  iova_pfn(&host->iova, job->addr_phys[i]));
		}

		if (unpin->dev && sgt)
			dma_unmap_sgtable(unpin->dev, sgt, unpin->dir, 0);

		host1x_bo_unpin(dev, unpin->bo, sgt);
		host1x_bo_put(unpin->bo);
	}

	job->num_unpins = 0;

	if (job->gather_copy_size)
		dma_free_wc(host->dev, job->gather_copy_size,
			    job->gather_copy_mapped, job->gather_copy);
}
EXPORT_SYMBOL(host1x_job_unpin);

/*
 * Debug routine used to dump job entries
 */
void host1x_job_dump(struct device *dev, struct host1x_job *job)
{
	dev_dbg(dev, " SYNCPT_ID %d\n", job->syncpt->id);
	dev_dbg(dev, " SYNCPT_VAL %d\n", job->syncpt_end);
	dev_dbg(dev, " FIRST_GET 0x%x\n", job->first_get);
	dev_dbg(dev, " TIMEOUT %d\n", job->timeout);
	dev_dbg(dev, " NUM_SLOTS %d\n", job->num_slots);
	dev_dbg(dev, " NUM_HANDLES %d\n", job->num_unpins);
}