1  /*
2   * Copyright © 2014 Broadcom
3   *
4   * Permission is hereby granted, free of charge, to any person obtaining a
5   * copy of this software and associated documentation files (the "Software"),
6   * to deal in the Software without restriction, including without limitation
7   * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8   * and/or sell copies of the Software, and to permit persons to whom the
9   * Software is furnished to do so, subject to the following conditions:
10   *
11   * The above copyright notice and this permission notice (including the next
12   * paragraph) shall be included in all copies or substantial portions of the
13   * Software.
14   *
15   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18   * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20   * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21   * IN THE SOFTWARE.
22   */
23  
24  #include <linux/module.h>
25  #include <linux/platform_device.h>
26  #include <linux/pm_runtime.h>
27  #include <linux/device.h>
28  #include <linux/io.h>
29  #include <linux/sched/signal.h>
30  
31  #include "uapi/drm/vc4_drm.h"
32  #include "vc4_drv.h"
33  #include "vc4_regs.h"
34  #include "vc4_trace.h"
35  
36  static void
37  vc4_queue_hangcheck(struct drm_device *dev)
38  {
39  	struct vc4_dev *vc4 = to_vc4_dev(dev);
40  
41  	mod_timer(&vc4->hangcheck.timer,
42  		  round_jiffies_up(jiffies + msecs_to_jiffies(100)));
43  }
44  
45  struct vc4_hang_state {
46  	struct drm_vc4_get_hang_state user_state;
47  
48  	u32 bo_count;
49  	struct drm_gem_object **bo;
50  };
51  
52  static void
53  vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state)
54  {
55  	unsigned int i;
56  
57  	for (i = 0; i < state->user_state.bo_count; i++)
58  		drm_gem_object_put_unlocked(state->bo[i]);
59  
60  	kfree(state);
61  }
62  
63  int
64  vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
65  			 struct drm_file *file_priv)
66  {
67  	struct drm_vc4_get_hang_state *get_state = data;
68  	struct drm_vc4_get_hang_state_bo *bo_state;
69  	struct vc4_hang_state *kernel_state;
70  	struct drm_vc4_get_hang_state *state;
71  	struct vc4_dev *vc4 = to_vc4_dev(dev);
72  	unsigned long irqflags;
73  	u32 i;
74  	int ret = 0;
75  
76  	spin_lock_irqsave(&vc4->job_lock, irqflags);
77  	kernel_state = vc4->hang_state;
78  	if (!kernel_state) {
79  		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
80  		return -ENOENT;
81  	}
82  	state = &kernel_state->user_state;
83  
84  	/* If the user's array isn't big enough, just return the
85  	 * required array size.
86  	 */
87  	if (get_state->bo_count < state->bo_count) {
88  		get_state->bo_count = state->bo_count;
89  		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
90  		return 0;
91  	}
92  
93  	vc4->hang_state = NULL;
94  	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
95  
96  	/* Save the user's BO pointer, so we don't stomp it with the memcpy. */
97  	state->bo = get_state->bo;
98  	memcpy(get_state, state, sizeof(*state));
99  
100  	bo_state = kcalloc(state->bo_count, sizeof(*bo_state), GFP_KERNEL);
101  	if (!bo_state) {
102  		ret = -ENOMEM;
103  		goto err_free;
104  	}
105  
106  	for (i = 0; i < state->bo_count; i++) {
107  		struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]);
108  		u32 handle;
109  
110  		ret = drm_gem_handle_create(file_priv, kernel_state->bo[i],
111  					    &handle);
112  
113  		if (ret) {
114  			state->bo_count = i;
115  			goto err_delete_handle;
116  		}
117  		bo_state[i].handle = handle;
118  		bo_state[i].paddr = vc4_bo->base.paddr;
119  		bo_state[i].size = vc4_bo->base.base.size;
120  	}
121  
122  	if (copy_to_user(u64_to_user_ptr(get_state->bo),
123  			 bo_state,
124  			 state->bo_count * sizeof(*bo_state)))
125  		ret = -EFAULT;
126  
127  err_delete_handle:
128  	if (ret) {
129  		for (i = 0; i < state->bo_count; i++)
130  			drm_gem_handle_delete(file_priv, bo_state[i].handle);
131  	}
132  
133  err_free:
134  	vc4_free_hang_state(dev, kernel_state);
135  	kfree(bo_state);
136  
137  	return ret;
138  }
139  
140  static void
141  vc4_save_hang_state(struct drm_device *dev)
142  {
143  	struct vc4_dev *vc4 = to_vc4_dev(dev);
144  	struct drm_vc4_get_hang_state *state;
145  	struct vc4_hang_state *kernel_state;
146  	struct vc4_exec_info *exec[2];
147  	struct vc4_bo *bo;
148  	unsigned long irqflags;
149  	unsigned int i, j, k, unref_list_count;
150  
151  	kernel_state = kcalloc(1, sizeof(*kernel_state), GFP_KERNEL);
152  	if (!kernel_state)
153  		return;
154  
155  	state = &kernel_state->user_state;
156  
157  	spin_lock_irqsave(&vc4->job_lock, irqflags);
158  	exec[0] = vc4_first_bin_job(vc4);
159  	exec[1] = vc4_first_render_job(vc4);
160  	if (!exec[0] && !exec[1]) {
161  		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
162  		return;
163  	}
164  
165  	/* Get the bos from both binner and renderer into hang state. */
166  	state->bo_count = 0;
167  	for (i = 0; i < 2; i++) {
168  		if (!exec[i])
169  			continue;
170  
171  		unref_list_count = 0;
172  		list_for_each_entry(bo, &exec[i]->unref_list, unref_head)
173  			unref_list_count++;
174  		state->bo_count += exec[i]->bo_count + unref_list_count;
175  	}
176  
177  	kernel_state->bo = kcalloc(state->bo_count,
178  				   sizeof(*kernel_state->bo), GFP_ATOMIC);
179  
180  	if (!kernel_state->bo) {
181  		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
182  		return;
183  	}
184  
185  	k = 0;
186  	for (i = 0; i < 2; i++) {
187  		if (!exec[i])
188  			continue;
189  
190  		for (j = 0; j < exec[i]->bo_count; j++) {
191  			drm_gem_object_get(&exec[i]->bo[j]->base);
192  			kernel_state->bo[k++] = &exec[i]->bo[j]->base;
193  		}
194  
195  		list_for_each_entry(bo, &exec[i]->unref_list, unref_head) {
196  			drm_gem_object_get(&bo->base.base);
197  			kernel_state->bo[k++] = &bo->base.base;
198  		}
199  	}
200  
201  	WARN_ON_ONCE(k != state->bo_count);
202  
203  	if (exec[0])
204  		state->start_bin = exec[0]->ct0ca;
205  	if (exec[1])
206  		state->start_render = exec[1]->ct1ca;
207  
208  	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
209  
210  	state->ct0ca = V3D_READ(V3D_CTNCA(0));
211  	state->ct0ea = V3D_READ(V3D_CTNEA(0));
212  
213  	state->ct1ca = V3D_READ(V3D_CTNCA(1));
214  	state->ct1ea = V3D_READ(V3D_CTNEA(1));
215  
216  	state->ct0cs = V3D_READ(V3D_CTNCS(0));
217  	state->ct1cs = V3D_READ(V3D_CTNCS(1));
218  
219  	state->ct0ra0 = V3D_READ(V3D_CT00RA0);
220  	state->ct1ra0 = V3D_READ(V3D_CT01RA0);
221  
222  	state->bpca = V3D_READ(V3D_BPCA);
223  	state->bpcs = V3D_READ(V3D_BPCS);
224  	state->bpoa = V3D_READ(V3D_BPOA);
225  	state->bpos = V3D_READ(V3D_BPOS);
226  
227  	state->vpmbase = V3D_READ(V3D_VPMBASE);
228  
229  	state->dbge = V3D_READ(V3D_DBGE);
230  	state->fdbgo = V3D_READ(V3D_FDBGO);
231  	state->fdbgb = V3D_READ(V3D_FDBGB);
232  	state->fdbgr = V3D_READ(V3D_FDBGR);
233  	state->fdbgs = V3D_READ(V3D_FDBGS);
234  	state->errstat = V3D_READ(V3D_ERRSTAT);
235  
236  	spin_lock_irqsave(&vc4->job_lock, irqflags);
237  	if (vc4->hang_state) {
238  		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
239  		vc4_free_hang_state(dev, kernel_state);
240  	} else {
241  		vc4->hang_state = kernel_state;
242  		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
243  	}
244  }
245  
246  static void
247  vc4_reset(struct drm_device *dev)
248  {
249  	struct vc4_dev *vc4 = to_vc4_dev(dev);
250  
251  	DRM_INFO("Resetting GPU.\n");
252  
253  	mutex_lock(&vc4->power_lock);
254  	if (vc4->power_refcount) {
255  		/* Power the device off and back on by dropping the
256  		 * reference on runtime PM.
257  		 */
258  		pm_runtime_put_sync_suspend(&vc4->v3d->pdev->dev);
259  		pm_runtime_get_sync(&vc4->v3d->pdev->dev);
260  	}
261  	mutex_unlock(&vc4->power_lock);
262  
263  	vc4_irq_reset(dev);
264  
265  	/* Rearm the hangcheck -- another job might have been waiting
266  	 * for our hung one to get kicked off, and vc4_irq_reset()
267  	 * would have started it.
268  	 */
269  	vc4_queue_hangcheck(dev);
270  }
271  
272  static void
273  vc4_reset_work(struct work_struct *work)
274  {
275  	struct vc4_dev *vc4 =
276  		container_of(work, struct vc4_dev, hangcheck.reset_work);
277  
278  	vc4_save_hang_state(vc4->dev);
279  
280  	vc4_reset(vc4->dev);
281  }
282  
283  static void
284  vc4_hangcheck_elapsed(unsigned long data)
285  {
286  	struct drm_device *dev = (struct drm_device *)data;
287  	struct vc4_dev *vc4 = to_vc4_dev(dev);
288  	uint32_t ct0ca, ct1ca;
289  	unsigned long irqflags;
290  	struct vc4_exec_info *bin_exec, *render_exec;
291  
292  	spin_lock_irqsave(&vc4->job_lock, irqflags);
293  
294  	bin_exec = vc4_first_bin_job(vc4);
295  	render_exec = vc4_first_render_job(vc4);
296  
297  	/* If idle, we can stop watching for hangs. */
298  	if (!bin_exec && !render_exec) {
299  		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
300  		return;
301  	}
302  
303  	ct0ca = V3D_READ(V3D_CTNCA(0));
304  	ct1ca = V3D_READ(V3D_CTNCA(1));
305  
306  	/* If we've made any progress in execution, rearm the timer
307  	 * and wait.
308  	 */
309  	if ((bin_exec && ct0ca != bin_exec->last_ct0ca) ||
310  	    (render_exec && ct1ca != render_exec->last_ct1ca)) {
311  		if (bin_exec)
312  			bin_exec->last_ct0ca = ct0ca;
313  		if (render_exec)
314  			render_exec->last_ct1ca = ct1ca;
315  		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
316  		vc4_queue_hangcheck(dev);
317  		return;
318  	}
319  
320  	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
321  
322  	/* We've gone too long with no progress, reset.  This has to
323  	 * be done from a work struct, since resetting can sleep and
324  	 * this timer hook isn't allowed to.
325  	 */
326  	schedule_work(&vc4->hangcheck.reset_work);
327  }
328  
329  static void
330  submit_cl(struct drm_device *dev, uint32_t thread, uint32_t start, uint32_t end)
331  {
332  	struct vc4_dev *vc4 = to_vc4_dev(dev);
333  
334  	/* Set the current and end address of the control list.
335  	 * Writing the end register is what starts the job.
336  	 */
337  	V3D_WRITE(V3D_CTNCA(thread), start);
338  	V3D_WRITE(V3D_CTNEA(thread), end);
339  }
340  
341  int
342  vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns,
343  		   bool interruptible)
344  {
345  	struct vc4_dev *vc4 = to_vc4_dev(dev);
346  	int ret = 0;
347  	unsigned long timeout_expire;
348  	DEFINE_WAIT(wait);
349  
350  	if (vc4->finished_seqno >= seqno)
351  		return 0;
352  
353  	if (timeout_ns == 0)
354  		return -ETIME;
355  
356  	timeout_expire = jiffies + nsecs_to_jiffies(timeout_ns);
357  
358  	trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns);
359  	for (;;) {
360  		prepare_to_wait(&vc4->job_wait_queue, &wait,
361  				interruptible ? TASK_INTERRUPTIBLE :
362  				TASK_UNINTERRUPTIBLE);
363  
364  		if (interruptible && signal_pending(current)) {
365  			ret = -ERESTARTSYS;
366  			break;
367  		}
368  
369  		if (vc4->finished_seqno >= seqno)
370  			break;
371  
372  		if (timeout_ns != ~0ull) {
373  			if (time_after_eq(jiffies, timeout_expire)) {
374  				ret = -ETIME;
375  				break;
376  			}
377  			schedule_timeout(timeout_expire - jiffies);
378  		} else {
379  			schedule();
380  		}
381  	}
382  
383  	finish_wait(&vc4->job_wait_queue, &wait);
384  	trace_vc4_wait_for_seqno_end(dev, seqno);
385  
386  	return ret;
387  }
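/* Note on the wait above: a timeout_ns of 0 makes it a non-blocking
 * poll (it returns -ETIME unless the seqno has already completed),
 * while ~0ull waits indefinitely.  vc4_get_bcl() below uses the
 * indefinite, interruptible form for bin dependencies; the wait ioctls
 * pass the user-supplied timeout through
 * vc4_wait_for_seqno_ioctl_helper().
 */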
388  
389  static void
390  vc4_flush_caches(struct drm_device *dev)
391  {
392  	struct vc4_dev *vc4 = to_vc4_dev(dev);
393  
394  	/* Flush the GPU L2 caches.  These caches sit on top of system
395  	 * L3 (the 128kb or so shared with the CPU), and are
396  	 * non-allocating in the L3.
397  	 */
398  	V3D_WRITE(V3D_L2CACTL,
399  		  V3D_L2CACTL_L2CCLR);
400  
401  	V3D_WRITE(V3D_SLCACTL,
402  		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) |
403  		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC) |
404  		  VC4_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
405  		  VC4_SET_FIELD(0xf, V3D_SLCACTL_ICC));
406  }
407  
408  /* Sets the registers for the next job to actually be executed in
409   * the hardware.
410   *
411   * The job_lock should be held during this.
412   */
413  void
414  vc4_submit_next_bin_job(struct drm_device *dev)
415  {
416  	struct vc4_dev *vc4 = to_vc4_dev(dev);
417  	struct vc4_exec_info *exec;
418  
419  again:
420  	exec = vc4_first_bin_job(vc4);
421  	if (!exec)
422  		return;
423  
424  	vc4_flush_caches(dev);
425  
426  	/* Either put the job in the binner if it uses the binner, or
427  	 * immediately move it to the to-be-rendered queue.
428  	 */
429  	if (exec->ct0ca != exec->ct0ea) {
430  		submit_cl(dev, 0, exec->ct0ca, exec->ct0ea);
431  	} else {
432  		vc4_move_job_to_render(dev, exec);
433  		goto again;
434  	}
435  }
436  
437  void
438  vc4_submit_next_render_job(struct drm_device *dev)
439  {
440  	struct vc4_dev *vc4 = to_vc4_dev(dev);
441  	struct vc4_exec_info *exec = vc4_first_render_job(vc4);
442  
443  	if (!exec)
444  		return;
445  
446  	submit_cl(dev, 1, exec->ct1ca, exec->ct1ea);
447  }
448  
449  void
450  vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec)
451  {
452  	struct vc4_dev *vc4 = to_vc4_dev(dev);
453  	bool was_empty = list_empty(&vc4->render_job_list);
454  
455  	list_move_tail(&exec->head, &vc4->render_job_list);
456  	if (was_empty)
457  		vc4_submit_next_render_job(dev);
458  }
459  
460  static void
461  vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
462  {
463  	struct vc4_bo *bo;
464  	unsigned i;
465  
466  	for (i = 0; i < exec->bo_count; i++) {
467  		bo = to_vc4_bo(&exec->bo[i]->base);
468  		bo->seqno = seqno;
469  
470  		reservation_object_add_shared_fence(bo->resv, exec->fence);
471  	}
472  
473  	list_for_each_entry(bo, &exec->unref_list, unref_head) {
474  		bo->seqno = seqno;
475  	}
476  
477  	for (i = 0; i < exec->rcl_write_bo_count; i++) {
478  		bo = to_vc4_bo(&exec->rcl_write_bo[i]->base);
479  		bo->write_seqno = seqno;
480  
481  		reservation_object_add_excl_fence(bo->resv, exec->fence);
482  	}
483  }
484  
485  static void
486  vc4_unlock_bo_reservations(struct drm_device *dev,
487  			   struct vc4_exec_info *exec,
488  			   struct ww_acquire_ctx *acquire_ctx)
489  {
490  	int i;
491  
492  	for (i = 0; i < exec->bo_count; i++) {
493  		struct vc4_bo *bo = to_vc4_bo(&exec->bo[i]->base);
494  
495  		ww_mutex_unlock(&bo->resv->lock);
496  	}
497  
498  	ww_acquire_fini(acquire_ctx);
499  }
500  
501  /* Takes the reservation lock on all the BOs being referenced, so that
502   * at queue submit time we can update the reservations.
503   *
504   * We don't lock the RCL, the tile alloc/state BOs, or overflow memory
505   * (all of which are on exec->unref_list).  They're entirely private
506   * to vc4, so we don't attach dma-buf fences to them.
507   */
508  static int
509  vc4_lock_bo_reservations(struct drm_device *dev,
510  			 struct vc4_exec_info *exec,
511  			 struct ww_acquire_ctx *acquire_ctx)
512  {
513  	int contended_lock = -1;
514  	int i, ret;
515  	struct vc4_bo *bo;
516  
517  	ww_acquire_init(acquire_ctx, &reservation_ww_class);
518  
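	/* The loop below follows the usual ww_mutex backoff pattern: if
	 * ww_mutex_lock_interruptible() returns -EDEADLK for some BO,
	 * every lock already held is dropped, the contended lock is
	 * taken with ww_mutex_lock_slow_interruptible(), and the whole
	 * acquisition is retried from here.
	 */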
519  retry:
520  	if (contended_lock != -1) {
521  		bo = to_vc4_bo(&exec->bo[contended_lock]->base);
522  		ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock,
523  						       acquire_ctx);
524  		if (ret) {
525  			ww_acquire_done(acquire_ctx);
526  			return ret;
527  		}
528  	}
529  
530  	for (i = 0; i < exec->bo_count; i++) {
531  		if (i == contended_lock)
532  			continue;
533  
534  		bo = to_vc4_bo(&exec->bo[i]->base);
535  
536  		ret = ww_mutex_lock_interruptible(&bo->resv->lock, acquire_ctx);
537  		if (ret) {
538  			int j;
539  
540  			for (j = 0; j < i; j++) {
541  				bo = to_vc4_bo(&exec->bo[j]->base);
542  				ww_mutex_unlock(&bo->resv->lock);
543  			}
544  
545  			if (contended_lock != -1 && contended_lock >= i) {
546  				bo = to_vc4_bo(&exec->bo[contended_lock]->base);
547  
548  				ww_mutex_unlock(&bo->resv->lock);
549  			}
550  
551  			if (ret == -EDEADLK) {
552  				contended_lock = i;
553  				goto retry;
554  			}
555  
556  			ww_acquire_done(acquire_ctx);
557  			return ret;
558  		}
559  	}
560  
561  	ww_acquire_done(acquire_ctx);
562  
563  	/* Reserve space for our shared (read-only) fence references,
564  	 * before we commit the CL to the hardware.
565  	 */
566  	for (i = 0; i < exec->bo_count; i++) {
567  		bo = to_vc4_bo(&exec->bo[i]->base);
568  
569  		ret = reservation_object_reserve_shared(bo->resv);
570  		if (ret) {
571  			vc4_unlock_bo_reservations(dev, exec, acquire_ctx);
572  			return ret;
573  		}
574  	}
575  
576  	return 0;
577  }
578  
579  /* Queues a struct vc4_exec_info for execution.  If no job is
580   * currently executing, then submits it.
581   *
582   * Unlike most GPUs, our hardware only handles one command list at a
583   * time.  To queue multiple jobs at once, we'd need to edit the
584   * previous command list to have a jump to the new one at the end, and
585   * then bump the end address.  That's a change for a later date,
586   * though.
587   */
588  static int
589  vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
590  		 struct ww_acquire_ctx *acquire_ctx)
591  {
592  	struct vc4_dev *vc4 = to_vc4_dev(dev);
593  	uint64_t seqno;
594  	unsigned long irqflags;
595  	struct vc4_fence *fence;
596  
597  	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
598  	if (!fence)
599  		return -ENOMEM;
600  	fence->dev = dev;
601  
602  	spin_lock_irqsave(&vc4->job_lock, irqflags);
603  
604  	seqno = ++vc4->emit_seqno;
605  	exec->seqno = seqno;
606  
607  	dma_fence_init(&fence->base, &vc4_fence_ops, &vc4->job_lock,
608  		       vc4->dma_fence_context, exec->seqno);
609  	fence->seqno = exec->seqno;
610  	exec->fence = &fence->base;
611  
612  	vc4_update_bo_seqnos(exec, seqno);
613  
614  	vc4_unlock_bo_reservations(dev, exec, acquire_ctx);
615  
616  	list_add_tail(&exec->head, &vc4->bin_job_list);
617  
618  	/* If no job was executing, kick ours off.  Otherwise, it'll
619  	 * get started when the previous job's flush done interrupt
620  	 * occurs.
621  	 */
622  	if (vc4_first_bin_job(vc4) == exec) {
623  		vc4_submit_next_bin_job(dev);
624  		vc4_queue_hangcheck(dev);
625  	}
626  
627  	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
628  
629  	return 0;
630  }
631  
632  /**
633   * vc4_cl_lookup_bos() - Sets up exec->bo[] with the GEM objects
634   * referenced by the job.
635   * @dev: DRM device
636   * @file_priv: DRM file for this fd
637   * @exec: V3D job being set up
638   *
639   * The command validator needs to reference BOs by their index within
640   * the submitted job's BO list.  This does the validation of the job's
641   * BO list and reference counting for the lifetime of the job.
642   *
643   * Note that this function doesn't need to unreference the BOs on
644   * failure, because that will happen at vc4_complete_exec() time.
645   */
646  static int
647  vc4_cl_lookup_bos(struct drm_device *dev,
648  		  struct drm_file *file_priv,
649  		  struct vc4_exec_info *exec)
650  {
651  	struct drm_vc4_submit_cl *args = exec->args;
652  	uint32_t *handles;
653  	int ret = 0;
654  	int i;
655  
656  	exec->bo_count = args->bo_handle_count;
657  
658  	if (!exec->bo_count) {
659  		/* See comment on bo_index for why we have to check
660  		 * this.
661  		 */
662  		DRM_DEBUG("Rendering requires BOs to validate\n");
663  		return -EINVAL;
664  	}
665  
666  	exec->bo = kvmalloc_array(exec->bo_count,
667  				    sizeof(struct drm_gem_cma_object *),
668  				    GFP_KERNEL | __GFP_ZERO);
669  	if (!exec->bo) {
670  		DRM_ERROR("Failed to allocate validated BO pointers\n");
671  		return -ENOMEM;
672  	}
673  
674  	handles = kvmalloc_array(exec->bo_count, sizeof(uint32_t), GFP_KERNEL);
675  	if (!handles) {
676  		ret = -ENOMEM;
677  		DRM_ERROR("Failed to allocate incoming GEM handles\n");
678  		goto fail;
679  	}
680  
681  	if (copy_from_user(handles, u64_to_user_ptr(args->bo_handles),
682  			   exec->bo_count * sizeof(uint32_t))) {
683  		ret = -EFAULT;
684  		DRM_ERROR("Failed to copy in GEM handles\n");
685  		goto fail;
686  	}
687  
688  	spin_lock(&file_priv->table_lock);
689  	for (i = 0; i < exec->bo_count; i++) {
690  		struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
691  						     handles[i]);
692  		if (!bo) {
693  			DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
694  				  i, handles[i]);
695  			ret = -EINVAL;
696  			spin_unlock(&file_priv->table_lock);
697  			goto fail;
698  		}
699  		drm_gem_object_get(bo);
700  		exec->bo[i] = (struct drm_gem_cma_object *)bo;
701  	}
702  	spin_unlock(&file_priv->table_lock);
703  
704  fail:
705  	kvfree(handles);
706  	return ret;
707  }
708  
709  static int
710  vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
711  {
712  	struct drm_vc4_submit_cl *args = exec->args;
713  	void *temp = NULL;
714  	void *bin;
715  	int ret = 0;
716  	uint32_t bin_offset = 0;
717  	uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size,
718  					     16);
719  	uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size;
720  	uint32_t exec_size = uniforms_offset + args->uniforms_size;
721  	uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) *
722  					  args->shader_rec_count);
723  	struct vc4_bo *bo;
724  
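	/* Layout of the temporary copy buffer (and of exec_bo, which
	 * reuses the same offsets): the binner CL at bin_offset (0),
	 * shader records at shader_rec_offset, then uniforms at
	 * uniforms_offset.  The vc4_shader_state array is appended at
	 * exec_size in the temporary buffer only.
	 */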
725  	if (shader_rec_offset < args->bin_cl_size ||
726  	    uniforms_offset < shader_rec_offset ||
727  	    exec_size < uniforms_offset ||
728  	    args->shader_rec_count >= (UINT_MAX /
729  					  sizeof(struct vc4_shader_state)) ||
730  	    temp_size < exec_size) {
731  		DRM_DEBUG("overflow in exec arguments\n");
732  		ret = -EINVAL;
733  		goto fail;
734  	}
735  
736  	/* Allocate space where we'll store the copied in user command lists
737  	 * and shader records.
738  	 *
739  	 * We don't just copy directly into the BOs because we need to
740  	 * read the contents back for validation, and I think the
741  	 * bo->vaddr is uncached access.
742  	 */
743  	temp = kvmalloc_array(temp_size, 1, GFP_KERNEL);
744  	if (!temp) {
745  		DRM_ERROR("Failed to allocate storage for copying "
746  			  "in bin/render CLs.\n");
747  		ret = -ENOMEM;
748  		goto fail;
749  	}
750  	bin = temp + bin_offset;
751  	exec->shader_rec_u = temp + shader_rec_offset;
752  	exec->uniforms_u = temp + uniforms_offset;
753  	exec->shader_state = temp + exec_size;
754  	exec->shader_state_size = args->shader_rec_count;
755  
756  	if (copy_from_user(bin,
757  			   u64_to_user_ptr(args->bin_cl),
758  			   args->bin_cl_size)) {
759  		ret = -EFAULT;
760  		goto fail;
761  	}
762  
763  	if (copy_from_user(exec->shader_rec_u,
764  			   u64_to_user_ptr(args->shader_rec),
765  			   args->shader_rec_size)) {
766  		ret = -EFAULT;
767  		goto fail;
768  	}
769  
770  	if (copy_from_user(exec->uniforms_u,
771  			   u64_to_user_ptr(args->uniforms),
772  			   args->uniforms_size)) {
773  		ret = -EFAULT;
774  		goto fail;
775  	}
776  
777  	bo = vc4_bo_create(dev, exec_size, true, VC4_BO_TYPE_BCL);
778  	if (IS_ERR(bo)) {
779  		DRM_ERROR("Couldn't allocate BO for binning\n");
780  		ret = PTR_ERR(bo);
781  		goto fail;
782  	}
783  	exec->exec_bo = &bo->base;
784  
785  	list_add_tail(&to_vc4_bo(&exec->exec_bo->base)->unref_head,
786  		      &exec->unref_list);
787  
788  	exec->ct0ca = exec->exec_bo->paddr + bin_offset;
789  
790  	exec->bin_u = bin;
791  
792  	exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset;
793  	exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset;
794  	exec->shader_rec_size = args->shader_rec_size;
795  
796  	exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset;
797  	exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset;
798  	exec->uniforms_size = args->uniforms_size;
799  
800  	ret = vc4_validate_bin_cl(dev,
801  				  exec->exec_bo->vaddr + bin_offset,
802  				  bin,
803  				  exec);
804  	if (ret)
805  		goto fail;
806  
807  	ret = vc4_validate_shader_recs(dev, exec);
808  	if (ret)
809  		goto fail;
810  
811  	/* Block waiting on any previous rendering into the CS's VBO,
812  	 * IB, or textures, so that pixels are actually written by the
813  	 * time we try to read them.
814  	 */
815  	ret = vc4_wait_for_seqno(dev, exec->bin_dep_seqno, ~0ull, true);
816  
817  fail:
818  	kvfree(temp);
819  	return ret;
820  }
821  
822  static void
823  vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
824  {
825  	struct vc4_dev *vc4 = to_vc4_dev(dev);
826  	unsigned long irqflags;
827  	unsigned i;
828  
829  	/* If we got force-completed because of GPU reset rather than
830  	 * through our IRQ handler, signal the fence now.
831  	 */
832  	if (exec->fence) {
833  		dma_fence_signal(exec->fence);
834  		dma_fence_put(exec->fence);
835  	}
836  
837  	if (exec->bo) {
838  		for (i = 0; i < exec->bo_count; i++)
839  			drm_gem_object_put_unlocked(&exec->bo[i]->base);
840  		kvfree(exec->bo);
841  	}
842  
843  	while (!list_empty(&exec->unref_list)) {
844  		struct vc4_bo *bo = list_first_entry(&exec->unref_list,
845  						     struct vc4_bo, unref_head);
846  		list_del(&bo->unref_head);
847  		drm_gem_object_put_unlocked(&bo->base.base);
848  	}
849  
850  	/* Free up the allocation of any bin slots we used. */
851  	spin_lock_irqsave(&vc4->job_lock, irqflags);
852  	vc4->bin_alloc_used &= ~exec->bin_slots;
853  	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
854  
855  	mutex_lock(&vc4->power_lock);
856  	if (--vc4->power_refcount == 0) {
857  		pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
858  		pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev);
859  	}
860  	mutex_unlock(&vc4->power_lock);
861  
862  	kfree(exec);
863  }
864  
865  void
866  vc4_job_handle_completed(struct vc4_dev *vc4)
867  {
868  	unsigned long irqflags;
869  	struct vc4_seqno_cb *cb, *cb_temp;
870  
871  	spin_lock_irqsave(&vc4->job_lock, irqflags);
872  	while (!list_empty(&vc4->job_done_list)) {
873  		struct vc4_exec_info *exec =
874  			list_first_entry(&vc4->job_done_list,
875  					 struct vc4_exec_info, head);
876  		list_del(&exec->head);
877  
878  		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
879  		vc4_complete_exec(vc4->dev, exec);
880  		spin_lock_irqsave(&vc4->job_lock, irqflags);
881  	}
882  
883  	list_for_each_entry_safe(cb, cb_temp, &vc4->seqno_cb_list, work.entry) {
884  		if (cb->seqno <= vc4->finished_seqno) {
885  			list_del_init(&cb->work.entry);
886  			schedule_work(&cb->work);
887  		}
888  	}
889  
890  	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
891  }
892  
893  static void vc4_seqno_cb_work(struct work_struct *work)
894  {
895  	struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work);
896  
897  	cb->func(cb);
898  }
899  
900  int vc4_queue_seqno_cb(struct drm_device *dev,
901  		       struct vc4_seqno_cb *cb, uint64_t seqno,
902  		       void (*func)(struct vc4_seqno_cb *cb))
903  {
904  	struct vc4_dev *vc4 = to_vc4_dev(dev);
905  	int ret = 0;
906  	unsigned long irqflags;
907  
908  	cb->func = func;
909  	INIT_WORK(&cb->work, vc4_seqno_cb_work);
910  
911  	spin_lock_irqsave(&vc4->job_lock, irqflags);
912  	if (seqno > vc4->finished_seqno) {
913  		cb->seqno = seqno;
914  		list_add_tail(&cb->work.entry, &vc4->seqno_cb_list);
915  	} else {
916  		schedule_work(&cb->work);
917  	}
918  	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
919  
920  	return ret;
921  }
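/* Illustrative use of the seqno callback API above (a sketch, not code
 * from this file; the names are hypothetical): a caller embeds a
 * struct vc4_seqno_cb in its own state and does roughly
 *
 *	static void my_flip_done(struct vc4_seqno_cb *cb) { ... }
 *	...
 *	vc4_queue_seqno_cb(dev, &my_state->cb, seqno, my_flip_done);
 *
 * The callback runs from a workqueue once vc4->finished_seqno reaches
 * the requested seqno (see vc4_job_handle_completed() above), or is
 * scheduled immediately if that seqno has already completed.
 */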
922  
923  /* Scheduled when any job has been completed, this walks the list of
924   * jobs that had completed and unrefs their BOs and frees their exec
925   * structs.
926   */
927  static void
928  vc4_job_done_work(struct work_struct *work)
929  {
930  	struct vc4_dev *vc4 =
931  		container_of(work, struct vc4_dev, job_done_work);
932  
933  	vc4_job_handle_completed(vc4);
934  }
935  
936  static int
937  vc4_wait_for_seqno_ioctl_helper(struct drm_device *dev,
938  				uint64_t seqno,
939  				uint64_t *timeout_ns)
940  {
941  	unsigned long start = jiffies;
942  	int ret = vc4_wait_for_seqno(dev, seqno, *timeout_ns, true);
943  
944  	if ((ret == -EINTR || ret == -ERESTARTSYS) && *timeout_ns != ~0ull) {
945  		uint64_t delta = jiffies_to_nsecs(jiffies - start);
946  
947  		if (*timeout_ns >= delta)
948  			*timeout_ns -= delta;
949  	}
950  
951  	return ret;
952  }
953  
954  int
955  vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
956  		     struct drm_file *file_priv)
957  {
958  	struct drm_vc4_wait_seqno *args = data;
959  
960  	return vc4_wait_for_seqno_ioctl_helper(dev, args->seqno,
961  					       &args->timeout_ns);
962  }
963  
964  int
965  vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
966  		  struct drm_file *file_priv)
967  {
968  	int ret;
969  	struct drm_vc4_wait_bo *args = data;
970  	struct drm_gem_object *gem_obj;
971  	struct vc4_bo *bo;
972  
973  	if (args->pad != 0)
974  		return -EINVAL;
975  
976  	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
977  	if (!gem_obj) {
978  		DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
979  		return -EINVAL;
980  	}
981  	bo = to_vc4_bo(gem_obj);
982  
983  	ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno,
984  					      &args->timeout_ns);
985  
986  	drm_gem_object_put_unlocked(gem_obj);
987  	return ret;
988  }
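/* Both wait ioctls above share vc4_wait_for_seqno_ioctl_helper(), which
 * subtracts the elapsed time from timeout_ns when the wait is
 * interrupted, so a restarted ioctl continues with the leftover budget.
 * From userspace these are presumably reached through the
 * DRM_IOCTL_VC4_WAIT_SEQNO / DRM_IOCTL_VC4_WAIT_BO wrappers declared in
 * uapi/drm/vc4_drm.h.
 */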
989  
990  /**
991   * vc4_submit_cl_ioctl() - Submits a job (frame) to the VC4.
992   * @dev: DRM device
993   * @data: ioctl argument
994   * @file_priv: DRM file for this fd
995   *
996   * This is the main entrypoint for userspace to submit a 3D frame to
997   * the GPU.  Userspace provides the binner command list (if
998   * applicable), and the kernel sets up the render command list to draw
999   * to the framebuffer described in the ioctl, using the command lists
1000   * that the 3D engine's binner will produce.
1001   */
1002  int
1003  vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
1004  		    struct drm_file *file_priv)
1005  {
1006  	struct vc4_dev *vc4 = to_vc4_dev(dev);
1007  	struct drm_vc4_submit_cl *args = data;
1008  	struct vc4_exec_info *exec;
1009  	struct ww_acquire_ctx acquire_ctx;
1010  	int ret = 0;
1011  
1012  	if ((args->flags & ~(VC4_SUBMIT_CL_USE_CLEAR_COLOR |
1013  			     VC4_SUBMIT_CL_FIXED_RCL_ORDER |
1014  			     VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X |
1015  			     VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y)) != 0) {
1016  		DRM_DEBUG("Unknown flags: 0x%02x\n", args->flags);
1017  		return -EINVAL;
1018  	}
1019  
1020  	exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
1021  	if (!exec) {
1022  		DRM_ERROR("malloc failure on exec struct\n");
1023  		return -ENOMEM;
1024  	}
1025  
1026  	mutex_lock(&vc4->power_lock);
1027  	if (vc4->power_refcount++ == 0) {
1028  		ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
1029  		if (ret < 0) {
1030  			mutex_unlock(&vc4->power_lock);
1031  			vc4->power_refcount--;
1032  			kfree(exec);
1033  			return ret;
1034  		}
1035  	}
1036  	mutex_unlock(&vc4->power_lock);
1037  
1038  	exec->args = args;
1039  	INIT_LIST_HEAD(&exec->unref_list);
1040  
1041  	ret = vc4_cl_lookup_bos(dev, file_priv, exec);
1042  	if (ret)
1043  		goto fail;
1044  
1045  	if (exec->args->bin_cl_size != 0) {
1046  		ret = vc4_get_bcl(dev, exec);
1047  		if (ret)
1048  			goto fail;
1049  	} else {
1050  		exec->ct0ca = 0;
1051  		exec->ct0ea = 0;
1052  	}
1053  
1054  	ret = vc4_get_rcl(dev, exec);
1055  	if (ret)
1056  		goto fail;
1057  
1058  	ret = vc4_lock_bo_reservations(dev, exec, &acquire_ctx);
1059  	if (ret)
1060  		goto fail;
1061  
1062  	/* Clear this out of the struct we'll be putting in the queue,
1063  	 * since it's part of our stack.
1064  	 */
1065  	exec->args = NULL;
1066  
1067  	ret = vc4_queue_submit(dev, exec, &acquire_ctx);
1068  	if (ret)
1069  		goto fail;
1070  
1071  	/* Return the seqno for our job. */
1072  	args->seqno = vc4->emit_seqno;
1073  
1074  	return 0;
1075  
1076  fail:
1077  	vc4_complete_exec(vc4->dev, exec);
1078  
1079  	return ret;
1080  }
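/* A minimal userspace submission sketch for the ioctl above, assuming
 * the DRM_IOCTL_VC4_SUBMIT_CL wrapper from uapi/drm/vc4_drm.h and
 * libdrm's drmIoctl() (illustrative only, not part of this driver):
 *
 *	struct drm_vc4_submit_cl submit = {
 *		.bin_cl = (uintptr_t)bin_cl,
 *		.bin_cl_size = bin_cl_size,
 *		.shader_rec = (uintptr_t)shader_recs,
 *		.shader_rec_size = shader_rec_size,
 *		.shader_rec_count = shader_rec_count,
 *		.uniforms = (uintptr_t)uniforms,
 *		.uniforms_size = uniforms_size,
 *		.bo_handles = (uintptr_t)bo_handles,
 *		.bo_handle_count = bo_handle_count,
 *	};
 *	ret = drmIoctl(fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit);
 *
 * On success, submit.seqno holds the job's seqno for the wait ioctls.
 * The render-target, clear and tile-bound fields are omitted here; the
 * kernel builds the render command list from them in vc4_get_rcl().
 */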
1081  
1082  void
1083  vc4_gem_init(struct drm_device *dev)
1084  {
1085  	struct vc4_dev *vc4 = to_vc4_dev(dev);
1086  
1087  	vc4->dma_fence_context = dma_fence_context_alloc(1);
1088  
1089  	INIT_LIST_HEAD(&vc4->bin_job_list);
1090  	INIT_LIST_HEAD(&vc4->render_job_list);
1091  	INIT_LIST_HEAD(&vc4->job_done_list);
1092  	INIT_LIST_HEAD(&vc4->seqno_cb_list);
1093  	spin_lock_init(&vc4->job_lock);
1094  
1095  	INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work);
1096  	setup_timer(&vc4->hangcheck.timer,
1097  		    vc4_hangcheck_elapsed,
1098  		    (unsigned long)dev);
1099  
1100  	INIT_WORK(&vc4->job_done_work, vc4_job_done_work);
1101  
1102  	mutex_init(&vc4->power_lock);
1103  }
1104  
1105  void
1106  vc4_gem_destroy(struct drm_device *dev)
1107  {
1108  	struct vc4_dev *vc4 = to_vc4_dev(dev);
1109  
1110  	/* Waiting for exec to finish would need to be done before
1111  	 * unregistering V3D.
1112  	 */
1113  	WARN_ON(vc4->emit_seqno != vc4->finished_seqno);
1114  
1115  	/* V3D should already have disabled its interrupt and cleared
1116  	 * the overflow allocation registers.  Now free the object.
1117  	 */
1118  	if (vc4->bin_bo) {
1119  		drm_gem_object_put_unlocked(&vc4->bin_bo->base.base);
1120  		vc4->bin_bo = NULL;
1121  	}
1122  
1123  	if (vc4->hang_state)
1124  		vc4_free_hang_state(dev, vc4->hang_state);
1125  }
1126