1 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
2 /*
3  *
4  * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
5  *
6  * This program is free software and is provided to you under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation, and any use by you of this program is subject to the terms
9  * of such GNU license.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, you can access it online at
18  * http://www.gnu.org/licenses/gpl-2.0.html.
19  *
20  */
21 
22 #include <mali_kbase.h>
23 #include "mali_kbase_config_defaults.h"
24 #include <mali_kbase_ctx_sched.h>
25 #include <mali_kbase_reset_gpu.h>
26 #include <mali_kbase_as_fault_debugfs.h>
27 #include "mali_kbase_csf.h"
28 #include <tl/mali_kbase_tracepoints.h>
29 #include <backend/gpu/mali_kbase_pm_internal.h>
30 #include <linux/export.h>
31 #include <uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h>
32 #include <uapi/gpu/arm/bifrost/mali_base_kernel.h>
33 #include <mali_kbase_hwaccess_time.h>
34 
35 /* Value to indicate that a queue group is not on the groups_to_schedule list */
36 #define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX)
37 
38 /* This decides the upper limit on the waiting time for the Scheduler
39  * to exit the sleep state. Usually the value of autosuspend_delay is
40  * expected to be around 100 milliseconds.
41  */
42 #define MAX_AUTO_SUSPEND_DELAY_MS (5000)
43 
44 /* Maximum number of endpoints which may run tiler jobs. */
45 #define CSG_TILER_MAX ((u8)1)
46 
47 /* Maximum dynamic CSG slot priority value */
48 #define MAX_CSG_SLOT_PRIORITY ((u8)15)
49 
50 /* CSF scheduler time slice value */
51 #define CSF_SCHEDULER_TIME_TICK_MS (100) /* 100 milliseconds */
52 
53 /*
54  * CSF scheduler time threshold for converting "tock" requests into "tick" if
55  * they come too close to the end of a tick interval. This avoids scheduling
56  * twice in a row.
57  */
58 #define CSF_SCHEDULER_TIME_TICK_THRESHOLD_MS \
59 	CSF_SCHEDULER_TIME_TICK_MS
60 
61 #define CSF_SCHEDULER_TIME_TICK_THRESHOLD_JIFFIES \
62 	msecs_to_jiffies(CSF_SCHEDULER_TIME_TICK_THRESHOLD_MS)
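/* Example: with CSF_SCHEDULER_TIME_TICK_MS at 100 and assuming HZ is 250,
 * the threshold works out to msecs_to_jiffies(100) == 25 jiffies, so a tock
 * landing that close to the next tick is handled as part of the tick.
 */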
63 
64 /* Nanoseconds per millisecond */
65 #define NS_PER_MS ((u64)1000 * 1000)
66 
67 /*
68  * CSF minimum time to reschedule for a new "tock" request. Bursts of "tock"
69  * requests are not serviced immediately, but shall wait for a minimum time in
70  * order to reduce load on the CSF scheduler thread.
71  */
72 #define CSF_SCHEDULER_TIME_TOCK_JIFFIES 1 /* 1 jiffies-time */
73 
74 /* CS suspended and is idle (empty ring buffer) */
75 #define CS_IDLE_FLAG (1 << 0)
76 
77 /* CS suspended and is waiting for a CQS condition */
78 #define CS_WAIT_SYNC_FLAG (1 << 1)
79 
80 /* A GPU address space slot is reserved for MCU. */
81 #define NUM_RESERVED_AS_SLOTS (1)
82 
83 static int scheduler_group_schedule(struct kbase_queue_group *group);
84 static void remove_group_from_idle_wait(struct kbase_queue_group *const group);
85 static
86 void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler,
87 		struct kbase_queue_group *const group,
88 		enum kbase_csf_group_state run_state);
89 static struct kbase_queue_group *scheduler_get_protm_enter_async_group(
90 		struct kbase_device *const kbdev,
91 		struct kbase_queue_group *const group);
92 static struct kbase_queue_group *get_tock_top_group(
93 	struct kbase_csf_scheduler *const scheduler);
94 static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev);
95 static int suspend_active_queue_groups(struct kbase_device *kbdev,
96 				       unsigned long *slot_mask);
97 static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev,
98 					      bool system_suspend);
99 static void schedule_in_cycle(struct kbase_queue_group *group, bool force);
100 
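/* A context's GPU address space is treated as enabled unless it was disabled
 * on a fault (KCTX_AS_DISABLED_ON_FAULT).
 */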
101 #define kctx_as_enabled(kctx) (!kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT))
102 
103 #ifdef KBASE_PM_RUNTIME
104 /**
105  * wait_for_scheduler_to_exit_sleep() - Wait for Scheduler to exit the
106  *                                      sleeping state.
107  *
108  * @kbdev: Pointer to the device
109  *
110  * This function waits until the Scheduler has exited the sleep state and
111  * it is called when an on-slot group is terminated or when the suspend
112  * buffer of an on-slot group needs to be captured.
113  *
114  * Return: 0 when the wait is successful, otherwise an error code.
115  */
116 static int wait_for_scheduler_to_exit_sleep(struct kbase_device *kbdev)
117 {
118 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
119 	int autosuspend_delay = kbdev->dev->power.autosuspend_delay;
120 	unsigned int sleep_exit_wait_time;
121 	long remaining;
122 	int ret = 0;
123 
124 	lockdep_assert_held(&scheduler->lock);
125 	WARN_ON(scheduler->state != SCHED_SLEEPING);
126 
127 	/* No point in waiting if the autosuspend_delay value is negative.
128 	 * For a negative autosuspend_delay the driver goes directly for the
129 	 * suspend of the Scheduler, but the autosuspend_delay value could
130 	 * have been changed after the sleep was initiated.
131 	 */
132 	if (autosuspend_delay < 0)
133 		return -EINVAL;
134 
135 	if (autosuspend_delay > MAX_AUTO_SUSPEND_DELAY_MS)
136 		autosuspend_delay = MAX_AUTO_SUSPEND_DELAY_MS;
137 
138 	/* Usually Scheduler would remain in sleeping state until the
139 	 * auto-suspend timer expires and all active CSGs are suspended.
140 	 */
141 	sleep_exit_wait_time = autosuspend_delay + kbdev->reset_timeout_ms;
142 
143 	remaining = kbase_csf_timeout_in_jiffies(sleep_exit_wait_time);
144 
145 	while ((scheduler->state == SCHED_SLEEPING) && !ret) {
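		/* scheduler->state is updated under scheduler->lock, so drop
		 * the lock while sleeping to let the wake-up path progress.
		 */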
146 		mutex_unlock(&scheduler->lock);
147 		remaining = wait_event_timeout(
148 				kbdev->csf.event_wait,
149 				(scheduler->state != SCHED_SLEEPING),
150 				remaining);
151 		mutex_lock(&scheduler->lock);
152 		if (!remaining && (scheduler->state == SCHED_SLEEPING))
153 			ret = -ETIMEDOUT;
154 	}
155 
156 	return ret;
157 }
158 
159 /**
160  * force_scheduler_to_exit_sleep() - Force scheduler to exit sleep state
161  *
162  * @kbdev: Pointer to the device
163  *
164  * This function will force the Scheduler to exit the sleep state by waking
165  * up the MCU and suspending the on-slot groups. It is called at the time of
166  * system suspend.
167  *
168  * Return: 0 on success.
169  */
170 static int force_scheduler_to_exit_sleep(struct kbase_device *kbdev)
171 {
172 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
173 	unsigned long flags;
174 	int ret = 0;
175 
176 	lockdep_assert_held(&scheduler->lock);
177 	WARN_ON(scheduler->state != SCHED_SLEEPING);
178 	WARN_ON(!kbdev->pm.backend.gpu_sleep_mode_active);
179 
180 	kbase_pm_lock(kbdev);
181 	ret = kbase_pm_force_mcu_wakeup_after_sleep(kbdev);
182 	kbase_pm_unlock(kbdev);
183 	if (ret) {
184 		dev_warn(kbdev->dev,
185 			 "[%llu] Wait for MCU wake up failed on forced scheduler suspend",
186 			 kbase_backend_get_cycle_cnt(kbdev));
187 		goto out;
188 	}
189 
190 	if (suspend_active_groups_on_powerdown(kbdev, true))
191 		goto out;
192 
193 	kbase_pm_lock(kbdev);
194 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
195 	kbdev->pm.backend.gpu_sleep_mode_active = false;
196 	kbdev->pm.backend.gpu_wakeup_override = false;
197 	kbase_pm_update_state(kbdev);
198 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
199 	ret = kbase_pm_wait_for_desired_state(kbdev);
200 	kbase_pm_unlock(kbdev);
201 	if (ret) {
202 		dev_warn(kbdev->dev,
203 			 "[%llu] Wait for pm state change failed on forced scheduler suspend",
204 			 kbase_backend_get_cycle_cnt(kbdev));
205 		goto out;
206 	}
207 
208 	scheduler->state = SCHED_SUSPENDED;
209 
210 	return 0;
211 
212 out:
213 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
214 	kbdev->pm.backend.exit_gpu_sleep_mode = true;
215 	kbdev->pm.backend.gpu_wakeup_override = false;
216 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
217 	kbase_csf_scheduler_invoke_tick(kbdev);
218 
219 	return ret;
220 }
221 #endif
222 
223 /**
224  * tick_timer_callback() - Callback function for the scheduling tick hrtimer
225  *
226  * @timer: Pointer to the scheduling tick hrtimer
227  *
228  * This function will enqueue the scheduling tick work item for immediate
229  * execution, if it has not been queued already.
230  *
231  * Return: enum value to indicate that timer should not be restarted.
232  */
233 static enum hrtimer_restart tick_timer_callback(struct hrtimer *timer)
234 {
235 	struct kbase_device *kbdev = container_of(timer, struct kbase_device,
236 						  csf.scheduler.tick_timer);
237 
238 	kbase_csf_scheduler_advance_tick(kbdev);
239 	return HRTIMER_NORESTART;
240 }
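/* The tick hrtimer is one-shot here (HRTIMER_NORESTART); re-arming is done
 * via start_tick_timer(), which is expected to be called from the tick work
 * item (see its description below).
 */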
241 
242 /**
243  * start_tick_timer() - Start the scheduling tick hrtimer.
244  *
245  * @kbdev: Pointer to the device
246  *
247  * This function will start the scheduling tick hrtimer and is supposed to
248  * be called only from the tick work item function. The tick hrtimer should
249  * not be active already.
250  */
251 static void start_tick_timer(struct kbase_device *kbdev)
252 {
253 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
254 	unsigned long flags;
255 
256 	lockdep_assert_held(&scheduler->lock);
257 
258 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
259 	WARN_ON(scheduler->tick_timer_active);
260 	if (likely(!work_pending(&scheduler->tick_work))) {
261 		scheduler->tick_timer_active = true;
262 
263 		hrtimer_start(&scheduler->tick_timer,
264 		    HR_TIMER_DELAY_MSEC(scheduler->csg_scheduling_period_ms),
265 		    HRTIMER_MODE_REL);
266 	}
267 	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
268 }
269 
270 /**
271  * cancel_tick_timer() - Cancel the scheduling tick hrtimer
272  *
273  * @kbdev: Pointer to the device
274  */
275 static void cancel_tick_timer(struct kbase_device *kbdev)
276 {
277 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
278 	unsigned long flags;
279 
280 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
281 	scheduler->tick_timer_active = false;
282 	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
283 	hrtimer_cancel(&scheduler->tick_timer);
284 }
285 
286 /**
287  * enqueue_tick_work() - Enqueue the scheduling tick work item
288  *
289  * @kbdev: Pointer to the device
290  *
291  * This function will queue the scheduling tick work item for immediate
292  * execution. This shall only be called when both the tick hrtimer and tick
293  * work item are not active/pending.
294  */
295 static void enqueue_tick_work(struct kbase_device *kbdev)
296 {
297 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
298 
299 	lockdep_assert_held(&scheduler->lock);
300 
301 	kbase_csf_scheduler_invoke_tick(kbdev);
302 }
303 
304 static void release_doorbell(struct kbase_device *kbdev, int doorbell_nr)
305 {
306 	WARN_ON(doorbell_nr >= CSF_NUM_DOORBELL);
307 
308 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
309 	clear_bit(doorbell_nr, kbdev->csf.scheduler.doorbell_inuse_bitmap);
310 }
311 
312 static int acquire_doorbell(struct kbase_device *kbdev)
313 {
314 	int doorbell_nr;
315 
316 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
317 
318 	doorbell_nr = find_first_zero_bit(
319 			kbdev->csf.scheduler.doorbell_inuse_bitmap,
320 			CSF_NUM_DOORBELL);
321 
322 	if (doorbell_nr >= CSF_NUM_DOORBELL)
323 		return KBASEP_USER_DB_NR_INVALID;
324 
325 	set_bit(doorbell_nr, kbdev->csf.scheduler.doorbell_inuse_bitmap);
326 
327 	return doorbell_nr;
328 }
329 
330 static void unassign_user_doorbell_from_group(struct kbase_device *kbdev,
331 		struct kbase_queue_group *group)
332 {
333 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
334 
335 	if (group->doorbell_nr != KBASEP_USER_DB_NR_INVALID) {
336 		release_doorbell(kbdev, group->doorbell_nr);
337 		group->doorbell_nr = KBASEP_USER_DB_NR_INVALID;
338 	}
339 }
340 
341 static void unassign_user_doorbell_from_queue(struct kbase_device *kbdev,
342 		struct kbase_queue *queue)
343 {
344 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
345 
346 	mutex_lock(&kbdev->csf.reg_lock);
347 
348 	if (queue->doorbell_nr != KBASEP_USER_DB_NR_INVALID) {
349 		queue->doorbell_nr = KBASEP_USER_DB_NR_INVALID;
350 		/* After this the dummy page would be mapped in */
351 		unmap_mapping_range(kbdev->csf.db_filp->f_inode->i_mapping,
352 			queue->db_file_offset << PAGE_SHIFT, PAGE_SIZE, 1);
353 	}
354 
355 	mutex_unlock(&kbdev->csf.reg_lock);
356 }
357 
358 static void assign_user_doorbell_to_group(struct kbase_device *kbdev,
359 		struct kbase_queue_group *group)
360 {
361 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
362 
363 	if (group->doorbell_nr == KBASEP_USER_DB_NR_INVALID)
364 		group->doorbell_nr = acquire_doorbell(kbdev);
365 }
366 
367 static void assign_user_doorbell_to_queue(struct kbase_device *kbdev,
368 		struct kbase_queue *const queue)
369 {
370 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
371 
372 	mutex_lock(&kbdev->csf.reg_lock);
373 
374 	/* If the bind operation for the queue hasn't completed yet, then the
375 	 * CSI can't be programmed for the queue
376 	 * (even in stopped state) and so the doorbell also can't be assigned
377 	 * to it.
378 	 */
379 	if ((queue->bind_state == KBASE_CSF_QUEUE_BOUND) &&
380 	    (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID)) {
381 		WARN_ON(queue->group->doorbell_nr == KBASEP_USER_DB_NR_INVALID);
382 		queue->doorbell_nr = queue->group->doorbell_nr;
383 
384 		/* After this the real Hw doorbell page would be mapped in */
385 		unmap_mapping_range(
386 				kbdev->csf.db_filp->f_inode->i_mapping,
387 				queue->db_file_offset << PAGE_SHIFT,
388 				PAGE_SIZE, 1);
389 	}
390 
391 	mutex_unlock(&kbdev->csf.reg_lock);
392 }
393 
394 static void scheduler_doorbell_init(struct kbase_device *kbdev)
395 {
396 	int doorbell_nr;
397 
398 	bitmap_zero(kbdev->csf.scheduler.doorbell_inuse_bitmap,
399 		CSF_NUM_DOORBELL);
400 
401 	mutex_lock(&kbdev->csf.scheduler.lock);
402 	/* Reserve doorbell 0 for use by kernel driver */
403 	doorbell_nr = acquire_doorbell(kbdev);
404 	mutex_unlock(&kbdev->csf.scheduler.lock);
405 
406 	WARN_ON(doorbell_nr != CSF_KERNEL_DOORBELL_NR);
407 }
408 
409 u32 kbase_csf_scheduler_get_nr_active_csgs_locked(struct kbase_device *kbdev)
410 {
411 	u32 nr_active_csgs;
412 
413 	lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock);
414 
415 	nr_active_csgs = bitmap_weight(kbdev->csf.scheduler.csg_inuse_bitmap,
416 				kbdev->csf.global_iface.group_num);
417 
418 	return nr_active_csgs;
419 }
420 
421 u32 kbase_csf_scheduler_get_nr_active_csgs(struct kbase_device *kbdev)
422 {
423 	u32 nr_active_csgs;
424 	unsigned long flags;
425 
426 	spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
427 	nr_active_csgs = kbase_csf_scheduler_get_nr_active_csgs_locked(kbdev);
428 	spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
429 
430 	return nr_active_csgs;
431 }
432 
433 /**
434  * csg_slot_in_use - returns true if a queue group has been programmed on a
435  *                   given CSG slot.
436  *
437  * @kbdev: Instance of a GPU platform device that implements a CSF interface.
438  * @slot:  Index/number of the CSG slot in question.
439  *
440  * Return: true if a queue group is resident on the given CSG slot, otherwise false.
441  *
442  * Note: Caller must hold the scheduler lock.
443  */
444 static inline bool csg_slot_in_use(struct kbase_device *kbdev, int slot)
445 {
446 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
447 
448 	return (kbdev->csf.scheduler.csg_slots[slot].resident_group != NULL);
449 }
450 
451 static bool queue_group_suspended_locked(struct kbase_queue_group *group)
452 {
453 	lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock);
454 
455 	return (group->run_state == KBASE_CSF_GROUP_SUSPENDED ||
456 		group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE ||
457 		group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC);
458 }
459 
460 static bool queue_group_idle_locked(struct kbase_queue_group *group)
461 {
462 	lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock);
463 
464 	return (group->run_state == KBASE_CSF_GROUP_IDLE ||
465 		group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE);
466 }
467 
468 static bool on_slot_group_idle_locked(struct kbase_queue_group *group)
469 {
470 	lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock);
471 
472 	return (group->run_state == KBASE_CSF_GROUP_IDLE);
473 }
474 
475 static bool queue_group_scheduled(struct kbase_queue_group *group)
476 {
477 	return (group->run_state != KBASE_CSF_GROUP_INACTIVE &&
478 		group->run_state != KBASE_CSF_GROUP_TERMINATED &&
479 		group->run_state != KBASE_CSF_GROUP_FAULT_EVICTED);
480 }
481 
482 static bool queue_group_scheduled_locked(struct kbase_queue_group *group)
483 {
484 	lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock);
485 
486 	return queue_group_scheduled(group);
487 }
488 
489 /**
490  * scheduler_wait_protm_quit() - Wait for GPU to exit protected mode.
491  *
492  * @kbdev: Pointer to the GPU device
493  *
494  * This function waits for the GPU to exit protected mode which is confirmed
495  * when active_protm_grp is set to NULL.
496  */
497 static void scheduler_wait_protm_quit(struct kbase_device *kbdev)
498 {
499 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
500 	long wt = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
501 	long remaining;
502 
503 	lockdep_assert_held(&scheduler->lock);
504 
505 	KBASE_KTRACE_ADD(kbdev, SCHEDULER_WAIT_PROTM_QUIT, NULL,
506 			 jiffies_to_msecs(wt));
507 
508 	remaining = wait_event_timeout(kbdev->csf.event_wait,
509 			!kbase_csf_scheduler_protected_mode_in_use(kbdev), wt);
510 
511 	if (!remaining)
512 		dev_warn(kbdev->dev, "[%llu] Timeout (%d ms), protm_quit wait skipped",
513 			kbase_backend_get_cycle_cnt(kbdev),
514 			kbdev->csf.fw_timeout_ms);
515 
516 	KBASE_KTRACE_ADD(kbdev, SCHEDULER_WAIT_PROTM_QUIT_DONE, NULL,
517 			 jiffies_to_msecs(remaining));
518 }
519 
520 /**
521  * scheduler_force_protm_exit() - Force GPU to exit protected mode.
522  *
523  * @kbdev: Pointer to the GPU device
524  *
525  * This function sends a ping request to the firmware and waits for the GPU
526  * to exit protected mode.
527  */
528 static void scheduler_force_protm_exit(struct kbase_device *kbdev)
529 {
530 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
531 
532 	kbase_csf_firmware_ping(kbdev);
533 	scheduler_wait_protm_quit(kbdev);
534 }
535 
536 /**
537  * scheduler_timer_is_enabled_nolock() - Check if the scheduler wakes up
538  * automatically for periodic tasks.
539  *
540  * @kbdev: Pointer to the device
541  *
542  * This is a variant of kbase_csf_scheduler_timer_is_enabled() that assumes the
543  * CSF scheduler lock to already have been held.
544  *
545  * Return: true if the scheduler is configured to wake up periodically
546  */
547 static bool scheduler_timer_is_enabled_nolock(struct kbase_device *kbdev)
548 {
549 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
550 
551 	return kbdev->csf.scheduler.timer_enabled;
552 }
553 
554 static void enable_gpu_idle_fw_timer(struct kbase_device *kbdev)
555 {
556 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
557 	unsigned long flags;
558 
559 	lockdep_assert_held(&scheduler->lock);
560 
561 	if (scheduler->gpu_idle_fw_timer_enabled)
562 		return;
563 
564 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
565 
566 	/* Updating the timer_enabled flag requires holding interrupt_lock */
567 	scheduler->gpu_idle_fw_timer_enabled = true;
568 	kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
569 
570 	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
571 }
572 
573 static void disable_gpu_idle_fw_timer_locked(struct kbase_device *kbdev)
574 {
575 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
576 
577 	lockdep_assert_held(&scheduler->lock);
578 	lockdep_assert_held(&scheduler->interrupt_lock);
579 
580 	/* Update of the timer_enabled flag requires holding interrupt_lock */
581 	if (scheduler->gpu_idle_fw_timer_enabled) {
582 		scheduler->gpu_idle_fw_timer_enabled = false;
583 		kbase_csf_firmware_disable_gpu_idle_timer(kbdev);
584 	}
585 }
586 
587 static void disable_gpu_idle_fw_timer(struct kbase_device *kbdev)
588 {
589 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
590 	unsigned long flags;
591 
592 	lockdep_assert_held(&scheduler->lock);
593 
594 	if (!scheduler->gpu_idle_fw_timer_enabled)
595 		return;
596 
597 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
598 	disable_gpu_idle_fw_timer_locked(kbdev);
599 	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
600 }
601 
602 /**
603  * scheduler_pm_active_handle_suspend() - Acquire the PM reference count for
604  *                                        Scheduler
605  *
606  * @kbdev: Pointer to the device
607  * @suspend_handler: Handler code for how to handle a suspend that might occur.
608  *
609  * This function is usually called when Scheduler needs to be activated.
610  * The PM reference count is acquired for the Scheduler and the power on
611  * of GPU is initiated.
612  *
613  * Return: 0 if successful or a negative error code on failure.
614  */
615 static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev,
616 				enum kbase_pm_suspend_handler suspend_handler)
617 {
618 	unsigned long flags;
619 	u32 prev_count;
620 	int ret = 0;
621 
622 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
623 
624 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
625 	prev_count = kbdev->csf.scheduler.pm_active_count;
626 	if (!WARN_ON(prev_count == U32_MAX))
627 		kbdev->csf.scheduler.pm_active_count++;
628 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
629 
630 	/* On 0 => 1, make a pm_ctx_active request */
631 	if (!prev_count) {
632 		ret = kbase_pm_context_active_handle_suspend(kbdev,
633 							suspend_handler);
634 		if (ret) {
635 			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
636 			kbdev->csf.scheduler.pm_active_count--;
637 			kbase_pm_update_state(kbdev);
638 			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
639 		}
640 	}
641 
642 	return ret;
643 }
644 
645 #ifdef KBASE_PM_RUNTIME
646 /**
647  * scheduler_pm_active_after_sleep() - Acquire the PM reference count for
648  *                                     Scheduler
649  *
650  * @kbdev: Pointer to the device
651  * @flags: flags containing previous interrupt state
652  *
653  * This function is called when Scheduler needs to be activated from the
654  * sleeping state.
655  * The PM reference count is acquired for the Scheduler and the wake up of
656  * MCU is initiated. It resets the flag that indicates to the MCU state
657  * machine that MCU needs to be put in sleep state.
658  *
659  * Note: This function shall be called with hwaccess lock held and it will
660  * release that lock.
661  *
662  * Return: zero when the PM reference was taken and non-zero when the
663  * system is suspending or already suspended.
664  */
665 static int scheduler_pm_active_after_sleep(struct kbase_device *kbdev,
666 					   unsigned long flags)
667 {
668 	u32 prev_count;
669 	int ret = 0;
670 
671 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
672 	lockdep_assert_held(&kbdev->hwaccess_lock);
673 
674 	prev_count = kbdev->csf.scheduler.pm_active_count;
675 	if (!WARN_ON(prev_count == U32_MAX))
676 		kbdev->csf.scheduler.pm_active_count++;
677 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
678 
679 	/* On 0 => 1, make a pm_ctx_active request */
680 	if (!prev_count) {
681 		ret = kbase_pm_context_active_handle_suspend(kbdev,
682 				KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
683 
684 		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
685 		if (ret)
686 			kbdev->csf.scheduler.pm_active_count--;
687 		else
688 			kbdev->pm.backend.gpu_sleep_mode_active = false;
689 		kbase_pm_update_state(kbdev);
690 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
691 	}
692 
693 	return ret;
694 }
695 #endif
696 
697 /**
698  * scheduler_pm_idle() - Release the PM reference count held by Scheduler
699  *
700  * @kbdev: Pointer to the device
701  *
702  * This function is usually called after Scheduler is suspended.
703  * The PM reference count held by the Scheduler is released to trigger the
704  * power down of GPU.
705  */
706 static void scheduler_pm_idle(struct kbase_device *kbdev)
707 {
708 	unsigned long flags;
709 	u32 prev_count;
710 
711 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
712 
713 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
714 	prev_count = kbdev->csf.scheduler.pm_active_count;
715 	if (!WARN_ON(prev_count == 0))
716 		kbdev->csf.scheduler.pm_active_count--;
717 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
718 
719 	if (prev_count == 1)
720 		kbase_pm_context_idle(kbdev);
721 }
722 
723 #ifdef KBASE_PM_RUNTIME
724 /**
725  * scheduler_pm_idle_before_sleep() - Release the PM reference count and
726  *                                    trigger the transition to sleep state.
727  *
728  * @kbdev: Pointer to the device
729  *
730  * This function is called on the GPU idle notification. It releases the
731  * Scheduler's PM reference count and sets the flag to indicate to the
732  * MCU state machine that MCU needs to be put in sleep state.
733  */
734 static void scheduler_pm_idle_before_sleep(struct kbase_device *kbdev)
735 {
736 	unsigned long flags;
737 	u32 prev_count;
738 
739 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
740 
741 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
742 	prev_count = kbdev->csf.scheduler.pm_active_count;
743 	if (!WARN_ON(prev_count == 0))
744 		kbdev->csf.scheduler.pm_active_count--;
745 	kbdev->pm.backend.gpu_sleep_mode_active = true;
746 	kbdev->pm.backend.exit_gpu_sleep_mode = false;
747 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
748 
749 	if (prev_count == 1)
750 		kbase_pm_context_idle(kbdev);
751 }
752 #endif
753 
754 static void scheduler_wakeup(struct kbase_device *kbdev, bool kick)
755 {
756 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
757 	int ret;
758 
759 	lockdep_assert_held(&scheduler->lock);
760 
761 	if ((scheduler->state != SCHED_SUSPENDED) &&
762 	    (scheduler->state != SCHED_SLEEPING))
763 		return;
764 
765 	if (scheduler->state == SCHED_SUSPENDED) {
766 		dev_dbg(kbdev->dev,
767 			"Re-activating the Scheduler after suspend");
768 		ret = scheduler_pm_active_handle_suspend(kbdev,
769 				KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
770 	} else {
771 #ifdef KBASE_PM_RUNTIME
772 		unsigned long flags;
773 
774 		dev_dbg(kbdev->dev,
775 			"Re-activating the Scheduler out of sleep");
776 
777 		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
778 		ret = scheduler_pm_active_after_sleep(kbdev, flags);
779 		/* hwaccess_lock is released in the previous function call. */
780 #endif
781 	}
782 
783 	if (ret) {
784 		/* GPUCORE-29850 would add the handling for the case where
785 		 * Scheduler could not be activated due to system suspend.
786 		 */
787 		dev_info(kbdev->dev,
788 			"Couldn't wakeup Scheduler due to system suspend");
789 		return;
790 	}
791 
792 	scheduler->state = SCHED_INACTIVE;
793 
794 	if (kick)
795 		scheduler_enable_tick_timer_nolock(kbdev);
796 }
797 
798 static void scheduler_suspend(struct kbase_device *kbdev)
799 {
800 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
801 
802 	lockdep_assert_held(&scheduler->lock);
803 
804 	if (!WARN_ON(scheduler->state == SCHED_SUSPENDED)) {
805 		dev_dbg(kbdev->dev, "Suspending the Scheduler");
806 		scheduler_pm_idle(kbdev);
807 		scheduler->state = SCHED_SUSPENDED;
808 	}
809 }
810 
811 /**
812  * update_idle_suspended_group_state() - Move the queue group to a non-idle
813  *                                       suspended state.
814  * @group: Pointer to the queue group.
815  *
816  * This function is called to change the state of queue group to non-idle
817  * suspended state, if the group was suspended when all the queues bound to it
818  * became empty or when some queues got blocked on a sync wait & others became
819  * empty. The group is also moved to the runnable list from idle wait list in
820  * the latter case.
821  * So the function gets called when a queue is kicked or a sync wait condition
822  * gets satisfied.
823  */
824 static void update_idle_suspended_group_state(struct kbase_queue_group *group)
825 {
826 	struct kbase_csf_scheduler *scheduler =
827 		&group->kctx->kbdev->csf.scheduler;
828 	int new_val;
829 
830 	lockdep_assert_held(&scheduler->lock);
831 
832 	if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) {
833 		remove_group_from_idle_wait(group);
834 		insert_group_to_runnable(scheduler, group,
835 					 KBASE_CSF_GROUP_SUSPENDED);
836 	} else if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE) {
837 		group->run_state = KBASE_CSF_GROUP_SUSPENDED;
838 
839 		/* If scheduler is not suspended and the given group's
840 		 * static priority (reflected by the scan_seq_num) is inside
841 		 * the current tick slot-range, or there are some on_slot
842 		 * idle groups, schedule an async tock.
843 		 */
844 		if (scheduler->state != SCHED_SUSPENDED) {
845 			unsigned long flags;
846 			int n_idle;
847 			int n_used;
848 			int n_slots =
849 				group->kctx->kbdev->csf.global_iface.group_num;
850 
851 			spin_lock_irqsave(&scheduler->interrupt_lock, flags);
852 			n_idle = bitmap_weight(scheduler->csg_slots_idle_mask,
853 					       n_slots);
854 			n_used = bitmap_weight(scheduler->csg_inuse_bitmap,
855 					       n_slots);
856 			spin_unlock_irqrestore(&scheduler->interrupt_lock,
857 					       flags);
858 
859 			if (n_idle ||
860 			    n_used < scheduler->num_csg_slots_for_tick ||
861 			    group->scan_seq_num <
862 				    scheduler->num_csg_slots_for_tick)
863 				schedule_in_cycle(group, true);
864 		}
865 	} else
866 		return;
867 
868 	new_val = atomic_inc_return(&scheduler->non_idle_offslot_grps);
869 	KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC,
870 				group, new_val);
871 }
872 
873 int kbase_csf_scheduler_group_get_slot_locked(struct kbase_queue_group *group)
874 {
875 	struct kbase_csf_scheduler *scheduler =
876 			&group->kctx->kbdev->csf.scheduler;
877 	int slot_num = group->csg_nr;
878 
879 	lockdep_assert_held(&scheduler->interrupt_lock);
880 
881 	if (slot_num >= 0) {
882 		if (WARN_ON(scheduler->csg_slots[slot_num].resident_group !=
883 			    group))
884 			return -1;
885 	}
886 
887 	return slot_num;
888 }
889 
890 int kbase_csf_scheduler_group_get_slot(struct kbase_queue_group *group)
891 {
892 	struct kbase_csf_scheduler *scheduler =
893 			&group->kctx->kbdev->csf.scheduler;
894 	unsigned long flags;
895 	int slot_num;
896 
897 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
898 	slot_num = kbase_csf_scheduler_group_get_slot_locked(group);
899 	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
900 
901 	return slot_num;
902 }
903 
904 /* kbasep_csf_scheduler_group_is_on_slot_locked() - Check if CSG is on slot.
905  *
906  * @group: GPU queue group to be checked
907  *
908  * This function needs to be called with scheduler's lock held
909  *
910  * Return: true if @group is on slot.
911  */
912 static bool kbasep_csf_scheduler_group_is_on_slot_locked(
913 				struct kbase_queue_group *group)
914 {
915 	struct kbase_csf_scheduler *scheduler =
916 			&group->kctx->kbdev->csf.scheduler;
917 	int slot_num = group->csg_nr;
918 
919 	lockdep_assert_held(&scheduler->lock);
920 
921 	if (slot_num >= 0) {
922 		if (!WARN_ON(scheduler->csg_slots[slot_num].resident_group !=
923 			     group))
924 			return true;
925 	}
926 
927 	return false;
928 }
929 
930 bool kbase_csf_scheduler_group_events_enabled(struct kbase_device *kbdev,
931 			struct kbase_queue_group *group)
932 {
933 	struct kbase_csf_scheduler *scheduler =
934 			&group->kctx->kbdev->csf.scheduler;
935 	int slot_num = group->csg_nr;
936 
937 	lockdep_assert_held(&scheduler->interrupt_lock);
938 
939 	if (WARN_ON(slot_num < 0))
940 		return false;
941 
942 	return test_bit(slot_num, scheduler->csgs_events_enable_mask);
943 }
944 
945 struct kbase_queue_group *kbase_csf_scheduler_get_group_on_slot(
946 			struct kbase_device *kbdev, int slot)
947 {
948 	lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock);
949 
950 	return kbdev->csf.scheduler.csg_slots[slot].resident_group;
951 }
952 
953 static int halt_stream_sync(struct kbase_queue *queue)
954 {
955 	struct kbase_queue_group *group = queue->group;
956 	struct kbase_device *kbdev = queue->kctx->kbdev;
957 	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
958 	struct kbase_csf_cmd_stream_group_info *ginfo;
959 	struct kbase_csf_cmd_stream_info *stream;
960 	int csi_index = queue->csi_index;
961 	long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
962 
963 	if (WARN_ON(!group) ||
964 	    WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group)))
965 		return -EINVAL;
966 
967 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
968 	ginfo = &global_iface->groups[group->csg_nr];
969 	stream = &ginfo->streams[csi_index];
970 
971 	if (CS_REQ_STATE_GET(kbase_csf_firmware_cs_input_read(stream, CS_REQ)) ==
972 			CS_REQ_STATE_START) {
973 
974 		remaining = wait_event_timeout(kbdev->csf.event_wait,
975 			(CS_ACK_STATE_GET(kbase_csf_firmware_cs_output(stream, CS_ACK))
976 			 == CS_ACK_STATE_START), remaining);
977 
978 		if (!remaining) {
979 			dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for queue to start on csi %d bound to group %d on slot %d",
980 				 kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
981 				 csi_index, group->handle, group->csg_nr);
982 			if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
983 				kbase_reset_gpu(kbdev);
984 
985 
986 			return -ETIMEDOUT;
987 		}
988 
989 		remaining =
990 			kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
991 	}
992 
993 	/* Set state to STOP */
994 	kbase_csf_firmware_cs_input_mask(stream, CS_REQ, CS_REQ_STATE_STOP,
995 					 CS_REQ_STATE_MASK);
996 
997 	KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP_REQUESTED, group, queue, 0u);
998 	kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr, true);
999 
1000 	/* Timed wait */
1001 	remaining = wait_event_timeout(kbdev->csf.event_wait,
1002 		(CS_ACK_STATE_GET(kbase_csf_firmware_cs_output(stream, CS_ACK))
1003 		 == CS_ACK_STATE_STOP), remaining);
1004 
1005 	if (!remaining) {
1006 		dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for queue to stop on csi %d bound to group %d on slot %d",
1007 			 kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
1008 			 queue->csi_index, group->handle, group->csg_nr);
1009 
1010 		/* TODO GPUCORE-25328: The CSG can't be terminated, the GPU
1011 		 * will be reset as a work-around.
1012 		 */
1013 		if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
1014 			kbase_reset_gpu(kbdev);
1015 
1016 
1017 	}
1018 	return (remaining) ? 0 : -ETIMEDOUT;
1019 }
1020 
1021 static bool can_halt_stream(struct kbase_device *kbdev,
1022 		struct kbase_queue_group *group)
1023 {
1024 	struct kbase_csf_csg_slot *const csg_slot =
1025 			kbdev->csf.scheduler.csg_slots;
1026 	unsigned long flags;
1027 	bool can_halt;
1028 	int slot;
1029 
1030 	if (!queue_group_scheduled(group))
1031 		return true;
1032 
1033 	spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
1034 	slot = kbase_csf_scheduler_group_get_slot_locked(group);
1035 	can_halt = (slot >= 0) &&
1036 		   (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING);
1037 	spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock,
1038 				flags);
1039 
1040 	return can_halt;
1041 }
1042 
1043 /**
1044  * sched_halt_stream() - Stop a GPU queue when its queue group is not running
1045  *                       on a CSG slot.
1046  * @queue: Pointer to the GPU queue to stop.
1047  *
1048  * This function handles stopping gpu queues for groups that are either not on
1049  * a CSG slot or are on the slot but undergoing transition to
1050  * resume or suspend states.
1051  * It waits until the queue group is scheduled on a slot and starts running,
1052  * which is needed as groups that were suspended may need to resume all queues
1053  * that were enabled and running at the time of suspension.
1054  *
1055  * Return: 0 on success, or negative on failure.
1056  */
1057 static int sched_halt_stream(struct kbase_queue *queue)
1058 {
1059 	struct kbase_queue_group *group = queue->group;
1060 	struct kbase_device *kbdev = queue->kctx->kbdev;
1061 	struct kbase_csf_scheduler *const scheduler =
1062 			&kbdev->csf.scheduler;
1063 	struct kbase_csf_csg_slot *const csg_slot =
1064 			kbdev->csf.scheduler.csg_slots;
1065 	bool retry_needed = false;
1066 	bool retried = false;
1067 	long remaining;
1068 	int slot;
1069 	int err = 0;
1070 	const u32 group_schedule_timeout =
1071 		20 * kbdev->csf.scheduler.csg_scheduling_period_ms;
1072 
1073 	if (WARN_ON(!group))
1074 		return -EINVAL;
1075 
1076 	lockdep_assert_held(&queue->kctx->csf.lock);
1077 	lockdep_assert_held(&scheduler->lock);
1078 
1079 	slot = kbase_csf_scheduler_group_get_slot(group);
1080 
1081 	if (slot >= 0) {
1082 		WARN_ON(atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING);
1083 
1084 		if (atomic_read(&csg_slot[slot].state) == CSG_SLOT_READY2RUN) {
1085 			dev_dbg(kbdev->dev, "Stopping a queue on csi %d when Group-%d is under transition to running state",
1086 				queue->csi_index, group->handle);
1087 			retry_needed = true;
1088 		}
1089 	}
1090 retry:
1091 	/* Update the group state so that it can get scheduled soon */
1092 	update_idle_suspended_group_state(group);
1093 
1094 	mutex_unlock(&scheduler->lock);
1095 
1096 	/* This function is called when the queue group is either not on a CSG
1097 	 * slot or is on the slot but undergoing transition.
1098 	 *
1099 	 * To stop the queue, the function needs to wait either for the queue
1100 	 * group to be assigned a CSG slot (and that slot has to reach the
1101 	 * running state) or for the eviction of the queue group from the
1102 	 * scheduler's list.
1103 	 *
1104 	 * In order to evaluate the latter condition, the function doesn't
1105 	 * really need to lock the scheduler, as any update to the run_state
1106 	 * of the queue group by sched_evict_group() would be visible due
1107 	 * to implicit barriers provided by the kernel waitqueue macros.
1108 	 *
1109 	 * The group pointer cannot disappear meanwhile, as the high level
1110 	 * CSF context is locked. Therefore, the scheduler would be
1111 	 * the only one to update the run_state of the group.
1112 	 */
1113 	remaining = wait_event_timeout(
1114 		kbdev->csf.event_wait, can_halt_stream(kbdev, group),
1115 		kbase_csf_timeout_in_jiffies(group_schedule_timeout));
1116 
1117 	mutex_lock(&scheduler->lock);
1118 
1119 	if (remaining && queue_group_scheduled_locked(group)) {
1120 		slot = kbase_csf_scheduler_group_get_slot(group);
1121 
1122 		/* If the group is still on slot and slot is in running state
1123 		 * then explicitly stop the CSI of the
1124 		 * queue. Otherwise there are different cases to consider
1125 		 *
1126 		 * - If the queue group was already undergoing transition to
1127 		 *   resume/start state when this function was entered then it
1128 		 *   would not have disabled the CSI of the
1129 		 *   queue being stopped and the previous wait would have ended
1130 		 *   once the slot was in a running state with CS
1131 		 *   interface still enabled.
1132 		 *   Now the group is going through another transition either
1133 		 *   to a suspend state or to a resume state (it could have
1134 		 *   been suspended before the scheduler lock was grabbed).
1135 		 *   In both scenarios need to wait again for the group to
1136 		 *   come on a slot and that slot to reach the running state,
1137 		 *   as that would guarantee that firmware will observe the
1138 		 *   CSI as disabled.
1139 		 *
1140 		 * - If the queue group was either off the slot or was
1141 		 *   undergoing transition to suspend state on entering this
1142 		 *   function, then the group would have been resumed with the
1143 		 *   queue's CSI in disabled state.
1144 		 *   So now if the group is undergoing another transition
1145 		 *   (after the resume) then just need to wait for the state
1146 		 *   bits in the ACK register of CSI to be
1147 		 *   set to STOP value. It is expected that firmware will
1148 		 *   process the stop/disable request of the CS
1149 		 *   interface after resuming the group before it processes
1150 		 *   another state change request of the group.
1151 		 */
1152 		if ((slot >= 0) &&
1153 		    (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING)) {
1154 			err = halt_stream_sync(queue);
1155 		} else if (retry_needed && !retried) {
1156 			retried = true;
1157 			goto retry;
1158 		} else if (slot >= 0) {
1159 			struct kbase_csf_global_iface *global_iface =
1160 					&kbdev->csf.global_iface;
1161 			struct kbase_csf_cmd_stream_group_info *ginfo =
1162 					&global_iface->groups[slot];
1163 			struct kbase_csf_cmd_stream_info *stream =
1164 					&ginfo->streams[queue->csi_index];
1165 			u32 cs_req =
1166 			    kbase_csf_firmware_cs_input_read(stream, CS_REQ);
1167 
1168 			if (!WARN_ON(CS_REQ_STATE_GET(cs_req) !=
1169 				     CS_REQ_STATE_STOP)) {
1170 				/* Timed wait */
1171 				remaining = wait_event_timeout(
1172 					kbdev->csf.event_wait,
1173 					(CS_ACK_STATE_GET(
1174 						 kbase_csf_firmware_cs_output(
1175 							 stream, CS_ACK)) ==
1176 					 CS_ACK_STATE_STOP),
1177 					kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms));
1178 
1179 				if (!remaining) {
1180 					dev_warn(kbdev->dev,
1181 						 "[%llu] Timeout (%d ms) waiting for queue stop ack on csi %d bound to group %d on slot %d",
1182 						 kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
1183 						 queue->csi_index,
1184 						 group->handle, group->csg_nr);
1185 
1186 
1187 					err = -ETIMEDOUT;
1188 				}
1189 			}
1190 		}
1191 	} else if (!remaining) {
1192 		dev_warn(kbdev->dev, "[%llu] Group-%d failed to get a slot for stopping the queue on csi %d (timeout %d ms)",
1193 			 kbase_backend_get_cycle_cnt(kbdev),
1194 			 group->handle, queue->csi_index,
1195 			 group_schedule_timeout);
1196 
1197 
1198 		err = -ETIMEDOUT;
1199 	}
1200 
1201 	return err;
1202 }
1203 
1204 /**
1205  * scheduler_activate_on_queue_stop() - Activate the Scheduler when the GPU
1206  *                                      queue needs to be stopped.
1207  *
1208  * @queue: Pointer to the GPU command queue
1209  *
1210  * This function is called when the CSI to which GPU queue is bound needs to
1211  * be stopped. For that the corresponding queue group needs to be resident on
1212  * the CSG slot and MCU firmware should be running. So this function makes the
1213  * Scheduler exit the sleeping or suspended state.
1214  */
1215 static void scheduler_activate_on_queue_stop(struct kbase_queue *queue)
1216 {
1217 	struct kbase_device *kbdev = queue->kctx->kbdev;
1218 
1219 	scheduler_wakeup(kbdev, true);
1220 
1221 	/* Wait for MCU firmware to start running */
1222 	if (kbase_csf_scheduler_wait_mcu_active(kbdev)) {
1223 		dev_warn(
1224 			kbdev->dev,
1225 			"[%llu] Wait for MCU active failed for stopping queue on csi %d bound to group %d of context %d_%d on slot %d",
1226 			kbase_backend_get_cycle_cnt(kbdev),
1227 			queue->csi_index, queue->group->handle,
1228 			queue->kctx->tgid, queue->kctx->id,
1229 			queue->group->csg_nr);
1230 	}
1231 }
1232 
1233 int kbase_csf_scheduler_queue_stop(struct kbase_queue *queue)
1234 {
1235 	struct kbase_device *kbdev = queue->kctx->kbdev;
1236 	struct kbase_queue_group *group = queue->group;
1237 	bool const cs_enabled = queue->enabled;
1238 	int err = 0;
1239 
1240 	if (WARN_ON(!group))
1241 		return -EINVAL;
1242 
1243 	kbase_reset_gpu_assert_failed_or_prevented(kbdev);
1244 	lockdep_assert_held(&queue->kctx->csf.lock);
1245 	mutex_lock(&kbdev->csf.scheduler.lock);
1246 
1247 	queue->enabled = false;
1248 	KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP, group, queue, cs_enabled);
1249 
1250 	if (cs_enabled && queue_group_scheduled_locked(group)) {
1251 		struct kbase_csf_csg_slot *const csg_slot =
1252 			kbdev->csf.scheduler.csg_slots;
1253 		int slot = kbase_csf_scheduler_group_get_slot(group);
1254 
1255 		/* Since the group needs to be resumed in order to stop the queue,
1256 		 * check if GPU needs to be powered up.
1257 		 */
1258 		scheduler_activate_on_queue_stop(queue);
1259 
1260 		if ((slot >= 0) &&
1261 		    (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING))
1262 			err = halt_stream_sync(queue);
1263 		else
1264 			err = sched_halt_stream(queue);
1265 
1266 		unassign_user_doorbell_from_queue(kbdev, queue);
1267 	}
1268 
1269 	mutex_unlock(&kbdev->csf.scheduler.lock);
1270 	return err;
1271 }
1272 
1273 static void update_hw_active(struct kbase_queue *queue, bool active)
1274 {
1275 #if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
1276 	if (queue && queue->enabled) {
1277 		u32 *output_addr = (u32 *)(queue->user_io_addr + PAGE_SIZE);
1278 
1279 		output_addr[CS_ACTIVE / sizeof(u32)] = active;
1280 	}
1281 #else
1282 	CSTD_UNUSED(queue);
1283 	CSTD_UNUSED(active);
1284 #endif
1285 }
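/* The CS_ACTIVE field in the queue's output page is only mirrored by the
 * driver on CONFIG_MALI_BIFROST_NO_MALI builds; elsewhere this is a no-op.
 */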
1286 
1287 static void program_cs_extract_init(struct kbase_queue *queue)
1288 {
1289 	u64 *input_addr = (u64 *)queue->user_io_addr;
1290 	u64 *output_addr = (u64 *)(queue->user_io_addr + PAGE_SIZE);
1291 
1292 	input_addr[CS_EXTRACT_INIT_LO / sizeof(u64)] =
1293 			output_addr[CS_EXTRACT_LO / sizeof(u64)];
1294 }
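/* Seeding CS_EXTRACT_INIT in the input page from the current CS_EXTRACT
 * output lets a previously suspended queue resume from its last extract
 * offset (program_cs() calls this only for enabled queues of suspended
 * groups).
 */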
1295 
1296 static void program_cs_trace_cfg(struct kbase_csf_cmd_stream_info *stream,
1297 				 struct kbase_queue *queue)
1298 {
1299 	struct kbase_device *kbdev = queue->kctx->kbdev;
1300 	u32 const glb_version = kbdev->csf.global_iface.version;
1301 
1302 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
1303 
1304 	/* If cs_trace_command is not supported, nothing to program */
1305 	if (glb_version < kbase_csf_interface_version(1, 1, 0))
1306 		return;
1307 
1308 	/* Program for cs_trace if enabled. In the current arrangement, it is
1309 	 * possible for the context to enable the cs_trace after some queues
1310 	 * have been registered in cs_trace in disabled state. This is tracked by
1311 	 * the queue's trace buffer base address, which had been validated at the
1312 	 * queue's register_ex call.
1313 	 */
1314 	if (kbase_csf_scheduler_queue_has_trace(queue)) {
1315 		u32 cs_cfg = CS_INSTR_CONFIG_JASID_SET(
1316 			queue->trace_cfg, queue->kctx->as_nr);
1317 
1318 		kbase_csf_firmware_cs_input(stream, CS_INSTR_CONFIG, cs_cfg);
1319 		kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_SIZE,
1320 				queue->trace_buffer_size);
1321 
1322 		kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_BASE_LO,
1323 				queue->trace_buffer_base & U32_MAX);
1324 		kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_BASE_HI,
1325 				queue->trace_buffer_base >> 32);
1326 
1327 		kbase_csf_firmware_cs_input(
1328 				stream, CS_INSTR_BUFFER_OFFSET_POINTER_LO,
1329 				queue->trace_offset_ptr & U32_MAX);
1330 		kbase_csf_firmware_cs_input(
1331 				stream, CS_INSTR_BUFFER_OFFSET_POINTER_HI,
1332 				queue->trace_offset_ptr >> 32);
1333 	} else {
1334 		/* Place the configuration to the disabled condition */
1335 		kbase_csf_firmware_cs_input(stream, CS_INSTR_CONFIG, 0);
1336 		kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_SIZE, 0);
1337 	}
1338 }
1339 
1340 static void program_cs(struct kbase_device *kbdev,
1341 		struct kbase_queue *queue, bool ring_csg_doorbell)
1342 {
1343 	struct kbase_queue_group *group = queue->group;
1344 	struct kbase_csf_cmd_stream_group_info *ginfo;
1345 	struct kbase_csf_cmd_stream_info *stream;
1346 	int csi_index = queue->csi_index;
1347 	u64 user_input;
1348 	u64 user_output;
1349 
1350 	if (WARN_ON(!group))
1351 		return;
1352 
1353 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
1354 
1355 	if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group)))
1356 		return;
1357 
1358 	ginfo = &kbdev->csf.global_iface.groups[group->csg_nr];
1359 
1360 	if (WARN_ON(csi_index < 0) ||
1361 	    WARN_ON(csi_index >= ginfo->stream_num))
1362 		return;
1363 
1364 	assign_user_doorbell_to_queue(kbdev, queue);
1365 	if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID)
1366 		return;
1367 
1368 	WARN_ON(queue->doorbell_nr != queue->group->doorbell_nr);
1369 
1370 	if (queue->enabled && queue_group_suspended_locked(group))
1371 		program_cs_extract_init(queue);
1372 
1373 	stream = &ginfo->streams[csi_index];
1374 
1375 	kbase_csf_firmware_cs_input(stream, CS_BASE_LO,
1376 				    queue->base_addr & 0xFFFFFFFF);
1377 	kbase_csf_firmware_cs_input(stream, CS_BASE_HI,
1378 				    queue->base_addr >> 32);
1379 	kbase_csf_firmware_cs_input(stream, CS_SIZE,
1380 				    queue->size);
1381 
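	/* The queue's user I/O region is two pages: the input page at
	 * reg->start_pfn and the output page right after it (cf.
	 * program_cs_extract_init() and update_hw_active(), which access
	 * user_io_addr + PAGE_SIZE for the output page).
	 */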
1382 	user_input = (queue->reg->start_pfn << PAGE_SHIFT);
1383 	kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_LO,
1384 				    user_input & 0xFFFFFFFF);
1385 	kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_HI,
1386 				    user_input >> 32);
1387 
1388 	user_output = ((queue->reg->start_pfn + 1) << PAGE_SHIFT);
1389 	kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_LO,
1390 				    user_output & 0xFFFFFFFF);
1391 	kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_HI,
1392 				    user_output >> 32);
1393 
1394 	kbase_csf_firmware_cs_input(stream, CS_CONFIG,
1395 		(queue->doorbell_nr << 8) | (queue->priority & 0xF));
1396 
1397 	/* Program the queue's cs_trace configuration */
1398 	program_cs_trace_cfg(stream, queue);
1399 
1400 	/* Enable all interrupts for now */
1401 	kbase_csf_firmware_cs_input(stream, CS_ACK_IRQ_MASK, ~((u32)0));
1402 
1403 	/*
1404 	 * Enable the CSG idle notification once the CS's ringbuffer
1405 	 * becomes empty or the CS becomes sync_idle, i.e. waiting on a sync
1406 	 * update or a protected mode switch.
1407 	 */
1408 	kbase_csf_firmware_cs_input_mask(stream, CS_REQ,
1409 			CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK,
1410 			CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK);
1411 
1412 	/* Set state to START/STOP */
1413 	kbase_csf_firmware_cs_input_mask(stream, CS_REQ,
1414 		queue->enabled ? CS_REQ_STATE_START : CS_REQ_STATE_STOP,
1415 		CS_REQ_STATE_MASK);
1416 
1417 	KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_START, group, queue, queue->enabled);
1418 
1419 	kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr,
1420 					  ring_csg_doorbell);
1421 	update_hw_active(queue, true);
1422 }
1423 
1424 int kbase_csf_scheduler_queue_start(struct kbase_queue *queue)
1425 {
1426 	struct kbase_queue_group *group = queue->group;
1427 	struct kbase_device *kbdev = queue->kctx->kbdev;
1428 	bool const cs_enabled = queue->enabled;
1429 	int err = 0;
1430 	bool evicted = false;
1431 
1432 	kbase_reset_gpu_assert_prevented(kbdev);
1433 	lockdep_assert_held(&queue->kctx->csf.lock);
1434 
1435 	if (WARN_ON(!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND))
1436 		return -EINVAL;
1437 
1438 	mutex_lock(&kbdev->csf.scheduler.lock);
1439 
1440 	KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_START, group, queue,
1441 				   group->run_state);
1442 	KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_STATUS_WAIT, queue->group,
1443 				   queue, queue->status_wait);
1444 
1445 	if (group->run_state == KBASE_CSF_GROUP_FAULT_EVICTED) {
1446 		err = -EIO;
1447 		evicted = true;
1448 	} else if ((group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC)
1449 		   && CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait)) {
1450 		dev_dbg(kbdev->dev, "blocked queue(csi_index=%d) of group %d was kicked",
1451 			queue->csi_index, group->handle);
1452 	} else {
1453 		err = scheduler_group_schedule(group);
1454 
1455 		if (!err) {
1456 			queue->enabled = true;
1457 			if (kbasep_csf_scheduler_group_is_on_slot_locked(group)) {
1458 				if (cs_enabled) {
1459 					/* In normal situation, when a queue is
1460 					 * already running, the queue update
1461 					 * would be a doorbell kick on user
1462 					 * side. However, if such a kick is
1463 					 * shortly following a start or resume,
1464 					 * the queue may actually be in transition,
1465 					 * hence the said kick would enter the
1466 					 * kernel as the hw_active flag is yet
1467 					 * to be set. The scheduler needs to
1468 					 * give a kick to the corresponding
1469 					 * user doorbell in such a case.
1470 					 */
1471 					kbase_csf_ring_cs_user_doorbell(kbdev, queue);
1472 				} else
1473 					program_cs(kbdev, queue, true);
1474 			}
1475 			queue_delayed_work(system_long_wq,
1476 				&kbdev->csf.scheduler.ping_work,
1477 				msecs_to_jiffies(FIRMWARE_PING_INTERVAL_MS));
1478 		}
1479 	}
1480 
1481 	mutex_unlock(&kbdev->csf.scheduler.lock);
1482 
1483 	if (evicted)
1484 		kbase_csf_term_descheduled_queue_group(group);
1485 
1486 	return err;
1487 }
1488 
1489 static enum kbase_csf_csg_slot_state update_csg_slot_status(
1490 				struct kbase_device *kbdev, s8 slot)
1491 {
1492 	struct kbase_csf_csg_slot *csg_slot =
1493 		&kbdev->csf.scheduler.csg_slots[slot];
1494 	struct kbase_csf_cmd_stream_group_info *ginfo =
1495 		&kbdev->csf.global_iface.groups[slot];
1496 	u32 state;
1497 	enum kbase_csf_csg_slot_state slot_state;
1498 
1499 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
1500 
1501 	state = CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo,
1502 			CSG_ACK));
1503 	slot_state = atomic_read(&csg_slot->state);
1504 
1505 	switch (slot_state) {
1506 	case CSG_SLOT_READY2RUN:
1507 		if ((state == CSG_ACK_STATE_START) ||
1508 		    (state == CSG_ACK_STATE_RESUME)) {
1509 			slot_state = CSG_SLOT_RUNNING;
1510 			atomic_set(&csg_slot->state, slot_state);
1511 			csg_slot->trigger_jiffies = jiffies;
1512 			KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STARTED, csg_slot->resident_group, state);
1513 			dev_dbg(kbdev->dev, "Group %u running on slot %d\n",
1514 				csg_slot->resident_group->handle, slot);
1515 		}
1516 		break;
1517 	case CSG_SLOT_DOWN2STOP:
1518 		if ((state == CSG_ACK_STATE_SUSPEND) ||
1519 		    (state == CSG_ACK_STATE_TERMINATE)) {
1520 			slot_state = CSG_SLOT_STOPPED;
1521 			atomic_set(&csg_slot->state, slot_state);
1522 			csg_slot->trigger_jiffies = jiffies;
1523 			KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOPPED, csg_slot->resident_group, state);
1524 			dev_dbg(kbdev->dev, "Group %u stopped on slot %d\n",
1525 				csg_slot->resident_group->handle, slot);
1526 		}
1527 		break;
1528 	case CSG_SLOT_DOWN2STOP_TIMEDOUT:
1529 	case CSG_SLOT_READY2RUN_TIMEDOUT:
1530 	case CSG_SLOT_READY:
1531 	case CSG_SLOT_RUNNING:
1532 	case CSG_SLOT_STOPPED:
1533 		break;
1534 	default:
1535 		dev_warn(kbdev->dev, "Unknown CSG slot state %d", slot_state);
1536 		break;
1537 	}
1538 
1539 	return slot_state;
1540 }
1541 
1542 static bool csg_slot_running(struct kbase_device *kbdev, s8 slot)
1543 {
1544 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
1545 
1546 	return (update_csg_slot_status(kbdev, slot) == CSG_SLOT_RUNNING);
1547 }
1548 
1549 static bool csg_slot_stopped_locked(struct kbase_device *kbdev, s8 slot)
1550 {
1551 	enum kbase_csf_csg_slot_state slot_state;
1552 
1553 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
1554 
1555 	slot_state = update_csg_slot_status(kbdev, slot);
1556 
1557 	return (slot_state == CSG_SLOT_STOPPED ||
1558 		slot_state == CSG_SLOT_READY);
1559 }
1560 
1561 static bool csg_slot_stopped_raw(struct kbase_device *kbdev, s8 slot)
1562 {
1563 	struct kbase_csf_cmd_stream_group_info *ginfo =
1564 		&kbdev->csf.global_iface.groups[slot];
1565 	u32 state;
1566 
1567 	state = CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo,
1568 			CSG_ACK));
1569 
1570 	if (state == CSG_ACK_STATE_SUSPEND || state == CSG_ACK_STATE_TERMINATE) {
1571 		KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOPPED, kbdev->csf.scheduler.csg_slots[slot].resident_group, state);
1572 		dev_dbg(kbdev->dev, "(raw status) slot %d stopped\n", slot);
1573 		return true;
1574 	}
1575 
1576 	return false;
1577 }
1578 
1579 static void halt_csg_slot(struct kbase_queue_group *group, bool suspend)
1580 {
1581 	struct kbase_device *kbdev = group->kctx->kbdev;
1582 	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
1583 	struct kbase_csf_csg_slot *csg_slot =
1584 		kbdev->csf.scheduler.csg_slots;
1585 	s8 slot;
1586 
1587 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
1588 
1589 	if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group)))
1590 		return;
1591 
1592 	slot = group->csg_nr;
1593 
1594 	/* When in transition, wait for it to complete */
1595 	if (atomic_read(&csg_slot[slot].state) == CSG_SLOT_READY2RUN) {
1596 		long remaining =
1597 			kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
1598 
1599 		dev_dbg(kbdev->dev, "slot %d wait for up-running\n", slot);
1600 		remaining = wait_event_timeout(kbdev->csf.event_wait,
1601 				csg_slot_running(kbdev, slot), remaining);
1602 		if (!remaining)
1603 			dev_warn(kbdev->dev,
1604 				 "[%llu] slot %d timeout (%d ms) on up-running\n",
1605 				 kbase_backend_get_cycle_cnt(kbdev),
1606 				 slot, kbdev->csf.fw_timeout_ms);
1607 	}
1608 
1609 	if (csg_slot_running(kbdev, slot)) {
1610 		unsigned long flags;
1611 		struct kbase_csf_cmd_stream_group_info *ginfo =
1612 						&global_iface->groups[slot];
1613 		u32 halt_cmd = suspend ? CSG_REQ_STATE_SUSPEND :
1614 					 CSG_REQ_STATE_TERMINATE;
1615 
1616 		dev_dbg(kbdev->dev, "Halting(suspend=%d) group %d of context %d_%d on slot %d",
1617 			suspend, group->handle, group->kctx->tgid, group->kctx->id, slot);
1618 
1619 		spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
1620 		/* Set state to SUSPEND/TERMINATE */
1621 		kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, halt_cmd,
1622 						  CSG_REQ_STATE_MASK);
1623 		spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock,
1624 					flags);
1625 		atomic_set(&csg_slot[slot].state, CSG_SLOT_DOWN2STOP);
1626 		csg_slot[slot].trigger_jiffies = jiffies;
1627 		KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOP, group, halt_cmd);
1628 
1629 		KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG(
1630 			kbdev, kbdev->gpu_props.props.raw_props.gpu_id, slot);
1631 		kbase_csf_ring_csg_doorbell(kbdev, slot);
1632 	}
1633 }
1634 
1635 static void term_csg_slot(struct kbase_queue_group *group)
1636 {
1637 	halt_csg_slot(group, false);
1638 }
1639 
1640 static void suspend_csg_slot(struct kbase_queue_group *group)
1641 {
1642 	halt_csg_slot(group, true);
1643 }
1644 
1645 /**
1646  * evaluate_sync_update() - Evaluate the sync wait condition the GPU command
1647  *                          queue has been blocked on.
1648  *
1649  * @queue: Pointer to the GPU command queue
1650  *
1651  * Return: true if sync wait condition is satisfied.
1652  */
1653 static bool evaluate_sync_update(struct kbase_queue *queue)
1654 {
1655 	struct kbase_vmap_struct *mapping;
1656 	bool updated = false;
1657 	u32 *sync_ptr;
1658 	u32 sync_wait_cond;
1659 	u32 sync_current_val;
1660 	struct kbase_device *kbdev;
1661 
1662 	if (WARN_ON(!queue))
1663 		return false;
1664 
1665 	kbdev = queue->kctx->kbdev;
1666 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
1667 
1668 	sync_ptr = kbase_phy_alloc_mapping_get(queue->kctx, queue->sync_ptr,
1669 					&mapping);
1670 
1671 	KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE, queue->group,
1672 				   queue, queue->sync_ptr);
1673 	KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_BLOCKED_REASON,
1674 				   queue->group, queue, queue->blocked_reason);
1675 
1676 	if (!sync_ptr) {
1677 		dev_dbg(queue->kctx->kbdev->dev, "sync memory VA 0x%016llX already freed",
1678 			queue->sync_ptr);
1679 		goto out;
1680 	}
1681 
1682 	sync_wait_cond =
1683 		CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(queue->status_wait);
1684 
1685 	WARN_ON((sync_wait_cond != CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) &&
1686 		(sync_wait_cond != CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE));
1687 
1688 	sync_current_val = READ_ONCE(*sync_ptr);
1689 	KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_CURRENT_VAL, queue->group,
1690 				   queue, sync_current_val);
1691 
1692 	KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_TEST_VAL, queue->group,
1693 				   queue, queue->sync_value);
1694 
1695 	if (((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) &&
1696 	     (sync_current_val > queue->sync_value)) ||
1697 	    ((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE) &&
1698 	     (sync_current_val <= queue->sync_value))) {
1699 		/* The sync wait condition is satisfied so the group to which
1700 		 * queue is bound can be re-scheduled.
1701 		 */
1702 		updated = true;
1703 	} else {
1704 		dev_dbg(queue->kctx->kbdev->dev,
1705 			"sync memory not updated yet(%u)", sync_current_val);
1706 	}
1707 
1708 	kbase_phy_alloc_mapping_put(queue->kctx, mapping);
1709 out:
1710 	KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_EVALUATED,
1711 				   queue->group, queue, updated);
1712 	return updated;
1713 }
1714 
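/*
 * Worked example (illustrative values, not taken from the driver): for a GT
 * wait with queue->sync_value == 5, the condition evaluated above is met once
 * the sync object read via sync_ptr holds 6 or more; for an LE wait with the
 * same value, it is met while the sync object holds 5 or less.
 */
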
1715 /**
1716  * save_slot_cs() -  Save the state for blocked GPU command queue.
1717  *
1718  * @ginfo: Pointer to the CSG interface used by the group
1719  *         the queue is bound to.
1720  * @queue: Pointer to the GPU command queue.
1721  *
1722  * This function will check if the GPU command queue is blocked on a sync wait
1723  * and evaluate the wait condition. If the wait condition isn't satisfied, it
1724  * will save the state needed to re-evaluate the condition in the future.
1725  * The group to which the queue is bound shall be in the idle state.
1726  *
1727  * Return: true if the queue is blocked on a sync wait operation.
1728  */
1729 static
1730 bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo,
1731 		struct kbase_queue *queue)
1732 {
1733 	struct kbase_csf_cmd_stream_info *const stream =
1734 		&ginfo->streams[queue->csi_index];
1735 	u32 status = kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT);
1736 	bool is_waiting = false;
1737 
1738 	KBASE_KTRACE_ADD_CSF_GRP_Q(stream->kbdev, QUEUE_SYNC_STATUS_WAIT,
1739 				   queue->group, queue, status);
1740 
1741 	if (CS_STATUS_WAIT_SYNC_WAIT_GET(status)) {
1742 		queue->status_wait = status;
1743 		queue->sync_ptr = kbase_csf_firmware_cs_output(stream,
1744 			CS_STATUS_WAIT_SYNC_POINTER_LO);
1745 		queue->sync_ptr |= (u64)kbase_csf_firmware_cs_output(stream,
1746 			CS_STATUS_WAIT_SYNC_POINTER_HI) << 32;
1747 		queue->sync_value = kbase_csf_firmware_cs_output(stream,
1748 			CS_STATUS_WAIT_SYNC_VALUE);
1749 
1750 		queue->sb_status = CS_STATUS_SCOREBOARDS_NONZERO_GET(
1751 			kbase_csf_firmware_cs_output(stream,
1752 						     CS_STATUS_SCOREBOARDS));
1753 		queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_GET(
1754 			kbase_csf_firmware_cs_output(stream,
1755 						     CS_STATUS_BLOCKED_REASON));
1756 
1757 		if (!evaluate_sync_update(queue)) {
1758 			is_waiting = true;
1759 		} else {
1760 			/* Sync object already got updated & met the condition
1761 			 * thus it doesn't need to be reevaluated and so can
1762 			 * clear the 'status_wait' here.
1763 			 */
1764 			queue->status_wait = 0;
1765 		}
1766 	} else {
1767 		/* Invalidate wait status info that would have been recorded if
1768 		 * this queue was blocked when the group (in idle state) was
1769 		 * suspended previously. After that the group could have been
1770 		 * unblocked due to the kicking of another queue bound to it &
1771 		 * so the wait status info would have stuck with this queue.
1772 		 */
1773 		queue->status_wait = 0;
1774 	}
1775 
1776 	return is_waiting;
1777 }
1778 
1779 static void schedule_in_cycle(struct kbase_queue_group *group, bool force)
1780 {
1781 	struct kbase_context *kctx = group->kctx;
1782 	struct kbase_device *kbdev = kctx->kbdev;
1783 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
1784 
1785 	lockdep_assert_held(&scheduler->lock);
1786 
1787 	/* Only try to schedule work for this event if no request is already
1788 	 * pending (otherwise the function would end up cancelling the previous
1789 	 * work request) and the scheduler is configured to wake up periodically
1790 	 * (or the scheduling of work needs to be enforced, for example when
1791 	 * entering protected mode).
1792 	 */
1793 	if ((likely(scheduler_timer_is_enabled_nolock(kbdev)) || force) &&
1794 			!scheduler->tock_pending_request) {
1795 		scheduler->tock_pending_request = true;
1796 		dev_dbg(kbdev->dev, "Kicking async for group %d\n",
1797 			group->handle);
1798 		mod_delayed_work(scheduler->wq, &scheduler->tock_work, 0);
1799 	}
1800 }
1801 
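/*
 * Note: the "tock" driven by tock_work is an asynchronous re-schedule
 * request, as opposed to the periodic "tick". schedule_in_cycle() only
 * queues it (with zero delay) when no tock request is already pending and
 * either the scheduler timer is enabled or the caller forces it, e.g. when
 * a schedule is needed for entering protected mode.
 */
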
1802 static
1803 void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler,
1804 		struct kbase_queue_group *const group,
1805 		enum kbase_csf_group_state run_state)
1806 {
1807 	struct kbase_context *const kctx = group->kctx;
1808 	struct kbase_device *const kbdev = kctx->kbdev;
1809 
1810 	lockdep_assert_held(&scheduler->lock);
1811 
1812 	WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE);
1813 
1814 	if (WARN_ON(group->priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT))
1815 		return;
1816 
1817 	group->run_state = run_state;
1818 
1819 	if (run_state == KBASE_CSF_GROUP_RUNNABLE)
1820 		group->prepared_seq_num = KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID;
1821 
1822 	list_add_tail(&group->link,
1823 			&kctx->csf.sched.runnable_groups[group->priority]);
1824 	kctx->csf.sched.num_runnable_grps++;
1825 	KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_INSERT_RUNNABLE, group,
1826 				 kctx->csf.sched.num_runnable_grps);
1827 
1828 	/* Add the kctx if not yet in runnable kctxs */
1829 	if (kctx->csf.sched.num_runnable_grps == 1) {
1830 		/* First runnable CSG, so add the kctx to runnable_kctxs */
1831 		INIT_LIST_HEAD(&kctx->csf.link);
1832 		list_add_tail(&kctx->csf.link, &scheduler->runnable_kctxs);
1833 		KBASE_KTRACE_ADD(kbdev, SCHEDULER_INSERT_RUNNABLE, kctx, 0u);
1834 	}
1835 
1836 	scheduler->total_runnable_grps++;
1837 
1838 	if (likely(scheduler_timer_is_enabled_nolock(kbdev)) &&
1839 	    (scheduler->total_runnable_grps == 1 ||
1840 	     scheduler->state == SCHED_SUSPENDED ||
1841 	     scheduler->state == SCHED_SLEEPING)) {
1842 		dev_dbg(kbdev->dev, "Kicking scheduler on first runnable group\n");
1843 		/* Fire a scheduling to start the time-slice */
1844 		enqueue_tick_work(kbdev);
1845 	} else
1846 		schedule_in_cycle(group, false);
1847 
1848 	/* Since a new group has become runnable, check if GPU needs to be
1849 	 * powered up.
1850 	 */
1851 	scheduler_wakeup(kbdev, false);
1852 }
1853 
1854 static
1855 void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler,
1856 		struct kbase_queue_group *group,
1857 		enum kbase_csf_group_state run_state)
1858 {
1859 	struct kbase_context *kctx = group->kctx;
1860 	struct kbase_queue_group *new_head_grp;
1861 	struct list_head *list =
1862 		&kctx->csf.sched.runnable_groups[group->priority];
1863 	unsigned long flags;
1864 
1865 	lockdep_assert_held(&scheduler->lock);
1866 
1867 	WARN_ON(!queue_group_scheduled_locked(group));
1868 
1869 	group->run_state = run_state;
1870 	list_del_init(&group->link);
1871 
1872 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
1873 	/* The below condition will be true when the group running in protected
1874 	 * mode is being terminated but the protected mode exit interrupt wasn't
1875 	 * received. This can happen if the FW got stuck during protected mode
1876 	 * for some reason (like GPU page fault or some internal error).
1877 	 * In normal cases FW is expected to send the protected mode exit
1878 	 * interrupt before it handles the CSG termination request.
1879 	 */
1880 	if (unlikely(scheduler->active_protm_grp == group)) {
1881 		/* CSG slot cleanup should have happened for the pmode group */
1882 		WARN_ON(kbasep_csf_scheduler_group_is_on_slot_locked(group));
1883 		WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE);
1884 		/* Initiate a GPU reset, in case it wasn't initiated yet,
1885 		 * in order to rectify the anomaly.
1886 		 */
1887 		if (kbase_prepare_to_reset_gpu(kctx->kbdev, RESET_FLAGS_NONE))
1888 			kbase_reset_gpu(kctx->kbdev);
1889 
1890 		KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_EXIT_PROTM,
1891 					 scheduler->active_protm_grp, 0u);
1892 		scheduler->active_protm_grp = NULL;
1893 	}
1894 	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
1895 
1896 	if (scheduler->top_grp == group) {
1897 		/*
1898 		 * Note: this disables explicit rotation in the next scheduling
1899 		 * cycle. However, removing the top_grp is the same as an
1900 		 * implicit rotation (e.g. if we instead rotated the top_ctx
1901 		 * and then remove top_grp)
1902 		 *
1903 		 * This implicit rotation is assumed by the scheduler rotate
1904 		 * functions.
1905 		 */
1906 		scheduler->top_grp = NULL;
1907 
1908 		/*
1909 		 * Trigger a scheduling tock for a CSG containing protected
1910 		 * content in case there has been any in order to minimise
1911 		 * latency.
1912 		 */
1913 		group = scheduler_get_protm_enter_async_group(kctx->kbdev,
1914 							      NULL);
1915 		if (group)
1916 			schedule_in_cycle(group, true);
1917 	}
1918 
1919 	kctx->csf.sched.num_runnable_grps--;
1920 	KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_REMOVE_RUNNABLE, group,
1921 				 kctx->csf.sched.num_runnable_grps);
1922 	new_head_grp = (!list_empty(list)) ?
1923 				list_first_entry(list, struct kbase_queue_group, link) :
1924 				NULL;
1925 	KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_HEAD_RUNNABLE, new_head_grp,
1926 				 0u);
1927 
1928 	if (kctx->csf.sched.num_runnable_grps == 0) {
1929 		struct kbase_context *new_head_kctx;
1930 		struct list_head *kctx_list = &scheduler->runnable_kctxs;
1931 		/* drop the kctx */
1932 		list_del_init(&kctx->csf.link);
1933 		if (scheduler->top_ctx == kctx)
1934 			scheduler->top_ctx = NULL;
1935 		KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_REMOVE_RUNNABLE, kctx,
1936 				 0u);
1937 		new_head_kctx = (!list_empty(kctx_list)) ?
1938 					list_first_entry(kctx_list, struct kbase_context, csf.link) :
1939 					NULL;
1940 		KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_HEAD_RUNNABLE,
1941 				 new_head_kctx, 0u);
1942 	}
1943 
1944 	WARN_ON(scheduler->total_runnable_grps == 0);
1945 	scheduler->total_runnable_grps--;
1946 	if (!scheduler->total_runnable_grps) {
1947 		dev_dbg(kctx->kbdev->dev, "Scheduler idle has no runnable groups");
1948 		cancel_tick_timer(kctx->kbdev);
1949 		WARN_ON(atomic_read(&scheduler->non_idle_offslot_grps));
1950 		if (scheduler->state != SCHED_SUSPENDED)
1951 			queue_work(system_wq, &scheduler->gpu_idle_work);
1952 	}
1953 	KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp,
1954 			scheduler->num_active_address_spaces |
1955 			(((u64)scheduler->total_runnable_grps) << 32));
1956 }
1957 
1958 static void insert_group_to_idle_wait(struct kbase_queue_group *const group)
1959 {
1960 	struct kbase_context *kctx = group->kctx;
1961 
1962 	lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock);
1963 
1964 	WARN_ON(group->run_state != KBASE_CSF_GROUP_IDLE);
1965 
1966 	list_add_tail(&group->link, &kctx->csf.sched.idle_wait_groups);
1967 	kctx->csf.sched.num_idle_wait_grps++;
1968 	KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_INSERT_IDLE_WAIT, group,
1969 				 kctx->csf.sched.num_idle_wait_grps);
1970 	group->run_state = KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC;
1971 	dev_dbg(kctx->kbdev->dev,
1972 		"Group-%d suspended on sync_wait, total wait_groups: %u\n",
1973 		group->handle, kctx->csf.sched.num_idle_wait_grps);
1974 }
1975 
1976 static void remove_group_from_idle_wait(struct kbase_queue_group *const group)
1977 {
1978 	struct kbase_context *kctx = group->kctx;
1979 	struct list_head *list = &kctx->csf.sched.idle_wait_groups;
1980 	struct kbase_queue_group *new_head_grp;
1981 
1982 	lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock);
1983 
1984 	WARN_ON(group->run_state != KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC);
1985 
1986 	list_del_init(&group->link);
1987 	WARN_ON(kctx->csf.sched.num_idle_wait_grps == 0);
1988 	kctx->csf.sched.num_idle_wait_grps--;
1989 	KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_REMOVE_IDLE_WAIT, group,
1990 				 kctx->csf.sched.num_idle_wait_grps);
1991 	new_head_grp = (!list_empty(list)) ?
1992 				list_first_entry(list, struct kbase_queue_group, link) :
1993 				NULL;
1994 	KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_HEAD_IDLE_WAIT,
1995 				 new_head_grp, 0u);
1996 	group->run_state = KBASE_CSF_GROUP_INACTIVE;
1997 }
1998 
1999 static void deschedule_idle_wait_group(struct kbase_csf_scheduler *scheduler,
2000 		struct kbase_queue_group *group)
2001 {
2002 	lockdep_assert_held(&scheduler->lock);
2003 
2004 	if (WARN_ON(!group))
2005 		return;
2006 
2007 	remove_group_from_runnable(scheduler, group, KBASE_CSF_GROUP_IDLE);
2008 	insert_group_to_idle_wait(group);
2009 }
2010 
2011 static void update_offslot_non_idle_cnt_for_faulty_grp(struct kbase_queue_group *group)
2012 {
2013 	struct kbase_device *kbdev = group->kctx->kbdev;
2014 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
2015 
2016 	lockdep_assert_held(&scheduler->lock);
2017 
2018 	if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) {
2019 		int new_val =
2020 			atomic_dec_return(&scheduler->non_idle_offslot_grps);
2021 		KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC,
2022 					 group, new_val);
2023 	}
2024 }
2025 
2026 static void update_offslot_non_idle_cnt_for_onslot_grp(struct kbase_queue_group *group)
2027 {
2028 	struct kbase_device *kbdev = group->kctx->kbdev;
2029 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
2030 
2031 	lockdep_assert_held(&scheduler->lock);
2032 
2033 	WARN_ON(group->csg_nr < 0);
2034 
2035 	if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) {
2036 		int new_val =
2037 			atomic_dec_return(&scheduler->non_idle_offslot_grps);
2038 		KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC,
2039 					 group, new_val);
2040 	}
2041 }
2042 
2043 static void update_offslot_non_idle_cnt_on_grp_suspend(
2044 				struct kbase_queue_group *group)
2045 {
2046 	struct kbase_device *kbdev = group->kctx->kbdev;
2047 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
2048 
2049 	lockdep_assert_held(&scheduler->lock);
2050 
2051 	if (scheduler->state == SCHED_BUSY) {
2052 		/* active phase or, async entering the protected mode */
2053 		if (group->prepared_seq_num >=
2054 		    scheduler->non_idle_scanout_grps) {
2055 			/* At scanout, it was tagged as on-slot idle */
2056 			if (group->run_state == KBASE_CSF_GROUP_SUSPENDED) {
2057 				int new_val = atomic_inc_return(
2058 					&scheduler->non_idle_offslot_grps);
2059 				KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC,
2060 					group, new_val);
2061 			}
2062 		} else {
2063 			if (group->run_state != KBASE_CSF_GROUP_SUSPENDED) {
2064 				int new_val = atomic_dec_return(
2065 					&scheduler->non_idle_offslot_grps);
2066 				KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC,
2067 					group, new_val);
2068 			}
2069 		}
2070 	} else {
2071 		/* async phases */
2072 		if (group->run_state == KBASE_CSF_GROUP_SUSPENDED) {
2073 			int new_val = atomic_inc_return(
2074 				&scheduler->non_idle_offslot_grps);
2075 			KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC,
2076 						 group, new_val);
2077 		}
2078 	}
2079 }
2080 
2081 static bool confirm_cmd_buf_empty(struct kbase_queue *queue)
2082 {
2083 	bool cs_empty;
2084 	bool cs_idle;
2085 	u32 sb_status = 0;
2086 
2087 	struct kbase_device const *const kbdev = queue->group->kctx->kbdev;
2088 	struct kbase_csf_global_iface const *const iface =
2089 		&kbdev->csf.global_iface;
2090 
2091 	u32 glb_version = iface->version;
2092 
2093 	u64 *input_addr = (u64 *)queue->user_io_addr;
2094 	u64 *output_addr = (u64 *)(queue->user_io_addr + PAGE_SIZE);
2095 
2096 	if (glb_version >= kbase_csf_interface_version(1, 0, 0)) {
2097 		/* CS_STATUS_SCOREBOARD supported from CSF 1.0 */
2098 		struct kbase_csf_cmd_stream_group_info const *const ginfo =
2099 			&kbdev->csf.global_iface.groups[queue->group->csg_nr];
2100 		struct kbase_csf_cmd_stream_info const *const stream =
2101 			&ginfo->streams[queue->csi_index];
2102 
2103 		sb_status = CS_STATUS_SCOREBOARDS_NONZERO_GET(
2104 			kbase_csf_firmware_cs_output(stream,
2105 						     CS_STATUS_SCOREBOARDS));
2106 	}
2107 
2108 	cs_empty = (input_addr[CS_INSERT_LO / sizeof(u64)] ==
2109 		    output_addr[CS_EXTRACT_LO / sizeof(u64)]);
2110 	cs_idle = cs_empty && (!sb_status);
2111 
2112 	return cs_idle;
2113 }
2114 
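/*
 * Illustrative example (assuming 4 KiB user I/O pages): if the input page
 * holds CS_INSERT == 0x100 and the output page holds CS_EXTRACT == 0x100,
 * the ring buffer is empty; the CS is then reported idle only if its
 * scoreboard entries are also all zero (sb_status == 0).
 */
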
2115 static void save_csg_slot(struct kbase_queue_group *group)
2116 {
2117 	struct kbase_device *kbdev = group->kctx->kbdev;
2118 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
2119 	struct kbase_csf_cmd_stream_group_info *ginfo;
2120 	u32 state;
2121 
2122 	lockdep_assert_held(&scheduler->lock);
2123 
2124 	if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group)))
2125 		return;
2126 
2127 	ginfo = &kbdev->csf.global_iface.groups[group->csg_nr];
2128 
2129 	state =
2130 	    CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo, CSG_ACK));
2131 
2132 	if (!WARN_ON((state != CSG_ACK_STATE_SUSPEND) &&
2133 		     (state != CSG_ACK_STATE_TERMINATE))) {
2134 		u32 max_streams = ginfo->stream_num;
2135 		u32 i;
2136 		bool sync_wait = false;
2137 		bool idle = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) &
2138 			    CSG_STATUS_STATE_IDLE_MASK;
2139 #if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
2140 		for (i = 0; i < max_streams; i++)
2141 			update_hw_active(group->bound_queues[i], false);
2142 #endif /* CONFIG_MALI_BIFROST_NO_MALI */
2143 		for (i = 0; idle && i < max_streams; i++) {
2144 			struct kbase_queue *const queue =
2145 					group->bound_queues[i];
2146 
2147 			if (!queue || !queue->enabled)
2148 				continue;
2149 
2150 			if (save_slot_cs(ginfo, queue))
2151 				sync_wait = true;
2152 			else {
2153 				/* Need to confirm whether the ring buffer of
2154 				 * the GPU queue is empty. A race can arise
2155 				 * between the flush of a GPU queue and the
2156 				 * suspend of its CSG. If a queue is flushed
2157 				 * after FW has set the IDLE bit in
2158 				 * CSG_STATUS_STATE, the Scheduler would
2159 				 * incorrectly consider the CSG idle, and
2160 				 * there may be no further flush call for the
2161 				 * GPU queue that would have de-idled the CSG.
2162 				 */
2163 				idle = confirm_cmd_buf_empty(queue);
2164 			}
2165 		}
2166 
2167 		if (idle) {
2168 			/* Take the suspended group out of the runnable_groups
2169 			 * list of the context and move it to the
2170 			 * idle_wait_groups list.
2171 			 */
2172 			if (sync_wait)
2173 				deschedule_idle_wait_group(scheduler, group);
2174 			else {
2175 				group->run_state =
2176 					KBASE_CSF_GROUP_SUSPENDED_ON_IDLE;
2177 				dev_dbg(kbdev->dev, "Group-%d suspended: idle",
2178 					group->handle);
2179 			}
2180 		} else {
2181 			group->run_state = KBASE_CSF_GROUP_SUSPENDED;
2182 		}
2183 
2184 		update_offslot_non_idle_cnt_on_grp_suspend(group);
2185 	}
2186 }
2187 
2188 /* Clean up the CSG slot after it has been vacated, ready for next CSG run.
2189  * Return whether there is a kctx address fault associated with the group
2190  * for which the clean-up is done.
2191  */
2192 static bool cleanup_csg_slot(struct kbase_queue_group *group)
2193 {
2194 	struct kbase_context *kctx = group->kctx;
2195 	struct kbase_device *kbdev = kctx->kbdev;
2196 	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
2197 	struct kbase_csf_cmd_stream_group_info *ginfo;
2198 	s8 slot;
2199 	struct kbase_csf_csg_slot *csg_slot;
2200 	unsigned long flags;
2201 	u32 i;
2202 	bool as_fault = false;
2203 
2204 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
2205 
2206 	if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group)))
2207 		return as_fault;
2208 
2209 	slot = group->csg_nr;
2210 	csg_slot = &kbdev->csf.scheduler.csg_slots[slot];
2211 	ginfo = &global_iface->groups[slot];
2212 
2213 	/* Now loop through all the bound CSs, and clean them via a stop */
2214 	for (i = 0; i < ginfo->stream_num; i++) {
2215 		struct kbase_csf_cmd_stream_info *stream = &ginfo->streams[i];
2216 
2217 		if (group->bound_queues[i]) {
2218 			if (group->bound_queues[i]->enabled) {
2219 				kbase_csf_firmware_cs_input_mask(stream,
2220 					CS_REQ, CS_REQ_STATE_STOP,
2221 					CS_REQ_STATE_MASK);
2222 			}
2223 
2224 			unassign_user_doorbell_from_queue(kbdev,
2225 				group->bound_queues[i]);
2226 		}
2227 	}
2228 
2229 	unassign_user_doorbell_from_group(kbdev, group);
2230 
2231 	/* The CSG does not need cleanup other than dropping its AS */
2232 	spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags);
2233 	as_fault = kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT);
2234 	kbase_ctx_sched_release_ctx(kctx);
2235 	if (unlikely(group->faulted))
2236 		as_fault = true;
2237 	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
2238 
2239 	/* Now mark the slot as vacant */
2240 	spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
2241 	kbdev->csf.scheduler.csg_slots[slot].resident_group = NULL;
2242 	clear_bit(slot, kbdev->csf.scheduler.csg_slots_idle_mask);
2243 	KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
2244 				 kbdev->csf.scheduler.csg_slots_idle_mask[0]);
2245 
2246 	group->csg_nr = KBASEP_CSG_NR_INVALID;
2247 	set_bit(slot, kbdev->csf.scheduler.csgs_events_enable_mask);
2248 	clear_bit(slot, kbdev->csf.scheduler.csg_inuse_bitmap);
2249 	spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
2250 
2251 	csg_slot->trigger_jiffies = jiffies;
2252 	atomic_set(&csg_slot->state, CSG_SLOT_READY);
2253 
2254 	KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_CLEANED, group, slot);
2255 	dev_dbg(kbdev->dev, "Cleanup done for group %d on slot %d\n",
2256 		group->handle, slot);
2257 
2258 	KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG(kbdev,
2259 		kbdev->gpu_props.props.raw_props.gpu_id, slot);
2260 
2261 	return as_fault;
2262 }
2263 
2264 static void update_csg_slot_priority(struct kbase_queue_group *group, u8 prio)
2265 {
2266 	struct kbase_device *kbdev = group->kctx->kbdev;
2267 	struct kbase_csf_csg_slot *csg_slot;
2268 	struct kbase_csf_cmd_stream_group_info *ginfo;
2269 	s8 slot;
2270 	u8 prev_prio;
2271 	u32 ep_cfg;
2272 	u32 csg_req;
2273 	unsigned long flags;
2274 
2275 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
2276 
2277 	if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group)))
2278 		return;
2279 
2280 	slot = group->csg_nr;
2281 	csg_slot = &kbdev->csf.scheduler.csg_slots[slot];
2282 	ginfo = &kbdev->csf.global_iface.groups[slot];
2283 
2284 	/* CSGs remaining on-slot can be either idle or runnable.
2285 	 * This also applies in protected mode.
2286 	 */
2287 	WARN_ON(!((group->run_state == KBASE_CSF_GROUP_RUNNABLE) ||
2288 		(group->run_state == KBASE_CSF_GROUP_IDLE)));
2289 
2290 	/* Update consumes a group from scanout */
2291 	update_offslot_non_idle_cnt_for_onslot_grp(group);
2292 
2293 	if (csg_slot->priority == prio)
2294 		return;
2295 
2296 	/* Read the csg_ep_cfg back for updating the priority field */
2297 	ep_cfg = kbase_csf_firmware_csg_input_read(ginfo, CSG_EP_REQ);
2298 	prev_prio = CSG_EP_REQ_PRIORITY_GET(ep_cfg);
2299 	ep_cfg = CSG_EP_REQ_PRIORITY_SET(ep_cfg, prio);
2300 	kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ, ep_cfg);
2301 
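	/* The EP_CFG update uses a toggle-style handshake: the current CSG_ACK
	 * value is read and the EP_CFG bit is flipped in CSG_REQ, which asks
	 * the firmware to latch the updated endpoint configuration and to
	 * acknowledge by toggling the matching bit in CSG_ACK.
	 */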
2302 	spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
2303 	csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
2304 	csg_req ^= CSG_REQ_EP_CFG_MASK;
2305 	kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req,
2306 					  CSG_REQ_EP_CFG_MASK);
2307 	spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
2308 
2309 	csg_slot->priority = prio;
2310 
2311 	dev_dbg(kbdev->dev, "Priority for group %d of context %d_%d on slot %d to be updated from %u to %u\n",
2312 		group->handle, group->kctx->tgid, group->kctx->id, slot,
2313 		prev_prio, prio);
2314 
2315 	KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_PRIO_UPDATE, group, prev_prio);
2316 
2317 	kbase_csf_ring_csg_doorbell(kbdev, slot);
2318 	set_bit(slot, kbdev->csf.scheduler.csg_slots_prio_update);
2319 }
2320 
2321 static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
2322 		u8 prio)
2323 {
2324 	struct kbase_context *kctx = group->kctx;
2325 	struct kbase_device *kbdev = kctx->kbdev;
2326 	struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
2327 	const u64 shader_core_mask =
2328 		kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER);
2329 	const u64 tiler_core_mask =
2330 		kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_TILER);
2331 	const u64 compute_mask = shader_core_mask & group->compute_mask;
2332 	const u64 fragment_mask = shader_core_mask & group->fragment_mask;
2333 	const u64 tiler_mask = tiler_core_mask & group->tiler_mask;
2334 	const u8 num_cores = kbdev->gpu_props.num_cores;
2335 	const u8 compute_max = min(num_cores, group->compute_max);
2336 	const u8 fragment_max = min(num_cores, group->fragment_max);
2337 	const u8 tiler_max = min(CSG_TILER_MAX, group->tiler_max);
2338 	struct kbase_csf_cmd_stream_group_info *ginfo;
2339 	u32 ep_cfg = 0;
2340 	u32 csg_req;
2341 	u32 state;
2342 	int i;
2343 	unsigned long flags;
2344 	const u64 normal_suspend_buf =
2345 		group->normal_suspend_buf.reg->start_pfn << PAGE_SHIFT;
2346 	struct kbase_csf_csg_slot *csg_slot =
2347 		&kbdev->csf.scheduler.csg_slots[slot];
2348 
2349 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
2350 
2351 	if (WARN_ON(slot < 0) ||
2352 	    WARN_ON(slot >= global_iface->group_num))
2353 		return;
2354 
2355 	WARN_ON(atomic_read(&csg_slot->state) != CSG_SLOT_READY);
2356 
2357 	ginfo = &global_iface->groups[slot];
2358 
2359 	/* Pick an available address space for this context */
2360 	mutex_lock(&kbdev->mmu_hw_mutex);
2361 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
2362 	kbase_ctx_sched_retain_ctx(kctx);
2363 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
2364 	mutex_unlock(&kbdev->mmu_hw_mutex);
2365 
2366 	if (kctx->as_nr == KBASEP_AS_NR_INVALID) {
2367 		dev_warn(kbdev->dev, "Could not get a valid AS for group %d of context %d_%d on slot %d\n",
2368 			 group->handle, kctx->tgid, kctx->id, slot);
2369 		return;
2370 	}
2371 
2372 	spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
2373 	set_bit(slot, kbdev->csf.scheduler.csg_inuse_bitmap);
2374 	kbdev->csf.scheduler.csg_slots[slot].resident_group = group;
2375 	group->csg_nr = slot;
2376 	spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
2377 
2378 	assign_user_doorbell_to_group(kbdev, group);
2379 
2380 	/* Now loop through all the bound & kicked CSs, and program them */
2381 	for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) {
2382 		struct kbase_queue *queue = group->bound_queues[i];
2383 
2384 		if (queue)
2385 			program_cs(kbdev, queue, false);
2386 	}
2387 
2388 
2389 	/* Endpoint programming for CSG */
2390 	kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_COMPUTE_LO,
2391 				     compute_mask & U32_MAX);
2392 	kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_COMPUTE_HI,
2393 				     compute_mask >> 32);
2394 	kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_FRAGMENT_LO,
2395 				     fragment_mask & U32_MAX);
2396 	kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_FRAGMENT_HI,
2397 				     fragment_mask >> 32);
2398 	kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_OTHER,
2399 				     tiler_mask & U32_MAX);
2400 
2401 	/* Register group UID with firmware */
2402 	kbase_csf_firmware_csg_input(ginfo, CSG_ITER_TRACE_CONFIG,
2403 				     group->group_uid);
2404 
2405 	ep_cfg = CSG_EP_REQ_COMPUTE_EP_SET(ep_cfg, compute_max);
2406 	ep_cfg = CSG_EP_REQ_FRAGMENT_EP_SET(ep_cfg, fragment_max);
2407 	ep_cfg = CSG_EP_REQ_TILER_EP_SET(ep_cfg, tiler_max);
2408 	ep_cfg = CSG_EP_REQ_PRIORITY_SET(ep_cfg, prio);
2409 	kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ, ep_cfg);
2410 
2411 	/* Program the address space number assigned to the context */
2412 	kbase_csf_firmware_csg_input(ginfo, CSG_CONFIG, kctx->as_nr);
2413 
2414 	kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_LO,
2415 			normal_suspend_buf & U32_MAX);
2416 	kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_HI,
2417 			normal_suspend_buf >> 32);
2418 
2419 	if (group->protected_suspend_buf.reg) {
2420 		const u64 protm_suspend_buf =
2421 			group->protected_suspend_buf.reg->start_pfn <<
2422 				PAGE_SHIFT;
2423 		kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_LO,
2424 			protm_suspend_buf & U32_MAX);
2425 		kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_HI,
2426 			protm_suspend_buf >> 32);
2427 	}
2428 
2429 
2430 	/* Enable all interrupts for now */
2431 	kbase_csf_firmware_csg_input(ginfo, CSG_ACK_IRQ_MASK, ~((u32)0));
2432 
2433 	spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
2434 	csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
2435 	csg_req ^= CSG_REQ_EP_CFG_MASK;
2436 	kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req,
2437 					  CSG_REQ_EP_CFG_MASK);
2438 
2439 	/* Set state to START/RESUME */
2440 	if (queue_group_suspended_locked(group)) {
2441 		state = CSG_REQ_STATE_RESUME;
2442 	} else {
2443 		WARN_ON(group->run_state != KBASE_CSF_GROUP_RUNNABLE);
2444 		state = CSG_REQ_STATE_START;
2445 	}
2446 
2447 	kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ,
2448 			state, CSG_REQ_STATE_MASK);
2449 	spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
2450 
2451 	/* Update status before ringing the doorbell, marking ready => run */
2452 	atomic_set(&csg_slot->state, CSG_SLOT_READY2RUN);
2453 	csg_slot->trigger_jiffies = jiffies;
2454 	csg_slot->priority = prio;
2455 
2456 	/* Trace the programming of the CSG on the slot */
2457 	KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG(
2458 		kbdev, kbdev->gpu_props.props.raw_props.gpu_id, group->kctx->id,
2459 		group->handle, slot, (state == CSG_REQ_STATE_RESUME) ? 1 : 0);
2460 
2461 	dev_dbg(kbdev->dev, "Starting group %d of context %d_%d on slot %d with priority %u\n",
2462 		group->handle, kctx->tgid, kctx->id, slot, prio);
2463 
2464 	KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_START, group,
2465 				(((u64)ep_cfg) << 32) |
2466 				((((u32)kctx->as_nr) & 0xF) << 16) |
2467 				(state & (CSG_REQ_STATE_MASK >> CS_REQ_STATE_SHIFT)));
2468 
2469 	kbase_csf_ring_csg_doorbell(kbdev, slot);
2470 
2471 	/* Programming a slot consumes a group from scanout */
2472 	update_offslot_non_idle_cnt_for_onslot_grp(group);
2473 }
2474 
2475 static void remove_scheduled_group(struct kbase_device *kbdev,
2476 		struct kbase_queue_group *group)
2477 {
2478 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
2479 
2480 	lockdep_assert_held(&scheduler->lock);
2481 
2482 	WARN_ON(group->prepared_seq_num ==
2483 		KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID);
2484 	WARN_ON(list_empty(&group->link_to_schedule));
2485 
2486 	list_del_init(&group->link_to_schedule);
2487 	scheduler->ngrp_to_schedule--;
2488 	group->prepared_seq_num = KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID;
2489 	group->kctx->csf.sched.ngrp_to_schedule--;
2490 }
2491 
2492 static void sched_evict_group(struct kbase_queue_group *group, bool fault,
2493 			      bool update_non_idle_offslot_grps_cnt)
2494 {
2495 	struct kbase_context *kctx = group->kctx;
2496 	struct kbase_device *kbdev = kctx->kbdev;
2497 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
2498 
2499 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
2500 
2501 	if (queue_group_scheduled_locked(group)) {
2502 		u32 i;
2503 
2504 		if (update_non_idle_offslot_grps_cnt &&
2505 		    (group->run_state == KBASE_CSF_GROUP_SUSPENDED ||
2506 		     group->run_state == KBASE_CSF_GROUP_RUNNABLE)) {
2507 			int new_val = atomic_dec_return(
2508 				&scheduler->non_idle_offslot_grps);
2509 			KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC,
2510 						 group, new_val);
2511 		}
2512 
2513 		for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) {
2514 			if (group->bound_queues[i])
2515 				group->bound_queues[i]->enabled = false;
2516 		}
2517 
2518 		if (group->prepared_seq_num !=
2519 				KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID)
2520 			remove_scheduled_group(kbdev, group);
2521 
2522 		if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC)
2523 			remove_group_from_idle_wait(group);
2524 		else {
2525 			remove_group_from_runnable(scheduler, group,
2526 						KBASE_CSF_GROUP_INACTIVE);
2527 		}
2528 
2529 		WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE);
2530 
2531 		if (fault)
2532 			group->run_state = KBASE_CSF_GROUP_FAULT_EVICTED;
2533 
2534 		KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_EVICT_SCHED, group,
2535 				(((u64)scheduler->total_runnable_grps) << 32) |
2536 				((u32)group->run_state));
2537 		dev_dbg(kbdev->dev, "group %d exited scheduler, num_runnable_grps %d\n",
2538 			group->handle, scheduler->total_runnable_grps);
2539 		/* Notify a group has been evicted */
2540 		wake_up_all(&kbdev->csf.event_wait);
2541 	}
2542 }
2543 
2544 static int term_group_sync(struct kbase_queue_group *group)
2545 {
2546 	struct kbase_device *kbdev = group->kctx->kbdev;
2547 	long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
2548 	int err = 0;
2549 
2550 	term_csg_slot(group);
2551 
2552 	remaining = wait_event_timeout(kbdev->csf.event_wait,
2553 		csg_slot_stopped_locked(kbdev, group->csg_nr), remaining);
2554 
2555 	if (!remaining) {
2556 		dev_warn(kbdev->dev, "[%llu] term request timeout (%d ms) for group %d of context %d_%d on slot %d",
2557 			 kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
2558 			 group->handle, group->kctx->tgid,
2559 			 group->kctx->id, group->csg_nr);
2560 		if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
2561 			kbase_reset_gpu(kbdev);
2562 
2563 
2564 		err = -ETIMEDOUT;
2565 	}
2566 
2567 	return err;
2568 }
2569 
2570 void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group)
2571 {
2572 	struct kbase_device *kbdev = group->kctx->kbdev;
2573 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
2574 	bool on_slot;
2575 
2576 	kbase_reset_gpu_assert_failed_or_prevented(kbdev);
2577 	lockdep_assert_held(&group->kctx->csf.lock);
2578 	mutex_lock(&scheduler->lock);
2579 
2580 	KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_DESCHEDULE, group, group->run_state);
2581 	if (!queue_group_scheduled_locked(group))
2582 		goto unlock;
2583 
2584 	on_slot = kbasep_csf_scheduler_group_is_on_slot_locked(group);
2585 
2586 #ifdef KBASE_PM_RUNTIME
2587 	/* If the queue group is on slot and the Scheduler is in SLEEPING
2588 	 * state, then we need to wait here for the Scheduler to exit the sleep
2589 	 * state (i.e. wait for the runtime suspend or power down of GPU). This
2590 	 * is better than aborting the power down: the group will be suspended
2591 	 * on power down anyway, so there is no need to send the CSG termination
2592 	 * request to FW.
2593 	 */
2594 	if (on_slot && (scheduler->state == SCHED_SLEEPING)) {
2595 		if (wait_for_scheduler_to_exit_sleep(kbdev)) {
2596 			dev_warn(
2597 				kbdev->dev,
2598 				"Wait for scheduler to exit sleep state timed out when terminating group %d of context %d_%d on slot %d",
2599 				group->handle, group->kctx->tgid,
2600 				group->kctx->id, group->csg_nr);
2601 
2602 			scheduler_wakeup(kbdev, true);
2603 
2604 			/* Wait for MCU firmware to start running */
2605 			if (kbase_csf_scheduler_wait_mcu_active(kbdev))
2606 				dev_warn(
2607 					kbdev->dev,
2608 					"[%llu] Wait for MCU active failed when terminating group %d of context %d_%d on slot %d",
2609 					kbase_backend_get_cycle_cnt(kbdev),
2610 					group->handle, group->kctx->tgid,
2611 					group->kctx->id, group->csg_nr);
2612 		}
2613 
2614 		/* Check the group state again as scheduler lock would have been
2615 		 * released when waiting for the exit from SLEEPING state.
2616 		 */
2617 		if (!queue_group_scheduled_locked(group))
2618 			goto unlock;
2619 
2620 		on_slot = kbasep_csf_scheduler_group_is_on_slot_locked(group);
2621 	}
2622 #endif
2623 	if (!on_slot) {
2624 		sched_evict_group(group, false, true);
2625 	} else {
2626 		bool as_faulty;
2627 
2628 		term_group_sync(group);
2629 		/* Treat the CSG as terminated */
2630 		as_faulty = cleanup_csg_slot(group);
2631 		/* remove from the scheduler list */
2632 		sched_evict_group(group, as_faulty, false);
2633 	}
2634 
2635 	WARN_ON(queue_group_scheduled_locked(group));
2636 
2637 unlock:
2638 	mutex_unlock(&scheduler->lock);
2639 }
2640 
2641 /**
2642  * scheduler_group_schedule() - Schedule a GPU command queue group on firmware
2643  *
2644  * @group: Pointer to the queue group to be scheduled.
2645  *
2646  * This function would enable the scheduling of GPU command queue group on
2647  * firmware.
2648  *
2649  * Return: 0 on success, or negative on failure.
2650  */
2651 static int scheduler_group_schedule(struct kbase_queue_group *group)
2652 {
2653 	struct kbase_context *kctx = group->kctx;
2654 	struct kbase_device *kbdev = kctx->kbdev;
2655 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
2656 
2657 	lockdep_assert_held(&kctx->csf.lock);
2658 	lockdep_assert_held(&scheduler->lock);
2659 
2660 	KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SCHEDULE, group, group->run_state);
2661 	if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC)
2662 		update_idle_suspended_group_state(group);
2663 	else if (queue_group_idle_locked(group)) {
2664 		WARN_ON(kctx->csf.sched.num_runnable_grps == 0);
2665 		WARN_ON(kbdev->csf.scheduler.total_runnable_grps == 0);
2666 
2667 		if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE)
2668 			update_idle_suspended_group_state(group);
2669 		else {
2670 			struct kbase_queue_group *protm_grp;
2671 			unsigned long flags;
2672 
2673 			WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(
2674 				group));
2675 
2676 			group->run_state = KBASE_CSF_GROUP_RUNNABLE;
2677 
2678 			/* A normal mode CSG could be idle onslot during
2679 			 * protected mode. In this case clear the
2680 			 * appropriate bit in csg_slots_idle_mask.
2681 			 */
2682 			spin_lock_irqsave(&scheduler->interrupt_lock, flags);
2683 			protm_grp = scheduler->active_protm_grp;
2684 			if (protm_grp && protm_grp != group) {
2685 				clear_bit((unsigned int)group->csg_nr,
2686 					  scheduler->csg_slots_idle_mask);
2687 				KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
2688 					scheduler->csg_slots_idle_mask[0]);
2689 			}
2690 			spin_unlock_irqrestore(&scheduler->interrupt_lock,
2691 					       flags);
2692 
2693 			/* If GPU is in protected mode then any doorbells rang
2694 			 * would have no effect. Check if GPU is in protected
2695 			 * mode and if this group has higher priority than the
2696 			 * active protected mode group. If so prompt the FW
2697 			 * to exit protected mode.
2698 			 */
2699 			if (protm_grp &&
2700 			    group->scan_seq_num < protm_grp->scan_seq_num) {
2701 				/* Prompt the FW to exit protected mode */
2702 				scheduler_force_protm_exit(kbdev);
2703 			}
2704 		}
2705 	} else if (!queue_group_scheduled_locked(group)) {
2706 		int new_val;
2707 		insert_group_to_runnable(&kbdev->csf.scheduler, group,
2708 			KBASE_CSF_GROUP_RUNNABLE);
2709 		/* A new group into the scheduler */
2710 		new_val = atomic_inc_return(
2711 			&kbdev->csf.scheduler.non_idle_offslot_grps);
2712 		KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC,
2713 					 group, new_val);
2714 	}
2715 
2716 	/* Since a group has become active now, check if GPU needs to be
2717 	 * powered up. Also rekick the Scheduler.
2718 	 */
2719 	scheduler_wakeup(kbdev, true);
2720 
2721 	return 0;
2722 }
2723 
2724 /**
2725  * set_max_csg_slots() - Set the number of available CSG slots
2726  *
2727  * @kbdev: Pointer of the GPU device.
2728  *
2729  * This function would set/limit the number of CSG slots that
2730  * can be used in the given tick/tock. It would be less than the total CSG
2731  * slots supported by firmware if the number of GPU address space slots
2732  * required to utilize all the CSG slots is more than the available
2733  * address space slots.
2734  */
2735 static inline void set_max_csg_slots(struct kbase_device *kbdev)
2736 {
2737 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
2738 	unsigned int total_csg_slots = kbdev->csf.global_iface.group_num;
2739 	unsigned int max_address_space_slots =
2740 			kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS;
2741 
2742 	WARN_ON(scheduler->num_active_address_spaces > total_csg_slots);
2743 
2744 	if (likely(scheduler->num_active_address_spaces <=
2745 		   max_address_space_slots))
2746 		scheduler->num_csg_slots_for_tick = total_csg_slots;
2747 }
2748 
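/*
 * Illustrative example (assumed figures): with 8 HW address spaces and one
 * reserved for the MCU, max_address_space_slots is 7. If at most 7 contexts
 * have groups to schedule, every CSG slot can be used in the tick/tock;
 * otherwise num_csg_slots_for_tick keeps the smaller value accumulated by
 * count_active_address_space() during scanout.
 */
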
2749 /**
2750  * count_active_address_space() - Count the number of GPU address space slots
2751  *
2752  * @kbdev: Pointer of the GPU device.
2753  * @kctx: Pointer of the Kbase context.
2754  *
2755  * This function would update the counter that is tracking the number of GPU
2756  * address space slots that would be required to program the CS
2757  * group slots from the groups at the head of groups_to_schedule list.
2758  */
2759 static inline void count_active_address_space(struct kbase_device *kbdev,
2760 		struct kbase_context *kctx)
2761 {
2762 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
2763 	unsigned int total_csg_slots = kbdev->csf.global_iface.group_num;
2764 	unsigned int max_address_space_slots =
2765 			kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS;
2766 
2767 	if (scheduler->ngrp_to_schedule <= total_csg_slots) {
2768 		if (kctx->csf.sched.ngrp_to_schedule == 1)
2769 			scheduler->num_active_address_spaces++;
2770 
2771 		if (scheduler->num_active_address_spaces <=
2772 		    max_address_space_slots)
2773 			scheduler->num_csg_slots_for_tick++;
2774 	}
2775 }
2776 
2777 /* Two schemes are used in assigning the priority to CSG slots for a given
2778  * CSG from the 'groups_to_schedule' list.
2779  * This is needed as an idle on-slot group is deprioritized by moving it to
2780  * the tail of 'groups_to_schedule' list. As a result it can either get
2781  * evicted from the CSG slot in the current tick/tock, or its position
2782  * can be after the lower priority non-idle groups in the 'groups_to_schedule'
2783  * list. The latter case can result in the on-slot subset containing both
2784  * non-idle and idle CSGs, and is handled through the 2nd scheme described
2785  * below.
2786  *
2787  * First scheme :- If all the slots are going to be occupied by the non-idle or
2788  * idle groups, then a simple assignment of the priority is done as per the
2789  * position of a group in the 'groups_to_schedule' list. So maximum priority
2790  * gets assigned to the slot of a group which is at the head of the list.
2791  * Here the 'groups_to_schedule' list would effectively be ordered as per the
2792  * static priority of groups.
2793  *
2794  * Second scheme :- If the slots are going to be occupied by a mix of idle and
2795  * non-idle groups then the priority assignment needs to ensure that the
2796  * priority of a slot belonging to a higher priority idle group will always be
2797  * greater than the priority of a slot belonging to a lower priority non-idle
2798  * group, reflecting the original position of a group in the scan order (i.e
2799  * static priority) 'scan_seq_num', which is set during the prepare phase of a
2800  * tick/tock before the group is moved to 'idle_groups_to_schedule' list if it
2801  * is idle.
2802  * The priority range [MAX_CSG_SLOT_PRIORITY, 0] is partitioned so that the
2803  * first 'slots_for_tick' groups in the original scan order get a priority in
2804  * the subrange [MAX_CSG_SLOT_PRIORITY, MAX_CSG_SLOT_PRIORITY - slots_for_tick),
2805  * whereas the rest of the groups are assigned a priority in the subrange
2806  * [MAX_CSG_SLOT_PRIORITY - slots_for_tick, 0]. This way even if an idle higher
2807  * priority group ends up after the non-idle lower priority groups in the
2808  * 'groups_to_schedule' list, it will get a higher slot priority. And this will
2809  * enable the FW to quickly start the execution of higher priority group when it
2810  * gets de-idled.
2811  */
2812 static u8 get_slot_priority(struct kbase_queue_group *group)
2813 {
2814 	struct kbase_csf_scheduler *scheduler =
2815 		&group->kctx->kbdev->csf.scheduler;
2816 	u8 slot_prio;
2817 	u32 slots_for_tick = scheduler->num_csg_slots_for_tick;
2818 	u32 used_slots = slots_for_tick - scheduler->remaining_tick_slots;
2819 	/* Check if all the slots are going to be occupied by the non-idle or
2820 	 * idle groups.
2821 	 */
2822 	if (scheduler->non_idle_scanout_grps >= slots_for_tick ||
2823 	    !scheduler->non_idle_scanout_grps) {
2824 		slot_prio = (u8)(MAX_CSG_SLOT_PRIORITY - used_slots);
2825 	} else {
2826 		/* There will be a mix of idle and non-idle groups. */
2827 		if (group->scan_seq_num < slots_for_tick)
2828 			slot_prio = (u8)(MAX_CSG_SLOT_PRIORITY -
2829 					 group->scan_seq_num);
2830 		else if (MAX_CSG_SLOT_PRIORITY > (slots_for_tick + used_slots))
2831 			slot_prio = (u8)(MAX_CSG_SLOT_PRIORITY - (slots_for_tick + used_slots));
2832 		else
2833 			slot_prio = 0;
2834 	}
2835 	return slot_prio;
2836 }
2837 
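/*
 * Worked example (illustrative numbers): with MAX_CSG_SLOT_PRIORITY of 15,
 * slots_for_tick == 4 and a mix of idle and non-idle groups, a group with
 * scan_seq_num == 1 gets slot priority 15 - 1 = 14, while a group with
 * scan_seq_num >= 4 gets 15 - (4 + used_slots), clamped at 0.
 */
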
2838 /**
2839  * update_resident_groups_priority() - Update the priority of resident groups
2840  *
2841  * @kbdev:    The GPU device.
2842  *
2843  * This function will update the priority of all resident queue groups
2844  * that are at the head of groups_to_schedule list, preceding the first
2845  * non-resident group.
2846  *
2847  * This function will also adjust kbase_csf_scheduler.remaining_tick_slots on
2848  * the priority update.
2849  */
2850 static void update_resident_groups_priority(struct kbase_device *kbdev)
2851 {
2852 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
2853 	u32 num_groups = scheduler->num_csg_slots_for_tick;
2854 
2855 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
2856 	while (!list_empty(&scheduler->groups_to_schedule)) {
2857 		struct kbase_queue_group *group =
2858 			list_first_entry(&scheduler->groups_to_schedule,
2859 					struct kbase_queue_group,
2860 					 link_to_schedule);
2861 		bool resident =
2862 			kbasep_csf_scheduler_group_is_on_slot_locked(group);
2863 
2864 		if ((group->prepared_seq_num >= num_groups) || !resident)
2865 			break;
2866 
2867 		update_csg_slot_priority(group,
2868 					 get_slot_priority(group));
2869 
2870 		/* Drop the head group from the list */
2871 		remove_scheduled_group(kbdev, group);
2872 		scheduler->remaining_tick_slots--;
2873 	}
2874 }
2875 
2876 /**
2877  * program_group_on_vacant_csg_slot() - Program a non-resident group on the
2878  *                                      given vacant CSG slot.
2879  * @kbdev:    Pointer to the GPU device.
2880  * @slot:     Vacant CSG slot number.
2881  *
2882  * This function will program a non-resident group at the head of
2883  * kbase_csf_scheduler.groups_to_schedule list on the given vacant
2884  * CSG slot, provided the initial position of the non-resident
2885  * group in the list is less than the number of CSG slots and there is
2886  * an available GPU address space slot.
2887  * kbase_csf_scheduler.remaining_tick_slots would also be adjusted after
2888  * programming the slot.
2889  */
2890 static void program_group_on_vacant_csg_slot(struct kbase_device *kbdev,
2891 		s8 slot)
2892 {
2893 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
2894 	struct kbase_queue_group *const group =
2895 		list_empty(&scheduler->groups_to_schedule) ? NULL :
2896 			list_first_entry(&scheduler->groups_to_schedule,
2897 					struct kbase_queue_group,
2898 					link_to_schedule);
2899 	u32 num_groups = scheduler->num_csg_slots_for_tick;
2900 
2901 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
2902 	if (group && (group->prepared_seq_num < num_groups)) {
2903 		bool ret = kbasep_csf_scheduler_group_is_on_slot_locked(group);
2904 
2905 		if (!WARN_ON(ret)) {
2906 			if (kctx_as_enabled(group->kctx) && !group->faulted) {
2907 				program_csg_slot(group, slot,
2908 					get_slot_priority(group));
2909 
2910 				if (likely(csg_slot_in_use(kbdev, slot))) {
2911 					/* Drop the head group from the list */
2912 					remove_scheduled_group(kbdev, group);
2913 					scheduler->remaining_tick_slots--;
2914 				}
2915 			} else {
2916 				update_offslot_non_idle_cnt_for_faulty_grp(
2917 					group);
2918 				remove_scheduled_group(kbdev, group);
2919 			}
2920 		}
2921 	}
2922 }
2923 
2924 /**
2925  * program_vacant_csg_slot() - Program the vacant CSG slot with a non-resident
2926  *                             group and update the priority of resident groups.
2927  *
2928  * @kbdev:    Pointer to the GPU device.
2929  * @slot:     Vacant CSG slot number.
2930  *
2931  * This function will first update the priority of all resident queue groups
2932  * that are at the head of groups_to_schedule list, preceding the first
2933  * non-resident group; it will then try to program the given CSG
2934  * slot with the non-resident group. Finally, it updates the priority of all
2935  * resident queue groups following the non-resident group.
2936  *
2937  * kbase_csf_scheduler.remaining_tick_slots would also be adjusted.
2938  */
2939 static void program_vacant_csg_slot(struct kbase_device *kbdev, s8 slot)
2940 {
2941 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
2942 	struct kbase_csf_csg_slot *const csg_slot =
2943 				scheduler->csg_slots;
2944 
2945 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
2946 	WARN_ON(atomic_read(&csg_slot[slot].state) != CSG_SLOT_READY);
2947 
2948 	/* First update priority for already resident groups (if any)
2949 	 * before the non-resident group
2950 	 */
2951 	update_resident_groups_priority(kbdev);
2952 
2953 	/* Now consume the vacant slot for the non-resident group */
2954 	program_group_on_vacant_csg_slot(kbdev, slot);
2955 
2956 	/* Now update priority for already resident groups (if any)
2957 	 * following the non-resident group
2958 	 */
2959 	update_resident_groups_priority(kbdev);
2960 }
2961 
2962 static bool slots_state_changed(struct kbase_device *kbdev,
2963 		unsigned long *slots_mask,
2964 		bool (*state_check_func)(struct kbase_device *, s8))
2965 {
2966 	u32 num_groups = kbdev->csf.global_iface.group_num;
2967 	DECLARE_BITMAP(changed_slots, MAX_SUPPORTED_CSGS) = {0};
2968 	bool changed = false;
2969 	u32 i;
2970 
2971 	for_each_set_bit(i, slots_mask, num_groups) {
2972 		if (state_check_func(kbdev, (s8)i)) {
2973 			set_bit(i, changed_slots);
2974 			changed = true;
2975 		}
2976 	}
2977 
2978 	if (changed)
2979 		bitmap_copy(slots_mask, changed_slots, MAX_SUPPORTED_CSGS);
2980 
2981 	return changed;
2982 }
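
/*
 * Illustrative sketch (not driver code): the slots_state_changed() pattern
 * above, reduced to a 64-bit mask in standalone C (so it assumes at most 64
 * slots). Slots whose state-check callback reports a change are collected
 * into a new mask which, if non-empty, replaces the caller's mask so that
 * only the changed slots are examined afterwards. All names here are
 * invented for the example.
 */
#include <stdbool.h>
#include <stdint.h>

static bool example_slots_state_changed(uint64_t *slots_mask, uint32_t num_slots,
					bool (*check)(uint32_t slot))
{
	uint64_t changed_slots = 0;
	uint32_t i;

	for (i = 0; i < num_slots; i++) {
		if ((*slots_mask & (UINT64_C(1) << i)) && check(i))
			changed_slots |= UINT64_C(1) << i;
	}

	if (changed_slots)
		*slots_mask = changed_slots;	/* narrow to the changed subset */

	return changed_slots != 0;
}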
2983 
2984 /**
2985  * program_suspending_csg_slots() - Program the CSG slots vacated on suspension
2986  *                                  of queue groups running on them.
2987  *
2988  * @kbdev:    Pointer to the GPU device.
2989  *
2990  * This function will first wait for the ongoing suspension to complete on a
2991  * CSG slot and will then program the vacant slot with the
2992  * non-resident queue group inside the groups_to_schedule list.
2993  * The programming of the non-resident queue group on the vacant slot could
2994  * fail due to the unavailability of a free GPU address space slot, so the
2995  * programming is re-attempted after the ongoing suspension has completed
2996  * for all the CSG slots.
2997  * The priority of resident groups before and after the non-resident group
2998  * in the groups_to_schedule list would also be updated.
2999  * This would be repeated for all the slots undergoing suspension.
3000  * GPU reset would be initiated if the wait for suspend times out.
3001  */
3002 static void program_suspending_csg_slots(struct kbase_device *kbdev)
3003 {
3004 	u32 num_groups = kbdev->csf.global_iface.group_num;
3005 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3006 	DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS);
3007 	DECLARE_BITMAP(evicted_mask, MAX_SUPPORTED_CSGS) = {0};
3008 	bool suspend_wait_failed = false;
3009 	long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
3010 
3011 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
3012 
3013 	/* In the current implementation, csgs_events_enable_mask would be used
3014 	 * only to indicate suspending CSGs.
3015 	 */
3016 	bitmap_complement(slot_mask, scheduler->csgs_events_enable_mask,
3017 		MAX_SUPPORTED_CSGS);
3018 
3019 	while (!bitmap_empty(slot_mask, MAX_SUPPORTED_CSGS)) {
3020 		DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS);
3021 
3022 		bitmap_copy(changed, slot_mask, MAX_SUPPORTED_CSGS);
3023 
3024 		remaining = wait_event_timeout(kbdev->csf.event_wait,
3025 			slots_state_changed(kbdev, changed,
3026 				csg_slot_stopped_raw),
3027 			remaining);
3028 
3029 		if (remaining) {
3030 			u32 i;
3031 
3032 			for_each_set_bit(i, changed, num_groups) {
3033 				struct kbase_queue_group *group =
3034 					scheduler->csg_slots[i].resident_group;
3035 
3036 				if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i))) {
3037 					continue;
3038 				}
3039 				/* The on slot csg is now stopped */
3040 				clear_bit(i, slot_mask);
3041 
3042 				if (likely(group)) {
3043 					bool as_fault;
3044 					/* Only do save/cleanup if the
3045 					 * group is not terminated during
3046 					 * the sleep.
3047 					 */
3048 					save_csg_slot(group);
3049 					as_fault = cleanup_csg_slot(group);
3050 					/* If AS fault detected, evict it */
3051 					if (as_fault) {
3052 						sched_evict_group(group, true, true);
3053 						set_bit(i, evicted_mask);
3054 					}
3055 				}
3056 
3057 				program_vacant_csg_slot(kbdev, (s8)i);
3058 			}
3059 		} else {
3060 			u32 i;
3061 
3062 			/* Groups that have failed to suspend in time shall
3063 			 * raise a fatal error as they could no longer be
3064 			 * safely resumed.
3065 			 */
3066 			for_each_set_bit(i, slot_mask, num_groups) {
3067 				struct kbase_queue_group *const group =
3068 					scheduler->csg_slots[i].resident_group;
3069 
3070 				struct base_gpu_queue_group_error const
3071 					err_payload = { .error_type =
3072 								BASE_GPU_QUEUE_GROUP_ERROR_FATAL,
3073 							.payload = {
3074 								.fatal_group = {
3075 									.status =
3076 										GPU_EXCEPTION_TYPE_SW_FAULT_2,
3077 								} } };
3078 
3079 				if (unlikely(group == NULL))
3080 					continue;
3081 
3082 				kbase_csf_add_group_fatal_error(group,
3083 								&err_payload);
3084 				kbase_event_wakeup(group->kctx);
3085 
3086 				/* TODO GPUCORE-25328: The CSG can't be
3087 				 * terminated, the GPU will be reset as a
3088 				 * work-around.
3089 				 */
3090 				dev_warn(
3091 					kbdev->dev,
3092 					"[%llu] Group %d of context %d_%d on slot %u failed to suspend (timeout %d ms)",
3093 					kbase_backend_get_cycle_cnt(kbdev),
3094 					group->handle, group->kctx->tgid,
3095 					group->kctx->id, i,
3096 					kbdev->csf.fw_timeout_ms);
3097 
3098 				/* The group has failed suspension, stop
3099 				 * further examination.
3100 				 */
3101 				clear_bit(i, slot_mask);
3102 				set_bit(i, scheduler->csgs_events_enable_mask);
3103 				update_offslot_non_idle_cnt_for_onslot_grp(
3104 					group);
3105 			}
3106 
3107 			suspend_wait_failed = true;
3108 		}
3109 	}
3110 
3111 	if (!bitmap_empty(evicted_mask, MAX_SUPPORTED_CSGS))
3112 		dev_info(kbdev->dev, "Scheduler evicting slots: 0x%*pb\n",
3113 			 num_groups, evicted_mask);
3114 
3115 	if (likely(!suspend_wait_failed)) {
3116 		u32 i;
3117 
3118 		while (scheduler->ngrp_to_schedule &&
3119 		       scheduler->remaining_tick_slots) {
3120 			i = find_first_zero_bit(scheduler->csg_inuse_bitmap,
3121 					num_groups);
3122 			if (WARN_ON(i == num_groups))
3123 				break;
3124 			program_vacant_csg_slot(kbdev, (s8)i);
3125 			if (!csg_slot_in_use(kbdev, (int)i)) {
3126 				dev_warn(kbdev->dev, "Couldn't use CSG slot %d despite being vacant", i);
3127 				break;
3128 			}
3129 		}
3130 	} else {
3131 		if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
3132 			kbase_reset_gpu(kbdev);
3133 	}
3134 }
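
/*
 * Illustrative sketch (not driver code): the book-keeping used by
 * program_suspending_csg_slots() above, on a 64-bit mask (at most 64 slots
 * assumed). The set of slots undergoing suspension is the complement of the
 * events-enable mask (see suspend_queue_group() below, which clears one bit
 * per suspending CSG); each slot that reports "stopped" is then removed from
 * the pending set until it drains. Names are invented for the example, and
 * the real code bounds the wait with a firmware timeout rather than a poll
 * budget.
 */
#include <stdbool.h>
#include <stdint.h>

static uint64_t example_suspending_set(uint64_t events_enable_mask, uint32_t num_slots)
{
	const uint64_t valid = (num_slots >= 64) ? ~UINT64_C(0) :
				((UINT64_C(1) << num_slots) - 1);

	return ~events_enable_mask & valid;
}

static bool example_drain_suspending(uint64_t pending, uint32_t num_slots,
				     unsigned int max_polls,
				     bool (*slot_stopped)(uint32_t slot),
				     void (*reprogram_slot)(uint32_t slot))
{
	while (pending && max_polls--) {
		uint32_t i;

		for (i = 0; i < num_slots; i++) {
			if ((pending & (UINT64_C(1) << i)) && slot_stopped(i)) {
				pending &= ~(UINT64_C(1) << i);
				reprogram_slot(i);	/* hand the vacated slot on */
			}
		}
	}
	return pending == 0;	/* false mirrors the reset path on timeout */
}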
3135 
3136 static void suspend_queue_group(struct kbase_queue_group *group)
3137 {
3138 	unsigned long flags;
3139 	struct kbase_csf_scheduler *const scheduler =
3140 		&group->kctx->kbdev->csf.scheduler;
3141 
3142 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
3143 	/* This shall be used in program_suspending_csg_slots() where we
3144 	 * assume that whilst CSGs are being suspended, this bitmask is not
3145 	 * used by anything else, i.e. it indicates only the CSGs going
3146 	 * through suspension.
3147 	 */
3148 	clear_bit(group->csg_nr, scheduler->csgs_events_enable_mask);
3149 	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
3150 
3151 	/* If AS fault detected, terminate the group */
3152 	if (!kctx_as_enabled(group->kctx) || group->faulted)
3153 		term_csg_slot(group);
3154 	else
3155 		suspend_csg_slot(group);
3156 }
3157 
3158 static void wait_csg_slots_start(struct kbase_device *kbdev)
3159 {
3160 	u32 num_groups = kbdev->csf.global_iface.group_num;
3161 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3162 	long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
3163 	DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0};
3164 	u32 i;
3165 
3166 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
3167 
3168 	/* extract start slot flags for check */
3169 	for (i = 0; i < num_groups; i++) {
3170 		if (atomic_read(&scheduler->csg_slots[i].state) ==
3171 		    CSG_SLOT_READY2RUN)
3172 			set_bit(i, slot_mask);
3173 	}
3174 
3175 	while (!bitmap_empty(slot_mask, MAX_SUPPORTED_CSGS)) {
3176 		DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS);
3177 
3178 		bitmap_copy(changed, slot_mask, MAX_SUPPORTED_CSGS);
3179 
3180 		remaining = wait_event_timeout(kbdev->csf.event_wait,
3181 			slots_state_changed(kbdev, changed, csg_slot_running),
3182 			remaining);
3183 
3184 		if (remaining) {
3185 			for_each_set_bit(i, changed, num_groups) {
3186 				struct kbase_queue_group *group =
3187 					scheduler->csg_slots[i].resident_group;
3188 
3189 				/* The on slot csg is now running */
3190 				clear_bit(i, slot_mask);
3191 				group->run_state = KBASE_CSF_GROUP_RUNNABLE;
3192 			}
3193 		} else {
3194 			dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for CSG slots to start, slots: 0x%*pb\n",
3195 				 kbase_backend_get_cycle_cnt(kbdev),
3196 				 kbdev->csf.fw_timeout_ms,
3197 				 num_groups, slot_mask);
3198 
3199 			if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
3200 				kbase_reset_gpu(kbdev);
3201 			break;
3202 		}
3203 	}
3204 }
3205 
3206 /**
3207  * group_on_slot_is_idle() - Check if the given slot has a CSG-idle state
3208  *                           flagged after the completion of a CSG status
3209  *                           update command
3210  *
3211  * @kbdev:  Pointer to the GPU device.
3212  * @slot:   The given slot for checking an occupying resident group's idle
3213  *          state.
3214  *
3215  * This function is called at the start of scheduling tick to check the
3216  * idle status of a queue group resident on a CSG slot.
3217  * The caller must make sure the corresponding status update command has
3218  * been called and completed before checking this status.
3219  *
3220  * Return: true if the group resident on slot is idle, otherwise false.
3221  */
3222 static bool group_on_slot_is_idle(struct kbase_device *kbdev,
3223 				  unsigned long slot)
3224 {
3225 	struct kbase_csf_cmd_stream_group_info *ginfo =
3226 					&kbdev->csf.global_iface.groups[slot];
3227 	bool idle = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) &
3228 			CSG_STATUS_STATE_IDLE_MASK;
3229 
3230 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
3231 
3232 	return idle;
3233 }
3234 
3235 /**
3236  * slots_update_state_changed() -  Check the handshake state of a subset of
3237  *                                 command group slots.
3238  *
3239  * @kbdev:          The GPU device.
3240  * @field_mask:     The field mask for checking the state in the csg_req/ack.
3241  * @slots_mask:     A bit_map specifying the slots to check.
3242  * @slots_done:     A cleared bit_map for returning the slots that
3243  *                  have finished update.
3244  *
3245  * Checks the state of a subset of slots selected through the slots_mask
3246  * bit_map. Records which slots' handshakes have completed and returns
3247  * them in the slots_done bit_map.
3248  *
3249  * Return: true if the slots_done is set for at least one slot.
3250  *         Otherwise false.
3251  */
3252 static
3253 bool slots_update_state_changed(struct kbase_device *kbdev, u32 field_mask,
3254 		const unsigned long *slots_mask, unsigned long *slots_done)
3255 {
3256 	u32 num_groups = kbdev->csf.global_iface.group_num;
3257 	bool changed = false;
3258 	u32 i;
3259 
3260 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
3261 
3262 	for_each_set_bit(i, slots_mask, num_groups) {
3263 		struct kbase_csf_cmd_stream_group_info const *const ginfo =
3264 					    &kbdev->csf.global_iface.groups[i];
3265 		u32 state = kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ);
3266 
3267 		state ^= kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
3268 
3269 		if (!(state & field_mask)) {
3270 			set_bit(i, slots_done);
3271 			changed = true;
3272 		}
3273 	}
3274 
3275 	return changed;
3276 }
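
/*
 * Illustrative sketch (not driver code): the request/acknowledge handshake
 * test used by slots_update_state_changed() above. A request field is
 * considered acknowledged once the REQ and ACK registers agree in the bits
 * selected by field_mask, i.e. their XOR has no bit of the field set. Plain
 * integers stand in for the CSG interface registers.
 */
#include <stdbool.h>
#include <stdint.h>

static bool example_field_acked(uint32_t csg_req, uint32_t csg_ack, uint32_t field_mask)
{
	return ((csg_req ^ csg_ack) & field_mask) == 0;
}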
3277 
3278 /**
3279  * wait_csg_slots_handshake_ack - Wait the req/ack handshakes to complete on
3280  *                                the specified groups.
3281  *
3282  * @kbdev:           Pointer to the GPU device.
3283  * @field_mask:      The field mask for checking the state in the csg_req/ack.
3284  * @slot_mask:       Bitmap reflecting the slots, the function will modify
3285  *                   the acknowledged slots by clearing their corresponding
3286  *                   bits.
3287  * @wait_in_jiffies: Wait duration in jiffies, controlling the time-out.
3288  *
3289  * This function waits for the acknowledgment of the requests that have
3290  * already been placed for the CSG slots by the caller. Currently used for
3291  * the CSG priority update and status update requests.
3292  *
3293  * Return: 0 if all specified slots are acknowledged; otherwise -ETIMEDOUT.
3294  *         On timeout, the bits of the unacknowledged slots remain set in
3295  *         the slot_mask.
3296  */
3297 static int wait_csg_slots_handshake_ack(struct kbase_device *kbdev,
3298 		u32 field_mask, unsigned long *slot_mask, long wait_in_jiffies)
3299 {
3300 	const u32 num_groups = kbdev->csf.global_iface.group_num;
3301 	long remaining = wait_in_jiffies;
3302 
3303 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
3304 
3305 	while (!bitmap_empty(slot_mask, num_groups) &&
3306 	       !kbase_reset_gpu_is_active(kbdev)) {
3307 		DECLARE_BITMAP(dones, MAX_SUPPORTED_CSGS) = { 0 };
3308 
3309 		remaining = wait_event_timeout(kbdev->csf.event_wait,
3310 				slots_update_state_changed(kbdev, field_mask,
3311 						   slot_mask, dones),
3312 				remaining);
3313 
3314 		if (remaining)
3315 			bitmap_andnot(slot_mask, slot_mask, dones, num_groups);
3316 		else {
3317 
3318 
3319 			/* Timed-out on the wait */
3320 			return -ETIMEDOUT;
3321 		}
3322 	}
3323 
3324 	return 0;
3325 }
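
/*
 * Illustrative sketch (not driver code): the wait pattern of
 * wait_csg_slots_handshake_ack() above, on a 64-bit slot mask. Each poll
 * removes the freshly acknowledged slots from the pending mask; if the retry
 * budget runs out first, the remaining bits identify the slots that never
 * acknowledged, mirroring the -ETIMEDOUT return. Names are invented for the
 * example.
 */
#include <stdint.h>

static int example_wait_acks(uint64_t *slot_mask, unsigned int max_polls,
			     uint64_t (*poll_acked_slots)(uint64_t pending))
{
	while (*slot_mask) {
		uint64_t done;

		if (!max_polls--)
			return -1;	/* stands in for -ETIMEDOUT */

		done = poll_acked_slots(*slot_mask);
		*slot_mask &= ~done;
	}
	return 0;
}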
3326 
3327 static void wait_csg_slots_finish_prio_update(struct kbase_device *kbdev)
3328 {
3329 	unsigned long *slot_mask =
3330 			kbdev->csf.scheduler.csg_slots_prio_update;
3331 	long wait_time = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
3332 	int ret = wait_csg_slots_handshake_ack(kbdev, CSG_REQ_EP_CFG_MASK,
3333 					       slot_mask, wait_time);
3334 
3335 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
3336 
3337 	if (ret != 0) {
3338 		/* The update timeout is not regarded as a serious
3339 		 * issue, no major consequences are expected as a
3340 	 * result, so just warn about it.
3341 		 */
3342 		dev_warn(
3343 			kbdev->dev,
3344 			"[%llu] Timeout (%d ms) on CSG_REQ:EP_CFG, skipping the update wait: slot mask=0x%lx",
3345 			kbase_backend_get_cycle_cnt(kbdev),
3346 			kbdev->csf.fw_timeout_ms,
3347 			slot_mask[0]);
3348 	}
3349 }
3350 
3351 void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev,
3352 		struct kbase_context *kctx, struct list_head *evicted_groups)
3353 {
3354 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3355 	struct kbase_queue_group *group;
3356 	u32 num_groups = kbdev->csf.global_iface.group_num;
3357 	u32 slot;
3358 	DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0};
3359 
3360 	lockdep_assert_held(&kctx->csf.lock);
3361 	mutex_lock(&scheduler->lock);
3362 
3363 	/* This code is only called during reset, so we don't wait for the CSG
3364 	 * slots to be stopped
3365 	 */
3366 	WARN_ON(!kbase_reset_gpu_is_active(kbdev));
3367 
3368 	KBASE_KTRACE_ADD(kbdev, EVICT_CTX_SLOTS, kctx, 0u);
3369 	for (slot = 0; slot < num_groups; slot++) {
3370 		group = kbdev->csf.scheduler.csg_slots[slot].resident_group;
3371 		if (group && group->kctx == kctx) {
3372 			bool as_fault;
3373 
3374 			term_csg_slot(group);
3375 			as_fault = cleanup_csg_slot(group);
3376 			/* remove the group from the scheduler list */
3377 			sched_evict_group(group, as_fault, false);
3378 			/* return the evicted group to the caller */
3379 			list_add_tail(&group->link, evicted_groups);
3380 			set_bit(slot, slot_mask);
3381 		}
3382 	}
3383 
3384 	dev_info(kbdev->dev, "Evicting context %d_%d slots: 0x%*pb\n",
3385 			kctx->tgid, kctx->id, num_groups, slot_mask);
3386 
3387 	mutex_unlock(&scheduler->lock);
3388 }
3389 
3390 /**
3391  * scheduler_slot_protm_ack - Acknowledging the protected region requests
3392  * from the resident group on a given slot.
3393  *
3394  * @kbdev:  Pointer to the GPU device.
3395  * @group:  Pointer to the resident group on the given slot.
3396  * @slot:   The slot that the given group is actively operating on.
3397  *
3398  * The function assumes that the given slot is in a stable running state and
3399  * that the caller has already determined that any pending protected region
3400  * requests of the resident group should be acknowledged.
3401  *
3402  * Return: true if the group has pending protm request(s) which were
3403  *         acknowledged; the caller should then arrange to enter protected
3404  *         mode to service them. Otherwise false, indicating the group has
3405  *         no pending protm request.
3406  */
3407 static bool scheduler_slot_protm_ack(struct kbase_device *const kbdev,
3408 		struct kbase_queue_group *const group,
3409 		const int slot)
3410 {
3411 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
3412 	bool protm_ack = false;
3413 	struct kbase_csf_cmd_stream_group_info *ginfo =
3414 		&kbdev->csf.global_iface.groups[slot];
3415 	u32 max_csi;
3416 	int i;
3417 
3418 	if (WARN_ON(scheduler->csg_slots[slot].resident_group != group))
3419 		return protm_ack;
3420 
3421 	lockdep_assert_held(&scheduler->lock);
3422 	lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.interrupt_lock);
3423 
3424 	max_csi = ginfo->stream_num;
3425 	for (i = find_first_bit(group->protm_pending_bitmap, max_csi);
3426 	     i < max_csi;
3427 	     i = find_next_bit(group->protm_pending_bitmap, max_csi, i + 1)) {
3428 		struct kbase_queue *queue = group->bound_queues[i];
3429 
3430 		clear_bit(i, group->protm_pending_bitmap);
3431 		KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, PROTM_PENDING_CLEAR, group,
3432 					   queue, group->protm_pending_bitmap[0]);
3433 
3434 		if (!WARN_ON(!queue) && queue->enabled) {
3435 			struct kbase_csf_cmd_stream_info *stream =
3436 						&ginfo->streams[i];
3437 			u32 cs_protm_ack = kbase_csf_firmware_cs_output(
3438 						stream, CS_ACK) &
3439 						CS_ACK_PROTM_PEND_MASK;
3440 			u32 cs_protm_req = kbase_csf_firmware_cs_input_read(
3441 						stream, CS_REQ) &
3442 						CS_REQ_PROTM_PEND_MASK;
3443 
3444 			KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_ACK, group,
3445 						   queue, cs_protm_ack ^ cs_protm_req);
3446 
3447 			if (cs_protm_ack == cs_protm_req) {
3448 				dev_dbg(kbdev->dev,
3449 					"PROTM-ack already done for queue-%d group-%d slot-%d",
3450 					queue->csi_index, group->handle, slot);
3451 				continue;
3452 			}
3453 
3454 			kbase_csf_firmware_cs_input_mask(stream, CS_REQ,
3455 						cs_protm_ack,
3456 						CS_ACK_PROTM_PEND_MASK);
3457 			protm_ack = true;
3458 			dev_dbg(kbdev->dev,
3459 				"PROTM-ack for queue-%d, group-%d slot-%d",
3460 				queue->csi_index, group->handle, slot);
3461 		}
3462 	}
3463 
3464 	return protm_ack;
3465 }
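
/*
 * Illustrative sketch (not driver code): the masked register update performed
 * above when acknowledging a PROTM_PEND request. Only the bits selected by
 * the mask are rewritten, and acknowledging means making the REQ field equal
 * to the ACK field so the firmware sees the handshake as complete. Plain
 * integers stand in for the CS interface registers; all names are invented
 * for the example.
 */
#include <stdbool.h>
#include <stdint.h>

/* Equivalent of a <reg>_input_mask() helper: update only the masked bits. */
static uint32_t example_input_mask(uint32_t reg, uint32_t value, uint32_t mask)
{
	return (reg & ~mask) | (value & mask);
}

static bool example_ack_protm_pending(uint32_t *cs_req, uint32_t cs_ack, uint32_t pend_mask)
{
	if (((*cs_req ^ cs_ack) & pend_mask) == 0)
		return false;	/* nothing pending, or already acknowledged */

	*cs_req = example_input_mask(*cs_req, cs_ack, pend_mask);
	return true;		/* caller should go on to enter protected mode */
}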
3466 
3467 /**
3468  * scheduler_group_check_protm_enter - Request the given group to be evaluated
3469  * for triggering the protected mode.
3470  *
3471  * @kbdev:     Pointer to the GPU device.
3472  * @input_grp: Pointer to the GPU queue group.
3473  *
3474  * The function assumes the given group is either an active running group or
3475  * the scheduler internally maintained field scheduler->top_grp.
3476  *
3477  * If the GPU is not already running in protected mode and the input group
3478  * has protected region requests from its bound queues, the requests are
3479  * acknowledged and the GPU is instructed to enter the protected mode.
3480  */
3481 static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev,
3482 				struct kbase_queue_group *const input_grp)
3483 {
3484 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3485 	unsigned long flags;
3486 	bool protm_in_use;
3487 
3488 	lockdep_assert_held(&scheduler->lock);
3489 
3490 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
3491 
3492 	/* Check if the previous transition to enter & exit the protected
3493 	 * mode has completed or not.
3494 	 */
3495 	protm_in_use = kbase_csf_scheduler_protected_mode_in_use(kbdev) ||
3496 		       kbdev->protected_mode;
3497 	KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_CHECK_PROTM_ENTER, input_grp,
3498 				 protm_in_use);
3499 
3500 	/* Firmware samples the PROTM_PEND ACK bit for CSs when
3501 	 * Host sends PROTM_ENTER global request. So if PROTM_PEND ACK bit
3502 	 * is set for a CS after Host has sent the PROTM_ENTER
3503 	 * Global request, then there is no guarantee that firmware will
3504 	 * notice that prior to switching to protected mode. And firmware
3505 	 * may not again raise the PROTM_PEND interrupt for that CS
3506 	 * later on. To avoid that uncertainty PROTM_PEND ACK bit
3507 	 * is not set for a CS if the request to enter protected
3508 	 * mode has already been sent. It will be set later (after the exit
3509 	 * from protected mode has taken place) when the group to which
3510 	 * CS is bound becomes the top group.
3511 	 *
3512 	 * The actual decision to enter protected mode hinges on the input
3513 	 * group being the top priority group (or, in case the previous
3514 	 * top-group was evicted from the scheduler during the tick, its
3515 	 * would-be replacement), and on it currently being in a stable state
3516 	 * (i.e. the slot state is running).
3517 	 */
3518 	if (!protm_in_use && !WARN_ON(!input_grp)) {
3519 		const int slot =
3520 			kbase_csf_scheduler_group_get_slot_locked(input_grp);
3521 
3522 		/* check the input_grp is running and requesting protected mode
3523 		 */
3524 		if (slot >= 0 &&
3525 		    atomic_read(&scheduler->csg_slots[slot].state) ==
3526 			    CSG_SLOT_RUNNING) {
3527 			if (kctx_as_enabled(input_grp->kctx) &&
3528 			    scheduler_slot_protm_ack(kbdev, input_grp, slot)) {
3529 				/* Option of acknowledging to multiple
3530 				 * CSGs from the same kctx is dropped,
3531 				 * after consulting with the
3532 				 * architecture team. See the comment in
3533 				 * GPUCORE-21394.
3534 				 */
3535 
3536 				/* Disable the idle timer */
3537 				disable_gpu_idle_fw_timer_locked(kbdev);
3538 
3539 				/* Switch to protected mode */
3540 				scheduler->active_protm_grp = input_grp;
3541 				KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_ENTER_PROTM,
3542 							 input_grp, 0u);
3543 
3544 				kbase_csf_enter_protected_mode(kbdev);
3545 				spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
3546 
3547 				kbase_csf_wait_protected_mode_enter(kbdev);
3548 				return;
3549 			}
3550 		}
3551 	}
3552 
3553 	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
3554 }
3555 
3556 static void scheduler_apply(struct kbase_device *kbdev)
3557 {
3558 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3559 	const u32 total_csg_slots = kbdev->csf.global_iface.group_num;
3560 	const u32 available_csg_slots = scheduler->num_csg_slots_for_tick;
3561 	u32 suspend_cnt = 0;
3562 	u32 remain_cnt = 0;
3563 	u32 resident_cnt = 0;
3564 	struct kbase_queue_group *group;
3565 	u32 i;
3566 	u32 spare;
3567 
3568 	lockdep_assert_held(&scheduler->lock);
3569 
3570 	/* Suspend those resident groups not in the run list */
3571 	for (i = 0; i < total_csg_slots; i++) {
3572 		group = scheduler->csg_slots[i].resident_group;
3573 		if (group) {
3574 			resident_cnt++;
3575 			if (group->prepared_seq_num >= available_csg_slots) {
3576 				suspend_queue_group(group);
3577 				suspend_cnt++;
3578 			} else
3579 				remain_cnt++;
3580 		}
3581 	}
3582 
3583 	/* Initialize the remaining available csg slots for the tick/tock */
3584 	scheduler->remaining_tick_slots = available_csg_slots;
3585 
3586 	/* If there are spare slots, apply heads in the list */
3587 	spare = (available_csg_slots > resident_cnt) ?
3588 		(available_csg_slots - resident_cnt) : 0;
3589 	while (!list_empty(&scheduler->groups_to_schedule)) {
3590 		group = list_first_entry(&scheduler->groups_to_schedule,
3591 				struct kbase_queue_group,
3592 				link_to_schedule);
3593 
3594 		if (kbasep_csf_scheduler_group_is_on_slot_locked(group) &&
3595 		    group->prepared_seq_num < available_csg_slots) {
3596 			/* One of the resident remainders */
3597 			update_csg_slot_priority(group,
3598 					get_slot_priority(group));
3599 		} else if (spare != 0) {
3600 			s8 slot = (s8)find_first_zero_bit(
3601 				     kbdev->csf.scheduler.csg_inuse_bitmap,
3602 				     total_csg_slots);
3603 
3604 			if (WARN_ON(slot >= (s8)total_csg_slots))
3605 				break;
3606 
3607 			if (!kctx_as_enabled(group->kctx) || group->faulted) {
3608 				/* Drop the head group and continue */
3609 				update_offslot_non_idle_cnt_for_faulty_grp(
3610 					group);
3611 				remove_scheduled_group(kbdev, group);
3612 				continue;
3613 			}
3614 			program_csg_slot(group, slot,
3615 					 get_slot_priority(group));
3616 			if (unlikely(!csg_slot_in_use(kbdev, slot)))
3617 				break;
3618 
3619 			spare--;
3620 		} else
3621 			break;
3622 
3623 		/* Drop the head csg from the list */
3624 		remove_scheduled_group(kbdev, group);
3625 		if (!WARN_ON(!scheduler->remaining_tick_slots))
3626 			scheduler->remaining_tick_slots--;
3627 	}
3628 
3629 	/* Dealing with groups currently going through suspend */
3630 	program_suspending_csg_slots(kbdev);
3631 }
3632 
3633 static void scheduler_ctx_scan_groups(struct kbase_device *kbdev,
3634 		struct kbase_context *kctx, int priority)
3635 {
3636 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3637 	struct kbase_queue_group *group;
3638 
3639 	lockdep_assert_held(&scheduler->lock);
3640 	if (WARN_ON(priority < 0) ||
3641 	    WARN_ON(priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT))
3642 		return;
3643 
3644 	if (!kctx_as_enabled(kctx))
3645 		return;
3646 
3647 	list_for_each_entry(group, &kctx->csf.sched.runnable_groups[priority],
3648 			    link) {
3649 		if (WARN_ON(!list_empty(&group->link_to_schedule)))
3650 			/* This would be a bug */
3651 			list_del_init(&group->link_to_schedule);
3652 
3653 		if (unlikely(group->faulted))
3654 			continue;
3655 
3656 		/* Set the scanout sequence number, starting from 0 */
3657 		group->scan_seq_num = scheduler->csg_scan_count_for_tick++;
3658 
3659 		if (queue_group_idle_locked(group)) {
3660 			if (on_slot_group_idle_locked(group))
3661 				list_add_tail(&group->link_to_schedule,
3662 					&scheduler->idle_groups_to_schedule);
3663 			continue;
3664 		}
3665 
3666 		if (!scheduler->ngrp_to_schedule) {
3667 			/* keep the top csg's origin */
3668 			scheduler->top_ctx = kctx;
3669 			scheduler->top_grp = group;
3670 		}
3671 
3672 		list_add_tail(&group->link_to_schedule,
3673 			      &scheduler->groups_to_schedule);
3674 		group->prepared_seq_num = scheduler->ngrp_to_schedule++;
3675 
3676 		kctx->csf.sched.ngrp_to_schedule++;
3677 		count_active_address_space(kbdev, kctx);
3678 	}
3679 }
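
/*
 * Illustrative sketch (not driver code): the scan-out numbering performed by
 * scheduler_ctx_scan_groups() above, reduced to arrays. Every runnable group
 * receives a monotonically increasing scan sequence number; non-idle groups
 * additionally receive a prepared sequence number (their position in the
 * to-schedule list), while idle-on-slot groups are parked for a later pass.
 * All names here are invented for the example.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct example_scan_state {
	uint32_t scan_count;		/* models csg_scan_count_for_tick */
	uint32_t ngrp_to_schedule;
};

struct example_grp {
	bool idle;
	uint32_t scan_seq_num;
	uint32_t prepared_seq_num;
};

static void example_scan_groups(struct example_scan_state *st,
				struct example_grp *grps, size_t ngrps)
{
	size_t i;

	for (i = 0; i < ngrps; i++) {
		grps[i].scan_seq_num = st->scan_count++;

		if (grps[i].idle)
			continue;	/* goes to the idle list in the driver */

		grps[i].prepared_seq_num = st->ngrp_to_schedule++;
	}
}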
3680 
3681 /**
3682  * scheduler_rotate_groups() - Rotate the runnable queue groups to provide
3683  *                             fairness of scheduling within a single
3684  *                             kbase_context.
3685  *
3686  * @kbdev:    Pointer to the GPU device.
3687  *
3688  * Since only kbase_csf_scheduler's top_grp (i.e. the queue group assigned
3689  * the highest slot priority) is guaranteed to get the resources that it
3690  * needs we only rotate the kbase_context corresponding to it -
3691  * kbase_csf_scheduler's top_ctx.
3692  *
3693  * The priority level chosen for rotation is the one containing the previous
3694  * scheduling cycle's kbase_csf_scheduler's top_grp.
3695  *
3696  * In a 'fresh-slice-cycle' this always corresponds to the highest group
3697  * priority in use by kbase_csf_scheduler's top_ctx. That is, it's the priority
3698  * level of the previous scheduling cycle's first runnable kbase_context.
3699  *
3700  * We choose this priority level because when higher priority work is
3701  * scheduled, we should always cause the scheduler to run and do a scan. The
3702  * scan always enumerates the highest priority work first (whether that be
3703  * based on process priority or group priority), and thus
3704  * kbase_csf_scheduler's top_grp will point to the first of those high priority
3705  * groups, which necessarily must be the highest priority group in
3706  * kbase_csf_scheduler's top_ctx. The fresh-slice-cycle will run later and pick
3707  * up that group appropriately.
3708  *
3709  * If kbase_csf_scheduler's top_grp was instead evicted (and thus is NULL),
3710  * then no explicit rotation occurs on the next fresh-slice-cycle schedule, but
3711  * will set up kbase_csf_scheduler's top_ctx again for the next scheduling
3712  * cycle. Implicitly, a rotation had already occurred by removing
3713  * the kbase_csf_scheduler's top_grp.
3714  *
3715  * If kbase_csf_scheduler's top_grp became idle and all other groups belonging
3716  * to kbase_csf_scheduler's top_grp's priority level in kbase_csf_scheduler's
3717  * top_ctx are also idle, then the effect of this will be to rotate idle
3718  * groups, which might not actually become resident in the next
3719  * scheduling slice. However this is acceptable since a queue group becoming
3720  * idle is implicitly a rotation (as above with evicted queue groups), as it
3721  * automatically allows a new queue group to take the maximum slot priority
3722  * whilst the idle kbase_csf_scheduler's top_grp ends up near the back of
3723  * the kbase_csf_scheduler's groups_to_schedule list. In this example, it will
3724  * be for a group in the next lowest priority level or, in the absence of
3725  * those, the next kbase_context's queue groups.
3726  */
3727 static void scheduler_rotate_groups(struct kbase_device *kbdev)
3728 {
3729 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3730 	struct kbase_context *const top_ctx = scheduler->top_ctx;
3731 	struct kbase_queue_group *const top_grp = scheduler->top_grp;
3732 
3733 	lockdep_assert_held(&scheduler->lock);
3734 	if (top_ctx && top_grp) {
3735 		struct list_head *list =
3736 			&top_ctx->csf.sched.runnable_groups[top_grp->priority];
3737 
3738 		WARN_ON(top_grp->kctx != top_ctx);
3739 		if (!WARN_ON(list_empty(list))) {
3740 			struct kbase_queue_group *new_head_grp;
3741 			list_move_tail(&top_grp->link, list);
3742 			new_head_grp = (!list_empty(list)) ?
3743 						list_first_entry(list, struct kbase_queue_group, link) :
3744 						NULL;
3745 			KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_ROTATE_RUNNABLE,
3746 						top_grp, top_ctx->csf.sched.num_runnable_grps);
3747 			KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_HEAD_RUNNABLE,
3748 						 new_head_grp, 0u);
3749 			dev_dbg(kbdev->dev,
3750 			    "groups rotated for a context, num_runnable_groups: %u\n",
3751 			    scheduler->top_ctx->csf.sched.num_runnable_grps);
3752 		}
3753 	}
3754 }
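
/*
 * Illustrative sketch (not driver code): the rotation primitive used by
 * scheduler_rotate_groups() above, on a plain array instead of a list_head.
 * Moving the previous top entry to the tail is all that is needed for
 * round-robin fairness within a priority level; the next scan naturally
 * picks whatever now sits at the head. Names are invented for the example.
 */
#include <stddef.h>

static void example_rotate_to_tail(int *entries, size_t len, size_t top_idx)
{
	int top;
	size_t i;

	if (!len || top_idx >= len)
		return;

	top = entries[top_idx];
	/* shift everything after the top entry one place towards the head */
	for (i = top_idx; i + 1 < len; i++)
		entries[i] = entries[i + 1];
	entries[len - 1] = top;	/* the previous top now waits at the back */
}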
3755 
3756 static void scheduler_rotate_ctxs(struct kbase_device *kbdev)
3757 {
3758 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3759 	struct list_head *list = &scheduler->runnable_kctxs;
3760 
3761 	lockdep_assert_held(&scheduler->lock);
3762 	if (scheduler->top_ctx) {
3763 		if (!WARN_ON(list_empty(list))) {
3764 			struct kbase_context *pos;
3765 			bool found = false;
3766 
3767 			/* Locate the ctx on the list */
3768 			list_for_each_entry(pos, list, csf.link) {
3769 				if (scheduler->top_ctx == pos) {
3770 					found = true;
3771 					break;
3772 				}
3773 			}
3774 
3775 			if (!WARN_ON(!found)) {
3776 				struct kbase_context *new_head_kctx;
3777 				list_move_tail(&pos->csf.link, list);
3778 				KBASE_KTRACE_ADD(kbdev, SCHEDULER_ROTATE_RUNNABLE, pos,
3779 						 0u);
3780 				new_head_kctx = (!list_empty(list)) ?
3781 							list_first_entry(list, struct kbase_context, csf.link) :
3782 							NULL;
3783 				KBASE_KTRACE_ADD(kbdev, SCHEDULER_HEAD_RUNNABLE,
3784 						 new_head_kctx, 0u);
3785 				dev_dbg(kbdev->dev, "contexts rotated\n");
3786 			}
3787 		}
3788 	}
3789 }
3790 
3791 /**
3792  * scheduler_update_idle_slots_status() - Get the status update for the CSG
3793  *                       slots for which the IDLE notification was received
3794  *                        previously.
3795  *
3796  * @kbdev:             Pointer to the GPU device.
3797  * @csg_bitmap:        Bitmap of the CSG slots for which
3798  *                     the status update request completed successfully.
3799  * @failed_csg_bitmap: Bitmap of the CSG slots for which
3800  *                     the status update request timed out.
3801  *
3802  * This function sends a CSG status update request for all the CSG slots
3803  * present in the bitmap scheduler->csg_slots_idle_mask and wait for the
3804  * request to complete.
3805  * The bits set in the scheduler->csg_slots_idle_mask bitmap are cleared by
3806  * this function.
3807  */
3808 static void scheduler_update_idle_slots_status(struct kbase_device *kbdev,
3809 		unsigned long *csg_bitmap, unsigned long *failed_csg_bitmap)
3810 {
3811 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
3812 	const u32 num_groups = kbdev->csf.global_iface.group_num;
3813 	struct kbase_csf_global_iface *const global_iface =
3814 						&kbdev->csf.global_iface;
3815 	unsigned long flags, i;
3816 
3817 	lockdep_assert_held(&scheduler->lock);
3818 
3819 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
3820 	for_each_set_bit(i, scheduler->csg_slots_idle_mask, num_groups) {
3821 		struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i];
3822 		struct kbase_queue_group *group = csg_slot->resident_group;
3823 		struct kbase_csf_cmd_stream_group_info *const ginfo =
3824 						&global_iface->groups[i];
3825 		u32 csg_req;
3826 
3827 		clear_bit(i, scheduler->csg_slots_idle_mask);
3828 		KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
3829 					 scheduler->csg_slots_idle_mask[0]);
3830 		if (WARN_ON(!group))
3831 			continue;
3832 
3833 		KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STATUS_UPDATE, group,
3834 					 i);
3835 
3836 		csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
3837 		csg_req ^= CSG_REQ_STATUS_UPDATE_MASK;
3838 		kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req,
3839 						  CSG_REQ_STATUS_UPDATE_MASK);
3840 
3841 		set_bit(i, csg_bitmap);
3842 	}
3843 	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
3844 
3845 	/* The groups are aggregated into a single kernel doorbell request */
3846 	if (!bitmap_empty(csg_bitmap, num_groups)) {
3847 		long wt =
3848 			kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
3849 		u32 db_slots = (u32)csg_bitmap[0];
3850 
3851 		kbase_csf_ring_csg_slots_doorbell(kbdev, db_slots);
3852 
3853 		if (wait_csg_slots_handshake_ack(kbdev,
3854 				CSG_REQ_STATUS_UPDATE_MASK, csg_bitmap, wt)) {
3855 			dev_warn(
3856 				kbdev->dev,
3857 				"[%llu] Timeout (%d ms) on CSG_REQ:STATUS_UPDATE, treat groups as not idle: slot mask=0x%lx",
3858 				kbase_backend_get_cycle_cnt(kbdev),
3859 				kbdev->csf.fw_timeout_ms,
3860 				csg_bitmap[0]);
3861 
3862 			/* Store the bitmap of timed out slots */
3863 			bitmap_copy(failed_csg_bitmap, csg_bitmap, num_groups);
3864 			csg_bitmap[0] = ~csg_bitmap[0] & db_slots;
3865 		} else {
3866 			KBASE_KTRACE_ADD(kbdev, SLOTS_STATUS_UPDATE_ACK, NULL,
3867 					 db_slots);
3868 			csg_bitmap[0] = db_slots;
3869 		}
3870 	}
3871 }
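
/*
 * Illustrative sketch (not driver code): how a new STATUS_UPDATE request is
 * raised above. The protocol is toggle-based: the driver reads the current
 * ACK value and writes the opposite state of the chosen field into REQ, so
 * REQ and ACK differ until the firmware processes the request and flips ACK
 * to match. Plain integers stand in for the CSG interface registers.
 */
#include <stdint.h>

static uint32_t example_raise_toggle_request(uint32_t csg_req, uint32_t csg_ack, uint32_t mask)
{
	/* new REQ: keep unrelated bits, invert the masked field w.r.t. ACK */
	return (csg_req & ~mask) | (~csg_ack & mask);
}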
3872 
3873 /**
3874  * scheduler_handle_idle_slots() - Update the idle status of queue groups
3875  *                    resident on CSG slots for which the
3876  *                    IDLE notification was received previously.
3877  *
3878  * @kbdev:  Pointer to the GPU device.
3879  *
3880  * This function is called at the start of scheduling tick/tock to reconfirm
3881  * the idle status of queue groups resident on CSG slots for
3882  * which idle notification was received previously, i.e. all the CSG slots
3883  * present in the bitmap scheduler->csg_slots_idle_mask.
3884  * The confirmation is done by sending the CSG status update request to the
3885  * firmware. On completion, the firmware will mark the idleness at the
3886  * slot's interface CSG_STATUS_STATE register accordingly.
3887  *
3888  * The run state of the groups resident on still idle CSG slots is changed to
3889  * KBASE_CSF_GROUP_IDLE and the bitmap scheduler->csg_slots_idle_mask is
3890  * updated accordingly.
3891  * The bits corresponding to slots for which the status update request timed
3892  * out remain set in scheduler->csg_slots_idle_mask.
3893  */
3894 static void scheduler_handle_idle_slots(struct kbase_device *kbdev)
3895 {
3896 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3897 	u32 num_groups = kbdev->csf.global_iface.group_num;
3898 	unsigned long flags, i;
3899 	DECLARE_BITMAP(csg_bitmap, MAX_SUPPORTED_CSGS) = { 0 };
3900 	DECLARE_BITMAP(failed_csg_bitmap, MAX_SUPPORTED_CSGS) = { 0 };
3901 
3902 	lockdep_assert_held(&scheduler->lock);
3903 
3904 	scheduler_update_idle_slots_status(kbdev, csg_bitmap,
3905 					   failed_csg_bitmap);
3906 
3907 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
3908 	for_each_set_bit(i, csg_bitmap, num_groups) {
3909 		struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i];
3910 		struct kbase_queue_group *group = csg_slot->resident_group;
3911 
3912 		if (WARN_ON(atomic_read(&csg_slot->state) != CSG_SLOT_RUNNING))
3913 			continue;
3914 		if (WARN_ON(!group))
3915 			continue;
3916 		if (WARN_ON(group->run_state != KBASE_CSF_GROUP_RUNNABLE &&
3917 					group->run_state != KBASE_CSF_GROUP_IDLE))
3918 			continue;
3919 		if (WARN_ON(group->priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT))
3920 			continue;
3921 
3922 		if (group_on_slot_is_idle(kbdev, i)) {
3923 			group->run_state = KBASE_CSF_GROUP_IDLE;
3924 			set_bit(i, scheduler->csg_slots_idle_mask);
3925 			KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET,
3926 						 group, scheduler->csg_slots_idle_mask[0]);
3927 		} else
3928 			group->run_state = KBASE_CSF_GROUP_RUNNABLE;
3929 	}
3930 
3931 	bitmap_or(scheduler->csg_slots_idle_mask,
3932 		  scheduler->csg_slots_idle_mask,
3933 		  failed_csg_bitmap, num_groups);
3934 	KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET, NULL,
3935 				 scheduler->csg_slots_idle_mask[0]);
3936 	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
3937 }
3938 
3939 static void scheduler_scan_idle_groups(struct kbase_device *kbdev)
3940 {
3941 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3942 	struct kbase_queue_group *group, *n;
3943 
3944 	list_for_each_entry_safe(group, n, &scheduler->idle_groups_to_schedule,
3945 				 link_to_schedule) {
3946 
3947 		WARN_ON(!on_slot_group_idle_locked(group));
3948 
3949 		if (!scheduler->ngrp_to_schedule) {
3950 			/* keep the top csg's origin */
3951 			scheduler->top_ctx = group->kctx;
3952 			scheduler->top_grp = group;
3953 		}
3954 
3955 		group->prepared_seq_num = scheduler->ngrp_to_schedule++;
3956 		list_move_tail(&group->link_to_schedule,
3957 			       &scheduler->groups_to_schedule);
3958 
3959 		group->kctx->csf.sched.ngrp_to_schedule++;
3960 		count_active_address_space(kbdev, group->kctx);
3961 	}
3962 }
3963 
3964 static void scheduler_rotate(struct kbase_device *kbdev)
3965 {
3966 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3967 
3968 	lockdep_assert_held(&scheduler->lock);
3969 
3970 	/* Dealing with rotation */
3971 	scheduler_rotate_groups(kbdev);
3972 	scheduler_rotate_ctxs(kbdev);
3973 }
3974 
3975 static struct kbase_queue_group *get_tock_top_group(
3976 	struct kbase_csf_scheduler *const scheduler)
3977 {
3978 	struct kbase_context *kctx;
3979 	int i;
3980 
3981 	lockdep_assert_held(&scheduler->lock);
3982 	for (i = 0; i < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++i) {
3983 		list_for_each_entry(kctx,
3984 			&scheduler->runnable_kctxs, csf.link) {
3985 			struct kbase_queue_group *group;
3986 
3987 			list_for_each_entry(group,
3988 					&kctx->csf.sched.runnable_groups[i],
3989 					link) {
3990 				if (queue_group_idle_locked(group))
3991 					continue;
3992 
3993 				return group;
3994 			}
3995 		}
3996 	}
3997 
3998 	return NULL;
3999 }
4000 
4001 /**
4002  * suspend_active_groups_on_powerdown() - Suspend active CSG groups upon
4003  *                                        suspend or GPU IDLE.
4004  *
4005  * @kbdev:          Pointer to the device
4006  * @system_suspend: Flag to indicate it's for system suspend.
4007  *
4008  * This function will suspend all active CSG groups upon either
4009  * system suspend, runtime suspend or GPU IDLE.
4010  *
4011  * Return: 0 on success, -1 otherwise.
4012  */
4013 static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev,
4014 					      bool system_suspend)
4015 {
4016 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
4017 	DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = { 0 };
4018 
4019 	int ret = suspend_active_queue_groups(kbdev, slot_mask);
4020 
4021 	if (ret) {
4022 		/* The suspend of CSGs failed,
4023 		 * trigger the GPU reset to be in a deterministic state.
4024 		 */
4025 		dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for CSG slots to suspend on power down, slot_mask: 0x%*pb\n",
4026 			 kbase_backend_get_cycle_cnt(kbdev),
4027 			 kbdev->csf.fw_timeout_ms,
4028 			 kbdev->csf.global_iface.group_num, slot_mask);
4029 
4030 		if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
4031 			kbase_reset_gpu(kbdev);
4032 
4033 		return -1;
4034 	}
4035 
4036 	/* Check if the groups became active whilst the suspend was ongoing,
4037 	 * but only for the case where the system suspend is not in progress
4038 	 */
4039 	if (!system_suspend && atomic_read(&scheduler->non_idle_offslot_grps))
4040 		return -1;
4041 
4042 	return 0;
4043 }
4044 
4045 static bool scheduler_idle_suspendable(struct kbase_device *kbdev)
4046 {
4047 	bool suspend;
4048 	unsigned long flags;
4049 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
4050 
4051 	lockdep_assert_held(&scheduler->lock);
4052 
4053 	if ((scheduler->state == SCHED_SUSPENDED) ||
4054 	    (scheduler->state == SCHED_SLEEPING))
4055 		return false;
4056 
4057 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
4058 	if (scheduler->total_runnable_grps) {
4059 		spin_lock(&scheduler->interrupt_lock);
4060 
4061 		/* Check both on-slots and off-slots groups idle status */
4062 		suspend = kbase_csf_scheduler_all_csgs_idle(kbdev) &&
4063 			  !atomic_read(&scheduler->non_idle_offslot_grps) &&
4064 			  kbase_pm_idle_groups_sched_suspendable(kbdev);
4065 
4066 		spin_unlock(&scheduler->interrupt_lock);
4067 	} else
4068 		suspend = kbase_pm_no_runnables_sched_suspendable(kbdev);
4069 
4070 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
4071 
4072 	return suspend;
4073 }
4074 
4075 #ifdef KBASE_PM_RUNTIME
4076 /**
4077  * scheduler_sleep_on_idle - Put the Scheduler in sleeping state on GPU
4078  *                           becoming idle.
4079  *
4080  * @kbdev: Pointer to the device.
4081  *
4082  * This function is called on GPU idle notification to trigger the transition of
4083  * GPU to sleep state, where MCU firmware pauses execution and L2 cache is
4084  * turned off. Scheduler's state is changed to sleeping and all the active queue
4085  * groups remain on the CSG slots.
4086  */
4087 static void scheduler_sleep_on_idle(struct kbase_device *kbdev)
4088 {
4089 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
4090 
4091 	lockdep_assert_held(&scheduler->lock);
4092 
4093 	dev_dbg(kbdev->dev,
4094 		"Scheduler to be put to sleep on GPU becoming idle");
4095 	cancel_tick_timer(kbdev);
4096 	scheduler_pm_idle_before_sleep(kbdev);
4097 	scheduler->state = SCHED_SLEEPING;
4098 }
4099 #endif
4100 
4101 /**
4102  * scheduler_suspend_on_idle - Put the Scheduler in suspended state on GPU
4103  *                             becoming idle.
4104  *
4105  * @kbdev: Pointer to the device.
4106  *
4107  * This function is called on GPU idle notification to trigger the power down of
4108  * GPU. Scheduler's state is changed to suspended and all the active queue
4109  * groups are suspended before halting the MCU firmware.
4110  *
4111  * Return: true if scheduler will be suspended or false if suspend is aborted.
4112  */
4113 static bool scheduler_suspend_on_idle(struct kbase_device *kbdev)
4114 {
4115 	int ret = suspend_active_groups_on_powerdown(kbdev, false);
4116 
4117 	if (ret) {
4118 		dev_dbg(kbdev->dev, "Aborting suspend scheduler (grps: %d)",
4119 			atomic_read(
4120 				&kbdev->csf.scheduler.non_idle_offslot_grps));
4121 		/* Bring forward the next tick */
4122 		kbase_csf_scheduler_advance_tick(kbdev);
4123 		return false;
4124 	}
4125 
4126 	dev_dbg(kbdev->dev, "Scheduler to be suspended on GPU becoming idle");
4127 	scheduler_suspend(kbdev);
4128 	cancel_tick_timer(kbdev);
4129 	return true;
4130 }
4131 
4132 static void gpu_idle_worker(struct work_struct *work)
4133 {
4134 	struct kbase_device *kbdev = container_of(
4135 		work, struct kbase_device, csf.scheduler.gpu_idle_work);
4136 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
4137 	bool scheduler_is_idle_suspendable = false;
4138 	bool all_groups_suspended = false;
4139 
4140 	KBASE_KTRACE_ADD(kbdev, IDLE_WORKER_BEGIN, NULL, 0u);
4141 
4142 #define __ENCODE_KTRACE_INFO(reset, idle, all_suspend)                         \
4143 	(((u32)reset) | (((u32)idle) << 4) | (((u32)all_suspend) << 8))
4144 
4145 	if (kbase_reset_gpu_try_prevent(kbdev)) {
4146 		dev_warn(kbdev->dev, "Quit idle for failing to prevent gpu reset.\n");
4147 		KBASE_KTRACE_ADD(kbdev, IDLE_WORKER_END, NULL,
4148 				 __ENCODE_KTRACE_INFO(true, false, false));
4149 		return;
4150 	}
4151 	mutex_lock(&scheduler->lock);
4152 
4153 	/* Cycle completed, disable the firmware idle timer */
4154 	disable_gpu_idle_fw_timer(kbdev);
4155 	scheduler_is_idle_suspendable = scheduler_idle_suspendable(kbdev);
4156 	if (scheduler_is_idle_suspendable) {
4157 		KBASE_KTRACE_ADD(kbdev, GPU_IDLE_HANDLING_START, NULL,
4158 				 kbase_csf_ktrace_gpu_cycle_cnt(kbdev));
4159 #ifdef KBASE_PM_RUNTIME
4160 		if (kbase_pm_gpu_sleep_allowed(kbdev) &&
4161 		    scheduler->total_runnable_grps)
4162 			scheduler_sleep_on_idle(kbdev);
4163 		else
4164 #endif
4165 			all_groups_suspended = scheduler_suspend_on_idle(kbdev);
4166 	}
4167 
4168 	mutex_unlock(&scheduler->lock);
4169 	kbase_reset_gpu_allow(kbdev);
4170 	KBASE_KTRACE_ADD(kbdev, IDLE_WORKER_END, NULL,
4171 			 __ENCODE_KTRACE_INFO(false,
4172 					      scheduler_is_idle_suspendable,
4173 					      all_groups_suspended));
4174 #undef __ENCODE_KTRACE_INFO
4175 }
4176 
4177 static int scheduler_prepare(struct kbase_device *kbdev)
4178 {
4179 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
4180 	int i;
4181 
4182 	lockdep_assert_held(&scheduler->lock);
4183 
4184 	/* Empty the groups_to_schedule */
4185 	while (!list_empty(&scheduler->groups_to_schedule)) {
4186 		struct kbase_queue_group *grp =
4187 			list_first_entry(&scheduler->groups_to_schedule,
4188 					 struct kbase_queue_group,
4189 					 link_to_schedule);
4190 
4191 		remove_scheduled_group(kbdev, grp);
4192 	}
4193 
4194 	/* Pre-scan init scheduler fields */
4195 	if (WARN_ON(scheduler->ngrp_to_schedule != 0))
4196 		scheduler->ngrp_to_schedule = 0;
4197 	scheduler->top_ctx = NULL;
4198 	scheduler->top_grp = NULL;
4199 	scheduler->csg_scan_count_for_tick = 0;
4200 	WARN_ON(!list_empty(&scheduler->idle_groups_to_schedule));
4201 	scheduler->num_active_address_spaces = 0;
4202 	scheduler->num_csg_slots_for_tick = 0;
4203 	bitmap_zero(scheduler->csg_slots_prio_update, MAX_SUPPORTED_CSGS);
4204 
4205 	/* Scan out to run groups */
4206 	for (i = 0; i < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++i) {
4207 		struct kbase_context *kctx;
4208 
4209 		list_for_each_entry(kctx, &scheduler->runnable_kctxs, csf.link)
4210 			scheduler_ctx_scan_groups(kbdev, kctx, i);
4211 	}
4212 
4213 	/* Update this tick's non-idle groups */
4214 	scheduler->non_idle_scanout_grps = scheduler->ngrp_to_schedule;
4215 
4216 	/* Initial number of non-idle off-slot groups, before the scheduler's
4217 	 * scheduler_apply() operation. This gives a sensible starting-point view
4218 	 * of the tick. It will be subject to ups and downs during the scheduler's
4219 	 * active phase.
4220 	 */
4221 	atomic_set(&scheduler->non_idle_offslot_grps,
4222 		   scheduler->non_idle_scanout_grps);
4223 	KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC, NULL,
4224 				 scheduler->non_idle_scanout_grps);
4225 
4226 	/* Adds those idle but runnable groups to the scanout list */
4227 	scheduler_scan_idle_groups(kbdev);
4228 
4229 	WARN_ON(scheduler->csg_scan_count_for_tick < scheduler->ngrp_to_schedule);
4230 
4231 	KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp,
4232 			scheduler->num_active_address_spaces |
4233 			(((u64)scheduler->ngrp_to_schedule) << 32));
4234 	set_max_csg_slots(kbdev);
4235 	dev_dbg(kbdev->dev, "prepared groups length: %u, num_active_address_spaces: %u\n",
4236 		scheduler->ngrp_to_schedule, scheduler->num_active_address_spaces);
4237 	return 0;
4238 }
4239 
4240 static void scheduler_handle_idle_timer_onoff(struct kbase_device *kbdev)
4241 {
4242 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
4243 
4244 	lockdep_assert_held(&scheduler->lock);
4245 
4246 	/* After the scheduler apply operation, the internal variable
4247 	 * scheduler->non_idle_offslot_grps reflects the end-point view
4248 	 * of the count at the end of the active phase.
4249 	 *
4250 	 * Any changes that follow (after the scheduler has dropped the
4251 	 * scheduler->lock) reflect async operations on the scheduler, such
4252 	 * as a group getting killed (evicted), a new group being inserted,
4253 	 * a CQS wait-sync triggered state transition, etc.
4254 	 *
4255 	 * The condition for enabling the idle timer is that there are no
4256 	 * non-idle groups off-slot. If there is any non-idle group off-slot,
4257 	 * the timer should be disabled.
4258 	 */
4259 	if (atomic_read(&scheduler->non_idle_offslot_grps))
4260 		disable_gpu_idle_fw_timer(kbdev);
4261 	else
4262 		enable_gpu_idle_fw_timer(kbdev);
4263 }
4264 
4265 /**
4266  * keep_lru_on_slots() - Check the condition for LRU is met.
4267  *
4268  * This function tries to maintain the Last-Recent-Use case on slots, when
4269  * the scheduler has no non-idle off-slot CSGs for a replacement
4270  * consideration. This effectively extends the previous scheduling results
4271  * to the new one. That is, the most recently used CSGs are retained on
4272  * slots for the new tick/tock action.
4273  *
4274  * @kbdev: Pointer to the device.
4275  *
4276  * Return: true if on-slot CSG changes should be avoided (i.e. keep the
4277  *         existing LRU), otherwise false.
4278  */
4279 static bool keep_lru_on_slots(struct kbase_device *kbdev)
4280 {
4281 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
4282 	bool keep_lru = false;
4283 	int on_slots = bitmap_weight(scheduler->csg_inuse_bitmap,
4284 				     kbdev->csf.global_iface.group_num);
4285 
4286 	lockdep_assert_held(&scheduler->lock);
4287 
4288 	if (on_slots && !atomic_read(&scheduler->non_idle_offslot_grps)) {
4289 		unsigned long flags;
4290 
4291 		spin_lock_irqsave(&scheduler->interrupt_lock, flags);
4292 		/* All on-slot CSGs are idle and no non-idle off-slot CSGs are
4293 		 * available for a meaningful change. Set keep_lru.
4294 		 */
4295 		keep_lru = kbase_csf_scheduler_all_csgs_idle(kbdev);
4296 
4297 		if (keep_lru && !scheduler->gpu_idle_fw_timer_enabled) {
4298 			scheduler->gpu_idle_fw_timer_enabled = true;
4299 			kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
4300 		}
4301 		spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
4302 
4303 		dev_dbg(kbdev->dev, "Keep_LRU: %d, CSGs on-slots: %d\n",
4304 			keep_lru, on_slots);
4305 	}
4306 
4307 	return keep_lru;
4308 }
4309 
4310 /**
4311  * prepare_fast_local_tock() - Make preparation arrangements for exercising
4312  *                             a fast local tock inside scheduling-actions.
4313  *
4314  * The function assumes that a scheduling action of firing a fast local tock
4315  * call (i.e. an equivalent tock action without dropping the lock) is desired
4316  * if there are idle on-slot CSGs. The function updates the run state of the
4317  * affected CSGs as a preparation. This should only be called from inside
4318  * schedule_actions(), where the previously recorded idle flags are still
4319  * considered reflective, following the earlier idle confirmation call, plus
4320  * some potentially newly idle CSGs from the scheduling action committing
4321  * steps.
4322  *
4323  * @kbdev:  Pointer to the GPU device.
4324  *
4325  * Return: number of on-slots CSGs that can be considered for replacing.
4326  */
4327 static int prepare_fast_local_tock(struct kbase_device *kbdev)
4328 {
4329 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
4330 	u32 num_groups = kbdev->csf.global_iface.group_num;
4331 	unsigned long flags, i;
4332 	DECLARE_BITMAP(csg_bitmap, MAX_SUPPORTED_CSGS) = { 0 };
4333 
4334 	lockdep_assert_held(&scheduler->lock);
4335 
4336 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
4337 	bitmap_copy(csg_bitmap, scheduler->csg_slots_idle_mask, num_groups);
4338 	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
4339 
4340 	/* Mark the run state of the flagged idle CSGs as IDLE, so that
4341 	 * the intended fast local tock can replace them with off-slot
4342 	 * non-idle CSGs.
4343 	 */
4344 	for_each_set_bit(i, csg_bitmap, num_groups) {
4345 		struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i];
4346 		struct kbase_queue_group *group = csg_slot->resident_group;
4347 
4348 		if (!queue_group_idle_locked(group))
4349 			group->run_state = KBASE_CSF_GROUP_IDLE;
4350 	}
4351 
4352 	/* Return the number of idle slots for potential replacement */
4353 	return bitmap_weight(csg_bitmap, num_groups);
4354 }
4355 
4356 static void schedule_actions(struct kbase_device *kbdev, bool is_tick)
4357 {
4358 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
4359 	unsigned long flags;
4360 	struct kbase_queue_group *protm_grp;
4361 	int ret;
4362 	bool skip_scheduling_actions;
4363 	bool skip_idle_slots_update;
4364 	bool new_protm_top_grp = false;
4365 	int local_tock_slots = 0;
4366 
4367 	kbase_reset_gpu_assert_prevented(kbdev);
4368 	lockdep_assert_held(&scheduler->lock);
4369 
4370 	ret = kbase_csf_scheduler_wait_mcu_active(kbdev);
4371 	if (ret) {
4372 		dev_err(kbdev->dev,
4373 			"Wait for MCU power on failed on scheduling tick/tock");
4374 		return;
4375 	}
4376 
4377 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
4378 	skip_idle_slots_update = kbase_csf_scheduler_protected_mode_in_use(kbdev);
4379 	skip_scheduling_actions =
4380 			!skip_idle_slots_update && kbdev->protected_mode;
4381 	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
4382 
4383 	/* Skip the scheduling actions: the GPU reset needed to rectify the
4384 	 * anomaly of the pmode exit interrupt not being received before the
4385 	 * termination of the group running in pmode hasn't been performed yet.
4386 	 */
4387 	if (unlikely(skip_scheduling_actions)) {
4388 		dev_info(kbdev->dev,
4389 			 "Scheduling actions skipped due to anomaly in pmode");
4390 		return;
4391 	}
4392 
4393 	if (!skip_idle_slots_update) {
4394 		/* Updating on-slot idle CSGs when not in protected mode. */
4395 		scheduler_handle_idle_slots(kbdev);
4396 
4397 		/* Determine whether the condition for keeping the
4398 		 * Last-Recent-Use arrangement is met. If true, skip the
4399 		 * remaining action steps, thus extending the previous tick's
4400 		 * arrangement, in particular with no alterations to on-slot CSGs.
4401 		 */
4402 		if (keep_lru_on_slots(kbdev))
4403 			return;
4404 	}
4405 
4406 	if (is_tick)
4407 		scheduler_rotate(kbdev);
4408 
4409 redo_local_tock:
4410 	scheduler_prepare(kbdev);
4411 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
4412 	protm_grp = scheduler->active_protm_grp;
4413 
4414 	/* Avoid the update if the top-group remains unchanged and is in
4415 	 * protected mode. In that case, any slots update effectively
4416 	 * competes against the active protected mode group (typically the
4417 	 * top-group). If we updated other slots, even while leaving the
4418 	 * top-group slot untouched, the firmware would exit protected mode
4419 	 * to interact with the host driver. After that, as the top-group
4420 	 * would again raise the request for entering protected mode, we
4421 	 * would end up actively switching over twice without progressing
4422 	 * the queue jobs.
4423 	 */
4424 	if (protm_grp && scheduler->top_grp == protm_grp) {
4425 		int new_val;
4426 		dev_dbg(kbdev->dev, "Scheduler keep protm exec: group-%d",
4427 			protm_grp->handle);
4428 		new_val = atomic_dec_return(&scheduler->non_idle_offslot_grps);
4429 		KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC,
4430 					 protm_grp, new_val);
4431 	} else if (scheduler->top_grp) {
4432 		if (protm_grp)
4433 			dev_dbg(kbdev->dev, "Scheduler drop protm exec: group-%d",
4434 				protm_grp->handle);
4435 
4436 		if (!bitmap_empty(scheduler->top_grp->protm_pending_bitmap,
4437 			     kbdev->csf.global_iface.groups[0].stream_num)) {
4438 			dev_dbg(kbdev->dev, "Scheduler prepare protm exec: group-%d of context %d_%d",
4439 				scheduler->top_grp->handle,
4440 				scheduler->top_grp->kctx->tgid,
4441 				scheduler->top_grp->kctx->id);
4442 
4443 			/* When entering protected mode all CSG slots can be occupied
4444 			 * but only the protected mode CSG will be running. Any event
4445 			 * that would trigger the execution of an on-slot idle CSG will
4446 			 * need to be handled by the host during protected mode.
4447 			 */
4448 			new_protm_top_grp = true;
4449 		}
4450 
4451 		spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
4452 
4453 		scheduler_apply(kbdev);
4454 
4455 		/* Post-apply, all the committed groups in this tick are on
4456 		 * slots, time to arrange the idle timer on/off decision.
4457 		 */
4458 		scheduler_handle_idle_timer_onoff(kbdev);
4459 
4460 		/* The Scheduler is dropping the execution of the previous
4461 		 * protm_grp. Until the protected mode exit completes, the GPU
4462 		 * is effectively locked in secure mode.
4463 		 */
4464 		if (protm_grp)
4465 			scheduler_force_protm_exit(kbdev);
4466 
4467 		wait_csg_slots_start(kbdev);
4468 		wait_csg_slots_finish_prio_update(kbdev);
4469 
4470 		if (new_protm_top_grp) {
4471 			scheduler_group_check_protm_enter(kbdev,
4472 						scheduler->top_grp);
4473 		} else if (!local_tock_slots &&
4474 			   atomic_read(&scheduler->non_idle_offslot_grps)) {
4475 			/* If, during the scheduling action, we have off-slot
4476 			 * non-idle CSGs waiting and some new idle slots have
4477 			 * emerged during the committed action steps, trigger
4478 			 * a one-off fast local tock.
4479 			 */
4480 			local_tock_slots = prepare_fast_local_tock(kbdev);
4481 
4482 			if (local_tock_slots) {
4483 				dev_dbg(kbdev->dev,
4484 					"In-cycle %d idle slots available\n",
4485 					local_tock_slots);
4486 				goto redo_local_tock;
4487 			}
4488 		}
4489 
4490 		return;
4491 	}
4492 
4493 	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
4494 	return;
4495 }
4496 
4497 /**
4498  * can_skip_scheduling() - Check if the scheduling actions can be skipped.
4499  *
4500  * @kbdev: Pointer to the device
4501  *
4502  * This function is called on a scheduling tick or tock to determine if the
4503  * scheduling actions can be skipped.
4504  * If the Scheduler is in the sleeping state and exit from sleep is allowed,
4505  * then activation of the MCU will be triggered. The tick or tock work item
4506  * could have been in flight when the Scheduler state was changed to sleeping.
4507  *
4508  * Return: true if the scheduling actions can be skipped.
4509  */
4510 static bool can_skip_scheduling(struct kbase_device *kbdev)
4511 {
4512 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
4513 
4514 	lockdep_assert_held(&scheduler->lock);
4515 
4516 	if (scheduler->state == SCHED_SUSPENDED)
4517 		return true;
4518 
4519 #ifdef KBASE_PM_RUNTIME
4520 	if (scheduler->state == SCHED_SLEEPING) {
4521 		unsigned long flags;
4522 
4523 		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
4524 		if (kbdev->pm.backend.exit_gpu_sleep_mode) {
4525 			int ret = scheduler_pm_active_after_sleep(kbdev, flags);
4526 			/* hwaccess_lock is released in the previous function
4527 			 * call.
4528 			 */
4529 			if (!ret) {
4530 				scheduler->state = SCHED_INACTIVE;
4531 				return false;
4532 			}
4533 
4534 			dev_info(kbdev->dev,
4535 				 "Skip scheduling due to system suspend");
4536 			return true;
4537 		}
4538 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
4539 		return true;
4540 	}
4541 #endif
4542 
4543 	return false;
4544 }
4545 
4546 static void schedule_on_tock(struct work_struct *work)
4547 {
4548 	struct kbase_device *kbdev = container_of(work, struct kbase_device,
4549 					csf.scheduler.tock_work.work);
4550 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
4551 	int err;
4552 
4553 	/* Tock work item is serviced */
4554 	scheduler->tock_pending_request = false;
4555 
4556 	err = kbase_reset_gpu_try_prevent(kbdev);
4557 	/* Regardless of whether reset failed or is currently happening, exit
4558 	 * early
4559 	 */
4560 	if (err)
4561 		return;
4562 
4563 	mutex_lock(&scheduler->lock);
4564 	if (can_skip_scheduling(kbdev))
4565 		goto exit_no_schedule_unlock;
4566 
4567 	WARN_ON(!(scheduler->state == SCHED_INACTIVE));
4568 	scheduler->state = SCHED_BUSY;
4569 
4570 	/* Undertaking schedule action steps */
4571 	KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK, NULL, 0u);
4572 	schedule_actions(kbdev, false);
4573 
4574 	/* Record time information on a non-skipped tock */
4575 	scheduler->last_schedule = jiffies;
4576 
4577 	scheduler->state = SCHED_INACTIVE;
4578 	if (!scheduler->total_runnable_grps)
4579 		queue_work(system_wq, &scheduler->gpu_idle_work);
4580 	mutex_unlock(&scheduler->lock);
4581 	kbase_reset_gpu_allow(kbdev);
4582 
4583 	dev_dbg(kbdev->dev,
4584 		"Waking up for event after schedule-on-tock completes.");
4585 	wake_up_all(&kbdev->csf.event_wait);
4586 	KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_END, NULL, 0u);
4587 	return;
4588 
4589 exit_no_schedule_unlock:
4590 	mutex_unlock(&scheduler->lock);
4591 	kbase_reset_gpu_allow(kbdev);
4592 }
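
/*
 * Illustrative sketch (not part of the driver): how a "tock" request would
 * typically be queued onto the scheduler workqueue, honouring the minimum
 * rescheduling delay CSF_SCHEDULER_TIME_TOCK_JIFFIES and the
 * tock_pending_request flag consumed by schedule_on_tock() above. The helper
 * name is hypothetical; the real enqueue paths live elsewhere in this file.
 * Guarded out so that it has no effect on the build.
 */
#if 0
static void example_enqueue_tock(struct kbase_device *kbdev)
{
	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;

	lockdep_assert_held(&scheduler->lock);

	if (!scheduler->tock_pending_request) {
		scheduler->tock_pending_request = true;
		queue_delayed_work(scheduler->wq, &scheduler->tock_work,
				   CSF_SCHEDULER_TIME_TOCK_JIFFIES);
	}
}
#endif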
4593 
4594 static void schedule_on_tick(struct work_struct *work)
4595 {
4596 	struct kbase_device *kbdev = container_of(work, struct kbase_device,
4597 					csf.scheduler.tick_work);
4598 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
4599 
4600 	int err = kbase_reset_gpu_try_prevent(kbdev);
4601 	/* Regardless of whether reset failed or is currently happening, exit
4602 	 * early
4603 	 */
4604 	if (err)
4605 		return;
4606 
4607 	mutex_lock(&scheduler->lock);
4608 
4609 	WARN_ON(scheduler->tick_timer_active);
4610 	if (can_skip_scheduling(kbdev))
4611 		goto exit_no_schedule_unlock;
4612 
4613 	scheduler->state = SCHED_BUSY;
4614 
4615 	/* Undertaking schedule action steps */
4616 	KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK, NULL,
4617 			 scheduler->total_runnable_grps);
4618 	schedule_actions(kbdev, true);
4619 
4620 	/* Record time information */
4621 	scheduler->last_schedule = jiffies;
4622 
4623 	/* Kicking next scheduling if needed */
4624 	if (likely(scheduler_timer_is_enabled_nolock(kbdev)) &&
4625 			(scheduler->total_runnable_grps > 0)) {
4626 		start_tick_timer(kbdev);
4627 		dev_dbg(kbdev->dev,
4628 			"scheduling for next tick, num_runnable_groups:%u\n",
4629 			scheduler->total_runnable_grps);
4630 	} else if (!scheduler->total_runnable_grps)
4631 		queue_work(system_wq, &scheduler->gpu_idle_work);
4632 
4633 	scheduler->state = SCHED_INACTIVE;
4634 	mutex_unlock(&scheduler->lock);
4635 	kbase_reset_gpu_allow(kbdev);
4636 
4637 	dev_dbg(kbdev->dev, "Waking up for event after schedule-on-tick completes.");
4638 	wake_up_all(&kbdev->csf.event_wait);
4639 	KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_END, NULL,
4640 			 scheduler->total_runnable_grps);
4641 	return;
4642 
4643 exit_no_schedule_unlock:
4644 	mutex_unlock(&scheduler->lock);
4645 	kbase_reset_gpu_allow(kbdev);
4646 }
4647 
4648 static int wait_csg_slots_suspend(struct kbase_device *kbdev,
4649 			   const unsigned long *slot_mask,
4650 			   unsigned int timeout_ms)
4651 {
4652 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
4653 	long remaining = kbase_csf_timeout_in_jiffies(timeout_ms);
4654 	u32 num_groups = kbdev->csf.global_iface.group_num;
4655 	int err = 0;
4656 	DECLARE_BITMAP(slot_mask_local, MAX_SUPPORTED_CSGS);
4657 
4658 	lockdep_assert_held(&scheduler->lock);
4659 
4660 	bitmap_copy(slot_mask_local, slot_mask, MAX_SUPPORTED_CSGS);
4661 
4662 	while (!bitmap_empty(slot_mask_local, MAX_SUPPORTED_CSGS)
4663 		&& remaining) {
4664 		DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS);
4665 
4666 		bitmap_copy(changed, slot_mask_local, MAX_SUPPORTED_CSGS);
4667 
4668 		remaining = wait_event_timeout(kbdev->csf.event_wait,
4669 			slots_state_changed(kbdev, changed,
4670 				csg_slot_stopped_locked),
4671 			remaining);
4672 
4673 		if (remaining) {
4674 			u32 i;
4675 
4676 			for_each_set_bit(i, changed, num_groups) {
4677 				struct kbase_queue_group *group;
4678 
4679 				if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i)))
4680 					continue;
4681 
4682 				/* The on-slot CSG is now stopped */
4683 				clear_bit(i, slot_mask_local);
4684 
4685 				group = scheduler->csg_slots[i].resident_group;
4686 				if (likely(group)) {
4687 					/* Only do save/cleanup if the
4688 					 * group is not terminated during
4689 					 * the sleep.
4690 					 */
4691 					save_csg_slot(group);
4692 					if (cleanup_csg_slot(group))
4693 						sched_evict_group(group, true, true);
4694 				}
4695 			}
4696 		} else {
4697 			dev_warn(kbdev->dev, "[%llu] Timeout waiting for CSG slots to suspend, slot_mask: 0x%*pb\n",
4698 				 kbase_backend_get_cycle_cnt(kbdev),
4699 				 num_groups, slot_mask_local);
4700 
4701 
4702 			err = -ETIMEDOUT;
4703 		}
4704 	}
4705 
4706 	return err;
4707 }
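
/*
 * Illustrative sketch (not part of the driver): the typical caller pattern
 * for wait_csg_slots_suspend(), mirroring how it is used later in this file -
 * request the suspension of the group(s) of interest, build a slot mask for
 * them and then wait with a timeout. The helper itself is hypothetical.
 * Guarded out so that it has no effect on the build.
 */
#if 0
static int example_suspend_one_group(struct kbase_device *kbdev,
				     struct kbase_queue_group *group)
{
	DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = { 0 };

	lockdep_assert_held(&kbdev->csf.scheduler.lock);

	suspend_queue_group(group);
	set_bit(kbase_csf_scheduler_group_get_slot(group), slot_mask);

	return wait_csg_slots_suspend(kbdev, slot_mask,
				      kbdev->csf.fw_timeout_ms);
}
#endif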
4708 
4709 static int suspend_active_queue_groups(struct kbase_device *kbdev,
4710 				       unsigned long *slot_mask)
4711 {
4712 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
4713 	u32 num_groups = kbdev->csf.global_iface.group_num;
4714 	u32 slot_num;
4715 	int ret;
4716 
4717 	lockdep_assert_held(&scheduler->lock);
4718 
4719 	for (slot_num = 0; slot_num < num_groups; slot_num++) {
4720 		struct kbase_queue_group *group =
4721 			scheduler->csg_slots[slot_num].resident_group;
4722 
4723 		if (group) {
4724 			suspend_queue_group(group);
4725 			set_bit(slot_num, slot_mask);
4726 		}
4727 	}
4728 
4729 	ret = wait_csg_slots_suspend(kbdev, slot_mask, kbdev->reset_timeout_ms);
4730 	return ret;
4731 }
4732 
4733 static int suspend_active_queue_groups_on_reset(struct kbase_device *kbdev)
4734 {
4735 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
4736 	DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = { 0 };
4737 	int ret;
4738 	int ret2;
4739 
4740 	mutex_lock(&scheduler->lock);
4741 
4742 	ret = suspend_active_queue_groups(kbdev, slot_mask);
4743 
4744 	if (ret) {
4745 		dev_warn(kbdev->dev, "Timeout waiting for CSG slots to suspend before reset, slot_mask: 0x%*pb\n",
4746 			 kbdev->csf.global_iface.group_num, slot_mask);
4747 	}
4748 
4749 	/* Need to flush the GPU cache to ensure suspend buffer
4750 	 * contents are not lost on reset of GPU.
4751 	 * Do this even if suspend operation had timed out for some of
4752 	 * the CSG slots.
4753 	 * In case the scheduler is already in the suspended state, the
4754 	 * cache clean is still required as the async reset request from
4755 	 * debugfs may race against the scheduler suspend operation
4756 	 * due to the extra context ref-count, which prevents the
4757 	 * cache clean done on L2 power down in the non-racing
4758 	 * case.
4759 	 * The LSC is flushed as well, to cover the buslogging use case,
4760 	 * where a GPU reset is done regularly to avoid log buffer
4761 	 * overflow.
4762 	 */
4763 	kbase_gpu_start_cache_clean(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC);
4764 	ret2 = kbase_gpu_wait_cache_clean_timeout(kbdev,
4765 			kbdev->reset_timeout_ms);
4766 	if (ret2) {
4767 		dev_warn(kbdev->dev, "[%llu] Timeout waiting for cache clean to complete before reset",
4768 			 kbase_backend_get_cycle_cnt(kbdev));
4769 		if (!ret)
4770 			ret = ret2;
4771 	}
4772 
4773 	mutex_unlock(&scheduler->lock);
4774 
4775 	return ret;
4776 }
4777 
4778 /**
4779  * scheduler_handle_reset_in_protected_mode() - Update the state of normal mode
4780  *                                              groups when reset is done during
4781  *                                              protected mode execution.
4782  *
4783  * @kbdev: Pointer to the device.
4784  *
4785  * This function is called at the time of GPU reset, before the suspension of
4786  * queue groups, to handle the case when the reset is getting performed whilst
4787  * GPU is in protected mode.
4788  * On entry to protected mode all the groups, except the top group that executes
4789  * in protected mode, are implicitly suspended by the FW. Thus this function
4790  * simply marks the normal mode groups as suspended (and cleans up the
4791  * corresponding CSG slots) to prevent their potential forceful eviction from
4792  * the Scheduler. So if GPU was in protected mode and there was no fault, then
4793  * only the protected mode group would be suspended in the regular way post exit
4794  * from this function. And if GPU was in normal mode, then all on-slot groups
4795  * will get suspended in the regular way.
4796  *
4797  * Return: true if the groups remaining on the CSG slots need to be suspended in
4798  *         the regular way by sending CSG SUSPEND reqs to FW, otherwise false.
4799  */
4800 static bool scheduler_handle_reset_in_protected_mode(struct kbase_device *kbdev)
4801 {
4802 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
4803 	u32 const num_groups = kbdev->csf.global_iface.group_num;
4804 	struct kbase_queue_group *protm_grp;
4805 	bool suspend_on_slot_groups = true;
4806 	bool pmode_active;
4807 	unsigned long flags;
4808 	u32 csg_nr;
4809 
4810 	mutex_lock(&scheduler->lock);
4811 
4812 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
4813 	protm_grp = scheduler->active_protm_grp;
4814 	pmode_active = kbdev->protected_mode;
4815 
4816 	if (likely(!protm_grp && !pmode_active)) {
4817 		/* Case 1: GPU is not in protected mode or it successfully
4818 		 * exited protected mode. All on-slot groups can be suspended in
4819 		 * the regular way before reset.
4820 		 */
4821 		suspend_on_slot_groups = true;
4822 	} else if (protm_grp && pmode_active) {
4823 		/* Case 2: GPU went successfully into protected mode and hasn't
4824 		 * exited from it yet and the protected mode group is still
4825 		 * active. If there was no fault for the protected mode group
4826 		 * then it can be suspended in the regular way before reset.
4827 		 * The other normal mode on-slot groups were already implicitly
4828 		 * suspended on entry to protected mode so they can be marked as
4829 		 * suspended right away.
4830 		 */
4831 		suspend_on_slot_groups = !protm_grp->faulted;
4832 	} else if (!protm_grp && pmode_active) {
4833 		/* Case 3: GPU went successfully into protected mode and hasn't
4834 		 * exited from it yet but the protected mode group got deleted.
4835 		 * This would have happened if the FW got stuck during protected
4836 		 * mode for some reason (like GPU page fault or some internal
4837 		 * error). In normal cases FW is expected to send the pmode exit
4838 		 * interrupt before it handles the CSG termination request.
4839 		 * The other normal mode on-slot groups would already have been
4840 		 * implicitly suspended on entry to protected mode so they can be
4841 		 * marked as suspended right away.
4842 		 */
4843 		suspend_on_slot_groups = false;
4844 	} else if (protm_grp && !pmode_active) {
4845 		/* Case 4: GPU couldn't successfully enter protected mode, i.e.
4846 		 * PROTM_ENTER request had timed out.
4847 		 * All the on-slot groups need to be suspended in the regular
4848 		 * way before reset.
4849 		 */
4850 		suspend_on_slot_groups = true;
4851 	}
4852 
4853 	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
4854 
4855 	if (likely(!pmode_active))
4856 		goto unlock;
4857 
4858 	/* GPU hasn't exited protected mode, so all the on-slot groups barring
4859 	 * the protected mode group can be marked as suspended right away.
4860 	 */
4861 	for (csg_nr = 0; csg_nr < num_groups; csg_nr++) {
4862 		struct kbase_queue_group *const group =
4863 			kbdev->csf.scheduler.csg_slots[csg_nr].resident_group;
4864 		int new_val;
4865 
4866 		if (!group || (group == protm_grp))
4867 			continue;
4868 
4869 		cleanup_csg_slot(group);
4870 		group->run_state = KBASE_CSF_GROUP_SUSPENDED;
4871 
4872 		/* Simply treat the normal mode groups as non-idle. The tick
4873 		 * scheduled after the reset will re-initialize the counter
4874 		 * anyway.
4875 		 */
4876 		new_val = atomic_inc_return(&scheduler->non_idle_offslot_grps);
4877 		KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC,
4878 					 group, new_val);
4879 	}
4880 
4881 unlock:
4882 	mutex_unlock(&scheduler->lock);
4883 	return suspend_on_slot_groups;
4884 }
4885 
4886 static void cancel_tock_work(struct kbase_csf_scheduler *const scheduler)
4887 {
4888 	cancel_delayed_work_sync(&scheduler->tock_work);
4889 	scheduler->tock_pending_request = false;
4890 }
4891 
4892 static void scheduler_inner_reset(struct kbase_device *kbdev)
4893 {
4894 	u32 const num_groups = kbdev->csf.global_iface.group_num;
4895 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
4896 	unsigned long flags;
4897 
4898 	WARN_ON(kbase_csf_scheduler_get_nr_active_csgs(kbdev));
4899 
4900 	/* Cancel any potential queued delayed work(s) */
4901 	cancel_work_sync(&kbdev->csf.scheduler.gpu_idle_work);
4902 	cancel_tick_timer(kbdev);
4903 	cancel_work_sync(&scheduler->tick_work);
4904 	cancel_tock_work(scheduler);
4905 	cancel_delayed_work_sync(&scheduler->ping_work);
4906 
4907 	mutex_lock(&scheduler->lock);
4908 
4909 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
4910 	bitmap_fill(scheduler->csgs_events_enable_mask, MAX_SUPPORTED_CSGS);
4911 	if (scheduler->active_protm_grp)
4912 		KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_EXIT_PROTM,
4913 					 scheduler->active_protm_grp, 0u);
4914 	scheduler->active_protm_grp = NULL;
4915 	memset(kbdev->csf.scheduler.csg_slots, 0,
4916 	       num_groups * sizeof(struct kbase_csf_csg_slot));
4917 	bitmap_zero(kbdev->csf.scheduler.csg_inuse_bitmap, num_groups);
4918 	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
4919 
4920 	scheduler->top_ctx = NULL;
4921 	scheduler->top_grp = NULL;
4922 
4923 	KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp,
4924 			scheduler->num_active_address_spaces |
4925 			(((u64)scheduler->total_runnable_grps) << 32));
4926 
4927 	mutex_unlock(&scheduler->lock);
4928 }
4929 
4930 void kbase_csf_scheduler_reset(struct kbase_device *kbdev)
4931 {
4932 	struct kbase_context *kctx;
4933 
4934 	WARN_ON(!kbase_reset_gpu_is_active(kbdev));
4935 
4936 	KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET, NULL, 0u);
4937 
4938 	if (scheduler_handle_reset_in_protected_mode(kbdev) &&
4939 	    !suspend_active_queue_groups_on_reset(kbdev)) {
4940 		/* As all groups have been successfully evicted from the CSG
4941 		 * slots, clear out the scheduler data fields and return.
4942 		 */
4943 		scheduler_inner_reset(kbdev);
4944 		return;
4945 	}
4946 
4947 	mutex_lock(&kbdev->kctx_list_lock);
4948 
4949 	/* The loop to iterate over the kbase contexts is present due to lock
4950 	 * ordering issue between kctx->csf.lock & kbdev->csf.scheduler.lock.
4951 	 * CSF ioctls first take kctx->csf.lock which is context-specific and
4952 	 * then take kbdev->csf.scheduler.lock for global actions like assigning
4953 	 * a CSG slot.
4954 	 * If the lock ordering constraint was not there then we could have
4955 	 * directly looped over the active queue groups.
4956 	 */
4957 	list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) {
4958 		/* Firmware reload would reinitialize the CSG & CS interface IO
4959 		 * pages, so we just need to internally mark the currently active
4960 		 * queue groups as terminated (similar to the unexpected OoM
4961 		 * event case).
4962 		 * No further work can now get executed for the active groups
4963 		 * (new groups would have to be created to execute work) and
4964 		 * in near future Clients would be duly informed of this
4965 		 * reset. The resources (like User IO pages, GPU queue memory)
4966 		 * allocated for the associated queues would be freed when the
4967 		 * Clients do the teardown when they become aware of the reset.
4968 		 */
4969 		kbase_csf_active_queue_groups_reset(kbdev, kctx);
4970 	}
4971 
4972 	mutex_unlock(&kbdev->kctx_list_lock);
4973 
4974 	/* After the queue groups are reset, clear out the scheduler data fields */
4975 	scheduler_inner_reset(kbdev);
4976 }
4977 
4978 static void firmware_aliveness_monitor(struct work_struct *work)
4979 {
4980 	struct kbase_device *kbdev = container_of(work, struct kbase_device,
4981 					csf.scheduler.ping_work.work);
4982 	int err;
4983 
4984 	/* Ensure that reset will not be occurring while this function is being
4985 	 * executed as otherwise calling kbase_reset_gpu when reset is already
4986 	 * occurring is a programming error.
4987 	 *
4988 	 * We must use the 'try' variant as the Reset worker can try to flush
4989 	 * this workqueue, which would otherwise deadlock here if we tried to
4990 	 * wait for the reset (and thus ourselves) to complete.
4991 	 */
4992 	err = kbase_reset_gpu_try_prevent(kbdev);
4993 	if (err) {
4994 		/* It doesn't matter whether the value was -EAGAIN or a fatal
4995 		 * error, just stop processing. In case of -EAGAIN, the Reset
4996 		 * worker will restart the scheduler later to resume pinging.
4997 		 */
4998 		return;
4999 	}
5000 
5001 	mutex_lock(&kbdev->csf.scheduler.lock);
5002 
5003 #ifdef CONFIG_MALI_BIFROST_DEBUG
5004 	if (fw_debug) {
5005 		/* ping requests cause distraction in firmware debugging */
5006 		goto exit;
5007 	}
5008 #endif
5009 
5010 	if (kbdev->csf.scheduler.state == SCHED_SUSPENDED ||
5011 	    kbdev->csf.scheduler.state == SCHED_SLEEPING)
5012 		goto exit;
5013 
5014 	if (kbase_csf_scheduler_get_nr_active_csgs(kbdev) != 1)
5015 		goto exit;
5016 
5017 	if (kbase_csf_scheduler_protected_mode_in_use(kbdev))
5018 		goto exit;
5019 
5020 	if (kbase_pm_context_active_handle_suspend(kbdev,
5021 			KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) {
5022 		/* Suspend pending - no real need to ping */
5023 		goto exit;
5024 	}
5025 
5026 	kbase_csf_scheduler_wait_mcu_active(kbdev);
5027 
5028 	err = kbase_csf_firmware_ping_wait(kbdev);
5029 
5030 	if (err) {
5031 		/* It is acceptable to enqueue a reset whilst we've prevented
5032 		 * them; it will happen after we've allowed them again.
5033 		 */
5034 		if (kbase_prepare_to_reset_gpu(
5035 			    kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
5036 			kbase_reset_gpu(kbdev);
5037 	} else if (kbase_csf_scheduler_get_nr_active_csgs(kbdev) == 1) {
5038 		queue_delayed_work(system_long_wq,
5039 			&kbdev->csf.scheduler.ping_work,
5040 			msecs_to_jiffies(FIRMWARE_PING_INTERVAL_MS));
5041 	}
5042 
5043 	kbase_pm_context_idle(kbdev);
5044 exit:
5045 	mutex_unlock(&kbdev->csf.scheduler.lock);
5046 	kbase_reset_gpu_allow(kbdev);
5047 	return;
5048 }
5049 
5050 int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group,
5051 		struct kbase_suspend_copy_buffer *sus_buf)
5052 {
5053 	struct kbase_context *const kctx = group->kctx;
5054 	struct kbase_device *const kbdev = kctx->kbdev;
5055 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
5056 	bool on_slot;
5057 	int err = 0;
5058 
5059 	kbase_reset_gpu_assert_prevented(kbdev);
5060 	lockdep_assert_held(&kctx->csf.lock);
5061 	mutex_lock(&scheduler->lock);
5062 
5063 	on_slot = kbasep_csf_scheduler_group_is_on_slot_locked(group);
5064 
5065 #ifdef KBASE_PM_RUNTIME
5066 	if (on_slot && (scheduler->state == SCHED_SLEEPING)) {
5067 		if (wait_for_scheduler_to_exit_sleep(kbdev)) {
5068 			dev_warn(
5069 				kbdev->dev,
5070 				"Wait for scheduler to exit sleep state timed out when copying suspend buffer for group %d of ctx %d_%d on slot %d",
5071 				group->handle, group->kctx->tgid,
5072 				group->kctx->id, group->csg_nr);
5073 
5074 			scheduler_wakeup(kbdev, true);
5075 
5076 			/* Wait for MCU firmware to start running */
5077 			if (kbase_csf_scheduler_wait_mcu_active(kbdev))
5078 				dev_warn(
5079 					kbdev->dev,
5080 					"Wait for MCU active failed when copying suspend buffer for group %d of ctx %d_%d on slot %d",
5081 					group->handle, group->kctx->tgid,
5082 					group->kctx->id, group->csg_nr);
5083 		}
5084 
5085 		/* Check the group state again as scheduler lock would have been
5086 		 * released when waiting for the exit from SLEEPING state.
5087 		 */
5088 		on_slot = kbasep_csf_scheduler_group_is_on_slot_locked(group);
5089 	}
5090 #endif
5091 	if (on_slot) {
5092 		DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0};
5093 
5094 		set_bit(kbase_csf_scheduler_group_get_slot(group), slot_mask);
5095 
5096 		if (!WARN_ON(scheduler->state == SCHED_SUSPENDED))
5097 			suspend_queue_group(group);
5098 		err = wait_csg_slots_suspend(kbdev, slot_mask,
5099 					     kbdev->csf.fw_timeout_ms);
5100 		if (err) {
5101 			dev_warn(kbdev->dev, "[%llu] Timeout waiting for the group %d to suspend on slot %d",
5102 				 kbase_backend_get_cycle_cnt(kbdev),
5103 				 group->handle, group->csg_nr);
5104 			goto exit;
5105 		}
5106 	}
5107 
5108 	if (queue_group_suspended_locked(group)) {
5109 		unsigned int target_page_nr = 0, i = 0;
5110 		u64 offset = sus_buf->offset;
5111 		size_t to_copy = sus_buf->size;
5112 		const u32 csg_suspend_buf_nr_pages =
5113 			PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
5114 
5115 		if (scheduler->state != SCHED_SUSPENDED) {
5116 			/* Similar to the case of HW counters, need to flush
5117 			 * the GPU L2 cache before reading from the suspend buffer
5118 			 * pages as they are mapped and cached on GPU side.
5119 			 * Flushing LSC is not done here, since only the flush of
5120 			 * CSG suspend buffer contents is needed from the L2 cache.
5121 			 */
5122 			kbase_gpu_start_cache_clean(
5123 				kbdev, GPU_COMMAND_CACHE_CLN_INV_L2);
5124 			kbase_gpu_wait_cache_clean(kbdev);
5125 		} else {
5126 			/* Make sure power down transitions have completed,
5127 			 * i.e. L2 has been powered off as that would ensure
5128 			 * its contents are flushed to memory.
5129 			 * This is needed as Scheduler doesn't wait for the
5130 			 * power down to finish.
5131 			 */
5132 			kbase_pm_wait_for_desired_state(kbdev);
5133 		}
5134 
5135 		for (i = 0; i < csg_suspend_buf_nr_pages &&
5136 				target_page_nr < sus_buf->nr_pages; i++) {
5137 			struct page *pg =
5138 				as_page(group->normal_suspend_buf.phy[i]);
5139 			void *sus_page = kmap(pg);
5140 
5141 			if (sus_page) {
5142 				kbase_sync_single_for_cpu(kbdev,
5143 					kbase_dma_addr(pg),
5144 					PAGE_SIZE, DMA_BIDIRECTIONAL);
5145 
5146 				err = kbase_mem_copy_to_pinned_user_pages(
5147 						sus_buf->pages, sus_page,
5148 						&to_copy, sus_buf->nr_pages,
5149 						&target_page_nr, offset);
5150 				kunmap(pg);
5151 				if (err)
5152 					break;
5153 			} else {
5154 				err = -ENOMEM;
5155 				break;
5156 			}
5157 		}
5158 		schedule_in_cycle(group, false);
5159 	} else {
5160 		/* If addr-space fault, the group may have been evicted */
5161 		err = -EIO;
5162 	}
5163 
5164 exit:
5165 	mutex_unlock(&scheduler->lock);
5166 	return err;
5167 }
5168 
5169 KBASE_EXPORT_TEST_API(kbase_csf_scheduler_group_copy_suspend_buf);
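
/*
 * Illustrative sketch (not part of the driver): how a caller is expected to
 * drive kbase_csf_scheduler_group_copy_suspend_buf(). As required by the
 * asserts above, the caller prevents GPU reset and holds kctx->csf.lock; the
 * sus_buf descriptor (pinned user pages, size and offset) is assumed to have
 * been prepared beforehand. The wrapper is hypothetical. Guarded out so that
 * it has no effect on the build.
 */
#if 0
static int example_dump_group_suspend_buf(struct kbase_queue_group *group,
					  struct kbase_suspend_copy_buffer *sus_buf)
{
	struct kbase_device *kbdev = group->kctx->kbdev;
	int err;

	err = kbase_reset_gpu_prevent_and_wait(kbdev);
	if (err)
		return err;

	mutex_lock(&group->kctx->csf.lock);
	err = kbase_csf_scheduler_group_copy_suspend_buf(group, sus_buf);
	mutex_unlock(&group->kctx->csf.lock);

	kbase_reset_gpu_allow(kbdev);
	return err;
}
#endif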
5170 
5171 /**
5172  * group_sync_updated() - Evaluate sync wait condition of all blocked command
5173  *                        queues of the group.
5174  *
5175  * @group: Pointer to the command queue group that has blocked command queue(s)
5176  *         bound to it.
5177  *
5178  * Return: true if sync wait condition is satisfied for at least one blocked
5179  *         queue of the group.
5180  */
5181 static bool group_sync_updated(struct kbase_queue_group *group)
5182 {
5183 	bool updated = false;
5184 	int stream;
5185 
5186 	/* Groups can also be blocked on-slot during protected mode. */
5187 	WARN_ON(group->run_state != KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC &&
5188 		    group->run_state != KBASE_CSF_GROUP_IDLE);
5189 
5190 	for (stream = 0; stream < MAX_SUPPORTED_STREAMS_PER_GROUP; ++stream) {
5191 		struct kbase_queue *const queue = group->bound_queues[stream];
5192 
5193 		/* To check the necessity of sync-wait evaluation,
5194 		 * we rely on the cached 'status_wait' instead of reading it
5195 		 * directly from shared memory as the CSG has been already
5196 		 * evicted from the CSG slot, thus this CSG doesn't have
5197 		 * valid information in the shared memory.
5198 		 */
5199 		if (queue && queue->enabled &&
5200 		    CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait))
5201 			if (evaluate_sync_update(queue)) {
5202 				updated = true;
5203 				queue->status_wait = 0;
5204 			}
5205 	}
5206 
5207 	return updated;
5208 }
5209 
5210 /**
5211  * scheduler_get_protm_enter_async_group() -  Check if the GPU queue group
5212  *                          can be now allowed to execute in protected mode.
5213  *
5214  * @kbdev:    Pointer to the GPU device.
5215  * @group:    Pointer to the GPU queue group.
5216  *
5217  * This function is called outside the scheduling tick/tock to determine
5218  * if the given GPU queue group can now execute in protected mode or not.
5219  * If the group pointer passed is NULL then the evaluation is done for the
5220  * highest priority group on the scheduler maintained group lists without
5221  * tick associated rotation actions. This is referred to as the 'top-group'
5222  * in a tock action sense.
5223  *
5224  * It returns the same group pointer, that was passed as an argument, if that
5225  * group matches the highest priority group and has pending protected region
5226  * requests otherwise NULL is returned.
5227  *
5228  * If the group pointer passed is NULL then the internal evaluated highest
5229  * priority group is returned if that has pending protected region requests
5230  * otherwise NULL is returned.
5231  *
5232  * The evaluated highest priority group may not necessarily be the same as the
5233  * scheduler->top_grp. This can happen if there is dynamic de-idle update
5234  * during the tick interval for some on-slots groups that were idle during the
5235  * scheduler normal scheduling action, where the scheduler->top_grp was set.
5236  * The recorded scheduler->top_grp is untouched by this evaluation, so it will
5237  * not affect the scheduler context/priority list rotation arrangement.
5238  *
5239  * Return: the pointer to queue group that can currently execute in protected
5240  *         mode or NULL.
5241  */
5242 static struct kbase_queue_group *scheduler_get_protm_enter_async_group(
5243 		struct kbase_device *const kbdev,
5244 		struct kbase_queue_group *const group)
5245 {
5246 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
5247 	struct kbase_queue_group *match_grp, *input_grp;
5248 
5249 	lockdep_assert_held(&scheduler->lock);
5250 
5251 	if (scheduler->state != SCHED_INACTIVE)
5252 		return NULL;
5253 
5254 	match_grp = get_tock_top_group(scheduler);
5255 	input_grp = group ? group : match_grp;
5256 
5257 	if (input_grp && (input_grp == match_grp)) {
5258 		struct kbase_csf_cmd_stream_group_info *ginfo =
5259 				&kbdev->csf.global_iface.groups[0];
5260 		unsigned long *pending =
5261 				input_grp->protm_pending_bitmap;
5262 		unsigned long flags;
5263 
5264 		spin_lock_irqsave(&scheduler->interrupt_lock, flags);
5265 
5266 		if (kbase_csf_scheduler_protected_mode_in_use(kbdev) ||
5267 		    bitmap_empty(pending, ginfo->stream_num))
5268 			input_grp = NULL;
5269 
5270 		spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
5271 	} else {
5272 		input_grp = NULL;
5273 	}
5274 
5275 	return input_grp;
5276 }
5277 
5278 void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group)
5279 {
5280 	struct kbase_device *const kbdev = group->kctx->kbdev;
5281 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
5282 
5283 	int err = kbase_reset_gpu_try_prevent(kbdev);
5284 	/* Regardless of whether reset failed or is currently happening, exit
5285 	 * early
5286 	 */
5287 	if (err)
5288 		return;
5289 
5290 	mutex_lock(&scheduler->lock);
5291 
5292 	/* Check if the group is now eligible for execution in protected mode. */
5293 	if (scheduler_get_protm_enter_async_group(kbdev, group))
5294 		scheduler_group_check_protm_enter(kbdev, group);
5295 
5296 	mutex_unlock(&scheduler->lock);
5297 	kbase_reset_gpu_allow(kbdev);
5298 }
5299 
5300 /**
5301  * check_sync_update_for_on_slot_group() - Check the sync wait condition
5302  *                                         for all the queues bound to
5303  *                                         the given on-slot group.
5304  *
5305  * @group:    Pointer to the on-slot group that requires evaluation.
5306  *
5307  * This function is called if the GPU is in protected mode and there are
5308  * on-slot idle groups with a higher priority than the active protected mode
5309  * group, or when a CQS object is signaled whilst the GPU is in the sleep
5310  * state.
5311  * This function will evaluate the sync condition, if any, of all the queues
5312  * bound to the given group.
5313  *
5314  * Return: true if the sync condition of at least one queue has been satisfied.
5315  */
5316 static bool check_sync_update_for_on_slot_group(
5317 		struct kbase_queue_group *group)
5318 {
5319 	struct kbase_device *const kbdev = group->kctx->kbdev;
5320 	struct kbase_csf_scheduler *const scheduler =
5321 				&kbdev->csf.scheduler;
5322 	bool sync_update_done = false;
5323 	int i;
5324 
5325 	lockdep_assert_held(&scheduler->lock);
5326 
5327 	for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) {
5328 		struct kbase_queue *queue = group->bound_queues[i];
5329 
5330 		if (queue && queue->enabled && !sync_update_done) {
5331 			struct kbase_csf_cmd_stream_group_info *const ginfo =
5332 				&kbdev->csf.global_iface.groups[group->csg_nr];
5333 			struct kbase_csf_cmd_stream_info *const stream =
5334 				&ginfo->streams[queue->csi_index];
5335 			u32 status = kbase_csf_firmware_cs_output(
5336 					stream, CS_STATUS_WAIT);
5337 			unsigned long flags;
5338 
5339 			KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_STATUS_WAIT,
5340 						   queue->group, queue, status);
5341 
5342 			if (!CS_STATUS_WAIT_SYNC_WAIT_GET(status))
5343 				continue;
5344 
5345 			/* Save the information of the sync object of the
5346 			 * command queue so that the callback function,
5347 			 * 'group_sync_updated', can evaluate the sync object
5348 			 * when it gets updated later.
5349 			 */
5350 			queue->status_wait = status;
5351 			queue->sync_ptr = kbase_csf_firmware_cs_output(
5352 				stream, CS_STATUS_WAIT_SYNC_POINTER_LO);
5353 			queue->sync_ptr |= (u64)kbase_csf_firmware_cs_output(
5354 				stream, CS_STATUS_WAIT_SYNC_POINTER_HI) << 32;
5355 			queue->sync_value = kbase_csf_firmware_cs_output(
5356 				stream, CS_STATUS_WAIT_SYNC_VALUE);
5357 			queue->blocked_reason =
5358 				CS_STATUS_BLOCKED_REASON_REASON_GET(
5359 					kbase_csf_firmware_cs_output(
5360 						stream,
5361 						CS_STATUS_BLOCKED_REASON));
5362 
5363 			if (!evaluate_sync_update(queue))
5364 				continue;
5365 
5366 			/* Update csg_slots_idle_mask and group's run_state */
5367 			if (group->run_state != KBASE_CSF_GROUP_RUNNABLE) {
5368 				/* Only clear the group's idle flag if it has been dealt
5369 				 * with by the scheduler's tick/tock action, otherwise
5370 				 * leave it untouched.
5371 				 */
5372 				spin_lock_irqsave(&scheduler->interrupt_lock,
5373 						  flags);
5374 				clear_bit((unsigned int)group->csg_nr,
5375 					  scheduler->csg_slots_idle_mask);
5376 				KBASE_KTRACE_ADD_CSF_GRP(
5377 					kbdev, CSG_SLOT_IDLE_CLEAR, group,
5378 					scheduler->csg_slots_idle_mask[0]);
5379 				spin_unlock_irqrestore(
5380 					&scheduler->interrupt_lock, flags);
5381 				group->run_state = KBASE_CSF_GROUP_RUNNABLE;
5382 			}
5383 
5384 			KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SYNC_UPDATE_DONE, group, 0u);
5385 			sync_update_done = true;
5386 		}
5387 	}
5388 
5389 	return sync_update_done;
5390 }
5391 
5392 /**
5393  * check_sync_update_for_idle_groups_protm() - Check the sync wait condition
5394  *                                             for the idle groups on slot
5395  *                                             during protected mode.
5396  *
5397  * @kbdev:    Pointer to the GPU device
5398  *
5399  * This function checks the GPU queues of all the on-slot idle groups, during
5400  * protected mode, that have a higher priority than the active protected mode
5401  * group.
5402  *
5403  * Return: true if the sync condition of at least one queue in a group has been
5404  * satisfied.
5405  */
5406 static bool check_sync_update_for_idle_groups_protm(struct kbase_device *kbdev)
5407 {
5408 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
5409 	struct kbase_queue_group *protm_grp;
5410 	bool exit_protm = false;
5411 	unsigned long flags;
5412 	u32 num_groups;
5413 	u32 i;
5414 
5415 	lockdep_assert_held(&scheduler->lock);
5416 
5417 	spin_lock_irqsave(&scheduler->interrupt_lock, flags);
5418 	protm_grp = scheduler->active_protm_grp;
5419 	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
5420 
5421 	if (!protm_grp)
5422 		return exit_protm;
5423 
5424 	num_groups = kbdev->csf.global_iface.group_num;
5425 
5426 	for_each_set_bit(i, scheduler->csg_slots_idle_mask, num_groups) {
5427 		struct kbase_csf_csg_slot *csg_slot =
5428 					&scheduler->csg_slots[i];
5429 		struct kbase_queue_group *group = csg_slot->resident_group;
5430 
5431 		if (group->scan_seq_num < protm_grp->scan_seq_num) {
5432 			/* If sync update has been performed for the group that
5433 			 * has a higher priority than the protm group, then we
5434 			 * need to exit protected mode.
5435 			 */
5436 			if (check_sync_update_for_on_slot_group(group))
5437 				exit_protm = true;
5438 		}
5439 	}
5440 
5441 	return exit_protm;
5442 }
5443 
5444 static void check_sync_update_in_sleep_mode(struct kbase_device *kbdev)
5445 {
5446 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
5447 	u32 const num_groups = kbdev->csf.global_iface.group_num;
5448 	u32 csg_nr;
5449 
5450 	lockdep_assert_held(&scheduler->lock);
5451 
5452 	for (csg_nr = 0; csg_nr < num_groups; csg_nr++) {
5453 		struct kbase_queue_group *const group =
5454 			kbdev->csf.scheduler.csg_slots[csg_nr].resident_group;
5455 
5456 		if (!group)
5457 			continue;
5458 
5459 		if (check_sync_update_for_on_slot_group(group)) {
5460 			scheduler_wakeup(kbdev, true);
5461 			return;
5462 		}
5463 	}
5464 }
5465 
5466 /**
5467  * check_group_sync_update_worker() - Check the sync wait condition for all the
5468  *                                    blocked queue groups
5469  *
5470  * @work:    Pointer to the context-specific work item for evaluating the wait
5471  *           condition for all the queue groups in idle_wait_groups list.
5472  *
5473  * This function checks the GPU queues of all the groups present in the
5474  * idle_wait_groups list of a context, as well as all on-slot idle groups
5475  * (if the GPU is in protected mode).
5476  * If the sync wait condition for at least one queue bound to the group has
5477  * been satisfied then the group is moved to the per context list of
5478  * runnable groups so that Scheduler can consider scheduling the group
5479  * in next tick or exit protected mode.
5480  */
5481 static void check_group_sync_update_worker(struct work_struct *work)
5482 {
5483 	struct kbase_context *const kctx = container_of(work,
5484 		struct kbase_context, csf.sched.sync_update_work);
5485 	struct kbase_device *const kbdev = kctx->kbdev;
5486 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
5487 	bool sync_updated = false;
5488 
5489 	mutex_lock(&scheduler->lock);
5490 
5491 	KBASE_KTRACE_ADD(kbdev, GROUP_SYNC_UPDATE_WORKER_BEGIN, kctx, 0u);
5492 	if (kctx->csf.sched.num_idle_wait_grps != 0) {
5493 		struct kbase_queue_group *group, *temp;
5494 
5495 		list_for_each_entry_safe(group, temp,
5496 				&kctx->csf.sched.idle_wait_groups, link) {
5497 			if (group_sync_updated(group)) {
5498 				sync_updated = true;
5499 				/* Move this group back in to the runnable
5500 				 * groups list of the context.
5501 				 */
5502 				update_idle_suspended_group_state(group);
5503 				KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SYNC_UPDATE_DONE, group, 0u);
5504 			}
5505 		}
5506 	} else {
5507 		WARN_ON(!list_empty(&kctx->csf.sched.idle_wait_groups));
5508 	}
5509 
5510 	if (check_sync_update_for_idle_groups_protm(kbdev)) {
5511 		scheduler_force_protm_exit(kbdev);
5512 		sync_updated = true;
5513 	}
5514 
5515 	/* If the scheduler is in the sleeping or suspended state, re-activate it
5516 	 * to serve on-slot CSGs blocked on CQS objects which have been signaled.
5517 	 */
5518 	if (!sync_updated && (scheduler->state == SCHED_SLEEPING))
5519 		check_sync_update_in_sleep_mode(kbdev);
5520 
5521 	KBASE_KTRACE_ADD(kbdev, GROUP_SYNC_UPDATE_WORKER_END, kctx, 0u);
5522 
5523 	mutex_unlock(&scheduler->lock);
5524 }
5525 
5526 static
5527 enum kbase_csf_event_callback_action check_group_sync_update_cb(void *param)
5528 {
5529 	struct kbase_context *const kctx = param;
5530 
5531 	KBASE_KTRACE_ADD(kctx->kbdev, SYNC_UPDATE_EVENT, kctx, 0u);
5532 	queue_work(kctx->csf.sched.sync_update_wq,
5533 		&kctx->csf.sched.sync_update_work);
5534 
5535 	return KBASE_CSF_EVENT_CALLBACK_KEEP;
5536 }
5537 
5538 int kbase_csf_scheduler_context_init(struct kbase_context *kctx)
5539 {
5540 	int priority;
5541 	int err;
5542 
5543 	for (priority = 0; priority < KBASE_QUEUE_GROUP_PRIORITY_COUNT;
5544 	     ++priority) {
5545 		INIT_LIST_HEAD(&kctx->csf.sched.runnable_groups[priority]);
5546 	}
5547 
5548 	kctx->csf.sched.num_runnable_grps = 0;
5549 	INIT_LIST_HEAD(&kctx->csf.sched.idle_wait_groups);
5550 	kctx->csf.sched.num_idle_wait_grps = 0;
5551 	kctx->csf.sched.ngrp_to_schedule = 0;
5552 
5553 	kctx->csf.sched.sync_update_wq =
5554 		alloc_ordered_workqueue("mali_kbase_csf_sync_update_wq",
5555 			WQ_HIGHPRI);
5556 	if (!kctx->csf.sched.sync_update_wq) {
5557 		dev_err(kctx->kbdev->dev,
5558 			"Failed to initialize scheduler context workqueue");
5559 		return -ENOMEM;
5560 	}
5561 
5562 	INIT_WORK(&kctx->csf.sched.sync_update_work,
5563 		check_group_sync_update_worker);
5564 
5565 	err = kbase_csf_event_wait_add(kctx, check_group_sync_update_cb, kctx);
5566 
5567 	if (err) {
5568 		dev_err(kctx->kbdev->dev,
5569 			"Failed to register a sync update callback");
5570 		destroy_workqueue(kctx->csf.sched.sync_update_wq);
5571 	}
5572 
5573 	return err;
5574 }
5575 
5576 void kbase_csf_scheduler_context_term(struct kbase_context *kctx)
5577 {
5578 	kbase_csf_event_wait_remove(kctx, check_group_sync_update_cb, kctx);
5579 	cancel_work_sync(&kctx->csf.sched.sync_update_work);
5580 	destroy_workqueue(kctx->csf.sched.sync_update_wq);
5581 }
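
/*
 * Illustrative sketch (not part of the driver): the expected pairing of the
 * per-context scheduler init/term calls above during the lifetime of a kbase
 * context. The wrapper and its error handling are hypothetical. Guarded out
 * so that it has no effect on the build.
 */
#if 0
static int example_ctx_sched_lifetime(struct kbase_context *kctx)
{
	int err = kbase_csf_scheduler_context_init(kctx);

	if (err)
		return err;

	/* ... queue groups are created, scheduled and torn down here ... */

	kbase_csf_scheduler_context_term(kctx);
	return 0;
}
#endif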
5582 
5583 int kbase_csf_scheduler_init(struct kbase_device *kbdev)
5584 {
5585 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
5586 	u32 num_groups = kbdev->csf.global_iface.group_num;
5587 
5588 	bitmap_zero(scheduler->csg_inuse_bitmap, num_groups);
5589 	bitmap_zero(scheduler->csg_slots_idle_mask, num_groups);
5590 
5591 	scheduler->csg_slots = kcalloc(num_groups,
5592 				sizeof(*scheduler->csg_slots), GFP_KERNEL);
5593 	if (!scheduler->csg_slots) {
5594 		dev_err(kbdev->dev,
5595 			"Failed to allocate memory for csg slot status array\n");
5596 		return -ENOMEM;
5597 	}
5598 
5599 	return 0;
5600 }
5601 
5602 int kbase_csf_scheduler_early_init(struct kbase_device *kbdev)
5603 {
5604 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
5605 
5606 	scheduler->timer_enabled = true;
5607 
5608 	scheduler->wq = alloc_ordered_workqueue("csf_scheduler_wq", WQ_HIGHPRI);
5609 	if (!scheduler->wq) {
5610 		dev_err(kbdev->dev, "Failed to allocate scheduler workqueue\n");
5611 		return -ENOMEM;
5612 	}
5613 
5614 	INIT_WORK(&scheduler->tick_work, schedule_on_tick);
5615 	INIT_DEFERRABLE_WORK(&scheduler->tock_work, schedule_on_tock);
5616 
5617 	INIT_DEFERRABLE_WORK(&scheduler->ping_work, firmware_aliveness_monitor);
5618 
5619 	mutex_init(&scheduler->lock);
5620 	spin_lock_init(&scheduler->interrupt_lock);
5621 
5622 	/* Internal lists */
5623 	INIT_LIST_HEAD(&scheduler->runnable_kctxs);
5624 	INIT_LIST_HEAD(&scheduler->groups_to_schedule);
5625 	INIT_LIST_HEAD(&scheduler->idle_groups_to_schedule);
5626 
5627 	BUILD_BUG_ON(MAX_SUPPORTED_CSGS >
5628 		(sizeof(scheduler->csgs_events_enable_mask) * BITS_PER_BYTE));
5629 	bitmap_fill(scheduler->csgs_events_enable_mask, MAX_SUPPORTED_CSGS);
5630 	scheduler->state = SCHED_SUSPENDED;
5631 	scheduler->pm_active_count = 0;
5632 	scheduler->ngrp_to_schedule = 0;
5633 	scheduler->total_runnable_grps = 0;
5634 	scheduler->top_ctx = NULL;
5635 	scheduler->top_grp = NULL;
5636 	scheduler->last_schedule = 0;
5637 	scheduler->tock_pending_request = false;
5638 	scheduler->active_protm_grp = NULL;
5639 	scheduler->gpu_idle_fw_timer_enabled = false;
5640 	scheduler->csg_scheduling_period_ms = CSF_SCHEDULER_TIME_TICK_MS;
5641 	scheduler_doorbell_init(kbdev);
5642 
5643 	INIT_WORK(&scheduler->gpu_idle_work, gpu_idle_worker);
5644 	atomic_set(&scheduler->non_idle_offslot_grps, 0);
5645 
5646 	hrtimer_init(&scheduler->tick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
5647 	scheduler->tick_timer.function = tick_timer_callback;
5648 	scheduler->tick_timer_active = false;
5649 
5650 	return 0;
5651 }
5652 
5653 void kbase_csf_scheduler_term(struct kbase_device *kbdev)
5654 {
5655 	if (kbdev->csf.scheduler.csg_slots) {
5656 		WARN_ON(atomic_read(&kbdev->csf.scheduler.non_idle_offslot_grps));
5657 		/* The unload of Driver can take place only when all contexts have
5658 		 * been terminated. The groups that were not terminated by the User
5659 		 * are terminated on context termination. So no CSGs are expected
5660 		 * to be active at the time of Driver unload.
5661 		 */
5662 		WARN_ON(kbase_csf_scheduler_get_nr_active_csgs(kbdev));
5663 		flush_work(&kbdev->csf.scheduler.gpu_idle_work);
5664 		mutex_lock(&kbdev->csf.scheduler.lock);
5665 
5666 		if (kbdev->csf.scheduler.state != SCHED_SUSPENDED) {
5667 			/* The power policy could prevent the Scheduler from
5668 			 * getting suspended when GPU becomes idle.
5669 			 */
5670 			WARN_ON(kbase_pm_idle_groups_sched_suspendable(kbdev));
5671 			scheduler_suspend(kbdev);
5672 		}
5673 
5674 		mutex_unlock(&kbdev->csf.scheduler.lock);
5675 		cancel_delayed_work_sync(&kbdev->csf.scheduler.ping_work);
5676 		cancel_tick_timer(kbdev);
5677 		cancel_work_sync(&kbdev->csf.scheduler.tick_work);
5678 		cancel_tock_work(&kbdev->csf.scheduler);
5679 		mutex_destroy(&kbdev->csf.scheduler.lock);
5680 		kfree(kbdev->csf.scheduler.csg_slots);
5681 		kbdev->csf.scheduler.csg_slots = NULL;
5682 	}
5683 }
5684 
5685 void kbase_csf_scheduler_early_term(struct kbase_device *kbdev)
5686 {
5687 	if (kbdev->csf.scheduler.wq)
5688 		destroy_workqueue(kbdev->csf.scheduler.wq);
5689 }
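
/*
 * Illustrative sketch (not part of the driver): a plausible device-level
 * ordering of the scheduler init/term entry points above - early_init sets up
 * the workqueue, locks and timers, init sizes the CSG slot array once
 * global_iface.group_num is known, and the term calls undo them in reverse
 * order. The probe/remove wrappers and the exact ordering are assumptions.
 * Guarded out so that it has no effect on the build.
 */
#if 0
static int example_scheduler_probe(struct kbase_device *kbdev)
{
	int err = kbase_csf_scheduler_early_init(kbdev);

	if (err)
		return err;

	/* ... firmware interface set up, group_num now valid ... */

	err = kbase_csf_scheduler_init(kbdev);
	if (err)
		kbase_csf_scheduler_early_term(kbdev);

	return err;
}

static void example_scheduler_remove(struct kbase_device *kbdev)
{
	kbase_csf_scheduler_term(kbdev);
	kbase_csf_scheduler_early_term(kbdev);
}
#endif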
5690 
5691 /**
5692  * scheduler_enable_tick_timer_nolock - Enable the scheduler tick timer.
5693  *
5694  * @kbdev: Instance of a GPU platform device that implements a CSF interface.
5695  *
5696  * This function will restart the scheduler tick so that regular scheduling can
5697  * be resumed without any explicit trigger (like kicking of GPU queues). This
5698  * is a variant of kbase_csf_scheduler_enable_tick_timer() that assumes the
5699  * CSF scheduler lock to already have been held.
5700  */
5701 static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev)
5702 {
5703 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
5704 
5705 	lockdep_assert_held(&kbdev->csf.scheduler.lock);
5706 
5707 	if (unlikely(!scheduler_timer_is_enabled_nolock(kbdev)))
5708 		return;
5709 
5710 	WARN_ON((scheduler->state != SCHED_INACTIVE) &&
5711 		(scheduler->state != SCHED_SUSPENDED) &&
5712 		(scheduler->state != SCHED_SLEEPING));
5713 
5714 	if (scheduler->total_runnable_grps > 0) {
5715 		enqueue_tick_work(kbdev);
5716 		dev_dbg(kbdev->dev, "Re-enabling the scheduler timer\n");
5717 	} else if (scheduler->state != SCHED_SUSPENDED) {
5718 		queue_work(system_wq, &scheduler->gpu_idle_work);
5719 	}
5720 }
5721 
5722 void kbase_csf_scheduler_enable_tick_timer(struct kbase_device *kbdev)
5723 {
5724 	mutex_lock(&kbdev->csf.scheduler.lock);
5725 	scheduler_enable_tick_timer_nolock(kbdev);
5726 	mutex_unlock(&kbdev->csf.scheduler.lock);
5727 }
5728 
5729 bool kbase_csf_scheduler_timer_is_enabled(struct kbase_device *kbdev)
5730 {
5731 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
5732 	bool enabled;
5733 
5734 	mutex_lock(&scheduler->lock);
5735 	enabled = scheduler_timer_is_enabled_nolock(kbdev);
5736 	mutex_unlock(&scheduler->lock);
5737 
5738 	return enabled;
5739 }
5740 
5741 void kbase_csf_scheduler_timer_set_enabled(struct kbase_device *kbdev,
5742 		bool enable)
5743 {
5744 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
5745 	bool currently_enabled;
5746 
5747 	mutex_lock(&scheduler->lock);
5748 
5749 	currently_enabled = scheduler_timer_is_enabled_nolock(kbdev);
5750 	if (currently_enabled && !enable) {
5751 		scheduler->timer_enabled = false;
5752 		cancel_tick_timer(kbdev);
5753 		cancel_delayed_work(&scheduler->tock_work);
5754 		scheduler->tock_pending_request = false;
5755 		mutex_unlock(&scheduler->lock);
5756 		/* The non-sync version to cancel the normal work item is not
5757 		 * available, so we need to drop the lock before cancellation.
5758 		 */
5759 		cancel_work_sync(&scheduler->tick_work);
5760 		return;
5761 	}
5762 
5763 	if (!currently_enabled && enable) {
5764 		scheduler->timer_enabled = true;
5765 
5766 		scheduler_enable_tick_timer_nolock(kbdev);
5767 	}
5768 
5769 	mutex_unlock(&scheduler->lock);
5770 }
5771 
5772 void kbase_csf_scheduler_kick(struct kbase_device *kbdev)
5773 {
5774 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
5775 
5776 	mutex_lock(&scheduler->lock);
5777 
5778 	if (unlikely(scheduler_timer_is_enabled_nolock(kbdev)))
5779 		goto out;
5780 
5781 	if (scheduler->total_runnable_grps > 0) {
5782 		enqueue_tick_work(kbdev);
5783 		dev_dbg(kbdev->dev, "Kicking the scheduler manually\n");
5784 	}
5785 
5786 out:
5787 	mutex_unlock(&scheduler->lock);
5788 }
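
/*
 * Illustrative sketch (not part of the driver): combining the timer control
 * APIs above with a manual kick, e.g. from a debugfs-style knob that turns
 * off the automatic tick and drives scheduling on demand. The helper is
 * hypothetical. Guarded out so that it has no effect on the build.
 */
#if 0
static void example_manual_scheduling(struct kbase_device *kbdev)
{
	/* Stop the periodic scheduling tick. */
	kbase_csf_scheduler_timer_set_enabled(kbdev, false);

	if (!kbase_csf_scheduler_timer_is_enabled(kbdev)) {
		/* Trigger a single scheduling pass on demand. */
		kbase_csf_scheduler_kick(kbdev);
	}

	/* Restore normal periodic scheduling. */
	kbase_csf_scheduler_timer_set_enabled(kbdev, true);
}
#endif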
5789 
5790 int kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev)
5791 {
5792 	int result = 0;
5793 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
5794 
5795 	/* Cancel any potential queued delayed work(s) */
5796 	cancel_work_sync(&scheduler->tick_work);
5797 	cancel_tock_work(scheduler);
5798 
5799 	result = kbase_reset_gpu_prevent_and_wait(kbdev);
5800 	if (result) {
5801 		dev_warn(kbdev->dev,
5802 			 "Stopping PM suspend: failed to prevent GPU reset.\n");
5803 		return result;
5804 	}
5805 
5806 	mutex_lock(&scheduler->lock);
5807 
5808 	disable_gpu_idle_fw_timer(kbdev);
5809 
5810 #ifdef KBASE_PM_RUNTIME
5811 	/* If scheduler is in sleeping state, then MCU needs to be activated
5812 	 * to suspend CSGs.
5813 	 */
5814 	if (scheduler->state == SCHED_SLEEPING) {
5815 		dev_info(kbdev->dev, "Activating MCU out of sleep on system suspend");
5816 		result = force_scheduler_to_exit_sleep(kbdev);
5817 		if (result) {
5818 			dev_warn(kbdev->dev, "Scheduler failed to exit from sleep");
5819 			goto exit;
5820 		}
5821 	}
5822 #endif
5823 	if (scheduler->state != SCHED_SUSPENDED) {
5824 		result = suspend_active_groups_on_powerdown(kbdev, true);
5825 		if (result) {
5826 			dev_warn(kbdev->dev, "failed to suspend active groups");
5827 			goto exit;
5828 		} else {
5829 			dev_info(kbdev->dev, "Scheduler PM suspend");
5830 			scheduler_suspend(kbdev);
5831 			cancel_tick_timer(kbdev);
5832 		}
5833 	}
5834 
5835 exit:
5836 	mutex_unlock(&scheduler->lock);
5837 
5838 	kbase_reset_gpu_allow(kbdev);
5839 
5840 	return result;
5841 }
5842 KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_suspend);
5843 
5844 void kbase_csf_scheduler_pm_resume(struct kbase_device *kbdev)
5845 {
5846 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
5847 
5848 	mutex_lock(&scheduler->lock);
5849 	if ((scheduler->total_runnable_grps > 0) &&
5850 	    (scheduler->state == SCHED_SUSPENDED)) {
5851 		dev_info(kbdev->dev, "Scheduler PM resume");
5852 		scheduler_wakeup(kbdev, true);
5853 	}
5854 	mutex_unlock(&scheduler->lock);
5855 }
5856 KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_resume);
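/*
 * Illustrative-only sketch (not part of the driver): how a system
 * suspend/resume path could use the two functions above. The callback names
 * are hypothetical; the real hookup is done in the PM backend.
 */
#if 0
static int example_system_suspend(struct kbase_device *kbdev)
{
	/* Fails if a GPU reset cannot be prevented or if active groups
	 * cannot be suspended; the caller should abort the suspend then.
	 */
	return kbase_csf_scheduler_pm_suspend(kbdev);
}

static void example_system_resume(struct kbase_device *kbdev)
{
	/* Only wakes the scheduler if runnable groups still exist. */
	kbase_csf_scheduler_pm_resume(kbdev);
}
#endif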
5857 
5858 void kbase_csf_scheduler_pm_active(struct kbase_device *kbdev)
5859 {
5860 	/* Here the lock is taken to synchronize against the runtime suspend
5861 	 * callback function, which may need to wake up the MCU for suspending
5862 	 * the CSGs before powering down the GPU.
5863 	 */
5864 	mutex_lock(&kbdev->csf.scheduler.lock);
5865 	scheduler_pm_active_handle_suspend(kbdev,
5866 			KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE);
5867 	mutex_unlock(&kbdev->csf.scheduler.lock);
5868 }
5869 KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_active);
5870 
5871 void kbase_csf_scheduler_pm_idle(struct kbase_device *kbdev)
5872 {
5873 	/* Here the lock is taken just to maintain symmetry with
5874 	 * kbase_csf_scheduler_pm_active().
5875 	 */
5876 	mutex_lock(&kbdev->csf.scheduler.lock);
5877 	scheduler_pm_idle(kbdev);
5878 	mutex_unlock(&kbdev->csf.scheduler.lock);
5879 }
5880 KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_idle);
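/*
 * Note: kbase_csf_scheduler_pm_active() and kbase_csf_scheduler_pm_idle()
 * are expected to be used as a balanced pair; the
 * WARN_ON(!scheduler->pm_active_count) check in
 * kbase_csf_scheduler_wait_mcu_active() below relies on an active reference
 * being held by the caller.
 */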
5881 
5882 int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev)
5883 {
5884 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
5885 	unsigned long flags;
5886 	int err;
5887 
5888 	kbase_pm_lock(kbdev);
5889 	WARN_ON(!kbdev->pm.active_count);
5890 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
5891 	WARN_ON(!scheduler->pm_active_count);
5892 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
5893 	kbase_pm_unlock(kbdev);
5894 
5895 	kbase_pm_wait_for_poweroff_work_complete(kbdev);
5896 
5897 	err = kbase_pm_wait_for_desired_state(kbdev);
5898 	if (!err) {
5899 		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
5900 		WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_ON);
5901 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
5902 	}
5903 
5904 	return err;
5905 }
5906 KBASE_EXPORT_TEST_API(kbase_csf_scheduler_wait_mcu_active);
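/*
 * Illustrative-only sketch (not part of the driver): the expected calling
 * pattern around an operation that needs the MCU powered on.
 * example_mcu_operation() is a hypothetical helper.
 */
#if 0
static int example_with_mcu_on(struct kbase_device *kbdev)
{
	int err;

	/* Hold a scheduler PM reference so the MCU gets powered up. */
	kbase_csf_scheduler_pm_active(kbdev);

	/* Block until the MCU has actually reached the ON state. */
	err = kbase_csf_scheduler_wait_mcu_active(kbdev);
	if (!err)
		err = example_mcu_operation(kbdev); /* hypothetical */

	/* Drop the reference; the GPU may idle or suspend again. */
	kbase_csf_scheduler_pm_idle(kbdev);

	return err;
}
#endif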
5907 
5908 #ifdef KBASE_PM_RUNTIME
5909 int kbase_csf_scheduler_handle_runtime_suspend(struct kbase_device *kbdev)
5910 {
5911 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
5912 	unsigned long flags;
5913 	int ret;
5914 
5915 	dev_dbg(kbdev->dev, "Handling runtime suspend");
5916 
5917 	kbase_reset_gpu_assert_prevented(kbdev);
5918 	lockdep_assert_held(&scheduler->lock);
5919 	WARN_ON(scheduler->pm_active_count);
5920 
5921 	if (scheduler->state == SCHED_SUSPENDED) {
5922 		WARN_ON(kbdev->pm.backend.gpu_sleep_mode_active);
5923 		return 0;
5924 	}
5925 
5926 	ret = suspend_active_groups_on_powerdown(kbdev, false);
5927 
5928 	if (ret) {
5929 		dev_dbg(kbdev->dev, "Aborting runtime suspend (grps: %d)",
5930 			 atomic_read(&scheduler->non_idle_offslot_grps));
5931 
5932 		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
5933 		kbdev->pm.backend.exit_gpu_sleep_mode = true;
5934 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
5935 
5936 		kbase_csf_scheduler_invoke_tick(kbdev);
5937 		return ret;
5938 	}
5939 
5940 	scheduler->state = SCHED_SUSPENDED;
5941 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
5942 	kbdev->pm.backend.gpu_sleep_mode_active = false;
5943 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
5944 
5945 	wake_up_all(&kbdev->csf.event_wait);
5946 	return 0;
5947 }
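/*
 * Illustrative-only sketch (not part of the driver): the preconditions the
 * function above asserts (GPU reset prevented, scheduler lock held) mean a
 * runtime-suspend caller would look roughly like this. The real callback in
 * the PM backend performs additional steps.
 */
#if 0
static int example_runtime_suspend(struct kbase_device *kbdev)
{
	int ret = kbase_reset_gpu_prevent_and_wait(kbdev);

	if (ret)
		return ret;

	mutex_lock(&kbdev->csf.scheduler.lock);
	ret = kbase_csf_scheduler_handle_runtime_suspend(kbdev);
	mutex_unlock(&kbdev->csf.scheduler.lock);

	kbase_reset_gpu_allow(kbdev);
	return ret;
}
#endif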
5948 
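/*
 * Note: the function below re-checks the idle status of every resident CSG
 * after the MCU has entered sleep. If any resident group reports non-idle,
 * GPU sleep is abandoned by setting exit_gpu_sleep_mode and invoking a
 * scheduling tick.
 */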
5949 void kbase_csf_scheduler_reval_idleness_post_sleep(struct kbase_device *kbdev)
5950 {
5951 	u32 csg_nr;
5952 
5953 	lockdep_assert_held(&kbdev->hwaccess_lock);
5954 
5955 	WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_IN_SLEEP);
5956 
5957 	for (csg_nr = 0; csg_nr < kbdev->csf.global_iface.group_num; csg_nr++) {
5958 		struct kbase_csf_cmd_stream_group_info *ginfo =
5959 			&kbdev->csf.global_iface.groups[csg_nr];
5960 		bool csg_idle;
5961 
5962 		if (!kbdev->csf.scheduler.csg_slots[csg_nr].resident_group)
5963 			continue;
5964 
5965 		csg_idle =
5966 			kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) &
5967 			CSG_STATUS_STATE_IDLE_MASK;
5968 		if (!csg_idle) {
5969 			dev_dbg(kbdev->dev,
5970 				"Re-activate Scheduler after MCU sleep");
5971 			kbdev->pm.backend.exit_gpu_sleep_mode = true;
5972 			kbase_csf_scheduler_invoke_tick(kbdev);
5973 			break;
5974 		}
5975 	}
5976 }
5977 
5978 void kbase_csf_scheduler_force_sleep(struct kbase_device *kbdev)
5979 {
5980 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
5981 
5982 	mutex_lock(&scheduler->lock);
5983 	if (kbase_pm_gpu_sleep_allowed(kbdev) &&
5984 	    (scheduler->state == SCHED_INACTIVE))
5985 		scheduler_sleep_on_idle(kbdev);
5986 	mutex_unlock(&scheduler->lock);
5987 }
5988 #endif
5989 
5990 void kbase_csf_scheduler_force_wakeup(struct kbase_device *kbdev)
5991 {
5992 	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
5993 
5994 	mutex_lock(&scheduler->lock);
5995 	scheduler_wakeup(kbdev, true);
5996 	mutex_unlock(&scheduler->lock);
5997 }
5998