1 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
2 /*
3 *
4 * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
5 *
6 * This program is free software and is provided to you under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation, and any use by you of this program is subject to the terms
9 * of such GNU license.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, you can access it online at
18 * http://www.gnu.org/licenses/gpl-2.0.html.
19 *
20 */
21
22 #include <mali_kbase.h>
23 #include "mali_kbase_config_defaults.h"
24 #include <mali_kbase_ctx_sched.h>
25 #include <mali_kbase_reset_gpu.h>
26 #include <mali_kbase_as_fault_debugfs.h>
27 #include "mali_kbase_csf.h"
28 #include <tl/mali_kbase_tracepoints.h>
29 #include <backend/gpu/mali_kbase_pm_internal.h>
30 #include <linux/export.h>
31 #include <uapi/gpu/arm/bifrost/csf/mali_gpu_csf_registers.h>
32 #include <uapi/gpu/arm/bifrost/mali_base_kernel.h>
33 #include <mali_kbase_hwaccess_time.h>
34
/* Value to indicate that a queue group is not on the groups_to_schedule list */
36 #define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX)
37
/* This decides the upper limit on the waiting time for the Scheduler
* to exit the sleep state. Usually the value of autosuspend_delay is
* expected to be around 100 milliseconds.
*/
42 #define MAX_AUTO_SUSPEND_DELAY_MS (5000)
43
44 /* Maximum number of endpoints which may run tiler jobs. */
45 #define CSG_TILER_MAX ((u8)1)
46
47 /* Maximum dynamic CSG slot priority value */
48 #define MAX_CSG_SLOT_PRIORITY ((u8)15)
49
50 /* CSF scheduler time slice value */
51 #define CSF_SCHEDULER_TIME_TICK_MS (100) /* 100 milliseconds */
52
53 /*
54 * CSF scheduler time threshold for converting "tock" requests into "tick" if
55 * they come too close to the end of a tick interval. This avoids scheduling
56 * twice in a row.
57 */
58 #define CSF_SCHEDULER_TIME_TICK_THRESHOLD_MS \
59 CSF_SCHEDULER_TIME_TICK_MS
60
61 #define CSF_SCHEDULER_TIME_TICK_THRESHOLD_JIFFIES \
62 msecs_to_jiffies(CSF_SCHEDULER_TIME_TICK_THRESHOLD_MS)
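
/*
* Illustrative note (not relied upon by the code): with the default tick of
* CSF_SCHEDULER_TIME_TICK_MS (100 ms), the threshold above is also 100 ms,
* so a "tock" request arriving within one full tick period of the next tick
* gets folded into that tick instead of being serviced separately.
*/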
63
64 /* Nanoseconds per millisecond */
65 #define NS_PER_MS ((u64)1000 * 1000)
66
67 /*
68 * CSF minimum time to reschedule for a new "tock" request. Bursts of "tock"
69 * requests are not serviced immediately, but shall wait for a minimum time in
70 * order to reduce load on the CSF scheduler thread.
71 */
72 #define CSF_SCHEDULER_TIME_TOCK_JIFFIES 1 /* 1 jiffies-time */
73
74 /* CS suspended and is idle (empty ring buffer) */
75 #define CS_IDLE_FLAG (1 << 0)
76
/* CS suspended and is waiting for a CQS condition */
78 #define CS_WAIT_SYNC_FLAG (1 << 1)
79
80 /* A GPU address space slot is reserved for MCU. */
81 #define NUM_RESERVED_AS_SLOTS (1)
82
83 static int scheduler_group_schedule(struct kbase_queue_group *group);
84 static void remove_group_from_idle_wait(struct kbase_queue_group *const group);
85 static
86 void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler,
87 struct kbase_queue_group *const group,
88 enum kbase_csf_group_state run_state);
89 static struct kbase_queue_group *scheduler_get_protm_enter_async_group(
90 struct kbase_device *const kbdev,
91 struct kbase_queue_group *const group);
92 static struct kbase_queue_group *get_tock_top_group(
93 struct kbase_csf_scheduler *const scheduler);
94 static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev);
95 static int suspend_active_queue_groups(struct kbase_device *kbdev,
96 unsigned long *slot_mask);
97 static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev,
98 bool system_suspend);
99 static void schedule_in_cycle(struct kbase_queue_group *group, bool force);
100
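/* A context's queue groups are schedulable only while the context's address
* space has not been disabled on a fault (KCTX_AS_DISABLED_ON_FAULT not set).
*/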
101 #define kctx_as_enabled(kctx) (!kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT))
102
103 #ifdef KBASE_PM_RUNTIME
104 /**
105 * wait_for_scheduler_to_exit_sleep() - Wait for Scheduler to exit the
106 * sleeping state.
107 *
108 * @kbdev: Pointer to the device
109 *
110 * This function waits until the Scheduler has exited the sleep state and
111 * it is called when an on-slot group is terminated or when the suspend
112 * buffer of an on-slot group needs to be captured.
113 *
114 * Return: 0 when the wait is successful, otherwise an error code.
115 */
static int wait_for_scheduler_to_exit_sleep(struct kbase_device *kbdev)
117 {
118 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
119 int autosuspend_delay = kbdev->dev->power.autosuspend_delay;
120 unsigned int sleep_exit_wait_time;
121 long remaining;
122 int ret = 0;
123
124 lockdep_assert_held(&scheduler->lock);
125 WARN_ON(scheduler->state != SCHED_SLEEPING);
126
/* No point in waiting if the autosuspend_delay value is negative.
* For a negative autosuspend_delay the driver will directly suspend
* the Scheduler, but the autosuspend_delay value could have been
* changed after the sleep was initiated.
*/
132 if (autosuspend_delay < 0)
133 return -EINVAL;
134
135 if (autosuspend_delay > MAX_AUTO_SUSPEND_DELAY_MS)
136 autosuspend_delay = MAX_AUTO_SUSPEND_DELAY_MS;
137
138 /* Usually Scheduler would remain in sleeping state until the
139 * auto-suspend timer expires and all active CSGs are suspended.
140 */
141 sleep_exit_wait_time = autosuspend_delay + kbdev->reset_timeout_ms;
142
143 remaining = kbase_csf_timeout_in_jiffies(sleep_exit_wait_time);
144
145 while ((scheduler->state == SCHED_SLEEPING) && !ret) {
146 mutex_unlock(&scheduler->lock);
147 remaining = wait_event_timeout(
148 kbdev->csf.event_wait,
149 (scheduler->state != SCHED_SLEEPING),
150 remaining);
151 mutex_lock(&scheduler->lock);
152 if (!remaining && (scheduler->state == SCHED_SLEEPING))
153 ret = -ETIMEDOUT;
154 }
155
156 return ret;
157 }
158
159 /**
160 * force_scheduler_to_exit_sleep() - Force scheduler to exit sleep state
161 *
162 * @kbdev: Pointer to the device
163 *
164 * This function will force the Scheduler to exit the sleep state by doing the
165 * wake up of MCU and suspension of on-slot groups. It is called at the time of
166 * system suspend.
167 *
168 * Return: 0 on success.
169 */
static int force_scheduler_to_exit_sleep(struct kbase_device *kbdev)
171 {
172 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
173 unsigned long flags;
174 int ret = 0;
175
176 lockdep_assert_held(&scheduler->lock);
177 WARN_ON(scheduler->state != SCHED_SLEEPING);
178 WARN_ON(!kbdev->pm.backend.gpu_sleep_mode_active);
179
180 kbase_pm_lock(kbdev);
181 ret = kbase_pm_force_mcu_wakeup_after_sleep(kbdev);
182 kbase_pm_unlock(kbdev);
183 if (ret) {
184 dev_warn(kbdev->dev,
185 "[%llu] Wait for MCU wake up failed on forced scheduler suspend",
186 kbase_backend_get_cycle_cnt(kbdev));
187 goto out;
188 }
189
190 if (suspend_active_groups_on_powerdown(kbdev, true))
191 goto out;
192
193 kbase_pm_lock(kbdev);
194 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
195 kbdev->pm.backend.gpu_sleep_mode_active = false;
196 kbdev->pm.backend.gpu_wakeup_override = false;
197 kbase_pm_update_state(kbdev);
198 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
199 ret = kbase_pm_wait_for_desired_state(kbdev);
200 kbase_pm_unlock(kbdev);
201 if (ret) {
202 dev_warn(kbdev->dev,
203 "[%llu] Wait for pm state change failed on forced scheduler suspend",
204 kbase_backend_get_cycle_cnt(kbdev));
205 goto out;
206 }
207
208 scheduler->state = SCHED_SUSPENDED;
209
210 return 0;
211
212 out:
213 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
214 kbdev->pm.backend.exit_gpu_sleep_mode = true;
215 kbdev->pm.backend.gpu_wakeup_override = false;
216 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
217 kbase_csf_scheduler_invoke_tick(kbdev);
218
219 return ret;
220 }
221 #endif
222
223 /**
224 * tick_timer_callback() - Callback function for the scheduling tick hrtimer
225 *
226 * @timer: Pointer to the scheduling tick hrtimer
227 *
228 * This function will enqueue the scheduling tick work item for immediate
229 * execution, if it has not been queued already.
230 *
* Return: enum value to indicate that the timer should not be restarted.
232 */
static enum hrtimer_restart tick_timer_callback(struct hrtimer *timer)
234 {
235 struct kbase_device *kbdev = container_of(timer, struct kbase_device,
236 csf.scheduler.tick_timer);
237
238 kbase_csf_scheduler_advance_tick(kbdev);
239 return HRTIMER_NORESTART;
240 }
241
/**
* start_tick_timer() - Start the scheduling tick hrtimer.
*
* @kbdev: Pointer to the device
*
* This function will start the scheduling tick hrtimer and is supposed to
* be called only from the tick work item function. The tick hrtimer
* should not be active already.
*/
static void start_tick_timer(struct kbase_device *kbdev)
252 {
253 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
254 unsigned long flags;
255
256 lockdep_assert_held(&scheduler->lock);
257
258 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
259 WARN_ON(scheduler->tick_timer_active);
260 if (likely(!work_pending(&scheduler->tick_work))) {
261 scheduler->tick_timer_active = true;
262
263 hrtimer_start(&scheduler->tick_timer,
264 HR_TIMER_DELAY_MSEC(scheduler->csg_scheduling_period_ms),
265 HRTIMER_MODE_REL);
266 }
267 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
268 }
269
270 /**
271 * cancel_tick_timer() - Cancel the scheduling tick hrtimer
272 *
273 * @kbdev: Pointer to the device
274 */
static void cancel_tick_timer(struct kbase_device *kbdev)
276 {
277 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
278 unsigned long flags;
279
280 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
281 scheduler->tick_timer_active = false;
282 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
283 hrtimer_cancel(&scheduler->tick_timer);
284 }
285
286 /**
287 * enqueue_tick_work() - Enqueue the scheduling tick work item
288 *
289 * @kbdev: Pointer to the device
290 *
291 * This function will queue the scheduling tick work item for immediate
292 * execution. This shall only be called when both the tick hrtimer and tick
293 * work item are not active/pending.
294 */
static void enqueue_tick_work(struct kbase_device *kbdev)
296 {
297 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
298
299 lockdep_assert_held(&scheduler->lock);
300
301 kbase_csf_scheduler_invoke_tick(kbdev);
302 }
303
static void release_doorbell(struct kbase_device *kbdev, int doorbell_nr)
305 {
306 WARN_ON(doorbell_nr >= CSF_NUM_DOORBELL);
307
308 lockdep_assert_held(&kbdev->csf.scheduler.lock);
309 clear_bit(doorbell_nr, kbdev->csf.scheduler.doorbell_inuse_bitmap);
310 }
311
static int acquire_doorbell(struct kbase_device *kbdev)
313 {
314 int doorbell_nr;
315
316 lockdep_assert_held(&kbdev->csf.scheduler.lock);
317
318 doorbell_nr = find_first_zero_bit(
319 kbdev->csf.scheduler.doorbell_inuse_bitmap,
320 CSF_NUM_DOORBELL);
321
322 if (doorbell_nr >= CSF_NUM_DOORBELL)
323 return KBASEP_USER_DB_NR_INVALID;
324
325 set_bit(doorbell_nr, kbdev->csf.scheduler.doorbell_inuse_bitmap);
326
327 return doorbell_nr;
328 }
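
/*
* Usage sketch (illustrative only, mirroring the assign/unassign helpers
* below): doorbells are acquired and released under the scheduler lock, e.g.
*
*   lockdep_assert_held(&kbdev->csf.scheduler.lock);
*   group->doorbell_nr = acquire_doorbell(kbdev);
*   ...
*   release_doorbell(kbdev, group->doorbell_nr);
*   group->doorbell_nr = KBASEP_USER_DB_NR_INVALID;
*/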
329
static void unassign_user_doorbell_from_group(struct kbase_device *kbdev,
struct kbase_queue_group *group)
332 {
333 lockdep_assert_held(&kbdev->csf.scheduler.lock);
334
335 if (group->doorbell_nr != KBASEP_USER_DB_NR_INVALID) {
336 release_doorbell(kbdev, group->doorbell_nr);
337 group->doorbell_nr = KBASEP_USER_DB_NR_INVALID;
338 }
339 }
340
static void unassign_user_doorbell_from_queue(struct kbase_device *kbdev,
struct kbase_queue *queue)
343 {
344 lockdep_assert_held(&kbdev->csf.scheduler.lock);
345
346 mutex_lock(&kbdev->csf.reg_lock);
347
348 if (queue->doorbell_nr != KBASEP_USER_DB_NR_INVALID) {
349 queue->doorbell_nr = KBASEP_USER_DB_NR_INVALID;
350 /* After this the dummy page would be mapped in */
351 unmap_mapping_range(kbdev->csf.db_filp->f_inode->i_mapping,
352 queue->db_file_offset << PAGE_SHIFT, PAGE_SIZE, 1);
353 }
354
355 mutex_unlock(&kbdev->csf.reg_lock);
356 }
357
static void assign_user_doorbell_to_group(struct kbase_device *kbdev,
struct kbase_queue_group *group)
360 {
361 lockdep_assert_held(&kbdev->csf.scheduler.lock);
362
363 if (group->doorbell_nr == KBASEP_USER_DB_NR_INVALID)
364 group->doorbell_nr = acquire_doorbell(kbdev);
365 }
366
static void assign_user_doorbell_to_queue(struct kbase_device *kbdev,
struct kbase_queue *const queue)
369 {
370 lockdep_assert_held(&kbdev->csf.scheduler.lock);
371
372 mutex_lock(&kbdev->csf.reg_lock);
373
/* If the bind operation for the queue hasn't completed yet, then the
* CSI can't be programmed for the queue (even in stopped state) and
* so the doorbell also can't be assigned to it.
*/
379 if ((queue->bind_state == KBASE_CSF_QUEUE_BOUND) &&
380 (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID)) {
381 WARN_ON(queue->group->doorbell_nr == KBASEP_USER_DB_NR_INVALID);
382 queue->doorbell_nr = queue->group->doorbell_nr;
383
384 /* After this the real Hw doorbell page would be mapped in */
385 unmap_mapping_range(
386 kbdev->csf.db_filp->f_inode->i_mapping,
387 queue->db_file_offset << PAGE_SHIFT,
388 PAGE_SIZE, 1);
389 }
390
391 mutex_unlock(&kbdev->csf.reg_lock);
392 }
393
static void scheduler_doorbell_init(struct kbase_device *kbdev)
395 {
396 int doorbell_nr;
397
398 bitmap_zero(kbdev->csf.scheduler.doorbell_inuse_bitmap,
399 CSF_NUM_DOORBELL);
400
401 mutex_lock(&kbdev->csf.scheduler.lock);
402 /* Reserve doorbell 0 for use by kernel driver */
403 doorbell_nr = acquire_doorbell(kbdev);
404 mutex_unlock(&kbdev->csf.scheduler.lock);
405
406 WARN_ON(doorbell_nr != CSF_KERNEL_DOORBELL_NR);
407 }
408
u32 kbase_csf_scheduler_get_nr_active_csgs_locked(struct kbase_device *kbdev)
410 {
411 u32 nr_active_csgs;
412
413 lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock);
414
415 nr_active_csgs = bitmap_weight(kbdev->csf.scheduler.csg_inuse_bitmap,
416 kbdev->csf.global_iface.group_num);
417
418 return nr_active_csgs;
419 }
420
u32 kbase_csf_scheduler_get_nr_active_csgs(struct kbase_device *kbdev)
422 {
423 u32 nr_active_csgs;
424 unsigned long flags;
425
426 spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
427 nr_active_csgs = kbase_csf_scheduler_get_nr_active_csgs_locked(kbdev);
428 spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
429
430 return nr_active_csgs;
431 }
432
/**
* csg_slot_in_use - returns true if a queue group has been programmed on a
* given CSG slot.
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
* @slot: Index/number of the CSG slot in question.
*
* Return: true if a queue group is resident on the given CSG slot.
*
* Note: Caller must hold the scheduler lock.
*/
static inline bool csg_slot_in_use(struct kbase_device *kbdev, int slot)
445 {
446 lockdep_assert_held(&kbdev->csf.scheduler.lock);
447
448 return (kbdev->csf.scheduler.csg_slots[slot].resident_group != NULL);
449 }
450
static bool queue_group_suspended_locked(struct kbase_queue_group *group)
452 {
453 lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock);
454
455 return (group->run_state == KBASE_CSF_GROUP_SUSPENDED ||
456 group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE ||
457 group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC);
458 }
459
static bool queue_group_idle_locked(struct kbase_queue_group *group)
461 {
462 lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock);
463
464 return (group->run_state == KBASE_CSF_GROUP_IDLE ||
465 group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE);
466 }
467
static bool on_slot_group_idle_locked(struct kbase_queue_group *group)
469 {
470 lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock);
471
472 return (group->run_state == KBASE_CSF_GROUP_IDLE);
473 }
474
static bool queue_group_scheduled(struct kbase_queue_group *group)
476 {
477 return (group->run_state != KBASE_CSF_GROUP_INACTIVE &&
478 group->run_state != KBASE_CSF_GROUP_TERMINATED &&
479 group->run_state != KBASE_CSF_GROUP_FAULT_EVICTED);
480 }
481
static bool queue_group_scheduled_locked(struct kbase_queue_group *group)
483 {
484 lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock);
485
486 return queue_group_scheduled(group);
487 }
488
489 /**
490 * scheduler_wait_protm_quit() - Wait for GPU to exit protected mode.
491 *
492 * @kbdev: Pointer to the GPU device
493 *
494 * This function waits for the GPU to exit protected mode which is confirmed
495 * when active_protm_grp is set to NULL.
496 */
static void scheduler_wait_protm_quit(struct kbase_device *kbdev)
498 {
499 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
500 long wt = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
501 long remaining;
502
503 lockdep_assert_held(&scheduler->lock);
504
505 KBASE_KTRACE_ADD(kbdev, SCHEDULER_WAIT_PROTM_QUIT, NULL,
506 jiffies_to_msecs(wt));
507
508 remaining = wait_event_timeout(kbdev->csf.event_wait,
509 !kbase_csf_scheduler_protected_mode_in_use(kbdev), wt);
510
511 if (!remaining)
512 dev_warn(kbdev->dev, "[%llu] Timeout (%d ms), protm_quit wait skipped",
513 kbase_backend_get_cycle_cnt(kbdev),
514 kbdev->csf.fw_timeout_ms);
515
516 KBASE_KTRACE_ADD(kbdev, SCHEDULER_WAIT_PROTM_QUIT_DONE, NULL,
517 jiffies_to_msecs(remaining));
518 }
519
520 /**
521 * scheduler_force_protm_exit() - Force GPU to exit protected mode.
522 *
523 * @kbdev: Pointer to the GPU device
524 *
525 * This function sends a ping request to the firmware and waits for the GPU
526 * to exit protected mode.
527 */
static void scheduler_force_protm_exit(struct kbase_device *kbdev)
529 {
530 lockdep_assert_held(&kbdev->csf.scheduler.lock);
531
532 kbase_csf_firmware_ping(kbdev);
533 scheduler_wait_protm_quit(kbdev);
534 }
535
536 /**
537 * scheduler_timer_is_enabled_nolock() - Check if the scheduler wakes up
538 * automatically for periodic tasks.
539 *
540 * @kbdev: Pointer to the device
541 *
542 * This is a variant of kbase_csf_scheduler_timer_is_enabled() that assumes the
543 * CSF scheduler lock to already have been held.
544 *
545 * Return: true if the scheduler is configured to wake up periodically
546 */
static bool scheduler_timer_is_enabled_nolock(struct kbase_device *kbdev)
548 {
549 lockdep_assert_held(&kbdev->csf.scheduler.lock);
550
551 return kbdev->csf.scheduler.timer_enabled;
552 }
553
static void enable_gpu_idle_fw_timer(struct kbase_device *kbdev)
555 {
556 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
557 unsigned long flags;
558
559 lockdep_assert_held(&scheduler->lock);
560
561 if (scheduler->gpu_idle_fw_timer_enabled)
562 return;
563
564 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
565
/* Updating the gpu_idle_fw_timer_enabled flag requires holding interrupt_lock */
567 scheduler->gpu_idle_fw_timer_enabled = true;
568 kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
569
570 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
571 }
572
static void disable_gpu_idle_fw_timer_locked(struct kbase_device *kbdev)
574 {
575 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
576
577 lockdep_assert_held(&scheduler->lock);
578 lockdep_assert_held(&scheduler->interrupt_lock);
579
/* Update of the gpu_idle_fw_timer_enabled flag requires holding interrupt_lock */
581 if (scheduler->gpu_idle_fw_timer_enabled) {
582 scheduler->gpu_idle_fw_timer_enabled = false;
583 kbase_csf_firmware_disable_gpu_idle_timer(kbdev);
584 }
585 }
586
static void disable_gpu_idle_fw_timer(struct kbase_device *kbdev)
588 {
589 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
590 unsigned long flags;
591
592 lockdep_assert_held(&scheduler->lock);
593
594 if (!scheduler->gpu_idle_fw_timer_enabled)
595 return;
596
597 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
598 disable_gpu_idle_fw_timer_locked(kbdev);
599 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
600 }
601
602 /**
603 * scheduler_pm_active_handle_suspend() - Acquire the PM reference count for
604 * Scheduler
605 *
606 * @kbdev: Pointer to the device
607 * @suspend_handler: Handler code for how to handle a suspend that might occur.
608 *
* This function is usually called when the Scheduler needs to be activated.
* The PM reference count is acquired for the Scheduler and the power-on
* of the GPU is initiated.
612 *
613 * Return: 0 if successful or a negative error code on failure.
614 */
static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev,
enum kbase_pm_suspend_handler suspend_handler)
617 {
618 unsigned long flags;
619 u32 prev_count;
620 int ret = 0;
621
622 lockdep_assert_held(&kbdev->csf.scheduler.lock);
623
624 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
625 prev_count = kbdev->csf.scheduler.pm_active_count;
626 if (!WARN_ON(prev_count == U32_MAX))
627 kbdev->csf.scheduler.pm_active_count++;
628 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
629
630 /* On 0 => 1, make a pm_ctx_active request */
631 if (!prev_count) {
632 ret = kbase_pm_context_active_handle_suspend(kbdev,
633 suspend_handler);
634 if (ret) {
635 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
636 kbdev->csf.scheduler.pm_active_count--;
637 kbase_pm_update_state(kbdev);
638 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
639 }
640 }
641
642 return ret;
643 }
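
/*
* Note: each successful scheduler_pm_active_handle_suspend() call is expected
* to be paired with a scheduler_pm_idle() call; only the 0 -> 1 transition of
* pm_active_count takes the kbase PM context reference and only the 1 -> 0
* transition releases it.
*/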
644
645 #ifdef KBASE_PM_RUNTIME
646 /**
647 * scheduler_pm_active_after_sleep() - Acquire the PM reference count for
648 * Scheduler
649 *
650 * @kbdev: Pointer to the device
651 * @flags: flags containing previous interrupt state
652 *
653 * This function is called when Scheduler needs to be activated from the
654 * sleeping state.
655 * The PM reference count is acquired for the Scheduler and the wake up of
656 * MCU is initiated. It resets the flag that indicates to the MCU state
657 * machine that MCU needs to be put in sleep state.
658 *
659 * Note: This function shall be called with hwaccess lock held and it will
660 * release that lock.
661 *
* Return: zero when the PM reference was taken and non-zero when the
* system is suspending or already suspended.
664 */
static int scheduler_pm_active_after_sleep(struct kbase_device *kbdev,
unsigned long flags)
667 {
668 u32 prev_count;
669 int ret = 0;
670
671 lockdep_assert_held(&kbdev->csf.scheduler.lock);
672 lockdep_assert_held(&kbdev->hwaccess_lock);
673
674 prev_count = kbdev->csf.scheduler.pm_active_count;
675 if (!WARN_ON(prev_count == U32_MAX))
676 kbdev->csf.scheduler.pm_active_count++;
677 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
678
679 /* On 0 => 1, make a pm_ctx_active request */
680 if (!prev_count) {
681 ret = kbase_pm_context_active_handle_suspend(kbdev,
682 KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
683
684 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
685 if (ret)
686 kbdev->csf.scheduler.pm_active_count--;
687 else
688 kbdev->pm.backend.gpu_sleep_mode_active = false;
689 kbase_pm_update_state(kbdev);
690 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
691 }
692
693 return ret;
694 }
695 #endif
696
697 /**
698 * scheduler_pm_idle() - Release the PM reference count held by Scheduler
699 *
700 * @kbdev: Pointer to the device
701 *
702 * This function is usually called after Scheduler is suspended.
703 * The PM reference count held by the Scheduler is released to trigger the
704 * power down of GPU.
705 */
static void scheduler_pm_idle(struct kbase_device *kbdev)
707 {
708 unsigned long flags;
709 u32 prev_count;
710
711 lockdep_assert_held(&kbdev->csf.scheduler.lock);
712
713 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
714 prev_count = kbdev->csf.scheduler.pm_active_count;
715 if (!WARN_ON(prev_count == 0))
716 kbdev->csf.scheduler.pm_active_count--;
717 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
718
719 if (prev_count == 1)
720 kbase_pm_context_idle(kbdev);
721 }
722
723 #ifdef KBASE_PM_RUNTIME
724 /**
725 * scheduler_pm_idle_before_sleep() - Release the PM reference count and
* trigger the transition to sleep state.
727 *
728 * @kbdev: Pointer to the device
729 *
730 * This function is called on the GPU idle notification. It releases the
731 * Scheduler's PM reference count and sets the flag to indicate to the
732 * MCU state machine that MCU needs to be put in sleep state.
733 */
static void scheduler_pm_idle_before_sleep(struct kbase_device *kbdev)
735 {
736 unsigned long flags;
737 u32 prev_count;
738
739 lockdep_assert_held(&kbdev->csf.scheduler.lock);
740
741 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
742 prev_count = kbdev->csf.scheduler.pm_active_count;
743 if (!WARN_ON(prev_count == 0))
744 kbdev->csf.scheduler.pm_active_count--;
745 kbdev->pm.backend.gpu_sleep_mode_active = true;
746 kbdev->pm.backend.exit_gpu_sleep_mode = false;
747 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
748
749 if (prev_count == 1)
750 kbase_pm_context_idle(kbdev);
751 }
752 #endif
753
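/* Bring the Scheduler out of the suspended or sleeping state: take the PM
* reference (or wake up the MCU after sleep) and, if requested, restart the
* scheduling tick timer.
*/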
static void scheduler_wakeup(struct kbase_device *kbdev, bool kick)
755 {
756 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
757 int ret;
758
759 lockdep_assert_held(&scheduler->lock);
760
761 if ((scheduler->state != SCHED_SUSPENDED) &&
762 (scheduler->state != SCHED_SLEEPING))
763 return;
764
765 if (scheduler->state == SCHED_SUSPENDED) {
766 dev_dbg(kbdev->dev,
767 "Re-activating the Scheduler after suspend");
768 ret = scheduler_pm_active_handle_suspend(kbdev,
769 KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
770 } else {
771 #ifdef KBASE_PM_RUNTIME
772 unsigned long flags;
773
774 dev_dbg(kbdev->dev,
775 "Re-activating the Scheduler out of sleep");
776
777 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
778 ret = scheduler_pm_active_after_sleep(kbdev, flags);
779 /* hwaccess_lock is released in the previous function call. */
780 #endif
781 }
782
783 if (ret) {
784 /* GPUCORE-29850 would add the handling for the case where
785 * Scheduler could not be activated due to system suspend.
786 */
787 dev_info(kbdev->dev,
788 "Couldn't wakeup Scheduler due to system suspend");
789 return;
790 }
791
792 scheduler->state = SCHED_INACTIVE;
793
794 if (kick)
795 scheduler_enable_tick_timer_nolock(kbdev);
796 }
797
static void scheduler_suspend(struct kbase_device *kbdev)
799 {
800 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
801
802 lockdep_assert_held(&scheduler->lock);
803
804 if (!WARN_ON(scheduler->state == SCHED_SUSPENDED)) {
805 dev_dbg(kbdev->dev, "Suspending the Scheduler");
806 scheduler_pm_idle(kbdev);
807 scheduler->state = SCHED_SUSPENDED;
808 }
809 }
810
811 /**
812 * update_idle_suspended_group_state() - Move the queue group to a non-idle
813 * suspended state.
814 * @group: Pointer to the queue group.
815 *
* This function is called to change the state of a queue group to the
* non-idle suspended state, if the group was suspended when all the queues
* bound to it became empty or when some queues got blocked on a sync wait
* while the others became empty. In the latter case the group is also moved
* from the idle wait list to the runnable list.
* The function therefore gets called when a queue is kicked or a sync wait
* condition gets satisfied.
823 */
static void update_idle_suspended_group_state(struct kbase_queue_group *group)
825 {
826 struct kbase_csf_scheduler *scheduler =
827 &group->kctx->kbdev->csf.scheduler;
828 int new_val;
829
830 lockdep_assert_held(&scheduler->lock);
831
832 if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) {
833 remove_group_from_idle_wait(group);
834 insert_group_to_runnable(scheduler, group,
835 KBASE_CSF_GROUP_SUSPENDED);
836 } else if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE) {
837 group->run_state = KBASE_CSF_GROUP_SUSPENDED;
838
839 /* If scheduler is not suspended and the given group's
840 * static priority (reflected by the scan_seq_num) is inside
841 * the current tick slot-range, or there are some on_slot
842 * idle groups, schedule an async tock.
843 */
844 if (scheduler->state != SCHED_SUSPENDED) {
845 unsigned long flags;
846 int n_idle;
847 int n_used;
848 int n_slots =
849 group->kctx->kbdev->csf.global_iface.group_num;
850
851 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
852 n_idle = bitmap_weight(scheduler->csg_slots_idle_mask,
853 n_slots);
854 n_used = bitmap_weight(scheduler->csg_inuse_bitmap,
855 n_slots);
856 spin_unlock_irqrestore(&scheduler->interrupt_lock,
857 flags);
858
859 if (n_idle ||
860 n_used < scheduler->num_csg_slots_for_tick ||
861 group->scan_seq_num <
862 scheduler->num_csg_slots_for_tick)
863 schedule_in_cycle(group, true);
864 }
865 } else
866 return;
867
868 new_val = atomic_inc_return(&scheduler->non_idle_offslot_grps);
869 KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC,
870 group, new_val);
871 }
872
int kbase_csf_scheduler_group_get_slot_locked(struct kbase_queue_group *group)
874 {
875 struct kbase_csf_scheduler *scheduler =
876 &group->kctx->kbdev->csf.scheduler;
877 int slot_num = group->csg_nr;
878
879 lockdep_assert_held(&scheduler->interrupt_lock);
880
881 if (slot_num >= 0) {
882 if (WARN_ON(scheduler->csg_slots[slot_num].resident_group !=
883 group))
884 return -1;
885 }
886
887 return slot_num;
888 }
889
int kbase_csf_scheduler_group_get_slot(struct kbase_queue_group *group)
891 {
892 struct kbase_csf_scheduler *scheduler =
893 &group->kctx->kbdev->csf.scheduler;
894 unsigned long flags;
895 int slot_num;
896
897 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
898 slot_num = kbase_csf_scheduler_group_get_slot_locked(group);
899 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
900
901 return slot_num;
902 }
903
904 /* kbasep_csf_scheduler_group_is_on_slot_locked() - Check if CSG is on slot.
905 *
906 * @group: GPU queue group to be checked
907 *
908 * This function needs to be called with scheduler's lock held
909 *
910 * Return: true if @group is on slot.
911 */
static bool kbasep_csf_scheduler_group_is_on_slot_locked(
struct kbase_queue_group *group)
914 {
915 struct kbase_csf_scheduler *scheduler =
916 &group->kctx->kbdev->csf.scheduler;
917 int slot_num = group->csg_nr;
918
919 lockdep_assert_held(&scheduler->lock);
920
921 if (slot_num >= 0) {
922 if (!WARN_ON(scheduler->csg_slots[slot_num].resident_group !=
923 group))
924 return true;
925 }
926
927 return false;
928 }
929
bool kbase_csf_scheduler_group_events_enabled(struct kbase_device *kbdev,
struct kbase_queue_group *group)
932 {
933 struct kbase_csf_scheduler *scheduler =
934 &group->kctx->kbdev->csf.scheduler;
935 int slot_num = group->csg_nr;
936
937 lockdep_assert_held(&scheduler->interrupt_lock);
938
939 if (WARN_ON(slot_num < 0))
940 return false;
941
942 return test_bit(slot_num, scheduler->csgs_events_enable_mask);
943 }
944
struct kbase_queue_group *kbase_csf_scheduler_get_group_on_slot(
struct kbase_device *kbdev, int slot)
947 {
948 lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock);
949
950 return kbdev->csf.scheduler.csg_slots[slot].resident_group;
951 }
952
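/* Stop a queue whose group is resident and running on a CSG slot, using the
* firmware REQ/ACK handshake: wait for any pending START to be acknowledged,
* request CS_REQ_STATE_STOP, ring the kernel doorbell and wait for the STATE
* field of CS_ACK to report STOP (the GPU is reset on timeout).
*/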
static int halt_stream_sync(struct kbase_queue *queue)
954 {
955 struct kbase_queue_group *group = queue->group;
956 struct kbase_device *kbdev = queue->kctx->kbdev;
957 struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
958 struct kbase_csf_cmd_stream_group_info *ginfo;
959 struct kbase_csf_cmd_stream_info *stream;
960 int csi_index = queue->csi_index;
961 long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
962
963 if (WARN_ON(!group) ||
964 WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group)))
965 return -EINVAL;
966
967 lockdep_assert_held(&kbdev->csf.scheduler.lock);
968 ginfo = &global_iface->groups[group->csg_nr];
969 stream = &ginfo->streams[csi_index];
970
971 if (CS_REQ_STATE_GET(kbase_csf_firmware_cs_input_read(stream, CS_REQ)) ==
972 CS_REQ_STATE_START) {
973
974 remaining = wait_event_timeout(kbdev->csf.event_wait,
975 (CS_ACK_STATE_GET(kbase_csf_firmware_cs_output(stream, CS_ACK))
976 == CS_ACK_STATE_START), remaining);
977
978 if (!remaining) {
979 dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for queue to start on csi %d bound to group %d on slot %d",
980 kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
981 csi_index, group->handle, group->csg_nr);
982 if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
983 kbase_reset_gpu(kbdev);
984
985
986 return -ETIMEDOUT;
987 }
988
989 remaining =
990 kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
991 }
992
993 /* Set state to STOP */
994 kbase_csf_firmware_cs_input_mask(stream, CS_REQ, CS_REQ_STATE_STOP,
995 CS_REQ_STATE_MASK);
996
997 KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP_REQUESTED, group, queue, 0u);
998 kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr, true);
999
1000 /* Timed wait */
1001 remaining = wait_event_timeout(kbdev->csf.event_wait,
1002 (CS_ACK_STATE_GET(kbase_csf_firmware_cs_output(stream, CS_ACK))
1003 == CS_ACK_STATE_STOP), remaining);
1004
1005 if (!remaining) {
1006 dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for queue to stop on csi %d bound to group %d on slot %d",
1007 kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
1008 queue->csi_index, group->handle, group->csg_nr);
1009
1010 /* TODO GPUCORE-25328: The CSG can't be terminated, the GPU
1011 * will be reset as a work-around.
1012 */
1013 if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
1014 kbase_reset_gpu(kbdev);
1015
1016
1017 }
1018 return (remaining) ? 0 : -ETIMEDOUT;
1019 }
1020
static bool can_halt_stream(struct kbase_device *kbdev,
struct kbase_queue_group *group)
1023 {
1024 struct kbase_csf_csg_slot *const csg_slot =
1025 kbdev->csf.scheduler.csg_slots;
1026 unsigned long flags;
1027 bool can_halt;
1028 int slot;
1029
1030 if (!queue_group_scheduled(group))
1031 return true;
1032
1033 spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
1034 slot = kbase_csf_scheduler_group_get_slot_locked(group);
1035 can_halt = (slot >= 0) &&
1036 (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING);
1037 spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock,
1038 flags);
1039
1040 return can_halt;
1041 }
1042
1043 /**
1044 * sched_halt_stream() - Stop a GPU queue when its queue group is not running
1045 * on a CSG slot.
1046 * @queue: Pointer to the GPU queue to stop.
1047 *
1048 * This function handles stopping gpu queues for groups that are either not on
1049 * a CSG slot or are on the slot but undergoing transition to
1050 * resume or suspend states.
1051 * It waits until the queue group is scheduled on a slot and starts running,
1052 * which is needed as groups that were suspended may need to resume all queues
1053 * that were enabled and running at the time of suspension.
1054 *
1055 * Return: 0 on success, or negative on failure.
1056 */
static int sched_halt_stream(struct kbase_queue *queue)
1058 {
1059 struct kbase_queue_group *group = queue->group;
1060 struct kbase_device *kbdev = queue->kctx->kbdev;
1061 struct kbase_csf_scheduler *const scheduler =
1062 &kbdev->csf.scheduler;
1063 struct kbase_csf_csg_slot *const csg_slot =
1064 kbdev->csf.scheduler.csg_slots;
1065 bool retry_needed = false;
1066 bool retried = false;
1067 long remaining;
1068 int slot;
1069 int err = 0;
1070 const u32 group_schedule_timeout =
1071 20 * kbdev->csf.scheduler.csg_scheduling_period_ms;
1072
1073 if (WARN_ON(!group))
1074 return -EINVAL;
1075
1076 lockdep_assert_held(&queue->kctx->csf.lock);
1077 lockdep_assert_held(&scheduler->lock);
1078
1079 slot = kbase_csf_scheduler_group_get_slot(group);
1080
1081 if (slot >= 0) {
1082 WARN_ON(atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING);
1083
1084 if (atomic_read(&csg_slot[slot].state) == CSG_SLOT_READY2RUN) {
dev_dbg(kbdev->dev, "Stopping a queue on csi %d when Group-%d is under transition to running state",
queue->csi_index, group->handle);
1087 retry_needed = true;
1088 }
1089 }
1090 retry:
1091 /* Update the group state so that it can get scheduled soon */
1092 update_idle_suspended_group_state(group);
1093
1094 mutex_unlock(&scheduler->lock);
1095
1096 /* This function is called when the queue group is either not on a CSG
1097 * slot or is on the slot but undergoing transition.
1098 *
1099 * To stop the queue, the function needs to wait either for the queue
1100 * group to be assigned a CSG slot (and that slot has to reach the
1101 * running state) or for the eviction of the queue group from the
1102 * scheduler's list.
1103 *
1104 * In order to evaluate the latter condition, the function doesn't
1105 * really need to lock the scheduler, as any update to the run_state
1106 * of the queue group by sched_evict_group() would be visible due
1107 * to implicit barriers provided by the kernel waitqueue macros.
1108 *
1109 * The group pointer cannot disappear meanwhile, as the high level
1110 * CSF context is locked. Therefore, the scheduler would be
1111 * the only one to update the run_state of the group.
1112 */
1113 remaining = wait_event_timeout(
1114 kbdev->csf.event_wait, can_halt_stream(kbdev, group),
1115 kbase_csf_timeout_in_jiffies(group_schedule_timeout));
1116
1117 mutex_lock(&scheduler->lock);
1118
1119 if (remaining && queue_group_scheduled_locked(group)) {
1120 slot = kbase_csf_scheduler_group_get_slot(group);
1121
1122 /* If the group is still on slot and slot is in running state
1123 * then explicitly stop the CSI of the
1124 * queue. Otherwise there are different cases to consider
1125 *
1126 * - If the queue group was already undergoing transition to
1127 * resume/start state when this function was entered then it
1128 * would not have disabled the CSI of the
1129 * queue being stopped and the previous wait would have ended
1130 * once the slot was in a running state with CS
1131 * interface still enabled.
1132 * Now the group is going through another transition either
1133 * to a suspend state or to a resume state (it could have
1134 * been suspended before the scheduler lock was grabbed).
1135 * In both scenarios need to wait again for the group to
1136 * come on a slot and that slot to reach the running state,
1137 * as that would guarantee that firmware will observe the
1138 * CSI as disabled.
1139 *
1140 * - If the queue group was either off the slot or was
1141 * undergoing transition to suspend state on entering this
1142 * function, then the group would have been resumed with the
1143 * queue's CSI in disabled state.
1144 * So now if the group is undergoing another transition
1145 * (after the resume) then just need to wait for the state
1146 * bits in the ACK register of CSI to be
1147 * set to STOP value. It is expected that firmware will
1148 * process the stop/disable request of the CS
1149 * interface after resuming the group before it processes
1150 * another state change request of the group.
1151 */
1152 if ((slot >= 0) &&
1153 (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING)) {
1154 err = halt_stream_sync(queue);
1155 } else if (retry_needed && !retried) {
1156 retried = true;
1157 goto retry;
1158 } else if (slot >= 0) {
1159 struct kbase_csf_global_iface *global_iface =
1160 &kbdev->csf.global_iface;
1161 struct kbase_csf_cmd_stream_group_info *ginfo =
1162 &global_iface->groups[slot];
1163 struct kbase_csf_cmd_stream_info *stream =
1164 &ginfo->streams[queue->csi_index];
1165 u32 cs_req =
1166 kbase_csf_firmware_cs_input_read(stream, CS_REQ);
1167
1168 if (!WARN_ON(CS_REQ_STATE_GET(cs_req) !=
1169 CS_REQ_STATE_STOP)) {
1170 /* Timed wait */
1171 remaining = wait_event_timeout(
1172 kbdev->csf.event_wait,
1173 (CS_ACK_STATE_GET(
1174 kbase_csf_firmware_cs_output(
1175 stream, CS_ACK)) ==
1176 CS_ACK_STATE_STOP),
1177 kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms));
1178
1179 if (!remaining) {
1180 dev_warn(kbdev->dev,
1181 "[%llu] Timeout (%d ms) waiting for queue stop ack on csi %d bound to group %d on slot %d",
1182 kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
1183 queue->csi_index,
1184 group->handle, group->csg_nr);
1185
1186
1187 err = -ETIMEDOUT;
1188 }
1189 }
1190 }
1191 } else if (!remaining) {
1192 dev_warn(kbdev->dev, "[%llu] Group-%d failed to get a slot for stopping the queue on csi %d (timeout %d ms)",
1193 kbase_backend_get_cycle_cnt(kbdev),
1194 group->handle, queue->csi_index,
1195 group_schedule_timeout);
1196
1197
1198 err = -ETIMEDOUT;
1199 }
1200
1201 return err;
1202 }
1203
1204 /**
1205 * scheduler_activate_on_queue_stop() - Activate the Scheduler when the GPU
1206 * queue needs to be stopped.
1207 *
* @queue: Pointer to the GPU command queue
1209 *
1210 * This function is called when the CSI to which GPU queue is bound needs to
1211 * be stopped. For that the corresponding queue group needs to be resident on
1212 * the CSG slot and MCU firmware should be running. So this function makes the
1213 * Scheduler exit the sleeping or suspended state.
1214 */
static void scheduler_activate_on_queue_stop(struct kbase_queue *queue)
1216 {
1217 struct kbase_device *kbdev = queue->kctx->kbdev;
1218
1219 scheduler_wakeup(kbdev, true);
1220
1221 /* Wait for MCU firmware to start running */
1222 if (kbase_csf_scheduler_wait_mcu_active(kbdev)) {
1223 dev_warn(
1224 kbdev->dev,
1225 "[%llu] Wait for MCU active failed for stopping queue on csi %d bound to group %d of context %d_%d on slot %d",
1226 kbase_backend_get_cycle_cnt(kbdev),
1227 queue->csi_index, queue->group->handle,
1228 queue->kctx->tgid, queue->kctx->id,
1229 queue->group->csg_nr);
1230 }
1231 }
1232
int kbase_csf_scheduler_queue_stop(struct kbase_queue *queue)
1234 {
1235 struct kbase_device *kbdev = queue->kctx->kbdev;
1236 struct kbase_queue_group *group = queue->group;
1237 bool const cs_enabled = queue->enabled;
1238 int err = 0;
1239
1240 if (WARN_ON(!group))
1241 return -EINVAL;
1242
1243 kbase_reset_gpu_assert_failed_or_prevented(kbdev);
1244 lockdep_assert_held(&queue->kctx->csf.lock);
1245 mutex_lock(&kbdev->csf.scheduler.lock);
1246
1247 queue->enabled = false;
1248 KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP, group, queue, cs_enabled);
1249
1250 if (cs_enabled && queue_group_scheduled_locked(group)) {
1251 struct kbase_csf_csg_slot *const csg_slot =
1252 kbdev->csf.scheduler.csg_slots;
1253 int slot = kbase_csf_scheduler_group_get_slot(group);
1254
1255 /* Since the group needs to be resumed in order to stop the queue,
1256 * check if GPU needs to be powered up.
1257 */
1258 scheduler_activate_on_queue_stop(queue);
1259
1260 if ((slot >= 0) &&
1261 (atomic_read(&csg_slot[slot].state) == CSG_SLOT_RUNNING))
1262 err = halt_stream_sync(queue);
1263 else
1264 err = sched_halt_stream(queue);
1265
1266 unassign_user_doorbell_from_queue(kbdev, queue);
1267 }
1268
1269 mutex_unlock(&kbdev->csf.scheduler.lock);
1270 return err;
1271 }
1272
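/* On a no-MALI (dummy model) build, mirror the queue's active state into the
* CS_ACTIVE entry of the queue's user I/O output page; on real hardware this
* page is maintained by the firmware, so the call is a no-op.
*/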
static void update_hw_active(struct kbase_queue *queue, bool active)
1274 {
1275 #if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
1276 if (queue && queue->enabled) {
1277 u32 *output_addr = (u32 *)(queue->user_io_addr + PAGE_SIZE);
1278
1279 output_addr[CS_ACTIVE / sizeof(u32)] = active;
1280 }
1281 #else
1282 CSTD_UNUSED(queue);
1283 CSTD_UNUSED(active);
1284 #endif
1285 }
1286
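/* Seed CS_EXTRACT_INIT in the queue's input page from the current CS_EXTRACT
* value in the output page, so a queue resumed from suspension carries on
* from where it stopped.
*/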
static void program_cs_extract_init(struct kbase_queue *queue)
1288 {
1289 u64 *input_addr = (u64 *)queue->user_io_addr;
1290 u64 *output_addr = (u64 *)(queue->user_io_addr + PAGE_SIZE);
1291
1292 input_addr[CS_EXTRACT_INIT_LO / sizeof(u64)] =
1293 output_addr[CS_EXTRACT_LO / sizeof(u64)];
1294 }
1295
static void program_cs_trace_cfg(struct kbase_csf_cmd_stream_info *stream,
struct kbase_queue *queue)
1298 {
1299 struct kbase_device *kbdev = queue->kctx->kbdev;
1300 u32 const glb_version = kbdev->csf.global_iface.version;
1301
1302 lockdep_assert_held(&kbdev->csf.scheduler.lock);
1303
1304 /* If cs_trace_command not supported, nothing to program */
1305 if (glb_version < kbase_csf_interface_version(1, 1, 0))
1306 return;
1307
1308 /* Program for cs_trace if enabled. In the current arrangement, it is
1309 * possible for the context to enable the cs_trace after some queues
* have been registered in cs_trace in disabled state. This is tracked by
1311 * the queue's trace buffer base address, which had been validated at the
1312 * queue's register_ex call.
1313 */
1314 if (kbase_csf_scheduler_queue_has_trace(queue)) {
1315 u32 cs_cfg = CS_INSTR_CONFIG_JASID_SET(
1316 queue->trace_cfg, queue->kctx->as_nr);
1317
1318 kbase_csf_firmware_cs_input(stream, CS_INSTR_CONFIG, cs_cfg);
1319 kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_SIZE,
1320 queue->trace_buffer_size);
1321
1322 kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_BASE_LO,
1323 queue->trace_buffer_base & U32_MAX);
1324 kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_BASE_HI,
1325 queue->trace_buffer_base >> 32);
1326
1327 kbase_csf_firmware_cs_input(
1328 stream, CS_INSTR_BUFFER_OFFSET_POINTER_LO,
1329 queue->trace_offset_ptr & U32_MAX);
1330 kbase_csf_firmware_cs_input(
1331 stream, CS_INSTR_BUFFER_OFFSET_POINTER_HI,
1332 queue->trace_offset_ptr >> 32);
1333 } else {
1334 /* Place the configuration to the disabled condition */
1335 kbase_csf_firmware_cs_input(stream, CS_INSTR_CONFIG, 0);
1336 kbase_csf_firmware_cs_input(stream, CS_INSTR_BUFFER_SIZE, 0);
1337 }
1338 }
1339
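/* Program a queue's CS interface on its group's slot: ring buffer base and
* size, user input/output page addresses, doorbell and priority, cs_trace
* configuration and interrupt mask, then request the START or STOP state
* according to queue->enabled and ring the doorbell.
*/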
static void program_cs(struct kbase_device *kbdev,
struct kbase_queue *queue, bool ring_csg_doorbell)
1342 {
1343 struct kbase_queue_group *group = queue->group;
1344 struct kbase_csf_cmd_stream_group_info *ginfo;
1345 struct kbase_csf_cmd_stream_info *stream;
1346 int csi_index = queue->csi_index;
1347 u64 user_input;
1348 u64 user_output;
1349
1350 if (WARN_ON(!group))
1351 return;
1352
1353 lockdep_assert_held(&kbdev->csf.scheduler.lock);
1354
1355 if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group)))
1356 return;
1357
1358 ginfo = &kbdev->csf.global_iface.groups[group->csg_nr];
1359
1360 if (WARN_ON(csi_index < 0) ||
1361 WARN_ON(csi_index >= ginfo->stream_num))
1362 return;
1363
1364 assign_user_doorbell_to_queue(kbdev, queue);
1365 if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID)
1366 return;
1367
1368 WARN_ON(queue->doorbell_nr != queue->group->doorbell_nr);
1369
1370 if (queue->enabled && queue_group_suspended_locked(group))
1371 program_cs_extract_init(queue);
1372
1373 stream = &ginfo->streams[csi_index];
1374
1375 kbase_csf_firmware_cs_input(stream, CS_BASE_LO,
1376 queue->base_addr & 0xFFFFFFFF);
1377 kbase_csf_firmware_cs_input(stream, CS_BASE_HI,
1378 queue->base_addr >> 32);
1379 kbase_csf_firmware_cs_input(stream, CS_SIZE,
1380 queue->size);
1381
1382 user_input = (queue->reg->start_pfn << PAGE_SHIFT);
1383 kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_LO,
1384 user_input & 0xFFFFFFFF);
1385 kbase_csf_firmware_cs_input(stream, CS_USER_INPUT_HI,
1386 user_input >> 32);
1387
1388 user_output = ((queue->reg->start_pfn + 1) << PAGE_SHIFT);
1389 kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_LO,
1390 user_output & 0xFFFFFFFF);
1391 kbase_csf_firmware_cs_input(stream, CS_USER_OUTPUT_HI,
1392 user_output >> 32);
1393
1394 kbase_csf_firmware_cs_input(stream, CS_CONFIG,
1395 (queue->doorbell_nr << 8) | (queue->priority & 0xF));
1396
1397 /* Program the queue's cs_trace configuration */
1398 program_cs_trace_cfg(stream, queue);
1399
1400 /* Enable all interrupts for now */
1401 kbase_csf_firmware_cs_input(stream, CS_ACK_IRQ_MASK, ~((u32)0));
1402
1403 /*
1404 * Enable the CSG idle notification once the CS's ringbuffer
* becomes empty or the CS becomes sync_idle, waiting on a sync update
* or a protected mode switch.
1407 */
1408 kbase_csf_firmware_cs_input_mask(stream, CS_REQ,
1409 CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK,
1410 CS_REQ_IDLE_EMPTY_MASK | CS_REQ_IDLE_SYNC_WAIT_MASK);
1411
1412 /* Set state to START/STOP */
1413 kbase_csf_firmware_cs_input_mask(stream, CS_REQ,
1414 queue->enabled ? CS_REQ_STATE_START : CS_REQ_STATE_STOP,
1415 CS_REQ_STATE_MASK);
1416
1417 KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_START, group, queue, queue->enabled);
1418
1419 kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr,
1420 ring_csg_doorbell);
1421 update_hw_active(queue, true);
1422 }
1423
int kbase_csf_scheduler_queue_start(struct kbase_queue *queue)
1425 {
1426 struct kbase_queue_group *group = queue->group;
1427 struct kbase_device *kbdev = queue->kctx->kbdev;
1428 bool const cs_enabled = queue->enabled;
1429 int err = 0;
1430 bool evicted = false;
1431
1432 kbase_reset_gpu_assert_prevented(kbdev);
1433 lockdep_assert_held(&queue->kctx->csf.lock);
1434
1435 if (WARN_ON(!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND))
1436 return -EINVAL;
1437
1438 mutex_lock(&kbdev->csf.scheduler.lock);
1439
1440 KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_START, group, queue,
1441 group->run_state);
1442 KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_STATUS_WAIT, queue->group,
1443 queue, queue->status_wait);
1444
1445 if (group->run_state == KBASE_CSF_GROUP_FAULT_EVICTED) {
1446 err = -EIO;
1447 evicted = true;
1448 } else if ((group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC)
1449 && CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait)) {
1450 dev_dbg(kbdev->dev, "blocked queue(csi_index=%d) of group %d was kicked",
1451 queue->csi_index, group->handle);
1452 } else {
1453 err = scheduler_group_schedule(group);
1454
1455 if (!err) {
1456 queue->enabled = true;
1457 if (kbasep_csf_scheduler_group_is_on_slot_locked(group)) {
1458 if (cs_enabled) {
/* In the normal situation, when a queue is
* already running, the queue update
* would be a doorbell kick on the user
* side. However, if such a kick is
* shortly following a start or resume,
* the queue may actually be in transition
* and hence the said kick would enter the
* kernel as the hw_active flag is yet
* to be set. The scheduler needs to
* give a kick to the corresponding
* user doorbell in such a case.
*/
1471 kbase_csf_ring_cs_user_doorbell(kbdev, queue);
1472 } else
1473 program_cs(kbdev, queue, true);
1474 }
1475 queue_delayed_work(system_long_wq,
1476 &kbdev->csf.scheduler.ping_work,
1477 msecs_to_jiffies(FIRMWARE_PING_INTERVAL_MS));
1478 }
1479 }
1480
1481 mutex_unlock(&kbdev->csf.scheduler.lock);
1482
1483 if (evicted)
1484 kbase_csf_term_descheduled_queue_group(group);
1485
1486 return err;
1487 }
1488
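/* Refresh the software state of a CSG slot from the STATE field of CSG_ACK:
* READY2RUN becomes RUNNING once the firmware acknowledges START/RESUME, and
* DOWN2STOP becomes STOPPED once it acknowledges SUSPEND/TERMINATE.
*/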
static enum kbase_csf_csg_slot_state update_csg_slot_status(
struct kbase_device *kbdev, s8 slot)
1491 {
1492 struct kbase_csf_csg_slot *csg_slot =
1493 &kbdev->csf.scheduler.csg_slots[slot];
1494 struct kbase_csf_cmd_stream_group_info *ginfo =
1495 &kbdev->csf.global_iface.groups[slot];
1496 u32 state;
1497 enum kbase_csf_csg_slot_state slot_state;
1498
1499 lockdep_assert_held(&kbdev->csf.scheduler.lock);
1500
1501 state = CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo,
1502 CSG_ACK));
1503 slot_state = atomic_read(&csg_slot->state);
1504
1505 switch (slot_state) {
1506 case CSG_SLOT_READY2RUN:
1507 if ((state == CSG_ACK_STATE_START) ||
1508 (state == CSG_ACK_STATE_RESUME)) {
1509 slot_state = CSG_SLOT_RUNNING;
1510 atomic_set(&csg_slot->state, slot_state);
1511 csg_slot->trigger_jiffies = jiffies;
1512 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STARTED, csg_slot->resident_group, state);
1513 dev_dbg(kbdev->dev, "Group %u running on slot %d\n",
1514 csg_slot->resident_group->handle, slot);
1515 }
1516 break;
1517 case CSG_SLOT_DOWN2STOP:
1518 if ((state == CSG_ACK_STATE_SUSPEND) ||
1519 (state == CSG_ACK_STATE_TERMINATE)) {
1520 slot_state = CSG_SLOT_STOPPED;
1521 atomic_set(&csg_slot->state, slot_state);
1522 csg_slot->trigger_jiffies = jiffies;
1523 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOPPED, csg_slot->resident_group, state);
1524 dev_dbg(kbdev->dev, "Group %u stopped on slot %d\n",
1525 csg_slot->resident_group->handle, slot);
1526 }
1527 break;
1528 case CSG_SLOT_DOWN2STOP_TIMEDOUT:
1529 case CSG_SLOT_READY2RUN_TIMEDOUT:
1530 case CSG_SLOT_READY:
1531 case CSG_SLOT_RUNNING:
1532 case CSG_SLOT_STOPPED:
1533 break;
1534 default:
1535 dev_warn(kbdev->dev, "Unknown CSG slot state %d", slot_state);
1536 break;
1537 }
1538
1539 return slot_state;
1540 }
1541
static bool csg_slot_running(struct kbase_device *kbdev, s8 slot)
1543 {
1544 lockdep_assert_held(&kbdev->csf.scheduler.lock);
1545
1546 return (update_csg_slot_status(kbdev, slot) == CSG_SLOT_RUNNING);
1547 }
1548
static bool csg_slot_stopped_locked(struct kbase_device *kbdev, s8 slot)
1550 {
1551 enum kbase_csf_csg_slot_state slot_state;
1552
1553 lockdep_assert_held(&kbdev->csf.scheduler.lock);
1554
1555 slot_state = update_csg_slot_status(kbdev, slot);
1556
1557 return (slot_state == CSG_SLOT_STOPPED ||
1558 slot_state == CSG_SLOT_READY);
1559 }
1560
static bool csg_slot_stopped_raw(struct kbase_device *kbdev, s8 slot)
1562 {
1563 struct kbase_csf_cmd_stream_group_info *ginfo =
1564 &kbdev->csf.global_iface.groups[slot];
1565 u32 state;
1566
1567 state = CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo,
1568 CSG_ACK));
1569
1570 if (state == CSG_ACK_STATE_SUSPEND || state == CSG_ACK_STATE_TERMINATE) {
1571 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOPPED, kbdev->csf.scheduler.csg_slots[slot].resident_group, state);
1572 dev_dbg(kbdev->dev, "(raw status) slot %d stopped\n", slot);
1573 return true;
1574 }
1575
1576 return false;
1577 }
1578
1579 static void halt_csg_slot(struct kbase_queue_group *group, bool suspend)
1580 {
1581 struct kbase_device *kbdev = group->kctx->kbdev;
1582 struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
1583 struct kbase_csf_csg_slot *csg_slot =
1584 kbdev->csf.scheduler.csg_slots;
1585 s8 slot;
1586
1587 lockdep_assert_held(&kbdev->csf.scheduler.lock);
1588
1589 if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group)))
1590 return;
1591
1592 slot = group->csg_nr;
1593
1594 /* When in transition, wait for it to complete */
1595 if (atomic_read(&csg_slot[slot].state) == CSG_SLOT_READY2RUN) {
1596 long remaining =
1597 kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
1598
1599 dev_dbg(kbdev->dev, "slot %d wait for up-running\n", slot);
1600 remaining = wait_event_timeout(kbdev->csf.event_wait,
1601 csg_slot_running(kbdev, slot), remaining);
1602 if (!remaining)
1603 dev_warn(kbdev->dev,
1604 "[%llu] slot %d timeout (%d ms) on up-running\n",
1605 kbase_backend_get_cycle_cnt(kbdev),
1606 slot, kbdev->csf.fw_timeout_ms);
1607 }
1608
1609 if (csg_slot_running(kbdev, slot)) {
1610 unsigned long flags;
1611 struct kbase_csf_cmd_stream_group_info *ginfo =
1612 &global_iface->groups[slot];
1613 u32 halt_cmd = suspend ? CSG_REQ_STATE_SUSPEND :
1614 CSG_REQ_STATE_TERMINATE;
1615
1616 dev_dbg(kbdev->dev, "Halting(suspend=%d) group %d of context %d_%d on slot %d",
1617 suspend, group->handle, group->kctx->tgid, group->kctx->id, slot);
1618
1619 spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
1620 /* Set state to SUSPEND/TERMINATE */
1621 kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, halt_cmd,
1622 CSG_REQ_STATE_MASK);
1623 spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock,
1624 flags);
1625 atomic_set(&csg_slot[slot].state, CSG_SLOT_DOWN2STOP);
1626 csg_slot[slot].trigger_jiffies = jiffies;
1627 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOP, group, halt_cmd);
1628
1629 KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG(
1630 kbdev, kbdev->gpu_props.props.raw_props.gpu_id, slot);
1631 kbase_csf_ring_csg_doorbell(kbdev, slot);
1632 }
1633 }
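
/*
 * Informal sketch of the request/ack handshake used above (an editorial
 * illustration, not driver logic): the host requests a state change by
 * writing CSG_REQ_STATE_SUSPEND or CSG_REQ_STATE_TERMINATE into the CSG_REQ
 * input under CSG_REQ_STATE_MASK and then ringing the CSG doorbell. The
 * firmware acknowledges by updating the state field of CSG_ACK, which is
 * what update_csg_slot_status() and csg_slot_stopped_raw() inspect to
 * detect completion.
 */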
1634
1635 static void term_csg_slot(struct kbase_queue_group *group)
1636 {
1637 halt_csg_slot(group, false);
1638 }
1639
1640 static void suspend_csg_slot(struct kbase_queue_group *group)
1641 {
1642 halt_csg_slot(group, true);
1643 }
1644
1645 /**
1646 * evaluate_sync_update() - Evaluate the sync wait condition the GPU command
1647 * queue has been blocked on.
1648 *
1649 * @queue: Pointer to the GPU command queue
1650 *
1651 * Return: true if sync wait condition is satisfied.
1652 */
1653 static bool evaluate_sync_update(struct kbase_queue *queue)
1654 {
1655 struct kbase_vmap_struct *mapping;
1656 bool updated = false;
1657 u32 *sync_ptr;
1658 u32 sync_wait_cond;
1659 u32 sync_current_val;
1660 struct kbase_device *kbdev;
1661
1662 if (WARN_ON(!queue))
1663 return false;
1664
1665 kbdev = queue->kctx->kbdev;
1666 lockdep_assert_held(&kbdev->csf.scheduler.lock);
1667
1668 sync_ptr = kbase_phy_alloc_mapping_get(queue->kctx, queue->sync_ptr,
1669 &mapping);
1670
1671 KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE, queue->group,
1672 queue, queue->sync_ptr);
1673 KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_BLOCKED_REASON,
1674 queue->group, queue, queue->blocked_reason);
1675
1676 if (!sync_ptr) {
1677 dev_dbg(queue->kctx->kbdev->dev, "sync memory VA 0x%016llX already freed",
1678 queue->sync_ptr);
1679 goto out;
1680 }
1681
1682 sync_wait_cond =
1683 CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(queue->status_wait);
1684
1685 WARN_ON((sync_wait_cond != CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) &&
1686 (sync_wait_cond != CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE));
1687
1688 sync_current_val = READ_ONCE(*sync_ptr);
1689 KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_CURRENT_VAL, queue->group,
1690 queue, sync_current_val);
1691
1692 KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_TEST_VAL, queue->group,
1693 queue, queue->sync_value);
1694
1695 if (((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT) &&
1696 (sync_current_val > queue->sync_value)) ||
1697 ((sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE) &&
1698 (sync_current_val <= queue->sync_value))) {
1699 /* The sync wait condition is satisfied so the group to which
1700 * queue is bound can be re-scheduled.
1701 */
1702 updated = true;
1703 } else {
1704 dev_dbg(queue->kctx->kbdev->dev,
1705 "sync memory not updated yet(%u)", sync_current_val);
1706 }
1707
1708 kbase_phy_alloc_mapping_put(queue->kctx, mapping);
1709 out:
1710 KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_UPDATE_EVALUATED,
1711 queue->group, queue, updated);
1712 return updated;
1713 }
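
/*
 * Worked example (illustrative only) of the evaluation done above, assuming
 * a queue blocked on a "SYNC_WAIT, condition GT, value 5" operation:
 *
 *   sync_wait_cond == CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT
 *   queue->sync_value == 5
 *
 *   *sync_ptr == 5  ->  (5 > 5) is false  -> queue remains blocked
 *   *sync_ptr == 6  ->  (6 > 5) is true   -> group can be re-scheduled
 *
 * For the LE condition the test is (*sync_ptr <= queue->sync_value) instead.
 */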
1714
1715 /**
1716 * save_slot_cs() - Save the state for blocked GPU command queue.
1717 *
1718 * @ginfo: Pointer to the CSG interface used by the group
1719 * the queue is bound to.
1720 * @queue: Pointer to the GPU command queue.
1721 *
1722 * This function will check if the GPU command queue is blocked on a sync wait
1723 * and evaluate the wait condition. If the wait condition isn't satisfied, it
1724 * will save the state needed to re-evaluate the condition in future.
1725 * The group to which the queue is bound shall be in idle state.
1726 *
1727 * Return: true if the queue is blocked on a sync wait operation.
1728 */
1729 static
1730 bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo,
1731 struct kbase_queue *queue)
1732 {
1733 struct kbase_csf_cmd_stream_info *const stream =
1734 &ginfo->streams[queue->csi_index];
1735 u32 status = kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT);
1736 bool is_waiting = false;
1737
1738 KBASE_KTRACE_ADD_CSF_GRP_Q(stream->kbdev, QUEUE_SYNC_STATUS_WAIT,
1739 queue->group, queue, status);
1740
1741 if (CS_STATUS_WAIT_SYNC_WAIT_GET(status)) {
1742 queue->status_wait = status;
1743 queue->sync_ptr = kbase_csf_firmware_cs_output(stream,
1744 CS_STATUS_WAIT_SYNC_POINTER_LO);
1745 queue->sync_ptr |= (u64)kbase_csf_firmware_cs_output(stream,
1746 CS_STATUS_WAIT_SYNC_POINTER_HI) << 32;
1747 queue->sync_value = kbase_csf_firmware_cs_output(stream,
1748 CS_STATUS_WAIT_SYNC_VALUE);
1749
1750 queue->sb_status = CS_STATUS_SCOREBOARDS_NONZERO_GET(
1751 kbase_csf_firmware_cs_output(stream,
1752 CS_STATUS_SCOREBOARDS));
1753 queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_GET(
1754 kbase_csf_firmware_cs_output(stream,
1755 CS_STATUS_BLOCKED_REASON));
1756
1757 if (!evaluate_sync_update(queue)) {
1758 is_waiting = true;
1759 } else {
1760 /* The sync object has already been updated and the
1761 * condition met, so it doesn't need to be re-evaluated
1762 * and 'status_wait' can be cleared here.
1763 */
1764 queue->status_wait = 0;
1765 }
1766 } else {
1767 /* Invalidate wait status info that would have been recorded if
1768 * this queue was blocked when the group (in idle state) was
1769 * suspended previously. After that the group could have been
1770 * unblocked due to the kicking of another queue bound to it &
1771 * so the wait status info would have stuck with this queue.
1772 */
1773 queue->status_wait = 0;
1774 }
1775
1776 return is_waiting;
1777 }
1778
1779 static void schedule_in_cycle(struct kbase_queue_group *group, bool force)
1780 {
1781 struct kbase_context *kctx = group->kctx;
1782 struct kbase_device *kbdev = kctx->kbdev;
1783 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
1784
1785 lockdep_assert_held(&scheduler->lock);
1786
1787 /* Only try to schedule work for this event if no requests are pending,
1788 * otherwise the function would end up canceling previous work requests.
1789 * Also require that the scheduler is configured to wake up periodically,
1790 * unless the scheduling of work must be enforced (for example when
1791 * entering protected mode).
1792 */
1793 if ((likely(scheduler_timer_is_enabled_nolock(kbdev)) || force) &&
1794 !scheduler->tock_pending_request) {
1795 scheduler->tock_pending_request = true;
1796 dev_dbg(kbdev->dev, "Kicking async for group %d\n",
1797 group->handle);
1798 mod_delayed_work(scheduler->wq, &scheduler->tock_work, 0);
1799 }
1800 }
1801
1802 static
1803 void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler,
1804 struct kbase_queue_group *const group,
1805 enum kbase_csf_group_state run_state)
1806 {
1807 struct kbase_context *const kctx = group->kctx;
1808 struct kbase_device *const kbdev = kctx->kbdev;
1809
1810 lockdep_assert_held(&scheduler->lock);
1811
1812 WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE);
1813
1814 if (WARN_ON(group->priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT))
1815 return;
1816
1817 group->run_state = run_state;
1818
1819 if (run_state == KBASE_CSF_GROUP_RUNNABLE)
1820 group->prepared_seq_num = KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID;
1821
1822 list_add_tail(&group->link,
1823 &kctx->csf.sched.runnable_groups[group->priority]);
1824 kctx->csf.sched.num_runnable_grps++;
1825 KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_INSERT_RUNNABLE, group,
1826 kctx->csf.sched.num_runnable_grps);
1827
1828 /* Add the kctx if not yet in runnable kctxs */
1829 if (kctx->csf.sched.num_runnable_grps == 1) {
1830 /* First runnable CSG, so add the kctx to runnable_kctxs */
1831 INIT_LIST_HEAD(&kctx->csf.link);
1832 list_add_tail(&kctx->csf.link, &scheduler->runnable_kctxs);
1833 KBASE_KTRACE_ADD(kbdev, SCHEDULER_INSERT_RUNNABLE, kctx, 0u);
1834 }
1835
1836 scheduler->total_runnable_grps++;
1837
1838 if (likely(scheduler_timer_is_enabled_nolock(kbdev)) &&
1839 (scheduler->total_runnable_grps == 1 ||
1840 scheduler->state == SCHED_SUSPENDED ||
1841 scheduler->state == SCHED_SLEEPING)) {
1842 dev_dbg(kbdev->dev, "Kicking scheduler on first runnable group\n");
1843 /* Fire a scheduling to start the time-slice */
1844 enqueue_tick_work(kbdev);
1845 } else
1846 schedule_in_cycle(group, false);
1847
1848 /* Since a new group has become runnable, check if GPU needs to be
1849 * powered up.
1850 */
1851 scheduler_wakeup(kbdev, false);
1852 }
1853
1854 static
1855 void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler,
1856 struct kbase_queue_group *group,
1857 enum kbase_csf_group_state run_state)
1858 {
1859 struct kbase_context *kctx = group->kctx;
1860 struct kbase_queue_group *new_head_grp;
1861 struct list_head *list =
1862 &kctx->csf.sched.runnable_groups[group->priority];
1863 unsigned long flags;
1864
1865 lockdep_assert_held(&scheduler->lock);
1866
1867 WARN_ON(!queue_group_scheduled_locked(group));
1868
1869 group->run_state = run_state;
1870 list_del_init(&group->link);
1871
1872 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
1873 /* The below condition will be true when the group running in protected
1874 * mode is being terminated but the protected mode exit interrupt wasn't
1875 * received. This can happen if the FW got stuck during protected mode
1876 * for some reason (like GPU page fault or some internal error).
1877 * In normal cases FW is expected to send the protected mode exit
1878 * interrupt before it handles the CSG termination request.
1879 */
1880 if (unlikely(scheduler->active_protm_grp == group)) {
1881 /* CSG slot cleanup should have happened for the pmode group */
1882 WARN_ON(kbasep_csf_scheduler_group_is_on_slot_locked(group));
1883 WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE);
1884 /* Initiate a GPU reset, in case it wasn't initiated yet,
1885 * in order to rectify the anomaly.
1886 */
1887 if (kbase_prepare_to_reset_gpu(kctx->kbdev, RESET_FLAGS_NONE))
1888 kbase_reset_gpu(kctx->kbdev);
1889
1890 KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_EXIT_PROTM,
1891 scheduler->active_protm_grp, 0u);
1892 scheduler->active_protm_grp = NULL;
1893 }
1894 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
1895
1896 if (scheduler->top_grp == group) {
1897 /*
1898 * Note: this disables explicit rotation in the next scheduling
1899 * cycle. However, removing the top_grp is the same as an
1900 * implicit rotation (e.g. if we instead rotated the top_ctx
1901 * and then remove top_grp)
1902 *
1903 * This implicit rotation is assumed by the scheduler rotate
1904 * functions.
1905 */
1906 scheduler->top_grp = NULL;
1907
1908 /*
1909 * Trigger a scheduling tock for a CSG containing protected
1910 * content in case there has been any in order to minimise
1911 * latency.
1912 */
1913 group = scheduler_get_protm_enter_async_group(kctx->kbdev,
1914 NULL);
1915 if (group)
1916 schedule_in_cycle(group, true);
1917 }
1918
1919 kctx->csf.sched.num_runnable_grps--;
1920 KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_REMOVE_RUNNABLE, group,
1921 kctx->csf.sched.num_runnable_grps);
1922 new_head_grp = (!list_empty(list)) ?
1923 list_first_entry(list, struct kbase_queue_group, link) :
1924 NULL;
1925 KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_HEAD_RUNNABLE, new_head_grp,
1926 0u);
1927
1928 if (kctx->csf.sched.num_runnable_grps == 0) {
1929 struct kbase_context *new_head_kctx;
1930 struct list_head *kctx_list = &scheduler->runnable_kctxs;
1931 /* drop the kctx */
1932 list_del_init(&kctx->csf.link);
1933 if (scheduler->top_ctx == kctx)
1934 scheduler->top_ctx = NULL;
1935 KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_REMOVE_RUNNABLE, kctx,
1936 0u);
1937 new_head_kctx = (!list_empty(kctx_list)) ?
1938 list_first_entry(kctx_list, struct kbase_context, csf.link) :
1939 NULL;
1940 KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_HEAD_RUNNABLE,
1941 new_head_kctx, 0u);
1942 }
1943
1944 WARN_ON(scheduler->total_runnable_grps == 0);
1945 scheduler->total_runnable_grps--;
1946 if (!scheduler->total_runnable_grps) {
1947 dev_dbg(kctx->kbdev->dev, "Scheduler idle has no runnable groups");
1948 cancel_tick_timer(kctx->kbdev);
1949 WARN_ON(atomic_read(&scheduler->non_idle_offslot_grps));
1950 if (scheduler->state != SCHED_SUSPENDED)
1951 queue_work(system_wq, &scheduler->gpu_idle_work);
1952 }
1953 KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp,
1954 scheduler->num_active_address_spaces |
1955 (((u64)scheduler->total_runnable_grps) << 32));
1956 }
1957
1958 static void insert_group_to_idle_wait(struct kbase_queue_group *const group)
1959 {
1960 struct kbase_context *kctx = group->kctx;
1961
1962 lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock);
1963
1964 WARN_ON(group->run_state != KBASE_CSF_GROUP_IDLE);
1965
1966 list_add_tail(&group->link, &kctx->csf.sched.idle_wait_groups);
1967 kctx->csf.sched.num_idle_wait_grps++;
1968 KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_INSERT_IDLE_WAIT, group,
1969 kctx->csf.sched.num_idle_wait_grps);
1970 group->run_state = KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC;
1971 dev_dbg(kctx->kbdev->dev,
1972 "Group-%d suspended on sync_wait, total wait_groups: %u\n",
1973 group->handle, kctx->csf.sched.num_idle_wait_grps);
1974 }
1975
1976 static void remove_group_from_idle_wait(struct kbase_queue_group *const group)
1977 {
1978 struct kbase_context *kctx = group->kctx;
1979 struct list_head *list = &kctx->csf.sched.idle_wait_groups;
1980 struct kbase_queue_group *new_head_grp;
1981
1982 lockdep_assert_held(&kctx->kbdev->csf.scheduler.lock);
1983
1984 WARN_ON(group->run_state != KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC);
1985
1986 list_del_init(&group->link);
1987 WARN_ON(kctx->csf.sched.num_idle_wait_grps == 0);
1988 kctx->csf.sched.num_idle_wait_grps--;
1989 KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_REMOVE_IDLE_WAIT, group,
1990 kctx->csf.sched.num_idle_wait_grps);
1991 new_head_grp = (!list_empty(list)) ?
1992 list_first_entry(list, struct kbase_queue_group, link) :
1993 NULL;
1994 KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, GROUP_HEAD_IDLE_WAIT,
1995 new_head_grp, 0u);
1996 group->run_state = KBASE_CSF_GROUP_INACTIVE;
1997 }
1998
1999 static void deschedule_idle_wait_group(struct kbase_csf_scheduler *scheduler,
2000 struct kbase_queue_group *group)
2001 {
2002 lockdep_assert_held(&scheduler->lock);
2003
2004 if (WARN_ON(!group))
2005 return;
2006
2007 remove_group_from_runnable(scheduler, group, KBASE_CSF_GROUP_IDLE);
2008 insert_group_to_idle_wait(group);
2009 }
2010
2011 static void update_offslot_non_idle_cnt_for_faulty_grp(struct kbase_queue_group *group)
2012 {
2013 struct kbase_device *kbdev = group->kctx->kbdev;
2014 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
2015
2016 lockdep_assert_held(&scheduler->lock);
2017
2018 if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) {
2019 int new_val =
2020 atomic_dec_return(&scheduler->non_idle_offslot_grps);
2021 KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC,
2022 group, new_val);
2023 }
2024 }
2025
2026 static void update_offslot_non_idle_cnt_for_onslot_grp(struct kbase_queue_group *group)
2027 {
2028 struct kbase_device *kbdev = group->kctx->kbdev;
2029 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
2030
2031 lockdep_assert_held(&scheduler->lock);
2032
2033 WARN_ON(group->csg_nr < 0);
2034
2035 if (group->prepared_seq_num < scheduler->non_idle_scanout_grps) {
2036 int new_val =
2037 atomic_dec_return(&scheduler->non_idle_offslot_grps);
2038 KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC,
2039 group, new_val);
2040 }
2041 }
2042
2043 static void update_offslot_non_idle_cnt_on_grp_suspend(
2044 struct kbase_queue_group *group)
2045 {
2046 struct kbase_device *kbdev = group->kctx->kbdev;
2047 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
2048
2049 lockdep_assert_held(&scheduler->lock);
2050
2051 if (scheduler->state == SCHED_BUSY) {
2052 /* active phase, or asynchronously entering protected mode */
2053 if (group->prepared_seq_num >=
2054 scheduler->non_idle_scanout_grps) {
2055 /* At scanout, it was tagged as on-slot idle */
2056 if (group->run_state == KBASE_CSF_GROUP_SUSPENDED) {
2057 int new_val = atomic_inc_return(
2058 &scheduler->non_idle_offslot_grps);
2059 KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC,
2060 group, new_val);
2061 }
2062 } else {
2063 if (group->run_state != KBASE_CSF_GROUP_SUSPENDED) {
2064 int new_val = atomic_dec_return(
2065 &scheduler->non_idle_offslot_grps);
2066 KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC,
2067 group, new_val);
2068 }
2069 }
2070 } else {
2071 /* async phases */
2072 if (group->run_state == KBASE_CSF_GROUP_SUSPENDED) {
2073 int new_val = atomic_inc_return(
2074 &scheduler->non_idle_offslot_grps);
2075 KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC,
2076 group, new_val);
2077 }
2078 }
2079 }
2080
2081 static bool confirm_cmd_buf_empty(struct kbase_queue *queue)
2082 {
2083 bool cs_empty;
2084 bool cs_idle;
2085 u32 sb_status = 0;
2086
2087 struct kbase_device const *const kbdev = queue->group->kctx->kbdev;
2088 struct kbase_csf_global_iface const *const iface =
2089 &kbdev->csf.global_iface;
2090
2091 u32 glb_version = iface->version;
2092
2093 u64 *input_addr = (u64 *)queue->user_io_addr;
2094 u64 *output_addr = (u64 *)(queue->user_io_addr + PAGE_SIZE);
2095
2096 if (glb_version >= kbase_csf_interface_version(1, 0, 0)) {
2097 /* CS_STATUS_SCOREBOARD supported from CSF 1.0 */
2098 struct kbase_csf_cmd_stream_group_info const *const ginfo =
2099 &kbdev->csf.global_iface.groups[queue->group->csg_nr];
2100 struct kbase_csf_cmd_stream_info const *const stream =
2101 &ginfo->streams[queue->csi_index];
2102
2103 sb_status = CS_STATUS_SCOREBOARDS_NONZERO_GET(
2104 kbase_csf_firmware_cs_output(stream,
2105 CS_STATUS_SCOREBOARDS));
2106 }
2107
2108 cs_empty = (input_addr[CS_INSERT_LO / sizeof(u64)] ==
2109 output_addr[CS_EXTRACT_LO / sizeof(u64)]);
2110 cs_idle = cs_empty && (!sb_status);
2111
2112 return cs_idle;
2113 }
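
/*
 * Worked example (illustrative only): the ring buffer of a GPU queue is
 * treated as empty when the CS_INSERT and CS_EXTRACT pointers in the
 * user I/O pages match, e.g.
 *
 *   CS_INSERT == 0x1040 && CS_EXTRACT == 0x1040  ->  cs_empty == true
 *   CS_INSERT == 0x1040 && CS_EXTRACT == 0x1000  ->  cs_empty == false
 *
 * From CSF interface 1.0 onwards the CS is additionally required to have no
 * scoreboard entries in use (sb_status == 0) before it is considered idle.
 */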
2114
2115 static void save_csg_slot(struct kbase_queue_group *group)
2116 {
2117 struct kbase_device *kbdev = group->kctx->kbdev;
2118 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
2119 struct kbase_csf_cmd_stream_group_info *ginfo;
2120 u32 state;
2121
2122 lockdep_assert_held(&scheduler->lock);
2123
2124 if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group)))
2125 return;
2126
2127 ginfo = &kbdev->csf.global_iface.groups[group->csg_nr];
2128
2129 state =
2130 CSG_ACK_STATE_GET(kbase_csf_firmware_csg_output(ginfo, CSG_ACK));
2131
2132 if (!WARN_ON((state != CSG_ACK_STATE_SUSPEND) &&
2133 (state != CSG_ACK_STATE_TERMINATE))) {
2134 u32 max_streams = ginfo->stream_num;
2135 u32 i;
2136 bool sync_wait = false;
2137 bool idle = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) &
2138 CSG_STATUS_STATE_IDLE_MASK;
2139 #if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
2140 for (i = 0; i < max_streams; i++)
2141 update_hw_active(group->bound_queues[i], false);
2142 #endif /* CONFIG_MALI_BIFROST_NO_MALI */
2143 for (i = 0; idle && i < max_streams; i++) {
2144 struct kbase_queue *const queue =
2145 group->bound_queues[i];
2146
2147 if (!queue || !queue->enabled)
2148 continue;
2149
2150 if (save_slot_cs(ginfo, queue))
2151 sync_wait = true;
2152 else {
2153 /* Need to confirm whether the ring buffer of
2154 * the GPU queue is empty or not. A race can arise
2155 * between the flush of a GPU queue and the suspend
2156 * of its CSG. If a queue is flushed after FW has
2157 * set the IDLE bit in CSG_STATUS_STATE, then the
2158 * Scheduler would incorrectly consider the CSG
2159 * as idle, and there may not be any further
2160 * flush call for the GPU queue which would
2161 * have de-idled the CSG.
2162 */
2163 idle = confirm_cmd_buf_empty(queue);
2164 }
2165 }
2166
2167 if (idle) {
2168 /* Take the suspended group out of the runnable_groups
2169 * list of the context and move it to the
2170 * idle_wait_groups list.
2171 */
2172 if (sync_wait)
2173 deschedule_idle_wait_group(scheduler, group);
2174 else {
2175 group->run_state =
2176 KBASE_CSF_GROUP_SUSPENDED_ON_IDLE;
2177 dev_dbg(kbdev->dev, "Group-%d suspended: idle",
2178 group->handle);
2179 }
2180 } else {
2181 group->run_state = KBASE_CSF_GROUP_SUSPENDED;
2182 }
2183
2184 update_offslot_non_idle_cnt_on_grp_suspend(group);
2185 }
2186 }
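
/*
 * Recap (editorial, mirrors the logic above) of the run_state chosen when an
 * on-slot group is saved on suspension:
 *
 *   CSG idle and at least one queue blocked on a sync wait
 *       -> KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC (moved to idle_wait_groups)
 *   CSG idle and every enabled queue has an empty ring buffer
 *       -> KBASE_CSF_GROUP_SUSPENDED_ON_IDLE
 *   otherwise
 *       -> KBASE_CSF_GROUP_SUSPENDED
 */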
2187
2188 /* Clean up a CSG slot after it has been vacated, ready for the next CSG run.
2189 * Return whether there is a kctx address fault associated with the group
2190 * for which the clean-up is done.
2191 */
2192 static bool cleanup_csg_slot(struct kbase_queue_group *group)
2193 {
2194 struct kbase_context *kctx = group->kctx;
2195 struct kbase_device *kbdev = kctx->kbdev;
2196 struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
2197 struct kbase_csf_cmd_stream_group_info *ginfo;
2198 s8 slot;
2199 struct kbase_csf_csg_slot *csg_slot;
2200 unsigned long flags;
2201 u32 i;
2202 bool as_fault = false;
2203
2204 lockdep_assert_held(&kbdev->csf.scheduler.lock);
2205
2206 if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group)))
2207 return as_fault;
2208
2209 slot = group->csg_nr;
2210 csg_slot = &kbdev->csf.scheduler.csg_slots[slot];
2211 ginfo = &global_iface->groups[slot];
2212
2213 /* Now loop through all the bound CSs, and clean them via a stop */
2214 for (i = 0; i < ginfo->stream_num; i++) {
2215 struct kbase_csf_cmd_stream_info *stream = &ginfo->streams[i];
2216
2217 if (group->bound_queues[i]) {
2218 if (group->bound_queues[i]->enabled) {
2219 kbase_csf_firmware_cs_input_mask(stream,
2220 CS_REQ, CS_REQ_STATE_STOP,
2221 CS_REQ_STATE_MASK);
2222 }
2223
2224 unassign_user_doorbell_from_queue(kbdev,
2225 group->bound_queues[i]);
2226 }
2227 }
2228
2229 unassign_user_doorbell_from_group(kbdev, group);
2230
2231 /* The csg does not need cleanup other than drop its AS */
2232 spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags);
2233 as_fault = kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT);
2234 kbase_ctx_sched_release_ctx(kctx);
2235 if (unlikely(group->faulted))
2236 as_fault = true;
2237 spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
2238
2239 /* Now mark the slot as vacant */
2240 spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
2241 kbdev->csf.scheduler.csg_slots[slot].resident_group = NULL;
2242 clear_bit(slot, kbdev->csf.scheduler.csg_slots_idle_mask);
2243 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
2244 kbdev->csf.scheduler.csg_slots_idle_mask[0]);
2245
2246 group->csg_nr = KBASEP_CSG_NR_INVALID;
2247 set_bit(slot, kbdev->csf.scheduler.csgs_events_enable_mask);
2248 clear_bit(slot, kbdev->csf.scheduler.csg_inuse_bitmap);
2249 spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
2250
2251 csg_slot->trigger_jiffies = jiffies;
2252 atomic_set(&csg_slot->state, CSG_SLOT_READY);
2253
2254 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_CLEANED, group, slot);
2255 dev_dbg(kbdev->dev, "Cleanup done for group %d on slot %d\n",
2256 group->handle, slot);
2257
2258 KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG(kbdev,
2259 kbdev->gpu_props.props.raw_props.gpu_id, slot);
2260
2261 return as_fault;
2262 }
2263
2264 static void update_csg_slot_priority(struct kbase_queue_group *group, u8 prio)
2265 {
2266 struct kbase_device *kbdev = group->kctx->kbdev;
2267 struct kbase_csf_csg_slot *csg_slot;
2268 struct kbase_csf_cmd_stream_group_info *ginfo;
2269 s8 slot;
2270 u8 prev_prio;
2271 u32 ep_cfg;
2272 u32 csg_req;
2273 unsigned long flags;
2274
2275 lockdep_assert_held(&kbdev->csf.scheduler.lock);
2276
2277 if (WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(group)))
2278 return;
2279
2280 slot = group->csg_nr;
2281 csg_slot = &kbdev->csf.scheduler.csg_slots[slot];
2282 ginfo = &kbdev->csf.global_iface.groups[slot];
2283
2284 /* CSGs remaining on-slot can be either idle or runnable.
2285 * This also applies in protected mode.
2286 */
2287 WARN_ON(!((group->run_state == KBASE_CSF_GROUP_RUNNABLE) ||
2288 (group->run_state == KBASE_CSF_GROUP_IDLE)));
2289
2290 /* Update consumes a group from scanout */
2291 update_offslot_non_idle_cnt_for_onslot_grp(group);
2292
2293 if (csg_slot->priority == prio)
2294 return;
2295
2296 /* Read the csg_ep_cfg back for updating the priority field */
2297 ep_cfg = kbase_csf_firmware_csg_input_read(ginfo, CSG_EP_REQ);
2298 prev_prio = CSG_EP_REQ_PRIORITY_GET(ep_cfg);
2299 ep_cfg = CSG_EP_REQ_PRIORITY_SET(ep_cfg, prio);
2300 kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ, ep_cfg);
2301
2302 spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
2303 csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
2304 csg_req ^= CSG_REQ_EP_CFG_MASK;
2305 kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req,
2306 CSG_REQ_EP_CFG_MASK);
2307 spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
2308
2309 csg_slot->priority = prio;
2310
2311 dev_dbg(kbdev->dev, "Priority for group %d of context %d_%d on slot %d to be updated from %u to %u\n",
2312 group->handle, group->kctx->tgid, group->kctx->id, slot,
2313 prev_prio, prio);
2314
2315 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_PRIO_UPDATE, group, prev_prio);
2316
2317 kbase_csf_ring_csg_doorbell(kbdev, slot);
2318 set_bit(slot, kbdev->csf.scheduler.csg_slots_prio_update);
2319 }
2320
2321 static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
2322 u8 prio)
2323 {
2324 struct kbase_context *kctx = group->kctx;
2325 struct kbase_device *kbdev = kctx->kbdev;
2326 struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
2327 const u64 shader_core_mask =
2328 kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER);
2329 const u64 tiler_core_mask =
2330 kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_TILER);
2331 const u64 compute_mask = shader_core_mask & group->compute_mask;
2332 const u64 fragment_mask = shader_core_mask & group->fragment_mask;
2333 const u64 tiler_mask = tiler_core_mask & group->tiler_mask;
2334 const u8 num_cores = kbdev->gpu_props.num_cores;
2335 const u8 compute_max = min(num_cores, group->compute_max);
2336 const u8 fragment_max = min(num_cores, group->fragment_max);
2337 const u8 tiler_max = min(CSG_TILER_MAX, group->tiler_max);
2338 struct kbase_csf_cmd_stream_group_info *ginfo;
2339 u32 ep_cfg = 0;
2340 u32 csg_req;
2341 u32 state;
2342 int i;
2343 unsigned long flags;
2344 const u64 normal_suspend_buf =
2345 group->normal_suspend_buf.reg->start_pfn << PAGE_SHIFT;
2346 struct kbase_csf_csg_slot *csg_slot =
2347 &kbdev->csf.scheduler.csg_slots[slot];
2348
2349 lockdep_assert_held(&kbdev->csf.scheduler.lock);
2350
2351 if (WARN_ON(slot < 0) ||
2352 WARN_ON(slot >= global_iface->group_num))
2353 return;
2354
2355 WARN_ON(atomic_read(&csg_slot->state) != CSG_SLOT_READY);
2356
2357 ginfo = &global_iface->groups[slot];
2358
2359 /* Pick an available address space for this context */
2360 mutex_lock(&kbdev->mmu_hw_mutex);
2361 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
2362 kbase_ctx_sched_retain_ctx(kctx);
2363 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
2364 mutex_unlock(&kbdev->mmu_hw_mutex);
2365
2366 if (kctx->as_nr == KBASEP_AS_NR_INVALID) {
2367 dev_warn(kbdev->dev, "Could not get a valid AS for group %d of context %d_%d on slot %d\n",
2368 group->handle, kctx->tgid, kctx->id, slot);
2369 return;
2370 }
2371
2372 spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
2373 set_bit(slot, kbdev->csf.scheduler.csg_inuse_bitmap);
2374 kbdev->csf.scheduler.csg_slots[slot].resident_group = group;
2375 group->csg_nr = slot;
2376 spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
2377
2378 assign_user_doorbell_to_group(kbdev, group);
2379
2380 /* Now loop through all the bound & kicked CSs, and program them */
2381 for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) {
2382 struct kbase_queue *queue = group->bound_queues[i];
2383
2384 if (queue)
2385 program_cs(kbdev, queue, false);
2386 }
2387
2388
2389 /* Endpoint programming for CSG */
2390 kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_COMPUTE_LO,
2391 compute_mask & U32_MAX);
2392 kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_COMPUTE_HI,
2393 compute_mask >> 32);
2394 kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_FRAGMENT_LO,
2395 fragment_mask & U32_MAX);
2396 kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_FRAGMENT_HI,
2397 fragment_mask >> 32);
2398 kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_OTHER,
2399 tiler_mask & U32_MAX);
2400
2401 /* Register group UID with firmware */
2402 kbase_csf_firmware_csg_input(ginfo, CSG_ITER_TRACE_CONFIG,
2403 group->group_uid);
2404
2405 ep_cfg = CSG_EP_REQ_COMPUTE_EP_SET(ep_cfg, compute_max);
2406 ep_cfg = CSG_EP_REQ_FRAGMENT_EP_SET(ep_cfg, fragment_max);
2407 ep_cfg = CSG_EP_REQ_TILER_EP_SET(ep_cfg, tiler_max);
2408 ep_cfg = CSG_EP_REQ_PRIORITY_SET(ep_cfg, prio);
2409 kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ, ep_cfg);
2410
2411 /* Program the address space number assigned to the context */
2412 kbase_csf_firmware_csg_input(ginfo, CSG_CONFIG, kctx->as_nr);
2413
2414 kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_LO,
2415 normal_suspend_buf & U32_MAX);
2416 kbase_csf_firmware_csg_input(ginfo, CSG_SUSPEND_BUF_HI,
2417 normal_suspend_buf >> 32);
2418
2419 if (group->protected_suspend_buf.reg) {
2420 const u64 protm_suspend_buf =
2421 group->protected_suspend_buf.reg->start_pfn <<
2422 PAGE_SHIFT;
2423 kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_LO,
2424 protm_suspend_buf & U32_MAX);
2425 kbase_csf_firmware_csg_input(ginfo, CSG_PROTM_SUSPEND_BUF_HI,
2426 protm_suspend_buf >> 32);
2427 }
2428
2429
2430 /* Enable all interrupts for now */
2431 kbase_csf_firmware_csg_input(ginfo, CSG_ACK_IRQ_MASK, ~((u32)0));
2432
2433 spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
2434 csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
2435 csg_req ^= CSG_REQ_EP_CFG_MASK;
2436 kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req,
2437 CSG_REQ_EP_CFG_MASK);
2438
2439 /* Set state to START/RESUME */
2440 if (queue_group_suspended_locked(group)) {
2441 state = CSG_REQ_STATE_RESUME;
2442 } else {
2443 WARN_ON(group->run_state != KBASE_CSF_GROUP_RUNNABLE);
2444 state = CSG_REQ_STATE_START;
2445 }
2446
2447 kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ,
2448 state, CSG_REQ_STATE_MASK);
2449 spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
2450
2451 /* Update status before ringing the doorbell, marking ready => run */
2452 atomic_set(&csg_slot->state, CSG_SLOT_READY2RUN);
2453 csg_slot->trigger_jiffies = jiffies;
2454 csg_slot->priority = prio;
2455
2456 /* Trace the programming of the CSG on the slot */
2457 KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG(
2458 kbdev, kbdev->gpu_props.props.raw_props.gpu_id, group->kctx->id,
2459 group->handle, slot, (state == CSG_REQ_STATE_RESUME) ? 1 : 0);
2460
2461 dev_dbg(kbdev->dev, "Starting group %d of context %d_%d on slot %d with priority %u\n",
2462 group->handle, kctx->tgid, kctx->id, slot, prio);
2463
2464 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_START, group,
2465 (((u64)ep_cfg) << 32) |
2466 ((((u32)kctx->as_nr) & 0xF) << 16) |
2467 (state & (CSG_REQ_STATE_MASK >> CS_REQ_STATE_SHIFT)));
2468
2469 kbase_csf_ring_csg_doorbell(kbdev, slot);
2470
2471 /* Programming a slot consumes a group from scanout */
2472 update_offslot_non_idle_cnt_for_onslot_grp(group);
2473 }
2474
2475 static void remove_scheduled_group(struct kbase_device *kbdev,
2476 struct kbase_queue_group *group)
2477 {
2478 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
2479
2480 lockdep_assert_held(&scheduler->lock);
2481
2482 WARN_ON(group->prepared_seq_num ==
2483 KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID);
2484 WARN_ON(list_empty(&group->link_to_schedule));
2485
2486 list_del_init(&group->link_to_schedule);
2487 scheduler->ngrp_to_schedule--;
2488 group->prepared_seq_num = KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID;
2489 group->kctx->csf.sched.ngrp_to_schedule--;
2490 }
2491
2492 static void sched_evict_group(struct kbase_queue_group *group, bool fault,
2493 bool update_non_idle_offslot_grps_cnt)
2494 {
2495 struct kbase_context *kctx = group->kctx;
2496 struct kbase_device *kbdev = kctx->kbdev;
2497 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
2498
2499 lockdep_assert_held(&kbdev->csf.scheduler.lock);
2500
2501 if (queue_group_scheduled_locked(group)) {
2502 u32 i;
2503
2504 if (update_non_idle_offslot_grps_cnt &&
2505 (group->run_state == KBASE_CSF_GROUP_SUSPENDED ||
2506 group->run_state == KBASE_CSF_GROUP_RUNNABLE)) {
2507 int new_val = atomic_dec_return(
2508 &scheduler->non_idle_offslot_grps);
2509 KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC,
2510 group, new_val);
2511 }
2512
2513 for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) {
2514 if (group->bound_queues[i])
2515 group->bound_queues[i]->enabled = false;
2516 }
2517
2518 if (group->prepared_seq_num !=
2519 KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID)
2520 remove_scheduled_group(kbdev, group);
2521
2522 if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC)
2523 remove_group_from_idle_wait(group);
2524 else {
2525 remove_group_from_runnable(scheduler, group,
2526 KBASE_CSF_GROUP_INACTIVE);
2527 }
2528
2529 WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE);
2530
2531 if (fault)
2532 group->run_state = KBASE_CSF_GROUP_FAULT_EVICTED;
2533
2534 KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_EVICT_SCHED, group,
2535 (((u64)scheduler->total_runnable_grps) << 32) |
2536 ((u32)group->run_state));
2537 dev_dbg(kbdev->dev, "group %d exited scheduler, num_runnable_grps %d\n",
2538 group->handle, scheduler->total_runnable_grps);
2539 /* Notify a group has been evicted */
2540 wake_up_all(&kbdev->csf.event_wait);
2541 }
2542 }
2543
2544 static int term_group_sync(struct kbase_queue_group *group)
2545 {
2546 struct kbase_device *kbdev = group->kctx->kbdev;
2547 long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
2548 int err = 0;
2549
2550 term_csg_slot(group);
2551
2552 remaining = wait_event_timeout(kbdev->csf.event_wait,
2553 csg_slot_stopped_locked(kbdev, group->csg_nr), remaining);
2554
2555 if (!remaining) {
2556 dev_warn(kbdev->dev, "[%llu] term request timeout (%d ms) for group %d of context %d_%d on slot %d",
2557 kbase_backend_get_cycle_cnt(kbdev), kbdev->csf.fw_timeout_ms,
2558 group->handle, group->kctx->tgid,
2559 group->kctx->id, group->csg_nr);
2560 if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
2561 kbase_reset_gpu(kbdev);
2562
2563
2564 err = -ETIMEDOUT;
2565 }
2566
2567 return err;
2568 }
2569
2570 void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group)
2571 {
2572 struct kbase_device *kbdev = group->kctx->kbdev;
2573 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
2574 bool on_slot;
2575
2576 kbase_reset_gpu_assert_failed_or_prevented(kbdev);
2577 lockdep_assert_held(&group->kctx->csf.lock);
2578 mutex_lock(&scheduler->lock);
2579
2580 KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_DESCHEDULE, group, group->run_state);
2581 if (!queue_group_scheduled_locked(group))
2582 goto unlock;
2583
2584 on_slot = kbasep_csf_scheduler_group_is_on_slot_locked(group);
2585
2586 #ifdef KBASE_PM_RUNTIME
2587 /* If the queue group is on slot and Scheduler is in SLEEPING state,
2588 * then we need to wait here for Scheduler to exit the sleep state
2589 * (i.e. wait for the runtime suspend or power down of GPU). This would
2590 * be better than aborting the power down. The group will be suspended
2591 * anyway on power down, so there is no need to send the CSG termination
2592 * request to FW.
2593 */
2594 if (on_slot && (scheduler->state == SCHED_SLEEPING)) {
2595 if (wait_for_scheduler_to_exit_sleep(kbdev)) {
2596 dev_warn(
2597 kbdev->dev,
2598 "Wait for scheduler to exit sleep state timed out when terminating group %d of context %d_%d on slot %d",
2599 group->handle, group->kctx->tgid,
2600 group->kctx->id, group->csg_nr);
2601
2602 scheduler_wakeup(kbdev, true);
2603
2604 /* Wait for MCU firmware to start running */
2605 if (kbase_csf_scheduler_wait_mcu_active(kbdev))
2606 dev_warn(
2607 kbdev->dev,
2608 "[%llu] Wait for MCU active failed when terminating group %d of context %d_%d on slot %d",
2609 kbase_backend_get_cycle_cnt(kbdev),
2610 group->handle, group->kctx->tgid,
2611 group->kctx->id, group->csg_nr);
2612 }
2613
2614 /* Check the group state again as scheduler lock would have been
2615 * released when waiting for the exit from SLEEPING state.
2616 */
2617 if (!queue_group_scheduled_locked(group))
2618 goto unlock;
2619
2620 on_slot = kbasep_csf_scheduler_group_is_on_slot_locked(group);
2621 }
2622 #endif
2623 if (!on_slot) {
2624 sched_evict_group(group, false, true);
2625 } else {
2626 bool as_faulty;
2627
2628 term_group_sync(group);
2629 /* Treat the CSG as having been terminated */
2630 as_faulty = cleanup_csg_slot(group);
2631 /* remove from the scheduler list */
2632 sched_evict_group(group, as_faulty, false);
2633 }
2634
2635 WARN_ON(queue_group_scheduled_locked(group));
2636
2637 unlock:
2638 mutex_unlock(&scheduler->lock);
2639 }
2640
2641 /**
2642 * scheduler_group_schedule() - Schedule a GPU command queue group on firmware
2643 *
2644 * @group: Pointer to the queue group to be scheduled.
2645 *
2646 * This function would enable the scheduling of GPU command queue group on
2647 * firmware.
2648 *
2649 * Return: 0 on success, or negative on failure.
2650 */
2651 static int scheduler_group_schedule(struct kbase_queue_group *group)
2652 {
2653 struct kbase_context *kctx = group->kctx;
2654 struct kbase_device *kbdev = kctx->kbdev;
2655 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
2656
2657 lockdep_assert_held(&kctx->csf.lock);
2658 lockdep_assert_held(&scheduler->lock);
2659
2660 KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SCHEDULE, group, group->run_state);
2661 if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC)
2662 update_idle_suspended_group_state(group);
2663 else if (queue_group_idle_locked(group)) {
2664 WARN_ON(kctx->csf.sched.num_runnable_grps == 0);
2665 WARN_ON(kbdev->csf.scheduler.total_runnable_grps == 0);
2666
2667 if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_IDLE)
2668 update_idle_suspended_group_state(group);
2669 else {
2670 struct kbase_queue_group *protm_grp;
2671 unsigned long flags;
2672
2673 WARN_ON(!kbasep_csf_scheduler_group_is_on_slot_locked(
2674 group));
2675
2676 group->run_state = KBASE_CSF_GROUP_RUNNABLE;
2677
2678 /* A normal mode CSG could be idle onslot during
2679 * protected mode. In this case clear the
2680 * appropriate bit in csg_slots_idle_mask.
2681 */
2682 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
2683 protm_grp = scheduler->active_protm_grp;
2684 if (protm_grp && protm_grp != group) {
2685 clear_bit((unsigned int)group->csg_nr,
2686 scheduler->csg_slots_idle_mask);
2687 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
2688 scheduler->csg_slots_idle_mask[0]);
2689 }
2690 spin_unlock_irqrestore(&scheduler->interrupt_lock,
2691 flags);
2692
2693 /* If GPU is in protected mode then any doorbells rang
2694 * would have no effect. Check if GPU is in protected
2695 * mode and if this group has higher priority than the
2696 * active protected mode group. If so prompt the FW
2697 * to exit protected mode.
2698 */
2699 if (protm_grp &&
2700 group->scan_seq_num < protm_grp->scan_seq_num) {
2701 /* Prompt the FW to exit protected mode */
2702 scheduler_force_protm_exit(kbdev);
2703 }
2704 }
2705 } else if (!queue_group_scheduled_locked(group)) {
2706 int new_val;
2707 insert_group_to_runnable(&kbdev->csf.scheduler, group,
2708 KBASE_CSF_GROUP_RUNNABLE);
2709 /* A new group into the scheduler */
2710 new_val = atomic_inc_return(
2711 &kbdev->csf.scheduler.non_idle_offslot_grps);
2712 KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC,
2713 group, new_val);
2714 }
2715
2716 /* Since a group has become active now, check if GPU needs to be
2717 * powered up. Also rekick the Scheduler.
2718 */
2719 scheduler_wakeup(kbdev, true);
2720
2721 return 0;
2722 }
2723
2724 /**
2725 * set_max_csg_slots() - Set the number of available CSG slots
2726 *
2727 * @kbdev: Pointer of the GPU device.
2728 *
2729 * This function would set/limit the number of CSG slots that
2730 * can be used in the given tick/tock. It would be less than the total CSG
2731 * slots supported by firmware if the number of GPU address space slots
2732 * required to utilize all the CSG slots is more than the available
2733 * address space slots.
2734 */
2735 static inline void set_max_csg_slots(struct kbase_device *kbdev)
2736 {
2737 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
2738 unsigned int total_csg_slots = kbdev->csf.global_iface.group_num;
2739 unsigned int max_address_space_slots =
2740 kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS;
2741
2742 WARN_ON(scheduler->num_active_address_spaces > total_csg_slots);
2743
2744 if (likely(scheduler->num_active_address_spaces <=
2745 max_address_space_slots))
2746 scheduler->num_csg_slots_for_tick = total_csg_slots;
2747 }
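
/*
 * Worked example with hypothetical numbers: assuming 8 HW address spaces,
 * NUM_RESERVED_AS_SLOTS == 1 (reserved for the MCU) and 16 CSG slots
 * reported by firmware:
 *
 *   max_address_space_slots = 8 - 1 = 7
 *   num_active_address_spaces <= 7  ->  num_csg_slots_for_tick = 16
 *
 * When more address spaces are active than are available, the slot count
 * for the tick is instead accumulated per-group in
 * count_active_address_space() below.
 */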
2748
2749 /**
2750 * count_active_address_space() - Count the number of GPU address space slots
2751 *
2752 * @kbdev: Pointer of the GPU device.
2753 * @kctx: Pointer of the Kbase context.
2754 *
2755 * This function would update the counter that is tracking the number of GPU
2756 * address space slots that would be required to program the CS
2757 * group slots from the groups at the head of groups_to_schedule list.
2758 */
2759 static inline void count_active_address_space(struct kbase_device *kbdev,
2760 struct kbase_context *kctx)
2761 {
2762 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
2763 unsigned int total_csg_slots = kbdev->csf.global_iface.group_num;
2764 unsigned int max_address_space_slots =
2765 kbdev->nr_hw_address_spaces - NUM_RESERVED_AS_SLOTS;
2766
2767 if (scheduler->ngrp_to_schedule <= total_csg_slots) {
2768 if (kctx->csf.sched.ngrp_to_schedule == 1)
2769 scheduler->num_active_address_spaces++;
2770
2771 if (scheduler->num_active_address_spaces <=
2772 max_address_space_slots)
2773 scheduler->num_csg_slots_for_tick++;
2774 }
2775 }
2776
2777 /* Two schemes are used in assigning the priority to CSG slots for a given
2778 * CSG from the 'groups_to_schedule' list.
2779 * This is needed as an idle on-slot group is deprioritized by moving it to
2780 * the tail of 'groups_to_schedule' list. As a result it can either get
2781 * evicted from the CSG slot in current tick/tock dealing, or its position
2782 * can be after the lower priority non-idle groups in the 'groups_to_schedule'
2783 * list. The latter case can result in the on-slot subset containing both
2784 * non-idle and idle CSGs, and is handled through the 2nd scheme described
2785 * below.
2786 *
2787 * First scheme :- If all the slots are going to be occupied by the non-idle or
2788 * idle groups, then a simple assignment of the priority is done as per the
2789 * position of a group in the 'groups_to_schedule' list. So maximum priority
2790 * gets assigned to the slot of a group which is at the head of the list.
2791 * Here the 'groups_to_schedule' list would effectively be ordered as per the
2792 * static priority of groups.
2793 *
2794 * Second scheme :- If the slots are going to be occupied by a mix of idle and
2795 * non-idle groups then the priority assignment needs to ensure that the
2796 * priority of a slot belonging to a higher priority idle group will always be
2797 * greater than the priority of a slot belonging to a lower priority non-idle
2798 * group, reflecting the original position of a group in the scan order (i.e
2799 * static priority) 'scan_seq_num', which is set during the prepare phase of a
2800 * tick/tock before the group is moved to 'idle_groups_to_schedule' list if it
2801 * is idle.
2802 * The priority range [MAX_CSG_SLOT_PRIORITY, 0] is partitioned such that the
2803 * first 'slots_for_tick' groups in the original scan order are assigned a priority
2804 * in the subrange [MAX_CSG_SLOT_PRIORITY, MAX_CSG_SLOT_PRIORITY - slots_for_tick),
2805 * whereas the rest of the groups are assigned a priority in the subrange
2806 * [MAX_CSG_SLOT_PRIORITY - slots_for_tick, 0]. This way even if an idle higher
2807 * priority group ends up after the non-idle lower priority groups in the
2808 * 'groups_to_schedule' list, it will get a higher slot priority. And this will
2809 * enable the FW to quickly start the execution of higher priority group when it
2810 * gets de-idled.
2811 */
2812 static u8 get_slot_priority(struct kbase_queue_group *group)
2813 {
2814 struct kbase_csf_scheduler *scheduler =
2815 &group->kctx->kbdev->csf.scheduler;
2816 u8 slot_prio;
2817 u32 slots_for_tick = scheduler->num_csg_slots_for_tick;
2818 u32 used_slots = slots_for_tick - scheduler->remaining_tick_slots;
2819 /* Check if all the slots are going to be occupied by the non-idle or
2820 * idle groups.
2821 */
2822 if (scheduler->non_idle_scanout_grps >= slots_for_tick ||
2823 !scheduler->non_idle_scanout_grps) {
2824 slot_prio = (u8)(MAX_CSG_SLOT_PRIORITY - used_slots);
2825 } else {
2826 /* There will be a mix of idle and non-idle groups. */
2827 if (group->scan_seq_num < slots_for_tick)
2828 slot_prio = (u8)(MAX_CSG_SLOT_PRIORITY -
2829 group->scan_seq_num);
2830 else if (MAX_CSG_SLOT_PRIORITY > (slots_for_tick + used_slots))
2831 slot_prio = (u8)(MAX_CSG_SLOT_PRIORITY - (slots_for_tick + used_slots));
2832 else
2833 slot_prio = 0;
2834 }
2835 return slot_prio;
2836 }
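
/*
 * Worked example with hypothetical numbers for the two schemes described
 * above, taking MAX_CSG_SLOT_PRIORITY == 15 and slots_for_tick == 4:
 *
 *   First scheme (slots occupied purely by non-idle, or purely by idle,
 *   groups): used_slots 0, 1, 2, 3  ->  slot_prio 15, 14, 13, 12.
 *
 *   Second scheme (mix of idle and non-idle groups):
 *     a group with scan_seq_num == 1 (2nd in the original scan order) gets
 *       slot_prio = 15 - 1 = 14, even if it sits at the tail of
 *       'groups_to_schedule' because it was found idle;
 *     a group with scan_seq_num >= slots_for_tick and used_slots == 2 gets
 *       slot_prio = 15 - (4 + 2) = 9.
 */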
2837
2838 /**
2839 * update_resident_groups_priority() - Update the priority of resident groups
2840 *
2841 * @kbdev: The GPU device.
2842 *
2843 * This function will update the priority of all resident queue groups
2844 * that are at the head of groups_to_schedule list, preceding the first
2845 * non-resident group.
2846 *
2847 * This function will also adjust kbase_csf_scheduler.remaining_tick_slots on
2848 * the priority update.
2849 */
2850 static void update_resident_groups_priority(struct kbase_device *kbdev)
2851 {
2852 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
2853 u32 num_groups = scheduler->num_csg_slots_for_tick;
2854
2855 lockdep_assert_held(&kbdev->csf.scheduler.lock);
2856 while (!list_empty(&scheduler->groups_to_schedule)) {
2857 struct kbase_queue_group *group =
2858 list_first_entry(&scheduler->groups_to_schedule,
2859 struct kbase_queue_group,
2860 link_to_schedule);
2861 bool resident =
2862 kbasep_csf_scheduler_group_is_on_slot_locked(group);
2863
2864 if ((group->prepared_seq_num >= num_groups) || !resident)
2865 break;
2866
2867 update_csg_slot_priority(group,
2868 get_slot_priority(group));
2869
2870 /* Drop the head group from the list */
2871 remove_scheduled_group(kbdev, group);
2872 scheduler->remaining_tick_slots--;
2873 }
2874 }
2875
2876 /**
2877 * program_group_on_vacant_csg_slot() - Program a non-resident group on the
2878 * given vacant CSG slot.
2879 * @kbdev: Pointer to the GPU device.
2880 * @slot: Vacant CSG slot number.
2881 *
2882 * This function will program a non-resident group at the head of
2883 * kbase_csf_scheduler.groups_to_schedule list on the given vacant
2884 * CSG slot, provided the initial position of the non-resident
2885 * group in the list is less than the number of CSG slots and there is
2886 * an available GPU address space slot.
2887 * kbase_csf_scheduler.remaining_tick_slots would also be adjusted after
2888 * programming the slot.
2889 */
2890 static void program_group_on_vacant_csg_slot(struct kbase_device *kbdev,
2891 s8 slot)
2892 {
2893 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
2894 struct kbase_queue_group *const group =
2895 list_empty(&scheduler->groups_to_schedule) ? NULL :
2896 list_first_entry(&scheduler->groups_to_schedule,
2897 struct kbase_queue_group,
2898 link_to_schedule);
2899 u32 num_groups = scheduler->num_csg_slots_for_tick;
2900
2901 lockdep_assert_held(&kbdev->csf.scheduler.lock);
2902 if (group && (group->prepared_seq_num < num_groups)) {
2903 bool ret = kbasep_csf_scheduler_group_is_on_slot_locked(group);
2904
2905 if (!WARN_ON(ret)) {
2906 if (kctx_as_enabled(group->kctx) && !group->faulted) {
2907 program_csg_slot(group, slot,
2908 get_slot_priority(group));
2909
2910 if (likely(csg_slot_in_use(kbdev, slot))) {
2911 /* Drop the head group from the list */
2912 remove_scheduled_group(kbdev, group);
2913 scheduler->remaining_tick_slots--;
2914 }
2915 } else {
2916 update_offslot_non_idle_cnt_for_faulty_grp(
2917 group);
2918 remove_scheduled_group(kbdev, group);
2919 }
2920 }
2921 }
2922 }
2923
2924 /**
2925 * program_vacant_csg_slot() - Program the vacant CSG slot with a non-resident
2926 * group and update the priority of resident groups.
2927 *
2928 * @kbdev: Pointer to the GPU device.
2929 * @slot: Vacant CSG slot number.
2930 *
2931 * This function will first update the priority of all resident queue groups
2932 * that are at the head of groups_to_schedule list, preceding the first
2933 * non-resident group, it will then try to program the given CS
2934 * group slot with the non-resident group. Finally update the priority of all
2935 * resident queue groups following the non-resident group.
2936 *
2937 * kbase_csf_scheduler.remaining_tick_slots would also be adjusted.
2938 */
2939 static void program_vacant_csg_slot(struct kbase_device *kbdev, s8 slot)
2940 {
2941 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
2942 struct kbase_csf_csg_slot *const csg_slot =
2943 scheduler->csg_slots;
2944
2945 lockdep_assert_held(&kbdev->csf.scheduler.lock);
2946 WARN_ON(atomic_read(&csg_slot[slot].state) != CSG_SLOT_READY);
2947
2948 /* First update priority for already resident groups (if any)
2949 * before the non-resident group
2950 */
2951 update_resident_groups_priority(kbdev);
2952
2953 /* Now consume the vacant slot for the non-resident group */
2954 program_group_on_vacant_csg_slot(kbdev, slot);
2955
2956 /* Now update priority for already resident groups (if any)
2957 * following the non-resident group
2958 */
2959 update_resident_groups_priority(kbdev);
2960 }
2961
2962 static bool slots_state_changed(struct kbase_device *kbdev,
2963 unsigned long *slots_mask,
2964 bool (*state_check_func)(struct kbase_device *, s8))
2965 {
2966 u32 num_groups = kbdev->csf.global_iface.group_num;
2967 DECLARE_BITMAP(changed_slots, MAX_SUPPORTED_CSGS) = {0};
2968 bool changed = false;
2969 u32 i;
2970
2971 for_each_set_bit(i, slots_mask, num_groups) {
2972 if (state_check_func(kbdev, (s8)i)) {
2973 set_bit(i, changed_slots);
2974 changed = true;
2975 }
2976 }
2977
2978 if (changed)
2979 bitmap_copy(slots_mask, changed_slots, MAX_SUPPORTED_CSGS);
2980
2981 return changed;
2982 }
2983
2984 /**
2985 * program_suspending_csg_slots() - Program the CSG slots vacated on suspension
2986 * of queue groups running on them.
2987 *
2988 * @kbdev: Pointer to the GPU device.
2989 *
2990 * This function will first wait for the ongoing suspension to complete on a
2991 * CSG slot and will then program the vacant slot with the
2992 * non-resident queue group inside the groups_to_schedule list.
2993 * The programming of the non-resident queue group on the vacant slot could
2994 * fail due to unavailability of free GPU address space slot and so the
2995 * programming is re-attempted after the ongoing suspension has completed
2996 * for all the CSG slots.
2997 * The priority of resident groups before and after the non-resident group
2998 * in the groups_to_schedule list would also be updated.
2999 * This would be repeated for all the slots undergoing suspension.
3000 * GPU reset would be initiated if the wait for suspend times out.
3001 */
3002 static void program_suspending_csg_slots(struct kbase_device *kbdev)
3003 {
3004 u32 num_groups = kbdev->csf.global_iface.group_num;
3005 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3006 DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS);
3007 DECLARE_BITMAP(evicted_mask, MAX_SUPPORTED_CSGS) = {0};
3008 bool suspend_wait_failed = false;
3009 long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
3010
3011 lockdep_assert_held(&kbdev->csf.scheduler.lock);
3012
3013 /* In the current implementation, csgs_events_enable_mask would be used
3014 * only to indicate suspending CSGs.
3015 */
3016 bitmap_complement(slot_mask, scheduler->csgs_events_enable_mask,
3017 MAX_SUPPORTED_CSGS);
3018
3019 while (!bitmap_empty(slot_mask, MAX_SUPPORTED_CSGS)) {
3020 DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS);
3021
3022 bitmap_copy(changed, slot_mask, MAX_SUPPORTED_CSGS);
3023
3024 remaining = wait_event_timeout(kbdev->csf.event_wait,
3025 slots_state_changed(kbdev, changed,
3026 csg_slot_stopped_raw),
3027 remaining);
3028
3029 if (remaining) {
3030 u32 i;
3031
3032 for_each_set_bit(i, changed, num_groups) {
3033 struct kbase_queue_group *group =
3034 scheduler->csg_slots[i].resident_group;
3035
3036 if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i))) {
3037 continue;
3038 }
3039 /* The on slot csg is now stopped */
3040 clear_bit(i, slot_mask);
3041
3042 if (likely(group)) {
3043 bool as_fault;
3044 /* Only do save/cleanup if the
3045 * group is not terminated during
3046 * the sleep.
3047 */
3048 save_csg_slot(group);
3049 as_fault = cleanup_csg_slot(group);
3050 /* If AS fault detected, evict it */
3051 if (as_fault) {
3052 sched_evict_group(group, true, true);
3053 set_bit(i, evicted_mask);
3054 }
3055 }
3056
3057 program_vacant_csg_slot(kbdev, (s8)i);
3058 }
3059 } else {
3060 u32 i;
3061
3062 /* Groups that have failed to suspend in time shall
3063 * raise a fatal error as they can no longer be
3064 * safely resumed.
3065 */
3066 for_each_set_bit(i, slot_mask, num_groups) {
3067 struct kbase_queue_group *const group =
3068 scheduler->csg_slots[i].resident_group;
3069
3070 struct base_gpu_queue_group_error const
3071 err_payload = { .error_type =
3072 BASE_GPU_QUEUE_GROUP_ERROR_FATAL,
3073 .payload = {
3074 .fatal_group = {
3075 .status =
3076 GPU_EXCEPTION_TYPE_SW_FAULT_2,
3077 } } };
3078
3079 if (unlikely(group == NULL))
3080 continue;
3081
3082 kbase_csf_add_group_fatal_error(group,
3083 &err_payload);
3084 kbase_event_wakeup(group->kctx);
3085
3086 /* TODO GPUCORE-25328: The CSG can't be
3087 * terminated, the GPU will be reset as a
3088 * work-around.
3089 */
3090 dev_warn(
3091 kbdev->dev,
3092 "[%llu] Group %d of context %d_%d on slot %u failed to suspend (timeout %d ms)",
3093 kbase_backend_get_cycle_cnt(kbdev),
3094 group->handle, group->kctx->tgid,
3095 group->kctx->id, i,
3096 kbdev->csf.fw_timeout_ms);
3097
3098 /* The group has failed suspension, stop
3099 * further examination.
3100 */
3101 clear_bit(i, slot_mask);
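				/* Re-enable events for this slot, as the CSG is
				 * no longer regarded as going through suspension
				 * (see suspend_queue_group()).
				 */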
3102 set_bit(i, scheduler->csgs_events_enable_mask);
3103 update_offslot_non_idle_cnt_for_onslot_grp(
3104 group);
3105 }
3106
3107 suspend_wait_failed = true;
3108 }
3109 }
3110
3111 if (!bitmap_empty(evicted_mask, MAX_SUPPORTED_CSGS))
3112 dev_info(kbdev->dev, "Scheduler evicting slots: 0x%*pb\n",
3113 num_groups, evicted_mask);
3114
3115 if (likely(!suspend_wait_failed)) {
3116 u32 i;
3117
3118 while (scheduler->ngrp_to_schedule &&
3119 scheduler->remaining_tick_slots) {
3120 i = find_first_zero_bit(scheduler->csg_inuse_bitmap,
3121 num_groups);
3122 if (WARN_ON(i == num_groups))
3123 break;
3124 program_vacant_csg_slot(kbdev, (s8)i);
3125 if (!csg_slot_in_use(kbdev, (int)i)) {
3126 dev_warn(kbdev->dev, "Couldn't use CSG slot %d despite being vacant", i);
3127 break;
3128 }
3129 }
3130 } else {
3131 if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
3132 kbase_reset_gpu(kbdev);
3133 }
3134 }
3135
3136 static void suspend_queue_group(struct kbase_queue_group *group)
3137 {
3138 unsigned long flags;
3139 struct kbase_csf_scheduler *const scheduler =
3140 &group->kctx->kbdev->csf.scheduler;
3141
3142 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
3143 /* This shall be used in program_suspending_csg_slots() where we
3144 * assume that whilst CSGs are being suspended, this bitmask is not
3145 * used by anything else, i.e. it indicates only the CSGs going
3146 * through suspension.
3147 */
3148 clear_bit(group->csg_nr, scheduler->csgs_events_enable_mask);
3149 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
3150
3151 /* If AS fault detected, terminate the group */
3152 if (!kctx_as_enabled(group->kctx) || group->faulted)
3153 term_csg_slot(group);
3154 else
3155 suspend_csg_slot(group);
3156 }
3157
3158 static void wait_csg_slots_start(struct kbase_device *kbdev)
3159 {
3160 u32 num_groups = kbdev->csf.global_iface.group_num;
3161 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3162 long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
3163 DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0};
3164 u32 i;
3165
3166 lockdep_assert_held(&kbdev->csf.scheduler.lock);
3167
3168 /* extract start slot flags for check */
3169 for (i = 0; i < num_groups; i++) {
3170 if (atomic_read(&scheduler->csg_slots[i].state) ==
3171 CSG_SLOT_READY2RUN)
3172 set_bit(i, slot_mask);
3173 }
3174
3175 while (!bitmap_empty(slot_mask, MAX_SUPPORTED_CSGS)) {
3176 DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS);
3177
3178 bitmap_copy(changed, slot_mask, MAX_SUPPORTED_CSGS);
3179
3180 remaining = wait_event_timeout(kbdev->csf.event_wait,
3181 slots_state_changed(kbdev, changed, csg_slot_running),
3182 remaining);
3183
3184 if (remaining) {
3185 for_each_set_bit(i, changed, num_groups) {
3186 struct kbase_queue_group *group =
3187 scheduler->csg_slots[i].resident_group;
3188
3189 /* The on slot csg is now running */
3190 clear_bit(i, slot_mask);
3191 group->run_state = KBASE_CSF_GROUP_RUNNABLE;
3192 }
3193 } else {
3194 dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for CSG slots to start, slots: 0x%*pb\n",
3195 kbase_backend_get_cycle_cnt(kbdev),
3196 kbdev->csf.fw_timeout_ms,
3197 num_groups, slot_mask);
3198
3199 if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
3200 kbase_reset_gpu(kbdev);
3201 break;
3202 }
3203 }
3204 }
3205
3206 /**
3207 * group_on_slot_is_idle() - Check if the given slot has a CSG-idle state
3208 * flagged after the completion of a CSG status
3209 * update command
3210 *
3211 * @kbdev: Pointer to the GPU device.
3212 * @slot: The given slot for checking an occupying resident group's idle
3213 * state.
3214 *
3215 * This function is called at the start of scheduling tick to check the
3216 * idle status of a queue group resident on a CSG slot.
3217 * The caller must make sure the corresponding status update command has
3218 * been called and completed before checking this status.
3219 *
3220 * Return: true if the group resident on slot is idle, otherwise false.
3221 */
3222 static bool group_on_slot_is_idle(struct kbase_device *kbdev,
3223 unsigned long slot)
3224 {
3225 struct kbase_csf_cmd_stream_group_info *ginfo =
3226 &kbdev->csf.global_iface.groups[slot];
3227 bool idle = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) &
3228 CSG_STATUS_STATE_IDLE_MASK;
3229
3230 lockdep_assert_held(&kbdev->csf.scheduler.lock);
3231
3232 return idle;
3233 }
3234
3235 /**
3236 * slots_update_state_changed() - Check the handshake state of a subset of
3237 * command group slots.
3238 *
3239 * @kbdev: The GPU device.
3240 * @field_mask: The field mask for checking the state in the csg_req/ack.
3241 * @slots_mask: A bit_map specifying the slots to check.
3242 * @slots_done: A cleared bit_map for returning the slots that
3243 * have finished update.
3244 *
3245 * Checks the state of a subset of slots selected through the slots_mask
3246 * bit_map. Records which slots' handshakes completed and sends them back in
3247 * the slots_done bit_map.
3248 *
3249 * Return: true if the slots_done is set for at least one slot.
3250 * Otherwise false.
3251 */
3252 static
3253 bool slots_update_state_changed(struct kbase_device *kbdev, u32 field_mask,
3254 const unsigned long *slots_mask, unsigned long *slots_done)
3255 {
3256 u32 num_groups = kbdev->csf.global_iface.group_num;
3257 bool changed = false;
3258 u32 i;
3259
3260 lockdep_assert_held(&kbdev->csf.scheduler.lock);
3261
3262 for_each_set_bit(i, slots_mask, num_groups) {
3263 struct kbase_csf_cmd_stream_group_info const *const ginfo =
3264 &kbdev->csf.global_iface.groups[i];
3265 u32 state = kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ);
3266
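		/* The CSG interface uses a toggle handshake: the host flips a
		 * request bit in CSG_REQ and the firmware acknowledges by
		 * flipping the matching bit in CSG_ACK. The handshake for a
		 * field is complete once (CSG_REQ ^ CSG_ACK) & field_mask == 0.
		 */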
3267 state ^= kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
3268
3269 if (!(state & field_mask)) {
3270 set_bit(i, slots_done);
3271 changed = true;
3272 }
3273 }
3274
3275 return changed;
3276 }
3277
3278 /**
3279 * wait_csg_slots_handshake_ack - Wait for the req/ack handshakes to complete
3280 *                                on the specified groups.
3281 *
3282 * @kbdev: Pointer to the GPU device.
3283 * @field_mask: The field mask for checking the state in the csg_req/ack.
3284 * @slot_mask: Bitmap reflecting the slots, the function will modify
3285 * the acknowledged slots by clearing their corresponding
3286 * bits.
3287 * @wait_in_jiffies: Wait duration in jiffies, controlling the time-out.
3288 *
3289 * This function waits for the acknowledgment of the requests that have
3290 * already been placed for the CSG slots by the caller. Currently used for
3291 * the CSG priority update and status update requests.
3292 *
3293 * Return: 0 if all the specified slots are acknowledged; otherwise
3294 *         -ETIMEDOUT. On timeout, the bits of the unacknowledged slots
3295 *         remain set in the slot_mask.
3296 */
3297 static int wait_csg_slots_handshake_ack(struct kbase_device *kbdev,
3298 u32 field_mask, unsigned long *slot_mask, long wait_in_jiffies)
3299 {
3300 const u32 num_groups = kbdev->csf.global_iface.group_num;
3301 long remaining = wait_in_jiffies;
3302
3303 lockdep_assert_held(&kbdev->csf.scheduler.lock);
3304
3305 while (!bitmap_empty(slot_mask, num_groups) &&
3306 !kbase_reset_gpu_is_active(kbdev)) {
3307 DECLARE_BITMAP(dones, MAX_SUPPORTED_CSGS) = { 0 };
3308
3309 remaining = wait_event_timeout(kbdev->csf.event_wait,
3310 slots_update_state_changed(kbdev, field_mask,
3311 slot_mask, dones),
3312 remaining);
3313
3314 if (remaining)
3315 bitmap_andnot(slot_mask, slot_mask, dones, num_groups);
3316 		else {
3319 			/* Timed-out on the wait */
3320 			return -ETIMEDOUT;
3321 		}
3322 }
3323
3324 return 0;
3325 }
3326
3327 static void wait_csg_slots_finish_prio_update(struct kbase_device *kbdev)
3328 {
3329 unsigned long *slot_mask =
3330 kbdev->csf.scheduler.csg_slots_prio_update;
3331 long wait_time = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
3332 int ret = wait_csg_slots_handshake_ack(kbdev, CSG_REQ_EP_CFG_MASK,
3333 slot_mask, wait_time);
3334
3335 lockdep_assert_held(&kbdev->csf.scheduler.lock);
3336
3337 if (ret != 0) {
3338 /* The update timeout is not regarded as a serious
3339 * issue, no major consequences are expected as a
3340 * result, so just warn the case.
3341 * result, so just warn about the case.
3342 dev_warn(
3343 kbdev->dev,
3344 "[%llu] Timeout (%d ms) on CSG_REQ:EP_CFG, skipping the update wait: slot mask=0x%lx",
3345 kbase_backend_get_cycle_cnt(kbdev),
3346 kbdev->csf.fw_timeout_ms,
3347 slot_mask[0]);
3348 }
3349 }
3350
3351 void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev,
3352 struct kbase_context *kctx, struct list_head *evicted_groups)
3353 {
3354 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3355 struct kbase_queue_group *group;
3356 u32 num_groups = kbdev->csf.global_iface.group_num;
3357 u32 slot;
3358 DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0};
3359
3360 lockdep_assert_held(&kctx->csf.lock);
3361 mutex_lock(&scheduler->lock);
3362
3363 /* This code is only called during reset, so we don't wait for the CSG
3364 * slots to be stopped
3365 */
3366 WARN_ON(!kbase_reset_gpu_is_active(kbdev));
3367
3368 KBASE_KTRACE_ADD(kbdev, EVICT_CTX_SLOTS, kctx, 0u);
3369 for (slot = 0; slot < num_groups; slot++) {
3370 group = kbdev->csf.scheduler.csg_slots[slot].resident_group;
3371 if (group && group->kctx == kctx) {
3372 bool as_fault;
3373
3374 term_csg_slot(group);
3375 as_fault = cleanup_csg_slot(group);
3376 /* remove the group from the scheduler list */
3377 sched_evict_group(group, as_fault, false);
3378 /* return the evicted group to the caller */
3379 list_add_tail(&group->link, evicted_groups);
3380 set_bit(slot, slot_mask);
3381 }
3382 }
3383
3384 dev_info(kbdev->dev, "Evicting context %d_%d slots: 0x%*pb\n",
3385 kctx->tgid, kctx->id, num_groups, slot_mask);
3386
3387 mutex_unlock(&scheduler->lock);
3388 }
3389
3390 /**
3391 * scheduler_slot_protm_ack - Acknowledging the protected region requests
3392 * from the resident group on a given slot.
3393 *
3394 * @kbdev: Pointer to the GPU device.
3395 * @group: Pointer to the resident group on the given slot.
3396 * @slot: The slot that the given group is actively operating on.
3397 *
3398 * The function assumes that the given slot is in a stable running state and
3399 * that the caller has already determined that any pending protected region
3400 * requests of the resident group should be acknowledged.
3401 *
3402 * Return: true if the group had pending protm request(s) that were
3403 *         acknowledged. The caller should then arrange to enter protected
3404 *         mode to service them. Otherwise false, indicating the group has no
3405 *         pending protm request.
3406 */
3407 static bool scheduler_slot_protm_ack(struct kbase_device *const kbdev,
3408 struct kbase_queue_group *const group,
3409 const int slot)
3410 {
3411 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
3412 bool protm_ack = false;
3413 struct kbase_csf_cmd_stream_group_info *ginfo =
3414 &kbdev->csf.global_iface.groups[slot];
3415 u32 max_csi;
3416 int i;
3417
3418 if (WARN_ON(scheduler->csg_slots[slot].resident_group != group))
3419 return protm_ack;
3420
3421 lockdep_assert_held(&scheduler->lock);
3422 lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.interrupt_lock);
3423
3424 max_csi = ginfo->stream_num;
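	/* Walk the CSIs of the group that have a pending protected-mode
	 * request and acknowledge each one via the CS_REQ/CS_ACK toggle
	 * handshake.
	 */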
3425 for (i = find_first_bit(group->protm_pending_bitmap, max_csi);
3426 i < max_csi;
3427 i = find_next_bit(group->protm_pending_bitmap, max_csi, i + 1)) {
3428 struct kbase_queue *queue = group->bound_queues[i];
3429
3430 clear_bit(i, group->protm_pending_bitmap);
3431 KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, PROTM_PENDING_CLEAR, group,
3432 queue, group->protm_pending_bitmap[0]);
3433
3434 if (!WARN_ON(!queue) && queue->enabled) {
3435 struct kbase_csf_cmd_stream_info *stream =
3436 &ginfo->streams[i];
3437 u32 cs_protm_ack = kbase_csf_firmware_cs_output(
3438 stream, CS_ACK) &
3439 CS_ACK_PROTM_PEND_MASK;
3440 u32 cs_protm_req = kbase_csf_firmware_cs_input_read(
3441 stream, CS_REQ) &
3442 CS_REQ_PROTM_PEND_MASK;
3443
3444 KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_ACK, group,
3445 queue, cs_protm_ack ^ cs_protm_req);
3446
3447 if (cs_protm_ack == cs_protm_req) {
3448 dev_dbg(kbdev->dev,
3449 "PROTM-ack already done for queue-%d group-%d slot-%d",
3450 queue->csi_index, group->handle, slot);
3451 continue;
3452 }
3453
3454 kbase_csf_firmware_cs_input_mask(stream, CS_REQ,
3455 cs_protm_ack,
3456 CS_ACK_PROTM_PEND_MASK);
3457 protm_ack = true;
3458 dev_dbg(kbdev->dev,
3459 "PROTM-ack for queue-%d, group-%d slot-%d",
3460 queue->csi_index, group->handle, slot);
3461 }
3462 }
3463
3464 return protm_ack;
3465 }
3466
3467 /**
3468 * scheduler_group_check_protm_enter - Request the given group to be evaluated
3469 * for triggering the protected mode.
3470 *
3471 * @kbdev: Pointer to the GPU device.
3472 * @input_grp: Pointer to the GPU queue group.
3473 *
3474 * The function assumes the given group is either an active running group or
3475 * the scheduler internally maintained field scheduler->top_grp.
3476 *
3477 * If the GPU is not already running in protected mode and the input group
3478 * has protected region requests from its bound queues, the requests are
3479 * acknowledged and the GPU is instructed to enter the protected mode.
3480 */
3481 static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev,
3482 struct kbase_queue_group *const input_grp)
3483 {
3484 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3485 unsigned long flags;
3486 bool protm_in_use;
3487
3488 lockdep_assert_held(&scheduler->lock);
3489
3490 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
3491
3492 /* Check if the previous transition to enter & exit the protected
3493 * mode has completed or not.
3494 */
3495 protm_in_use = kbase_csf_scheduler_protected_mode_in_use(kbdev) ||
3496 kbdev->protected_mode;
3497 KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_CHECK_PROTM_ENTER, input_grp,
3498 protm_in_use);
3499
3500 /* Firmware samples the PROTM_PEND ACK bit for CSs when
3501 * Host sends PROTM_ENTER global request. So if PROTM_PEND ACK bit
3502 * is set for a CS after Host has sent the PROTM_ENTER
3503 * Global request, then there is no guarantee that firmware will
3504 * notice that prior to switching to protected mode. And firmware
3505 * may not again raise the PROTM_PEND interrupt for that CS
3506 * later on. To avoid that uncertainty PROTM_PEND ACK bit
3507 * is not set for a CS if the request to enter protected
3508 * mode has already been sent. It will be set later (after the exit
3509 * from protected mode has taken place) when the group to which
3510 * CS is bound becomes the top group.
3511 *
3512 * The actual decision to enter protected mode hinges on the input group
3513 * being the top priority group (or, in case the previous top-group was
3514 * evicted from the scheduler during the tick, its would-be replacement),
3515 * and on it currently being in a stable state (i.e. the slot state is
3516 * running).
3517 */
3518 if (!protm_in_use && !WARN_ON(!input_grp)) {
3519 const int slot =
3520 kbase_csf_scheduler_group_get_slot_locked(input_grp);
3521
3522 /* check the input_grp is running and requesting protected mode
3523 */
3524 if (slot >= 0 &&
3525 atomic_read(&scheduler->csg_slots[slot].state) ==
3526 CSG_SLOT_RUNNING) {
3527 if (kctx_as_enabled(input_grp->kctx) &&
3528 scheduler_slot_protm_ack(kbdev, input_grp, slot)) {
3529 /* Option of acknowledging to multiple
3530 * CSGs from the same kctx is dropped,
3531 * after consulting with the
3532 * architecture team. See the comment in
3533 * GPUCORE-21394.
3534 */
3535
3536 /* Disable the idle timer */
3537 disable_gpu_idle_fw_timer_locked(kbdev);
3538
3539 /* Switch to protected mode */
3540 scheduler->active_protm_grp = input_grp;
3541 KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_ENTER_PROTM,
3542 input_grp, 0u);
3543
3544 kbase_csf_enter_protected_mode(kbdev);
3545 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
3546
3547 kbase_csf_wait_protected_mode_enter(kbdev);
3548 return;
3549 }
3550 }
3551 }
3552
3553 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
3554 }
3555
3556 static void scheduler_apply(struct kbase_device *kbdev)
3557 {
3558 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3559 const u32 total_csg_slots = kbdev->csf.global_iface.group_num;
3560 const u32 available_csg_slots = scheduler->num_csg_slots_for_tick;
3561 u32 suspend_cnt = 0;
3562 u32 remain_cnt = 0;
3563 u32 resident_cnt = 0;
3564 struct kbase_queue_group *group;
3565 u32 i;
3566 u32 spare;
3567
3568 lockdep_assert_held(&scheduler->lock);
3569
3570 /* Suspend those resident groups not in the run list */
3571 for (i = 0; i < total_csg_slots; i++) {
3572 group = scheduler->csg_slots[i].resident_group;
3573 if (group) {
3574 resident_cnt++;
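			/* Groups whose prepared (scan-out) order falls beyond
			 * the slots available for this tick lose their
			 * residency and are suspended.
			 */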
3575 if (group->prepared_seq_num >= available_csg_slots) {
3576 suspend_queue_group(group);
3577 suspend_cnt++;
3578 } else
3579 remain_cnt++;
3580 }
3581 }
3582
3583 /* Initialize the remaining available csg slots for the tick/tock */
3584 scheduler->remaining_tick_slots = available_csg_slots;
3585
3586 /* If there are spare slots, apply heads in the list */
3587 spare = (available_csg_slots > resident_cnt) ?
3588 (available_csg_slots - resident_cnt) : 0;
3589 while (!list_empty(&scheduler->groups_to_schedule)) {
3590 group = list_first_entry(&scheduler->groups_to_schedule,
3591 struct kbase_queue_group,
3592 link_to_schedule);
3593
3594 if (kbasep_csf_scheduler_group_is_on_slot_locked(group) &&
3595 group->prepared_seq_num < available_csg_slots) {
3596 /* One of the resident remainders */
3597 update_csg_slot_priority(group,
3598 get_slot_priority(group));
3599 } else if (spare != 0) {
3600 s8 slot = (s8)find_first_zero_bit(
3601 kbdev->csf.scheduler.csg_inuse_bitmap,
3602 total_csg_slots);
3603
3604 if (WARN_ON(slot >= (s8)total_csg_slots))
3605 break;
3606
3607 if (!kctx_as_enabled(group->kctx) || group->faulted) {
3608 /* Drop the head group and continue */
3609 update_offslot_non_idle_cnt_for_faulty_grp(
3610 group);
3611 remove_scheduled_group(kbdev, group);
3612 continue;
3613 }
3614 program_csg_slot(group, slot,
3615 get_slot_priority(group));
3616 if (unlikely(!csg_slot_in_use(kbdev, slot)))
3617 break;
3618
3619 spare--;
3620 } else
3621 break;
3622
3623 /* Drop the head csg from the list */
3624 remove_scheduled_group(kbdev, group);
3625 if (!WARN_ON(!scheduler->remaining_tick_slots))
3626 scheduler->remaining_tick_slots--;
3627 }
3628
3629 /* Dealing with groups currently going through suspend */
3630 program_suspending_csg_slots(kbdev);
3631 }
3632
3633 static void scheduler_ctx_scan_groups(struct kbase_device *kbdev,
3634 struct kbase_context *kctx, int priority)
3635 {
3636 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3637 struct kbase_queue_group *group;
3638
3639 lockdep_assert_held(&scheduler->lock);
3640 if (WARN_ON(priority < 0) ||
3641 WARN_ON(priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT))
3642 return;
3643
3644 if (!kctx_as_enabled(kctx))
3645 return;
3646
3647 list_for_each_entry(group, &kctx->csf.sched.runnable_groups[priority],
3648 link) {
3649 if (WARN_ON(!list_empty(&group->link_to_schedule)))
3650 /* This would be a bug */
3651 list_del_init(&group->link_to_schedule);
3652
3653 if (unlikely(group->faulted))
3654 continue;
3655
3656 /* Set the scanout sequence number, starting from 0 */
3657 group->scan_seq_num = scheduler->csg_scan_count_for_tick++;
3658
3659 if (queue_group_idle_locked(group)) {
3660 if (on_slot_group_idle_locked(group))
3661 list_add_tail(&group->link_to_schedule,
3662 &scheduler->idle_groups_to_schedule);
3663 continue;
3664 }
3665
3666 if (!scheduler->ngrp_to_schedule) {
3667 /* keep the top csg's origin */
3668 scheduler->top_ctx = kctx;
3669 scheduler->top_grp = group;
3670 }
3671
3672 list_add_tail(&group->link_to_schedule,
3673 &scheduler->groups_to_schedule);
3674 group->prepared_seq_num = scheduler->ngrp_to_schedule++;
3675
3676 kctx->csf.sched.ngrp_to_schedule++;
3677 count_active_address_space(kbdev, kctx);
3678 }
3679 }
3680
3681 /**
3682 * scheduler_rotate_groups() - Rotate the runnable queue groups to provide
3683 * fairness of scheduling within a single
3684 * kbase_context.
3685 *
3686 * @kbdev: Pointer to the GPU device.
3687 *
3688 * Since only kbase_csf_scheduler's top_grp (i.e. the queue group assigned
3689 * the highest slot priority) is guaranteed to get the resources that it
3690 * needs we only rotate the kbase_context corresponding to it -
3691 * kbase_csf_scheduler's top_ctx.
3692 *
3693 * The priority level chosen for rotation is the one containing the previous
3694 * scheduling cycle's kbase_csf_scheduler's top_grp.
3695 *
3696 * In a 'fresh-slice-cycle' this always corresponds to the highest group
3697 * priority in use by kbase_csf_scheduler's top_ctx. That is, it's the priority
3698 * level of the previous scheduling cycle's first runnable kbase_context.
3699 *
3700 * We choose this priority level because when higher priority work is
3701 * scheduled, we should always cause the scheduler to run and do a scan. The
3702 * scan always enumerates the highest priority work first (whether that be
3703 * based on process priority or group priority), and thus
3704 * kbase_csf_scheduler's top_grp will point to the first of those high priority
3705 * groups, which necessarily must be the highest priority group in
3706 * kbase_csf_scheduler's top_ctx. The fresh-slice-cycle will run later and pick
3707 * up that group appropriately.
3708 *
3709 * If kbase_csf_scheduler's top_grp was instead evicted (and thus is NULL),
3710 * then no explicit rotation occurs on the next fresh-slice-cycle schedule, but
3711 * will set up kbase_csf_scheduler's top_ctx again for the next scheduling
3712 * cycle. Implicitly, a rotation had already occurred by removing
3713 * the kbase_csf_scheduler's top_grp.
3714 *
3715 * If kbase_csf_scheduler's top_grp became idle and all other groups belonging
3716 * to kbase_csf_scheduler's top_grp's priority level in kbase_csf_scheduler's
3717 * top_ctx are also idle, then the effect of this will be to rotate idle
3718 * groups, which might not actually become resident in the next
3719 * scheduling slice. However this is acceptable since a queue group becoming
3720 * idle is implicitly a rotation (as above with evicted queue groups), as it
3721 * automatically allows a new queue group to take the maximum slot priority
3722 * whilst the idle kbase_csf_scheduler's top_grp ends up near the back of
3723 * the kbase_csf_scheduler's groups_to_schedule list. In this example, it will
3724 * be for a group in the next lowest priority level or in absence of those the
3725 * next kbase_context's queue groups.
3726 */
3727 static void scheduler_rotate_groups(struct kbase_device *kbdev)
3728 {
3729 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3730 struct kbase_context *const top_ctx = scheduler->top_ctx;
3731 struct kbase_queue_group *const top_grp = scheduler->top_grp;
3732
3733 lockdep_assert_held(&scheduler->lock);
3734 if (top_ctx && top_grp) {
3735 struct list_head *list =
3736 &top_ctx->csf.sched.runnable_groups[top_grp->priority];
3737
3738 WARN_ON(top_grp->kctx != top_ctx);
3739 if (!WARN_ON(list_empty(list))) {
3740 struct kbase_queue_group *new_head_grp;
3741 list_move_tail(&top_grp->link, list);
3742 new_head_grp = (!list_empty(list)) ?
3743 list_first_entry(list, struct kbase_queue_group, link) :
3744 NULL;
3745 KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_ROTATE_RUNNABLE,
3746 top_grp, top_ctx->csf.sched.num_runnable_grps);
3747 KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_HEAD_RUNNABLE,
3748 new_head_grp, 0u);
3749 dev_dbg(kbdev->dev,
3750 "groups rotated for a context, num_runnable_groups: %u\n",
3751 scheduler->top_ctx->csf.sched.num_runnable_grps);
3752 }
3753 }
3754 }
3755
3756 static void scheduler_rotate_ctxs(struct kbase_device *kbdev)
3757 {
3758 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3759 struct list_head *list = &scheduler->runnable_kctxs;
3760
3761 lockdep_assert_held(&scheduler->lock);
3762 if (scheduler->top_ctx) {
3763 if (!WARN_ON(list_empty(list))) {
3764 struct kbase_context *pos;
3765 bool found = false;
3766
3767 /* Locate the ctx on the list */
3768 list_for_each_entry(pos, list, csf.link) {
3769 if (scheduler->top_ctx == pos) {
3770 found = true;
3771 break;
3772 }
3773 }
3774
3775 if (!WARN_ON(!found)) {
3776 struct kbase_context *new_head_kctx;
3777 list_move_tail(&pos->csf.link, list);
3778 KBASE_KTRACE_ADD(kbdev, SCHEDULER_ROTATE_RUNNABLE, pos,
3779 0u);
3780 new_head_kctx = (!list_empty(list)) ?
3781 list_first_entry(list, struct kbase_context, csf.link) :
3782 NULL;
3783 KBASE_KTRACE_ADD(kbdev, SCHEDULER_HEAD_RUNNABLE,
3784 new_head_kctx, 0u);
3785 dev_dbg(kbdev->dev, "contexts rotated\n");
3786 }
3787 }
3788 }
3789 }
3790
3791 /**
3792 * scheduler_update_idle_slots_status() - Get the status update for the CSG
3793 * slots for which the IDLE notification was received
3794 * previously.
3795 *
3796 * @kbdev: Pointer to the GPU device.
3797 * @csg_bitmap: Bitmap of the CSG slots for which
3798 * the status update request completed successfully.
3799 * @failed_csg_bitmap: Bitmap of the CSG slots for which
3800 *                     the status update request timed out.
3801 *
3802 * This function sends a CSG status update request for all the CSG slots
3803 * present in the bitmap scheduler->csg_slots_idle_mask and waits for the
3804 * request to complete.
3805 * The bits set in the scheduler->csg_slots_idle_mask bitmap are cleared by
3806 * this function.
3807 */
3808 static void scheduler_update_idle_slots_status(struct kbase_device *kbdev,
3809 unsigned long *csg_bitmap, unsigned long *failed_csg_bitmap)
3810 {
3811 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
3812 const u32 num_groups = kbdev->csf.global_iface.group_num;
3813 struct kbase_csf_global_iface *const global_iface =
3814 &kbdev->csf.global_iface;
3815 unsigned long flags, i;
3816
3817 lockdep_assert_held(&scheduler->lock);
3818
3819 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
3820 for_each_set_bit(i, scheduler->csg_slots_idle_mask, num_groups) {
3821 struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i];
3822 struct kbase_queue_group *group = csg_slot->resident_group;
3823 struct kbase_csf_cmd_stream_group_info *const ginfo =
3824 &global_iface->groups[i];
3825 u32 csg_req;
3826
3827 clear_bit(i, scheduler->csg_slots_idle_mask);
3828 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
3829 scheduler->csg_slots_idle_mask[0]);
3830 if (WARN_ON(!group))
3831 continue;
3832
3833 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STATUS_UPDATE, group,
3834 i);
3835
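		/* Toggle the STATUS_UPDATE request bit relative to the current
		 * ACK value; the firmware completes the handshake by flipping
		 * the same bit in CSG_ACK.
		 */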
3836 csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
3837 csg_req ^= CSG_REQ_STATUS_UPDATE_MASK;
3838 kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req,
3839 CSG_REQ_STATUS_UPDATE_MASK);
3840
3841 set_bit(i, csg_bitmap);
3842 }
3843 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
3844
3845 /* The groups are aggregated into a single kernel doorbell request */
3846 if (!bitmap_empty(csg_bitmap, num_groups)) {
3847 long wt =
3848 kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
3849 u32 db_slots = (u32)csg_bitmap[0];
3850
3851 kbase_csf_ring_csg_slots_doorbell(kbdev, db_slots);
3852
3853 if (wait_csg_slots_handshake_ack(kbdev,
3854 CSG_REQ_STATUS_UPDATE_MASK, csg_bitmap, wt)) {
3855 dev_warn(
3856 kbdev->dev,
3857 "[%llu] Timeout (%d ms) on CSG_REQ:STATUS_UPDATE, treat groups as not idle: slot mask=0x%lx",
3858 kbase_backend_get_cycle_cnt(kbdev),
3859 kbdev->csf.fw_timeout_ms,
3860 csg_bitmap[0]);
3861
3862 /* Store the bitmap of timed out slots */
3863 bitmap_copy(failed_csg_bitmap, csg_bitmap, num_groups);
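			/* After the timed-out wait, the bits still set in
			 * csg_bitmap are the unacknowledged slots; invert it
			 * against the originally rung slots so that csg_bitmap
			 * holds only the slots whose status update completed.
			 */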
3864 csg_bitmap[0] = ~csg_bitmap[0] & db_slots;
3865 } else {
3866 KBASE_KTRACE_ADD(kbdev, SLOTS_STATUS_UPDATE_ACK, NULL,
3867 db_slots);
3868 csg_bitmap[0] = db_slots;
3869 }
3870 }
3871 }
3872
3873 /**
3874 * scheduler_handle_idle_slots() - Update the idle status of queue groups
3875 * resident on CSG slots for which the
3876 * IDLE notification was received previously.
3877 *
3878 * @kbdev: Pointer to the GPU device.
3879 *
3880 * This function is called at the start of scheduling tick/tock to reconfirm
3881 * the idle status of queue groups resident on CSG slots for
3882 * which idle notification was received previously, i.e. all the CSG slots
3883 * present in the bitmap scheduler->csg_slots_idle_mask.
3884 * The confirmation is done by sending the CSG status update request to the
3885 * firmware. On completion, the firmware will mark the idleness at the
3886 * slot's interface CSG_STATUS_STATE register accordingly.
3887 *
3888 * The run state of the groups resident on still idle CSG slots is changed to
3889 * KBASE_CSF_GROUP_IDLE and the bitmap scheduler->csg_slots_idle_mask is
3890 * updated accordingly.
3891 * The bits corresponding to slots for which the status update request timed
3892 * out remain set in scheduler->csg_slots_idle_mask.
3893 */
3894 static void scheduler_handle_idle_slots(struct kbase_device *kbdev)
3895 {
3896 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3897 u32 num_groups = kbdev->csf.global_iface.group_num;
3898 unsigned long flags, i;
3899 DECLARE_BITMAP(csg_bitmap, MAX_SUPPORTED_CSGS) = { 0 };
3900 DECLARE_BITMAP(failed_csg_bitmap, MAX_SUPPORTED_CSGS) = { 0 };
3901
3902 lockdep_assert_held(&scheduler->lock);
3903
3904 scheduler_update_idle_slots_status(kbdev, csg_bitmap,
3905 failed_csg_bitmap);
3906
3907 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
3908 for_each_set_bit(i, csg_bitmap, num_groups) {
3909 struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i];
3910 struct kbase_queue_group *group = csg_slot->resident_group;
3911
3912 if (WARN_ON(atomic_read(&csg_slot->state) != CSG_SLOT_RUNNING))
3913 continue;
3914 if (WARN_ON(!group))
3915 continue;
3916 if (WARN_ON(group->run_state != KBASE_CSF_GROUP_RUNNABLE &&
3917 group->run_state != KBASE_CSF_GROUP_IDLE))
3918 continue;
3919 if (WARN_ON(group->priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT))
3920 continue;
3921
3922 if (group_on_slot_is_idle(kbdev, i)) {
3923 group->run_state = KBASE_CSF_GROUP_IDLE;
3924 set_bit(i, scheduler->csg_slots_idle_mask);
3925 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET,
3926 group, scheduler->csg_slots_idle_mask[0]);
3927 } else
3928 group->run_state = KBASE_CSF_GROUP_RUNNABLE;
3929 }
3930
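	/* Slots whose status update request timed out are kept flagged in
	 * csg_slots_idle_mask so that their idle state gets re-evaluated on a
	 * subsequent tick/tock.
	 */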
3931 bitmap_or(scheduler->csg_slots_idle_mask,
3932 scheduler->csg_slots_idle_mask,
3933 failed_csg_bitmap, num_groups);
3934 KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET, NULL,
3935 scheduler->csg_slots_idle_mask[0]);
3936 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
3937 }
3938
3939 static void scheduler_scan_idle_groups(struct kbase_device *kbdev)
3940 {
3941 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3942 struct kbase_queue_group *group, *n;
3943
3944 list_for_each_entry_safe(group, n, &scheduler->idle_groups_to_schedule,
3945 link_to_schedule) {
3946
3947 WARN_ON(!on_slot_group_idle_locked(group));
3948
3949 if (!scheduler->ngrp_to_schedule) {
3950 /* keep the top csg's origin */
3951 scheduler->top_ctx = group->kctx;
3952 scheduler->top_grp = group;
3953 }
3954
3955 group->prepared_seq_num = scheduler->ngrp_to_schedule++;
3956 list_move_tail(&group->link_to_schedule,
3957 &scheduler->groups_to_schedule);
3958
3959 group->kctx->csf.sched.ngrp_to_schedule++;
3960 count_active_address_space(kbdev, group->kctx);
3961 }
3962 }
3963
3964 static void scheduler_rotate(struct kbase_device *kbdev)
3965 {
3966 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3967
3968 lockdep_assert_held(&scheduler->lock);
3969
3970 /* Dealing with rotation */
3971 scheduler_rotate_groups(kbdev);
3972 scheduler_rotate_ctxs(kbdev);
3973 }
3974
3975 static struct kbase_queue_group *get_tock_top_group(
3976 struct kbase_csf_scheduler *const scheduler)
3977 {
3978 struct kbase_context *kctx;
3979 int i;
3980
3981 lockdep_assert_held(&scheduler->lock);
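	/* Pick the first non-idle runnable group found, scanning the priority
	 * levels in order and, within each level, the runnable contexts in
	 * list order.
	 */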
3982 for (i = 0; i < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++i) {
3983 list_for_each_entry(kctx,
3984 &scheduler->runnable_kctxs, csf.link) {
3985 struct kbase_queue_group *group;
3986
3987 list_for_each_entry(group,
3988 &kctx->csf.sched.runnable_groups[i],
3989 link) {
3990 if (queue_group_idle_locked(group))
3991 continue;
3992
3993 return group;
3994 }
3995 }
3996 }
3997
3998 return NULL;
3999 }
4000
4001 /**
4002 * suspend_active_groups_on_powerdown() - Suspend active CSG groups upon
4003 * suspend or GPU IDLE.
4004 *
4005 * @kbdev: Pointer to the device
4006 * @system_suspend: Flag to indicate it's for system suspend.
4007 *
4008 * This function will suspend all active CSG groups upon either
4009 * system suspend, runtime suspend or GPU IDLE.
4010 *
4011 * Return: 0 on success, -1 otherwise.
4012 */
4013 static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev,
4014 bool system_suspend)
4015 {
4016 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
4017 DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = { 0 };
4018
4019 int ret = suspend_active_queue_groups(kbdev, slot_mask);
4020
4021 if (ret) {
4022 /* The suspend of CSGs failed,
4023 * trigger the GPU reset to be in a deterministic state.
4024 */
4025 dev_warn(kbdev->dev, "[%llu] Timeout (%d ms) waiting for CSG slots to suspend on power down, slot_mask: 0x%*pb\n",
4026 kbase_backend_get_cycle_cnt(kbdev),
4027 kbdev->csf.fw_timeout_ms,
4028 kbdev->csf.global_iface.group_num, slot_mask);
4029
4030 if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
4031 kbase_reset_gpu(kbdev);
4032
4033 return -1;
4034 }
4035
4036 /* Check if the groups became active whilst the suspend was ongoing,
4037 * but only for the case where the system suspend is not in progress
4038 */
4039 if (!system_suspend && atomic_read(&scheduler->non_idle_offslot_grps))
4040 return -1;
4041
4042 return 0;
4043 }
4044
4045 static bool scheduler_idle_suspendable(struct kbase_device *kbdev)
4046 {
4047 bool suspend;
4048 unsigned long flags;
4049 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
4050
4051 lockdep_assert_held(&scheduler->lock);
4052
4053 if ((scheduler->state == SCHED_SUSPENDED) ||
4054 (scheduler->state == SCHED_SLEEPING))
4055 return false;
4056
4057 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
4058 if (scheduler->total_runnable_grps) {
4059 spin_lock(&scheduler->interrupt_lock);
4060
4061 /* Check both on-slots and off-slots groups idle status */
4062 suspend = kbase_csf_scheduler_all_csgs_idle(kbdev) &&
4063 !atomic_read(&scheduler->non_idle_offslot_grps) &&
4064 kbase_pm_idle_groups_sched_suspendable(kbdev);
4065
4066 spin_unlock(&scheduler->interrupt_lock);
4067 } else
4068 suspend = kbase_pm_no_runnables_sched_suspendable(kbdev);
4069
4070 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
4071
4072 return suspend;
4073 }
4074
4075 #ifdef KBASE_PM_RUNTIME
4076 /**
4077 * scheduler_sleep_on_idle - Put the Scheduler in sleeping state on GPU
4078 * becoming idle.
4079 *
4080 * @kbdev: Pointer to the device.
4081 *
4082 * This function is called on GPU idle notification to trigger the transition of
4083 * GPU to sleep state, where MCU firmware pauses execution and L2 cache is
4084 * turned off. Scheduler's state is changed to sleeping and all the active queue
4085 * groups remain on the CSG slots.
4086 */
4087 static void scheduler_sleep_on_idle(struct kbase_device *kbdev)
4088 {
4089 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
4090
4091 lockdep_assert_held(&scheduler->lock);
4092
4093 dev_dbg(kbdev->dev,
4094 "Scheduler to be put to sleep on GPU becoming idle");
4095 cancel_tick_timer(kbdev);
4096 scheduler_pm_idle_before_sleep(kbdev);
4097 scheduler->state = SCHED_SLEEPING;
4098 }
4099 #endif
4100
4101 /**
4102 * scheduler_suspend_on_idle - Put the Scheduler in suspended state on GPU
4103 * becoming idle.
4104 *
4105 * @kbdev: Pointer to the device.
4106 *
4107 * This function is called on GPU idle notification to trigger the power down of
4108 * GPU. Scheduler's state is changed to suspended and all the active queue
4109 * groups are suspended before halting the MCU firmware.
4110 *
4111 * Return: true if scheduler will be suspended or false if suspend is aborted.
4112 */
4113 static bool scheduler_suspend_on_idle(struct kbase_device *kbdev)
4114 {
4115 int ret = suspend_active_groups_on_powerdown(kbdev, false);
4116
4117 if (ret) {
4118 dev_dbg(kbdev->dev, "Aborting suspend scheduler (grps: %d)",
4119 atomic_read(
4120 &kbdev->csf.scheduler.non_idle_offslot_grps));
4121 /* Bring forward the next tick */
4122 kbase_csf_scheduler_advance_tick(kbdev);
4123 return false;
4124 }
4125
4126 dev_dbg(kbdev->dev, "Scheduler to be suspended on GPU becoming idle");
4127 scheduler_suspend(kbdev);
4128 cancel_tick_timer(kbdev);
4129 return true;
4130 }
4131
4132 static void gpu_idle_worker(struct work_struct *work)
4133 {
4134 struct kbase_device *kbdev = container_of(
4135 work, struct kbase_device, csf.scheduler.gpu_idle_work);
4136 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
4137 bool scheduler_is_idle_suspendable = false;
4138 bool all_groups_suspended = false;
4139
4140 KBASE_KTRACE_ADD(kbdev, IDLE_WORKER_BEGIN, NULL, 0u);
4141
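/* Pack the reset/idle/all-suspend flags into separate nibbles of the ktrace
 * info word.
 */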
4142 #define __ENCODE_KTRACE_INFO(reset, idle, all_suspend) \
4143 (((u32)reset) | (((u32)idle) << 4) | (((u32)all_suspend) << 8))
4144
4145 if (kbase_reset_gpu_try_prevent(kbdev)) {
4146 dev_warn(kbdev->dev, "Quit idle for failing to prevent gpu reset.\n");
4147 KBASE_KTRACE_ADD(kbdev, IDLE_WORKER_END, NULL,
4148 __ENCODE_KTRACE_INFO(true, false, false));
4149 return;
4150 }
4151 mutex_lock(&scheduler->lock);
4152
4153 /* Cycle completed, disable the firmware idle timer */
4154 disable_gpu_idle_fw_timer(kbdev);
4155 scheduler_is_idle_suspendable = scheduler_idle_suspendable(kbdev);
4156 if (scheduler_is_idle_suspendable) {
4157 KBASE_KTRACE_ADD(kbdev, GPU_IDLE_HANDLING_START, NULL,
4158 kbase_csf_ktrace_gpu_cycle_cnt(kbdev));
4159 #ifdef KBASE_PM_RUNTIME
4160 if (kbase_pm_gpu_sleep_allowed(kbdev) &&
4161 scheduler->total_runnable_grps)
4162 scheduler_sleep_on_idle(kbdev);
4163 else
4164 #endif
4165 all_groups_suspended = scheduler_suspend_on_idle(kbdev);
4166 }
4167
4168 mutex_unlock(&scheduler->lock);
4169 kbase_reset_gpu_allow(kbdev);
4170 KBASE_KTRACE_ADD(kbdev, IDLE_WORKER_END, NULL,
4171 __ENCODE_KTRACE_INFO(false,
4172 scheduler_is_idle_suspendable,
4173 all_groups_suspended));
4174 #undef __ENCODE_KTRACE_INFO
4175 }
4176
4177 static int scheduler_prepare(struct kbase_device *kbdev)
4178 {
4179 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
4180 int i;
4181
4182 lockdep_assert_held(&scheduler->lock);
4183
4184 /* Empty the groups_to_schedule */
4185 while (!list_empty(&scheduler->groups_to_schedule)) {
4186 struct kbase_queue_group *grp =
4187 list_first_entry(&scheduler->groups_to_schedule,
4188 struct kbase_queue_group,
4189 link_to_schedule);
4190
4191 remove_scheduled_group(kbdev, grp);
4192 }
4193
4194 /* Pre-scan init scheduler fields */
4195 if (WARN_ON(scheduler->ngrp_to_schedule != 0))
4196 scheduler->ngrp_to_schedule = 0;
4197 scheduler->top_ctx = NULL;
4198 scheduler->top_grp = NULL;
4199 scheduler->csg_scan_count_for_tick = 0;
4200 WARN_ON(!list_empty(&scheduler->idle_groups_to_schedule));
4201 scheduler->num_active_address_spaces = 0;
4202 scheduler->num_csg_slots_for_tick = 0;
4203 bitmap_zero(scheduler->csg_slots_prio_update, MAX_SUPPORTED_CSGS);
4204
4205 /* Scan out to run groups */
4206 for (i = 0; i < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++i) {
4207 struct kbase_context *kctx;
4208
4209 list_for_each_entry(kctx, &scheduler->runnable_kctxs, csf.link)
4210 scheduler_ctx_scan_groups(kbdev, kctx, i);
4211 }
4212
4213 /* Update this tick's non-idle groups */
4214 scheduler->non_idle_scanout_grps = scheduler->ngrp_to_schedule;
4215
4216 /* Initial number of non-idle off-slot groups, before the scheduler's
4217 * scheduler_apply() operation. This gives a sensible start point view
4218 * of the tick. It will be subject to up/downs during the scheduler
4219 * active phase.
4220 */
4221 atomic_set(&scheduler->non_idle_offslot_grps,
4222 scheduler->non_idle_scanout_grps);
4223 KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC, NULL,
4224 scheduler->non_idle_scanout_grps);
4225
4226 /* Adds those idle but runnable groups to the scanout list */
4227 scheduler_scan_idle_groups(kbdev);
4228
4229 WARN_ON(scheduler->csg_scan_count_for_tick < scheduler->ngrp_to_schedule);
4230
4231 KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp,
4232 scheduler->num_active_address_spaces |
4233 (((u64)scheduler->ngrp_to_schedule) << 32));
4234 set_max_csg_slots(kbdev);
4235 dev_dbg(kbdev->dev, "prepared groups length: %u, num_active_address_spaces: %u\n",
4236 scheduler->ngrp_to_schedule, scheduler->num_active_address_spaces);
4237 return 0;
4238 }
4239
4240 static void scheduler_handle_idle_timer_onoff(struct kbase_device *kbdev)
4241 {
4242 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
4243
4244 lockdep_assert_held(&scheduler->lock);
4245
4246 /* After the scheduler apply operation, the internal variable
4247 * scheduler->non_idle_offslot_grps reflects the end-point view
4248 * of the count at the end of the active phase.
4249 *
4250 * Any changes that follow (after the scheduler has dropped the
4251 * scheduler->lock) reflect async operations on the scheduler,
4252 * such as a group getting killed (evicted), a new group being
4253 * inserted, a CQS wait-sync triggered state transition etc.
4254 *
4255 * The condition for enabling the idle timer is that there are no
4256 * non-idle groups off-slot. If there is any non-idle group off-slot,
4257 * the timer should be disabled.
4258 */
4259 if (atomic_read(&scheduler->non_idle_offslot_grps))
4260 disable_gpu_idle_fw_timer(kbdev);
4261 else
4262 enable_gpu_idle_fw_timer(kbdev);
4263 }
4264
4265 /**
4266 * keep_lru_on_slots() - Check whether the condition for keeping the LRU is met.
4267 *
4268 * This function tries to maintain the last-recently-used arrangement of CSGs
4269 * on slots when the scheduler has no non-idle off-slot CSGs to consider for
4270 * replacement. This effectively extends the previous scheduling results to
4271 * the new one. That is, the last recently used CSGs are retained on slots
4272 * for the new tick/tock action.
4273 *
4274 * @kbdev: Pointer to the device.
4275 *
4276 * Return: true for avoiding on-slot CSGs changes (i.e. keep existing LRU),
4277 * otherwise false.
4278 */
4279 static bool keep_lru_on_slots(struct kbase_device *kbdev)
4280 {
4281 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
4282 bool keep_lru = false;
4283 int on_slots = bitmap_weight(scheduler->csg_inuse_bitmap,
4284 kbdev->csf.global_iface.group_num);
4285
4286 lockdep_assert_held(&scheduler->lock);
4287
4288 if (on_slots && !atomic_read(&scheduler->non_idle_offslot_grps)) {
4289 unsigned long flags;
4290
4291 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
4292 /* All on-slots are idle, no non-idle off-slot CSGs available
4293 * for considering a meaningful change. Set keep_lru.
4294 */
4295 keep_lru = kbase_csf_scheduler_all_csgs_idle(kbdev);
4296
4297 if (keep_lru && !scheduler->gpu_idle_fw_timer_enabled) {
4298 scheduler->gpu_idle_fw_timer_enabled = true;
4299 kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
4300 }
4301 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
4302
4303 dev_dbg(kbdev->dev, "Keep_LRU: %d, CSGs on-slots: %d\n",
4304 keep_lru, on_slots);
4305 }
4306
4307 return keep_lru;
4308 }
4309
4310 /**
4311 * prepare_fast_local_tock() - Make preparations for exercising a fast
4312 *                             local tock inside the scheduling actions.
4313 *
4314 * The function assumes that firing a fast local tock (i.e. an equivalent
4315 * tock action without dropping the lock) is desired if there are idle
4316 * on-slot CSGs. The function updates the run-state of the affected CSGs
4317 * as a preparation. This should only be called from inside
4318 * schedule_actions(), where the previously confirmed idle flags are still
4319 * considered reflective, following the earlier idle-confirmation call,
4320 * plus any CSGs that newly became idle during the committing steps of the
4321 * scheduling action.
4322 *
4323 * @kbdev: Pointer to the GPU device.
4324 *
4325 * Return: number of on-slots CSGs that can be considered for replacing.
4326 */
4327 static int prepare_fast_local_tock(struct kbase_device *kbdev)
4328 {
4329 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
4330 u32 num_groups = kbdev->csf.global_iface.group_num;
4331 unsigned long flags, i;
4332 DECLARE_BITMAP(csg_bitmap, MAX_SUPPORTED_CSGS) = { 0 };
4333
4334 lockdep_assert_held(&scheduler->lock);
4335
4336 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
4337 bitmap_copy(csg_bitmap, scheduler->csg_slots_idle_mask, num_groups);
4338 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
4339
4340 /* Mark the flagged idle CSGs' run state as IDLE, so the
4341 * intended fast local tock can replace them with off-slot
4342 * non-idle CSGs.
4343 */
4344 for_each_set_bit(i, csg_bitmap, num_groups) {
4345 struct kbase_csf_csg_slot *csg_slot = &scheduler->csg_slots[i];
4346 struct kbase_queue_group *group = csg_slot->resident_group;
4347
4348 if (!queue_group_idle_locked(group))
4349 group->run_state = KBASE_CSF_GROUP_IDLE;
4350 }
4351
4352 /* Return the number of idle slots for potential replacement */
4353 return bitmap_weight(csg_bitmap, num_groups);
4354 }
4355
4356 static void schedule_actions(struct kbase_device *kbdev, bool is_tick)
4357 {
4358 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
4359 unsigned long flags;
4360 struct kbase_queue_group *protm_grp;
4361 int ret;
4362 bool skip_scheduling_actions;
4363 bool skip_idle_slots_update;
4364 bool new_protm_top_grp = false;
4365 int local_tock_slots = 0;
4366
4367 kbase_reset_gpu_assert_prevented(kbdev);
4368 lockdep_assert_held(&scheduler->lock);
4369
4370 ret = kbase_csf_scheduler_wait_mcu_active(kbdev);
4371 if (ret) {
4372 dev_err(kbdev->dev,
4373 "Wait for MCU power on failed on scheduling tick/tock");
4374 return;
4375 }
4376
4377 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
4378 skip_idle_slots_update = kbase_csf_scheduler_protected_mode_in_use(kbdev);
4379 skip_scheduling_actions =
4380 !skip_idle_slots_update && kbdev->protected_mode;
4381 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
4382
4383 /* Skip scheduling actions as GPU reset hasn't been performed yet to
4384 * rectify the anomaly that happened when pmode exit interrupt wasn't
4385 * received before the termination of group running in pmode.
4386 */
4387 if (unlikely(skip_scheduling_actions)) {
4388 dev_info(kbdev->dev,
4389 "Scheduling actions skipped due to anomaly in pmode");
4390 return;
4391 }
4392
4393 if (!skip_idle_slots_update) {
4394 /* Updating on-slot idle CSGs when not in protected mode. */
4395 scheduler_handle_idle_slots(kbdev);
4396
4397 /* Determine whether the condition is met for keeping the
4398 * Last-Recent-Use. If true, skip the remaining action
4399 * steps, thus extending the previous tick's arrangement;
4400 * in particular, no alterations are made to on-slot CSGs.
4401 */
4402 if (keep_lru_on_slots(kbdev))
4403 return;
4404 }
4405
4406 if (is_tick)
4407 scheduler_rotate(kbdev);
4408
4409 redo_local_tock:
4410 scheduler_prepare(kbdev);
4411 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
4412 protm_grp = scheduler->active_protm_grp;
4413
4414 /* Avoid update if the top-group remains unchanged and in protected
4415 * mode. In that case, any slot update is effectively
4416 * competing against the active protected mode group (typically the
4417 * top-group). If we update other slots, even while leaving the
4418 * top-group slot untouched, the firmware would exit protected mode
4419 * to interact with the host driver. After that, as the top-group
4420 * would again raise the request for entering protected mode, we would
4421 * end up actively switching over twice without progressing the
4422 * queue jobs.
4423 */
4424 if (protm_grp && scheduler->top_grp == protm_grp) {
4425 int new_val;
4426 dev_dbg(kbdev->dev, "Scheduler keep protm exec: group-%d",
4427 protm_grp->handle);
4428 new_val = atomic_dec_return(&scheduler->non_idle_offslot_grps);
4429 KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_DEC,
4430 protm_grp, new_val);
4431 } else if (scheduler->top_grp) {
4432 if (protm_grp)
4433 dev_dbg(kbdev->dev, "Scheduler drop protm exec: group-%d",
4434 protm_grp->handle);
4435
4436 if (!bitmap_empty(scheduler->top_grp->protm_pending_bitmap,
4437 kbdev->csf.global_iface.groups[0].stream_num)) {
4438 dev_dbg(kbdev->dev, "Scheduler prepare protm exec: group-%d of context %d_%d",
4439 scheduler->top_grp->handle,
4440 scheduler->top_grp->kctx->tgid,
4441 scheduler->top_grp->kctx->id);
4442
4443 /* When entering protected mode all CSG slots can be occupied
4444 * but only the protected mode CSG will be running. Any event
4445 * that would trigger the execution of an on-slot idle CSG will
4446 * need to be handled by the host during protected mode.
4447 */
4448 new_protm_top_grp = true;
4449 }
4450
4451 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
4452
4453 scheduler_apply(kbdev);
4454
4455 /* Post-apply, all the committed groups in this tick are on
4456 * slots, time to arrange the idle timer on/off decision.
4457 */
4458 scheduler_handle_idle_timer_onoff(kbdev);
4459
4460 /* Scheduler is dropping the exec of the previous protm_grp.
4461 * Until the protm quit completes, the GPU is effectively
4462 * locked in the secure mode.
4463 */
4464 if (protm_grp)
4465 scheduler_force_protm_exit(kbdev);
4466
4467 wait_csg_slots_start(kbdev);
4468 wait_csg_slots_finish_prio_update(kbdev);
4469
4470 if (new_protm_top_grp) {
4471 scheduler_group_check_protm_enter(kbdev,
4472 scheduler->top_grp);
4473 } else if (!local_tock_slots &&
4474 atomic_read(&scheduler->non_idle_offslot_grps)) {
4475 /* If, during the scheduling action, we have off-slot
4476 * non-idle CSGs waiting, and some new idle slots happen
4477 * to emerge during the committed action steps, trigger
4478 * a one-off fast local tock.
4479 */
4480 local_tock_slots = prepare_fast_local_tock(kbdev);
4481
4482 if (local_tock_slots) {
4483 dev_dbg(kbdev->dev,
4484 "In-cycle %d idle slots available\n",
4485 local_tock_slots);
4486 goto redo_local_tock;
4487 }
4488 }
4489
4490 return;
4491 }
4492
4493 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
4494 return;
4495 }
4496
4497 /**
4498 * can_skip_scheduling() - Check if the scheduling actions can be skipped.
4499 *
4500 * @kbdev: Pointer to the device
4501 *
4502 * This function is called on a scheduling tick or tock to determine if the
4503 * scheduling actions can be skipped.
4504 * If the Scheduler is in the sleeping state and exit from sleep is allowed,
4505 * then activation of the MCU will be triggered. The tick or tock work item
4506 * could have been in flight when the Scheduler's state was changed to sleeping.
4507 *
4508 * Return: true if the scheduling actions can be skipped.
4509 */
4510 static bool can_skip_scheduling(struct kbase_device *kbdev)
4511 {
4512 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
4513
4514 lockdep_assert_held(&scheduler->lock);
4515
4516 if (scheduler->state == SCHED_SUSPENDED)
4517 return true;
4518
4519 #ifdef KBASE_PM_RUNTIME
4520 if (scheduler->state == SCHED_SLEEPING) {
4521 unsigned long flags;
4522
4523 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
4524 if (kbdev->pm.backend.exit_gpu_sleep_mode) {
4525 int ret = scheduler_pm_active_after_sleep(kbdev, flags);
4526 /* hwaccess_lock is released in the previous function
4527 * call.
4528 */
4529 if (!ret) {
4530 scheduler->state = SCHED_INACTIVE;
4531 return false;
4532 }
4533
4534 dev_info(kbdev->dev,
4535 "Skip scheduling due to system suspend");
4536 return true;
4537 }
4538 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
4539 return true;
4540 }
4541 #endif
4542
4543 return false;
4544 }
4545
4546 static void schedule_on_tock(struct work_struct *work)
4547 {
4548 struct kbase_device *kbdev = container_of(work, struct kbase_device,
4549 csf.scheduler.tock_work.work);
4550 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
4551 int err;
4552
4553 /* Tock work item is serviced */
4554 scheduler->tock_pending_request = false;
4555
4556 err = kbase_reset_gpu_try_prevent(kbdev);
4557 /* Regardless of whether reset failed or is currently happening, exit
4558 * early
4559 */
4560 if (err)
4561 return;
4562
4563 mutex_lock(&scheduler->lock);
4564 if (can_skip_scheduling(kbdev))
4565 goto exit_no_schedule_unlock;
4566
4567 WARN_ON(!(scheduler->state == SCHED_INACTIVE));
4568 scheduler->state = SCHED_BUSY;
4569
4570 /* Undertaking schedule action steps */
4571 KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK, NULL, 0u);
4572 schedule_actions(kbdev, false);
4573
4574 /* Record time information on a non-skipped tock */
4575 scheduler->last_schedule = jiffies;
4576
4577 scheduler->state = SCHED_INACTIVE;
4578 if (!scheduler->total_runnable_grps)
4579 queue_work(system_wq, &scheduler->gpu_idle_work);
4580 mutex_unlock(&scheduler->lock);
4581 kbase_reset_gpu_allow(kbdev);
4582
4583 dev_dbg(kbdev->dev,
4584 "Waking up for event after schedule-on-tock completes.");
4585 wake_up_all(&kbdev->csf.event_wait);
4586 KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_END, NULL, 0u);
4587 return;
4588
4589 exit_no_schedule_unlock:
4590 mutex_unlock(&scheduler->lock);
4591 kbase_reset_gpu_allow(kbdev);
4592 }
4593
4594 static void schedule_on_tick(struct work_struct *work)
4595 {
4596 struct kbase_device *kbdev = container_of(work, struct kbase_device,
4597 csf.scheduler.tick_work);
4598 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
4599
4600 int err = kbase_reset_gpu_try_prevent(kbdev);
4601 /* Regardless of whether reset failed or is currently happening, exit
4602 * early
4603 */
4604 if (err)
4605 return;
4606
4607 mutex_lock(&scheduler->lock);
4608
4609 WARN_ON(scheduler->tick_timer_active);
4610 if (can_skip_scheduling(kbdev))
4611 goto exit_no_schedule_unlock;
4612
4613 scheduler->state = SCHED_BUSY;
4614
4615 /* Undertaking schedule action steps */
4616 KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK, NULL,
4617 scheduler->total_runnable_grps);
4618 schedule_actions(kbdev, true);
4619
4620 /* Record time information */
4621 scheduler->last_schedule = jiffies;
4622
4623 /* Kicking next scheduling if needed */
4624 if (likely(scheduler_timer_is_enabled_nolock(kbdev)) &&
4625 (scheduler->total_runnable_grps > 0)) {
4626 start_tick_timer(kbdev);
4627 dev_dbg(kbdev->dev,
4628 "scheduling for next tick, num_runnable_groups:%u\n",
4629 scheduler->total_runnable_grps);
4630 } else if (!scheduler->total_runnable_grps)
4631 queue_work(system_wq, &scheduler->gpu_idle_work);
4632
4633 scheduler->state = SCHED_INACTIVE;
4634 mutex_unlock(&scheduler->lock);
4635 kbase_reset_gpu_allow(kbdev);
4636
4637 dev_dbg(kbdev->dev, "Waking up for event after schedule-on-tick completes.");
4638 wake_up_all(&kbdev->csf.event_wait);
4639 KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_END, NULL,
4640 scheduler->total_runnable_grps);
4641 return;
4642
4643 exit_no_schedule_unlock:
4644 mutex_unlock(&scheduler->lock);
4645 kbase_reset_gpu_allow(kbdev);
4646 }
4647
4648 static int wait_csg_slots_suspend(struct kbase_device *kbdev,
4649 const unsigned long *slot_mask,
4650 unsigned int timeout_ms)
4651 {
4652 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
4653 long remaining = kbase_csf_timeout_in_jiffies(timeout_ms);
4654 u32 num_groups = kbdev->csf.global_iface.group_num;
4655 int err = 0;
4656 DECLARE_BITMAP(slot_mask_local, MAX_SUPPORTED_CSGS);
4657
4658 lockdep_assert_held(&scheduler->lock);
4659
4660 bitmap_copy(slot_mask_local, slot_mask, MAX_SUPPORTED_CSGS);
4661
4662 while (!bitmap_empty(slot_mask_local, MAX_SUPPORTED_CSGS)
4663 && remaining) {
4664 DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS);
4665
4666 bitmap_copy(changed, slot_mask_local, MAX_SUPPORTED_CSGS);
4667
4668 remaining = wait_event_timeout(kbdev->csf.event_wait,
4669 slots_state_changed(kbdev, changed,
4670 csg_slot_stopped_locked),
4671 remaining);
4672
4673 if (remaining) {
4674 u32 i;
4675
4676 for_each_set_bit(i, changed, num_groups) {
4677 struct kbase_queue_group *group;
4678
4679 if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i)))
4680 continue;
4681
4682 /* The on slot csg is now stopped */
4683 clear_bit(i, slot_mask_local);
4684
4685 group = scheduler->csg_slots[i].resident_group;
4686 if (likely(group)) {
4687 /* Only do save/cleanup if the
4688 * group is not terminated during
4689 * the sleep.
4690 */
4691 save_csg_slot(group);
4692 if (cleanup_csg_slot(group))
4693 sched_evict_group(group, true, true);
4694 }
4695 }
4696 } else {
4697 dev_warn(kbdev->dev, "[%llu] Timeout waiting for CSG slots to suspend, slot_mask: 0x%*pb\n",
4698 kbase_backend_get_cycle_cnt(kbdev),
4699 num_groups, slot_mask_local);
4700
4701
4702 err = -ETIMEDOUT;
4703 }
4704 }
4705
4706 return err;
4707 }
4708
4709 static int suspend_active_queue_groups(struct kbase_device *kbdev,
4710 unsigned long *slot_mask)
4711 {
4712 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
4713 u32 num_groups = kbdev->csf.global_iface.group_num;
4714 u32 slot_num;
4715 int ret;
4716
4717 lockdep_assert_held(&scheduler->lock);
4718
4719 for (slot_num = 0; slot_num < num_groups; slot_num++) {
4720 struct kbase_queue_group *group =
4721 scheduler->csg_slots[slot_num].resident_group;
4722
4723 if (group) {
4724 suspend_queue_group(group);
4725 set_bit(slot_num, slot_mask);
4726 }
4727 }
4728
4729 ret = wait_csg_slots_suspend(kbdev, slot_mask, kbdev->reset_timeout_ms);
4730 return ret;
4731 }
4732
4733 static int suspend_active_queue_groups_on_reset(struct kbase_device *kbdev)
4734 {
4735 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
4736 DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = { 0 };
4737 int ret;
4738 int ret2;
4739
4740 mutex_lock(&scheduler->lock);
4741
4742 ret = suspend_active_queue_groups(kbdev, slot_mask);
4743
4744 if (ret) {
4745 dev_warn(kbdev->dev, "Timeout waiting for CSG slots to suspend before reset, slot_mask: 0x%*pb\n",
4746 kbdev->csf.global_iface.group_num, slot_mask);
4747 }
4748
4749 /* Need to flush the GPU cache to ensure suspend buffer
4750 * contents are not lost on reset of GPU.
4751 * Do this even if suspend operation had timed out for some of
4752 * the CSG slots.
4753 * In case the scheduler is already in the suspended state, the
4754 * cache clean is required as the async reset request from
4755 * the debugfs may race against the scheduler suspend operation
4756 * due to the extra context ref-count, which prevents the
4757 * L2 powering down cache clean operation in the non racing
4758 * case.
4759 * LSC is being flushed together to cover buslogging usecase,
4760 * where GPU reset is done regularly to avoid the log buffer
4761 * overflow.
4762 */
4763 kbase_gpu_start_cache_clean(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC);
4764 ret2 = kbase_gpu_wait_cache_clean_timeout(kbdev,
4765 kbdev->reset_timeout_ms);
4766 if (ret2) {
4767 dev_warn(kbdev->dev, "[%llu] Timeout waiting for cache clean to complete before reset",
4768 kbase_backend_get_cycle_cnt(kbdev));
4769 if (!ret)
4770 ret = ret2;
4771 }
4772
4773 mutex_unlock(&scheduler->lock);
4774
4775 return ret;
4776 }
4777
4778 /**
4779 * scheduler_handle_reset_in_protected_mode() - Update the state of normal mode
4780 * groups when reset is done during
4781 * protected mode execution.
4782 *
4783 * @kbdev: Pointer to the device.
4784 *
4785 * This function is called at the time of GPU reset, before the suspension of
4786 * queue groups, to handle the case when the reset is getting performed whilst
4787 * GPU is in protected mode.
4788 * On entry to protected mode all the groups, except the top group that executes
4789 * in protected mode, are implicitly suspended by the FW. Thus this function
4790 * simply marks the normal mode groups as suspended (and cleans up the
4791 * corresponding CSG slots) to prevent their potential forceful eviction from
4792 * the Scheduler. So if GPU was in protected mode and there was no fault, then
4793 * only the protected mode group would be suspended in the regular way post exit
4794 * from this function. And if GPU was in normal mode, then all on-slot groups
4795 * will get suspended in the regular way.
4796 *
4797 * Return: true if the groups remaining on the CSG slots need to be suspended in
4798 * the regular way by sending CSG SUSPEND reqs to FW, otherwise false.
4799 */
4800 static bool scheduler_handle_reset_in_protected_mode(struct kbase_device *kbdev)
4801 {
4802 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
4803 u32 const num_groups = kbdev->csf.global_iface.group_num;
4804 struct kbase_queue_group *protm_grp;
4805 bool suspend_on_slot_groups = true;
4806 bool pmode_active;
4807 unsigned long flags;
4808 u32 csg_nr;
4809
4810 mutex_lock(&scheduler->lock);
4811
4812 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
4813 protm_grp = scheduler->active_protm_grp;
4814 pmode_active = kbdev->protected_mode;
4815
4816 if (likely(!protm_grp && !pmode_active)) {
4817 /* Case 1: GPU is not in protected mode or it successfully
4818 * exited protected mode. All on-slot groups can be suspended in
4819 * the regular way before reset.
4820 */
4821 suspend_on_slot_groups = true;
4822 } else if (protm_grp && pmode_active) {
4823 /* Case 2: GPU went successfully into protected mode and hasn't
4824 * exited from it yet and the protected mode group is still
4825 * active. If there was no fault for the protected mode group
4826 * then it can be suspended in the regular way before reset.
4827 * The other normal mode on-slot groups were already implicitly
4828 * suspended on entry to protected mode so they can be marked as
4829 * suspended right away.
4830 */
4831 suspend_on_slot_groups = !protm_grp->faulted;
4832 } else if (!protm_grp && pmode_active) {
4833 /* Case 3: GPU went successfully into protected mode and hasn't
4834 * exited from it yet but the protected mode group got deleted.
4835 * This would have happened if the FW got stuck during protected
4836 * mode for some reason (like GPU page fault or some internal
4837 * error). In normal cases FW is expected to send the pmode exit
4838 * interrupt before it handles the CSG termination request.
4839 * The other normal mode on-slot groups would already have been
4840 * implicitly suspended on entry to protected mode so they can be
4841 * marked as suspended right away.
4842 */
4843 suspend_on_slot_groups = false;
4844 } else if (protm_grp && !pmode_active) {
4845 /* Case 4: GPU couldn't successfully enter protected mode, i.e.
4846 * PROTM_ENTER request had timed out.
4847 * All the on-slot groups need to be suspended in the regular
4848 * way before reset.
4849 */
4850 suspend_on_slot_groups = true;
4851 }
4852
4853 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
4854
4855 if (likely(!pmode_active))
4856 goto unlock;
4857
4858 /* GPU hasn't exited protected mode, so all the on-slot groups barring
4859 * the protected mode group can be marked as suspended right away.
4860 */
4861 for (csg_nr = 0; csg_nr < num_groups; csg_nr++) {
4862 struct kbase_queue_group *const group =
4863 kbdev->csf.scheduler.csg_slots[csg_nr].resident_group;
4864 int new_val;
4865
4866 if (!group || (group == protm_grp))
4867 continue;
4868
4869 cleanup_csg_slot(group);
4870 group->run_state = KBASE_CSF_GROUP_SUSPENDED;
4871
4872 /* Simply treat the normal mode groups as non-idle. The tick
4873 * scheduled after the reset will re-initialize the counter
4874 * anyway.
4875 */
4876 new_val = atomic_inc_return(&scheduler->non_idle_offslot_grps);
4877 KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_INC,
4878 group, new_val);
4879 }
4880
4881 unlock:
4882 mutex_unlock(&scheduler->lock);
4883 return suspend_on_slot_groups;
4884 }
4885
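/*
 * Illustrative sketch only (not used by the driver): the four cases described
 * in the kernel-doc of scheduler_handle_reset_in_protected_mode() reduce to a
 * small decision over whether a protected mode group exists, whether protected
 * mode is still active, and whether that group faulted. The helper below is a
 * hypothetical restatement of that table, kept as a compile-safe sketch.
 */
static inline bool __maybe_unused
reset_needs_regular_csg_suspend(bool have_protm_grp, bool pmode_active,
				bool protm_grp_faulted)
{
	if (!pmode_active)
		return true;			/* Cases 1 and 4 */
	if (have_protm_grp)
		return !protm_grp_faulted;	/* Case 2 */
	return false;				/* Case 3 */
}
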
4886 static void cancel_tock_work(struct kbase_csf_scheduler *const scheduler)
4887 {
4888 cancel_delayed_work_sync(&scheduler->tock_work);
4889 scheduler->tock_pending_request = false;
4890 }
4891
4892 static void scheduler_inner_reset(struct kbase_device *kbdev)
4893 {
4894 u32 const num_groups = kbdev->csf.global_iface.group_num;
4895 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
4896 unsigned long flags;
4897
4898 WARN_ON(kbase_csf_scheduler_get_nr_active_csgs(kbdev));
4899
4900 /* Cancel any potential queued delayed work(s) */
4901 cancel_work_sync(&kbdev->csf.scheduler.gpu_idle_work);
4902 cancel_tick_timer(kbdev);
4903 cancel_work_sync(&scheduler->tick_work);
4904 cancel_tock_work(scheduler);
4905 cancel_delayed_work_sync(&scheduler->ping_work);
4906
4907 mutex_lock(&scheduler->lock);
4908
4909 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
4910 bitmap_fill(scheduler->csgs_events_enable_mask, MAX_SUPPORTED_CSGS);
4911 if (scheduler->active_protm_grp)
4912 KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_EXIT_PROTM,
4913 scheduler->active_protm_grp, 0u);
4914 scheduler->active_protm_grp = NULL;
4915 memset(kbdev->csf.scheduler.csg_slots, 0,
4916 num_groups * sizeof(struct kbase_csf_csg_slot));
4917 bitmap_zero(kbdev->csf.scheduler.csg_inuse_bitmap, num_groups);
4918 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
4919
4920 scheduler->top_ctx = NULL;
4921 scheduler->top_grp = NULL;
4922
4923 KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp,
4924 scheduler->num_active_address_spaces |
4925 (((u64)scheduler->total_runnable_grps) << 32));
4926
4927 mutex_unlock(&scheduler->lock);
4928 }
4929
4930 void kbase_csf_scheduler_reset(struct kbase_device *kbdev)
4931 {
4932 struct kbase_context *kctx;
4933
4934 WARN_ON(!kbase_reset_gpu_is_active(kbdev));
4935
4936 KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET, NULL, 0u);
4937
4938 if (scheduler_handle_reset_in_protected_mode(kbdev) &&
4939 !suspend_active_queue_groups_on_reset(kbdev)) {
4940 /* As all groups have been successfully evicted from the CSG
4941 * slots, clear out the scheduler data fields and return
4942 */
4943 scheduler_inner_reset(kbdev);
4944 return;
4945 }
4946
4947 mutex_lock(&kbdev->kctx_list_lock);
4948
4949 /* The loop to iterate over the kbase contexts is present due to lock
4950 * ordering issue between kctx->csf.lock & kbdev->csf.scheduler.lock.
4951 * CSF ioctls first take kctx->csf.lock which is context-specific and
4952 * then take kbdev->csf.scheduler.lock for global actions like assigning
4953 * a CSG slot.
4954 * If the lock ordering constraint was not there, then we could have
4955 * directly looped over the active queue groups.
4956 */
4957 list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) {
4958 /* Firmware reload would reinitialize the CSG & CS interface IO
4959 * pages, so just need to internally mark the currently active
4960 * queue groups as terminated (similar to the unexpected OoM
4961 * event case).
4962 * No further work can now get executed for the active groups
4963 * (new groups would have to be created to execute work) and
4964 * in near future Clients would be duly informed of this
4965 * reset. The resources (like User IO pages, GPU queue memory)
4966 * allocated for the associated queues would be freed when the
4967 * Clients do the teardown when they become aware of the reset.
4968 */
4969 kbase_csf_active_queue_groups_reset(kbdev, kctx);
4970 }
4971
4972 mutex_unlock(&kbdev->kctx_list_lock);
4973
4974 /* After the queue groups are reset, clear out the scheduler data fields */
4975 scheduler_inner_reset(kbdev);
4976 }
4977
4978 static void firmware_aliveness_monitor(struct work_struct *work)
4979 {
4980 struct kbase_device *kbdev = container_of(work, struct kbase_device,
4981 csf.scheduler.ping_work.work);
4982 int err;
4983
4984 /* Ensure that reset will not be occurring while this function is being
4985 * executed as otherwise calling kbase_reset_gpu when reset is already
4986 * occurring is a programming error.
4987 *
4988 * We must use the 'try' variant as the Reset worker can try to flush
4989 * this workqueue, which would otherwise deadlock here if we tried to
4990 * wait for the reset (and thus ourselves) to complete.
4991 */
4992 err = kbase_reset_gpu_try_prevent(kbdev);
4993 if (err) {
4994 /* It doesn't matter whether the value was -EAGAIN or a fatal
4995 * error, just stop processing. In case of -EAGAIN, the Reset
4996 * worker will restart the scheduler later to resume ping
4997 */
4998 return;
4999 }
5000
5001 mutex_lock(&kbdev->csf.scheduler.lock);
5002
5003 #ifdef CONFIG_MALI_BIFROST_DEBUG
5004 if (fw_debug) {
5005 /* ping requests cause distraction in firmware debugging */
5006 goto exit;
5007 }
5008 #endif
5009
5010 if (kbdev->csf.scheduler.state == SCHED_SUSPENDED ||
5011 kbdev->csf.scheduler.state == SCHED_SLEEPING)
5012 goto exit;
5013
5014 if (kbase_csf_scheduler_get_nr_active_csgs(kbdev) != 1)
5015 goto exit;
5016
5017 if (kbase_csf_scheduler_protected_mode_in_use(kbdev))
5018 goto exit;
5019
5020 if (kbase_pm_context_active_handle_suspend(kbdev,
5021 KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) {
5022 /* Suspend pending - no real need to ping */
5023 goto exit;
5024 }
5025
5026 kbase_csf_scheduler_wait_mcu_active(kbdev);
5027
5028 err = kbase_csf_firmware_ping_wait(kbdev);
5029
5030 if (err) {
5031 /* It is acceptable to enqueue a reset whilst we've prevented
5032 * them, it will happen after we've allowed them again
5033 */
5034 if (kbase_prepare_to_reset_gpu(
5035 kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
5036 kbase_reset_gpu(kbdev);
5037 } else if (kbase_csf_scheduler_get_nr_active_csgs(kbdev) == 1) {
5038 queue_delayed_work(system_long_wq,
5039 &kbdev->csf.scheduler.ping_work,
5040 msecs_to_jiffies(FIRMWARE_PING_INTERVAL_MS));
5041 }
5042
5043 kbase_pm_context_idle(kbdev);
5044 exit:
5045 mutex_unlock(&kbdev->csf.scheduler.lock);
5046 kbase_reset_gpu_allow(kbdev);
5047 return;
5048 }
5049
5050 int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group,
5051 struct kbase_suspend_copy_buffer *sus_buf)
5052 {
5053 struct kbase_context *const kctx = group->kctx;
5054 struct kbase_device *const kbdev = kctx->kbdev;
5055 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
5056 bool on_slot;
5057 int err = 0;
5058
5059 kbase_reset_gpu_assert_prevented(kbdev);
5060 lockdep_assert_held(&kctx->csf.lock);
5061 mutex_lock(&scheduler->lock);
5062
5063 on_slot = kbasep_csf_scheduler_group_is_on_slot_locked(group);
5064
5065 #ifdef KBASE_PM_RUNTIME
5066 if (on_slot && (scheduler->state == SCHED_SLEEPING)) {
5067 if (wait_for_scheduler_to_exit_sleep(kbdev)) {
5068 dev_warn(
5069 kbdev->dev,
5070 "Wait for scheduler to exit sleep state timed out when copying suspend buffer for group %d of ctx %d_%d on slot %d",
5071 group->handle, group->kctx->tgid,
5072 group->kctx->id, group->csg_nr);
5073
5074 scheduler_wakeup(kbdev, true);
5075
5076 /* Wait for MCU firmware to start running */
5077 if (kbase_csf_scheduler_wait_mcu_active(kbdev))
5078 dev_warn(
5079 kbdev->dev,
5080 "Wait for MCU active failed when copying suspend buffer for group %d of ctx %d_%d on slot %d",
5081 group->handle, group->kctx->tgid,
5082 group->kctx->id, group->csg_nr);
5083 }
5084
5085 /* Check the group state again as scheduler lock would have been
5086 * released when waiting for the exit from SLEEPING state.
5087 */
5088 on_slot = kbasep_csf_scheduler_group_is_on_slot_locked(group);
5089 }
5090 #endif
5091 if (on_slot) {
5092 DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = {0};
5093
5094 set_bit(kbase_csf_scheduler_group_get_slot(group), slot_mask);
5095
5096 if (!WARN_ON(scheduler->state == SCHED_SUSPENDED))
5097 suspend_queue_group(group);
5098 err = wait_csg_slots_suspend(kbdev, slot_mask,
5099 kbdev->csf.fw_timeout_ms);
5100 if (err) {
5101 dev_warn(kbdev->dev, "[%llu] Timeout waiting for the group %d to suspend on slot %d",
5102 kbase_backend_get_cycle_cnt(kbdev),
5103 group->handle, group->csg_nr);
5104 goto exit;
5105 }
5106 }
5107
5108 if (queue_group_suspended_locked(group)) {
5109 unsigned int target_page_nr = 0, i = 0;
5110 u64 offset = sus_buf->offset;
5111 size_t to_copy = sus_buf->size;
5112 const u32 csg_suspend_buf_nr_pages =
5113 PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
5114
5115 if (scheduler->state != SCHED_SUSPENDED) {
5116 /* Similar to the case of HW counters, need to flush
5117 * the GPU L2 cache before reading from the suspend buffer
5118 * pages as they are mapped and cached on GPU side.
5119 * Flushing LSC is not done here, since only the flush of
5120 * CSG suspend buffer contents is needed from the L2 cache.
5121 */
5122 kbase_gpu_start_cache_clean(
5123 kbdev, GPU_COMMAND_CACHE_CLN_INV_L2);
5124 kbase_gpu_wait_cache_clean(kbdev);
5125 } else {
5126 /* Make sure power down transitions have completed,
5127 * i.e. L2 has been powered off as that would ensure
5128 * its contents are flushed to memory.
5129 * This is needed as Scheduler doesn't wait for the
5130 * power down to finish.
5131 */
5132 kbase_pm_wait_for_desired_state(kbdev);
5133 }
5134
5135 for (i = 0; i < csg_suspend_buf_nr_pages &&
5136 target_page_nr < sus_buf->nr_pages; i++) {
5137 struct page *pg =
5138 as_page(group->normal_suspend_buf.phy[i]);
5139 void *sus_page = kmap(pg);
5140
5141 if (sus_page) {
5142 kbase_sync_single_for_cpu(kbdev,
5143 kbase_dma_addr(pg),
5144 PAGE_SIZE, DMA_BIDIRECTIONAL);
5145
5146 err = kbase_mem_copy_to_pinned_user_pages(
5147 sus_buf->pages, sus_page,
5148 &to_copy, sus_buf->nr_pages,
5149 &target_page_nr, offset);
5150 kunmap(pg);
5151 if (err)
5152 break;
5153 } else {
5154 err = -ENOMEM;
5155 break;
5156 }
5157 }
5158 schedule_in_cycle(group, false);
5159 } else {
5160 /* If addr-space fault, the group may have been evicted */
5161 err = -EIO;
5162 }
5163
5164 exit:
5165 mutex_unlock(&scheduler->lock);
5166 return err;
5167 }
5168
5169 KBASE_EXPORT_TEST_API(kbase_csf_scheduler_group_copy_suspend_buf);
5170
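/*
 * Caller-side sketch (illustrative only; 'suspend_size', 'nr_pinned' and
 * 'pinned_pages' are hypothetical names and the field meanings are assumptions
 * inferred from the use above): the destination for
 * kbase_csf_scheduler_group_copy_suspend_buf() is described by a
 * kbase_suspend_copy_buffer whose pages are expected to be pinned beforehand,
 * e.g.
 *
 *	struct kbase_suspend_copy_buffer sus_buf = {
 *		.size     = suspend_size,   <- bytes to copy (assumed)
 *		.nr_pages = nr_pinned,      <- entries in the .pages array
 *		.pages    = pinned_pages,   <- pre-pinned pages (assumed)
 *		.offset   = 0,              <- starting offset for the copy
 *	};
 *	err = kbase_csf_scheduler_group_copy_suspend_buf(group, &sus_buf);
 */
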
5171 /**
5172 * group_sync_updated() - Evaluate sync wait condition of all blocked command
5173 * queues of the group.
5174 *
5175 * @group: Pointer to the command queue group that has blocked command queue(s)
5176 * bound to it.
5177 *
5178 * Return: true if sync wait condition is satisfied for at least one blocked
5179 * queue of the group.
5180 */
5181 static bool group_sync_updated(struct kbase_queue_group *group)
5182 {
5183 bool updated = false;
5184 int stream;
5185
5186 /* Groups can also be blocked on-slot during protected mode. */
5187 WARN_ON(group->run_state != KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC &&
5188 group->run_state != KBASE_CSF_GROUP_IDLE);
5189
5190 for (stream = 0; stream < MAX_SUPPORTED_STREAMS_PER_GROUP; ++stream) {
5191 struct kbase_queue *const queue = group->bound_queues[stream];
5192
5193 /* To check the necessity of sync-wait evaluation,
5194 * we rely on the cached 'status_wait' instead of reading it
5195 * directly from shared memory as the CSG has already been
5196 * evicted from the CSG slot, thus this CSG doesn't have
5197 * valid information in the shared memory.
5198 */
5199 if (queue && queue->enabled &&
5200 CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait))
5201 if (evaluate_sync_update(queue)) {
5202 updated = true;
5203 queue->status_wait = 0;
5204 }
5205 }
5206
5207 return updated;
5208 }
5209
5210 /**
5211 * scheduler_get_protm_enter_async_group() - Check if the GPU queue group
5212 * can be now allowed to execute in protected mode.
5213 *
5214 * @kbdev: Pointer to the GPU device.
5215 * @group: Pointer to the GPU queue group.
5216 *
5217 * This function is called outside the scheduling tick/tock to determine
5218 * if the given GPU queue group can now execute in protected mode or not.
5219 * If the group pointer passed is NULL then the evaluation is done for the
5220 * highest priority group on the scheduler-maintained group lists, without
5221 * the tick-associated rotation actions. This is referred to as the 'top-group'
5222 * in a tock action sense.
5223 *
5224 * It returns the same group pointer that was passed as an argument if that
5225 * group matches the highest priority group and has pending protected region
5226 * requests, otherwise NULL is returned.
5227 *
5228 * If the group pointer passed is NULL then the internally evaluated highest
5229 * priority group is returned if it has pending protected region requests,
5230 * otherwise NULL is returned.
5231 *
5232 * The evaluated highest priority group may not necessarily be the same as the
5233 * scheduler->top_grp. This can happen if there is a dynamic de-idle update
5234 * during the tick interval for some on-slot groups that were idle during the
5235 * scheduler's normal scheduling action, where the scheduler->top_grp was set.
5236 * The recorded scheduler->top_grp is untouched by this evaluation, so it will
5237 * not affect the scheduler context/priority list rotation arrangement.
5238 *
5239 * Return: the pointer to queue group that can currently execute in protected
5240 * mode or NULL.
5241 */
5242 static struct kbase_queue_group *scheduler_get_protm_enter_async_group(
5243 struct kbase_device *const kbdev,
5244 struct kbase_queue_group *const group)
5245 {
5246 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
5247 struct kbase_queue_group *match_grp, *input_grp;
5248
5249 lockdep_assert_held(&scheduler->lock);
5250
5251 if (scheduler->state != SCHED_INACTIVE)
5252 return NULL;
5253
5254 match_grp = get_tock_top_group(scheduler);
5255 input_grp = group ? group : match_grp;
5256
5257 if (input_grp && (input_grp == match_grp)) {
5258 struct kbase_csf_cmd_stream_group_info *ginfo =
5259 &kbdev->csf.global_iface.groups[0];
5260 unsigned long *pending =
5261 input_grp->protm_pending_bitmap;
5262 unsigned long flags;
5263
5264 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
5265
5266 if (kbase_csf_scheduler_protected_mode_in_use(kbdev) ||
5267 bitmap_empty(pending, ginfo->stream_num))
5268 input_grp = NULL;
5269
5270 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
5271 } else {
5272 input_grp = NULL;
5273 }
5274
5275 return input_grp;
5276 }
5277
5278 void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group)
5279 {
5280 struct kbase_device *const kbdev = group->kctx->kbdev;
5281 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
5282
5283 int err = kbase_reset_gpu_try_prevent(kbdev);
5284 /* Regardless of whether reset failed or is currently happening, exit
5285 * early
5286 */
5287 if (err)
5288 return;
5289
5290 mutex_lock(&scheduler->lock);
5291
5292 /* Check if the group is now eligible for execution in protected mode. */
5293 if (scheduler_get_protm_enter_async_group(kbdev, group))
5294 scheduler_group_check_protm_enter(kbdev, group);
5295
5296 mutex_unlock(&scheduler->lock);
5297 kbase_reset_gpu_allow(kbdev);
5298 }
5299
5300 /**
5301 * check_sync_update_for_on_slot_group() - Check the sync wait condition
5302 * for all the queues bound to
5303 * the given on-slot group.
5304 *
5305 * @group: Pointer to the on-slot group that requires evaluation.
5306 *
5307 * This function is called either when the GPU is in protected mode and there
5308 * are on-slot idle groups with a higher priority than the active protected
5309 * mode group, or when a CQS object is signaled whilst the GPU is in the
5310 * sleep state.
5311 * This function will evaluate the sync condition, if any, of all the queues
5312 * bound to the given group.
5313 *
5314 * Return: true if the sync condition of at least one queue has been satisfied.
5315 */
5316 static bool check_sync_update_for_on_slot_group(
5317 struct kbase_queue_group *group)
5318 {
5319 struct kbase_device *const kbdev = group->kctx->kbdev;
5320 struct kbase_csf_scheduler *const scheduler =
5321 &kbdev->csf.scheduler;
5322 bool sync_update_done = false;
5323 int i;
5324
5325 lockdep_assert_held(&scheduler->lock);
5326
5327 for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) {
5328 struct kbase_queue *queue = group->bound_queues[i];
5329
5330 if (queue && queue->enabled && !sync_update_done) {
5331 struct kbase_csf_cmd_stream_group_info *const ginfo =
5332 &kbdev->csf.global_iface.groups[group->csg_nr];
5333 struct kbase_csf_cmd_stream_info *const stream =
5334 &ginfo->streams[queue->csi_index];
5335 u32 status = kbase_csf_firmware_cs_output(
5336 stream, CS_STATUS_WAIT);
5337 unsigned long flags;
5338
5339 KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, QUEUE_SYNC_STATUS_WAIT,
5340 queue->group, queue, status);
5341
5342 if (!CS_STATUS_WAIT_SYNC_WAIT_GET(status))
5343 continue;
5344
5345 /* Save the information of sync object of the command
5346 * queue so the callback function, 'group_sync_updated'
5347 * can evaluate the sync object when it gets updated
5348 * later.
5349 */
5350 queue->status_wait = status;
5351 queue->sync_ptr = kbase_csf_firmware_cs_output(
5352 stream, CS_STATUS_WAIT_SYNC_POINTER_LO);
5353 queue->sync_ptr |= (u64)kbase_csf_firmware_cs_output(
5354 stream, CS_STATUS_WAIT_SYNC_POINTER_HI) << 32;
5355 queue->sync_value = kbase_csf_firmware_cs_output(
5356 stream, CS_STATUS_WAIT_SYNC_VALUE);
5357 queue->blocked_reason =
5358 CS_STATUS_BLOCKED_REASON_REASON_GET(
5359 kbase_csf_firmware_cs_output(
5360 stream,
5361 CS_STATUS_BLOCKED_REASON));
5362
5363 if (!evaluate_sync_update(queue))
5364 continue;
5365
5366 /* Update csg_slots_idle_mask and group's run_state */
5367 if (group->run_state != KBASE_CSF_GROUP_RUNNABLE) {
5368 /* Only clear the group's idle flag if it has been dealt
5369 * with by the scheduler's tick/tock action, otherwise
5370 * leave it untouched.
5371 */
5372 spin_lock_irqsave(&scheduler->interrupt_lock,
5373 flags);
5374 clear_bit((unsigned int)group->csg_nr,
5375 scheduler->csg_slots_idle_mask);
5376 KBASE_KTRACE_ADD_CSF_GRP(
5377 kbdev, CSG_SLOT_IDLE_CLEAR, group,
5378 scheduler->csg_slots_idle_mask[0]);
5379 spin_unlock_irqrestore(
5380 &scheduler->interrupt_lock, flags);
5381 group->run_state = KBASE_CSF_GROUP_RUNNABLE;
5382 }
5383
5384 KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SYNC_UPDATE_DONE, group, 0u);
5385 sync_update_done = true;
5386 }
5387 }
5388
5389 return sync_update_done;
5390 }
5391
5392 /**
5393 * check_sync_update_for_idle_groups_protm() - Check the sync wait condition
5394 * for the idle groups on slot
5395 * during protected mode.
5396 *
5397 * @kbdev: Pointer to the GPU device
5398 *
5399 * This function checks the GPU queues of all the on-slot idle groups that,
5400 * during protected mode, have a higher priority than the active protected
5401 * mode group.
5402 *
5403 * Return: true if the sync condition of at least one queue in a group has been
5404 * satisfied.
5405 */
5406 static bool check_sync_update_for_idle_groups_protm(struct kbase_device *kbdev)
5407 {
5408 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
5409 struct kbase_queue_group *protm_grp;
5410 bool exit_protm = false;
5411 unsigned long flags;
5412 u32 num_groups;
5413 u32 i;
5414
5415 lockdep_assert_held(&scheduler->lock);
5416
5417 spin_lock_irqsave(&scheduler->interrupt_lock, flags);
5418 protm_grp = scheduler->active_protm_grp;
5419 spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
5420
5421 if (!protm_grp)
5422 return exit_protm;
5423
5424 num_groups = kbdev->csf.global_iface.group_num;
5425
5426 for_each_set_bit(i, scheduler->csg_slots_idle_mask, num_groups) {
5427 struct kbase_csf_csg_slot *csg_slot =
5428 &scheduler->csg_slots[i];
5429 struct kbase_queue_group *group = csg_slot->resident_group;
5430
5431 if (group->scan_seq_num < protm_grp->scan_seq_num) {
5432 /* If sync update has been performed for the group that
5433 * has a higher priority than the protm group, then we
5434 * need to exit protected mode.
5435 */
5436 if (check_sync_update_for_on_slot_group(group))
5437 exit_protm = true;
5438 }
5439 }
5440
5441 return exit_protm;
5442 }
5443
5444 static void check_sync_update_in_sleep_mode(struct kbase_device *kbdev)
5445 {
5446 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
5447 u32 const num_groups = kbdev->csf.global_iface.group_num;
5448 u32 csg_nr;
5449
5450 lockdep_assert_held(&scheduler->lock);
5451
5452 for (csg_nr = 0; csg_nr < num_groups; csg_nr++) {
5453 struct kbase_queue_group *const group =
5454 kbdev->csf.scheduler.csg_slots[csg_nr].resident_group;
5455
5456 if (!group)
5457 continue;
5458
5459 if (check_sync_update_for_on_slot_group(group)) {
5460 scheduler_wakeup(kbdev, true);
5461 return;
5462 }
5463 }
5464 }
5465
5466 /**
5467 * check_group_sync_update_worker() - Check the sync wait condition for all the
5468 * blocked queue groups
5469 *
5470 * @work: Pointer to the context-specific work item for evaluating the wait
5471 * condition for all the queue groups in idle_wait_groups list.
5472 *
5473 * This function checks the gpu queues of all the groups present in both
5474 * idle_wait_groups list of a context and all on slot idle groups (if GPU
5475 * is in protected mode).
5476 * If the sync wait condition for at least one queue bound to the group has
5477 * been satisfied then the group is moved to the per context list of
5478 * runnable groups so that Scheduler can consider scheduling the group
5479 * in next tick or exit protected mode.
5480 */
5481 static void check_group_sync_update_worker(struct work_struct *work)
5482 {
5483 struct kbase_context *const kctx = container_of(work,
5484 struct kbase_context, csf.sched.sync_update_work);
5485 struct kbase_device *const kbdev = kctx->kbdev;
5486 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
5487 bool sync_updated = false;
5488
5489 mutex_lock(&scheduler->lock);
5490
5491 KBASE_KTRACE_ADD(kbdev, GROUP_SYNC_UPDATE_WORKER_BEGIN, kctx, 0u);
5492 if (kctx->csf.sched.num_idle_wait_grps != 0) {
5493 struct kbase_queue_group *group, *temp;
5494
5495 list_for_each_entry_safe(group, temp,
5496 &kctx->csf.sched.idle_wait_groups, link) {
5497 if (group_sync_updated(group)) {
5498 sync_updated = true;
5499 /* Move this group back in to the runnable
5500 * groups list of the context.
5501 */
5502 update_idle_suspended_group_state(group);
5503 KBASE_KTRACE_ADD_CSF_GRP(kbdev, GROUP_SYNC_UPDATE_DONE, group, 0u);
5504 }
5505 }
5506 } else {
5507 WARN_ON(!list_empty(&kctx->csf.sched.idle_wait_groups));
5508 }
5509
5510 if (check_sync_update_for_idle_groups_protm(kbdev)) {
5511 scheduler_force_protm_exit(kbdev);
5512 sync_updated = true;
5513 }
5514
5515 /* If the scheduler is in the sleeping state, re-activate it to
5516 * serve on-slot CSGs blocked on a CQS object that has been signaled.
5517 */
5518 if (!sync_updated && (scheduler->state == SCHED_SLEEPING))
5519 check_sync_update_in_sleep_mode(kbdev);
5520
5521 KBASE_KTRACE_ADD(kbdev, GROUP_SYNC_UPDATE_WORKER_END, kctx, 0u);
5522
5523 mutex_unlock(&scheduler->lock);
5524 }
5525
5526 static
5527 enum kbase_csf_event_callback_action check_group_sync_update_cb(void *param)
5528 {
5529 struct kbase_context *const kctx = param;
5530
5531 KBASE_KTRACE_ADD(kctx->kbdev, SYNC_UPDATE_EVENT, kctx, 0u);
5532 queue_work(kctx->csf.sched.sync_update_wq,
5533 &kctx->csf.sched.sync_update_work);
5534
5535 return KBASE_CSF_EVENT_CALLBACK_KEEP;
5536 }
5537
5538 int kbase_csf_scheduler_context_init(struct kbase_context *kctx)
5539 {
5540 int priority;
5541 int err;
5542
5543 for (priority = 0; priority < KBASE_QUEUE_GROUP_PRIORITY_COUNT;
5544 ++priority) {
5545 INIT_LIST_HEAD(&kctx->csf.sched.runnable_groups[priority]);
5546 }
5547
5548 kctx->csf.sched.num_runnable_grps = 0;
5549 INIT_LIST_HEAD(&kctx->csf.sched.idle_wait_groups);
5550 kctx->csf.sched.num_idle_wait_grps = 0;
5551 kctx->csf.sched.ngrp_to_schedule = 0;
5552
5553 kctx->csf.sched.sync_update_wq =
5554 alloc_ordered_workqueue("mali_kbase_csf_sync_update_wq",
5555 WQ_HIGHPRI);
5556 if (!kctx->csf.sched.sync_update_wq) {
5557 dev_err(kctx->kbdev->dev,
5558 "Failed to initialize scheduler context workqueue");
5559 return -ENOMEM;
5560 }
5561
5562 INIT_WORK(&kctx->csf.sched.sync_update_work,
5563 check_group_sync_update_worker);
5564
5565 err = kbase_csf_event_wait_add(kctx, check_group_sync_update_cb, kctx);
5566
5567 if (err) {
5568 dev_err(kctx->kbdev->dev,
5569 "Failed to register a sync update callback");
5570 destroy_workqueue(kctx->csf.sched.sync_update_wq);
5571 }
5572
5573 return err;
5574 }
5575
5576 void kbase_csf_scheduler_context_term(struct kbase_context *kctx)
5577 {
5578 kbase_csf_event_wait_remove(kctx, check_group_sync_update_cb, kctx);
5579 cancel_work_sync(&kctx->csf.sched.sync_update_work);
5580 destroy_workqueue(kctx->csf.sched.sync_update_wq);
5581 }
5582
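/*
 * Lifecycle sketch (illustrative): the per-context scheduler state created by
 * kbase_csf_scheduler_context_init() is expected to be torn down with
 * kbase_csf_scheduler_context_term() once the context is no longer used, e.g.
 *
 *	err = kbase_csf_scheduler_context_init(kctx);
 *	if (err)
 *		return err;
 *	... use CSF queue groups on kctx ...
 *	kbase_csf_scheduler_context_term(kctx);
 */
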
5583 int kbase_csf_scheduler_init(struct kbase_device *kbdev)
5584 {
5585 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
5586 u32 num_groups = kbdev->csf.global_iface.group_num;
5587
5588 bitmap_zero(scheduler->csg_inuse_bitmap, num_groups);
5589 bitmap_zero(scheduler->csg_slots_idle_mask, num_groups);
5590
5591 scheduler->csg_slots = kcalloc(num_groups,
5592 sizeof(*scheduler->csg_slots), GFP_KERNEL);
5593 if (!scheduler->csg_slots) {
5594 dev_err(kbdev->dev,
5595 "Failed to allocate memory for csg slot status array\n");
5596 return -ENOMEM;
5597 }
5598
5599 return 0;
5600 }
5601
5602 int kbase_csf_scheduler_early_init(struct kbase_device *kbdev)
5603 {
5604 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
5605
5606 scheduler->timer_enabled = true;
5607
5608 scheduler->wq = alloc_ordered_workqueue("csf_scheduler_wq", WQ_HIGHPRI);
5609 if (!scheduler->wq) {
5610 dev_err(kbdev->dev, "Failed to allocate scheduler workqueue\n");
5611 return -ENOMEM;
5612 }
5613
5614 INIT_WORK(&scheduler->tick_work, schedule_on_tick);
5615 INIT_DEFERRABLE_WORK(&scheduler->tock_work, schedule_on_tock);
5616
5617 INIT_DEFERRABLE_WORK(&scheduler->ping_work, firmware_aliveness_monitor);
5618
5619 mutex_init(&scheduler->lock);
5620 spin_lock_init(&scheduler->interrupt_lock);
5621
5622 /* Internal lists */
5623 INIT_LIST_HEAD(&scheduler->runnable_kctxs);
5624 INIT_LIST_HEAD(&scheduler->groups_to_schedule);
5625 INIT_LIST_HEAD(&scheduler->idle_groups_to_schedule);
5626
5627 BUILD_BUG_ON(MAX_SUPPORTED_CSGS >
5628 (sizeof(scheduler->csgs_events_enable_mask) * BITS_PER_BYTE));
5629 bitmap_fill(scheduler->csgs_events_enable_mask, MAX_SUPPORTED_CSGS);
5630 scheduler->state = SCHED_SUSPENDED;
5631 scheduler->pm_active_count = 0;
5632 scheduler->ngrp_to_schedule = 0;
5633 scheduler->total_runnable_grps = 0;
5634 scheduler->top_ctx = NULL;
5635 scheduler->top_grp = NULL;
5636 scheduler->last_schedule = 0;
5637 scheduler->tock_pending_request = false;
5638 scheduler->active_protm_grp = NULL;
5639 scheduler->gpu_idle_fw_timer_enabled = false;
5640 scheduler->csg_scheduling_period_ms = CSF_SCHEDULER_TIME_TICK_MS;
5641 scheduler_doorbell_init(kbdev);
5642
5643 INIT_WORK(&scheduler->gpu_idle_work, gpu_idle_worker);
5644 atomic_set(&scheduler->non_idle_offslot_grps, 0);
5645
5646 hrtimer_init(&scheduler->tick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
5647 scheduler->tick_timer.function = tick_timer_callback;
5648 scheduler->tick_timer_active = false;
5649
5650 return 0;
5651 }
5652
5653 void kbase_csf_scheduler_term(struct kbase_device *kbdev)
5654 {
5655 if (kbdev->csf.scheduler.csg_slots) {
5656 WARN_ON(atomic_read(&kbdev->csf.scheduler.non_idle_offslot_grps));
5657 /* The unload of Driver can take place only when all contexts have
5658 * been terminated. The groups that were not terminated by the User
5659 * are terminated on context termination. So no CSGs are expected
5660 * to be active at the time of Driver unload.
5661 */
5662 WARN_ON(kbase_csf_scheduler_get_nr_active_csgs(kbdev));
5663 flush_work(&kbdev->csf.scheduler.gpu_idle_work);
5664 mutex_lock(&kbdev->csf.scheduler.lock);
5665
5666 if (kbdev->csf.scheduler.state != SCHED_SUSPENDED) {
5667 /* The power policy could prevent the Scheduler from
5668 * getting suspended when GPU becomes idle.
5669 */
5670 WARN_ON(kbase_pm_idle_groups_sched_suspendable(kbdev));
5671 scheduler_suspend(kbdev);
5672 }
5673
5674 mutex_unlock(&kbdev->csf.scheduler.lock);
5675 cancel_delayed_work_sync(&kbdev->csf.scheduler.ping_work);
5676 cancel_tick_timer(kbdev);
5677 cancel_work_sync(&kbdev->csf.scheduler.tick_work);
5678 cancel_tock_work(&kbdev->csf.scheduler);
5679 mutex_destroy(&kbdev->csf.scheduler.lock);
5680 kfree(kbdev->csf.scheduler.csg_slots);
5681 kbdev->csf.scheduler.csg_slots = NULL;
5682 }
5683 }
5684
5685 void kbase_csf_scheduler_early_term(struct kbase_device *kbdev)
5686 {
5687 if (kbdev->csf.scheduler.wq)
5688 destroy_workqueue(kbdev->csf.scheduler.wq);
5689 }
5690
5691 /**
5692 * scheduler_enable_tick_timer_nolock - Enable the scheduler tick timer.
5693 *
5694 * @kbdev: Instance of a GPU platform device that implements a CSF interface.
5695 *
5696 * This function will restart the scheduler tick so that regular scheduling can
5697 * be resumed without any explicit trigger (like kicking of GPU queues). This
5698 * is a variant of kbase_csf_scheduler_enable_tick_timer() that assumes the
5699 * CSF scheduler lock to already have been held.
5700 */
5701 static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev)
5702 {
5703 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
5704
5705 lockdep_assert_held(&kbdev->csf.scheduler.lock);
5706
5707 if (unlikely(!scheduler_timer_is_enabled_nolock(kbdev)))
5708 return;
5709
5710 WARN_ON((scheduler->state != SCHED_INACTIVE) &&
5711 (scheduler->state != SCHED_SUSPENDED) &&
5712 (scheduler->state != SCHED_SLEEPING));
5713
5714 if (scheduler->total_runnable_grps > 0) {
5715 enqueue_tick_work(kbdev);
5716 dev_dbg(kbdev->dev, "Re-enabling the scheduler timer\n");
5717 } else if (scheduler->state != SCHED_SUSPENDED) {
5718 queue_work(system_wq, &scheduler->gpu_idle_work);
5719 }
5720 }
5721
5722 void kbase_csf_scheduler_enable_tick_timer(struct kbase_device *kbdev)
5723 {
5724 mutex_lock(&kbdev->csf.scheduler.lock);
5725 scheduler_enable_tick_timer_nolock(kbdev);
5726 mutex_unlock(&kbdev->csf.scheduler.lock);
5727 }
5728
5729 bool kbase_csf_scheduler_timer_is_enabled(struct kbase_device *kbdev)
5730 {
5731 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
5732 bool enabled;
5733
5734 mutex_lock(&scheduler->lock);
5735 enabled = scheduler_timer_is_enabled_nolock(kbdev);
5736 mutex_unlock(&scheduler->lock);
5737
5738 return enabled;
5739 }
5740
5741 void kbase_csf_scheduler_timer_set_enabled(struct kbase_device *kbdev,
5742 bool enable)
5743 {
5744 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
5745 bool currently_enabled;
5746
5747 mutex_lock(&scheduler->lock);
5748
5749 currently_enabled = scheduler_timer_is_enabled_nolock(kbdev);
5750 if (currently_enabled && !enable) {
5751 scheduler->timer_enabled = false;
5752 cancel_tick_timer(kbdev);
5753 cancel_delayed_work(&scheduler->tock_work);
5754 scheduler->tock_pending_request = false;
5755 mutex_unlock(&scheduler->lock);
5756 /* The non-sync version to cancel the normal work item is not
5757 * available, so need to drop the lock before cancellation.
5758 */
5759 cancel_work_sync(&scheduler->tick_work);
5760 return;
5761 }
5762
5763 if (!currently_enabled && enable) {
5764 scheduler->timer_enabled = true;
5765
5766 scheduler_enable_tick_timer_nolock(kbdev);
5767 }
5768
5769 mutex_unlock(&scheduler->lock);
5770 }
5771
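/*
 * Usage sketch (illustrative): disabling the scheduler timer stops the
 * periodic tick/tock driven scheduling; groups then only get scheduled on
 * explicit kicks until the timer is re-enabled, e.g.
 *
 *	kbase_csf_scheduler_timer_set_enabled(kbdev, false);
 *	...
 *	kbase_csf_scheduler_kick(kbdev);	<- manual scheduling pass
 *	...
 *	kbase_csf_scheduler_timer_set_enabled(kbdev, true);
 */
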
5772 void kbase_csf_scheduler_kick(struct kbase_device *kbdev)
5773 {
5774 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
5775
5776 mutex_lock(&scheduler->lock);
5777
5778 if (unlikely(scheduler_timer_is_enabled_nolock(kbdev)))
5779 goto out;
5780
5781 if (scheduler->total_runnable_grps > 0) {
5782 enqueue_tick_work(kbdev);
5783 dev_dbg(kbdev->dev, "Kicking the scheduler manually\n");
5784 }
5785
5786 out:
5787 mutex_unlock(&scheduler->lock);
5788 }
5789
5790 int kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev)
5791 {
5792 int result = 0;
5793 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
5794
5795 /* Cancel any potential queued delayed work(s) */
5796 cancel_work_sync(&scheduler->tick_work);
5797 cancel_tock_work(scheduler);
5798
5799 result = kbase_reset_gpu_prevent_and_wait(kbdev);
5800 if (result) {
5801 dev_warn(kbdev->dev,
5802 "Aborting PM suspend: failed to prevent GPU reset.\n");
5803 return result;
5804 }
5805
5806 mutex_lock(&scheduler->lock);
5807
5808 disable_gpu_idle_fw_timer(kbdev);
5809
5810 #ifdef KBASE_PM_RUNTIME
5811 /* If scheduler is in sleeping state, then MCU needs to be activated
5812 * to suspend CSGs.
5813 */
5814 if (scheduler->state == SCHED_SLEEPING) {
5815 dev_info(kbdev->dev, "Activating MCU out of sleep on system suspend");
5816 result = force_scheduler_to_exit_sleep(kbdev);
5817 if (result) {
5818 dev_warn(kbdev->dev, "Scheduler failed to exit from sleep");
5819 goto exit;
5820 }
5821 }
5822 #endif
5823 if (scheduler->state != SCHED_SUSPENDED) {
5824 result = suspend_active_groups_on_powerdown(kbdev, true);
5825 if (result) {
5826 dev_warn(kbdev->dev, "failed to suspend active groups");
5827 goto exit;
5828 } else {
5829 dev_info(kbdev->dev, "Scheduler PM suspend");
5830 scheduler_suspend(kbdev);
5831 cancel_tick_timer(kbdev);
5832 }
5833 }
5834
5835 exit:
5836 mutex_unlock(&scheduler->lock);
5837
5838 kbase_reset_gpu_allow(kbdev);
5839
5840 return result;
5841 }
5842 KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_suspend);
5843
5844 void kbase_csf_scheduler_pm_resume(struct kbase_device *kbdev)
5845 {
5846 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
5847
5848 mutex_lock(&scheduler->lock);
5849 if ((scheduler->total_runnable_grps > 0) &&
5850 (scheduler->state == SCHED_SUSPENDED)) {
5851 dev_info(kbdev->dev, "Scheduler PM resume");
5852 scheduler_wakeup(kbdev, true);
5853 }
5854 mutex_unlock(&scheduler->lock);
5855 }
5856 KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_resume);
5857
5858 void kbase_csf_scheduler_pm_active(struct kbase_device *kbdev)
5859 {
5860 /* Here the lock is taken to synchronize against the runtime suspend
5861 * callback function, which may need to wake up the MCU for suspending
5862 * the CSGs before powering down the GPU.
5863 */
5864 mutex_lock(&kbdev->csf.scheduler.lock);
5865 scheduler_pm_active_handle_suspend(kbdev,
5866 KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE);
5867 mutex_unlock(&kbdev->csf.scheduler.lock);
5868 }
5869 KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_active);
5870
5871 void kbase_csf_scheduler_pm_idle(struct kbase_device *kbdev)
5872 {
5873 /* Here the lock is taken just to maintain symmetry with
5874 * kbase_csf_scheduler_pm_active().
5875 */
5876 mutex_lock(&kbdev->csf.scheduler.lock);
5877 scheduler_pm_idle(kbdev);
5878 mutex_unlock(&kbdev->csf.scheduler.lock);
5879 }
5880 KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_idle);
5881
5882 int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev)
5883 {
5884 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
5885 unsigned long flags;
5886 int err;
5887
5888 kbase_pm_lock(kbdev);
5889 WARN_ON(!kbdev->pm.active_count);
5890 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
5891 WARN_ON(!scheduler->pm_active_count);
5892 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
5893 kbase_pm_unlock(kbdev);
5894
5895 kbase_pm_wait_for_poweroff_work_complete(kbdev);
5896
5897 err = kbase_pm_wait_for_desired_state(kbdev);
5898 if (!err) {
5899 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
5900 WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_ON);
5901 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
5902 }
5903
5904 return err;
5905 }
5906 KBASE_EXPORT_TEST_API(kbase_csf_scheduler_wait_mcu_active);
5907
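/*
 * Pairing sketch (illustrative, not a mandated sequence): a caller that needs
 * the MCU firmware to be running typically brackets its work with the helpers
 * above, e.g.
 *
 *	kbase_csf_scheduler_pm_active(kbdev);
 *	if (!kbase_csf_scheduler_wait_mcu_active(kbdev)) {
 *		... talk to the firmware while the MCU is on ...
 *	}
 *	kbase_csf_scheduler_pm_idle(kbdev);
 */
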
5908 #ifdef KBASE_PM_RUNTIME
5909 int kbase_csf_scheduler_handle_runtime_suspend(struct kbase_device *kbdev)
5910 {
5911 struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
5912 unsigned long flags;
5913 int ret;
5914
5915 dev_dbg(kbdev->dev, "Handling runtime suspend");
5916
5917 kbase_reset_gpu_assert_prevented(kbdev);
5918 lockdep_assert_held(&scheduler->lock);
5919 WARN_ON(scheduler->pm_active_count);
5920
5921 if (scheduler->state == SCHED_SUSPENDED) {
5922 WARN_ON(kbdev->pm.backend.gpu_sleep_mode_active);
5923 return 0;
5924 }
5925
5926 ret = suspend_active_groups_on_powerdown(kbdev, false);
5927
5928 if (ret) {
5929 dev_dbg(kbdev->dev, "Aborting runtime suspend (grps: %d)",
5930 atomic_read(&scheduler->non_idle_offslot_grps));
5931
5932 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
5933 kbdev->pm.backend.exit_gpu_sleep_mode = true;
5934 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
5935
5936 kbase_csf_scheduler_invoke_tick(kbdev);
5937 return ret;
5938 }
5939
5940 scheduler->state = SCHED_SUSPENDED;
5941 spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
5942 kbdev->pm.backend.gpu_sleep_mode_active = false;
5943 spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
5944
5945 wake_up_all(&kbdev->csf.event_wait);
5946 return 0;
5947 }
5948
5949 void kbase_csf_scheduler_reval_idleness_post_sleep(struct kbase_device *kbdev)
5950 {
5951 u32 csg_nr;
5952
5953 lockdep_assert_held(&kbdev->hwaccess_lock);
5954
5955 WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_IN_SLEEP);
5956
5957 for (csg_nr = 0; csg_nr < kbdev->csf.global_iface.group_num; csg_nr++) {
5958 struct kbase_csf_cmd_stream_group_info *ginfo =
5959 &kbdev->csf.global_iface.groups[csg_nr];
5960 bool csg_idle;
5961
5962 if (!kbdev->csf.scheduler.csg_slots[csg_nr].resident_group)
5963 continue;
5964
5965 csg_idle =
5966 kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) &
5967 CSG_STATUS_STATE_IDLE_MASK;
5968 if (!csg_idle) {
5969 dev_dbg(kbdev->dev,
5970 "Re-activate Scheduler after MCU sleep");
5971 kbdev->pm.backend.exit_gpu_sleep_mode = true;
5972 kbase_csf_scheduler_invoke_tick(kbdev);
5973 break;
5974 }
5975 }
5976 }
5977
5978 void kbase_csf_scheduler_force_sleep(struct kbase_device *kbdev)
5979 {
5980 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
5981
5982 mutex_lock(&scheduler->lock);
5983 if (kbase_pm_gpu_sleep_allowed(kbdev) &&
5984 (scheduler->state == SCHED_INACTIVE))
5985 scheduler_sleep_on_idle(kbdev);
5986 mutex_unlock(&scheduler->lock);
5987 }
5988 #endif
5989
5990 void kbase_csf_scheduler_force_wakeup(struct kbase_device *kbdev)
5991 {
5992 struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
5993
5994 mutex_lock(&scheduler->lock);
5995 scheduler_wakeup(kbdev, true);
5996 mutex_unlock(&scheduler->lock);
5997 }
5998