1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * CPUFreq governor based on scheduler-provided CPU utilization data.
4 *
5 * Copyright (C) 2016, Intel Corporation
6 * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
7 */
8
/* Prefix messages printed through pr_err()/pr_warn() in this file with the module name. */
#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
10
11 #include "sched.h"
12
13 #include <linux/sched/cpufreq.h>
14 #include <trace/events/power.h>
15 #include <trace/hooks/sched.h>
16
/* Smallest non-zero IO-wait boost: one eighth of SCHED_CAPACITY_SCALE. */
#define IOWAIT_BOOST_MIN	(SCHED_CAPACITY_SCALE / 8)
18
19 struct sugov_tunables {
20 struct gov_attr_set attr_set;
21 unsigned int rate_limit_us;
22 };
23
24 struct sugov_policy {
25 struct cpufreq_policy *policy;
26
27 struct sugov_tunables *tunables;
28 struct list_head tunables_hook;
29
30 raw_spinlock_t update_lock;
31 u64 last_freq_update_time;
32 s64 freq_update_delay_ns;
33 unsigned int next_freq;
34 unsigned int cached_raw_freq;
35
36 /* The next fields are only needed if fast switch cannot be used: */
37 struct irq_work irq_work;
38 struct kthread_work work;
39 struct mutex work_lock;
40 struct kthread_worker worker;
41 struct task_struct *thread;
42 bool work_in_progress;
43
44 bool limits_changed;
45 bool need_freq_update;
46 };
47
48 struct sugov_cpu {
49 struct update_util_data update_util;
50 struct sugov_policy *sg_policy;
51 unsigned int cpu;
52
53 bool iowait_boost_pending;
54 unsigned int iowait_boost;
55 u64 last_update;
56
57 unsigned long util;
58 unsigned long bw_dl;
59 unsigned long max;
60
61 /* The field below is for single-CPU policies only: */
62 #ifdef CONFIG_NO_HZ_COMMON
63 unsigned long saved_idle_calls;
64 #endif
65 };
66
67 static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);
68
69 /************************ Governor internals ***********************/
70
sugov_should_update_freq(struct sugov_policy * sg_policy,u64 time)71 static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
72 {
73 s64 delta_ns;
74
75 /*
76 * Since cpufreq_update_util() is called with rq->lock held for
77 * the @target_cpu, our per-CPU data is fully serialized.
78 *
79 * However, drivers cannot in general deal with cross-CPU
80 * requests, so while get_next_freq() will work, our
81 * sugov_update_commit() call may not for the fast switching platforms.
82 *
83 * Hence stop here for remote requests if they aren't supported
84 * by the hardware, as calculating the frequency is pointless if
85 * we cannot in fact act on it.
86 *
87 * This is needed on the slow switching platforms too to prevent CPUs
88 * going offline from leaving stale IRQ work items behind.
89 */
90 if (!cpufreq_this_cpu_can_update(sg_policy->policy))
91 return false;
92
93 if (unlikely(sg_policy->limits_changed)) {
94 sg_policy->limits_changed = false;
95 sg_policy->need_freq_update = true;
96 return true;
97 }
98
99 delta_ns = time - sg_policy->last_freq_update_time;
100
101 return delta_ns >= sg_policy->freq_update_delay_ns;
102 }
103
sugov_update_next_freq(struct sugov_policy * sg_policy,u64 time,unsigned int next_freq)104 static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time,
105 unsigned int next_freq)
106 {
107 bool should_update = true;
108
109 if (sg_policy->need_freq_update)
110 sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
111 else if (sg_policy->next_freq == next_freq)
112 return false;
113
114 trace_android_rvh_set_sugov_update(sg_policy, next_freq, &should_update);
115 if (!should_update)
116 return false;
117
118 sg_policy->next_freq = next_freq;
119 sg_policy->last_freq_update_time = time;
120
121 return true;
122 }
123
sugov_deferred_update(struct sugov_policy * sg_policy)124 static void sugov_deferred_update(struct sugov_policy *sg_policy)
125 {
126 if (!sg_policy->work_in_progress) {
127 sg_policy->work_in_progress = true;
128 irq_work_queue(&sg_policy->irq_work);
129 }
130 }
131
132 /**
133 * get_next_freq - Compute a new frequency for a given cpufreq policy.
134 * @sg_policy: schedutil policy object to compute the new frequency for.
135 * @util: Current CPU utilization.
136 * @max: CPU capacity.
137 *
138 * If the utilization is frequency-invariant, choose the new frequency to be
139 * proportional to it, that is
140 *
141 * next_freq = C * max_freq * util / max
142 *
143 * Otherwise, approximate the would-be frequency-invariant utilization by
144 * util_raw * (curr_freq / max_freq) which leads to
145 *
146 * next_freq = C * curr_freq * util_raw / max
147 *
148 * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8.
149 *
150 * The lowest driver-supported frequency which is equal or greater than the raw
151 * next_freq (as calculated above) is returned, subject to policy min/max and
152 * cpufreq driver limitations.
153 */
get_next_freq(struct sugov_policy * sg_policy,unsigned long util,unsigned long max)154 static unsigned int get_next_freq(struct sugov_policy *sg_policy,
155 unsigned long util, unsigned long max)
156 {
157 struct cpufreq_policy *policy = sg_policy->policy;
158 unsigned int freq = arch_scale_freq_invariant() ?
159 policy->cpuinfo.max_freq : policy->cur;
160 unsigned long next_freq = 0;
161
162 util = map_util_perf(util);
163 trace_android_vh_map_util_freq(util, freq, max, &next_freq);
164 trace_android_vh_map_util_freq_new(util, freq, max, &next_freq, policy,
165 &sg_policy->need_freq_update);
166 if (next_freq)
167 freq = next_freq;
168 else
169 freq = map_util_freq(util, freq, max);
170
171 if (freq == sg_policy->cached_raw_freq && !sg_policy->need_freq_update)
172 return sg_policy->next_freq;
173
174 sg_policy->cached_raw_freq = freq;
175 return cpufreq_driver_resolve_freq(policy, freq);
176 }
177
sugov_get_util(struct sugov_cpu * sg_cpu)178 static void sugov_get_util(struct sugov_cpu *sg_cpu)
179 {
180 struct rq *rq = cpu_rq(sg_cpu->cpu);
181 unsigned long max = arch_scale_cpu_capacity(sg_cpu->cpu);
182
183 sg_cpu->max = max;
184 sg_cpu->bw_dl = cpu_bw_dl(rq);
185 sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(rq), max,
186 FREQUENCY_UTIL, NULL);
187 }
188
189 /**
190 * sugov_iowait_reset() - Reset the IO boost status of a CPU.
191 * @sg_cpu: the sugov data for the CPU to boost
192 * @time: the update time from the caller
193 * @set_iowait_boost: true if an IO boost has been requested
194 *
195 * The IO wait boost of a task is disabled after a tick since the last update
196 * of a CPU. If a new IO wait boost is requested after more then a tick, then
197 * we enable the boost starting from IOWAIT_BOOST_MIN, which improves energy
198 * efficiency by ignoring sporadic wakeups from IO.
199 */
sugov_iowait_reset(struct sugov_cpu * sg_cpu,u64 time,bool set_iowait_boost)200 static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time,
201 bool set_iowait_boost)
202 {
203 s64 delta_ns = time - sg_cpu->last_update;
204
205 /* Reset boost only if a tick has elapsed since last request */
206 if (delta_ns <= TICK_NSEC)
207 return false;
208
209 sg_cpu->iowait_boost = set_iowait_boost ? IOWAIT_BOOST_MIN : 0;
210 sg_cpu->iowait_boost_pending = set_iowait_boost;
211
212 return true;
213 }
214
215 /**
216 * sugov_iowait_boost() - Updates the IO boost status of a CPU.
217 * @sg_cpu: the sugov data for the CPU to boost
218 * @time: the update time from the caller
219 * @flags: SCHED_CPUFREQ_IOWAIT if the task is waking up after an IO wait
220 *
221 * Each time a task wakes up after an IO operation, the CPU utilization can be
222 * boosted to a certain utilization which doubles at each "frequent and
223 * successive" wakeup from IO, ranging from IOWAIT_BOOST_MIN to the utilization
224 * of the maximum OPP.
225 *
226 * To keep doubling, an IO boost has to be requested at least once per tick,
227 * otherwise we restart from the utilization of the minimum OPP.
228 */
sugov_iowait_boost(struct sugov_cpu * sg_cpu,u64 time,unsigned int flags)229 static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
230 unsigned int flags)
231 {
232 bool set_iowait_boost = flags & SCHED_CPUFREQ_IOWAIT;
233
234 /* Reset boost if the CPU appears to have been idle enough */
235 if (sg_cpu->iowait_boost &&
236 sugov_iowait_reset(sg_cpu, time, set_iowait_boost))
237 return;
238
239 /* Boost only tasks waking up after IO */
240 if (!set_iowait_boost)
241 return;
242
243 /* Ensure boost doubles only one time at each request */
244 if (sg_cpu->iowait_boost_pending)
245 return;
246 sg_cpu->iowait_boost_pending = true;
247
248 /* Double the boost at each request */
249 if (sg_cpu->iowait_boost) {
250 sg_cpu->iowait_boost =
251 min_t(unsigned int, sg_cpu->iowait_boost << 1, SCHED_CAPACITY_SCALE);
252 return;
253 }
254
255 /* First wakeup after IO: start with minimum boost */
256 sg_cpu->iowait_boost = IOWAIT_BOOST_MIN;
257 }
258
259 /**
260 * sugov_iowait_apply() - Apply the IO boost to a CPU.
261 * @sg_cpu: the sugov data for the cpu to boost
262 * @time: the update time from the caller
263 *
264 * A CPU running a task which woken up after an IO operation can have its
265 * utilization boosted to speed up the completion of those IO operations.
266 * The IO boost value is increased each time a task wakes up from IO, in
267 * sugov_iowait_apply(), and it's instead decreased by this function,
268 * each time an increase has not been requested (!iowait_boost_pending).
269 *
270 * A CPU which also appears to have been idle for at least one tick has also
271 * its IO boost utilization reset.
272 *
273 * This mechanism is designed to boost high frequently IO waiting tasks, while
274 * being more conservative on tasks which does sporadic IO operations.
275 */
sugov_iowait_apply(struct sugov_cpu * sg_cpu,u64 time)276 static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time)
277 {
278 unsigned long boost;
279
280 /* No boost currently required */
281 if (!sg_cpu->iowait_boost)
282 return;
283
284 /* Reset boost if the CPU appears to have been idle enough */
285 if (sugov_iowait_reset(sg_cpu, time, false))
286 return;
287
288 if (!sg_cpu->iowait_boost_pending) {
289 /*
290 * No boost pending; reduce the boost value.
291 */
292 sg_cpu->iowait_boost >>= 1;
293 if (sg_cpu->iowait_boost < IOWAIT_BOOST_MIN) {
294 sg_cpu->iowait_boost = 0;
295 return;
296 }
297 }
298
299 sg_cpu->iowait_boost_pending = false;
300
301 /*
302 * sg_cpu->util is already in capacity scale; convert iowait_boost
303 * into the same scale so we can compare.
304 */
305 boost = (sg_cpu->iowait_boost * sg_cpu->max) >> SCHED_CAPACITY_SHIFT;
306 boost = uclamp_rq_util_with(cpu_rq(sg_cpu->cpu), boost, NULL);
307 if (sg_cpu->util < boost)
308 sg_cpu->util = boost;
309 }
310
311 #ifdef CONFIG_NO_HZ_COMMON
sugov_cpu_is_busy(struct sugov_cpu * sg_cpu)312 static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
313 {
314 unsigned long idle_calls = tick_nohz_get_idle_calls_cpu(sg_cpu->cpu);
315 bool ret = idle_calls == sg_cpu->saved_idle_calls;
316
317 sg_cpu->saved_idle_calls = idle_calls;
318 return ret;
319 }
320 #else
sugov_cpu_is_busy(struct sugov_cpu * sg_cpu)321 static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
322 #endif /* CONFIG_NO_HZ_COMMON */
323
324 /*
325 * Make sugov_should_update_freq() ignore the rate limit when DL
326 * has increased the utilization.
327 */
ignore_dl_rate_limit(struct sugov_cpu * sg_cpu)328 static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu)
329 {
330 if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl)
331 sg_cpu->sg_policy->limits_changed = true;
332 }
333
sugov_update_single_common(struct sugov_cpu * sg_cpu,u64 time,unsigned int flags)334 static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu,
335 u64 time, unsigned int flags)
336 {
337 sugov_iowait_boost(sg_cpu, time, flags);
338 sg_cpu->last_update = time;
339
340 ignore_dl_rate_limit(sg_cpu);
341
342 if (!sugov_should_update_freq(sg_cpu->sg_policy, time))
343 return false;
344
345 sugov_get_util(sg_cpu);
346 sugov_iowait_apply(sg_cpu, time);
347
348 return true;
349 }
350
/*
 * Update hook for single-CPU policies that are driven by target
 * frequency.
 */
static void sugov_update_single_freq(struct update_util_data *hook, u64 time,
				     unsigned int flags)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	/* Remembered so it can be restored if the new target is rejected. */
	unsigned int saved_raw_freq = sg_policy->cached_raw_freq;
	unsigned int target;

	if (!sugov_update_single_common(sg_cpu, time, flags))
		return;

	target = get_next_freq(sg_policy, sg_cpu->util, sg_cpu->max);

	/*
	 * A CPU that has not been idle recently should not have its
	 * frequency reduced: the reduction is likely to be premature.
	 */
	if (sugov_cpu_is_busy(sg_cpu) && target < sg_policy->next_freq) {
		target = sg_policy->next_freq;

		/* get_next_freq() updated the cache for a value we discard. */
		sg_policy->cached_raw_freq = saved_raw_freq;
	}

	if (!sugov_update_next_freq(sg_policy, time, target))
		return;

	/*
	 * This code runs under rq->lock for the target CPU, so it cannot run
	 * concurrently on two different CPUs for the same target; the fast
	 * switch case therefore needs no additional locking.
	 */
	if (!sg_policy->policy->fast_switch_enabled) {
		raw_spin_lock(&sg_policy->update_lock);
		sugov_deferred_update(sg_policy);
		raw_spin_unlock(&sg_policy->update_lock);
		return;
	}

	cpufreq_driver_fast_switch(sg_policy->policy, target);
}
390
/*
 * Update hook for single-CPU policies whose driver accepts performance
 * levels directly through cpufreq_driver_adjust_perf().
 */
static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
				     unsigned int flags)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	unsigned long prev_util = sg_cpu->util;
	unsigned long min_perf, target_perf;

	/*
	 * The direct mapping between utilization and performance levels
	 * relies on frequency invariance; without it use the "frequency"
	 * path instead.
	 */
	if (!arch_scale_freq_invariant()) {
		sugov_update_single_freq(hook, time, flags);
		return;
	}

	if (!sugov_update_single_common(sg_cpu, time, flags))
		return;

	/*
	 * A CPU that has not been idle recently keeps its previous target:
	 * lowering the performance level is likely to be premature.
	 */
	if (sugov_cpu_is_busy(sg_cpu) && sg_cpu->util < prev_util)
		sg_cpu->util = prev_util;

	min_perf = map_util_perf(sg_cpu->bw_dl);
	target_perf = map_util_perf(sg_cpu->util);
	cpufreq_driver_adjust_perf(sg_cpu->cpu, min_perf, target_perf,
				   sg_cpu->max);

	sg_cpu->sg_policy->last_freq_update_time = time;
}
422
/*
 * Pick the next frequency for a shared policy from the CPU with the
 * highest util/max ratio among policy->cpus.  Every CPU has its
 * utilization re-sampled and its IO-wait boost applied in the process.
 */
static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
{
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned long top_util = 0, top_max = 1;
	unsigned int j;

	for_each_cpu(j, policy->cpus) {
		struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);

		sugov_get_util(j_sg_cpu);
		sugov_iowait_apply(j_sg_cpu, time);

		/* Cross-multiplied ratio comparison; avoids a division. */
		if (j_sg_cpu->util * top_max <= j_sg_cpu->max * top_util)
			continue;

		top_util = j_sg_cpu->util;
		top_max = j_sg_cpu->max;
	}

	return get_next_freq(sg_policy, top_util, top_max);
}
447
448 static void
sugov_update_shared(struct update_util_data * hook,u64 time,unsigned int flags)449 sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags)
450 {
451 struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
452 struct sugov_policy *sg_policy = sg_cpu->sg_policy;
453 unsigned int next_f;
454
455 raw_spin_lock(&sg_policy->update_lock);
456
457 sugov_iowait_boost(sg_cpu, time, flags);
458 sg_cpu->last_update = time;
459
460 ignore_dl_rate_limit(sg_cpu);
461
462 if (sugov_should_update_freq(sg_policy, time)) {
463 next_f = sugov_next_freq_shared(sg_cpu, time);
464
465 if (!sugov_update_next_freq(sg_policy, time, next_f))
466 goto unlock;
467
468 if (sg_policy->policy->fast_switch_enabled)
469 cpufreq_driver_fast_switch(sg_policy->policy, next_f);
470 else
471 sugov_deferred_update(sg_policy);
472 }
473 unlock:
474 raw_spin_unlock(&sg_policy->update_lock);
475 }
476
sugov_work(struct kthread_work * work)477 static void sugov_work(struct kthread_work *work)
478 {
479 struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);
480 unsigned int freq;
481 unsigned long flags;
482
483 /*
484 * Hold sg_policy->update_lock shortly to handle the case where:
485 * in case sg_policy->next_freq is read here, and then updated by
486 * sugov_deferred_update() just before work_in_progress is set to false
487 * here, we may miss queueing the new update.
488 *
489 * Note: If a work was queued after the update_lock is released,
490 * sugov_work() will just be called again by kthread_work code; and the
491 * request will be proceed before the sugov thread sleeps.
492 */
493 raw_spin_lock_irqsave(&sg_policy->update_lock, flags);
494 freq = sg_policy->next_freq;
495 sg_policy->work_in_progress = false;
496 raw_spin_unlock_irqrestore(&sg_policy->update_lock, flags);
497
498 mutex_lock(&sg_policy->work_lock);
499 __cpufreq_driver_target(sg_policy->policy, freq, CPUFREQ_RELATION_L);
500 mutex_unlock(&sg_policy->work_lock);
501 }
502
sugov_irq_work(struct irq_work * irq_work)503 static void sugov_irq_work(struct irq_work *irq_work)
504 {
505 struct sugov_policy *sg_policy;
506
507 sg_policy = container_of(irq_work, struct sugov_policy, irq_work);
508
509 kthread_queue_work(&sg_policy->worker, &sg_policy->work);
510 }
511
512 /************************** sysfs interface ************************/
513
/* Tunables shared by all policies when governors are not per-policy. */
static struct sugov_tunables *global_tunables;
/* Held by sugov_init() and sugov_exit() while they use global_tunables. */
static DEFINE_MUTEX(global_tunables_lock);
516
to_sugov_tunables(struct gov_attr_set * attr_set)517 static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr_set)
518 {
519 return container_of(attr_set, struct sugov_tunables, attr_set);
520 }
521
rate_limit_us_show(struct gov_attr_set * attr_set,char * buf)522 static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
523 {
524 struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
525
526 return sprintf(buf, "%u\n", tunables->rate_limit_us);
527 }
528
529 static ssize_t
rate_limit_us_store(struct gov_attr_set * attr_set,const char * buf,size_t count)530 rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, size_t count)
531 {
532 struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
533 struct sugov_policy *sg_policy;
534 unsigned int rate_limit_us;
535
536 if (kstrtouint(buf, 10, &rate_limit_us))
537 return -EINVAL;
538
539 tunables->rate_limit_us = rate_limit_us;
540
541 list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook)
542 sg_policy->freq_update_delay_ns = rate_limit_us * NSEC_PER_USEC;
543
544 return count;
545 }
546
547 static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);
548
549 static struct attribute *sugov_attrs[] = {
550 &rate_limit_us.attr,
551 NULL
552 };
553 ATTRIBUTE_GROUPS(sugov);
554
sugov_tunables_free(struct kobject * kobj)555 static void sugov_tunables_free(struct kobject *kobj)
556 {
557 struct gov_attr_set *attr_set = container_of(kobj, struct gov_attr_set, kobj);
558
559 kfree(to_sugov_tunables(attr_set));
560 }
561
562 static struct kobj_type sugov_tunables_ktype = {
563 .default_groups = sugov_groups,
564 .sysfs_ops = &governor_sysfs_ops,
565 .release = &sugov_tunables_free,
566 };
567
568 /********************** cpufreq governor interface *********************/
569
570 struct cpufreq_governor schedutil_gov;
571
sugov_policy_alloc(struct cpufreq_policy * policy)572 static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
573 {
574 struct sugov_policy *sg_policy;
575
576 sg_policy = kzalloc(sizeof(*sg_policy), GFP_KERNEL);
577 if (!sg_policy)
578 return NULL;
579
580 sg_policy->policy = policy;
581 raw_spin_lock_init(&sg_policy->update_lock);
582 return sg_policy;
583 }
584
/* Release a sugov_policy obtained from sugov_policy_alloc(). */
static void sugov_policy_free(struct sugov_policy *sg_policy)
{
	kfree(sg_policy);
}
589
sugov_kthread_create(struct sugov_policy * sg_policy)590 static int sugov_kthread_create(struct sugov_policy *sg_policy)
591 {
592 struct task_struct *thread;
593 struct sched_attr attr = {
594 .size = sizeof(struct sched_attr),
595 .sched_policy = SCHED_DEADLINE,
596 .sched_flags = SCHED_FLAG_SUGOV,
597 .sched_nice = 0,
598 .sched_priority = 0,
599 /*
600 * Fake (unused) bandwidth; workaround to "fix"
601 * priority inheritance.
602 */
603 .sched_runtime = 1000000,
604 .sched_deadline = 10000000,
605 .sched_period = 10000000,
606 };
607 struct cpufreq_policy *policy = sg_policy->policy;
608 int ret;
609
610 /* kthread only required for slow path */
611 if (policy->fast_switch_enabled)
612 return 0;
613
614 trace_android_vh_set_sugov_sched_attr(&attr);
615 kthread_init_work(&sg_policy->work, sugov_work);
616 kthread_init_worker(&sg_policy->worker);
617 thread = kthread_create(kthread_worker_fn, &sg_policy->worker,
618 "sugov:%d",
619 cpumask_first(policy->related_cpus));
620 if (IS_ERR(thread)) {
621 pr_err("failed to create sugov thread: %ld\n", PTR_ERR(thread));
622 return PTR_ERR(thread);
623 }
624
625 ret = sched_setattr_nocheck(thread, &attr);
626 if (ret) {
627 kthread_stop(thread);
628 pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__);
629 return ret;
630 }
631
632 sg_policy->thread = thread;
633 kthread_bind_mask(thread, policy->related_cpus);
634 init_irq_work(&sg_policy->irq_work, sugov_irq_work);
635 mutex_init(&sg_policy->work_lock);
636
637 wake_up_process(thread);
638
639 return 0;
640 }
641
sugov_kthread_stop(struct sugov_policy * sg_policy)642 static void sugov_kthread_stop(struct sugov_policy *sg_policy)
643 {
644 /* kthread only required for slow path */
645 if (sg_policy->policy->fast_switch_enabled)
646 return;
647
648 kthread_flush_worker(&sg_policy->worker);
649 kthread_stop(sg_policy->thread);
650 mutex_destroy(&sg_policy->work_lock);
651 }
652
sugov_tunables_alloc(struct sugov_policy * sg_policy)653 static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy)
654 {
655 struct sugov_tunables *tunables;
656
657 tunables = kzalloc(sizeof(*tunables), GFP_KERNEL);
658 if (tunables) {
659 gov_attr_set_init(&tunables->attr_set, &sg_policy->tunables_hook);
660 if (!have_governor_per_policy())
661 global_tunables = tunables;
662 }
663 return tunables;
664 }
665
sugov_clear_global_tunables(void)666 static void sugov_clear_global_tunables(void)
667 {
668 if (!have_governor_per_policy())
669 global_tunables = NULL;
670 }
671
sugov_init(struct cpufreq_policy * policy)672 static int sugov_init(struct cpufreq_policy *policy)
673 {
674 struct sugov_policy *sg_policy;
675 struct sugov_tunables *tunables;
676 int ret = 0;
677
678 /* State should be equivalent to EXIT */
679 if (policy->governor_data)
680 return -EBUSY;
681
682 cpufreq_enable_fast_switch(policy);
683
684 sg_policy = sugov_policy_alloc(policy);
685 if (!sg_policy) {
686 ret = -ENOMEM;
687 goto disable_fast_switch;
688 }
689
690 ret = sugov_kthread_create(sg_policy);
691 if (ret)
692 goto free_sg_policy;
693
694 mutex_lock(&global_tunables_lock);
695
696 if (global_tunables) {
697 if (WARN_ON(have_governor_per_policy())) {
698 ret = -EINVAL;
699 goto stop_kthread;
700 }
701 policy->governor_data = sg_policy;
702 sg_policy->tunables = global_tunables;
703
704 gov_attr_set_get(&global_tunables->attr_set, &sg_policy->tunables_hook);
705 goto out;
706 }
707
708 tunables = sugov_tunables_alloc(sg_policy);
709 if (!tunables) {
710 ret = -ENOMEM;
711 goto stop_kthread;
712 }
713
714 tunables->rate_limit_us = cpufreq_policy_transition_delay_us(policy);
715
716 policy->governor_data = sg_policy;
717 sg_policy->tunables = tunables;
718
719 ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype,
720 get_governor_parent_kobj(policy), "%s",
721 schedutil_gov.name);
722 if (ret)
723 goto fail;
724
725 out:
726 mutex_unlock(&global_tunables_lock);
727 return 0;
728
729 fail:
730 kobject_put(&tunables->attr_set.kobj);
731 policy->governor_data = NULL;
732 sugov_clear_global_tunables();
733
734 stop_kthread:
735 sugov_kthread_stop(sg_policy);
736 mutex_unlock(&global_tunables_lock);
737
738 free_sg_policy:
739 sugov_policy_free(sg_policy);
740
741 disable_fast_switch:
742 cpufreq_disable_fast_switch(policy);
743
744 pr_err("initialization failed (error %d)\n", ret);
745 return ret;
746 }
747
sugov_exit(struct cpufreq_policy * policy)748 static void sugov_exit(struct cpufreq_policy *policy)
749 {
750 struct sugov_policy *sg_policy = policy->governor_data;
751 struct sugov_tunables *tunables = sg_policy->tunables;
752 unsigned int count;
753
754 mutex_lock(&global_tunables_lock);
755
756 count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook);
757 policy->governor_data = NULL;
758 if (!count)
759 sugov_clear_global_tunables();
760
761 mutex_unlock(&global_tunables_lock);
762
763 sugov_kthread_stop(sg_policy);
764 sugov_policy_free(sg_policy);
765 cpufreq_disable_fast_switch(policy);
766 }
767
sugov_start(struct cpufreq_policy * policy)768 static int sugov_start(struct cpufreq_policy *policy)
769 {
770 struct sugov_policy *sg_policy = policy->governor_data;
771 void (*uu)(struct update_util_data *data, u64 time, unsigned int flags);
772 unsigned int cpu;
773
774 sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
775 sg_policy->last_freq_update_time = 0;
776 sg_policy->next_freq = 0;
777 sg_policy->work_in_progress = false;
778 sg_policy->limits_changed = false;
779 sg_policy->cached_raw_freq = 0;
780
781 sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
782
783 for_each_cpu(cpu, policy->cpus) {
784 struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
785
786 memset(sg_cpu, 0, sizeof(*sg_cpu));
787 sg_cpu->cpu = cpu;
788 sg_cpu->sg_policy = sg_policy;
789 }
790
791 if (policy_is_shared(policy))
792 uu = sugov_update_shared;
793 else if (policy->fast_switch_enabled && cpufreq_driver_has_adjust_perf())
794 uu = sugov_update_single_perf;
795 else
796 uu = sugov_update_single_freq;
797
798 for_each_cpu(cpu, policy->cpus) {
799 struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
800
801 cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, uu);
802 }
803 return 0;
804 }
805
sugov_stop(struct cpufreq_policy * policy)806 static void sugov_stop(struct cpufreq_policy *policy)
807 {
808 struct sugov_policy *sg_policy = policy->governor_data;
809 unsigned int cpu;
810
811 for_each_cpu(cpu, policy->cpus)
812 cpufreq_remove_update_util_hook(cpu);
813
814 synchronize_rcu();
815
816 if (!policy->fast_switch_enabled) {
817 irq_work_sync(&sg_policy->irq_work);
818 kthread_cancel_work_sync(&sg_policy->work);
819 }
820 }
821
sugov_limits(struct cpufreq_policy * policy)822 static void sugov_limits(struct cpufreq_policy *policy)
823 {
824 struct sugov_policy *sg_policy = policy->governor_data;
825
826 if (!policy->fast_switch_enabled) {
827 mutex_lock(&sg_policy->work_lock);
828 cpufreq_policy_apply_limits(policy);
829 mutex_unlock(&sg_policy->work_lock);
830 }
831
832 sg_policy->limits_changed = true;
833 }
834
835 struct cpufreq_governor schedutil_gov = {
836 .name = "schedutil",
837 .owner = THIS_MODULE,
838 .flags = CPUFREQ_GOV_DYNAMIC_SWITCHING,
839 .init = sugov_init,
840 .exit = sugov_exit,
841 .start = sugov_start,
842 .stop = sugov_stop,
843 .limits = sugov_limits,
844 };
845
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
/* Provide schedutil as the default governor when the kernel is configured so. */
struct cpufreq_governor *cpufreq_default_governor(void)
{
	struct cpufreq_governor *gov = &schedutil_gov;

	return gov;
}
#endif
852
/* NOTE(review): presumably registers schedutil_gov with the cpufreq core at init time -- confirm against the macro definition. */
cpufreq_governor_init(schedutil_gov);
854