// SPDX-License-Identifier: GPL-2.0
/*
 * CPUFreq governor based on scheduler-provided CPU utilization data.
 *
 * Copyright (C) 2016, Intel Corporation
 * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 */

#include <trace/hooks/sched.h>

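/*
 * With the default SCHED_CAPACITY_SCALE of 1024, the minimum IO-wait boost
 * below evaluates to 128, i.e. one eighth of full CPU capacity.
 */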
#define IOWAIT_BOOST_MIN	(SCHED_CAPACITY_SCALE / 8)

struct sugov_tunables {
	struct gov_attr_set attr_set;
	unsigned int rate_limit_us;
};

struct sugov_policy {
	struct cpufreq_policy *policy;

	struct sugov_tunables *tunables;
	struct list_head tunables_hook;

	raw_spinlock_t update_lock;
	u64 last_freq_update_time;
	s64 freq_update_delay_ns;
	unsigned int next_freq;
	unsigned int cached_raw_freq;

	/* The next fields are only needed if fast switch cannot be used: */
	struct irq_work irq_work;
	struct kthread_work work;
	struct mutex work_lock;
	struct kthread_worker worker;
	struct task_struct *thread;
	bool work_in_progress;

	bool limits_changed;
	bool need_freq_update;
};

struct sugov_cpu {
	struct update_util_data update_util;
	struct sugov_policy *sg_policy;
	unsigned int cpu;

	bool iowait_boost_pending;
	unsigned int iowait_boost;
	u64 last_update;

	unsigned long util;
	unsigned long bw_dl;
	unsigned long max;

	/* The field below is for single-CPU policies only: */
#ifdef CONFIG_NO_HZ_COMMON
	unsigned long saved_idle_calls;
#endif
};

static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);

/************************ Governor internals ***********************/

static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
{
	s64 delta_ns;

	/*
	 * Since cpufreq_update_util() is called with rq->lock held for
	 * the @target_cpu, our per-CPU data is fully serialized.
	 *
	 * However, drivers cannot in general deal with cross-CPU
	 * requests, so while get_next_freq() will work, our
	 * sugov_update_commit() call may not work for the fast switching
	 * platforms.
	 *
	 * Hence stop here for remote requests if they aren't supported
	 * by the hardware, as calculating the frequency is pointless if
	 * we cannot in fact act on it.
	 *
	 * This is needed on the slow switching platforms too to prevent CPUs
	 * going offline from leaving stale IRQ work items behind.
	 */
	if (!cpufreq_this_cpu_can_update(sg_policy->policy))
		return false;

	if (unlikely(sg_policy->limits_changed)) {
		sg_policy->limits_changed = false;
		sg_policy->need_freq_update = true;
		return true;
	}

	delta_ns = time - sg_policy->last_freq_update_time;

	return delta_ns >= sg_policy->freq_update_delay_ns;
}
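
/*
 * Worked example: if rate_limit_us is, say, 2000 (a typical value derived
 * from cpufreq_policy_transition_delay_us()), freq_update_delay_ns is
 * 2,000,000 ns, so the check above allows at most one frequency update
 * every 2 ms per policy, unless limits_changed forces an update sooner.
 */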

static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time,
				   unsigned int next_freq)
{
	if (sg_policy->need_freq_update)
		sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
	else if (sg_policy->next_freq == next_freq)
		return false;

	sg_policy->next_freq = next_freq;
	sg_policy->last_freq_update_time = time;

	return true;
}

static void sugov_deferred_update(struct sugov_policy *sg_policy)
{
	if (!sg_policy->work_in_progress) {
		sg_policy->work_in_progress = true;
		irq_work_queue(&sg_policy->irq_work);
	}
}

/**
 * get_next_freq - Compute a new frequency for a given cpufreq policy.
 * @sg_policy: schedutil policy object to compute the new frequency for.
 * @util: Current CPU utilization.
 * @max: CPU capacity.
 *
 * If the utilization is frequency-invariant, choose the new frequency to be
 * proportional to it, that is
 *
 * next_freq = C * max_freq * util / max
 *
 * Otherwise, approximate the would-be frequency-invariant utilization by
 * util_raw * (curr_freq / max_freq) which leads to
 *
 * next_freq = C * curr_freq * util_raw / max
 *
 * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8.
 *
 * The lowest driver-supported frequency which is equal or greater than the raw
 * next_freq (as calculated above) is returned, subject to policy min/max and
 * cpufreq driver limitations.
 */
static unsigned int get_next_freq(struct sugov_policy *sg_policy,
				  unsigned long util, unsigned long max)
{
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned int freq = arch_scale_freq_invariant() ?
				policy->cpuinfo.max_freq : policy->cur;
	unsigned long next_freq = 0;

	util = map_util_perf(util);
	trace_android_vh_map_util_freq(util, freq, max, &next_freq, policy,
				       &sg_policy->need_freq_update);
	if (next_freq)
		freq = next_freq;
	else
		freq = map_util_freq(util, freq, max);

	if (freq == sg_policy->cached_raw_freq && !sg_policy->need_freq_update)
		return sg_policy->next_freq;

	sg_policy->cached_raw_freq = freq;
	return cpufreq_driver_resolve_freq(policy, freq);
}
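
/*
 * Worked example of the formula above, assuming frequency-invariant
 * utilization: with util = 512, max = 1024 and cpuinfo.max_freq = 2000000
 * (kHz), map_util_perf() applies C = 1.25 giving 640, and map_util_freq()
 * returns 2000000 * 640 / 1024 = 1250000 kHz as the raw frequency, which
 * cpufreq_driver_resolve_freq() then maps to the lowest supported OPP at or
 * above it.
 */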

static void sugov_get_util(struct sugov_cpu *sg_cpu)
{
	struct rq *rq = cpu_rq(sg_cpu->cpu);

	sg_cpu->max = arch_scale_cpu_capacity(sg_cpu->cpu);
	sg_cpu->bw_dl = cpu_bw_dl(rq);
	sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(sg_cpu->cpu),
					  FREQUENCY_UTIL, NULL);
}

/**
 * sugov_iowait_reset() - Reset the IO boost status of a CPU.
 * @sg_cpu: the sugov data for the CPU to boost
 * @time: the update time from the caller
 * @set_iowait_boost: true if an IO boost has been requested
 *
 * The IO wait boost of a task is disabled after a tick since the last update
 * of a CPU. If a new IO wait boost is requested after more than a tick, then
 * we enable the boost starting from IOWAIT_BOOST_MIN, which improves energy
 * efficiency by ignoring sporadic wakeups from IO.
 */
static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time,
			       bool set_iowait_boost)
{
	s64 delta_ns = time - sg_cpu->last_update;

	/* Reset boost only if a tick has elapsed since last request */
	if (delta_ns <= TICK_NSEC)
		return false;

	sg_cpu->iowait_boost = set_iowait_boost ? IOWAIT_BOOST_MIN : 0;
	sg_cpu->iowait_boost_pending = set_iowait_boost;

	return true;
}

/**
 * sugov_iowait_boost() - Updates the IO boost status of a CPU.
 * @sg_cpu: the sugov data for the CPU to boost
 * @time: the update time from the caller
 * @flags: SCHED_CPUFREQ_IOWAIT if the task is waking up after an IO wait
 *
 * Each time a task wakes up after an IO operation, the CPU utilization can be
 * boosted to a certain utilization which doubles at each "frequent and
 * successive" wakeup from IO, ranging from IOWAIT_BOOST_MIN to the utilization
 * of the maximum OPP.
 *
 * To keep doubling, an IO boost has to be requested at least once per tick,
 * otherwise we restart from the utilization of the minimum OPP.
 */
static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
			       unsigned int flags)
{
	bool set_iowait_boost = flags & SCHED_CPUFREQ_IOWAIT;

	/* Reset boost if the CPU appears to have been idle enough */
	if (sg_cpu->iowait_boost &&
	    sugov_iowait_reset(sg_cpu, time, set_iowait_boost))
		return;

	/* Boost only tasks waking up after IO */
	if (!set_iowait_boost)
		return;

	/* Ensure boost doubles only one time at each request */
	if (sg_cpu->iowait_boost_pending)
		return;
	sg_cpu->iowait_boost_pending = true;

	/* Double the boost at each request */
	if (sg_cpu->iowait_boost) {
		sg_cpu->iowait_boost =
			min_t(unsigned int, sg_cpu->iowait_boost << 1, SCHED_CAPACITY_SCALE);
		return;
	}

	/* First wakeup after IO: start with minimum boost */
	sg_cpu->iowait_boost = IOWAIT_BOOST_MIN;
}
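
/*
 * Illustration of the doubling above: with SCHED_CAPACITY_SCALE = 1024,
 * successive IO-wait wakeups that each arrive within a tick raise
 * iowait_boost through 128 -> 256 -> 512 -> 1024, where it saturates at
 * full capacity; a gap longer than a tick resets it via sugov_iowait_reset().
 */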

/**
 * sugov_iowait_apply() - Apply the IO boost to a CPU.
 * @sg_cpu: the sugov data for the cpu to boost
 * @time: the update time from the caller
 *
 * A CPU running a task which has woken up after an IO operation can have its
 * utilization boosted to speed up the completion of those IO operations.
 * The IO boost value is increased each time a task wakes up from IO, in
 * sugov_iowait_boost(), and it is instead decreased by this function,
 * each time an increase has not been requested (!iowait_boost_pending).
 *
 * A CPU which appears to have been idle for at least one tick also has its
 * IO boost utilization reset.
 *
 * This mechanism is designed to boost tasks that wait on IO frequently, while
 * being more conservative with tasks that do only sporadic IO operations.
 */
static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time)
{
	unsigned long boost;

	/* No boost currently required */
	if (!sg_cpu->iowait_boost)
		return;

	/* Reset boost if the CPU appears to have been idle enough */
	if (sugov_iowait_reset(sg_cpu, time, false))
		return;

	if (!sg_cpu->iowait_boost_pending) {
		/*
		 * No boost pending; reduce the boost value.
		 */
		sg_cpu->iowait_boost >>= 1;
		if (sg_cpu->iowait_boost < IOWAIT_BOOST_MIN) {
			sg_cpu->iowait_boost = 0;
			return;
		}
	}

	sg_cpu->iowait_boost_pending = false;

	/*
	 * sg_cpu->util is already in capacity scale; convert iowait_boost
	 * into the same scale so we can compare.
	 */
	boost = (sg_cpu->iowait_boost * sg_cpu->max) >> SCHED_CAPACITY_SHIFT;
	boost = uclamp_rq_util_with(cpu_rq(sg_cpu->cpu), boost, NULL);
	if (sg_cpu->util < boost)
		sg_cpu->util = boost;
}
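
/*
 * Example of the conversion above: iowait_boost is tracked in
 * SCHED_CAPACITY_SCALE units, so a boost of 512 on a CPU whose
 * arch_scale_cpu_capacity() is 512 translates to
 * (512 * 512) >> SCHED_CAPACITY_SHIFT = 256 in that CPU's capacity scale,
 * before being clamped by uclamp and compared against sg_cpu->util.
 */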

#ifdef CONFIG_NO_HZ_COMMON
/*
 * The CPU is considered busy if it has not entered the idle loop since the
 * previous schedutil update, i.e. if the nohz idle-calls counter has not
 * moved in the meantime.
 */
static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
{
	unsigned long idle_calls = tick_nohz_get_idle_calls_cpu(sg_cpu->cpu);
	bool ret = idle_calls == sg_cpu->saved_idle_calls;

	sg_cpu->saved_idle_calls = idle_calls;
	return ret;
}
#else
static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
#endif /* CONFIG_NO_HZ_COMMON */

/*
 * Make sugov_should_update_freq() ignore the rate limit when DL
 * has increased the utilization.
 */
static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu)
{
	if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl)
		sg_cpu->sg_policy->limits_changed = true;
}

static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu,
					      u64 time, unsigned int flags)
{
	sugov_iowait_boost(sg_cpu, time, flags);
	sg_cpu->last_update = time;

	ignore_dl_rate_limit(sg_cpu);

	if (!sugov_should_update_freq(sg_cpu->sg_policy, time))
		return false;

	sugov_get_util(sg_cpu);
	sugov_iowait_apply(sg_cpu, time);

	return true;
}

static void sugov_update_single_freq(struct update_util_data *hook, u64 time,
				     unsigned int flags)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	unsigned int cached_freq = sg_policy->cached_raw_freq;
	unsigned int next_f;

	if (!sugov_update_single_common(sg_cpu, time, flags))
		return;

	next_f = get_next_freq(sg_policy, sg_cpu->util, sg_cpu->max);
	/*
	 * Do not reduce the frequency if the CPU has not been idle
	 * recently, as the reduction is likely to be premature then.
	 *
	 * Except when the rq is capped by uclamp_max.
	 */
	if (!uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)) &&
	    sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq &&
	    !sg_policy->need_freq_update) {
		next_f = sg_policy->next_freq;

		/* Restore cached freq as next_freq has changed */
		sg_policy->cached_raw_freq = cached_freq;
	}

	if (!sugov_update_next_freq(sg_policy, time, next_f))
		return;

	/*
	 * This code runs under rq->lock for the target CPU, so it won't run
	 * concurrently on two different CPUs for the same target and it is not
	 * necessary to acquire the lock in the fast switch case.
	 */
	if (sg_policy->policy->fast_switch_enabled) {
		cpufreq_driver_fast_switch(sg_policy->policy, next_f);
	} else {
		raw_spin_lock(&sg_policy->update_lock);
		sugov_deferred_update(sg_policy);
		raw_spin_unlock(&sg_policy->update_lock);
	}
}

static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
				     unsigned int flags)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	unsigned long prev_util = sg_cpu->util;

	/*
	 * Fall back to the "frequency" path if frequency invariance is not
	 * supported, because the direct mapping between the utilization and
	 * the performance levels depends on the frequency invariance.
	 */
	if (!arch_scale_freq_invariant()) {
		sugov_update_single_freq(hook, time, flags);
		return;
	}

	if (!sugov_update_single_common(sg_cpu, time, flags))
		return;

	/*
	 * Do not reduce the target performance level if the CPU has not been
	 * idle recently, as the reduction is likely to be premature then.
	 *
	 * Except when the rq is capped by uclamp_max.
	 */
	if (!uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)) &&
	    sugov_cpu_is_busy(sg_cpu) && sg_cpu->util < prev_util)
		sg_cpu->util = prev_util;

	cpufreq_driver_adjust_perf(sg_cpu->cpu, map_util_perf(sg_cpu->bw_dl),
				   map_util_perf(sg_cpu->util), sg_cpu->max);

	sg_cpu->sg_policy->last_freq_update_time = time;
}

static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
{
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned long util = 0, max = 1;
	unsigned int j;

	for_each_cpu(j, policy->cpus) {
		struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);
		unsigned long j_util, j_max;

		sugov_get_util(j_sg_cpu);
		sugov_iowait_apply(j_sg_cpu, time);
		j_util = j_sg_cpu->util;
		j_max = j_sg_cpu->max;

		if (j_util * max > j_max * util) {
			util = j_util;
			max = j_max;
		}
	}

	return get_next_freq(sg_policy, util, max);
}
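
/*
 * Note on the loop above: comparing j_util * max against j_max * util is a
 * division-free way of picking the CPU with the highest util / max ratio.
 * For example, util = 300 on a max = 512 CPU (ratio ~0.59) wins over
 * util = 400 on a max = 1024 CPU (ratio ~0.39), because 300 * 1024 >
 * 512 * 400.
 */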

static void
sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	unsigned int next_f;

	raw_spin_lock(&sg_policy->update_lock);

	sugov_iowait_boost(sg_cpu, time, flags);
	sg_cpu->last_update = time;

	ignore_dl_rate_limit(sg_cpu);

	if (sugov_should_update_freq(sg_policy, time)) {
		next_f = sugov_next_freq_shared(sg_cpu, time);

		if (!sugov_update_next_freq(sg_policy, time, next_f))
			goto unlock;

		if (sg_policy->policy->fast_switch_enabled)
			cpufreq_driver_fast_switch(sg_policy->policy, next_f);
		else
			sugov_deferred_update(sg_policy);
	}
unlock:
	raw_spin_unlock(&sg_policy->update_lock);
}

static void sugov_work(struct kthread_work *work)
{
	struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);
	unsigned int freq;
	unsigned long flags;

	/*
	 * Hold sg_policy->update_lock briefly to handle the case where
	 * sg_policy->next_freq is read here and then updated by
	 * sugov_deferred_update() just before work_in_progress is set to
	 * false here, in which case we may miss queueing the new update.
	 *
	 * Note: If a work item was queued after the update_lock is released,
	 * sugov_work() will just be called again by the kthread_work code; and
	 * the request will be processed before the sugov thread sleeps.
	 */
	raw_spin_lock_irqsave(&sg_policy->update_lock, flags);
	freq = sg_policy->next_freq;
	sg_policy->work_in_progress = false;
	raw_spin_unlock_irqrestore(&sg_policy->update_lock, flags);

	mutex_lock(&sg_policy->work_lock);
	__cpufreq_driver_target(sg_policy->policy, freq, CPUFREQ_RELATION_L);
	mutex_unlock(&sg_policy->work_lock);
}

static void sugov_irq_work(struct irq_work *irq_work)
{
	struct sugov_policy *sg_policy;

	sg_policy = container_of(irq_work, struct sugov_policy, irq_work);

	kthread_queue_work(&sg_policy->worker, &sg_policy->work);
}

/************************** sysfs interface ************************/

static struct sugov_tunables *global_tunables;
static DEFINE_MUTEX(global_tunables_lock);

static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr_set)
{
	return container_of(attr_set, struct sugov_tunables, attr_set);
}

static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
{
	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);

	return sprintf(buf, "%u\n", tunables->rate_limit_us);
}

static ssize_t
rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, size_t count)
{
	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
	struct sugov_policy *sg_policy;
	unsigned int rate_limit_us;

	if (kstrtouint(buf, 10, &rate_limit_us))
		return -EINVAL;

	tunables->rate_limit_us = rate_limit_us;

	list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook)
		sg_policy->freq_update_delay_ns = rate_limit_us * NSEC_PER_USEC;

	return count;
}

static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);

static struct attribute *sugov_attrs[] = {
	&rate_limit_us.attr,
	NULL
};
ATTRIBUTE_GROUPS(sugov);

static void sugov_tunables_free(struct kobject *kobj)
{
	struct gov_attr_set *attr_set = to_gov_attr_set(kobj);

	kfree(to_sugov_tunables(attr_set));
}

static struct kobj_type sugov_tunables_ktype = {
	.default_groups = sugov_groups,
	.sysfs_ops = &governor_sysfs_ops,
	.release = &sugov_tunables_free,
};

/********************** cpufreq governor interface *********************/

struct cpufreq_governor schedutil_gov;

static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy;

	sg_policy = kzalloc(sizeof(*sg_policy), GFP_KERNEL);
	if (!sg_policy)
		return NULL;

	sg_policy->policy = policy;
	raw_spin_lock_init(&sg_policy->update_lock);
	return sg_policy;
}

static void sugov_policy_free(struct sugov_policy *sg_policy)
{
	kfree(sg_policy);
}

static int sugov_kthread_create(struct sugov_policy *sg_policy)
{
	struct task_struct *thread;
	struct sched_attr attr = {
		.size = sizeof(struct sched_attr),
		.sched_policy = SCHED_DEADLINE,
		.sched_flags = SCHED_FLAG_SUGOV,
		.sched_nice = 0,
		.sched_priority = 0,
		/*
		 * Fake (unused) bandwidth; workaround to "fix"
		 * priority inheritance.
		 */
		.sched_runtime = 1000000,
		.sched_deadline = 10000000,
		.sched_period = 10000000,
	};
	struct cpufreq_policy *policy = sg_policy->policy;
	int ret;

	/* kthread only required for slow path */
	if (policy->fast_switch_enabled)
		return 0;

	trace_android_vh_set_sugov_sched_attr(&attr);
	kthread_init_work(&sg_policy->work, sugov_work);
	kthread_init_worker(&sg_policy->worker);
	thread = kthread_create(kthread_worker_fn, &sg_policy->worker,
				"sugov:%d",
				cpumask_first(policy->related_cpus));
	if (IS_ERR(thread)) {
		pr_err("failed to create sugov thread: %ld\n", PTR_ERR(thread));
		return PTR_ERR(thread);
	}

	ret = sched_setattr_nocheck(thread, &attr);
	if (ret) {
		kthread_stop(thread);
		pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__);
		return ret;
	}

	sg_policy->thread = thread;
	kthread_bind_mask(thread, policy->related_cpus);
	init_irq_work(&sg_policy->irq_work, sugov_irq_work);
	mutex_init(&sg_policy->work_lock);

	wake_up_process(thread);

	return 0;
}

static void sugov_kthread_stop(struct sugov_policy *sg_policy)
{
	/* kthread only required for slow path */
	if (sg_policy->policy->fast_switch_enabled)
		return;

	kthread_flush_worker(&sg_policy->worker);
	kthread_stop(sg_policy->thread);
	mutex_destroy(&sg_policy->work_lock);
}

static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy)
{
	struct sugov_tunables *tunables;

	tunables = kzalloc(sizeof(*tunables), GFP_KERNEL);
	if (tunables) {
		gov_attr_set_init(&tunables->attr_set, &sg_policy->tunables_hook);
		if (!have_governor_per_policy())
			global_tunables = tunables;
	}
	return tunables;
}

static void sugov_clear_global_tunables(void)
{
	if (!have_governor_per_policy())
		global_tunables = NULL;
}

static int sugov_init(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy;
	struct sugov_tunables *tunables;
	int ret = 0;

	/* State should be equivalent to EXIT */
	if (policy->governor_data)
		return -EBUSY;

	cpufreq_enable_fast_switch(policy);

	sg_policy = sugov_policy_alloc(policy);
	if (!sg_policy) {
		ret = -ENOMEM;
		goto disable_fast_switch;
	}

	ret = sugov_kthread_create(sg_policy);
	if (ret)
		goto free_sg_policy;

	mutex_lock(&global_tunables_lock);

	if (global_tunables) {
		if (WARN_ON(have_governor_per_policy())) {
			ret = -EINVAL;
			goto stop_kthread;
		}
		policy->governor_data = sg_policy;
		sg_policy->tunables = global_tunables;

		gov_attr_set_get(&global_tunables->attr_set, &sg_policy->tunables_hook);
		goto out;
	}

	tunables = sugov_tunables_alloc(sg_policy);
	if (!tunables) {
		ret = -ENOMEM;
		goto stop_kthread;
	}

	tunables->rate_limit_us = cpufreq_policy_transition_delay_us(policy);

	policy->governor_data = sg_policy;
	sg_policy->tunables = tunables;

	ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype,
				   get_governor_parent_kobj(policy), "%s",
				   schedutil_gov.name);
	if (ret)
		goto fail;

out:
	mutex_unlock(&global_tunables_lock);
	return 0;

fail:
	kobject_put(&tunables->attr_set.kobj);
	policy->governor_data = NULL;
	sugov_clear_global_tunables();

stop_kthread:
	sugov_kthread_stop(sg_policy);
	mutex_unlock(&global_tunables_lock);

free_sg_policy:
	sugov_policy_free(sg_policy);

disable_fast_switch:
	cpufreq_disable_fast_switch(policy);

	pr_err("initialization failed (error %d)\n", ret);
	return ret;
}

static void sugov_exit(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	struct sugov_tunables *tunables = sg_policy->tunables;
	unsigned int count;

	mutex_lock(&global_tunables_lock);

	count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook);
	policy->governor_data = NULL;
	if (!count)
		sugov_clear_global_tunables();

	mutex_unlock(&global_tunables_lock);

	sugov_kthread_stop(sg_policy);
	sugov_policy_free(sg_policy);
	cpufreq_disable_fast_switch(policy);
}

static int sugov_start(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	void (*uu)(struct update_util_data *data, u64 time, unsigned int flags);
	unsigned int cpu;

	sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
	sg_policy->last_freq_update_time = 0;
	sg_policy->next_freq = 0;
	sg_policy->work_in_progress = false;
	sg_policy->limits_changed = false;
	sg_policy->cached_raw_freq = 0;

	sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);

	for_each_cpu(cpu, policy->cpus) {
		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);

		memset(sg_cpu, 0, sizeof(*sg_cpu));
		sg_cpu->cpu = cpu;
		sg_cpu->sg_policy = sg_policy;
	}

	if (policy_is_shared(policy))
		uu = sugov_update_shared;
	else if (policy->fast_switch_enabled && cpufreq_driver_has_adjust_perf())
		uu = sugov_update_single_perf;
	else
		uu = sugov_update_single_freq;

	for_each_cpu(cpu, policy->cpus) {
		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);

		cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, uu);
	}
	return 0;
}

static void sugov_stop(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	unsigned int cpu;

	for_each_cpu(cpu, policy->cpus)
		cpufreq_remove_update_util_hook(cpu);

	synchronize_rcu();

	if (!policy->fast_switch_enabled) {
		irq_work_sync(&sg_policy->irq_work);
		kthread_cancel_work_sync(&sg_policy->work);
	}
}

static void sugov_limits(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;

	if (!policy->fast_switch_enabled) {
		mutex_lock(&sg_policy->work_lock);
		cpufreq_policy_apply_limits(policy);
		mutex_unlock(&sg_policy->work_lock);
	}

	sg_policy->limits_changed = true;
}

struct cpufreq_governor schedutil_gov = {
	.name = "schedutil",
	.owner = THIS_MODULE,
	.flags = CPUFREQ_GOV_DYNAMIC_SWITCHING,
	.init = sugov_init,
	.exit = sugov_exit,
	.start = sugov_start,
	.stop = sugov_stop,
	.limits = sugov_limits,
};

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
struct cpufreq_governor *cpufreq_default_governor(void)
{
	return &schedutil_gov;
}
#endif

cpufreq_governor_init(schedutil_gov);