1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * walt.c
4  *
5  * Window Assistant Load Tracking
6  *
7  * This software is licensed under the terms of the GNU General Public
8  * License version 2, as published by the Free Software Foundation, and
9  * may be copied, distributed, and modified under those terms.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  */
17 
18 #include <linux/syscore_ops.h>
19 #include <linux/cpufreq.h>
20 #include <linux/list_sort.h>
21 #include <linux/jiffies.h>
22 #include <linux/sched/stat.h>
23 #include <trace/events/sched.h>
24 #include "sched.h"
25 #include "walt.h"
26 #include "core_ctl.h"
27 #include "rtg/rtg.h"
28 #define CREATE_TRACE_POINTS
29 #include <trace/events/walt.h>
30 #undef CREATE_TRACE_POINTS
31 
32 const char *task_event_names[] = {"PUT_PREV_TASK", "PICK_NEXT_TASK",
33 				  "TASK_WAKE", "TASK_MIGRATE", "TASK_UPDATE",
34 				  "IRQ_UPDATE"};
35 const char *migrate_type_names[] = {"GROUP_TO_RQ", "RQ_TO_GROUP",
36 					"RQ_TO_RQ", "GROUP_TO_GROUP"};
37 
38 #define SCHED_FREQ_ACCOUNT_WAIT_TIME 0
39 #define SCHED_ACCOUNT_WAIT_TIME 1
40 
41 static ktime_t ktime_last;
42 static bool sched_ktime_suspended;
43 DEFINE_MUTEX(cluster_lock);
44 static atomic64_t walt_irq_work_lastq_ws;
45 u64 walt_load_reported_window;
46 
47 static struct irq_work walt_cpufreq_irq_work;
48 static struct irq_work walt_migration_irq_work;
49 
50 u64 sched_ktime_clock(void)
51 {
52 	if (unlikely(sched_ktime_suspended))
53 		return ktime_to_ns(ktime_last);
54 	return ktime_get_ns();
55 }
56 
57 static void sched_resume(void)
58 {
59 	sched_ktime_suspended = false;
60 }
61 
62 static int sched_suspend(void)
63 {
64 	ktime_last = ktime_get();
65 	sched_ktime_suspended = true;
66 	return 0;
67 }
68 
69 static struct syscore_ops sched_syscore_ops = {
70 	.resume = sched_resume,
71 	.suspend = sched_suspend
72 };
73 
74 static int __init sched_init_ops(void)
75 {
76 	register_syscore_ops(&sched_syscore_ops);
77 	return 0;
78 }
79 late_initcall(sched_init_ops);
80 
81 static void acquire_rq_locks_irqsave(const cpumask_t *cpus,
82 				     unsigned long *flags)
83 {
84 	int cpu;
85 	int level = 0;
86 
87 	local_irq_save(*flags);
88 	for_each_cpu(cpu, cpus) {
89 		if (level == 0)
90 			raw_spin_lock(&cpu_rq(cpu)->lock);
91 		else
92 			raw_spin_lock_nested(&cpu_rq(cpu)->lock, level);
93 		level++;
94 	}
95 }
96 
97 static void release_rq_locks_irqrestore(const cpumask_t *cpus,
98 					unsigned long *flags)
99 {
100 	int cpu;
101 
102 	for_each_cpu(cpu, cpus)
103 		raw_spin_unlock(&cpu_rq(cpu)->lock);
104 	local_irq_restore(*flags);
105 }
106 
107 #ifdef CONFIG_HZ_300
108 /*
109  * The tick interval becomes 3333333 ns due to a
110  * rounding error when HZ=300.
111  */
112 #define MIN_SCHED_RAVG_WINDOW (3333333 * 6)
113 #else
114 /* Min window size (in ns) = 20ms */
115 #define MIN_SCHED_RAVG_WINDOW 20000000
116 #endif
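/*
 * Worked example, derived from the definitions above: with HZ=300 the tick
 * period is 10^9 / 300 = 3333333.33 ns, truncated to 3333333 ns, so six
 * ticks give 6 * 3333333 = 19999998 ns, i.e. just under the 20 ms minimum
 * window used for other HZ settings.
 */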
117 
118 /* Max window size (in ns) = 1s */
119 #define MAX_SCHED_RAVG_WINDOW 1000000000
120 
121 /* 1 -> use PELT based load stats, 0 -> use window-based load stats */
122 unsigned int __read_mostly walt_disabled;
123 
124 __read_mostly unsigned int sysctl_sched_cpu_high_irqload = (10 * NSEC_PER_MSEC);
125 
126 /*
127  * sched_window_stats_policy and sched_ravg_hist_size have a 'sysctl' copy
128  * associated with them. This is required for atomic update of those variables
129  * when they are modified via the sysctl interface.
130  *
131  * IMPORTANT: Initialize both copies to same value!!
132  */
133 
134 __read_mostly unsigned int sched_ravg_hist_size = 5;
135 __read_mostly unsigned int sysctl_sched_ravg_hist_size = 5;
136 
137 __read_mostly unsigned int sched_window_stats_policy = WINDOW_STATS_MAX_RECENT_AVG;
138 __read_mostly unsigned int sysctl_sched_window_stats_policy = WINDOW_STATS_MAX_RECENT_AVG;
139 
140 static __read_mostly unsigned int sched_io_is_busy = 1;
141 
142 unsigned int sysctl_sched_use_walt_cpu_util = 1;
143 unsigned int sysctl_sched_use_walt_task_util = 1;
144 unsigned int sysctl_sched_walt_init_task_load_pct = 15;
145 __read_mostly unsigned int sysctl_sched_walt_cpu_high_irqload = (10 * NSEC_PER_MSEC);
146 
147 /* Window size (in ns) */
148 __read_mostly unsigned int sched_ravg_window = MIN_SCHED_RAVG_WINDOW;
149 
150 /*
151  * An after-boot constant divisor used by cpu_util_freq_walt() to apply the
152  * load boost.
153  */
154 __read_mostly unsigned int walt_cpu_util_freq_divisor;
155 
156 /* Initial task load. Newly created tasks are assigned this load. */
157 unsigned int __read_mostly sched_init_task_load_windows;
158 unsigned int __read_mostly sched_init_task_load_windows_scaled;
159 unsigned int __read_mostly sysctl_sched_init_task_load_pct = 15;
160 
161 /*
162  * Maximum possible frequency across all cpus. Task demand and cpu
163  * capacity (cpu_power) metrics are scaled in reference to it.
164  */
165 unsigned int max_possible_freq = 1;
166 
167 /*
168  * Minimum possible max_freq across all cpus. This will be same as
169  * max_possible_freq on homogeneous systems and could be different from
170  * max_possible_freq on heterogeneous systems. min_max_freq is used to derive capacity (cpu_power).
171  */
172 unsigned int min_max_freq = 1;
173 
174 unsigned int max_capacity = 1024; /* max(rq->capacity) */
175 unsigned int min_capacity = 1024; /* min(rq->capacity) */
176 unsigned int max_possible_capacity = 1024; /* max(rq->max_possible_capacity) */
177 unsigned int
178 min_max_possible_capacity = 1024; /* min(rq->max_possible_capacity) */
179 
180 /* Temporarily disable window-stats activity on all cpus */
181 unsigned int __read_mostly sched_disable_window_stats;
182 
183 /*
184  * This governs what load needs to be used when reporting CPU busy time
185  * to the cpufreq governor.
186  */
187 __read_mostly unsigned int sysctl_sched_freq_reporting_policy;
188 
189 static int __init set_sched_ravg_window(char *str)
190 {
191 	unsigned int window_size;
192 
193 	get_option(&str, &window_size);
194 
195 	if (window_size < MIN_SCHED_RAVG_WINDOW ||
196 			window_size > MAX_SCHED_RAVG_WINDOW) {
197 		WARN_ON(1);
198 		return -EINVAL;
199 	}
200 
201 	sched_ravg_window = window_size;
202 	return 0;
203 }
204 early_param("sched_ravg_window", set_sched_ravg_window);
205 
206 __read_mostly unsigned int walt_scale_demand_divisor;
207 #define scale_demand(d) ((d)/walt_scale_demand_divisor)
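/*
 * Illustrative sketch of the scaling, assuming SCHED_CAPACITY_SHIFT == 10:
 * walt_scale_demand_divisor is sched_ravg_window >> SCHED_CAPACITY_SHIFT
 * (see walt_init_once()), so with a 20 ms window the divisor is
 * 20000000 >> 10 = 19531 and a task busy for a full window scales to
 * about 20000000 / 19531 ~= 1024, i.e. SCHED_CAPACITY_SCALE.
 */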
208 
209 void inc_rq_walt_stats(struct rq *rq, struct task_struct *p)
210 {
211 	walt_inc_cumulative_runnable_avg(rq, p);
212 }
213 
214 void dec_rq_walt_stats(struct rq *rq, struct task_struct *p)
215 {
216 	walt_dec_cumulative_runnable_avg(rq, p);
217 }
218 
219 void fixup_walt_sched_stats_common(struct rq *rq, struct task_struct *p,
220 				   u16 updated_demand_scaled)
221 {
222 	s64 task_load_delta = (s64)updated_demand_scaled -
223 			      p->ravg.demand_scaled;
224 
225 	fixup_cumulative_runnable_avg(&rq->walt_stats, task_load_delta);
226 
227 	walt_fixup_cum_window_demand(rq, task_load_delta);
228 }
229 
230 static u64
231 update_window_start(struct rq *rq, u64 wallclock, int event)
232 {
233 	s64 delta;
234 	int nr_windows;
235 	u64 old_window_start = rq->window_start;
236 
237 	delta = wallclock - rq->window_start;
238 	BUG_ON(delta < 0);
239 	if (delta < sched_ravg_window)
240 		return old_window_start;
241 
242 	nr_windows = div64_u64(delta, sched_ravg_window);
243 	rq->window_start += (u64)nr_windows * (u64)sched_ravg_window;
244 
245 	rq->cum_window_demand_scaled =
246 			rq->walt_stats.cumulative_runnable_avg_scaled;
247 
248 	return old_window_start;
249 }
250 
251 void sched_account_irqtime(int cpu, struct task_struct *curr,
252 				 u64 delta, u64 wallclock)
253 {
254 	struct rq *rq = cpu_rq(cpu);
255 	unsigned long flags, nr_windows;
256 	u64 cur_jiffies_ts;
257 
258 	raw_spin_lock_irqsave(&rq->lock, flags);
259 
260 	/*
261 	 * cputime (wallclock) uses sched_clock so use the same here for
262 	 * consistency.
263 	 */
264 	delta += sched_clock() - wallclock;
265 	cur_jiffies_ts = get_jiffies_64();
266 
267 	if (is_idle_task(curr))
268 		update_task_ravg(curr, rq, IRQ_UPDATE, sched_ktime_clock(),
269 				 delta);
270 
271 	nr_windows = cur_jiffies_ts - rq->irqload_ts;
272 
273 	if (nr_windows) {
274 		if (nr_windows < 10) {
275 			/* Decay CPU's irqload by 3/4 for each window. */
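			/*
			 * Note that (3 * nr_windows) / (4 * nr_windows) reduces
			 * to a single 3/4 step here, independent of nr_windows.
			 */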
276 			rq->avg_irqload *= (3 * nr_windows);
277 			rq->avg_irqload = div64_u64(rq->avg_irqload,
278 						    4 * nr_windows);
279 		} else {
280 			rq->avg_irqload = 0;
281 		}
282 		rq->avg_irqload += rq->cur_irqload;
283 		rq->cur_irqload = 0;
284 	}
285 
286 	rq->cur_irqload += delta;
287 	rq->irqload_ts = cur_jiffies_ts;
288 	raw_spin_unlock_irqrestore(&rq->lock, flags);
289 }
290 
291 static int
292 account_busy_for_task_demand(struct rq *rq, struct task_struct *p, int event)
293 {
294 	/*
295 	 * No need to bother updating task demand for exiting tasks
296 	 * or the idle task.
297 	 */
298 	if (exiting_task(p) || is_idle_task(p))
299 		return 0;
300 
301 	/*
302 	 * When a task is waking up it is completing a segment of non-busy
303 	 * time. Likewise, if wait time is not treated as busy time, then
304 	 * when a task begins to run or is migrated, it is not running and
305 	 * is completing a segment of non-busy time.
306 	 */
307 	if (event == TASK_WAKE || (!SCHED_ACCOUNT_WAIT_TIME &&
308 			(event == PICK_NEXT_TASK || event == TASK_MIGRATE)))
309 		return 0;
310 
311 	/*
312 	 * The idle exit time is not accounted for the first task _picked_ up to
313 	 * run on the idle CPU.
314 	 */
315 	if (event == PICK_NEXT_TASK && rq->curr == rq->idle)
316 		return 0;
317 
318 	/*
319  * TASK_UPDATE can be called on a sleeping task, when it is moved
320  * between related groups.
321 	 */
322 	if (event == TASK_UPDATE) {
323 		if (rq->curr == p)
324 			return 1;
325 
326 		return p->on_rq ? SCHED_ACCOUNT_WAIT_TIME : 0;
327 	}
328 
329 	return 1;
330 }
331 
332 /*
333  * In this function we match the accumulated subtractions with the current
334  * and previous windows we are operating with. Ignore any entries where
335  * the window start in the load_subtraction struct does not match either
336  * the current or the previous window. This could happen whenever CPUs
337  * become idle or busy with interrupts disabled for an extended period.
338  */
339 static inline void account_load_subtractions(struct rq *rq)
340 {
341 	u64 ws = rq->window_start;
342 	u64 prev_ws = ws - sched_ravg_window;
343 	struct load_subtractions *ls = rq->load_subs;
344 	int i;
345 
346 	for (i = 0; i < NUM_TRACKED_WINDOWS; i++) {
347 		if (ls[i].window_start == ws) {
348 			rq->curr_runnable_sum -= ls[i].subs;
349 			rq->nt_curr_runnable_sum -= ls[i].new_subs;
350 		} else if (ls[i].window_start == prev_ws) {
351 			rq->prev_runnable_sum -= ls[i].subs;
352 			rq->nt_prev_runnable_sum -= ls[i].new_subs;
353 		}
354 
355 		ls[i].subs = 0;
356 		ls[i].new_subs = 0;
357 	}
358 
359 	BUG_ON((s64)rq->prev_runnable_sum < 0);
360 	BUG_ON((s64)rq->curr_runnable_sum < 0);
361 	BUG_ON((s64)rq->nt_prev_runnable_sum < 0);
362 	BUG_ON((s64)rq->nt_curr_runnable_sum < 0);
363 }
364 
365 static inline void create_subtraction_entry(struct rq *rq, u64 ws, int index)
366 {
367 	rq->load_subs[index].window_start = ws;
368 	rq->load_subs[index].subs = 0;
369 	rq->load_subs[index].new_subs = 0;
370 }
371 
372 static int get_subtraction_index(struct rq *rq, u64 ws)
373 {
374 	int i;
375 	u64 oldest = ULLONG_MAX;
376 	int oldest_index = 0;
377 
378 	for (i = 0; i < NUM_TRACKED_WINDOWS; i++) {
379 		u64 entry_ws = rq->load_subs[i].window_start;
380 
381 		if (ws == entry_ws)
382 			return i;
383 
384 		if (entry_ws < oldest) {
385 			oldest = entry_ws;
386 			oldest_index = i;
387 		}
388 	}
389 
390 	create_subtraction_entry(rq, ws, oldest_index);
391 	return oldest_index;
392 }
393 
394 static void update_rq_load_subtractions(int index, struct rq *rq,
395 					u32 sub_load, bool new_task)
396 {
397 	rq->load_subs[index].subs +=  sub_load;
398 	if (new_task)
399 		rq->load_subs[index].new_subs += sub_load;
400 }
401 
402 void update_cluster_load_subtractions(struct task_struct *p,
403 				      int cpu, u64 ws, bool new_task)
404 {
405 	struct sched_cluster *cluster = cpu_cluster(cpu);
406 	struct cpumask cluster_cpus = cluster->cpus;
407 	u64 prev_ws = ws - sched_ravg_window;
408 	int i;
409 
410 	cpumask_clear_cpu(cpu, &cluster_cpus);
411 	raw_spin_lock(&cluster->load_lock);
412 
413 	for_each_cpu(i, &cluster_cpus) {
414 		struct rq *rq = cpu_rq(i);
415 		int index;
416 
417 		if (p->ravg.curr_window_cpu[i]) {
418 			index = get_subtraction_index(rq, ws);
419 			update_rq_load_subtractions(index, rq,
420 				p->ravg.curr_window_cpu[i], new_task);
421 			p->ravg.curr_window_cpu[i] = 0;
422 		}
423 
424 		if (p->ravg.prev_window_cpu[i]) {
425 			index = get_subtraction_index(rq, prev_ws);
426 			update_rq_load_subtractions(index, rq,
427 				p->ravg.prev_window_cpu[i], new_task);
428 			p->ravg.prev_window_cpu[i] = 0;
429 		}
430 	}
431 
432 	raw_spin_unlock(&cluster->load_lock);
433 }
434 
435 static inline void inter_cluster_migration_fixup
436 	(struct task_struct *p, int new_cpu, int task_cpu, bool new_task)
437 {
438 	struct rq *dest_rq = cpu_rq(new_cpu);
439 	struct rq *src_rq = cpu_rq(task_cpu);
440 
441 	if (same_freq_domain(new_cpu, task_cpu))
442 		return;
443 
444 	p->ravg.curr_window_cpu[new_cpu] = p->ravg.curr_window;
445 	p->ravg.prev_window_cpu[new_cpu] = p->ravg.prev_window;
446 
447 	dest_rq->curr_runnable_sum += p->ravg.curr_window;
448 	dest_rq->prev_runnable_sum += p->ravg.prev_window;
449 
450 	src_rq->curr_runnable_sum -=  p->ravg.curr_window_cpu[task_cpu];
451 	src_rq->prev_runnable_sum -=  p->ravg.prev_window_cpu[task_cpu];
452 
453 	if (new_task) {
454 		dest_rq->nt_curr_runnable_sum += p->ravg.curr_window;
455 		dest_rq->nt_prev_runnable_sum += p->ravg.prev_window;
456 
457 		src_rq->nt_curr_runnable_sum -=
458 				p->ravg.curr_window_cpu[task_cpu];
459 		src_rq->nt_prev_runnable_sum -=
460 				p->ravg.prev_window_cpu[task_cpu];
461 	}
462 
463 	p->ravg.curr_window_cpu[task_cpu] = 0;
464 	p->ravg.prev_window_cpu[task_cpu] = 0;
465 
466 	update_cluster_load_subtractions(p, task_cpu,
467 			src_rq->window_start, new_task);
468 
469 	BUG_ON((s64)src_rq->prev_runnable_sum < 0);
470 	BUG_ON((s64)src_rq->curr_runnable_sum < 0);
471 	BUG_ON((s64)src_rq->nt_prev_runnable_sum < 0);
472 	BUG_ON((s64)src_rq->nt_curr_runnable_sum < 0);
473 }
474 
475 void fixup_busy_time(struct task_struct *p, int new_cpu)
476 {
477 	struct rq *src_rq = task_rq(p);
478 	struct rq *dest_rq = cpu_rq(new_cpu);
479 	u64 wallclock;
480 	bool new_task;
481 #ifdef CONFIG_SCHED_RTG
482 	u64 *src_curr_runnable_sum, *dst_curr_runnable_sum;
483 	u64 *src_prev_runnable_sum, *dst_prev_runnable_sum;
484 	u64 *src_nt_curr_runnable_sum, *dst_nt_curr_runnable_sum;
485 	u64 *src_nt_prev_runnable_sum, *dst_nt_prev_runnable_sum;
486 	struct related_thread_group *grp;
487 #endif
488 
489 	if (!p->on_rq && p->state != TASK_WAKING)
490 		return;
491 
492 	if (exiting_task(p))
493 		return;
494 
495 	if (p->state == TASK_WAKING)
496 		double_rq_lock(src_rq, dest_rq);
497 
498 	if (sched_disable_window_stats)
499 		goto done;
500 
501 	wallclock = sched_ktime_clock();
502 
503 	update_task_ravg(task_rq(p)->curr, task_rq(p),
504 			 TASK_UPDATE,
505 			 wallclock, 0);
506 	update_task_ravg(dest_rq->curr, dest_rq,
507 			 TASK_UPDATE, wallclock, 0);
508 
509 	update_task_ravg(p, task_rq(p), TASK_MIGRATE,
510 			 wallclock, 0);
511 
512 	/*
513 	 * When a task is migrating during the wakeup, adjust
514 	 * the task's contribution towards cumulative window
515 	 * demand.
516 	 */
517 	if (p->state == TASK_WAKING && p->last_sleep_ts >=
518 				       src_rq->window_start) {
519 		walt_fixup_cum_window_demand(src_rq,
520 					     -(s64)p->ravg.demand_scaled);
521 		walt_fixup_cum_window_demand(dest_rq, p->ravg.demand_scaled);
522 	}
523 
524 	new_task = is_new_task(p);
525 #ifdef CONFIG_SCHED_RTG
526 	/* Protected by rq_lock */
527 	grp = task_related_thread_group(p);
528 
529 	/*
530 	 * For frequency aggregation, we continue to do migration fixups
531  * even for intra-cluster migrations. This is because the aggregated
532  * load has to be reported on a single CPU regardless.
533 	 */
534 	if (grp) {
535 		struct group_cpu_time *cpu_time;
536 
537 		cpu_time = &src_rq->grp_time;
538 		src_curr_runnable_sum = &cpu_time->curr_runnable_sum;
539 		src_prev_runnable_sum = &cpu_time->prev_runnable_sum;
540 		src_nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum;
541 		src_nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum;
542 
543 		cpu_time = &dest_rq->grp_time;
544 		dst_curr_runnable_sum = &cpu_time->curr_runnable_sum;
545 		dst_prev_runnable_sum = &cpu_time->prev_runnable_sum;
546 		dst_nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum;
547 		dst_nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum;
548 
549 		if (p->ravg.curr_window) {
550 			*src_curr_runnable_sum -= p->ravg.curr_window;
551 			*dst_curr_runnable_sum += p->ravg.curr_window;
552 			if (new_task) {
553 				*src_nt_curr_runnable_sum -=
554 							p->ravg.curr_window;
555 				*dst_nt_curr_runnable_sum +=
556 							p->ravg.curr_window;
557 			}
558 		}
559 
560 		if (p->ravg.prev_window) {
561 			*src_prev_runnable_sum -= p->ravg.prev_window;
562 			*dst_prev_runnable_sum += p->ravg.prev_window;
563 			if (new_task) {
564 				*src_nt_prev_runnable_sum -=
565 							p->ravg.prev_window;
566 				*dst_nt_prev_runnable_sum +=
567 							p->ravg.prev_window;
568 			}
569 		}
570 	} else {
571 #endif
572 		inter_cluster_migration_fixup(p, new_cpu,
573 						task_cpu(p), new_task);
574 #ifdef CONFIG_SCHED_RTG
575 	}
576 #endif
577 
578 	if (!same_freq_domain(new_cpu, task_cpu(p)))
579 		irq_work_queue(&walt_migration_irq_work);
580 
581 done:
582 	if (p->state == TASK_WAKING)
583 		double_rq_unlock(src_rq, dest_rq);
584 }
585 
586 void set_window_start(struct rq *rq)
587 {
588 	static int sync_cpu_available;
589 
590 	if (likely(rq->window_start))
591 		return;
592 
593 	if (!sync_cpu_available) {
594 		rq->window_start = 1;
595 		sync_cpu_available = 1;
596 		atomic64_set(&walt_irq_work_lastq_ws, rq->window_start);
597 		walt_load_reported_window =
598 					atomic64_read(&walt_irq_work_lastq_ws);
599 
600 	} else {
601 		struct rq *sync_rq = cpu_rq(cpumask_any(cpu_online_mask));
602 
603 		raw_spin_unlock(&rq->lock);
604 		double_rq_lock(rq, sync_rq);
605 		rq->window_start = sync_rq->window_start;
606 		rq->curr_runnable_sum = rq->prev_runnable_sum = 0;
607 		rq->nt_curr_runnable_sum = rq->nt_prev_runnable_sum = 0;
608 		raw_spin_unlock(&sync_rq->lock);
609 	}
610 
611 	rq->curr->ravg.mark_start = rq->window_start;
612 }
613 
614 /*
615  * Called when new window is starting for a task, to record cpu usage over
616  * recently concluded window(s). Normally 'samples' should be 1. It can be > 1
617  * when, say, a real-time task runs without preemption for several windows at a
618  * stretch.
619  */
620 static void update_history(struct rq *rq, struct task_struct *p,
621 			   u32 runtime, int samples, int event)
622 {
623 	u32 *hist = &p->ravg.sum_history[0];
624 	int ridx, widx;
625 	u32 max = 0, avg, demand;
626 	u64 sum = 0;
627 	u16 demand_scaled;
628 
629 	/* Ignore windows where task had no activity */
630 	if (!runtime || is_idle_task(p) || exiting_task(p) || !samples)
631 		goto done;
632 
633 	/* Push new 'runtime' value onto stack */
634 	widx = sched_ravg_hist_size - 1;
635 	ridx = widx - samples;
636 	for (; ridx >= 0; --widx, --ridx) {
637 		hist[widx] = hist[ridx];
638 		sum += hist[widx];
639 		if (hist[widx] > max)
640 			max = hist[widx];
641 	}
642 
643 	for (widx = 0; widx < samples && widx < sched_ravg_hist_size; widx++) {
644 		hist[widx] = runtime;
645 		sum += hist[widx];
646 		if (hist[widx] > max)
647 			max = hist[widx];
648 	}
649 
650 	p->ravg.sum = 0;
651 
652 	if (sched_window_stats_policy == WINDOW_STATS_RECENT) {
653 		demand = runtime;
654 	} else if (sched_window_stats_policy == WINDOW_STATS_MAX) {
655 		demand = max;
656 	} else {
657 		avg = div64_u64(sum, sched_ravg_hist_size);
658 		if (sched_window_stats_policy == WINDOW_STATS_AVG)
659 			demand = avg;
660 		else
661 			demand = max(avg, runtime);
662 	}
663 	demand_scaled = scale_demand(demand);
664 
665 	/*
666 	 * A throttled deadline sched class task gets dequeued without
667  * changing p->on_rq. Since the dequeue already decrements the walt
668  * stats, avoid decrementing them here again.
669 	 *
670 	 * When window is rolled over, the cumulative window demand
671 	 * is reset to the cumulative runnable average (contribution from
672 	 * the tasks on the runqueue). If the current task is dequeued
673  * already, its demand is not included in the cumulative runnable
674 	 * average. So add the task demand separately to cumulative window
675 	 * demand.
676 	 */
677 	if (!task_has_dl_policy(p) || !p->dl.dl_throttled) {
678 		if (task_on_rq_queued(p)
679 				&& p->sched_class->fixup_walt_sched_stats)
680 			p->sched_class->fixup_walt_sched_stats(rq, p,
681 					demand_scaled);
682 		else if (rq->curr == p)
683 			walt_fixup_cum_window_demand(rq, demand_scaled);
684 	}
685 
686 	p->ravg.demand = demand;
687 	p->ravg.demand_scaled = demand_scaled;
688 
689 done:
690 	trace_sched_update_history(rq, p, runtime, samples, event);
691 }
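/*
 * Worked example, using the defaults above (hist_size 5, policy
 * WINDOW_STATS_MAX_RECENT_AVG): pushing a 6 ms sample onto a history of
 * {10, 8, 7, 9, 5} ms yields {6, 10, 8, 7, 9}, avg = 8 ms and
 * demand = max(avg, runtime) = 8 ms, so one quiet window does not
 * immediately collapse the task's demand.
 */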
692 
693 #define DIV64_U64_ROUNDUP(X, Y) div64_u64((X) + (Y - 1), Y)
694 
695 static u64 add_to_task_demand(struct rq *rq, struct task_struct *p, u64 delta)
696 {
697 	delta = scale_exec_time(delta, rq);
698 	p->ravg.sum += delta;
699 	if (unlikely(p->ravg.sum > sched_ravg_window))
700 		p->ravg.sum = sched_ravg_window;
701 
702 	return delta;
703 }
704 
705 /*
706  * Account cpu demand of task and/or update task's cpu demand history
707  *
708  * ms = p->ravg.mark_start;
709  * wc = wallclock
710  * ws = rq->window_start
711  *
712  * Three possibilities:
713  *
714  *	a) Task event is contained within one window.
715  *		window_start < mark_start < wallclock
716  *
717  *		ws   ms  wc
718  *		|    |   |
719  *		V    V   V
720  *		|---------------|
721  *
722  *	In this case, p->ravg.sum is updated *iff* event is appropriate
723  *	(ex: event == PUT_PREV_TASK)
724  *
725  *	b) Task event spans two windows.
726  *		mark_start < window_start < wallclock
727  *
728  *		ms   ws   wc
729  *		|    |    |
730  *		V    V    V
731  *		-----|-------------------
732  *
733  *	In this case, p->ravg.sum is updated with (ws - ms) *iff* event
734  *	is appropriate, then a new window sample is recorded followed
735  *	by p->ravg.sum being set to (wc - ws) *iff* event is appropriate.
736  *
737  *	c) Task event spans more than two windows.
738  *
739  *		ms ws_tmp			   ws  wc
740  *		|  |				   |   |
741  *		V  V				   V   V
742  *		---|-------|-------|-------|-------|------
743  *		   |				   |
744  *		   |<------ nr_full_windows ------>|
745  *
746  *	In this case, p->ravg.sum is updated with (ws_tmp - ms) first *iff*
747  *	event is appropriate, window sample of p->ravg.sum is recorded,
748  *	'nr_full_window' samples of window_size is also recorded *iff*
749  *	event is appropriate and finally p->ravg.sum is set to (wc - ws)
750  *	*iff* event is appropriate.
751  *
752  * IMPORTANT : Leave p->ravg.mark_start unchanged, as update_cpu_busy_time()
753  * depends on it!
754  */
755 static u64 update_task_demand(struct task_struct *p, struct rq *rq,
756 			      int event, u64 wallclock)
757 {
758 	u64 mark_start = p->ravg.mark_start;
759 	u64 delta, window_start = rq->window_start;
760 	int new_window, nr_full_windows;
761 	u32 window_size = sched_ravg_window;
762 	u64 runtime;
763 
764 #ifdef CONFIG_SCHED_RTG
765 	update_group_demand(p, rq, event, wallclock);
766 #endif
767 
768 	new_window = mark_start < window_start;
769 	if (!account_busy_for_task_demand(rq, p, event)) {
770 		if (new_window)
771 			/*
772 			 * If the time accounted isn't being accounted as
773 			 * busy time, and a new window started, only the
774 			 * previous window need be closed out with the
775 			 * pre-existing demand. Multiple windows may have
776 			 * elapsed, but since empty windows are dropped,
777 			 * it is not necessary to account those.
778 			 */
779 			update_history(rq, p, p->ravg.sum, 1, event);
780 		return 0;
781 	}
782 
783 	if (!new_window) {
784 		/*
785 		 * The simple case - busy time contained within the existing
786 		 * window.
787 		 */
788 		return add_to_task_demand(rq, p, wallclock - mark_start);
789 	}
790 
791 	/*
792 	 * Busy time spans at least two windows. Temporarily rewind
793 	 * window_start to first window boundary after mark_start.
794 	 */
795 	delta = window_start - mark_start;
796 	nr_full_windows = div64_u64(delta, window_size);
797 	window_start -= (u64)nr_full_windows * (u64)window_size;
798 
799 	/* Process (window_start - mark_start) first */
800 	runtime = add_to_task_demand(rq, p, window_start - mark_start);
801 
802 	/* Push new sample(s) into task's demand history */
803 	update_history(rq, p, p->ravg.sum, 1, event);
804 	if (nr_full_windows) {
805 		u64 scaled_window = scale_exec_time(window_size, rq);
806 
807 		update_history(rq, p, scaled_window, nr_full_windows, event);
808 		runtime += nr_full_windows * scaled_window;
809 	}
810 
811 	/*
812 	 * Roll window_start back to current to process any remainder
813 	 * in current window.
814 	 */
815 	window_start += (u64)nr_full_windows * (u64)window_size;
816 
817 	/* Process (wallclock - window_start) next */
818 	mark_start = window_start;
819 	runtime += add_to_task_demand(rq, p, wallclock - mark_start);
820 
821 	return runtime;
822 }
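/*
 * Worked example for case (c) above, a sketch assuming a 20 ms window and
 * no frequency/efficiency scaling in scale_exec_time(): with mark_start
 * 5 ms into window W and wallclock 8 ms into window W+3, the first partial
 * segment adds 15 ms to p->ravg.sum and closes W via update_history(),
 * two full 20 ms windows are then recorded as history samples, and the
 * remaining 8 ms starts accumulating in the current window.
 */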
823 
824 static u32 empty_windows[NR_CPUS];
825 
826 static void rollover_task_window(struct task_struct *p, bool full_window)
827 {
828 	u32 *curr_cpu_windows = empty_windows;
829 	u32 curr_window;
830 	int i;
831 
832 	/* Rollover the sum */
833 	curr_window = 0;
834 
835 	if (!full_window) {
836 		curr_window = p->ravg.curr_window;
837 		curr_cpu_windows = p->ravg.curr_window_cpu;
838 	}
839 
840 	p->ravg.prev_window = curr_window;
841 	p->ravg.curr_window = 0;
842 
843 	/* Roll over individual CPU contributions */
844 	for (i = 0; i < nr_cpu_ids; i++) {
845 		p->ravg.prev_window_cpu[i] = curr_cpu_windows[i];
846 		p->ravg.curr_window_cpu[i] = 0;
847 	}
848 }
849 
850 static void rollover_cpu_window(struct rq *rq, bool full_window)
851 {
852 	u64 curr_sum = rq->curr_runnable_sum;
853 	u64 nt_curr_sum = rq->nt_curr_runnable_sum;
854 
855 	if (unlikely(full_window)) {
856 		curr_sum = 0;
857 		nt_curr_sum = 0;
858 	}
859 
860 	rq->prev_runnable_sum = curr_sum;
861 	rq->nt_prev_runnable_sum = nt_curr_sum;
862 
863 	rq->curr_runnable_sum = 0;
864 	rq->nt_curr_runnable_sum = 0;
865 }
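/*
 * At a window rollover the curr sums above become the prev sums. If a full
 * window has elapsed since the last update, the window just closed saw no
 * accounted activity on this CPU, so the prev sums are zeroed instead.
 */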
866 
867 static inline int cpu_is_waiting_on_io(struct rq *rq)
868 {
869 	if (!sched_io_is_busy)
870 		return 0;
871 
872 	return atomic_read(&rq->nr_iowait);
873 }
874 
875 static int account_busy_for_cpu_time(struct rq *rq, struct task_struct *p,
876 				     u64 irqtime, int event)
877 {
878 	if (is_idle_task(p)) {
879 		/* TASK_WAKE && TASK_MIGRATE is not possible on idle task! */
880 		if (event == PICK_NEXT_TASK)
881 			return 0;
882 
883 		/* PUT_PREV_TASK, TASK_UPDATE && IRQ_UPDATE are left */
884 		return irqtime || cpu_is_waiting_on_io(rq);
885 	}
886 
887 	if (event == TASK_WAKE)
888 		return 0;
889 
890 	if (event == PUT_PREV_TASK || event == IRQ_UPDATE)
891 		return 1;
892 
893 	/*
894  * TASK_UPDATE can be called on a sleeping task, when it is moved
895  * between related groups.
896 	 */
897 	if (event == TASK_UPDATE) {
898 		if (rq->curr == p)
899 			return 1;
900 
901 		return p->on_rq ? SCHED_FREQ_ACCOUNT_WAIT_TIME : 0;
902 	}
903 
904 	/* TASK_MIGRATE, PICK_NEXT_TASK left */
905 	return SCHED_FREQ_ACCOUNT_WAIT_TIME;
906 }
907 
908 /*
909  * Account cpu activity in its busy time counters (rq->curr/prev_runnable_sum)
910  */
911 static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
912 				 int event, u64 wallclock, u64 irqtime)
913 {
914 	int new_window, full_window = 0;
915 	int p_is_curr_task = (p == rq->curr);
916 	u64 mark_start = p->ravg.mark_start;
917 	u64 window_start = rq->window_start;
918 	u32 window_size = sched_ravg_window;
919 	u64 delta;
920 	u64 *curr_runnable_sum = &rq->curr_runnable_sum;
921 	u64 *prev_runnable_sum = &rq->prev_runnable_sum;
922 	u64 *nt_curr_runnable_sum = &rq->nt_curr_runnable_sum;
923 	u64 *nt_prev_runnable_sum = &rq->nt_prev_runnable_sum;
924 	bool new_task;
925 	int cpu = rq->cpu;
926 #ifdef CONFIG_SCHED_RTG
927 	struct group_cpu_time *cpu_time;
928 	struct related_thread_group *grp;
929 #endif
930 
931 	new_window = mark_start < window_start;
932 	if (new_window) {
933 		full_window = (window_start - mark_start) >= window_size;
934 		if (p->ravg.active_windows < USHRT_MAX)
935 			p->ravg.active_windows++;
936 	}
937 
938 	new_task = is_new_task(p);
939 
940 	/*
941 	 * Handle per-task window rollover. We don't care about the idle
942 	 * task or exiting tasks.
943 	 */
944 	if (!is_idle_task(p) && !exiting_task(p)) {
945 		if (new_window)
946 			rollover_task_window(p, full_window);
947 	}
948 
949 	if (p_is_curr_task && new_window)
950 		rollover_cpu_window(rq, full_window);
951 
952 	if (!account_busy_for_cpu_time(rq, p, irqtime, event))
953 		goto done;
954 
955 #ifdef CONFIG_SCHED_RTG
956 	grp = task_related_thread_group(p);
957 	if (grp) {
958 		cpu_time = &rq->grp_time;
959 
960 		curr_runnable_sum = &cpu_time->curr_runnable_sum;
961 		prev_runnable_sum = &cpu_time->prev_runnable_sum;
962 
963 		nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum;
964 		nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum;
965 	}
966 #endif
967 
968 	if (!new_window) {
969 		/*
970 		 * account_busy_for_cpu_time() = 1 so busy time needs
971 		 * to be accounted to the current window. No rollover
972 		 * since we didn't start a new window. An example of this is
973 		 * when a task starts execution and then sleeps within the
974 		 * same window.
975 		 */
976 
977 		if (!irqtime || !is_idle_task(p) || cpu_is_waiting_on_io(rq))
978 			delta = wallclock - mark_start;
979 		else
980 			delta = irqtime;
981 		delta = scale_exec_time(delta, rq);
982 		*curr_runnable_sum += delta;
983 		if (new_task)
984 			*nt_curr_runnable_sum += delta;
985 
986 		if (!is_idle_task(p) && !exiting_task(p)) {
987 			p->ravg.curr_window += delta;
988 			p->ravg.curr_window_cpu[cpu] += delta;
989 		}
990 
991 		goto done;
992 	}
993 
994 	if (!p_is_curr_task) {
995 		/*
996 		 * account_busy_for_cpu_time() = 1 so busy time needs
997 		 * to be accounted to the current window. A new window
998 		 * has also started, but p is not the current task, so the
999 		 * window is not rolled over - just split up and account
1000 		 * as necessary into curr and prev. The window is only
1001 		 * rolled over when a new window is processed for the current
1002 		 * task.
1003 		 *
1004 		 * Irqtime can't be accounted by a task that isn't the
1005 		 * currently running task.
1006 		 */
1007 
1008 		if (!full_window) {
1009 			/*
1010 			 * A full window hasn't elapsed, account partial
1011 			 * contribution to previous completed window.
1012 			 */
1013 			delta = scale_exec_time(window_start - mark_start, rq);
1014 			if (!exiting_task(p)) {
1015 				p->ravg.prev_window += delta;
1016 				p->ravg.prev_window_cpu[cpu] += delta;
1017 			}
1018 		} else {
1019 			/*
1020 			 * Since at least one full window has elapsed,
1021 			 * the contribution to the previous window is the
1022 			 * full window (window_size).
1023 			 */
1024 			delta = scale_exec_time(window_size, rq);
1025 			if (!exiting_task(p)) {
1026 				p->ravg.prev_window = delta;
1027 				p->ravg.prev_window_cpu[cpu] = delta;
1028 			}
1029 		}
1030 
1031 		*prev_runnable_sum += delta;
1032 		if (new_task)
1033 			*nt_prev_runnable_sum += delta;
1034 
1035 		/* Account piece of busy time in the current window. */
1036 		delta = scale_exec_time(wallclock - window_start, rq);
1037 		*curr_runnable_sum += delta;
1038 		if (new_task)
1039 			*nt_curr_runnable_sum += delta;
1040 
1041 		if (!exiting_task(p)) {
1042 			p->ravg.curr_window = delta;
1043 			p->ravg.curr_window_cpu[cpu] = delta;
1044 		}
1045 
1046 		goto done;
1047 	}
1048 
1049 	if (!irqtime || !is_idle_task(p) || cpu_is_waiting_on_io(rq)) {
1050 		/*
1051 		 * account_busy_for_cpu_time() = 1 so busy time needs
1052 		 * to be accounted to the current window. A new window
1053 		 * has started and p is the current task so rollover is
1054 		 * needed. If any of these three above conditions are true
1055 		 * then this busy time can't be accounted as irqtime.
1056 		 *
1057 		 * Busy time for the idle task or exiting tasks need not
1058 		 * be accounted.
1059 		 *
1060 		 * An example of this would be a task that starts execution
1061 		 * and then sleeps once a new window has begun.
1062 		 */
1063 
1064 		if (!full_window) {
1065 			/*
1066 			 * A full window hasn't elapsed, account partial
1067 			 * contribution to previous completed window.
1068 			 */
1069 			delta = scale_exec_time(window_start - mark_start, rq);
1070 			if (!is_idle_task(p) && !exiting_task(p)) {
1071 				p->ravg.prev_window += delta;
1072 				p->ravg.prev_window_cpu[cpu] += delta;
1073 			}
1074 		} else {
1075 			/*
1076 			 * Since at least one full window has elapsed,
1077 			 * the contribution to the previous window is the
1078 			 * full window (window_size).
1079 			 */
1080 			delta = scale_exec_time(window_size, rq);
1081 			if (!is_idle_task(p) && !exiting_task(p)) {
1082 				p->ravg.prev_window = delta;
1083 				p->ravg.prev_window_cpu[cpu] = delta;
1084 			}
1085 		}
1086 
1087 		/*
1088 		 * Rollover is done here by overwriting the values in
1089 		 * prev_runnable_sum and curr_runnable_sum.
1090 		 */
1091 		*prev_runnable_sum += delta;
1092 		if (new_task)
1093 			*nt_prev_runnable_sum += delta;
1094 
1095 		/* Account piece of busy time in the current window. */
1096 		delta = scale_exec_time(wallclock - window_start, rq);
1097 		*curr_runnable_sum += delta;
1098 		if (new_task)
1099 			*nt_curr_runnable_sum += delta;
1100 
1101 		if (!is_idle_task(p) && !exiting_task(p)) {
1102 			p->ravg.curr_window = delta;
1103 			p->ravg.curr_window_cpu[cpu] = delta;
1104 		}
1105 
1106 		goto done;
1107 	}
1108 
1109 	if (irqtime) {
1110 		/*
1111 		 * account_busy_for_cpu_time() = 1 so busy time needs
1112 		 * to be accounted to the current window. A new window
1113 		 * has started and p is the current task so rollover is
1114 		 * needed. The current task must be the idle task because
1115 		 * irqtime is not accounted for any other task.
1116 		 *
1117 		 * Irqtime will be accounted each time we process IRQ activity
1118 		 * after a period of idleness, so we know the IRQ busy time
1119 		 * started at wallclock - irqtime.
1120 		 */
1121 
1122 		BUG_ON(!is_idle_task(p));
1123 		mark_start = wallclock - irqtime;
1124 
1125 		/*
1126 		 * Roll window over. If IRQ busy time was just in the current
1127 		 * window then that is all that need be accounted.
1128 		 */
1129 		if (mark_start > window_start) {
1130 			*curr_runnable_sum = scale_exec_time(irqtime, rq);
1131 			return;
1132 		}
1133 
1134 		/*
1135 		 * The IRQ busy time spanned multiple windows. Process the
1136 		 * busy time preceding the current window start first.
1137 		 */
1138 		delta = window_start - mark_start;
1139 		if (delta > window_size)
1140 			delta = window_size;
1141 		delta = scale_exec_time(delta, rq);
1142 		*prev_runnable_sum += delta;
1143 
1144 		/* Process the remaining IRQ busy time in the current window. */
1145 		delta = wallclock - window_start;
1146 		rq->curr_runnable_sum = scale_exec_time(delta, rq);
1147 
1148 		return;
1149 	}
1150 
1151 done:
1152 	return;
1153 }
1154 
1155 static inline void run_walt_irq_work(u64 old_window_start, struct rq *rq)
1156 {
1157 	u64 result;
1158 
1159 	if (old_window_start == rq->window_start)
1160 		return;
1161 
1162 	result = atomic64_cmpxchg(&walt_irq_work_lastq_ws, old_window_start,
1163 				   rq->window_start);
1164 	if (result == old_window_start)
1165 		irq_work_queue(&walt_cpufreq_irq_work);
1166 }
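/*
 * The cmpxchg above makes the first CPU that observes a given window
 * rollover the only one to queue walt_cpufreq_irq_work for that window;
 * CPUs that roll over later find walt_irq_work_lastq_ws already updated
 * and skip the queueing.
 */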
1167 
1168 /* Reflect task activity on its demand and cpu's busy time statistics */
1169 void update_task_ravg(struct task_struct *p, struct rq *rq, int event,
1170 						u64 wallclock, u64 irqtime)
1171 {
1172 	u64 old_window_start;
1173 
1174 	if (!rq->window_start || sched_disable_window_stats ||
1175 	    p->ravg.mark_start == wallclock)
1176 		return;
1177 
1178 	lockdep_assert_held(&rq->lock);
1179 
1180 	old_window_start = update_window_start(rq, wallclock, event);
1181 
1182 #ifdef CONFIG_SCHED_RTG
1183 	update_group_nr_running(p, event, wallclock);
1184 #endif
1185 	if (!p->ravg.mark_start)
1186 		goto done;
1187 
1188 	update_task_demand(p, rq, event, wallclock);
1189 	update_cpu_busy_time(p, rq, event, wallclock, irqtime);
1190 
1191 	if (exiting_task(p))
1192 		goto done;
1193 
1194 	trace_sched_update_task_ravg(p, rq, event, wallclock, irqtime);
1195 done:
1196 	p->ravg.mark_start = wallclock;
1197 
1198 	run_walt_irq_work(old_window_start, rq);
1199 }
1200 
1201 int sysctl_sched_walt_init_task_load_pct_sysctl_handler(struct ctl_table *table,
1202 		int write, void __user *buffer, size_t *length, loff_t *ppos)
1203 {
1204 	int rc;
1205 
1206 	rc = proc_dointvec(table, write, buffer, length, ppos);
1207 	if (rc)
1208 		return rc;
1209 
1210 	sysctl_sched_init_task_load_pct = sysctl_sched_walt_init_task_load_pct;
1211 
1212 	return 0;
1213 }
1214 
1215 u32 sched_get_init_task_load(struct task_struct *p)
1216 {
1217 	return p->init_load_pct;
1218 }
1219 
1220 int sched_set_init_task_load(struct task_struct *p, int init_load_pct)
1221 {
1222 	if (init_load_pct < 0 || init_load_pct > 100)
1223 		return -EINVAL;
1224 
1225 	p->init_load_pct = init_load_pct;
1226 
1227 	return 0;
1228 }
1229 
1230 void init_new_task_load(struct task_struct *p)
1231 {
1232 	int i;
1233 	u32 init_load_windows = sched_init_task_load_windows;
1234 	u32 init_load_windows_scaled = sched_init_task_load_windows_scaled;
1235 	u32 init_load_pct = current->init_load_pct;
1236 
1237 #ifdef CONFIG_SCHED_RTG
1238 	init_task_rtg(p);
1239 #endif
1240 
1241 	p->last_sleep_ts = 0;
1242 	p->init_load_pct = 0;
1243 	memset(&p->ravg, 0, sizeof(struct ravg));
1244 
1245 	p->ravg.curr_window_cpu = kcalloc(nr_cpu_ids, sizeof(u32),
1246 					  GFP_KERNEL | __GFP_NOFAIL);
1247 	p->ravg.prev_window_cpu = kcalloc(nr_cpu_ids, sizeof(u32),
1248 					  GFP_KERNEL | __GFP_NOFAIL);
1249 
1250 	if (init_load_pct) {
1251 		init_load_windows = div64_u64((u64)init_load_pct *
1252 			  (u64)sched_ravg_window, 100);
1253 		init_load_windows_scaled = scale_demand(init_load_windows);
1254 	}
1255 
1256 	p->ravg.demand = init_load_windows;
1257 	p->ravg.demand_scaled = init_load_windows_scaled;
1258 	for (i = 0; i < RAVG_HIST_SIZE_MAX; ++i)
1259 		p->ravg.sum_history[i] = init_load_windows;
1260 }
1261 
1262 void free_task_load_ptrs(struct task_struct *p)
1263 {
1264 	kfree(p->ravg.curr_window_cpu);
1265 	kfree(p->ravg.prev_window_cpu);
1266 
1267 	/*
1268 	 * update_task_ravg() can be called for exiting tasks. While the
1269 	 * function itself ensures correct behavior, the corresponding
1270 	 * trace event requires that these pointers be NULL.
1271 	 */
1272 	p->ravg.curr_window_cpu = NULL;
1273 	p->ravg.prev_window_cpu = NULL;
1274 }
1275 
1276 void reset_task_stats(struct task_struct *p)
1277 {
1278 	u32 sum = 0;
1279 	u32 *curr_window_ptr = NULL;
1280 	u32 *prev_window_ptr = NULL;
1281 
1282 	if (exiting_task(p)) {
1283 		sum = EXITING_TASK_MARKER;
1284 	} else {
1285 		curr_window_ptr =  p->ravg.curr_window_cpu;
1286 		prev_window_ptr = p->ravg.prev_window_cpu;
1287 		memset(curr_window_ptr, 0, sizeof(u32) * nr_cpu_ids);
1288 		memset(prev_window_ptr, 0, sizeof(u32) * nr_cpu_ids);
1289 	}
1290 
1291 	memset(&p->ravg, 0, sizeof(struct ravg));
1292 
1293 	p->ravg.curr_window_cpu = curr_window_ptr;
1294 	p->ravg.prev_window_cpu = prev_window_ptr;
1295 
1296 	/* Retain EXITING_TASK marker */
1297 	p->ravg.sum_history[0] = sum;
1298 }
1299 
1300 void mark_task_starting(struct task_struct *p)
1301 {
1302 	u64 wallclock;
1303 	struct rq *rq = task_rq(p);
1304 
1305 	if (!rq->window_start || sched_disable_window_stats) {
1306 		reset_task_stats(p);
1307 		return;
1308 	}
1309 
1310 	wallclock = sched_ktime_clock();
1311 	p->ravg.mark_start = wallclock;
1312 }
1313 
1314 unsigned int max_possible_efficiency = 1;
1315 unsigned int min_possible_efficiency = UINT_MAX;
1316 unsigned int max_power_cost = 1;
1317 
1318 static cpumask_t all_cluster_cpus = CPU_MASK_NONE;
1319 DECLARE_BITMAP(all_cluster_ids, NR_CPUS);
1320 struct sched_cluster *sched_cluster[NR_CPUS];
1321 int num_clusters;
1322 
1323 struct list_head cluster_head;
1324 
1325 static void
1326 insert_cluster(struct sched_cluster *cluster, struct list_head *head)
1327 {
1328 	struct sched_cluster *tmp;
1329 	struct list_head *iter = head;
1330 
1331 	list_for_each_entry(tmp, head, list) {
1332 		if (cluster->max_power_cost < tmp->max_power_cost)
1333 			break;
1334 		iter = &tmp->list;
1335 	}
1336 
1337 	list_add(&cluster->list, iter);
1338 }
1339 
1340 static struct sched_cluster *alloc_new_cluster(const struct cpumask *cpus)
1341 {
1342 	struct sched_cluster *cluster = NULL;
1343 
1344 	cluster = kzalloc(sizeof(struct sched_cluster), GFP_ATOMIC);
1345 	if (!cluster) {
1346 		pr_warn("Cluster allocation failed. Possible bad scheduling\n");
1347 		return NULL;
1348 	}
1349 
1350 	INIT_LIST_HEAD(&cluster->list);
1351 	cluster->max_power_cost		=	1;
1352 	cluster->min_power_cost		=	1;
1353 	cluster->capacity		=	1024;
1354 	cluster->max_possible_capacity	=	1024;
1355 	cluster->efficiency		=	1;
1356 	cluster->load_scale_factor	=	1024;
1357 	cluster->cur_freq		=	1;
1358 	cluster->max_freq		=	1;
1359 	cluster->min_freq		=	1;
1360 	cluster->max_possible_freq	=	1;
1361 	cluster->freq_init_done		=	false;
1362 
1363 	raw_spin_lock_init(&cluster->load_lock);
1364 	cluster->cpus = *cpus;
1365 	cluster->efficiency = topology_get_cpu_scale(cpumask_first(cpus));
1366 
1367 	if (cluster->efficiency > max_possible_efficiency)
1368 		max_possible_efficiency = cluster->efficiency;
1369 	if (cluster->efficiency < min_possible_efficiency)
1370 		min_possible_efficiency = cluster->efficiency;
1371 
1372 	return cluster;
1373 }
1374 
1375 static void add_cluster(const struct cpumask *cpus, struct list_head *head)
1376 {
1377 	struct sched_cluster *cluster = alloc_new_cluster(cpus);
1378 	int i;
1379 
1380 	if (!cluster)
1381 		return;
1382 
1383 	for_each_cpu(i, cpus)
1384 		cpu_rq(i)->cluster = cluster;
1385 
1386 	insert_cluster(cluster, head);
1387 	set_bit(num_clusters, all_cluster_ids);
1388 	num_clusters++;
1389 }
1390 
1391 static int compute_max_possible_capacity(struct sched_cluster *cluster)
1392 {
1393 	int capacity = 1024;
1394 
1395 	capacity *= capacity_scale_cpu_efficiency(cluster);
1396 	capacity >>= 10;
1397 
1398 	capacity *= (1024 * cluster->max_possible_freq) / min_max_freq;
1399 	capacity >>= 10;
1400 
1401 	return capacity;
1402 }
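/*
 * Sketch with hypothetical values: if capacity_scale_cpu_efficiency()
 * returned 2048 for a cluster and that cluster's max_possible_freq were
 * twice min_max_freq, the result would be
 * ((1024 * 2048) >> 10) = 2048, then (2048 * 2048) >> 10 = 4096.
 */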
1403 
1404 void walt_update_min_max_capacity(void)
1405 {
1406 	unsigned long flags;
1407 
1408 	acquire_rq_locks_irqsave(cpu_possible_mask, &flags);
1409 	__update_min_max_capacity();
1410 	release_rq_locks_irqrestore(cpu_possible_mask, &flags);
1411 }
1412 
1413 static int
1414 compare_clusters(void *priv, const struct list_head *a, const struct list_head *b)
1415 {
1416 	struct sched_cluster *cluster1, *cluster2;
1417 	int ret;
1418 
1419 	cluster1 = container_of(a, struct sched_cluster, list);
1420 	cluster2 = container_of(b, struct sched_cluster, list);
1421 
1422 	/*
1423 	 * Don't assume higher capacity means higher power. If the
1424 	 * power cost is same, sort the higher capacity cluster before
1425 	 * the lower capacity cluster to start placing the tasks
1426 	 * on the higher capacity cluster.
1427 	 */
1428 	ret = cluster1->max_power_cost > cluster2->max_power_cost ||
1429 		(cluster1->max_power_cost == cluster2->max_power_cost &&
1430 		cluster1->max_possible_capacity <
1431 				cluster2->max_possible_capacity);
1432 
1433 	return ret;
1434 }
1435 
1436 void sort_clusters(void)
1437 {
1438 	struct sched_cluster *cluster;
1439 	struct list_head new_head;
1440 	unsigned int tmp_max = 1;
1441 
1442 	INIT_LIST_HEAD(&new_head);
1443 
1444 	for_each_sched_cluster(cluster) {
1445 		cluster->max_power_cost = power_cost(cluster_first_cpu(cluster),
1446 							       max_task_load());
1447 		cluster->min_power_cost = power_cost(cluster_first_cpu(cluster),
1448 							       0);
1449 
1450 		if (cluster->max_power_cost > tmp_max)
1451 			tmp_max = cluster->max_power_cost;
1452 	}
1453 	max_power_cost = tmp_max;
1454 
1455 	move_list(&new_head, &cluster_head, true);
1456 
1457 	list_sort(NULL, &new_head, compare_clusters);
1458 	assign_cluster_ids(&new_head);
1459 
1460 	/*
1461 	 * Ensure cluster ids are visible to all CPUs before making
1462 	 * cluster_head visible.
1463 	 */
1464 	move_list(&cluster_head, &new_head, false);
1465 }
1466 
1467 static void update_all_clusters_stats(void)
1468 {
1469 	struct sched_cluster *cluster;
1470 	u64 highest_mpc = 0, lowest_mpc = U64_MAX;
1471 	unsigned long flags;
1472 
1473 	acquire_rq_locks_irqsave(cpu_possible_mask, &flags);
1474 
1475 	for_each_sched_cluster(cluster) {
1476 		u64 mpc;
1477 
1478 		cluster->capacity = compute_capacity(cluster);
1479 		mpc = cluster->max_possible_capacity =
1480 			compute_max_possible_capacity(cluster);
1481 		cluster->load_scale_factor = compute_load_scale_factor(cluster);
1482 
1483 		cluster->exec_scale_factor =
1484 			DIV_ROUND_UP(cluster->efficiency * 1024,
1485 				     max_possible_efficiency);
1486 
1487 		if (mpc > highest_mpc)
1488 			highest_mpc = mpc;
1489 
1490 		if (mpc < lowest_mpc)
1491 			lowest_mpc = mpc;
1492 	}
1493 
1494 	max_possible_capacity = highest_mpc;
1495 	min_max_possible_capacity = lowest_mpc;
1496 
1497 	__update_min_max_capacity();
1498 	release_rq_locks_irqrestore(cpu_possible_mask, &flags);
1499 }
1500 
1501 void update_cluster_topology(void)
1502 {
1503 	struct cpumask cpus = *cpu_possible_mask;
1504 	const struct cpumask *cluster_cpus;
1505 	struct list_head new_head;
1506 	int i;
1507 
1508 	INIT_LIST_HEAD(&new_head);
1509 
1510 	for_each_cpu(i, &cpus) {
1511 		cluster_cpus = cpu_coregroup_mask(i);
1512 		cpumask_or(&all_cluster_cpus, &all_cluster_cpus, cluster_cpus);
1513 		cpumask_andnot(&cpus, &cpus, cluster_cpus);
1514 		add_cluster(cluster_cpus, &new_head);
1515 	}
1516 
1517 	assign_cluster_ids(&new_head);
1518 
1519 	/*
1520 	 * Ensure cluster ids are visible to all CPUs before making
1521 	 * cluster_head visible.
1522 	 */
1523 	move_list(&cluster_head, &new_head, false);
1524 	update_all_clusters_stats();
1525 }
1526 
1527 struct sched_cluster init_cluster = {
1528 	.list			=	LIST_HEAD_INIT(init_cluster.list),
1529 	.id			=	0,
1530 	.max_power_cost		=	1,
1531 	.min_power_cost		=	1,
1532 	.capacity		=	1024,
1533 	.max_possible_capacity	=	1024,
1534 	.efficiency		=	1,
1535 	.load_scale_factor	=	1024,
1536 	.cur_freq		=	1,
1537 	.max_freq		=	1,
1538 	.min_freq		=	1,
1539 	.max_possible_freq	=	1,
1540 	.exec_scale_factor	=	1024,
1541 };
1542 
1543 void init_clusters(void)
1544 {
1545 	bitmap_clear(all_cluster_ids, 0, NR_CPUS);
1546 	init_cluster.cpus = *cpu_possible_mask;
1547 	raw_spin_lock_init(&init_cluster.load_lock);
1548 	INIT_LIST_HEAD(&cluster_head);
1549 }
1550 
1551 static unsigned long cpu_max_table_freq[NR_CPUS];
1552 
1553 void update_cpu_cluster_capacity(const cpumask_t *cpus)
1554 {
1555 	int i;
1556 	struct sched_cluster *cluster;
1557 	struct cpumask cpumask;
1558 	unsigned long flags;
1559 
1560 	cpumask_copy(&cpumask, cpus);
1561 	acquire_rq_locks_irqsave(cpu_possible_mask, &flags);
1562 
1563 	for_each_cpu(i, &cpumask) {
1564 		cluster = cpu_rq(i)->cluster;
1565 		cpumask_andnot(&cpumask, &cpumask, &cluster->cpus);
1566 
1567 		cluster->capacity = compute_capacity(cluster);
1568 		cluster->load_scale_factor = compute_load_scale_factor(cluster);
1569 	}
1570 
1571 	__update_min_max_capacity();
1572 
1573 	release_rq_locks_irqrestore(cpu_possible_mask, &flags);
1574 }
1575 
1576 static int cpufreq_notifier_policy(struct notifier_block *nb,
1577 		unsigned long val, void *data)
1578 {
1579 	struct cpufreq_policy *policy = (struct cpufreq_policy *)data;
1580 	struct sched_cluster *cluster = NULL;
1581 	struct cpumask policy_cluster = *policy->related_cpus;
1582 	unsigned int orig_max_freq = 0;
1583 	int i, j, update_capacity = 0;
1584 
1585 	if (val != CPUFREQ_CREATE_POLICY)
1586 		return 0;
1587 
1588 	walt_update_min_max_capacity();
1589 
1590 	max_possible_freq = max(max_possible_freq, policy->cpuinfo.max_freq);
1591 	if (min_max_freq == 1)
1592 		min_max_freq = UINT_MAX;
1593 	min_max_freq = min(min_max_freq, policy->cpuinfo.max_freq);
1594 	BUG_ON(!min_max_freq);
1595 	BUG_ON(!policy->max);
1596 
1597 	for_each_cpu(i, &policy_cluster)
1598 		cpu_max_table_freq[i] = policy->cpuinfo.max_freq;
1599 
1600 	for_each_cpu(i, &policy_cluster) {
1601 		cluster = cpu_rq(i)->cluster;
1602 		cpumask_andnot(&policy_cluster, &policy_cluster,
1603 						&cluster->cpus);
1604 
1605 		orig_max_freq = cluster->max_freq;
1606 		cluster->min_freq = policy->min;
1607 		cluster->max_freq = policy->max;
1608 		cluster->cur_freq = policy->cur;
1609 
1610 		if (!cluster->freq_init_done) {
1611 			mutex_lock(&cluster_lock);
1612 			for_each_cpu(j, &cluster->cpus)
1613 				cpumask_copy(&cpu_rq(j)->freq_domain_cpumask,
1614 						policy->related_cpus);
1615 			cluster->max_possible_freq = policy->cpuinfo.max_freq;
1616 			cluster->max_possible_capacity =
1617 				compute_max_possible_capacity(cluster);
1618 			cluster->freq_init_done = true;
1619 
1620 			sort_clusters();
1621 			update_all_clusters_stats();
1622 			mutex_unlock(&cluster_lock);
1623 			continue;
1624 		}
1625 
1626 		update_capacity += (orig_max_freq != cluster->max_freq);
1627 	}
1628 
1629 	if (update_capacity)
1630 		update_cpu_cluster_capacity(policy->related_cpus);
1631 
1632 	return 0;
1633 }
1634 
1635 static struct notifier_block notifier_policy_block = {
1636 	.notifier_call = cpufreq_notifier_policy
1637 };
1638 
1639 static int cpufreq_notifier_trans(struct notifier_block *nb,
1640 		unsigned long val, void *data)
1641 {
1642 	struct cpufreq_freqs *freq = (struct cpufreq_freqs *)data;
1643 	unsigned int cpu = freq->policy->cpu, new_freq = freq->new;
1644 	unsigned long flags;
1645 	struct sched_cluster *cluster;
1646 	struct cpumask policy_cpus = cpu_rq(cpu)->freq_domain_cpumask;
1647 	int i, j;
1648 
1649 	if (val != CPUFREQ_POSTCHANGE)
1650 		return NOTIFY_DONE;
1651 
1652 	if (cpu_cur_freq(cpu) == new_freq)
1653 		return NOTIFY_OK;
1654 
1655 	for_each_cpu(i, &policy_cpus) {
1656 		cluster = cpu_rq(i)->cluster;
1657 
1658 		for_each_cpu(j, &cluster->cpus) {
1659 			struct rq *rq = cpu_rq(j);
1660 
1661 			raw_spin_lock_irqsave(&rq->lock, flags);
1662 			update_task_ravg(rq->curr, rq, TASK_UPDATE,
1663 					 sched_ktime_clock(), 0);
1664 			raw_spin_unlock_irqrestore(&rq->lock, flags);
1665 		}
1666 
1667 		cluster->cur_freq = new_freq;
1668 		cpumask_andnot(&policy_cpus, &policy_cpus, &cluster->cpus);
1669 	}
1670 
1671 	return NOTIFY_OK;
1672 }
1673 
1674 static struct notifier_block notifier_trans_block = {
1675 	.notifier_call = cpufreq_notifier_trans
1676 };
1677 
1678 static int register_walt_callback(void)
1679 {
1680 	int ret;
1681 
1682 	ret = cpufreq_register_notifier(&notifier_policy_block,
1683 					CPUFREQ_POLICY_NOTIFIER);
1684 	if (!ret)
1685 		ret = cpufreq_register_notifier(&notifier_trans_block,
1686 						CPUFREQ_TRANSITION_NOTIFIER);
1687 
1688 	return ret;
1689 }
1690 /*
1691  * cpufreq callbacks can be registered at core_initcall or later time.
1692  * Any registration done prior to that is "forgotten" by cpufreq. See
1693  * initialization of variable init_cpufreq_transition_notifier_list_called
1694  * for further information.
1695  */
1696 core_initcall(register_walt_callback);
1697 
1698 /*
1699  * Runs in hard-irq context. This should ideally run just after the latest
1700  * window roll-over.
1701  */
1702 void walt_irq_work(struct irq_work *irq_work)
1703 {
1704 	struct sched_cluster *cluster;
1705 	struct rq *rq;
1706 	int cpu;
1707 	u64 wc;
1708 	bool is_migration = false;
1709 	int level = 0;
1710 
1711 	/* Am I the window rollover work or the migration work? */
1712 	if (irq_work == &walt_migration_irq_work)
1713 		is_migration = true;
1714 
1715 	for_each_cpu(cpu, cpu_possible_mask) {
1716 		if (level == 0)
1717 			raw_spin_lock(&cpu_rq(cpu)->lock);
1718 		else
1719 			raw_spin_lock_nested(&cpu_rq(cpu)->lock, level);
1720 		level++;
1721 	}
1722 
1723 	wc = sched_ktime_clock();
1724 	walt_load_reported_window = atomic64_read(&walt_irq_work_lastq_ws);
1725 	for_each_sched_cluster(cluster) {
1726 		raw_spin_lock(&cluster->load_lock);
1727 
1728 		for_each_cpu(cpu, &cluster->cpus) {
1729 			rq = cpu_rq(cpu);
1730 			if (rq->curr) {
1731 				update_task_ravg(rq->curr, rq,
1732 						TASK_UPDATE, wc, 0);
1733 				account_load_subtractions(rq);
1734 			}
1735 		}
1736 
1737 		raw_spin_unlock(&cluster->load_lock);
1738 	}
1739 
1740 	for_each_sched_cluster(cluster) {
1741 		cpumask_t cluster_online_cpus;
1742 		unsigned int num_cpus, i = 1;
1743 
1744 		cpumask_and(&cluster_online_cpus, &cluster->cpus,
1745 						cpu_online_mask);
1746 		num_cpus = cpumask_weight(&cluster_online_cpus);
1747 		for_each_cpu(cpu, &cluster_online_cpus) {
1748 			int flag = SCHED_CPUFREQ_WALT;
1749 
1750 			rq = cpu_rq(cpu);
1751 
1752 			if (i == num_cpus)
1753 				cpufreq_update_util(cpu_rq(cpu), flag);
1754 			else
1755 				cpufreq_update_util(cpu_rq(cpu), flag |
1756 							SCHED_CPUFREQ_CONTINUE);
1757 			i++;
1758 		}
1759 	}
1760 
1761 	for_each_cpu(cpu, cpu_possible_mask)
1762 		raw_spin_unlock(&cpu_rq(cpu)->lock);
1763 
1764 	if (!is_migration)
1765 		core_ctl_check(this_rq()->window_start);
1766 }
1767 
1768 static void walt_init_once(void)
1769 {
1770 	init_irq_work(&walt_migration_irq_work, walt_irq_work);
1771 	init_irq_work(&walt_cpufreq_irq_work, walt_irq_work);
1772 
1773 	walt_cpu_util_freq_divisor =
1774 	    (sched_ravg_window >> SCHED_CAPACITY_SHIFT) * 100;
1775 	walt_scale_demand_divisor = sched_ravg_window >> SCHED_CAPACITY_SHIFT;
1776 
1777 	sched_init_task_load_windows =
1778 		div64_u64((u64)sysctl_sched_init_task_load_pct *
1779 			  (u64)sched_ravg_window, 100);
1780 	sched_init_task_load_windows_scaled =
1781 		scale_demand(sched_init_task_load_windows);
1782 }
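/*
 * Worked example, assuming the default 20 ms window and
 * SCHED_CAPACITY_SHIFT == 10: walt_scale_demand_divisor becomes
 * 20000000 >> 10 = 19531, walt_cpu_util_freq_divisor becomes
 * 19531 * 100 = 1953100, and the default 15% initial task load maps to
 * 3000000 ns of demand, or about 3000000 / 19531 ~= 153 when scaled.
 */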
1783 
1784 void walt_sched_init_rq(struct rq *rq)
1785 {
1786 	static bool init;
1787 	int j;
1788 
1789 	if (!init) {
1790 		walt_init_once();
1791 		init = true;
1792 	}
1793 
1794 	cpumask_set_cpu(cpu_of(rq), &rq->freq_domain_cpumask);
1795 
1796 	rq->walt_stats.cumulative_runnable_avg_scaled = 0;
1797 	rq->window_start = 0;
1798 	rq->walt_flags = 0;
1799 	rq->cur_irqload = 0;
1800 	rq->avg_irqload = 0;
1801 	rq->irqload_ts = 0;
1802 
1803 	/*
1804 	 * All cpus are part of the same cluster by default. This avoids the
1805 	 * need to check for rq->cluster being non-NULL in hot-paths
1806 	 * like select_best_cpu()
1807 	 */
1808 	rq->cluster = &init_cluster;
1809 	rq->curr_runnable_sum = rq->prev_runnable_sum = 0;
1810 	rq->nt_curr_runnable_sum = rq->nt_prev_runnable_sum = 0;
1811 	rq->cum_window_demand_scaled = 0;
1812 
1813 	for (j = 0; j < NUM_TRACKED_WINDOWS; j++)
1814 		memset(&rq->load_subs[j], 0, sizeof(struct load_subtractions));
1815 }
1816 
1817 #define min_cap_cluster() \
1818 	list_first_entry(&cluster_head, struct sched_cluster, list)
1819 #define max_cap_cluster() \
1820 	list_last_entry(&cluster_head, struct sched_cluster, list)
1821 static int sched_cluster_debug_show(struct seq_file *file, void *param)
1822 {
1823 	struct sched_cluster *cluster = NULL;
1824 
1825 	seq_printf(file, "min_id:%d, max_id:%d\n",
1826 		min_cap_cluster()->id,
1827 		max_cap_cluster()->id);
1828 
1829 	for_each_sched_cluster(cluster) {
1830 		seq_printf(file, "id:%d, cpumask:%d(%*pbl)\n",
1831 			   cluster->id,
1832 			   cpumask_first(&cluster->cpus),
1833 			   cpumask_pr_args(&cluster->cpus));
1834 	}
1835 
1836 	return 0;
1837 }
1838 
1839 static int sched_cluster_debug_open(struct inode *inode, struct file *filp)
1840 {
1841 	return single_open(filp, sched_cluster_debug_show, NULL);
1842 }
1843 
1844 static const struct proc_ops sched_cluster_fops = {
1845 	.proc_open		= sched_cluster_debug_open,
1846 	.proc_read		= seq_read,
1847 	.proc_lseek		= seq_lseek,
1848 	.proc_release		= seq_release,
1849 };
1850 
1851 static int __init init_sched_cluster_debug_procfs(void)
1852 {
1853 	struct proc_dir_entry *pe = NULL;
1854 
1855 	pe = proc_create("sched_cluster",
1856 		0444, NULL, &sched_cluster_fops);
1857 	if (!pe)
1858 		return -ENOMEM;
1859 	return 0;
1860 }
1861 late_initcall(init_sched_cluster_debug_procfs);
1862