• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Energy Model of CPUs
4  *
5  * Copyright (c) 2018, Arm ltd.
6  * Written by: Quentin Perret, Arm ltd.
7  */
8 
9 #define pr_fmt(fmt) "energy_model: " fmt
10 
11 #include <linux/cpu.h>
12 #include <linux/cpumask.h>
13 #include <linux/debugfs.h>
14 #include <linux/energy_model.h>
15 #include <linux/sched/topology.h>
16 #include <linux/slab.h>
17 
18 /* Mapping of each CPU to the performance domain to which it belongs. */
19 static DEFINE_PER_CPU(struct em_perf_domain *, em_data);
20 
21 /*
22  * Mutex serializing the registrations of performance domains and letting
23  * callbacks defined by drivers sleep.
24  */
25 static DEFINE_MUTEX(em_pd_mutex);
26 
27 #ifdef CONFIG_DEBUG_FS
28 static struct dentry *rootdir;
29 
em_debug_create_cs(struct em_cap_state * cs,struct dentry * pd)30 static void em_debug_create_cs(struct em_cap_state *cs, struct dentry *pd)
31 {
32 	struct dentry *d;
33 	char name[24];
34 
35 	snprintf(name, sizeof(name), "cs:%lu", cs->frequency);
36 
37 	/* Create per-cs directory */
38 	d = debugfs_create_dir(name, pd);
39 	debugfs_create_ulong("frequency", 0444, d, &cs->frequency);
40 	debugfs_create_ulong("power", 0444, d, &cs->power);
41 	debugfs_create_ulong("cost", 0444, d, &cs->cost);
42 }
43 
em_debug_cpus_show(struct seq_file * s,void * unused)44 static int em_debug_cpus_show(struct seq_file *s, void *unused)
45 {
46 	seq_printf(s, "%*pbl\n", cpumask_pr_args(to_cpumask(s->private)));
47 
48 	return 0;
49 }
50 DEFINE_SHOW_ATTRIBUTE(em_debug_cpus);
51 
em_debug_create_pd(struct em_perf_domain * pd,int cpu)52 static void em_debug_create_pd(struct em_perf_domain *pd, int cpu)
53 {
54 	struct dentry *d;
55 	char name[8];
56 	int i;
57 
58 	snprintf(name, sizeof(name), "pd%d", cpu);
59 
60 	/* Create the directory of the performance domain */
61 	d = debugfs_create_dir(name, rootdir);
62 
63 	debugfs_create_file("cpus", 0444, d, pd->cpus, &em_debug_cpus_fops);
64 
65 	/* Create a sub-directory for each capacity state */
66 	for (i = 0; i < pd->nr_cap_states; i++)
67 		em_debug_create_cs(&pd->table[i], d);
68 }
69 
em_debug_init(void)70 static int __init em_debug_init(void)
71 {
72 	/* Create /sys/kernel/debug/energy_model directory */
73 	rootdir = debugfs_create_dir("energy_model", NULL);
74 
75 	return 0;
76 }
77 fs_initcall(em_debug_init);
78 #else /* CONFIG_DEBUG_FS */
/* No-op stand-in used when CONFIG_DEBUG_FS is not set. */
static void em_debug_create_pd(struct em_perf_domain *pd, int cpu)
{
}
80 #endif
/*
 * Allocate a performance domain for the CPUs of @span and fill its table of
 * @nr_states capacity states by repeatedly calling cb->active_power() for the
 * first CPU of @span.
 *
 * Returns the new domain, or NULL when the callback is missing, an allocation
 * fails, or the driver hands back an invalid state (non-zero return code,
 * non-increasing frequency, or power outside [1, EM_CPU_MAX_POWER]).
 */
static struct em_perf_domain *em_create_pd(cpumask_t *span, int nr_states,
						struct em_data_callback *cb)
{
	unsigned long eff, last_eff = ULONG_MAX;
	unsigned long cur_power, cur_freq, last_freq = 0;
	int first_cpu = cpumask_first(span);
	struct em_cap_state *states, *cs;
	struct em_perf_domain *domain;
	int idx, err;
	u64 max_freq;

	if (!cb->active_power)
		return NULL;

	/* The CPU mask of the domain is stored right behind the struct */
	domain = kzalloc(sizeof(*domain) + cpumask_size(), GFP_KERNEL);
	if (!domain)
		return NULL;

	states = kcalloc(nr_states, sizeof(*states), GFP_KERNEL);
	if (!states)
		goto free_pd;

	/* Query the driver for each capacity state, lowest frequency first */
	cur_freq = 0;
	for (idx = 0; idx < nr_states; idx++) {
		cs = &states[idx];

		/*
		 * active_power() is a driver callback which ceils 'cur_freq'
		 * to the lowest capacity state of 'first_cpu' above it and
		 * updates 'cur_power' and 'cur_freq' accordingly.
		 */
		err = cb->active_power(&cur_power, &cur_freq, first_cpu);
		if (err) {
			pr_err("pd%d: invalid cap. state: %d\n", first_cpu, err);
			goto free_cs_table;
		}

		/* Frequencies must strictly increase from state to state */
		if (cur_freq <= last_freq) {
			pr_err("pd%d: non-increasing freq: %lu\n", first_cpu,
			       cur_freq);
			goto free_cs_table;
		}

		/*
		 * The power returned by active_power() is expected to be
		 * positive, in milli-watts and to fit into 16 bits.
		 */
		if (cur_power == 0 || cur_power > EM_CPU_MAX_POWER) {
			pr_err("pd%d: invalid power: %lu\n", first_cpu,
			       cur_power);
			goto free_cs_table;
		}

		cs->power = cur_power;
		cs->frequency = cur_freq;
		last_freq = cur_freq;

		/*
		 * On sane platforms the hertz/watts ratio shrinks as the
		 * frequency grows. That does not always hold in practice, so
		 * only warn when a higher OPP turns out to be at least as
		 * efficient as the one below it.
		 */
		eff = cur_freq / cur_power;
		if (eff >= last_eff)
			pr_warn("pd%d: hertz/watts ratio non-monotonically decreasing: em_cap_state %d >= em_cap_state%d\n",
					first_cpu, idx, idx - 1);
		last_eff = eff;

		/* Step past this state so the next query lands on the one above */
		cur_freq++;
	}

	/* Cost of a state: highest frequency * scaled power / own frequency */
	max_freq = (u64) states[nr_states - 1].frequency;
	for (cs = states; cs < states + nr_states; cs++) {
		unsigned long scaled_power = em_scale_power(cs->power);

		cs->cost = div64_u64(max_freq * scaled_power, cs->frequency);
	}

	domain->table = states;
	domain->nr_cap_states = nr_states;
	cpumask_copy(to_cpumask(domain->cpus), span);

	em_debug_create_pd(domain, first_cpu);

	return domain;

free_cs_table:
	kfree(states);
free_pd:
	kfree(domain);

	return NULL;
}
173 
174 /**
175  * em_cpu_get() - Return the performance domain for a CPU
176  * @cpu : CPU to find the performance domain for
177  *
178  * Return: the performance domain to which 'cpu' belongs, or NULL if it doesn't
179  * exist.
180  */
em_cpu_get(int cpu)181 struct em_perf_domain *em_cpu_get(int cpu)
182 {
183 	return READ_ONCE(per_cpu(em_data, cpu));
184 }
185 EXPORT_SYMBOL_GPL(em_cpu_get);
186 
187 /**
188  * em_register_perf_domain() - Register the Energy Model of a performance domain
189  * @span	: Mask of CPUs in the performance domain
190  * @nr_states	: Number of capacity states to register
191  * @cb		: Callback functions providing the data of the Energy Model
192  *
193  * Create Energy Model tables for a performance domain using the callbacks
194  * defined in cb.
195  *
196  * If multiple clients register the same performance domain, all but the first
197  * registration will be ignored.
198  *
199  * Return 0 on success
200  */
em_register_perf_domain(cpumask_t * span,unsigned int nr_states,struct em_data_callback * cb)201 int em_register_perf_domain(cpumask_t *span, unsigned int nr_states,
202 						struct em_data_callback *cb)
203 {
204 	unsigned long cap, prev_cap = 0;
205 	struct em_perf_domain *pd;
206 	int cpu, ret = 0;
207 
208 	if (!span || !nr_states || !cb)
209 		return -EINVAL;
210 
211 	/*
212 	 * Use a mutex to serialize the registration of performance domains and
213 	 * let the driver-defined callback functions sleep.
214 	 */
215 	mutex_lock(&em_pd_mutex);
216 
217 	for_each_cpu(cpu, span) {
218 		/* Make sure we don't register again an existing domain. */
219 		if (READ_ONCE(per_cpu(em_data, cpu))) {
220 			ret = -EEXIST;
221 			goto unlock;
222 		}
223 
224 		/*
225 		 * All CPUs of a domain must have the same micro-architecture
226 		 * since they all share the same table.
227 		 */
228 		cap = arch_scale_cpu_capacity(cpu);
229 		if (prev_cap && prev_cap != cap) {
230 			pr_err("CPUs of %*pbl must have the same capacity\n",
231 							cpumask_pr_args(span));
232 			ret = -EINVAL;
233 			goto unlock;
234 		}
235 		prev_cap = cap;
236 	}
237 
238 	/* Create the performance domain and add it to the Energy Model. */
239 	pd = em_create_pd(span, nr_states, cb);
240 	if (!pd) {
241 		ret = -EINVAL;
242 		goto unlock;
243 	}
244 
245 	for_each_cpu(cpu, span) {
246 		/*
247 		 * The per-cpu array can be read concurrently from em_cpu_get().
248 		 * The barrier enforces the ordering needed to make sure readers
249 		 * can only access well formed em_perf_domain structs.
250 		 */
251 		smp_store_release(per_cpu_ptr(&em_data, cpu), pd);
252 	}
253 
254 	pr_debug("Created perf domain %*pbl\n", cpumask_pr_args(span));
255 unlock:
256 	mutex_unlock(&em_pd_mutex);
257 
258 	return ret;
259 }
260 EXPORT_SYMBOL_GPL(em_register_perf_domain);
261