• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Energy Model of devices
4  *
5  * Copyright (c) 2018-2020, Arm ltd.
6  * Written by: Quentin Perret, Arm ltd.
7  * Improvements provided by: Lukasz Luba, Arm ltd.
8  */
9 
10 #define pr_fmt(fmt) "energy_model: " fmt
11 
12 #include <linux/cpu.h>
13 #include <linux/cpumask.h>
14 #include <linux/debugfs.h>
15 #include <linux/energy_model.h>
16 #include <linux/sched/topology.h>
17 #include <linux/slab.h>
18 #include <trace/hooks/sched.h>
19 
/*
 * Serializes registration and unregistration of performance domains. Being a
 * mutex, it lets the driver-defined callbacks sleep while it is held.
 */
static DEFINE_MUTEX(em_pd_mutex);
25 
_is_cpu_device(struct device * dev)26 static bool _is_cpu_device(struct device *dev)
27 {
28 	return (dev->bus == &cpu_subsys);
29 }
30 
31 #ifdef CONFIG_DEBUG_FS
/* Parent debugfs directory under which each performance domain gets a dir */
static struct dentry *rootdir;
33 
em_debug_create_ps(struct em_perf_state * ps,struct dentry * pd)34 static void em_debug_create_ps(struct em_perf_state *ps, struct dentry *pd)
35 {
36 	struct dentry *d;
37 	char name[24];
38 
39 	snprintf(name, sizeof(name), "ps:%lu", ps->frequency);
40 
41 	/* Create per-ps directory */
42 	d = debugfs_create_dir(name, pd);
43 	debugfs_create_ulong("frequency", 0444, d, &ps->frequency);
44 	debugfs_create_ulong("power", 0444, d, &ps->power);
45 	debugfs_create_ulong("cost", 0444, d, &ps->cost);
46 }
47 
em_debug_cpus_show(struct seq_file * s,void * unused)48 static int em_debug_cpus_show(struct seq_file *s, void *unused)
49 {
50 	seq_printf(s, "%*pbl\n", cpumask_pr_args(to_cpumask(s->private)));
51 
52 	return 0;
53 }
54 DEFINE_SHOW_ATTRIBUTE(em_debug_cpus);
55 
em_debug_units_show(struct seq_file * s,void * unused)56 static int em_debug_units_show(struct seq_file *s, void *unused)
57 {
58 	struct em_perf_domain *pd = s->private;
59 	char *units = pd->milliwatts ? "milliWatts" : "bogoWatts";
60 
61 	seq_printf(s, "%s\n", units);
62 
63 	return 0;
64 }
65 DEFINE_SHOW_ATTRIBUTE(em_debug_units);
66 
em_debug_create_pd(struct device * dev)67 static void em_debug_create_pd(struct device *dev)
68 {
69 	struct dentry *d;
70 	int i;
71 
72 	/* Create the directory of the performance domain */
73 	d = debugfs_create_dir(dev_name(dev), rootdir);
74 
75 	if (_is_cpu_device(dev))
76 		debugfs_create_file("cpus", 0444, d, dev->em_pd->cpus,
77 				    &em_debug_cpus_fops);
78 
79 	debugfs_create_file("units", 0444, d, dev->em_pd, &em_debug_units_fops);
80 
81 	/* Create a sub-directory for each performance state */
82 	for (i = 0; i < dev->em_pd->nr_perf_states; i++)
83 		em_debug_create_ps(&dev->em_pd->table[i], d);
84 
85 }
86 
em_debug_remove_pd(struct device * dev)87 static void em_debug_remove_pd(struct device *dev)
88 {
89 	debugfs_lookup_and_remove(dev_name(dev), rootdir);
90 }
91 
em_debug_init(void)92 static int __init em_debug_init(void)
93 {
94 	/* Create /sys/kernel/debug/energy_model directory */
95 	rootdir = debugfs_create_dir("energy_model", NULL);
96 
97 	return 0;
98 }
99 fs_initcall(em_debug_init);
100 #else /* CONFIG_DEBUG_FS */
/* No-op variant used when CONFIG_DEBUG_FS is disabled. */
static void em_debug_create_pd(struct device *dev)
{
}
/* No-op variant used when CONFIG_DEBUG_FS is disabled. */
static void em_debug_remove_pd(struct device *dev)
{
}
103 #endif
104 
em_create_perf_table(struct device * dev,struct em_perf_domain * pd,int nr_states,struct em_data_callback * cb)105 static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
106 				int nr_states, struct em_data_callback *cb)
107 {
108 	unsigned long power, freq, prev_freq = 0, prev_cost = ULONG_MAX;
109 	struct em_perf_state *table;
110 	int i, ret;
111 	u64 fmax;
112 
113 	table = kcalloc(nr_states, sizeof(*table), GFP_KERNEL);
114 	if (!table)
115 		return -ENOMEM;
116 
117 	/* Build the list of performance states for this performance domain */
118 	for (i = 0, freq = 0; i < nr_states; i++, freq++) {
119 		/*
120 		 * active_power() is a driver callback which ceils 'freq' to
121 		 * lowest performance state of 'dev' above 'freq' and updates
122 		 * 'power' and 'freq' accordingly.
123 		 */
124 		ret = cb->active_power(&power, &freq, dev);
125 		if (ret) {
126 			dev_err(dev, "EM: invalid perf. state: %d\n",
127 				ret);
128 			goto free_ps_table;
129 		}
130 
131 		/*
132 		 * We expect the driver callback to increase the frequency for
133 		 * higher performance states.
134 		 */
135 		if (freq <= prev_freq) {
136 			dev_err(dev, "EM: non-increasing freq: %lu\n",
137 				freq);
138 			goto free_ps_table;
139 		}
140 
141 		/*
142 		 * The power returned by active_state() is expected to be
143 		 * positive and to fit into 16 bits.
144 		 */
145 		if (!power || power > EM_MAX_POWER) {
146 			dev_err(dev, "EM: invalid power: %lu\n",
147 				power);
148 			goto free_ps_table;
149 		}
150 
151 		table[i].power = power;
152 		table[i].frequency = prev_freq = freq;
153 	}
154 
155 	/* Compute the cost of each performance state. */
156 	fmax = (u64) table[nr_states - 1].frequency;
157 	for (i = nr_states - 1; i >= 0; i--) {
158 		unsigned long power_res = em_scale_power(table[i].power);
159 
160 		table[i].cost = div64_u64(fmax * power_res,
161 					  table[i].frequency);
162 		if (table[i].cost >= prev_cost) {
163 			dev_dbg(dev, "EM: OPP:%lu is inefficient\n",
164 				table[i].frequency);
165 		} else {
166 			prev_cost = table[i].cost;
167 		}
168 	}
169 
170 	pd->table = table;
171 	pd->nr_perf_states = nr_states;
172 
173 	return 0;
174 
175 free_ps_table:
176 	kfree(table);
177 	return -EINVAL;
178 }
179 
/*
 * Allocate a performance domain for @dev, build its performance state table
 * through @cb and attach the domain to the device.
 *
 * For a CPU device the allocation is extended by cpumask_size() bytes and
 * @cpus is copied into the domain; the domain is then also attached to the
 * device of every CPU in @cpus.
 *
 * Returns 0 on success, -ENOMEM if the domain cannot be allocated, or the
 * error reported by em_create_perf_table().
 */
static int em_create_pd(struct device *dev, int nr_states,
			struct em_data_callback *cb, cpumask_t *cpus)
{
	struct em_perf_domain *pd;
	struct device *cpu_dev;
	int cpu, ret;

	if (_is_cpu_device(dev)) {
		/* Extra room after the struct holds the spanned CPU mask */
		pd = kzalloc(sizeof(*pd) + cpumask_size(), GFP_KERNEL);
		if (!pd)
			return -ENOMEM;

		cpumask_copy(em_span_cpus(pd), cpus);
	} else {
		pd = kzalloc(sizeof(*pd), GFP_KERNEL);
		if (!pd)
			return -ENOMEM;
	}

	ret = em_create_perf_table(dev, pd, nr_states, cb);
	if (ret) {
		kfree(pd);
		return ret;
	}

	if (_is_cpu_device(dev)) {
		for_each_cpu(cpu, cpus) {
			cpu_dev = get_cpu_device(cpu);
			/*
			 * get_cpu_device() can return NULL, which em_cpu_get()
			 * already treats as "no domain". Skip such CPUs rather
			 * than dereferencing a NULL device.
			 */
			if (cpu_dev)
				cpu_dev->em_pd = pd;
		}
	}

	dev->em_pd = pd;

	return 0;
}
215 
216 /**
217  * em_pd_get() - Return the performance domain for a device
218  * @dev : Device to find the performance domain for
219  *
220  * Returns the performance domain to which @dev belongs, or NULL if it doesn't
221  * exist.
222  */
em_pd_get(struct device * dev)223 struct em_perf_domain *em_pd_get(struct device *dev)
224 {
225 	if (IS_ERR_OR_NULL(dev))
226 		return NULL;
227 
228 	return dev->em_pd;
229 }
230 EXPORT_SYMBOL_GPL(em_pd_get);
231 
232 /**
233  * em_cpu_get() - Return the performance domain for a CPU
234  * @cpu : CPU to find the performance domain for
235  *
236  * Returns the performance domain to which @cpu belongs, or NULL if it doesn't
237  * exist.
238  */
em_cpu_get(int cpu)239 struct em_perf_domain *em_cpu_get(int cpu)
240 {
241 	struct device *cpu_dev;
242 
243 	cpu_dev = get_cpu_device(cpu);
244 	if (!cpu_dev)
245 		return NULL;
246 
247 	return em_pd_get(cpu_dev);
248 }
249 EXPORT_SYMBOL_GPL(em_cpu_get);
250 
251 /**
252  * em_dev_register_perf_domain() - Register the Energy Model (EM) for a device
253  * @dev		: Device for which the EM is to register
254  * @nr_states	: Number of performance states to register
255  * @cb		: Callback functions providing the data of the Energy Model
256  * @cpus	: Pointer to cpumask_t, which in case of a CPU device is
257  *		obligatory. It can be taken from i.e. 'policy->cpus'. For other
258  *		type of devices this should be set to NULL.
259  * @milliwatts	: Flag indicating that the power values are in milliWatts or
260  *		in some other scale. It must be set properly.
261  *
262  * Create Energy Model tables for a performance domain using the callbacks
263  * defined in cb.
264  *
265  * The @milliwatts is important to set with correct value. Some kernel
266  * sub-systems might rely on this flag and check if all devices in the EM are
267  * using the same scale.
268  *
269  * If multiple clients register the same performance domain, all but the first
270  * registration will be ignored.
271  *
272  * Return 0 on success
273  */
em_dev_register_perf_domain(struct device * dev,unsigned int nr_states,struct em_data_callback * cb,cpumask_t * cpus,bool milliwatts)274 int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
275 				struct em_data_callback *cb, cpumask_t *cpus,
276 				bool milliwatts)
277 {
278 	unsigned long cap, prev_cap = 0;
279 	int cpu, ret;
280 	bool cond = false;
281 
282 	if (!dev || !nr_states || !cb)
283 		return -EINVAL;
284 
285 	/*
286 	 * Use a mutex to serialize the registration of performance domains and
287 	 * let the driver-defined callback functions sleep.
288 	 */
289 	mutex_lock(&em_pd_mutex);
290 
291 	if (dev->em_pd) {
292 		ret = -EEXIST;
293 		goto unlock;
294 	}
295 
296 	if (_is_cpu_device(dev)) {
297 		if (!cpus) {
298 			dev_err(dev, "EM: invalid CPU mask\n");
299 			ret = -EINVAL;
300 			goto unlock;
301 		}
302 
303 		for_each_cpu(cpu, cpus) {
304 			if (em_cpu_get(cpu)) {
305 				dev_err(dev, "EM: exists for CPU%d\n", cpu);
306 				ret = -EEXIST;
307 				goto unlock;
308 			}
309 
310 			trace_android_vh_em_dev_register_pd(&cond);
311 			if (cond)
312 				continue;
313 			/*
314 			 * All CPUs of a domain must have the same
315 			 * micro-architecture since they all share the same
316 			 * table.
317 			 */
318 			cap = arch_scale_cpu_capacity(cpu);
319 			if (prev_cap && prev_cap != cap) {
320 				dev_err(dev, "EM: CPUs of %*pbl must have the same capacity\n",
321 					cpumask_pr_args(cpus));
322 
323 				ret = -EINVAL;
324 				goto unlock;
325 			}
326 			prev_cap = cap;
327 		}
328 	}
329 
330 	ret = em_create_pd(dev, nr_states, cb, cpus);
331 	if (ret)
332 		goto unlock;
333 
334 	dev->em_pd->milliwatts = milliwatts;
335 
336 	em_debug_create_pd(dev);
337 	dev_info(dev, "EM: created perf domain\n");
338 
339 unlock:
340 	mutex_unlock(&em_pd_mutex);
341 	return ret;
342 }
343 EXPORT_SYMBOL_GPL(em_dev_register_perf_domain);
344 
345 /**
346  * em_dev_unregister_perf_domain() - Unregister Energy Model (EM) for a device
347  * @dev		: Device for which the EM is registered
348  *
349  * Unregister the EM for the specified @dev (but not a CPU device).
350  */
em_dev_unregister_perf_domain(struct device * dev)351 void em_dev_unregister_perf_domain(struct device *dev)
352 {
353 	if (IS_ERR_OR_NULL(dev) || !dev->em_pd)
354 		return;
355 
356 	if (_is_cpu_device(dev))
357 		return;
358 
359 	/*
360 	 * The mutex separates all register/unregister requests and protects
361 	 * from potential clean-up/setup issues in the debugfs directories.
362 	 * The debugfs directory name is the same as device's name.
363 	 */
364 	mutex_lock(&em_pd_mutex);
365 	em_debug_remove_pd(dev);
366 
367 	kfree(dev->em_pd->table);
368 	kfree(dev->em_pd);
369 	dev->em_pd = NULL;
370 	mutex_unlock(&em_pd_mutex);
371 }
372 EXPORT_SYMBOL_GPL(em_dev_unregister_perf_domain);
373