1 /*
2 * intel_pstate.c: Native P state management for Intel processors
3 *
4 * (C) Copyright 2012 Intel Corporation
5 * Author: Dirk Brandewie <dirk.j.brandewie@intel.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; version 2
10 * of the License.
11 */
12
13 #include <linux/kernel.h>
14 #include <linux/kernel_stat.h>
15 #include <linux/module.h>
16 #include <linux/ktime.h>
17 #include <linux/hrtimer.h>
18 #include <linux/tick.h>
19 #include <linux/slab.h>
20 #include <linux/sched.h>
21 #include <linux/list.h>
22 #include <linux/cpu.h>
23 #include <linux/cpufreq.h>
24 #include <linux/sysfs.h>
25 #include <linux/types.h>
26 #include <linux/fs.h>
27 #include <linux/debugfs.h>
28 #include <linux/acpi.h>
29 #include <trace/events/power.h>
30
31 #include <asm/div64.h>
32 #include <asm/msr.h>
33 #include <asm/cpu_device_id.h>
34
/* MSR addresses read by the byt_* helpers in this file. */
#define BYT_RATIOS		0x66a	/* min/max P-state ratios */
#define BYT_VIDS		0x66b	/* min/max vid values */
#define BYT_TURBO_RATIOS	0x66c	/* turbo P-state ratio */
#define BYT_TURBO_VIDS		0x66d	/* turbo vid value */
39
/*
 * Fixed-point arithmetic: values carry FRAC_BITS fractional bits.
 * int_tofp() widens its argument to int64_t and shifts it left;
 * fp_toint() shifts right, dropping the fractional bits.
 */
#define FRAC_BITS	8
#define int_tofp(X)	((int64_t)(X) << FRAC_BITS)
#define fp_toint(X)	((X) >> FRAC_BITS)
43
44
/*
 * Multiply two fixed-point values.
 *
 * The product is formed in 64 bits and shifted right by FRAC_BITS so that
 * it carries FRAC_BITS fractional bits again; the result is narrowed to
 * int32_t by the return.
 */
static inline int32_t mul_fp(int32_t x, int32_t y)
{
	int64_t product = (int64_t)x * (int64_t)y;

	return product >> FRAC_BITS;
}
49
div_fp(s64 x,s64 y)50 static inline int32_t div_fp(s64 x, s64 y)
51 {
52 return div64_s64((int64_t)x << FRAC_BITS, y);
53 }
54
/*
 * Convert fixed-point @x to an integer, adding one to the fp_toint()
 * result whenever any of the low FRAC_BITS bits of @x are set.
 */
static inline int ceiling_fp(int32_t x)
{
	const int frac_mask = (1 << FRAC_BITS) - 1;
	int whole = fp_toint(x);

	return (x & frac_mask) ? whole + 1 : whole;
}
65
66 struct sample {
67 int32_t core_pct_busy;
68 u64 aperf;
69 u64 mperf;
70 int freq;
71 ktime_t time;
72 };
73
/* P-state values of one CPU; see intel_pstate_get_cpu_pstates(). */
struct pstate_data {
	/* last value handed to pstate_funcs.set() */
	int current_pstate;
	/* result of pstate_funcs.get_min() */
	int min_pstate;
	/* result of pstate_funcs.get_max() */
	int max_pstate;
	/* result of pstate_funcs.get_scaling(); pstate * scaling is used for policy frequencies */
	int scaling;
	/* result of pstate_funcs.get_turbo() */
	int turbo_pstate;
};
81
/* vid values consumed by byt_set_pstate(); filled in by byt_get_vid(). */
struct vid_data {
	/* fixed-point lower bound */
	int min;
	/* fixed-point upper bound */
	int max;
	/* plain value, used when the requested pstate exceeds max_pstate */
	int turbo;
	/* fixed-point (max - min) / (max_pstate - min_pstate) */
	int32_t ratio;
};
88
/* State of the PID controller evaluated by pid_calc(). */
struct _pid {
	/* target value, plain integer (converted with int_tofp() when used) */
	int setpoint;
	/* fixed-point accumulated error, clamped inside pid_calc() */
	int32_t integral;
	/* fixed-point gains, set by pid_{p,i,d}_gain_set() */
	int32_t p_gain;
	int32_t i_gain;
	int32_t d_gain;
	/* plain integer; errors whose magnitude is within it yield 0 */
	int deadband;
	/* fixed-point error seen by the previous pid_calc() */
	int32_t last_err;
};
98
99 struct cpudata {
100 int cpu;
101
102 struct timer_list timer;
103
104 struct pstate_data pstate;
105 struct vid_data vid;
106 struct _pid pid;
107
108 ktime_t last_sample_time;
109 u64 prev_aperf;
110 u64 prev_mperf;
111 struct sample sample;
112 };
113
/* Array of per-CPU state pointers, indexed by CPU number. */
static struct cpudata **all_cpu_data;
/* PID tuning parameters; the active copy lives in pid_params. */
struct pstate_adjust_policy {
	/* timer period in milliseconds */
	int sample_rate_ms;
	int deadband;
	int setpoint;
	/* gains expressed in percent, converted by pid_*_gain_set() */
	int p_gain_pct;
	int d_gain_pct;
	int i_gain_pct;
};
123
/* CPU-model specific callbacks; the active set lives in pstate_funcs. */
struct pstate_funcs {
	int (*get_max)(void);
	int (*get_min)(void);
	int (*get_turbo)(void);
	int (*get_scaling)(void);
	/* program @pstate on the CPU described by the cpudata */
	void (*set)(struct cpudata*, int pstate);
	/* optional: callers test it for NULL before calling */
	void (*get_vid)(struct cpudata *);
};
132
133 struct cpu_defaults {
134 struct pstate_adjust_policy pid_policy;
135 struct pstate_funcs funcs;
136 };
137
138 static struct pstate_adjust_policy pid_params;
139 static struct pstate_funcs pstate_funcs;
140
/* Performance limits shared by all CPUs; see the global "limits". */
struct perf_limits {
	/* user request written through the no_turbo sysfs attribute */
	int no_turbo;
	/* refreshed by update_turbo_state() */
	int turbo_disabled;
	/* effective limits in percent */
	int max_perf_pct;
	int min_perf_pct;
	/* the same limits as fixed-point fractions (pct / 100) */
	int32_t max_perf;
	int32_t min_perf;
	/* maximum derived from the cpufreq policy */
	int max_policy_pct;
	/* maximum written through the max_perf_pct sysfs attribute */
	int max_sysfs_pct;
};
151
152 static struct perf_limits limits = {
153 .no_turbo = 0,
154 .turbo_disabled = 0,
155 .max_perf_pct = 100,
156 .max_perf = int_tofp(1),
157 .min_perf_pct = 0,
158 .min_perf = 0,
159 .max_policy_pct = 100,
160 .max_sysfs_pct = 100,
161 };
162
pid_reset(struct _pid * pid,int setpoint,int busy,int deadband,int integral)163 static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
164 int deadband, int integral) {
165 pid->setpoint = setpoint;
166 pid->deadband = deadband;
167 pid->integral = int_tofp(integral);
168 pid->last_err = int_tofp(setpoint) - int_tofp(busy);
169 }
170
pid_p_gain_set(struct _pid * pid,int percent)171 static inline void pid_p_gain_set(struct _pid *pid, int percent)
172 {
173 pid->p_gain = div_fp(int_tofp(percent), int_tofp(100));
174 }
175
pid_i_gain_set(struct _pid * pid,int percent)176 static inline void pid_i_gain_set(struct _pid *pid, int percent)
177 {
178 pid->i_gain = div_fp(int_tofp(percent), int_tofp(100));
179 }
180
pid_d_gain_set(struct _pid * pid,int percent)181 static inline void pid_d_gain_set(struct _pid *pid, int percent)
182 {
183 pid->d_gain = div_fp(int_tofp(percent), int_tofp(100));
184 }
185
/*
 * Run one step of the PID controller.
 *
 * @busy is a fixed-point value; the error is (setpoint - @busy).  Errors
 * whose magnitude does not exceed the deadband return 0 without touching
 * the controller state.  Otherwise the integral (clamped to +/-30 in
 * fixed-point) and the last error are updated and the sum of the P, I and
 * D terms is returned as a plain integer, rounded by adding half a unit
 * before the fractional bits are dropped.
 */
static signed int pid_calc(struct _pid *pid, int32_t busy)
{
	const int32_t integral_limit = int_tofp(30);
	int32_t err, p_out, i_out, d_out;
	signed int out;

	err = int_tofp(pid->setpoint) - busy;
	if (abs(err) <= int_tofp(pid->deadband))
		return 0;

	p_out = mul_fp(pid->p_gain, err);

	/* accumulate the error, then limit the integral term */
	pid->integral = clamp_t(int32_t, pid->integral + err,
				-integral_limit, integral_limit);

	d_out = mul_fp(pid->d_gain, err - pid->last_err);
	pid->last_err = err;

	i_out = mul_fp(pid->integral, pid->i_gain);

	out = p_out + i_out + d_out;
	out += 1 << (FRAC_BITS - 1);
	return (signed int)fp_toint(out);
}
215
intel_pstate_busy_pid_reset(struct cpudata * cpu)216 static inline void intel_pstate_busy_pid_reset(struct cpudata *cpu)
217 {
218 pid_p_gain_set(&cpu->pid, pid_params.p_gain_pct);
219 pid_d_gain_set(&cpu->pid, pid_params.d_gain_pct);
220 pid_i_gain_set(&cpu->pid, pid_params.i_gain_pct);
221
222 pid_reset(&cpu->pid, pid_params.setpoint, 100, pid_params.deadband, 0);
223 }
224
intel_pstate_reset_all_pid(void)225 static inline void intel_pstate_reset_all_pid(void)
226 {
227 unsigned int cpu;
228
229 for_each_online_cpu(cpu) {
230 if (all_cpu_data[cpu])
231 intel_pstate_busy_pid_reset(all_cpu_data[cpu]);
232 }
233 }
234
update_turbo_state(void)235 static inline void update_turbo_state(void)
236 {
237 u64 misc_en;
238 struct cpudata *cpu;
239
240 cpu = all_cpu_data[0];
241 rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
242 limits.turbo_disabled =
243 (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
244 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
245 }
246
247 /************************** debugfs begin ************************/
/*
 * debugfs write handler: store @val (truncated to 32 bits) in the
 * parameter @data points to, then reset every PID so it takes effect.
 * Always returns 0.
 */
static int pid_param_set(void *data, u64 val)
{
	u32 *param = data;

	*param = val;
	intel_pstate_reset_all_pid();
	return 0;
}
254
pid_param_get(void * data,u64 * val)255 static int pid_param_get(void *data, u64 *val)
256 {
257 *val = *(u32 *)data;
258 return 0;
259 }
260 DEFINE_SIMPLE_ATTRIBUTE(fops_pid_param, pid_param_get, pid_param_set, "%llu\n");
261
262 struct pid_param {
263 char *name;
264 void *value;
265 };
266
267 static struct pid_param pid_files[] = {
268 {"sample_rate_ms", &pid_params.sample_rate_ms},
269 {"d_gain_pct", &pid_params.d_gain_pct},
270 {"i_gain_pct", &pid_params.i_gain_pct},
271 {"deadband", &pid_params.deadband},
272 {"setpoint", &pid_params.setpoint},
273 {"p_gain_pct", &pid_params.p_gain_pct},
274 {NULL, NULL}
275 };
276
intel_pstate_debug_expose_params(void)277 static void __init intel_pstate_debug_expose_params(void)
278 {
279 struct dentry *debugfs_parent;
280 int i = 0;
281
282 debugfs_parent = debugfs_create_dir("pstate_snb", NULL);
283 if (IS_ERR_OR_NULL(debugfs_parent))
284 return;
285 while (pid_files[i].name) {
286 debugfs_create_file(pid_files[i].name, 0660,
287 debugfs_parent, pid_files[i].value,
288 &fops_pid_param);
289 i++;
290 }
291 }
292
293 /************************** debugfs end ************************/
294
295 /************************** sysfs begin ************************/
/*
 * show_one(file_name, object) generates the sysfs show routine
 * show_<file_name>(), which prints limits.<object> followed by a newline.
 */
#define show_one(file_name, object)					\
	static ssize_t show_##file_name					\
	(struct kobject *kobj, struct attribute *attr, char *buf)	\
	{								\
		return sprintf(buf, "%u\n", limits.object);		\
	}
302
show_no_turbo(struct kobject * kobj,struct attribute * attr,char * buf)303 static ssize_t show_no_turbo(struct kobject *kobj,
304 struct attribute *attr, char *buf)
305 {
306 ssize_t ret;
307
308 update_turbo_state();
309 if (limits.turbo_disabled)
310 ret = sprintf(buf, "%u\n", limits.turbo_disabled);
311 else
312 ret = sprintf(buf, "%u\n", limits.no_turbo);
313
314 return ret;
315 }
316
store_no_turbo(struct kobject * a,struct attribute * b,const char * buf,size_t count)317 static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
318 const char *buf, size_t count)
319 {
320 unsigned int input;
321 int ret;
322
323 ret = sscanf(buf, "%u", &input);
324 if (ret != 1)
325 return -EINVAL;
326
327 update_turbo_state();
328 if (limits.turbo_disabled) {
329 pr_warn("Turbo disabled by BIOS or unavailable on processor\n");
330 return -EPERM;
331 }
332 limits.no_turbo = clamp_t(int, input, 0, 1);
333
334 return count;
335 }
336
store_max_perf_pct(struct kobject * a,struct attribute * b,const char * buf,size_t count)337 static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
338 const char *buf, size_t count)
339 {
340 unsigned int input;
341 int ret;
342
343 ret = sscanf(buf, "%u", &input);
344 if (ret != 1)
345 return -EINVAL;
346
347 limits.max_sysfs_pct = clamp_t(int, input, 0 , 100);
348 limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct);
349 limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100));
350
351 return count;
352 }
353
store_min_perf_pct(struct kobject * a,struct attribute * b,const char * buf,size_t count)354 static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
355 const char *buf, size_t count)
356 {
357 unsigned int input;
358 int ret;
359
360 ret = sscanf(buf, "%u", &input);
361 if (ret != 1)
362 return -EINVAL;
363 limits.min_perf_pct = clamp_t(int, input, 0 , 100);
364 limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));
365
366 return count;
367 }
368
369 show_one(max_perf_pct, max_perf_pct);
370 show_one(min_perf_pct, min_perf_pct);
371
372 define_one_global_rw(no_turbo);
373 define_one_global_rw(max_perf_pct);
374 define_one_global_rw(min_perf_pct);
375
376 static struct attribute *intel_pstate_attributes[] = {
377 &no_turbo.attr,
378 &max_perf_pct.attr,
379 &min_perf_pct.attr,
380 NULL
381 };
382
383 static struct attribute_group intel_pstate_attr_group = {
384 .attrs = intel_pstate_attributes,
385 };
386
intel_pstate_sysfs_expose_params(void)387 static void __init intel_pstate_sysfs_expose_params(void)
388 {
389 struct kobject *intel_pstate_kobject;
390 int rc;
391
392 intel_pstate_kobject = kobject_create_and_add("intel_pstate",
393 &cpu_subsys.dev_root->kobj);
394 BUG_ON(!intel_pstate_kobject);
395 rc = sysfs_create_group(intel_pstate_kobject, &intel_pstate_attr_group);
396 BUG_ON(rc);
397 }
398
399 /************************** sysfs end ************************/
byt_get_min_pstate(void)400 static int byt_get_min_pstate(void)
401 {
402 u64 value;
403
404 rdmsrl(BYT_RATIOS, value);
405 return (value >> 8) & 0x7F;
406 }
407
byt_get_max_pstate(void)408 static int byt_get_max_pstate(void)
409 {
410 u64 value;
411
412 rdmsrl(BYT_RATIOS, value);
413 return (value >> 16) & 0x7F;
414 }
415
byt_get_turbo_pstate(void)416 static int byt_get_turbo_pstate(void)
417 {
418 u64 value;
419
420 rdmsrl(BYT_TURBO_RATIOS, value);
421 return value & 0x7F;
422 }
423
byt_set_pstate(struct cpudata * cpudata,int pstate)424 static void byt_set_pstate(struct cpudata *cpudata, int pstate)
425 {
426 u64 val;
427 int32_t vid_fp;
428 u32 vid;
429
430 val = pstate << 8;
431 if (limits.no_turbo && !limits.turbo_disabled)
432 val |= (u64)1 << 32;
433
434 vid_fp = cpudata->vid.min + mul_fp(
435 int_tofp(pstate - cpudata->pstate.min_pstate),
436 cpudata->vid.ratio);
437
438 vid_fp = clamp_t(int32_t, vid_fp, cpudata->vid.min, cpudata->vid.max);
439 vid = ceiling_fp(vid_fp);
440
441 if (pstate > cpudata->pstate.max_pstate)
442 vid = cpudata->vid.turbo;
443
444 val |= vid;
445
446 wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val);
447 }
448
449 #define BYT_BCLK_FREQS 5
450 static int byt_freq_table[BYT_BCLK_FREQS] = { 833, 1000, 1333, 1167, 800};
451
byt_get_scaling(void)452 static int byt_get_scaling(void)
453 {
454 u64 value;
455 int i;
456
457 rdmsrl(MSR_FSB_FREQ, value);
458 i = value & 0x3;
459
460 BUG_ON(i > BYT_BCLK_FREQS);
461
462 return byt_freq_table[i] * 100;
463 }
464
byt_get_vid(struct cpudata * cpudata)465 static void byt_get_vid(struct cpudata *cpudata)
466 {
467 u64 value;
468
469 rdmsrl(BYT_VIDS, value);
470 cpudata->vid.min = int_tofp((value >> 8) & 0x7f);
471 cpudata->vid.max = int_tofp((value >> 16) & 0x7f);
472 cpudata->vid.ratio = div_fp(
473 cpudata->vid.max - cpudata->vid.min,
474 int_tofp(cpudata->pstate.max_pstate -
475 cpudata->pstate.min_pstate));
476
477 rdmsrl(BYT_TURBO_VIDS, value);
478 cpudata->vid.turbo = value & 0x7f;
479 }
480
core_get_min_pstate(void)481 static int core_get_min_pstate(void)
482 {
483 u64 value;
484
485 rdmsrl(MSR_PLATFORM_INFO, value);
486 return (value >> 40) & 0xFF;
487 }
488
core_get_max_pstate(void)489 static int core_get_max_pstate(void)
490 {
491 u64 value;
492
493 rdmsrl(MSR_PLATFORM_INFO, value);
494 return (value >> 8) & 0xFF;
495 }
496
core_get_turbo_pstate(void)497 static int core_get_turbo_pstate(void)
498 {
499 u64 value;
500 int nont, ret;
501
502 rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value);
503 nont = core_get_max_pstate();
504 ret = (value) & 255;
505 if (ret <= nont)
506 ret = nont;
507 return ret;
508 }
509
/* Scaling factor for the core_params CPUs: a fixed 100000. */
static inline int core_get_scaling(void)
{
	const int scaling = 100000;

	return scaling;
}
514
core_set_pstate(struct cpudata * cpudata,int pstate)515 static void core_set_pstate(struct cpudata *cpudata, int pstate)
516 {
517 u64 val;
518
519 val = pstate << 8;
520 if (limits.no_turbo && !limits.turbo_disabled)
521 val |= (u64)1 << 32;
522
523 wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val);
524 }
525
526 static struct cpu_defaults core_params = {
527 .pid_policy = {
528 .sample_rate_ms = 10,
529 .deadband = 0,
530 .setpoint = 97,
531 .p_gain_pct = 20,
532 .d_gain_pct = 0,
533 .i_gain_pct = 0,
534 },
535 .funcs = {
536 .get_max = core_get_max_pstate,
537 .get_min = core_get_min_pstate,
538 .get_turbo = core_get_turbo_pstate,
539 .get_scaling = core_get_scaling,
540 .set = core_set_pstate,
541 },
542 };
543
544 static struct cpu_defaults byt_params = {
545 .pid_policy = {
546 .sample_rate_ms = 10,
547 .deadband = 0,
548 .setpoint = 97,
549 .p_gain_pct = 14,
550 .d_gain_pct = 0,
551 .i_gain_pct = 4,
552 },
553 .funcs = {
554 .get_max = byt_get_max_pstate,
555 .get_min = byt_get_min_pstate,
556 .get_turbo = byt_get_turbo_pstate,
557 .set = byt_set_pstate,
558 .get_scaling = byt_get_scaling,
559 .get_vid = byt_get_vid,
560 },
561 };
562
intel_pstate_get_min_max(struct cpudata * cpu,int * min,int * max)563 static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
564 {
565 int max_perf = cpu->pstate.turbo_pstate;
566 int max_perf_adj;
567 int min_perf;
568
569 if (limits.no_turbo || limits.turbo_disabled)
570 max_perf = cpu->pstate.max_pstate;
571
572 max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf));
573 *max = clamp_t(int, max_perf_adj,
574 cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);
575
576 min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits.min_perf));
577 *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
578 }
579
intel_pstate_set_pstate(struct cpudata * cpu,int pstate)580 static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
581 {
582 int max_perf, min_perf;
583
584 update_turbo_state();
585
586 intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
587
588 pstate = clamp_t(int, pstate, min_perf, max_perf);
589
590 if (pstate == cpu->pstate.current_pstate)
591 return;
592
593 trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
594
595 cpu->pstate.current_pstate = pstate;
596
597 pstate_funcs.set(cpu, pstate);
598 }
599
intel_pstate_get_cpu_pstates(struct cpudata * cpu)600 static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
601 {
602 cpu->pstate.min_pstate = pstate_funcs.get_min();
603 cpu->pstate.max_pstate = pstate_funcs.get_max();
604 cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
605 cpu->pstate.scaling = pstate_funcs.get_scaling();
606
607 if (pstate_funcs.get_vid)
608 pstate_funcs.get_vid(cpu);
609 intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate);
610 }
611
intel_pstate_calc_busy(struct cpudata * cpu)612 static inline void intel_pstate_calc_busy(struct cpudata *cpu)
613 {
614 struct sample *sample = &cpu->sample;
615 int64_t core_pct;
616
617 core_pct = int_tofp(sample->aperf) * int_tofp(100);
618 core_pct = div64_u64(core_pct, int_tofp(sample->mperf));
619
620 sample->freq = fp_toint(
621 mul_fp(int_tofp(
622 cpu->pstate.max_pstate * cpu->pstate.scaling / 100),
623 core_pct));
624
625 sample->core_pct_busy = (int32_t)core_pct;
626 }
627
intel_pstate_sample(struct cpudata * cpu)628 static inline void intel_pstate_sample(struct cpudata *cpu)
629 {
630 u64 aperf, mperf;
631 unsigned long flags;
632
633 local_irq_save(flags);
634 rdmsrl(MSR_IA32_APERF, aperf);
635 rdmsrl(MSR_IA32_MPERF, mperf);
636 local_irq_restore(flags);
637
638 cpu->last_sample_time = cpu->sample.time;
639 cpu->sample.time = ktime_get();
640 cpu->sample.aperf = aperf;
641 cpu->sample.mperf = mperf;
642 cpu->sample.aperf -= cpu->prev_aperf;
643 cpu->sample.mperf -= cpu->prev_mperf;
644
645 intel_pstate_calc_busy(cpu);
646
647 cpu->prev_aperf = aperf;
648 cpu->prev_mperf = mperf;
649 }
650
intel_pstate_set_sample_time(struct cpudata * cpu)651 static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
652 {
653 int delay;
654
655 delay = msecs_to_jiffies(pid_params.sample_rate_ms);
656 mod_timer_pinned(&cpu->timer, jiffies + delay);
657 }
658
intel_pstate_get_scaled_busy(struct cpudata * cpu)659 static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
660 {
661 int32_t core_busy, max_pstate, current_pstate, sample_ratio;
662 s64 duration_us;
663 u32 sample_time;
664
665 core_busy = cpu->sample.core_pct_busy;
666 max_pstate = int_tofp(cpu->pstate.max_pstate);
667 current_pstate = int_tofp(cpu->pstate.current_pstate);
668 core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
669
670 sample_time = pid_params.sample_rate_ms * USEC_PER_MSEC;
671 duration_us = ktime_us_delta(cpu->sample.time,
672 cpu->last_sample_time);
673 if (duration_us > sample_time * 3) {
674 sample_ratio = div_fp(int_tofp(sample_time),
675 int_tofp(duration_us));
676 core_busy = mul_fp(core_busy, sample_ratio);
677 }
678
679 return core_busy;
680 }
681
intel_pstate_adjust_busy_pstate(struct cpudata * cpu)682 static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
683 {
684 int32_t busy_scaled;
685 struct _pid *pid;
686 signed int ctl;
687
688 pid = &cpu->pid;
689 busy_scaled = intel_pstate_get_scaled_busy(cpu);
690
691 ctl = pid_calc(pid, busy_scaled);
692
693 /* Negative values of ctl increase the pstate and vice versa */
694 intel_pstate_set_pstate(cpu, cpu->pstate.current_pstate - ctl);
695 }
696
intel_pstate_timer_func(unsigned long __data)697 static void intel_pstate_timer_func(unsigned long __data)
698 {
699 struct cpudata *cpu = (struct cpudata *) __data;
700 struct sample *sample;
701
702 intel_pstate_sample(cpu);
703
704 sample = &cpu->sample;
705
706 intel_pstate_adjust_busy_pstate(cpu);
707
708 trace_pstate_sample(fp_toint(sample->core_pct_busy),
709 fp_toint(intel_pstate_get_scaled_busy(cpu)),
710 cpu->pstate.current_pstate,
711 sample->mperf,
712 sample->aperf,
713 sample->freq);
714
715 intel_pstate_set_sample_time(cpu);
716 }
717
718 #define ICPU(model, policy) \
719 { X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\
720 (unsigned long)&policy }
721
722 static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
723 ICPU(0x2a, core_params),
724 ICPU(0x2d, core_params),
725 ICPU(0x37, byt_params),
726 ICPU(0x3a, core_params),
727 ICPU(0x3c, core_params),
728 ICPU(0x3d, core_params),
729 ICPU(0x3e, core_params),
730 ICPU(0x3f, core_params),
731 ICPU(0x45, core_params),
732 ICPU(0x46, core_params),
733 ICPU(0x4c, byt_params),
734 ICPU(0x4f, core_params),
735 ICPU(0x56, core_params),
736 {}
737 };
738 MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
739
intel_pstate_init_cpu(unsigned int cpunum)740 static int intel_pstate_init_cpu(unsigned int cpunum)
741 {
742 struct cpudata *cpu;
743
744 if (!all_cpu_data[cpunum])
745 all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata),
746 GFP_KERNEL);
747 if (!all_cpu_data[cpunum])
748 return -ENOMEM;
749
750 cpu = all_cpu_data[cpunum];
751
752 cpu->cpu = cpunum;
753 intel_pstate_get_cpu_pstates(cpu);
754
755 init_timer_deferrable(&cpu->timer);
756 cpu->timer.function = intel_pstate_timer_func;
757 cpu->timer.data = (unsigned long)cpu;
758 cpu->timer.expires = jiffies + HZ/100;
759 intel_pstate_busy_pid_reset(cpu);
760 intel_pstate_sample(cpu);
761
762 add_timer_on(&cpu->timer, cpunum);
763
764 pr_debug("Intel pstate controlling: cpu %d\n", cpunum);
765
766 return 0;
767 }
768
intel_pstate_get(unsigned int cpu_num)769 static unsigned int intel_pstate_get(unsigned int cpu_num)
770 {
771 struct sample *sample;
772 struct cpudata *cpu;
773
774 cpu = all_cpu_data[cpu_num];
775 if (!cpu)
776 return 0;
777 sample = &cpu->sample;
778 return sample->freq;
779 }
780
intel_pstate_set_policy(struct cpufreq_policy * policy)781 static int intel_pstate_set_policy(struct cpufreq_policy *policy)
782 {
783 if (!policy->cpuinfo.max_freq)
784 return -ENODEV;
785
786 if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
787 limits.min_perf_pct = 100;
788 limits.min_perf = int_tofp(1);
789 limits.max_policy_pct = 100;
790 limits.max_perf_pct = 100;
791 limits.max_perf = int_tofp(1);
792 limits.no_turbo = 0;
793 return 0;
794 }
795 limits.min_perf_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
796 limits.min_perf_pct = clamp_t(int, limits.min_perf_pct, 0 , 100);
797 limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));
798
799 limits.max_policy_pct = (policy->max * 100) / policy->cpuinfo.max_freq;
800 limits.max_policy_pct = clamp_t(int, limits.max_policy_pct, 0 , 100);
801 limits.max_perf_pct = min(limits.max_policy_pct, limits.max_sysfs_pct);
802 limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100));
803
804 return 0;
805 }
806
intel_pstate_verify_policy(struct cpufreq_policy * policy)807 static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
808 {
809 cpufreq_verify_within_cpu_limits(policy);
810
811 if (policy->policy != CPUFREQ_POLICY_POWERSAVE &&
812 policy->policy != CPUFREQ_POLICY_PERFORMANCE)
813 return -EINVAL;
814
815 return 0;
816 }
817
intel_pstate_stop_cpu(struct cpufreq_policy * policy)818 static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
819 {
820 int cpu_num = policy->cpu;
821 struct cpudata *cpu = all_cpu_data[cpu_num];
822
823 pr_info("intel_pstate CPU %d exiting\n", cpu_num);
824
825 del_timer_sync(&all_cpu_data[cpu_num]->timer);
826 intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate);
827 }
828
intel_pstate_cpu_init(struct cpufreq_policy * policy)829 static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
830 {
831 struct cpudata *cpu;
832 int rc;
833
834 rc = intel_pstate_init_cpu(policy->cpu);
835 if (rc)
836 return rc;
837
838 cpu = all_cpu_data[policy->cpu];
839
840 if (limits.min_perf_pct == 100 && limits.max_perf_pct == 100)
841 policy->policy = CPUFREQ_POLICY_PERFORMANCE;
842 else
843 policy->policy = CPUFREQ_POLICY_POWERSAVE;
844
845 policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
846 policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
847
848 /* cpuinfo and default policy values */
849 policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
850 update_turbo_state();
851 policy->cpuinfo.max_freq = limits.turbo_disabled ?
852 cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
853 policy->cpuinfo.max_freq *= cpu->pstate.scaling;
854
855 policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
856 cpumask_set_cpu(policy->cpu, policy->cpus);
857
858 return 0;
859 }
860
861 static struct cpufreq_driver intel_pstate_driver = {
862 .flags = CPUFREQ_CONST_LOOPS,
863 .verify = intel_pstate_verify_policy,
864 .setpolicy = intel_pstate_set_policy,
865 .get = intel_pstate_get,
866 .init = intel_pstate_cpu_init,
867 .stop_cpu = intel_pstate_stop_cpu,
868 .name = "intel_pstate",
869 };
870
871 static int __initdata no_load;
872
intel_pstate_msrs_not_valid(void)873 static int intel_pstate_msrs_not_valid(void)
874 {
875 /* Check that all the msr's we are using are valid. */
876 u64 aperf, mperf, tmp;
877
878 rdmsrl(MSR_IA32_APERF, aperf);
879 rdmsrl(MSR_IA32_MPERF, mperf);
880
881 if (!pstate_funcs.get_max() ||
882 !pstate_funcs.get_min() ||
883 !pstate_funcs.get_turbo())
884 return -ENODEV;
885
886 rdmsrl(MSR_IA32_APERF, tmp);
887 if (!(tmp - aperf))
888 return -ENODEV;
889
890 rdmsrl(MSR_IA32_MPERF, tmp);
891 if (!(tmp - mperf))
892 return -ENODEV;
893
894 return 0;
895 }
896
copy_pid_params(struct pstate_adjust_policy * policy)897 static void copy_pid_params(struct pstate_adjust_policy *policy)
898 {
899 pid_params.sample_rate_ms = policy->sample_rate_ms;
900 pid_params.p_gain_pct = policy->p_gain_pct;
901 pid_params.i_gain_pct = policy->i_gain_pct;
902 pid_params.d_gain_pct = policy->d_gain_pct;
903 pid_params.deadband = policy->deadband;
904 pid_params.setpoint = policy->setpoint;
905 }
906
copy_cpu_funcs(struct pstate_funcs * funcs)907 static void copy_cpu_funcs(struct pstate_funcs *funcs)
908 {
909 pstate_funcs.get_max = funcs->get_max;
910 pstate_funcs.get_min = funcs->get_min;
911 pstate_funcs.get_turbo = funcs->get_turbo;
912 pstate_funcs.get_scaling = funcs->get_scaling;
913 pstate_funcs.set = funcs->set;
914 pstate_funcs.get_vid = funcs->get_vid;
915 }
916
917 #if IS_ENABLED(CONFIG_ACPI)
918 #include <acpi/processor.h>
919
intel_pstate_no_acpi_pss(void)920 static bool intel_pstate_no_acpi_pss(void)
921 {
922 int i;
923
924 for_each_possible_cpu(i) {
925 acpi_status status;
926 union acpi_object *pss;
927 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
928 struct acpi_processor *pr = per_cpu(processors, i);
929
930 if (!pr)
931 continue;
932
933 status = acpi_evaluate_object(pr->handle, "_PSS", NULL, &buffer);
934 if (ACPI_FAILURE(status))
935 continue;
936
937 pss = buffer.pointer;
938 if (pss && pss->type == ACPI_TYPE_PACKAGE) {
939 kfree(pss);
940 return false;
941 }
942
943 kfree(pss);
944 }
945
946 return true;
947 }
948
949 struct hw_vendor_info {
950 u16 valid;
951 char oem_id[ACPI_OEM_ID_SIZE];
952 char oem_table_id[ACPI_OEM_TABLE_ID_SIZE];
953 };
954
955 /* Hardware vendor-specific info that has its own power management modes */
956 static struct hw_vendor_info vendor_info[] = {
957 {1, "HP ", "ProLiant"},
958 {0, "", ""},
959 };
960
intel_pstate_platform_pwr_mgmt_exists(void)961 static bool intel_pstate_platform_pwr_mgmt_exists(void)
962 {
963 struct acpi_table_header hdr;
964 struct hw_vendor_info *v_info;
965
966 if (acpi_disabled ||
967 ACPI_FAILURE(acpi_get_table_header(ACPI_SIG_FADT, 0, &hdr)))
968 return false;
969
970 for (v_info = vendor_info; v_info->valid; v_info++) {
971 if (!strncmp(hdr.oem_id, v_info->oem_id, ACPI_OEM_ID_SIZE) &&
972 !strncmp(hdr.oem_table_id, v_info->oem_table_id, ACPI_OEM_TABLE_ID_SIZE) &&
973 intel_pstate_no_acpi_pss())
974 return true;
975 }
976
977 return false;
978 }
979 #else /* CONFIG_ACPI not enabled */
/* Without ACPI support no platform power management is ever reported. */
static inline bool intel_pstate_platform_pwr_mgmt_exists(void)
{
	return false;
}
981 #endif /* CONFIG_ACPI */
982
intel_pstate_init(void)983 static int __init intel_pstate_init(void)
984 {
985 int cpu, rc = 0;
986 const struct x86_cpu_id *id;
987 struct cpu_defaults *cpu_info;
988
989 if (no_load)
990 return -ENODEV;
991
992 id = x86_match_cpu(intel_pstate_cpu_ids);
993 if (!id)
994 return -ENODEV;
995
996 /*
997 * The Intel pstate driver will be ignored if the platform
998 * firmware has its own power management modes.
999 */
1000 if (intel_pstate_platform_pwr_mgmt_exists())
1001 return -ENODEV;
1002
1003 cpu_info = (struct cpu_defaults *)id->driver_data;
1004
1005 copy_pid_params(&cpu_info->pid_policy);
1006 copy_cpu_funcs(&cpu_info->funcs);
1007
1008 if (intel_pstate_msrs_not_valid())
1009 return -ENODEV;
1010
1011 pr_info("Intel P-state driver initializing.\n");
1012
1013 all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus());
1014 if (!all_cpu_data)
1015 return -ENOMEM;
1016
1017 rc = cpufreq_register_driver(&intel_pstate_driver);
1018 if (rc)
1019 goto out;
1020
1021 intel_pstate_debug_expose_params();
1022 intel_pstate_sysfs_expose_params();
1023
1024 return rc;
1025 out:
1026 get_online_cpus();
1027 for_each_online_cpu(cpu) {
1028 if (all_cpu_data[cpu]) {
1029 del_timer_sync(&all_cpu_data[cpu]->timer);
1030 kfree(all_cpu_data[cpu]);
1031 }
1032 }
1033
1034 put_online_cpus();
1035 vfree(all_cpu_data);
1036 return -ENODEV;
1037 }
1038 device_initcall(intel_pstate_init);
1039
intel_pstate_setup(char * str)1040 static int __init intel_pstate_setup(char *str)
1041 {
1042 if (!str)
1043 return -EINVAL;
1044
1045 if (!strcmp(str, "disable"))
1046 no_load = 1;
1047 return 0;
1048 }
1049 early_param("intel_pstate", intel_pstate_setup);
1050
1051 MODULE_AUTHOR("Dirk Brandewie <dirk.j.brandewie@intel.com>");
1052 MODULE_DESCRIPTION("'intel_pstate' - P state driver Intel Core processors");
1053 MODULE_LICENSE("GPL");
1054