/*
 *	Routines to identify caches on Intel CPUs.
 *
 *	Changes:
 *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
 *		Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
 *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
 */

#include <linux/init.h>
#include <linux/slab.h>
#include <linux/device.h>
#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/pci.h>

#include <asm/processor.h>
#include <asm/smp.h>

#define LVL_1_INST	1
#define LVL_1_DATA	2
#define LVL_2		3
#define LVL_3		4
#define LVL_TRACE	5

struct _cache_table
{
	unsigned char descriptor;
	char cache_type;
	short size;
};

/* all the cache descriptor types we care about (no TLB or trace cache entries) */
static struct _cache_table cache_table[] __cpuinitdata =
{
	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x23, LVL_3,      1024 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x25, LVL_3,      2048 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x29, LVL_3,      4096 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3f, LVL_2,      256 },	/* 2-way set assoc, 64 byte line size */
	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
	{ 0x44, LVL_2,      1024 },	/* 4-way set assoc, 32 byte line size */
	{ 0x45, LVL_2,      2048 },	/* 4-way set assoc, 32 byte line size */
	{ 0x46, LVL_3,      4096 },	/* 4-way set assoc, 64 byte line size */
	{ 0x47, LVL_3,      8192 },	/* 8-way set assoc, 64 byte line size */
	{ 0x49, LVL_3,      4096 },	/* 16-way set assoc, 64 byte line size */
	{ 0x4a, LVL_3,      6144 },	/* 12-way set assoc, 64 byte line size */
	{ 0x4b, LVL_3,      8192 },	/* 16-way set assoc, 64 byte line size */
	{ 0x4c, LVL_3,     12288 },	/* 12-way set assoc, 64 byte line size */
	{ 0x4d, LVL_3,     16384 },	/* 16-way set assoc, 64 byte line size */
	{ 0x4e, LVL_2,      6144 },	/* 24-way set assoc, 64 byte line size */
	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
	{ 0x78, LVL_2,    1024 },	/* 4-way set assoc, 64 byte line size */
	{ 0x79, LVL_2,     128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7a, LVL_2,     256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7b, LVL_2,     512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7c, LVL_2,    1024 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7d, LVL_2,    2048 },	/* 8-way set assoc, 64 byte line size */
	{ 0x7f, LVL_2,     512 },	/* 2-way set assoc, 64 byte line size */
	{ 0x82, LVL_2,     256 },	/* 8-way set assoc, 32 byte line size */
	{ 0x83, LVL_2,     512 },	/* 8-way set assoc, 32 byte line size */
	{ 0x84, LVL_2,    1024 },	/* 8-way set assoc, 32 byte line size */
	{ 0x85, LVL_2,    2048 },	/* 8-way set assoc, 32 byte line size */
	{ 0x86, LVL_2,     512 },	/* 4-way set assoc, 64 byte line size */
	{ 0x87, LVL_2,    1024 },	/* 8-way set assoc, 64 byte line size */
	{ 0xd0, LVL_3,     512 },	/* 4-way set assoc, 64 byte line size */
	{ 0xd1, LVL_3,    1024 },	/* 4-way set assoc, 64 byte line size */
	{ 0xd2, LVL_3,    2048 },	/* 4-way set assoc, 64 byte line size */
	{ 0xd6, LVL_3,    1024 },	/* 8-way set assoc, 64 byte line size */
	{ 0xd7, LVL_3,    2048 },	/* 8-way set assoc, 64 byte line size */
	{ 0xd8, LVL_3,    4096 },	/* 12-way set assoc, 64 byte line size */
	{ 0xdc, LVL_3,    2048 },	/* 12-way set assoc, 64 byte line size */
	{ 0xdd, LVL_3,    4096 },	/* 12-way set assoc, 64 byte line size */
	{ 0xde, LVL_3,    8192 },	/* 12-way set assoc, 64 byte line size */
	{ 0xe2, LVL_3,    2048 },	/* 16-way set assoc, 64 byte line size */
	{ 0xe3, LVL_3,    4096 },	/* 16-way set assoc, 64 byte line size */
	{ 0xe4, LVL_3,    8192 },	/* 16-way set assoc, 64 byte line size */
	{ 0x00, 0, 0}
};


enum _cache_type
{
	CACHE_TYPE_NULL	= 0,
	CACHE_TYPE_DATA = 1,
	CACHE_TYPE_INST = 2,
	CACHE_TYPE_UNIFIED = 3
};

union _cpuid4_leaf_eax {
	struct {
		enum _cache_type	type:5;
		unsigned int		level:3;
		unsigned int		is_self_initializing:1;
		unsigned int		is_fully_associative:1;
		unsigned int		reserved:4;
		unsigned int		num_threads_sharing:12;
		unsigned int		num_cores_on_die:6;
	} split;
	u32 full;
};

union _cpuid4_leaf_ebx {
	struct {
		unsigned int		coherency_line_size:12;
		unsigned int		physical_line_partition:10;
		unsigned int		ways_of_associativity:10;
	} split;
	u32 full;
};

union _cpuid4_leaf_ecx {
	struct {
		unsigned int		number_of_sets:32;
	} split;
	u32 full;
};

struct _cpuid4_info {
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	unsigned long size;
	unsigned long can_disable;
	cpumask_t shared_cpu_map;	/* future?: only cpus/node is needed */
};

#ifdef CONFIG_PCI
static struct pci_device_id k8_nb_id[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
	{}
};
#endif

unsigned short			num_cache_leaves;

/*
 * AMD doesn't have CPUID4. Emulate it here to report the same
 * information to the user. This makes some assumptions about the
 * machine: L2 not shared, no SMT etc., which currently holds on AMD CPUs.
 *
 * In theory the TLBs could be reported as a fake type (they are in
 * "dummy"). Maybe later.
 */
union l1_cache {
	struct {
		unsigned line_size : 8;
		unsigned lines_per_tag : 8;
		unsigned assoc : 8;
		unsigned size_in_kb : 8;
	};
	unsigned val;
};

union l2_cache {
	struct {
		unsigned line_size : 8;
		unsigned lines_per_tag : 4;
		unsigned assoc : 4;
		unsigned size_in_kb : 16;
	};
	unsigned val;
};

union l3_cache {
	struct {
		unsigned line_size : 8;
		unsigned lines_per_tag : 4;
		unsigned assoc : 4;
		unsigned res : 2;
		unsigned size_encoded : 14;
	};
	unsigned val;
};

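/*
 * assocs[] translates the 4-bit associativity encoding used by CPUID
 * 0x80000006 into an actual number of ways (e.g. 0x6 -> 8-way,
 * 0xa -> 32-way); 0xf presumably means fully associative, which has no
 * exact representation here. levels[] and types[] map the emulated leaf
 * number (0 = L1d, 1 = L1i, 2 = L2, 3 = L3) to the CPUID(4)-style cache
 * level and type values filled in below.
 */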
static unsigned short assocs[] __cpuinitdata = {
	[1] = 1, [2] = 2, [4] = 4, [6] = 8,
	[8] = 16, [0xa] = 32, [0xb] = 48,
	[0xc] = 64,
	[0xf] = 0xffff /* ?? */
};

static unsigned char levels[] __cpuinitdata = { 1, 1, 2, 3 };
static unsigned char types[] __cpuinitdata = { 1, 2, 3, 3 };

static void __cpuinit
amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
		     union _cpuid4_leaf_ebx *ebx,
		     union _cpuid4_leaf_ecx *ecx)
{
	unsigned dummy;
	unsigned line_size, lines_per_tag, assoc, size_in_kb;
	union l1_cache l1i, l1d;
	union l2_cache l2;
	union l3_cache l3;
	union l1_cache *l1 = &l1d;

	eax->full = 0;
	ebx->full = 0;
	ecx->full = 0;

	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);

	switch (leaf) {
	case 1:
		l1 = &l1i;
		/* fall through - L1i and L1d share the 0x80000005 layout */
	case 0:
		if (!l1->val)
			return;
		assoc = l1->assoc;
		line_size = l1->line_size;
		lines_per_tag = l1->lines_per_tag;
		size_in_kb = l1->size_in_kb;
		break;
	case 2:
		if (!l2.val)
			return;
		assoc = l2.assoc;
		line_size = l2.line_size;
		lines_per_tag = l2.lines_per_tag;
		/* cpu_data has errata corrections for K7 applied */
		size_in_kb = current_cpu_data.x86_cache_size;
		break;
	case 3:
		if (!l3.val)
			return;
		assoc = l3.assoc;
		line_size = l3.line_size;
		lines_per_tag = l3.lines_per_tag;
		size_in_kb = l3.size_encoded * 512;
		break;
	default:
		return;
	}

	eax->split.is_self_initializing = 1;
	eax->split.type = types[leaf];
	eax->split.level = levels[leaf];
	if (leaf == 3)
		eax->split.num_threads_sharing = current_cpu_data.x86_max_cores - 1;
	else
		eax->split.num_threads_sharing = 0;
	eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;


	if (assoc == 0xf)
		eax->split.is_fully_associative = 1;
	ebx->split.coherency_line_size = line_size - 1;
	ebx->split.ways_of_associativity = assocs[assoc] - 1;
	ebx->split.physical_line_partition = lines_per_tag - 1;
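	/*
	 * number_of_sets = total size / (line size * ways), stored in the
	 * CPUID(4) convention of "value minus one", matching the encoding
	 * of the other ebx/ecx fields above.
	 */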
	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
		(ebx->split.ways_of_associativity + 1) - 1;
}

static void __cpuinit
amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf)
{
	if (index < 3)
		return;
	this_leaf->can_disable = 1;
}

static int
__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
{
	union _cpuid4_leaf_eax 	eax;
	union _cpuid4_leaf_ebx 	ebx;
	union _cpuid4_leaf_ecx 	ecx;
	unsigned		edx;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		amd_cpuid4(index, &eax, &ebx, &ecx);
		if (boot_cpu_data.x86 >= 0x10)
			amd_check_l3_disable(index, this_leaf);
	} else {
		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
	}

	if (eax.split.type == CACHE_TYPE_NULL)
		return -EIO; /* better error ? */

	this_leaf->eax = eax;
	this_leaf->ebx = ebx;
	this_leaf->ecx = ecx;
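	/*
	 * Cache size in bytes: each CPUID(4) field below is encoded as
	 * (value - 1), so add one back before multiplying
	 * sets * line size * physical partitions * ways.
	 */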
	this_leaf->size = (ecx.split.number_of_sets          + 1) *
			  (ebx.split.coherency_line_size     + 1) *
			  (ebx.split.physical_line_partition + 1) *
			  (ebx.split.ways_of_associativity   + 1);
	return 0;
}

static int __cpuinit find_num_cache_leaves(void)
{
	unsigned int		eax, ebx, ecx, edx;
	union _cpuid4_leaf_eax	cache_eax;
	int 			i = -1;

	do {
		++i;
		/* Do cpuid(4) loop to find out num_cache_leaves */
		cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
		cache_eax.full = eax;
	} while (cache_eax.split.type != CACHE_TYPE_NULL);
	return i;
}

unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */
	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
#ifdef CONFIG_X86_HT
	unsigned int cpu = c->cpu_index;
#endif

	if (c->cpuid_level > 3) {
		static int is_initialized;

		if (is_initialized == 0) {
			/* Init num_cache_leaves from boot CPU */
			num_cache_leaves = find_num_cache_leaves();
			is_initialized++;
		}

		/*
		 * Whenever possible use cpuid(4), deterministic cache
		 * parameters cpuid leaf to find the cache details
		 */
		for (i = 0; i < num_cache_leaves; i++) {
			struct _cpuid4_info this_leaf;

			int retval;

			retval = cpuid4_cache_lookup(i, &this_leaf);
			if (retval >= 0) {
				switch(this_leaf.eax.split.level) {
				    case 1:
					if (this_leaf.eax.split.type ==
							CACHE_TYPE_DATA)
						new_l1d = this_leaf.size/1024;
					else if (this_leaf.eax.split.type ==
							CACHE_TYPE_INST)
						new_l1i = this_leaf.size/1024;
					break;
				    case 2:
					new_l2 = this_leaf.size/1024;
					num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
					index_msb = get_count_order(num_threads_sharing);
					l2_id = c->apicid >> index_msb;
					break;
				    case 3:
					new_l3 = this_leaf.size/1024;
					num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
					index_msb = get_count_order(num_threads_sharing);
					l3_id = c->apicid >> index_msb;
					break;
				    default:
					break;
				}
			}
		}
	}
	/*
	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
	 * trace cache
	 */
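	/*
	 * CPUID leaf 2 format: EAX[7:0] is the number of times the leaf must
	 * be queried, each register packs four one-byte descriptors, and a
	 * register with bit 31 set holds no valid descriptors. Sizes are
	 * accumulated because several descriptors of the same level can be
	 * reported.
	 */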
	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
		/* supports eax=2  call */
		int j, n;
		unsigned int regs[4];
		unsigned char *dp = (unsigned char *)regs;
		int only_trace = 0;

		if (num_cache_leaves != 0 && c->x86 == 15)
			only_trace = 1;

		/* Number of times to iterate */
		n = cpuid_eax(2) & 0xFF;

		for ( i = 0 ; i < n ; i++ ) {
			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

			/* If bit 31 is set, this is an unknown format */
			for ( j = 0 ; j < 3 ; j++ ) {
				if (regs[j] & (1 << 31)) regs[j] = 0;
			}

			/* Byte 0 is level count, not a descriptor */
			for ( j = 1 ; j < 16 ; j++ ) {
				unsigned char des = dp[j];
				unsigned char k = 0;

				/* look up this descriptor in the table */
				while (cache_table[k].descriptor != 0)
				{
					if (cache_table[k].descriptor == des) {
						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
							break;
						switch (cache_table[k].cache_type) {
						case LVL_1_INST:
							l1i += cache_table[k].size;
							break;
						case LVL_1_DATA:
							l1d += cache_table[k].size;
							break;
						case LVL_2:
							l2 += cache_table[k].size;
							break;
						case LVL_3:
							l3 += cache_table[k].size;
							break;
						case LVL_TRACE:
							trace += cache_table[k].size;
							break;
						}

						break;
					}

					k++;
				}
			}
		}
	}

	if (new_l1d)
		l1d = new_l1d;

	if (new_l1i)
		l1i = new_l1i;

	if (new_l2) {
		l2 = new_l2;
#ifdef CONFIG_X86_HT
		per_cpu(cpu_llc_id, cpu) = l2_id;
#endif
	}

	if (new_l3) {
		l3 = new_l3;
#ifdef CONFIG_X86_HT
		per_cpu(cpu_llc_id, cpu) = l3_id;
#endif
	}

	if (trace)
		printk (KERN_INFO "CPU: Trace cache: %dK uops", trace);
	else if ( l1i )
		printk (KERN_INFO "CPU: L1 I cache: %dK", l1i);

	if (l1d)
		printk(", L1 D cache: %dK\n", l1d);
	else
		printk("\n");

	if (l2)
		printk(KERN_INFO "CPU: L2 cache: %dK\n", l2);

	if (l3)
		printk(KERN_INFO "CPU: L3 cache: %dK\n", l3);

	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));

	return l2;
}

/* pointer to _cpuid4_info array (for each cache leaf) */
static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info);
#define CPUID4_INFO_IDX(x, y)	(&((per_cpu(cpuid4_info, x))[y]))

#ifdef CONFIG_SMP
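/*
 * CPUs share a cache level when their APIC IDs agree above the low
 * index_msb bits, where index_msb covers num_threads_sharing; the
 * shared_cpu_map for this leaf (and each sibling's map) is built on
 * that basis.
 */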
static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
{
	struct _cpuid4_info	*this_leaf, *sibling_leaf;
	unsigned long num_threads_sharing;
	int index_msb, i;
	struct cpuinfo_x86 *c = &cpu_data(cpu);

	this_leaf = CPUID4_INFO_IDX(cpu, index);
	num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;

	if (num_threads_sharing == 1)
		cpu_set(cpu, this_leaf->shared_cpu_map);
	else {
		index_msb = get_count_order(num_threads_sharing);

		for_each_online_cpu(i) {
			if (cpu_data(i).apicid >> index_msb ==
			    c->apicid >> index_msb) {
				cpu_set(i, this_leaf->shared_cpu_map);
				if (i != cpu && per_cpu(cpuid4_info, i))  {
					sibling_leaf = CPUID4_INFO_IDX(i, index);
					cpu_set(cpu, sibling_leaf->shared_cpu_map);
				}
			}
		}
	}
}
static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
{
	struct _cpuid4_info	*this_leaf, *sibling_leaf;
	int sibling;

	this_leaf = CPUID4_INFO_IDX(cpu, index);
	for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) {
		sibling_leaf = CPUID4_INFO_IDX(sibling, index);
		cpu_clear(cpu, sibling_leaf->shared_cpu_map);
	}
}
#else
static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) {}
static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) {}
#endif

static void __cpuinit free_cache_attributes(unsigned int cpu)
{
	int i;

	for (i = 0; i < num_cache_leaves; i++)
		cache_remove_shared_cpu_map(cpu, i);

	kfree(per_cpu(cpuid4_info, cpu));
	per_cpu(cpuid4_info, cpu) = NULL;
}

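/*
 * Runs on the target CPU via smp_call_function_single() so that the
 * cpuid reads in cpuid4_cache_lookup() describe that CPU's own caches.
 */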
static void __cpuinit get_cpu_leaves(void *_retval)
{
	int j, *retval = _retval, cpu = smp_processor_id();

	/* Do cpuid and store the results */
	for (j = 0; j < num_cache_leaves; j++) {
		struct _cpuid4_info *this_leaf;
		this_leaf = CPUID4_INFO_IDX(cpu, j);
		*retval = cpuid4_cache_lookup(j, this_leaf);
		if (unlikely(*retval < 0)) {
			int i;

			for (i = 0; i < j; i++)
				cache_remove_shared_cpu_map(cpu, i);
			break;
		}
		cache_shared_cpu_map_setup(cpu, j);
	}
}

static int __cpuinit detect_cache_attributes(unsigned int cpu)
{
	int			retval;

	if (num_cache_leaves == 0)
		return -ENOENT;

	per_cpu(cpuid4_info, cpu) = kzalloc(
	    sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
	if (per_cpu(cpuid4_info, cpu) == NULL)
		return -ENOMEM;

	smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
	if (retval) {
		kfree(per_cpu(cpuid4_info, cpu));
		per_cpu(cpuid4_info, cpu) = NULL;
	}

	return retval;
}

#ifdef CONFIG_SYSFS

#include <linux/kobject.h>
#include <linux/sysfs.h>

extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */

/* pointer to kobject for cpuX/cache */
static DEFINE_PER_CPU(struct kobject *, cache_kobject);

struct _index_kobject {
	struct kobject kobj;
	unsigned int cpu;
	unsigned short index;
};

/* pointer to array of kobjects for cpuX/cache/indexY */
static DEFINE_PER_CPU(struct _index_kobject *, index_kobject);
#define INDEX_KOBJECT_PTR(x, y)		(&((per_cpu(index_kobject, x))[y]))

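/*
 * Most CPUID(4) fields are stored as (value - 1); the third macro
 * argument below adds that offset back for display, and is 0 for
 * fields such as the cache level that are stored directly.
 */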
#define show_one_plus(file_name, object, val)				\
static ssize_t show_##file_name						\
			(struct _cpuid4_info *this_leaf, char *buf)	\
{									\
	return sprintf (buf, "%lu\n", (unsigned long)this_leaf->object + val); \
}

show_one_plus(level, eax.split.level, 0);
show_one_plus(coherency_line_size, ebx.split.coherency_line_size, 1);
show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1);
show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1);
show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);

static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf)
{
	return sprintf (buf, "%luK\n", this_leaf->size / 1024);
}

static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
					int type, char *buf)
{
	ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf;
	int n = 0;

	if (len > 1) {
		cpumask_t *mask = &this_leaf->shared_cpu_map;

		n = type?
			cpulist_scnprintf(buf, len-2, mask) :
			cpumask_scnprintf(buf, len-2, mask);
		buf[n++] = '\n';
		buf[n] = '\0';
	}
	return n;
}

static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf)
{
	return show_shared_cpu_map_func(leaf, 0, buf);
}

static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf)
{
	return show_shared_cpu_map_func(leaf, 1, buf);
}

static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf)
{
	switch (this_leaf->eax.split.type) {
	case CACHE_TYPE_DATA:
		return sprintf(buf, "Data\n");
	case CACHE_TYPE_INST:
		return sprintf(buf, "Instruction\n");
	case CACHE_TYPE_UNIFIED:
		return sprintf(buf, "Unified\n");
	default:
		return sprintf(buf, "Unknown\n");
	}
}

#define to_object(k)	container_of(k, struct _index_kobject, kobj)
#define to_attr(a)	container_of(a, struct _cache_attr, attr)

#ifdef CONFIG_PCI
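/*
 * Walk all PCI devices and return the node'th one that matches
 * k8_nb_id[]; this relies on the northbridges being enumerated in
 * node order.
 */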
static struct pci_dev *get_k8_northbridge(int node)
{
	struct pci_dev *dev = NULL;
	int i;

	for (i = 0; i <= node; i++) {
		do {
			dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
			if (!dev)
				break;
		} while (!pci_match_id(&k8_nb_id[0], dev));
		if (!dev)
			break;
	}
	return dev;
}
#else
static struct pci_dev *get_k8_northbridge(int node)
{
	return NULL;
}
#endif

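/*
 * show_cache_disable() reads two config dwords at 0x1BC and 0x1C0 from
 * the node's northbridge and decodes bit 31 as "reads disabled", bit 30
 * as "new entries disabled", bits 17:16 as the subcache mask and bits
 * 11:0 as the index; store_cache_disable() writes the same registers to
 * disable a given cache index. These offsets are assumed to be the L3
 * cache index disable registers of family 0x10 northbridges.
 */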
static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
{
	int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
	struct pci_dev *dev = NULL;
	ssize_t ret = 0;
	int i;

	if (!this_leaf->can_disable)
		return sprintf(buf, "Feature not enabled\n");

	dev = get_k8_northbridge(node);
	if (!dev) {
		printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n");
		return -EINVAL;
	}

	for (i = 0; i < 2; i++) {
		unsigned int reg;

		pci_read_config_dword(dev, 0x1BC + i * 4, &reg);

		ret += sprintf(buf, "%sEntry: %d\n", buf, i);
		ret += sprintf(buf, "%sReads:  %s\tNew Entries: %s\n",
			buf,
			reg & 0x80000000 ? "Disabled" : "Allowed",
			reg & 0x40000000 ? "Disabled" : "Allowed");
		ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n",
			buf, (reg & 0x30000) >> 16, reg & 0xfff);
	}
	return ret;
}

static ssize_t
store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
		    size_t count)
{
	int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
	struct pci_dev *dev = NULL;
	unsigned int ret, index, val;

	if (!this_leaf->can_disable)
		return 0;

	if (strlen(buf) > 15)
		return -EINVAL;

	ret = sscanf(buf, "%x %x", &index, &val);
	if (ret != 2)
		return -EINVAL;
	if (index > 1)
		return -EINVAL;

	val |= 0xc0000000;
	dev = get_k8_northbridge(node);
	if (!dev) {
		printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n");
		return -EINVAL;
	}

	pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000);
	wbinvd();
	pci_write_config_dword(dev, 0x1BC + index * 4, val);

	return 1;
}

struct _cache_attr {
	struct attribute attr;
	ssize_t (*show)(struct _cpuid4_info *, char *);
	ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
};

#define define_one_ro(_name) \
static struct _cache_attr _name = \
	__ATTR(_name, 0444, show_##_name, NULL)

define_one_ro(level);
define_one_ro(type);
define_one_ro(coherency_line_size);
define_one_ro(physical_line_partition);
define_one_ro(ways_of_associativity);
define_one_ro(number_of_sets);
define_one_ro(size);
define_one_ro(shared_cpu_map);
define_one_ro(shared_cpu_list);

static struct _cache_attr cache_disable = __ATTR(cache_disable, 0644, show_cache_disable, store_cache_disable);

static struct attribute * default_attrs[] = {
	&type.attr,
	&level.attr,
	&coherency_line_size.attr,
	&physical_line_partition.attr,
	&ways_of_associativity.attr,
	&number_of_sets.attr,
	&size.attr,
	&shared_cpu_map.attr,
	&shared_cpu_list.attr,
	&cache_disable.attr,
	NULL
};

static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
{
	struct _cache_attr *fattr = to_attr(attr);
	struct _index_kobject *this_leaf = to_object(kobj);
	ssize_t ret;

	ret = fattr->show ?
		fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
			buf) :
		0;
	return ret;
}

static ssize_t store(struct kobject * kobj, struct attribute * attr,
		     const char * buf, size_t count)
{
	struct _cache_attr *fattr = to_attr(attr);
	struct _index_kobject *this_leaf = to_object(kobj);
	ssize_t ret;

	ret = fattr->store ?
		fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
			buf, count) :
		0;
	return ret;
}

static struct sysfs_ops sysfs_ops = {
	.show   = show,
	.store  = store,
};

static struct kobj_type ktype_cache = {
	.sysfs_ops	= &sysfs_ops,
	.default_attrs	= default_attrs,
};

static struct kobj_type ktype_percpu_entry = {
	.sysfs_ops	= &sysfs_ops,
};

static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu)
{
	kfree(per_cpu(cache_kobject, cpu));
	kfree(per_cpu(index_kobject, cpu));
	per_cpu(cache_kobject, cpu) = NULL;
	per_cpu(index_kobject, cpu) = NULL;
	free_cache_attributes(cpu);
}

static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
{
	int err;

	if (num_cache_leaves == 0)
		return -ENOENT;

	err = detect_cache_attributes(cpu);
	if (err)
		return err;

	/* Allocate all required memory */
	per_cpu(cache_kobject, cpu) =
		kzalloc(sizeof(struct kobject), GFP_KERNEL);
	if (unlikely(per_cpu(cache_kobject, cpu) == NULL))
		goto err_out;

	per_cpu(index_kobject, cpu) = kzalloc(
	    sizeof(struct _index_kobject ) * num_cache_leaves, GFP_KERNEL);
	if (unlikely(per_cpu(index_kobject, cpu) == NULL))
		goto err_out;

	return 0;

err_out:
	cpuid4_cache_sysfs_exit(cpu);
	return -ENOMEM;
}

static cpumask_t cache_dev_map = CPU_MASK_NONE;

/* Add/Remove cache interface for CPU device */
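/*
 * Creates the cpuX/cache kobject under the CPU's sysdev, with one
 * indexY child per cache leaf exposing the attributes in
 * default_attrs[].
 */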
static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
{
	unsigned int cpu = sys_dev->id;
	unsigned long i, j;
	struct _index_kobject *this_object;
	int retval;

	retval = cpuid4_cache_sysfs_init(cpu);
	if (unlikely(retval < 0))
		return retval;

	retval = kobject_init_and_add(per_cpu(cache_kobject, cpu),
				      &ktype_percpu_entry,
				      &sys_dev->kobj, "%s", "cache");
	if (retval < 0) {
		cpuid4_cache_sysfs_exit(cpu);
		return retval;
	}

	for (i = 0; i < num_cache_leaves; i++) {
		this_object = INDEX_KOBJECT_PTR(cpu,i);
		this_object->cpu = cpu;
		this_object->index = i;
		retval = kobject_init_and_add(&(this_object->kobj),
					      &ktype_cache,
					      per_cpu(cache_kobject, cpu),
					      "index%1lu", i);
		if (unlikely(retval)) {
			for (j = 0; j < i; j++) {
				kobject_put(&(INDEX_KOBJECT_PTR(cpu,j)->kobj));
			}
			kobject_put(per_cpu(cache_kobject, cpu));
			cpuid4_cache_sysfs_exit(cpu);
			return retval;
		}
		kobject_uevent(&(this_object->kobj), KOBJ_ADD);
	}
	cpu_set(cpu, cache_dev_map);

	kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD);
	return 0;
}

static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
{
	unsigned int cpu = sys_dev->id;
	unsigned long i;

	if (per_cpu(cpuid4_info, cpu) == NULL)
		return;
	if (!cpu_isset(cpu, cache_dev_map))
		return;
	cpu_clear(cpu, cache_dev_map);

	for (i = 0; i < num_cache_leaves; i++)
		kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj));
	kobject_put(per_cpu(cache_kobject, cpu));
	cpuid4_cache_sysfs_exit(cpu);
}

static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
					unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct sys_device *sys_dev;

	sys_dev = get_cpu_sysdev(cpu);
	switch (action) {
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		cache_add_dev(sys_dev);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		cache_remove_dev(sys_dev);
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier =
{
	.notifier_call = cacheinfo_cpu_callback,
};

static int __cpuinit cache_sysfs_init(void)
{
	int i;

	if (num_cache_leaves == 0)
		return 0;

	for_each_online_cpu(i) {
		int err;
		struct sys_device *sys_dev = get_cpu_sysdev(i);

		err = cache_add_dev(sys_dev);
		if (err)
			return err;
	}
	register_hotcpu_notifier(&cacheinfo_cpu_notifier);
	return 0;
}

device_initcall(cache_sysfs_init);

#endif