/*
 * Performance events support for SH7750-style performance counters
 *
 *  Copyright (C) 2009  Paul Mundt
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 */
10 #include <linux/kernel.h>
11 #include <linux/init.h>
12 #include <linux/io.h>
13 #include <linux/irq.h>
14 #include <linux/perf_event.h>
15 #include <asm/processor.h>
16 
/* Base addresses of the two performance-monitor register banks. */
#define PM_CR_BASE	0xff000084	/* 16-bit */
#define PM_CTR_BASE	0xff100004	/* 32-bit */

/* Control register of counter n; consecutive counters are 4 bytes apart. */
#define PMCR(n)		(PM_CR_BASE + ((n) * 4))
/* Count registers of counter n: 8 bytes per counter, high word first. */
#define PMCTRH(n)	(PM_CTR_BASE + ((n) * 8))
#define PMCTRL(n)	(PMCTRH(n) + 4)

/* Low six bits of PMCR carry the event code (the PMM setting). */
#define PMCR_PMM_MASK	0x3f

/* Individual PMCR control bits. */
#define PMCR_CLKF	(1 << 8)
#define PMCR_PMCLR	(1 << 13)
#define PMCR_PMST	(1 << 14)
#define PMCR_PMEN	(1 << 15)
30 
/*
 * Forward declaration: the enable_all/disable_all helpers further down read
 * sh7750_pmu.num_events before the full initializer appears in this file.
 */
static struct sh_pmu sh7750_pmu;
32 
/*
 * There are a number of events supported by each counter (33 in total).
 * Since we have 2 counters, each counter will take the event code as it
 * corresponds to the PMCR PMM setting. Each counter can be configured
 * independently.
 *
 *	Event Code	Description
 *	----------	-----------
 *
 *	0x01		Operand read access
 *	0x02		Operand write access
 *	0x03		UTLB miss
 *	0x04		Operand cache read miss
 *	0x05		Operand cache write miss
 *	0x06		Instruction fetch (w/ cache)
 *	0x07		Instruction TLB miss
 *	0x08		Instruction cache miss
 *	0x09		All operand accesses
 *	0x0a		All instruction accesses
 *	0x0b		OC RAM operand access
 *	0x0d		On-chip I/O space access
 *	0x0e		Operand access (r/w)
 *	0x0f		Operand cache miss (r/w)
 *	0x10		Branch instruction
 *	0x11		Branch taken
 *	0x12		BSR/BSRF/JSR
 *	0x13		Instruction execution
 *	0x14		Instruction execution in parallel
 *	0x15		FPU Instruction execution
 *	0x16		Interrupt
 *	0x17		NMI
 *	0x18		trapa instruction execution
 *	0x19		UBCA match
 *	0x1a		UBCB match
 *	0x21		Instruction cache fill
 *	0x22		Operand cache fill
 *	0x23		Elapsed time
 *	0x24		Pipeline freeze by I-cache miss
 *	0x25		Pipeline freeze by D-cache miss
 *	0x27		Pipeline freeze by branch instruction
 *	0x28		Pipeline freeze by CPU register
 *	0x29		Pipeline freeze by FPU
 */
76 
77 static const int sh7750_general_events[] = {
78 	[PERF_COUNT_HW_CPU_CYCLES]		= 0x0023,
79 	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x000a,
80 	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0006,	/* I-cache */
81 	[PERF_COUNT_HW_CACHE_MISSES]		= 0x0008,	/* I-cache */
82 	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x0010,
83 	[PERF_COUNT_HW_BRANCH_MISSES]		= -1,
84 	[PERF_COUNT_HW_BUS_CYCLES]		= -1,
85 };
86 
87 #define C(x)	PERF_COUNT_HW_CACHE_##x
88 
89 static const int sh7750_cache_events
90 			[PERF_COUNT_HW_CACHE_MAX]
91 			[PERF_COUNT_HW_CACHE_OP_MAX]
92 			[PERF_COUNT_HW_CACHE_RESULT_MAX] =
93 {
94 	[ C(L1D) ] = {
95 		[ C(OP_READ) ] = {
96 			[ C(RESULT_ACCESS) ] = 0x0001,
97 			[ C(RESULT_MISS)   ] = 0x0004,
98 		},
99 		[ C(OP_WRITE) ] = {
100 			[ C(RESULT_ACCESS) ] = 0x0002,
101 			[ C(RESULT_MISS)   ] = 0x0005,
102 		},
103 		[ C(OP_PREFETCH) ] = {
104 			[ C(RESULT_ACCESS) ] = 0,
105 			[ C(RESULT_MISS)   ] = 0,
106 		},
107 	},
108 
109 	[ C(L1I) ] = {
110 		[ C(OP_READ) ] = {
111 			[ C(RESULT_ACCESS) ] = 0x0006,
112 			[ C(RESULT_MISS)   ] = 0x0008,
113 		},
114 		[ C(OP_WRITE) ] = {
115 			[ C(RESULT_ACCESS) ] = -1,
116 			[ C(RESULT_MISS)   ] = -1,
117 		},
118 		[ C(OP_PREFETCH) ] = {
119 			[ C(RESULT_ACCESS) ] = 0,
120 			[ C(RESULT_MISS)   ] = 0,
121 		},
122 	},
123 
124 	[ C(LL) ] = {
125 		[ C(OP_READ) ] = {
126 			[ C(RESULT_ACCESS) ] = 0,
127 			[ C(RESULT_MISS)   ] = 0,
128 		},
129 		[ C(OP_WRITE) ] = {
130 			[ C(RESULT_ACCESS) ] = 0,
131 			[ C(RESULT_MISS)   ] = 0,
132 		},
133 		[ C(OP_PREFETCH) ] = {
134 			[ C(RESULT_ACCESS) ] = 0,
135 			[ C(RESULT_MISS)   ] = 0,
136 		},
137 	},
138 
139 	[ C(DTLB) ] = {
140 		[ C(OP_READ) ] = {
141 			[ C(RESULT_ACCESS) ] = 0,
142 			[ C(RESULT_MISS)   ] = 0x0003,
143 		},
144 		[ C(OP_WRITE) ] = {
145 			[ C(RESULT_ACCESS) ] = 0,
146 			[ C(RESULT_MISS)   ] = 0,
147 		},
148 		[ C(OP_PREFETCH) ] = {
149 			[ C(RESULT_ACCESS) ] = 0,
150 			[ C(RESULT_MISS)   ] = 0,
151 		},
152 	},
153 
154 	[ C(ITLB) ] = {
155 		[ C(OP_READ) ] = {
156 			[ C(RESULT_ACCESS) ] = 0,
157 			[ C(RESULT_MISS)   ] = 0x0007,
158 		},
159 		[ C(OP_WRITE) ] = {
160 			[ C(RESULT_ACCESS) ] = -1,
161 			[ C(RESULT_MISS)   ] = -1,
162 		},
163 		[ C(OP_PREFETCH) ] = {
164 			[ C(RESULT_ACCESS) ] = -1,
165 			[ C(RESULT_MISS)   ] = -1,
166 		},
167 	},
168 
169 	[ C(BPU) ] = {
170 		[ C(OP_READ) ] = {
171 			[ C(RESULT_ACCESS) ] = -1,
172 			[ C(RESULT_MISS)   ] = -1,
173 		},
174 		[ C(OP_WRITE) ] = {
175 			[ C(RESULT_ACCESS) ] = -1,
176 			[ C(RESULT_MISS)   ] = -1,
177 		},
178 		[ C(OP_PREFETCH) ] = {
179 			[ C(RESULT_ACCESS) ] = -1,
180 			[ C(RESULT_MISS)   ] = -1,
181 		},
182 	},
183 
184 	[ C(NODE) ] = {
185 		[ C(OP_READ) ] = {
186 			[ C(RESULT_ACCESS) ] = -1,
187 			[ C(RESULT_MISS)   ] = -1,
188 		},
189 		[ C(OP_WRITE) ] = {
190 			[ C(RESULT_ACCESS) ] = -1,
191 			[ C(RESULT_MISS)   ] = -1,
192 		},
193 		[ C(OP_PREFETCH) ] = {
194 			[ C(RESULT_ACCESS) ] = -1,
195 			[ C(RESULT_MISS)   ] = -1,
196 		},
197 	},
198 };
199 
sh7750_event_map(int event)200 static int sh7750_event_map(int event)
201 {
202 	return sh7750_general_events[event];
203 }
204 
sh7750_pmu_read(int idx)205 static u64 sh7750_pmu_read(int idx)
206 {
207 	return (u64)((u64)(__raw_readl(PMCTRH(idx)) & 0xffff) << 32) |
208 			   __raw_readl(PMCTRL(idx));
209 }
210 
sh7750_pmu_disable(struct hw_perf_event * hwc,int idx)211 static void sh7750_pmu_disable(struct hw_perf_event *hwc, int idx)
212 {
213 	unsigned int tmp;
214 
215 	tmp = __raw_readw(PMCR(idx));
216 	tmp &= ~(PMCR_PMM_MASK | PMCR_PMEN);
217 	__raw_writew(tmp, PMCR(idx));
218 }
219 
sh7750_pmu_enable(struct hw_perf_event * hwc,int idx)220 static void sh7750_pmu_enable(struct hw_perf_event *hwc, int idx)
221 {
222 	__raw_writew(__raw_readw(PMCR(idx)) | PMCR_PMCLR, PMCR(idx));
223 	__raw_writew(hwc->config | PMCR_PMEN | PMCR_PMST, PMCR(idx));
224 }
225 
sh7750_pmu_disable_all(void)226 static void sh7750_pmu_disable_all(void)
227 {
228 	int i;
229 
230 	for (i = 0; i < sh7750_pmu.num_events; i++)
231 		__raw_writew(__raw_readw(PMCR(i)) & ~PMCR_PMEN, PMCR(i));
232 }
233 
sh7750_pmu_enable_all(void)234 static void sh7750_pmu_enable_all(void)
235 {
236 	int i;
237 
238 	for (i = 0; i < sh7750_pmu.num_events; i++)
239 		__raw_writew(__raw_readw(PMCR(i)) | PMCR_PMEN, PMCR(i));
240 }
241 
242 static struct sh_pmu sh7750_pmu = {
243 	.name		= "sh7750",
244 	.num_events	= 2,
245 	.event_map	= sh7750_event_map,
246 	.max_events	= ARRAY_SIZE(sh7750_general_events),
247 	.raw_event_mask	= PMCR_PMM_MASK,
248 	.cache_events	= &sh7750_cache_events,
249 	.read		= sh7750_pmu_read,
250 	.disable	= sh7750_pmu_disable,
251 	.enable		= sh7750_pmu_enable,
252 	.disable_all	= sh7750_pmu_disable_all,
253 	.enable_all	= sh7750_pmu_enable_all,
254 };
255 
sh7750_pmu_init(void)256 static int __init sh7750_pmu_init(void)
257 {
258 	/*
259 	 * Make sure this CPU actually has perf counters.
260 	 */
261 	if (!(boot_cpu_data.flags & CPU_HAS_PERF_COUNTER)) {
262 		pr_notice("HW perf events unsupported, software events only.\n");
263 		return -ENODEV;
264 	}
265 
266 	return register_sh_pmu(&sh7750_pmu);
267 }
268 early_initcall(sh7750_pmu_init);
269