/*
 * Meta performance counter support.
 *  Copyright (C) 2012 Imagination Technologies Ltd
 *
 * This code is based on the sh pmu code:
 *  Copyright (C) 2009 Paul Mundt
 *
 * and on the arm pmu code:
 *  Copyright (C) 2009 picoChip Designs, Ltd., James Iles
 *  Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 */

#include <linux/atomic.h>
#include <linux/export.h>
#include <linux/init.h>
#include <linux/irqchip/metag.h>
#include <linux/perf_event.h>
#include <linux/slab.h>

#include <asm/core_reg.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/processor.h>

#include "perf_event.h"

static int _hw_perf_event_init(struct perf_event *);
static void _hw_perf_event_destroy(struct perf_event *);

/* Determines which core type we are */
static struct metag_pmu *metag_pmu __read_mostly;

/* Processor specific data */
static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);

/* PMU admin */
const char *perf_pmu_name(void)
{
	if (!metag_pmu)
		return NULL;

	return metag_pmu->name;
}
EXPORT_SYMBOL_GPL(perf_pmu_name);

int perf_num_counters(void)
{
	if (metag_pmu)
		return metag_pmu->max_events;

	return 0;
}
EXPORT_SYMBOL_GPL(perf_num_counters);

static inline int metag_pmu_initialised(void)
{
	return !!metag_pmu;
}

static void release_pmu_hardware(void)
{
	int irq;
	unsigned int version = (metag_pmu->version &
			(METAC_ID_MINOR_BITS | METAC_ID_REV_BITS)) >>
			METAC_ID_REV_S;

	/* Early cores don't have overflow interrupts */
	if (version < 0x0104)
		return;

	irq = internal_irq_map(17);
	if (irq >= 0)
		free_irq(irq, (void *)1);

	irq = internal_irq_map(16);
	if (irq >= 0)
		free_irq(irq, (void *)0);
}

static int reserve_pmu_hardware(void)
{
	int err = 0, irq[2];
	unsigned int version = (metag_pmu->version &
			(METAC_ID_MINOR_BITS | METAC_ID_REV_BITS)) >>
			METAC_ID_REV_S;

	/* Early cores don't have overflow interrupts */
	if (version < 0x0104)
		goto out;

	/*
	 * Bit 16 on HWSTATMETA is the interrupt for performance counter 0;
	 * similarly, 17 is the interrupt for performance counter 1.
	 * We can't (yet) interrupt on the cycle counter, because it's a
	 * register; however, it holds a 32-bit value as opposed to 24-bit.
	 */
	irq[0] = internal_irq_map(16);
	if (irq[0] < 0) {
		pr_err("unable to map internal IRQ %d\n", 16);
		goto out;
	}
	err = request_irq(irq[0], metag_pmu->handle_irq, IRQF_NOBALANCING,
			"metagpmu0", (void *)0);
	if (err) {
		pr_err("unable to request IRQ%d for metag PMU counters\n",
				irq[0]);
		goto out;
	}

	irq[1] = internal_irq_map(17);
	if (irq[1] < 0) {
		pr_err("unable to map internal IRQ %d\n", 17);
		goto out_irq1;
	}
	err = request_irq(irq[1], metag_pmu->handle_irq, IRQF_NOBALANCING,
			"metagpmu1", (void *)1);
	if (err) {
		pr_err("unable to request IRQ%d for metag PMU counters\n",
				irq[1]);
		goto out_irq1;
	}

	return 0;

out_irq1:
	free_irq(irq[0], (void *)0);
out:
	return err;
}

/* PMU operations */
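/*
 * Whole-PMU enable/disable is a no-op here: counters are enabled and
 * disabled individually in metag_pmu_start()/metag_pmu_stop() and
 * metag_pmu_add()/metag_pmu_del().
 */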
static void metag_pmu_enable(struct pmu *pmu)
{
}

static void metag_pmu_disable(struct pmu *pmu)
{
}

static int metag_pmu_event_init(struct perf_event *event)
{
	int err = 0;
	atomic_t *active_events = &metag_pmu->active_events;

	if (!metag_pmu_initialised()) {
		err = -ENODEV;
		goto out;
	}

	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	event->destroy = _hw_perf_event_destroy;

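	/*
	 * The first active event reserves the PMU hardware (overflow IRQs);
	 * the reference count keeps it reserved until the last event is
	 * destroyed in _hw_perf_event_destroy().
	 */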
	if (!atomic_inc_not_zero(active_events)) {
		mutex_lock(&metag_pmu->reserve_mutex);
		if (atomic_read(active_events) == 0)
			err = reserve_pmu_hardware();

		if (!err)
			atomic_inc(active_events);

		mutex_unlock(&metag_pmu->reserve_mutex);
	}

	/* Hardware and cache counters */
	switch (event->attr.type) {
	case PERF_TYPE_HARDWARE:
	case PERF_TYPE_HW_CACHE:
	case PERF_TYPE_RAW:
		err = _hw_perf_event_init(event);
		break;

	default:
		return -ENOENT;
	}

	if (err)
		event->destroy(event);

out:
	return err;
}

void metag_pmu_event_update(struct perf_event *event,
		struct hw_perf_event *hwc, int idx)
{
	u64 prev_raw_count, new_raw_count;
	s64 delta;

	/*
	 * If this counter is chained, it may be that the previous counter
	 * value has been changed beneath us.
	 *
	 * To get around this, we read and exchange the new raw count, then
	 * add the delta (new - prev) to the generic counter atomically.
	 *
	 * Without interrupts, this is the simplest approach.
	 */
again:
	prev_raw_count = local64_read(&hwc->prev_count);
	new_raw_count = metag_pmu->read(idx);

	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
			new_raw_count) != prev_raw_count)
		goto again;

	/*
	 * Calculate the delta and add it to the counter.
	 */
	delta = (new_raw_count - prev_raw_count) & MAX_PERIOD;

	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);
}

int metag_pmu_event_set_period(struct perf_event *event,
		struct hw_perf_event *hwc, int idx)
{
	s64 left = local64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int ret = 0;

	/* The period may have been changed */
	if (unlikely(period != hwc->last_period))
		left += period - hwc->last_period;

	if (unlikely(left <= -period)) {
		left = period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (unlikely(left <= 0)) {
		left += period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (left > (s64)metag_pmu->max_period)
		left = metag_pmu->max_period;

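	/*
	 * Program the counter with -left so that it overflows after 'left'
	 * more events; prev_count is set to match so that the next
	 * metag_pmu_event_update() computes the correct delta.
	 */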
	if (metag_pmu->write) {
		local64_set(&hwc->prev_count, -(s32)left);
		metag_pmu->write(idx, -left & MAX_PERIOD);
	}

	perf_event_update_userpage(event);

	return ret;
}

static void metag_pmu_start(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	if (WARN_ON_ONCE(idx == -1))
		return;

	/*
	 * We always have to reprogram the period, so ignore PERF_EF_RELOAD.
	 */
	if (flags & PERF_EF_RELOAD)
		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));

	hwc->state = 0;

	/*
	 * Reset the period.
	 * Some counters can't be stopped (i.e. are core global), so when the
	 * counter was 'stopped' we merely disabled the IRQ. If we don't reset
	 * the period, then we'll either: a) get an overflow too soon;
	 * or b) get one too late if the overflow happened since disabling.
	 * Obviously, this has little bearing on cores without the overflow
	 * interrupt, as the performance counter resets to zero on write
	 * anyway.
	 */
	if (metag_pmu->max_period)
		metag_pmu_event_set_period(event, hwc, hwc->idx);
	cpuc->events[idx] = event;
	metag_pmu->enable(hwc, idx);
}

static void metag_pmu_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	/*
	 * We should always update the counter on stop; see comment above
	 * why.
	 */
	if (!(hwc->state & PERF_HES_STOPPED)) {
		metag_pmu_event_update(event, hwc, hwc->idx);
		metag_pmu->disable(hwc, hwc->idx);
		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
	}
}

static int metag_pmu_add(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx = 0, ret = 0;

	perf_pmu_disable(event->pmu);

	/* check whether we're counting instructions */
	if (hwc->config == 0x100) {
		if (__test_and_set_bit(METAG_INST_COUNTER,
				cpuc->used_mask)) {
			ret = -EAGAIN;
			goto out;
		}
		idx = METAG_INST_COUNTER;
	} else {
		/* Check whether we have a spare counter */
		idx = find_first_zero_bit(cpuc->used_mask,
				atomic_read(&metag_pmu->active_events));
		if (idx >= METAG_INST_COUNTER) {
			ret = -EAGAIN;
			goto out;
		}

		__set_bit(idx, cpuc->used_mask);
	}
	hwc->idx = idx;

	/* Make sure the counter is disabled */
	metag_pmu->disable(hwc, idx);

	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
	if (flags & PERF_EF_START)
		metag_pmu_start(event, PERF_EF_RELOAD);

	perf_event_update_userpage(event);
out:
	perf_pmu_enable(event->pmu);
	return ret;
}

static void metag_pmu_del(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	WARN_ON(idx < 0);
	metag_pmu_stop(event, PERF_EF_UPDATE);
	cpuc->events[idx] = NULL;
	__clear_bit(idx, cpuc->used_mask);

	perf_event_update_userpage(event);
}

static void metag_pmu_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	/* Don't read disabled counters! */
	if (hwc->idx < 0)
		return;

	metag_pmu_event_update(event, hwc, hwc->idx);
}

static struct pmu pmu = {
	.pmu_enable	= metag_pmu_enable,
	.pmu_disable	= metag_pmu_disable,

	.event_init	= metag_pmu_event_init,

	.add		= metag_pmu_add,
	.del		= metag_pmu_del,
	.start		= metag_pmu_start,
	.stop		= metag_pmu_stop,
	.read		= metag_pmu_read,
};

/* Core counter specific functions */
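/*
 * Generic hardware event encodings; -1 marks events the core performance
 * counters cannot count (rejected in _hw_perf_event_init()).
 */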
static const int metag_general_events[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = 0x03,
	[PERF_COUNT_HW_INSTRUCTIONS] = 0x100,
	[PERF_COUNT_HW_CACHE_REFERENCES] = -1,
	[PERF_COUNT_HW_CACHE_MISSES] = -1,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1,
	[PERF_COUNT_HW_BRANCH_MISSES] = -1,
	[PERF_COUNT_HW_BUS_CYCLES] = -1,
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = -1,
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = -1,
	[PERF_COUNT_HW_REF_CPU_CYCLES] = -1,
};

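/*
 * Cache event encodings; CACHE_OP_UNSUPPORTED marks operation/result
 * combinations that cannot be counted.
 */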
static const int metag_pmu_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
	[C(L1D)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = 0x08,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(L1I)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = 0x09,
			[C(RESULT_MISS)] = 0x0a,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(DTLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = 0xd0,
			[C(RESULT_MISS)] = 0xd2,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = 0xd4,
			[C(RESULT_MISS)] = 0xd5,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(ITLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = 0xd1,
			[C(RESULT_MISS)] = 0xd3,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(BPU)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(NODE)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
};


static void _hw_perf_event_destroy(struct perf_event *event)
{
	atomic_t *active_events = &metag_pmu->active_events;
	struct mutex *pmu_mutex = &metag_pmu->reserve_mutex;

	if (atomic_dec_and_mutex_lock(active_events, pmu_mutex)) {
		release_pmu_hardware();
		mutex_unlock(pmu_mutex);
	}
}

static int _hw_perf_cache_event(int config, int *evp)
{
	unsigned long type, op, result;
	int ev;

	if (!metag_pmu->cache_events)
		return -EINVAL;

	/* Unpack config */
	type = config & 0xff;
	op = (config >> 8) & 0xff;
	result = (config >> 16) & 0xff;

	if (type >= PERF_COUNT_HW_CACHE_MAX ||
			op >= PERF_COUNT_HW_CACHE_OP_MAX ||
			result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return -EINVAL;

	ev = (*metag_pmu->cache_events)[type][op][result];
	if (ev == 0)
		return -EOPNOTSUPP;
	if (ev == -1)
		return -EINVAL;
	*evp = ev;
	return 0;
}

static int _hw_perf_event_init(struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	struct hw_perf_event *hwc = &event->hw;
	int mapping = 0, err;

	switch (attr->type) {
	case PERF_TYPE_HARDWARE:
		if (attr->config >= PERF_COUNT_HW_MAX)
			return -EINVAL;

		mapping = metag_pmu->event_map(attr->config);
		break;

	case PERF_TYPE_HW_CACHE:
		err = _hw_perf_cache_event(attr->config, &mapping);
		if (err)
			return err;
		break;

	case PERF_TYPE_RAW:
		mapping = attr->config;
		break;
	}

	/* Return early if the event is unsupported */
	if (mapping == -1)
		return -EINVAL;

	/*
	 * Don't assign an index until the event is placed into the hardware.
	 * -1 signifies that we're still deciding where to put it. On SMP
	 * systems each core has its own set of counters, so we can't do any
	 * constraint checking yet.
	 */
	hwc->idx = -1;

	/* Store the event encoding */
	hwc->config |= (unsigned long)mapping;

	/*
	 * For non-sampling runs, limit the sample_period to half of the
	 * counter width. This way, the new counter value should be less
	 * likely to overtake the previous one (unless there are IRQ latency
	 * issues...)
	 */
	if (metag_pmu->max_period) {
		if (!hwc->sample_period) {
			hwc->sample_period = metag_pmu->max_period >> 1;
			hwc->last_period = hwc->sample_period;
			local64_set(&hwc->period_left, hwc->sample_period);
		}
	}

	return 0;
}

static void metag_pmu_enable_counter(struct hw_perf_event *event, int idx)
{
	struct cpu_hw_events *events = this_cpu_ptr(&cpu_hw_events);
	unsigned int config = event->config;
	unsigned int tmp = config & 0xf0;
	unsigned long flags;

	raw_spin_lock_irqsave(&events->pmu_lock, flags);

	/*
	 * Check if we're enabling the instruction counter (index of
	 * MAX_HWEVENTS - 1)
	 */
	if (METAG_INST_COUNTER == idx) {
		WARN_ONCE((config != 0x100),
			"invalid configuration (%d) for counter (%d)\n",
			config, idx);
		local64_set(&event->prev_count, __core_reg_get(TXTACTCYC));
		goto unlock;
	}

	/* Check for a core internal or performance channel event. */
	if (tmp) {
		/* PERF_ICORE/PERF_CHAN only exist since Meta2 */
#ifdef METAC_2_1
		void *perf_addr;

		/*
		 * Anything other than a cycle count will write the low-
		 * nibble to the correct counter register.
		 */
		switch (tmp) {
		case 0xd0:
			perf_addr = (void *)PERF_ICORE(idx);
			break;

		case 0xf0:
			perf_addr = (void *)PERF_CHAN(idx);
			break;

		default:
			perf_addr = NULL;
			break;
		}

		if (perf_addr)
			metag_out32((config & 0x0f), perf_addr);
#endif

		/*
		 * Now we use the high nibble as the performance event
		 * to count.
		 */
		config = tmp >> 4;
	}

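	/*
	 * Build the PERF_COUNT control word: event select in the top nibble
	 * (bits 28-31), the thread mask bit for this hardware thread in
	 * bits 24-27, and the 24-bit count itself in bits 0-23.
	 */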
	tmp = ((config & 0xf) << 28) |
			((1 << 24) << hard_processor_id());
	if (metag_pmu->max_period)
		/*
		 * Cores supporting overflow interrupts may have had the counter
		 * set to a specific value that needs preserving.
		 */
		tmp |= metag_in32(PERF_COUNT(idx)) & 0x00ffffff;
	else
		/*
		 * Older cores reset the counter on write, so prev_count needs
		 * resetting too so we can calculate a correct delta.
		 */
		local64_set(&event->prev_count, 0);

	metag_out32(tmp, PERF_COUNT(idx));
unlock:
	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}

static void metag_pmu_disable_counter(struct hw_perf_event *event, int idx)
{
	struct cpu_hw_events *events = this_cpu_ptr(&cpu_hw_events);
	unsigned int tmp = 0;
	unsigned long flags;

	/*
	 * The cycle counter can't be disabled per se, as it's a hardware
	 * thread register which is always counting. We merely return if this
	 * is the counter we're attempting to disable.
	 */
	if (METAG_INST_COUNTER == idx)
		return;

	/*
	 * The counter value _should_ have been read prior to disabling:
	 * if we're running on an early core, the value gets reset to 0 and
	 * any read after that would be useless. On the newer cores, however,
	 * it's better to read-modify-update this for purposes of the
	 * overflow interrupt.
	 * Here we remove the thread id AND the event nibble (at least two
	 * events count core global activity and ignore the thread id mask).
	 * This only works because we don't mix thread performance counts,
	 * and event 0x00 requires a thread id mask!
	 */
	raw_spin_lock_irqsave(&events->pmu_lock, flags);

	tmp = metag_in32(PERF_COUNT(idx));
	tmp &= 0x00ffffff;
	metag_out32(tmp, PERF_COUNT(idx));

	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}

static u64 metag_pmu_read_counter(int idx)
{
	u32 tmp = 0;

	if (METAG_INST_COUNTER == idx) {
		tmp = __core_reg_get(TXTACTCYC);
		goto out;
	}

	tmp = metag_in32(PERF_COUNT(idx)) & 0x00ffffff;
out:
	return tmp;
}

static void metag_pmu_write_counter(int idx, u32 val)
{
	struct cpu_hw_events *events = this_cpu_ptr(&cpu_hw_events);
	u32 tmp = 0;
	unsigned long flags;

	/*
	 * This _shouldn't_ happen, but if it does, then we can just
	 * ignore the write, as the register is read-only and clear-on-write.
	 */
	if (METAG_INST_COUNTER == idx)
		return;

	/*
	 * We'll keep the thread mask and event id, and just update the
	 * counter itself. Also, we should bound the value to 24 bits.
	 */
	raw_spin_lock_irqsave(&events->pmu_lock, flags);

	val &= 0x00ffffff;
	tmp = metag_in32(PERF_COUNT(idx)) & 0xff000000;
	val |= tmp;
	metag_out32(val, PERF_COUNT(idx));

	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}

static int metag_pmu_event_map(int idx)
{
	return metag_general_events[idx];
}

static irqreturn_t metag_pmu_counter_overflow(int irq, void *dev)
{
	int idx = (int)dev;
	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
	struct perf_event *event = cpuhw->events[idx];
	struct hw_perf_event *hwc = &event->hw;
	struct pt_regs *regs = get_irq_regs();
	struct perf_sample_data sampledata;
	unsigned long flags;
	u32 counter = 0;

	/*
	 * We need to stop the core temporarily from generating another
	 * interrupt while we disable this counter. However, we don't want
	 * to flag the counter as free
	 */
	__global_lock2(flags);
	counter = metag_in32(PERF_COUNT(idx));
	metag_out32((counter & 0x00ffffff), PERF_COUNT(idx));
	__global_unlock2(flags);

	/* Update the counts and reset the sample period */
	metag_pmu_event_update(event, hwc, idx);
	perf_sample_data_init(&sampledata, 0, hwc->last_period);
	metag_pmu_event_set_period(event, hwc, idx);

	/*
	 * Enable the counter again once core overflow processing has
	 * completed. Note the counter value may have been modified while it was
	 * inactive to set it up ready for the next interrupt.
	 */
	if (!perf_event_overflow(event, &sampledata, regs)) {
		__global_lock2(flags);
		counter = (counter & 0xff000000) |
			  (metag_in32(PERF_COUNT(idx)) & 0x00ffffff);
		metag_out32(counter, PERF_COUNT(idx));
		__global_unlock2(flags);
	}

	return IRQ_HANDLED;
}

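/*
 * Default Meta 2 PMU description; init_hw_perf_events() clears handle_irq,
 * write and max_period on cores without overflow interrupt support.
 */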
static struct metag_pmu _metag_pmu = {
	.handle_irq	= metag_pmu_counter_overflow,
	.enable		= metag_pmu_enable_counter,
	.disable	= metag_pmu_disable_counter,
	.read		= metag_pmu_read_counter,
	.write		= metag_pmu_write_counter,
	.event_map	= metag_pmu_event_map,
	.cache_events	= &metag_pmu_cache_events,
	.max_period	= MAX_PERIOD,
	.max_events	= MAX_HWEVENTS,
};

/* PMU CPU hotplug notifier */
static int metag_pmu_starting_cpu(unsigned int cpu)
{
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);

	memset(cpuc, 0, sizeof(struct cpu_hw_events));
	raw_spin_lock_init(&cpuc->pmu_lock);

	return 0;
}

/* PMU Initialisation */
static int __init init_hw_perf_events(void)
{
	int ret = 0;
	u32 version = *(u32 *)METAC_ID;
	int major = (version & METAC_ID_MAJOR_BITS) >> METAC_ID_MAJOR_S;
	int min_rev = (version & (METAC_ID_MINOR_BITS | METAC_ID_REV_BITS))
			>> METAC_ID_REV_S;

	/* Not a Meta 2 core, so not supported */
	if (0x02 > major) {
		pr_info("no hardware counter support available\n");
		goto out;
	} else if (0x02 == major) {
		metag_pmu = &_metag_pmu;

		if (min_rev < 0x0104) {
			/*
			 * A core without overflow interrupts, and clear-on-
			 * write counters.
			 */
			metag_pmu->handle_irq = NULL;
			metag_pmu->write = NULL;
			metag_pmu->max_period = 0;
		}

		metag_pmu->name = "meta2";
		metag_pmu->version = version;
		metag_pmu->pmu = pmu;
	}

	pr_info("enabled with %s PMU driver, %d counters available\n",
			metag_pmu->name, metag_pmu->max_events);

	/*
	 * Early cores have "limited" counters - they have no overflow
	 * interrupts - and so are unable to do sampling without extra work
	 * and timer assistance.
	 */
	if (metag_pmu->max_period == 0) {
		metag_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
	}

	/* Initialise the active events and reservation mutex */
	atomic_set(&metag_pmu->active_events, 0);
	mutex_init(&metag_pmu->reserve_mutex);

	/* Clear the counters */
	metag_out32(0, PERF_COUNT(0));
	metag_out32(0, PERF_COUNT(1));

	cpuhp_setup_state(CPUHP_AP_PERF_METAG_STARTING,
			  "AP_PERF_METAG_STARTING", metag_pmu_starting_cpu,
			  NULL);

	ret = perf_pmu_register(&pmu, metag_pmu->name, PERF_TYPE_RAW);
	if (ret)
		cpuhp_remove_state_nocalls(CPUHP_AP_PERF_METAG_STARTING);
out:
	return ret;
}
early_initcall(init_hw_perf_events);