1 #include "perf_event_intel_uncore.h"
2 
3 static struct intel_uncore_type *empty_uncore[] = { NULL, };
4 struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
5 struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
6 
7 static bool pcidrv_registered;
8 struct pci_driver *uncore_pci_driver;
9 /* pci bus to socket mapping */
10 int uncore_pcibus_to_physid[256] = { [0 ... 255] = -1, };
11 struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
12 
13 static DEFINE_RAW_SPINLOCK(uncore_box_lock);
14 /* mask of cpus that collect uncore events */
15 static cpumask_t uncore_cpu_mask;
16 
17 /* constraint for the fixed counter */
18 static struct event_constraint uncore_constraint_fixed =
19 	EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
20 struct event_constraint uncore_constraint_empty =
21 	EVENT_CONSTRAINT(0, 0, 0);
22 
23 ssize_t uncore_event_show(struct kobject *kobj,
24 			  struct kobj_attribute *attr, char *buf)
25 {
26 	struct uncore_event_desc *event =
27 		container_of(attr, struct uncore_event_desc, attr);
28 	return sprintf(buf, "%s", event->config);
29 }
30 
31 struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
32 {
33 	return container_of(event->pmu, struct intel_uncore_pmu, pmu);
34 }
35 
36 struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
37 {
38 	struct intel_uncore_box *box;
39 
40 	box = *per_cpu_ptr(pmu->box, cpu);
41 	if (box)
42 		return box;
43 
44 	raw_spin_lock(&uncore_box_lock);
45 	/* Recheck in lock to handle races. */
46 	if (*per_cpu_ptr(pmu->box, cpu))
47 		goto out;
48 	list_for_each_entry(box, &pmu->box_list, list) {
49 		if (box->phys_id == topology_physical_package_id(cpu)) {
50 			atomic_inc(&box->refcnt);
51 			*per_cpu_ptr(pmu->box, cpu) = box;
52 			break;
53 		}
54 	}
55 out:
56 	raw_spin_unlock(&uncore_box_lock);
57 
58 	return *per_cpu_ptr(pmu->box, cpu);
59 }
60 
61 struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
62 {
63 	/*
64 	 * perf core schedules events on the basis of cpu; uncore events are
65 	 * collected by one of the cpus inside a physical package.
66 	 */
67 	return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id());
68 }
69 
70 u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
71 {
72 	u64 count;
73 
74 	rdmsrl(event->hw.event_base, count);
75 
76 	return count;
77 }
78 
79 /*
80  * generic get constraint function for shared match/mask registers.
81  */
82 struct event_constraint *
83 uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
84 {
85 	struct intel_uncore_extra_reg *er;
86 	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
87 	struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
88 	unsigned long flags;
89 	bool ok = false;
90 
91 	/*
92 	 * reg->alloc can be set due to existing state, so for a fake box we
93 	 * need to ignore it, otherwise we might fail to allocate proper
94 	 * fake state for this extra reg constraint.
95 	 */
96 	if (reg1->idx == EXTRA_REG_NONE ||
97 	    (!uncore_box_is_fake(box) && reg1->alloc))
98 		return NULL;
99 
100 	er = &box->shared_regs[reg1->idx];
101 	raw_spin_lock_irqsave(&er->lock, flags);
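	/*
	 * The shared register can be claimed if nobody holds a reference to
	 * it yet, or if it is already programmed with the exact match/mask
	 * values this event wants.
	 */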
102 	if (!atomic_read(&er->ref) ||
103 	    (er->config1 == reg1->config && er->config2 == reg2->config)) {
104 		atomic_inc(&er->ref);
105 		er->config1 = reg1->config;
106 		er->config2 = reg2->config;
107 		ok = true;
108 	}
109 	raw_spin_unlock_irqrestore(&er->lock, flags);
110 
111 	if (ok) {
112 		if (!uncore_box_is_fake(box))
113 			reg1->alloc = 1;
114 		return NULL;
115 	}
116 
117 	return &uncore_constraint_empty;
118 }
119 
120 void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
121 {
122 	struct intel_uncore_extra_reg *er;
123 	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
124 
125 	/*
126 	 * Only put the constraint if the extra reg was actually allocated.
127 	 * This also takes care of events which do not use an extra shared reg.
128 	 *
129 	 * Also, if this is a fake box we shouldn't touch any event state
130 	 * (reg->alloc) and we don't care about leaving inconsistent box
131 	 * state either since it will be thrown out.
132 	 */
133 	if (uncore_box_is_fake(box) || !reg1->alloc)
134 		return;
135 
136 	er = &box->shared_regs[reg1->idx];
137 	atomic_dec(&er->ref);
138 	reg1->alloc = 0;
139 }
140 
141 u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
142 {
143 	struct intel_uncore_extra_reg *er;
144 	unsigned long flags;
145 	u64 config;
146 
147 	er = &box->shared_regs[idx];
148 
149 	raw_spin_lock_irqsave(&er->lock, flags);
150 	config = er->config;
151 	raw_spin_unlock_irqrestore(&er->lock, flags);
152 
153 	return config;
154 }
155 
156 static void uncore_assign_hw_event(struct intel_uncore_box *box, struct perf_event *event, int idx)
157 {
158 	struct hw_perf_event *hwc = &event->hw;
159 
160 	hwc->idx = idx;
161 	hwc->last_tag = ++box->tags[idx];
162 
163 	if (hwc->idx == UNCORE_PMC_IDX_FIXED) {
164 		hwc->event_base = uncore_fixed_ctr(box);
165 		hwc->config_base = uncore_fixed_ctl(box);
166 		return;
167 	}
168 
169 	hwc->config_base = uncore_event_ctl(box, hwc->idx);
170 	hwc->event_base  = uncore_perf_ctr(box, hwc->idx);
171 }
172 
173 void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
174 {
175 	u64 prev_count, new_count, delta;
176 	int shift;
177 
178 	if (event->hw.idx >= UNCORE_PMC_IDX_FIXED)
179 		shift = 64 - uncore_fixed_ctr_bits(box);
180 	else
181 		shift = 64 - uncore_perf_ctr_bits(box);
182 
183 	/* the hrtimer might modify the previous event value */
184 again:
185 	prev_count = local64_read(&event->hw.prev_count);
186 	new_count = uncore_read_counter(box, event);
187 	if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
188 		goto again;
189 
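	/*
	 * The hardware counter is narrower than 64 bits. Shifting both values
	 * up by (64 - width) discards any bits above the counter width, so the
	 * subtraction and the shift back down yield the delta modulo the
	 * counter width even across a counter wrap.
	 */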
190 	delta = (new_count << shift) - (prev_count << shift);
191 	delta >>= shift;
192 
193 	local64_add(delta, &event->count);
194 }
195 
196 /*
197  * The overflow interrupt is unavailable for SandyBridge-EP and is broken
198  * on SandyBridge, so we use an hrtimer to poll the counters periodically
199  * before they can overflow.
200  */
201 static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
202 {
203 	struct intel_uncore_box *box;
204 	struct perf_event *event;
205 	unsigned long flags;
206 	int bit;
207 
208 	box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
209 	if (!box->n_active || box->cpu != smp_processor_id())
210 		return HRTIMER_NORESTART;
211 	/*
212 	 * disable local interrupts to prevent uncore_pmu_event_start/stop
213 	 * from interrupting the update process
214 	 */
215 	local_irq_save(flags);
216 
217 	/*
218 	 * handle boxes with an active event list as opposed to active
219 	 * counters
220 	 */
221 	list_for_each_entry(event, &box->active_list, active_entry) {
222 		uncore_perf_event_update(box, event);
223 	}
224 
225 	for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
226 		uncore_perf_event_update(box, box->events[bit]);
227 
228 	local_irq_restore(flags);
229 
230 	hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
231 	return HRTIMER_RESTART;
232 }
233 
234 void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
235 {
236 	__hrtimer_start_range_ns(&box->hrtimer,
237 			ns_to_ktime(box->hrtimer_duration), 0,
238 			HRTIMER_MODE_REL_PINNED, 0);
239 }
240 
241 void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
242 {
243 	hrtimer_cancel(&box->hrtimer);
244 }
245 
246 static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
247 {
248 	hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
249 	box->hrtimer.function = uncore_pmu_hrtimer;
250 }
251 
252 static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int node)
253 {
254 	struct intel_uncore_box *box;
255 	int i, size;
256 
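	/* the shared_regs[] array is allocated in the same block, right after the box */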
257 	size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg);
258 
259 	box = kzalloc_node(size, GFP_KERNEL, node);
260 	if (!box)
261 		return NULL;
262 
263 	for (i = 0; i < type->num_shared_regs; i++)
264 		raw_spin_lock_init(&box->shared_regs[i].lock);
265 
266 	uncore_pmu_init_hrtimer(box);
267 	atomic_set(&box->refcnt, 1);
268 	box->cpu = -1;
269 	box->phys_id = -1;
270 
271 	/* set default hrtimer timeout */
272 	box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
273 
274 	INIT_LIST_HEAD(&box->active_list);
275 
276 	return box;
277 }
278 
279 /*
280  * The uncore_pmu_event_init() pmu event_init callback is used
281  * as a detection point for uncore events.
282  */
283 static int uncore_pmu_event_init(struct perf_event *event);
284 
285 static bool is_uncore_event(struct perf_event *event)
286 {
287 	return event->pmu->event_init == uncore_pmu_event_init;
288 }
289 
290 static int
291 uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp)
292 {
293 	struct perf_event *event;
294 	int n, max_count;
295 
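	/* a box can hold at most num_counters generic events plus one fixed event */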
296 	max_count = box->pmu->type->num_counters;
297 	if (box->pmu->type->fixed_ctl)
298 		max_count++;
299 
300 	if (box->n_events >= max_count)
301 		return -EINVAL;
302 
303 	n = box->n_events;
304 
305 	if (is_uncore_event(leader)) {
306 		box->event_list[n] = leader;
307 		n++;
308 	}
309 
310 	if (!dogrp)
311 		return n;
312 
313 	list_for_each_entry(event, &leader->sibling_list, group_entry) {
314 		if (!is_uncore_event(event) ||
315 		    event->state <= PERF_EVENT_STATE_OFF)
316 			continue;
317 
318 		if (n >= max_count)
319 			return -EINVAL;
320 
321 		box->event_list[n] = event;
322 		n++;
323 	}
324 	return n;
325 }
326 
327 static struct event_constraint *
328 uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
329 {
330 	struct intel_uncore_type *type = box->pmu->type;
331 	struct event_constraint *c;
332 
333 	if (type->ops->get_constraint) {
334 		c = type->ops->get_constraint(box, event);
335 		if (c)
336 			return c;
337 	}
338 
339 	if (event->attr.config == UNCORE_FIXED_EVENT)
340 		return &uncore_constraint_fixed;
341 
342 	if (type->constraints) {
343 		for_each_event_constraint(c, type->constraints) {
344 			if ((event->hw.config & c->cmask) == c->code)
345 				return c;
346 		}
347 	}
348 
349 	return &type->unconstrainted;
350 }
351 
352 static void uncore_put_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
353 {
354 	if (box->pmu->type->ops->put_constraint)
355 		box->pmu->type->ops->put_constraint(box, event);
356 }
357 
358 static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
359 {
360 	unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
361 	struct event_constraint *c;
362 	int i, wmin, wmax, ret = 0;
363 	struct hw_perf_event *hwc;
364 
365 	bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
366 
367 	for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
368 		hwc = &box->event_list[i]->hw;
369 		c = uncore_get_event_constraint(box, box->event_list[i]);
370 		hwc->constraint = c;
371 		wmin = min(wmin, c->weight);
372 		wmax = max(wmax, c->weight);
373 	}
374 
375 	/* fastpath, try to reuse previous register */
376 	for (i = 0; i < n; i++) {
377 		hwc = &box->event_list[i]->hw;
378 		c = hwc->constraint;
379 
380 		/* never assigned */
381 		if (hwc->idx == -1)
382 			break;
383 
384 		/* constraint still honored */
385 		if (!test_bit(hwc->idx, c->idxmsk))
386 			break;
387 
388 		/* not already used */
389 		if (test_bit(hwc->idx, used_mask))
390 			break;
391 
392 		__set_bit(hwc->idx, used_mask);
393 		if (assign)
394 			assign[i] = hwc->idx;
395 	}
396 	/* slow path */
397 	if (i != n)
398 		ret = perf_assign_events(box->event_list, n,
399 					 wmin, wmax, assign);
400 
401 	if (!assign || ret) {
402 		for (i = 0; i < n; i++)
403 			uncore_put_event_constraint(box, box->event_list[i]);
404 	}
405 	return ret ? -EINVAL : 0;
406 }
407 
408 static void uncore_pmu_event_start(struct perf_event *event, int flags)
409 {
410 	struct intel_uncore_box *box = uncore_event_to_box(event);
411 	int idx = event->hw.idx;
412 
413 	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
414 		return;
415 
416 	if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
417 		return;
418 
419 	event->hw.state = 0;
420 	box->events[idx] = event;
421 	box->n_active++;
422 	__set_bit(idx, box->active_mask);
423 
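	/* snapshot the current count so the first update computes a correct delta */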
424 	local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
425 	uncore_enable_event(box, event);
426 
427 	if (box->n_active == 1) {
428 		uncore_enable_box(box);
429 		uncore_pmu_start_hrtimer(box);
430 	}
431 }
432 
433 static void uncore_pmu_event_stop(struct perf_event *event, int flags)
434 {
435 	struct intel_uncore_box *box = uncore_event_to_box(event);
436 	struct hw_perf_event *hwc = &event->hw;
437 
438 	if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
439 		uncore_disable_event(box, event);
440 		box->n_active--;
441 		box->events[hwc->idx] = NULL;
442 		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
443 		hwc->state |= PERF_HES_STOPPED;
444 
445 		if (box->n_active == 0) {
446 			uncore_disable_box(box);
447 			uncore_pmu_cancel_hrtimer(box);
448 		}
449 	}
450 
451 	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
452 		/*
453 		 * Drain the remaining delta count out of an event
454 		 * that we are disabling:
455 		 */
456 		uncore_perf_event_update(box, event);
457 		hwc->state |= PERF_HES_UPTODATE;
458 	}
459 }
460 
461 static int uncore_pmu_event_add(struct perf_event *event, int flags)
462 {
463 	struct intel_uncore_box *box = uncore_event_to_box(event);
464 	struct hw_perf_event *hwc = &event->hw;
465 	int assign[UNCORE_PMC_IDX_MAX];
466 	int i, n, ret;
467 
468 	if (!box)
469 		return -ENODEV;
470 
471 	ret = n = uncore_collect_events(box, event, false);
472 	if (ret < 0)
473 		return ret;
474 
475 	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
476 	if (!(flags & PERF_EF_START))
477 		hwc->state |= PERF_HES_ARCH;
478 
479 	ret = uncore_assign_events(box, assign, n);
480 	if (ret)
481 		return ret;
482 
483 	/* save events moving to new counters */
484 	for (i = 0; i < box->n_events; i++) {
485 		event = box->event_list[i];
486 		hwc = &event->hw;
487 
488 		if (hwc->idx == assign[i] &&
489 			hwc->last_tag == box->tags[assign[i]])
490 			continue;
491 		/*
492 		 * Ensure we don't accidentally enable a stopped
493 		 * counter simply because we rescheduled.
494 		 */
495 		if (hwc->state & PERF_HES_STOPPED)
496 			hwc->state |= PERF_HES_ARCH;
497 
498 		uncore_pmu_event_stop(event, PERF_EF_UPDATE);
499 	}
500 
501 	/* reprogram moved events into new counters */
502 	for (i = 0; i < n; i++) {
503 		event = box->event_list[i];
504 		hwc = &event->hw;
505 
506 		if (hwc->idx != assign[i] ||
507 			hwc->last_tag != box->tags[assign[i]])
508 			uncore_assign_hw_event(box, event, assign[i]);
509 		else if (i < box->n_events)
510 			continue;
511 
512 		if (hwc->state & PERF_HES_ARCH)
513 			continue;
514 
515 		uncore_pmu_event_start(event, 0);
516 	}
517 	box->n_events = n;
518 
519 	return 0;
520 }
521 
522 static void uncore_pmu_event_del(struct perf_event *event, int flags)
523 {
524 	struct intel_uncore_box *box = uncore_event_to_box(event);
525 	int i;
526 
527 	uncore_pmu_event_stop(event, PERF_EF_UPDATE);
528 
529 	for (i = 0; i < box->n_events; i++) {
530 		if (event == box->event_list[i]) {
531 			uncore_put_event_constraint(box, event);
532 
533 			while (++i < box->n_events)
534 				box->event_list[i - 1] = box->event_list[i];
535 
536 			--box->n_events;
537 			break;
538 		}
539 	}
540 
541 	event->hw.idx = -1;
542 	event->hw.last_tag = ~0ULL;
543 }
544 
545 void uncore_pmu_event_read(struct perf_event *event)
546 {
547 	struct intel_uncore_box *box = uncore_event_to_box(event);
548 	uncore_perf_event_update(box, event);
549 }
550 
551 /*
552  * Validation ensures that the group could be loaded onto the
553  * PMU if it were the only group available.
554  */
555 static int uncore_validate_group(struct intel_uncore_pmu *pmu,
556 				struct perf_event *event)
557 {
558 	struct perf_event *leader = event->group_leader;
559 	struct intel_uncore_box *fake_box;
560 	int ret = -EINVAL, n;
561 
562 	fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
563 	if (!fake_box)
564 		return -ENOMEM;
565 
566 	fake_box->pmu = pmu;
567 	/*
568 	 * The event is not yet connected with its
569 	 * siblings, therefore we must first collect the
570 	 * existing siblings, then add the new event
571 	 * before we can simulate the scheduling.
572 	 */
573 	n = uncore_collect_events(fake_box, leader, true);
574 	if (n < 0)
575 		goto out;
576 
577 	fake_box->n_events = n;
578 	n = uncore_collect_events(fake_box, event, false);
579 	if (n < 0)
580 		goto out;
581 
582 	fake_box->n_events = n;
583 
584 	ret = uncore_assign_events(fake_box, NULL, n);
585 out:
586 	kfree(fake_box);
587 	return ret;
588 }
589 
590 static int uncore_pmu_event_init(struct perf_event *event)
591 {
592 	struct intel_uncore_pmu *pmu;
593 	struct intel_uncore_box *box;
594 	struct hw_perf_event *hwc = &event->hw;
595 	int ret;
596 
597 	if (event->attr.type != event->pmu->type)
598 		return -ENOENT;
599 
600 	pmu = uncore_event_to_pmu(event);
601 	/* no device found for this pmu */
602 	if (pmu->func_id < 0)
603 		return -ENOENT;
604 
605 	/*
606 	 * The uncore PMU always measures at all privilege levels,
607 	 * so it doesn't make sense to specify any exclude bits.
608 	 */
609 	if (event->attr.exclude_user || event->attr.exclude_kernel ||
610 			event->attr.exclude_hv || event->attr.exclude_idle)
611 		return -EINVAL;
612 
613 	/* Sampling not supported yet */
614 	if (hwc->sample_period)
615 		return -EINVAL;
616 
617 	/*
618 	 * Place all uncore events for a particular physical package
619 	 * onto a single cpu
620 	 */
621 	if (event->cpu < 0)
622 		return -EINVAL;
623 	box = uncore_pmu_to_box(pmu, event->cpu);
624 	if (!box || box->cpu < 0)
625 		return -EINVAL;
626 	event->cpu = box->cpu;
627 
628 	event->hw.idx = -1;
629 	event->hw.last_tag = ~0ULL;
630 	event->hw.extra_reg.idx = EXTRA_REG_NONE;
631 	event->hw.branch_reg.idx = EXTRA_REG_NONE;
632 
633 	if (event->attr.config == UNCORE_FIXED_EVENT) {
634 		/* no fixed counter */
635 		if (!pmu->type->fixed_ctl)
636 			return -EINVAL;
637 		/*
638 		 * if there is only one fixed counter, only the first pmu
639 		 * can access the fixed counter
640 		 */
641 		if (pmu->type->single_fixed && pmu->pmu_idx > 0)
642 			return -EINVAL;
643 
644 		/* fixed counters have event field hardcoded to zero */
645 		hwc->config = 0ULL;
646 	} else {
647 		hwc->config = event->attr.config & pmu->type->event_mask;
648 		if (pmu->type->ops->hw_config) {
649 			ret = pmu->type->ops->hw_config(box, event);
650 			if (ret)
651 				return ret;
652 		}
653 	}
654 
655 	if (event->group_leader != event)
656 		ret = uncore_validate_group(pmu, event);
657 	else
658 		ret = 0;
659 
660 	return ret;
661 }
662 
663 static ssize_t uncore_get_attr_cpumask(struct device *dev,
664 				struct device_attribute *attr, char *buf)
665 {
666 	int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &uncore_cpu_mask);
667 
668 	buf[n++] = '\n';
669 	buf[n] = '\0';
670 	return n;
671 }
672 
673 static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
674 
675 static struct attribute *uncore_pmu_attrs[] = {
676 	&dev_attr_cpumask.attr,
677 	NULL,
678 };
679 
680 static struct attribute_group uncore_pmu_attr_group = {
681 	.attrs = uncore_pmu_attrs,
682 };
683 
684 static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
685 {
686 	int ret;
687 
688 	if (!pmu->type->pmu) {
689 		pmu->pmu = (struct pmu) {
690 			.attr_groups	= pmu->type->attr_groups,
691 			.task_ctx_nr	= perf_invalid_context,
692 			.event_init	= uncore_pmu_event_init,
693 			.add		= uncore_pmu_event_add,
694 			.del		= uncore_pmu_event_del,
695 			.start		= uncore_pmu_event_start,
696 			.stop		= uncore_pmu_event_stop,
697 			.read		= uncore_pmu_event_read,
698 		};
699 	} else {
700 		pmu->pmu = *pmu->type->pmu;
701 		pmu->pmu.attr_groups = pmu->type->attr_groups;
702 	}
703 
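	/*
	 * Single-box types are named "uncore_<type>" (or just "uncore" when
	 * the type has no name); multi-box types get the box index appended.
	 */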
704 	if (pmu->type->num_boxes == 1) {
705 		if (strlen(pmu->type->name) > 0)
706 			sprintf(pmu->name, "uncore_%s", pmu->type->name);
707 		else
708 			sprintf(pmu->name, "uncore");
709 	} else {
710 		sprintf(pmu->name, "uncore_%s_%d", pmu->type->name,
711 			pmu->pmu_idx);
712 	}
713 
714 	ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
715 	return ret;
716 }
717 
718 static void __init uncore_type_exit(struct intel_uncore_type *type)
719 {
720 	int i;
721 
722 	for (i = 0; i < type->num_boxes; i++)
723 		free_percpu(type->pmus[i].box);
724 	kfree(type->pmus);
725 	type->pmus = NULL;
726 	kfree(type->events_group);
727 	type->events_group = NULL;
728 }
729 
730 static void __init uncore_types_exit(struct intel_uncore_type **types)
731 {
732 	int i;
733 	for (i = 0; types[i]; i++)
734 		uncore_type_exit(types[i]);
735 }
736 
737 static int __init uncore_type_init(struct intel_uncore_type *type)
738 {
739 	struct intel_uncore_pmu *pmus;
740 	struct attribute_group *attr_group;
741 	struct attribute **attrs;
742 	int i, j;
743 
744 	pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL);
745 	if (!pmus)
746 		return -ENOMEM;
747 
748 	type->pmus = pmus;
749 
750 	type->unconstrainted = (struct event_constraint)
751 		__EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
752 				0, type->num_counters, 0, 0);
753 
754 	for (i = 0; i < type->num_boxes; i++) {
755 		pmus[i].func_id = -1;
756 		pmus[i].pmu_idx = i;
757 		pmus[i].type = type;
758 		INIT_LIST_HEAD(&pmus[i].box_list);
759 		pmus[i].box = alloc_percpu(struct intel_uncore_box *);
760 		if (!pmus[i].box)
761 			goto fail;
762 	}
763 
764 	if (type->event_descs) {
765 		i = 0;
766 		while (type->event_descs[i].attr.attr.name)
767 			i++;
768 
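		/*
		 * allocate the attribute_group and its NULL-terminated
		 * attribute pointer array in a single block; attrs[] starts
		 * right after the group structure.
		 */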
769 		attr_group = kzalloc(sizeof(struct attribute *) * (i + 1) +
770 					sizeof(*attr_group), GFP_KERNEL);
771 		if (!attr_group)
772 			goto fail;
773 
774 		attrs = (struct attribute **)(attr_group + 1);
775 		attr_group->name = "events";
776 		attr_group->attrs = attrs;
777 
778 		for (j = 0; j < i; j++)
779 			attrs[j] = &type->event_descs[j].attr.attr;
780 
781 		type->events_group = attr_group;
782 	}
783 
784 	type->pmu_group = &uncore_pmu_attr_group;
785 	return 0;
786 fail:
787 	uncore_type_exit(type);
788 	return -ENOMEM;
789 }
790 
791 static int __init uncore_types_init(struct intel_uncore_type **types)
792 {
793 	int i, ret;
794 
795 	for (i = 0; types[i]; i++) {
796 		ret = uncore_type_init(types[i]);
797 		if (ret)
798 			goto fail;
799 	}
800 	return 0;
801 fail:
802 	while (--i >= 0)
803 		uncore_type_exit(types[i]);
804 	return ret;
805 }
806 
807 /*
808  * add a pci uncore device
809  */
810 static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
811 {
812 	struct intel_uncore_pmu *pmu;
813 	struct intel_uncore_box *box;
814 	struct intel_uncore_type *type;
815 	int phys_id;
816 	bool first_box = false;
817 
818 	phys_id = uncore_pcibus_to_physid[pdev->bus->number];
819 	if (phys_id < 0)
820 		return -ENODEV;
821 
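	/*
	 * "Extra" PCI devices are not PMU boxes themselves; they are only
	 * recorded per socket for use by other uncore code, and no PMU is
	 * created for them.
	 */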
822 	if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
823 		int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
824 		uncore_extra_pci_dev[phys_id][idx] = pdev;
825 		pci_set_drvdata(pdev, NULL);
826 		return 0;
827 	}
828 
829 	type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
830 	box = uncore_alloc_box(type, NUMA_NO_NODE);
831 	if (!box)
832 		return -ENOMEM;
833 
834 	/*
835 	 * for performance monitoring units with multiple boxes,
836 	 * each box has a different function id.
837 	 */
838 	pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
839 	if (pmu->func_id < 0)
840 		pmu->func_id = pdev->devfn;
841 	else
842 		WARN_ON_ONCE(pmu->func_id != pdev->devfn);
843 
844 	box->phys_id = phys_id;
845 	box->pci_dev = pdev;
846 	box->pmu = pmu;
847 	uncore_box_init(box);
848 	pci_set_drvdata(pdev, box);
849 
850 	raw_spin_lock(&uncore_box_lock);
851 	if (list_empty(&pmu->box_list))
852 		first_box = true;
853 	list_add_tail(&box->list, &pmu->box_list);
854 	raw_spin_unlock(&uncore_box_lock);
855 
856 	if (first_box)
857 		uncore_pmu_register(pmu);
858 	return 0;
859 }
860 
861 static void uncore_pci_remove(struct pci_dev *pdev)
862 {
863 	struct intel_uncore_box *box = pci_get_drvdata(pdev);
864 	struct intel_uncore_pmu *pmu;
865 	int i, cpu, phys_id = uncore_pcibus_to_physid[pdev->bus->number];
866 	bool last_box = false;
867 
868 	box = pci_get_drvdata(pdev);
869 	if (!box) {
870 		for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
871 			if (uncore_extra_pci_dev[phys_id][i] == pdev) {
872 				uncore_extra_pci_dev[phys_id][i] = NULL;
873 				break;
874 			}
875 		}
876 		WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
877 		return;
878 	}
879 
880 	pmu = box->pmu;
881 	if (WARN_ON_ONCE(phys_id != box->phys_id))
882 		return;
883 
884 	pci_set_drvdata(pdev, NULL);
885 
886 	raw_spin_lock(&uncore_box_lock);
887 	list_del(&box->list);
888 	if (list_empty(&pmu->box_list))
889 		last_box = true;
890 	raw_spin_unlock(&uncore_box_lock);
891 
892 	for_each_possible_cpu(cpu) {
893 		if (*per_cpu_ptr(pmu->box, cpu) == box) {
894 			*per_cpu_ptr(pmu->box, cpu) = NULL;
895 			atomic_dec(&box->refcnt);
896 		}
897 	}
898 
899 	WARN_ON_ONCE(atomic_read(&box->refcnt) != 1);
900 	kfree(box);
901 
902 	if (last_box)
903 		perf_pmu_unregister(&pmu->pmu);
904 }
905 
906 static int __init uncore_pci_init(void)
907 {
908 	int ret;
909 
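	/* the case values below are Intel family 6 model numbers */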
910 	switch (boot_cpu_data.x86_model) {
911 	case 45: /* Sandy Bridge-EP */
912 		ret = snbep_uncore_pci_init();
913 		break;
914 	case 62: /* Ivy Bridge-EP */
915 		ret = ivbep_uncore_pci_init();
916 		break;
917 	case 63: /* Haswell-EP */
918 		ret = hswep_uncore_pci_init();
919 		break;
920 	case 42: /* Sandy Bridge */
921 		ret = snb_uncore_pci_init();
922 		break;
923 	case 58: /* Ivy Bridge */
924 		ret = ivb_uncore_pci_init();
925 		break;
926 	case 60: /* Haswell */
927 	case 69: /* Haswell Celeron */
928 		ret = hsw_uncore_pci_init();
929 		break;
930 	default:
931 		return 0;
932 	}
933 
934 	if (ret)
935 		return ret;
936 
937 	ret = uncore_types_init(uncore_pci_uncores);
938 	if (ret)
939 		return ret;
940 
941 	uncore_pci_driver->probe = uncore_pci_probe;
942 	uncore_pci_driver->remove = uncore_pci_remove;
943 
944 	ret = pci_register_driver(uncore_pci_driver);
945 	if (ret == 0)
946 		pcidrv_registered = true;
947 	else
948 		uncore_types_exit(uncore_pci_uncores);
949 
950 	return ret;
951 }
952 
953 static void __init uncore_pci_exit(void)
954 {
955 	if (pcidrv_registered) {
956 		pcidrv_registered = false;
957 		pci_unregister_driver(uncore_pci_driver);
958 		uncore_types_exit(uncore_pci_uncores);
959 	}
960 }
961 
962 /* CPU hotplug/unplug is serialized by the cpu_add_remove_lock mutex */
963 static LIST_HEAD(boxes_to_free);
964 
965 static void uncore_kfree_boxes(void)
966 {
967 	struct intel_uncore_box *box;
968 
969 	while (!list_empty(&boxes_to_free)) {
970 		box = list_entry(boxes_to_free.next,
971 				 struct intel_uncore_box, list);
972 		list_del(&box->list);
973 		kfree(box);
974 	}
975 }
976 
977 static void uncore_cpu_dying(int cpu)
978 {
979 	struct intel_uncore_type *type;
980 	struct intel_uncore_pmu *pmu;
981 	struct intel_uncore_box *box;
982 	int i, j;
983 
984 	for (i = 0; uncore_msr_uncores[i]; i++) {
985 		type = uncore_msr_uncores[i];
986 		for (j = 0; j < type->num_boxes; j++) {
987 			pmu = &type->pmus[j];
988 			box = *per_cpu_ptr(pmu->box, cpu);
989 			*per_cpu_ptr(pmu->box, cpu) = NULL;
990 			if (box && atomic_dec_and_test(&box->refcnt))
991 				list_add(&box->list, &boxes_to_free);
992 		}
993 	}
994 }
995 
996 static int uncore_cpu_starting(int cpu)
997 {
998 	struct intel_uncore_type *type;
999 	struct intel_uncore_pmu *pmu;
1000 	struct intel_uncore_box *box, *exist;
1001 	int i, j, k, phys_id;
1002 
1003 	phys_id = topology_physical_package_id(cpu);
1004 
1005 	for (i = 0; uncore_msr_uncores[i]; i++) {
1006 		type = uncore_msr_uncores[i];
1007 		for (j = 0; j < type->num_boxes; j++) {
1008 			pmu = &type->pmus[j];
1009 			box = *per_cpu_ptr(pmu->box, cpu);
1010 			/* called by uncore_cpu_init? */
1011 			if (box && box->phys_id >= 0) {
1012 				uncore_box_init(box);
1013 				continue;
1014 			}
1015 
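			/*
			 * another cpu in the same package may already own a
			 * box for this pmu; if so, share it and queue the
			 * freshly allocated one for freeing.
			 */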
1016 			for_each_online_cpu(k) {
1017 				exist = *per_cpu_ptr(pmu->box, k);
1018 				if (exist && exist->phys_id == phys_id) {
1019 					atomic_inc(&exist->refcnt);
1020 					*per_cpu_ptr(pmu->box, cpu) = exist;
1021 					if (box) {
1022 						list_add(&box->list,
1023 							 &boxes_to_free);
1024 						box = NULL;
1025 					}
1026 					break;
1027 				}
1028 			}
1029 
1030 			if (box) {
1031 				box->phys_id = phys_id;
1032 				uncore_box_init(box);
1033 			}
1034 		}
1035 	}
1036 	return 0;
1037 }
1038 
1039 static int uncore_cpu_prepare(int cpu, int phys_id)
1040 {
1041 	struct intel_uncore_type *type;
1042 	struct intel_uncore_pmu *pmu;
1043 	struct intel_uncore_box *box;
1044 	int i, j;
1045 
1046 	for (i = 0; uncore_msr_uncores[i]; i++) {
1047 		type = uncore_msr_uncores[i];
1048 		for (j = 0; j < type->num_boxes; j++) {
1049 			pmu = &type->pmus[j];
1050 			if (pmu->func_id < 0)
1051 				pmu->func_id = j;
1052 
1053 			box = uncore_alloc_box(type, cpu_to_node(cpu));
1054 			if (!box)
1055 				return -ENOMEM;
1056 
1057 			box->pmu = pmu;
1058 			box->phys_id = phys_id;
1059 			*per_cpu_ptr(pmu->box, cpu) = box;
1060 		}
1061 	}
1062 	return 0;
1063 }
1064 
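/*
 * Hand the event-collecting role for every box over from old_cpu to new_cpu.
 * old_cpu < 0 means the boxes were not yet bound to any cpu; new_cpu < 0
 * means the boxes are left without a collecting cpu.
 */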
1065 static void
1066 uncore_change_context(struct intel_uncore_type **uncores, int old_cpu, int new_cpu)
1067 {
1068 	struct intel_uncore_type *type;
1069 	struct intel_uncore_pmu *pmu;
1070 	struct intel_uncore_box *box;
1071 	int i, j;
1072 
1073 	for (i = 0; uncores[i]; i++) {
1074 		type = uncores[i];
1075 		for (j = 0; j < type->num_boxes; j++) {
1076 			pmu = &type->pmus[j];
1077 			if (old_cpu < 0)
1078 				box = uncore_pmu_to_box(pmu, new_cpu);
1079 			else
1080 				box = uncore_pmu_to_box(pmu, old_cpu);
1081 			if (!box)
1082 				continue;
1083 
1084 			if (old_cpu < 0) {
1085 				WARN_ON_ONCE(box->cpu != -1);
1086 				box->cpu = new_cpu;
1087 				continue;
1088 			}
1089 
1090 			WARN_ON_ONCE(box->cpu != old_cpu);
1091 			if (new_cpu >= 0) {
1092 				uncore_pmu_cancel_hrtimer(box);
1093 				perf_pmu_migrate_context(&pmu->pmu,
1094 						old_cpu, new_cpu);
1095 				box->cpu = new_cpu;
1096 			} else {
1097 				box->cpu = -1;
1098 			}
1099 		}
1100 	}
1101 }
1102 
1103 static void uncore_event_exit_cpu(int cpu)
1104 {
1105 	int i, phys_id, target;
1106 
1107 	/* nothing to do unless the exiting cpu was collecting uncore events */
1108 	if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
1109 		return;
1110 
1111 	/* find a new cpu to collect uncore events */
1112 	phys_id = topology_physical_package_id(cpu);
1113 	target = -1;
1114 	for_each_online_cpu(i) {
1115 		if (i == cpu)
1116 			continue;
1117 		if (phys_id == topology_physical_package_id(i)) {
1118 			target = i;
1119 			break;
1120 		}
1121 	}
1122 
1123 	/* migrate uncore events to the new cpu */
1124 	if (target >= 0)
1125 		cpumask_set_cpu(target, &uncore_cpu_mask);
1126 
1127 	uncore_change_context(uncore_msr_uncores, cpu, target);
1128 	uncore_change_context(uncore_pci_uncores, cpu, target);
1129 }
1130 
1131 static void uncore_event_init_cpu(int cpu)
1132 {
1133 	int i, phys_id;
1134 
1135 	phys_id = topology_physical_package_id(cpu);
1136 	for_each_cpu(i, &uncore_cpu_mask) {
1137 		if (phys_id == topology_physical_package_id(i))
1138 			return;
1139 	}
1140 
1141 	cpumask_set_cpu(cpu, &uncore_cpu_mask);
1142 
1143 	uncore_change_context(uncore_msr_uncores, -1, cpu);
1144 	uncore_change_context(uncore_pci_uncores, -1, cpu);
1145 }
1146 
1147 static int uncore_cpu_notifier(struct notifier_block *self,
1148 			       unsigned long action, void *hcpu)
1149 {
1150 	unsigned int cpu = (long)hcpu;
1151 
1152 	/* allocate/free data structure for uncore box */
1153 	switch (action & ~CPU_TASKS_FROZEN) {
1154 	case CPU_UP_PREPARE:
1155 		uncore_cpu_prepare(cpu, -1);
1156 		break;
1157 	case CPU_STARTING:
1158 		uncore_cpu_starting(cpu);
1159 		break;
1160 	case CPU_UP_CANCELED:
1161 	case CPU_DYING:
1162 		uncore_cpu_dying(cpu);
1163 		break;
1164 	case CPU_ONLINE:
1165 	case CPU_DEAD:
1166 		uncore_kfree_boxes();
1167 		break;
1168 	default:
1169 		break;
1170 	}
1171 
1172 	/* select the cpu that collects uncore events */
1173 	switch (action & ~CPU_TASKS_FROZEN) {
1174 	case CPU_DOWN_FAILED:
1175 	case CPU_STARTING:
1176 		uncore_event_init_cpu(cpu);
1177 		break;
1178 	case CPU_DOWN_PREPARE:
1179 		uncore_event_exit_cpu(cpu);
1180 		break;
1181 	default:
1182 		break;
1183 	}
1184 
1185 	return NOTIFY_OK;
1186 }
1187 
1188 static struct notifier_block uncore_cpu_nb = {
1189 	.notifier_call	= uncore_cpu_notifier,
1190 	/*
1191 	 * to migrate uncore events, our notifier should be executed
1192 	 * before perf core's notifier.
1193 	 */
1194 	.priority	= CPU_PRI_PERF + 1,
1195 };
1196 
1197 static void __init uncore_cpu_setup(void *dummy)
1198 {
1199 	uncore_cpu_starting(smp_processor_id());
1200 }
1201 
1202 static int __init uncore_cpu_init(void)
1203 {
1204 	int ret;
1205 
1206 	switch (boot_cpu_data.x86_model) {
1207 	case 26: /* Nehalem */
1208 	case 30:
1209 	case 37: /* Westmere */
1210 	case 44:
1211 		nhm_uncore_cpu_init();
1212 		break;
1213 	case 42: /* Sandy Bridge */
1214 	case 58: /* Ivy Bridge */
1215 		snb_uncore_cpu_init();
1216 		break;
1217 	case 45: /* Sandy Bridge-EP */
1218 		snbep_uncore_cpu_init();
1219 		break;
1220 	case 46: /* Nehalem-EX */
1221 	case 47: /* Westmere-EX aka. Xeon E7 */
1222 		nhmex_uncore_cpu_init();
1223 		break;
1224 	case 62: /* Ivy Bridge-EP */
1225 		ivbep_uncore_cpu_init();
1226 		break;
1227 	case 63: /* Haswell-EP */
1228 		hswep_uncore_cpu_init();
1229 		break;
1230 	default:
1231 		return 0;
1232 	}
1233 
1234 	ret = uncore_types_init(uncore_msr_uncores);
1235 	if (ret)
1236 		return ret;
1237 
1238 	return 0;
1239 }
1240 
1241 static int __init uncore_pmus_register(void)
1242 {
1243 	struct intel_uncore_pmu *pmu;
1244 	struct intel_uncore_type *type;
1245 	int i, j;
1246 
1247 	for (i = 0; uncore_msr_uncores[i]; i++) {
1248 		type = uncore_msr_uncores[i];
1249 		for (j = 0; j < type->num_boxes; j++) {
1250 			pmu = &type->pmus[j];
1251 			uncore_pmu_register(pmu);
1252 		}
1253 	}
1254 
1255 	return 0;
1256 }
1257 
1258 static void __init uncore_cpumask_init(void)
1259 {
1260 	int cpu;
1261 
1262 	/*
1263 	 * only invoke once, from either the msr or the pci init code
1264 	 */
1265 	if (!cpumask_empty(&uncore_cpu_mask))
1266 		return;
1267 
1268 	cpu_notifier_register_begin();
1269 
1270 	for_each_online_cpu(cpu) {
1271 		int i, phys_id = topology_physical_package_id(cpu);
1272 
1273 		for_each_cpu(i, &uncore_cpu_mask) {
1274 			if (phys_id == topology_physical_package_id(i)) {
1275 				phys_id = -1;
1276 				break;
1277 			}
1278 		}
1279 		if (phys_id < 0)
1280 			continue;
1281 
1282 		uncore_cpu_prepare(cpu, phys_id);
1283 		uncore_event_init_cpu(cpu);
1284 	}
1285 	on_each_cpu(uncore_cpu_setup, NULL, 1);
1286 
1287 	__register_cpu_notifier(&uncore_cpu_nb);
1288 
1289 	cpu_notifier_register_done();
1290 }
1291 
1292 
1293 static int __init intel_uncore_init(void)
1294 {
1295 	int ret;
1296 
1297 	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
1298 		return -ENODEV;
1299 
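	/* uncore MSR/PCI access is generally not available to guests, so bail out under a hypervisor */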
1300 	if (cpu_has_hypervisor)
1301 		return -ENODEV;
1302 
1303 	ret = uncore_pci_init();
1304 	if (ret)
1305 		goto fail;
1306 	ret = uncore_cpu_init();
1307 	if (ret) {
1308 		uncore_pci_exit();
1309 		goto fail;
1310 	}
1311 	uncore_cpumask_init();
1312 
1313 	uncore_pmus_register();
1314 	return 0;
1315 fail:
1316 	return ret;
1317 }
1318 device_initcall(intel_uncore_init);
1319