1 #include "perf_event_intel_uncore.h"
2 
3 static struct intel_uncore_type *empty_uncore[] = { NULL, };
4 struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
5 struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
6 
7 static bool pcidrv_registered;
8 struct pci_driver *uncore_pci_driver;
9 /* pci bus to socket mapping */
10 DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
11 struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
12 struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
13 
14 static DEFINE_RAW_SPINLOCK(uncore_box_lock);
15 /* mask of cpus that collect uncore events */
16 static cpumask_t uncore_cpu_mask;
17 
18 /* constraint for the fixed counter */
19 static struct event_constraint uncore_constraint_fixed =
20 	EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
21 struct event_constraint uncore_constraint_empty =
22 	EVENT_CONSTRAINT(0, 0, 0);
23 
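/* Look up the physical package (socket) id that a PCI bus belongs to. */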
24 int uncore_pcibus_to_physid(struct pci_bus *bus)
25 {
26 	struct pci2phy_map *map;
27 	int phys_id = -1;
28 
29 	raw_spin_lock(&pci2phy_map_lock);
30 	list_for_each_entry(map, &pci2phy_map_head, list) {
31 		if (map->segment == pci_domain_nr(bus)) {
32 			phys_id = map->pbus_to_physid[bus->number];
33 			break;
34 		}
35 	}
36 	raw_spin_unlock(&pci2phy_map_lock);
37 
38 	return phys_id;
39 }
40 
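/*
 * Find the pci2phy map for a PCI segment, allocating a new one if needed.
 * Called with pci2phy_map_lock held; the lock is dropped temporarily
 * around the allocation and the lookup is retried afterwards.
 */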
41 struct pci2phy_map *__find_pci2phy_map(int segment)
42 {
43 	struct pci2phy_map *map, *alloc = NULL;
44 	int i;
45 
46 	lockdep_assert_held(&pci2phy_map_lock);
47 
48 lookup:
49 	list_for_each_entry(map, &pci2phy_map_head, list) {
50 		if (map->segment == segment)
51 			goto end;
52 	}
53 
54 	if (!alloc) {
55 		raw_spin_unlock(&pci2phy_map_lock);
56 		alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
57 		raw_spin_lock(&pci2phy_map_lock);
58 
59 		if (!alloc)
60 			return NULL;
61 
62 		goto lookup;
63 	}
64 
65 	map = alloc;
66 	alloc = NULL;
67 	map->segment = segment;
68 	for (i = 0; i < 256; i++)
69 		map->pbus_to_physid[i] = -1;
70 	list_add_tail(&map->list, &pci2phy_map_head);
71 
72 end:
73 	kfree(alloc);
74 	return map;
75 }
76 
77 ssize_t uncore_event_show(struct kobject *kobj,
78 			  struct kobj_attribute *attr, char *buf)
79 {
80 	struct uncore_event_desc *event =
81 		container_of(attr, struct uncore_event_desc, attr);
82 	return sprintf(buf, "%s", event->config);
83 }
84 
85 struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
86 {
87 	return container_of(event->pmu, struct intel_uncore_pmu, pmu);
88 }
89 
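/*
 * Resolve the box that collects this pmu's events for @cpu: lockless
 * per-cpu fast path first, then a locked search of the pmu's box list
 * keyed by physical package id.
 */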
90 struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
91 {
92 	struct intel_uncore_box *box;
93 
94 	box = *per_cpu_ptr(pmu->box, cpu);
95 	if (box)
96 		return box;
97 
98 	raw_spin_lock(&uncore_box_lock);
99 	/* Recheck under the lock to handle races. */
100 	if (*per_cpu_ptr(pmu->box, cpu))
101 		goto out;
102 	list_for_each_entry(box, &pmu->box_list, list) {
103 		if (box->phys_id == topology_physical_package_id(cpu)) {
104 			atomic_inc(&box->refcnt);
105 			*per_cpu_ptr(pmu->box, cpu) = box;
106 			break;
107 		}
108 	}
109 out:
110 	raw_spin_unlock(&uncore_box_lock);
111 
112 	return *per_cpu_ptr(pmu->box, cpu);
113 }
114 
115 struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
116 {
117 	/*
118 	 * perf core schedules events on the basis of cpu; uncore events are
119 	 * collected by one of the cpus inside a physical package.
120 	 */
121 	return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id());
122 }
123 
124 u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
125 {
126 	u64 count;
127 
128 	rdmsrl(event->hw.event_base, count);
129 
130 	return count;
131 }
132 
133 /*
134  * generic get constraint function for shared match/mask registers.
135  */
136 struct event_constraint *
137 uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
138 {
139 	struct intel_uncore_extra_reg *er;
140 	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
141 	struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
142 	unsigned long flags;
143 	bool ok = false;
144 
145 	/*
146 	 * reg->alloc can be set due to existing state, so for a fake box we
147 	 * need to ignore this, otherwise we might fail to allocate proper
148 	 * fake state for this extra reg constraint.
149 	 */
150 	if (reg1->idx == EXTRA_REG_NONE ||
151 	    (!uncore_box_is_fake(box) && reg1->alloc))
152 		return NULL;
153 
154 	er = &box->shared_regs[reg1->idx];
155 	raw_spin_lock_irqsave(&er->lock, flags);
156 	if (!atomic_read(&er->ref) ||
157 	    (er->config1 == reg1->config && er->config2 == reg2->config)) {
158 		atomic_inc(&er->ref);
159 		er->config1 = reg1->config;
160 		er->config2 = reg2->config;
161 		ok = true;
162 	}
163 	raw_spin_unlock_irqrestore(&er->lock, flags);
164 
165 	if (ok) {
166 		if (!uncore_box_is_fake(box))
167 			reg1->alloc = 1;
168 		return NULL;
169 	}
170 
171 	return &uncore_constraint_empty;
172 }
173 
174 void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
175 {
176 	struct intel_uncore_extra_reg *er;
177 	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
178 
179 	/*
180 	 * Only put the constraint if the extra reg was actually allocated.
181 	 * This also takes care of events which do not use an extra shared reg.
182 	 *
183 	 * Also, if this is a fake box we shouldn't touch any event state
184 	 * (reg->alloc) and we don't care about leaving inconsistent box
185 	 * state either since it will be thrown out.
186 	 */
187 	if (uncore_box_is_fake(box) || !reg1->alloc)
188 		return;
189 
190 	er = &box->shared_regs[reg1->idx];
191 	atomic_dec(&er->ref);
192 	reg1->alloc = 0;
193 }
194 
195 u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
196 {
197 	struct intel_uncore_extra_reg *er;
198 	unsigned long flags;
199 	u64 config;
200 
201 	er = &box->shared_regs[idx];
202 
203 	raw_spin_lock_irqsave(&er->lock, flags);
204 	config = er->config;
205 	raw_spin_unlock_irqrestore(&er->lock, flags);
206 
207 	return config;
208 }
209 
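/* Program the control and counter base addresses for the assigned counter index. */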
210 static void uncore_assign_hw_event(struct intel_uncore_box *box, struct perf_event *event, int idx)
211 {
212 	struct hw_perf_event *hwc = &event->hw;
213 
214 	hwc->idx = idx;
215 	hwc->last_tag = ++box->tags[idx];
216 
217 	if (hwc->idx == UNCORE_PMC_IDX_FIXED) {
218 		hwc->event_base = uncore_fixed_ctr(box);
219 		hwc->config_base = uncore_fixed_ctl(box);
220 		return;
221 	}
222 
223 	hwc->config_base = uncore_event_ctl(box, hwc->idx);
224 	hwc->event_base  = uncore_perf_ctr(box, hwc->idx);
225 }
226 
227 void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
228 {
229 	u64 prev_count, new_count, delta;
230 	int shift;
231 
232 	if (event->hw.idx == UNCORE_PMC_IDX_FIXED)
233 		shift = 64 - uncore_fixed_ctr_bits(box);
234 	else
235 		shift = 64 - uncore_perf_ctr_bits(box);
236 
237 	/* the hrtimer might modify the previous event value */
238 again:
239 	prev_count = local64_read(&event->hw.prev_count);
240 	new_count = uncore_read_counter(box, event);
241 	if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
242 		goto again;
243 
244 	delta = (new_count << shift) - (prev_count << shift);
245 	delta >>= shift;
246 
247 	local64_add(delta, &event->count);
248 }
249 
250 /*
251  * The overflow interrupt is unavailable for SandyBridge-EP and broken
252  * on SandyBridge, so we use an hrtimer to periodically poll the
253  * counters and avoid overflow.
254  */
255 static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
256 {
257 	struct intel_uncore_box *box;
258 	struct perf_event *event;
259 	unsigned long flags;
260 	int bit;
261 
262 	box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
263 	if (!box->n_active || box->cpu != smp_processor_id())
264 		return HRTIMER_NORESTART;
265 	/*
266 	 * disable local interrupts to prevent uncore_pmu_event_start/stop
267 	 * from interrupting the update process
268 	 */
269 	local_irq_save(flags);
270 
271 	/*
272 	 * handle boxes with an active event list as opposed to active
273 	 * counters
274 	 */
275 	list_for_each_entry(event, &box->active_list, active_entry) {
276 		uncore_perf_event_update(box, event);
277 	}
278 
279 	for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
280 		uncore_perf_event_update(box, box->events[bit]);
281 
282 	local_irq_restore(flags);
283 
284 	hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
285 	return HRTIMER_RESTART;
286 }
287 
288 void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
289 {
290 	hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
291 		      HRTIMER_MODE_REL_PINNED);
292 }
293 
294 void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
295 {
296 	hrtimer_cancel(&box->hrtimer);
297 }
298 
299 static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
300 {
301 	hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
302 	box->hrtimer.function = uncore_pmu_hrtimer;
303 }
304 
305 static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int node)
306 {
307 	struct intel_uncore_box *box;
308 	int i, size;
309 
310 	size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg);
311 
312 	box = kzalloc_node(size, GFP_KERNEL, node);
313 	if (!box)
314 		return NULL;
315 
316 	for (i = 0; i < type->num_shared_regs; i++)
317 		raw_spin_lock_init(&box->shared_regs[i].lock);
318 
319 	uncore_pmu_init_hrtimer(box);
320 	atomic_set(&box->refcnt, 1);
321 	box->cpu = -1;
322 	box->phys_id = -1;
323 
324 	/* set default hrtimer timeout */
325 	box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
326 
327 	INIT_LIST_HEAD(&box->active_list);
328 
329 	return box;
330 }
331 
332 /*
333  * Using uncore_pmu_event_init pmu event_init callback
334  * as a detection point for uncore events.
335  */
336 static int uncore_pmu_event_init(struct perf_event *event);
337 
338 static bool is_uncore_event(struct perf_event *event)
339 {
340 	return event->pmu->event_init == uncore_pmu_event_init;
341 }
342 
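/*
 * Collect the leader (and, if @dogrp, its active uncore siblings) into the
 * box's event list. Returns the total number of collected events, or
 * -EINVAL if the box's counter capacity would be exceeded.
 */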
343 static int
344 uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp)
345 {
346 	struct perf_event *event;
347 	int n, max_count;
348 
349 	max_count = box->pmu->type->num_counters;
350 	if (box->pmu->type->fixed_ctl)
351 		max_count++;
352 
353 	if (box->n_events >= max_count)
354 		return -EINVAL;
355 
356 	n = box->n_events;
357 
358 	if (is_uncore_event(leader)) {
359 		box->event_list[n] = leader;
360 		n++;
361 	}
362 
363 	if (!dogrp)
364 		return n;
365 
366 	list_for_each_entry(event, &leader->sibling_list, group_entry) {
367 		if (!is_uncore_event(event) ||
368 		    event->state <= PERF_EVENT_STATE_OFF)
369 			continue;
370 
371 		if (n >= max_count)
372 			return -EINVAL;
373 
374 		box->event_list[n] = event;
375 		n++;
376 	}
377 	return n;
378 }
379 
380 static struct event_constraint *
381 uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
382 {
383 	struct intel_uncore_type *type = box->pmu->type;
384 	struct event_constraint *c;
385 
386 	if (type->ops->get_constraint) {
387 		c = type->ops->get_constraint(box, event);
388 		if (c)
389 			return c;
390 	}
391 
392 	if (event->attr.config == UNCORE_FIXED_EVENT)
393 		return &uncore_constraint_fixed;
394 
395 	if (type->constraints) {
396 		for_each_event_constraint(c, type->constraints) {
397 			if ((event->hw.config & c->cmask) == c->code)
398 				return c;
399 		}
400 	}
401 
402 	return &type->unconstrainted;
403 }
404 
405 static void uncore_put_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
406 {
407 	if (box->pmu->type->ops->put_constraint)
408 		box->pmu->type->ops->put_constraint(box, event);
409 }
410 
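/*
 * Assign hardware counters to the first @n events of the box: the fast path
 * reuses the previously assigned counters when still legal, otherwise fall
 * back to the generic perf_assign_events() constraint solver.
 */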
411 static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
412 {
413 	unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
414 	struct event_constraint *c;
415 	int i, wmin, wmax, ret = 0;
416 	struct hw_perf_event *hwc;
417 
418 	bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
419 
420 	for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
421 		c = uncore_get_event_constraint(box, box->event_list[i]);
422 		box->event_constraint[i] = c;
423 		wmin = min(wmin, c->weight);
424 		wmax = max(wmax, c->weight);
425 	}
426 
427 	/* fastpath, try to reuse previous register */
428 	for (i = 0; i < n; i++) {
429 		hwc = &box->event_list[i]->hw;
430 		c = box->event_constraint[i];
431 
432 		/* never assigned */
433 		if (hwc->idx == -1)
434 			break;
435 
436 		/* constraint still honored */
437 		if (!test_bit(hwc->idx, c->idxmsk))
438 			break;
439 
440 		/* not already used */
441 		if (test_bit(hwc->idx, used_mask))
442 			break;
443 
444 		__set_bit(hwc->idx, used_mask);
445 		if (assign)
446 			assign[i] = hwc->idx;
447 	}
448 	/* slow path */
449 	if (i != n)
450 		ret = perf_assign_events(box->event_constraint, n,
451 					 wmin, wmax, n, assign);
452 
453 	if (!assign || ret) {
454 		for (i = 0; i < n; i++)
455 			uncore_put_event_constraint(box, box->event_list[i]);
456 	}
457 	return ret ? -EINVAL : 0;
458 }
459 
460 static void uncore_pmu_event_start(struct perf_event *event, int flags)
461 {
462 	struct intel_uncore_box *box = uncore_event_to_box(event);
463 	int idx = event->hw.idx;
464 
465 	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
466 		return;
467 
468 	if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
469 		return;
470 
471 	event->hw.state = 0;
472 	box->events[idx] = event;
473 	box->n_active++;
474 	__set_bit(idx, box->active_mask);
475 
476 	local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
477 	uncore_enable_event(box, event);
478 
479 	if (box->n_active == 1) {
480 		uncore_enable_box(box);
481 		uncore_pmu_start_hrtimer(box);
482 	}
483 }
484 
485 static void uncore_pmu_event_stop(struct perf_event *event, int flags)
486 {
487 	struct intel_uncore_box *box = uncore_event_to_box(event);
488 	struct hw_perf_event *hwc = &event->hw;
489 
490 	if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
491 		uncore_disable_event(box, event);
492 		box->n_active--;
493 		box->events[hwc->idx] = NULL;
494 		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
495 		hwc->state |= PERF_HES_STOPPED;
496 
497 		if (box->n_active == 0) {
498 			uncore_disable_box(box);
499 			uncore_pmu_cancel_hrtimer(box);
500 		}
501 	}
502 
503 	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
504 		/*
505 		 * Drain the remaining delta count out of an event
506 		 * that we are disabling:
507 		 */
508 		uncore_perf_event_update(box, event);
509 		hwc->state |= PERF_HES_UPTODATE;
510 	}
511 }
512 
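/*
 * pmu::add callback: collect the new event, recompute the counter
 * assignment, then stop and reprogram any events whose counters moved.
 */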
513 static int uncore_pmu_event_add(struct perf_event *event, int flags)
514 {
515 	struct intel_uncore_box *box = uncore_event_to_box(event);
516 	struct hw_perf_event *hwc = &event->hw;
517 	int assign[UNCORE_PMC_IDX_MAX];
518 	int i, n, ret;
519 
520 	if (!box)
521 		return -ENODEV;
522 
523 	ret = n = uncore_collect_events(box, event, false);
524 	if (ret < 0)
525 		return ret;
526 
527 	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
528 	if (!(flags & PERF_EF_START))
529 		hwc->state |= PERF_HES_ARCH;
530 
531 	ret = uncore_assign_events(box, assign, n);
532 	if (ret)
533 		return ret;
534 
535 	/* save events moving to new counters */
536 	for (i = 0; i < box->n_events; i++) {
537 		event = box->event_list[i];
538 		hwc = &event->hw;
539 
540 		if (hwc->idx == assign[i] &&
541 			hwc->last_tag == box->tags[assign[i]])
542 			continue;
543 		/*
544 		 * Ensure we don't accidentally enable a stopped
545 		 * counter simply because we rescheduled.
546 		 */
547 		if (hwc->state & PERF_HES_STOPPED)
548 			hwc->state |= PERF_HES_ARCH;
549 
550 		uncore_pmu_event_stop(event, PERF_EF_UPDATE);
551 	}
552 
553 	/* reprogram moved events into new counters */
554 	for (i = 0; i < n; i++) {
555 		event = box->event_list[i];
556 		hwc = &event->hw;
557 
558 		if (hwc->idx != assign[i] ||
559 			hwc->last_tag != box->tags[assign[i]])
560 			uncore_assign_hw_event(box, event, assign[i]);
561 		else if (i < box->n_events)
562 			continue;
563 
564 		if (hwc->state & PERF_HES_ARCH)
565 			continue;
566 
567 		uncore_pmu_event_start(event, 0);
568 	}
569 	box->n_events = n;
570 
571 	return 0;
572 }
573 
574 static void uncore_pmu_event_del(struct perf_event *event, int flags)
575 {
576 	struct intel_uncore_box *box = uncore_event_to_box(event);
577 	int i;
578 
579 	uncore_pmu_event_stop(event, PERF_EF_UPDATE);
580 
581 	for (i = 0; i < box->n_events; i++) {
582 		if (event == box->event_list[i]) {
583 			uncore_put_event_constraint(box, event);
584 
585 			while (++i < box->n_events)
586 				box->event_list[i - 1] = box->event_list[i];
587 
588 			--box->n_events;
589 			break;
590 		}
591 	}
592 
593 	event->hw.idx = -1;
594 	event->hw.last_tag = ~0ULL;
595 }
596 
597 void uncore_pmu_event_read(struct perf_event *event)
598 {
599 	struct intel_uncore_box *box = uncore_event_to_box(event);
600 	uncore_perf_event_update(box, event);
601 }
602 
603 /*
604  * validation ensures the group can be loaded onto the
605  * PMU if it was the only group available.
606  */
607 static int uncore_validate_group(struct intel_uncore_pmu *pmu,
608 				struct perf_event *event)
609 {
610 	struct perf_event *leader = event->group_leader;
611 	struct intel_uncore_box *fake_box;
612 	int ret = -EINVAL, n;
613 
614 	fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
615 	if (!fake_box)
616 		return -ENOMEM;
617 
618 	fake_box->pmu = pmu;
619 	/*
620 	 * the event is not yet connected with its
621 	 * siblings, therefore we must first collect
622 	 * existing siblings, then add the new event
623 	 * before we can simulate the scheduling
624 	 */
625 	n = uncore_collect_events(fake_box, leader, true);
626 	if (n < 0)
627 		goto out;
628 
629 	fake_box->n_events = n;
630 	n = uncore_collect_events(fake_box, event, false);
631 	if (n < 0)
632 		goto out;
633 
634 	fake_box->n_events = n;
635 
636 	ret = uncore_assign_events(fake_box, NULL, n);
637 out:
638 	kfree(fake_box);
639 	return ret;
640 }
641 
642 static int uncore_pmu_event_init(struct perf_event *event)
643 {
644 	struct intel_uncore_pmu *pmu;
645 	struct intel_uncore_box *box;
646 	struct hw_perf_event *hwc = &event->hw;
647 	int ret;
648 
649 	if (event->attr.type != event->pmu->type)
650 		return -ENOENT;
651 
652 	pmu = uncore_event_to_pmu(event);
653 	/* no device found for this pmu */
654 	if (pmu->func_id < 0)
655 		return -ENOENT;
656 
657 	/*
658 	 * The uncore PMU always measures at all privilege levels, so it
659 	 * doesn't make sense to specify any exclude bits.
660 	 */
661 	if (event->attr.exclude_user || event->attr.exclude_kernel ||
662 			event->attr.exclude_hv || event->attr.exclude_idle)
663 		return -EINVAL;
664 
665 	/* Sampling not supported yet */
666 	if (hwc->sample_period)
667 		return -EINVAL;
668 
669 	/*
670 	 * Place all uncore events for a particular physical package
671 	 * onto a single cpu
672 	 */
673 	if (event->cpu < 0)
674 		return -EINVAL;
675 	box = uncore_pmu_to_box(pmu, event->cpu);
676 	if (!box || box->cpu < 0)
677 		return -EINVAL;
678 	event->cpu = box->cpu;
679 
680 	event->hw.idx = -1;
681 	event->hw.last_tag = ~0ULL;
682 	event->hw.extra_reg.idx = EXTRA_REG_NONE;
683 	event->hw.branch_reg.idx = EXTRA_REG_NONE;
684 
685 	if (event->attr.config == UNCORE_FIXED_EVENT) {
686 		/* no fixed counter */
687 		if (!pmu->type->fixed_ctl)
688 			return -EINVAL;
689 		/*
690 		 * if there is only one fixed counter, only the first pmu
691 		 * can access the fixed counter
692 		 */
693 		if (pmu->type->single_fixed && pmu->pmu_idx > 0)
694 			return -EINVAL;
695 
696 		/* fixed counters have event field hardcoded to zero */
697 		hwc->config = 0ULL;
698 	} else {
699 		hwc->config = event->attr.config & pmu->type->event_mask;
700 		if (pmu->type->ops->hw_config) {
701 			ret = pmu->type->ops->hw_config(box, event);
702 			if (ret)
703 				return ret;
704 		}
705 	}
706 
707 	if (event->group_leader != event)
708 		ret = uncore_validate_group(pmu, event);
709 	else
710 		ret = 0;
711 
712 	return ret;
713 }
714 
715 static ssize_t uncore_get_attr_cpumask(struct device *dev,
716 				struct device_attribute *attr, char *buf)
717 {
718 	return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask);
719 }
720 
721 static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
722 
723 static struct attribute *uncore_pmu_attrs[] = {
724 	&dev_attr_cpumask.attr,
725 	NULL,
726 };
727 
728 static struct attribute_group uncore_pmu_attr_group = {
729 	.attrs = uncore_pmu_attrs,
730 };
731 
732 static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
733 {
734 	int ret;
735 
736 	if (!pmu->type->pmu) {
737 		pmu->pmu = (struct pmu) {
738 			.attr_groups	= pmu->type->attr_groups,
739 			.task_ctx_nr	= perf_invalid_context,
740 			.event_init	= uncore_pmu_event_init,
741 			.add		= uncore_pmu_event_add,
742 			.del		= uncore_pmu_event_del,
743 			.start		= uncore_pmu_event_start,
744 			.stop		= uncore_pmu_event_stop,
745 			.read		= uncore_pmu_event_read,
746 		};
747 	} else {
748 		pmu->pmu = *pmu->type->pmu;
749 		pmu->pmu.attr_groups = pmu->type->attr_groups;
750 	}
751 
752 	if (pmu->type->num_boxes == 1) {
753 		if (strlen(pmu->type->name) > 0)
754 			sprintf(pmu->name, "uncore_%s", pmu->type->name);
755 		else
756 			sprintf(pmu->name, "uncore");
757 	} else {
758 		sprintf(pmu->name, "uncore_%s_%d", pmu->type->name,
759 			pmu->pmu_idx);
760 	}
761 
762 	ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
763 	return ret;
764 }
765 
766 static void __init uncore_type_exit(struct intel_uncore_type *type)
767 {
768 	int i;
769 
770 	for (i = 0; i < type->num_boxes; i++)
771 		free_percpu(type->pmus[i].box);
772 	kfree(type->pmus);
773 	type->pmus = NULL;
774 	kfree(type->events_group);
775 	type->events_group = NULL;
776 }
777 
778 static void __init uncore_types_exit(struct intel_uncore_type **types)
779 {
780 	int i;
781 	for (i = 0; types[i]; i++)
782 		uncore_type_exit(types[i]);
783 }
784 
785 static int __init uncore_type_init(struct intel_uncore_type *type)
786 {
787 	struct intel_uncore_pmu *pmus;
788 	struct attribute_group *attr_group;
789 	struct attribute **attrs;
790 	int i, j;
791 
792 	pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL);
793 	if (!pmus)
794 		return -ENOMEM;
795 
796 	type->pmus = pmus;
797 
798 	type->unconstrainted = (struct event_constraint)
799 		__EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
800 				0, type->num_counters, 0, 0);
801 
802 	for (i = 0; i < type->num_boxes; i++) {
803 		pmus[i].func_id = -1;
804 		pmus[i].pmu_idx = i;
805 		pmus[i].type = type;
806 		INIT_LIST_HEAD(&pmus[i].box_list);
807 		pmus[i].box = alloc_percpu(struct intel_uncore_box *);
808 		if (!pmus[i].box)
809 			goto fail;
810 	}
811 
812 	if (type->event_descs) {
813 		i = 0;
814 		while (type->event_descs[i].attr.attr.name)
815 			i++;
816 
817 		attr_group = kzalloc(sizeof(struct attribute *) * (i + 1) +
818 					sizeof(*attr_group), GFP_KERNEL);
819 		if (!attr_group)
820 			goto fail;
821 
822 		attrs = (struct attribute **)(attr_group + 1);
823 		attr_group->name = "events";
824 		attr_group->attrs = attrs;
825 
826 		for (j = 0; j < i; j++)
827 			attrs[j] = &type->event_descs[j].attr.attr;
828 
829 		type->events_group = attr_group;
830 	}
831 
832 	type->pmu_group = &uncore_pmu_attr_group;
833 	return 0;
834 fail:
835 	uncore_type_exit(type);
836 	return -ENOMEM;
837 }
838 
839 static int __init uncore_types_init(struct intel_uncore_type **types)
840 {
841 	int i, ret;
842 
843 	for (i = 0; types[i]; i++) {
844 		ret = uncore_type_init(types[i]);
845 		if (ret)
846 			goto fail;
847 	}
848 	return 0;
849 fail:
850 	while (--i >= 0)
851 		uncore_type_exit(types[i]);
852 	return ret;
853 }
854 
855 /*
856  * add a pci uncore device
857  */
858 static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
859 {
860 	struct intel_uncore_pmu *pmu;
861 	struct intel_uncore_box *box;
862 	struct intel_uncore_type *type;
863 	int phys_id;
864 	bool first_box = false;
865 
866 	phys_id = uncore_pcibus_to_physid(pdev->bus);
867 	if (phys_id < 0)
868 		return -ENODEV;
869 
870 	if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
871 		int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
872 		uncore_extra_pci_dev[phys_id][idx] = pdev;
873 		pci_set_drvdata(pdev, NULL);
874 		return 0;
875 	}
876 
877 	type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
878 	box = uncore_alloc_box(type, NUMA_NO_NODE);
879 	if (!box)
880 		return -ENOMEM;
881 
882 	/*
883 	 * for a performance monitoring unit with multiple boxes,
884 	 * each box has a different function id.
885 	 */
886 	pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
887 	if (pmu->func_id < 0)
888 		pmu->func_id = pdev->devfn;
889 	else
890 		WARN_ON_ONCE(pmu->func_id != pdev->devfn);
891 
892 	box->phys_id = phys_id;
893 	box->pci_dev = pdev;
894 	box->pmu = pmu;
895 	uncore_box_init(box);
896 	pci_set_drvdata(pdev, box);
897 
898 	raw_spin_lock(&uncore_box_lock);
899 	if (list_empty(&pmu->box_list))
900 		first_box = true;
901 	list_add_tail(&box->list, &pmu->box_list);
902 	raw_spin_unlock(&uncore_box_lock);
903 
904 	if (first_box)
905 		uncore_pmu_register(pmu);
906 	return 0;
907 }
908 
909 static void uncore_pci_remove(struct pci_dev *pdev)
910 {
911 	struct intel_uncore_box *box = pci_get_drvdata(pdev);
912 	struct intel_uncore_pmu *pmu;
913 	int i, cpu, phys_id;
914 	bool last_box = false;
915 
916 	phys_id = uncore_pcibus_to_physid(pdev->bus);
917 	box = pci_get_drvdata(pdev);
918 	if (!box) {
919 		for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
920 			if (uncore_extra_pci_dev[phys_id][i] == pdev) {
921 				uncore_extra_pci_dev[phys_id][i] = NULL;
922 				break;
923 			}
924 		}
925 		WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
926 		return;
927 	}
928 
929 	pmu = box->pmu;
930 	if (WARN_ON_ONCE(phys_id != box->phys_id))
931 		return;
932 
933 	pci_set_drvdata(pdev, NULL);
934 
935 	raw_spin_lock(&uncore_box_lock);
936 	list_del(&box->list);
937 	if (list_empty(&pmu->box_list))
938 		last_box = true;
939 	raw_spin_unlock(&uncore_box_lock);
940 
941 	for_each_possible_cpu(cpu) {
942 		if (*per_cpu_ptr(pmu->box, cpu) == box) {
943 			*per_cpu_ptr(pmu->box, cpu) = NULL;
944 			atomic_dec(&box->refcnt);
945 		}
946 	}
947 
948 	WARN_ON_ONCE(atomic_read(&box->refcnt) != 1);
949 	kfree(box);
950 
951 	if (last_box)
952 		perf_pmu_unregister(&pmu->pmu);
953 }
954 
955 static int __init uncore_pci_init(void)
956 {
957 	int ret;
958 
959 	switch (boot_cpu_data.x86_model) {
960 	case 45: /* Sandy Bridge-EP */
961 		ret = snbep_uncore_pci_init();
962 		break;
963 	case 62: /* Ivy Bridge-EP */
964 		ret = ivbep_uncore_pci_init();
965 		break;
966 	case 63: /* Haswell-EP */
967 		ret = hswep_uncore_pci_init();
968 		break;
969 	case 86: /* BDX-DE */
970 		ret = bdx_uncore_pci_init();
971 		break;
972 	case 42: /* Sandy Bridge */
973 		ret = snb_uncore_pci_init();
974 		break;
975 	case 58: /* Ivy Bridge */
976 		ret = ivb_uncore_pci_init();
977 		break;
978 	case 60: /* Haswell */
979 	case 69: /* Haswell Celeron */
980 		ret = hsw_uncore_pci_init();
981 		break;
982 	case 61: /* Broadwell */
983 		ret = bdw_uncore_pci_init();
984 		break;
985 	default:
986 		return 0;
987 	}
988 
989 	if (ret)
990 		return ret;
991 
992 	ret = uncore_types_init(uncore_pci_uncores);
993 	if (ret)
994 		return ret;
995 
996 	uncore_pci_driver->probe = uncore_pci_probe;
997 	uncore_pci_driver->remove = uncore_pci_remove;
998 
999 	ret = pci_register_driver(uncore_pci_driver);
1000 	if (ret == 0)
1001 		pcidrv_registered = true;
1002 	else
1003 		uncore_types_exit(uncore_pci_uncores);
1004 
1005 	return ret;
1006 }
1007 
1008 static void __init uncore_pci_exit(void)
1009 {
1010 	if (pcidrv_registered) {
1011 		pcidrv_registered = false;
1012 		pci_unregister_driver(uncore_pci_driver);
1013 		uncore_types_exit(uncore_pci_uncores);
1014 	}
1015 }
1016 
1017 /* CPU hotplug/unplug is serialized by the cpu_add_remove_lock mutex */
1018 static LIST_HEAD(boxes_to_free);
1019 
1020 static void uncore_kfree_boxes(void)
1021 {
1022 	struct intel_uncore_box *box;
1023 
1024 	while (!list_empty(&boxes_to_free)) {
1025 		box = list_entry(boxes_to_free.next,
1026 				 struct intel_uncore_box, list);
1027 		list_del(&box->list);
1028 		kfree(box);
1029 	}
1030 }
1031 
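/*
 * Drop the dying cpu's reference on each of its MSR uncore boxes; boxes
 * that lose their last user are queued on boxes_to_free.
 */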
1032 static void uncore_cpu_dying(int cpu)
1033 {
1034 	struct intel_uncore_type *type;
1035 	struct intel_uncore_pmu *pmu;
1036 	struct intel_uncore_box *box;
1037 	int i, j;
1038 
1039 	for (i = 0; uncore_msr_uncores[i]; i++) {
1040 		type = uncore_msr_uncores[i];
1041 		for (j = 0; j < type->num_boxes; j++) {
1042 			pmu = &type->pmus[j];
1043 			box = *per_cpu_ptr(pmu->box, cpu);
1044 			*per_cpu_ptr(pmu->box, cpu) = NULL;
1045 			if (box && atomic_dec_and_test(&box->refcnt))
1046 				list_add(&box->list, &boxes_to_free);
1047 		}
1048 	}
1049 }
1050 
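/*
 * Attach the cpu to an existing box of its physical package when one is
 * already in use, otherwise initialize the box that was prepared for it.
 */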
1051 static int uncore_cpu_starting(int cpu)
1052 {
1053 	struct intel_uncore_type *type;
1054 	struct intel_uncore_pmu *pmu;
1055 	struct intel_uncore_box *box, *exist;
1056 	int i, j, k, phys_id;
1057 
1058 	phys_id = topology_physical_package_id(cpu);
1059 
1060 	for (i = 0; uncore_msr_uncores[i]; i++) {
1061 		type = uncore_msr_uncores[i];
1062 		for (j = 0; j < type->num_boxes; j++) {
1063 			pmu = &type->pmus[j];
1064 			box = *per_cpu_ptr(pmu->box, cpu);
1065 			/* called by uncore_cpu_init? */
1066 			if (box && box->phys_id >= 0) {
1067 				uncore_box_init(box);
1068 				continue;
1069 			}
1070 
1071 			for_each_online_cpu(k) {
1072 				exist = *per_cpu_ptr(pmu->box, k);
1073 				if (exist && exist->phys_id == phys_id) {
1074 					atomic_inc(&exist->refcnt);
1075 					*per_cpu_ptr(pmu->box, cpu) = exist;
1076 					if (box) {
1077 						list_add(&box->list,
1078 							 &boxes_to_free);
1079 						box = NULL;
1080 					}
1081 					break;
1082 				}
1083 			}
1084 
1085 			if (box) {
1086 				box->phys_id = phys_id;
1087 				uncore_box_init(box);
1088 			}
1089 		}
1090 	}
1091 	return 0;
1092 }
1093 
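/* Allocate one box per MSR uncore pmu for the incoming cpu. */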
1094 static int uncore_cpu_prepare(int cpu, int phys_id)
1095 {
1096 	struct intel_uncore_type *type;
1097 	struct intel_uncore_pmu *pmu;
1098 	struct intel_uncore_box *box;
1099 	int i, j;
1100 
1101 	for (i = 0; uncore_msr_uncores[i]; i++) {
1102 		type = uncore_msr_uncores[i];
1103 		for (j = 0; j < type->num_boxes; j++) {
1104 			pmu = &type->pmus[j];
1105 			if (pmu->func_id < 0)
1106 				pmu->func_id = j;
1107 
1108 			box = uncore_alloc_box(type, cpu_to_node(cpu));
1109 			if (!box)
1110 				return -ENOMEM;
1111 
1112 			box->pmu = pmu;
1113 			box->phys_id = phys_id;
1114 			*per_cpu_ptr(pmu->box, cpu) = box;
1115 		}
1116 	}
1117 	return 0;
1118 }
1119 
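/*
 * Move uncore event collection for each box from @old_cpu to @new_cpu.
 * old_cpu < 0 means the boxes had no collecting cpu yet; new_cpu < 0 means
 * no replacement cpu is available on the package.
 */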
1120 static void
1121 uncore_change_context(struct intel_uncore_type **uncores, int old_cpu, int new_cpu)
1122 {
1123 	struct intel_uncore_type *type;
1124 	struct intel_uncore_pmu *pmu;
1125 	struct intel_uncore_box *box;
1126 	int i, j;
1127 
1128 	for (i = 0; uncores[i]; i++) {
1129 		type = uncores[i];
1130 		for (j = 0; j < type->num_boxes; j++) {
1131 			pmu = &type->pmus[j];
1132 			if (old_cpu < 0)
1133 				box = uncore_pmu_to_box(pmu, new_cpu);
1134 			else
1135 				box = uncore_pmu_to_box(pmu, old_cpu);
1136 			if (!box)
1137 				continue;
1138 
1139 			if (old_cpu < 0) {
1140 				WARN_ON_ONCE(box->cpu != -1);
1141 				box->cpu = new_cpu;
1142 				continue;
1143 			}
1144 
1145 			WARN_ON_ONCE(box->cpu != old_cpu);
1146 			if (new_cpu >= 0) {
1147 				uncore_pmu_cancel_hrtimer(box);
1148 				perf_pmu_migrate_context(&pmu->pmu,
1149 						old_cpu, new_cpu);
1150 				box->cpu = new_cpu;
1151 			} else {
1152 				box->cpu = -1;
1153 			}
1154 		}
1155 	}
1156 }
1157 
1158 static void uncore_event_exit_cpu(int cpu)
1159 {
1160 	int i, phys_id, target;
1161 
1162 	/* if exiting cpu is used for collecting uncore events */
1163 	if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
1164 		return;
1165 
1166 	/* find a new cpu to collect uncore events */
1167 	phys_id = topology_physical_package_id(cpu);
1168 	target = -1;
1169 	for_each_online_cpu(i) {
1170 		if (i == cpu)
1171 			continue;
1172 		if (phys_id == topology_physical_package_id(i)) {
1173 			target = i;
1174 			break;
1175 		}
1176 	}
1177 
1178 	/* migrate uncore events to the new cpu */
1179 	if (target >= 0)
1180 		cpumask_set_cpu(target, &uncore_cpu_mask);
1181 
1182 	uncore_change_context(uncore_msr_uncores, cpu, target);
1183 	uncore_change_context(uncore_pci_uncores, cpu, target);
1184 }
1185 
1186 static void uncore_event_init_cpu(int cpu)
1187 {
1188 	int i, phys_id;
1189 
1190 	phys_id = topology_physical_package_id(cpu);
1191 	for_each_cpu(i, &uncore_cpu_mask) {
1192 		if (phys_id == topology_physical_package_id(i))
1193 			return;
1194 	}
1195 
1196 	cpumask_set_cpu(cpu, &uncore_cpu_mask);
1197 
1198 	uncore_change_context(uncore_msr_uncores, -1, cpu);
1199 	uncore_change_context(uncore_pci_uncores, -1, cpu);
1200 }
1201 
1202 static int uncore_cpu_notifier(struct notifier_block *self,
1203 			       unsigned long action, void *hcpu)
1204 {
1205 	unsigned int cpu = (long)hcpu;
1206 
1207 	/* allocate/free data structure for uncore box */
1208 	switch (action & ~CPU_TASKS_FROZEN) {
1209 	case CPU_UP_PREPARE:
1210 		uncore_cpu_prepare(cpu, -1);
1211 		break;
1212 	case CPU_STARTING:
1213 		uncore_cpu_starting(cpu);
1214 		break;
1215 	case CPU_UP_CANCELED:
1216 	case CPU_DYING:
1217 		uncore_cpu_dying(cpu);
1218 		break;
1219 	case CPU_ONLINE:
1220 	case CPU_DEAD:
1221 		uncore_kfree_boxes();
1222 		break;
1223 	default:
1224 		break;
1225 	}
1226 
1227 	/* select the cpu that collects uncore events */
1228 	switch (action & ~CPU_TASKS_FROZEN) {
1229 	case CPU_DOWN_FAILED:
1230 	case CPU_STARTING:
1231 		uncore_event_init_cpu(cpu);
1232 		break;
1233 	case CPU_DOWN_PREPARE:
1234 		uncore_event_exit_cpu(cpu);
1235 		break;
1236 	default:
1237 		break;
1238 	}
1239 
1240 	return NOTIFY_OK;
1241 }
1242 
1243 static struct notifier_block uncore_cpu_nb = {
1244 	.notifier_call	= uncore_cpu_notifier,
1245 	/*
1246 	 * to migrate uncore events, our notifier should be executed
1247 	 * before perf core's notifier.
1248 	 */
1249 	.priority	= CPU_PRI_PERF + 1,
1250 };
1251 
1252 static void __init uncore_cpu_setup(void *dummy)
1253 {
1254 	uncore_cpu_starting(smp_processor_id());
1255 }
1256 
1257 static int __init uncore_cpu_init(void)
1258 {
1259 	int ret;
1260 
1261 	switch (boot_cpu_data.x86_model) {
1262 	case 26: /* Nehalem */
1263 	case 30:
1264 	case 37: /* Westmere */
1265 	case 44:
1266 		nhm_uncore_cpu_init();
1267 		break;
1268 	case 42: /* Sandy Bridge */
1269 	case 58: /* Ivy Bridge */
1270 	case 60: /* Haswell */
1271 	case 69: /* Haswell */
1272 	case 70: /* Haswell */
1273 	case 61: /* Broadwell */
1274 	case 71: /* Broadwell */
1275 		snb_uncore_cpu_init();
1276 		break;
1277 	case 45: /* Sandy Bridge-EP */
1278 		snbep_uncore_cpu_init();
1279 		break;
1280 	case 46: /* Nehalem-EX */
1281 	case 47: /* Westmere-EX aka. Xeon E7 */
1282 		nhmex_uncore_cpu_init();
1283 		break;
1284 	case 62: /* Ivy Bridge-EP */
1285 		ivbep_uncore_cpu_init();
1286 		break;
1287 	case 63: /* Haswell-EP */
1288 		hswep_uncore_cpu_init();
1289 		break;
1290 	case 86: /* BDX-DE */
1291 		bdx_uncore_cpu_init();
1292 		break;
1293 	default:
1294 		return 0;
1295 	}
1296 
1297 	ret = uncore_types_init(uncore_msr_uncores);
1298 	if (ret)
1299 		return ret;
1300 
1301 	return 0;
1302 }
1303 
1304 static int __init uncore_pmus_register(void)
1305 {
1306 	struct intel_uncore_pmu *pmu;
1307 	struct intel_uncore_type *type;
1308 	int i, j;
1309 
1310 	for (i = 0; uncore_msr_uncores[i]; i++) {
1311 		type = uncore_msr_uncores[i];
1312 		for (j = 0; j < type->num_boxes; j++) {
1313 			pmu = &type->pmus[j];
1314 			uncore_pmu_register(pmu);
1315 		}
1316 	}
1317 
1318 	return 0;
1319 }
1320 
1321 static void __init uncore_cpumask_init(void)
1322 {
1323 	int cpu;
1324 
1325 	/*
1326 	 * only invoked once, from the msr or pci init code
1327 	 */
1328 	if (!cpumask_empty(&uncore_cpu_mask))
1329 		return;
1330 
1331 	cpu_notifier_register_begin();
1332 
1333 	for_each_online_cpu(cpu) {
1334 		int i, phys_id = topology_physical_package_id(cpu);
1335 
1336 		for_each_cpu(i, &uncore_cpu_mask) {
1337 			if (phys_id == topology_physical_package_id(i)) {
1338 				phys_id = -1;
1339 				break;
1340 			}
1341 		}
1342 		if (phys_id < 0)
1343 			continue;
1344 
1345 		uncore_cpu_prepare(cpu, phys_id);
1346 		uncore_event_init_cpu(cpu);
1347 	}
1348 	on_each_cpu(uncore_cpu_setup, NULL, 1);
1349 
1350 	__register_cpu_notifier(&uncore_cpu_nb);
1351 
1352 	cpu_notifier_register_done();
1353 }
1354 
1355 
1356 static int __init intel_uncore_init(void)
1357 {
1358 	int ret;
1359 
1360 	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
1361 		return -ENODEV;
1362 
1363 	if (cpu_has_hypervisor)
1364 		return -ENODEV;
1365 
1366 	ret = uncore_pci_init();
1367 	if (ret)
1368 		goto fail;
1369 	ret = uncore_cpu_init();
1370 	if (ret) {
1371 		uncore_pci_exit();
1372 		goto fail;
1373 	}
1374 	uncore_cpumask_init();
1375 
1376 	uncore_pmus_register();
1377 	return 0;
1378 fail:
1379 	return ret;
1380 }
1381 device_initcall(intel_uncore_init);
1382