/*
 * Debug Store support
 *
 * This provides a low-level interface to the hardware's Debug Store
 * feature that is used for branch trace store (BTS) and
 * precise event-based sampling (PEBS).
 *
 * It manages:
 * - DS and BTS hardware configuration
 * - buffer overflow handling (to be done)
 * - buffer access
 *
 * It does not do:
 * - security checking (is the caller allowed to trace the task)
 * - buffer allocation (memory accounting)
 *
 *
 * Copyright (C) 2007-2009 Intel Corporation.
 * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
 */


#include <asm/ds.h>

#include <linux/errno.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/kernel.h>


/*
 * The configuration for a particular DS hardware implementation.
 */
struct ds_configuration {
	/* the name of the configuration */
	const char *name;
	/* the size of one pointer-typed field in the DS structure and
	 * in the BTS and PEBS buffers in bytes;
	 * this covers the first 8 DS fields related to buffer management. */
	unsigned char  sizeof_field;
	/* the size of a BTS/PEBS record in bytes */
	unsigned char  sizeof_rec[2];
	/* a series of bit-masks to control various features indexed
	 * by enum ds_feature */
	unsigned long ctl[dsf_ctl_max];
};
static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array);

#define ds_cfg per_cpu(ds_cfg_array, smp_processor_id())

#define MAX_SIZEOF_DS (12 * 8)	/* maximal size of a DS configuration */
#define MAX_SIZEOF_BTS (3 * 8)	/* maximal size of a BTS record */
#define DS_ALIGNMENT (1 << 3)	/* BTS and PEBS buffer alignment */

#define BTS_CONTROL \
 (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\
  ds_cfg.ctl[dsf_bts_overflow])


/*
 * A BTS or PEBS tracer.
 *
 * This holds the configuration of the tracer and serves as a handle
 * to identify tracers.
 */
struct ds_tracer {
	/* the DS context (partially) owned by this tracer */
	struct ds_context *context;
	/* the buffer provided on ds_request() and its size in bytes */
	void *buffer;
	size_t size;
};

struct bts_tracer {
	/* the common DS part */
	struct ds_tracer ds;
	/* the trace including the DS configuration */
	struct bts_trace trace;
	/* buffer overflow notification function */
	bts_ovfl_callback_t ovfl;
};

struct pebs_tracer {
	/* the common DS part */
	struct ds_tracer ds;
	/* the trace including the DS configuration */
	struct pebs_trace trace;
	/* buffer overflow notification function */
	pebs_ovfl_callback_t ovfl;
};

/*
 * Debug Store (DS) save area configuration (see Intel 64 and IA-32
 * Architectures Software Developer's Manual, section 18.5)
 *
 * The DS configuration consists of the following fields; different
 * architectures vary in the size of those fields.
 * - double-word aligned base linear address of the BTS buffer
 * - write pointer into the BTS buffer
 * - end linear address of the BTS buffer (one byte beyond the end of
 *   the buffer)
 * - interrupt pointer into BTS buffer
 *   (interrupt occurs when write pointer passes interrupt pointer)
 * - double-word aligned base linear address of the PEBS buffer
 * - write pointer into the PEBS buffer
 * - end linear address of the PEBS buffer (one byte beyond the end of
 *   the buffer)
 * - interrupt pointer into PEBS buffer
 *   (interrupt occurs when write pointer passes interrupt pointer)
 * - value to which counter is reset following counter overflow
 *
 * Later architectures use 64bit pointers throughout, whereas earlier
 * architectures use 32bit pointers in 32bit mode.
 *
 *
 * We compute the base address for the first 8 fields based on:
 * - the field size stored in the DS configuration
 * - the relative field position
 * - an offset giving the start of the respective region
 *
 * This offset is further used to index various arrays holding
 * information for BTS and PEBS at the respective index.
 *
 * On later 32bit processors, we only access the lower 32bit of the
 * 64bit pointer fields. The upper halves will be zeroed out.
 */

enum ds_field {
	ds_buffer_base = 0,
	ds_index,
	ds_absolute_maximum,
	ds_interrupt_threshold,
};

enum ds_qualifier {
	ds_bts  = 0,
	ds_pebs
};

static inline unsigned long ds_get(const unsigned char *base,
				   enum ds_qualifier qual, enum ds_field field)
{
	base += (ds_cfg.sizeof_field * (field + (4 * qual)));
	return *(unsigned long *)base;
}

static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
			  enum ds_field field, unsigned long value)
{
	base += (ds_cfg.sizeof_field * (field + (4 * qual)));
	(*(unsigned long *)base) = value;
}
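
/*
 * Illustrative sketch, not part of the original file: a worked example
 * of the field addressing above, assuming a 64bit DS area
 * (sizeof_field == 8). The PEBS write pointer, for instance, lives at
 * offset 8 * (ds_index + 4 * ds_pebs) = 8 * (1 + 4) = 40 bytes.
 */
#if 0
static void ds_field_example(unsigned char *ds)
{
	/* read the PEBS write pointer at byte offset 40 (unused here) */
	unsigned long pebs_index = ds_get(ds, ds_pebs, ds_index);

	/* rewind the BTS write pointer to the buffer base (offset 0) */
	ds_set(ds, ds_bts, ds_index, ds_get(ds, ds_bts, ds_buffer_base));
}
#endif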


/*
 * Locking is done only for allocating BTS or PEBS resources.
 */
static DEFINE_SPINLOCK(ds_lock);


/*
 * We either support (system-wide) per-cpu or per-thread allocation.
 * We distinguish the two based on the task_struct pointer, where a
 * NULL pointer indicates per-cpu allocation for the current cpu.
 *
 * Allocations are use-counted. As soon as resources are allocated,
 * further allocations must be of the same type (per-cpu or
 * per-thread). We model this by counting allocations (i.e. the number
 * of tracers of a certain type) for one type negatively:
 *   =0  no tracers
 *   >0  number of per-thread tracers
 *   <0  number of per-cpu tracers
 *
 * The tracer count essentially gives the number of ds contexts for a
 * certain type of allocation.
 */
static atomic_t tracers = ATOMIC_INIT(0);

static inline void get_tracer(struct task_struct *task)
{
	if (task)
		atomic_inc(&tracers);
	else
		atomic_dec(&tracers);
}

static inline void put_tracer(struct task_struct *task)
{
	if (task)
		atomic_dec(&tracers);
	else
		atomic_inc(&tracers);
}

static inline int check_tracer(struct task_struct *task)
{
	return task ?
		(atomic_read(&tracers) >= 0) :
		(atomic_read(&tracers) <= 0);
}
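
/*
 * Illustrative sketch, not part of the original file: how the signed
 * count above rejects mixed allocation types. With one per-thread
 * tracer active (tracers == 1), a per-cpu request fails
 * check_tracer(NULL), which requires tracers <= 0, and vice versa.
 * Locking is omitted here; the real callers hold ds_lock.
 */
#if 0
static int tracer_type_example(struct task_struct *task)
{
	if (!check_tracer(task))
		return -EPERM;		/* other type already in use */
	get_tracer(task);		/* tracers += task ? 1 : -1 */
	/* ... trace ... */
	put_tracer(task);		/* tracers -= task ? 1 : -1 */
	return 0;
}
#endif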


/*
 * The DS context is either attached to a thread or to a cpu:
 * - in the former case, the thread_struct contains a pointer to the
 *   attached context.
 * - in the latter case, we use a static array of per-cpu context
 *   pointers.
 *
 * Contexts are use-counted. They are allocated on first access and
 * deallocated when the last user puts the context.
 */
struct ds_context {
	/* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
	unsigned char ds[MAX_SIZEOF_DS];
	/* the owner of the BTS and PEBS configuration, respectively */
	struct bts_tracer *bts_master;
	struct pebs_tracer *pebs_master;
	/* use count */
	unsigned long count;
	/* a pointer to the context location inside the thread_struct
	 * or the per_cpu context array */
	struct ds_context **this;
	/* a pointer to the task owning this context, or NULL, if the
	 * context is owned by a cpu */
	struct task_struct *task;
};

static DEFINE_PER_CPU(struct ds_context *, system_context_array);

#define system_context per_cpu(system_context_array, smp_processor_id())


static inline struct ds_context *ds_get_context(struct task_struct *task)
{
	struct ds_context **p_context =
		(task ? &task->thread.ds_ctx : &system_context);
	struct ds_context *context = NULL;
	struct ds_context *new_context = NULL;
	unsigned long irq;

	/* Chances are small that we already have a context. */
	new_context = kzalloc(sizeof(*new_context), GFP_KERNEL);
	if (!new_context)
		return NULL;

	spin_lock_irqsave(&ds_lock, irq);

	context = *p_context;
	if (!context) {
		context = new_context;

		context->this = p_context;
		context->task = task;
		context->count = 0;

		if (task)
			set_tsk_thread_flag(task, TIF_DS_AREA_MSR);

		if (!task || (task == current))
			wrmsrl(MSR_IA32_DS_AREA, (unsigned long)context->ds);

		*p_context = context;
	}

	context->count++;

	spin_unlock_irqrestore(&ds_lock, irq);

	if (context != new_context)
		kfree(new_context);

	return context;
}

static inline void ds_put_context(struct ds_context *context)
{
	unsigned long irq;

	if (!context)
		return;

	spin_lock_irqsave(&ds_lock, irq);

	if (--context->count) {
		spin_unlock_irqrestore(&ds_lock, irq);
		return;
	}

	*(context->this) = NULL;

	if (context->task)
		clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);

	if (!context->task || (context->task == current))
		wrmsrl(MSR_IA32_DS_AREA, 0);

	spin_unlock_irqrestore(&ds_lock, irq);

	kfree(context);
}


/*
 * Call the tracer's callback on a buffer overflow.
 *
 * context: the ds context
 * qual: the buffer type
 */
static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
{
	switch (qual) {
	case ds_bts:
		if (context->bts_master &&
		    context->bts_master->ovfl)
			context->bts_master->ovfl(context->bts_master);
		break;
	case ds_pebs:
		if (context->pebs_master &&
		    context->pebs_master->ovfl)
			context->pebs_master->ovfl(context->pebs_master);
		break;
	}
}


/*
 * Write raw data into the BTS or PEBS buffer.
 *
 * The remainder of any partially written record is zeroed out.
 *
 * context: the DS context
 * qual: the buffer type
 * record: the data to write
 * size: the size of the data
 */
static int ds_write(struct ds_context *context, enum ds_qualifier qual,
		    const void *record, size_t size)
{
	int bytes_written = 0;

	if (!record)
		return -EINVAL;

	while (size) {
		unsigned long base, index, end, write_end, int_th;
		unsigned long write_size, adj_write_size;

		/*
		 * Write as much as possible without producing an
		 * overflow interrupt.
		 *
		 * interrupt_threshold must either be
		 * - bigger than absolute_maximum or
		 * - point to a record between buffer_base and absolute_maximum
		 *
		 * index points to a valid record.
		 */
		base   = ds_get(context->ds, qual, ds_buffer_base);
		index  = ds_get(context->ds, qual, ds_index);
		end    = ds_get(context->ds, qual, ds_absolute_maximum);
		int_th = ds_get(context->ds, qual, ds_interrupt_threshold);

		write_end = min(end, int_th);

		/* if we are already beyond the interrupt threshold,
		 * we fill the entire buffer */
		if (write_end <= index)
			write_end = end;

		if (write_end <= index)
			break;

		write_size = min((unsigned long) size, write_end - index);
		memcpy((void *)index, record, write_size);

		record = (const char *)record + write_size;
		size -= write_size;
		bytes_written += write_size;

		adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
		adj_write_size *= ds_cfg.sizeof_rec[qual];

		/* zero out trailing bytes */
		memset((char *)index + write_size, 0,
		       adj_write_size - write_size);
		index += adj_write_size;

		if (index >= end)
			index = base;
		ds_set(context->ds, qual, ds_index, index);

		if (index >= int_th)
			ds_overflow(context, qual);
	}

	return bytes_written;
}
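
/*
 * Illustrative sketch, not part of the original file: one pass through
 * ds_write() for a hypothetical two-record BTS buffer with 24-byte
 * records and no interrupt threshold. After the second record the
 * write pointer wraps to the buffer base, so a third record overwrites
 * the first.
 */
#if 0
static void ds_write_example(struct ds_context *context)
{
	unsigned char rec[24] = {};	/* assumes sizeof_rec[ds_bts] == 24 */

	ds_write(context, ds_bts, rec, sizeof(rec));	/* index = base + 24 */
	ds_write(context, ds_bts, rec, sizeof(rec));	/* index wraps to base */
	ds_write(context, ds_bts, rec, sizeof(rec));	/* overwrites record 0 */
}
#endif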


/*
 * Branch Trace Store (BTS) uses the following format. Different
 * architectures vary in the size of those fields.
 * - source linear address
 * - destination linear address
 * - flags
 *
 * Later architectures use 64bit pointers throughout, whereas earlier
 * architectures use 32bit pointers in 32bit mode.
 *
 * We compute the base address for the first 8 fields based on:
 * - the field size stored in the DS configuration
 * - the relative field position
 *
 * In order to store additional information in the BTS buffer, we use
 * a special source address to indicate that the record requires
 * special interpretation.
 *
 * Netburst indicates, via a bit in the flags field, whether the
 * branch was predicted; we ignore that bit.
 *
 * We use two levels of abstraction:
 * - the raw data level defined here
 * - an arch-independent level defined in ds.h
 */

enum bts_field {
	bts_from,
	bts_to,
	bts_flags,

	bts_qual = bts_from,
	bts_jiffies = bts_to,
	bts_pid = bts_flags,

	bts_qual_mask = (bts_qual_max - 1),
	bts_escape = ((unsigned long)-1 & ~bts_qual_mask)
};

static inline unsigned long bts_get(const char *base, enum bts_field field)
{
	base += (ds_cfg.sizeof_field * field);
	return *(unsigned long *)base;
}

static inline void bts_set(char *base, enum bts_field field, unsigned long val)
{
	base += (ds_cfg.sizeof_field * field);
	(*(unsigned long *)base) = val;
}
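
/*
 * Illustrative sketch, not part of the original file: encoding a
 * timestamp escape record with the accessors above. The from field
 * carries bts_escape plus the qualifier; the to and flags fields are
 * reused for jiffies and pid, exactly as bts_read() below expects.
 */
#if 0
static void bts_escape_example(char *raw, pid_t pid)
{
	bts_set(raw, bts_qual, bts_escape | bts_task_arrives);
	bts_set(raw, bts_jiffies, (unsigned long)jiffies_64);
	bts_set(raw, bts_pid, pid);
}
#endif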


/*
 * The raw BTS data is architecture dependent.
 *
 * For higher-level users, we give an arch-independent view.
 * - ds.h defines struct bts_struct
 * - bts_read translates one raw bts record into a bts_struct
 * - bts_write translates one bts_struct into the raw format and
 *   writes it into the top of the parameter tracer's buffer.
 *
 * return: bytes read/written on success; -Eerrno, otherwise
 */
static int bts_read(struct bts_tracer *tracer, const void *at,
		    struct bts_struct *out)
{
	if (!tracer)
		return -EINVAL;

	if (at < tracer->trace.ds.begin)
		return -EINVAL;

	if (tracer->trace.ds.end < (at + tracer->trace.ds.size))
		return -EINVAL;

	memset(out, 0, sizeof(*out));
	if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) {
		out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask);
		out->variant.timestamp.jiffies = bts_get(at, bts_jiffies);
		out->variant.timestamp.pid = bts_get(at, bts_pid);
	} else {
		out->qualifier = bts_branch;
		out->variant.lbr.from = bts_get(at, bts_from);
		out->variant.lbr.to   = bts_get(at, bts_to);

		if (!out->variant.lbr.from && !out->variant.lbr.to)
			out->qualifier = bts_invalid;
	}

	return ds_cfg.sizeof_rec[ds_bts];
}

static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in)
{
	unsigned char raw[MAX_SIZEOF_BTS];

	if (!tracer)
		return -EINVAL;

	if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts])
		return -EOVERFLOW;

	switch (in->qualifier) {
	case bts_invalid:
		bts_set(raw, bts_from, 0);
		bts_set(raw, bts_to, 0);
		bts_set(raw, bts_flags, 0);
		break;
	case bts_branch:
		bts_set(raw, bts_from, in->variant.lbr.from);
		bts_set(raw, bts_to,   in->variant.lbr.to);
		bts_set(raw, bts_flags, 0);
		break;
	case bts_task_arrives:
	case bts_task_departs:
		bts_set(raw, bts_qual, (bts_escape | in->qualifier));
		bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies);
		bts_set(raw, bts_pid, in->variant.timestamp.pid);
		break;
	default:
		return -EINVAL;
	}

	return ds_write(tracer->ds.context, ds_bts, raw,
			ds_cfg.sizeof_rec[ds_bts]);
}


static void ds_write_config(struct ds_context *context,
			    struct ds_trace *cfg, enum ds_qualifier qual)
{
	unsigned char *ds = context->ds;

	ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin);
	ds_set(ds, qual, ds_index, (unsigned long)cfg->top);
	ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end);
	ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith);
}

static void ds_read_config(struct ds_context *context,
			   struct ds_trace *cfg, enum ds_qualifier qual)
{
	unsigned char *ds = context->ds;

	cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base);
	cfg->top = (void *)ds_get(ds, qual, ds_index);
	cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum);
	cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold);
}

static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
			     void *base, size_t size, size_t ith,
			     unsigned int flags)
{
	unsigned long buffer, adj;

	/* adjust the buffer address and size to meet alignment
	 * constraints:
	 * - buffer is double-word aligned
	 * - size is multiple of record size
	 *
	 * We checked the size at the very beginning; we have enough
	 * space to do the adjustment.
	 */
	buffer = (unsigned long)base;

	adj = ALIGN(buffer, DS_ALIGNMENT) - buffer;
	buffer += adj;
	size   -= adj;

	trace->n = size / ds_cfg.sizeof_rec[qual];
	trace->size = ds_cfg.sizeof_rec[qual];

	size = (trace->n * trace->size);

	trace->begin = (void *)buffer;
	trace->top = trace->begin;
	trace->end = (void *)(buffer + size);
	/* The value for 'no threshold' is -1, which will set the
	 * threshold outside of the buffer, just like we want it.
	 */
	trace->ith = (void *)(buffer + size - ith);

	trace->flags = flags;
}


static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
		      enum ds_qualifier qual, struct task_struct *task,
		      void *base, size_t size, size_t th, unsigned int flags)
{
	struct ds_context *context;
	int error;

	error = -EINVAL;
	if (!base)
		goto out;

	/* we require some space to do alignment adjustments below */
	error = -EINVAL;
	if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual]))
		goto out;

	if (th != (size_t)-1) {
		th *= ds_cfg.sizeof_rec[qual];

		error = -EINVAL;
		if (size <= th)
			goto out;
	}

	tracer->buffer = base;
	tracer->size = size;

	error = -ENOMEM;
	context = ds_get_context(task);
	if (!context)
		goto out;
	tracer->context = context;

	ds_init_ds_trace(trace, qual, base, size, th, flags);

	error = 0;
 out:
	return error;
}

struct bts_tracer *ds_request_bts(struct task_struct *task,
				  void *base, size_t size,
				  bts_ovfl_callback_t ovfl, size_t th,
				  unsigned int flags)
{
	struct bts_tracer *tracer;
	unsigned long irq;
	int error;

	error = -EOPNOTSUPP;
	if (!ds_cfg.ctl[dsf_bts])
		goto out;

	/* buffer overflow notification is not yet implemented */
	error = -EOPNOTSUPP;
	if (ovfl)
		goto out;

	error = -ENOMEM;
	tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
	if (!tracer)
		goto out;
	tracer->ovfl = ovfl;

	error = ds_request(&tracer->ds, &tracer->trace.ds,
			   ds_bts, task, base, size, th, flags);
	if (error < 0)
		goto out_tracer;


	spin_lock_irqsave(&ds_lock, irq);

	error = -EPERM;
	if (!check_tracer(task))
		goto out_unlock;
	get_tracer(task);

	error = -EPERM;
	if (tracer->ds.context->bts_master)
		goto out_put_tracer;
	tracer->ds.context->bts_master = tracer;

	spin_unlock_irqrestore(&ds_lock, irq);


	tracer->trace.read  = bts_read;
	tracer->trace.write = bts_write;

	ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
	ds_resume_bts(tracer);

	return tracer;

 out_put_tracer:
	put_tracer(task);
 out_unlock:
	spin_unlock_irqrestore(&ds_lock, irq);
	ds_put_context(tracer->ds.context);
 out_tracer:
	kfree(tracer);
 out:
	return ERR_PTR(error);
}
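
/*
 * Illustrative sketch, not part of the original file: a minimal
 * per-cpu BTS session using the request/read/release API above. The
 * PAGE_SIZE buffer, the GFP_KERNEL allocation, and the flag choice
 * are assumptions for the example; the caller owns the buffer for the
 * lifetime of the tracer.
 */
#if 0
static void bts_usage_example(void)
{
	struct bts_tracer *tracer;
	void *buf = kzalloc(PAGE_SIZE, GFP_KERNEL);

	if (!buf)
		return;

	/* NULL task: trace the current cpu; no callback, no threshold */
	tracer = ds_request_bts(NULL, buf, PAGE_SIZE, NULL,
				(size_t)-1, BTS_KERNEL | BTS_USER);
	if (IS_ERR(tracer)) {
		kfree(buf);
		return;
	}

	/* ... run the workload, then look at the records ... */
	ds_read_bts(tracer);

	ds_release_bts(tracer);
	kfree(buf);
}
#endif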

struct pebs_tracer *ds_request_pebs(struct task_struct *task,
				    void *base, size_t size,
				    pebs_ovfl_callback_t ovfl, size_t th,
				    unsigned int flags)
{
	struct pebs_tracer *tracer;
	unsigned long irq;
	int error;

	/* buffer overflow notification is not yet implemented */
	error = -EOPNOTSUPP;
	if (ovfl)
		goto out;

	error = -ENOMEM;
	tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
	if (!tracer)
		goto out;
	tracer->ovfl = ovfl;

	error = ds_request(&tracer->ds, &tracer->trace.ds,
			   ds_pebs, task, base, size, th, flags);
	if (error < 0)
		goto out_tracer;

	spin_lock_irqsave(&ds_lock, irq);

	error = -EPERM;
	if (!check_tracer(task))
		goto out_unlock;
	get_tracer(task);

	error = -EPERM;
	if (tracer->ds.context->pebs_master)
		goto out_put_tracer;
	tracer->ds.context->pebs_master = tracer;

	spin_unlock_irqrestore(&ds_lock, irq);

	ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
	ds_resume_pebs(tracer);

	return tracer;

 out_put_tracer:
	put_tracer(task);
 out_unlock:
	spin_unlock_irqrestore(&ds_lock, irq);
	ds_put_context(tracer->ds.context);
 out_tracer:
	kfree(tracer);
 out:
	return ERR_PTR(error);
}

void ds_release_bts(struct bts_tracer *tracer)
{
	if (!tracer)
		return;

	ds_suspend_bts(tracer);

	WARN_ON_ONCE(tracer->ds.context->bts_master != tracer);
	tracer->ds.context->bts_master = NULL;

	put_tracer(tracer->ds.context->task);
	ds_put_context(tracer->ds.context);

	kfree(tracer);
}

void ds_suspend_bts(struct bts_tracer *tracer)
{
	struct task_struct *task;

	if (!tracer)
		return;

	task = tracer->ds.context->task;

	if (!task || (task == current))
		update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL);

	if (task) {
		task->thread.debugctlmsr &= ~BTS_CONTROL;

		if (!task->thread.debugctlmsr)
			clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
	}
}

void ds_resume_bts(struct bts_tracer *tracer)
{
	struct task_struct *task;
	unsigned long control;

	if (!tracer)
		return;

	task = tracer->ds.context->task;

	control = ds_cfg.ctl[dsf_bts];
	if (!(tracer->trace.ds.flags & BTS_KERNEL))
		control |= ds_cfg.ctl[dsf_bts_kernel];
	if (!(tracer->trace.ds.flags & BTS_USER))
		control |= ds_cfg.ctl[dsf_bts_user];

	if (task) {
		task->thread.debugctlmsr |= control;
		set_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
	}

	if (!task || (task == current))
		update_debugctlmsr(get_debugctlmsr() | control);
}

void ds_release_pebs(struct pebs_tracer *tracer)
{
	if (!tracer)
		return;

	ds_suspend_pebs(tracer);

	WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer);
	tracer->ds.context->pebs_master = NULL;

	put_tracer(tracer->ds.context->task);
	ds_put_context(tracer->ds.context);

	kfree(tracer);
}

void ds_suspend_pebs(struct pebs_tracer *tracer)
{

}

void ds_resume_pebs(struct pebs_tracer *tracer)
{

}

const struct bts_trace *ds_read_bts(struct bts_tracer *tracer)
{
	if (!tracer)
		return NULL;

	ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
	return &tracer->trace;
}

const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer)
{
	if (!tracer)
		return NULL;

	ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
	tracer->trace.reset_value =
		*(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8));

	return &tracer->trace;
}

int ds_reset_bts(struct bts_tracer *tracer)
{
	if (!tracer)
		return -EINVAL;

	tracer->trace.ds.top = tracer->trace.ds.begin;

	ds_set(tracer->ds.context->ds, ds_bts, ds_index,
	       (unsigned long)tracer->trace.ds.top);

	return 0;
}

int ds_reset_pebs(struct pebs_tracer *tracer)
{
	if (!tracer)
		return -EINVAL;

	tracer->trace.ds.top = tracer->trace.ds.begin;

	ds_set(tracer->ds.context->ds, ds_pebs, ds_index,
	       (unsigned long)tracer->trace.ds.top);

	return 0;
}

int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value)
{
	if (!tracer)
		return -EINVAL;

	*(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value;

	return 0;
}
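
/*
 * Illustrative sketch, not part of the original file: the PEBS
 * counter-reset value sits directly after the eight buffer-management
 * fields, i.e. at offset sizeof_field * 8 into the DS save area; both
 * ds_read_pebs() and ds_set_pebs_reset() above rely on this layout.
 */
#if 0
static u64 *pebs_reset_slot(struct ds_context *context)
{
	return (u64 *)(context->ds + (ds_cfg.sizeof_field * 8));
}
#endif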

static const struct ds_configuration ds_cfg_netburst = {
	.name = "Netburst",
	.ctl[dsf_bts]		= (1 << 2) | (1 << 3),
	.ctl[dsf_bts_kernel]	= (1 << 5),
	.ctl[dsf_bts_user]	= (1 << 6),

	.sizeof_field		= sizeof(long),
	.sizeof_rec[ds_bts]	= sizeof(long) * 3,
#ifdef __i386__
	.sizeof_rec[ds_pebs]	= sizeof(long) * 10,
#else
	.sizeof_rec[ds_pebs]	= sizeof(long) * 18,
#endif
};
static const struct ds_configuration ds_cfg_pentium_m = {
	.name = "Pentium M",
	.ctl[dsf_bts]		= (1 << 6) | (1 << 7),

	.sizeof_field		= sizeof(long),
	.sizeof_rec[ds_bts]	= sizeof(long) * 3,
#ifdef __i386__
	.sizeof_rec[ds_pebs]	= sizeof(long) * 10,
#else
	.sizeof_rec[ds_pebs]	= sizeof(long) * 18,
#endif
};
static const struct ds_configuration ds_cfg_core2_atom = {
	.name = "Core 2/Atom",
	.ctl[dsf_bts]		= (1 << 6) | (1 << 7),
	.ctl[dsf_bts_kernel]	= (1 << 9),
	.ctl[dsf_bts_user]	= (1 << 10),

	.sizeof_field		= 8,
	.sizeof_rec[ds_bts]	= 8 * 3,
	.sizeof_rec[ds_pebs]	= 8 * 18,
};

static void
ds_configure(const struct ds_configuration *cfg)
{
	memset(&ds_cfg, 0, sizeof(ds_cfg));
	ds_cfg = *cfg;

	printk(KERN_INFO "[ds] using %s configuration\n", ds_cfg.name);

	if (!cpu_has_bts) {
		ds_cfg.ctl[dsf_bts] = 0;
		printk(KERN_INFO "[ds] bts not available\n");
	}
	if (!cpu_has_pebs)
		printk(KERN_INFO "[ds] pebs not available\n");

	WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field));
}

void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
{
	switch (c->x86) {
	case 0x6:
		switch (c->x86_model) {
		case 0x9:
		case 0xd: /* Pentium M */
			ds_configure(&ds_cfg_pentium_m);
			break;
		case 0xf:
		case 0x17: /* Core2 */
		case 0x1c: /* Atom */
			ds_configure(&ds_cfg_core2_atom);
			break;
		case 0x1a: /* i7 */
		default:
			/* sorry, don't know about them */
			break;
		}
		break;
	case 0xf:
		switch (c->x86_model) {
		case 0x0:
		case 0x1:
		case 0x2: /* Netburst */
			ds_configure(&ds_cfg_netburst);
			break;
		default:
			/* sorry, don't know about them */
			break;
		}
		break;
	default:
		/* sorry, don't know about them */
		break;
	}
}

/*
 * Change the DS configuration from tracing prev to tracing next.
 */
void ds_switch_to(struct task_struct *prev, struct task_struct *next)
{
	struct ds_context *prev_ctx = prev->thread.ds_ctx;
	struct ds_context *next_ctx = next->thread.ds_ctx;

	if (prev_ctx) {
		update_debugctlmsr(0);

		if (prev_ctx->bts_master &&
		    (prev_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
			struct bts_struct ts = {
				.qualifier = bts_task_departs,
				.variant.timestamp.jiffies = jiffies_64,
				.variant.timestamp.pid = prev->pid
			};
			bts_write(prev_ctx->bts_master, &ts);
		}
	}

	if (next_ctx) {
		if (next_ctx->bts_master &&
		    (next_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
			struct bts_struct ts = {
				.qualifier = bts_task_arrives,
				.variant.timestamp.jiffies = jiffies_64,
				.variant.timestamp.pid = next->pid
			};
			bts_write(next_ctx->bts_master, &ts);
		}

		wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds);
	}

	update_debugctlmsr(next->thread.debugctlmsr);
}

void ds_copy_thread(struct task_struct *tsk, struct task_struct *father)
{
	clear_tsk_thread_flag(tsk, TIF_DS_AREA_MSR);
	tsk->thread.ds_ctx = NULL;
}

void ds_exit_thread(struct task_struct *tsk)
{
}