/**
 * @file cpu_buffer.c
 *
 * @remark Copyright 2002-2009 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon <levon@movementarian.org>
 * @author Barry Kasindorf <barry.kasindorf@amd.com>
 * @author Robert Richter <robert.richter@amd.com>
 *
 * Each CPU has a local buffer that stores PC value/event
 * pairs. We also log context switches when we notice them.
 * Eventually each CPU's buffer is processed into the global
 * event buffer by sync_buffer().
 *
 * We use a local buffer for two reasons: an NMI or similar
 * interrupt cannot synchronise, and high sampling rates
 * would lead to catastrophic global synchronisation if
 * a global buffer was used.
 */

#include <linux/sched.h>
#include <linux/oprofile.h>
#include <linux/errno.h>

#include <asm/ptrace.h>

#include "event_buffer.h"
#include "cpu_buffer.h"
#include "buffer_sync.h"
#include "oprof.h"

#define OP_BUFFER_FLAGS	0

static struct ring_buffer *op_ring_buffer;
DEFINE_PER_CPU(struct oprofile_cpu_buffer, op_cpu_buffer);

static void wq_sync_buffer(struct work_struct *work);

#define DEFAULT_TIMER_EXPIRE (HZ / 10)
static int work_enabled;

unsigned long oprofile_get_cpu_buffer_size(void)
{
	return oprofile_cpu_buffer_size;
}

void oprofile_cpu_buffer_inc_smpl_lost(void)
{
	struct oprofile_cpu_buffer *cpu_buf = this_cpu_ptr(&op_cpu_buffer);

	cpu_buf->sample_lost_overflow++;
}

void free_cpu_buffers(void)
{
	if (op_ring_buffer)
		ring_buffer_free(op_ring_buffer);
	op_ring_buffer = NULL;
}

#define RB_EVENT_HDR_SIZE 4

int alloc_cpu_buffers(void)
{
	int i;

	unsigned long buffer_size = oprofile_cpu_buffer_size;
	unsigned long byte_size = buffer_size * (sizeof(struct op_sample) +
						 RB_EVENT_HDR_SIZE);

	op_ring_buffer = ring_buffer_alloc(byte_size, OP_BUFFER_FLAGS);
	if (!op_ring_buffer)
		goto fail;

	for_each_possible_cpu(i) {
		struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i);

		b->last_task = NULL;
		b->last_is_kernel = -1;
		b->tracing = 0;
		b->buffer_size = buffer_size;
		b->sample_received = 0;
		b->sample_lost_overflow = 0;
		b->backtrace_aborted = 0;
		b->sample_invalid_eip = 0;
		b->cpu = i;
		INIT_DELAYED_WORK(&b->work, wq_sync_buffer);
	}
	return 0;

fail:
	free_cpu_buffers();
	return -ENOMEM;
}

void start_cpu_work(void)
{
	int i;

	work_enabled = 1;

	for_each_online_cpu(i) {
		struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i);

		/*
		 * Spread the work out by one jiffy per CPU so the
		 * work items don't all fire at once.
		 */
		schedule_delayed_work_on(i, &b->work, DEFAULT_TIMER_EXPIRE + i);
	}
}

void end_cpu_work(void)
{
	work_enabled = 0;
}

void flush_cpu_work(void)
{
	int i;

	for_each_online_cpu(i) {
		struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i);

		/* these work items are per-CPU; no need for flush_sync */
		flush_delayed_work(&b->work);
	}
}

/*
 * This function prepares the cpu buffer to write a sample.
 *
 * Struct op_entry is used during operations on the ring buffer while
 * struct op_sample contains the data that is stored in the ring
 * buffer. The entry structure may be passed in uninitialized. The
 * function reserves a data array of the requested size. Use
 * op_cpu_buffer_write_commit() after preparing the sample. On error
 * a NULL pointer is returned; otherwise a pointer to the sample is
 * returned. A usage sketch follows op_cpu_buffer_write_commit()
 * below.
 */
struct op_sample
*op_cpu_buffer_write_reserve(struct op_entry *entry, unsigned long size)
{
	entry->event = ring_buffer_lock_reserve
		(op_ring_buffer, sizeof(struct op_sample) +
		 size * sizeof(entry->sample->data[0]));
	if (!entry->event)
		return NULL;
	entry->sample = ring_buffer_event_data(entry->event);
	entry->size = size;
	entry->data = entry->sample->data;

	return entry->sample;
}

int op_cpu_buffer_write_commit(struct op_entry *entry)
{
	return ring_buffer_unlock_commit(op_ring_buffer, entry->event);
}
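
/*
 * Illustrative usage sketch (not compiled into the driver): reserving
 * a sample with room for one data word, filling it, and committing it
 * with the reserve/commit pair above, mirroring what op_add_code()
 * below does. The helper name example_add_escape() is a hypothetical
 * placeholder; op_cpu_buffer_add_data() is provided by cpu_buffer.h
 * and ESCAPE_CODE by event_buffer.h.
 *
 *	static int example_add_escape(unsigned long flags, unsigned long data)
 *	{
 *		struct op_entry entry;
 *		struct op_sample *sample;
 *
 *		sample = op_cpu_buffer_write_reserve(&entry, 1);
 *		if (!sample)
 *			return -ENOMEM;
 *
 *		sample->eip = ESCAPE_CODE;
 *		sample->event = flags;
 *		op_cpu_buffer_add_data(&entry, data);
 *
 *		return op_cpu_buffer_write_commit(&entry);
 *	}
 */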

struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu)
{
	struct ring_buffer_event *e;
	e = ring_buffer_consume(op_ring_buffer, cpu, NULL, NULL);
	if (!e)
		return NULL;

	entry->event = e;
	entry->sample = ring_buffer_event_data(e);
	entry->size = (ring_buffer_event_length(e) - sizeof(struct op_sample))
		/ sizeof(entry->sample->data[0]);
	entry->data = entry->sample->data;
	return entry->sample;
}

unsigned long op_cpu_buffer_entries(int cpu)
{
	return ring_buffer_entries_cpu(op_ring_buffer, cpu);
}

static int
op_add_code(struct oprofile_cpu_buffer *cpu_buf, unsigned long backtrace,
	    int is_kernel, struct task_struct *task)
{
	struct op_entry entry;
	struct op_sample *sample;
	unsigned long flags;
	int size;

	flags = 0;

	if (backtrace)
		flags |= TRACE_BEGIN;

	/* notice a switch from user->kernel or vice versa */
	is_kernel = !!is_kernel;
	if (cpu_buf->last_is_kernel != is_kernel) {
		cpu_buf->last_is_kernel = is_kernel;
		flags |= KERNEL_CTX_SWITCH;
		if (is_kernel)
			flags |= IS_KERNEL;
	}

	/* notice a task switch */
	if (cpu_buf->last_task != task) {
		cpu_buf->last_task = task;
		flags |= USER_CTX_SWITCH;
	}

	if (!flags)
		/* nothing to do */
		return 0;

	if (flags & USER_CTX_SWITCH)
		size = 1;
	else
		size = 0;

	sample = op_cpu_buffer_write_reserve(&entry, size);
	if (!sample)
		return -ENOMEM;

	sample->eip = ESCAPE_CODE;
	sample->event = flags;

	if (size)
		op_cpu_buffer_add_data(&entry, (unsigned long)task);

	op_cpu_buffer_write_commit(&entry);

	return 0;
}

static inline int
op_add_sample(struct oprofile_cpu_buffer *cpu_buf,
	      unsigned long pc, unsigned long event)
{
	struct op_entry entry;
	struct op_sample *sample;

	sample = op_cpu_buffer_write_reserve(&entry, 0);
	if (!sample)
		return -ENOMEM;

	sample->eip = pc;
	sample->event = event;

	return op_cpu_buffer_write_commit(&entry);
}

/*
 * This must be safe from any context.
 *
 * is_kernel is needed because on some architectures you cannot
 * tell if you are in kernel or user space simply by looking at
 * pc. We tag this in the buffer by generating kernel enter/exit
 * events whenever is_kernel changes.
 */
static int
log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc,
	   unsigned long backtrace, int is_kernel, unsigned long event,
	   struct task_struct *task)
{
	struct task_struct *tsk = task ? task : current;
	cpu_buf->sample_received++;

	if (pc == ESCAPE_CODE) {
		cpu_buf->sample_invalid_eip++;
		return 0;
	}

	if (op_add_code(cpu_buf, backtrace, is_kernel, tsk))
		goto fail;

	if (op_add_sample(cpu_buf, pc, event))
		goto fail;

	return 1;

fail:
	cpu_buf->sample_lost_overflow++;
	return 0;
}

static inline void oprofile_begin_trace(struct oprofile_cpu_buffer *cpu_buf)
{
	cpu_buf->tracing = 1;
}

static inline void oprofile_end_trace(struct oprofile_cpu_buffer *cpu_buf)
{
	cpu_buf->tracing = 0;
}

static inline void
__oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
			  unsigned long event, int is_kernel,
			  struct task_struct *task)
{
	struct oprofile_cpu_buffer *cpu_buf = this_cpu_ptr(&op_cpu_buffer);
	unsigned long backtrace = oprofile_backtrace_depth;

	/*
	 * If log_sample() fails we can't backtrace, since we lost the
	 * source of this event.
	 */
	if (!log_sample(cpu_buf, pc, backtrace, is_kernel, event, task))
		/* failed */
		return;

	if (!backtrace)
		return;

	oprofile_begin_trace(cpu_buf);
	oprofile_ops.backtrace(regs, backtrace);
	oprofile_end_trace(cpu_buf);
}
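
/*
 * Illustrative sketch (not compiled into the driver): the
 * oprofile_ops.backtrace() hook invoked between begin_trace and
 * end_trace above is expected to walk the stack and report each
 * frame via oprofile_add_trace(). The frame-walking helper
 * example_next_frame() is a hypothetical placeholder for the
 * architecture's own unwinder.
 *
 *	static void example_backtrace(struct pt_regs * const regs,
 *				      unsigned int depth)
 *	{
 *		unsigned long addr;
 *
 *		while (depth-- && example_next_frame(regs, &addr))
 *			oprofile_add_trace(addr);
 *	}
 */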

void oprofile_add_ext_hw_sample(unsigned long pc, struct pt_regs * const regs,
				unsigned long event, int is_kernel,
				struct task_struct *task)
{
	__oprofile_add_ext_sample(pc, regs, event, is_kernel, task);
}

void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
			     unsigned long event, int is_kernel)
{
	__oprofile_add_ext_sample(pc, regs, event, is_kernel, NULL);
}

void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
{
	int is_kernel;
	unsigned long pc;

	if (likely(regs)) {
		is_kernel = !user_mode(regs);
		pc = profile_pc(regs);
	} else {
		is_kernel = 0;    /* This value will not be used */
		pc = ESCAPE_CODE; /* as this causes an early return. */
	}

	__oprofile_add_ext_sample(pc, regs, event, is_kernel, NULL);
}

/*
 * Add samples with data to the ring buffer.
 *
 * Use oprofile_add_data(&entry, val) to add data and
 * oprofile_write_commit(&entry) to commit the sample. A usage sketch
 * follows oprofile_write_commit() below.
 */
void
oprofile_write_reserve(struct op_entry *entry, struct pt_regs * const regs,
		       unsigned long pc, int code, int size)
{
	struct op_sample *sample;
	int is_kernel = !user_mode(regs);
	struct oprofile_cpu_buffer *cpu_buf = this_cpu_ptr(&op_cpu_buffer);

	cpu_buf->sample_received++;

	/* no backtraces for samples with data */
	if (op_add_code(cpu_buf, 0, is_kernel, current))
		goto fail;

	sample = op_cpu_buffer_write_reserve(entry, size + 2);
	if (!sample)
		goto fail;
	sample->eip = ESCAPE_CODE;
	sample->event = 0;		/* no flags */

	op_cpu_buffer_add_data(entry, code);
	op_cpu_buffer_add_data(entry, pc);

	return;

fail:
	entry->event = NULL;
	cpu_buf->sample_lost_overflow++;
}

int oprofile_add_data(struct op_entry *entry, unsigned long val)
{
	if (!entry->event)
		return 0;
	return op_cpu_buffer_add_data(entry, val);
}

int oprofile_add_data64(struct op_entry *entry, u64 val)
{
	if (!entry->event)
		return 0;
	if (op_cpu_buffer_get_size(entry) < 2)
		/*
		 * Return 0 to indicate the buffer is too small for a
		 * 64-bit value, even if there is some space left.
		 */
		return 0;
	if (!op_cpu_buffer_add_data(entry, (u32)val))
		return 0;
	return op_cpu_buffer_add_data(entry, (u32)(val >> 32));
}

int oprofile_write_commit(struct op_entry *entry)
{
	if (!entry->event)
		return -EINVAL;
	return op_cpu_buffer_write_commit(entry);
}
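
/*
 * Illustrative usage sketch (not compiled into the driver): how an
 * architecture driver might emit a sample carrying two extra data
 * words with the reserve/add/commit API above. The helper name
 * example_emit_data_sample() is a hypothetical placeholder; the code
 * value is whatever the caller defines.
 *
 *	static void example_emit_data_sample(struct pt_regs * const regs,
 *					     unsigned long pc, int code,
 *					     u64 payload)
 *	{
 *		struct op_entry entry;
 *
 *		oprofile_write_reserve(&entry, regs, pc, code, 2);
 *		oprofile_add_data64(&entry, payload);
 *		oprofile_write_commit(&entry);
 *	}
 */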

void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
{
	struct oprofile_cpu_buffer *cpu_buf = this_cpu_ptr(&op_cpu_buffer);
	log_sample(cpu_buf, pc, 0, is_kernel, event, NULL);
}

void oprofile_add_trace(unsigned long pc)
{
	struct oprofile_cpu_buffer *cpu_buf = this_cpu_ptr(&op_cpu_buffer);

	if (!cpu_buf->tracing)
		return;

	/*
	 * A broken frame can give an eip with the same value as an
	 * escape code; abort the trace if we get one.
	 */
	if (pc == ESCAPE_CODE)
		goto fail;

	if (op_add_sample(cpu_buf, pc, 0))
		goto fail;

	return;
fail:
	cpu_buf->tracing = 0;
	cpu_buf->backtrace_aborted++;
	return;
}

/*
 * This serves to avoid cpu buffer overflow, and makes sure
 * the task mortuary progresses
 *
 * By using schedule_delayed_work_on and then schedule_delayed_work
 * we guarantee this will stay on the correct cpu
 */
static void wq_sync_buffer(struct work_struct *work)
{
	struct oprofile_cpu_buffer *b =
		container_of(work, struct oprofile_cpu_buffer, work.work);
	if (b->cpu != smp_processor_id() && !cpu_online(b->cpu)) {
		cancel_delayed_work(&b->work);
		return;
	}
	sync_buffer(b->cpu);

	/* don't re-add the work if we're shutting down */
	if (work_enabled)
		schedule_delayed_work(&b->work, DEFAULT_TIMER_EXPIRE);
}
466