/**
 * @file oprofile.c
 * Main driver code
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon
 * @author Philippe Elie
 */

#include "oprofile.h"
#include "op_util.h"
#include "config.h"

EXPORT_NO_SYMBOLS;

MODULE_AUTHOR("John Levon (levon@movementarian.org)");
MODULE_DESCRIPTION("Continuous Profiling Module");
MODULE_LICENSE("GPL");

MODULE_PARM(allow_unload, "i");
MODULE_PARM_DESC(allow_unload, "Allow module to be unloaded.");
#ifdef CONFIG_SMP
static int allow_unload;
#else
static int allow_unload = 1;
#endif

/* sysctl settables */
struct oprof_sysctl sysctl_parms;
/* some of the sysctl settable variables need to be copied, to protect the
 * running parameters against users changing them through
 * /proc/sys/dev/oprofile/ during profiling */
struct oprof_sysctl sysctl;

static enum oprof_state state __cacheline_aligned_in_smp = STOPPED;

static int op_major;

static volatile ulong oprof_opened __cacheline_aligned_in_smp;
static volatile ulong oprof_note_opened __cacheline_aligned_in_smp;
static DECLARE_WAIT_QUEUE_HEAD(oprof_wait);

static u32 oprof_ready[NR_CPUS] __cacheline_aligned_in_smp;
struct _oprof_data oprof_data[NR_CPUS] __cacheline_aligned;

struct op_note * note_buffer __cacheline_aligned_in_smp;
u32 note_pos __cacheline_aligned_in_smp;

// the interrupt handler ops structure to use
static struct op_int_operations const * int_ops;

static char const * op_version = PACKAGE " " VERSION;

/* ---------------- interrupt entry routines ------------------ */

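/* true when this CPU's sample buffer has filled up to its watermark and we
 * have not already flagged the CPU as ready for the daemon */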
inline static int need_wakeup(uint cpu, struct _oprof_data * data)
{
	return data->nextbuf >= (data->buf_size - data->buf_watermark) && !oprof_ready[cpu];
}

inline static void next_sample(struct _oprof_data * data)
{
	if (unlikely(++data->nextbuf == data->buf_size))
		data->nextbuf = 0;
}

inline static void evict_op_entry(uint cpu, struct _oprof_data * data, long irq_enabled)
{
	next_sample(data);
	if (likely(!need_wakeup(cpu, data)))
		return;

	/* locking rationale :
	 *
	 * other CPUs are not a race concern since we synch on oprof_wait->lock.
	 *
	 * for the current CPU, we might have interrupted another user of e.g.
	 * runqueue_lock, deadlocking on SMP and racing on UP. So we check that IRQs
	 * were not disabled (corresponding to the irqsave/restores in __wake_up()).
	 *
	 * Note that this requires all spinlocks taken by the full wake_up path
	 * to have saved IRQs - otherwise we can interrupt whilst holding a spinlock
	 * taken from some non-wake_up() path and deadlock. Currently this means only
	 * oprof_wait->lock and runqueue_lock: all instances disable IRQs before
	 * taking the lock.
	 *
	 * This will mean that approaching the end of the buffer, a number of the
	 * evictions may fail to wake up the daemon. We simply hope this doesn't
	 * take long; a pathological case could cause buffer overflow.
	 *
	 * Note that we use oprof_ready as our flag for whether we have initiated a
	 * wake-up. Once the wake-up is received, the flag is reset as well as
	 * data->nextbuf, preventing multiple wakeups.
	 *
	 * On 2.2, a global waitqueue_lock is used, so we must check it's not held
	 * by the current CPU. We make sure that any users of the wait queue (i.e.
	 * us and the code for wait_event_interruptible()) disable interrupts so it's
	 * still safe to check IF_MASK.
	 */
	if (likely(irq_enabled)) {
		oprof_ready[cpu] = 1;
		wake_up(&oprof_wait);
	}
}

inline static void
fill_op_entry(struct op_sample * ops, long eip, pid_t pid, pid_t tgid, int ctr)
{
	ops->eip = eip;
	ops->pid = pid;
	ops->tgid = tgid;
	ops->counter = ctr;
}

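/* record one sample (eip, pid, tgid, counter nr) in this CPU's buffer from
 * the profiling interrupt, waking the daemon if the buffer is nearly full */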
void op_do_profile(uint cpu, long eip, long irq_enabled, int ctr)
{
	struct _oprof_data * data = &oprof_data[cpu];
	pid_t const pid = current->pid;
	pid_t const tgid = op_get_tgid();
	struct op_sample * samples = &data->buffer[data->nextbuf];

	data->nr_irq++;

	fill_op_entry(samples, eip, pid, tgid, ctr);
	evict_op_entry(cpu, data, irq_enabled);
}

/* ---------------- driver routines ------------------ */

/* only stop and start profiling interrupt when we are
 * fully running !
 */
static void stop_cpu_perfctr(int cpu)
{
	if (state == RUNNING)
		int_ops->stop_cpu(cpu);
}

static void start_cpu_perfctr(int cpu)
{
	if (state == RUNNING)
		int_ops->start_cpu(cpu);
}

spinlock_t note_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
/* which buffer nr. is waiting to be read ? */
int cpu_buffer_waiting;

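/* scan all CPUs for a sample buffer flagged ready to read; the first one
 * found is recorded in cpu_buffer_waiting */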
static int is_ready(void)
{
	uint cpu_nr;
	for (cpu_nr = 0 ; cpu_nr < smp_num_cpus; cpu_nr++) {
		if (oprof_ready[cpu_nr]) {
			cpu_buffer_waiting = cpu_nr;
			return 1;
		}
	}
	return 0;
}

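/* advance the note buffer position, waking the daemon once the note buffer
 * passes its watermark or a sample buffer is already ready */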
inline static void up_and_check_note(void)
{
	note_pos++;
	if (likely(note_pos < (sysctl.note_size - OP_PRE_NOTE_WATERMARK(sysctl.note_size)) && !is_ready()))
		return;

	/* if we reach the end of the buffer, just pin
	 * to the last entry until it is read. This loses
	 * notes, but we have no choice. */
	if (unlikely(note_pos == sysctl.note_size)) {
		static int warned;
		if (!warned) {
			printk(KERN_WARNING "note buffer overflow: restart "
			       "oprofile with a larger note buffer.\n");
			warned = 1;
		}
		sysctl.nr_note_buffer_overflow++;
		note_pos = sysctl.note_size - 1;
	}

	/* we just use cpu 0 as a convenient one to wake up */
	oprof_ready[0] = 2;
	oprof_wake_up(&oprof_wait);
}

/* caller must hold note_lock */
void __oprof_put_note(struct op_note * onote)
{
	/* ignore note if we're not up and running fully */
	if (state != RUNNING)
		return;

	memcpy(&note_buffer[note_pos], onote, sizeof(struct op_note));
	up_and_check_note();
}

void oprof_put_note(struct op_note * onote)
{
	spin_lock(&note_lock);
	__oprof_put_note(onote);
	spin_unlock(&note_lock);
}

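/* copy the accumulated notes out to the daemon and reset the note buffer;
 * the read must ask for exactly one full note buffer's worth of bytes */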
static ssize_t oprof_note_read(char * buf, size_t count, loff_t * ppos)
{
	struct op_note * mybuf;
	uint num;
	ssize_t max;

	max = sizeof(struct op_note) * sysctl.note_size;

	if (*ppos || count != max)
		return -EINVAL;

	mybuf = vmalloc(max);
	if (!mybuf)
		return -EFAULT;

	spin_lock(&note_lock);

	num = note_pos;

	count = note_pos * sizeof(struct op_note);

	if (count)
		memcpy(mybuf, note_buffer, count);

	note_pos = 0;

	spin_unlock(&note_lock);

	if (count && copy_to_user(buf, mybuf, count))
		count = -EFAULT;

	vfree(mybuf);
	return count;
}

static int oprof_note_open(void)
{
	if (test_and_set_bit(0, &oprof_note_opened))
		return -EBUSY;
	INC_USE_COUNT_MAYBE;
	return 0;
}

static int oprof_note_release(void)
{
	BUG_ON(!oprof_note_opened);
	clear_bit(0, &oprof_note_opened);
	DEC_USE_COUNT_MAYBE;
	return 0;
}

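/* work out how many samples the daemon should read from this CPU's buffer,
 * detecting the case where the buffer overflowed before it could be emptied */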
static int check_buffer_amount(int cpu_nr)
{
	struct _oprof_data * data = &oprof_data[cpu_nr];
	int size = data->buf_size;
	int num = data->nextbuf;
	if (num < size - data->buf_watermark && oprof_ready[cpu_nr] != 2) {
		printk(KERN_WARNING "oprofile: Detected overflow of size %d. "
		       "You must increase the module buffer size with\n"
		       "opcontrol --setup --buffer-size= or reduce the "
		       "interrupt frequency\n", num);
		data->nr_buffer_overflow += num;
		num = size;
	} else
		data->nextbuf = 0;
	return num;
}

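/* copy one CPU's buffer header and samples out to the daemon, with that
 * CPU's counter interrupt stopped for the duration of the copy */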
static int copy_buffer(char * buf, int cpu_nr)
{
	struct op_buffer_head head;
	int ret = -EFAULT;

	stop_cpu_perfctr(cpu_nr);

	head.cpu_nr = cpu_nr;
	head.count = check_buffer_amount(cpu_nr);
	head.state = state;

	oprof_ready[cpu_nr] = 0;

	if (copy_to_user(buf, &head, sizeof(struct op_buffer_head)))
		goto out;

	if (head.count) {
		size_t const size = head.count * sizeof(struct op_sample);
		if (copy_to_user(buf + sizeof(struct op_buffer_head),
			oprof_data[cpu_nr].buffer, size))
			goto out;
		ret = size + sizeof(struct op_buffer_head);
	} else {
		ret = sizeof(struct op_buffer_head);
	}

out:
	start_cpu_perfctr(cpu_nr);
	return ret;
}

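/* read() entry point: minor 2 is the note device, minor 0 the sample device.
 * For samples we block until some CPU's buffer is ready (or, when stopping,
 * hand back any remaining data) and then copy that buffer out */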
static ssize_t oprof_read(struct file * file, char * buf, size_t count, loff_t * ppos)
{
	ssize_t max;

	if (!capable(CAP_SYS_PTRACE))
		return -EPERM;

	switch (MINOR(file->f_dentry->d_inode->i_rdev)) {
		case 2: return oprof_note_read(buf, count, ppos);
		case 0: break;
		default: return -EINVAL;
	}

	max = sizeof(struct op_buffer_head) + sizeof(struct op_sample) * sysctl.buf_size;

	if (*ppos || count != max)
		return -EINVAL;

	switch (state) {
		case RUNNING:
			wait_event_interruptible(oprof_wait, is_ready());
			if (signal_pending(current))
				return -EINTR;
			break;

		/* Non-obvious. If O_NONBLOCK is set, that means
		 * the daemon knows it has to quit and is asking
		 * for final buffer data. If it's not set, then we
		 * have just transitioned to STOPPING, and we must
		 * inform the daemon (which we can do just by a normal
		 * operation).
		 */
		case STOPPING: {
			int cpu;

			if (!(file->f_flags & O_NONBLOCK))
				break;

			for (cpu = 0; cpu < smp_num_cpus; ++cpu) {
				if (oprof_data[cpu].nextbuf) {
					cpu_buffer_waiting = cpu;
					oprof_ready[cpu] = 2;
					break;
				}
			}

			if (cpu == smp_num_cpus)
				return -EAGAIN;

		}
			break;

		case STOPPED: BUG();
	}

	return copy_buffer(buf, cpu_buffer_waiting);
}


static int oprof_start(void);
static int oprof_stop(void);

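/* open() entry point: minor 1 is the hash map device, minor 2 the note
 * device; opening minor 0 (the sample device) starts the profiler, provided
 * the other devices are already open */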
static int oprof_open(struct inode * ino, struct file * file)
{
	int err;

	if (!capable(CAP_SYS_PTRACE))
		return -EPERM;

	switch (MINOR(file->f_dentry->d_inode->i_rdev)) {
		case 1: return oprof_hash_map_open();
		case 2: return oprof_note_open();
		case 0:
			/* make sure the other devices are open */
			if (is_map_ready())
				break;
		default:
			return -EINVAL;
	}

	if (test_and_set_bit(0, &oprof_opened))
		return -EBUSY;

	err = oprof_start();
	if (err)
		clear_bit(0, &oprof_opened);
	return err;
}

static int oprof_release(struct inode * ino, struct file * file)
{
	switch (MINOR(file->f_dentry->d_inode->i_rdev)) {
		case 1: return oprof_hash_map_release();
		case 2: return oprof_note_release();
		case 0: break;
		default: return -EINVAL;
	}

	BUG_ON(!oprof_opened);

	clear_bit(0, &oprof_opened);

	// FIXME: is this safe when I kill -9 the daemon ?
	return oprof_stop();
}

static int oprof_mmap(struct file * file, struct vm_area_struct * vma)
{
	if (MINOR(file->f_dentry->d_inode->i_rdev) == 1)
		return oprof_hash_map_mmap(file, vma);
	return -EINVAL;
}

/* called under spinlock, cannot sleep */
static void oprof_free_mem(uint num)
{
	uint i;
	for (i=0; i < num; i++) {
		if (oprof_data[i].buffer)
			vfree(oprof_data[i].buffer);
		oprof_data[i].buffer = NULL;
	}
	vfree(note_buffer);
	note_buffer = NULL;
}

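/* allocate the note buffer and one sample buffer per CPU, sized from the
 * sysctl parameters snapshotted at start time */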
static int oprof_init_data(void)
{
	uint i, notebufsize;
	ulong buf_size;
	struct _oprof_data * data;

	sysctl.nr_note_buffer_overflow = 0;
	notebufsize = sizeof(struct op_note) * sysctl.note_size;
	note_buffer = vmalloc(notebufsize);
	if (!note_buffer) {
		printk(KERN_ERR "oprofile: failed to allocate note buffer of %u bytes\n",
			notebufsize);
		return -EFAULT;
	}
	note_pos = 0;

	// safe init
	for (i = 0; i < smp_num_cpus; ++i) {
		data = &oprof_data[i];
		data->buf_size = 0;
		data->buffer = 0;
		data->buf_watermark = 0;
		data->nr_buffer_overflow = 0;
	}

	buf_size = (sizeof(struct op_sample) * sysctl.buf_size);

	for (i = 0 ; i < smp_num_cpus ; ++i) {
		data = &oprof_data[i];

		data->buffer = vmalloc(buf_size);
		if (!data->buffer) {
			printk(KERN_ERR "oprofile: failed to allocate eviction buffer of %lu bytes\n", buf_size);
			oprof_free_mem(i);
			return -EFAULT;
		}

		memset(data->buffer, 0, buf_size);

		data->buf_size = sysctl.buf_size;
		data->buf_watermark = OP_PRE_WATERMARK(data->buf_size);
		data->nextbuf = 0;
	}

	return 0;
}

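/* sanity-check the buffer and note sizes and the interrupt backend's own
 * parameters before profiling is allowed to start */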
static int parms_check(void)
{
	int err;

	if ((err = check_range(sysctl.buf_size, OP_MIN_BUF_SIZE, OP_MAX_BUF_SIZE,
		"sysctl.buf_size value %d not in range (%d %d)\n")))
		return err;
	if ((err = check_range(sysctl.note_size, OP_MIN_NOTE_TABLE_SIZE, OP_MAX_NOTE_TABLE_SIZE,
		"sysctl.note_size value %d not in range (%d %d)\n")))
		return err;

	if ((err = int_ops->check_params()))
		return err;

	return 0;
}


static DECLARE_MUTEX(sysctlsem);


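/* start profiling: snapshot the sysctl parameters, allocate the buffers,
 * set up and start the interrupt backend, and intercept the system calls
 * the profiler needs to track */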
static int oprof_start(void)
{
	int err = 0;

	down(&sysctlsem);

	/* save the sysctl settable things to protect the profiler params
	 * against change through sysctl while we are running */
	sysctl_parms.cpu_type = sysctl.cpu_type;
	sysctl = sysctl_parms;

	if ((err = oprof_init_data()))
		goto out;

	if ((err = parms_check())) {
		oprof_free_mem(smp_num_cpus);
		goto out;
	}

	if ((err = int_ops->setup())) {
		oprof_free_mem(smp_num_cpus);
		goto out;
	}

	op_intercept_syscalls();

	int_ops->start();

	state = RUNNING;

out:
	up(&sysctlsem);
	return err;
}

/*
 * stop interrupts being generated and notes arriving.
 * This is idempotent.
 */
static void oprof_partial_stop(void)
{
	BUG_ON(state == STOPPED);

	if (state == RUNNING) {
		op_restore_syscalls();
		int_ops->stop();
	}

	state = STOPPING;
}

static int oprof_stop(void)
{
	uint i;
	// FIXME: err not needed
	int err = -EINVAL;

	down(&sysctlsem);

	BUG_ON(state == STOPPED);

	/* here we need to :
	 * bring back the old system calls
	 * stop the perf counter
	 * bring back the old NMI handler
	 * reset the map buffer stuff and ready values
	 *
	 * Nothing will be able to write into the map buffer because
	 * we synchronise via the spinlocks
	 */

	oprof_partial_stop();

	spin_lock(&note_lock);

	for (i = 0 ; i < smp_num_cpus; i++) {
		struct _oprof_data * data = &oprof_data[i];
		oprof_ready[i] = 0;
		data->nextbuf = 0;
	}

	oprof_free_mem(smp_num_cpus);

	spin_unlock(&note_lock);
	err = 0;

	/* FIXME: can we really say this ? */
	state = STOPPED;
	up(&sysctlsem);
	return err;
}

static struct file_operations oprof_fops = {
#ifdef HAVE_FILE_OPERATIONS_OWNER
	owner: THIS_MODULE,
#endif
	open: oprof_open,
	release: oprof_release,
	read: oprof_read,
	mmap: oprof_mmap,
};

/*
 * /proc/sys/dev/oprofile/
 *                        bufsize
 *                        notesize
 *                        dump
 *                        dump_stop
 *                        nr_interrupts
 *                        #ctr/
 *                          event
 *                          enabled
 *                          count
 *                          unit_mask
 *                          kernel
 *                          user
 *
 * #ctr is in [0-1] for PPro core, [0-3] for Athlon core
 *
 */

/* These access routines are basically not safe on SMP for module unload.
 * And there is nothing we can do about it - the API is broken. We'll just
 * make a best-efforts thing. Note the sem is needed to prevent parms_check
 * bypassing during oprof_start().
 */

static void lock_sysctl(void)
{
	MOD_INC_USE_COUNT;
	down(&sysctlsem);
}

static void unlock_sysctl(void)
{
	up(&sysctlsem);
	MOD_DEC_USE_COUNT;
}

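/* read-only sysctl handler: fold each CPU's interrupt count into
 * sysctl.nr_interrupts and report it */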
static int get_nr_interrupts(ctl_table * table, int write, struct file * filp, void * buffer, size_t * lenp)
{
	uint cpu;
	int ret = -EINVAL;

	lock_sysctl();

	if (write)
		goto out;

	sysctl.nr_interrupts = 0;

	for (cpu = 0 ; cpu < smp_num_cpus; cpu++) {
		sysctl.nr_interrupts += oprof_data[cpu].nr_irq;
		oprof_data[cpu].nr_irq = 0;
	}

	ret = proc_dointvec(table, write, filp, buffer, lenp);
out:
	unlock_sysctl();
	return ret;
}

static int get_nr_buffer_overflow(ctl_table * table, int write, struct file * filp, void * buffer, size_t * lenp)
{
	uint cpu;
	int ret = -EINVAL;

	lock_sysctl();

	if (write)
		goto out;

	for (cpu = 0 ; cpu < smp_num_cpus; cpu++) {
		sysctl.nr_buffer_overflow += oprof_data[cpu].nr_buffer_overflow;
		oprof_data[cpu].nr_buffer_overflow = 0;
	}

	ret = proc_dointvec(table, write, filp, buffer, lenp);
out:
	unlock_sysctl();
	return ret;
}

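/* plain proc_dointvec wrapped in the sysctl semaphore (and a module
 * use-count bump via lock_sysctl) */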
int lproc_dointvec(ctl_table * table, int write, struct file * filp, void * buffer, size_t * lenp)
{
	int err;

	lock_sysctl();
	err = proc_dointvec(table, write, filp, buffer, lenp);
	unlock_sysctl();

	return err;
}

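/* flag every CPU's buffer as ready and wake the daemon so it drains them all */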
static void do_actual_dump(void)
{
	uint cpu;

	for (cpu = 0 ; cpu < smp_num_cpus; cpu++)
		oprof_ready[cpu] = 2;
	oprof_wake_up(&oprof_wait);
}

static int sysctl_do_dump(ctl_table * table, int write, struct file * filp, void * buffer, size_t * lenp)
{
	int err = -EINVAL;

	lock_sysctl();

	if (state != RUNNING)
		goto out;

	if (!write) {
		err = proc_dointvec(table, write, filp, buffer, lenp);
		goto out;
	}

	do_actual_dump();

	err = 0;
out:
	unlock_sysctl();
	return err;
}

static int sysctl_do_dump_stop(ctl_table * table, int write, struct file * filp, void * buffer, size_t * lenp)
{
	int err = -EINVAL;

	lock_sysctl();

	if (state != RUNNING)
		goto out;

	if (!write) {
		err = proc_dointvec(table, write, filp, buffer, lenp);
		goto out;
	}

	oprof_partial_stop();

	/* also wakes up daemon */
	do_actual_dump();

	err = 0;
out:
	unlock_sysctl();
	return err;
}

static int const nr_oprof_static = 8;

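/* the trailing zeroed slots leave room for the per-counter entries that the
 * interrupt backend fills in at &oprof_table[nr_oprof_static] */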
static ctl_table oprof_table[] = {
	{ 1, "bufsize", &sysctl_parms.buf_size, sizeof(int), 0644, NULL, &lproc_dointvec, NULL, },
	{ 1, "dump", &sysctl_parms.dump, sizeof(int), 0666, NULL, &sysctl_do_dump, NULL, },
	{ 1, "dump_stop", &sysctl_parms.dump_stop, sizeof(int), 0644, NULL, &sysctl_do_dump_stop, NULL, },
	{ 1, "nr_interrupts", &sysctl.nr_interrupts, sizeof(int), 0444, NULL, &get_nr_interrupts, NULL, },
	{ 1, "notesize", &sysctl_parms.note_size, sizeof(int), 0644, NULL, &lproc_dointvec, NULL, },
	{ 1, "cpu_type", &sysctl.cpu_type, sizeof(int), 0444, NULL, &lproc_dointvec, NULL, },
	{ 1, "note_buffer_overflow", &sysctl.nr_note_buffer_overflow, sizeof(int), 0444, NULL, &lproc_dointvec, NULL, },
	{ 1, "buffer_overflow", &sysctl.nr_buffer_overflow, sizeof(int), 0444, NULL, &get_nr_buffer_overflow, NULL, },
	{ 0, }, { 0, }, { 0, }, { 0, }, { 0, }, { 0, }, { 0, }, { 0, },
	{ 0, },
};

static ctl_table oprof_root[] = {
	{1, "oprofile", NULL, 0, 0755, oprof_table},
	{0, },
};

static ctl_table dev_root[] = {
	{CTL_DEV, "dev", NULL, 0, 0555, oprof_root},
	{0, },
};

static struct ctl_table_header * sysctl_header;

/* NOTE: we do *not* support sysctl() syscall */

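/* register /proc/sys/dev/oprofile/: the static entries above plus whatever
 * per-counter entries the interrupt backend adds */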
static int __init init_sysctl(void)
{
	int err = 0;
	ctl_table * next = &oprof_table[nr_oprof_static];

	/* these sysctl parms need sensible values */
	sysctl_parms.buf_size = OP_DEFAULT_BUF_SIZE;
	sysctl_parms.note_size = OP_DEFAULT_NOTE_SIZE;

	if ((err = int_ops->add_sysctls(next)))
		return err;

	sysctl_header = register_sysctl_table(dev_root, 0);
	return err;
}

/* not safe to mark as __exit since used from __init code */
static void cleanup_sysctl(void)
{
	ctl_table * next = &oprof_table[nr_oprof_static];
	unregister_sysctl_table(sysctl_header);

	int_ops->remove_sysctls(next);

	return;
}

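/* module unload gate: refuse to unload unless allow_unload is set, profiling
 * is stopped and nothing holds the module use count */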
static int can_unload(void)
{
	int can = -EBUSY;
	down(&sysctlsem);

	if (allow_unload && state == STOPPED && !GET_USE_COUNT(THIS_MODULE))
		can = 0;
	up(&sysctlsem);
	return can;
}

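/* module initialisation: pick the interrupt backend (falling back to the RTC
 * driver if the primary one fails to init), then register the sysctls, the
 * character device and the hash map */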
int __init oprof_init(void)
{
	int err = 0;

	if (sysctl.cpu_type != CPU_RTC) {
		int_ops = op_int_interface();

		// try to init, fall back to the RTC driver if that fails
		if ((err = int_ops->init())) {
			int_ops = &op_rtc_ops;
			if ((err = int_ops->init()))
				return err;
			sysctl.cpu_type = CPU_RTC;
		}
	} else {
		int_ops = &op_rtc_ops;
		if ((err = int_ops->init()))
			return err;
	}

	if ((err = init_sysctl()))
		goto out_err;

	err = op_major = register_chrdev(0, "oprof", &oprof_fops);
	if (err < 0)
		goto out_err2;

	err = oprof_init_hashmap();
	if (err < 0) {
		printk(KERN_ERR "oprofile: couldn't allocate hash map !\n");
		unregister_chrdev(op_major, "oprof");
		goto out_err2;
	}

	/* module might not be unloadable */
	THIS_MODULE->can_unload = can_unload;

	/* do this now so we don't have to track save/restores later */
	op_save_syscalls();

	printk(KERN_INFO "%s loaded, major %u\n", op_version, op_major);
	return 0;

out_err2:
	cleanup_sysctl();
out_err:
	int_ops->deinit();
	return err;
}

void __exit oprof_exit(void)
{
	oprof_free_hashmap();

	unregister_chrdev(op_major, "oprof");

	cleanup_sysctl();

	int_ops->deinit();
}

/*
 * "The most valuable commodity I know of is information."
 *      - Gordon Gekko
 */