/**
 * @file oprofile.c
 * Main driver code
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon
 * @author Philippe Elie
 */

#include "oprofile.h"
#include "op_util.h"
#include "config.h"

EXPORT_NO_SYMBOLS;

MODULE_AUTHOR("John Levon (levon@movementarian.org)");
MODULE_DESCRIPTION("Continuous Profiling Module");
MODULE_LICENSE("GPL");

MODULE_PARM(allow_unload, "i");
MODULE_PARM_DESC(allow_unload, "Allow module to be unloaded.");
#ifdef CONFIG_SMP
static int allow_unload;
#else
static int allow_unload = 1;
#endif

/* sysctl settables */
struct oprof_sysctl sysctl_parms;
/* some of the sysctl-settable variables are copied here to protect the
 * running parameters against users changing them through
 * /proc/sys/dev/oprofile/ during profiling */
struct oprof_sysctl sysctl;

static enum oprof_state state __cacheline_aligned_in_smp = STOPPED;

static int op_major;

static volatile ulong oprof_opened __cacheline_aligned_in_smp;
static volatile ulong oprof_note_opened __cacheline_aligned_in_smp;
static DECLARE_WAIT_QUEUE_HEAD(oprof_wait);

static u32 oprof_ready[NR_CPUS] __cacheline_aligned_in_smp;
struct _oprof_data oprof_data[NR_CPUS] __cacheline_aligned;

struct op_note * note_buffer __cacheline_aligned_in_smp;
u32 note_pos __cacheline_aligned_in_smp;

// the interrupt handler ops structure to use
static struct op_int_operations const * int_ops;

static char const * op_version = PACKAGE " " VERSION;

/* ---------------- interrupt entry routines ------------------ */

inline static int need_wakeup(uint cpu, struct _oprof_data * data)
{
	return data->nextbuf >= (data->buf_size - data->buf_watermark) && !oprof_ready[cpu];
}

inline static void next_sample(struct _oprof_data * data)
{
	if (unlikely(++data->nextbuf == data->buf_size))
		data->nextbuf = 0;
}

inline static void evict_op_entry(uint cpu, struct _oprof_data * data, long irq_enabled)
{
	next_sample(data);
	if (likely(!need_wakeup(cpu, data)))
		return;

	/* locking rationale :
	 *
	 * other CPUs are not a race concern since we synch on oprof_wait->lock.
	 *
	 * for the current CPU, we might have interrupted another user of e.g.
	 * runqueue_lock, deadlocking on SMP and racing on UP. So we check that IRQs
	 * were not disabled (corresponding to the irqsave/restores in __wake_up()).
	 *
	 * Note that this requires all spinlocks taken by the full wake_up path
	 * to have saved IRQs - otherwise we can interrupt whilst holding a spinlock
	 * taken from some non-wake_up() path and deadlock. Currently this means only
	 * oprof_wait->lock and runqueue_lock: all instances disable IRQs before
	 * taking the lock.
	 *
	 * This will mean that approaching the end of the buffer, a number of the
	 * evictions may fail to wake up the daemon. We simply hope this doesn't
	 * take long; a pathological case could cause buffer overflow.
	 *
	 * Note that we use oprof_ready as our flag for whether we have initiated a
	 * wake-up. Once the wake-up is received, the flag is reset as well as
	 * data->nextbuf, preventing multiple wakeups.
	 *
	 * On 2.2, a global waitqueue_lock is used, so we must check it's not held
	 * by the current CPU. We make sure that any users of the wait queue (i.e.
	 * us and the code for wait_event_interruptible()) disable interrupts so it's
	 * still safe to check IF_MASK.
	 */
	if (likely(irq_enabled)) {
		oprof_ready[cpu] = 1;
		wake_up(&oprof_wait);
	}
}

inline static void
fill_op_entry(struct op_sample * ops, long eip, pid_t pid, pid_t tgid, int ctr)
{
	ops->eip = eip;
	ops->pid = pid;
	ops->tgid = tgid;
	ops->counter = ctr;
}

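/* record one sample for this CPU: fill the next slot in the per-CPU
 * buffer, then wake the daemon if the buffer is nearly full */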
void op_do_profile(uint cpu, long eip, long irq_enabled, int ctr)
{
	struct _oprof_data * data = &oprof_data[cpu];
	pid_t const pid = current->pid;
	pid_t const tgid = op_get_tgid();
	struct op_sample * samples = &data->buffer[data->nextbuf];

	data->nr_irq++;

	fill_op_entry(samples, eip, pid, tgid, ctr);
	evict_op_entry(cpu, data, irq_enabled);
}

/* ---------------- driver routines ------------------ */

/* only stop and start the profiling interrupt when we are
 * fully running !
 */
static void stop_cpu_perfctr(int cpu)
{
	if (state == RUNNING)
		int_ops->stop_cpu(cpu);
}

static void start_cpu_perfctr(int cpu)
{
	if (state == RUNNING)
		int_ops->start_cpu(cpu);
}

spinlock_t note_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
/* which buffer nr. is waiting to be read ? */
int cpu_buffer_waiting;

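/* return 1 (and record which CPU in cpu_buffer_waiting) if any
 * per-CPU sample buffer is waiting to be read */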
static int is_ready(void)
{
	uint cpu_nr;
	for (cpu_nr = 0 ; cpu_nr < smp_num_cpus; cpu_nr++) {
		if (oprof_ready[cpu_nr]) {
			cpu_buffer_waiting = cpu_nr;
			return 1;
		}
	}
	return 0;
}

inline static void up_and_check_note(void)
{
	note_pos++;
	if (likely(note_pos < (sysctl.note_size - OP_PRE_NOTE_WATERMARK(sysctl.note_size)) && !is_ready()))
		return;

	/* if we reach the end of the buffer, just pin
	 * to the last entry until it is read. This loses
	 * notes, but we have no choice. */
	if (unlikely(note_pos == sysctl.note_size)) {
		static int warned;
		if (!warned) {
			printk(KERN_WARNING "note buffer overflow: restart "
			       "oprofile with a larger note buffer.\n");
			warned = 1;
		}
		sysctl.nr_note_buffer_overflow++;
		note_pos = sysctl.note_size - 1;
	}

	/* we just use cpu 0 as a convenient one to wake up */
	oprof_ready[0] = 2;
	oprof_wake_up(&oprof_wait);
}

/* caller must hold note_lock */
void __oprof_put_note(struct op_note * onote)
{
	/* ignore note if we're not up and running fully */
	if (state != RUNNING)
		return;

	memcpy(&note_buffer[note_pos], onote, sizeof(struct op_note));
	up_and_check_note();
}

void oprof_put_note(struct op_note * onote)
{
	spin_lock(&note_lock);
	__oprof_put_note(onote);
	spin_unlock(&note_lock);
}

static ssize_t oprof_note_read(char * buf, size_t count, loff_t * ppos)
{
	struct op_note * mybuf;
	uint num;
	ssize_t max;

	max = sizeof(struct op_note) * sysctl.note_size;

	if (*ppos || count != max)
		return -EINVAL;

	mybuf = vmalloc(max);
	if (!mybuf)
		return -EFAULT;

	spin_lock(&note_lock);

	num = note_pos;

	count = note_pos * sizeof(struct op_note);

	if (count)
		memcpy(mybuf, note_buffer, count);

	note_pos = 0;

	spin_unlock(&note_lock);

	if (count && copy_to_user(buf, mybuf, count))
		count = -EFAULT;

	vfree(mybuf);
	return count;
}

static int oprof_note_open(void)
{
	if (test_and_set_bit(0, &oprof_note_opened))
		return -EBUSY;
	INC_USE_COUNT_MAYBE;
	return 0;
}

static int oprof_note_release(void)
{
	BUG_ON(!oprof_note_opened);
	clear_bit(0, &oprof_note_opened);
	DEC_USE_COUNT_MAYBE;
	return 0;
}

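/* return the number of samples to read from this CPU's buffer; if the
 * buffer wrapped before the daemon managed to read it, account the
 * overflow and return the whole buffer */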
static int check_buffer_amount(int cpu_nr)
{
	struct _oprof_data * data = &oprof_data[cpu_nr];
	int size = data->buf_size;
	int num = data->nextbuf;
	if (num < size - data->buf_watermark && oprof_ready[cpu_nr] != 2) {
		printk(KERN_WARNING "oprofile: Detected overflow of size %d. "
		       "You must increase the module buffer size with\n"
		       "opcontrol --setup --buffer-size= or reduce the "
		       "interrupt frequency\n", num);
		data->nr_buffer_overflow += num;
		num = size;
	} else
		data->nextbuf = 0;
	return num;
}

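/* copy one CPU's sample buffer to userspace, preceded by an op_buffer_head;
 * the performance counter is paused on that CPU for the duration of the copy */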
static int copy_buffer(char * buf, int cpu_nr)
{
	struct op_buffer_head head;
	int ret = -EFAULT;

	stop_cpu_perfctr(cpu_nr);

	head.cpu_nr = cpu_nr;
	head.count = check_buffer_amount(cpu_nr);
	head.state = state;

	oprof_ready[cpu_nr] = 0;

	if (copy_to_user(buf, &head, sizeof(struct op_buffer_head)))
		goto out;

	if (head.count) {
		size_t const size = head.count * sizeof(struct op_sample);
		if (copy_to_user(buf + sizeof(struct op_buffer_head),
				 oprof_data[cpu_nr].buffer, size))
			goto out;
		ret = size + sizeof(struct op_buffer_head);
	} else {
		ret = sizeof(struct op_buffer_head);
	}

out:
	start_cpu_perfctr(cpu_nr);
	return ret;
}

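/* read() on the sample device: wait until a CPU buffer is ready (or, when
 * stopping, pick a non-empty one) and copy it out to the daemon */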
static ssize_t oprof_read(struct file * file, char * buf, size_t count, loff_t * ppos)
{
	ssize_t max;

	if (!capable(CAP_SYS_PTRACE))
		return -EPERM;

	switch (MINOR(file->f_dentry->d_inode->i_rdev)) {
	case 2: return oprof_note_read(buf, count, ppos);
	case 0: break;
	default: return -EINVAL;
	}

	max = sizeof(struct op_buffer_head) + sizeof(struct op_sample) * sysctl.buf_size;

	if (*ppos || count != max)
		return -EINVAL;

	switch (state) {
	case RUNNING:
		wait_event_interruptible(oprof_wait, is_ready());
		if (signal_pending(current))
			return -EINTR;
		break;

	/* Non-obvious. If O_NONBLOCK is set, that means
	 * the daemon knows it has to quit and is asking
	 * for final buffer data. If it's not set, then we
	 * have just transitioned to STOPPING, and we must
	 * inform the daemon (which we can do just by a normal
	 * operation).
	 */
	case STOPPING: {
		int cpu;

		if (!(file->f_flags & O_NONBLOCK))
			break;

		for (cpu = 0; cpu < smp_num_cpus; ++cpu) {
			if (oprof_data[cpu].nextbuf) {
				cpu_buffer_waiting = cpu;
				oprof_ready[cpu] = 2;
				break;
			}
		}

		if (cpu == smp_num_cpus)
			return -EAGAIN;

	}
		break;

	case STOPPED: BUG();
	}

	return copy_buffer(buf, cpu_buffer_waiting);
}


static int oprof_start(void);
static int oprof_stop(void);

static int oprof_open(struct inode * ino, struct file * file)
{
	int err;

	if (!capable(CAP_SYS_PTRACE))
		return -EPERM;

	switch (MINOR(file->f_dentry->d_inode->i_rdev)) {
	case 1: return oprof_hash_map_open();
	case 2: return oprof_note_open();
	case 0:
		/* make sure the other devices are open */
		if (is_map_ready())
			break;
	default:
		return -EINVAL;
	}

	if (test_and_set_bit(0, &oprof_opened))
		return -EBUSY;

	err = oprof_start();
	if (err)
		clear_bit(0, &oprof_opened);
	return err;
}

static int oprof_release(struct inode * ino, struct file * file)
{
	switch (MINOR(file->f_dentry->d_inode->i_rdev)) {
	case 1: return oprof_hash_map_release();
	case 2: return oprof_note_release();
	case 0: break;
	default: return -EINVAL;
	}

	BUG_ON(!oprof_opened);

	clear_bit(0, &oprof_opened);

	// FIXME: is this safe when I kill -9 the daemon ?
	return oprof_stop();
}

static int oprof_mmap(struct file * file, struct vm_area_struct * vma)
{
	if (MINOR(file->f_dentry->d_inode->i_rdev) == 1)
		return oprof_hash_map_mmap(file, vma);
	return -EINVAL;
}

/* called under spinlock, cannot sleep */
static void oprof_free_mem(uint num)
{
	uint i;
	for (i=0; i < num; i++) {
		if (oprof_data[i].buffer)
			vfree(oprof_data[i].buffer);
		oprof_data[i].buffer = NULL;
	}
	vfree(note_buffer);
	note_buffer = NULL;
}

static int oprof_init_data(void)
{
	uint i, notebufsize;
	ulong buf_size;
	struct _oprof_data * data;

	sysctl.nr_note_buffer_overflow = 0;
	notebufsize = sizeof(struct op_note) * sysctl.note_size;
	note_buffer = vmalloc(notebufsize);
	if (!note_buffer) {
		printk(KERN_ERR "oprofile: failed to allocate note buffer of %u bytes\n",
		       notebufsize);
		return -EFAULT;
	}
	note_pos = 0;

	// safe init
	for (i = 0; i < smp_num_cpus; ++i) {
		data = &oprof_data[i];
		data->buf_size = 0;
		data->buffer = 0;
		data->buf_watermark = 0;
		data->nr_buffer_overflow = 0;
	}

	buf_size = (sizeof(struct op_sample) * sysctl.buf_size);

	for (i = 0 ; i < smp_num_cpus ; ++i) {
		data = &oprof_data[i];

		data->buffer = vmalloc(buf_size);
		if (!data->buffer) {
			printk(KERN_ERR "oprofile: failed to allocate eviction buffer of %lu bytes\n", buf_size);
			oprof_free_mem(i);
			return -EFAULT;
		}

		memset(data->buffer, 0, buf_size);

		data->buf_size = sysctl.buf_size;
		data->buf_watermark = OP_PRE_WATERMARK(data->buf_size);
		data->nextbuf = 0;
	}

	return 0;
}

static int parms_check(void)
{
	int err;

	if ((err = check_range(sysctl.buf_size, OP_MIN_BUF_SIZE, OP_MAX_BUF_SIZE,
		"sysctl.buf_size value %d not in range (%d %d)\n")))
		return err;
	if ((err = check_range(sysctl.note_size, OP_MIN_NOTE_TABLE_SIZE, OP_MAX_NOTE_TABLE_SIZE,
		"sysctl.note_size value %d not in range (%d %d)\n")))
		return err;

	if ((err = int_ops->check_params()))
		return err;

	return 0;
}


static DECLARE_MUTEX(sysctlsem);


static int oprof_start(void)
{
	int err = 0;

	down(&sysctlsem);

	/* save the sysctl-settable parameters so that changes made through
	 * sysctl cannot affect the running profiler */
	sysctl_parms.cpu_type = sysctl.cpu_type;
	sysctl = sysctl_parms;

	if ((err = oprof_init_data()))
		goto out;

	if ((err = parms_check())) {
		oprof_free_mem(smp_num_cpus);
		goto out;
	}

	if ((err = int_ops->setup())) {
		oprof_free_mem(smp_num_cpus);
		goto out;
	}

	op_intercept_syscalls();

	int_ops->start();

	state = RUNNING;

out:
	up(&sysctlsem);
	return err;
}

/*
 * stop interrupts being generated and notes arriving.
 * This is idempotent.
 */
static void oprof_partial_stop(void)
{
	BUG_ON(state == STOPPED);

	if (state == RUNNING) {
		op_restore_syscalls();
		int_ops->stop();
	}

	state = STOPPING;
}

static int oprof_stop(void)
{
	uint i;
	// FIXME: err not needed
	int err = -EINVAL;

	down(&sysctlsem);

	BUG_ON(state == STOPPED);

	/* here we need to :
	 * bring back the old system calls
	 * stop the perf counter
	 * bring back the old NMI handler
	 * reset the map buffer stuff and ready values
	 *
	 * Nothing will be able to write into the map buffer because
	 * we synchronise via the spinlocks
	 */

	oprof_partial_stop();

	spin_lock(&note_lock);

	for (i = 0 ; i < smp_num_cpus; i++) {
		struct _oprof_data * data = &oprof_data[i];
		oprof_ready[i] = 0;
		data->nextbuf = 0;
	}

	oprof_free_mem(smp_num_cpus);

	spin_unlock(&note_lock);
	err = 0;

	/* FIXME: can we really say this ? */
	state = STOPPED;
	up(&sysctlsem);
	return err;
}

static struct file_operations oprof_fops = {
#ifdef HAVE_FILE_OPERATIONS_OWNER
	owner: THIS_MODULE,
#endif
	open: oprof_open,
	release: oprof_release,
	read: oprof_read,
	mmap: oprof_mmap,
};

/*
 * /proc/sys/dev/oprofile/
 *	bufsize
 *	notesize
 *	dump
 *	dump_stop
 *	nr_interrupts
 *	#ctr/
 *		event
 *		enabled
 *		count
 *		unit_mask
 *		kernel
 *		user
 *
 * #ctr is in [0-1] for PPro core, [0-3] for Athlon core
 *
 */

/* These access routines are basically not safe on SMP for module unload.
 * And there is nothing we can do about it - the API is broken. We'll just
 * make a best-efforts thing. Note the sem is needed to prevent parms_check
 * bypassing during oprof_start().
 */

static void lock_sysctl(void)
{
	MOD_INC_USE_COUNT;
	down(&sysctlsem);
}

static void unlock_sysctl(void)
{
	up(&sysctlsem);
	MOD_DEC_USE_COUNT;
}

static int get_nr_interrupts(ctl_table * table, int write, struct file * filp, void * buffer, size_t * lenp)
{
	uint cpu;
	int ret = -EINVAL;

	lock_sysctl();

	if (write)
		goto out;

	sysctl.nr_interrupts = 0;

	for (cpu = 0 ; cpu < smp_num_cpus; cpu++) {
		sysctl.nr_interrupts += oprof_data[cpu].nr_irq;
		oprof_data[cpu].nr_irq = 0;
	}

	ret = proc_dointvec(table, write, filp, buffer, lenp);
out:
	unlock_sysctl();
	return ret;
}

static int get_nr_buffer_overflow(ctl_table * table, int write, struct file * filp, void * buffer, size_t * lenp)
{
	uint cpu;
	int ret = -EINVAL;

	lock_sysctl();

	if (write)
		goto out;

	for (cpu = 0 ; cpu < smp_num_cpus; cpu++) {
		sysctl.nr_buffer_overflow += oprof_data[cpu].nr_buffer_overflow;
		oprof_data[cpu].nr_buffer_overflow = 0;
	}

	ret = proc_dointvec(table, write, filp, buffer, lenp);
out:
	unlock_sysctl();
	return ret;
}

int lproc_dointvec(ctl_table * table, int write, struct file * filp, void * buffer, size_t * lenp)
{
	int err;

	lock_sysctl();
	err = proc_dointvec(table, write, filp, buffer, lenp);
	unlock_sysctl();

	return err;
}

static void do_actual_dump(void)
{
	uint cpu;

	for (cpu = 0 ; cpu < smp_num_cpus; cpu++)
		oprof_ready[cpu] = 2;
	oprof_wake_up(&oprof_wait);
}

static int sysctl_do_dump(ctl_table * table, int write, struct file * filp, void * buffer, size_t * lenp)
{
	int err = -EINVAL;

	lock_sysctl();

	if (state != RUNNING)
		goto out;

	if (!write) {
		err = proc_dointvec(table, write, filp, buffer, lenp);
		goto out;
	}

	do_actual_dump();

	err = 0;
out:
	unlock_sysctl();
	return err;
}

static int sysctl_do_dump_stop(ctl_table * table, int write, struct file * filp, void * buffer, size_t * lenp)
{
	int err = -EINVAL;

	lock_sysctl();

	if (state != RUNNING)
		goto out;

	if (!write) {
		err = proc_dointvec(table, write, filp, buffer, lenp);
		goto out;
	}

	oprof_partial_stop();

	/* also wakes up daemon */
	do_actual_dump();

	err = 0;
out:
	unlock_sysctl();
	return err;
}

static int const nr_oprof_static = 8;

static ctl_table oprof_table[] = {
	{ 1, "bufsize", &sysctl_parms.buf_size, sizeof(int), 0644, NULL, &lproc_dointvec, NULL, },
	{ 1, "dump", &sysctl_parms.dump, sizeof(int), 0666, NULL, &sysctl_do_dump, NULL, },
	{ 1, "dump_stop", &sysctl_parms.dump_stop, sizeof(int), 0644, NULL, &sysctl_do_dump_stop, NULL, },
	{ 1, "nr_interrupts", &sysctl.nr_interrupts, sizeof(int), 0444, NULL, &get_nr_interrupts, NULL, },
	{ 1, "notesize", &sysctl_parms.note_size, sizeof(int), 0644, NULL, &lproc_dointvec, NULL, },
	{ 1, "cpu_type", &sysctl.cpu_type, sizeof(int), 0444, NULL, &lproc_dointvec, NULL, },
	{ 1, "note_buffer_overflow", &sysctl.nr_note_buffer_overflow, sizeof(int), 0444, NULL, &lproc_dointvec, NULL, },
	{ 1, "buffer_overflow", &sysctl.nr_buffer_overflow, sizeof(int), 0444, NULL, &get_nr_buffer_overflow, NULL, },
	{ 0, }, { 0, }, { 0, }, { 0, }, { 0, }, { 0, }, { 0, }, { 0, },
	{ 0, },
};

static ctl_table oprof_root[] = {
	{1, "oprofile", NULL, 0, 0755, oprof_table},
	{0, },
};

static ctl_table dev_root[] = {
	{CTL_DEV, "dev", NULL, 0, 0555, oprof_root},
	{0, },
};

static struct ctl_table_header * sysctl_header;

/* NOTE: we do *not* support sysctl() syscall */

static int __init init_sysctl(void)
{
	int err = 0;
	ctl_table * next = &oprof_table[nr_oprof_static];

	/* these sysctl parms need sensible values */
	sysctl_parms.buf_size = OP_DEFAULT_BUF_SIZE;
	sysctl_parms.note_size = OP_DEFAULT_NOTE_SIZE;

	if ((err = int_ops->add_sysctls(next)))
		return err;

	sysctl_header = register_sysctl_table(dev_root, 0);
	return err;
}

/* not safe to mark as __exit since used from __init code */
static void cleanup_sysctl(void)
{
	ctl_table * next = &oprof_table[nr_oprof_static];
	unregister_sysctl_table(sysctl_header);

	int_ops->remove_sysctls(next);

	return;
}

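/* module unload hook: only allow unload once profiling is stopped,
 * the devices are closed, and unloading was not disabled at load time */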
static int can_unload(void)
{
	int can = -EBUSY;
	down(&sysctlsem);

	if (allow_unload && state == STOPPED && !GET_USE_COUNT(THIS_MODULE))
		can = 0;
	up(&sysctlsem);
	return can;
}

int __init oprof_init(void)
{
	int err = 0;

	if (sysctl.cpu_type != CPU_RTC) {
		int_ops = op_int_interface();

		// try to init, fall back to rtc if not
		if ((err = int_ops->init())) {
			int_ops = &op_rtc_ops;
			if ((err = int_ops->init()))
				return err;
			sysctl.cpu_type = CPU_RTC;
		}
	} else {
		int_ops = &op_rtc_ops;
		if ((err = int_ops->init()))
			return err;
	}

	if ((err = init_sysctl()))
		goto out_err;

	err = op_major = register_chrdev(0, "oprof", &oprof_fops);
	if (err < 0)
		goto out_err2;

	err = oprof_init_hashmap();
	if (err < 0) {
		printk(KERN_ERR "oprofile: couldn't allocate hash map !\n");
		unregister_chrdev(op_major, "oprof");
		goto out_err2;
	}

	/* module might not be unloadable */
	THIS_MODULE->can_unload = can_unload;

	/* do this now so we don't have to track save/restores later */
	op_save_syscalls();

	printk(KERN_INFO "%s loaded, major %u\n", op_version, op_major);
	return 0;

out_err2:
	cleanup_sysctl();
out_err:
	int_ops->deinit();
	return err;
}

void __exit oprof_exit(void)
{
	oprof_free_hashmap();

	unregister_chrdev(op_major, "oprof");

	cleanup_sysctl();

	int_ops->deinit();
}

/*
 * "The most valuable commodity I know of is information."
 *  - Gordon Gekko
 */