1 /**
2 * @file op_syscalls.c
3 * Tracing of system calls
4 *
5 * @remark Copyright 2002 OProfile authors
6 * @remark Read the file COPYING
7 *
8 * @author Bob Montgomery
9 * @author Will Cohen
10 * @author John Levon
11 * @author Philippe Elie
12 */
13
14 #include <linux/sched.h>
15 #include <linux/unistd.h>
16 #include <linux/mman.h>
17 #include <linux/file.h>
18
19 #include "oprofile.h"
20 #include "op_dcache.h"
21 #include "op_util.h"
22
/*
 * Globals defined here but not referenced by any code visible in this file.
 * NOTE(review): presumably the name stack and string pool consumed by the
 * path hashing code declared in op_dcache.h - confirm against that file.
 */
uint dname_top;
struct qstr **dname_stack;
char * pool_pos;
char * pool_start;
char * pool_end;

/* Defined outside this file; every note built below is handed to it. */
void oprof_put_note(struct op_note * samp);
30
31 /* ------------ system calls --------------- */
32
/*
 * Six unsigned long fields named after the arguments of an mmap call.
 * No code visible in this file uses this structure.
 * NOTE(review): presumably a leftover from a port that received its mmap
 * arguments packed in memory - confirm before removing.
 */
struct mmap_arg_struct {
	unsigned long addr;
	unsigned long len;
	unsigned long prot;
	unsigned long flags;
	unsigned long fd;
	unsigned long offset;
};
41
42 /* --------- IA64 versions of system calls ------ */
/*
 * Saved original handlers. op_save_syscalls() points each of these at the
 * matching fdesc_xxx descriptor and op_restore_syscalls() reads ->ip back
 * through them. Nothing in this file calls through these pointers; they
 * are only ever cast to struct fdesc pointers.
 */
asmlinkage static int (*old_sys_clone)(long, long);
asmlinkage static int (*old_sys_clone2)(long, long, long);
asmlinkage static int (*old_sys_execve)(char *, char **, char **);
asmlinkage static unsigned long (*old_sys_mmap)(unsigned long,
	unsigned long, int, int, int, long);
asmlinkage static unsigned long (*old_sys_mmap2)(unsigned long,
	unsigned long, int, int, int, long);
asmlinkage static long (*old_sys_init_module)(char const *, struct module *);
asmlinkage static long (*old_sys_exit)(int);
52
53 /* --------- declarations of interception stubs for IA64 ------ */
/*
 * Interception stubs, defined outside this file. op_intercept_syscalls()
 * treats each symbol as a struct fdesc and installs its ->ip into
 * sys_call_table.
 * NOTE(review): presumably implemented in assembly, and presumably they are
 * what invokes the post_sys_xxx / pre_sys_exit / my_sys_execve C routines
 * in this file - confirm against the stub source.
 */
asmlinkage long post_stub_clone(long, long);
asmlinkage long post_stub_clone2(long, long, long);
asmlinkage long my_ia64_execve(char *, char **, char **);
asmlinkage unsigned long post_stub_mmap(unsigned long,
	unsigned long, int, int, int, long);
asmlinkage unsigned long post_stub_mmap2(unsigned long,
	unsigned long, int, int, int, long);
asmlinkage long post_stub_init_module(char const *, struct module *);
asmlinkage long pre_stub_exit(int);
63
64 /* IA64 system call table doesn't use function pointers, it uses
65 * pointers to code (not the same thing). Basically it can violate the
66 * procedure calling rules because these "procedure calls" are made by
67 * the assembly language BREAK handler in ivt.S.
68 */
69
/*
 * Two-pointer function descriptor. Within this file only ip is ever read
 * or written; gp is never touched.
 */
struct fdesc {
	void * ip;	/* code address; the value stored in sys_call_table slots */
	void * gp;	/* NOTE(review): presumably the IA64 global pointer - unused here */
};
74
/*
 * One descriptor per intercepted system call. op_save_syscalls() stores the
 * original sys_call_table entry in .ip; .gp is never assigned in this file.
 */
struct fdesc fdesc_clone;
struct fdesc fdesc_clone2;
struct fdesc fdesc_execve;
struct fdesc fdesc_mmap;
struct fdesc fdesc_mmap2;
struct fdesc fdesc_init_module;
struct fdesc fdesc_exit;
82 /* ----------- End of IA64 weirdness for now -------------- */
83
/* Taken around every oprof_output_map() call made in this file. */
spinlock_t map_lock = SPIN_LOCK_UNLOCKED;
85
86 /* called with map_lock held */
oprof_output_map(ulong addr,ulong len,ulong offset,struct file * file,int is_execve)87 static void oprof_output_map(ulong addr, ulong len,
88 ulong offset, struct file * file, int is_execve)
89 {
90 struct op_note note;
91
92 /* don't bother with /dev/zero mappings etc. */
93 if (!len)
94 return;
95
96 note.pid = current->pid;
97 note.tgid = op_get_tgid();
98 note.addr = addr;
99 note.len = len;
100 note.offset = offset;
101 note.type = is_execve ? OP_EXEC : OP_MAP;
102 note.hash = hash_path(file);
103 if (note.hash == -1)
104 return;
105 oprof_put_note(¬e);
106 }
107
oprof_output_maps(struct task_struct * task)108 static int oprof_output_maps(struct task_struct * task)
109 {
110 int size=0;
111 struct mm_struct * mm;
112 struct vm_area_struct * map;
113
114 /* we don't need to worry about mm_users here, since there is at
115 least one user (current), and if there's other code using this
116 mm, then mm_users must be at least 2; we should never have to
117 mmput() here. */
118
119 if (!(mm = task->mm))
120 goto out;
121
122 lock_mmap(mm);
123 spin_lock(&map_lock);
124
125 /* We need two pass, daemon assume than the first mmap notification
126 * is for the executable but some process doesn't follow this model.
127 */
128 for (map = mm->mmap; map; map = map->vm_next) {
129 if (!(map->vm_flags & VM_EXEC) || !map->vm_file)
130 continue;
131 if (!(map->vm_flags & VM_EXECUTABLE))
132 continue;
133
134 oprof_output_map(map->vm_start, map->vm_end-map->vm_start,
135 GET_VM_OFFSET(map), map->vm_file, 1);
136 }
137 for (map = mm->mmap; map; map = map->vm_next) {
138 if (!(map->vm_flags & VM_EXEC) || !map->vm_file)
139 continue;
140 if (map->vm_flags & VM_EXECUTABLE)
141 continue;
142
143 oprof_output_map(map->vm_start, map->vm_end-map->vm_start,
144 GET_VM_OFFSET(map), map->vm_file, 0);
145 }
146 spin_unlock(&map_lock);
147 unlock_mmap(mm);
148
149 out:
150 return size;
151 }
152
153
154 /* execve is a special case on IA64. The others get the result and
155 * arguments after the system call has been made from the ASM stub. */
156
157 asmlinkage long
my_sys_execve(char * filename,char ** argv,char ** envp,struct pt_regs * regs)158 my_sys_execve (char * filename, char **argv, char **envp, struct pt_regs * regs)
159 {
160 int error;
161
162 MOD_INC_USE_COUNT;
163
164 filename = getname(filename);
165 error = PTR_ERR(filename);
166 if (IS_ERR(filename))
167 goto out;
168 error = do_execve(filename, argv, envp, regs);
169
170 if (!error) {
171 PTRACE_OFF(current);
172 oprof_output_maps(current);
173 }
174 putname(filename);
175 out:
176 unlock_execve();
177 MOD_DEC_USE_COUNT;
178 return error;
179 }
180
181
/*
 * Look up the file behind descriptor fd and report the mapping
 * [addr, addr + len) at the given file offset as a plain (non-execve)
 * map. Nothing is reported when fget() finds no file. prot and flags are
 * accepted but not used.
 */
static void out_mmap(ulong addr, ulong len, ulong prot, ulong flags,
	ulong fd, ulong offset)
{
	struct file * mapped;

	lock_out_mmap();

	mapped = fget(fd);
	if (mapped) {
		spin_lock(&map_lock);
		oprof_output_map(addr, len, offset, mapped, 0);
		spin_unlock(&map_lock);

		/* drop the reference fget() took */
		fput(mapped);
	}

	unlock_out_mmap();
}
202
203
204 /*
205 * IA64 mmap routines:
206 * The post_sys_* routines are called after the syscall has been made.
207 * The first argument is the return value from the system call.
208 */
post_sys_mmap2(ulong ret,ulong addr,ulong len,ulong prot,ulong flags,ulong fd,ulong pgoff)209 asmlinkage void post_sys_mmap2(ulong ret, ulong addr, ulong len,
210 ulong prot, ulong flags, ulong fd, ulong pgoff)
211 {
212 /* FIXME: This should be done in the ASM stub. */
213 MOD_INC_USE_COUNT;
214
215 if ((prot & PROT_EXEC) && ret >= 0)
216 out_mmap(ret, len, prot, flags, fd, pgoff << PAGE_SHIFT);
217 goto out;
218 out:
219 MOD_DEC_USE_COUNT;
220 }
221
post_sys_mmap(ulong ret,ulong addr,ulong len,ulong prot,ulong flags,ulong fd,ulong off)222 asmlinkage void post_sys_mmap(ulong ret, ulong addr, ulong len,
223 ulong prot, ulong flags, ulong fd, ulong off)
224 {
225 /* FIXME: This should be done in the ASM stub. */
226 MOD_INC_USE_COUNT;
227
228 if ((prot & PROT_EXEC) && ret >= 0)
229 out_mmap(ret, len, prot, flags, fd, off);
230 goto out;
231 out:
232 MOD_DEC_USE_COUNT;
233 }
234
235
/*
 * Emit an OP_FORK note. The parent's ids travel in the pid/tgid fields,
 * the child's ids are carried in the addr (new_pid) and len (new_tgid)
 * fields. The remaining note fields are left unset.
 */
inline static void oprof_report_fork(u32 old_pid, u32 new_pid, u32 old_tgid, u32 new_tgid)
{
	struct op_note note;

	note.type = OP_FORK;
	note.pid = old_pid;
	note.tgid = old_tgid;
	note.addr = new_pid;
	note.len = new_tgid;
	oprof_put_note(&note);
}
247
248
post_sys_clone(long ret,long arg0,long arg1)249 asmlinkage void post_sys_clone(long ret, long arg0, long arg1)
250 {
251 u32 pid = current->pid;
252 u32 tgid = op_get_tgid();
253
254 /* FIXME: This should be done in the ASM stub. */
255 MOD_INC_USE_COUNT;
256
257 if (ret)
258 /* FIXME: my libc show clone() is not implemented in ia64
259 * but used only by fork() with a SIGCHILD first parameter
260 * so we assume it's a fork */
261 oprof_report_fork(pid, ret, pid, tgid);
262 MOD_DEC_USE_COUNT;
263 }
264
post_sys_clone2(long ret,long arg0,long arg1,long arg2)265 asmlinkage void post_sys_clone2(long ret, long arg0, long arg1, long arg2)
266 {
267 u32 pid = current->pid;
268 u32 tgid = op_get_tgid();
269 long clone_flags = arg0;
270
271 /* FIXME: This should be done in the ASM stub. */
272 MOD_INC_USE_COUNT;
273
274 if (ret) {
275 if (clone_flags & CLONE_THREAD)
276 oprof_report_fork(pid, ret, tgid, tgid);
277 else
278 oprof_report_fork(pid, ret, tgid, ret);
279 }
280 MOD_DEC_USE_COUNT;
281 }
282
283 asmlinkage void
post_sys_init_module(long ret,char const * name_user,struct module * mod_user)284 post_sys_init_module(long ret, char const * name_user,
285 struct module * mod_user)
286 {
287 /* FIXME: This should be done in the ASM stub. */
288 MOD_INC_USE_COUNT;
289
290 if (ret >= 0) {
291 struct op_note note;
292
293 note.type = OP_DROP_MODULES;
294 oprof_put_note(¬e);
295 }
296 MOD_DEC_USE_COUNT;
297 }
298
299 /* Exit must use a pre-call intercept stub. There is no post exit. */
pre_sys_exit(int error_code)300 asmlinkage void pre_sys_exit(int error_code)
301 {
302 struct op_note note;
303
304 MOD_INC_USE_COUNT;
305
306 note.addr = current->times.tms_utime;
307 note.len = current->times.tms_stime;
308 note.offset = current->start_time;
309 note.type = OP_EXIT;
310 note.pid = current->pid;
311 note.tgid = op_get_tgid();
312 oprof_put_note(¬e);
313
314 /* this looks UP-dangerous, as the exit sleeps and we don't
315 * have a use count, but in fact its ok as sys_exit is noreturn,
316 * so we can never come back to this non-existent exec page
317 */
318 MOD_DEC_USE_COUNT;
319 }
320
/* Defined outside this file; indexed below as [__NR_xxx - __NR_ni_syscall]. */
extern void * sys_call_table[];
322
323 /* FIXME: Now that I'm never trying to do a C-level call through these
324 * pointers, I should just save, intercept, and restore with void *
325 * instead of the void * part of the function descriptor, I think.
326 */
327
op_save_syscalls(void)328 void op_save_syscalls(void)
329 {
330 fdesc_clone.ip = sys_call_table[__NR_clone - __NR_ni_syscall];
331 old_sys_clone = (void *)&fdesc_clone;
332 fdesc_clone2.ip = sys_call_table[__NR_clone2 - __NR_ni_syscall];
333 old_sys_clone2 = (void *)&fdesc_clone2;
334 fdesc_execve.ip = sys_call_table[__NR_execve - __NR_ni_syscall];
335 old_sys_execve = (void *)&fdesc_execve;
336 fdesc_mmap.ip = sys_call_table[__NR_mmap - __NR_ni_syscall];
337 old_sys_mmap = (void *)&fdesc_mmap;
338 fdesc_mmap2.ip = sys_call_table[__NR_mmap2 - __NR_ni_syscall];
339 old_sys_mmap2 = (void *)&fdesc_mmap2;
340 fdesc_init_module.ip = sys_call_table[__NR_init_module - __NR_ni_syscall];
341 old_sys_init_module = (void *)&fdesc_init_module;
342 fdesc_exit.ip = sys_call_table[__NR_exit - __NR_ni_syscall];
343 old_sys_exit = (void *)&fdesc_exit;
344 }
345
op_intercept_syscalls(void)346 void op_intercept_syscalls(void)
347 {
348 /* Must extract the function address from the stub function
349 * descriptors.
350 */
351 sys_call_table[__NR_clone - __NR_ni_syscall] =
352 ((struct fdesc *)post_stub_clone)->ip;
353 sys_call_table[__NR_clone2 - __NR_ni_syscall] =
354 ((struct fdesc *)post_stub_clone2)->ip;
355 sys_call_table[__NR_execve - __NR_ni_syscall] =
356 ((struct fdesc *)my_ia64_execve)->ip;
357 sys_call_table[__NR_mmap - __NR_ni_syscall] =
358 ((struct fdesc *)post_stub_mmap)->ip;
359 sys_call_table[__NR_mmap2 - __NR_ni_syscall] =
360 ((struct fdesc *)post_stub_mmap2)->ip;
361 sys_call_table[__NR_init_module - __NR_ni_syscall] =
362 ((struct fdesc *)post_stub_init_module)->ip;
363 sys_call_table[__NR_exit - __NR_ni_syscall] =
364 ((struct fdesc *)pre_stub_exit)->ip;
365 }
366
op_restore_syscalls(void)367 void op_restore_syscalls(void)
368 {
369 sys_call_table[__NR_clone - __NR_ni_syscall] =
370 ((struct fdesc *)old_sys_clone)->ip;
371 sys_call_table[__NR_clone2 - __NR_ni_syscall] =
372 ((struct fdesc *)old_sys_clone2)->ip;
373 sys_call_table[__NR_execve - __NR_ni_syscall] =
374 ((struct fdesc *)old_sys_execve)->ip;
375 sys_call_table[__NR_mmap - __NR_ni_syscall] =
376 ((struct fdesc *)old_sys_mmap)->ip;
377 sys_call_table[__NR_mmap2 - __NR_ni_syscall] =
378 ((struct fdesc *)old_sys_mmap2)->ip;
379 sys_call_table[__NR_init_module - __NR_ni_syscall] =
380 ((struct fdesc *)old_sys_init_module)->ip;
381 sys_call_table[__NR_exit - __NR_ni_syscall] =
382 ((struct fdesc *)old_sys_exit)->ip;
383 }
384