/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/prctl.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>

#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/mmu_context.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/debugreg.h>
#include <asm/switch_to.h>

asmlinkage extern void ret_from_fork(void);

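/*
 * Per-CPU slot for the user-space stack pointer.  The 64-bit syscall
 * fast path does not save the user %rsp in pt_regs; it is parked here
 * instead and shuffled into thread.usersp on a context switch (see
 * __switch_to() below).
 */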
DEFINE_PER_CPU(unsigned long, old_rsp);

/* Also prints some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs, int all)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;

	printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
	printk_address(regs->ip, 1);
	printk(KERN_DEFAULT "RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss,
			regs->sp, regs->flags);
	printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->ax, regs->bx, regs->cx);
	printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->dx, regs->si, regs->di);
	printk(KERN_DEFAULT "RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->bp, regs->r8, regs->r9);
	printk(KERN_DEFAULT "R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

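	/*
	 * The data segment selectors and the FS/GS base MSRs are not
	 * saved in pt_regs, so read the live values straight off the CPU.
	 */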
	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	if (!all)
		return;

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
	cr4 = read_cr4();

	printk(KERN_DEFAULT "FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs, fsindex, gs, gsindex, shadowgs);
	printk(KERN_DEFAULT "CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
			es, cr0);
	printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
			cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);
	printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
		if (dead_task->mm->context.size) {
			pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
				dead_task->comm,
				dead_task->mm->context.ldt,
				dead_task->mm->context.size);
			BUG();
		}
	}
}

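/*
 * Helpers for stashing a 32-bit (sub-4GB) FS/GS base in a GDT TLS slot.
 * do_arch_prctl() below uses these for small bases because reloading a
 * segment selector is cheaper than a WRMSR on every context switch.
 */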
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
	struct user_desc ud = {
		.base_addr = addr,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.limit_in_pages = 1,
		.useable = 1,
	};
	struct desc_struct *desc = t->thread.tls_array;
	desc += tls;
	fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
	return get_desc_base(&t->thread.tls_array[tls]);
}

int copy_thread(unsigned long clone_flags, unsigned long sp,
		unsigned long arg, struct task_struct *p)
{
	int err;
	struct pt_regs *childregs;
	struct task_struct *me = current;

	p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
	childregs = task_pt_regs(p);
	p->thread.sp = (unsigned long) childregs;
	p->thread.usersp = me->thread.usersp;
	set_tsk_thread_flag(p, TIF_FORK);
	p->fpu_counter = 0;
	p->thread.io_bitmap_ptr = NULL;

	savesegment(gs, p->thread.gsindex);
	p->thread.gs = p->thread.gsindex ? 0 : me->thread.gs;
	savesegment(fs, p->thread.fsindex);
	p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs;
	savesegment(es, p->thread.es);
	savesegment(ds, p->thread.ds);
	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));

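	/*
	 * Kernel threads never return to user mode, so forge a minimal
	 * pt_regs frame instead of copying the parent's: ret_from_fork
	 * is expected to call the function stashed in ->bx with the
	 * argument stashed in ->bp.
	 */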
	if (unlikely(p->flags & PF_KTHREAD)) {
		/* kernel thread */
		memset(childregs, 0, sizeof(struct pt_regs));
		childregs->sp = (unsigned long)childregs;
		childregs->ss = __KERNEL_DS;
		childregs->bx = sp; /* function */
		childregs->bp = arg;
		childregs->orig_ax = -1;
		childregs->cs = __KERNEL_CS | get_kernel_rpl();
		childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
		return 0;
	}
	*childregs = *current_pt_regs();

	childregs->ax = 0;
	if (sp)
		childregs->sp = sp;

	err = -ENOMEM;
	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));

	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr,
						  IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (test_thread_flag(TIF_IA32))
			err = do_set_thread_area(p, -1,
				(struct user_desc __user *)childregs->si, 0);
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
		if (err)
			goto out;
	}
	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}

	return err;
}

static void
start_thread_common(struct pt_regs *regs, unsigned long new_ip,
		    unsigned long new_sp,
		    unsigned int _cs, unsigned int _ss, unsigned int _ds)
{
	loadsegment(fs, 0);
	loadsegment(es, _ds);
	loadsegment(ds, _ds);
	load_gs_index(0);
	current->thread.usersp	= new_sp;
	regs->ip		= new_ip;
	regs->sp		= new_sp;
	this_cpu_write(old_rsp, new_sp);
	regs->cs		= _cs;
	regs->ss		= _ss;
	regs->flags		= X86_EFLAGS_IF;
}

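/*
 * Set up a fresh user-mode register state for a 64-bit exec(); the
 * binfmt loaders supply the new entry point and initial stack.
 */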
void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
	start_thread_common(regs, new_ip, new_sp,
			    __USER_CS, __USER_DS, 0);
}

#ifdef CONFIG_IA32_EMULATION
void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp)
{
	start_thread_common(regs, new_ip, new_sp,
			    test_thread_flag(TIF_X32)
			    ? __USER_CS : __USER32_CS,
			    __USER_DS, __USER_DS);
}
#endif

/*
 *	switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes are not supported here. Set the probe on schedule instead.
 * The function graph tracer is not supported here either.
 */
__notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread;
	struct thread_struct *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);
	unsigned fsindex, gsindex;
	fpu_switch_t fpu;

	fpu = switch_fpu_prepare(prev_p, next_p, cpu);

	/*
	 * Reload esp0, LDT and the page table pointer:
	 */
	load_sp0(tss, next);

	/*
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 */
	savesegment(es, prev->es);
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	savesegment(ds, prev->ds);
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);


	/* We must save %fs and %gs before load_TLS() because
	 * %fs and %gs may be cleared by load_TLS().
	 *
	 * (e.g. xen_load_tls())
	 */
	savesegment(fs, fsindex);
	savesegment(gs, gsindex);

	load_TLS(next, cpu);

	/*
	 * Leave lazy mode, flushing any hypercalls made here.
	 * This must be done before restoring TLS segments so
	 * the GDT and LDT are properly updated, and must be
	 * done before math_state_restore, so the TS bit is up
	 * to date.
	 */
	arch_end_context_switch(next_p);

	/*
	 * Switch FS and GS.
	 *
	 * Segment register != 0 always requires a reload.  Also
	 * reload when it has changed.  When prev process used 64bit
	 * base always reload to avoid an information leak.
	 */
	if (unlikely(fsindex | next->fsindex | prev->fs)) {
		loadsegment(fs, next->fsindex);
		/*
		 * Check if the user used a selector != 0; if yes
		 *  clear 64bit base, since overloaded base is always
		 *  mapped to the Null selector
		 */
		if (fsindex)
			prev->fs = 0;
	}
	/* when next process has a 64bit base use it */
	if (next->fs)
		wrmsrl(MSR_FS_BASE, next->fs);
	prev->fsindex = fsindex;

	if (unlikely(gsindex | next->gsindex | prev->gs)) {
		load_gs_index(next->gsindex);
		if (gsindex)
			prev->gs = 0;
	}
	if (next->gs)
		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
	prev->gsindex = gsindex;

	switch_fpu_finish(next_p, fpu);

	/*
	 * Switch the PDA and FPU contexts.
	 */
	prev->usersp = this_cpu_read(old_rsp);
	this_cpu_write(old_rsp, next->usersp);
	this_cpu_write(current_task, next_p);

	this_cpu_write(kernel_stack,
		  (unsigned long)task_stack_page(next_p) +
		  THREAD_SIZE - KERNEL_STACK_OFFSET);

	/*
	 * Now maybe reload the debug registers and handle I/O bitmaps
	 */
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
		__switch_to_xtra(prev_p, next_p, tss);

	return prev_p;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);
	clear_thread_flag(TIF_ADDR32);
	clear_thread_flag(TIF_X32);

	/* Ensure the corresponding mm is not marked. */
	if (current->mm)
		current->mm->context.ia32_compat = 0;

	/* TBD: overwrites the user's setup. Should have two bits.
	   But 64-bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
	   32-bit children are affected again. */
	current->personality &= ~READ_IMPLIES_EXEC;
}

void set_personality_ia32(bool x32)
{
	/* inherit personality from parent */

	/* Make sure to be in 32bit mode */
	set_thread_flag(TIF_ADDR32);

	/* Mark the associated mm as containing 32-bit tasks. */
	if (current->mm)
		current->mm->context.ia32_compat = 1;

	if (x32) {
		clear_thread_flag(TIF_IA32);
		set_thread_flag(TIF_X32);
		current->personality &= ~READ_IMPLIES_EXEC;
		/* is_compat_task() uses the presence of the x32
		   syscall bit flag to determine compat status */
		current_thread_info()->status &= ~TS_COMPAT;
	} else {
		set_thread_flag(TIF_IA32);
		clear_thread_flag(TIF_X32);
		current->personality |= force_personality32;
		/* Prepare the first "return" to user space */
		current_thread_info()->status |= TS_COMPAT;
	}
}
EXPORT_SYMBOL_GPL(set_personality_ia32);

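/*
 * Return the address where a blocked task is sleeping, by walking the
 * frame-pointer chain on its kernel stack until an address outside the
 * scheduler is found: the return address lives at fp+8, the caller's
 * frame pointer at *fp.  Gives up after 16 frames or if a frame leaves
 * the stack.  Only reliable when built with frame pointers.
 */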
unsigned long get_wchan(struct task_struct *p)
{
	unsigned long stack;
	u64 fp, ip;
	int count = 0;

	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
	stack = (unsigned long)task_stack_page(p);
	if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE)
		return 0;
	fp = *(u64 *)(p->thread.sp);
	do {
		if (fp < (unsigned long)stack ||
		    fp >= (unsigned long)stack+THREAD_SIZE)
			return 0;
		ip = *(u64 *)(fp+8);
		if (!in_sched_functions(ip))
			return ip;
		fp = *(u64 *)fp;
	} while (count++ < 16);
	return 0;
}

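/*
 * Backend for the arch_prctl(2) syscall: get or set a task's FS/GS
 * base.  From user space this is reached via syscall(2), e.g. (a
 * minimal sketch, error handling omitted):
 *
 *	#include <asm/prctl.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	unsigned long base;
 *	syscall(SYS_arch_prctl, ARCH_GET_FS, &base);	// read FS base
 *	syscall(SYS_arch_prctl, ARCH_SET_GS, 0x1000);	// set GS base
 *
 * Note the asymmetry: for ARCH_GET_FS/ARCH_GET_GS, "addr" is a user
 * pointer the result is stored through; for ARCH_SET_FS/ARCH_SET_GS
 * it is the new base itself.
 */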
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (code) {
	case ARCH_SET_GS:
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, GS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				load_gs_index(GS_TLS_SEL);
			}
			task->thread.gsindex = GS_TLS_SEL;
			task->thread.gs = 0;
		} else {
			task->thread.gsindex = 0;
			task->thread.gs = addr;
			if (doit) {
				load_gs_index(0);
				ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, FS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				loadsegment(fs, FS_TLS_SEL);
			}
			task->thread.fsindex = FS_TLS_SEL;
			task->thread.fs = 0;
		} else {
			task->thread.fsindex = 0;
			task->thread.fs = addr;
			if (doit) {
				/* set the selector to 0 to not confuse
				   __switch_to */
				loadsegment(fs, 0);
				ret = wrmsrl_safe(MSR_FS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;
		if (task->thread.fsindex == FS_TLS_SEL)
			base = read_32bit_tls(task, FS_TLS);
		else if (doit)
			rdmsrl(MSR_FS_BASE, base);
		else
			base = task->thread.fs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;
		unsigned gsindex;
		if (task->thread.gsindex == GS_TLS_SEL)
			base = read_32bit_tls(task, GS_TLS);
		else if (doit) {
			savesegment(gs, gsindex);
			if (gsindex)
				rdmsrl(MSR_KERNEL_GS_BASE, base);
			else
				base = task->thread.gs;
		} else
			base = task->thread.gs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}

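/*
 * User stack pointer for /proc/<pid>/stat and friends.  For ia32 tasks
 * the value saved in pt_regs is usable; for 64-bit tasks the syscall
 * fast path does not save %rsp in pt_regs, so use thread.usersp (kept
 * up to date via old_rsp above).
 */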
unsigned long KSTK_ESP(struct task_struct *task)
{
	return (test_tsk_thread_flag(task, TIF_IA32)) ?
			(task_pt_regs(task)->sp) : ((task)->thread.usersp);
}