/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling..
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>
#include <linux/prctl.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>
#include <linux/dmi.h>

#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>

asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
	atomic_notifier_chain_register(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_register);

void idle_notifier_unregister(struct notifier_block *n)
{
	atomic_notifier_chain_unregister(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_unregister);
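
/*
 * Illustrative sketch (not part of the original file): how a caller might
 * hook these idle notifications.  The callback and the registration wiring
 * below are hypothetical; only idle_notifier_register(),
 * idle_notifier_unregister() and the IDLE_START/IDLE_END events come from
 * this file and <asm/idle.h>.
 */
#if 0
static int example_idle_notify(struct notifier_block *nb,
			       unsigned long action, void *data)
{
	switch (action) {
	case IDLE_START:
		/* this CPU is about to enter its idle routine */
		break;
	case IDLE_END:
		/* this CPU left idle (typically from an interrupt) */
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block example_idle_nb = {
	.notifier_call = example_idle_notify,
};

/* e.g. from an init/exit pair:
 *	idle_notifier_register(&example_idle_nb);
 *	idle_notifier_unregister(&example_idle_nb);
 */
#endif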

void enter_idle(void)
{
	write_pda(isidle, 1);
	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
	if (test_and_clear_bit_pda(0, isidle) == 0)
		return;
	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
	/* idle loop has pid 0 */
	if (current->pid)
		return;
	__exit_idle();
}

#ifndef CONFIG_SMP
static inline void play_dead(void)
{
	BUG();
}
#endif

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
	current_thread_info()->status |= TS_POLLING;
	/* endless idle loop with no priority at all */
	while (1) {
		tick_nohz_stop_sched_tick(1);
		while (!need_resched()) {

			rmb();

			if (cpu_is_offline(smp_processor_id()))
				play_dead();
			/*
			 * Idle routines should keep interrupts disabled
			 * from here on, until they go to idle.
			 * Otherwise, idle callbacks can misfire.
			 */
			local_irq_disable();
			enter_idle();
			/* Don't trace irqs off for idle */
			stop_critical_timings();
			pm_idle();
			start_critical_timings();
			/* In many cases the interrupt that ended idle
			   has already called exit_idle. But some idle
			   loops can be woken up without interrupt. */
			__exit_idle();
		}

		tick_nohz_restart_sched_tick();
		preempt_enable_no_resched();
		schedule();
		preempt_disable();
	}
}

/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs, int all)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;
	const char *board;

	printk("\n");
	print_modules();
	board = dmi_get_system_info(DMI_PRODUCT_NAME);
	if (!board)
		board = "";
	printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n",
		current->pid, current->comm, print_tainted(),
		init_utsname()->release,
		(int)strcspn(init_utsname()->version, " "),
		init_utsname()->version, board);
	printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
	printk_address(regs->ip, 1);
	printk(KERN_INFO "RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss,
			regs->sp, regs->flags);
	printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->ax, regs->bx, regs->cx);
	printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->dx, regs->si, regs->di);
	printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->bp, regs->r8, regs->r9);
	printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	if (!all)
		return;

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
	cr4 = read_cr4();

	printk(KERN_INFO "FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs, fsindex, gs, gsindex, shadowgs);
	printk(KERN_INFO "CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
			es, cr0);
	printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
			cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);
	printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

void show_regs(struct pt_regs *regs)
{
	printk(KERN_INFO "CPU %d:", smp_processor_id());
	__show_regs(regs, 1);
	show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
	struct task_struct *me = current;
	struct thread_struct *t = &me->thread;

	if (me->thread.io_bitmap_ptr) {
		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
		clear_thread_flag(TIF_IO_BITMAP);
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
		t->io_bitmap_max = 0;
		put_cpu();
	}

	ds_exit_thread(current);
}

void flush_thread(void)
{
	struct task_struct *tsk = current;

	if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
		clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
		if (test_tsk_thread_flag(tsk, TIF_IA32)) {
			clear_tsk_thread_flag(tsk, TIF_IA32);
		} else {
			set_tsk_thread_flag(tsk, TIF_IA32);
			current_thread_info()->status |= TS_COMPAT;
		}
	}
	clear_tsk_thread_flag(tsk, TIF_DEBUG);

	tsk->thread.debugreg0 = 0;
	tsk->thread.debugreg1 = 0;
	tsk->thread.debugreg2 = 0;
	tsk->thread.debugreg3 = 0;
	tsk->thread.debugreg6 = 0;
	tsk->thread.debugreg7 = 0;
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
	/*
	 * Forget coprocessor state..
	 */
	tsk->fpu_counter = 0;
	clear_fpu(tsk);
	clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
		if (dead_task->mm->context.size) {
			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
					dead_task->comm,
					dead_task->mm->context.ldt,
					dead_task->mm->context.size);
			BUG();
		}
	}
}

static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
	struct user_desc ud = {
		.base_addr = addr,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.limit_in_pages = 1,
		.useable = 1,
	};
	struct desc_struct *desc = t->thread.tls_array;
	desc += tls;
	fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
	return get_desc_base(&t->thread.tls_array[tls]);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
		unsigned long unused,
	struct task_struct *p, struct pt_regs *regs)
{
	int err;
	struct pt_regs *childregs;
	struct task_struct *me = current;

	childregs = ((struct pt_regs *)
			(THREAD_SIZE + task_stack_page(p))) - 1;
	*childregs = *regs;

	childregs->ax = 0;
	childregs->sp = sp;
	if (sp == ~0UL)
		childregs->sp = (unsigned long)childregs;

	p->thread.sp = (unsigned long) childregs;
	p->thread.sp0 = (unsigned long) (childregs+1);
	p->thread.usersp = me->thread.usersp;

	set_tsk_thread_flag(p, TIF_FORK);

	p->thread.fs = me->thread.fs;
	p->thread.gs = me->thread.gs;

	savesegment(gs, p->thread.gsindex);
	savesegment(fs, p->thread.fsindex);
	savesegment(es, p->thread.es);
	savesegment(ds, p->thread.ds);

	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
				IO_BITMAP_BYTES);
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (test_thread_flag(TIF_IA32))
			err = do_set_thread_area(p, -1,
				(struct user_desc __user *)childregs->si, 0);
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
		if (err)
			goto out;
	}

	ds_copy_thread(p, me);

	clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
	p->thread.debugctlmsr = 0;

	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}
391 void
start_thread(struct pt_regs * regs,unsigned long new_ip,unsigned long new_sp)392 start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
393 {
394 	loadsegment(fs, 0);
395 	loadsegment(es, 0);
396 	loadsegment(ds, 0);
397 	load_gs_index(0);
398 	regs->ip		= new_ip;
399 	regs->sp		= new_sp;
400 	write_pda(oldrsp, new_sp);
401 	regs->cs		= __USER_CS;
402 	regs->ss		= __USER_DS;
403 	regs->flags		= 0x200;
404 	set_fs(USER_DS);
405 	/*
406 	 * Free the old FP and other extended state
407 	 */
408 	free_thread_xstate(current);
409 }
410 EXPORT_SYMBOL_GPL(start_thread);
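
/*
 * Illustrative note (not in the original file): start_thread() is invoked by
 * the binfmt loaders once a new executable image has been set up; e.g.
 * load_elf_binary() ends with roughly
 *
 *	start_thread(regs, elf_entry, bprm->p);
 *
 * handing the freshly built register frame the entry point and the initial
 * user stack pointer.
 */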

static void hard_disable_TSC(void)
{
	write_cr4(read_cr4() | X86_CR4_TSD);
}

void disable_TSC(void)
{
	preempt_disable();
	if (!test_and_set_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_disable_TSC();
	preempt_enable();
}

static void hard_enable_TSC(void)
{
	write_cr4(read_cr4() & ~X86_CR4_TSD);
}

static void enable_TSC(void)
{
	preempt_disable();
	if (test_and_clear_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_enable_TSC();
	preempt_enable();
}

int get_tsc_mode(unsigned long adr)
{
	unsigned int val;

	if (test_thread_flag(TIF_NOTSC))
		val = PR_TSC_SIGSEGV;
	else
		val = PR_TSC_ENABLE;

	return put_user(val, (unsigned int __user *)adr);
}

int set_tsc_mode(unsigned int val)
{
	if (val == PR_TSC_SIGSEGV)
		disable_TSC();
	else if (val == PR_TSC_ENABLE)
		enable_TSC();
	else
		return -EINVAL;

	return 0;
}
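
/*
 * Illustrative sketch (not in the original file): userspace reaches
 * get_tsc_mode()/set_tsc_mode() through the generic prctl(2) interface.
 * Restricting RDTSC for the current task and reading the mode back would
 * look roughly like this; error handling is omitted.
 *
 *	#include <sys/prctl.h>
 *	#include <linux/prctl.h>
 *
 *	int mode;
 *	prctl(PR_SET_TSC, PR_TSC_SIGSEGV, 0, 0, 0);	// RDTSC now raises SIGSEGV
 *	prctl(PR_GET_TSC, &mode, 0, 0, 0);		// mode == PR_TSC_SIGSEGV
 */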

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)
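/*
 * For example (illustrative only), loaddebug(next, 7) pastes the register
 * number into the field name and expands to:
 *
 *	set_debugreg(next->debugreg7, 7);
 */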

static inline void __switch_to_xtra(struct task_struct *prev_p,
				    struct task_struct *next_p,
				    struct tss_struct *tss)
{
	struct thread_struct *prev, *next;

	prev = &prev_p->thread;
	next = &next_p->thread;

	if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
	    test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
		ds_switch_to(prev_p, next_p);
	else if (next->debugctlmsr != prev->debugctlmsr)
		update_debugctlmsr(next->debugctlmsr);

	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
		loaddebug(next, 0);
		loaddebug(next, 1);
		loaddebug(next, 2);
		loaddebug(next, 3);
		/* no 4 and 5 */
		loaddebug(next, 6);
		loaddebug(next, 7);
	}

	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
		/* prev and next are different */
		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
			hard_disable_TSC();
		else
			hard_enable_TSC();
	}

	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
		/*
		 * Copy the relevant range of the IO bitmap.
		 * Normally this is 128 bytes or less:
		 */
		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
		       max(prev->io_bitmap_max, next->io_bitmap_max));
	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
		/*
		 * Clear any possible leftover bits:
		 */
		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
	}
}

/*
 *	switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 * Function graph tracer not supported either.
 */
__notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread;
	struct thread_struct *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);
	unsigned fsindex, gsindex;

	/* we're going to use this soon, after a few expensive things */
	if (next_p->fpu_counter > 5)
		prefetch(next->xstate);

	/*
	 * Reload esp0, LDT and the page table pointer:
	 */
	load_sp0(tss, next);

	/*
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 */
	savesegment(es, prev->es);
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	savesegment(ds, prev->ds);
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);


	/* We must save %fs and %gs before load_TLS() because
	 * %fs and %gs may be cleared by load_TLS().
	 *
	 * (e.g. xen_load_tls())
	 */
	savesegment(fs, fsindex);
	savesegment(gs, gsindex);

	load_TLS(next, cpu);

	/*
	 * Leave lazy mode, flushing any hypercalls made here.
	 * This must be done before restoring TLS segments so
	 * the GDT and LDT are properly updated, and must be
	 * done before math_state_restore, so the TS bit is up
	 * to date.
	 */
	arch_leave_lazy_cpu_mode();

	/*
	 * Switch FS and GS.
	 *
	 * Segment register != 0 always requires a reload.  Also
	 * reload when it has changed.  When prev process used 64bit
	 * base always reload to avoid an information leak.
	 */
	if (unlikely(fsindex | next->fsindex | prev->fs)) {
		loadsegment(fs, next->fsindex);
		/*
		 * Check if the user used a selector != 0; if yes
		 *  clear 64bit base, since overloaded base is always
		 *  mapped to the Null selector
		 */
		if (fsindex)
			prev->fs = 0;
	}
	/* when next process has a 64bit base use it */
	if (next->fs)
		wrmsrl(MSR_FS_BASE, next->fs);
	prev->fsindex = fsindex;

	if (unlikely(gsindex | next->gsindex | prev->gs)) {
		load_gs_index(next->gsindex);
		if (gsindex)
			prev->gs = 0;
	}
	if (next->gs)
		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
	prev->gsindex = gsindex;

	/* Must be after DS reload */
	unlazy_fpu(prev_p);

	/*
	 * Switch the PDA and FPU contexts.
	 */
	prev->usersp = read_pda(oldrsp);
	write_pda(oldrsp, next->usersp);
	write_pda(pcurrent, next_p);

	write_pda(kernelstack,
		  (unsigned long)task_stack_page(next_p) +
		  THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
	write_pda(stack_canary, next_p->stack_canary);
	/*
	 * Build time only check to make sure the stack_canary is at
	 * offset 40 in the pda; this is a gcc ABI requirement
	 */
	BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif

	/*
	 * Now maybe reload the debug registers and handle I/O bitmaps
	 */
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
		__switch_to_xtra(prev_p, next_p, tss);

	/* If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
	 *
	 * tsk_used_math() checks prevent calling math_state_restore(),
	 * which can sleep in the case of !tsk_used_math()
	 */
	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
		math_state_restore();
	return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
		char __user * __user *envp, struct pt_regs *regs)
{
	long error;
	char *filename;

	filename = getname(name);
	error = PTR_ERR(filename);
	if (IS_ERR(filename))
		return error;
	error = do_execve(filename, argv, envp, regs);
	putname(filename);
	return error;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);

	/* TBD: overwrites user setup. Should have two bits.
	   But 64bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
	   32bit children are affected again. */
	current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
	return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
	if (!newsp)
		newsp = regs->sp;
	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
		    NULL, NULL);
}

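/*
 * Illustrative note (not in the original file): get_wchan() below walks the
 * sleeping task's saved frame-pointer chain, which is only meaningful when
 * the kernel is built with frame pointers.  p->thread.sp points into the
 * switched-out kernel stack, whose first word is the saved %rbp; each frame
 * then holds the caller's %rbp at fp and the return address at fp+8.  The
 * walk stops at the first return address outside the scheduler, which is
 * reported as the wait channel.
 */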
unsigned long get_wchan(struct task_struct *p)
{
	unsigned long stack;
	u64 fp, ip;
	int count = 0;

	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
	stack = (unsigned long)task_stack_page(p);
	if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE)
		return 0;
	fp = *(u64 *)(p->thread.sp);
	do {
		if (fp < (unsigned long)stack ||
		    fp >= (unsigned long)stack+THREAD_SIZE)
			return 0;
		ip = *(u64 *)(fp+8);
		if (!in_sched_functions(ip))
			return ip;
		fp = *(u64 *)fp;
	} while (count++ < 16);
	return 0;
}

long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (code) {
	case ARCH_SET_GS:
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, GS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				load_gs_index(GS_TLS_SEL);
			}
			task->thread.gsindex = GS_TLS_SEL;
			task->thread.gs = 0;
		} else {
			task->thread.gsindex = 0;
			task->thread.gs = addr;
			if (doit) {
				load_gs_index(0);
				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, FS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				loadsegment(fs, FS_TLS_SEL);
			}
			task->thread.fsindex = FS_TLS_SEL;
			task->thread.fs = 0;
		} else {
			task->thread.fsindex = 0;
			task->thread.fs = addr;
			if (doit) {
				/* set the selector to 0 to not confuse
				   __switch_to */
				loadsegment(fs, 0);
				ret = checking_wrmsrl(MSR_FS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;
		if (task->thread.fsindex == FS_TLS_SEL)
			base = read_32bit_tls(task, FS_TLS);
		else if (doit)
			rdmsrl(MSR_FS_BASE, base);
		else
			base = task->thread.fs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;
		unsigned gsindex;
		if (task->thread.gsindex == GS_TLS_SEL)
			base = read_32bit_tls(task, GS_TLS);
		else if (doit) {
			savesegment(gs, gsindex);
			if (gsindex)
				rdmsrl(MSR_KERNEL_GS_BASE, base);
			else
				base = task->thread.gs;
		} else
			base = task->thread.gs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}
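
/*
 * Illustrative sketch (not in the original file): the ARCH_SET_FS/ARCH_GET_FS
 * handling above backs the arch_prctl(2) system call.  From 64-bit userspace,
 * moving the %fs base and reading it back would look roughly like this
 * (glibc of this era has no wrapper, so syscall(2) is used directly;
 * some_tls_block is a hypothetical address):
 *
 *	#include <asm/prctl.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	unsigned long base;
 *	syscall(SYS_arch_prctl, ARCH_SET_FS, some_tls_block);
 *	syscall(SYS_arch_prctl, ARCH_GET_FS, &base);	// base == some_tls_block
 */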

unsigned long arch_align_stack(unsigned long sp)
{
	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}

unsigned long arch_randomize_brk(struct mm_struct *mm)
{
	unsigned long range_end = mm->brk + 0x02000000;
	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}