/*
 * Copyright (C) 1991,1992 Linus Torvalds
 *
 * entry_32.S contains the system-call and low-level fault and trap handling routines.
 *
 * Stack layout while running C code:
 *	ptrace needs to have all registers on the stack.
 *	If the order here is changed, it needs to be
 *	updated in fork.c:copy_process(), signal.c:do_signal(),
 *	ptrace.c and ptrace.h
 *
 *	 0(%esp) - %ebx
 *	 4(%esp) - %ecx
 *	 8(%esp) - %edx
 *	 C(%esp) - %esi
 *	10(%esp) - %edi
 *	14(%esp) - %ebp
 *	18(%esp) - %eax
 *	1C(%esp) - %ds
 *	20(%esp) - %es
 *	24(%esp) - %fs
 *	28(%esp) - %gs		saved iff !CONFIG_X86_32_LAZY_GS
 *	2C(%esp) - orig_eax
 *	30(%esp) - %eip
 *	34(%esp) - %cs
 *	38(%esp) - %eflags
 *	3C(%esp) - %oldesp
 *	40(%esp) - %oldss
 */

#include <linux/linkage.h>
#include <linux/err.h>
#include <asm/thread_info.h>
#include <asm/irqflags.h>
#include <asm/errno.h>
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/page_types.h>
#include <asm/percpu.h>
#include <asm/processor-flags.h>
#include <asm/ftrace.h>
#include <asm/irq_vectors.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>
#include <asm/nospec-branch.h>

	.section .entry.text, "ax"

/*
 * We use macros for low-level operations which need to be overridden
 * for paravirtualization.  The following will never clobber any registers:
 *	INTERRUPT_RETURN (aka. "iret")
 *	GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
 *	ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
 *
 * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
 * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
 * Allowing a register to be clobbered can shrink the paravirt replacement
 * enough to patch inline, increasing performance.
 */

#ifdef CONFIG_PREEMPT
# define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else
# define preempt_stop(clobbers)
# define resume_kernel		restore_all
#endif

.macro TRACE_IRQS_IRET
#ifdef CONFIG_TRACE_IRQFLAGS
	testl	$X86_EFLAGS_IF, PT_EFLAGS(%esp)	# interrupts off?
	jz	1f
	TRACE_IRQS_ON
1:
#endif
.endm

/*
 * User gs save/restore
 *
 * %gs is used for userland TLS and kernel only uses it for stack
 * canary which is required to be at %gs:20 by gcc.  Read the comment
 * at the top of stackprotector.h for more info.
 *
 * Local labels 98 and 99 are used.
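 *
 * (Descriptive note, inferred from the macros below: with
 * CONFIG_X86_32_LAZY_GS this entry code never touches %gs at all, so
 * the save/restore helpers only reserve or skip the pt_regs slot.
 * Without it they really save and reload %gs, and SET_KERNEL_GS points
 * it at the per-CPU stack canary segment, __KERNEL_STACK_CANARY.)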
 */
#ifdef CONFIG_X86_32_LAZY_GS

 /* unfortunately push/pop can't be no-op */
.macro PUSH_GS
	pushl	$0
.endm
.macro POP_GS pop=0
	addl	$(4 + \pop), %esp
.endm
.macro POP_GS_EX
.endm

 /* all the rest are no-op */
.macro PTGS_TO_GS
.endm
.macro PTGS_TO_GS_EX
.endm
.macro GS_TO_REG reg
.endm
.macro REG_TO_PTGS reg
.endm
.macro SET_KERNEL_GS reg
.endm

#else	/* CONFIG_X86_32_LAZY_GS */

.macro PUSH_GS
	pushl	%gs
.endm

.macro POP_GS pop=0
98:	popl	%gs
  .if \pop <> 0
	add	$\pop, %esp
  .endif
.endm
.macro POP_GS_EX
.pushsection .fixup, "ax"
99:	movl	$0, (%esp)
	jmp	98b
.popsection
	_ASM_EXTABLE(98b, 99b)
.endm

.macro PTGS_TO_GS
98:	mov	PT_GS(%esp), %gs
.endm
.macro PTGS_TO_GS_EX
.pushsection .fixup, "ax"
99:	movl	$0, PT_GS(%esp)
	jmp	98b
.popsection
	_ASM_EXTABLE(98b, 99b)
.endm

.macro GS_TO_REG reg
	movl	%gs, \reg
.endm
.macro REG_TO_PTGS reg
	movl	\reg, PT_GS(%esp)
.endm
.macro SET_KERNEL_GS reg
	movl	$(__KERNEL_STACK_CANARY), \reg
	movl	\reg, %gs
.endm

#endif /* CONFIG_X86_32_LAZY_GS */

.macro SAVE_ALL pt_regs_ax=%eax
	cld
	PUSH_GS
	pushl	%fs
	pushl	%es
	pushl	%ds
	pushl	\pt_regs_ax
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	pushl	%edx
	pushl	%ecx
	pushl	%ebx
	movl	$(__USER_DS), %edx
	movl	%edx, %ds
	movl	%edx, %es
	movl	$(__KERNEL_PERCPU), %edx
	movl	%edx, %fs
	SET_KERNEL_GS %edx
.endm

.macro RESTORE_INT_REGS
	popl	%ebx
	popl	%ecx
	popl	%edx
	popl	%esi
	popl	%edi
	popl	%ebp
	popl	%eax
.endm

.macro RESTORE_REGS pop=0
	RESTORE_INT_REGS
1:	popl	%ds
2:	popl	%es
3:	popl	%fs
	POP_GS \pop
.pushsection .fixup, "ax"
4:	movl	$0, (%esp)
	jmp	1b
5:	movl	$0, (%esp)
	jmp	2b
6:	movl	$0, (%esp)
	jmp	3b
.popsection
	_ASM_EXTABLE(1b, 4b)
	_ASM_EXTABLE(2b, 5b)
	_ASM_EXTABLE(3b, 6b)
	POP_GS_EX
.endm

/*
 * %eax: prev task
 * %edx: next task
 */
ENTRY(__switch_to_asm)
	/*
	 * Save callee-saved registers
	 * This must match the order in struct inactive_task_frame
	 */
	pushl	%ebp
	pushl	%ebx
	pushl	%edi
	pushl	%esi

	/* switch stack */
	movl	%esp, TASK_threadsp(%eax)
	movl	TASK_threadsp(%edx), %esp

#ifdef CONFIG_CC_STACKPROTECTOR
	movl	TASK_stack_canary(%edx), %ebx
	movl	%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
#endif

#ifdef CONFIG_RETPOLINE
	/*
	 * When switching from a shallower to a deeper call stack
	 * the RSB may either underflow or use entries populated
	 * with userspace addresses. On CPUs where those concerns
	 * exist, overwrite the RSB with entries which capture
	 * speculative execution to prevent attack.
	 */
	FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
#endif

	/* restore callee-saved registers */
	popl	%esi
	popl	%edi
	popl	%ebx
	popl	%ebp

	jmp	__switch_to
END(__switch_to_asm)

/*
 * A newly forked process directly context switches into this address.
 *
 * eax: prev task we switched from
 * ebx: kernel thread func (NULL for user thread)
 * edi: kernel thread arg
 */
ENTRY(ret_from_fork)
	pushl	%eax
	call	schedule_tail
	popl	%eax

	testl	%ebx, %ebx
	jnz	1f		/* kernel threads are uncommon */

2:
	/* When we fork, we trace the syscall return in the child, too. */
	movl	%esp, %eax
	call	syscall_return_slowpath
	jmp	restore_all

	/* kernel thread */
1:	movl	%edi, %eax
	CALL_NOSPEC %ebx
	/*
	 * A kernel thread is allowed to return here after successfully
	 * calling do_execve().  Exit to userspace to complete the execve()
	 * syscall.
	 */
	movl	$0, PT_EAX(%esp)
	jmp	2b
END(ret_from_fork)

/*
 * Return to user mode is not as complex as all this looks,
 * but we want the default path for a system call return to
 * go as quickly as possible which is why some of this is
 * less clear than it otherwise should be.
 */

	# userspace resumption stub bypassing syscall exit tracing
	ALIGN
ret_from_exception:
	preempt_stop(CLBR_ANY)
ret_from_intr:
#ifdef CONFIG_VM86
	movl	PT_EFLAGS(%esp), %eax		# mix EFLAGS and CS
	movb	PT_CS(%esp), %al
	andl	$(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
#else
	/*
	 * We can be coming here from a child spawned by kernel_thread().
	 */
	movl	PT_CS(%esp), %eax
	andl	$SEGMENT_RPL_MASK, %eax
#endif
	cmpl	$USER_RPL, %eax
	jb	resume_kernel			# not returning to v8086 or userspace

ENTRY(resume_userspace)
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_OFF
	movl	%esp, %eax
	call	prepare_exit_to_usermode
	jmp	restore_all
END(ret_from_exception)

#ifdef CONFIG_PREEMPT
ENTRY(resume_kernel)
	DISABLE_INTERRUPTS(CLBR_ANY)
need_resched:
	cmpl	$0, PER_CPU_VAR(__preempt_count)
	jnz	restore_all
	testl	$X86_EFLAGS_IF, PT_EFLAGS(%esp)	# interrupts off (exception path) ?
	jz	restore_all
	call	preempt_schedule_irq
	jmp	need_resched
END(resume_kernel)
#endif

GLOBAL(__begin_SYSENTER_singlestep_region)
/*
 * All code from here through __end_SYSENTER_singlestep_region is subject
 * to being single-stepped if a user program sets TF and executes SYSENTER.
 * There is absolutely nothing that we can do to prevent this from happening
 * (thanks Intel!).  To keep our handling of this situation as simple as
 * possible, we handle TF just like AC and NT, except that our #DB handler
 * will ignore all of the single-step traps generated in this range.
 */

#ifdef CONFIG_XEN
/*
 * Xen doesn't set %esp to be precisely what the normal SYSENTER
 * entry point expects, so fix it up before using the normal path.
 */
ENTRY(xen_sysenter_target)
	addl	$5*4, %esp			/* remove xen-provided frame */
	jmp	sysenter_past_esp
#endif

/*
 * 32-bit SYSENTER entry.
 *
 * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
 * if X86_FEATURE_SEP is available.  This is the preferred system call
 * entry on 32-bit systems.
 *
 * The SYSENTER instruction, in principle, should *only* occur in the
 * vDSO.  In practice, a small number of Android devices were shipped
 * with a copy of Bionic that inlined a SYSENTER instruction.  This
 * never happened in any of Google's Bionic versions -- it only happened
 * in a narrow range of Intel-provided versions.
 *
 * SYSENTER loads SS, ESP, CS, and EIP from previously programmed MSRs.
 * IF and VM in RFLAGS are cleared (IOW: interrupts are off).
 * SYSENTER does not save anything on the stack,
 * and does not save old EIP (!!!), ESP, or EFLAGS.
 *
 * To avoid losing track of EFLAGS.VM (and thus potentially corrupting
 * user and/or vm86 state), we explicitly disable the SYSENTER
 * instruction in vm86 mode by reprogramming the MSRs.
 *
 * Arguments:
 * eax  system call number
 * ebx  arg1
 * ecx  arg2
 * edx  arg3
 * esi  arg4
 * edi  arg5
 * ebp  user stack
 * 0(%ebp) arg6
 */
ENTRY(entry_SYSENTER_32)
	movl	TSS_sysenter_sp0(%esp), %esp
sysenter_past_esp:
	pushl	$__USER_DS		/* pt_regs->ss */
	pushl	%ebp			/* pt_regs->sp (stashed in bp) */
	pushfl				/* pt_regs->flags (except IF = 0) */
	orl	$X86_EFLAGS_IF, (%esp)	/* Fix IF */
	pushl	$__USER_CS		/* pt_regs->cs */
	pushl	$0			/* pt_regs->ip = 0 (placeholder) */
	pushl	%eax			/* pt_regs->orig_ax */
	SAVE_ALL pt_regs_ax=$-ENOSYS	/* save rest */

	/*
	 * SYSENTER doesn't filter flags, so we need to clear NT, AC
	 * and TF ourselves.  To save a few cycles, we can check whether
	 * either was set instead of doing an unconditional popfl.
	 * This needs to happen before enabling interrupts so that
	 * we don't get preempted with NT set.
	 *
	 * If TF is set, we will single-step all the way to here -- do_debug
	 * will ignore all the traps.  (Yes, this is slow, but so is
	 * single-stepping in general.  This allows us to avoid having
	 * more complicated code to handle the case where a user program
	 * forces us to single-step through the SYSENTER entry code.)
	 *
	 * NB.: .Lsysenter_fix_flags is a label with the code under it moved
	 * out-of-line as an optimization: NT is unlikely to be set in the
	 * majority of the cases and instead of polluting the I$ unnecessarily,
	 * we're keeping that code behind a branch which will predict as
	 * not-taken and therefore its instructions won't be fetched.
	 */
	testl	$X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, PT_EFLAGS(%esp)
	jnz	.Lsysenter_fix_flags
.Lsysenter_flags_fixed:

	/*
	 * User mode is traced as though IRQs are on, and SYSENTER
	 * turned them off.
	 */
	TRACE_IRQS_OFF

	movl	%esp, %eax
	call	do_fast_syscall_32
	/* XEN PV guests always use IRET path */
	ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
		    "jmp .Lsyscall_32_done", X86_FEATURE_XENPV

/* Opportunistic SYSEXIT */
	TRACE_IRQS_ON			/* User mode traces as IRQs on. */
	movl	PT_EIP(%esp), %edx	/* pt_regs->ip */
	movl	PT_OLDESP(%esp), %ecx	/* pt_regs->sp */
1:	mov	PT_FS(%esp), %fs
	PTGS_TO_GS
	popl	%ebx			/* pt_regs->bx */
	addl	$2*4, %esp		/* skip pt_regs->cx and pt_regs->dx */
	popl	%esi			/* pt_regs->si */
	popl	%edi			/* pt_regs->di */
	popl	%ebp			/* pt_regs->bp */
	popl	%eax			/* pt_regs->ax */

	/*
	 * Restore all flags except IF. (We restore IF separately because
	 * STI gives a one-instruction window in which we won't be interrupted,
	 * whereas POPF does not.)
	 */
	addl	$PT_EFLAGS-PT_DS, %esp	/* point esp at pt_regs->flags */
	btr	$X86_EFLAGS_IF_BIT, (%esp)
	popfl

	/*
	 * Return back to the vDSO, which will pop ecx and edx.
	 * Don't bother with DS and ES (they already contain __USER_DS).
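	 *
	 * (Descriptive note: SYSEXIT reloads CS/SS from the SYSENTER MSR
	 * base and sets EIP from %edx and ESP from %ecx, which is why %edx
	 * and %ecx were loaded from pt_regs->ip and pt_regs->sp above.)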
	 */
	sti
	sysexit

.pushsection .fixup, "ax"
2:	movl	$0, PT_FS(%esp)
	jmp	1b
.popsection
	_ASM_EXTABLE(1b, 2b)
	PTGS_TO_GS_EX

.Lsysenter_fix_flags:
	pushl	$X86_EFLAGS_FIXED
	popfl
	jmp	.Lsysenter_flags_fixed
GLOBAL(__end_SYSENTER_singlestep_region)
ENDPROC(entry_SYSENTER_32)

/*
 * 32-bit legacy system call entry.
 *
 * 32-bit x86 Linux system calls traditionally used the INT $0x80
 * instruction.  INT $0x80 lands here.
 *
 * This entry point can be used by any 32-bit program to perform system calls.
 * Instances of INT $0x80 can be found inline in various programs and
 * libraries.  It is also used by the vDSO's __kernel_vsyscall
 * fallback for hardware that doesn't support a faster entry method.
 * Restarted 32-bit system calls also fall back to INT $0x80
 * regardless of what instruction was originally used to do the system
 * call.  (64-bit programs can use INT $0x80 as well, but they can
 * only run on 64-bit kernels and therefore land in
 * entry_INT80_compat.)
 *
 * This is considered a slow path.  It is not used by most libc
 * implementations on modern hardware except during process startup.
 *
 * Arguments:
 * eax  system call number
 * ebx  arg1
 * ecx  arg2
 * edx  arg3
 * esi  arg4
 * edi  arg5
 * ebp  arg6
 */
ENTRY(entry_INT80_32)
	ASM_CLAC
	pushl	%eax			/* pt_regs->orig_ax */
	SAVE_ALL pt_regs_ax=$-ENOSYS	/* save rest */

	/*
	 * User mode is traced as though IRQs are on, and the interrupt gate
	 * turned them off.
	 */
	TRACE_IRQS_OFF

	movl	%esp, %eax
	call	do_int80_syscall_32
.Lsyscall_32_done:

restore_all:
	TRACE_IRQS_IRET
restore_all_notrace:
#ifdef CONFIG_X86_ESPFIX32
	ALTERNATIVE	"jmp restore_nocheck", "", X86_BUG_ESPFIX

	movl	PT_EFLAGS(%esp), %eax		# mix EFLAGS, SS and CS
	/*
	 * Warning: PT_OLDSS(%esp) contains the wrong/random values if we
	 * are returning to the kernel.
	 * See comments in process.c:copy_thread() for details.
	 */
	movb	PT_OLDSS(%esp), %ah
	movb	PT_CS(%esp), %al
	andl	$(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
	cmpl	$((SEGMENT_LDT << 8) | USER_RPL), %eax
	je	ldt_ss				# returning to user-space with LDT SS
#endif
restore_nocheck:
	RESTORE_REGS 4				# skip orig_eax/error_code
irq_return:
	INTERRUPT_RETURN
.section .fixup, "ax"
ENTRY(iret_exc)
	pushl	$0				# no error code
	pushl	$do_iret_error
	jmp	error_code
.previous
	_ASM_EXTABLE(irq_return, iret_exc)

#ifdef CONFIG_X86_ESPFIX32
ldt_ss:
/*
 * Setup and switch to ESPFIX stack
 *
 * We're returning to userspace with a 16 bit stack. The CPU will not
 * restore the high word of ESP for us on executing iret... This is an
 * "official" bug of all the x86-compatible CPUs, which we can work
 * around to make dosemu and wine happy. We do this by preloading the
 * high word of ESP with the high word of the userspace ESP while
 * compensating for the offset by changing to the ESPFIX segment with
 * a base address that matches for the difference.
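 *
 * Illustrative example (made-up values): if the saved userspace ESP is
 * 0x0000a123 and the kernel ESP here is 0xf60bd000, the code below
 * builds a new ESP of 0x0000d000 (user high word, kernel low word) and
 * programs the ESPFIX segment base with the difference 0xf60b0000, so
 * that %ss:%esp still resolves to the kernel stack while the high word
 * of ESP already matches what userspace expects.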
 */
#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
	mov	%esp, %edx			/* load kernel esp */
	mov	PT_OLDESP(%esp), %eax		/* load userspace esp */
	mov	%dx, %ax			/* eax: new kernel esp */
	sub	%eax, %edx			/* offset (low word is 0) */
	shr	$16, %edx
	mov	%dl, GDT_ESPFIX_SS + 4		/* bits 16..23 */
	mov	%dh, GDT_ESPFIX_SS + 7		/* bits 24..31 */
	pushl	$__ESPFIX_SS
	pushl	%eax				/* new kernel esp */
	/*
	 * Disable interrupts, but do not irqtrace this section: we
	 * will soon execute iret and the tracer was already set to
	 * the irqstate after the IRET:
	 */
	DISABLE_INTERRUPTS(CLBR_EAX)
	lss	(%esp), %esp			/* switch to espfix segment */
	jmp	restore_nocheck
#endif
ENDPROC(entry_INT80_32)

.macro FIXUP_ESPFIX_STACK
/*
 * Switch back from the ESPFIX stack to the normal zero-based stack
 *
 * We can't call C functions using the ESPFIX stack. This code reads
 * the high word of the segment base from the GDT and switches to the
 * normal stack and adjusts ESP with the matching offset.
 */
#ifdef CONFIG_X86_ESPFIX32
	/* fixup the stack */
	mov	GDT_ESPFIX_SS + 4, %al		/* bits 16..23 */
	mov	GDT_ESPFIX_SS + 7, %ah		/* bits 24..31 */
	shl	$16, %eax
	addl	%esp, %eax			/* the adjusted stack pointer */
	pushl	$__KERNEL_DS
	pushl	%eax
	lss	(%esp), %esp			/* switch to the normal stack segment */
#endif
.endm
.macro UNWIND_ESPFIX_STACK
#ifdef CONFIG_X86_ESPFIX32
	movl	%ss, %eax
	/* see if on espfix stack */
	cmpw	$__ESPFIX_SS, %ax
	jne	27f
	movl	$__KERNEL_DS, %eax
	movl	%eax, %ds
	movl	%eax, %es
	/* switch to normal stack */
	FIXUP_ESPFIX_STACK
27:
#endif
.endm

/*
 * Build the entry stubs with some assembler magic.
 * We pack 1 stub into every 8-byte block.
 */
	.align 8
ENTRY(irq_entries_start)
    vector=FIRST_EXTERNAL_VECTOR
    .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
	pushl	$(~vector+0x80)			/* Note: always in signed byte range */
    vector=vector+1
	jmp	common_interrupt
	.align	8
    .endr
END(irq_entries_start)

/*
 * the CPU automatically disables interrupts when executing an IRQ vector,
 * so IRQ-flags tracing has to follow that:
 */
	.p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
	ASM_CLAC
	addl	$-0x80, (%esp)			/* Adjust vector into the [-256, -1] range */
	SAVE_ALL
	TRACE_IRQS_OFF
	movl	%esp, %eax
	call	do_IRQ
	jmp	ret_from_intr
ENDPROC(common_interrupt)

#define BUILD_INTERRUPT3(name, nr, fn)	\
ENTRY(name)				\
	ASM_CLAC;			\
	pushl	$~(nr);			\
	SAVE_ALL;			\
	TRACE_IRQS_OFF			\
	movl	%esp, %eax;		\
	call	fn;			\
	jmp	ret_from_intr;		\
ENDPROC(name)


#ifdef CONFIG_TRACING
# define TRACE_BUILD_INTERRUPT(name, nr)	BUILD_INTERRUPT3(trace_##name, nr, smp_trace_##name)
#else
# define TRACE_BUILD_INTERRUPT(name, nr)
#endif

#define BUILD_INTERRUPT(name, nr)		\
	BUILD_INTERRUPT3(name, nr, smp_##name);	\
	TRACE_BUILD_INTERRUPT(name, nr)

/* The include is where all of the SMP etc. interrupts come from */
#include <asm/entry_arch.h>

ENTRY(coprocessor_error)
	ASM_CLAC
	pushl	$0
	pushl	$do_coprocessor_error
	jmp	error_code
END(coprocessor_error)

ENTRY(simd_coprocessor_error)
	ASM_CLAC
	pushl	$0
#ifdef CONFIG_X86_INVD_BUG
	/* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
	ALTERNATIVE "pushl	$do_general_protection",	\
		    "pushl	$do_simd_coprocessor_error",	\
		    X86_FEATURE_XMM
#else
	pushl	$do_simd_coprocessor_error
#endif
	jmp	error_code
END(simd_coprocessor_error)

ENTRY(device_not_available)
	ASM_CLAC
	pushl	$-1				# mark this as an int
	pushl	$do_device_not_available
	jmp	error_code
END(device_not_available)

#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
	iret
	_ASM_EXTABLE(native_iret, iret_exc)
END(native_iret)
#endif

ENTRY(overflow)
	ASM_CLAC
	pushl	$0
	pushl	$do_overflow
	jmp	error_code
END(overflow)

ENTRY(bounds)
	ASM_CLAC
	pushl	$0
	pushl	$do_bounds
	jmp	error_code
END(bounds)

ENTRY(invalid_op)
	ASM_CLAC
	pushl	$0
	pushl	$do_invalid_op
	jmp	error_code
END(invalid_op)

ENTRY(coprocessor_segment_overrun)
	ASM_CLAC
	pushl	$0
	pushl	$do_coprocessor_segment_overrun
	jmp	error_code
END(coprocessor_segment_overrun)

ENTRY(invalid_TSS)
	ASM_CLAC
	pushl	$do_invalid_TSS
	jmp	error_code
END(invalid_TSS)

ENTRY(segment_not_present)
	ASM_CLAC
	pushl	$do_segment_not_present
	jmp	error_code
END(segment_not_present)

ENTRY(stack_segment)
	ASM_CLAC
	pushl	$do_stack_segment
	jmp	error_code
END(stack_segment)

ENTRY(alignment_check)
	ASM_CLAC
	pushl	$do_alignment_check
	jmp	error_code
END(alignment_check)

ENTRY(divide_error)
	ASM_CLAC
	pushl	$0				# no error code
	pushl	$do_divide_error
	jmp	error_code
END(divide_error)

#ifdef CONFIG_X86_MCE
ENTRY(machine_check)
	ASM_CLAC
	pushl	$0
	pushl	machine_check_vector
	jmp	error_code
END(machine_check)
#endif

ENTRY(spurious_interrupt_bug)
	ASM_CLAC
	pushl	$0
	pushl	$do_spurious_interrupt_bug
	jmp	error_code
END(spurious_interrupt_bug)

#ifdef CONFIG_XEN
ENTRY(xen_hypervisor_callback)
	pushl	$-1				/* orig_ax = -1 => not a system call */
	SAVE_ALL
	TRACE_IRQS_OFF

	/*
	 * Check to see if we got the event in the critical
	 * region in xen_iret_direct, after we've reenabled
	 * events and checked for pending events.  This simulates
	 * iret instruction's behaviour where it delivers a
	 * pending interrupt when enabling interrupts:
	 */
	movl	PT_EIP(%esp), %eax
	cmpl	$xen_iret_start_crit, %eax
	jb	1f
	cmpl	$xen_iret_end_crit, %eax
	jae	1f

	jmp	xen_iret_crit_fixup

ENTRY(xen_do_upcall)
1:	mov	%esp, %eax
	call	xen_evtchn_do_upcall
#ifndef CONFIG_PREEMPT
	call	xen_maybe_preempt_hcall
#endif
	jmp	ret_from_intr
ENDPROC(xen_hypervisor_callback)

/*
 * Hypervisor uses this for application faults while it executes.
 * We get here for two reasons:
 *  1. Fault while reloading DS, ES, FS or GS
 *  2. Fault while executing IRET
 * Category 1 we fix up by reattempting the load, and zeroing the segment
 * register if the load fails.
 * Category 2 we fix up by jumping to do_iret_error.  We cannot use the
 * normal Linux return path in this case because if we use the IRET hypercall
 * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
 * We distinguish between categories by maintaining a status value in EAX.
 */
ENTRY(xen_failsafe_callback)
	pushl	%eax
	movl	$1, %eax
1:	mov	4(%esp), %ds
2:	mov	8(%esp), %es
3:	mov	12(%esp), %fs
4:	mov	16(%esp), %gs
	/* EAX == 0 => Category 1 (Bad segment)
	   EAX != 0 => Category 2 (Bad IRET) */
	testl	%eax, %eax
	popl	%eax
	lea	16(%esp), %esp
	jz	5f
	jmp	iret_exc
5:	pushl	$-1				/* orig_ax = -1 => not a system call */
	SAVE_ALL
	jmp	ret_from_exception

.section .fixup, "ax"
6:	xorl	%eax, %eax
	movl	%eax, 4(%esp)
	jmp	1b
7:	xorl	%eax, %eax
	movl	%eax, 8(%esp)
	jmp	2b
8:	xorl	%eax, %eax
	movl	%eax, 12(%esp)
	jmp	3b
9:	xorl	%eax, %eax
	movl	%eax, 16(%esp)
	jmp	4b
.previous
	_ASM_EXTABLE(1b, 6b)
	_ASM_EXTABLE(2b, 7b)
	_ASM_EXTABLE(3b, 8b)
	_ASM_EXTABLE(4b, 9b)
ENDPROC(xen_failsafe_callback)

BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
		 xen_evtchn_do_upcall)

#endif /* CONFIG_XEN */

#if IS_ENABLED(CONFIG_HYPERV)

BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
		 hyperv_vector_handler)

#endif /* CONFIG_HYPERV */

#ifdef CONFIG_FUNCTION_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE

ENTRY(mcount)
	ret
END(mcount)

ENTRY(ftrace_caller)
	pushl	%eax
	pushl	%ecx
	pushl	%edx
	pushl	$0				/* Pass NULL as regs pointer */
	movl	4*4(%esp), %eax
	movl	0x4(%ebp), %edx
	movl	function_trace_op, %ecx
	subl	$MCOUNT_INSN_SIZE, %eax

.globl ftrace_call
ftrace_call:
	call	ftrace_stub

	addl	$4, %esp			/* skip NULL pointer */
	popl	%edx
	popl	%ecx
	popl	%eax
ftrace_ret:
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
.globl ftrace_graph_call
ftrace_graph_call:
	jmp	ftrace_stub
#endif

/* This is weak to keep gas from relaxing the jumps */
WEAK(ftrace_stub)
	ret
END(ftrace_caller)

ENTRY(ftrace_regs_caller)
	pushf	/* push flags before compare (in cs location) */

	/*
	 * i386 does not save SS and ESP when coming from kernel.
	 * Instead, to get sp, &regs->sp is used (see ptrace.h).
	 * Unfortunately, that means eflags must be at the same location
	 * as the current return ip is.  We move the return ip into the
	 * ip location, and move flags into the return ip location.
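	 *
	 * Illustrative stack picture at this point (after the pushf above,
	 * before the pushl below), assuming mcount was called normally:
	 *
	 *	 0(%esp)  saved flags	(this slot becomes regs->cs, then regs->flags)
	 *	 4(%esp)  return ip	(this slot becomes regs->flags)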
	 */
	pushl	4(%esp)				/* save return ip into ip slot */

	pushl	$0				/* Load 0 into orig_ax */
	pushl	%gs
	pushl	%fs
	pushl	%es
	pushl	%ds
	pushl	%eax
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	pushl	%edx
	pushl	%ecx
	pushl	%ebx

	movl	13*4(%esp), %eax		/* Get the saved flags */
	movl	%eax, 14*4(%esp)		/* Move saved flags into regs->flags location */
						/* clobbering return ip */
	movl	$__KERNEL_CS, 13*4(%esp)

	movl	12*4(%esp), %eax		/* Load ip (1st parameter) */
	subl	$MCOUNT_INSN_SIZE, %eax		/* Adjust ip */
	movl	0x4(%ebp), %edx			/* Load parent ip (2nd parameter) */
	movl	function_trace_op, %ecx		/* Save ftrace_pos in 3rd parameter */
	pushl	%esp				/* Save pt_regs as 4th parameter */

GLOBAL(ftrace_regs_call)
	call	ftrace_stub

	addl	$4, %esp			/* Skip pt_regs */
	movl	14*4(%esp), %eax		/* Move flags back into cs */
	movl	%eax, 13*4(%esp)		/* Needed to keep addl from modifying flags */
	movl	12*4(%esp), %eax		/* Get return ip from regs->ip */
	movl	%eax, 14*4(%esp)		/* Put return ip back for ret */

	popl	%ebx
	popl	%ecx
	popl	%edx
	popl	%esi
	popl	%edi
	popl	%ebp
	popl	%eax
	popl	%ds
	popl	%es
	popl	%fs
	popl	%gs
	addl	$8, %esp			/* Skip orig_ax and ip */
	popf					/* Pop flags at end (no addl to corrupt flags) */
	jmp	ftrace_ret

	popf
	jmp	ftrace_stub
#else /* ! CONFIG_DYNAMIC_FTRACE */

ENTRY(mcount)
	cmpl	$__PAGE_OFFSET, %esp
	jb	ftrace_stub			/* Paging not enabled yet? */

	cmpl	$ftrace_stub, ftrace_trace_function
	jnz	trace
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	cmpl	$ftrace_stub, ftrace_graph_return
	jnz	ftrace_graph_caller

	cmpl	$ftrace_graph_entry_stub, ftrace_graph_entry
	jnz	ftrace_graph_caller
#endif
.globl ftrace_stub
ftrace_stub:
	ret

	/* taken from glibc */
trace:
	pushl	%eax
	pushl	%ecx
	pushl	%edx
	movl	0xc(%esp), %eax
	movl	0x4(%ebp), %edx
	subl	$MCOUNT_INSN_SIZE, %eax

	movl	ftrace_trace_function, %ecx
	CALL_NOSPEC %ecx

	popl	%edx
	popl	%ecx
	popl	%eax
	jmp	ftrace_stub
END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
EXPORT_SYMBOL(mcount)
#endif /* CONFIG_FUNCTION_TRACER */

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
ENTRY(ftrace_graph_caller)
	pushl	%eax
	pushl	%ecx
	pushl	%edx
	movl	0xc(%esp), %eax
	lea	0x4(%ebp), %edx
	movl	(%ebp), %ecx
	subl	$MCOUNT_INSN_SIZE, %eax
	call	prepare_ftrace_return
	popl	%edx
	popl	%ecx
	popl	%eax
	ret
END(ftrace_graph_caller)

.globl return_to_handler
return_to_handler:
	pushl	%eax
	pushl	%edx
	movl	%ebp, %eax
	call	ftrace_return_to_handler
	movl	%eax, %ecx
	popl	%edx
	popl	%eax
	JMP_NOSPEC %ecx
#endif

#ifdef CONFIG_TRACING
ENTRY(trace_page_fault)
	ASM_CLAC
	pushl	$trace_do_page_fault
	jmp	error_code
END(trace_page_fault)
#endif

ENTRY(page_fault)
	ASM_CLAC
	pushl	$do_page_fault
	ALIGN
error_code:
	/* the function address is in %gs's slot on the stack */
	pushl	%fs
	pushl	%es
	pushl	%ds
	pushl	%eax
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	pushl	%edx
	pushl	%ecx
	pushl	%ebx
	cld
	movl	$(__KERNEL_PERCPU), %ecx
	movl	%ecx, %fs
	UNWIND_ESPFIX_STACK
	GS_TO_REG %ecx
	movl	PT_GS(%esp), %edi		# get the function address
	movl	PT_ORIG_EAX(%esp), %edx		# get the error code
	movl	$-1, PT_ORIG_EAX(%esp)		# no syscall to restart
	REG_TO_PTGS %ecx
	SET_KERNEL_GS %ecx
	movl	$(__USER_DS), %ecx
	movl	%ecx, %ds
	movl	%ecx, %es
	TRACE_IRQS_OFF
	movl	%esp, %eax			# pt_regs pointer
	CALL_NOSPEC %edi
	jmp	ret_from_exception
END(page_fault)

ENTRY(debug)
	/*
	 * #DB can happen at the first instruction of
	 * entry_SYSENTER_32 or in Xen's SYSENTER prologue.  If this
	 * happens, then we will be running on a very small stack.  We
	 * need to detect this condition and switch to the thread
	 * stack before calling any C code at all.
	 *
	 * If you edit this code, keep in mind that NMIs can happen in here.
	 */
	ASM_CLAC
	pushl	$-1				# mark this as an int
	SAVE_ALL
	xorl	%edx, %edx			# error code 0
	movl	%esp, %eax			# pt_regs pointer

	/* Are we currently on the SYSENTER stack? */
	PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
	subl	%eax, %ecx	/* ecx = (end of SYSENTER_stack) - esp */
	cmpl	$SIZEOF_SYSENTER_stack, %ecx
	jb	.Ldebug_from_sysenter_stack

	TRACE_IRQS_OFF
	call	do_debug
	jmp	ret_from_exception

.Ldebug_from_sysenter_stack:
	/* We're on the SYSENTER stack.  Switch off. */
	movl	%esp, %ebp
	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esp
	TRACE_IRQS_OFF
	call	do_debug
	movl	%ebp, %esp
	jmp	ret_from_exception
END(debug)

/*
 * NMI is doubly nasty.  It can happen on the first instruction of
 * entry_SYSENTER_32 (just like #DB), but it can also interrupt the beginning
 * of the #DB handler even if that #DB in turn hit before entry_SYSENTER_32
 * switched stacks.  We handle both conditions by simply checking whether we
 * interrupted kernel code running on the SYSENTER stack.
 */
ENTRY(nmi)
	ASM_CLAC
#ifdef CONFIG_X86_ESPFIX32
	pushl	%eax
	movl	%ss, %eax
	cmpw	$__ESPFIX_SS, %ax
	popl	%eax
	je	nmi_espfix_stack
#endif

	pushl	%eax				# pt_regs->orig_ax
	SAVE_ALL
	xorl	%edx, %edx			# zero error code
	movl	%esp, %eax			# pt_regs pointer

	/* Are we currently on the SYSENTER stack? */
	PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
	subl	%eax, %ecx	/* ecx = (end of SYSENTER_stack) - esp */
	cmpl	$SIZEOF_SYSENTER_stack, %ecx
	jb	.Lnmi_from_sysenter_stack

	/* Not on SYSENTER stack. */
	call	do_nmi
	jmp	restore_all_notrace

.Lnmi_from_sysenter_stack:
	/*
	 * We're on the SYSENTER stack.  Switch off.  No one (not even debug)
	 * is using the thread stack right now, so it's safe for us to use it.
	 */
	movl	%esp, %ebp
	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esp
	call	do_nmi
	movl	%ebp, %esp
	jmp	restore_all_notrace

#ifdef CONFIG_X86_ESPFIX32
nmi_espfix_stack:
	/*
	 * create the pointer to lss back
	 */
	pushl	%ss
	pushl	%esp
	addl	$4, (%esp)
	/* copy the iret frame of 12 bytes */
	.rept 3
	pushl	16(%esp)
	.endr
	pushl	%eax
	SAVE_ALL
	FIXUP_ESPFIX_STACK			# %eax == %esp
	xorl	%edx, %edx			# zero error code
	call	do_nmi
	RESTORE_REGS
	lss	12+4(%esp), %esp		# back to espfix stack
	jmp	irq_return
#endif
END(nmi)

ENTRY(int3)
	ASM_CLAC
	pushl	$-1				# mark this as an int
	SAVE_ALL
	TRACE_IRQS_OFF
	xorl	%edx, %edx			# zero error code
	movl	%esp, %eax			# pt_regs pointer
	call	do_int3
	jmp	ret_from_exception
END(int3)

ENTRY(general_protection)
	pushl	$do_general_protection
	jmp	error_code
END(general_protection)

#ifdef CONFIG_KVM_GUEST
ENTRY(async_page_fault)
	ASM_CLAC
	pushl	$do_async_page_fault
	jmp	error_code
END(async_page_fault)
#endif

ENTRY(rewind_stack_do_exit)
	/* Prevent any naive code from trying to unwind to our caller. */
	xorl	%ebp, %ebp

	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esi
	leal	-TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp

	call	do_exit
1:	jmp	1b
END(rewind_stack_do_exit)