;; -*- fundamental -*-
;; -----------------------------------------------------------------------
;;
;;   Copyright 1994-2008 H. Peter Anvin - All Rights Reserved
;;   Copyright 2009 Intel Corporation; author: H. Peter Anvin
;;
;;   This program is free software; you can redistribute it and/or modify
;;   it under the terms of the GNU General Public License as published by
;;   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
;;   Boston MA 02111-1307, USA; either version 2 of the License, or
;;   (at your option) any later version; incorporated herein by reference.
;;
;; -----------------------------------------------------------------------

;;
;; init16.asm
;;
;; Routine to initialize and to trampoline into 32-bit
;; protected memory.  This code is derived from bcopy32.inc and
;; com32.inc in the main SYSLINUX distribution.
;;

%include '../version.gen'

; Real-mode segment this code relocates itself to, and the matching
; linear base address (segment * 16).
MY_CS           equ 0x0800
CS_BASE         equ (MY_CS << 4)

; Low-memory bounce buffer: the 64K segment right above MY_CS.
BOUNCE_SEG      equ (MY_CS+0x1000)

; Set to non-zero to assemble the WBINVD cache flush into the A20 paths.
%define DO_WBINVD 0

; Declare the sections up front so their order and alignment are fixed.
                section .rodata align=16
                section .data   align=16
                section .bss    align=16
                section .stack  align=16 nobits
stack           resb 512                        ; Real-mode stack
stack_end       equ $                           ; Initial SP (top of stack)

;; -----------------------------------------------------------------------
;;  Kernel image header
;; -----------------------------------------------------------------------

                section .text                   ; Must be first in image
                bits 16

;
; "Boot sector" part of the header.  With .text placed first in the
; image, the 497-byte command line buffer occupies 0x000-0x1F0, which
; puts setup_sects at 0x1F1 and boot_flag at 0x1FE.
;
cmdline         times 497 db 0                  ; We put the command line here
setup_sects     db 0
root_flags      dw 0
syssize         dw 0
swap_dev        dw 0
ram_size        dw 0
vid_mode        dw 0
root_dev        dw 0
boot_flag       dw 0xAA55

;
; Setup header proper, starting at 0x200.  The short jump skips the
; data fields below and lands on the real entry code.
;
_start:         jmp short start

                db "HdrS"                       ; Header signature
                dw 0x0203                       ; Header version number

realmode_swtch  dw 0, 0                         ; default_switch, SETUPSEG
start_sys_seg   dw 0x1000                       ; obsolete
version_ptr     dw memdisk_version-0x200        ; version string ptr
type_of_loader  db 0                            ; Filled in by boot loader
loadflags       db 1                            ; Please load high
setup_move_size dw 0                            ; Unused
code32_start    dd 0x100000                     ; 32-bit start address
ramdisk_image   dd 0                            ; Loaded ramdisk image address
ramdisk_size    dd 0                            ; Size of loaded ramdisk
bootsect_kludge dw 0, 0
heap_end_ptr    dw 0
pad1            dw 0
cmd_line_ptr    dd 0                            ; Command line
ramdisk_max     dd 0xffffffff                   ; Highest allowed ramdisk address

;
; These fields aren't real setup fields, they're poked in by the
; 32-bit code.  The 16-bit code reads them back after init32 returns
; to build the register state for the boot sector it jumps to.
;
b_esdi          dd 0                            ; ES:DI for boot sector invocation
b_edx           dd 0                            ; EDX for boot sector invocation
b_sssp          dd 0                            ; SS:SP on boot sector invocation
b_csip          dd 0                            ; CS:IP on boot sector invocation

; Version string; version_ptr above refers to it relative to 0x200.
                section .rodata
memdisk_version:
                db "MEMDISK ", VERSION_STR, " ", DATE, 0

;; -----------------------------------------------------------------------
;;  End kernel image header
;; -----------------------------------------------------------------------

;
; Move ourselves down into memory to reduce the risk of conflicts;
; then canonicalize CS to match the other segments.
;
                section .text
                bits 16

;
; start: copy (setup_sects + 1) sectors from DS:0 to MY_CS:0, then make
; DS, SS and CS all equal MY_CS and switch to the private stack.
;
; NOTE(review): DS is used as found on entry, both for the [setup_sects]
; read and as the copy source; nothing here loads it first.  Confirm the
; boot loader enters with DS addressing the loaded image.
;
start:
                movzx   cx, byte [setup_sects]
                inc     cx                      ; Add one for the boot sector
                shl     cx, 7                   ; Sectors -> dwords (512/4)
                mov     ax, MY_CS
                mov     es, ax                  ; Destination segment
                xor     di, di
                xor     si, si
                mov     fs, si                  ; fs <- 0
                cld
                rep movsd                       ; DS:SI -> ES:DI, CX dwords
                mov     ds, ax
                mov     ss, ax
                mov     esp, stack_end          ; Keep right after the SS load
                jmp     MY_CS:.relocated        ; Far jump reloads CS
.relocated:

;
; copy_cmdline: copy the NUL-terminated command line addressed by the
; linear pointer cmd_line_ptr to offset 0 of our segment (the cmdline
; buffer).  At most 496 bytes are copied and a terminating NUL is
; always stored, including when there is no command line at all.
; GS is used as the source segment.
;
copy_cmdline:
                xor     di, di                  ; Bottom of our own segment (= "boot sector")
                mov     eax, [cmd_line_ptr]
                test    eax, eax
                jz      .terminate              ; No command line
                mov     si, ax
                and     si, 0x000F              ; Offset within the paragraph
                shr     eax, 4                  ; Linear address -> segment
                mov     gs, ax
                mov     cx, 496                 ; Max number of bytes
.next_byte:
                gs lodsb
                test    al, al
                jz      .terminate              ; End of source string
                stosb
                loop    .next_byte
.terminate:
                xor     al, al
                stosb                           ; Always NUL-terminate

;
; Run the 32-bit code.
;
                sti
                call    init32

;
; When init32 returns, we have been set up, the new boot sector loaded,
; and we should go and run the newly loaded boot sector.
;
; The setup function will have poked values into the setup area
; (b_esdi, b_edx, b_sssp, b_csip); load the registers from there.
; CS-relative addressing is used because DS is about to be zeroed.
;
                movzx   edi, word [cs:b_esdi]
                mov     es, word [cs:b_esdi+2]
                mov     edx, [cs:b_edx]

                cli
                xor     esi, esi                ; No partition table involved
                mov     ds, si                  ; Make all the segments consistent
                mov     fs, si
                mov     gs, si
                lss     sp, [cs:b_sssp]         ; SS:SP in one instruction
                movzx   esp, sp                 ; Clear the high half of ESP
                jmp     far [cs:b_csip]

;
; We enter protected mode, set up a flat 32-bit environment, run rep movsd
; and then exit.  IMPORTANT: This code assumes cs == MY_CS.
;
; This code is probably excessively anal-retentive in its handling of
; segments, but this stuff is painful enough as it is without having to rely
; on everything happening "as it ought to."
;
DummyTSS        equ 0x580                       ; Hopefully safe place in low memory

                section .data

;
; desc base, limit, flags
;
; Emits one 8-byte segment descriptor:
;   dword 0 = limit[15:0] | base[15:0] << 16
;   dword 1 = base[31:24] | limit[19:16] | flags | base[23:16]
; where the low byte of "flags" lands in bits 8-15 and bits 12-15 of
; "flags" land in bits 20-23.
;
%macro  desc 3
        dd (%2 & 0xffff) | ((%1 & 0xffff) << 16)
        dd (%1 & 0xff000000) | (%2 & 0xf0000) | ((%3 & 0xf0ff) << 8) | ((%1 & 0x00ff0000) >> 16)
%endmacro

;
; GDT.  The otherwise unused null descriptor (selector 0000) doubles as
; the limit/base operand for LGDT.
;
                align 8, db 0
call32_gdt:     dw call32_gdt_size-1            ; Null descriptor - contains GDT
.adj1:          dd call32_gdt+CS_BASE           ; pointer for LGDT instruction
                dw 0

                ; 0008: Dummy TSS to make Intel VT happy
                ; Should never be actually accessed...
                desc DummyTSS, 103, 0x8089

                ; 0010: Code segment, use16, readable, dpl 0, base CS_BASE, 64K
                desc CS_BASE, 0xffff, 0x009b

                ; 0018: Data segment, use16, read/write, dpl 0, base CS_BASE, 64K
                desc CS_BASE, 0xffff, 0x0093

                ; 0020: Code segment, use32, readable, dpl 0, base 0, 4G
                desc 0, 0xfffff, 0xc09b

                ; 0028: Data segment, use32, read/write, dpl 0, base 0, 4G
                desc 0, 0xfffff, 0xc093

call32_gdt_size:        equ $-call32_gdt

err_a20:        db 'ERROR: A20 gate not responding!',13,10,0

                section .bss
                alignb 4
Return          resd 1                          ; Return value
SavedSP         resw 1                          ; Place to save SP
A20Tries        resb 1                          ; Remaining enable attempts

                section .data
                align 4, db 0
Target          dd 0                            ; Target address
Target_Seg      dw 20h                          ; Target CS

A20Type         dw 0                            ; Default = unknown

                section .text
                bits 16
;
; Routines to enable and disable (yuck) A20.  These routines are gathered
; from tips from a couple of sources, including the Linux kernel and
; http://www.x86.org/.  The need for the delay to be as large as given here
; is indicated by Donnie Barnes of RedHat, the problematic system being an
; IBM ThinkPad 760EL.
;
; We typically toggle A20 twice for every 64K transferred.
;
%define io_delay        call _io_delay
%define IO_DELAY_PORT   80h                     ; Invalid port (we hope!)
%define disable_wait    32                      ; How long to wait for a disable

; Values of A20Type; they index A20List / A20DList below.
%define A20_DUNNO       0                       ; A20 type unknown
%define A20_NONE        1                       ; A20 always on?
%define A20_BIOS        2                       ; A20 BIOS enable
%define A20_KBC         3                       ; A20 through KBC
%define A20_FAST        4                       ; A20 through port 92h

; Dispatch tables: enable entry points, then disable entry points.
                align 2, db 0
A20List         dw a20_dunno, a20_none, a20_bios, a20_kbc, a20_fast
A20DList        dw a20d_dunno, a20d_none, a20d_bios, a20d_kbc, a20d_fast
a20_adjust_cnt  equ ($-A20List)/2

; slow_out: OUT DX,AL followed by the I/O delay below.
slow_out:       out dx, al                      ; Fall through

; _io_delay: two dummy writes to IO_DELAY_PORT.  Flags are not modified.
_io_delay:      out IO_DELAY_PORT, al
                out IO_DELAY_PORT, al
                ret

;
; enable_a20: turn the A20 gate on.
;
; Tries, in order: "already on", BIOS INT 15h AX=2401h, the keyboard
; controller, and the port 92h "fast gate".  The method being tried is
; recorded in A20Type, so a later call (and disable_a20) dispatches
; straight to it.  The whole sequence is attempted up to 255 times;
; after the first round A20Type is A20_FAST, so retries re-enter at
; a20_fast.
;
; In:    DS and SS address this image's data (variables are read via
;        DS, the jump table via BP and therefore SS)
; Out:   returns with all general registers preserved (pushad/popad);
;        if A20 never responds, prints err_a20 and halts forever
;
enable_a20:
                pushad
                mov     byte [A20Tries], 255    ; Times to try to make this work

try_enable_a20:

;
; Flush the caches
;
%if DO_WBINVD
                call    try_wbinvd
%endif

;
; If the A20 type is known, jump straight to type
;
                mov     bp, [A20Type]
                add     bp, bp                  ; Convert to word offset
.adj4:          jmp     word [bp+A20List]

;
; First, see if we are on a system with no A20 gate
;
a20_dunno:
a20_none:
                mov     byte [A20Type], A20_NONE
                call    a20_test
                jnz     a20_done                ; ZF=0: A20 already enabled

;
; Next, try the BIOS (INT 15h AX=2401h)
;
a20_bios:
                mov     byte [A20Type], A20_BIOS
                mov     ax, 2401h
                pushf                           ; Some BIOSes muck with IF
                int     15h
                popf

                call    a20_test
                jnz     a20_done

;
; Enable the keyboard controller A20 gate
;
a20_kbc:
                mov     dl, 1                   ; Allow early exit
                call    empty_8042
                jnz     a20_done                ; A20 live, no need to use KBC

                mov     byte [A20Type], A20_KBC ; Starting KBC command sequence

                mov     al, 0D1h                ; Write output port
                out     064h, al
                call    empty_8042_uncond

                mov     al, 0DFh                ; A20 on
                out     060h, al
                call    empty_8042_uncond

                ; Apparently the UHCI spec assumes that A20 toggle
                ; ends with a null command (assumed to be for synchronization?)
                ; Put it here to see if it helps anything...
                mov     al, 0FFh                ; Null command
                out     064h, al
                call    empty_8042_uncond

                ; Verify that A20 actually is enabled.  Do that by
                ; observing a word in low memory and the same word in
                ; the HMA until they are no longer coherent.  Note that
                ; we don't do the same check in the disable case, because
                ; we don't want to *require* A20 masking (SYSLINUX should
                ; work fine without it, if the BIOS does.)
.kbc_wait:      push    cx
                xor     cx, cx                  ; CX = 0: LOOP runs 65536 times
.kbc_poll:
                call    a20_test
                jnz     a20_done_pop
                loop    .kbc_poll

                pop     cx
;
; Running out of options here.  Final attempt: enable the "fast A20 gate"
;
a20_fast:
                mov     byte [A20Type], A20_FAST ; Haven't used the KBC yet
                in      al, 092h
                or      al, 02h                 ; Set the A20 bit
                and     al, ~01h                ; Don't accidentally reset the machine!
                out     092h, al

.fast_wait:     push    cx
                xor     cx, cx                  ; CX = 0: LOOP runs 65536 times
.fast_poll:
                call    a20_test
                jnz     a20_done_pop
                loop    .fast_poll

                pop     cx

;
; Oh bugger.  A20 is not responding.  Try frobbing it again; eventually give up
; and report failure to the user.
;
                dec     byte [A20Tries]
                jnz     try_enable_a20

                ; Error message time: print err_a20 through INT 10h AH=0Eh
                mov     si, err_a20
print_err:
                lodsb
                test    al, al
                jz      die                     ; NUL terminator reached
                mov     ah, 0xe
                mov     bx, 7
                int     10h
                jmp     print_err

; die: enable interrupts and halt forever.
die:
                sti
.hlt:           hlt
                jmp     short .hlt

;
; A20 unmasked, proceed...
;
a20_done_pop:   pop     cx                      ; Undo the push in the wait loops
a20_done:       popad
                ret

;
; This routine tests if A20 is enabled (ZF = 0).  This routine
; must not destroy any register contents.
;

; This is the INT 1Fh vector, which on standard PCs is used by the
; BIOS when the screen is in graphics mode.  Even if it is, it points to
; data, not code, so it should be safe enough to fiddle with.
A20Test         equ (1Fh*4)                     ; Offset of the INT 1Fh vector

;
; a20_test: check whether the A20 line is enabled.
;
; Compares the dword at 0000:A20Test with the one at FFFF:A20Test+10h,
; i.e. the address exactly 1 MB higher.  If they already differ, A20 is
; on.  Otherwise the low copy is incremented up to 32 times, with an
; I/O delay each time, to see whether the high copy follows it; the
; original value is restored afterwards.
;
; Out:   ZF = 0 if A20 is enabled, ZF = 1 if the two locations alias
; Clobb: flags only; DS, ES, CX and EAX are saved and restored
;
a20_test:
                push    ds
                push    es
                push    cx
                push    eax
                xor     ax, ax
                mov     ds, ax                  ; DS == 0
                dec     ax
                mov     es, ax                  ; ES == 0FFFFh
                mov     cx, 32                  ; Loop count
                mov     eax, [A20Test]
                cmp     eax, [es:A20Test+10h]
                jne     .finish                 ; Already different: A20 is on
                push    eax                     ; Remember the original value
.probe:
                inc     eax
                mov     [A20Test], eax
                io_delay
                cmp     eax, [es:A20Test+10h]
                loopz   .probe                  ; Repeat while still aliased
                pop     dword [A20Test]         ; Restore original value
.finish:
                pop     eax
                pop     cx
                pop     es
                pop     ds
                ret

;
; disable_a20: turn A20 back off using the method recorded in A20Type
; by enable_a20.  For A20_DUNNO and A20_NONE nothing is done.
;
; In:    DS and SS address this image's data (A20Type is read via DS,
;        the jump table via BP and therefore SS)
; Out:   all general registers preserved (pushad/popad)
;
disable_a20:
                pushad
;
; Flush the caches
;
%if DO_WBINVD
                call    try_wbinvd
%endif

                mov     bp, [A20Type]
                add     bp, bp                  ; Convert to word offset
.adj5:          jmp     word [bp+A20DList]

;
; Disable through the BIOS (INT 15h AX=2400h)
;
a20d_bios:
                mov     ax, 2400h
                pushf                           ; Some BIOSes muck with IF
                int     15h
                popf
                jmp     short a20d_snooze

;
; Disable the "fast A20 gate"
;
a20d_fast:
                in      al, 092h
                and     al, ~03h                ; Clear bits 0 and 1
                out     092h, al
                jmp     short a20d_snooze

;
; Disable the keyboard controller A20 gate
;
a20d_kbc:
                call    empty_8042_uncond

                mov     al, 0D1h
                out     064h, al                ; Write output port
                call    empty_8042_uncond

                mov     al, 0DDh                ; A20 off
                out     060h, al
                call    empty_8042_uncond

                mov     al, 0FFh                ; Null command/synchronization
                out     064h, al
                call    empty_8042_uncond

                ; Wait a bit for it to take effect: poll a20_test up to
                ; disable_wait times, stopping early once A20 reads as off.
a20d_snooze:
                push    cx
                mov     cx, disable_wait
.poll:          call    a20_test
                jz      .is_off
                loop    .poll
.is_off:        pop     cx
a20d_dunno:
a20d_none:
                popad
                ret

;
; Routine to empty the 8042 KBC controller.  If dl != 0
; then we will test A20 in the loop and exit if A20 is
; suddenly enabled.
;
; In:    DL = 0 to drain unconditionally (empty_8042_uncond forces this),
;        DL != 0 to return early once a20_test reports A20 enabled
; Out:   ZF = 0 only on the early "A20 is enabled" exit;
;        ZF = 1 when the controller has been drained
; Clobb: AL, flags (and DL for empty_8042_uncond)
;
empty_8042_uncond:
                xor     dl, dl
empty_8042:
                call    a20_test
                jz      .drain                  ; A20 not enabled: keep draining
                and     dl, dl
                jnz     .done                   ; A20 enabled and early exit allowed
.drain:         io_delay
                in      al, 064h                ; Status port
                test    al, 1
                jz      .no_output
                io_delay
                in      al, 060h                ; Read input
                jmp     short empty_8042
.no_output:
                test    al, 2
                jnz     empty_8042              ; Input buffer still full
                io_delay                        ; OUT leaves flags alone: ZF stays 1
.done:          ret

;
; Execute a WBINVD instruction if possible on this CPU
;
%if DO_WBINVD
try_wbinvd:
                wbinvd
                ret
%endif

                section .bss
                alignb 4
PMESP           resd 1                          ; Protected mode %esp

                section .idt nobits align=4096
                alignb 4096
pm_idt          resb 4096                       ; Protected-mode IDT, followed by interrupt stubs

; Entry point of the 32-bit program (linear address).
pm_entry:       equ 0x100000

; IDT register image reloaded when going back to real mode.
                section .rodata
                align 2, db 0
call32_rmidt:
                dw 0ffffh                       ; Limit
                dd 0                            ; Address

; IDT register image loaded on entry to protected mode; call32_enter_rm
; overwrites it with SIDT so that the IDT in use is restored next time.
                section .data
                align 2, db 0                   ; Initialised section: ALIGN, not ALIGNB
call32_pmidt:
                dw 8*256-1                      ; Limit = table size - 1 (256 gates)
                dd 0                            ; Address (entered later)

                section .text
                bits 16                         ; Everything up to .in_pm is 16-bit code
;
; This is the main entrypoint in this function
;
init32:
                mov     bx, call32_call_start   ; Where to go in PM

;
; Enter protected mode.  BX contains the entry point relative to the
; real-mode CS.
;
; Saves SP in SavedSP, enables A20, loads GDT and IDT, switches to flat
; 32-bit segments and jumps to the entry point.
;
; Out (at the entry point): EBP = CS << 4 (the real-mode base),
;        ESP = [PMESP], DS = ES = SS = 28h, FS = GS = 0, interrupts off
;
call32_enter_pm:
                mov     ax, cs
                mov     ds, ax
                movzx   ebp, ax
                shl     ebp, 4                  ; EBP <- CS_BASE
                movzx   ebx, bx
                add     ebx, ebp                ; entry point += CS_BASE
                cli
                mov     [SavedSP], sp
                cld
                call    enable_a20
                mov     byte [call32_gdt+8+5], 89h ; Mark TSS unbusy
                o32 lgdt [call32_gdt]           ; Set up GDT
                o32 lidt [call32_pmidt]         ; Set up IDT
                mov     eax, cr0
                or      al, 1
                mov     cr0, eax                ; Enter protected mode
                jmp     20h:strict dword .in_pm+CS_BASE
.pm_jmp         equ $-6                         ; Address of the 32-bit offset in the jump above

                bits 32
.in_pm:
                xor     eax, eax                ; Available for future use...
                mov     fs, eax
                mov     gs, eax
                lldt    ax

                mov     al, 28h                 ; Set up data segments
                mov     es, eax
                mov     ds, eax
                mov     ss, eax

                mov     al, 08h
                ltr     ax                      ; Load the dummy TSS

                mov     esp, [ebp+PMESP]        ; Load protmode %esp if available
                jmp     ebx                     ; Go to where we need to go

;
; This is invoked before first dispatch of the 32-bit code, in 32-bit mode
;
call32_call_start:
                ;
                ; Set up a temporary stack in the bounce buffer;
                ; start32.S will override this to point us to the real
                ; high-memory stack.
                ;
                mov     esp, (BOUNCE_SEG << 4) + 0x10000

                ; Arguments for the 32-bit program, pushed last-to-first
                push    dword call32_enter_rm.rm_jmp+CS_BASE
                push    dword call32_enter_pm.pm_jmp+CS_BASE
                push    dword stack_end         ; RM size
                push    dword call32_gdt+CS_BASE
                push    dword call32_handle_interrupt+CS_BASE
                push    dword CS_BASE           ; Segment base
                push    dword (BOUNCE_SEG << 4) ; Bounce buffer address
                push    dword call32_syscall+CS_BASE ; Syscall entry point

                call    pm_entry-CS_BASE        ; Run the program...

                ; ... fall through to call32_exit ...

call32_exit:
                mov     bx, call32_done         ; Return to command loop

;
; Leave protected mode.  BX contains the 16-bit offset to continue at
; once back in real mode.
;
; Saves the IDT register in call32_pmidt and ESP in PMESP, drops to
; 16-bit protected mode, clears CR0.PE and reloads all segment
; registers and SP (from SavedSP) for real mode.
;
call32_enter_rm:
                ; Careful here... the PM code may have relocated the
                ; entire RM code, so we need to figure out exactly
                ; where we are executing from.  If the PM code has
                ; relocated us, it *will* have adjusted the GDT to
                ; match, though.
                call    .here
.here:          pop     ebp
                sub     ebp, .here              ; EBP = run-time base of this image
                o32 sidt [ebp+call32_pmidt]
                cli
                cld
                mov     [ebp+PMESP], esp        ; Save exit %esp
                xor     esp, esp                ; Make sure the high bits are zero
                jmp     10h:.in_pm16            ; Return to 16-bit mode first

                bits 16
.in_pm16:
                mov     ax, 18h                 ; Real-mode-like segment
                mov     es, ax
                mov     ds, ax
                mov     ss, ax
                mov     fs, ax
                mov     gs, ax

                lidt    [call32_rmidt]          ; Real-mode IDT (rm needs no GDT)
                mov     eax, cr0
                and     al, ~1
                mov     cr0, eax
                jmp     MY_CS:.in_rm
.rm_jmp         equ $-2                         ; Address of the segment word in the jump above

.in_rm:                                         ; Back in real mode
                mov     ax, cs
                mov     ds, ax
                mov     es, ax
                mov     fs, ax
                mov     gs, ax
                mov     ss, ax
                mov     sp, [SavedSP]           ; Restore stack
                jmp     bx                      ; Go to wherever we need to go...

; Real-mode continuation after the 32-bit program has returned: turn
; A20 back off, re-enable interrupts and return to init32's caller.
call32_done:
                call    disable_a20
                sti
                ret

;
; 16-bit support code
;
                bits 16

;
; 16-bit interrupt-handling code
;
; In:    EDX = segment:offset of the handler, as read from the IVT
;
; Builds an INT-style frame (flags, CS, return offset), "returns" into
; the handler, and re-enters protected mode at call32_int_resume once
; the handler has returned.
;
call32_int_rm:
                pushf                           ; Flags on stack
                push    cs                      ; Return segment
                push    word .cont              ; Return address
                push    dword edx               ; Segment:offset of IVT entry
                retf                            ; Invoke IVT routine
.cont:          ; ... on resume ...
                mov     bx, call32_int_resume
                jmp     call32_enter_pm         ; Go back to PM

;
; 16-bit system call handling code
;
; Pops the register image prepared by call32_syscall (GS, FS, ES, DS,
; the general registers, EFLAGS) and far-returns into the target
; routine.  The target comes back to .return, where the same 44-byte
; image is pushed again for call32_sys_resume.
;
call32_sys_rm:
                pop     gs
                pop     fs
                pop     es
                pop     ds
                popad
                popfd
                retf                            ; Invoke routine
.return:
                pushfd
                pushad
                push    ds
                push    es
                push    fs
                push    gs
                mov     bx, call32_sys_resume
                jmp     call32_enter_pm

;
; 32-bit support code
;
                bits 32

;
; This is invoked on getting an interrupt in protected mode.  At
; this point, we need to context-switch to real mode and invoke
; the interrupt routine.
;
; When this gets invoked, the registers are saved on the stack and
; AL contains the interrupt number.
;
; In:    AL = index into the real-mode interrupt vector table at 0
;
call32_handle_interrupt:
                movzx   eax, al
                xor     ebx, ebx                ; Actually makes the code smaller
                mov     edx, [ebx+eax*4]        ; Get the segment:offset of the routine
                mov     bx, call32_int_rm
                jmp     call32_enter_rm         ; Go to real mode

; Back in protected mode after the real-mode handler has run: restore
; the saved registers and return from the interrupt.
call32_int_resume:
                popad
                iret

;
; Syscall invocation.  We manifest a structure on the real-mode stack,
; containing the call32sys_t structure from <call32.h> as well as
; the following entries (from low to high address):
; - Target offset
; - Target segment
; - Return offset
; - Return segment (== real mode cs)
; - Return flags
;
; That is 44 bytes of registers plus 10 bytes of addresses and flags,
; 54 bytes in total, allocated below SavedSP.
;
; Stack on entry (after the pushfd/pushad below):
;   [esp+10*4] = interrupt number (low byte used)
;   [esp+11*4] = pointer to the source register block
;   [esp+12*4] = pointer to the destination register block, or NULL
;
call32_syscall:
                pushfd                          ; Save IF among other things...
                pushad                          ; We only need to save some, but...
                cld
                call    .base
.base:          pop     ebp
                sub     ebp, .base              ; EBP = run-time base of this image

                movzx   edi, word [ebp+SavedSP]
                sub     edi, 54                 ; Allocate 54 bytes
                mov     [ebp+SavedSP], di
                add     edi, ebp                ; Create linear address

                mov     esi, [esp+11*4]         ; Source regs
                xor     ecx, ecx
                mov     cl, 11                  ; 44 bytes to copy
                rep movsd

                movzx   eax, byte [esp+10*4]    ; Interrupt number
                ; ecx == 0 here; adding it to the EA makes the
                ; encoding smaller
                mov     eax, [ecx+eax*4]        ; Get IVT entry
                stosd                           ; Target offset:segment
                mov     ax, call32_sys_rm.return ; Return offset
                stosw
                mov     eax, ebp
                shr     eax, 4                  ; Return segment
                stosw
                mov     eax, [edi-12]           ; Flags from the copied register block
                and     eax, 0x200cd7           ; Mask (potentially) unsafe flags
                mov     [edi-12], eax           ; Primary flags entry
                stosw                           ; Return flags (low word)

                mov     bx, call32_sys_rm
                jmp     call32_enter_rm         ; Go to real mode

                ; On return, the 44-byte return structure is on the
                ; real-mode stack.  call32_enter_pm will leave ebp
                ; pointing to the real-mode base.
call32_sys_resume:
                movzx   esi, word [ebp+SavedSP]
                add     esi, ebp                ; Create linear address
                mov     edi, [esp+12*4]         ; Dest regs
                test    edi, edi                ; NULL pointer?
                jnz     .do_copy
                mov     edi, esi                ; Do a dummy copy-to-self
.do_copy:       xor     ecx, ecx
                mov     cl, 11                  ; 44 bytes
                rep movsd                       ; Copy register block

                add     word [ebp+SavedSP], 44  ; Remove from stack

                popad                           ; Undo the saves made in call32_syscall
                popfd
                ret                             ; Return to 32-bit program