;  z_Windows_NT-586_asm.asm:  - microtasking routines specifically
;    written for IA-32 architecture and Intel(R) 64 running Windows* OS

;
;//===----------------------------------------------------------------------===//
;//
;// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
;// See https://llvm.org/LICENSE.txt for license information.
;// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
;//
;//===----------------------------------------------------------------------===//
;

        TITLE   z_Windows_NT-586_asm.asm

; ============================= IA-32 architecture ==========================
ifdef _M_IA32

        .586P

if @Version gt 510
        .model HUGE
else
_TEXT   SEGMENT PARA USE32 PUBLIC 'CODE'
_TEXT   ENDS
_DATA   SEGMENT DWORD USE32 PUBLIC 'DATA'
_DATA   ENDS
CONST   SEGMENT DWORD USE32 PUBLIC 'CONST'
CONST   ENDS
_BSS    SEGMENT DWORD USE32 PUBLIC 'BSS'
_BSS    ENDS
$$SYMBOLS       SEGMENT BYTE USE32 'DEBSYM'
$$SYMBOLS       ENDS
$$TYPES SEGMENT BYTE USE32 'DEBTYP'
$$TYPES ENDS
_TLS    SEGMENT DWORD USE32 PUBLIC 'TLS'
_TLS    ENDS
FLAT    GROUP _DATA, CONST, _BSS
        ASSUME  CS: FLAT, DS: FLAT, SS: FLAT
endif


;------------------------------------------------------------------------
; FUNCTION ___kmp_x86_pause
;
; void
; __kmp_x86_pause( void )
;
; Executes one PAUSE instruction and returns. Takes no arguments.
PUBLIC  ___kmp_x86_pause
_TEXT   SEGMENT
        ALIGN 16
___kmp_x86_pause PROC NEAR

        db      0f3H
        db      090H    ;; pause (emitted as raw bytes F3 90)
        ret

___kmp_x86_pause ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_x86_cpuid
;
; void
; __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p );
;
; Runs CPUID with eax = mode and ecx = mode2 and stores the resulting
; eax, ebx, ecx, edx at offsets 0, 4, 8, 12 of *p.
; edi, ebx, ecx and edx are saved and restored; eax is clobbered.
PUBLIC  ___kmp_x86_cpuid
_TEXT   SEGMENT
        ALIGN 16
_mode$  = 8
_mode2$ = 12
_p$     = 16
_eax$   = 0
_ebx$   = 4
_ecx$   = 8
_edx$   = 12

___kmp_x86_cpuid PROC NEAR

        push    ebp
        mov     ebp, esp

        push    edi
        push    ebx
        push    ecx
        push    edx

        mov     eax, DWORD PTR _mode$[ebp]
        mov     ecx, DWORD PTR _mode2$[ebp]
        cpuid                                   ; Query the CPUID for the current processor

        mov     edi, DWORD PTR _p$[ebp]         ; edi = result buffer
        mov     DWORD PTR _eax$[ edi ], eax
        mov     DWORD PTR _ebx$[ edi ], ebx
        mov     DWORD PTR _ecx$[ edi ], ecx
        mov     DWORD PTR _edx$[ edi ], edx

        pop     edx
        pop     ecx
        pop     ebx
        pop     edi

        mov     esp, ebp
        pop     ebp
        ret

___kmp_x86_cpuid ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_test_then_add32
;
; kmp_int32
; __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
;
; Atomically adds d to *p (lock xadd) and returns the previous *p in eax.
PUBLIC  ___kmp_test_then_add32
_p$ = 4
_d$ = 8
_TEXT   SEGMENT
        ALIGN 16
___kmp_test_then_add32 PROC NEAR

        mov     eax, DWORD PTR _d$[esp]
        mov     ecx, DWORD PTR _p$[esp]
lock    xadd    DWORD PTR [ecx], eax            ; eax = old *p, *p += d
        ret

___kmp_test_then_add32 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store8
;
; kmp_int8
; __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
;
; Atomic compare-and-swap on a byte. Returns 1 in eax if *p equalled cv
; and sv was stored, otherwise 0.
PUBLIC  ___kmp_compare_and_store8
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store8 PROC NEAR

        mov     ecx, DWORD PTR _p$[esp]
        mov     al, BYTE PTR _cv$[esp]
        mov     dl, BYTE PTR _sv$[esp]
lock    cmpxchg BYTE PTR [ecx], dl
        sete    al                              ; al = 1 if the exchange happened, else 0
        and     eax, 1                          ; zero-extend the 0/1 result into eax
        ret

___kmp_compare_and_store8 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store16
;
; kmp_int16
; __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
;
; Atomic compare-and-swap on a word. Returns 1 in eax if *p equalled cv
; and sv was stored, otherwise 0.
PUBLIC  ___kmp_compare_and_store16
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store16 PROC NEAR

        mov     ecx, DWORD PTR _p$[esp]
        mov     ax, WORD PTR _cv$[esp]
        mov     dx, WORD PTR _sv$[esp]
lock    cmpxchg WORD PTR [ecx], dx
        sete    al                              ; al = 1 if the exchange happened, else 0
        and     eax, 1                          ; zero-extend the 0/1 result into eax
        ret

___kmp_compare_and_store16 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store32
;
; kmp_int32
; __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
;
; Atomic compare-and-swap on a dword. Returns 1 in eax if *p equalled cv
; and sv was stored, otherwise 0.
PUBLIC  ___kmp_compare_and_store32
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store32 PROC NEAR

        mov     ecx, DWORD PTR _p$[esp]
        mov     eax, DWORD PTR _cv$[esp]
        mov     edx, DWORD PTR _sv$[esp]
lock    cmpxchg DWORD PTR [ecx], edx
        sete    al                              ; al = 1 if the exchange happened, else 0
        and     eax, 1                          ; zero-extend the 0/1 result into eax
        ret

___kmp_compare_and_store32 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store64
;
; kmp_int32
; __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
;
; Atomic 64-bit compare-and-swap via cmpxchg8b (edx:eax = cv, ecx:ebx = sv).
; Returns 1 in eax if *p equalled cv and sv was stored, otherwise 0.
; ebx and edi are saved and restored.
PUBLIC  ___kmp_compare_and_store64
_TEXT   SEGMENT
        ALIGN 16
_p$ = 8
_cv_low$ = 12
_cv_high$ = 16
_sv_low$ = 20
_sv_high$ = 24

___kmp_compare_and_store64 PROC NEAR

        push    ebp
        mov     ebp, esp
        push    ebx
        push    edi
        mov     edi, DWORD PTR _p$[ebp]
        mov     eax, DWORD PTR _cv_low$[ebp]
        mov     edx, DWORD PTR _cv_high$[ebp]
        mov     ebx, DWORD PTR _sv_low$[ebp]
        mov     ecx, DWORD PTR _sv_high$[ebp]
lock    cmpxchg8b QWORD PTR [edi]
        sete    al                              ; al = 1 if the exchange happened, else 0
        and     eax, 1                          ; zero-extend the 0/1 result into eax
        pop     edi
        pop     ebx
        mov     esp, ebp
        pop     ebp
        ret

___kmp_compare_and_store64 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_xchg_fixed8
;
; kmp_int8
; __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
;
; Atomically stores d into *p and returns the previous byte in al.
PUBLIC  ___kmp_xchg_fixed8
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_d$ = 8

___kmp_xchg_fixed8 PROC NEAR

        mov     ecx, DWORD PTR _p$[esp]
        mov     al, BYTE PTR _d$[esp]
lock    xchg    BYTE PTR [ecx], al
        ret

___kmp_xchg_fixed8 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_xchg_fixed16
;
; kmp_int16
; __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
;
; Atomically stores d into *p and returns the previous word in ax.
PUBLIC  ___kmp_xchg_fixed16
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_d$ = 8

___kmp_xchg_fixed16 PROC NEAR

        mov     ecx, DWORD PTR _p$[esp]
        mov     ax, WORD PTR  _d$[esp]
lock    xchg    WORD PTR [ecx], ax
        ret

___kmp_xchg_fixed16 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_xchg_fixed32
;
; kmp_int32
; __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
;
; Atomically stores d into *p and returns the previous dword in eax.
PUBLIC  ___kmp_xchg_fixed32
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_d$ = 8

___kmp_xchg_fixed32 PROC NEAR

        mov     ecx, DWORD PTR _p$[esp]
        mov     eax, DWORD PTR _d$[esp]
lock    xchg    DWORD PTR [ecx], eax
        ret

___kmp_xchg_fixed32 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION ___kmp_xchg_real32
;
; kmp_real32
; __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 d );
;
; Atomically stores d into *p and returns the previous value in st(0).
;
; The returned value is the one the xchg actually swapped out, so it is
; consistent with the store even when other threads write *p concurrently.
; Exactly one x87 register is pushed (the return value); the previous
; fld / fst / fld sequence left an extra entry on the x87 stack per call.
PUBLIC  ___kmp_xchg_real32
_TEXT   SEGMENT
        ALIGN 16
_p$ = 8
_d$ = 12
_old_value$ = -4

___kmp_xchg_real32 PROC NEAR

        push    ebp
        mov     ebp, esp
        sub     esp, 4                          ; scratch slot for the old value

        mov     ecx, DWORD PTR _p$[ebp]
        mov     eax, DWORD PTR _d$[ebp]         ; raw bits of d

lock    xchg    DWORD PTR [ecx], eax            ; eax = previous raw bits of *p

        mov     DWORD PTR _old_value$[ebp], eax
        fld     DWORD PTR _old_value$[ebp]      ;; return old_value in st(0)

        mov     esp, ebp
        pop     ebp
        ret

___kmp_xchg_real32 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store_ret8
;
; kmp_int8
; __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
;
; Atomic compare-and-swap on a byte. Returns in al the value cmpxchg
; leaves there: cv on success, the current *p on failure.
PUBLIC  ___kmp_compare_and_store_ret8
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store_ret8 PROC NEAR

        mov     ecx, DWORD PTR _p$[esp]
        mov     al, BYTE PTR _cv$[esp]
        mov     dl, BYTE PTR _sv$[esp]
lock    cmpxchg BYTE PTR [ecx], dl
        ret

___kmp_compare_and_store_ret8 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store_ret16
;
; kmp_int16
; __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
;
; Atomic compare-and-swap on a word. Returns in ax the value cmpxchg
; leaves there: cv on success, the current *p on failure.
PUBLIC  ___kmp_compare_and_store_ret16
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store_ret16 PROC NEAR

        mov     ecx, DWORD PTR _p$[esp]
        mov     ax, WORD PTR _cv$[esp]
        mov     dx, WORD PTR _sv$[esp]
lock    cmpxchg WORD PTR [ecx], dx
        ret

___kmp_compare_and_store_ret16 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store_ret32
;
; kmp_int32
; __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
;
; Atomic compare-and-swap on a dword. Returns in eax the value cmpxchg
; leaves there: cv on success, the current *p on failure.
PUBLIC  ___kmp_compare_and_store_ret32
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store_ret32 PROC NEAR

        mov     ecx, DWORD PTR _p$[esp]
        mov     eax, DWORD PTR _cv$[esp]
        mov     edx, DWORD PTR _sv$[esp]
lock    cmpxchg DWORD PTR [ecx], edx
        ret

___kmp_compare_and_store_ret32 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store_ret64
;
; kmp_int64
; __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
;
; Atomic 64-bit compare-and-swap via cmpxchg8b (edx:eax = cv, ecx:ebx = sv).
; Returns in edx:eax the value cmpxchg8b leaves there: cv on success,
; the current *p on failure. ebx and edi are saved and restored.
PUBLIC  ___kmp_compare_and_store_ret64
_TEXT   SEGMENT
        ALIGN 16
_p$ = 8
_cv_low$ = 12
_cv_high$ = 16
_sv_low$ = 20
_sv_high$ = 24

___kmp_compare_and_store_ret64 PROC NEAR

        push    ebp
        mov     ebp, esp
        push    ebx
        push    edi
        mov     edi, DWORD PTR _p$[ebp]
        mov     eax, DWORD PTR _cv_low$[ebp]
        mov     edx, DWORD PTR _cv_high$[ebp]
        mov     ebx, DWORD PTR _sv_low$[ebp]
        mov     ecx, DWORD PTR _sv_high$[ebp]
lock    cmpxchg8b QWORD PTR [edi]
        pop     edi
        pop     ebx
        mov     esp, ebp
        pop     ebp
        ret

___kmp_compare_and_store_ret64 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_load_x87_fpu_control_word
;
; void
; __kmp_load_x87_fpu_control_word( kmp_int16 *p );
;
; Loads the x87 control word from *p (fldcw).
;
; parameters:
;       p:      4(%esp)
PUBLIC  ___kmp_load_x87_fpu_control_word
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4

___kmp_load_x87_fpu_control_word PROC NEAR

        mov     eax, DWORD PTR _p$[esp]
        fldcw   WORD PTR [eax]
        ret

___kmp_load_x87_fpu_control_word ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_store_x87_fpu_control_word
;
; void
; __kmp_store_x87_fpu_control_word( kmp_int16 *p );
;
; Stores the current x87 control word into *p (fstcw).
;
; parameters:
;       p:      4(%esp)
PUBLIC  ___kmp_store_x87_fpu_control_word
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4

___kmp_store_x87_fpu_control_word PROC NEAR

        mov     eax, DWORD PTR _p$[esp]
        fstcw   WORD PTR [eax]
        ret

___kmp_store_x87_fpu_control_word ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION ___kmp_clear_x87_fpu_status_word
;
; void
; __kmp_clear_x87_fpu_status_word();
;
; Clears the x87 exception flags with fnclex.
PUBLIC  ___kmp_clear_x87_fpu_status_word
_TEXT   SEGMENT
        ALIGN 16

___kmp_clear_x87_fpu_status_word PROC NEAR

        fnclex
        ret

___kmp_clear_x87_fpu_status_word ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION ___kmp_invoke_microtask
;
; typedef void (*microtask_t)( int *gtid, int *tid, ... );
;
; int
; __kmp_invoke_microtask( microtask_t pkfn,
;                         int gtid, int tid,
;                         int argc, void *p_argv[] )
;
; Calls (*pkfn)( &gtid, &tid, p_argv[0], ..., p_argv[argc-1] ) with esp
; on a 128-byte boundary at the call instruction, then returns 1.
; When OMPT_SUPPORT is non-zero, ebp is first stored through the extra
; exit_frame pointer argument.
PUBLIC  ___kmp_invoke_microtask
_TEXT   SEGMENT
        ALIGN 16
_pkfn$ = 8
_gtid$ = 12
_tid$ = 16
_argc$ = 20
_argv$ = 24
if OMPT_SUPPORT
_exit_frame$ = 28
endif
_i$ = -8
_stk_adj$ = -16
_vptr$ = -12
_qptr$ = -4

___kmp_invoke_microtask PROC NEAR
        push    ebp
        mov     ebp, esp
        sub     esp, 16                         ; four dword locals: qptr, i, vptr, stk_adj
        push    ebx
        push    esi
        push    edi
if OMPT_SUPPORT
        mov     eax, DWORD PTR _exit_frame$[ebp]
        mov     DWORD PTR [eax], ebp            ; *exit_frame = this frame's ebp
endif
        mov     eax, DWORD PTR _argc$[ebp]
        mov     DWORD PTR _i$[ebp], eax         ; i = argc

;; ---- pad esp so it is 128-byte aligned once every argument is pushed ----
        lea     edx, DWORD PTR [eax*4+8]        ; edx = (argc + 2) * 4 = bytes of pushed args
        mov     ecx, esp
        mov     eax,edx                         ; eax = bytes of pushed args
        sub     ecx,edx                         ; ecx = esp after the pushes, unpadded
        mov     edx,ecx
        and     ecx,-128                        ; ecx = that address rounded down to 128
        sub     edx,ecx                         ; edx = padding needed
        sub     esp,edx                         ; apply the padding now

        add     edx,eax                         ; padding + argument bytes
        mov     DWORD PTR _stk_adj$[ebp], edx   ; total to release after the call
;; --------------------------------------------------------------------------

        jmp     SHORT $_kmp_ia32_test_arg
$_kmp_ia32_next_arg:
        mov     ecx, DWORD PTR _i$[ebp]
        sub     ecx, 1
        mov     DWORD PTR _i$[ebp], ecx         ; --i
$_kmp_ia32_test_arg:
        cmp     DWORD PTR _i$[ebp], 0
        jle     SHORT $_kmp_ia32_args_done      ; stop once i <= 0

        mov     edx, DWORD PTR _i$[ebp]
        mov     eax, DWORD PTR _argv$[ebp]
        mov     ecx, DWORD PTR [eax+edx*4-4]    ; ecx = p_argv[i-1]
        mov     DWORD PTR _vptr$[ebp], ecx
        mov     eax, DWORD PTR _vptr$[ebp]
        push    eax                             ; arguments go on the stack last-to-first
        jmp     SHORT $_kmp_ia32_next_arg
$_kmp_ia32_args_done:
        lea     edx, DWORD PTR _tid$[ebp]
        mov     DWORD PTR _vptr$[ebp], edx      ; vptr = &tid
        lea     eax, DWORD PTR _gtid$[ebp]
        mov     DWORD PTR _qptr$[ebp], eax      ; qptr = &gtid
        mov     eax, DWORD PTR _vptr$[ebp]
        push    eax                             ; 2nd argument: &tid
        mov     eax, DWORD PTR _qptr$[ebp]
        push    eax                             ; 1st argument: &gtid
        call    DWORD PTR _pkfn$[ebp]
        add     esp, DWORD PTR _stk_adj$[ebp]   ; drop arguments and alignment padding
        mov     eax, 1                          ; return value
        pop     edi
        pop     esi
        pop     ebx
        mov     esp, ebp
        pop     ebp
        ret     0
___kmp_invoke_microtask ENDP
_TEXT   ENDS

endif

; ==================================== Intel(R) 64 ===================================

ifdef _M_AMD64

;------------------------------------------------------------------------
; FUNCTION __kmp_x86_cpuid
;
; void
; __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p );
;
; Runs CPUID with eax = mode and ecx = mode2 and stores the resulting
; eax, ebx, ecx, edx at offsets 0, 4, 8, 12 of *p.
;
; parameters:
;       mode:           ecx
;       mode2:          edx
;       cpuid_buffer:   r8
PUBLIC  __kmp_x86_cpuid
_TEXT   SEGMENT
        ALIGN 16

__kmp_x86_cpuid PROC FRAME ;NEAR

        push    rbp
        .pushreg rbp
        mov     rbp, rsp
        .setframe rbp, 0
        push    rbx                             ; cpuid overwrites rbx; keep the caller's copy
        .pushreg rbx
        .ENDPROLOG

        mov     eax, ecx                        ; leaf selector (mode)
        mov     r10, r8                         ; r10 = result buffer (p)
        mov     ecx, edx                        ; sub-leaf selector (mode2)
        cpuid                                   ; Query the CPUID for the current processor

        mov     DWORD PTR [r10], eax            ; store results into buffer
        mov     DWORD PTR [r10+4], ebx
        mov     DWORD PTR [r10+8], ecx
        mov     DWORD PTR [r10+12], edx

        pop     rbx                             ; restore the caller's rbx
        mov     rsp, rbp
        pop     rbp
        ret

__kmp_x86_cpuid ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_test_then_add32
;
; kmp_int32
; __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
;
; Atomically adds d to *p and returns the previous *p.
;
; parameters:
;       p:      rcx
;       d:      edx
;
; return:       eax
PUBLIC  __kmp_test_then_add32
_TEXT   SEGMENT
        ALIGN 16
__kmp_test_then_add32 PROC ;NEAR

        mov     eax, edx
lock    xadd    DWORD PTR [rcx], eax            ; eax = old *p, *p += d
        ret

__kmp_test_then_add32 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_test_then_add64
;
; kmp_int64
; __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d );
;
; Atomically adds d to *p and returns the previous *p.
;
; parameters:
;       p:      rcx
;       d:      rdx
;
; return:       rax
PUBLIC  __kmp_test_then_add64
_TEXT   SEGMENT
        ALIGN 16
__kmp_test_then_add64 PROC ;NEAR

        mov     rax, rdx
lock    xadd    QWORD PTR [rcx], rax            ; rax = old *p, *p += d
        ret

__kmp_test_then_add64 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store8
;
; kmp_int8
; __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
;
; Atomic byte compare-and-swap; returns 1 if sv was stored, else 0.
;
; parameters:
;       p:      rcx
;       cv:     edx
;       sv:     r8d
;
; return:       eax
PUBLIC  __kmp_compare_and_store8
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store8 PROC ;NEAR

        mov     al, dl                          ; al = "cv" (comparand)
        mov     edx, r8d                        ; dl = "sv" (value to store)
lock    cmpxchg BYTE PTR [rcx], dl
        sete    al                              ; al = 1 when the exchange happened, else 0
        and     rax, 1                          ; keep only that bit in rax
        ret

__kmp_compare_and_store8 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store16
;
; kmp_int16
; __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
;
; Atomic word compare-and-swap; returns 1 if sv was stored, else 0.
;
; parameters:
;       p:      rcx
;       cv:     edx
;       sv:     r8d
;
; return:       eax
PUBLIC  __kmp_compare_and_store16
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store16 PROC ;NEAR

        mov     ax, dx                          ; ax = "cv" (comparand)
        mov     edx, r8d                        ; dx = "sv" (value to store)
lock    cmpxchg WORD PTR [rcx], dx
        sete    al                              ; al = 1 when the exchange happened, else 0
        and     rax, 1                          ; keep only that bit in rax
        ret

__kmp_compare_and_store16 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store32
;
; kmp_int32
; __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
;
; Atomic dword compare-and-swap; returns 1 if sv was stored, else 0.
;
; parameters:
;       p:      rcx
;       cv:     edx
;       sv:     r8d
;
; return:       eax
PUBLIC  __kmp_compare_and_store32
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store32 PROC ;NEAR

        mov     eax, edx                        ; eax = "cv" (comparand)
        mov     edx, r8d                        ; edx = "sv" (value to store)
lock    cmpxchg DWORD PTR [rcx], edx
        sete    al                              ; al = 1 when the exchange happened, else 0
        and     rax, 1                          ; keep only that bit in rax
        ret

__kmp_compare_and_store32 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store64
;
; kmp_int32
; __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
;
; Atomic qword compare-and-swap; returns 1 if sv was stored, else 0.
;
; parameters:
;       p:      rcx
;       cv:     rdx
;       sv:     r8
;
; return:       eax
PUBLIC  __kmp_compare_and_store64
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store64 PROC ;NEAR

        mov     rax, rdx                        ; rax = "cv" (comparand)
        mov     rdx, r8                         ; rdx = "sv" (value to store)
lock    cmpxchg QWORD PTR [rcx], rdx
        sete    al                              ; al = 1 when the exchange happened, else 0
        and     rax, 1                          ; keep only that bit in rax
        ret

__kmp_compare_and_store64 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_fixed8
;
; kmp_int8
; __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
;
; Atomically stores d into *p and returns the previous byte.
;
; parameters:
;       p:      rcx
;       d:      dl
;
; return:       al
PUBLIC  __kmp_xchg_fixed8
_TEXT   SEGMENT
        ALIGN 16

__kmp_xchg_fixed8 PROC ;NEAR

        mov     al, dl
lock    xchg    BYTE PTR [rcx], al
        ret

__kmp_xchg_fixed8 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_fixed16
;
; kmp_int16
; __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
;
; Atomically stores d into *p and returns the previous word.
;
; parameters:
;       p:      rcx
;       d:      dx
;
; return:       ax
PUBLIC  __kmp_xchg_fixed16
_TEXT   SEGMENT
        ALIGN 16

__kmp_xchg_fixed16 PROC ;NEAR

        mov     ax, dx
lock    xchg    WORD PTR [rcx], ax
        ret

__kmp_xchg_fixed16 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_fixed32
;
; kmp_int32
; __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
;
; Atomically stores d into *p and returns the previous dword.
;
; parameters:
;       p:      rcx
;       d:      edx
;
; return:       eax
PUBLIC  __kmp_xchg_fixed32
_TEXT   SEGMENT
        ALIGN 16
__kmp_xchg_fixed32 PROC ;NEAR

        mov     eax, edx
lock    xchg    DWORD PTR [rcx], eax
        ret

__kmp_xchg_fixed32 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_fixed64
;
; kmp_int64
; __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d );
;
; Atomically stores d into *p and returns the previous qword.
;
; parameters:
;       p:      rcx
;       d:      rdx
;
; return:       rax
PUBLIC  __kmp_xchg_fixed64
_TEXT   SEGMENT
        ALIGN 16
__kmp_xchg_fixed64 PROC ;NEAR

        mov     rax, rdx
lock    xchg    QWORD PTR [rcx], rax
        ret

__kmp_xchg_fixed64 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store_ret8
;
; kmp_int8
; __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
;
; Atomic byte compare-and-swap. Returns the value cmpxchg leaves in AL:
; cv on success, the current *p on failure.
;
; parameters:
;       p:      rcx
;       cv:     edx
;       sv:     r8d
;
; return:       eax
PUBLIC  __kmp_compare_and_store_ret8
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store_ret8 PROC ;NEAR
        mov     al, dl                          ; "cv"
        mov     edx, r8d                        ; "sv"
lock    cmpxchg BYTE PTR [rcx], dl
                        ; Compare AL with [rcx].  If equal set
                        ; ZF and exchange DL with [rcx].  Else, clear
                        ; ZF and load [rcx] into AL.
        ret

__kmp_compare_and_store_ret8 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store_ret16
;
; kmp_int16
; __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
;
; Atomic word compare-and-swap. Returns the value cmpxchg leaves in AX:
; cv on success, the current *p on failure.
;
; parameters:
;       p:      rcx
;       cv:     edx
;       sv:     r8d
;
; return:       eax
PUBLIC  __kmp_compare_and_store_ret16
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store_ret16 PROC ;NEAR

        mov     ax, dx                          ; "cv"
        mov     edx, r8d                        ; "sv"
lock    cmpxchg WORD PTR [rcx], dx
        ret

__kmp_compare_and_store_ret16 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store_ret32
;
; kmp_int32
; __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
;
; Atomic dword compare-and-swap. Returns the value cmpxchg leaves in EAX:
; cv on success, the current *p on failure.
;
; parameters:
;       p:      rcx
;       cv:     edx
;       sv:     r8d
;
; return:       eax
PUBLIC  __kmp_compare_and_store_ret32
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store_ret32 PROC ;NEAR

        mov     eax, edx                        ; "cv"
        mov     edx, r8d                        ; "sv"
lock    cmpxchg DWORD PTR [rcx], edx
        ret

__kmp_compare_and_store_ret32 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store_ret64
;
; kmp_int64
; __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
;
; Atomic qword compare-and-swap. Returns the value cmpxchg leaves in RAX:
; cv on success, the current *p on failure.
;
; parameters:
;       p:      rcx
;       cv:     rdx
;       sv:     r8
;
; return:       rax
PUBLIC  __kmp_compare_and_store_ret64
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store_ret64 PROC ;NEAR

        mov     rax, rdx                        ; "cv"
        mov     rdx, r8                         ; "sv"
lock    cmpxchg QWORD PTR [rcx], rdx
        ret

__kmp_compare_and_store_ret64 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store_loop8
;
; kmp_int8
; __kmp_compare_and_store_loop8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
;
; Retries a byte compare-and-swap, with a PAUSE between attempts, until
; *p is observed equal to cv and sv has been stored.
;
; cv stays in DL and sv stays in R8B for the whole loop, so every retry
; compares against the caller's cv. (Copying sv into EDX inside the loop
; would make the second and later attempts compare against sv instead.)
;
; parameters:
;       p:      rcx
;       cv:     edx
;       sv:     r8d
;
; return:       al
PUBLIC  __kmp_compare_and_store_loop8
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store_loop8 PROC ;NEAR
$__kmp_loop:
        mov     al, dl                          ; reload "cv"; a failed cmpxchg overwrites AL
lock    cmpxchg BYTE PTR [rcx], r8b             ; "sv" is used directly from r8b
                        ; Compare AL with [rcx].  If equal set
                        ; ZF and store R8B into [rcx].  Else, clear
                        ; ZF and load [rcx] into AL.
        jz      SHORT $__kmp_success

        db      0f3H
        db      090H                            ; pause

        jmp     SHORT $__kmp_loop

$__kmp_success:
        ret

__kmp_compare_and_store_loop8 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_real32
;
; kmp_real32
; __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 d );
;
; Atomically stores d into *p and returns the previous value.
;
; parameters:
;       p:      rcx
;       d:      xmm1 (lower 4 bytes)
;
; return:       xmm0 (lower 4 bytes)
PUBLIC  __kmp_xchg_real32
_TEXT   SEGMENT
        ALIGN 16
__kmp_xchg_real32 PROC ;NEAR

        movd    eax, xmm1                       ; load d

lock    xchg    DWORD PTR [rcx], eax

        movd    xmm0, eax                       ; load old value into return register
        ret

__kmp_xchg_real32 ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_real64
;
; kmp_real64
; __kmp_xchg_real64( volatile kmp_real64 *p, kmp_real64 d );
;
; parameters:
;       p:      rcx
;       d:      xmm1 (lower 8 bytes)
;
; return:       xmm0 (lower 8 bytes)
PUBLIC  __kmp_xchg_real64
_TEXT   SEGMENT
        ALIGN 16
__kmp_xchg_real64 PROC ;NEAR

        movd    rax, xmm1                       ; load "d"

lock    xchg    QWORD PTR [rcx], rax

        movd    xmm0, rax                       ; load old value into return register
        ret

__kmp_xchg_real64 ENDP
_TEXT   ENDS

;------------------------------------------------------------------------
; FUNCTION __kmp_load_x87_fpu_control_word
;
; void
; __kmp_load_x87_fpu_control_word( kmp_int16 *p );
;
; Loads the x87 control word from *p (fldcw).
;
; parameters:
;       p:      rcx
PUBLIC  __kmp_load_x87_fpu_control_word
_TEXT   SEGMENT
        ALIGN 16
__kmp_load_x87_fpu_control_word PROC ;NEAR

        fldcw   WORD PTR [rcx]
        ret

__kmp_load_x87_fpu_control_word ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_store_x87_fpu_control_word
;
; void
; __kmp_store_x87_fpu_control_word( kmp_int16 *p );
;
; Stores the current x87 control word into *p (fstcw).
;
; parameters:
;       p:      rcx
PUBLIC  __kmp_store_x87_fpu_control_word
_TEXT   SEGMENT
        ALIGN 16
__kmp_store_x87_fpu_control_word PROC ;NEAR

        fstcw   WORD PTR [rcx]
        ret

__kmp_store_x87_fpu_control_word ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_clear_x87_fpu_status_word
;
; void
; __kmp_clear_x87_fpu_status_word()
;
; Clears the x87 exception flags with fnclex.
PUBLIC  __kmp_clear_x87_fpu_status_word
_TEXT   SEGMENT
        ALIGN 16
__kmp_clear_x87_fpu_status_word PROC ;NEAR

        fnclex
        ret

__kmp_clear_x87_fpu_status_word ENDP
_TEXT   ENDS


;------------------------------------------------------------------------
; FUNCTION __kmp_invoke_microtask
;
; typedef void (*microtask_t)( int *gtid, int *tid, ... );
;
; int
; __kmp_invoke_microtask( microtask_t pkfn,
;                         int gtid, int tid,
;                         int argc, void *p_argv[] ) {
;
;     (*pkfn) ( &gtid, &tid, argv[0], ... );
;     return 1;
; }
;
; note:
;      just before call to pkfn must have rsp 128-byte aligned for compiler
;
; parameters:
;      rcx:   pkfn      16[rbp]
;      edx:   gtid      24[rbp]
;      r8d:   tid       32[rbp]
;      r9d:   argc      40[rbp]
;      [st]:  p_argv    48[rbp]
;
;      argc is an int, so only r9d is read. The upper half of r9 is not
;      relied upon; every use below zero-extends r9d first.
;
; reg temps:
;      rax:   used all over the place
;      rdx:   used all over the place
;      rcx:   used as argument counter for push parms loop
;      r10:   used to hold pkfn function pointer argument
;
; return:      eax    (always 1/TRUE)
$_pkfn   = 16
$_gtid   = 24
$_tid    = 32
$_argc   = 40
$_p_argv = 48
if OMPT_SUPPORT
$_exit_frame = 56
endif

PUBLIC  __kmp_invoke_microtask
_TEXT   SEGMENT
        ALIGN 16

__kmp_invoke_microtask PROC FRAME ;NEAR
        mov     QWORD PTR 16[rsp], rdx          ; home gtid parameter
        mov     QWORD PTR 24[rsp], r8           ; home tid parameter
        push    rbp                             ; save base pointer
        .pushreg rbp
        sub     rsp, 0                          ; no fixed allocation necessary - end prolog

        lea     rbp, QWORD PTR [rsp]            ; establish the base pointer
        .setframe rbp, 0
        .ENDPROLOG
if OMPT_SUPPORT
        mov     rax, QWORD PTR $_exit_frame[rbp]
        mov     QWORD PTR [rax], rbp            ; *exit_frame = this frame's rbp
endif
        mov     r10, rcx                        ; save pkfn pointer for later

;; ------------------------------------------------------------
        mov     eax, r9d                        ; rax = argc, zero-extended from 32 bits
        cmp     rax, 2
        jge     SHORT $_kmp_invoke_stack_align
        mov     rax, 2                          ; set 4 homes if less than 2 parms
$_kmp_invoke_stack_align:
        lea     rdx, QWORD PTR [rax*8+16]       ; rdx = (argc + 2) * 8
        mov     rax, rsp                        ; Save current SP into rax
        sub     rax, rdx                        ; rsp - ((argc+2)*8) -> rax
                                                ; without align, rsp would be this
        and     rax, -128                       ; Mask off 7 bits (128-byte align)
        add     rax, rdx                        ; add space for push's in a loop below
        mov     rsp, rax                        ; Prepare the stack ptr
                                                ; Now it will align to 128-byte at the call
;; ------------------------------------------------------------
                                                ; setup pkfn parameter stack
        mov     eax, r9d                        ; rax = argc, zero-extended from 32 bits
        shl     rax, 3                          ; rax = argc*8
        mov     rdx, QWORD PTR $_p_argv[rbp]    ; rdx = p_argv
        add     rdx, rax                        ; rdx = &p_argv[argc]
        mov     ecx, r9d                        ; rcx = argc, zero-extended from 32 bits
        jecxz   SHORT $_kmp_invoke_pass_parms   ; nothing to push if argc=0
        cmp     ecx, 1                          ; if argc=1 branch ahead
        je      SHORT $_kmp_invoke_one_parm
        sub     ecx, 2                          ; if argc=2 branch ahead; ecx = argc-2 left to push
        je      SHORT $_kmp_invoke_two_parms

$_kmp_invoke_push_parms:                        ; push last - 5th parms to pkfn on stack
        sub     rdx, 8                          ; decrement p_argv pointer to previous parm
        mov     r8, QWORD PTR [rdx]             ; r8 = next p_argv entry, walking backwards
        push    r8                              ; push it (arguments go on in reverse order)
        sub     ecx, 1
        jecxz   SHORT $_kmp_invoke_two_parms
        jmp     SHORT $_kmp_invoke_push_parms

$_kmp_invoke_two_parms:
        sub     rdx, 8                          ; put 4th parm to pkfn in r9
        mov     r9, QWORD PTR [rdx]             ; r9 = p_argv[1]

$_kmp_invoke_one_parm:
        sub     rdx, 8                          ; put 3rd parm to pkfn in r8
        mov     r8, QWORD PTR [rdx]             ; r8 = p_argv[0]

$_kmp_invoke_pass_parms:                        ; put 1st & 2nd parms to pkfn in registers
        lea     rdx, QWORD PTR $_tid[rbp]       ; rdx = &tid (2nd parm to pkfn)
        lea     rcx, QWORD PTR $_gtid[rbp]      ; rcx = &gtid (1st parm to pkfn)
        sub     rsp, 32                         ; add stack space for first four parms
        mov     rax, r10                        ; rax = pkfn
        call    rax                             ; call (*pkfn)()
        mov     eax, 1                          ; return 1 (32-bit write zeroes the upper half of rax)

        lea     rsp, QWORD PTR [rbp]            ; restore stack pointer

;       add     rsp, 0                          ; no fixed allocation necessary - start epilog
        pop     rbp                             ; restore frame pointer
        ret
__kmp_invoke_microtask ENDP
_TEXT   ENDS

endif

END