; -----------------------------------------------------------------------
; Vector-permutation AES ("vpaes") for 32-bit x86 with SSSE3, NASM/Yasm
; syntax.  The embedded identification string near the end of the
; constant table reads:
;   "Vector Permutation AES for x86/SSSE3, Mike Hamburg (Stanford University)"
;
; Exported symbols (stack arguments, caller cleans up the stack; each entry
; point saves and restores ebp/ebx/esi/edi):
;   _vpaes_set_encrypt_key, _vpaes_set_decrypt_key,
;   _vpaes_encrypt, _vpaes_decrypt, _vpaes_cbc_encrypt
;
; Conventions visible in this file:
;   * ebp is the table base: every entry point loads the label-relative
;     value (L$_vpaes_consts+0x30 - L$NNNpic_point) and the first callee
;     adds its own return address (== L$NNNpic_point), giving the absolute
;     address L$_vpaes_consts+0x30 without an absolute relocation.
;   * xmm6 = [ebp-16] (0x0F in every byte), xmm7 = [ebp-48] once
;     __vpaes_preheat has run.
;   * The round count lives at byte offset 240 of the key schedule.
;   * pshufb / palignr are emitted as raw `db` opcode bytes; the decoded
;     mnemonic is given in the trailing comment on each such line.
;     NOTE(review): presumably done so assemblers without SSSE3 support
;     can still build the file - confirm with the generator.
; -----------------------------------------------------------------------
%ifidn __OUTPUT_FORMAT__,obj
section code use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
%ifdef __YASM_VERSION_ID__
%if __YASM_VERSION_ID__ < 01010000h
%error yasm version 1.1.0 or later needed.
%endif
; Yasm automatically includes .00 and complains about redefining it.
; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
%else
$@feat.00 equ 1
%endif
section .text code align=64
%else
section .text code
%endif
align 64
; Constant table: 49 rows of 16 bytes, followed by the identification
; string.  Code addresses it relative to ebp = L$_vpaes_consts+0x30, so
; the first three rows are [ebp-48], [ebp-32] and [ebp-16].
; Landmarks referenced by the code below:
;   [ebp-16]            0x0F0F0F0F x4 (252645135): low-nibble mask
;   [128+ebp]..[304+ebp] three groups of four rows selected by ecx
;                       (ecx is kept in {0,16,32,48})
;   [320+ebp]           row copied to the stack by __vpaes_schedule_core
;                       and rotated by __vpaes_schedule_round
;   [336+ebp]           0x5B5B5B5B x4 (1532713819)
;   [416+ebp]..         eight rows used by the decrypt branch of
;                       __vpaes_schedule_mangle
;   [544+ebp]..         twelve rows used by __vpaes_decrypt_core
;                       (addressed through ebx = ebp+608)
L$_vpaes_consts:
dd 218628480,235210255,168496130,67568393
dd 252381056,17041926,33884169,51187212
dd 252645135,252645135,252645135,252645135
dd 1512730624,3266504856,1377990664,3401244816
dd 830229760,1275146365,2969422977,3447763452
dd 3411033600,2979783055,338359620,2782886510
dd 4209124096,907596821,221174255,1006095553
dd 191964160,3799684038,3164090317,1589111125
dd 182528256,1777043520,2877432650,3265356744
dd 1874708224,3503451415,3305285752,363511674
dd 1606117888,3487855781,1093350906,2384367825
dd 197121,67569157,134941193,202313229
dd 67569157,134941193,202313229,197121
dd 134941193,202313229,197121,67569157
dd 202313229,197121,67569157,134941193
dd 33619971,100992007,168364043,235736079
dd 235736079,33619971,100992007,168364043
dd 168364043,235736079,33619971,100992007
dd 100992007,168364043,235736079,33619971
dd 50462976,117835012,185207048,252579084
dd 252314880,51251460,117574920,184942860
dd 184682752,252054788,50987272,118359308
dd 118099200,185467140,251790600,50727180
dd 2946363062,528716217,1300004225,1881839624
dd 1532713819,1532713819,1532713819,1532713819
dd 3602276352,4288629033,3737020424,4153884961
dd 1354558464,32357713,2958822624,3775749553
dd 1201988352,132424512,1572796698,503232858
dd 2213177600,1597421020,4103937655,675398315
dd 2749646592,4273543773,1511898873,121693092
dd 3040248576,1103263732,2871565598,1608280554
dd 2236667136,2588920351,482954393,64377734
dd 3069987328,291237287,2117370568,3650299247
dd 533321216,3573750986,2572112006,1401264716
dd 1339849704,2721158661,548607111,3445553514
dd 2128193280,3054596040,2183486460,1257083700
dd 655635200,1165381986,3923443150,2344132524
dd 190078720,256924420,290342170,357187870
dd 1610966272,2263057382,4103205268,309794674
dd 2592527872,2233205587,1335446729,3402964816
dd 3973531904,3225098121,3002836325,1918774430
dd 3870401024,2102906079,2284471353,4117666579
dd 617007872,1021508343,366931923,691083277
dd 2528395776,3491914898,2968704004,1613121270
dd 3445188352,3247741094,844474987,4093578302
dd 651481088,1190302358,1689581232,574775300
dd 4289380608,206939853,2555985458,2489840491
dd 2130264064,327674451,3566485037,3349835193
dd 2470714624,316102159,3636825756,3393945945
; NUL-terminated ASCII:
; "Vector Permutation AES for x86/SSSE3, Mike Hamburg (Stanford University)"
db 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105
db 111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83
db 83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117
db 114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105
db 118,101,114,115,105,116,121,41,0
align 64
align 16
; -----------------------------------------------------------------------
; __vpaes_preheat
; In:    ebp = (L$_vpaes_consts+0x30) minus the address of the label that
;              immediately follows the caller's `call` instruction
; Out:   ebp = absolute address of L$_vpaes_consts+0x30
;        xmm7 = [ebp-48], xmm6 = [ebp-16]
; Clobb: flags
; -----------------------------------------------------------------------
__vpaes_preheat:
    add ebp,DWORD [esp]                 ; [esp] is the return address == caller's pic_point label
    movdqa xmm7,[ebp-48]
    movdqa xmm6,[ebp-16]
    ret
align 16
; -----------------------------------------------------------------------
; __vpaes_encrypt_core
; In:    xmm0 = input block, edx = key schedule (round count at [240+edx]),
;        ebp/xmm6/xmm7 as left by __vpaes_preheat
; Out:   xmm0 = result block
; Clobb: eax, ebx, ecx, edx (advanced by 16 per round key consumed),
;        xmm1-xmm5, flags
; -----------------------------------------------------------------------
__vpaes_encrypt_core:
    mov ecx,16
    mov eax,DWORD [240+edx]             ; eax = loop counter taken from the key schedule
    movdqa xmm1,xmm6
    movdqa xmm2,[ebp]
    pandn xmm1,xmm0                     ; xmm1 = high nibbles of each byte (shifted down below)
    pand xmm0,xmm6                      ; xmm0 = low nibbles of each byte
    movdqu xmm5,[edx]                   ; first round key (unaligned load)
db 102,15,56,0,208	; pshufb xmm2,xmm0
    movdqa xmm0,[16+ebp]
    pxor xmm2,xmm5
    psrld xmm1,4
    add edx,16
db 102,15,56,0,193	; pshufb xmm0,xmm1
    lea ebx,[192+ebp]                   ; ebx = base for the ecx-indexed rows used in the loop
    pxor xmm0,xmm2
    jmp NEAR L$000enc_entry             ; first pass: ZF at the jnz below still comes from `add edx,16`
align 16
L$001enc_loop:
    movdqa xmm4,[32+ebp]
    movdqa xmm0,[48+ebp]
db 102,15,56,0,226	; pshufb xmm4,xmm2
db 102,15,56,0,195	; pshufb xmm0,xmm3
    pxor xmm4,xmm5                      ; fold in the round key loaded at the end of the previous pass
    movdqa xmm5,[64+ebp]
    pxor xmm0,xmm4
    movdqa xmm1,[ecx*1+ebx-64]
db 102,15,56,0,234	; pshufb xmm5,xmm2
    movdqa xmm2,[80+ebp]
    movdqa xmm4,[ecx*1+ebx]
db 102,15,56,0,211	; pshufb xmm2,xmm3
    movdqa xmm3,xmm0
    pxor xmm2,xmm5
db 102,15,56,0,193	; pshufb xmm0,xmm1
    add edx,16
    pxor xmm0,xmm2
db 102,15,56,0,220	; pshufb xmm3,xmm4
    add ecx,16
    pxor xmm3,xmm0
db 102,15,56,0,193	; pshufb xmm0,xmm1
    and ecx,48                          ; keep ecx in {0,16,32,48}
    sub eax,1                           ; sets ZF for the jnz below; nothing in between writes EFLAGS
    pxor xmm0,xmm3
L$000enc_entry:
    movdqa xmm1,xmm6
    movdqa xmm5,[ebp-32]
    pandn xmm1,xmm0
    psrld xmm1,4
    pand xmm0,xmm6
db 102,15,56,0,232	; pshufb xmm5,xmm0
    movdqa xmm3,xmm7
    pxor xmm0,xmm1
db 102,15,56,0,217	; pshufb xmm3,xmm1
    movdqa xmm4,xmm7
    pxor xmm3,xmm5
db 102,15,56,0,224	; pshufb xmm4,xmm0
    movdqa xmm2,xmm7
    pxor xmm4,xmm5
db 102,15,56,0,211	; pshufb xmm2,xmm3
    movdqa xmm3,xmm7
    pxor xmm2,xmm0
db 102,15,56,0,220	; pshufb xmm3,xmm4
    movdqu xmm5,[edx]                   ; next round key
    pxor xmm3,xmm1
    jnz NEAR L$001enc_loop
    ; final step: uses the rows at [96+ebp]/[112+ebp] and one ecx-selected row
    movdqa xmm4,[96+ebp]
    movdqa xmm0,[112+ebp]
db 102,15,56,0,226	; pshufb xmm4,xmm2
    pxor xmm4,xmm5
db 102,15,56,0,195	; pshufb xmm0,xmm3
    movdqa xmm1,[64+ecx*1+ebx]
    pxor xmm0,xmm4
db 102,15,56,0,193	; pshufb xmm0,xmm1
    ret
align 16
; -----------------------------------------------------------------------
; __vpaes_decrypt_core
; In:    xmm0 = input block, edx = key schedule (round count at [240+edx]),
;        ebp/xmm6/xmm7 as left by __vpaes_preheat
; Out:   xmm0 = result block
; Clobb: eax, ebx, ecx, edx (advanced by 16 per round key consumed),
;        xmm1-xmm5, flags
; -----------------------------------------------------------------------
__vpaes_decrypt_core:
    lea ebx,[608+ebp]                   ; ebx = base of the rows used by this routine
    mov eax,DWORD [240+edx]             ; eax = loop counter taken from the key schedule
    movdqa xmm1,xmm6
    movdqa xmm2,[ebx-64]
    pandn xmm1,xmm0
    mov ecx,eax
    psrld xmm1,4
    movdqu xmm5,[edx]                   ; first round key (unaligned load)
    shl ecx,4
    pand xmm0,xmm6
db 102,15,56,0,208	; pshufb xmm2,xmm0
    movdqa xmm0,[ebx-48]
    xor ecx,48
db 102,15,56,0,193	; pshufb xmm0,xmm1
    and ecx,48                          ; ecx = ((rounds*16) ^ 48) & 48
    pxor xmm2,xmm5
    movdqa xmm5,[176+ebp]
    pxor xmm0,xmm2
    add edx,16
    lea ecx,[ecx*1+ebx-352]             ; ecx = address of the row at ebp+256+ecx, used in the final step
    jmp NEAR L$002dec_entry             ; first pass: ZF at the jnz below still comes from `add edx,16`
align 16
L$003dec_loop:
    movdqa xmm4,[ebx-32]
    movdqa xmm1,[ebx-16]
db 102,15,56,0,226	; pshufb xmm4,xmm2
db 102,15,56,0,203	; pshufb xmm1,xmm3
    pxor xmm0,xmm4
    movdqa xmm4,[ebx]
    pxor xmm0,xmm1
    movdqa xmm1,[16+ebx]
db 102,15,56,0,226	; pshufb xmm4,xmm2
db 102,15,56,0,197	; pshufb xmm0,xmm5
db 102,15,56,0,203	; pshufb xmm1,xmm3
    pxor xmm0,xmm4
    movdqa xmm4,[32+ebx]
    pxor xmm0,xmm1
    movdqa xmm1,[48+ebx]
db 102,15,56,0,226	; pshufb xmm4,xmm2
db 102,15,56,0,197	; pshufb xmm0,xmm5
db 102,15,56,0,203	; pshufb xmm1,xmm3
    pxor xmm0,xmm4
    movdqa xmm4,[64+ebx]
    pxor xmm0,xmm1
    movdqa xmm1,[80+ebx]
db 102,15,56,0,226	; pshufb xmm4,xmm2
db 102,15,56,0,197	; pshufb xmm0,xmm5
db 102,15,56,0,203	; pshufb xmm1,xmm3
    pxor xmm0,xmm4
    add edx,16
db 102,15,58,15,237,12	; palignr xmm5,xmm5,12
    pxor xmm0,xmm1
    sub eax,1                           ; sets ZF for the jnz below; nothing in between writes EFLAGS
L$002dec_entry:
    movdqa xmm1,xmm6
    movdqa xmm2,[ebp-32]
    pandn xmm1,xmm0
    pand xmm0,xmm6
    psrld xmm1,4
db 102,15,56,0,208	; pshufb xmm2,xmm0
    movdqa xmm3,xmm7
    pxor xmm0,xmm1
db 102,15,56,0,217	; pshufb xmm3,xmm1
    movdqa xmm4,xmm7
    pxor xmm3,xmm2
db 102,15,56,0,224	; pshufb xmm4,xmm0
    pxor xmm4,xmm2
    movdqa xmm2,xmm7
db 102,15,56,0,211	; pshufb xmm2,xmm3
    movdqa xmm3,xmm7
    pxor xmm2,xmm0
db 102,15,56,0,220	; pshufb xmm3,xmm4
    movdqu xmm0,[edx]                   ; next round key
    pxor xmm3,xmm1
    jnz NEAR L$003dec_loop
    ; final step: uses the rows at [96+ebx]/[112+ebx] and the row ecx points at
    movdqa xmm4,[96+ebx]
db 102,15,56,0,226	; pshufb xmm4,xmm2
    pxor xmm4,xmm0
    movdqa xmm0,[112+ebx]
    movdqa xmm2,[ecx]
db 102,15,56,0,195	; pshufb xmm0,xmm3
    pxor xmm0,xmm4
db 102,15,56,0,194	; pshufb xmm0,xmm2
    ret
align 16
; -----------------------------------------------------------------------
; __vpaes_schedule_core
; Shared body of _vpaes_set_encrypt_key / _vpaes_set_decrypt_key.
; In:    esi = user key bytes, eax = key size in bits (compared against 192),
;        edx = where the first round key is written,
;        edi = 0 for the encrypt schedule, non-zero for the decrypt schedule,
;        ecx = starting row selector (multiple of 16) for [256+ecx+ebp],
;        ebp = table offset relative to the caller's pic_point label,
;        esp = 16-byte aligned in the caller (so [4+esp] here is aligned)
; Out:   round keys written through edx; xmm0-xmm7 zeroed before return
; Stack: [4+esp] holds the rotating row from [320+ebp];
;        [20+esp] is a spill slot for xmm7 on the 256-bit path
; Clobb: eax, ebx, ecx, edx, esi (on the decrypt path), flags
; -----------------------------------------------------------------------
__vpaes_schedule_core:
    add ebp,DWORD [esp]                 ; make ebp absolute (return address == caller's pic_point)
    movdqu xmm0,[esi]                   ; first 16 key bytes
    movdqa xmm2,[320+ebp]
    movdqa xmm3,xmm0                    ; keep an untransformed copy for the decrypt branch
    lea ebx,[ebp]                       ; ebx selects the row pair used by schedule_transform
    movdqa [4+esp],xmm2
    call __vpaes_schedule_transform
    movdqa xmm7,xmm0
    test edi,edi
    jnz NEAR L$004schedule_am_decrypting
    movdqu [edx],xmm0
    jmp NEAR L$005schedule_go
L$004schedule_am_decrypting:
    movdqa xmm1,[256+ecx*1+ebp]
db 102,15,56,0,217	; pshufb xmm3,xmm1
    movdqu [edx],xmm3
    xor ecx,48
L$005schedule_go:
    cmp eax,192                         ; dispatch on key size: >192, ==192, otherwise the 128-bit path
    ja NEAR L$006schedule_256
    je NEAR L$007schedule_192
L$008schedule_128:
    mov eax,10                          ; eax now counts schedule rounds
L$009loop_schedule_128:
    call __vpaes_schedule_round
    dec eax
    jz NEAR L$010schedule_mangle_last
    call __vpaes_schedule_mangle
    jmp NEAR L$009loop_schedule_128
align 16
L$007schedule_192:
    movdqu xmm0,[8+esi]                 ; key bytes 8..23
    call __vpaes_schedule_transform
    movdqa xmm6,xmm0
    pxor xmm4,xmm4
    movhlps xmm6,xmm4                   ; clear the low 64 bits of xmm6
    mov eax,4
L$011loop_schedule_192:
    call __vpaes_schedule_round
db 102,15,58,15,198,8	; palignr xmm0,xmm6,8
    call __vpaes_schedule_mangle
    call __vpaes_schedule_192_smear
    call __vpaes_schedule_mangle
    call __vpaes_schedule_round
    dec eax
    jz NEAR L$010schedule_mangle_last
    call __vpaes_schedule_mangle
    call __vpaes_schedule_192_smear
    jmp NEAR L$011loop_schedule_192
align 16
L$006schedule_256:
    movdqu xmm0,[16+esi]                ; key bytes 16..31
    call __vpaes_schedule_transform
    mov eax,7
L$012loop_schedule_256:
    call __vpaes_schedule_mangle
    movdqa xmm6,xmm0
    call __vpaes_schedule_round
    dec eax
    jz NEAR L$010schedule_mangle_last
    call __vpaes_schedule_mangle
    pshufd xmm0,xmm0,255                ; broadcast the top dword of xmm0
    movdqa [20+esp],xmm7                ; spill xmm7 around the low-round call
    movdqa xmm7,xmm6
    call L$_vpaes_schedule_low_round    ; enters schedule_round after its [8+esp] rotation step
    movdqa xmm7,[20+esp]
    jmp NEAR L$012loop_schedule_256
align 16
L$010schedule_mangle_last:
    lea ebx,[384+ebp]                   ; row pair for the decrypt case
    test edi,edi
    jnz NEAR L$013schedule_mangle_last_dec
    movdqa xmm1,[256+ecx*1+ebp]
db 102,15,56,0,193	; pshufb xmm0,xmm1
    lea ebx,[352+ebp]                   ; row pair for the encrypt case
    add edx,32                          ; net effect with the add below: edx += 16 when encrypting
L$013schedule_mangle_last_dec:
    add edx,-16
    pxor xmm0,[336+ebp]
    call __vpaes_schedule_transform
    movdqu [edx],xmm0                   ; last round key
    ; clear every xmm register this code used
    ; NOTE(review): presumably to avoid leaving key-derived data behind - confirm intent
    pxor xmm0,xmm0
    pxor xmm1,xmm1
    pxor xmm2,xmm2
    pxor xmm3,xmm3
    pxor xmm4,xmm4
    pxor xmm5,xmm5
    pxor xmm6,xmm6
    pxor xmm7,xmm7
    ret
align 16
; -----------------------------------------------------------------------
; __vpaes_schedule_192_smear
; Helper for the 192-bit path of __vpaes_schedule_core.
; In:    xmm6, xmm7
; Out:   xmm0 = xmm6 ^ pshufd(xmm6,128) ^ pshufd(xmm7,254);
;        xmm6 = the same value with its low 64 bits cleared
; Clobb: xmm1 (left zero)
; -----------------------------------------------------------------------
__vpaes_schedule_192_smear:
    pshufd xmm1,xmm6,128
    pshufd xmm0,xmm7,254
    pxor xmm6,xmm1
    pxor xmm1,xmm1
    pxor xmm6,xmm0
    movdqa xmm0,xmm6
    movhlps xmm6,xmm1                   ; clear the low 64 bits of xmm6
    ret
align 16
; -----------------------------------------------------------------------
; __vpaes_schedule_round  (second entry: L$_vpaes_schedule_low_round)
; In:    xmm0, xmm7 = schedule state, ebp = absolute table base.
;        Full entry only: [8+esp] is the 16-byte slot that
;        __vpaes_schedule_core addresses as [4+esp] (one return address
;        deeper here); it is read, rotated by one byte and written back.
; Out:   xmm0 = xmm7 = new state
; Clobb: xmm1-xmm5
; The low-round entry skips the [8+esp] step and the xmm0 rotation.
; -----------------------------------------------------------------------
__vpaes_schedule_round:
    movdqa xmm2,[8+esp]
    pxor xmm1,xmm1
db 102,15,58,15,202,15	; palignr xmm1,xmm2,15
db 102,15,58,15,210,15	; palignr xmm2,xmm2,15
    pxor xmm7,xmm1
    pshufd xmm0,xmm0,255                ; broadcast the top dword of xmm0
db 102,15,58,15,192,1	; palignr xmm0,xmm0,1
    movdqa [8+esp],xmm2                 ; store the rotated row for the next round
L$_vpaes_schedule_low_round:
    ; xmm7 ^= xmm7<<32, then ^= result<<64 (byte shifts of 4 and 8)
    movdqa xmm1,xmm7
    pslldq xmm7,4
    pxor xmm7,xmm1
    movdqa xmm1,xmm7
    pslldq xmm7,8
    pxor xmm7,xmm1
    pxor xmm7,[336+ebp]
    ; same nibble-split / pshufb sequence as L$000enc_entry, with the
    ; mask and first table row reloaded into xmm4/xmm5 instead of xmm6/xmm7
    movdqa xmm4,[ebp-16]
    movdqa xmm5,[ebp-48]
    movdqa xmm1,xmm4
    pandn xmm1,xmm0
    psrld xmm1,4
    pand xmm0,xmm4
    movdqa xmm2,[ebp-32]
db 102,15,56,0,208	; pshufb xmm2,xmm0
    pxor xmm0,xmm1
    movdqa xmm3,xmm5
db 102,15,56,0,217	; pshufb xmm3,xmm1
    pxor xmm3,xmm2
    movdqa xmm4,xmm5
db 102,15,56,0,224	; pshufb xmm4,xmm0
    pxor xmm4,xmm2
    movdqa xmm2,xmm5
db 102,15,56,0,211	; pshufb xmm2,xmm3
    pxor xmm2,xmm0
    movdqa xmm3,xmm5
db 102,15,56,0,220	; pshufb xmm3,xmm4
    pxor xmm3,xmm1
    movdqa xmm4,[32+ebp]
db 102,15,56,0,226	; pshufb xmm4,xmm2
    movdqa xmm0,[48+ebp]
db 102,15,56,0,195	; pshufb xmm0,xmm3
    pxor xmm0,xmm4
    pxor xmm0,xmm7
    movdqa xmm7,xmm0
    ret
align 16
; -----------------------------------------------------------------------
; __vpaes_schedule_transform
; In:    xmm0 = value, ebx = address of a pair of 16-byte lookup rows,
;        ebp = absolute table base
; Out:   xmm0 = pshufb([ebx], low nibbles) ^ pshufb([16+ebx], high nibbles)
; Clobb: xmm1, xmm2
; -----------------------------------------------------------------------
__vpaes_schedule_transform:
    movdqa xmm2,[ebp-16]                ; 0x0F byte mask
    movdqa xmm1,xmm2
    pandn xmm1,xmm0
    psrld xmm1,4                        ; xmm1 = high nibbles
    pand xmm0,xmm2                      ; xmm0 = low nibbles
    movdqa xmm2,[ebx]
db 102,15,56,0,208	; pshufb xmm2,xmm0
    movdqa xmm0,[16+ebx]
db 102,15,56,0,193	; pshufb xmm0,xmm1
    pxor xmm0,xmm2
    ret
align 16
; -----------------------------------------------------------------------
; __vpaes_schedule_mangle
; Writes one round key derived from xmm0.
; In:    xmm0 = current schedule value (preserved), edx = output cursor,
;        edi = 0 encrypt / non-zero decrypt, ecx = row selector,
;        ebp = absolute table base
; Out:   edx advanced by +16 (encrypt) or -16 (decrypt) before the store
;        to [edx]; ecx = (ecx - 16) & 48
; Clobb: xmm1-xmm5, flags, and esi on the decrypt branch
; -----------------------------------------------------------------------
__vpaes_schedule_mangle:
    movdqa xmm4,xmm0
    movdqa xmm5,[128+ebp]
    test edi,edi
    jnz NEAR L$014schedule_mangle_dec
    add edx,16
    pxor xmm4,[336+ebp]
db 102,15,56,0,229	; pshufb xmm4,xmm5
    movdqa xmm3,xmm4
db 102,15,56,0,229	; pshufb xmm4,xmm5
    pxor xmm3,xmm4
db 102,15,56,0,229	; pshufb xmm4,xmm5
    pxor xmm3,xmm4
    jmp NEAR L$015schedule_mangle_both
align 16
L$014schedule_mangle_dec:
    movdqa xmm2,[ebp-16]                ; 0x0F byte mask
    lea esi,[416+ebp]                   ; esi = base of the eight rows used below (overwrites esi)
    movdqa xmm1,xmm2
    pandn xmm1,xmm4
    psrld xmm1,4                        ; xmm1 = high nibbles
    pand xmm4,xmm2                      ; xmm4 = low nibbles
    movdqa xmm2,[esi]
db 102,15,56,0,212	; pshufb xmm2,xmm4
    movdqa xmm3,[16+esi]
db 102,15,56,0,217	; pshufb xmm3,xmm1
    pxor xmm3,xmm2
db 102,15,56,0,221	; pshufb xmm3,xmm5
    movdqa xmm2,[32+esi]
db 102,15,56,0,212	; pshufb xmm2,xmm4
    pxor xmm2,xmm3
    movdqa xmm3,[48+esi]
db 102,15,56,0,217	; pshufb xmm3,xmm1
    pxor xmm3,xmm2
db 102,15,56,0,221	; pshufb xmm3,xmm5
    movdqa xmm2,[64+esi]
db 102,15,56,0,212	; pshufb xmm2,xmm4
    pxor xmm2,xmm3
    movdqa xmm3,[80+esi]
db 102,15,56,0,217	; pshufb xmm3,xmm1
    pxor xmm3,xmm2
db 102,15,56,0,221	; pshufb xmm3,xmm5
    movdqa xmm2,[96+esi]
db 102,15,56,0,212	; pshufb xmm2,xmm4
    pxor xmm2,xmm3
    movdqa xmm3,[112+esi]
db 102,15,56,0,217	; pshufb xmm3,xmm1
    pxor xmm3,xmm2
    add edx,-16
L$015schedule_mangle_both:
    movdqa xmm1,[256+ecx*1+ebp]
db 102,15,56,0,217	; pshufb xmm3,xmm1
    add ecx,-16
    and ecx,48                          ; step the row selector, staying in {0,16,32,48}
    movdqu [edx],xmm3
    ret
; -----------------------------------------------------------------------
; _vpaes_set_encrypt_key
; Stack args on entry: [4+esp] = user key bytes, [8+esp] = key size in
; bits, [12+esp] = key schedule to fill (read below as [20/24/28+esp]
; after the four pushes).
; NOTE(review): presumably int vpaes_set_encrypt_key(const uint8_t *key,
; int bits, AES_KEY *out) - confirm against the C header.
; Stores bits/32+5 at [240+schedule], runs __vpaes_schedule_core with
; edi=0, ecx=48, and returns eax = 0.
; -----------------------------------------------------------------------
global _vpaes_set_encrypt_key
align 16
_vpaes_set_encrypt_key:
L$_vpaes_set_encrypt_key_begin:
    push ebp
    push ebx
    push esi
    push edi
    mov esi,DWORD [20+esp]
    lea ebx,[esp-56]                    ; carve a scratch frame below the pushes ...
    mov eax,DWORD [24+esp]
    and ebx,-16                         ; ... aligned to 16 bytes for movdqa spills
    mov edx,DWORD [28+esp]
    xchg ebx,esp                        ; switch to the aligned frame; ebx = original esp
    mov DWORD [48+esp],ebx              ; remember the original esp
    mov ebx,eax
    shr ebx,5
    add ebx,5
    mov DWORD [240+edx],ebx             ; round count = bits/32 + 5
    mov ecx,48
    mov edi,0                           ; edi = 0 selects the encrypt schedule
    lea ebp,[(L$_vpaes_consts+0x30-L$016pic_point)]
    call __vpaes_schedule_core
L$016pic_point:
    mov esp,DWORD [48+esp]              ; restore the original esp
    xor eax,eax                         ; return 0
    pop edi
    pop esi
    pop ebx
    pop ebp
    ret
; -----------------------------------------------------------------------
; _vpaes_set_decrypt_key
; Same stack arguments as _vpaes_set_encrypt_key.
; Stores bits/32+5 at [240+schedule], then runs __vpaes_schedule_core with
; edi=1, edx = schedule + 16 + 16*rounds (the schedule is filled walking
; downwards) and ecx = ((bits>>1) & 32) ^ 32.  Returns eax = 0.
; -----------------------------------------------------------------------
global _vpaes_set_decrypt_key
align 16
_vpaes_set_decrypt_key:
L$_vpaes_set_decrypt_key_begin:
    push ebp
    push ebx
    push esi
    push edi
    mov esi,DWORD [20+esp]
    lea ebx,[esp-56]
    mov eax,DWORD [24+esp]
    and ebx,-16
    mov edx,DWORD [28+esp]
    xchg ebx,esp                        ; switch to the aligned frame; ebx = original esp
    mov DWORD [48+esp],ebx              ; remember the original esp
    mov ebx,eax
    shr ebx,5
    add ebx,5
    mov DWORD [240+edx],ebx             ; round count = bits/32 + 5
    shl ebx,4
    lea edx,[16+ebx*1+edx]              ; start writing at schedule + 16 + 16*rounds
    mov edi,1                           ; edi != 0 selects the decrypt schedule
    mov ecx,eax
    shr ecx,1
    and ecx,32
    xor ecx,32
    lea ebp,[(L$_vpaes_consts+0x30-L$017pic_point)]
    call __vpaes_schedule_core
L$017pic_point:
    mov esp,DWORD [48+esp]              ; restore the original esp
    xor eax,eax                         ; return 0
    pop edi
    pop esi
    pop ebx
    pop ebp
    ret
; -----------------------------------------------------------------------
; _vpaes_encrypt
; Stack args on entry: [4+esp] = 16-byte input, [8+esp] = 16-byte output,
; [12+esp] = key schedule.  Processes exactly one block with unaligned
; load/store of the data.
; NOTE(review): presumably void vpaes_encrypt(const uint8_t *in,
; uint8_t *out, const AES_KEY *key) - confirm against the C header.
; -----------------------------------------------------------------------
global _vpaes_encrypt
align 16
_vpaes_encrypt:
L$_vpaes_encrypt_begin:
    push ebp
    push ebx
    push esi
    push edi
    lea ebp,[(L$_vpaes_consts+0x30-L$018pic_point)]
    call __vpaes_preheat
L$018pic_point:
    mov esi,DWORD [20+esp]
    lea ebx,[esp-56]
    mov edi,DWORD [24+esp]
    and ebx,-16
    mov edx,DWORD [28+esp]
    xchg ebx,esp                        ; switch to the aligned frame; ebx = original esp
    mov DWORD [48+esp],ebx              ; remember the original esp
    movdqu xmm0,[esi]
    call __vpaes_encrypt_core
    movdqu [edi],xmm0
    mov esp,DWORD [48+esp]              ; restore the original esp
    pop edi
    pop esi
    pop ebx
    pop ebp
    ret
; -----------------------------------------------------------------------
; _vpaes_decrypt
; Same stack arguments and frame handling as _vpaes_encrypt, but calls
; __vpaes_decrypt_core.
; -----------------------------------------------------------------------
global _vpaes_decrypt
align 16
_vpaes_decrypt:
L$_vpaes_decrypt_begin:
    push ebp
    push ebx
    push esi
    push edi
    lea ebp,[(L$_vpaes_consts+0x30-L$019pic_point)]
    call __vpaes_preheat
L$019pic_point:
    mov esi,DWORD [20+esp]
    lea ebx,[esp-56]
    mov edi,DWORD [24+esp]
    and ebx,-16
    mov edx,DWORD [28+esp]
    xchg ebx,esp                        ; switch to the aligned frame; ebx = original esp
    mov DWORD [48+esp],ebx              ; remember the original esp
    movdqu xmm0,[esi]
    call __vpaes_decrypt_core
    movdqu [edi],xmm0
    mov esp,DWORD [48+esp]              ; restore the original esp
    pop edi
    pop esi
    pop ebx
    pop ebp
    ret
; -----------------------------------------------------------------------
; _vpaes_cbc_encrypt
; Stack args on entry: [4+esp] = input, [8+esp] = output, [12+esp] = byte
; length, [16+esp] = key schedule, [20+esp] = 16-byte chaining value (IV),
; [24+esp] = direction (0 = decrypt, non-zero = encrypt).
; NOTE(review): presumably void vpaes_cbc_encrypt(const uint8_t *in,
; uint8_t *out, size_t length, const AES_KEY *key, uint8_t *ivec, int enc)
; - confirm against the C header.
; Behaviour visible here:
;   * length < 16 returns immediately without touching anything;
;   * only whole 16-byte blocks are processed, a trailing partial block
;     is ignored;
;   * the final chaining value is written back through the IV pointer.
; Frame (16-byte aligned esp): [esp] = out-in, [4+esp] = key,
;   [8+esp] = IV pointer, [16+esp]/[32+esp] = decrypt spills,
;   [48+esp] = original esp.
; -----------------------------------------------------------------------
global _vpaes_cbc_encrypt
align 16
_vpaes_cbc_encrypt:
L$_vpaes_cbc_encrypt_begin:
    push ebp
    push ebx
    push esi
    push edi
    mov esi,DWORD [20+esp]
    mov edi,DWORD [24+esp]
    mov eax,DWORD [28+esp]
    mov edx,DWORD [32+esp]
    sub eax,16
    jc NEAR L$020cbc_abort              ; fewer than 16 bytes: nothing to do
    lea ebx,[esp-56]
    mov ebp,DWORD [36+esp]
    and ebx,-16
    mov ecx,DWORD [40+esp]
    xchg ebx,esp                        ; switch to the aligned frame; ebx = original esp
    movdqu xmm1,[ebp]                   ; xmm1 = chaining value, loaded from the IV
    sub edi,esi                         ; edi = out - in, so out can be addressed as [esi+delta]
    mov DWORD [48+esp],ebx              ; remember the original esp
    mov DWORD [esp],edi
    mov DWORD [4+esp],edx               ; the cores advance edx, so keep the key pointer here
    mov DWORD [8+esp],ebp               ; ebp is about to become the table base
    mov edi,eax                         ; edi = length - 16; loops run until it borrows
    lea ebp,[(L$_vpaes_consts+0x30-L$021pic_point)]
    call __vpaes_preheat
L$021pic_point:
    cmp ecx,0
    je NEAR L$022cbc_dec_loop
    jmp NEAR L$023cbc_enc_loop
align 16
L$023cbc_enc_loop:
    movdqu xmm0,[esi]
    pxor xmm0,xmm1                      ; plaintext ^ previous ciphertext (or IV)
    call __vpaes_encrypt_core
    mov ebx,DWORD [esp]
    mov edx,DWORD [4+esp]               ; reload the key pointer for the next block
    movdqa xmm1,xmm0                    ; this ciphertext is the next chaining value
    movdqu [esi*1+ebx],xmm0             ; store to out + (current in offset)
    lea esi,[16+esi]
    sub edi,16
    jnc NEAR L$023cbc_enc_loop
    jmp NEAR L$024cbc_done
align 16
L$022cbc_dec_loop:
    movdqu xmm0,[esi]
    movdqa [16+esp],xmm1                ; spill chaining value (the core clobbers xmm1)
    movdqa [32+esp],xmm0                ; spill this ciphertext block for the next iteration
    call __vpaes_decrypt_core
    mov ebx,DWORD [esp]
    mov edx,DWORD [4+esp]               ; reload the key pointer for the next block
    pxor xmm0,[16+esp]                  ; decrypted block ^ previous ciphertext (or IV)
    movdqa xmm1,[32+esp]
    movdqu [esi*1+ebx],xmm0             ; store to out + (current in offset)
    lea esi,[16+esi]
    sub edi,16
    jnc NEAR L$022cbc_dec_loop
L$024cbc_done:
    mov ebx,DWORD [8+esp]
    mov esp,DWORD [48+esp]              ; restore the original esp
    movdqu [ebx],xmm1                   ; write the final chaining value back to the IV buffer
L$020cbc_abort:
    pop edi
    pop esi
    pop ebx
    pop ebp
    ret