; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.

default	rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD

%ifdef BORINGSSL_PREFIX
%include "boringssl_prefix_symbols_nasm.inc"
%endif
section	.text code align=64


EXTERN	OPENSSL_ia32cap_P

; ---------------------------------------------------------------------------
; Constant tables shared by the code paths below.
; ---------------------------------------------------------------------------
ALIGN	64
$L$zero:
	DD	0,0,0,0
$L$one:
	DD	1,0,0,0			; +1 on word 0 of the counter row (one block)
$L$inc:
	DD	0,1,2,3			; per-lane counter offsets for the 4-block path
$L$four:
	DD	4,4,4,4			; counter step per outer iteration of the 4-block path
$L$incy:
	DD	0,2,4,6,1,3,5,7		; per-lane counter offsets used by the 8-block path
$L$eight:
	DD	8,8,8,8,8,8,8,8		; counter step per outer iteration of the 8-block path
$L$rot16:
; pshufb mask: byte i of each dword takes byte (i+2)&3 -> rotate each dword by 16
DB	0x2,0x3,0x0,0x1,0x6,0x7,0x4,0x5,0xa,0xb,0x8,0x9,0xe,0xf,0xc,0xd
$L$rot24:
; pshufb mask: byte i of each dword takes byte (i+3)&3 -> rotate each dword left by 8
DB	0x3,0x0,0x1,0x2,0x7,0x4,0x5,0x6,0xb,0x8,0x9,0xa,0xf,0xc,0xd,0xe
$L$sigma:
; ASCII "expand 32-byte k" followed by a NUL; state words 0..3
DB	101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107
DB	0
ALIGN	64
; NOTE(review): the four tables below are not referenced by any code visible in
; this part of the file; presumably they serve a wider (16-lane) path -- confirm.
$L$zeroz:
	DD	0,0,0,0,1,0,0,0,2,0,0,0,3,0,0,0
$L$fourz:
	DD	4,0,0,0,4,0,0,0,4,0,0,0,4,0,0,0
$L$incz:
	DD	0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
$L$sixteen:
	DD	16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16
; ASCII "ChaCha20 for x86_64, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated
DB	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
DB	95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32
DB	98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115
DB	108,46,111,114,103,62,0
global	ChaCha20_ctr32

; ---------------------------------------------------------------------------
; ChaCha20_ctr32 -- XOR a byte stream with ChaCha20 keystream (scalar path).
;
; Win64 entry registers, as remapped by the prologue:
;   rcx -> rdi  output pointer            (written 64 bytes at a time)
;   rdx -> rsi  input pointer             (read 64 bytes at a time)
;   r8  -> rdx  length in bytes           (0 returns immediately)
;   r9  -> rcx  key pointer               (32 bytes are read)
;   [rsp+40] -> r8  counter block pointer (16 bytes are read)
;
; Stack frame after "sub rsp,64+24":
;   [rsp+ 0..15]  scratch (only used by the tail path)
;   [rsp+16..31]  state words 4..7   (first 16 key bytes)
;   [rsp+32..47]  state words 8..11  (second 16 key bytes; reused as a spill
;                                     area for words 8..11 during the rounds)
;   [rsp+48..63]  state words 12..15 (counter block)
;   [rsp+64]      remaining length, [rsp+72] input ptr, [rsp+80] output ptr
;
; Dispatch: if bit 9 of the dword at OPENSSL_ia32cap_P+4 is set, control goes
; to $L$ChaCha20_ssse3 (r10 keeps the loaded capability qword for that path).
; NOTE(review): bit 9 is presumably the SSSE3 feature flag -- confirm against
; the OPENSSL_ia32cap_P layout.
; ---------------------------------------------------------------------------
ALIGN	64
ChaCha20_ctr32:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_ChaCha20_ctr32:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]



	cmp	rdx,0			; nothing to do for an empty input
	je	NEAR $L$no_data
	mov	r10,QWORD[((OPENSSL_ia32cap_P+4))]
	test	r10d,512		; bit 9 of the low dword selects the SIMD path
	jnz	NEAR $L$ChaCha20_ssse3

	; Save the callee-saved GPRs used as state registers below.
	push	rbx

	push	rbp

	push	r12

	push	r13

	push	r14

	push	r15

	sub	rsp,64+24

$L$ctr32_body:


	movdqu	xmm1,XMMWORD[rcx]	; key bytes 0..15
	movdqu	xmm2,XMMWORD[16+rcx]	; key bytes 16..31
	movdqu	xmm3,XMMWORD[r8]	; counter block
	movdqa	xmm4,XMMWORD[$L$one]	; per-block counter increment


	movdqa	XMMWORD[16+rsp],xmm1
	movdqa	XMMWORD[32+rsp],xmm2
	movdqa	XMMWORD[48+rsp],xmm3
	mov	rbp,rdx			; rbp = bytes remaining
	jmp	NEAR $L$oop_outer

; One iteration per 64-byte block. Working state:
;   eax,ebx,ecx,edx = words 0..3   r8d..r11d  = words 4..7
;   esi,edi         = words 8,9 or 10,11 (the other pair lives at [rsp+32..47])
;   r12d..r15d      = words 12..15
ALIGN	32
$L$oop_outer:
	mov	eax,0x61707865		; the four dwords of $L$sigma
	mov	ebx,0x3320646e
	mov	ecx,0x79622d32
	mov	edx,0x6b206574
	mov	r8d,DWORD[16+rsp]
	mov	r9d,DWORD[20+rsp]
	mov	r10d,DWORD[24+rsp]
	mov	r11d,DWORD[28+rsp]
	movd	r12d,xmm3		; current block counter word
	mov	r13d,DWORD[52+rsp]
	mov	r14d,DWORD[56+rsp]
	mov	r15d,DWORD[60+rsp]

	mov	QWORD[((64+0))+rsp],rbp	; spill length / pointers: rbp, rsi, rdi are
	mov	ebp,10			; reused as round counter and state words
	mov	QWORD[((64+8))+rsp],rsi
DB	102,72,15,126,214		; movq rsi,xmm2  (words 8,9 packed in 64 bits)
	mov	QWORD[((64+16))+rsp],rdi
	mov	rdi,rsi
	shr	rdi,32			; edi = word 9, esi (low half) = word 8
	jmp	NEAR $L$oop

; Ten iterations; each runs four column quarter-rounds followed by four
; diagonal quarter-rounds (add / xor / rotate by 16, 12, 8, 7), two
; quarter-rounds interleaved at a time.
ALIGN	32
$L$oop:
	; columns (0,4,8,12) and (1,5,9,13)
	add	eax,r8d
	xor	r12d,eax
	rol	r12d,16
	add	ebx,r9d
	xor	r13d,ebx
	rol	r13d,16
	add	esi,r12d
	xor	r8d,esi
	rol	r8d,12
	add	edi,r13d
	xor	r9d,edi
	rol	r9d,12
	add	eax,r8d
	xor	r12d,eax
	rol	r12d,8
	add	ebx,r9d
	xor	r13d,ebx
	rol	r13d,8
	add	esi,r12d
	xor	r8d,esi
	rol	r8d,7
	add	edi,r13d
	xor	r9d,edi
	rol	r9d,7
	; swap the in-register pair: park words 8,9 and pick up words 10,11
	mov	DWORD[32+rsp],esi
	mov	DWORD[36+rsp],edi
	mov	esi,DWORD[40+rsp]
	mov	edi,DWORD[44+rsp]
	; columns (2,6,10,14) and (3,7,11,15)
	add	ecx,r10d
	xor	r14d,ecx
	rol	r14d,16
	add	edx,r11d
	xor	r15d,edx
	rol	r15d,16
	add	esi,r14d
	xor	r10d,esi
	rol	r10d,12
	add	edi,r15d
	xor	r11d,edi
	rol	r11d,12
	add	ecx,r10d
	xor	r14d,ecx
	rol	r14d,8
	add	edx,r11d
	xor	r15d,edx
	rol	r15d,8
	add	esi,r14d
	xor	r10d,esi
	rol	r10d,7
	add	edi,r15d
	xor	r11d,edi
	rol	r11d,7
	; diagonals (0,5,10,15) and (1,6,11,12)
	add	eax,r9d
	xor	r15d,eax
	rol	r15d,16
	add	ebx,r10d
	xor	r12d,ebx
	rol	r12d,16
	add	esi,r15d
	xor	r9d,esi
	rol	r9d,12
	add	edi,r12d
	xor	r10d,edi
	rol	r10d,12
	add	eax,r9d
	xor	r15d,eax
	rol	r15d,8
	add	ebx,r10d
	xor	r12d,ebx
	rol	r12d,8
	add	esi,r15d
	xor	r9d,esi
	rol	r9d,7
	add	edi,r12d
	xor	r10d,edi
	rol	r10d,7
	; swap back: park words 10,11 and pick up words 8,9
	mov	DWORD[40+rsp],esi
	mov	DWORD[44+rsp],edi
	mov	esi,DWORD[32+rsp]
	mov	edi,DWORD[36+rsp]
	; diagonals (2,7,8,13) and (3,4,9,14)
	add	ecx,r11d
	xor	r13d,ecx
	rol	r13d,16
	add	edx,r8d
	xor	r14d,edx
	rol	r14d,16
	add	esi,r13d
	xor	r11d,esi
	rol	r11d,12
	add	edi,r14d
	xor	r8d,edi
	rol	r8d,12
	add	ecx,r11d
	xor	r13d,ecx
	rol	r13d,8
	add	edx,r8d
	xor	r14d,edx
	rol	r14d,8
	add	esi,r13d
	xor	r11d,esi
	rol	r11d,7
	add	edi,r14d
	xor	r8d,edi
	rol	r8d,7
	dec	ebp
	jnz	NEAR $L$oop
	; Rounds done: flush words 8,9 so [rsp+32..47] holds working words 8..11,
	; then restore the length and pointers.
	mov	DWORD[36+rsp],edi
	mov	DWORD[32+rsp],esi
	mov	rbp,QWORD[64+rsp]
	movdqa	xmm1,xmm2		; xmm2 still holds the original words 8..11
	mov	rsi,QWORD[((64+8))+rsp]
	paddd	xmm3,xmm4		; advance the counter for the next block
	mov	rdi,QWORD[((64+16))+rsp]

	; Feed-forward: add the initial state to the working state.
	add	eax,0x61707865
	add	ebx,0x3320646e
	add	ecx,0x79622d32
	add	edx,0x6b206574
	add	r8d,DWORD[16+rsp]
	add	r9d,DWORD[20+rsp]
	add	r10d,DWORD[24+rsp]
	add	r11d,DWORD[28+rsp]
	add	r12d,DWORD[48+rsp]
	add	r13d,DWORD[52+rsp]
	add	r14d,DWORD[56+rsp]
	add	r15d,DWORD[60+rsp]
	paddd	xmm1,XMMWORD[32+rsp]	; words 8..11 = original + working

	cmp	rbp,64
	jb	NEAR $L$tail		; fewer than 64 bytes left: byte-wise tail

	; Full block: XOR 64 input bytes with the keystream.
	xor	eax,DWORD[rsi]
	xor	ebx,DWORD[4+rsi]
	xor	ecx,DWORD[8+rsi]
	xor	edx,DWORD[12+rsi]
	xor	r8d,DWORD[16+rsi]
	xor	r9d,DWORD[20+rsi]
	xor	r10d,DWORD[24+rsi]
	xor	r11d,DWORD[28+rsi]
	movdqu	xmm0,XMMWORD[32+rsi]
	xor	r12d,DWORD[48+rsi]
	xor	r13d,DWORD[52+rsi]
	xor	r14d,DWORD[56+rsi]
	xor	r15d,DWORD[60+rsi]
	lea	rsi,[64+rsi]
	pxor	xmm0,xmm1

	movdqa	XMMWORD[32+rsp],xmm2	; restore original words 8..11 on the stack
	movd	DWORD[48+rsp],xmm3	; store the advanced counter word

	mov	DWORD[rdi],eax
	mov	DWORD[4+rdi],ebx
	mov	DWORD[8+rdi],ecx
	mov	DWORD[12+rdi],edx
	mov	DWORD[16+rdi],r8d
	mov	DWORD[20+rdi],r9d
	mov	DWORD[24+rdi],r10d
	mov	DWORD[28+rdi],r11d
	movdqu	XMMWORD[32+rdi],xmm0
	mov	DWORD[48+rdi],r12d
	mov	DWORD[52+rdi],r13d
	mov	DWORD[56+rdi],r14d
	mov	DWORD[60+rdi],r15d
	lea	rdi,[64+rdi]

	sub	rbp,64
	jnz	NEAR $L$oop_outer

	jmp	NEAR $L$done

; Partial final block: write the 64 keystream bytes to [rsp..rsp+63]
; (overwriting the saved state, which is no longer needed) and XOR the
; remaining rbp bytes one at a time.
ALIGN	16
$L$tail:
	mov	DWORD[rsp],eax
	mov	DWORD[4+rsp],ebx
	xor	rbx,rbx			; rbx = byte index
	mov	DWORD[8+rsp],ecx
	mov	DWORD[12+rsp],edx
	mov	DWORD[16+rsp],r8d
	mov	DWORD[20+rsp],r9d
	mov	DWORD[24+rsp],r10d
	mov	DWORD[28+rsp],r11d
	movdqa	XMMWORD[32+rsp],xmm1
	mov	DWORD[48+rsp],r12d
	mov	DWORD[52+rsp],r13d
	mov	DWORD[56+rsp],r14d
	mov	DWORD[60+rsp],r15d

$L$oop_tail:
	movzx	eax,BYTE[rbx*1+rsi]
	movzx	edx,BYTE[rbx*1+rsp]
	lea	rbx,[1+rbx]		; lea keeps the flags untouched
	xor	eax,edx
	mov	BYTE[((-1))+rbx*1+rdi],al
	dec	rbp
	jnz	NEAR $L$oop_tail

$L$done:
	lea	rsi,[((64+24+48))+rsp]	; rsi = rsp value before the six pushes
	mov	r15,QWORD[((-48))+rsi]

	mov	r14,QWORD[((-40))+rsi]

	mov	r13,QWORD[((-32))+rsi]

	mov	r12,QWORD[((-24))+rsi]

	mov	rbp,QWORD[((-16))+rsi]

	mov	rbx,QWORD[((-8))+rsi]

	lea	rsp,[rsi]

$L$no_data:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_ChaCha20_ctr32:

; ---------------------------------------------------------------------------
; ChaCha20_ssse3 -- one-block-at-a-time SSE path.
; Same argument mapping as ChaCha20_ctr32. The label is not declared global in
; the visible source; ChaCha20_ctr32 enters at $L$ChaCha20_ssse3 with the
; arguments already remapped.
; Rows of the state live in xmm0..xmm3; xmm4/xmm5 are temporaries;
; xmm6/xmm7 hold the $L$rot16/$L$rot24 shuffle masks and are saved/restored
; below r9 (the entry rsp). Inputs longer than 128 bytes go to the 4-block path.
; ---------------------------------------------------------------------------
ALIGN	32
ChaCha20_ssse3:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_ChaCha20_ssse3:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]


$L$ChaCha20_ssse3:

	mov	r9,rsp			; r9 = frame anchor used by the epilogue

	cmp	rdx,128
	ja	NEAR $L$ChaCha20_4x

$L$do_sse3_after_all:
	sub	rsp,64+40
	movaps	XMMWORD[(-40)+r9],xmm6
	movaps	XMMWORD[(-24)+r9],xmm7
$L$ssse3_body:
	movdqa	xmm0,XMMWORD[$L$sigma]
	movdqu	xmm1,XMMWORD[rcx]
	movdqu	xmm2,XMMWORD[16+rcx]
	movdqu	xmm3,XMMWORD[r8]
	movdqa	xmm6,XMMWORD[$L$rot16]
	movdqa	xmm7,XMMWORD[$L$rot24]

	; Keep a copy of the initial state at [rsp..rsp+63] for the feed-forward.
	movdqa	XMMWORD[rsp],xmm0
	movdqa	XMMWORD[16+rsp],xmm1
	movdqa	XMMWORD[32+rsp],xmm2
	movdqa	XMMWORD[48+rsp],xmm3
	mov	r8,10			; r8 = round-pair counter
	jmp	NEAR $L$oop_ssse3

; Subsequent blocks: reload the saved state and bump the counter row by $L$one.
ALIGN	32
$L$oop_outer_ssse3:
	movdqa	xmm3,XMMWORD[$L$one]
	movdqa	xmm0,XMMWORD[rsp]
	movdqa	xmm1,XMMWORD[16+rsp]
	movdqa	xmm2,XMMWORD[32+rsp]
	paddd	xmm3,XMMWORD[48+rsp]
	mov	r8,10
	movdqa	XMMWORD[48+rsp],xmm3
	jmp	NEAR $L$oop_ssse3

ALIGN	32
$L$oop_ssse3:
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
DB	102,15,56,0,222			; pshufb xmm3,xmm6  (rotate by 16)
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
; Continuation of the $L$oop_ssse3 round body: xmm0..xmm3 hold the four state
; rows, xmm4 is a temporary, xmm6/xmm7 hold the $L$rot16/$L$rot24 masks.
	movdqa	xmm4,xmm1		; rotate row 1 left by 12 via shift/shift/or
	psrld	xmm1,20
	pslld	xmm4,12
	por	xmm1,xmm4
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
DB	102,15,56,0,223			; pshufb xmm3,xmm7  (rotate left by 8)
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1		; rotate row 1 left by 7
	psrld	xmm1,25
	pslld	xmm4,7
	por	xmm1,xmm4
	; Rotate the lanes of rows 2, 1, 3 by 2, 1, 3 positions so the next
	; quarter-round operates on diagonals.
	pshufd	xmm2,xmm2,78
	pshufd	xmm1,xmm1,57
	pshufd	xmm3,xmm3,147
	nop
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
DB	102,15,56,0,222			; pshufb xmm3,xmm6  (rotate by 16)
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,20
	pslld	xmm4,12
	por	xmm1,xmm4
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
DB	102,15,56,0,223			; pshufb xmm3,xmm7  (rotate left by 8)
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,25
	pslld	xmm4,7
	por	xmm1,xmm4
	; Undo the lane rotation (rows 1 and 3 use the inverse shuffles).
	pshufd	xmm2,xmm2,78
	pshufd	xmm1,xmm1,147
	pshufd	xmm3,xmm3,57
	dec	r8
	jnz	NEAR $L$oop_ssse3
	; Feed-forward: add the saved initial state.
	paddd	xmm0,XMMWORD[rsp]
	paddd	xmm1,XMMWORD[16+rsp]
	paddd	xmm2,XMMWORD[32+rsp]
	paddd	xmm3,XMMWORD[48+rsp]

	cmp	rdx,64
	jb	NEAR $L$tail_ssse3	; fewer than 64 bytes left

	; Full block: XOR 64 input bytes with the keystream and store.
	movdqu	xmm4,XMMWORD[rsi]
	movdqu	xmm5,XMMWORD[16+rsi]
	pxor	xmm0,xmm4
	movdqu	xmm4,XMMWORD[32+rsi]
	pxor	xmm1,xmm5
	movdqu	xmm5,XMMWORD[48+rsi]
	lea	rsi,[64+rsi]
	pxor	xmm2,xmm4
	pxor	xmm3,xmm5

	movdqu	XMMWORD[rdi],xmm0
	movdqu	XMMWORD[16+rdi],xmm1
	movdqu	XMMWORD[32+rdi],xmm2
	movdqu	XMMWORD[48+rdi],xmm3
	lea	rdi,[64+rdi]

	sub	rdx,64
	jnz	NEAR $L$oop_outer_ssse3

	jmp	NEAR $L$done_ssse3

; Partial final block: dump the keystream over the saved state on the stack
; and XOR the remaining rdx bytes one at a time.
ALIGN	16
$L$tail_ssse3:
	movdqa	XMMWORD[rsp],xmm0
	movdqa	XMMWORD[16+rsp],xmm1
	movdqa	XMMWORD[32+rsp],xmm2
	movdqa	XMMWORD[48+rsp],xmm3
	xor	r8,r8			; r8 = byte index

$L$oop_tail_ssse3:
	movzx	eax,BYTE[r8*1+rsi]
	movzx	ecx,BYTE[r8*1+rsp]
	lea	r8,[1+r8]
	xor	eax,ecx
	mov	BYTE[((-1))+r8*1+rdi],al
	dec	rdx
	jnz	NEAR $L$oop_tail_ssse3

$L$done_ssse3:
	movaps	xmm6,XMMWORD[((-40))+r9]	; restore the saved xmm6/xmm7
	movaps	xmm7,XMMWORD[((-24))+r9]
	lea	rsp,[r9]

$L$ssse3_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret

$L$SEH_end_ChaCha20_ssse3:

; ---------------------------------------------------------------------------
; ChaCha20_4x -- four blocks per iteration, one block per 32-bit SIMD lane.
; Same argument mapping as the functions above. $L$ChaCha20_4x is reached from
; the SSSE3 path when more than 128 bytes remain; it reads r10, which holds the
; capability qword loaded from OPENSSL_ia32cap_P+4 in ChaCha20_ctr32.
; Each state word is broadcast into its own register/stack slot:
;   words 0..3   -> xmm8..xmm11,  copies at [rsp+64..127]
;   words 4..7   -> xmm12..xmm15, copies at [rcx-128 .. rcx-65]
;   words 8..11  -> xmm4..xmm7,   copies at [rcx-64 .. rcx-1]
;   words 12..15 -> xmm0..xmm3,   copies at [rcx+0 .. rcx+63]
; with rcx = rsp+256. xmm6..xmm15 are saved below r9 (the entry rsp).
; ---------------------------------------------------------------------------
ALIGN	32
ChaCha20_4x:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_ChaCha20_4x:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]


$L$ChaCha20_4x:

	mov	r9,rsp			; r9 = frame anchor used by the epilogue

	mov	r11,r10
	shr	r10,32			; r10 = high dword of the capability qword
	test	r10,32			; bit 5 set -> use the 8-block path
	jnz	NEAR $L$ChaCha20_8x	; NOTE(review): presumably the AVX2 flag -- confirm
	cmp	rdx,192
	ja	NEAR $L$proceed4x

	; For inputs of at most 192 bytes: if, of capability bits 22 and 26,
	; only bit 22 is set, fall back to the one-block SSSE3 code.
	; NOTE(review): presumably identifies a CPU family where that is
	; faster -- confirm against the generating Perl script.
	and	r11,71303168
	cmp	r11,4194304
	je	NEAR $L$do_sse3_after_all

$L$proceed4x:
	sub	rsp,0x140+168
	movaps	XMMWORD[(-168)+r9],xmm6
	movaps	XMMWORD[(-152)+r9],xmm7
	movaps	XMMWORD[(-136)+r9],xmm8
	movaps	XMMWORD[(-120)+r9],xmm9
	movaps	XMMWORD[(-104)+r9],xmm10
	movaps	XMMWORD[(-88)+r9],xmm11
	movaps	XMMWORD[(-72)+r9],xmm12
	movaps	XMMWORD[(-56)+r9],xmm13
	movaps	XMMWORD[(-40)+r9],xmm14
	movaps	XMMWORD[(-24)+r9],xmm15
$L$4x_body:
	movdqa	xmm11,XMMWORD[$L$sigma]
	movdqu	xmm15,XMMWORD[rcx]	; key bytes 0..15
	movdqu	xmm7,XMMWORD[16+rcx]	; key bytes 16..31
	movdqu	xmm3,XMMWORD[r8]	; counter block
	lea	rcx,[256+rsp]		; rcx now addresses the saved-state area
	lea	r10,[$L$rot16]
	lea	r11,[$L$rot24]

	; Broadcast words 0..3 and save them.
	pshufd	xmm8,xmm11,0x00
	pshufd	xmm9,xmm11,0x55
	movdqa	XMMWORD[64+rsp],xmm8
	pshufd	xmm10,xmm11,0xaa
	movdqa	XMMWORD[80+rsp],xmm9
	pshufd	xmm11,xmm11,0xff
	movdqa	XMMWORD[96+rsp],xmm10
	movdqa	XMMWORD[112+rsp],xmm11

	; Broadcast words 4..7 and save them.
	pshufd	xmm12,xmm15,0x00
	pshufd	xmm13,xmm15,0x55
	movdqa	XMMWORD[(128-256)+rcx],xmm12
	pshufd	xmm14,xmm15,0xaa
	movdqa	XMMWORD[(144-256)+rcx],xmm13
	pshufd	xmm15,xmm15,0xff
	movdqa	XMMWORD[(160-256)+rcx],xmm14
	movdqa	XMMWORD[(176-256)+rcx],xmm15

	; Broadcast words 8..11 and save them.
	pshufd	xmm4,xmm7,0x00
	pshufd	xmm5,xmm7,0x55
	movdqa	XMMWORD[(192-256)+rcx],xmm4
	pshufd	xmm6,xmm7,0xaa
	movdqa	XMMWORD[(208-256)+rcx],xmm5
	pshufd	xmm7,xmm7,0xff
	movdqa	XMMWORD[(224-256)+rcx],xmm6
	movdqa	XMMWORD[(240-256)+rcx],xmm7

	; Broadcast words 12..15; lane i of word 12 gets counter + i.
	pshufd	xmm0,xmm3,0x00
	pshufd	xmm1,xmm3,0x55
	paddd	xmm0,XMMWORD[$L$inc]
; Continuation of the ChaCha20_4x state setup (broadcast of words 13..15),
; followed by the round loop, transpose, output and the first tail cases.
; Register roles: xmm8..11 = words 0..3, xmm12..15 = words 4..7,
; xmm4/xmm5 = words 8,9 or 10,11 (other pair spilled on the stack),
; xmm0..3 = words 12..15, xmm6/xmm7 = temporaries and pshufb masks,
; r10 -> $L$rot16, r11 -> $L$rot24, rcx = rsp+256.
	pshufd	xmm2,xmm3,0xaa
	movdqa	XMMWORD[(272-256)+rcx],xmm1
	pshufd	xmm3,xmm3,0xff
	movdqa	XMMWORD[(288-256)+rcx],xmm2
	movdqa	XMMWORD[(304-256)+rcx],xmm3

	jmp	NEAR $L$oop_enter4x

; Subsequent iterations: reload the broadcast state and advance every lane's
; counter by 4.
ALIGN	32
$L$oop_outer4x:
	movdqa	xmm8,XMMWORD[64+rsp]
	movdqa	xmm9,XMMWORD[80+rsp]
	movdqa	xmm10,XMMWORD[96+rsp]
	movdqa	xmm11,XMMWORD[112+rsp]
	movdqa	xmm12,XMMWORD[((128-256))+rcx]
	movdqa	xmm13,XMMWORD[((144-256))+rcx]
	movdqa	xmm14,XMMWORD[((160-256))+rcx]
	movdqa	xmm15,XMMWORD[((176-256))+rcx]
	movdqa	xmm4,XMMWORD[((192-256))+rcx]
	movdqa	xmm5,XMMWORD[((208-256))+rcx]
	movdqa	xmm6,XMMWORD[((224-256))+rcx]
	movdqa	xmm7,XMMWORD[((240-256))+rcx]
	movdqa	xmm0,XMMWORD[((256-256))+rcx]
	movdqa	xmm1,XMMWORD[((272-256))+rcx]
	movdqa	xmm2,XMMWORD[((288-256))+rcx]
	movdqa	xmm3,XMMWORD[((304-256))+rcx]
	paddd	xmm0,XMMWORD[$L$four]

$L$oop_enter4x:
	movdqa	XMMWORD[32+rsp],xmm6	; spill words 10,11 to free xmm6/xmm7
	movdqa	XMMWORD[48+rsp],xmm7
	movdqa	xmm7,XMMWORD[r10]	; xmm7 = rot16 mask
	mov	eax,10			; eax = round-pair counter
	movdqa	XMMWORD[(256-256)+rcx],xmm0	; save this iteration's counters
	jmp	NEAR $L$oop4x

ALIGN	32
$L$oop4x:
	; columns (0,4,8,12) and (1,5,9,13)
	paddd	xmm8,xmm12
	paddd	xmm9,xmm13
	pxor	xmm0,xmm8
	pxor	xmm1,xmm9
DB	102,15,56,0,199			; pshufb xmm0,xmm7  (rotate by 16)
DB	102,15,56,0,207			; pshufb xmm1,xmm7
	paddd	xmm4,xmm0
	paddd	xmm5,xmm1
	pxor	xmm12,xmm4
	pxor	xmm13,xmm5
	movdqa	xmm6,xmm12		; rotate left by 12 via shift/shift/or
	pslld	xmm12,12
	psrld	xmm6,20
	movdqa	xmm7,xmm13
	pslld	xmm13,12
	por	xmm12,xmm6
	psrld	xmm7,20
	movdqa	xmm6,XMMWORD[r11]	; xmm6 = rot24 mask
	por	xmm13,xmm7
	paddd	xmm8,xmm12
	paddd	xmm9,xmm13
	pxor	xmm0,xmm8
	pxor	xmm1,xmm9
DB	102,15,56,0,198			; pshufb xmm0,xmm6  (rotate left by 8)
DB	102,15,56,0,206			; pshufb xmm1,xmm6
	paddd	xmm4,xmm0
	paddd	xmm5,xmm1
	pxor	xmm12,xmm4
	pxor	xmm13,xmm5
	movdqa	xmm7,xmm12		; rotate left by 7
	pslld	xmm12,7
	psrld	xmm7,25
	movdqa	xmm6,xmm13
	pslld	xmm13,7
	por	xmm12,xmm7
	psrld	xmm6,25
	movdqa	xmm7,XMMWORD[r10]	; reload rot16 mask
	por	xmm13,xmm6
	; swap: park words 8,9 and pick up words 10,11
	movdqa	XMMWORD[rsp],xmm4
	movdqa	XMMWORD[16+rsp],xmm5
	movdqa	xmm4,XMMWORD[32+rsp]
	movdqa	xmm5,XMMWORD[48+rsp]
	; columns (2,6,10,14) and (3,7,11,15)
	paddd	xmm10,xmm14
	paddd	xmm11,xmm15
	pxor	xmm2,xmm10
	pxor	xmm3,xmm11
DB	102,15,56,0,215			; pshufb xmm2,xmm7
DB	102,15,56,0,223			; pshufb xmm3,xmm7
	paddd	xmm4,xmm2
	paddd	xmm5,xmm3
	pxor	xmm14,xmm4
	pxor	xmm15,xmm5
	movdqa	xmm6,xmm14
	pslld	xmm14,12
	psrld	xmm6,20
	movdqa	xmm7,xmm15
	pslld	xmm15,12
	por	xmm14,xmm6
	psrld	xmm7,20
	movdqa	xmm6,XMMWORD[r11]
	por	xmm15,xmm7
	paddd	xmm10,xmm14
	paddd	xmm11,xmm15
	pxor	xmm2,xmm10
	pxor	xmm3,xmm11
DB	102,15,56,0,214			; pshufb xmm2,xmm6
DB	102,15,56,0,222			; pshufb xmm3,xmm6
	paddd	xmm4,xmm2
	paddd	xmm5,xmm3
	pxor	xmm14,xmm4
	pxor	xmm15,xmm5
	movdqa	xmm7,xmm14
	pslld	xmm14,7
	psrld	xmm7,25
	movdqa	xmm6,xmm15
	pslld	xmm15,7
	por	xmm14,xmm7
	psrld	xmm6,25
	movdqa	xmm7,XMMWORD[r10]
	por	xmm15,xmm6
	; diagonals (0,5,10,15) and (1,6,11,12)
	paddd	xmm8,xmm13
	paddd	xmm9,xmm14
	pxor	xmm3,xmm8
	pxor	xmm0,xmm9
DB	102,15,56,0,223			; pshufb xmm3,xmm7
DB	102,15,56,0,199			; pshufb xmm0,xmm7
	paddd	xmm4,xmm3
	paddd	xmm5,xmm0
	pxor	xmm13,xmm4
	pxor	xmm14,xmm5
	movdqa	xmm6,xmm13
	pslld	xmm13,12
	psrld	xmm6,20
	movdqa	xmm7,xmm14
	pslld	xmm14,12
	por	xmm13,xmm6
	psrld	xmm7,20
	movdqa	xmm6,XMMWORD[r11]
	por	xmm14,xmm7
	paddd	xmm8,xmm13
	paddd	xmm9,xmm14
	pxor	xmm3,xmm8
	pxor	xmm0,xmm9
DB	102,15,56,0,222			; pshufb xmm3,xmm6
DB	102,15,56,0,198			; pshufb xmm0,xmm6
	paddd	xmm4,xmm3
	paddd	xmm5,xmm0
	pxor	xmm13,xmm4
	pxor	xmm14,xmm5
	movdqa	xmm7,xmm13
	pslld	xmm13,7
	psrld	xmm7,25
	movdqa	xmm6,xmm14
	pslld	xmm14,7
	por	xmm13,xmm7
	psrld	xmm6,25
	movdqa	xmm7,XMMWORD[r10]
	por	xmm14,xmm6
	; swap back: park words 10,11 and pick up words 8,9
	movdqa	XMMWORD[32+rsp],xmm4
	movdqa	XMMWORD[48+rsp],xmm5
	movdqa	xmm4,XMMWORD[rsp]
	movdqa	xmm5,XMMWORD[16+rsp]
	; diagonals (2,7,8,13) and (3,4,9,14)
	paddd	xmm10,xmm15
	paddd	xmm11,xmm12
	pxor	xmm1,xmm10
	pxor	xmm2,xmm11
DB	102,15,56,0,207			; pshufb xmm1,xmm7
DB	102,15,56,0,215			; pshufb xmm2,xmm7
	paddd	xmm4,xmm1
	paddd	xmm5,xmm2
	pxor	xmm15,xmm4
	pxor	xmm12,xmm5
	movdqa	xmm6,xmm15
	pslld	xmm15,12
	psrld	xmm6,20
	movdqa	xmm7,xmm12
	pslld	xmm12,12
	por	xmm15,xmm6
	psrld	xmm7,20
	movdqa	xmm6,XMMWORD[r11]
	por	xmm12,xmm7
	paddd	xmm10,xmm15
	paddd	xmm11,xmm12
	pxor	xmm1,xmm10
	pxor	xmm2,xmm11
DB	102,15,56,0,206			; pshufb xmm1,xmm6
DB	102,15,56,0,214			; pshufb xmm2,xmm6
	paddd	xmm4,xmm1
	paddd	xmm5,xmm2
	pxor	xmm15,xmm4
	pxor	xmm12,xmm5
	movdqa	xmm7,xmm15
	pslld	xmm15,7
	psrld	xmm7,25
	movdqa	xmm6,xmm12
	pslld	xmm12,7
	por	xmm15,xmm7
	psrld	xmm6,25
	movdqa	xmm7,XMMWORD[r10]
	por	xmm12,xmm6
	dec	eax
	jnz	NEAR $L$oop4x

	; Feed-forward and 4x4 dword transpose, one group of four words at a
	; time, so that each register ends up holding 16 consecutive keystream
	; bytes of a single block.
	paddd	xmm8,XMMWORD[64+rsp]
	paddd	xmm9,XMMWORD[80+rsp]
	paddd	xmm10,XMMWORD[96+rsp]
	paddd	xmm11,XMMWORD[112+rsp]

	movdqa	xmm6,xmm8
	punpckldq	xmm8,xmm9
	movdqa	xmm7,xmm10
	punpckldq	xmm10,xmm11
	punpckhdq	xmm6,xmm9
	punpckhdq	xmm7,xmm11
	movdqa	xmm9,xmm8
	punpcklqdq	xmm8,xmm10
	movdqa	xmm11,xmm6
	punpcklqdq	xmm6,xmm7
	punpckhqdq	xmm9,xmm10
	punpckhqdq	xmm11,xmm7
	paddd	xmm12,XMMWORD[((128-256))+rcx]
	paddd	xmm13,XMMWORD[((144-256))+rcx]
	paddd	xmm14,XMMWORD[((160-256))+rcx]
	paddd	xmm15,XMMWORD[((176-256))+rcx]

	; Bytes 0..15 of blocks 0 and 1 go to the stack; fetch the spilled
	; words 10,11 into the freed registers.
	movdqa	XMMWORD[rsp],xmm8
	movdqa	XMMWORD[16+rsp],xmm9
	movdqa	xmm8,XMMWORD[32+rsp]
	movdqa	xmm9,XMMWORD[48+rsp]

	movdqa	xmm10,xmm12
	punpckldq	xmm12,xmm13
	movdqa	xmm7,xmm14
	punpckldq	xmm14,xmm15
	punpckhdq	xmm10,xmm13
	punpckhdq	xmm7,xmm15
	movdqa	xmm13,xmm12
	punpcklqdq	xmm12,xmm14
	movdqa	xmm15,xmm10
	punpcklqdq	xmm10,xmm7
	punpckhqdq	xmm13,xmm14
	punpckhqdq	xmm15,xmm7
	paddd	xmm4,XMMWORD[((192-256))+rcx]
	paddd	xmm5,XMMWORD[((208-256))+rcx]
	paddd	xmm8,XMMWORD[((224-256))+rcx]
	paddd	xmm9,XMMWORD[((240-256))+rcx]

	; Bytes 0..15 of blocks 2 and 3 go to the stack.
	movdqa	XMMWORD[32+rsp],xmm6
	movdqa	XMMWORD[48+rsp],xmm11

	movdqa	xmm14,xmm4
	punpckldq	xmm4,xmm5
	movdqa	xmm7,xmm8
	punpckldq	xmm8,xmm9
	punpckhdq	xmm14,xmm5
	punpckhdq	xmm7,xmm9
	movdqa	xmm5,xmm4
	punpcklqdq	xmm4,xmm8
	movdqa	xmm9,xmm14
	punpcklqdq	xmm14,xmm7
	punpckhqdq	xmm5,xmm8
	punpckhqdq	xmm9,xmm7
	paddd	xmm0,XMMWORD[((256-256))+rcx]
	paddd	xmm1,XMMWORD[((272-256))+rcx]
	paddd	xmm2,XMMWORD[((288-256))+rcx]
	paddd	xmm3,XMMWORD[((304-256))+rcx]

	movdqa	xmm8,xmm0
	punpckldq	xmm0,xmm1
	movdqa	xmm7,xmm2
	punpckldq	xmm2,xmm3
	punpckhdq	xmm8,xmm1
	punpckhdq	xmm7,xmm3
	movdqa	xmm1,xmm0
	punpcklqdq	xmm0,xmm2
	movdqa	xmm3,xmm8
	punpcklqdq	xmm8,xmm7
	punpckhqdq	xmm1,xmm2
	punpckhqdq	xmm3,xmm7
	; Keystream layout at this point (16 bytes each):
	;   block 0: [rsp],    xmm12, xmm4,  xmm0
	;   block 1: [rsp+16], xmm13, xmm5,  xmm1
	;   block 2: [rsp+32], xmm10, xmm14, xmm8
	;   block 3: [rsp+48], xmm15, xmm9,  xmm3
	cmp	rdx,64*4
	jb	NEAR $L$tail4x

	; Full 256 bytes: XOR and store four blocks, using xmm6/xmm11/xmm2/xmm7
	; as input staging registers.
	movdqu	xmm6,XMMWORD[rsi]
	movdqu	xmm11,XMMWORD[16+rsi]
	movdqu	xmm2,XMMWORD[32+rsi]
	movdqu	xmm7,XMMWORD[48+rsi]
	pxor	xmm6,XMMWORD[rsp]
	pxor	xmm11,xmm12
	pxor	xmm2,xmm4
	pxor	xmm7,xmm0

	movdqu	XMMWORD[rdi],xmm6
	movdqu	xmm6,XMMWORD[64+rsi]
	movdqu	XMMWORD[16+rdi],xmm11
	movdqu	xmm11,XMMWORD[80+rsi]
	movdqu	XMMWORD[32+rdi],xmm2
	movdqu	xmm2,XMMWORD[96+rsi]
	movdqu	XMMWORD[48+rdi],xmm7
	movdqu	xmm7,XMMWORD[112+rsi]
	lea	rsi,[128+rsi]
	pxor	xmm6,XMMWORD[16+rsp]
	pxor	xmm11,xmm13
	pxor	xmm2,xmm5
	pxor	xmm7,xmm1

	movdqu	XMMWORD[64+rdi],xmm6
	movdqu	xmm6,XMMWORD[rsi]
	movdqu	XMMWORD[80+rdi],xmm11
	movdqu	xmm11,XMMWORD[16+rsi]
	movdqu	XMMWORD[96+rdi],xmm2
	movdqu	xmm2,XMMWORD[32+rsi]
	movdqu	XMMWORD[112+rdi],xmm7
	lea	rdi,[128+rdi]
	movdqu	xmm7,XMMWORD[48+rsi]
	pxor	xmm6,XMMWORD[32+rsp]
	pxor	xmm11,xmm10
	pxor	xmm2,xmm14
	pxor	xmm7,xmm8

	movdqu	XMMWORD[rdi],xmm6
	movdqu	xmm6,XMMWORD[64+rsi]
	movdqu	XMMWORD[16+rdi],xmm11
	movdqu	xmm11,XMMWORD[80+rsi]
	movdqu	XMMWORD[32+rdi],xmm2
	movdqu	xmm2,XMMWORD[96+rsi]
	movdqu	XMMWORD[48+rdi],xmm7
	movdqu	xmm7,XMMWORD[112+rsi]
	lea	rsi,[128+rsi]
	pxor	xmm6,XMMWORD[48+rsp]
	pxor	xmm11,xmm15
	pxor	xmm2,xmm9
	pxor	xmm7,xmm3
	movdqu	XMMWORD[64+rdi],xmm6
	movdqu	XMMWORD[80+rdi],xmm11
	movdqu	XMMWORD[96+rdi],xmm2
	movdqu	XMMWORD[112+rdi],xmm7
	lea	rdi,[128+rdi]

	sub	rdx,64*4
	jnz	NEAR $L$oop_outer4x

	jmp	NEAR $L$done4x

; Fewer than 256 bytes remain. Emit as many whole 64-byte blocks as fit, then
; leave the next block's keystream at [rsp..rsp+63] for the byte-wise loop at
; $L$oop_tail4x (r10 = byte index).
$L$tail4x:
	cmp	rdx,192
	jae	NEAR $L$192_or_more4x
	cmp	rdx,128
	jae	NEAR $L$128_or_more4x
	cmp	rdx,64
	jae	NEAR $L$64_or_more4x


	xor	r10,r10

	; [rsp] already holds bytes 0..15 of block 0; add the other 48 bytes.
	movdqa	XMMWORD[16+rsp],xmm12
	movdqa	XMMWORD[32+rsp],xmm4
	movdqa	XMMWORD[48+rsp],xmm0
	jmp	NEAR $L$oop_tail4x

ALIGN	32
$L$64_or_more4x:
	movdqu	xmm6,XMMWORD[rsi]
	movdqu	xmm11,XMMWORD[16+rsi]
	movdqu	xmm2,XMMWORD[32+rsi]
	movdqu	xmm7,XMMWORD[48+rsi]
	pxor	xmm6,XMMWORD[rsp]
	pxor	xmm11,xmm12
	pxor	xmm2,xmm4
	pxor	xmm7,xmm0
	movdqu	XMMWORD[rdi],xmm6
	movdqu	XMMWORD[16+rdi],xmm11
	movdqu	XMMWORD[32+rdi],xmm2
	movdqu	XMMWORD[48+rdi],xmm7
	; Flags still come from "cmp rdx,64" in $L$tail4x (the SSE moves and
	; pxor above do not modify them): equal means exactly 64 bytes were left.
	je	NEAR $L$done4x

	; Stage block 1's keystream at [rsp..rsp+63] for the byte-wise tail.
	movdqa	xmm6,XMMWORD[16+rsp]
	lea	rsi,[64+rsi]
	xor	r10,r10
	movdqa	XMMWORD[rsp],xmm6
	movdqa	XMMWORD[16+rsp],xmm13
	lea	rdi,[64+rdi]
	movdqa	XMMWORD[32+rsp],xmm5
	sub	rdx,64
	movdqa	XMMWORD[48+rsp],xmm1
	jmp	NEAR $L$oop_tail4x

ALIGN	32
$L$128_or_more4x:
	movdqu	xmm6,XMMWORD[rsi]
	movdqu	xmm11,XMMWORD[16+rsi]
	movdqu	xmm2,XMMWORD[32+rsi]
	movdqu	xmm7,XMMWORD[48+rsi]
	pxor	xmm6,XMMWORD[rsp]
	pxor	xmm11,xmm12
	pxor	xmm2,xmm4
	pxor	xmm7,xmm0

	movdqu	XMMWORD[rdi],xmm6
	movdqu	xmm6,XMMWORD[64+rsi]
	movdqu	XMMWORD[16+rdi],xmm11
	movdqu	xmm11,XMMWORD[80+rsi]
	movdqu	XMMWORD[32+rdi],xmm2
	movdqu	xmm2,XMMWORD[96+rsi]
	movdqu	XMMWORD[48+rdi],xmm7
	movdqu	xmm7,XMMWORD[112+rsi]
	pxor	xmm6,XMMWORD[16+rsp]
	pxor	xmm11,xmm13
	pxor	xmm2,xmm5
	pxor	xmm7,xmm1
	movdqu	XMMWORD[64+rdi],xmm6
	movdqu	XMMWORD[80+rdi],xmm11
	movdqu	XMMWORD[96+rdi],xmm2
	movdqu	XMMWORD[112+rdi],xmm7
	; Flags still come from "cmp rdx,128" in $L$tail4x: equal means exactly
	; 128 bytes were left.
	je	NEAR $L$done4x

	; Stage block 2's keystream at [rsp..rsp+63] for the byte-wise tail.
	movdqa	xmm6,XMMWORD[32+rsp]
	lea	rsi,[128+rsi]
	xor	r10,r10
	movdqa	XMMWORD[rsp],xmm6
	movdqa	XMMWORD[16+rsp],xmm10
	lea	rdi,[128+rdi]
movdqa XMMWORD[32+rsp],xmm14 1000 sub rdx,128 1001 movdqa XMMWORD[48+rsp],xmm8 1002 jmp NEAR $L$oop_tail4x 1003 1004ALIGN 32 1005$L$192_or_more4x: 1006 movdqu xmm6,XMMWORD[rsi] 1007 movdqu xmm11,XMMWORD[16+rsi] 1008 movdqu xmm2,XMMWORD[32+rsi] 1009 movdqu xmm7,XMMWORD[48+rsi] 1010 pxor xmm6,XMMWORD[rsp] 1011 pxor xmm11,xmm12 1012 pxor xmm2,xmm4 1013 pxor xmm7,xmm0 1014 1015 movdqu XMMWORD[rdi],xmm6 1016 movdqu xmm6,XMMWORD[64+rsi] 1017 movdqu XMMWORD[16+rdi],xmm11 1018 movdqu xmm11,XMMWORD[80+rsi] 1019 movdqu XMMWORD[32+rdi],xmm2 1020 movdqu xmm2,XMMWORD[96+rsi] 1021 movdqu XMMWORD[48+rdi],xmm7 1022 movdqu xmm7,XMMWORD[112+rsi] 1023 lea rsi,[128+rsi] 1024 pxor xmm6,XMMWORD[16+rsp] 1025 pxor xmm11,xmm13 1026 pxor xmm2,xmm5 1027 pxor xmm7,xmm1 1028 1029 movdqu XMMWORD[64+rdi],xmm6 1030 movdqu xmm6,XMMWORD[rsi] 1031 movdqu XMMWORD[80+rdi],xmm11 1032 movdqu xmm11,XMMWORD[16+rsi] 1033 movdqu XMMWORD[96+rdi],xmm2 1034 movdqu xmm2,XMMWORD[32+rsi] 1035 movdqu XMMWORD[112+rdi],xmm7 1036 lea rdi,[128+rdi] 1037 movdqu xmm7,XMMWORD[48+rsi] 1038 pxor xmm6,XMMWORD[32+rsp] 1039 pxor xmm11,xmm10 1040 pxor xmm2,xmm14 1041 pxor xmm7,xmm8 1042 movdqu XMMWORD[rdi],xmm6 1043 movdqu XMMWORD[16+rdi],xmm11 1044 movdqu XMMWORD[32+rdi],xmm2 1045 movdqu XMMWORD[48+rdi],xmm7 1046 je NEAR $L$done4x 1047 1048 movdqa xmm6,XMMWORD[48+rsp] 1049 lea rsi,[64+rsi] 1050 xor r10,r10 1051 movdqa XMMWORD[rsp],xmm6 1052 movdqa XMMWORD[16+rsp],xmm15 1053 lea rdi,[64+rdi] 1054 movdqa XMMWORD[32+rsp],xmm9 1055 sub rdx,192 1056 movdqa XMMWORD[48+rsp],xmm3 1057 1058$L$oop_tail4x: 1059 movzx eax,BYTE[r10*1+rsi] 1060 movzx ecx,BYTE[r10*1+rsp] 1061 lea r10,[1+r10] 1062 xor eax,ecx 1063 mov BYTE[((-1))+r10*1+rdi],al 1064 dec rdx 1065 jnz NEAR $L$oop_tail4x 1066 1067$L$done4x: 1068 movaps xmm6,XMMWORD[((-168))+r9] 1069 movaps xmm7,XMMWORD[((-152))+r9] 1070 movaps xmm8,XMMWORD[((-136))+r9] 1071 movaps xmm9,XMMWORD[((-120))+r9] 1072 movaps xmm10,XMMWORD[((-104))+r9] 1073 movaps xmm11,XMMWORD[((-88))+r9] 1074 movaps 
xmm12,XMMWORD[((-72))+r9] 1075 movaps xmm13,XMMWORD[((-56))+r9] 1076 movaps xmm14,XMMWORD[((-40))+r9] 1077 movaps xmm15,XMMWORD[((-24))+r9] 1078 lea rsp,[r9] 1079 1080$L$4x_epilogue: 1081 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 1082 mov rsi,QWORD[16+rsp] 1083 DB 0F3h,0C3h ;repret 1084 1085$L$SEH_end_ChaCha20_4x: 1086 1087ALIGN 32 1088ChaCha20_8x: 1089 mov QWORD[8+rsp],rdi ;WIN64 prologue 1090 mov QWORD[16+rsp],rsi 1091 mov rax,rsp 1092$L$SEH_begin_ChaCha20_8x: 1093 mov rdi,rcx 1094 mov rsi,rdx 1095 mov rdx,r8 1096 mov rcx,r9 1097 mov r8,QWORD[40+rsp] 1098 1099 1100$L$ChaCha20_8x: 1101 1102 mov r9,rsp 1103 1104 sub rsp,0x280+168 1105 and rsp,-32 1106 movaps XMMWORD[(-168)+r9],xmm6 1107 movaps XMMWORD[(-152)+r9],xmm7 1108 movaps XMMWORD[(-136)+r9],xmm8 1109 movaps XMMWORD[(-120)+r9],xmm9 1110 movaps XMMWORD[(-104)+r9],xmm10 1111 movaps XMMWORD[(-88)+r9],xmm11 1112 movaps XMMWORD[(-72)+r9],xmm12 1113 movaps XMMWORD[(-56)+r9],xmm13 1114 movaps XMMWORD[(-40)+r9],xmm14 1115 movaps XMMWORD[(-24)+r9],xmm15 1116$L$8x_body: 1117 vzeroupper 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 vbroadcasti128 ymm11,XMMWORD[$L$sigma] 1129 vbroadcasti128 ymm3,XMMWORD[rcx] 1130 vbroadcasti128 ymm15,XMMWORD[16+rcx] 1131 vbroadcasti128 ymm7,XMMWORD[r8] 1132 lea rcx,[256+rsp] 1133 lea rax,[512+rsp] 1134 lea r10,[$L$rot16] 1135 lea r11,[$L$rot24] 1136 1137 vpshufd ymm8,ymm11,0x00 1138 vpshufd ymm9,ymm11,0x55 1139 vmovdqa YMMWORD[(128-256)+rcx],ymm8 1140 vpshufd ymm10,ymm11,0xaa 1141 vmovdqa YMMWORD[(160-256)+rcx],ymm9 1142 vpshufd ymm11,ymm11,0xff 1143 vmovdqa YMMWORD[(192-256)+rcx],ymm10 1144 vmovdqa YMMWORD[(224-256)+rcx],ymm11 1145 1146 vpshufd ymm0,ymm3,0x00 1147 vpshufd ymm1,ymm3,0x55 1148 vmovdqa YMMWORD[(256-256)+rcx],ymm0 1149 vpshufd ymm2,ymm3,0xaa 1150 vmovdqa YMMWORD[(288-256)+rcx],ymm1 1151 vpshufd ymm3,ymm3,0xff 1152 vmovdqa YMMWORD[(320-256)+rcx],ymm2 1153 vmovdqa YMMWORD[(352-256)+rcx],ymm3 1154 1155 vpshufd ymm12,ymm15,0x00 1156 vpshufd ymm13,ymm15,0x55 1157 vmovdqa 
YMMWORD[(384-512)+rax],ymm12 1158 vpshufd ymm14,ymm15,0xaa 1159 vmovdqa YMMWORD[(416-512)+rax],ymm13 1160 vpshufd ymm15,ymm15,0xff 1161 vmovdqa YMMWORD[(448-512)+rax],ymm14 1162 vmovdqa YMMWORD[(480-512)+rax],ymm15 1163 1164 vpshufd ymm4,ymm7,0x00 1165 vpshufd ymm5,ymm7,0x55 1166 vpaddd ymm4,ymm4,YMMWORD[$L$incy] 1167 vpshufd ymm6,ymm7,0xaa 1168 vmovdqa YMMWORD[(544-512)+rax],ymm5 1169 vpshufd ymm7,ymm7,0xff 1170 vmovdqa YMMWORD[(576-512)+rax],ymm6 1171 vmovdqa YMMWORD[(608-512)+rax],ymm7 1172 1173 jmp NEAR $L$oop_enter8x 1174 1175ALIGN 32 1176$L$oop_outer8x: 1177 vmovdqa ymm8,YMMWORD[((128-256))+rcx] 1178 vmovdqa ymm9,YMMWORD[((160-256))+rcx] 1179 vmovdqa ymm10,YMMWORD[((192-256))+rcx] 1180 vmovdqa ymm11,YMMWORD[((224-256))+rcx] 1181 vmovdqa ymm0,YMMWORD[((256-256))+rcx] 1182 vmovdqa ymm1,YMMWORD[((288-256))+rcx] 1183 vmovdqa ymm2,YMMWORD[((320-256))+rcx] 1184 vmovdqa ymm3,YMMWORD[((352-256))+rcx] 1185 vmovdqa ymm12,YMMWORD[((384-512))+rax] 1186 vmovdqa ymm13,YMMWORD[((416-512))+rax] 1187 vmovdqa ymm14,YMMWORD[((448-512))+rax] 1188 vmovdqa ymm15,YMMWORD[((480-512))+rax] 1189 vmovdqa ymm4,YMMWORD[((512-512))+rax] 1190 vmovdqa ymm5,YMMWORD[((544-512))+rax] 1191 vmovdqa ymm6,YMMWORD[((576-512))+rax] 1192 vmovdqa ymm7,YMMWORD[((608-512))+rax] 1193 vpaddd ymm4,ymm4,YMMWORD[$L$eight] 1194 1195$L$oop_enter8x: 1196 vmovdqa YMMWORD[64+rsp],ymm14 1197 vmovdqa YMMWORD[96+rsp],ymm15 1198 vbroadcasti128 ymm15,XMMWORD[r10] 1199 vmovdqa YMMWORD[(512-512)+rax],ymm4 1200 mov eax,10 1201 jmp NEAR $L$oop8x 1202 1203ALIGN 32 1204$L$oop8x: 1205 vpaddd ymm8,ymm8,ymm0 1206 vpxor ymm4,ymm8,ymm4 1207 vpshufb ymm4,ymm4,ymm15 1208 vpaddd ymm9,ymm9,ymm1 1209 vpxor ymm5,ymm9,ymm5 1210 vpshufb ymm5,ymm5,ymm15 1211 vpaddd ymm12,ymm12,ymm4 1212 vpxor ymm0,ymm12,ymm0 1213 vpslld ymm14,ymm0,12 1214 vpsrld ymm0,ymm0,20 1215 vpor ymm0,ymm14,ymm0 1216 vbroadcasti128 ymm14,XMMWORD[r11] 1217 vpaddd ymm13,ymm13,ymm5 1218 vpxor ymm1,ymm13,ymm1 1219 vpslld ymm15,ymm1,12 1220 vpsrld ymm1,ymm1,20 1221 vpor 
ymm1,ymm15,ymm1 1222 vpaddd ymm8,ymm8,ymm0 1223 vpxor ymm4,ymm8,ymm4 1224 vpshufb ymm4,ymm4,ymm14 1225 vpaddd ymm9,ymm9,ymm1 1226 vpxor ymm5,ymm9,ymm5 1227 vpshufb ymm5,ymm5,ymm14 1228 vpaddd ymm12,ymm12,ymm4 1229 vpxor ymm0,ymm12,ymm0 1230 vpslld ymm15,ymm0,7 1231 vpsrld ymm0,ymm0,25 1232 vpor ymm0,ymm15,ymm0 1233 vbroadcasti128 ymm15,XMMWORD[r10] 1234 vpaddd ymm13,ymm13,ymm5 1235 vpxor ymm1,ymm13,ymm1 1236 vpslld ymm14,ymm1,7 1237 vpsrld ymm1,ymm1,25 1238 vpor ymm1,ymm14,ymm1 1239 vmovdqa YMMWORD[rsp],ymm12 1240 vmovdqa YMMWORD[32+rsp],ymm13 1241 vmovdqa ymm12,YMMWORD[64+rsp] 1242 vmovdqa ymm13,YMMWORD[96+rsp] 1243 vpaddd ymm10,ymm10,ymm2 1244 vpxor ymm6,ymm10,ymm6 1245 vpshufb ymm6,ymm6,ymm15 1246 vpaddd ymm11,ymm11,ymm3 1247 vpxor ymm7,ymm11,ymm7 1248 vpshufb ymm7,ymm7,ymm15 1249 vpaddd ymm12,ymm12,ymm6 1250 vpxor ymm2,ymm12,ymm2 1251 vpslld ymm14,ymm2,12 1252 vpsrld ymm2,ymm2,20 1253 vpor ymm2,ymm14,ymm2 1254 vbroadcasti128 ymm14,XMMWORD[r11] 1255 vpaddd ymm13,ymm13,ymm7 1256 vpxor ymm3,ymm13,ymm3 1257 vpslld ymm15,ymm3,12 1258 vpsrld ymm3,ymm3,20 1259 vpor ymm3,ymm15,ymm3 1260 vpaddd ymm10,ymm10,ymm2 1261 vpxor ymm6,ymm10,ymm6 1262 vpshufb ymm6,ymm6,ymm14 1263 vpaddd ymm11,ymm11,ymm3 1264 vpxor ymm7,ymm11,ymm7 1265 vpshufb ymm7,ymm7,ymm14 1266 vpaddd ymm12,ymm12,ymm6 1267 vpxor ymm2,ymm12,ymm2 1268 vpslld ymm15,ymm2,7 1269 vpsrld ymm2,ymm2,25 1270 vpor ymm2,ymm15,ymm2 1271 vbroadcasti128 ymm15,XMMWORD[r10] 1272 vpaddd ymm13,ymm13,ymm7 1273 vpxor ymm3,ymm13,ymm3 1274 vpslld ymm14,ymm3,7 1275 vpsrld ymm3,ymm3,25 1276 vpor ymm3,ymm14,ymm3 1277 vpaddd ymm8,ymm8,ymm1 1278 vpxor ymm7,ymm8,ymm7 1279 vpshufb ymm7,ymm7,ymm15 1280 vpaddd ymm9,ymm9,ymm2 1281 vpxor ymm4,ymm9,ymm4 1282 vpshufb ymm4,ymm4,ymm15 1283 vpaddd ymm12,ymm12,ymm7 1284 vpxor ymm1,ymm12,ymm1 1285 vpslld ymm14,ymm1,12 1286 vpsrld ymm1,ymm1,20 1287 vpor ymm1,ymm14,ymm1 1288 vbroadcasti128 ymm14,XMMWORD[r11] 1289 vpaddd ymm13,ymm13,ymm4 1290 vpxor ymm2,ymm13,ymm2 1291 vpslld ymm15,ymm2,12 1292 vpsrld 
ymm2,ymm2,20 1293 vpor ymm2,ymm15,ymm2 1294 vpaddd ymm8,ymm8,ymm1 1295 vpxor ymm7,ymm8,ymm7 1296 vpshufb ymm7,ymm7,ymm14 1297 vpaddd ymm9,ymm9,ymm2 1298 vpxor ymm4,ymm9,ymm4 1299 vpshufb ymm4,ymm4,ymm14 1300 vpaddd ymm12,ymm12,ymm7 1301 vpxor ymm1,ymm12,ymm1 1302 vpslld ymm15,ymm1,7 1303 vpsrld ymm1,ymm1,25 1304 vpor ymm1,ymm15,ymm1 1305 vbroadcasti128 ymm15,XMMWORD[r10] 1306 vpaddd ymm13,ymm13,ymm4 1307 vpxor ymm2,ymm13,ymm2 1308 vpslld ymm14,ymm2,7 1309 vpsrld ymm2,ymm2,25 1310 vpor ymm2,ymm14,ymm2 1311 vmovdqa YMMWORD[64+rsp],ymm12 1312 vmovdqa YMMWORD[96+rsp],ymm13 1313 vmovdqa ymm12,YMMWORD[rsp] 1314 vmovdqa ymm13,YMMWORD[32+rsp] 1315 vpaddd ymm10,ymm10,ymm3 1316 vpxor ymm5,ymm10,ymm5 1317 vpshufb ymm5,ymm5,ymm15 1318 vpaddd ymm11,ymm11,ymm0 1319 vpxor ymm6,ymm11,ymm6 1320 vpshufb ymm6,ymm6,ymm15 1321 vpaddd ymm12,ymm12,ymm5 1322 vpxor ymm3,ymm12,ymm3 1323 vpslld ymm14,ymm3,12 1324 vpsrld ymm3,ymm3,20 1325 vpor ymm3,ymm14,ymm3 1326 vbroadcasti128 ymm14,XMMWORD[r11] 1327 vpaddd ymm13,ymm13,ymm6 1328 vpxor ymm0,ymm13,ymm0 1329 vpslld ymm15,ymm0,12 1330 vpsrld ymm0,ymm0,20 1331 vpor ymm0,ymm15,ymm0 1332 vpaddd ymm10,ymm10,ymm3 1333 vpxor ymm5,ymm10,ymm5 1334 vpshufb ymm5,ymm5,ymm14 1335 vpaddd ymm11,ymm11,ymm0 1336 vpxor ymm6,ymm11,ymm6 1337 vpshufb ymm6,ymm6,ymm14 1338 vpaddd ymm12,ymm12,ymm5 1339 vpxor ymm3,ymm12,ymm3 1340 vpslld ymm15,ymm3,7 1341 vpsrld ymm3,ymm3,25 1342 vpor ymm3,ymm15,ymm3 1343 vbroadcasti128 ymm15,XMMWORD[r10] 1344 vpaddd ymm13,ymm13,ymm6 1345 vpxor ymm0,ymm13,ymm0 1346 vpslld ymm14,ymm0,7 1347 vpsrld ymm0,ymm0,25 1348 vpor ymm0,ymm14,ymm0 1349 dec eax 1350 jnz NEAR $L$oop8x 1351 1352 lea rax,[512+rsp] 1353 vpaddd ymm8,ymm8,YMMWORD[((128-256))+rcx] 1354 vpaddd ymm9,ymm9,YMMWORD[((160-256))+rcx] 1355 vpaddd ymm10,ymm10,YMMWORD[((192-256))+rcx] 1356 vpaddd ymm11,ymm11,YMMWORD[((224-256))+rcx] 1357 1358 vpunpckldq ymm14,ymm8,ymm9 1359 vpunpckldq ymm15,ymm10,ymm11 1360 vpunpckhdq ymm8,ymm8,ymm9 1361 vpunpckhdq ymm10,ymm10,ymm11 1362 vpunpcklqdq 
ymm9,ymm14,ymm15 1363 vpunpckhqdq ymm14,ymm14,ymm15 1364 vpunpcklqdq ymm11,ymm8,ymm10 1365 vpunpckhqdq ymm8,ymm8,ymm10 1366 vpaddd ymm0,ymm0,YMMWORD[((256-256))+rcx] 1367 vpaddd ymm1,ymm1,YMMWORD[((288-256))+rcx] 1368 vpaddd ymm2,ymm2,YMMWORD[((320-256))+rcx] 1369 vpaddd ymm3,ymm3,YMMWORD[((352-256))+rcx] 1370 1371 vpunpckldq ymm10,ymm0,ymm1 1372 vpunpckldq ymm15,ymm2,ymm3 1373 vpunpckhdq ymm0,ymm0,ymm1 1374 vpunpckhdq ymm2,ymm2,ymm3 1375 vpunpcklqdq ymm1,ymm10,ymm15 1376 vpunpckhqdq ymm10,ymm10,ymm15 1377 vpunpcklqdq ymm3,ymm0,ymm2 1378 vpunpckhqdq ymm0,ymm0,ymm2 1379 vperm2i128 ymm15,ymm9,ymm1,0x20 1380 vperm2i128 ymm1,ymm9,ymm1,0x31 1381 vperm2i128 ymm9,ymm14,ymm10,0x20 1382 vperm2i128 ymm10,ymm14,ymm10,0x31 1383 vperm2i128 ymm14,ymm11,ymm3,0x20 1384 vperm2i128 ymm3,ymm11,ymm3,0x31 1385 vperm2i128 ymm11,ymm8,ymm0,0x20 1386 vperm2i128 ymm0,ymm8,ymm0,0x31 1387 vmovdqa YMMWORD[rsp],ymm15 1388 vmovdqa YMMWORD[32+rsp],ymm9 1389 vmovdqa ymm15,YMMWORD[64+rsp] 1390 vmovdqa ymm9,YMMWORD[96+rsp] 1391 1392 vpaddd ymm12,ymm12,YMMWORD[((384-512))+rax] 1393 vpaddd ymm13,ymm13,YMMWORD[((416-512))+rax] 1394 vpaddd ymm15,ymm15,YMMWORD[((448-512))+rax] 1395 vpaddd ymm9,ymm9,YMMWORD[((480-512))+rax] 1396 1397 vpunpckldq ymm2,ymm12,ymm13 1398 vpunpckldq ymm8,ymm15,ymm9 1399 vpunpckhdq ymm12,ymm12,ymm13 1400 vpunpckhdq ymm15,ymm15,ymm9 1401 vpunpcklqdq ymm13,ymm2,ymm8 1402 vpunpckhqdq ymm2,ymm2,ymm8 1403 vpunpcklqdq ymm9,ymm12,ymm15 1404 vpunpckhqdq ymm12,ymm12,ymm15 1405 vpaddd ymm4,ymm4,YMMWORD[((512-512))+rax] 1406 vpaddd ymm5,ymm5,YMMWORD[((544-512))+rax] 1407 vpaddd ymm6,ymm6,YMMWORD[((576-512))+rax] 1408 vpaddd ymm7,ymm7,YMMWORD[((608-512))+rax] 1409 1410 vpunpckldq ymm15,ymm4,ymm5 1411 vpunpckldq ymm8,ymm6,ymm7 1412 vpunpckhdq ymm4,ymm4,ymm5 1413 vpunpckhdq ymm6,ymm6,ymm7 1414 vpunpcklqdq ymm5,ymm15,ymm8 1415 vpunpckhqdq ymm15,ymm15,ymm8 1416 vpunpcklqdq ymm7,ymm4,ymm6 1417 vpunpckhqdq ymm4,ymm4,ymm6 1418 vperm2i128 ymm8,ymm13,ymm5,0x20 1419 vperm2i128 ymm5,ymm13,ymm5,0x31 
1420 vperm2i128 ymm13,ymm2,ymm15,0x20 1421 vperm2i128 ymm15,ymm2,ymm15,0x31 1422 vperm2i128 ymm2,ymm9,ymm7,0x20 1423 vperm2i128 ymm7,ymm9,ymm7,0x31 1424 vperm2i128 ymm9,ymm12,ymm4,0x20 1425 vperm2i128 ymm4,ymm12,ymm4,0x31 1426 vmovdqa ymm6,YMMWORD[rsp] 1427 vmovdqa ymm12,YMMWORD[32+rsp] 1428 1429 cmp rdx,64*8 1430 jb NEAR $L$tail8x 1431 1432 vpxor ymm6,ymm6,YMMWORD[rsi] 1433 vpxor ymm8,ymm8,YMMWORD[32+rsi] 1434 vpxor ymm1,ymm1,YMMWORD[64+rsi] 1435 vpxor ymm5,ymm5,YMMWORD[96+rsi] 1436 lea rsi,[128+rsi] 1437 vmovdqu YMMWORD[rdi],ymm6 1438 vmovdqu YMMWORD[32+rdi],ymm8 1439 vmovdqu YMMWORD[64+rdi],ymm1 1440 vmovdqu YMMWORD[96+rdi],ymm5 1441 lea rdi,[128+rdi] 1442 1443 vpxor ymm12,ymm12,YMMWORD[rsi] 1444 vpxor ymm13,ymm13,YMMWORD[32+rsi] 1445 vpxor ymm10,ymm10,YMMWORD[64+rsi] 1446 vpxor ymm15,ymm15,YMMWORD[96+rsi] 1447 lea rsi,[128+rsi] 1448 vmovdqu YMMWORD[rdi],ymm12 1449 vmovdqu YMMWORD[32+rdi],ymm13 1450 vmovdqu YMMWORD[64+rdi],ymm10 1451 vmovdqu YMMWORD[96+rdi],ymm15 1452 lea rdi,[128+rdi] 1453 1454 vpxor ymm14,ymm14,YMMWORD[rsi] 1455 vpxor ymm2,ymm2,YMMWORD[32+rsi] 1456 vpxor ymm3,ymm3,YMMWORD[64+rsi] 1457 vpxor ymm7,ymm7,YMMWORD[96+rsi] 1458 lea rsi,[128+rsi] 1459 vmovdqu YMMWORD[rdi],ymm14 1460 vmovdqu YMMWORD[32+rdi],ymm2 1461 vmovdqu YMMWORD[64+rdi],ymm3 1462 vmovdqu YMMWORD[96+rdi],ymm7 1463 lea rdi,[128+rdi] 1464 1465 vpxor ymm11,ymm11,YMMWORD[rsi] 1466 vpxor ymm9,ymm9,YMMWORD[32+rsi] 1467 vpxor ymm0,ymm0,YMMWORD[64+rsi] 1468 vpxor ymm4,ymm4,YMMWORD[96+rsi] 1469 lea rsi,[128+rsi] 1470 vmovdqu YMMWORD[rdi],ymm11 1471 vmovdqu YMMWORD[32+rdi],ymm9 1472 vmovdqu YMMWORD[64+rdi],ymm0 1473 vmovdqu YMMWORD[96+rdi],ymm4 1474 lea rdi,[128+rdi] 1475 1476 sub rdx,64*8 1477 jnz NEAR $L$oop_outer8x 1478 1479 jmp NEAR $L$done8x 1480 1481$L$tail8x: 1482 cmp rdx,448 1483 jae NEAR $L$448_or_more8x 1484 cmp rdx,384 1485 jae NEAR $L$384_or_more8x 1486 cmp rdx,320 1487 jae NEAR $L$320_or_more8x 1488 cmp rdx,256 1489 jae NEAR $L$256_or_more8x 1490 cmp rdx,192 1491 jae NEAR 
$L$192_or_more8x 1492 cmp rdx,128 1493 jae NEAR $L$128_or_more8x 1494 cmp rdx,64 1495 jae NEAR $L$64_or_more8x 1496 1497 xor r10,r10 1498 vmovdqa YMMWORD[rsp],ymm6 1499 vmovdqa YMMWORD[32+rsp],ymm8 1500 jmp NEAR $L$oop_tail8x 1501 1502ALIGN 32 1503$L$64_or_more8x: 1504 vpxor ymm6,ymm6,YMMWORD[rsi] 1505 vpxor ymm8,ymm8,YMMWORD[32+rsi] 1506 vmovdqu YMMWORD[rdi],ymm6 1507 vmovdqu YMMWORD[32+rdi],ymm8 1508 je NEAR $L$done8x 1509 1510 lea rsi,[64+rsi] 1511 xor r10,r10 1512 vmovdqa YMMWORD[rsp],ymm1 1513 lea rdi,[64+rdi] 1514 sub rdx,64 1515 vmovdqa YMMWORD[32+rsp],ymm5 1516 jmp NEAR $L$oop_tail8x 1517 1518ALIGN 32 1519$L$128_or_more8x: 1520 vpxor ymm6,ymm6,YMMWORD[rsi] 1521 vpxor ymm8,ymm8,YMMWORD[32+rsi] 1522 vpxor ymm1,ymm1,YMMWORD[64+rsi] 1523 vpxor ymm5,ymm5,YMMWORD[96+rsi] 1524 vmovdqu YMMWORD[rdi],ymm6 1525 vmovdqu YMMWORD[32+rdi],ymm8 1526 vmovdqu YMMWORD[64+rdi],ymm1 1527 vmovdqu YMMWORD[96+rdi],ymm5 1528 je NEAR $L$done8x 1529 1530 lea rsi,[128+rsi] 1531 xor r10,r10 1532 vmovdqa YMMWORD[rsp],ymm12 1533 lea rdi,[128+rdi] 1534 sub rdx,128 1535 vmovdqa YMMWORD[32+rsp],ymm13 1536 jmp NEAR $L$oop_tail8x 1537 1538ALIGN 32 1539$L$192_or_more8x: 1540 vpxor ymm6,ymm6,YMMWORD[rsi] 1541 vpxor ymm8,ymm8,YMMWORD[32+rsi] 1542 vpxor ymm1,ymm1,YMMWORD[64+rsi] 1543 vpxor ymm5,ymm5,YMMWORD[96+rsi] 1544 vpxor ymm12,ymm12,YMMWORD[128+rsi] 1545 vpxor ymm13,ymm13,YMMWORD[160+rsi] 1546 vmovdqu YMMWORD[rdi],ymm6 1547 vmovdqu YMMWORD[32+rdi],ymm8 1548 vmovdqu YMMWORD[64+rdi],ymm1 1549 vmovdqu YMMWORD[96+rdi],ymm5 1550 vmovdqu YMMWORD[128+rdi],ymm12 1551 vmovdqu YMMWORD[160+rdi],ymm13 1552 je NEAR $L$done8x 1553 1554 lea rsi,[192+rsi] 1555 xor r10,r10 1556 vmovdqa YMMWORD[rsp],ymm10 1557 lea rdi,[192+rdi] 1558 sub rdx,192 1559 vmovdqa YMMWORD[32+rsp],ymm15 1560 jmp NEAR $L$oop_tail8x 1561 1562ALIGN 32 1563$L$256_or_more8x: 1564 vpxor ymm6,ymm6,YMMWORD[rsi] 1565 vpxor ymm8,ymm8,YMMWORD[32+rsi] 1566 vpxor ymm1,ymm1,YMMWORD[64+rsi] 1567 vpxor ymm5,ymm5,YMMWORD[96+rsi] 1568 vpxor 
ymm12,ymm12,YMMWORD[128+rsi] 1569 vpxor ymm13,ymm13,YMMWORD[160+rsi] 1570 vpxor ymm10,ymm10,YMMWORD[192+rsi] 1571 vpxor ymm15,ymm15,YMMWORD[224+rsi] 1572 vmovdqu YMMWORD[rdi],ymm6 1573 vmovdqu YMMWORD[32+rdi],ymm8 1574 vmovdqu YMMWORD[64+rdi],ymm1 1575 vmovdqu YMMWORD[96+rdi],ymm5 1576 vmovdqu YMMWORD[128+rdi],ymm12 1577 vmovdqu YMMWORD[160+rdi],ymm13 1578 vmovdqu YMMWORD[192+rdi],ymm10 1579 vmovdqu YMMWORD[224+rdi],ymm15 1580 je NEAR $L$done8x 1581 1582 lea rsi,[256+rsi] 1583 xor r10,r10 1584 vmovdqa YMMWORD[rsp],ymm14 1585 lea rdi,[256+rdi] 1586 sub rdx,256 1587 vmovdqa YMMWORD[32+rsp],ymm2 1588 jmp NEAR $L$oop_tail8x 1589 1590ALIGN 32 1591$L$320_or_more8x: 1592 vpxor ymm6,ymm6,YMMWORD[rsi] 1593 vpxor ymm8,ymm8,YMMWORD[32+rsi] 1594 vpxor ymm1,ymm1,YMMWORD[64+rsi] 1595 vpxor ymm5,ymm5,YMMWORD[96+rsi] 1596 vpxor ymm12,ymm12,YMMWORD[128+rsi] 1597 vpxor ymm13,ymm13,YMMWORD[160+rsi] 1598 vpxor ymm10,ymm10,YMMWORD[192+rsi] 1599 vpxor ymm15,ymm15,YMMWORD[224+rsi] 1600 vpxor ymm14,ymm14,YMMWORD[256+rsi] 1601 vpxor ymm2,ymm2,YMMWORD[288+rsi] 1602 vmovdqu YMMWORD[rdi],ymm6 1603 vmovdqu YMMWORD[32+rdi],ymm8 1604 vmovdqu YMMWORD[64+rdi],ymm1 1605 vmovdqu YMMWORD[96+rdi],ymm5 1606 vmovdqu YMMWORD[128+rdi],ymm12 1607 vmovdqu YMMWORD[160+rdi],ymm13 1608 vmovdqu YMMWORD[192+rdi],ymm10 1609 vmovdqu YMMWORD[224+rdi],ymm15 1610 vmovdqu YMMWORD[256+rdi],ymm14 1611 vmovdqu YMMWORD[288+rdi],ymm2 1612 je NEAR $L$done8x 1613 1614 lea rsi,[320+rsi] 1615 xor r10,r10 1616 vmovdqa YMMWORD[rsp],ymm3 1617 lea rdi,[320+rdi] 1618 sub rdx,320 1619 vmovdqa YMMWORD[32+rsp],ymm7 1620 jmp NEAR $L$oop_tail8x 1621 1622ALIGN 32 1623$L$384_or_more8x: 1624 vpxor ymm6,ymm6,YMMWORD[rsi] 1625 vpxor ymm8,ymm8,YMMWORD[32+rsi] 1626 vpxor ymm1,ymm1,YMMWORD[64+rsi] 1627 vpxor ymm5,ymm5,YMMWORD[96+rsi] 1628 vpxor ymm12,ymm12,YMMWORD[128+rsi] 1629 vpxor ymm13,ymm13,YMMWORD[160+rsi] 1630 vpxor ymm10,ymm10,YMMWORD[192+rsi] 1631 vpxor ymm15,ymm15,YMMWORD[224+rsi] 1632 vpxor ymm14,ymm14,YMMWORD[256+rsi] 1633 vpxor 
ymm2,ymm2,YMMWORD[288+rsi] 1634 vpxor ymm3,ymm3,YMMWORD[320+rsi] 1635 vpxor ymm7,ymm7,YMMWORD[352+rsi] 1636 vmovdqu YMMWORD[rdi],ymm6 1637 vmovdqu YMMWORD[32+rdi],ymm8 1638 vmovdqu YMMWORD[64+rdi],ymm1 1639 vmovdqu YMMWORD[96+rdi],ymm5 1640 vmovdqu YMMWORD[128+rdi],ymm12 1641 vmovdqu YMMWORD[160+rdi],ymm13 1642 vmovdqu YMMWORD[192+rdi],ymm10 1643 vmovdqu YMMWORD[224+rdi],ymm15 1644 vmovdqu YMMWORD[256+rdi],ymm14 1645 vmovdqu YMMWORD[288+rdi],ymm2 1646 vmovdqu YMMWORD[320+rdi],ymm3 1647 vmovdqu YMMWORD[352+rdi],ymm7 1648 je NEAR $L$done8x 1649 1650 lea rsi,[384+rsi] 1651 xor r10,r10 1652 vmovdqa YMMWORD[rsp],ymm11 1653 lea rdi,[384+rdi] 1654 sub rdx,384 1655 vmovdqa YMMWORD[32+rsp],ymm9 1656 jmp NEAR $L$oop_tail8x 1657 1658ALIGN 32 1659$L$448_or_more8x: 1660 vpxor ymm6,ymm6,YMMWORD[rsi] 1661 vpxor ymm8,ymm8,YMMWORD[32+rsi] 1662 vpxor ymm1,ymm1,YMMWORD[64+rsi] 1663 vpxor ymm5,ymm5,YMMWORD[96+rsi] 1664 vpxor ymm12,ymm12,YMMWORD[128+rsi] 1665 vpxor ymm13,ymm13,YMMWORD[160+rsi] 1666 vpxor ymm10,ymm10,YMMWORD[192+rsi] 1667 vpxor ymm15,ymm15,YMMWORD[224+rsi] 1668 vpxor ymm14,ymm14,YMMWORD[256+rsi] 1669 vpxor ymm2,ymm2,YMMWORD[288+rsi] 1670 vpxor ymm3,ymm3,YMMWORD[320+rsi] 1671 vpxor ymm7,ymm7,YMMWORD[352+rsi] 1672 vpxor ymm11,ymm11,YMMWORD[384+rsi] 1673 vpxor ymm9,ymm9,YMMWORD[416+rsi] 1674 vmovdqu YMMWORD[rdi],ymm6 1675 vmovdqu YMMWORD[32+rdi],ymm8 1676 vmovdqu YMMWORD[64+rdi],ymm1 1677 vmovdqu YMMWORD[96+rdi],ymm5 1678 vmovdqu YMMWORD[128+rdi],ymm12 1679 vmovdqu YMMWORD[160+rdi],ymm13 1680 vmovdqu YMMWORD[192+rdi],ymm10 1681 vmovdqu YMMWORD[224+rdi],ymm15 1682 vmovdqu YMMWORD[256+rdi],ymm14 1683 vmovdqu YMMWORD[288+rdi],ymm2 1684 vmovdqu YMMWORD[320+rdi],ymm3 1685 vmovdqu YMMWORD[352+rdi],ymm7 1686 vmovdqu YMMWORD[384+rdi],ymm11 1687 vmovdqu YMMWORD[416+rdi],ymm9 1688 je NEAR $L$done8x 1689 1690 lea rsi,[448+rsi] 1691 xor r10,r10 1692 vmovdqa YMMWORD[rsp],ymm0 1693 lea rdi,[448+rdi] 1694 sub rdx,448 1695 vmovdqa YMMWORD[32+rsp],ymm4 1696 1697$L$oop_tail8x: 1698 movzx 
eax,BYTE[r10*1+rsi]                             ; operands of the movzx that ends the previous line: eax = source byte at rsi[r10]
        movzx   ecx,BYTE[r10*1+rsp]             ; ecx = byte at rsp[r10] (staged there by the tail dispatch code)
        lea     r10,[1+r10]                     ; next byte index
        xor     eax,ecx                         ; combine the two bytes
        mov     BYTE[((-1))+r10*1+rdi],al       ; store at rdi[index just processed]
        dec     rdx                             ; rdx = bytes still to do
        jnz     NEAR $L$oop_tail8x

; Shared exit of the 8x code path: wipe the vector registers, reload
; xmm6-xmm15 from the ten 16-byte slots at r9-168 .. r9-24, then take the
; stack pointer back from r9.
; NOTE(review): the code that fills these slots and sets r9 lies outside this
; view (presumably the ChaCha20_8x prologue) -- confirm there.
$L$done8x:
        vzeroall
        movaps  xmm6,XMMWORD[((-168))+r9]
        movaps  xmm7,XMMWORD[((-152))+r9]
        movaps  xmm8,XMMWORD[((-136))+r9]
        movaps  xmm9,XMMWORD[((-120))+r9]
        movaps  xmm10,XMMWORD[((-104))+r9]
        movaps  xmm11,XMMWORD[((-88))+r9]
        movaps  xmm12,XMMWORD[((-72))+r9]
        movaps  xmm13,XMMWORD[((-56))+r9]
        movaps  xmm14,XMMWORD[((-40))+r9]
        movaps  xmm15,XMMWORD[((-24))+r9]
        lea     rsp,[r9]

$L$8x_epilogue:
        mov     rdi,QWORD[8+rsp]                ; WIN64 epilogue: reload rdi/rsi from the slots above rsp
        mov     rsi,QWORD[16+rsp]
        DB      0F3h,0C3h                       ; bytes F3 C3 = "rep ret"

$L$SEH_end_ChaCha20_8x:
EXTERN  __imp_RtlVirtualUnwind

;-----------------------------------------------------------------------
; se_handler -- exception handler registered in .xdata for ChaCha20_ctr32.
;
; In:   r8 = context record, r9 = dispatcher record.
;       NOTE(review): the field names attached to numeric offsets in the
;       comments below (CONTEXT.Rax at 120, .Rsp at 152, .Rip at 248, ...;
;       DISPATCHER_CONTEXT.ImageBase at 8, .HandlerData at 56, ...) come
;       from the public Win64 layouts, not from this file -- confirm
;       against winnt.h.
; Out:  eax = 1 (presumably ExceptionContinueSearch -- confirm).
; Stack: eight pushes + pushfq + 64 bytes; the 64 bytes hold the 32-byte
;       home area plus the four stack arguments written at [32..56+rsp]
;       for the RtlVirtualUnwind call.
;
; The handler works out where the interrupted code's frame is, writes the
; saved non-volatile registers back into the context record, and then
; falls into $L$common_seh_tail, which is shared with ssse3_handler and
; full_handler.
;-----------------------------------------------------------------------
ALIGN   16
se_handler:
        push    rsi
        push    rdi
        push    rbx
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15
        pushfq
        sub     rsp,64

        mov     rax,QWORD[120+r8]               ; rax = context->Rax (the entry code does "mov rax,rsp")
        mov     rbx,QWORD[248+r8]               ; rbx = context->Rip

        mov     rsi,QWORD[8+r9]                 ; disp->ImageBase   (not read again before the tail reloads rsi)
        mov     r11,QWORD[56+r9]                ; disp->HandlerData (not read again before the tail reloads r11)

        lea     r10,[$L$ctr32_body]
        cmp     rbx,r10                         ; Rip below $L$ctr32_body?
        jb      NEAR $L$common_seh_tail         ; yes: keep rax = context->Rax

        mov     rax,QWORD[152+r8]               ; rax = context->Rsp

        lea     r10,[$L$no_data]
        cmp     rbx,r10                         ; Rip at or past $L$no_data?
        jae     NEAR $L$common_seh_tail         ; yes: keep rax = context->Rsp

        lea     rax,[((64+24+48))+rax]          ; step over the 64+24-byte locals and the six 8-byte pushes

        mov     rbx,QWORD[((-8))+rax]           ; pick up the six pushed registers ...
        mov     rbp,QWORD[((-16))+rax]
        mov     r12,QWORD[((-24))+rax]
        mov     r13,QWORD[((-32))+rax]
        mov     r14,QWORD[((-40))+rax]
        mov     r15,QWORD[((-48))+rax]
        mov     QWORD[144+r8],rbx               ; ... and write them into the context record
        mov     QWORD[160+r8],rbp
        mov     QWORD[216+r8],r12
        mov     QWORD[224+r8],r13
        mov     QWORD[232+r8],r14
        mov     QWORD[240+r8],r15

; Shared tail for all three handlers.  On arrival rax = the address whose
; [8+rax] / [16+rax] slots hold the rdi / rsi values parked by the WIN64
; prologue (compare "mov QWORD[8+rsp],rdi" / "mov QWORD[16+rsp],rsi" at the
; ChaCha20_ctr32 entry).
$L$common_seh_tail:
        mov     rdi,QWORD[8+rax]
        mov     rsi,QWORD[16+rax]
        mov     QWORD[152+r8],rax               ; context->Rsp = rax
        mov     QWORD[168+r8],rsi               ; context->Rsi
        mov     QWORD[176+r8],rdi               ; context->Rdi

        mov     rdi,QWORD[40+r9]                ; destination = disp->ContextRecord
        mov     rsi,r8                          ; source      = context
        mov     ecx,154                         ; 154 qwords
        DD      0xa548f3fc                      ; bytes FC F3 48 A5 = cld ; rep movsq

        mov     rsi,r9                          ; rsi = dispatcher record; r8/r9 are reused as arguments below
        xor     rcx,rcx                         ; arg1 = 0
        mov     rdx,QWORD[8+rsi]                ; arg2 = disp->ImageBase
        mov     r8,QWORD[rsi]                   ; arg3 = disp->ControlPc
        mov     r9,QWORD[16+rsi]                ; arg4 = disp->FunctionEntry
        mov     r10,QWORD[40+rsi]               ; disp->ContextRecord
        lea     r11,[56+rsi]                    ; &disp->HandlerData
        lea     r12,[24+rsi]                    ; &disp->EstablisherFrame
        mov     QWORD[32+rsp],r10               ; arg5
        mov     QWORD[40+rsp],r11               ; arg6
        mov     QWORD[48+rsp],r12               ; arg7
        mov     QWORD[56+rsp],rcx               ; arg8 = 0
        call    QWORD[__imp_RtlVirtualUnwind]

        mov     eax,1                           ; handler return value
        add     rsp,64
        popfq
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     rbp
        pop     rbx
        pop     rdi
        pop     rsi
        DB      0F3h,0C3h                       ; bytes F3 C3 = "rep ret"



;-----------------------------------------------------------------------
; ssse3_handler -- exception handler registered in .xdata for
; ChaCha20_ssse3.
;
; In:   r8 = context record, r9 = dispatcher record (same hedge on field
;       names as for the numeric offsets used by se_handler).
;       HandlerData points at the two image-relative DWORDs that follow
;       the handler entry in .xdata: body label, then epilogue label.
; When Rip lies inside [body, epilogue) it copies 4 qwords (32 bytes) from
; context->R9 - 40 to context+512 (presumably the saved xmm6/xmm7 going
; into CONTEXT.Xmm6.. -- confirm), then finishes in $L$common_seh_tail.
;-----------------------------------------------------------------------
ALIGN   16
ssse3_handler:
        push    rsi
        push    rdi
        push    rbx
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15
        pushfq
        sub     rsp,64

        mov     rax,QWORD[120+r8]               ; rax = context->Rax
        mov     rbx,QWORD[248+r8]               ; rbx = context->Rip

        mov     rsi,QWORD[8+r9]                 ; rsi = disp->ImageBase
        mov     r11,QWORD[56+r9]                ; r11 = disp->HandlerData

        mov     r10d,DWORD[r11]                 ; HandlerData[0]: image-relative body label
        lea     r10,[r10*1+rsi]                 ; make it absolute
        cmp     rbx,r10
        jb      NEAR $L$common_seh_tail         ; Rip below the body label

        mov     rax,QWORD[192+r8]               ; rax = context->R9

        mov     r10d,DWORD[4+r11]               ; HandlerData[1]: image-relative epilogue label
        lea     r10,[r10*1+rsi]
        cmp     rbx,r10
        jae     NEAR $L$common_seh_tail         ; Rip at or past the epilogue label

        lea     rsi,[((-40))+rax]               ; source: 40 bytes below rax
        lea     rdi,[512+r8]                    ; destination: context + 512
        mov     ecx,4                           ; 4 qwords = 32 bytes
        DD      0xa548f3fc                      ; bytes FC F3 48 A5 = cld ; rep movsq

        jmp     NEAR $L$common_seh_tail



;-----------------------------------------------------------------------
; full_handler -- exception handler registered in .xdata for ChaCha20_4x
; and ChaCha20_8x.
;
; Same shape as ssse3_handler, but the copy is 20 qwords (160 bytes) taken
; from context->R9 - 168.  That span matches the ten 16-byte slots
; r9-168 .. r9-24 which $L$done8x reloads into xmm6-xmm15.
;-----------------------------------------------------------------------
ALIGN   16
full_handler:
        push    rsi
        push    rdi
        push    rbx
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15
        pushfq
        sub     rsp,64

        mov     rax,QWORD[120+r8]               ; rax = context->Rax
        mov     rbx,QWORD[248+r8]               ; rbx = context->Rip

        mov     rsi,QWORD[8+r9]                 ; rsi = disp->ImageBase
        mov     r11,QWORD[56+r9]                ; r11 = disp->HandlerData

        mov     r10d,DWORD[r11]                 ; HandlerData[0]: image-relative body label
        lea     r10,[r10*1+rsi]                 ; make it absolute
        cmp     rbx,r10
        jb      NEAR $L$common_seh_tail         ; Rip below the body label

        mov     rax,QWORD[192+r8]               ; rax = context->R9

        mov     r10d,DWORD[4+r11]               ; HandlerData[1]: image-relative epilogue label
        lea     r10,[r10*1+rsi]
        cmp     rbx,r10
        jae     NEAR $L$common_seh_tail         ; Rip at or past the epilogue label

        lea     rsi,[((-168))+rax]              ; source: 168 bytes below rax
        lea     rdi,[512+r8]                    ; destination: context + 512
        mov     ecx,20                          ; 20 qwords = 160 bytes
        DD      0xa548f3fc                      ; bytes FC F3 48 A5 = cld ; rep movsq

        jmp     NEAR $L$common_seh_tail


; Function table: one (begin, end, unwind-info) triple of image-relative
; addresses per entry point.
section .pdata rdata align=4
ALIGN   4
        DD      $L$SEH_begin_ChaCha20_ctr32 wrt ..imagebase
        DD      $L$SEH_end_ChaCha20_ctr32 wrt ..imagebase
        DD      $L$SEH_info_ChaCha20_ctr32 wrt ..imagebase

        DD      $L$SEH_begin_ChaCha20_ssse3 wrt ..imagebase
        DD      $L$SEH_end_ChaCha20_ssse3 wrt ..imagebase
        DD      $L$SEH_info_ChaCha20_ssse3 wrt ..imagebase

        DD      $L$SEH_begin_ChaCha20_4x wrt ..imagebase
        DD      $L$SEH_end_ChaCha20_4x wrt ..imagebase
        DD      $L$SEH_info_ChaCha20_4x wrt ..imagebase
        DD      $L$SEH_begin_ChaCha20_8x wrt ..imagebase
        DD      $L$SEH_end_ChaCha20_8x wrt ..imagebase
        DD      $L$SEH_info_ChaCha20_8x wrt ..imagebase

; Unwind-info records: a 4-byte header (9,0,0,0), the image-relative handler
; address, and -- for every entry except ctr32 -- the body/epilogue label pair
; that the handler reads through HandlerData.
; NOTE(review): header byte 9 presumably encodes UNWIND_INFO version 1 with
; the exception-handler flag set -- confirm against the x64 unwind data spec.
section .xdata rdata align=8
ALIGN   8
$L$SEH_info_ChaCha20_ctr32:
DB      9,0,0,0
        DD      se_handler wrt ..imagebase

$L$SEH_info_ChaCha20_ssse3:
DB      9,0,0,0
        DD      ssse3_handler wrt ..imagebase
        DD      $L$ssse3_body wrt ..imagebase,$L$ssse3_epilogue wrt ..imagebase

$L$SEH_info_ChaCha20_4x:
DB      9,0,0,0
        DD      full_handler wrt ..imagebase
        DD      $L$4x_body wrt ..imagebase,$L$4x_epilogue wrt ..imagebase
$L$SEH_info_ChaCha20_8x:
DB      9,0,0,0
        DD      full_handler wrt ..imagebase
        DD      $L$8x_body wrt ..imagebase,$L$8x_epilogue wrt ..imagebase