; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.

; Make RIP-relative addressing the default for memory operands.
default rel
; The size keywords below are defined as empty, so XMMWORD[...]/YMMWORD[...]
; operands assemble as plain [...] memory references.
%define XMMWORD
%define YMMWORD
%define ZMMWORD

%include "ring_core_generated/prefix_symbols_nasm.inc"
section .text code align=64


EXTERN OPENSSL_ia32cap_P

; ---------------------------------------------------------------------------
; Constant tables. They live in the code section and are only ever read.
; ---------------------------------------------------------------------------
ALIGN 64
$L$zero:
        DD 0,0,0,0
; Added to the 16-byte block loaded from r8 to advance its first dword by one
; per 64-byte block (scalar and SSSE3 paths).
$L$one:
        DD 1,0,0,0
; Per-lane offsets 0..3 / step 4: used by the 4x path.
$L$inc:
        DD 0,1,2,3
$L$four:
        DD 4,4,4,4
; Per-lane offsets / step 8: used by the 8x path.
$L$incy:
        DD 0,2,4,6,1,3,5,7
$L$eight:
        DD 8,8,8,8,8,8,8,8
; PSHUFB masks. rot16 rotates every dword left by 16 bits; rot24 rotates every
; dword left by 8 bits (equivalently right by 24).
$L$rot16:
DB 0x2,0x3,0x0,0x1,0x6,0x7,0x4,0x5,0xa,0xb,0x8,0x9,0xe,0xf,0xc,0xd
$L$rot24:
DB 0x3,0x0,0x1,0x2,0x7,0x4,0x5,0x6,0xb,0x8,0x9,0xa,0xf,0xc,0xd,0xe
; ASCII "expand 32-byte k", NUL-terminated. Read as four little-endian dwords
; it equals the immediates 0x61707865,0x3320646e,0x79622d32,0x6b206574 used by
; the scalar path.
$L$sigma:
DB 101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107
DB 0
ALIGN 64
; 16-dword tables; not referenced by the code paths documented in this block.
$L$zeroz:
        DD 0,0,0,0,1,0,0,0,2,0,0,0,3,0,0,0
$L$fourz:
        DD 4,0,0,0,4,0,0,0,4,0,0,0,4,0,0,0
$L$incz:
        DD 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
$L$sixteen:
        DD 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16
; ASCII "ChaCha20 for x86_64, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated.
DB 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
DB 95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32
DB 98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115
DB 108,46,111,114,103,62,0
global ChaCha20_ctr32

; ---------------------------------------------------------------------------
; ChaCha20_ctr32 -- exported entry point, Win64 calling convention.
;
; Arguments on entry (Win64)  ->  register after the prologue shuffle:
;   rcx      -> rdi   destination pointer (bytes are stored here)
;   rdx      -> rsi   source pointer (bytes are loaded from here)
;   r8       -> rdx   length in bytes; 0 returns immediately
;   r9       -> rcx   pointer to 32 bytes loaded as state words 4..11
;                     (presumably the key)
;   [rsp+40] -> r8    pointer to 16 bytes loaded as state words 12..15; the
;                     first dword is incremented by one per 64-byte block
;
; Dispatch: if bit 9 of the dword at OPENSSL_ia32cap_P+4 is set, control goes
; to $L$ChaCha20_ssse3; otherwise the integer-register code below is used.
;
; Scalar stack frame after "sub rsp,64+24" (rsp is 16-byte aligned here:
; entry rsp is 8 mod 16, six pushes plus 88 bytes make it 0 mod 16):
;   [rsp+ 0..15]  written only in $L$tail (key-stream words 0..3)
;   [rsp+16..31]  input words 4..7
;   [rsp+32..47]  input words 8..11 between blocks; spill slots for working
;                 words 8..11 inside the round loop
;   [rsp+48..63]  input words 12..15 (word 12 is the running block counter)
;   [rsp+64]      remaining length, [rsp+72] source ptr, [rsp+80] dest ptr
;
; Working-state register map inside $L$oop:
;   eax,ebx,ecx,edx = x0..x3     r8d..r11d  = x4..x7
;   esi,edi         = x8,x9 or x10,x11 (the other pair sits in the spill slots)
;   r12d..r15d      = x12..x15   ebp = double-round counter
; ---------------------------------------------------------------------------
ALIGN 64
ChaCha20_ctr32:
        ; Save rdi/rsi (callee-saved on Win64) in the caller-provided home
        ; space above the return address.
        mov QWORD[8+rsp],rdi    ;WIN64 prologue
        mov QWORD[16+rsp],rsi
        ; rax = entry rsp; not read again in this function (presumably kept
        ; for the unwind handler -- confirm against the SEH tables).
        mov rax,rsp
$L$SEH_begin_ChaCha20_ctr32:
        mov rdi,rcx
        mov rsi,rdx
        mov rdx,r8
        mov rcx,r9
        mov r8,QWORD[40+rsp]



        ; Nothing to do for a zero length.
        cmp rdx,0
        je NEAR $L$no_data
        ; r10 = capability qword; it is still live in r10 if control reaches
        ; $L$ChaCha20_4x, which inspects it further.
        mov r10,QWORD[((OPENSSL_ia32cap_P+4))]
        ; Bit 9 set (presumably the SSSE3 feature bit, going by the target's
        ; name) selects the vector path.
        test r10d,512
        jnz NEAR $L$ChaCha20_ssse3

        ; Scalar path: save the callee-saved integer registers it clobbers.
        push rbx

        push rbp

        push r12

        push r13

        push r14

        push r15

        sub rsp,64+24

$L$ctr32_body:


        ; Load input words 4..15 and the +1 counter increment.
        movdqu xmm1,XMMWORD[rcx]
        movdqu xmm2,XMMWORD[16+rcx]
        movdqu xmm3,XMMWORD[r8]
        movdqa xmm4,XMMWORD[$L$one]


        ; Keep a copy of the input words in the frame (see layout above).
        movdqa XMMWORD[16+rsp],xmm1
        movdqa XMMWORD[32+rsp],xmm2
        movdqa XMMWORD[48+rsp],xmm3
        ; rbp = bytes remaining.
        mov rbp,rdx
        jmp NEAR $L$oop_outer

; One iteration per 64-byte block.
ALIGN 32
$L$oop_outer:
        ; x0..x3 = the four constant words ("expand 32-byte k").
        mov eax,0x61707865
        mov ebx,0x3320646e
        mov ecx,0x79622d32
        mov edx,0x6b206574
        ; x4..x7 from the frame copy.
        mov r8d,DWORD[16+rsp]
        mov r9d,DWORD[20+rsp]
        mov r10d,DWORD[24+rsp]
        mov r11d,DWORD[28+rsp]
        ; x12 = current block counter (low dword of xmm3); x13..x15 from frame.
        movd r12d,xmm3
        mov r13d,DWORD[52+rsp]
        mov r14d,DWORD[56+rsp]
        mov r15d,DWORD[60+rsp]

        ; rbp/rsi/rdi are reused as working registers: spill length and both
        ; pointers, then set ebp = 10 loop iterations.
        mov QWORD[((64+0))+rsp],rbp
        mov ebp,10
        mov QWORD[((64+8))+rsp],rsi
        ; Raw encoding of "movq rsi,xmm2": rsi = input words 8 and 9.
DB 102,72,15,126,214
        mov QWORD[((64+16))+rsp],rdi
        ; esi = x8, edi = x9.
        mov rdi,rsi
        shr rdi,32
        jmp NEAR $L$oop

; One iteration = four column quarter-rounds followed by four diagonal
; quarter-rounds, processed two at a time with their instructions interleaved.
; Each quarter-round uses the rotate amounts 16, 12, 8, 7.
ALIGN 32
$L$oop:
        ; Columns (x0,x4,x8,x12) and (x1,x5,x9,x13).
        add eax,r8d
        xor r12d,eax
        rol r12d,16
        add ebx,r9d
        xor r13d,ebx
        rol r13d,16
        add esi,r12d
        xor r8d,esi
        rol r8d,12
        add edi,r13d
        xor r9d,edi
        rol r9d,12
        add eax,r8d
        xor r12d,eax
        rol r12d,8
        add ebx,r9d
        xor r13d,ebx
        rol r13d,8
        add esi,r12d
        xor r8d,esi
        rol r8d,7
        add edi,r13d
        xor r9d,edi
        rol r9d,7
        ; Swap the c-words: park x8,x9 and bring x10,x11 into esi,edi.
        mov DWORD[32+rsp],esi
        mov DWORD[36+rsp],edi
        mov esi,DWORD[40+rsp]
        mov edi,DWORD[44+rsp]
        ; Columns (x2,x6,x10,x14) and (x3,x7,x11,x15).
        add ecx,r10d
        xor r14d,ecx
        rol r14d,16
        add edx,r11d
        xor r15d,edx
        rol r15d,16
        add esi,r14d
        xor r10d,esi
        rol r10d,12
        add edi,r15d
        xor r11d,edi
        rol r11d,12
        add ecx,r10d
        xor r14d,ecx
        rol r14d,8
        add edx,r11d
        xor r15d,edx
        rol r15d,8
        add esi,r14d
        xor r10d,esi
        rol r10d,7
        add edi,r15d
        xor r11d,edi
        rol r11d,7
        ; Diagonals (x0,x5,x10,x15) and (x1,x6,x11,x12); esi,edi still hold
        ; x10,x11.
        add eax,r9d
        xor r15d,eax
        rol r15d,16
        add ebx,r10d
        xor r12d,ebx
        rol r12d,16
        add esi,r15d
        xor r9d,esi
        rol r9d,12
        add edi,r12d
        xor r10d,edi
        rol r10d,12
        add eax,r9d
        xor r15d,eax
        rol r15d,8
        add ebx,r10d
        xor r12d,ebx
        rol r12d,8
        add esi,r15d
        xor r9d,esi
        rol r9d,7
        add edi,r12d
        xor r10d,edi
        rol r10d,7
        ; Swap back: park x10,x11 and reload x8,x9.
        mov DWORD[40+rsp],esi
        mov DWORD[44+rsp],edi
        mov esi,DWORD[32+rsp]
        mov edi,DWORD[36+rsp]
        ; Diagonals (x2,x7,x8,x13) and (x3,x4,x9,x14).
        add ecx,r11d
        xor r13d,ecx
        rol r13d,16
        add edx,r8d
        xor r14d,edx
        rol r14d,16
        add esi,r13d
        xor r11d,esi
        rol r11d,12
        add edi,r14d
        xor r8d,edi
        rol r8d,12
        add ecx,r11d
        xor r13d,ecx
        rol r13d,8
        add edx,r8d
        xor r14d,edx
        rol r14d,8
        add esi,r13d
        xor r11d,esi
        rol r11d,7
        add edi,r14d
        xor r8d,edi
        rol r8d,7
        dec ebp
        jnz NEAR $L$oop
        ; Rounds done. Flush x8,x9 so [rsp+32..47] holds working x8..x11, and
        ; restore length and pointers.
        mov DWORD[36+rsp],edi
        mov DWORD[32+rsp],esi
        mov rbp,QWORD[64+rsp]
        ; xmm1 = input words 8..11 (summed with the working words below).
        movdqa xmm1,xmm2
        mov rsi,QWORD[((64+8))+rsp]
        ; Advance the block counter held in xmm3 for the next block.
        paddd xmm3,xmm4
        mov rdi,QWORD[((64+16))+rsp]

        ; Feed-forward: add the input state to the working state. [rsp+48]
        ; still holds this block's counter; it is refreshed from xmm3 later.
        add eax,0x61707865
        add ebx,0x3320646e
        add ecx,0x79622d32
        add edx,0x6b206574
        add r8d,DWORD[16+rsp]
        add r9d,DWORD[20+rsp]
        add r10d,DWORD[24+rsp]
        add r11d,DWORD[28+rsp]
        add r12d,DWORD[48+rsp]
        add r13d,DWORD[52+rsp]
        add r14d,DWORD[56+rsp]
        add r15d,DWORD[60+rsp]
        paddd xmm1,XMMWORD[32+rsp]

        ; Fewer than 64 bytes left: finish byte-by-byte.
        cmp rbp,64
        jb NEAR $L$tail

        ; Full block: XOR 64 source bytes with the key stream.
        xor eax,DWORD[rsi]
        xor ebx,DWORD[4+rsi]
        xor ecx,DWORD[8+rsi]
        xor edx,DWORD[12+rsi]
        xor r8d,DWORD[16+rsi]
        xor r9d,DWORD[20+rsi]
        xor r10d,DWORD[24+rsi]
        xor r11d,DWORD[28+rsi]
        movdqu xmm0,XMMWORD[32+rsi]
        xor r12d,DWORD[48+rsi]
        xor r13d,DWORD[52+rsi]
        xor r14d,DWORD[56+rsi]
        xor r15d,DWORD[60+rsi]
        lea rsi,[64+rsi]
        pxor xmm0,xmm1

        ; Re-arm the frame for the next block: input words 8..11 back into the
        ; spill slots and the incremented counter into word 12.
        movdqa XMMWORD[32+rsp],xmm2
        movd DWORD[48+rsp],xmm3

        ; Store the 64 result bytes.
        mov DWORD[rdi],eax
        mov DWORD[4+rdi],ebx
        mov DWORD[8+rdi],ecx
        mov DWORD[12+rdi],edx
        mov DWORD[16+rdi],r8d
        mov DWORD[20+rdi],r9d
        mov DWORD[24+rdi],r10d
        mov DWORD[28+rdi],r11d
        movdqu XMMWORD[32+rdi],xmm0
        mov DWORD[48+rdi],r12d
        mov DWORD[52+rdi],r13d
        mov DWORD[56+rdi],r14d
        mov DWORD[60+rdi],r15d
        lea rdi,[64+rdi]

        sub rbp,64
        jnz NEAR $L$oop_outer

        jmp NEAR $L$done

; Final partial block (1..63 bytes): write the whole 64-byte key-stream block
; to [rsp..rsp+63], then XOR it into the data one byte at a time.
ALIGN 16
$L$tail:
        mov DWORD[rsp],eax
        mov DWORD[4+rsp],ebx
        ; ebx has been stored, so rbx becomes the byte index (0).
        xor rbx,rbx
        mov DWORD[8+rsp],ecx
        mov DWORD[12+rsp],edx
        mov DWORD[16+rsp],r8d
        mov DWORD[20+rsp],r9d
        mov DWORD[24+rsp],r10d
        mov DWORD[28+rsp],r11d
        movdqa XMMWORD[32+rsp],xmm1
        mov DWORD[48+rsp],r12d
        mov DWORD[52+rsp],r13d
        mov DWORD[56+rsp],r14d
        mov DWORD[60+rsp],r15d

$L$oop_tail:
        ; dst[i] = src[i] ^ keystream[i]; rbp counts the remaining bytes.
        movzx eax,BYTE[rbx*1+rsi]
        movzx edx,BYTE[rbx*1+rsp]
        lea rbx,[1+rbx]
        xor eax,edx
        mov BYTE[((-1))+rbx*1+rdi],al
        dec rbp
        jnz NEAR $L$oop_tail

$L$done:
        ; rsi = rsp as it was before the six pushes; reload the saved
        ; registers from below it and drop the frame.
        lea rsi,[((64+24+48))+rsp]
        mov r15,QWORD[((-48))+rsi]

        mov r14,QWORD[((-40))+rsi]

        mov r13,QWORD[((-32))+rsi]

        mov r12,QWORD[((-24))+rsi]

        mov rbp,QWORD[((-16))+rsi]

        mov rbx,QWORD[((-8))+rsi]

        lea rsp,[rsi]

$L$no_data:
        ; Restore the caller's rdi/rsi from the home space and return.
        mov rdi,QWORD[8+rsp]    ;WIN64 epilogue
        mov rsi,QWORD[16+rsp]
        DB 0F3h,0C3h            ;repret

$L$SEH_end_ChaCha20_ctr32:

; ---------------------------------------------------------------------------
; ChaCha20_ssse3 -- one 64-byte block per iteration in xmm0..xmm3 (one state
; row per register).
;
; Normally reached through $L$ChaCha20_ssse3 from ChaCha20_ctr32's dispatch,
; with rdi/rsi/rdx/rcx/r8 already set up as described there and the caller's
; rdi/rsi already saved in the home space. The ChaCha20_ssse3 label itself
; carries its own copy of the Win64 argument shuffle; it is not declared
; global in the part of the file visible here.
;
; Lengths above 128 bytes are handed to $L$ChaCha20_4x. That code can branch
; back to $L$do_sse3_after_all with r9 already holding the entry rsp.
;
; Frame after "sub rsp,64+40" (r9 = entry rsp; rsp and r9-40 are 16-aligned):
;   [rsp+ 0..63]  input state rows 0..3 (row 3 carries the running counter);
;                 overwritten with key stream in $L$tail_ssse3
;   [r9-40]       saved xmm6      [r9-24]  saved xmm7
; xmm6/xmm7 are callee-saved on Win64 and hold the rot16/rot24 PSHUFB masks.
; ---------------------------------------------------------------------------
ALIGN 32
ChaCha20_ssse3:
        mov QWORD[8+rsp],rdi    ;WIN64 prologue
        mov QWORD[16+rsp],rsi
        mov rax,rsp
$L$SEH_begin_ChaCha20_ssse3:
        mov rdi,rcx
        mov rsi,rdx
        mov rdx,r8
        mov rcx,r9
        mov r8,QWORD[40+rsp]


$L$ChaCha20_ssse3:

        ; r9 = entry rsp, used to address the XMM save area and to unwind.
        mov r9,rsp

        ; More than 128 bytes: use the 4-block code instead.
        cmp rdx,128
        ja NEAR $L$ChaCha20_4x

$L$do_sse3_after_all:
        sub rsp,64+40
        movaps XMMWORD[(-40)+r9],xmm6
        movaps XMMWORD[(-24)+r9],xmm7
$L$ssse3_body:
        ; Rows 0..3 of the input state, plus the two rotate masks.
        movdqa xmm0,XMMWORD[$L$sigma]
        movdqu xmm1,XMMWORD[rcx]
        movdqu xmm2,XMMWORD[16+rcx]
        movdqu xmm3,XMMWORD[r8]
        movdqa xmm6,XMMWORD[$L$rot16]
        movdqa xmm7,XMMWORD[$L$rot24]

        ; Keep the input state in the frame for the feed-forward and for
        ; reloading on later blocks.
        movdqa XMMWORD[rsp],xmm0
        movdqa XMMWORD[16+rsp],xmm1
        movdqa XMMWORD[32+rsp],xmm2
        movdqa XMMWORD[48+rsp],xmm3
        ; r8 = 10 loop iterations (the pointer it held is no longer needed).
        mov r8,10
        jmp NEAR $L$oop_ssse3

; Second and later blocks: reload the input rows and bump the counter row by
; $L$one, writing the new value back to the frame.
ALIGN 32
$L$oop_outer_ssse3:
        movdqa xmm3,XMMWORD[$L$one]
        movdqa xmm0,XMMWORD[rsp]
        movdqa xmm1,XMMWORD[16+rsp]
        movdqa xmm2,XMMWORD[32+rsp]
        paddd xmm3,XMMWORD[48+rsp]
        mov r8,10
        movdqa XMMWORD[48+rsp],xmm3
        jmp NEAR $L$oop_ssse3

; One iteration = quarter-rounds on all four columns, then on all four
; diagonals. xmm4 is scratch for the shift-based rotates.
ALIGN 32
$L$oop_ssse3:
        paddd xmm0,xmm1
        pxor xmm3,xmm0
        ; Raw encoding of "pshufb xmm3,xmm6": rotate row 3 left by 16.
DB 102,15,56,0,222
        paddd xmm2,xmm3
        pxor xmm1,xmm2
        ; Row 1 rotated left by 12 (shift right 20 | shift left 12).
        movdqa xmm4,xmm1
        psrld xmm1,20
        pslld xmm4,12
        por xmm1,xmm4
        paddd xmm0,xmm1
        pxor xmm3,xmm0
        ; Raw encoding of "pshufb xmm3,xmm7": rotate row 3 left by 8.
DB 102,15,56,0,223
        paddd xmm2,xmm3
        pxor xmm1,xmm2
        ; Row 1 rotated left by 7 (shift right 25 | shift left 7).
        movdqa xmm4,xmm1
        psrld xmm1,25
        pslld xmm4,7
        por xmm1,xmm4
        ; Rotate rows 1, 2, 3 by one, two and three lanes so that each
        ; diagonal lines up in a column.
        pshufd xmm2,xmm2,78
        pshufd xmm1,xmm1,57
        pshufd xmm3,xmm3,147
        nop
        ; Same quarter-round sequence, now acting on the diagonals.
        paddd xmm0,xmm1
        pxor xmm3,xmm0
DB 102,15,56,0,222
        paddd xmm2,xmm3
        pxor xmm1,xmm2
        movdqa xmm4,xmm1
        psrld xmm1,20
        pslld xmm4,12
        por xmm1,xmm4
        paddd xmm0,xmm1
        pxor xmm3,xmm0
DB 102,15,56,0,223
        paddd xmm2,xmm3
        pxor xmm1,xmm2
        movdqa xmm4,xmm1
        psrld xmm1,25
        pslld xmm4,7
        por xmm1,xmm4
        ; Undo the lane rotation (rows 1 and 3 use the opposite immediates).
        pshufd xmm2,xmm2,78
        pshufd xmm1,xmm1,147
        pshufd xmm3,xmm3,57
        dec r8
        jnz NEAR $L$oop_ssse3
        ; Feed-forward: add the saved input state.
        paddd xmm0,XMMWORD[rsp]
        paddd xmm1,XMMWORD[16+rsp]
        paddd xmm2,XMMWORD[32+rsp]
        paddd xmm3,XMMWORD[48+rsp]

        ; Fewer than 64 bytes left: finish byte-by-byte.
        cmp rdx,64
        jb NEAR $L$tail_ssse3

        ; Full block: XOR 64 source bytes with the key stream and store.
        movdqu xmm4,XMMWORD[rsi]
        movdqu xmm5,XMMWORD[16+rsi]
        pxor xmm0,xmm4
        movdqu xmm4,XMMWORD[32+rsi]
        pxor xmm1,xmm5
        movdqu xmm5,XMMWORD[48+rsi]
        lea rsi,[64+rsi]
        pxor xmm2,xmm4
        pxor xmm3,xmm5

        movdqu XMMWORD[rdi],xmm0
        movdqu XMMWORD[16+rdi],xmm1
        movdqu XMMWORD[32+rdi],xmm2
        movdqu XMMWORD[48+rdi],xmm3
        lea rdi,[64+rdi]

        sub rdx,64
        jnz NEAR $L$oop_outer_ssse3

        jmp NEAR $L$done_ssse3

; Final partial block: spill the key stream over the saved state (no longer
; needed) and XOR it into the data one byte at a time.
ALIGN 16
$L$tail_ssse3:
        movdqa XMMWORD[rsp],xmm0
        movdqa XMMWORD[16+rsp],xmm1
        movdqa XMMWORD[32+rsp],xmm2
        movdqa XMMWORD[48+rsp],xmm3
        ; r8 = byte index.
        xor r8,r8

$L$oop_tail_ssse3:
        ; dst[i] = src[i] ^ keystream[i]; rdx counts the remaining bytes.
        movzx eax,BYTE[r8*1+rsi]
        movzx ecx,BYTE[r8*1+rsp]
        lea r8,[1+r8]
        xor eax,ecx
        mov BYTE[((-1))+r8*1+rdi],al
        dec rdx
        jnz NEAR $L$oop_tail_ssse3

$L$done_ssse3:
        ; Restore the callee-saved XMM registers and the entry stack pointer.
        movaps xmm6,XMMWORD[((-40))+r9]
        movaps xmm7,XMMWORD[((-24))+r9]
        lea rsp,[r9]

$L$ssse3_epilogue:
        mov rdi,QWORD[8+rsp]    ;WIN64 epilogue
        mov rsi,QWORD[16+rsp]
        DB 0F3h,0C3h            ;repret

497$L$SEH_end_ChaCha20_ssse3: 498 499ALIGN 32 500ChaCha20_4x: 501 mov QWORD[8+rsp],rdi ;WIN64 prologue 502 mov QWORD[16+rsp],rsi 503 mov rax,rsp 504$L$SEH_begin_ChaCha20_4x: 505 mov rdi,rcx 506 mov rsi,rdx 507 mov rdx,r8 508 mov rcx,r9 509 mov r8,QWORD[40+rsp] 510 511 512$L$ChaCha20_4x: 513 514 mov r9,rsp 515 516 mov r11,r10 517 shr r10,32 518 test r10,32 519 jnz NEAR $L$ChaCha20_8x 520 cmp rdx,192 521 ja NEAR $L$proceed4x 522 523 and r11,71303168 524 cmp r11,4194304 525 je NEAR $L$do_sse3_after_all 526 527$L$proceed4x: 528 sub rsp,0x140+168 529 movaps XMMWORD[(-168)+r9],xmm6 530 movaps XMMWORD[(-152)+r9],xmm7 531 movaps XMMWORD[(-136)+r9],xmm8 532 movaps XMMWORD[(-120)+r9],xmm9 533 movaps XMMWORD[(-104)+r9],xmm10 534 movaps XMMWORD[(-88)+r9],xmm11 535 movaps XMMWORD[(-72)+r9],xmm12 536 movaps XMMWORD[(-56)+r9],xmm13 537 movaps XMMWORD[(-40)+r9],xmm14 538 movaps XMMWORD[(-24)+r9],xmm15 539$L$4x_body: 540 movdqa xmm11,XMMWORD[$L$sigma] 541 movdqu xmm15,XMMWORD[rcx] 542 movdqu xmm7,XMMWORD[16+rcx] 543 movdqu xmm3,XMMWORD[r8] 544 lea rcx,[256+rsp] 545 lea r10,[$L$rot16] 546 lea r11,[$L$rot24] 547 548 pshufd xmm8,xmm11,0x00 549 pshufd xmm9,xmm11,0x55 550 movdqa XMMWORD[64+rsp],xmm8 551 pshufd xmm10,xmm11,0xaa 552 movdqa XMMWORD[80+rsp],xmm9 553 pshufd xmm11,xmm11,0xff 554 movdqa XMMWORD[96+rsp],xmm10 555 movdqa XMMWORD[112+rsp],xmm11 556 557 pshufd xmm12,xmm15,0x00 558 pshufd xmm13,xmm15,0x55 559 movdqa XMMWORD[(128-256)+rcx],xmm12 560 pshufd xmm14,xmm15,0xaa 561 movdqa XMMWORD[(144-256)+rcx],xmm13 562 pshufd xmm15,xmm15,0xff 563 movdqa XMMWORD[(160-256)+rcx],xmm14 564 movdqa XMMWORD[(176-256)+rcx],xmm15 565 566 pshufd xmm4,xmm7,0x00 567 pshufd xmm5,xmm7,0x55 568 movdqa XMMWORD[(192-256)+rcx],xmm4 569 pshufd xmm6,xmm7,0xaa 570 movdqa XMMWORD[(208-256)+rcx],xmm5 571 pshufd xmm7,xmm7,0xff 572 movdqa XMMWORD[(224-256)+rcx],xmm6 573 movdqa XMMWORD[(240-256)+rcx],xmm7 574 575 pshufd xmm0,xmm3,0x00 576 pshufd xmm1,xmm3,0x55 577 paddd xmm0,XMMWORD[$L$inc] 578 pshufd 
xmm2,xmm3,0xaa 579 movdqa XMMWORD[(272-256)+rcx],xmm1 580 pshufd xmm3,xmm3,0xff 581 movdqa XMMWORD[(288-256)+rcx],xmm2 582 movdqa XMMWORD[(304-256)+rcx],xmm3 583 584 jmp NEAR $L$oop_enter4x 585 586ALIGN 32 587$L$oop_outer4x: 588 movdqa xmm8,XMMWORD[64+rsp] 589 movdqa xmm9,XMMWORD[80+rsp] 590 movdqa xmm10,XMMWORD[96+rsp] 591 movdqa xmm11,XMMWORD[112+rsp] 592 movdqa xmm12,XMMWORD[((128-256))+rcx] 593 movdqa xmm13,XMMWORD[((144-256))+rcx] 594 movdqa xmm14,XMMWORD[((160-256))+rcx] 595 movdqa xmm15,XMMWORD[((176-256))+rcx] 596 movdqa xmm4,XMMWORD[((192-256))+rcx] 597 movdqa xmm5,XMMWORD[((208-256))+rcx] 598 movdqa xmm6,XMMWORD[((224-256))+rcx] 599 movdqa xmm7,XMMWORD[((240-256))+rcx] 600 movdqa xmm0,XMMWORD[((256-256))+rcx] 601 movdqa xmm1,XMMWORD[((272-256))+rcx] 602 movdqa xmm2,XMMWORD[((288-256))+rcx] 603 movdqa xmm3,XMMWORD[((304-256))+rcx] 604 paddd xmm0,XMMWORD[$L$four] 605 606$L$oop_enter4x: 607 movdqa XMMWORD[32+rsp],xmm6 608 movdqa XMMWORD[48+rsp],xmm7 609 movdqa xmm7,XMMWORD[r10] 610 mov eax,10 611 movdqa XMMWORD[(256-256)+rcx],xmm0 612 jmp NEAR $L$oop4x 613 614ALIGN 32 615$L$oop4x: 616 paddd xmm8,xmm12 617 paddd xmm9,xmm13 618 pxor xmm0,xmm8 619 pxor xmm1,xmm9 620DB 102,15,56,0,199 621DB 102,15,56,0,207 622 paddd xmm4,xmm0 623 paddd xmm5,xmm1 624 pxor xmm12,xmm4 625 pxor xmm13,xmm5 626 movdqa xmm6,xmm12 627 pslld xmm12,12 628 psrld xmm6,20 629 movdqa xmm7,xmm13 630 pslld xmm13,12 631 por xmm12,xmm6 632 psrld xmm7,20 633 movdqa xmm6,XMMWORD[r11] 634 por xmm13,xmm7 635 paddd xmm8,xmm12 636 paddd xmm9,xmm13 637 pxor xmm0,xmm8 638 pxor xmm1,xmm9 639DB 102,15,56,0,198 640DB 102,15,56,0,206 641 paddd xmm4,xmm0 642 paddd xmm5,xmm1 643 pxor xmm12,xmm4 644 pxor xmm13,xmm5 645 movdqa xmm7,xmm12 646 pslld xmm12,7 647 psrld xmm7,25 648 movdqa xmm6,xmm13 649 pslld xmm13,7 650 por xmm12,xmm7 651 psrld xmm6,25 652 movdqa xmm7,XMMWORD[r10] 653 por xmm13,xmm6 654 movdqa XMMWORD[rsp],xmm4 655 movdqa XMMWORD[16+rsp],xmm5 656 movdqa xmm4,XMMWORD[32+rsp] 657 movdqa 
xmm5,XMMWORD[48+rsp] 658 paddd xmm10,xmm14 659 paddd xmm11,xmm15 660 pxor xmm2,xmm10 661 pxor xmm3,xmm11 662DB 102,15,56,0,215 663DB 102,15,56,0,223 664 paddd xmm4,xmm2 665 paddd xmm5,xmm3 666 pxor xmm14,xmm4 667 pxor xmm15,xmm5 668 movdqa xmm6,xmm14 669 pslld xmm14,12 670 psrld xmm6,20 671 movdqa xmm7,xmm15 672 pslld xmm15,12 673 por xmm14,xmm6 674 psrld xmm7,20 675 movdqa xmm6,XMMWORD[r11] 676 por xmm15,xmm7 677 paddd xmm10,xmm14 678 paddd xmm11,xmm15 679 pxor xmm2,xmm10 680 pxor xmm3,xmm11 681DB 102,15,56,0,214 682DB 102,15,56,0,222 683 paddd xmm4,xmm2 684 paddd xmm5,xmm3 685 pxor xmm14,xmm4 686 pxor xmm15,xmm5 687 movdqa xmm7,xmm14 688 pslld xmm14,7 689 psrld xmm7,25 690 movdqa xmm6,xmm15 691 pslld xmm15,7 692 por xmm14,xmm7 693 psrld xmm6,25 694 movdqa xmm7,XMMWORD[r10] 695 por xmm15,xmm6 696 paddd xmm8,xmm13 697 paddd xmm9,xmm14 698 pxor xmm3,xmm8 699 pxor xmm0,xmm9 700DB 102,15,56,0,223 701DB 102,15,56,0,199 702 paddd xmm4,xmm3 703 paddd xmm5,xmm0 704 pxor xmm13,xmm4 705 pxor xmm14,xmm5 706 movdqa xmm6,xmm13 707 pslld xmm13,12 708 psrld xmm6,20 709 movdqa xmm7,xmm14 710 pslld xmm14,12 711 por xmm13,xmm6 712 psrld xmm7,20 713 movdqa xmm6,XMMWORD[r11] 714 por xmm14,xmm7 715 paddd xmm8,xmm13 716 paddd xmm9,xmm14 717 pxor xmm3,xmm8 718 pxor xmm0,xmm9 719DB 102,15,56,0,222 720DB 102,15,56,0,198 721 paddd xmm4,xmm3 722 paddd xmm5,xmm0 723 pxor xmm13,xmm4 724 pxor xmm14,xmm5 725 movdqa xmm7,xmm13 726 pslld xmm13,7 727 psrld xmm7,25 728 movdqa xmm6,xmm14 729 pslld xmm14,7 730 por xmm13,xmm7 731 psrld xmm6,25 732 movdqa xmm7,XMMWORD[r10] 733 por xmm14,xmm6 734 movdqa XMMWORD[32+rsp],xmm4 735 movdqa XMMWORD[48+rsp],xmm5 736 movdqa xmm4,XMMWORD[rsp] 737 movdqa xmm5,XMMWORD[16+rsp] 738 paddd xmm10,xmm15 739 paddd xmm11,xmm12 740 pxor xmm1,xmm10 741 pxor xmm2,xmm11 742DB 102,15,56,0,207 743DB 102,15,56,0,215 744 paddd xmm4,xmm1 745 paddd xmm5,xmm2 746 pxor xmm15,xmm4 747 pxor xmm12,xmm5 748 movdqa xmm6,xmm15 749 pslld xmm15,12 750 psrld xmm6,20 751 movdqa xmm7,xmm12 752 
pslld xmm12,12 753 por xmm15,xmm6 754 psrld xmm7,20 755 movdqa xmm6,XMMWORD[r11] 756 por xmm12,xmm7 757 paddd xmm10,xmm15 758 paddd xmm11,xmm12 759 pxor xmm1,xmm10 760 pxor xmm2,xmm11 761DB 102,15,56,0,206 762DB 102,15,56,0,214 763 paddd xmm4,xmm1 764 paddd xmm5,xmm2 765 pxor xmm15,xmm4 766 pxor xmm12,xmm5 767 movdqa xmm7,xmm15 768 pslld xmm15,7 769 psrld xmm7,25 770 movdqa xmm6,xmm12 771 pslld xmm12,7 772 por xmm15,xmm7 773 psrld xmm6,25 774 movdqa xmm7,XMMWORD[r10] 775 por xmm12,xmm6 776 dec eax 777 jnz NEAR $L$oop4x 778 779 paddd xmm8,XMMWORD[64+rsp] 780 paddd xmm9,XMMWORD[80+rsp] 781 paddd xmm10,XMMWORD[96+rsp] 782 paddd xmm11,XMMWORD[112+rsp] 783 784 movdqa xmm6,xmm8 785 punpckldq xmm8,xmm9 786 movdqa xmm7,xmm10 787 punpckldq xmm10,xmm11 788 punpckhdq xmm6,xmm9 789 punpckhdq xmm7,xmm11 790 movdqa xmm9,xmm8 791 punpcklqdq xmm8,xmm10 792 movdqa xmm11,xmm6 793 punpcklqdq xmm6,xmm7 794 punpckhqdq xmm9,xmm10 795 punpckhqdq xmm11,xmm7 796 paddd xmm12,XMMWORD[((128-256))+rcx] 797 paddd xmm13,XMMWORD[((144-256))+rcx] 798 paddd xmm14,XMMWORD[((160-256))+rcx] 799 paddd xmm15,XMMWORD[((176-256))+rcx] 800 801 movdqa XMMWORD[rsp],xmm8 802 movdqa XMMWORD[16+rsp],xmm9 803 movdqa xmm8,XMMWORD[32+rsp] 804 movdqa xmm9,XMMWORD[48+rsp] 805 806 movdqa xmm10,xmm12 807 punpckldq xmm12,xmm13 808 movdqa xmm7,xmm14 809 punpckldq xmm14,xmm15 810 punpckhdq xmm10,xmm13 811 punpckhdq xmm7,xmm15 812 movdqa xmm13,xmm12 813 punpcklqdq xmm12,xmm14 814 movdqa xmm15,xmm10 815 punpcklqdq xmm10,xmm7 816 punpckhqdq xmm13,xmm14 817 punpckhqdq xmm15,xmm7 818 paddd xmm4,XMMWORD[((192-256))+rcx] 819 paddd xmm5,XMMWORD[((208-256))+rcx] 820 paddd xmm8,XMMWORD[((224-256))+rcx] 821 paddd xmm9,XMMWORD[((240-256))+rcx] 822 823 movdqa XMMWORD[32+rsp],xmm6 824 movdqa XMMWORD[48+rsp],xmm11 825 826 movdqa xmm14,xmm4 827 punpckldq xmm4,xmm5 828 movdqa xmm7,xmm8 829 punpckldq xmm8,xmm9 830 punpckhdq xmm14,xmm5 831 punpckhdq xmm7,xmm9 832 movdqa xmm5,xmm4 833 punpcklqdq xmm4,xmm8 834 movdqa xmm9,xmm14 835 
punpcklqdq xmm14,xmm7 836 punpckhqdq xmm5,xmm8 837 punpckhqdq xmm9,xmm7 838 paddd xmm0,XMMWORD[((256-256))+rcx] 839 paddd xmm1,XMMWORD[((272-256))+rcx] 840 paddd xmm2,XMMWORD[((288-256))+rcx] 841 paddd xmm3,XMMWORD[((304-256))+rcx] 842 843 movdqa xmm8,xmm0 844 punpckldq xmm0,xmm1 845 movdqa xmm7,xmm2 846 punpckldq xmm2,xmm3 847 punpckhdq xmm8,xmm1 848 punpckhdq xmm7,xmm3 849 movdqa xmm1,xmm0 850 punpcklqdq xmm0,xmm2 851 movdqa xmm3,xmm8 852 punpcklqdq xmm8,xmm7 853 punpckhqdq xmm1,xmm2 854 punpckhqdq xmm3,xmm7 855 cmp rdx,64*4 856 jb NEAR $L$tail4x 857 858 movdqu xmm6,XMMWORD[rsi] 859 movdqu xmm11,XMMWORD[16+rsi] 860 movdqu xmm2,XMMWORD[32+rsi] 861 movdqu xmm7,XMMWORD[48+rsi] 862 pxor xmm6,XMMWORD[rsp] 863 pxor xmm11,xmm12 864 pxor xmm2,xmm4 865 pxor xmm7,xmm0 866 867 movdqu XMMWORD[rdi],xmm6 868 movdqu xmm6,XMMWORD[64+rsi] 869 movdqu XMMWORD[16+rdi],xmm11 870 movdqu xmm11,XMMWORD[80+rsi] 871 movdqu XMMWORD[32+rdi],xmm2 872 movdqu xmm2,XMMWORD[96+rsi] 873 movdqu XMMWORD[48+rdi],xmm7 874 movdqu xmm7,XMMWORD[112+rsi] 875 lea rsi,[128+rsi] 876 pxor xmm6,XMMWORD[16+rsp] 877 pxor xmm11,xmm13 878 pxor xmm2,xmm5 879 pxor xmm7,xmm1 880 881 movdqu XMMWORD[64+rdi],xmm6 882 movdqu xmm6,XMMWORD[rsi] 883 movdqu XMMWORD[80+rdi],xmm11 884 movdqu xmm11,XMMWORD[16+rsi] 885 movdqu XMMWORD[96+rdi],xmm2 886 movdqu xmm2,XMMWORD[32+rsi] 887 movdqu XMMWORD[112+rdi],xmm7 888 lea rdi,[128+rdi] 889 movdqu xmm7,XMMWORD[48+rsi] 890 pxor xmm6,XMMWORD[32+rsp] 891 pxor xmm11,xmm10 892 pxor xmm2,xmm14 893 pxor xmm7,xmm8 894 895 movdqu XMMWORD[rdi],xmm6 896 movdqu xmm6,XMMWORD[64+rsi] 897 movdqu XMMWORD[16+rdi],xmm11 898 movdqu xmm11,XMMWORD[80+rsi] 899 movdqu XMMWORD[32+rdi],xmm2 900 movdqu xmm2,XMMWORD[96+rsi] 901 movdqu XMMWORD[48+rdi],xmm7 902 movdqu xmm7,XMMWORD[112+rsi] 903 lea rsi,[128+rsi] 904 pxor xmm6,XMMWORD[48+rsp] 905 pxor xmm11,xmm15 906 pxor xmm2,xmm9 907 pxor xmm7,xmm3 908 movdqu XMMWORD[64+rdi],xmm6 909 movdqu XMMWORD[80+rdi],xmm11 910 movdqu XMMWORD[96+rdi],xmm2 911 movdqu 
XMMWORD[112+rdi],xmm7 912 lea rdi,[128+rdi] 913 914 sub rdx,64*4 915 jnz NEAR $L$oop_outer4x 916 917 jmp NEAR $L$done4x 918 919$L$tail4x: 920 cmp rdx,192 921 jae NEAR $L$192_or_more4x 922 cmp rdx,128 923 jae NEAR $L$128_or_more4x 924 cmp rdx,64 925 jae NEAR $L$64_or_more4x 926 927 928 xor r10,r10 929 930 movdqa XMMWORD[16+rsp],xmm12 931 movdqa XMMWORD[32+rsp],xmm4 932 movdqa XMMWORD[48+rsp],xmm0 933 jmp NEAR $L$oop_tail4x 934 935ALIGN 32 936$L$64_or_more4x: 937 movdqu xmm6,XMMWORD[rsi] 938 movdqu xmm11,XMMWORD[16+rsi] 939 movdqu xmm2,XMMWORD[32+rsi] 940 movdqu xmm7,XMMWORD[48+rsi] 941 pxor xmm6,XMMWORD[rsp] 942 pxor xmm11,xmm12 943 pxor xmm2,xmm4 944 pxor xmm7,xmm0 945 movdqu XMMWORD[rdi],xmm6 946 movdqu XMMWORD[16+rdi],xmm11 947 movdqu XMMWORD[32+rdi],xmm2 948 movdqu XMMWORD[48+rdi],xmm7 949 je NEAR $L$done4x 950 951 movdqa xmm6,XMMWORD[16+rsp] 952 lea rsi,[64+rsi] 953 xor r10,r10 954 movdqa XMMWORD[rsp],xmm6 955 movdqa XMMWORD[16+rsp],xmm13 956 lea rdi,[64+rdi] 957 movdqa XMMWORD[32+rsp],xmm5 958 sub rdx,64 959 movdqa XMMWORD[48+rsp],xmm1 960 jmp NEAR $L$oop_tail4x 961 962ALIGN 32 963$L$128_or_more4x: 964 movdqu xmm6,XMMWORD[rsi] 965 movdqu xmm11,XMMWORD[16+rsi] 966 movdqu xmm2,XMMWORD[32+rsi] 967 movdqu xmm7,XMMWORD[48+rsi] 968 pxor xmm6,XMMWORD[rsp] 969 pxor xmm11,xmm12 970 pxor xmm2,xmm4 971 pxor xmm7,xmm0 972 973 movdqu XMMWORD[rdi],xmm6 974 movdqu xmm6,XMMWORD[64+rsi] 975 movdqu XMMWORD[16+rdi],xmm11 976 movdqu xmm11,XMMWORD[80+rsi] 977 movdqu XMMWORD[32+rdi],xmm2 978 movdqu xmm2,XMMWORD[96+rsi] 979 movdqu XMMWORD[48+rdi],xmm7 980 movdqu xmm7,XMMWORD[112+rsi] 981 pxor xmm6,XMMWORD[16+rsp] 982 pxor xmm11,xmm13 983 pxor xmm2,xmm5 984 pxor xmm7,xmm1 985 movdqu XMMWORD[64+rdi],xmm6 986 movdqu XMMWORD[80+rdi],xmm11 987 movdqu XMMWORD[96+rdi],xmm2 988 movdqu XMMWORD[112+rdi],xmm7 989 je NEAR $L$done4x 990 991 movdqa xmm6,XMMWORD[32+rsp] 992 lea rsi,[128+rsi] 993 xor r10,r10 994 movdqa XMMWORD[rsp],xmm6 995 movdqa XMMWORD[16+rsp],xmm10 996 lea rdi,[128+rdi] 997 
movdqa XMMWORD[32+rsp],xmm14 998 sub rdx,128 999 movdqa XMMWORD[48+rsp],xmm8 1000 jmp NEAR $L$oop_tail4x 1001 1002ALIGN 32 1003$L$192_or_more4x: 1004 movdqu xmm6,XMMWORD[rsi] 1005 movdqu xmm11,XMMWORD[16+rsi] 1006 movdqu xmm2,XMMWORD[32+rsi] 1007 movdqu xmm7,XMMWORD[48+rsi] 1008 pxor xmm6,XMMWORD[rsp] 1009 pxor xmm11,xmm12 1010 pxor xmm2,xmm4 1011 pxor xmm7,xmm0 1012 1013 movdqu XMMWORD[rdi],xmm6 1014 movdqu xmm6,XMMWORD[64+rsi] 1015 movdqu XMMWORD[16+rdi],xmm11 1016 movdqu xmm11,XMMWORD[80+rsi] 1017 movdqu XMMWORD[32+rdi],xmm2 1018 movdqu xmm2,XMMWORD[96+rsi] 1019 movdqu XMMWORD[48+rdi],xmm7 1020 movdqu xmm7,XMMWORD[112+rsi] 1021 lea rsi,[128+rsi] 1022 pxor xmm6,XMMWORD[16+rsp] 1023 pxor xmm11,xmm13 1024 pxor xmm2,xmm5 1025 pxor xmm7,xmm1 1026 1027 movdqu XMMWORD[64+rdi],xmm6 1028 movdqu xmm6,XMMWORD[rsi] 1029 movdqu XMMWORD[80+rdi],xmm11 1030 movdqu xmm11,XMMWORD[16+rsi] 1031 movdqu XMMWORD[96+rdi],xmm2 1032 movdqu xmm2,XMMWORD[32+rsi] 1033 movdqu XMMWORD[112+rdi],xmm7 1034 lea rdi,[128+rdi] 1035 movdqu xmm7,XMMWORD[48+rsi] 1036 pxor xmm6,XMMWORD[32+rsp] 1037 pxor xmm11,xmm10 1038 pxor xmm2,xmm14 1039 pxor xmm7,xmm8 1040 movdqu XMMWORD[rdi],xmm6 1041 movdqu XMMWORD[16+rdi],xmm11 1042 movdqu XMMWORD[32+rdi],xmm2 1043 movdqu XMMWORD[48+rdi],xmm7 1044 je NEAR $L$done4x 1045 1046 movdqa xmm6,XMMWORD[48+rsp] 1047 lea rsi,[64+rsi] 1048 xor r10,r10 1049 movdqa XMMWORD[rsp],xmm6 1050 movdqa XMMWORD[16+rsp],xmm15 1051 lea rdi,[64+rdi] 1052 movdqa XMMWORD[32+rsp],xmm9 1053 sub rdx,192 1054 movdqa XMMWORD[48+rsp],xmm3 1055 1056$L$oop_tail4x: 1057 movzx eax,BYTE[r10*1+rsi] 1058 movzx ecx,BYTE[r10*1+rsp] 1059 lea r10,[1+r10] 1060 xor eax,ecx 1061 mov BYTE[((-1))+r10*1+rdi],al 1062 dec rdx 1063 jnz NEAR $L$oop_tail4x 1064 1065$L$done4x: 1066 movaps xmm6,XMMWORD[((-168))+r9] 1067 movaps xmm7,XMMWORD[((-152))+r9] 1068 movaps xmm8,XMMWORD[((-136))+r9] 1069 movaps xmm9,XMMWORD[((-120))+r9] 1070 movaps xmm10,XMMWORD[((-104))+r9] 1071 movaps xmm11,XMMWORD[((-88))+r9] 1072 movaps 
xmm12,XMMWORD[((-72))+r9] 1073 movaps xmm13,XMMWORD[((-56))+r9] 1074 movaps xmm14,XMMWORD[((-40))+r9] 1075 movaps xmm15,XMMWORD[((-24))+r9] 1076 lea rsp,[r9] 1077 1078$L$4x_epilogue: 1079 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 1080 mov rsi,QWORD[16+rsp] 1081 DB 0F3h,0C3h ;repret 1082 1083$L$SEH_end_ChaCha20_4x: 1084 1085ALIGN 32 1086ChaCha20_8x: 1087 mov QWORD[8+rsp],rdi ;WIN64 prologue 1088 mov QWORD[16+rsp],rsi 1089 mov rax,rsp 1090$L$SEH_begin_ChaCha20_8x: 1091 mov rdi,rcx 1092 mov rsi,rdx 1093 mov rdx,r8 1094 mov rcx,r9 1095 mov r8,QWORD[40+rsp] 1096 1097 1098$L$ChaCha20_8x: 1099 1100 mov r9,rsp 1101 1102 sub rsp,0x280+168 1103 and rsp,-32 1104 movaps XMMWORD[(-168)+r9],xmm6 1105 movaps XMMWORD[(-152)+r9],xmm7 1106 movaps XMMWORD[(-136)+r9],xmm8 1107 movaps XMMWORD[(-120)+r9],xmm9 1108 movaps XMMWORD[(-104)+r9],xmm10 1109 movaps XMMWORD[(-88)+r9],xmm11 1110 movaps XMMWORD[(-72)+r9],xmm12 1111 movaps XMMWORD[(-56)+r9],xmm13 1112 movaps XMMWORD[(-40)+r9],xmm14 1113 movaps XMMWORD[(-24)+r9],xmm15 1114$L$8x_body: 1115 vzeroupper 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 vbroadcasti128 ymm11,XMMWORD[$L$sigma] 1127 vbroadcasti128 ymm3,XMMWORD[rcx] 1128 vbroadcasti128 ymm15,XMMWORD[16+rcx] 1129 vbroadcasti128 ymm7,XMMWORD[r8] 1130 lea rcx,[256+rsp] 1131 lea rax,[512+rsp] 1132 lea r10,[$L$rot16] 1133 lea r11,[$L$rot24] 1134 1135 vpshufd ymm8,ymm11,0x00 1136 vpshufd ymm9,ymm11,0x55 1137 vmovdqa YMMWORD[(128-256)+rcx],ymm8 1138 vpshufd ymm10,ymm11,0xaa 1139 vmovdqa YMMWORD[(160-256)+rcx],ymm9 1140 vpshufd ymm11,ymm11,0xff 1141 vmovdqa YMMWORD[(192-256)+rcx],ymm10 1142 vmovdqa YMMWORD[(224-256)+rcx],ymm11 1143 1144 vpshufd ymm0,ymm3,0x00 1145 vpshufd ymm1,ymm3,0x55 1146 vmovdqa YMMWORD[(256-256)+rcx],ymm0 1147 vpshufd ymm2,ymm3,0xaa 1148 vmovdqa YMMWORD[(288-256)+rcx],ymm1 1149 vpshufd ymm3,ymm3,0xff 1150 vmovdqa YMMWORD[(320-256)+rcx],ymm2 1151 vmovdqa YMMWORD[(352-256)+rcx],ymm3 1152 1153 vpshufd ymm12,ymm15,0x00 1154 vpshufd ymm13,ymm15,0x55 1155 vmovdqa 
YMMWORD[(384-512)+rax],ymm12 1156 vpshufd ymm14,ymm15,0xaa 1157 vmovdqa YMMWORD[(416-512)+rax],ymm13 1158 vpshufd ymm15,ymm15,0xff 1159 vmovdqa YMMWORD[(448-512)+rax],ymm14 1160 vmovdqa YMMWORD[(480-512)+rax],ymm15 1161 1162 vpshufd ymm4,ymm7,0x00 1163 vpshufd ymm5,ymm7,0x55 1164 vpaddd ymm4,ymm4,YMMWORD[$L$incy] 1165 vpshufd ymm6,ymm7,0xaa 1166 vmovdqa YMMWORD[(544-512)+rax],ymm5 1167 vpshufd ymm7,ymm7,0xff 1168 vmovdqa YMMWORD[(576-512)+rax],ymm6 1169 vmovdqa YMMWORD[(608-512)+rax],ymm7 1170 1171 jmp NEAR $L$oop_enter8x 1172 1173ALIGN 32 1174$L$oop_outer8x: 1175 vmovdqa ymm8,YMMWORD[((128-256))+rcx] 1176 vmovdqa ymm9,YMMWORD[((160-256))+rcx] 1177 vmovdqa ymm10,YMMWORD[((192-256))+rcx] 1178 vmovdqa ymm11,YMMWORD[((224-256))+rcx] 1179 vmovdqa ymm0,YMMWORD[((256-256))+rcx] 1180 vmovdqa ymm1,YMMWORD[((288-256))+rcx] 1181 vmovdqa ymm2,YMMWORD[((320-256))+rcx] 1182 vmovdqa ymm3,YMMWORD[((352-256))+rcx] 1183 vmovdqa ymm12,YMMWORD[((384-512))+rax] 1184 vmovdqa ymm13,YMMWORD[((416-512))+rax] 1185 vmovdqa ymm14,YMMWORD[((448-512))+rax] 1186 vmovdqa ymm15,YMMWORD[((480-512))+rax] 1187 vmovdqa ymm4,YMMWORD[((512-512))+rax] 1188 vmovdqa ymm5,YMMWORD[((544-512))+rax] 1189 vmovdqa ymm6,YMMWORD[((576-512))+rax] 1190 vmovdqa ymm7,YMMWORD[((608-512))+rax] 1191 vpaddd ymm4,ymm4,YMMWORD[$L$eight] 1192 1193$L$oop_enter8x: 1194 vmovdqa YMMWORD[64+rsp],ymm14 1195 vmovdqa YMMWORD[96+rsp],ymm15 1196 vbroadcasti128 ymm15,XMMWORD[r10] 1197 vmovdqa YMMWORD[(512-512)+rax],ymm4 1198 mov eax,10 1199 jmp NEAR $L$oop8x 1200 1201ALIGN 32 1202$L$oop8x: 1203 vpaddd ymm8,ymm8,ymm0 1204 vpxor ymm4,ymm8,ymm4 1205 vpshufb ymm4,ymm4,ymm15 1206 vpaddd ymm9,ymm9,ymm1 1207 vpxor ymm5,ymm9,ymm5 1208 vpshufb ymm5,ymm5,ymm15 1209 vpaddd ymm12,ymm12,ymm4 1210 vpxor ymm0,ymm12,ymm0 1211 vpslld ymm14,ymm0,12 1212 vpsrld ymm0,ymm0,20 1213 vpor ymm0,ymm14,ymm0 1214 vbroadcasti128 ymm14,XMMWORD[r11] 1215 vpaddd ymm13,ymm13,ymm5 1216 vpxor ymm1,ymm13,ymm1 1217 vpslld ymm15,ymm1,12 1218 vpsrld ymm1,ymm1,20 1219 vpor 
ymm1,ymm15,ymm1 1220 vpaddd ymm8,ymm8,ymm0 1221 vpxor ymm4,ymm8,ymm4 1222 vpshufb ymm4,ymm4,ymm14 1223 vpaddd ymm9,ymm9,ymm1 1224 vpxor ymm5,ymm9,ymm5 1225 vpshufb ymm5,ymm5,ymm14 1226 vpaddd ymm12,ymm12,ymm4 1227 vpxor ymm0,ymm12,ymm0 1228 vpslld ymm15,ymm0,7 1229 vpsrld ymm0,ymm0,25 1230 vpor ymm0,ymm15,ymm0 1231 vbroadcasti128 ymm15,XMMWORD[r10] 1232 vpaddd ymm13,ymm13,ymm5 1233 vpxor ymm1,ymm13,ymm1 1234 vpslld ymm14,ymm1,7 1235 vpsrld ymm1,ymm1,25 1236 vpor ymm1,ymm14,ymm1 1237 vmovdqa YMMWORD[rsp],ymm12 1238 vmovdqa YMMWORD[32+rsp],ymm13 1239 vmovdqa ymm12,YMMWORD[64+rsp] 1240 vmovdqa ymm13,YMMWORD[96+rsp] 1241 vpaddd ymm10,ymm10,ymm2 1242 vpxor ymm6,ymm10,ymm6 1243 vpshufb ymm6,ymm6,ymm15 1244 vpaddd ymm11,ymm11,ymm3 1245 vpxor ymm7,ymm11,ymm7 1246 vpshufb ymm7,ymm7,ymm15 1247 vpaddd ymm12,ymm12,ymm6 1248 vpxor ymm2,ymm12,ymm2 1249 vpslld ymm14,ymm2,12 1250 vpsrld ymm2,ymm2,20 1251 vpor ymm2,ymm14,ymm2 1252 vbroadcasti128 ymm14,XMMWORD[r11] 1253 vpaddd ymm13,ymm13,ymm7 1254 vpxor ymm3,ymm13,ymm3 1255 vpslld ymm15,ymm3,12 1256 vpsrld ymm3,ymm3,20 1257 vpor ymm3,ymm15,ymm3 1258 vpaddd ymm10,ymm10,ymm2 1259 vpxor ymm6,ymm10,ymm6 1260 vpshufb ymm6,ymm6,ymm14 1261 vpaddd ymm11,ymm11,ymm3 1262 vpxor ymm7,ymm11,ymm7 1263 vpshufb ymm7,ymm7,ymm14 1264 vpaddd ymm12,ymm12,ymm6 1265 vpxor ymm2,ymm12,ymm2 1266 vpslld ymm15,ymm2,7 1267 vpsrld ymm2,ymm2,25 1268 vpor ymm2,ymm15,ymm2 1269 vbroadcasti128 ymm15,XMMWORD[r10] 1270 vpaddd ymm13,ymm13,ymm7 1271 vpxor ymm3,ymm13,ymm3 1272 vpslld ymm14,ymm3,7 1273 vpsrld ymm3,ymm3,25 1274 vpor ymm3,ymm14,ymm3 1275 vpaddd ymm8,ymm8,ymm1 1276 vpxor ymm7,ymm8,ymm7 1277 vpshufb ymm7,ymm7,ymm15 1278 vpaddd ymm9,ymm9,ymm2 1279 vpxor ymm4,ymm9,ymm4 1280 vpshufb ymm4,ymm4,ymm15 1281 vpaddd ymm12,ymm12,ymm7 1282 vpxor ymm1,ymm12,ymm1 1283 vpslld ymm14,ymm1,12 1284 vpsrld ymm1,ymm1,20 1285 vpor ymm1,ymm14,ymm1 1286 vbroadcasti128 ymm14,XMMWORD[r11] 1287 vpaddd ymm13,ymm13,ymm4 1288 vpxor ymm2,ymm13,ymm2 1289 vpslld ymm15,ymm2,12 1290 vpsrld 
ymm2,ymm2,20 1291 vpor ymm2,ymm15,ymm2 1292 vpaddd ymm8,ymm8,ymm1 1293 vpxor ymm7,ymm8,ymm7 1294 vpshufb ymm7,ymm7,ymm14 1295 vpaddd ymm9,ymm9,ymm2 1296 vpxor ymm4,ymm9,ymm4 1297 vpshufb ymm4,ymm4,ymm14 1298 vpaddd ymm12,ymm12,ymm7 1299 vpxor ymm1,ymm12,ymm1 1300 vpslld ymm15,ymm1,7 1301 vpsrld ymm1,ymm1,25 1302 vpor ymm1,ymm15,ymm1 1303 vbroadcasti128 ymm15,XMMWORD[r10] 1304 vpaddd ymm13,ymm13,ymm4 1305 vpxor ymm2,ymm13,ymm2 1306 vpslld ymm14,ymm2,7 1307 vpsrld ymm2,ymm2,25 1308 vpor ymm2,ymm14,ymm2 1309 vmovdqa YMMWORD[64+rsp],ymm12 1310 vmovdqa YMMWORD[96+rsp],ymm13 1311 vmovdqa ymm12,YMMWORD[rsp] 1312 vmovdqa ymm13,YMMWORD[32+rsp] 1313 vpaddd ymm10,ymm10,ymm3 1314 vpxor ymm5,ymm10,ymm5 1315 vpshufb ymm5,ymm5,ymm15 1316 vpaddd ymm11,ymm11,ymm0 1317 vpxor ymm6,ymm11,ymm6 1318 vpshufb ymm6,ymm6,ymm15 1319 vpaddd ymm12,ymm12,ymm5 1320 vpxor ymm3,ymm12,ymm3 1321 vpslld ymm14,ymm3,12 1322 vpsrld ymm3,ymm3,20 1323 vpor ymm3,ymm14,ymm3 1324 vbroadcasti128 ymm14,XMMWORD[r11] 1325 vpaddd ymm13,ymm13,ymm6 1326 vpxor ymm0,ymm13,ymm0 1327 vpslld ymm15,ymm0,12 1328 vpsrld ymm0,ymm0,20 1329 vpor ymm0,ymm15,ymm0 1330 vpaddd ymm10,ymm10,ymm3 1331 vpxor ymm5,ymm10,ymm5 1332 vpshufb ymm5,ymm5,ymm14 1333 vpaddd ymm11,ymm11,ymm0 1334 vpxor ymm6,ymm11,ymm6 1335 vpshufb ymm6,ymm6,ymm14 1336 vpaddd ymm12,ymm12,ymm5 1337 vpxor ymm3,ymm12,ymm3 1338 vpslld ymm15,ymm3,7 1339 vpsrld ymm3,ymm3,25 1340 vpor ymm3,ymm15,ymm3 1341 vbroadcasti128 ymm15,XMMWORD[r10] 1342 vpaddd ymm13,ymm13,ymm6 1343 vpxor ymm0,ymm13,ymm0 1344 vpslld ymm14,ymm0,7 1345 vpsrld ymm0,ymm0,25 1346 vpor ymm0,ymm14,ymm0 1347 dec eax 1348 jnz NEAR $L$oop8x 1349 1350 lea rax,[512+rsp] 1351 vpaddd ymm8,ymm8,YMMWORD[((128-256))+rcx] 1352 vpaddd ymm9,ymm9,YMMWORD[((160-256))+rcx] 1353 vpaddd ymm10,ymm10,YMMWORD[((192-256))+rcx] 1354 vpaddd ymm11,ymm11,YMMWORD[((224-256))+rcx] 1355 1356 vpunpckldq ymm14,ymm8,ymm9 1357 vpunpckldq ymm15,ymm10,ymm11 1358 vpunpckhdq ymm8,ymm8,ymm9 1359 vpunpckhdq ymm10,ymm10,ymm11 1360 vpunpcklqdq 
ymm9,ymm14,ymm15 1361 vpunpckhqdq ymm14,ymm14,ymm15 1362 vpunpcklqdq ymm11,ymm8,ymm10 1363 vpunpckhqdq ymm8,ymm8,ymm10 1364 vpaddd ymm0,ymm0,YMMWORD[((256-256))+rcx] 1365 vpaddd ymm1,ymm1,YMMWORD[((288-256))+rcx] 1366 vpaddd ymm2,ymm2,YMMWORD[((320-256))+rcx] 1367 vpaddd ymm3,ymm3,YMMWORD[((352-256))+rcx] 1368 1369 vpunpckldq ymm10,ymm0,ymm1 1370 vpunpckldq ymm15,ymm2,ymm3 1371 vpunpckhdq ymm0,ymm0,ymm1 1372 vpunpckhdq ymm2,ymm2,ymm3 1373 vpunpcklqdq ymm1,ymm10,ymm15 1374 vpunpckhqdq ymm10,ymm10,ymm15 1375 vpunpcklqdq ymm3,ymm0,ymm2 1376 vpunpckhqdq ymm0,ymm0,ymm2 1377 vperm2i128 ymm15,ymm9,ymm1,0x20 1378 vperm2i128 ymm1,ymm9,ymm1,0x31 1379 vperm2i128 ymm9,ymm14,ymm10,0x20 1380 vperm2i128 ymm10,ymm14,ymm10,0x31 1381 vperm2i128 ymm14,ymm11,ymm3,0x20 1382 vperm2i128 ymm3,ymm11,ymm3,0x31 1383 vperm2i128 ymm11,ymm8,ymm0,0x20 1384 vperm2i128 ymm0,ymm8,ymm0,0x31 1385 vmovdqa YMMWORD[rsp],ymm15 1386 vmovdqa YMMWORD[32+rsp],ymm9 1387 vmovdqa ymm15,YMMWORD[64+rsp] 1388 vmovdqa ymm9,YMMWORD[96+rsp] 1389 1390 vpaddd ymm12,ymm12,YMMWORD[((384-512))+rax] 1391 vpaddd ymm13,ymm13,YMMWORD[((416-512))+rax] 1392 vpaddd ymm15,ymm15,YMMWORD[((448-512))+rax] 1393 vpaddd ymm9,ymm9,YMMWORD[((480-512))+rax] 1394 1395 vpunpckldq ymm2,ymm12,ymm13 1396 vpunpckldq ymm8,ymm15,ymm9 1397 vpunpckhdq ymm12,ymm12,ymm13 1398 vpunpckhdq ymm15,ymm15,ymm9 1399 vpunpcklqdq ymm13,ymm2,ymm8 1400 vpunpckhqdq ymm2,ymm2,ymm8 1401 vpunpcklqdq ymm9,ymm12,ymm15 1402 vpunpckhqdq ymm12,ymm12,ymm15 1403 vpaddd ymm4,ymm4,YMMWORD[((512-512))+rax] 1404 vpaddd ymm5,ymm5,YMMWORD[((544-512))+rax] 1405 vpaddd ymm6,ymm6,YMMWORD[((576-512))+rax] 1406 vpaddd ymm7,ymm7,YMMWORD[((608-512))+rax] 1407 1408 vpunpckldq ymm15,ymm4,ymm5 1409 vpunpckldq ymm8,ymm6,ymm7 1410 vpunpckhdq ymm4,ymm4,ymm5 1411 vpunpckhdq ymm6,ymm6,ymm7 1412 vpunpcklqdq ymm5,ymm15,ymm8 1413 vpunpckhqdq ymm15,ymm15,ymm8 1414 vpunpcklqdq ymm7,ymm4,ymm6 1415 vpunpckhqdq ymm4,ymm4,ymm6 1416 vperm2i128 ymm8,ymm13,ymm5,0x20 1417 vperm2i128 ymm5,ymm13,ymm5,0x31 
1418 vperm2i128 ymm13,ymm2,ymm15,0x20 1419 vperm2i128 ymm15,ymm2,ymm15,0x31 1420 vperm2i128 ymm2,ymm9,ymm7,0x20 1421 vperm2i128 ymm7,ymm9,ymm7,0x31 1422 vperm2i128 ymm9,ymm12,ymm4,0x20 1423 vperm2i128 ymm4,ymm12,ymm4,0x31 1424 vmovdqa ymm6,YMMWORD[rsp] 1425 vmovdqa ymm12,YMMWORD[32+rsp] 1426 1427 cmp rdx,64*8 1428 jb NEAR $L$tail8x 1429 1430 vpxor ymm6,ymm6,YMMWORD[rsi] 1431 vpxor ymm8,ymm8,YMMWORD[32+rsi] 1432 vpxor ymm1,ymm1,YMMWORD[64+rsi] 1433 vpxor ymm5,ymm5,YMMWORD[96+rsi] 1434 lea rsi,[128+rsi] 1435 vmovdqu YMMWORD[rdi],ymm6 1436 vmovdqu YMMWORD[32+rdi],ymm8 1437 vmovdqu YMMWORD[64+rdi],ymm1 1438 vmovdqu YMMWORD[96+rdi],ymm5 1439 lea rdi,[128+rdi] 1440 1441 vpxor ymm12,ymm12,YMMWORD[rsi] 1442 vpxor ymm13,ymm13,YMMWORD[32+rsi] 1443 vpxor ymm10,ymm10,YMMWORD[64+rsi] 1444 vpxor ymm15,ymm15,YMMWORD[96+rsi] 1445 lea rsi,[128+rsi] 1446 vmovdqu YMMWORD[rdi],ymm12 1447 vmovdqu YMMWORD[32+rdi],ymm13 1448 vmovdqu YMMWORD[64+rdi],ymm10 1449 vmovdqu YMMWORD[96+rdi],ymm15 1450 lea rdi,[128+rdi] 1451 1452 vpxor ymm14,ymm14,YMMWORD[rsi] 1453 vpxor ymm2,ymm2,YMMWORD[32+rsi] 1454 vpxor ymm3,ymm3,YMMWORD[64+rsi] 1455 vpxor ymm7,ymm7,YMMWORD[96+rsi] 1456 lea rsi,[128+rsi] 1457 vmovdqu YMMWORD[rdi],ymm14 1458 vmovdqu YMMWORD[32+rdi],ymm2 1459 vmovdqu YMMWORD[64+rdi],ymm3 1460 vmovdqu YMMWORD[96+rdi],ymm7 1461 lea rdi,[128+rdi] 1462 1463 vpxor ymm11,ymm11,YMMWORD[rsi] 1464 vpxor ymm9,ymm9,YMMWORD[32+rsi] 1465 vpxor ymm0,ymm0,YMMWORD[64+rsi] 1466 vpxor ymm4,ymm4,YMMWORD[96+rsi] 1467 lea rsi,[128+rsi] 1468 vmovdqu YMMWORD[rdi],ymm11 1469 vmovdqu YMMWORD[32+rdi],ymm9 1470 vmovdqu YMMWORD[64+rdi],ymm0 1471 vmovdqu YMMWORD[96+rdi],ymm4 1472 lea rdi,[128+rdi] 1473 1474 sub rdx,64*8 1475 jnz NEAR $L$oop_outer8x 1476 1477 jmp NEAR $L$done8x 1478 1479$L$tail8x: 1480 cmp rdx,448 1481 jae NEAR $L$448_or_more8x 1482 cmp rdx,384 1483 jae NEAR $L$384_or_more8x 1484 cmp rdx,320 1485 jae NEAR $L$320_or_more8x 1486 cmp rdx,256 1487 jae NEAR $L$256_or_more8x 1488 cmp rdx,192 1489 jae NEAR 
$L$192_or_more8x 1490 cmp rdx,128 1491 jae NEAR $L$128_or_more8x 1492 cmp rdx,64 1493 jae NEAR $L$64_or_more8x 1494 1495 xor r10,r10 1496 vmovdqa YMMWORD[rsp],ymm6 1497 vmovdqa YMMWORD[32+rsp],ymm8 1498 jmp NEAR $L$oop_tail8x 1499 1500ALIGN 32 1501$L$64_or_more8x: 1502 vpxor ymm6,ymm6,YMMWORD[rsi] 1503 vpxor ymm8,ymm8,YMMWORD[32+rsi] 1504 vmovdqu YMMWORD[rdi],ymm6 1505 vmovdqu YMMWORD[32+rdi],ymm8 1506 je NEAR $L$done8x 1507 1508 lea rsi,[64+rsi] 1509 xor r10,r10 1510 vmovdqa YMMWORD[rsp],ymm1 1511 lea rdi,[64+rdi] 1512 sub rdx,64 1513 vmovdqa YMMWORD[32+rsp],ymm5 1514 jmp NEAR $L$oop_tail8x 1515 1516ALIGN 32 1517$L$128_or_more8x: 1518 vpxor ymm6,ymm6,YMMWORD[rsi] 1519 vpxor ymm8,ymm8,YMMWORD[32+rsi] 1520 vpxor ymm1,ymm1,YMMWORD[64+rsi] 1521 vpxor ymm5,ymm5,YMMWORD[96+rsi] 1522 vmovdqu YMMWORD[rdi],ymm6 1523 vmovdqu YMMWORD[32+rdi],ymm8 1524 vmovdqu YMMWORD[64+rdi],ymm1 1525 vmovdqu YMMWORD[96+rdi],ymm5 1526 je NEAR $L$done8x 1527 1528 lea rsi,[128+rsi] 1529 xor r10,r10 1530 vmovdqa YMMWORD[rsp],ymm12 1531 lea rdi,[128+rdi] 1532 sub rdx,128 1533 vmovdqa YMMWORD[32+rsp],ymm13 1534 jmp NEAR $L$oop_tail8x 1535 1536ALIGN 32 1537$L$192_or_more8x: 1538 vpxor ymm6,ymm6,YMMWORD[rsi] 1539 vpxor ymm8,ymm8,YMMWORD[32+rsi] 1540 vpxor ymm1,ymm1,YMMWORD[64+rsi] 1541 vpxor ymm5,ymm5,YMMWORD[96+rsi] 1542 vpxor ymm12,ymm12,YMMWORD[128+rsi] 1543 vpxor ymm13,ymm13,YMMWORD[160+rsi] 1544 vmovdqu YMMWORD[rdi],ymm6 1545 vmovdqu YMMWORD[32+rdi],ymm8 1546 vmovdqu YMMWORD[64+rdi],ymm1 1547 vmovdqu YMMWORD[96+rdi],ymm5 1548 vmovdqu YMMWORD[128+rdi],ymm12 1549 vmovdqu YMMWORD[160+rdi],ymm13 1550 je NEAR $L$done8x 1551 1552 lea rsi,[192+rsi] 1553 xor r10,r10 1554 vmovdqa YMMWORD[rsp],ymm10 1555 lea rdi,[192+rdi] 1556 sub rdx,192 1557 vmovdqa YMMWORD[32+rsp],ymm15 1558 jmp NEAR $L$oop_tail8x 1559 1560ALIGN 32 1561$L$256_or_more8x: 1562 vpxor ymm6,ymm6,YMMWORD[rsi] 1563 vpxor ymm8,ymm8,YMMWORD[32+rsi] 1564 vpxor ymm1,ymm1,YMMWORD[64+rsi] 1565 vpxor ymm5,ymm5,YMMWORD[96+rsi] 1566 vpxor 
ymm12,ymm12,YMMWORD[128+rsi] 1567 vpxor ymm13,ymm13,YMMWORD[160+rsi] 1568 vpxor ymm10,ymm10,YMMWORD[192+rsi] 1569 vpxor ymm15,ymm15,YMMWORD[224+rsi] 1570 vmovdqu YMMWORD[rdi],ymm6 1571 vmovdqu YMMWORD[32+rdi],ymm8 1572 vmovdqu YMMWORD[64+rdi],ymm1 1573 vmovdqu YMMWORD[96+rdi],ymm5 1574 vmovdqu YMMWORD[128+rdi],ymm12 1575 vmovdqu YMMWORD[160+rdi],ymm13 1576 vmovdqu YMMWORD[192+rdi],ymm10 1577 vmovdqu YMMWORD[224+rdi],ymm15 1578 je NEAR $L$done8x 1579 1580 lea rsi,[256+rsi] 1581 xor r10,r10 1582 vmovdqa YMMWORD[rsp],ymm14 1583 lea rdi,[256+rdi] 1584 sub rdx,256 1585 vmovdqa YMMWORD[32+rsp],ymm2 1586 jmp NEAR $L$oop_tail8x 1587 1588ALIGN 32 1589$L$320_or_more8x: 1590 vpxor ymm6,ymm6,YMMWORD[rsi] 1591 vpxor ymm8,ymm8,YMMWORD[32+rsi] 1592 vpxor ymm1,ymm1,YMMWORD[64+rsi] 1593 vpxor ymm5,ymm5,YMMWORD[96+rsi] 1594 vpxor ymm12,ymm12,YMMWORD[128+rsi] 1595 vpxor ymm13,ymm13,YMMWORD[160+rsi] 1596 vpxor ymm10,ymm10,YMMWORD[192+rsi] 1597 vpxor ymm15,ymm15,YMMWORD[224+rsi] 1598 vpxor ymm14,ymm14,YMMWORD[256+rsi] 1599 vpxor ymm2,ymm2,YMMWORD[288+rsi] 1600 vmovdqu YMMWORD[rdi],ymm6 1601 vmovdqu YMMWORD[32+rdi],ymm8 1602 vmovdqu YMMWORD[64+rdi],ymm1 1603 vmovdqu YMMWORD[96+rdi],ymm5 1604 vmovdqu YMMWORD[128+rdi],ymm12 1605 vmovdqu YMMWORD[160+rdi],ymm13 1606 vmovdqu YMMWORD[192+rdi],ymm10 1607 vmovdqu YMMWORD[224+rdi],ymm15 1608 vmovdqu YMMWORD[256+rdi],ymm14 1609 vmovdqu YMMWORD[288+rdi],ymm2 1610 je NEAR $L$done8x 1611 1612 lea rsi,[320+rsi] 1613 xor r10,r10 1614 vmovdqa YMMWORD[rsp],ymm3 1615 lea rdi,[320+rdi] 1616 sub rdx,320 1617 vmovdqa YMMWORD[32+rsp],ymm7 1618 jmp NEAR $L$oop_tail8x 1619 1620ALIGN 32 1621$L$384_or_more8x: 1622 vpxor ymm6,ymm6,YMMWORD[rsi] 1623 vpxor ymm8,ymm8,YMMWORD[32+rsi] 1624 vpxor ymm1,ymm1,YMMWORD[64+rsi] 1625 vpxor ymm5,ymm5,YMMWORD[96+rsi] 1626 vpxor ymm12,ymm12,YMMWORD[128+rsi] 1627 vpxor ymm13,ymm13,YMMWORD[160+rsi] 1628 vpxor ymm10,ymm10,YMMWORD[192+rsi] 1629 vpxor ymm15,ymm15,YMMWORD[224+rsi] 1630 vpxor ymm14,ymm14,YMMWORD[256+rsi] 1631 vpxor 
ymm2,ymm2,YMMWORD[288+rsi] 1632 vpxor ymm3,ymm3,YMMWORD[320+rsi] 1633 vpxor ymm7,ymm7,YMMWORD[352+rsi] 1634 vmovdqu YMMWORD[rdi],ymm6 1635 vmovdqu YMMWORD[32+rdi],ymm8 1636 vmovdqu YMMWORD[64+rdi],ymm1 1637 vmovdqu YMMWORD[96+rdi],ymm5 1638 vmovdqu YMMWORD[128+rdi],ymm12 1639 vmovdqu YMMWORD[160+rdi],ymm13 1640 vmovdqu YMMWORD[192+rdi],ymm10 1641 vmovdqu YMMWORD[224+rdi],ymm15 1642 vmovdqu YMMWORD[256+rdi],ymm14 1643 vmovdqu YMMWORD[288+rdi],ymm2 1644 vmovdqu YMMWORD[320+rdi],ymm3 1645 vmovdqu YMMWORD[352+rdi],ymm7 1646 je NEAR $L$done8x 1647 1648 lea rsi,[384+rsi] 1649 xor r10,r10 1650 vmovdqa YMMWORD[rsp],ymm11 1651 lea rdi,[384+rdi] 1652 sub rdx,384 1653 vmovdqa YMMWORD[32+rsp],ymm9 1654 jmp NEAR $L$oop_tail8x 1655 1656ALIGN 32 1657$L$448_or_more8x: 1658 vpxor ymm6,ymm6,YMMWORD[rsi] 1659 vpxor ymm8,ymm8,YMMWORD[32+rsi] 1660 vpxor ymm1,ymm1,YMMWORD[64+rsi] 1661 vpxor ymm5,ymm5,YMMWORD[96+rsi] 1662 vpxor ymm12,ymm12,YMMWORD[128+rsi] 1663 vpxor ymm13,ymm13,YMMWORD[160+rsi] 1664 vpxor ymm10,ymm10,YMMWORD[192+rsi] 1665 vpxor ymm15,ymm15,YMMWORD[224+rsi] 1666 vpxor ymm14,ymm14,YMMWORD[256+rsi] 1667 vpxor ymm2,ymm2,YMMWORD[288+rsi] 1668 vpxor ymm3,ymm3,YMMWORD[320+rsi] 1669 vpxor ymm7,ymm7,YMMWORD[352+rsi] 1670 vpxor ymm11,ymm11,YMMWORD[384+rsi] 1671 vpxor ymm9,ymm9,YMMWORD[416+rsi] 1672 vmovdqu YMMWORD[rdi],ymm6 1673 vmovdqu YMMWORD[32+rdi],ymm8 1674 vmovdqu YMMWORD[64+rdi],ymm1 1675 vmovdqu YMMWORD[96+rdi],ymm5 1676 vmovdqu YMMWORD[128+rdi],ymm12 1677 vmovdqu YMMWORD[160+rdi],ymm13 1678 vmovdqu YMMWORD[192+rdi],ymm10 1679 vmovdqu YMMWORD[224+rdi],ymm15 1680 vmovdqu YMMWORD[256+rdi],ymm14 1681 vmovdqu YMMWORD[288+rdi],ymm2 1682 vmovdqu YMMWORD[320+rdi],ymm3 1683 vmovdqu YMMWORD[352+rdi],ymm7 1684 vmovdqu YMMWORD[384+rdi],ymm11 1685 vmovdqu YMMWORD[416+rdi],ymm9 1686 je NEAR $L$done8x 1687 1688 lea rsi,[448+rsi] 1689 xor r10,r10 1690 vmovdqa YMMWORD[rsp],ymm0 1691 lea rdi,[448+rdi] 1692 sub rdx,448 1693 vmovdqa YMMWORD[32+rsp],ymm4 1694 1695$L$oop_tail8x: 1696 movzx 
eax,BYTE[r10*1+rsi]                     ; operands of the movzx that ends the preceding line: eax = source byte at rsi+r10
        movzx   ecx,BYTE[r10*1+rsp]             ; ecx = matching byte of the 64-byte block stashed at [rsp]
        lea     r10,[1+r10]                     ; advance the byte index (lea leaves flags alone)
        xor     eax,ecx
        mov     BYTE[-1+r10*1+rdi],al           ; write to destination at the pre-increment index
        dec     rdx                             ; rdx = bytes still to process
        jnz     NEAR $L$oop_tail8x

$L$done8x:
        vzeroall                                ; clear every ymm register before leaving
        ; Reload xmm6-xmm15 (callee-saved on Win64) from the ten 16-byte
        ; slots at r9-168 .. r9-24.
        ; NOTE(review): r9 is presumably the frame base set up by this
        ; function's prologue, which is outside this view -- confirm.
        movaps  xmm6,XMMWORD[-168+r9]
        movaps  xmm7,XMMWORD[-152+r9]
        movaps  xmm8,XMMWORD[-136+r9]
        movaps  xmm9,XMMWORD[-120+r9]
        movaps  xmm10,XMMWORD[-104+r9]
        movaps  xmm11,XMMWORD[-88+r9]
        movaps  xmm12,XMMWORD[-72+r9]
        movaps  xmm13,XMMWORD[-56+r9]
        movaps  xmm14,XMMWORD[-40+r9]
        movaps  xmm15,XMMWORD[-24+r9]
        lea     rsp,[r9]

$L$8x_epilogue:
        mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
        mov     rsi,QWORD[16+rsp]
        DB      0F3h,0C3h               ;repret

$L$SEH_end_ChaCha20_8x:
EXTERN  __imp_RtlVirtualUnwind

;-----------------------------------------------------------------------
; se_handler -- Win64 language-specific exception handler installed for
; ChaCha20_ctr32 through $L$SEH_info_ChaCha20_ctr32.
;
; In:    r8 = CONTEXT *             (register image of the faulting frame)
;        r9 = DISPATCHER_CONTEXT *
; Out:   eax = 1 (ExceptionContinueSearch)
; Stack: 8 pushes + pushfq + 64 bytes; the 64 bytes are the 32-byte
;        shadow area plus the four stack arguments of RtlVirtualUnwind.
;
; Numeric displacements are field offsets in the Windows x64 CONTEXT
; (Rax=120, Rbx=144, Rsp=152, Rbp=160, Rsi=168, Rdi=176, R9=192,
; R12..R15=216..240, Rip=248, Xmm6=512) and DISPATCHER_CONTEXT
; (ControlPc=0, ImageBase=8, FunctionEntry=16, EstablisherFrame=24,
; ContextRecord=40, HandlerData=56) structures.
;
; $L$common_seh_tail is shared: ssse3_handler and full_handler jump into
; it with rax = frame base of the interrupted function.
;-----------------------------------------------------------------------
ALIGN   16
se_handler:
        push    rsi
        push    rdi
        push    rbx
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15
        pushfq
        sub     rsp,64

        mov     rax,QWORD[120+r8]               ; context->Rax (the prologue copies rsp into rax)
        mov     rbx,QWORD[248+r8]               ; context->Rip

        mov     rsi,QWORD[8+r9]                 ; disp->ImageBase
        mov     r11,QWORD[56+r9]                ; disp->HandlerData

        ; Fault before $L$ctr32_body: nothing to undo beyond rdi/rsi.
        lea     r10,[$L$ctr32_body]
        cmp     rbx,r10
        jb      NEAR $L$common_seh_tail

        mov     rax,QWORD[152+r8]               ; context->Rsp

        ; Fault at or after $L$no_data: use context->Rsp as it stands.
        lea     r10,[$L$no_data]
        cmp     rbx,r10
        jae     NEAR $L$common_seh_tail

        ; Inside the body: step over the 64+24 bytes of locals and the six
        ; pushed registers (48 bytes) to reach the entry-time frame.
        lea     rax,[64+24+48+rax]

        ; Fetch the saved non-volatile GPRs in the order they were pushed
        ; (rbx first, r15 last) and write them back into the CONTEXT.
        mov     rbx,QWORD[-8+rax]
        mov     rbp,QWORD[-16+rax]
        mov     r12,QWORD[-24+rax]
        mov     r13,QWORD[-32+rax]
        mov     r14,QWORD[-40+rax]
        mov     r15,QWORD[-48+rax]
        mov     QWORD[144+r8],rbx               ; context->Rbx
        mov     QWORD[160+r8],rbp               ; context->Rbp
        mov     QWORD[216+r8],r12               ; context->R12
        mov     QWORD[224+r8],r13               ; context->R13
        mov     QWORD[232+r8],r14               ; context->R14
        mov     QWORD[240+r8],r15               ; context->R15

$L$common_seh_tail:
        ; rax = frame base. The WIN64 prologue spills rdi/rsi to
        ; [8+rsp]/[16+rsp] of that frame; recover them from there.
        mov     rdi,QWORD[8+rax]
        mov     rsi,QWORD[16+rax]
        mov     QWORD[152+r8],rax               ; context->Rsp
        mov     QWORD[168+r8],rsi               ; context->Rsi
        mov     QWORD[176+r8],rdi               ; context->Rdi

        ; Copy the patched CONTEXT (154 qwords) into disp->ContextRecord.
        mov     rdi,QWORD[40+r9]
        mov     rsi,r8
        mov     ecx,154
        DD      0xa548f3fc                      ; bytes FC F3 48 A5 = cld; rep movsq

        ; RtlVirtualUnwind(0, ImageBase, ControlPc, FunctionEntry,
        ;                  ContextRecord, &HandlerData, &EstablisherFrame, 0)
        mov     rsi,r9
        xor     rcx,rcx                         ; arg1 = 0
        mov     rdx,QWORD[8+rsi]                ; arg2 = disp->ImageBase
        mov     r8,QWORD[rsi]                   ; arg3 = disp->ControlPc
        mov     r9,QWORD[16+rsi]                ; arg4 = disp->FunctionEntry
        mov     r10,QWORD[40+rsi]               ; disp->ContextRecord
        lea     r11,[56+rsi]                    ; &disp->HandlerData
        lea     r12,[24+rsi]                    ; &disp->EstablisherFrame
        mov     QWORD[32+rsp],r10               ; arg5
        mov     QWORD[40+rsp],r11               ; arg6
        mov     QWORD[48+rsp],r12               ; arg7
        mov     QWORD[56+rsp],rcx               ; arg8 = 0
        call    QWORD[__imp_RtlVirtualUnwind]

        mov     eax,1                           ; ExceptionContinueSearch
        add     rsp,64
        popfq
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     rbp
        pop     rbx
        pop     rdi
        pop     rsi
        DB      0F3h,0C3h               ;repret



;-----------------------------------------------------------------------
; ssse3_handler -- exception handler for ChaCha20_ssse3.
;
; In:  r8 = CONTEXT *, r9 = DISPATCHER_CONTEXT *
;      HandlerData[0], HandlerData[1] = image-relative addresses of the
;      function's body and epilogue labels (see the .xdata entry).
;
; When the fault lies in [body, epilogue) the frame base is taken from
; context->R9 and the 4 qwords (two xmm registers) saved at base-40 are
; copied to context+512 (Xmm6 onwards). All paths finish in
; $L$common_seh_tail, which also performs the return.
;-----------------------------------------------------------------------
ALIGN   16
ssse3_handler:
        push    rsi
        push    rdi
        push    rbx
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15
        pushfq
        sub     rsp,64

        mov     rax,QWORD[120+r8]               ; context->Rax
        mov     rbx,QWORD[248+r8]               ; context->Rip

        mov     rsi,QWORD[8+r9]                 ; disp->ImageBase
        mov     r11,QWORD[56+r9]                ; disp->HandlerData

        mov     r10d,DWORD[r11]                 ; HandlerData[0]: body RVA
        lea     r10,[r10*1+rsi]                 ; -> absolute address
        cmp     rbx,r10
        jb      NEAR $L$common_seh_tail         ; fault before the body label

        mov     rax,QWORD[192+r8]               ; context->R9

        mov     r10d,DWORD[4+r11]               ; HandlerData[1]: epilogue RVA
        lea     r10,[r10*1+rsi]                 ; -> absolute address
        cmp     rbx,r10
        jae     NEAR $L$common_seh_tail         ; fault at or past the epilogue label

        lea     rsi,[-40+rax]                   ; source: xmm save area
        lea     rdi,[512+r8]                    ; destination: context->Xmm6
        mov     ecx,4                           ; 4 qwords
        DD      0xa548f3fc                      ; bytes FC F3 48 A5 = cld; rep movsq

        jmp     NEAR $L$common_seh_tail



;-----------------------------------------------------------------------
; full_handler -- exception handler shared by ChaCha20_4x and
; ChaCha20_8x.
;
; Same shape as ssse3_handler, except that the save area starts at
; base-168 and is 20 qwords long (ten xmm registers), matching the
; xmm6-xmm15 reload sequence at $L$done8x.
;-----------------------------------------------------------------------
ALIGN   16
full_handler:
        push    rsi
        push    rdi
        push    rbx
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15
        pushfq
        sub     rsp,64

        mov     rax,QWORD[120+r8]               ; context->Rax
        mov     rbx,QWORD[248+r8]               ; context->Rip

        mov     rsi,QWORD[8+r9]                 ; disp->ImageBase
        mov     r11,QWORD[56+r9]                ; disp->HandlerData

        mov     r10d,DWORD[r11]                 ; HandlerData[0]: body RVA
        lea     r10,[r10*1+rsi]                 ; -> absolute address
        cmp     rbx,r10
        jb      NEAR $L$common_seh_tail         ; fault before the body label

        mov     rax,QWORD[192+r8]               ; context->R9

        mov     r10d,DWORD[4+r11]               ; HandlerData[1]: epilogue RVA
        lea     r10,[r10*1+rsi]                 ; -> absolute address
        cmp     rbx,r10
        jae     NEAR $L$common_seh_tail         ; fault at or past the epilogue label

        lea     rsi,[-168+rax]                  ; source: xmm save area
        lea     rdi,[512+r8]                    ; destination: context->Xmm6
        mov     ecx,20                          ; 20 qwords
        DD      0xa548f3fc                      ; bytes FC F3 48 A5 = cld; rep movsq

        jmp     NEAR $L$common_seh_tail


; Function table: one (begin, end, unwind-info) triple of image-relative
; addresses per function.
section .pdata rdata align=4
ALIGN   4
        DD      $L$SEH_begin_ChaCha20_ctr32 wrt ..imagebase
        DD      $L$SEH_end_ChaCha20_ctr32 wrt ..imagebase
        DD      $L$SEH_info_ChaCha20_ctr32 wrt ..imagebase

        DD      $L$SEH_begin_ChaCha20_ssse3 wrt ..imagebase
        DD      $L$SEH_end_ChaCha20_ssse3 wrt ..imagebase
        DD      $L$SEH_info_ChaCha20_ssse3 wrt ..imagebase

        DD      $L$SEH_begin_ChaCha20_4x wrt ..imagebase
        DD      $L$SEH_end_ChaCha20_4x wrt ..imagebase
        DD      $L$SEH_info_ChaCha20_4x wrt ..imagebase
        DD      $L$SEH_begin_ChaCha20_8x wrt ..imagebase
        DD      $L$SEH_end_ChaCha20_8x wrt ..imagebase
        DD      $L$SEH_info_ChaCha20_8x wrt ..imagebase

; Unwind info: a 4-byte header (9,0,0,0), the handler's image-relative
; address, then -- for the ssse3/4x/8x entries -- the body and epilogue
; label addresses that the handler reads through HandlerData.
section .xdata rdata align=8
ALIGN   8
$L$SEH_info_ChaCha20_ctr32:
DB      9,0,0,0
        DD      se_handler wrt ..imagebase

$L$SEH_info_ChaCha20_ssse3:
DB      9,0,0,0
        DD      ssse3_handler wrt ..imagebase
        DD      $L$ssse3_body wrt ..imagebase,$L$ssse3_epilogue wrt ..imagebase

$L$SEH_info_ChaCha20_4x:
DB      9,0,0,0
        DD      full_handler wrt ..imagebase
        DD      $L$4x_body wrt ..imagebase,$L$4x_epilogue wrt ..imagebase
$L$SEH_info_ChaCha20_8x:
DB      9,0,0,0
        DD      full_handler wrt ..imagebase
        DD      $L$8x_body wrt ..imagebase,$L$8x_epilogue wrt ..imagebase