; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.

; Syntax: NASM (Intel operand order). The whole body is guarded by the
; win64 output-format test below; the size keywords XMMWORD/YMMWORD/ZMMWORD
; and _CET_ENDBR are defined as empty macros, so they expand to nothing.
%ifidn __OUTPUT_FORMAT__, win64
default	rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD
%define _CET_ENDBR

%ifdef BORINGSSL_PREFIX
%include "boringssl_prefix_symbols_nasm.inc"
%endif
section	.text code align=64


section	.rdata rdata align=8
ALIGN	64
; Not referenced by the routines visible in this part of the file.
$L$zero:
	DD	0,0,0,0
; Added to the 16-byte counter block to advance its low dword by one
; (nohw and ssse3 routines).
$L$one:
	DD	1,0,0,0
; Per-lane offsets added to the broadcast counter dword in the 4x routine.
$L$inc:
	DD	0,1,2,3
; Added to the per-lane counters on every outer iteration of the 4x routine.
$L$four:
	DD	4,4,4,4
; $L$incy / $L$eight are the 8-lane counterparts used by the AVX2 routine.
$L$incy:
	DD	0,2,4,6,1,3,5,7
$L$eight:
	DD	8,8,8,8,8,8,8,8
; pshufb mask: moves bytes 2,3,0,1 of each dword to positions 0..3,
; i.e. rotates every dword left by 16 bits.
$L$rot16:
	DB	0x2,0x3,0x0,0x1,0x6,0x7,0x4,0x5,0xa,0xb,0x8,0x9,0xe,0xf,0xc,0xd
; pshufb mask: moves bytes 3,0,1,2 of each dword to positions 0..3,
; i.e. rotates every dword left by 8 bits (= right by 24).
$L$rot24:
	DB	0x3,0x0,0x1,0x2,0x7,0x4,0x5,0x6,0xb,0x8,0x9,0xa,0xf,0xc,0xd,0xe
; ASCII "expand 32-byte k" followed by a NUL byte; loaded as state words 0..3.
$L$sigma:
	DB	101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107
	DB	0
ALIGN	64
; The four 16-dword tables below are not referenced by the routines visible
; in this part of the file (presumably for a wider-vector variant -- confirm).
$L$zeroz:
	DD	0,0,0,0,1,0,0,0,2,0,0,0,3,0,0,0
$L$fourz:
	DD	4,0,0,0,4,0,0,0,4,0,0,0,4,0,0,0
$L$incz:
	DD	0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
$L$sixteen:
	DD	16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16
; ASCII "ChaCha20 for x86_64, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated.
	DB	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
	DB	95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32
	DB	98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115
	DB	108,46,111,114,103,62,0
section	.text

;-----------------------------------------------------------------------
; ChaCha20_ctr32_nohw -- general-purpose-register implementation, one
; 64-byte block per outer iteration.
;
; ABI:  entered with the Microsoft x64 convention; the prologue copies
;       rcx, rdx, r8, r9 and the fifth (stack) argument into
;       rdi, rsi, rdx, rcx, r8, which is what the body uses.
; In (after the prologue):
;       rdi = output pointer
;       rsi = input pointer
;       rdx = byte count
;       rcx = pointer to 32 bytes loaded as state words 4..11
;             (presumably the key -- confirm against the C prototype)
;       r8  = pointer to 16 bytes loaded as state words 12..15; word 12 is
;             advanced by one after every block
; Saves/restores: rbx, rbp, r12-r15 (pushed), rdi, rsi (caller home space).
; Frame after `sub rsp,64+24`:
;       [rsp+ 0..15]  used only by the tail path (keystream words 0..3)
;       [rsp+16..31]  state words 4..7 as loaded from rcx
;       [rsp+32..47]  state words 8..11 (working copy during the rounds)
;       [rsp+48..63]  state words 12..15 of the current block
;       [rsp+64]      bytes remaining, [rsp+72] input ptr, [rsp+80] output ptr
; Round-loop registers: eax,ebx,ecx,edx = words 0..3; r8d..r11d = words 4..7;
;       esi,edi = words 8,9 or 10,11 (swapped through the stack);
;       r12d..r15d = words 12..15; ebp = iteration counter.
;
; NOTE(review): nothing in this routine tests for a zero byte count and no
; visible jump targets $L$no_data; with rdx = 0 the tail loop's `dec rbp`
; would wrap around. Callers presumably guarantee a non-zero length -- confirm.
;-----------------------------------------------------------------------
global	ChaCha20_ctr32_nohw

ALIGN	64
ChaCha20_ctr32_nohw:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_ChaCha20_ctr32_nohw:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]



_CET_ENDBR
	push	rbx

	push	rbp

	push	r12

	push	r13

	push	r14

	push	r15

	sub	rsp,64+24

$L$ctr32_body:


	movdqu	xmm1,XMMWORD[rcx]		; words 4..7
	movdqu	xmm2,XMMWORD[16+rcx]		; words 8..11
	movdqu	xmm3,XMMWORD[r8]		; words 12..15
	movdqa	xmm4,XMMWORD[$L$one]		; per-block counter increment


	movdqa	XMMWORD[16+rsp],xmm1
	movdqa	XMMWORD[32+rsp],xmm2
	movdqa	XMMWORD[48+rsp],xmm3
	mov	rbp,rdx				; rbp = bytes remaining
	jmp	NEAR $L$oop_outer

ALIGN	32
; Per-block set-up: load the 16 state words into registers.
$L$oop_outer:
	mov	eax,0x61707865			; "expa" (little-endian)
	mov	ebx,0x3320646e			; "nd 3"
	mov	ecx,0x79622d32			; "2-by"
	mov	edx,0x6b206574			; "te k"
	mov	r8d,DWORD[16+rsp]
	mov	r9d,DWORD[20+rsp]
	mov	r10d,DWORD[24+rsp]
	mov	r11d,DWORD[28+rsp]
	movd	r12d,xmm3			; word 12 = current block counter
	mov	r13d,DWORD[52+rsp]
	mov	r14d,DWORD[56+rsp]
	mov	r15d,DWORD[60+rsp]

	mov	QWORD[((64+0))+rsp],rbp		; spill length; ebp becomes the loop counter
	mov	ebp,10				; 10 iterations of column + diagonal round
	mov	QWORD[((64+8))+rsp],rsi		; spill input pointer
DB	102,72,15,126,214			; movq rsi,xmm2 (words 8 and 9)
	mov	QWORD[((64+16))+rsp],rdi	; spill output pointer
	mov	rdi,rsi
	shr	rdi,32				; esi = word 8, edi = word 9
	jmp	NEAR $L$oop

ALIGN	32
$L$oop:
; Column round, quarter-rounds on words (0,4,8,12) and (1,5,9,13), interleaved.
	add	eax,r8d
	xor	r12d,eax
	rol	r12d,16
	add	ebx,r9d
	xor	r13d,ebx
	rol	r13d,16
	add	esi,r12d
	xor	r8d,esi
	rol	r8d,12
	add	edi,r13d
	xor	r9d,edi
	rol	r9d,12
	add	eax,r8d
	xor	r12d,eax
	rol	r12d,8
	add	ebx,r9d
	xor	r13d,ebx
	rol	r13d,8
	add	esi,r12d
	xor	r8d,esi
	rol	r8d,7
	add	edi,r13d
	xor	r9d,edi
	rol	r9d,7
; Swap words 8,9 out to the stack and bring words 10,11 into esi,edi.
	mov	DWORD[32+rsp],esi
	mov	DWORD[36+rsp],edi
	mov	esi,DWORD[40+rsp]
	mov	edi,DWORD[44+rsp]
; Column round, quarter-rounds on words (2,6,10,14) and (3,7,11,15).
	add	ecx,r10d
	xor	r14d,ecx
	rol	r14d,16
	add	edx,r11d
	xor	r15d,edx
	rol	r15d,16
	add	esi,r14d
	xor	r10d,esi
	rol	r10d,12
	add	edi,r15d
	xor	r11d,edi
	rol	r11d,12
	add	ecx,r10d
	xor	r14d,ecx
	rol	r14d,8
	add	edx,r11d
	xor	r15d,edx
	rol	r15d,8
	add	esi,r14d
	xor	r10d,esi
	rol	r10d,7
	add	edi,r15d
	xor	r11d,edi
	rol	r11d,7
; Diagonal round, quarter-rounds on words (0,5,10,15) and (1,6,11,12).
	add	eax,r9d
	xor	r15d,eax
	rol	r15d,16
	add	ebx,r10d
	xor	r12d,ebx
	rol	r12d,16
	add	esi,r15d
	xor	r9d,esi
	rol	r9d,12
	add	edi,r12d
	xor	r10d,edi
	rol	r10d,12
	add	eax,r9d
	xor	r15d,eax
	rol	r15d,8
	add	ebx,r10d
	xor	r12d,ebx
	rol	r12d,8
	add	esi,r15d
	xor	r9d,esi
	rol	r9d,7
	add	edi,r12d
	xor	r10d,edi
	rol	r10d,7
; Swap words 10,11 out to the stack and bring words 8,9 back.
	mov	DWORD[40+rsp],esi
	mov	DWORD[44+rsp],edi
	mov	esi,DWORD[32+rsp]
	mov	edi,DWORD[36+rsp]
; Diagonal round, quarter-rounds on words (2,7,8,13) and (3,4,9,14).
	add	ecx,r11d
	xor	r13d,ecx
	rol	r13d,16
	add	edx,r8d
	xor	r14d,edx
	rol	r14d,16
	add	esi,r13d
	xor	r11d,esi
	rol	r11d,12
	add	edi,r14d
	xor	r8d,edi
	rol	r8d,12
	add	ecx,r11d
	xor	r13d,ecx
	rol	r13d,8
	add	edx,r8d
	xor	r14d,edx
	rol	r14d,8
	add	esi,r13d
	xor	r11d,esi
	rol	r11d,7
	add	edi,r14d
	xor	r8d,edi
	rol	r8d,7
	dec	ebp
	jnz	NEAR $L$oop
; Rounds done: flush words 8,9, reload the spilled pointers/length and add
; the original input state back into the worked state.
	mov	DWORD[36+rsp],edi
	mov	DWORD[32+rsp],esi
	mov	rbp,QWORD[64+rsp]		; bytes remaining
	movdqa	xmm1,xmm2			; original words 8..11
	mov	rsi,QWORD[((64+8))+rsp]		; input pointer
	paddd	xmm3,xmm4			; advance counter for the next block
	mov	rdi,QWORD[((64+16))+rsp]	; output pointer

	add	eax,0x61707865
	add	ebx,0x3320646e
	add	ecx,0x79622d32
	add	edx,0x6b206574
	add	r8d,DWORD[16+rsp]
	add	r9d,DWORD[20+rsp]
	add	r10d,DWORD[24+rsp]
	add	r11d,DWORD[28+rsp]
	add	r12d,DWORD[48+rsp]		; [48+rsp] still holds this block's counter
	add	r13d,DWORD[52+rsp]
	add	r14d,DWORD[56+rsp]
	add	r15d,DWORD[60+rsp]
	paddd	xmm1,XMMWORD[32+rsp]		; xmm1 = keystream words 8..11

	cmp	rbp,64
	jb	NEAR $L$tail			; fewer than 64 bytes left

; Full block: XOR 64 input bytes with the keystream.
	xor	eax,DWORD[rsi]
	xor	ebx,DWORD[4+rsi]
	xor	ecx,DWORD[8+rsi]
	xor	edx,DWORD[12+rsi]
	xor	r8d,DWORD[16+rsi]
	xor	r9d,DWORD[20+rsi]
	xor	r10d,DWORD[24+rsi]
	xor	r11d,DWORD[28+rsi]
	movdqu	xmm0,XMMWORD[32+rsi]
	xor	r12d,DWORD[48+rsi]
	xor	r13d,DWORD[52+rsi]
	xor	r14d,DWORD[56+rsi]
	xor	r15d,DWORD[60+rsi]
	lea	rsi,[64+rsi]
	pxor	xmm0,xmm1

; Re-arm the stack copy of the state for the next block.
	movdqa	XMMWORD[32+rsp],xmm2		; original words 8..11
	movd	DWORD[48+rsp],xmm3		; advanced counter dword

	mov	DWORD[rdi],eax
	mov	DWORD[4+rdi],ebx
	mov	DWORD[8+rdi],ecx
	mov	DWORD[12+rdi],edx
	mov	DWORD[16+rdi],r8d
	mov	DWORD[20+rdi],r9d
	mov	DWORD[24+rdi],r10d
	mov	DWORD[28+rdi],r11d
	movdqu	XMMWORD[32+rdi],xmm0
	mov	DWORD[48+rdi],r12d
	mov	DWORD[52+rdi],r13d
	mov	DWORD[56+rdi],r14d
	mov	DWORD[60+rdi],r15d
	lea	rdi,[64+rdi]

	sub	rbp,64
	jnz	NEAR $L$oop_outer

	jmp	NEAR $L$done

ALIGN	16
; Partial final block: spill all 64 keystream bytes to [rsp..rsp+63] and
; XOR the remaining rbp bytes one at a time (rbx = byte index).
$L$tail:
	mov	DWORD[rsp],eax
	mov	DWORD[4+rsp],ebx
	xor	rbx,rbx
	mov	DWORD[8+rsp],ecx
	mov	DWORD[12+rsp],edx
	mov	DWORD[16+rsp],r8d
	mov	DWORD[20+rsp],r9d
	mov	DWORD[24+rsp],r10d
	mov	DWORD[28+rsp],r11d
	movdqa	XMMWORD[32+rsp],xmm1
	mov	DWORD[48+rsp],r12d
	mov	DWORD[52+rsp],r13d
	mov	DWORD[56+rsp],r14d
	mov	DWORD[60+rsp],r15d

$L$oop_tail:
	movzx	eax,BYTE[rbx*1+rsi]
	movzx	edx,BYTE[rbx*1+rsp]
	lea	rbx,[1+rbx]
	xor	eax,edx
	mov	BYTE[((-1))+rbx*1+rdi],al
	dec	rbp
	jnz	NEAR $L$oop_tail

; Epilogue: rsi = address just above the six pushed registers; reload them
; and release the frame.
$L$done:
	lea	rsi,[((64+24+48))+rsp]
	mov	r15,QWORD[((-48))+rsi]

	mov	r14,QWORD[((-40))+rsi]

	mov	r13,QWORD[((-32))+rsi]

	mov	r12,QWORD[((-24))+rsi]

	mov	rbp,QWORD[((-16))+rsi]

	mov	rbx,QWORD[((-8))+rsi]

	lea	rsp,[rsi]

$L$no_data:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	ret

$L$SEH_end_ChaCha20_ctr32_nohw:

;-----------------------------------------------------------------------
; ChaCha20_ctr32_ssse3 -- one 64-byte block per outer iteration, with the
; state held as four rows in xmm0..xmm3.
;
; ABI and arguments: same register shuffle and meaning as
;       ChaCha20_ctr32_nohw (rdi = out, rsi = in, rdx = byte count,
;       rcx -> words 4..11, r8 -> words 12..15).
; Registers: xmm0..xmm3 = state rows; xmm4, xmm5 = temporaries;
;       xmm6 = $L$rot16 mask, xmm7 = $L$rot24 mask;
;       r8 = iteration counter (later the tail byte index);
;       r9 = entry rsp, used to restore xmm6/xmm7 and the stack.
; Frame: [rsp..rsp+63] = input state of the current block; xmm6/xmm7 are
;       saved at r9-40 and r9-24.
; NOTE(review): like the nohw routine, no zero-length check is visible here.
;-----------------------------------------------------------------------
global	ChaCha20_ctr32_ssse3

ALIGN	32
ChaCha20_ctr32_ssse3:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_ChaCha20_ctr32_ssse3:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]



_CET_ENDBR
	mov	r9,rsp

	sub	rsp,64+40
	movaps	XMMWORD[(-40)+r9],xmm6		; xmm6/xmm7 are callee-saved on Win64
	movaps	XMMWORD[(-24)+r9],xmm7
$L$ssse3_body:
	movdqa	xmm0,XMMWORD[$L$sigma]
	movdqu	xmm1,XMMWORD[rcx]
	movdqu	xmm2,XMMWORD[16+rcx]
	movdqu	xmm3,XMMWORD[r8]
	movdqa	xmm6,XMMWORD[$L$rot16]
	movdqa	xmm7,XMMWORD[$L$rot24]

	movdqa	XMMWORD[rsp],xmm0
	movdqa	XMMWORD[16+rsp],xmm1
	movdqa	XMMWORD[32+rsp],xmm2
	movdqa	XMMWORD[48+rsp],xmm3
	mov	r8,10				; 10 iterations of column + diagonal round
	jmp	NEAR $L$oop_ssse3

ALIGN	32
; Subsequent blocks: reload the saved state and advance the counter by one.
$L$oop_outer_ssse3:
	movdqa	xmm3,XMMWORD[$L$one]
	movdqa	xmm0,XMMWORD[rsp]
	movdqa	xmm1,XMMWORD[16+rsp]
	movdqa	xmm2,XMMWORD[32+rsp]
	paddd	xmm3,XMMWORD[48+rsp]
	mov	r8,10
	movdqa	XMMWORD[48+rsp],xmm3
	jmp	NEAR $L$oop_ssse3

ALIGN	32
$L$oop_ssse3:
; Column round on all four columns at once.
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
DB	102,15,56,0,222				; pshufb xmm3,xmm6 (rotate left 16)
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,20
	pslld	xmm4,12
	por	xmm1,xmm4			; rotate left 12
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
DB	102,15,56,0,223				; pshufb xmm3,xmm7 (rotate left 8)
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,25
	pslld	xmm4,7
	por	xmm1,xmm4			; rotate left 7
; Rotate the dwords of rows 1, 2, 3 by 1, 2, 3 positions so the diagonals
; line up as columns.
	pshufd	xmm2,xmm2,78
	pshufd	xmm1,xmm1,57
	pshufd	xmm3,xmm3,147
	nop
; Diagonal round (same sequence on the rotated rows).
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
DB	102,15,56,0,222				; pshufb xmm3,xmm6 (rotate left 16)
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,20
	pslld	xmm4,12
	por	xmm1,xmm4
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
DB	102,15,56,0,223				; pshufb xmm3,xmm7 (rotate left 8)
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,25
	pslld	xmm4,7
	por	xmm1,xmm4
; Undo the row rotation.
	pshufd	xmm2,xmm2,78
	pshufd	xmm1,xmm1,147
	pshufd	xmm3,xmm3,57
	dec	r8
	jnz	NEAR $L$oop_ssse3
; Add the input state back to form the keystream block.
	paddd	xmm0,XMMWORD[rsp]
	paddd	xmm1,XMMWORD[16+rsp]
	paddd	xmm2,XMMWORD[32+rsp]
	paddd	xmm3,XMMWORD[48+rsp]

	cmp	rdx,64
	jb	NEAR $L$tail_ssse3

	movdqu	xmm4,XMMWORD[rsi]
	movdqu	xmm5,XMMWORD[16+rsi]
	pxor	xmm0,xmm4
	movdqu	xmm4,XMMWORD[32+rsi]
	pxor	xmm1,xmm5
	movdqu	xmm5,XMMWORD[48+rsi]
	lea	rsi,[64+rsi]
	pxor	xmm2,xmm4
	pxor	xmm3,xmm5

	movdqu	XMMWORD[rdi],xmm0
	movdqu	XMMWORD[16+rdi],xmm1
	movdqu	XMMWORD[32+rdi],xmm2
	movdqu	XMMWORD[48+rdi],xmm3
	lea	rdi,[64+rdi]

	sub	rdx,64
	jnz	NEAR $L$oop_outer_ssse3

	jmp	NEAR $L$done_ssse3

ALIGN	16
; Partial final block: spill the keystream over the saved state and XOR the
; remaining rdx bytes one at a time (r8 = byte index).
$L$tail_ssse3:
	movdqa	XMMWORD[rsp],xmm0
	movdqa	XMMWORD[16+rsp],xmm1
	movdqa	XMMWORD[32+rsp],xmm2
	movdqa	XMMWORD[48+rsp],xmm3
	xor	r8,r8

$L$oop_tail_ssse3:
	movzx	eax,BYTE[r8*1+rsi]
	movzx	ecx,BYTE[r8*1+rsp]
	lea	r8,[1+r8]
	xor	eax,ecx
	mov	BYTE[((-1))+r8*1+rdi],al
	dec	rdx
	jnz	NEAR $L$oop_tail_ssse3

$L$done_ssse3:
	movaps	xmm6,XMMWORD[((-40))+r9]
	movaps	xmm7,XMMWORD[((-24))+r9]
	lea	rsp,[r9]

$L$ssse3_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	ret

$L$SEH_end_ChaCha20_ctr32_ssse3:

;-----------------------------------------------------------------------
; ChaCha20_ctr32_ssse3_4x -- four 64-byte blocks per outer iteration.
; Each xmm register holds the same state word for four blocks (one block
; per dword lane); the result is transposed back before the XOR.
;
; ABI and arguments: same register shuffle and meaning as
;       ChaCha20_ctr32_nohw (rdi = out, rsi = in, rdx = byte count,
;       rcx -> words 4..11, r8 -> words 12..15).
; Round-loop registers: xmm8..xmm11 = words 0..3; xmm12..xmm15 = words 4..7;
;       xmm4, xmm5 = words 8,9 or 10,11 (swapped through the stack);
;       xmm0..xmm3 = words 12..15; xmm6, xmm7 = temporaries / shuffle masks;
;       r10 -> $L$rot16, r11 -> $L$rot24; eax = iteration counter;
;       rcx = rsp+256 (base for the saved state); r9 = entry rsp.
; Frame after `sub rsp,0x140+168`:
;       [rsp+  0.. 63]  spill slots for words 8..11 during the rounds, then
;                       transposed output rows / tail keystream
;       [rsp+ 64..127]  words 0..3, each broadcast to four lanes
;       [rsp+128..191]  words 4..7 broadcast
;       [rsp+192..255]  words 8..11 broadcast
;       [rsp+256..271]  per-lane counters (word 12)
;       [rsp+272..319]  words 13..15 broadcast
;       xmm6..xmm15 are saved at r9-168 .. r9-24.
; NOTE(review): no zero-length check is visible in this routine either.
;-----------------------------------------------------------------------
global	ChaCha20_ctr32_ssse3_4x

ALIGN	32
ChaCha20_ctr32_ssse3_4x:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_ChaCha20_ctr32_ssse3_4x:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]



_CET_ENDBR
	mov	r9,rsp

; NOTE(review): r10 is not set earlier in this routine, and r11 is
; overwritten by the `lea r11,[$L$rot24]` below before any visible read;
; this copy looks dead -- confirm against the generator script.
	mov	r11,r10
	sub	rsp,0x140+168
	movaps	XMMWORD[(-168)+r9],xmm6
	movaps	XMMWORD[(-152)+r9],xmm7
	movaps	XMMWORD[(-136)+r9],xmm8
	movaps	XMMWORD[(-120)+r9],xmm9
	movaps	XMMWORD[(-104)+r9],xmm10
	movaps	XMMWORD[(-88)+r9],xmm11
	movaps	XMMWORD[(-72)+r9],xmm12
	movaps	XMMWORD[(-56)+r9],xmm13
	movaps	XMMWORD[(-40)+r9],xmm14
	movaps	XMMWORD[(-24)+r9],xmm15
$L$4x_body:
	movdqa	xmm11,XMMWORD[$L$sigma]
	movdqu	xmm15,XMMWORD[rcx]
	movdqu	xmm7,XMMWORD[16+rcx]
	movdqu	xmm3,XMMWORD[r8]
	lea	rcx,[256+rsp]
	lea	r10,[$L$rot16]
	lea	r11,[$L$rot24]

; Broadcast each state word across the four lanes and save it.
	pshufd	xmm8,xmm11,0x00
	pshufd	xmm9,xmm11,0x55
	movdqa	XMMWORD[64+rsp],xmm8
	pshufd	xmm10,xmm11,0xaa
	movdqa	XMMWORD[80+rsp],xmm9
	pshufd	xmm11,xmm11,0xff
	movdqa	XMMWORD[96+rsp],xmm10
	movdqa	XMMWORD[112+rsp],xmm11

	pshufd	xmm12,xmm15,0x00
	pshufd	xmm13,xmm15,0x55
	movdqa	XMMWORD[(128-256)+rcx],xmm12
	pshufd	xmm14,xmm15,0xaa
	movdqa	XMMWORD[(144-256)+rcx],xmm13
	pshufd	xmm15,xmm15,0xff
	movdqa	XMMWORD[(160-256)+rcx],xmm14
	movdqa	XMMWORD[(176-256)+rcx],xmm15

	pshufd	xmm4,xmm7,0x00
	pshufd	xmm5,xmm7,0x55
	movdqa	XMMWORD[(192-256)+rcx],xmm4
	pshufd	xmm6,xmm7,0xaa
	movdqa	XMMWORD[(208-256)+rcx],xmm5
	pshufd	xmm7,xmm7,0xff
	movdqa	XMMWORD[(224-256)+rcx],xmm6
	movdqa	XMMWORD[(240-256)+rcx],xmm7

	pshufd	xmm0,xmm3,0x00
	pshufd	xmm1,xmm3,0x55
	paddd	xmm0,XMMWORD[$L$inc]		; lane i gets counter + i
	pshufd	xmm2,xmm3,0xaa
	movdqa	XMMWORD[(272-256)+rcx],xmm1
	pshufd	xmm3,xmm3,0xff
	movdqa	XMMWORD[(288-256)+rcx],xmm2
	movdqa	XMMWORD[(304-256)+rcx],xmm3

	jmp	NEAR $L$oop_enter4x

ALIGN	32
; Subsequent groups of four blocks: reload the saved state and step every
; lane's counter by four.
$L$oop_outer4x:
	movdqa	xmm8,XMMWORD[64+rsp]
	movdqa	xmm9,XMMWORD[80+rsp]
	movdqa	xmm10,XMMWORD[96+rsp]
	movdqa	xmm11,XMMWORD[112+rsp]
	movdqa	xmm12,XMMWORD[((128-256))+rcx]
	movdqa	xmm13,XMMWORD[((144-256))+rcx]
	movdqa	xmm14,XMMWORD[((160-256))+rcx]
	movdqa	xmm15,XMMWORD[((176-256))+rcx]
	movdqa	xmm4,XMMWORD[((192-256))+rcx]
	movdqa	xmm5,XMMWORD[((208-256))+rcx]
	movdqa	xmm6,XMMWORD[((224-256))+rcx]
	movdqa	xmm7,XMMWORD[((240-256))+rcx]
	movdqa	xmm0,XMMWORD[((256-256))+rcx]
	movdqa	xmm1,XMMWORD[((272-256))+rcx]
	movdqa	xmm2,XMMWORD[((288-256))+rcx]
	movdqa	xmm3,XMMWORD[((304-256))+rcx]
	paddd	xmm0,XMMWORD[$L$four]

$L$oop_enter4x:
	movdqa	XMMWORD[32+rsp],xmm6		; spill word 10
	movdqa	XMMWORD[48+rsp],xmm7		; spill word 11
	movdqa	xmm7,XMMWORD[r10]		; xmm7 = rot16 mask
	mov	eax,10				; 10 iterations of column + diagonal round
	movdqa	XMMWORD[(256-256)+rcx],xmm0	; save this group's lane counters
	jmp	NEAR $L$oop4x

ALIGN	32
$L$oop4x:
; Column round, quarter-rounds on words (0,4,8,12) and (1,5,9,13).
	paddd	xmm8,xmm12
	paddd	xmm9,xmm13
	pxor	xmm0,xmm8
	pxor	xmm1,xmm9
DB	102,15,56,0,199				; pshufb xmm0,xmm7 (rot16)
DB	102,15,56,0,207				; pshufb xmm1,xmm7 (rot16)
	paddd	xmm4,xmm0
	paddd	xmm5,xmm1
	pxor	xmm12,xmm4
	pxor	xmm13,xmm5
	movdqa	xmm6,xmm12
	pslld	xmm12,12
	psrld	xmm6,20
	movdqa	xmm7,xmm13
	pslld	xmm13,12
	por	xmm12,xmm6
	psrld	xmm7,20
	movdqa	xmm6,XMMWORD[r11]		; xmm6 = rot24 mask
	por	xmm13,xmm7
	paddd	xmm8,xmm12
	paddd	xmm9,xmm13
	pxor	xmm0,xmm8
	pxor	xmm1,xmm9
DB	102,15,56,0,198				; pshufb xmm0,xmm6 (rot24)
DB	102,15,56,0,206				; pshufb xmm1,xmm6 (rot24)
	paddd	xmm4,xmm0
	paddd	xmm5,xmm1
	pxor	xmm12,xmm4
	pxor	xmm13,xmm5
	movdqa	xmm7,xmm12
	pslld	xmm12,7
	psrld	xmm7,25
	movdqa	xmm6,xmm13
	pslld	xmm13,7
	por	xmm12,xmm7
	psrld	xmm6,25
	movdqa	xmm7,XMMWORD[r10]		; xmm7 = rot16 mask
	por	xmm13,xmm6
; Swap words 8,9 out to the stack and bring words 10,11 into xmm4,xmm5.
	movdqa	XMMWORD[rsp],xmm4
	movdqa	XMMWORD[16+rsp],xmm5
	movdqa	xmm4,XMMWORD[32+rsp]
	movdqa	xmm5,XMMWORD[48+rsp]
; Column round, quarter-rounds on words (2,6,10,14) and (3,7,11,15).
	paddd	xmm10,xmm14
	paddd	xmm11,xmm15
	pxor	xmm2,xmm10
	pxor	xmm3,xmm11
DB	102,15,56,0,215				; pshufb xmm2,xmm7 (rot16)
DB	102,15,56,0,223				; pshufb xmm3,xmm7 (rot16)
	paddd	xmm4,xmm2
	paddd	xmm5,xmm3
	pxor	xmm14,xmm4
	pxor	xmm15,xmm5
	movdqa	xmm6,xmm14
	pslld	xmm14,12
	psrld	xmm6,20
	movdqa	xmm7,xmm15
	pslld	xmm15,12
	por	xmm14,xmm6
	psrld	xmm7,20
	movdqa	xmm6,XMMWORD[r11]
	por	xmm15,xmm7
	paddd	xmm10,xmm14
	paddd	xmm11,xmm15
	pxor	xmm2,xmm10
	pxor	xmm3,xmm11
DB	102,15,56,0,214				; pshufb xmm2,xmm6 (rot24)
DB	102,15,56,0,222				; pshufb xmm3,xmm6 (rot24)
	paddd	xmm4,xmm2
	paddd	xmm5,xmm3
	pxor	xmm14,xmm4
	pxor	xmm15,xmm5
	movdqa	xmm7,xmm14
	pslld	xmm14,7
	psrld	xmm7,25
	movdqa	xmm6,xmm15
	pslld	xmm15,7
	por	xmm14,xmm7
	psrld	xmm6,25
	movdqa	xmm7,XMMWORD[r10]
	por	xmm15,xmm6
; Diagonal round, quarter-rounds on words (0,5,10,15) and (1,6,11,12).
	paddd	xmm8,xmm13
	paddd	xmm9,xmm14
	pxor	xmm3,xmm8
	pxor	xmm0,xmm9
DB	102,15,56,0,223				; pshufb xmm3,xmm7 (rot16)
DB	102,15,56,0,199				; pshufb xmm0,xmm7 (rot16)
	paddd	xmm4,xmm3
	paddd	xmm5,xmm0
	pxor	xmm13,xmm4
	pxor	xmm14,xmm5
	movdqa	xmm6,xmm13
	pslld	xmm13,12
	psrld	xmm6,20
	movdqa	xmm7,xmm14
	pslld	xmm14,12
	por	xmm13,xmm6
	psrld	xmm7,20
	movdqa	xmm6,XMMWORD[r11]
	por	xmm14,xmm7
	paddd	xmm8,xmm13
	paddd	xmm9,xmm14
	pxor	xmm3,xmm8
	pxor	xmm0,xmm9
DB	102,15,56,0,222				; pshufb xmm3,xmm6 (rot24)
DB	102,15,56,0,198				; pshufb xmm0,xmm6 (rot24)
	paddd	xmm4,xmm3
	paddd	xmm5,xmm0
	pxor	xmm13,xmm4
	pxor	xmm14,xmm5
	movdqa	xmm7,xmm13
	pslld	xmm13,7
	psrld	xmm7,25
	movdqa	xmm6,xmm14
	pslld	xmm14,7
	por	xmm13,xmm7
	psrld	xmm6,25
	movdqa	xmm7,XMMWORD[r10]
	por	xmm14,xmm6
; Swap words 10,11 out to the stack and bring words 8,9 back.
	movdqa	XMMWORD[32+rsp],xmm4
	movdqa	XMMWORD[48+rsp],xmm5
	movdqa	xmm4,XMMWORD[rsp]
	movdqa	xmm5,XMMWORD[16+rsp]
; Diagonal round, quarter-rounds on words (2,7,8,13) and (3,4,9,14).
	paddd	xmm10,xmm15
	paddd	xmm11,xmm12
	pxor	xmm1,xmm10
	pxor	xmm2,xmm11
DB	102,15,56,0,207				; pshufb xmm1,xmm7 (rot16)
DB	102,15,56,0,215				; pshufb xmm2,xmm7 (rot16)
	paddd	xmm4,xmm1
	paddd	xmm5,xmm2
	pxor	xmm15,xmm4
	pxor	xmm12,xmm5
	movdqa	xmm6,xmm15
	pslld	xmm15,12
	psrld	xmm6,20
	movdqa	xmm7,xmm12
	pslld	xmm12,12
	por	xmm15,xmm6
	psrld	xmm7,20
	movdqa	xmm6,XMMWORD[r11]
	por	xmm12,xmm7
	paddd	xmm10,xmm15
	paddd	xmm11,xmm12
	pxor	xmm1,xmm10
	pxor	xmm2,xmm11
DB	102,15,56,0,206				; pshufb xmm1,xmm6 (rot24)
DB	102,15,56,0,214				; pshufb xmm2,xmm6 (rot24)
	paddd	xmm4,xmm1
	paddd	xmm5,xmm2
	pxor	xmm15,xmm4
	pxor	xmm12,xmm5
	movdqa	xmm7,xmm15
	pslld	xmm15,7
	psrld	xmm7,25
	movdqa	xmm6,xmm12
	pslld	xmm12,7
	por	xmm15,xmm7
	psrld	xmm6,25
	movdqa	xmm7,XMMWORD[r10]
	por	xmm12,xmm6
	dec	eax
	jnz	NEAR $L$oop4x

; Add the saved input state back, then transpose each group of four word
; registers so that every register holds 16 consecutive bytes of one block.
	paddd	xmm8,XMMWORD[64+rsp]
	paddd	xmm9,XMMWORD[80+rsp]
	paddd	xmm10,XMMWORD[96+rsp]
	paddd	xmm11,XMMWORD[112+rsp]

	movdqa	xmm6,xmm8
	punpckldq	xmm8,xmm9
	movdqa	xmm7,xmm10
	punpckldq	xmm10,xmm11
	punpckhdq	xmm6,xmm9
	punpckhdq	xmm7,xmm11
	movdqa	xmm9,xmm8
	punpcklqdq	xmm8,xmm10
	movdqa	xmm11,xmm6
	punpcklqdq	xmm6,xmm7
	punpckhqdq	xmm9,xmm10
	punpckhqdq	xmm11,xmm7
	paddd	xmm12,XMMWORD[((128-256))+rcx]
	paddd	xmm13,XMMWORD[((144-256))+rcx]
	paddd	xmm14,XMMWORD[((160-256))+rcx]
	paddd	xmm15,XMMWORD[((176-256))+rcx]

; Park bytes 0..15 of blocks 0 and 1 on the stack and fetch words 10,11.
	movdqa	XMMWORD[rsp],xmm8
	movdqa	XMMWORD[16+rsp],xmm9
	movdqa	xmm8,XMMWORD[32+rsp]
	movdqa	xmm9,XMMWORD[48+rsp]

	movdqa	xmm10,xmm12
	punpckldq	xmm12,xmm13
	movdqa	xmm7,xmm14
	punpckldq	xmm14,xmm15
	punpckhdq	xmm10,xmm13
	punpckhdq	xmm7,xmm15
	movdqa	xmm13,xmm12
	punpcklqdq	xmm12,xmm14
	movdqa	xmm15,xmm10
	punpcklqdq	xmm10,xmm7
	punpckhqdq	xmm13,xmm14
	punpckhqdq	xmm15,xmm7
	paddd	xmm4,XMMWORD[((192-256))+rcx]
	paddd	xmm5,XMMWORD[((208-256))+rcx]
	paddd	xmm8,XMMWORD[((224-256))+rcx]
	paddd	xmm9,XMMWORD[((240-256))+rcx]

; Park bytes 0..15 of blocks 2 and 3 on the stack.
	movdqa	XMMWORD[32+rsp],xmm6
	movdqa	XMMWORD[48+rsp],xmm11

	movdqa	xmm14,xmm4
	punpckldq	xmm4,xmm5
	movdqa	xmm7,xmm8
	punpckldq	xmm8,xmm9
	punpckhdq	xmm14,xmm5
	punpckhdq	xmm7,xmm9
	movdqa	xmm5,xmm4
	punpcklqdq	xmm4,xmm8
	movdqa	xmm9,xmm14
	punpcklqdq	xmm14,xmm7
	punpckhqdq	xmm5,xmm8
	punpckhqdq	xmm9,xmm7
	paddd	xmm0,XMMWORD[((256-256))+rcx]
	paddd	xmm1,XMMWORD[((272-256))+rcx]
	paddd	xmm2,XMMWORD[((288-256))+rcx]
	paddd	xmm3,XMMWORD[((304-256))+rcx]

	movdqa	xmm8,xmm0
	punpckldq	xmm0,xmm1
	movdqa	xmm7,xmm2
	punpckldq	xmm2,xmm3
	punpckhdq	xmm8,xmm1
	punpckhdq	xmm7,xmm3
	movdqa	xmm1,xmm0
	punpcklqdq	xmm0,xmm2
	movdqa	xmm3,xmm8
	punpcklqdq	xmm8,xmm7
	punpckhqdq	xmm1,xmm2
	punpckhqdq	xmm3,xmm7
; Keystream layout at this point (16-byte rows per block):
;   block 0: [rsp],    xmm12, xmm4,  xmm0
;   block 1: [16+rsp], xmm13, xmm5,  xmm1
;   block 2: [32+rsp], xmm10, xmm14, xmm8
;   block 3: [48+rsp], xmm15, xmm9,  xmm3
	cmp	rdx,64*4
	jb	NEAR $L$tail4x

; Full 256 bytes: XOR input with the four keystream blocks.
	movdqu	xmm6,XMMWORD[rsi]
	movdqu	xmm11,XMMWORD[16+rsi]
	movdqu	xmm2,XMMWORD[32+rsi]
	movdqu	xmm7,XMMWORD[48+rsi]
	pxor	xmm6,XMMWORD[rsp]
	pxor	xmm11,xmm12
	pxor	xmm2,xmm4
	pxor	xmm7,xmm0

	movdqu	XMMWORD[rdi],xmm6
	movdqu	xmm6,XMMWORD[64+rsi]
	movdqu	XMMWORD[16+rdi],xmm11
	movdqu	xmm11,XMMWORD[80+rsi]
	movdqu	XMMWORD[32+rdi],xmm2
	movdqu	xmm2,XMMWORD[96+rsi]
	movdqu	XMMWORD[48+rdi],xmm7
	movdqu	xmm7,XMMWORD[112+rsi]
	lea	rsi,[128+rsi]
	pxor	xmm6,XMMWORD[16+rsp]
	pxor	xmm11,xmm13
	pxor	xmm2,xmm5
	pxor	xmm7,xmm1

	movdqu	XMMWORD[64+rdi],xmm6
	movdqu	xmm6,XMMWORD[rsi]
	movdqu	XMMWORD[80+rdi],xmm11
	movdqu	xmm11,XMMWORD[16+rsi]
	movdqu	XMMWORD[96+rdi],xmm2
	movdqu	xmm2,XMMWORD[32+rsi]
	movdqu	XMMWORD[112+rdi],xmm7
	lea	rdi,[128+rdi]
	movdqu	xmm7,XMMWORD[48+rsi]
	pxor	xmm6,XMMWORD[32+rsp]
	pxor	xmm11,xmm10
	pxor	xmm2,xmm14
	pxor	xmm7,xmm8

	movdqu	XMMWORD[rdi],xmm6
	movdqu	xmm6,XMMWORD[64+rsi]
	movdqu	XMMWORD[16+rdi],xmm11
	movdqu	xmm11,XMMWORD[80+rsi]
	movdqu	XMMWORD[32+rdi],xmm2
	movdqu	xmm2,XMMWORD[96+rsi]
	movdqu	XMMWORD[48+rdi],xmm7
	movdqu	xmm7,XMMWORD[112+rsi]
	lea	rsi,[128+rsi]
	pxor	xmm6,XMMWORD[48+rsp]
	pxor	xmm11,xmm15
	pxor	xmm2,xmm9
	pxor	xmm7,xmm3
	movdqu	XMMWORD[64+rdi],xmm6
	movdqu	XMMWORD[80+rdi],xmm11
	movdqu	XMMWORD[96+rdi],xmm2
	movdqu	XMMWORD[112+rdi],xmm7
	lea	rdi,[128+rdi]

	sub	rdx,64*4
	jnz	NEAR $L$oop_outer4x

	jmp	NEAR $L$done4x

; Fewer than 256 bytes left: process as many whole 64-byte blocks as fit,
; then stage the next keystream block at [rsp..rsp+63] for the byte loop.
$L$tail4x:
	cmp	rdx,192
	jae	NEAR $L$192_or_more4x
	cmp	rdx,128
	jae	NEAR $L$128_or_more4x
	cmp	rdx,64
	jae	NEAR $L$64_or_more4x


; Under 64 bytes: block 0 is the tail block ([rsp] already holds its row 0).
	xor	r10,r10

	movdqa	XMMWORD[16+rsp],xmm12
	movdqa	XMMWORD[32+rsp],xmm4
	movdqa	XMMWORD[48+rsp],xmm0
	jmp	NEAR $L$oop_tail4x

ALIGN	32
$L$64_or_more4x:
	movdqu	xmm6,XMMWORD[rsi]
	movdqu	xmm11,XMMWORD[16+rsi]
	movdqu	xmm2,XMMWORD[32+rsi]
	movdqu	xmm7,XMMWORD[48+rsi]
	pxor	xmm6,XMMWORD[rsp]
	pxor	xmm11,xmm12
	pxor	xmm2,xmm4
	pxor	xmm7,xmm0
	movdqu	XMMWORD[rdi],xmm6
	movdqu	XMMWORD[16+rdi],xmm11
	movdqu	XMMWORD[32+rdi],xmm2
	movdqu	XMMWORD[48+rdi],xmm7
	je	NEAR $L$done4x			; flags still from `cmp rdx,64`: exactly 64 bytes

; Stage block 1 as the tail keystream.
	movdqa	xmm6,XMMWORD[16+rsp]
	lea	rsi,[64+rsi]
	xor	r10,r10
	movdqa	XMMWORD[rsp],xmm6
	movdqa	XMMWORD[16+rsp],xmm13
	lea	rdi,[64+rdi]
	movdqa	XMMWORD[32+rsp],xmm5
	sub	rdx,64
	movdqa	XMMWORD[48+rsp],xmm1
	jmp	NEAR $L$oop_tail4x

ALIGN	32
$L$128_or_more4x:
	movdqu	xmm6,XMMWORD[rsi]
	movdqu	xmm11,XMMWORD[16+rsi]
	movdqu	xmm2,XMMWORD[32+rsi]
	movdqu	xmm7,XMMWORD[48+rsi]
	pxor	xmm6,XMMWORD[rsp]
	pxor	xmm11,xmm12
	pxor	xmm2,xmm4
	pxor	xmm7,xmm0

	movdqu	XMMWORD[rdi],xmm6
	movdqu	xmm6,XMMWORD[64+rsi]
	movdqu	XMMWORD[16+rdi],xmm11
	movdqu	xmm11,XMMWORD[80+rsi]
	movdqu	XMMWORD[32+rdi],xmm2
	movdqu	xmm2,XMMWORD[96+rsi]
	movdqu	XMMWORD[48+rdi],xmm7
	movdqu	xmm7,XMMWORD[112+rsi]
	pxor	xmm6,XMMWORD[16+rsp]
	pxor	xmm11,xmm13
	pxor	xmm2,xmm5
	pxor	xmm7,xmm1
	movdqu	XMMWORD[64+rdi],xmm6
	movdqu	XMMWORD[80+rdi],xmm11
	movdqu	XMMWORD[96+rdi],xmm2
	movdqu	XMMWORD[112+rdi],xmm7
	je	NEAR $L$done4x			; flags still from `cmp rdx,128`: exactly 128 bytes

; Stage block 2 as the tail keystream.
	movdqa	xmm6,XMMWORD[32+rsp]
	lea	rsi,[128+rsi]
	xor	r10,r10
	movdqa	XMMWORD[rsp],xmm6
	movdqa	XMMWORD[16+rsp],xmm10
	lea	rdi,[128+rdi]
	movdqa	XMMWORD[32+rsp],xmm14
	sub	rdx,128
	movdqa	XMMWORD[48+rsp],xmm8
	jmp	NEAR $L$oop_tail4x

ALIGN	32
$L$192_or_more4x:
	movdqu	xmm6,XMMWORD[rsi]
	movdqu	xmm11,XMMWORD[16+rsi]
	movdqu	xmm2,XMMWORD[32+rsi]
	movdqu	xmm7,XMMWORD[48+rsi]
	pxor	xmm6,XMMWORD[rsp]
	pxor	xmm11,xmm12
	pxor	xmm2,xmm4
	pxor	xmm7,xmm0

	movdqu	XMMWORD[rdi],xmm6
	movdqu	xmm6,XMMWORD[64+rsi]
	movdqu	XMMWORD[16+rdi],xmm11
	movdqu	xmm11,XMMWORD[80+rsi]
	movdqu	XMMWORD[32+rdi],xmm2
	movdqu	xmm2,XMMWORD[96+rsi]
	movdqu	XMMWORD[48+rdi],xmm7
	movdqu	xmm7,XMMWORD[112+rsi]
	lea	rsi,[128+rsi]
	pxor	xmm6,XMMWORD[16+rsp]
	pxor	xmm11,xmm13
	pxor	xmm2,xmm5
	pxor	xmm7,xmm1

	movdqu	XMMWORD[64+rdi],xmm6
	movdqu	xmm6,XMMWORD[rsi]
	movdqu	XMMWORD[80+rdi],xmm11
	movdqu	xmm11,XMMWORD[16+rsi]
	movdqu	XMMWORD[96+rdi],xmm2
	movdqu	xmm2,XMMWORD[32+rsi]
	movdqu	XMMWORD[112+rdi],xmm7
	lea	rdi,[128+rdi]
	movdqu	xmm7,XMMWORD[48+rsi]
	pxor	xmm6,XMMWORD[32+rsp]
	pxor	xmm11,xmm10
	pxor	xmm2,xmm14
	pxor	xmm7,xmm8
	movdqu	XMMWORD[rdi],xmm6
	movdqu	XMMWORD[16+rdi],xmm11
	movdqu	XMMWORD[32+rdi],xmm2
	movdqu	XMMWORD[48+rdi],xmm7
	je	NEAR $L$done4x			; flags still from `cmp rdx,192`: exactly 192 bytes

; Stage block 3 as the tail keystream.
	movdqa	xmm6,XMMWORD[48+rsp]
	lea	rsi,[64+rsi]
	xor	r10,r10
	movdqa	XMMWORD[rsp],xmm6
	movdqa	XMMWORD[16+rsp],xmm15
	lea	rdi,[64+rdi]
	movdqa	XMMWORD[32+rsp],xmm9
	sub	rdx,192
	movdqa	XMMWORD[48+rsp],xmm3

; Byte-wise XOR of the remaining rdx bytes (r10 = byte index).
$L$oop_tail4x:
	movzx	eax,BYTE[r10*1+rsi]
	movzx	ecx,BYTE[r10*1+rsp]
	lea	r10,[1+r10]
	xor	eax,ecx
	mov	BYTE[((-1))+r10*1+rdi],al
	dec	rdx
	jnz	NEAR $L$oop_tail4x

$L$done4x:
	movaps	xmm6,XMMWORD[((-168))+r9]
	movaps	xmm7,XMMWORD[((-152))+r9]
	movaps	xmm8,XMMWORD[((-136))+r9]
	movaps	xmm9,XMMWORD[((-120))+r9]
	movaps	xmm10,XMMWORD[((-104))+r9]
	movaps	xmm11,XMMWORD[((-88))+r9]
	movaps	xmm12,XMMWORD[((-72))+r9]
	movaps	xmm13,XMMWORD[((-56))+r9]
	movaps	xmm14,XMMWORD[((-40))+r9]
	movaps	xmm15,XMMWORD[((-24))+r9]
	lea	rsp,[r9]

$L$4x_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	ret

$L$SEH_end_ChaCha20_ctr32_ssse3_4x:

; ChaCha20_ctr32_avx2 -- start of the 256-bit (ymm) routine. Its prologue
; repeats the Win64 argument shuffle and xmm6..xmm15 save used by the 4x
; routine, and additionally rounds rsp down to a 32-byte boundary for the
; aligned ymm stores. The routine continues past the instructions shown
; here, so it is not documented further at this point.
global	ChaCha20_ctr32_avx2

ALIGN	32
ChaCha20_ctr32_avx2:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_ChaCha20_ctr32_avx2:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]



_CET_ENDBR
	mov	r9,rsp

	sub	rsp,0x280+168
	and	rsp,-32
	movaps	XMMWORD[(-168)+r9],xmm6
	movaps	XMMWORD[(-152)+r9],xmm7
	movaps	XMMWORD[(-136)+r9],xmm8
	movaps	XMMWORD[(-120)+r9],xmm9
	movaps	XMMWORD[(-104)+r9],xmm10
	movaps	XMMWORD[(-88)+r9],xmm11
	movaps	XMMWORD[(-72)+r9],xmm12
	movaps	XMMWORD[(-56)+r9],xmm13
	movaps	XMMWORD[(-40)+r9],xmm14
	movaps	XMMWORD[(-24)+r9],xmm15
$L$8x_body:
	vzeroupper










	vbroadcasti128	ymm11,XMMWORD[$L$sigma]
	vbroadcasti128	ymm3,XMMWORD[rcx]
	vbroadcasti128	ymm15,XMMWORD[16+rcx]
	vbroadcasti128	ymm7,XMMWORD[r8]
	lea	rcx,[256+rsp]
	lea	rax,[512+rsp]
	lea	r10,[$L$rot16]
	lea	r11,[$L$rot24]

	vpshufd	ymm8,ymm11,0x00
	vpshufd	ymm9,ymm11,0x55
	vmovdqa	YMMWORD[(128-256)+rcx],ymm8
	vpshufd	ymm10,ymm11,0xaa
	vmovdqa	YMMWORD[(160-256)+rcx],ymm9
	vpshufd	ymm11,ymm11,0xff
	vmovdqa	YMMWORD[(192-256)+rcx],ymm10
	vmovdqa	YMMWORD[(224-256)+rcx],ymm11

	vpshufd	ymm0,ymm3,0x00
	vpshufd	ymm1,ymm3,0x55
	vmovdqa	YMMWORD[(256-256)+rcx],ymm0
	vpshufd	ymm2,ymm3,0xaa
	vmovdqa	YMMWORD[(288-256)+rcx],ymm1
	vpshufd	ymm3,ymm3,0xff
	vmovdqa	YMMWORD[(320-256)+rcx],ymm2
	vmovdqa	YMMWORD[(352-256)+rcx],ymm3

	vpshufd	ymm12,ymm15,0x00
	vpshufd	ymm13,ymm15,0x55
	vmovdqa	YMMWORD[(384-512)+rax],ymm12
	vpshufd	ymm14,ymm15,0xaa
	vmovdqa	YMMWORD[(416-512)+rax],ymm13
	vpshufd	ymm15,ymm15,0xff
	vmovdqa	YMMWORD[(448-512)+rax],ymm14
vmovdqa YMMWORD[(480-512)+rax],ymm15 1149 1150 vpshufd ymm4,ymm7,0x00 1151 vpshufd ymm5,ymm7,0x55 1152 vpaddd ymm4,ymm4,YMMWORD[$L$incy] 1153 vpshufd ymm6,ymm7,0xaa 1154 vmovdqa YMMWORD[(544-512)+rax],ymm5 1155 vpshufd ymm7,ymm7,0xff 1156 vmovdqa YMMWORD[(576-512)+rax],ymm6 1157 vmovdqa YMMWORD[(608-512)+rax],ymm7 1158 1159 jmp NEAR $L$oop_enter8x 1160 1161ALIGN 32 1162$L$oop_outer8x: 1163 vmovdqa ymm8,YMMWORD[((128-256))+rcx] 1164 vmovdqa ymm9,YMMWORD[((160-256))+rcx] 1165 vmovdqa ymm10,YMMWORD[((192-256))+rcx] 1166 vmovdqa ymm11,YMMWORD[((224-256))+rcx] 1167 vmovdqa ymm0,YMMWORD[((256-256))+rcx] 1168 vmovdqa ymm1,YMMWORD[((288-256))+rcx] 1169 vmovdqa ymm2,YMMWORD[((320-256))+rcx] 1170 vmovdqa ymm3,YMMWORD[((352-256))+rcx] 1171 vmovdqa ymm12,YMMWORD[((384-512))+rax] 1172 vmovdqa ymm13,YMMWORD[((416-512))+rax] 1173 vmovdqa ymm14,YMMWORD[((448-512))+rax] 1174 vmovdqa ymm15,YMMWORD[((480-512))+rax] 1175 vmovdqa ymm4,YMMWORD[((512-512))+rax] 1176 vmovdqa ymm5,YMMWORD[((544-512))+rax] 1177 vmovdqa ymm6,YMMWORD[((576-512))+rax] 1178 vmovdqa ymm7,YMMWORD[((608-512))+rax] 1179 vpaddd ymm4,ymm4,YMMWORD[$L$eight] 1180 1181$L$oop_enter8x: 1182 vmovdqa YMMWORD[64+rsp],ymm14 1183 vmovdqa YMMWORD[96+rsp],ymm15 1184 vbroadcasti128 ymm15,XMMWORD[r10] 1185 vmovdqa YMMWORD[(512-512)+rax],ymm4 1186 mov eax,10 1187 jmp NEAR $L$oop8x 1188 1189ALIGN 32 1190$L$oop8x: 1191 vpaddd ymm8,ymm8,ymm0 1192 vpxor ymm4,ymm8,ymm4 1193 vpshufb ymm4,ymm4,ymm15 1194 vpaddd ymm9,ymm9,ymm1 1195 vpxor ymm5,ymm9,ymm5 1196 vpshufb ymm5,ymm5,ymm15 1197 vpaddd ymm12,ymm12,ymm4 1198 vpxor ymm0,ymm12,ymm0 1199 vpslld ymm14,ymm0,12 1200 vpsrld ymm0,ymm0,20 1201 vpor ymm0,ymm14,ymm0 1202 vbroadcasti128 ymm14,XMMWORD[r11] 1203 vpaddd ymm13,ymm13,ymm5 1204 vpxor ymm1,ymm13,ymm1 1205 vpslld ymm15,ymm1,12 1206 vpsrld ymm1,ymm1,20 1207 vpor ymm1,ymm15,ymm1 1208 vpaddd ymm8,ymm8,ymm0 1209 vpxor ymm4,ymm8,ymm4 1210 vpshufb ymm4,ymm4,ymm14 1211 vpaddd ymm9,ymm9,ymm1 1212 vpxor ymm5,ymm9,ymm5 1213 vpshufb 
ymm5,ymm5,ymm14 1214 vpaddd ymm12,ymm12,ymm4 1215 vpxor ymm0,ymm12,ymm0 1216 vpslld ymm15,ymm0,7 1217 vpsrld ymm0,ymm0,25 1218 vpor ymm0,ymm15,ymm0 1219 vbroadcasti128 ymm15,XMMWORD[r10] 1220 vpaddd ymm13,ymm13,ymm5 1221 vpxor ymm1,ymm13,ymm1 1222 vpslld ymm14,ymm1,7 1223 vpsrld ymm1,ymm1,25 1224 vpor ymm1,ymm14,ymm1 1225 vmovdqa YMMWORD[rsp],ymm12 1226 vmovdqa YMMWORD[32+rsp],ymm13 1227 vmovdqa ymm12,YMMWORD[64+rsp] 1228 vmovdqa ymm13,YMMWORD[96+rsp] 1229 vpaddd ymm10,ymm10,ymm2 1230 vpxor ymm6,ymm10,ymm6 1231 vpshufb ymm6,ymm6,ymm15 1232 vpaddd ymm11,ymm11,ymm3 1233 vpxor ymm7,ymm11,ymm7 1234 vpshufb ymm7,ymm7,ymm15 1235 vpaddd ymm12,ymm12,ymm6 1236 vpxor ymm2,ymm12,ymm2 1237 vpslld ymm14,ymm2,12 1238 vpsrld ymm2,ymm2,20 1239 vpor ymm2,ymm14,ymm2 1240 vbroadcasti128 ymm14,XMMWORD[r11] 1241 vpaddd ymm13,ymm13,ymm7 1242 vpxor ymm3,ymm13,ymm3 1243 vpslld ymm15,ymm3,12 1244 vpsrld ymm3,ymm3,20 1245 vpor ymm3,ymm15,ymm3 1246 vpaddd ymm10,ymm10,ymm2 1247 vpxor ymm6,ymm10,ymm6 1248 vpshufb ymm6,ymm6,ymm14 1249 vpaddd ymm11,ymm11,ymm3 1250 vpxor ymm7,ymm11,ymm7 1251 vpshufb ymm7,ymm7,ymm14 1252 vpaddd ymm12,ymm12,ymm6 1253 vpxor ymm2,ymm12,ymm2 1254 vpslld ymm15,ymm2,7 1255 vpsrld ymm2,ymm2,25 1256 vpor ymm2,ymm15,ymm2 1257 vbroadcasti128 ymm15,XMMWORD[r10] 1258 vpaddd ymm13,ymm13,ymm7 1259 vpxor ymm3,ymm13,ymm3 1260 vpslld ymm14,ymm3,7 1261 vpsrld ymm3,ymm3,25 1262 vpor ymm3,ymm14,ymm3 1263 vpaddd ymm8,ymm8,ymm1 1264 vpxor ymm7,ymm8,ymm7 1265 vpshufb ymm7,ymm7,ymm15 1266 vpaddd ymm9,ymm9,ymm2 1267 vpxor ymm4,ymm9,ymm4 1268 vpshufb ymm4,ymm4,ymm15 1269 vpaddd ymm12,ymm12,ymm7 1270 vpxor ymm1,ymm12,ymm1 1271 vpslld ymm14,ymm1,12 1272 vpsrld ymm1,ymm1,20 1273 vpor ymm1,ymm14,ymm1 1274 vbroadcasti128 ymm14,XMMWORD[r11] 1275 vpaddd ymm13,ymm13,ymm4 1276 vpxor ymm2,ymm13,ymm2 1277 vpslld ymm15,ymm2,12 1278 vpsrld ymm2,ymm2,20 1279 vpor ymm2,ymm15,ymm2 1280 vpaddd ymm8,ymm8,ymm1 1281 vpxor ymm7,ymm8,ymm7 1282 vpshufb ymm7,ymm7,ymm14 1283 vpaddd ymm9,ymm9,ymm2 1284 vpxor 
ymm4,ymm9,ymm4 1285 vpshufb ymm4,ymm4,ymm14 1286 vpaddd ymm12,ymm12,ymm7 1287 vpxor ymm1,ymm12,ymm1 1288 vpslld ymm15,ymm1,7 1289 vpsrld ymm1,ymm1,25 1290 vpor ymm1,ymm15,ymm1 1291 vbroadcasti128 ymm15,XMMWORD[r10] 1292 vpaddd ymm13,ymm13,ymm4 1293 vpxor ymm2,ymm13,ymm2 1294 vpslld ymm14,ymm2,7 1295 vpsrld ymm2,ymm2,25 1296 vpor ymm2,ymm14,ymm2 1297 vmovdqa YMMWORD[64+rsp],ymm12 1298 vmovdqa YMMWORD[96+rsp],ymm13 1299 vmovdqa ymm12,YMMWORD[rsp] 1300 vmovdqa ymm13,YMMWORD[32+rsp] 1301 vpaddd ymm10,ymm10,ymm3 1302 vpxor ymm5,ymm10,ymm5 1303 vpshufb ymm5,ymm5,ymm15 1304 vpaddd ymm11,ymm11,ymm0 1305 vpxor ymm6,ymm11,ymm6 1306 vpshufb ymm6,ymm6,ymm15 1307 vpaddd ymm12,ymm12,ymm5 1308 vpxor ymm3,ymm12,ymm3 1309 vpslld ymm14,ymm3,12 1310 vpsrld ymm3,ymm3,20 1311 vpor ymm3,ymm14,ymm3 1312 vbroadcasti128 ymm14,XMMWORD[r11] 1313 vpaddd ymm13,ymm13,ymm6 1314 vpxor ymm0,ymm13,ymm0 1315 vpslld ymm15,ymm0,12 1316 vpsrld ymm0,ymm0,20 1317 vpor ymm0,ymm15,ymm0 1318 vpaddd ymm10,ymm10,ymm3 1319 vpxor ymm5,ymm10,ymm5 1320 vpshufb ymm5,ymm5,ymm14 1321 vpaddd ymm11,ymm11,ymm0 1322 vpxor ymm6,ymm11,ymm6 1323 vpshufb ymm6,ymm6,ymm14 1324 vpaddd ymm12,ymm12,ymm5 1325 vpxor ymm3,ymm12,ymm3 1326 vpslld ymm15,ymm3,7 1327 vpsrld ymm3,ymm3,25 1328 vpor ymm3,ymm15,ymm3 1329 vbroadcasti128 ymm15,XMMWORD[r10] 1330 vpaddd ymm13,ymm13,ymm6 1331 vpxor ymm0,ymm13,ymm0 1332 vpslld ymm14,ymm0,7 1333 vpsrld ymm0,ymm0,25 1334 vpor ymm0,ymm14,ymm0 1335 dec eax 1336 jnz NEAR $L$oop8x 1337 1338 lea rax,[512+rsp] 1339 vpaddd ymm8,ymm8,YMMWORD[((128-256))+rcx] 1340 vpaddd ymm9,ymm9,YMMWORD[((160-256))+rcx] 1341 vpaddd ymm10,ymm10,YMMWORD[((192-256))+rcx] 1342 vpaddd ymm11,ymm11,YMMWORD[((224-256))+rcx] 1343 1344 vpunpckldq ymm14,ymm8,ymm9 1345 vpunpckldq ymm15,ymm10,ymm11 1346 vpunpckhdq ymm8,ymm8,ymm9 1347 vpunpckhdq ymm10,ymm10,ymm11 1348 vpunpcklqdq ymm9,ymm14,ymm15 1349 vpunpckhqdq ymm14,ymm14,ymm15 1350 vpunpcklqdq ymm11,ymm8,ymm10 1351 vpunpckhqdq ymm8,ymm8,ymm10 1352 vpaddd 
ymm0,ymm0,YMMWORD[((256-256))+rcx] 1353 vpaddd ymm1,ymm1,YMMWORD[((288-256))+rcx] 1354 vpaddd ymm2,ymm2,YMMWORD[((320-256))+rcx] 1355 vpaddd ymm3,ymm3,YMMWORD[((352-256))+rcx] 1356 1357 vpunpckldq ymm10,ymm0,ymm1 1358 vpunpckldq ymm15,ymm2,ymm3 1359 vpunpckhdq ymm0,ymm0,ymm1 1360 vpunpckhdq ymm2,ymm2,ymm3 1361 vpunpcklqdq ymm1,ymm10,ymm15 1362 vpunpckhqdq ymm10,ymm10,ymm15 1363 vpunpcklqdq ymm3,ymm0,ymm2 1364 vpunpckhqdq ymm0,ymm0,ymm2 1365 vperm2i128 ymm15,ymm9,ymm1,0x20 1366 vperm2i128 ymm1,ymm9,ymm1,0x31 1367 vperm2i128 ymm9,ymm14,ymm10,0x20 1368 vperm2i128 ymm10,ymm14,ymm10,0x31 1369 vperm2i128 ymm14,ymm11,ymm3,0x20 1370 vperm2i128 ymm3,ymm11,ymm3,0x31 1371 vperm2i128 ymm11,ymm8,ymm0,0x20 1372 vperm2i128 ymm0,ymm8,ymm0,0x31 1373 vmovdqa YMMWORD[rsp],ymm15 1374 vmovdqa YMMWORD[32+rsp],ymm9 1375 vmovdqa ymm15,YMMWORD[64+rsp] 1376 vmovdqa ymm9,YMMWORD[96+rsp] 1377 1378 vpaddd ymm12,ymm12,YMMWORD[((384-512))+rax] 1379 vpaddd ymm13,ymm13,YMMWORD[((416-512))+rax] 1380 vpaddd ymm15,ymm15,YMMWORD[((448-512))+rax] 1381 vpaddd ymm9,ymm9,YMMWORD[((480-512))+rax] 1382 1383 vpunpckldq ymm2,ymm12,ymm13 1384 vpunpckldq ymm8,ymm15,ymm9 1385 vpunpckhdq ymm12,ymm12,ymm13 1386 vpunpckhdq ymm15,ymm15,ymm9 1387 vpunpcklqdq ymm13,ymm2,ymm8 1388 vpunpckhqdq ymm2,ymm2,ymm8 1389 vpunpcklqdq ymm9,ymm12,ymm15 1390 vpunpckhqdq ymm12,ymm12,ymm15 1391 vpaddd ymm4,ymm4,YMMWORD[((512-512))+rax] 1392 vpaddd ymm5,ymm5,YMMWORD[((544-512))+rax] 1393 vpaddd ymm6,ymm6,YMMWORD[((576-512))+rax] 1394 vpaddd ymm7,ymm7,YMMWORD[((608-512))+rax] 1395 1396 vpunpckldq ymm15,ymm4,ymm5 1397 vpunpckldq ymm8,ymm6,ymm7 1398 vpunpckhdq ymm4,ymm4,ymm5 1399 vpunpckhdq ymm6,ymm6,ymm7 1400 vpunpcklqdq ymm5,ymm15,ymm8 1401 vpunpckhqdq ymm15,ymm15,ymm8 1402 vpunpcklqdq ymm7,ymm4,ymm6 1403 vpunpckhqdq ymm4,ymm4,ymm6 1404 vperm2i128 ymm8,ymm13,ymm5,0x20 1405 vperm2i128 ymm5,ymm13,ymm5,0x31 1406 vperm2i128 ymm13,ymm2,ymm15,0x20 1407 vperm2i128 ymm15,ymm2,ymm15,0x31 1408 vperm2i128 ymm2,ymm9,ymm7,0x20 1409 vperm2i128 
ymm7,ymm9,ymm7,0x31 1410 vperm2i128 ymm9,ymm12,ymm4,0x20 1411 vperm2i128 ymm4,ymm12,ymm4,0x31 1412 vmovdqa ymm6,YMMWORD[rsp] 1413 vmovdqa ymm12,YMMWORD[32+rsp] 1414 1415 cmp rdx,64*8 1416 jb NEAR $L$tail8x 1417 1418 vpxor ymm6,ymm6,YMMWORD[rsi] 1419 vpxor ymm8,ymm8,YMMWORD[32+rsi] 1420 vpxor ymm1,ymm1,YMMWORD[64+rsi] 1421 vpxor ymm5,ymm5,YMMWORD[96+rsi] 1422 lea rsi,[128+rsi] 1423 vmovdqu YMMWORD[rdi],ymm6 1424 vmovdqu YMMWORD[32+rdi],ymm8 1425 vmovdqu YMMWORD[64+rdi],ymm1 1426 vmovdqu YMMWORD[96+rdi],ymm5 1427 lea rdi,[128+rdi] 1428 1429 vpxor ymm12,ymm12,YMMWORD[rsi] 1430 vpxor ymm13,ymm13,YMMWORD[32+rsi] 1431 vpxor ymm10,ymm10,YMMWORD[64+rsi] 1432 vpxor ymm15,ymm15,YMMWORD[96+rsi] 1433 lea rsi,[128+rsi] 1434 vmovdqu YMMWORD[rdi],ymm12 1435 vmovdqu YMMWORD[32+rdi],ymm13 1436 vmovdqu YMMWORD[64+rdi],ymm10 1437 vmovdqu YMMWORD[96+rdi],ymm15 1438 lea rdi,[128+rdi] 1439 1440 vpxor ymm14,ymm14,YMMWORD[rsi] 1441 vpxor ymm2,ymm2,YMMWORD[32+rsi] 1442 vpxor ymm3,ymm3,YMMWORD[64+rsi] 1443 vpxor ymm7,ymm7,YMMWORD[96+rsi] 1444 lea rsi,[128+rsi] 1445 vmovdqu YMMWORD[rdi],ymm14 1446 vmovdqu YMMWORD[32+rdi],ymm2 1447 vmovdqu YMMWORD[64+rdi],ymm3 1448 vmovdqu YMMWORD[96+rdi],ymm7 1449 lea rdi,[128+rdi] 1450 1451 vpxor ymm11,ymm11,YMMWORD[rsi] 1452 vpxor ymm9,ymm9,YMMWORD[32+rsi] 1453 vpxor ymm0,ymm0,YMMWORD[64+rsi] 1454 vpxor ymm4,ymm4,YMMWORD[96+rsi] 1455 lea rsi,[128+rsi] 1456 vmovdqu YMMWORD[rdi],ymm11 1457 vmovdqu YMMWORD[32+rdi],ymm9 1458 vmovdqu YMMWORD[64+rdi],ymm0 1459 vmovdqu YMMWORD[96+rdi],ymm4 1460 lea rdi,[128+rdi] 1461 1462 sub rdx,64*8 1463 jnz NEAR $L$oop_outer8x 1464 1465 jmp NEAR $L$done8x 1466 1467$L$tail8x: 1468 cmp rdx,448 1469 jae NEAR $L$448_or_more8x 1470 cmp rdx,384 1471 jae NEAR $L$384_or_more8x 1472 cmp rdx,320 1473 jae NEAR $L$320_or_more8x 1474 cmp rdx,256 1475 jae NEAR $L$256_or_more8x 1476 cmp rdx,192 1477 jae NEAR $L$192_or_more8x 1478 cmp rdx,128 1479 jae NEAR $L$128_or_more8x 1480 cmp rdx,64 1481 jae NEAR $L$64_or_more8x 1482 1483 xor r10,r10 
1484 vmovdqa YMMWORD[rsp],ymm6 1485 vmovdqa YMMWORD[32+rsp],ymm8 1486 jmp NEAR $L$oop_tail8x 1487 1488ALIGN 32 1489$L$64_or_more8x: 1490 vpxor ymm6,ymm6,YMMWORD[rsi] 1491 vpxor ymm8,ymm8,YMMWORD[32+rsi] 1492 vmovdqu YMMWORD[rdi],ymm6 1493 vmovdqu YMMWORD[32+rdi],ymm8 1494 je NEAR $L$done8x 1495 1496 lea rsi,[64+rsi] 1497 xor r10,r10 1498 vmovdqa YMMWORD[rsp],ymm1 1499 lea rdi,[64+rdi] 1500 sub rdx,64 1501 vmovdqa YMMWORD[32+rsp],ymm5 1502 jmp NEAR $L$oop_tail8x 1503 1504ALIGN 32 1505$L$128_or_more8x: 1506 vpxor ymm6,ymm6,YMMWORD[rsi] 1507 vpxor ymm8,ymm8,YMMWORD[32+rsi] 1508 vpxor ymm1,ymm1,YMMWORD[64+rsi] 1509 vpxor ymm5,ymm5,YMMWORD[96+rsi] 1510 vmovdqu YMMWORD[rdi],ymm6 1511 vmovdqu YMMWORD[32+rdi],ymm8 1512 vmovdqu YMMWORD[64+rdi],ymm1 1513 vmovdqu YMMWORD[96+rdi],ymm5 1514 je NEAR $L$done8x 1515 1516 lea rsi,[128+rsi] 1517 xor r10,r10 1518 vmovdqa YMMWORD[rsp],ymm12 1519 lea rdi,[128+rdi] 1520 sub rdx,128 1521 vmovdqa YMMWORD[32+rsp],ymm13 1522 jmp NEAR $L$oop_tail8x 1523 1524ALIGN 32 1525$L$192_or_more8x: 1526 vpxor ymm6,ymm6,YMMWORD[rsi] 1527 vpxor ymm8,ymm8,YMMWORD[32+rsi] 1528 vpxor ymm1,ymm1,YMMWORD[64+rsi] 1529 vpxor ymm5,ymm5,YMMWORD[96+rsi] 1530 vpxor ymm12,ymm12,YMMWORD[128+rsi] 1531 vpxor ymm13,ymm13,YMMWORD[160+rsi] 1532 vmovdqu YMMWORD[rdi],ymm6 1533 vmovdqu YMMWORD[32+rdi],ymm8 1534 vmovdqu YMMWORD[64+rdi],ymm1 1535 vmovdqu YMMWORD[96+rdi],ymm5 1536 vmovdqu YMMWORD[128+rdi],ymm12 1537 vmovdqu YMMWORD[160+rdi],ymm13 1538 je NEAR $L$done8x 1539 1540 lea rsi,[192+rsi] 1541 xor r10,r10 1542 vmovdqa YMMWORD[rsp],ymm10 1543 lea rdi,[192+rdi] 1544 sub rdx,192 1545 vmovdqa YMMWORD[32+rsp],ymm15 1546 jmp NEAR $L$oop_tail8x 1547 1548ALIGN 32 1549$L$256_or_more8x: 1550 vpxor ymm6,ymm6,YMMWORD[rsi] 1551 vpxor ymm8,ymm8,YMMWORD[32+rsi] 1552 vpxor ymm1,ymm1,YMMWORD[64+rsi] 1553 vpxor ymm5,ymm5,YMMWORD[96+rsi] 1554 vpxor ymm12,ymm12,YMMWORD[128+rsi] 1555 vpxor ymm13,ymm13,YMMWORD[160+rsi] 1556 vpxor ymm10,ymm10,YMMWORD[192+rsi] 1557 vpxor 
ymm15,ymm15,YMMWORD[224+rsi] 1558 vmovdqu YMMWORD[rdi],ymm6 1559 vmovdqu YMMWORD[32+rdi],ymm8 1560 vmovdqu YMMWORD[64+rdi],ymm1 1561 vmovdqu YMMWORD[96+rdi],ymm5 1562 vmovdqu YMMWORD[128+rdi],ymm12 1563 vmovdqu YMMWORD[160+rdi],ymm13 1564 vmovdqu YMMWORD[192+rdi],ymm10 1565 vmovdqu YMMWORD[224+rdi],ymm15 1566 je NEAR $L$done8x 1567 1568 lea rsi,[256+rsi] 1569 xor r10,r10 1570 vmovdqa YMMWORD[rsp],ymm14 1571 lea rdi,[256+rdi] 1572 sub rdx,256 1573 vmovdqa YMMWORD[32+rsp],ymm2 1574 jmp NEAR $L$oop_tail8x 1575 1576ALIGN 32 1577$L$320_or_more8x: 1578 vpxor ymm6,ymm6,YMMWORD[rsi] 1579 vpxor ymm8,ymm8,YMMWORD[32+rsi] 1580 vpxor ymm1,ymm1,YMMWORD[64+rsi] 1581 vpxor ymm5,ymm5,YMMWORD[96+rsi] 1582 vpxor ymm12,ymm12,YMMWORD[128+rsi] 1583 vpxor ymm13,ymm13,YMMWORD[160+rsi] 1584 vpxor ymm10,ymm10,YMMWORD[192+rsi] 1585 vpxor ymm15,ymm15,YMMWORD[224+rsi] 1586 vpxor ymm14,ymm14,YMMWORD[256+rsi] 1587 vpxor ymm2,ymm2,YMMWORD[288+rsi] 1588 vmovdqu YMMWORD[rdi],ymm6 1589 vmovdqu YMMWORD[32+rdi],ymm8 1590 vmovdqu YMMWORD[64+rdi],ymm1 1591 vmovdqu YMMWORD[96+rdi],ymm5 1592 vmovdqu YMMWORD[128+rdi],ymm12 1593 vmovdqu YMMWORD[160+rdi],ymm13 1594 vmovdqu YMMWORD[192+rdi],ymm10 1595 vmovdqu YMMWORD[224+rdi],ymm15 1596 vmovdqu YMMWORD[256+rdi],ymm14 1597 vmovdqu YMMWORD[288+rdi],ymm2 1598 je NEAR $L$done8x 1599 1600 lea rsi,[320+rsi] 1601 xor r10,r10 1602 vmovdqa YMMWORD[rsp],ymm3 1603 lea rdi,[320+rdi] 1604 sub rdx,320 1605 vmovdqa YMMWORD[32+rsp],ymm7 1606 jmp NEAR $L$oop_tail8x 1607 1608ALIGN 32 1609$L$384_or_more8x: 1610 vpxor ymm6,ymm6,YMMWORD[rsi] 1611 vpxor ymm8,ymm8,YMMWORD[32+rsi] 1612 vpxor ymm1,ymm1,YMMWORD[64+rsi] 1613 vpxor ymm5,ymm5,YMMWORD[96+rsi] 1614 vpxor ymm12,ymm12,YMMWORD[128+rsi] 1615 vpxor ymm13,ymm13,YMMWORD[160+rsi] 1616 vpxor ymm10,ymm10,YMMWORD[192+rsi] 1617 vpxor ymm15,ymm15,YMMWORD[224+rsi] 1618 vpxor ymm14,ymm14,YMMWORD[256+rsi] 1619 vpxor ymm2,ymm2,YMMWORD[288+rsi] 1620 vpxor ymm3,ymm3,YMMWORD[320+rsi] 1621 vpxor ymm7,ymm7,YMMWORD[352+rsi] 1622 vmovdqu 
YMMWORD[rdi],ymm6 1623 vmovdqu YMMWORD[32+rdi],ymm8 1624 vmovdqu YMMWORD[64+rdi],ymm1 1625 vmovdqu YMMWORD[96+rdi],ymm5 1626 vmovdqu YMMWORD[128+rdi],ymm12 1627 vmovdqu YMMWORD[160+rdi],ymm13 1628 vmovdqu YMMWORD[192+rdi],ymm10 1629 vmovdqu YMMWORD[224+rdi],ymm15 1630 vmovdqu YMMWORD[256+rdi],ymm14 1631 vmovdqu YMMWORD[288+rdi],ymm2 1632 vmovdqu YMMWORD[320+rdi],ymm3 1633 vmovdqu YMMWORD[352+rdi],ymm7 1634 je NEAR $L$done8x 1635 1636 lea rsi,[384+rsi] 1637 xor r10,r10 1638 vmovdqa YMMWORD[rsp],ymm11 1639 lea rdi,[384+rdi] 1640 sub rdx,384 1641 vmovdqa YMMWORD[32+rsp],ymm9 1642 jmp NEAR $L$oop_tail8x 1643 1644ALIGN 32 1645$L$448_or_more8x: 1646 vpxor ymm6,ymm6,YMMWORD[rsi] 1647 vpxor ymm8,ymm8,YMMWORD[32+rsi] 1648 vpxor ymm1,ymm1,YMMWORD[64+rsi] 1649 vpxor ymm5,ymm5,YMMWORD[96+rsi] 1650 vpxor ymm12,ymm12,YMMWORD[128+rsi] 1651 vpxor ymm13,ymm13,YMMWORD[160+rsi] 1652 vpxor ymm10,ymm10,YMMWORD[192+rsi] 1653 vpxor ymm15,ymm15,YMMWORD[224+rsi] 1654 vpxor ymm14,ymm14,YMMWORD[256+rsi] 1655 vpxor ymm2,ymm2,YMMWORD[288+rsi] 1656 vpxor ymm3,ymm3,YMMWORD[320+rsi] 1657 vpxor ymm7,ymm7,YMMWORD[352+rsi] 1658 vpxor ymm11,ymm11,YMMWORD[384+rsi] 1659 vpxor ymm9,ymm9,YMMWORD[416+rsi] 1660 vmovdqu YMMWORD[rdi],ymm6 1661 vmovdqu YMMWORD[32+rdi],ymm8 1662 vmovdqu YMMWORD[64+rdi],ymm1 1663 vmovdqu YMMWORD[96+rdi],ymm5 1664 vmovdqu YMMWORD[128+rdi],ymm12 1665 vmovdqu YMMWORD[160+rdi],ymm13 1666 vmovdqu YMMWORD[192+rdi],ymm10 1667 vmovdqu YMMWORD[224+rdi],ymm15 1668 vmovdqu YMMWORD[256+rdi],ymm14 1669 vmovdqu YMMWORD[288+rdi],ymm2 1670 vmovdqu YMMWORD[320+rdi],ymm3 1671 vmovdqu YMMWORD[352+rdi],ymm7 1672 vmovdqu YMMWORD[384+rdi],ymm11 1673 vmovdqu YMMWORD[416+rdi],ymm9 1674 je NEAR $L$done8x 1675 1676 lea rsi,[448+rsi] 1677 xor r10,r10 1678 vmovdqa YMMWORD[rsp],ymm0 1679 lea rdi,[448+rdi] 1680 sub rdx,448 1681 vmovdqa YMMWORD[32+rsp],ymm4 1682 1683$L$oop_tail8x: 1684 movzx eax,BYTE[r10*1+rsi] 1685 movzx ecx,BYTE[r10*1+rsp] 1686 lea r10,[1+r10] 1687 xor eax,ecx 1688 mov 
; ---------------------------------------------------------------------------
; NOTE(review): the physical line below begins with the operands of the `mov`
; that ends the previous line, and packs many logical statements. In order:
;
; * End of $L$oop_tail8x: stores al to [rdi+r10-1], decrements rdx and loops
;   while rdx is non-zero.
; * $L$done8x: vzeroall, then reloads xmm6..xmm15 from the ten 16-byte slots
;   at r9-168 .. r9-24 and sets rsp = r9.
; * $L$8x_epilogue: reloads rdi/rsi from [rsp+8]/[rsp+16] and returns
;   (presumably the slots filled by this routine's WIN64 prologue, which is
;   outside this view -- confirm).
; * EXTERN __imp_RtlVirtualUnwind, used by the handler tail.
;
; * se_handler -- the handler named by $L$SEH_info_ChaCha20_ctr32_nohw.
;   NOTE(review): r8/r9 are presumably the CONTEXT* and DISPATCHER_CONTEXT*
;   arguments of a Win64 language-specific handler, with +120/+152/+248 being
;   CONTEXT.Rax/Rsp/Rip and +144/+160/+168/+176/+216..+240 being
;   Rbx/Rbp/Rsi/Rdi/R12..R15 -- confirm against winnt.h.
;     - Saves rsi, rdi, rbx, rbp, r12-r15 and flags; reserves 64 bytes.
;     - rax = [r8+120], rbx = [r8+248], rsi = [r9+8], r11 = [r9+56].
;     - If rbx is below $L$ctr32_body: go to the common tail with that rax.
;     - Otherwise rax = [r8+152]; if rbx is at or above $L$no_data: go to the
;       common tail.
;     - Otherwise rax += 64+24+48, which matches the `sub rsp,64+24` plus the
;       six 8-byte pushes in ChaCha20_ctr32_nohw's prologue. rbx, rbp and
;       r12..r15 are then loaded from rax-8 .. rax-48 and written into the
;       record at r8.
; * $L$common_seh_tail (shared by all three handlers):
;     - rdi = [rax+8], rsi = [rax+16]; stores rax, rsi, rdi to [r8+152],
;       [r8+168], [r8+176].
;     - Copies 154 qwords from r8 to the pointer held at [r9+40].
;       `DD 0xa548f3fc` is the byte sequence FC F3 48 A5 = cld / rep movsq.
;     - Builds the RtlVirtualUnwind call from the record at the old r9:
;       rcx = 0, rdx = [+8], r8 = [+0], r9 = [+16]; r10 = [+40],
;       r11 = address of +56 and r12 = address of +24 are staged for the
;       stack-argument stores that follow on the next physical line.
; ---------------------------------------------------------------------------
BYTE[((-1))+r10*1+rdi],al 1689 dec rdx 1690 jnz NEAR $L$oop_tail8x 1691 1692$L$done8x: 1693 vzeroall 1694 movaps xmm6,XMMWORD[((-168))+r9] 1695 movaps xmm7,XMMWORD[((-152))+r9] 1696 movaps xmm8,XMMWORD[((-136))+r9] 1697 movaps xmm9,XMMWORD[((-120))+r9] 1698 movaps xmm10,XMMWORD[((-104))+r9] 1699 movaps xmm11,XMMWORD[((-88))+r9] 1700 movaps xmm12,XMMWORD[((-72))+r9] 1701 movaps xmm13,XMMWORD[((-56))+r9] 1702 movaps xmm14,XMMWORD[((-40))+r9] 1703 movaps xmm15,XMMWORD[((-24))+r9] 1704 lea rsp,[r9] 1705 1706$L$8x_epilogue: 1707 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 1708 mov rsi,QWORD[16+rsp] 1709 ret 1710 1711$L$SEH_end_ChaCha20_ctr32_avx2: 1712EXTERN __imp_RtlVirtualUnwind 1713 1714ALIGN 16 1715se_handler: 1716 push rsi 1717 push rdi 1718 push rbx 1719 push rbp 1720 push r12 1721 push r13 1722 push r14 1723 push r15 1724 pushfq 1725 sub rsp,64 1726 1727 mov rax,QWORD[120+r8] 1728 mov rbx,QWORD[248+r8] 1729 1730 mov rsi,QWORD[8+r9] 1731 mov r11,QWORD[56+r9] 1732 1733 lea r10,[$L$ctr32_body] 1734 cmp rbx,r10 1735 jb NEAR $L$common_seh_tail 1736 1737 mov rax,QWORD[152+r8] 1738 1739 lea r10,[$L$no_data] 1740 cmp rbx,r10 1741 jae NEAR $L$common_seh_tail 1742 1743 lea rax,[((64+24+48))+rax] 1744 1745 mov rbx,QWORD[((-8))+rax] 1746 mov rbp,QWORD[((-16))+rax] 1747 mov r12,QWORD[((-24))+rax] 1748 mov r13,QWORD[((-32))+rax] 1749 mov r14,QWORD[((-40))+rax] 1750 mov r15,QWORD[((-48))+rax] 1751 mov QWORD[144+r8],rbx 1752 mov QWORD[160+r8],rbp 1753 mov QWORD[216+r8],r12 1754 mov QWORD[224+r8],r13 1755 mov QWORD[232+r8],r14 1756 mov QWORD[240+r8],r15 1757 1758$L$common_seh_tail: 1759 mov rdi,QWORD[8+rax] 1760 mov rsi,QWORD[16+rax] 1761 mov QWORD[152+r8],rax 1762 mov QWORD[168+r8],rsi 1763 mov QWORD[176+r8],rdi 1764 1765 mov rdi,QWORD[40+r9] 1766 mov rsi,r8 1767 mov ecx,154 1768 DD 0xa548f3fc 1769 1770 mov rsi,r9 1771 xor rcx,rcx 1772 mov rdx,QWORD[8+rsi] 1773 mov r8,QWORD[rsi] 1774 mov r9,QWORD[16+rsi] 1775 mov r10,QWORD[40+rsi] 1776 lea r11,[56+rsi] 1777 lea r12,[24+rsi] 1778 mov
; ---------------------------------------------------------------------------
; NOTE(review): the physical line below begins with the operands of the `mov`
; that ends the previous line. In order:
;
; * Rest of $L$common_seh_tail: stores r10, r11, r12 and rcx (zero) to
;   [rsp+32], [rsp+40], [rsp+48], [rsp+56] as the stack arguments, then calls
;   through __imp_RtlVirtualUnwind. Afterwards sets eax = 1, releases the
;   64-byte area, restores flags and the saved registers in reverse push
;   order, and returns.
;   NOTE(review): 1 is presumably ExceptionContinueSearch -- confirm.
;
; * ssse3_handler / full_handler -- same entry sequence as se_handler (same
;   pushes, same 64-byte reservation, same loads of rax, rbx, rsi, r11), so
;   both can jump into $L$common_seh_tail. They differ from se_handler in how
;   the covered range is found and in what is restored:
;     - Lower bound = DWORD [r11] + rsi; upper bound = DWORD [r11+4] + rsi.
;       The .xdata records that name these handlers store a body/epilogue
;       label pair right after the handler RVA, which is presumably what r11
;       points at -- confirm.
;     - If rbx is below the lower bound: common tail with rax = [r8+120].
;     - Otherwise rax = [r8+192]; if rbx is at or above the upper bound:
;       common tail. NOTE(review): +192 is presumably CONTEXT.R9, the frame
;       value the routines keep in r9 (see $L$done8x) -- confirm.
;     - Otherwise copies qwords to r8+512 with the same raw-encoded
;       cld / rep movsq, then jumps to the common tail.
;       ssse3_handler copies 4 qwords (32 bytes) from rax-40.
;       full_handler copies 20 qwords (160 bytes) from rax-168, which is the
;       size and position of the xmm6..xmm15 slots reloaded in $L$done8x.
;       NOTE(review): +512 is presumably CONTEXT.Xmm6 -- confirm.
;
; * section .pdata: one begin/end/info triple of image-relative addresses
;   (`wrt ..imagebase`) per routine, in the order nohw, ssse3, ssse3_4x, avx2.
;   The ssse3 triple is completed on the next physical line.
;   NOTE(review): presumably RUNTIME_FUNCTION entries -- confirm.
; ---------------------------------------------------------------------------
QWORD[32+rsp],r10 1779 mov QWORD[40+rsp],r11 1780 mov QWORD[48+rsp],r12 1781 mov QWORD[56+rsp],rcx 1782 call QWORD[__imp_RtlVirtualUnwind] 1783 1784 mov eax,1 1785 add rsp,64 1786 popfq 1787 pop r15 1788 pop r14 1789 pop r13 1790 pop r12 1791 pop rbp 1792 pop rbx 1793 pop rdi 1794 pop rsi 1795 ret 1796 1797 1798 1799ALIGN 16 1800ssse3_handler: 1801 push rsi 1802 push rdi 1803 push rbx 1804 push rbp 1805 push r12 1806 push r13 1807 push r14 1808 push r15 1809 pushfq 1810 sub rsp,64 1811 1812 mov rax,QWORD[120+r8] 1813 mov rbx,QWORD[248+r8] 1814 1815 mov rsi,QWORD[8+r9] 1816 mov r11,QWORD[56+r9] 1817 1818 mov r10d,DWORD[r11] 1819 lea r10,[r10*1+rsi] 1820 cmp rbx,r10 1821 jb NEAR $L$common_seh_tail 1822 1823 mov rax,QWORD[192+r8] 1824 1825 mov r10d,DWORD[4+r11] 1826 lea r10,[r10*1+rsi] 1827 cmp rbx,r10 1828 jae NEAR $L$common_seh_tail 1829 1830 lea rsi,[((-40))+rax] 1831 lea rdi,[512+r8] 1832 mov ecx,4 1833 DD 0xa548f3fc 1834 1835 jmp NEAR $L$common_seh_tail 1836 1837 1838 1839ALIGN 16 1840full_handler: 1841 push rsi 1842 push rdi 1843 push rbx 1844 push rbp 1845 push r12 1846 push r13 1847 push r14 1848 push r15 1849 pushfq 1850 sub rsp,64 1851 1852 mov rax,QWORD[120+r8] 1853 mov rbx,QWORD[248+r8] 1854 1855 mov rsi,QWORD[8+r9] 1856 mov r11,QWORD[56+r9] 1857 1858 mov r10d,DWORD[r11] 1859 lea r10,[r10*1+rsi] 1860 cmp rbx,r10 1861 jb NEAR $L$common_seh_tail 1862 1863 mov rax,QWORD[192+r8] 1864 1865 mov r10d,DWORD[4+r11] 1866 lea r10,[r10*1+rsi] 1867 cmp rbx,r10 1868 jae NEAR $L$common_seh_tail 1869 1870 lea rsi,[((-168))+rax] 1871 lea rdi,[512+r8] 1872 mov ecx,20 1873 DD 0xa548f3fc 1874 1875 jmp NEAR $L$common_seh_tail 1876 1877 1878section .pdata rdata align=4 1879ALIGN 4 1880 DD $L$SEH_begin_ChaCha20_ctr32_nohw wrt ..imagebase 1881 DD $L$SEH_end_ChaCha20_ctr32_nohw wrt ..imagebase 1882 DD $L$SEH_info_ChaCha20_ctr32_nohw wrt ..imagebase 1883 1884 DD $L$SEH_begin_ChaCha20_ctr32_ssse3 wrt ..imagebase 1885 DD $L$SEH_end_ChaCha20_ctr32_ssse3 wrt ..imagebase 1886 DD
; ---------------------------------------------------------------------------
; NOTE(review): the physical line below begins with the operand of the `DD`
; that ends the previous line. In order:
;
; * Rest of .pdata: the info address closing the ssse3 triple, then the
;   ssse3_4x and avx2 triples.
;
; * section .xdata: one record per routine. Each starts with `DB 9,0,0,0`
;   followed by the image-relative address of its handler:
;     nohw     -> se_handler    (no extra data; se_handler compares against
;                                $L$ctr32_body and $L$no_data directly)
;     ssse3    -> ssse3_handler + ($L$ssse3_body, $L$ssse3_epilogue)
;     ssse3_4x -> full_handler  + ($L$4x_body,    $L$4x_epilogue)
;     avx2     -> full_handler  + ($L$8x_body,    $L$8x_epilogue)
;   NOTE(review): 9 is presumably the UNWIND_INFO header byte (version 1 with
;   UNW_FLAG_EHANDLER set) and the three zero bytes mean no prolog or unwind
;   codes -- confirm against the x64 exception-handling documentation.
;
; * %else branch (output format is not win64): emits a lone `ret` as the
;   workaround for the NASM bug linked in the existing comment; %endif closes
;   the %ifidn opened at the top of the file.
; ---------------------------------------------------------------------------
$L$SEH_info_ChaCha20_ctr32_ssse3 wrt ..imagebase 1887 1888 DD $L$SEH_begin_ChaCha20_ctr32_ssse3_4x wrt ..imagebase 1889 DD $L$SEH_end_ChaCha20_ctr32_ssse3_4x wrt ..imagebase 1890 DD $L$SEH_info_ChaCha20_ctr32_ssse3_4x wrt ..imagebase 1891 DD $L$SEH_begin_ChaCha20_ctr32_avx2 wrt ..imagebase 1892 DD $L$SEH_end_ChaCha20_ctr32_avx2 wrt ..imagebase 1893 DD $L$SEH_info_ChaCha20_ctr32_avx2 wrt ..imagebase 1894section .xdata rdata align=8 1895ALIGN 8 1896$L$SEH_info_ChaCha20_ctr32_nohw: 1897 DB 9,0,0,0 1898 DD se_handler wrt ..imagebase 1899 1900$L$SEH_info_ChaCha20_ctr32_ssse3: 1901 DB 9,0,0,0 1902 DD ssse3_handler wrt ..imagebase 1903 DD $L$ssse3_body wrt ..imagebase,$L$ssse3_epilogue wrt ..imagebase 1904 1905$L$SEH_info_ChaCha20_ctr32_ssse3_4x: 1906 DB 9,0,0,0 1907 DD full_handler wrt ..imagebase 1908 DD $L$4x_body wrt ..imagebase,$L$4x_epilogue wrt ..imagebase 1909$L$SEH_info_ChaCha20_ctr32_avx2: 1910 DB 9,0,0,0 1911 DD full_handler wrt ..imagebase 1912 DD $L$8x_body wrt ..imagebase,$L$8x_epilogue wrt ..imagebase 1913%else 1914; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 1915ret 1916%endif 1917