; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.

%ifidn __OUTPUT_FORMAT__, win64
default	rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD
%define _CET_ENDBR

%ifdef BORINGSSL_PREFIX
%include "boringssl_prefix_symbols_nasm.inc"
%endif
section	.text code align=64

EXTERN	OPENSSL_ia32cap_P

chacha20_poly1305_constants:

;-----------------------------------------------------------------------
; Read-only tables. Label order and sizes are kept exactly as generated;
; each table is laid out immediately after the previous one.
;-----------------------------------------------------------------------
section	.rdata rdata align=8
ALIGN	64

; The 16-byte ASCII string "expand 32-byte k", stored twice (32 bytes).
$L$chacha20_consts:
	times 2 DB	'expand 32-byte k'

; Byte-index tables (the file uses them as pshufb masks). Within every
; 4-byte group the indices 3,0,1,2 rotate the dword left by 8 bits and
; 2,3,0,1 rotate it left by 16 bits. Each table is stored twice.
$L$rol8:
	times 2 DB	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
$L$rol16:
	times 2 DB	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13

; Dword vectors; $L$avx2_init is directly followed by $L$sse_inc.
$L$avx2_init:
	DD	0,0,0,0
$L$sse_inc:
	DD	1,0,0,0
$L$avx2_inc:
	times 2 DD	2,0,0,0

; 32-byte AND mask: the low 16 bytes clear selected bits, the high 16
; bytes are all ones.
$L$clamp:
	DQ	0x0FFFFFFC0FFFFFFF,0x0FFFFFFC0FFFFFFC
	DQ	0xFFFFFFFFFFFFFFFF,0xFFFFFFFFFFFFFFFF

; Sixteen 16-byte rows; row n (n = 1..16) is n bytes of 0xff followed by
; 16-n bytes of 0x00.
ALIGN	16
$L$and_masks:
%assign AND_MASK_ONES 1
%rep 16
	times AND_MASK_ONES DB	0xff
	times 16-AND_MASK_ONES DB	0x00
%assign AND_MASK_ONES AND_MASK_ONES+1
%endrep
%undef AND_MASK_ONES

section	.text

; The two 64-bit multiplier words read by poly_hash_ad_internal. They live
; in the caller's frame; chacha20_poly1305_open stores the $L$clamp-masked
; value at this offset before calling.
%define POLY_AD_KEY_LO	QWORD[(160+0)+rbp]
%define POLY_AD_KEY_HI	QWORD[(160+8)+rbp]

;-----------------------------------------------------------------------
; POLY_AD_MUL_REDUCE
;   h = (h * key) partially reduced modulo 2^130 - 5
; In/Out: r12:r11:r10 = h (three little-endian 64-bit limbs)
; Reads:  POLY_AD_KEY_LO / POLY_AD_KEY_HI
; Clobb:  rax, rdx, r9, r13, r14, r15, flags
; The instruction order is significant: every adc consumes the carry
; produced by the add/adc directly before it.
;-----------------------------------------------------------------------
%macro POLY_AD_MUL_REDUCE 0
	; --- key_lo * h -> r15:r14:r13 ---
	mov	rax,POLY_AD_KEY_LO
	mov	r15,rax
	mul	r10			; key_lo * h0
	mov	r13,rax			; t0
	mov	r14,rdx
	mov	rax,POLY_AD_KEY_LO
	mul	r11			; key_lo * h1
	imul	r15,r12			; key_lo * h2 (low 64 bits)
	add	r14,rax			; t1
	adc	r15,rdx			; t2
	; --- key_hi * h, accumulated one limb higher -> r9:r15:r14 ---
	mov	rax,POLY_AD_KEY_HI
	mov	r9,rax
	mul	r10			; key_hi * h0
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx			; carry word for the next limb
	mov	rax,POLY_AD_KEY_HI
	mul	r11			; key_hi * h1
	add	r15,rax
	adc	rdx,0
	imul	r9,r12			; key_hi * h2 (low 64 bits)
	add	r15,r10
	adc	r9,rdx			; product is now r9:r15:r14:r13
	; --- fold bits >= 130 back in: c*2^130 == 5*c, added as 4*c + c ---
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3			; h2 keeps product bits 128..129
	mov	r13,r15
	and	r13,-4			; r14:r13 = 4*c
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2			; r9:r15 = c
	add	r15,r13
	adc	r9,r14			; r9:r15 = 5*c
	add	r10,r15
	adc	r11,r9
	adc	r12,0
%endmacro

;-----------------------------------------------------------------------
; poly_hash_ad_internal
;   Zeroes the accumulator, then absorbs r8 bytes starting at rcx in
;   16-byte blocks. Every block, including a zero-extended final partial
;   block, is added together with a 2^128 term and then run through
;   POLY_AD_MUL_REDUCE.
; In:    rcx = data pointer, r8 = byte count,
;        rbp = frame base (key words at rbp+160 / rbp+168)
; Out:   r12:r11:r10 = accumulator
; Clobb: rax, rdx, r9, r13, r14, r15, flags; rcx and r8 are consumed on
;        the general path and left untouched on the 13-byte fast path.
;-----------------------------------------------------------------------
ALIGN	64
poly_hash_ad_internal:
	xor	r10,r10
	xor	r11,r11
	xor	r12,r12
	cmp	r8,13
	jne	NEAR $L$hash_ad_loop

$L$poly_fast_tls_ad:
	; Exactly 13 bytes: two overlapping 8-byte loads. The second load
	; covers bytes 5..12 and the shift drops bytes 5..7, leaving bytes
	; 8..12 in the low 40 bits of r11.
	mov	r10,QWORD[rcx]
	mov	r11,QWORD[5+rcx]
	shr	r11,24
	mov	r12,1			; 2^128 term
	POLY_AD_MUL_REDUCE
	ret

$L$hash_ad_loop:
	; Full 16-byte blocks while at least 16 bytes remain.
	cmp	r8,16
	jb	NEAR $L$hash_ad_tail
	add	r10,QWORD[rcx]
	adc	r11,QWORD[8+rcx]
	adc	r12,1			; 2^128 term plus carry
	POLY_AD_MUL_REDUCE
	lea	rcx,[16+rcx]
	sub	r8,16
	jmp	NEAR $L$hash_ad_loop

$L$hash_ad_tail:
	cmp	r8,0
	je	NEAR $L$hash_ad_done

	; 1..15 bytes left: assemble them into r14:r13 by walking backwards
	; from the end of the buffer, shifting earlier bytes toward the low
	; end. Unfilled high bytes stay zero.
	xor	r13,r13
	xor	r14,r14
	xor	r15,r15
	add	rcx,r8
$L$hash_ad_tail_loop:
	shld	r14,r13,8
	shl	r13,8
	movzx	r15,BYTE[rcx-1]
	xor	r13,r15
	dec	rcx
	dec	r8
	jne	NEAR $L$hash_ad_tail_loop

	add	r10,r13
	adc	r11,r14
	adc	r12,1			; 2^128 term plus carry
	POLY_AD_MUL_REDUCE

$L$hash_ad_done:
	ret



global	chacha20_poly1305_open

ALIGN	64
chacha20_poly1305_open:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_chacha20_poly1305_open:
	; Move the Win64 argument registers / stack slots into
	; rdi, rsi, rdx, rcx, r8, r9.
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]



_CET_ENDBR
	push	rbp

	push	rbx

	push	r12

	push	r13

	push	r14

	push	r15



	push	r9

	sub	rsp,288 + 160 + 32


	; rbp = 32-byte-aligned base of the local frame.
	lea	rbp,[32+rsp]
	and	rbp,-32

	; Save xmm registers into the frame starting at rbp+0.
	movaps	XMMWORD[(0+0)+rbp],xmm6
	movaps	XMMWORD[(16+0)+rbp],xmm7
	movaps	XMMWORD[(32+0)+rbp],xmm8
	movaps	XMMWORD[(48+0)+rbp],xmm9
	movaps	XMMWORD[(64+0)+rbp],xmm10
	movaps	XMMWORD[(80+0)+rbp],xmm11
	movaps	XMMWORD[(96+0)+rbp],xmm12
	movaps 
XMMWORD[(112+0)+rbp],xmm13 276 movaps XMMWORD[(128+0)+rbp],xmm14 277 movaps XMMWORD[(144+0)+rbp],xmm15 278 279 mov rbx,rdx 280 mov QWORD[((0+160+32))+rbp],r8 281 mov QWORD[((8+160+32))+rbp],rbx 282 283 mov eax,DWORD[((OPENSSL_ia32cap_P+8))] 284 and eax,288 285 xor eax,288 286 jz NEAR chacha20_poly1305_open_avx2 287 288 cmp rbx,128 289 jbe NEAR $L$open_sse_128 290 291 movdqa xmm0,XMMWORD[$L$chacha20_consts] 292 movdqu xmm4,XMMWORD[r9] 293 movdqu xmm8,XMMWORD[16+r9] 294 movdqu xmm12,XMMWORD[32+r9] 295 296 movdqa xmm7,xmm12 297 298 movdqa XMMWORD[(160+48)+rbp],xmm4 299 movdqa XMMWORD[(160+64)+rbp],xmm8 300 movdqa XMMWORD[(160+96)+rbp],xmm12 301 mov r10,10 302$L$open_sse_init_rounds: 303 paddd xmm0,xmm4 304 pxor xmm12,xmm0 305 pshufb xmm12,XMMWORD[$L$rol16] 306 paddd xmm8,xmm12 307 pxor xmm4,xmm8 308 movdqa xmm3,xmm4 309 pslld xmm3,12 310 psrld xmm4,20 311 pxor xmm4,xmm3 312 paddd xmm0,xmm4 313 pxor xmm12,xmm0 314 pshufb xmm12,XMMWORD[$L$rol8] 315 paddd xmm8,xmm12 316 pxor xmm4,xmm8 317 movdqa xmm3,xmm4 318 pslld xmm3,7 319 psrld xmm4,25 320 pxor xmm4,xmm3 321DB 102,15,58,15,228,4 322DB 102,69,15,58,15,192,8 323DB 102,69,15,58,15,228,12 324 paddd xmm0,xmm4 325 pxor xmm12,xmm0 326 pshufb xmm12,XMMWORD[$L$rol16] 327 paddd xmm8,xmm12 328 pxor xmm4,xmm8 329 movdqa xmm3,xmm4 330 pslld xmm3,12 331 psrld xmm4,20 332 pxor xmm4,xmm3 333 paddd xmm0,xmm4 334 pxor xmm12,xmm0 335 pshufb xmm12,XMMWORD[$L$rol8] 336 paddd xmm8,xmm12 337 pxor xmm4,xmm8 338 movdqa xmm3,xmm4 339 pslld xmm3,7 340 psrld xmm4,25 341 pxor xmm4,xmm3 342DB 102,15,58,15,228,12 343DB 102,69,15,58,15,192,8 344DB 102,69,15,58,15,228,4 345 346 dec r10 347 jne NEAR $L$open_sse_init_rounds 348 349 paddd xmm0,XMMWORD[$L$chacha20_consts] 350 paddd xmm4,XMMWORD[((160+48))+rbp] 351 352 pand xmm0,XMMWORD[$L$clamp] 353 movdqa XMMWORD[(160+0)+rbp],xmm0 354 movdqa XMMWORD[(160+16)+rbp],xmm4 355 356 mov r8,r8 357 call poly_hash_ad_internal 358$L$open_sse_main_loop: 359 cmp rbx,16*16 360 jb NEAR $L$open_sse_tail 361 362 movdqa 
xmm0,XMMWORD[$L$chacha20_consts] 363 movdqa xmm4,XMMWORD[((160+48))+rbp] 364 movdqa xmm8,XMMWORD[((160+64))+rbp] 365 movdqa xmm1,xmm0 366 movdqa xmm5,xmm4 367 movdqa xmm9,xmm8 368 movdqa xmm2,xmm0 369 movdqa xmm6,xmm4 370 movdqa xmm10,xmm8 371 movdqa xmm3,xmm0 372 movdqa xmm7,xmm4 373 movdqa xmm11,xmm8 374 movdqa xmm15,XMMWORD[((160+96))+rbp] 375 paddd xmm15,XMMWORD[$L$sse_inc] 376 movdqa xmm14,xmm15 377 paddd xmm14,XMMWORD[$L$sse_inc] 378 movdqa xmm13,xmm14 379 paddd xmm13,XMMWORD[$L$sse_inc] 380 movdqa xmm12,xmm13 381 paddd xmm12,XMMWORD[$L$sse_inc] 382 movdqa XMMWORD[(160+96)+rbp],xmm12 383 movdqa XMMWORD[(160+112)+rbp],xmm13 384 movdqa XMMWORD[(160+128)+rbp],xmm14 385 movdqa XMMWORD[(160+144)+rbp],xmm15 386 387 388 389 mov rcx,4 390 mov r8,rsi 391$L$open_sse_main_loop_rounds: 392 movdqa XMMWORD[(160+80)+rbp],xmm8 393 movdqa xmm8,XMMWORD[$L$rol16] 394 paddd xmm3,xmm7 395 paddd xmm2,xmm6 396 paddd xmm1,xmm5 397 paddd xmm0,xmm4 398 pxor xmm15,xmm3 399 pxor xmm14,xmm2 400 pxor xmm13,xmm1 401 pxor xmm12,xmm0 402DB 102,69,15,56,0,248 403DB 102,69,15,56,0,240 404DB 102,69,15,56,0,232 405DB 102,69,15,56,0,224 406 movdqa xmm8,XMMWORD[((160+80))+rbp] 407 paddd xmm11,xmm15 408 paddd xmm10,xmm14 409 paddd xmm9,xmm13 410 paddd xmm8,xmm12 411 pxor xmm7,xmm11 412 add r10,QWORD[((0+0))+r8] 413 adc r11,QWORD[((8+0))+r8] 414 adc r12,1 415 416 lea r8,[16+r8] 417 pxor xmm6,xmm10 418 pxor xmm5,xmm9 419 pxor xmm4,xmm8 420 movdqa XMMWORD[(160+80)+rbp],xmm8 421 movdqa xmm8,xmm7 422 psrld xmm8,20 423 pslld xmm7,32-20 424 pxor xmm7,xmm8 425 movdqa xmm8,xmm6 426 psrld xmm8,20 427 pslld xmm6,32-20 428 pxor xmm6,xmm8 429 movdqa xmm8,xmm5 430 psrld xmm8,20 431 pslld xmm5,32-20 432 pxor xmm5,xmm8 433 movdqa xmm8,xmm4 434 psrld xmm8,20 435 pslld xmm4,32-20 436 pxor xmm4,xmm8 437 mov rax,QWORD[((0+160+0))+rbp] 438 mov r15,rax 439 mul r10 440 mov r13,rax 441 mov r14,rdx 442 mov rax,QWORD[((0+160+0))+rbp] 443 mul r11 444 imul r15,r12 445 add r14,rax 446 adc r15,rdx 447 movdqa 
xmm8,XMMWORD[$L$rol8] 448 paddd xmm3,xmm7 449 paddd xmm2,xmm6 450 paddd xmm1,xmm5 451 paddd xmm0,xmm4 452 pxor xmm15,xmm3 453 pxor xmm14,xmm2 454 pxor xmm13,xmm1 455 pxor xmm12,xmm0 456DB 102,69,15,56,0,248 457DB 102,69,15,56,0,240 458DB 102,69,15,56,0,232 459DB 102,69,15,56,0,224 460 movdqa xmm8,XMMWORD[((160+80))+rbp] 461 paddd xmm11,xmm15 462 paddd xmm10,xmm14 463 paddd xmm9,xmm13 464 paddd xmm8,xmm12 465 pxor xmm7,xmm11 466 pxor xmm6,xmm10 467 mov rax,QWORD[((8+160+0))+rbp] 468 mov r9,rax 469 mul r10 470 add r14,rax 471 adc rdx,0 472 mov r10,rdx 473 mov rax,QWORD[((8+160+0))+rbp] 474 mul r11 475 add r15,rax 476 adc rdx,0 477 pxor xmm5,xmm9 478 pxor xmm4,xmm8 479 movdqa XMMWORD[(160+80)+rbp],xmm8 480 movdqa xmm8,xmm7 481 psrld xmm8,25 482 pslld xmm7,32-25 483 pxor xmm7,xmm8 484 movdqa xmm8,xmm6 485 psrld xmm8,25 486 pslld xmm6,32-25 487 pxor xmm6,xmm8 488 movdqa xmm8,xmm5 489 psrld xmm8,25 490 pslld xmm5,32-25 491 pxor xmm5,xmm8 492 movdqa xmm8,xmm4 493 psrld xmm8,25 494 pslld xmm4,32-25 495 pxor xmm4,xmm8 496 movdqa xmm8,XMMWORD[((160+80))+rbp] 497 imul r9,r12 498 add r15,r10 499 adc r9,rdx 500DB 102,15,58,15,255,4 501DB 102,69,15,58,15,219,8 502DB 102,69,15,58,15,255,12 503DB 102,15,58,15,246,4 504DB 102,69,15,58,15,210,8 505DB 102,69,15,58,15,246,12 506DB 102,15,58,15,237,4 507DB 102,69,15,58,15,201,8 508DB 102,69,15,58,15,237,12 509DB 102,15,58,15,228,4 510DB 102,69,15,58,15,192,8 511DB 102,69,15,58,15,228,12 512 movdqa XMMWORD[(160+80)+rbp],xmm8 513 movdqa xmm8,XMMWORD[$L$rol16] 514 paddd xmm3,xmm7 515 paddd xmm2,xmm6 516 paddd xmm1,xmm5 517 paddd xmm0,xmm4 518 pxor xmm15,xmm3 519 pxor xmm14,xmm2 520 mov r10,r13 521 mov r11,r14 522 mov r12,r15 523 and r12,3 524 mov r13,r15 525 and r13,-4 526 mov r14,r9 527 shrd r15,r9,2 528 shr r9,2 529 add r15,r13 530 adc r9,r14 531 add r10,r15 532 adc r11,r9 533 adc r12,0 534 pxor xmm13,xmm1 535 pxor xmm12,xmm0 536DB 102,69,15,56,0,248 537DB 102,69,15,56,0,240 538DB 102,69,15,56,0,232 539DB 102,69,15,56,0,224 540 movdqa 
xmm8,XMMWORD[((160+80))+rbp] 541 paddd xmm11,xmm15 542 paddd xmm10,xmm14 543 paddd xmm9,xmm13 544 paddd xmm8,xmm12 545 pxor xmm7,xmm11 546 pxor xmm6,xmm10 547 pxor xmm5,xmm9 548 pxor xmm4,xmm8 549 movdqa XMMWORD[(160+80)+rbp],xmm8 550 movdqa xmm8,xmm7 551 psrld xmm8,20 552 pslld xmm7,32-20 553 pxor xmm7,xmm8 554 movdqa xmm8,xmm6 555 psrld xmm8,20 556 pslld xmm6,32-20 557 pxor xmm6,xmm8 558 movdqa xmm8,xmm5 559 psrld xmm8,20 560 pslld xmm5,32-20 561 pxor xmm5,xmm8 562 movdqa xmm8,xmm4 563 psrld xmm8,20 564 pslld xmm4,32-20 565 pxor xmm4,xmm8 566 movdqa xmm8,XMMWORD[$L$rol8] 567 paddd xmm3,xmm7 568 paddd xmm2,xmm6 569 paddd xmm1,xmm5 570 paddd xmm0,xmm4 571 pxor xmm15,xmm3 572 pxor xmm14,xmm2 573 pxor xmm13,xmm1 574 pxor xmm12,xmm0 575DB 102,69,15,56,0,248 576DB 102,69,15,56,0,240 577DB 102,69,15,56,0,232 578DB 102,69,15,56,0,224 579 movdqa xmm8,XMMWORD[((160+80))+rbp] 580 paddd xmm11,xmm15 581 paddd xmm10,xmm14 582 paddd xmm9,xmm13 583 paddd xmm8,xmm12 584 pxor xmm7,xmm11 585 pxor xmm6,xmm10 586 pxor xmm5,xmm9 587 pxor xmm4,xmm8 588 movdqa XMMWORD[(160+80)+rbp],xmm8 589 movdqa xmm8,xmm7 590 psrld xmm8,25 591 pslld xmm7,32-25 592 pxor xmm7,xmm8 593 movdqa xmm8,xmm6 594 psrld xmm8,25 595 pslld xmm6,32-25 596 pxor xmm6,xmm8 597 movdqa xmm8,xmm5 598 psrld xmm8,25 599 pslld xmm5,32-25 600 pxor xmm5,xmm8 601 movdqa xmm8,xmm4 602 psrld xmm8,25 603 pslld xmm4,32-25 604 pxor xmm4,xmm8 605 movdqa xmm8,XMMWORD[((160+80))+rbp] 606DB 102,15,58,15,255,12 607DB 102,69,15,58,15,219,8 608DB 102,69,15,58,15,255,4 609DB 102,15,58,15,246,12 610DB 102,69,15,58,15,210,8 611DB 102,69,15,58,15,246,4 612DB 102,15,58,15,237,12 613DB 102,69,15,58,15,201,8 614DB 102,69,15,58,15,237,4 615DB 102,15,58,15,228,12 616DB 102,69,15,58,15,192,8 617DB 102,69,15,58,15,228,4 618 619 dec rcx 620 jge NEAR $L$open_sse_main_loop_rounds 621 add r10,QWORD[((0+0))+r8] 622 adc r11,QWORD[((8+0))+r8] 623 adc r12,1 624 mov rax,QWORD[((0+160+0))+rbp] 625 mov r15,rax 626 mul r10 627 mov r13,rax 628 mov r14,rdx 629 
mov rax,QWORD[((0+160+0))+rbp] 630 mul r11 631 imul r15,r12 632 add r14,rax 633 adc r15,rdx 634 mov rax,QWORD[((8+160+0))+rbp] 635 mov r9,rax 636 mul r10 637 add r14,rax 638 adc rdx,0 639 mov r10,rdx 640 mov rax,QWORD[((8+160+0))+rbp] 641 mul r11 642 add r15,rax 643 adc rdx,0 644 imul r9,r12 645 add r15,r10 646 adc r9,rdx 647 mov r10,r13 648 mov r11,r14 649 mov r12,r15 650 and r12,3 651 mov r13,r15 652 and r13,-4 653 mov r14,r9 654 shrd r15,r9,2 655 shr r9,2 656 add r15,r13 657 adc r9,r14 658 add r10,r15 659 adc r11,r9 660 adc r12,0 661 662 lea r8,[16+r8] 663 cmp rcx,-6 664 jg NEAR $L$open_sse_main_loop_rounds 665 paddd xmm3,XMMWORD[$L$chacha20_consts] 666 paddd xmm7,XMMWORD[((160+48))+rbp] 667 paddd xmm11,XMMWORD[((160+64))+rbp] 668 paddd xmm15,XMMWORD[((160+144))+rbp] 669 paddd xmm2,XMMWORD[$L$chacha20_consts] 670 paddd xmm6,XMMWORD[((160+48))+rbp] 671 paddd xmm10,XMMWORD[((160+64))+rbp] 672 paddd xmm14,XMMWORD[((160+128))+rbp] 673 paddd xmm1,XMMWORD[$L$chacha20_consts] 674 paddd xmm5,XMMWORD[((160+48))+rbp] 675 paddd xmm9,XMMWORD[((160+64))+rbp] 676 paddd xmm13,XMMWORD[((160+112))+rbp] 677 paddd xmm0,XMMWORD[$L$chacha20_consts] 678 paddd xmm4,XMMWORD[((160+48))+rbp] 679 paddd xmm8,XMMWORD[((160+64))+rbp] 680 paddd xmm12,XMMWORD[((160+96))+rbp] 681 movdqa XMMWORD[(160+80)+rbp],xmm12 682 movdqu xmm12,XMMWORD[((0 + 0))+rsi] 683 pxor xmm12,xmm3 684 movdqu XMMWORD[(0 + 0)+rdi],xmm12 685 movdqu xmm12,XMMWORD[((16 + 0))+rsi] 686 pxor xmm12,xmm7 687 movdqu XMMWORD[(16 + 0)+rdi],xmm12 688 movdqu xmm12,XMMWORD[((32 + 0))+rsi] 689 pxor xmm12,xmm11 690 movdqu XMMWORD[(32 + 0)+rdi],xmm12 691 movdqu xmm12,XMMWORD[((48 + 0))+rsi] 692 pxor xmm12,xmm15 693 movdqu XMMWORD[(48 + 0)+rdi],xmm12 694 movdqu xmm3,XMMWORD[((0 + 64))+rsi] 695 movdqu xmm7,XMMWORD[((16 + 64))+rsi] 696 movdqu xmm11,XMMWORD[((32 + 64))+rsi] 697 movdqu xmm15,XMMWORD[((48 + 64))+rsi] 698 pxor xmm2,xmm3 699 pxor xmm6,xmm7 700 pxor xmm10,xmm11 701 pxor xmm15,xmm14 702 movdqu XMMWORD[(0 + 64)+rdi],xmm2 703 movdqu 
XMMWORD[(16 + 64)+rdi],xmm6 704 movdqu XMMWORD[(32 + 64)+rdi],xmm10 705 movdqu XMMWORD[(48 + 64)+rdi],xmm15 706 movdqu xmm3,XMMWORD[((0 + 128))+rsi] 707 movdqu xmm7,XMMWORD[((16 + 128))+rsi] 708 movdqu xmm11,XMMWORD[((32 + 128))+rsi] 709 movdqu xmm15,XMMWORD[((48 + 128))+rsi] 710 pxor xmm1,xmm3 711 pxor xmm5,xmm7 712 pxor xmm9,xmm11 713 pxor xmm15,xmm13 714 movdqu XMMWORD[(0 + 128)+rdi],xmm1 715 movdqu XMMWORD[(16 + 128)+rdi],xmm5 716 movdqu XMMWORD[(32 + 128)+rdi],xmm9 717 movdqu XMMWORD[(48 + 128)+rdi],xmm15 718 movdqu xmm3,XMMWORD[((0 + 192))+rsi] 719 movdqu xmm7,XMMWORD[((16 + 192))+rsi] 720 movdqu xmm11,XMMWORD[((32 + 192))+rsi] 721 movdqu xmm15,XMMWORD[((48 + 192))+rsi] 722 pxor xmm0,xmm3 723 pxor xmm4,xmm7 724 pxor xmm8,xmm11 725 pxor xmm15,XMMWORD[((160+80))+rbp] 726 movdqu XMMWORD[(0 + 192)+rdi],xmm0 727 movdqu XMMWORD[(16 + 192)+rdi],xmm4 728 movdqu XMMWORD[(32 + 192)+rdi],xmm8 729 movdqu XMMWORD[(48 + 192)+rdi],xmm15 730 731 lea rsi,[256+rsi] 732 lea rdi,[256+rdi] 733 sub rbx,16*16 734 jmp NEAR $L$open_sse_main_loop 735$L$open_sse_tail: 736 737 test rbx,rbx 738 jz NEAR $L$open_sse_finalize 739 cmp rbx,12*16 740 ja NEAR $L$open_sse_tail_256 741 cmp rbx,8*16 742 ja NEAR $L$open_sse_tail_192 743 cmp rbx,4*16 744 ja NEAR $L$open_sse_tail_128 745 movdqa xmm0,XMMWORD[$L$chacha20_consts] 746 movdqa xmm4,XMMWORD[((160+48))+rbp] 747 movdqa xmm8,XMMWORD[((160+64))+rbp] 748 movdqa xmm12,XMMWORD[((160+96))+rbp] 749 paddd xmm12,XMMWORD[$L$sse_inc] 750 movdqa XMMWORD[(160+96)+rbp],xmm12 751 752 xor r8,r8 753 mov rcx,rbx 754 cmp rcx,16 755 jb NEAR $L$open_sse_tail_64_rounds 756$L$open_sse_tail_64_rounds_and_x1hash: 757 add r10,QWORD[((0+0))+r8*1+rsi] 758 adc r11,QWORD[((8+0))+r8*1+rsi] 759 adc r12,1 760 mov rax,QWORD[((0+160+0))+rbp] 761 mov r15,rax 762 mul r10 763 mov r13,rax 764 mov r14,rdx 765 mov rax,QWORD[((0+160+0))+rbp] 766 mul r11 767 imul r15,r12 768 add r14,rax 769 adc r15,rdx 770 mov rax,QWORD[((8+160+0))+rbp] 771 mov r9,rax 772 mul r10 773 add r14,rax 774 
adc rdx,0 775 mov r10,rdx 776 mov rax,QWORD[((8+160+0))+rbp] 777 mul r11 778 add r15,rax 779 adc rdx,0 780 imul r9,r12 781 add r15,r10 782 adc r9,rdx 783 mov r10,r13 784 mov r11,r14 785 mov r12,r15 786 and r12,3 787 mov r13,r15 788 and r13,-4 789 mov r14,r9 790 shrd r15,r9,2 791 shr r9,2 792 add r15,r13 793 adc r9,r14 794 add r10,r15 795 adc r11,r9 796 adc r12,0 797 798 sub rcx,16 799$L$open_sse_tail_64_rounds: 800 add r8,16 801 paddd xmm0,xmm4 802 pxor xmm12,xmm0 803 pshufb xmm12,XMMWORD[$L$rol16] 804 paddd xmm8,xmm12 805 pxor xmm4,xmm8 806 movdqa xmm3,xmm4 807 pslld xmm3,12 808 psrld xmm4,20 809 pxor xmm4,xmm3 810 paddd xmm0,xmm4 811 pxor xmm12,xmm0 812 pshufb xmm12,XMMWORD[$L$rol8] 813 paddd xmm8,xmm12 814 pxor xmm4,xmm8 815 movdqa xmm3,xmm4 816 pslld xmm3,7 817 psrld xmm4,25 818 pxor xmm4,xmm3 819DB 102,15,58,15,228,4 820DB 102,69,15,58,15,192,8 821DB 102,69,15,58,15,228,12 822 paddd xmm0,xmm4 823 pxor xmm12,xmm0 824 pshufb xmm12,XMMWORD[$L$rol16] 825 paddd xmm8,xmm12 826 pxor xmm4,xmm8 827 movdqa xmm3,xmm4 828 pslld xmm3,12 829 psrld xmm4,20 830 pxor xmm4,xmm3 831 paddd xmm0,xmm4 832 pxor xmm12,xmm0 833 pshufb xmm12,XMMWORD[$L$rol8] 834 paddd xmm8,xmm12 835 pxor xmm4,xmm8 836 movdqa xmm3,xmm4 837 pslld xmm3,7 838 psrld xmm4,25 839 pxor xmm4,xmm3 840DB 102,15,58,15,228,12 841DB 102,69,15,58,15,192,8 842DB 102,69,15,58,15,228,4 843 844 cmp rcx,16 845 jae NEAR $L$open_sse_tail_64_rounds_and_x1hash 846 cmp r8,10*16 847 jne NEAR $L$open_sse_tail_64_rounds 848 paddd xmm0,XMMWORD[$L$chacha20_consts] 849 paddd xmm4,XMMWORD[((160+48))+rbp] 850 paddd xmm8,XMMWORD[((160+64))+rbp] 851 paddd xmm12,XMMWORD[((160+96))+rbp] 852 853 jmp NEAR $L$open_sse_tail_64_dec_loop 854 855$L$open_sse_tail_128: 856 movdqa xmm0,XMMWORD[$L$chacha20_consts] 857 movdqa xmm4,XMMWORD[((160+48))+rbp] 858 movdqa xmm8,XMMWORD[((160+64))+rbp] 859 movdqa xmm1,xmm0 860 movdqa xmm5,xmm4 861 movdqa xmm9,xmm8 862 movdqa xmm13,XMMWORD[((160+96))+rbp] 863 paddd xmm13,XMMWORD[$L$sse_inc] 864 movdqa 
xmm12,xmm13 865 paddd xmm12,XMMWORD[$L$sse_inc] 866 movdqa XMMWORD[(160+96)+rbp],xmm12 867 movdqa XMMWORD[(160+112)+rbp],xmm13 868 869 mov rcx,rbx 870 and rcx,-16 871 xor r8,r8 872$L$open_sse_tail_128_rounds_and_x1hash: 873 add r10,QWORD[((0+0))+r8*1+rsi] 874 adc r11,QWORD[((8+0))+r8*1+rsi] 875 adc r12,1 876 mov rax,QWORD[((0+160+0))+rbp] 877 mov r15,rax 878 mul r10 879 mov r13,rax 880 mov r14,rdx 881 mov rax,QWORD[((0+160+0))+rbp] 882 mul r11 883 imul r15,r12 884 add r14,rax 885 adc r15,rdx 886 mov rax,QWORD[((8+160+0))+rbp] 887 mov r9,rax 888 mul r10 889 add r14,rax 890 adc rdx,0 891 mov r10,rdx 892 mov rax,QWORD[((8+160+0))+rbp] 893 mul r11 894 add r15,rax 895 adc rdx,0 896 imul r9,r12 897 add r15,r10 898 adc r9,rdx 899 mov r10,r13 900 mov r11,r14 901 mov r12,r15 902 and r12,3 903 mov r13,r15 904 and r13,-4 905 mov r14,r9 906 shrd r15,r9,2 907 shr r9,2 908 add r15,r13 909 adc r9,r14 910 add r10,r15 911 adc r11,r9 912 adc r12,0 913 914$L$open_sse_tail_128_rounds: 915 add r8,16 916 paddd xmm0,xmm4 917 pxor xmm12,xmm0 918 pshufb xmm12,XMMWORD[$L$rol16] 919 paddd xmm8,xmm12 920 pxor xmm4,xmm8 921 movdqa xmm3,xmm4 922 pslld xmm3,12 923 psrld xmm4,20 924 pxor xmm4,xmm3 925 paddd xmm0,xmm4 926 pxor xmm12,xmm0 927 pshufb xmm12,XMMWORD[$L$rol8] 928 paddd xmm8,xmm12 929 pxor xmm4,xmm8 930 movdqa xmm3,xmm4 931 pslld xmm3,7 932 psrld xmm4,25 933 pxor xmm4,xmm3 934DB 102,15,58,15,228,4 935DB 102,69,15,58,15,192,8 936DB 102,69,15,58,15,228,12 937 paddd xmm1,xmm5 938 pxor xmm13,xmm1 939 pshufb xmm13,XMMWORD[$L$rol16] 940 paddd xmm9,xmm13 941 pxor xmm5,xmm9 942 movdqa xmm3,xmm5 943 pslld xmm3,12 944 psrld xmm5,20 945 pxor xmm5,xmm3 946 paddd xmm1,xmm5 947 pxor xmm13,xmm1 948 pshufb xmm13,XMMWORD[$L$rol8] 949 paddd xmm9,xmm13 950 pxor xmm5,xmm9 951 movdqa xmm3,xmm5 952 pslld xmm3,7 953 psrld xmm5,25 954 pxor xmm5,xmm3 955DB 102,15,58,15,237,4 956DB 102,69,15,58,15,201,8 957DB 102,69,15,58,15,237,12 958 paddd xmm0,xmm4 959 pxor xmm12,xmm0 960 pshufb xmm12,XMMWORD[$L$rol16] 961 
paddd xmm8,xmm12 962 pxor xmm4,xmm8 963 movdqa xmm3,xmm4 964 pslld xmm3,12 965 psrld xmm4,20 966 pxor xmm4,xmm3 967 paddd xmm0,xmm4 968 pxor xmm12,xmm0 969 pshufb xmm12,XMMWORD[$L$rol8] 970 paddd xmm8,xmm12 971 pxor xmm4,xmm8 972 movdqa xmm3,xmm4 973 pslld xmm3,7 974 psrld xmm4,25 975 pxor xmm4,xmm3 976DB 102,15,58,15,228,12 977DB 102,69,15,58,15,192,8 978DB 102,69,15,58,15,228,4 979 paddd xmm1,xmm5 980 pxor xmm13,xmm1 981 pshufb xmm13,XMMWORD[$L$rol16] 982 paddd xmm9,xmm13 983 pxor xmm5,xmm9 984 movdqa xmm3,xmm5 985 pslld xmm3,12 986 psrld xmm5,20 987 pxor xmm5,xmm3 988 paddd xmm1,xmm5 989 pxor xmm13,xmm1 990 pshufb xmm13,XMMWORD[$L$rol8] 991 paddd xmm9,xmm13 992 pxor xmm5,xmm9 993 movdqa xmm3,xmm5 994 pslld xmm3,7 995 psrld xmm5,25 996 pxor xmm5,xmm3 997DB 102,15,58,15,237,12 998DB 102,69,15,58,15,201,8 999DB 102,69,15,58,15,237,4 1000 1001 cmp r8,rcx 1002 jb NEAR $L$open_sse_tail_128_rounds_and_x1hash 1003 cmp r8,10*16 1004 jne NEAR $L$open_sse_tail_128_rounds 1005 paddd xmm1,XMMWORD[$L$chacha20_consts] 1006 paddd xmm5,XMMWORD[((160+48))+rbp] 1007 paddd xmm9,XMMWORD[((160+64))+rbp] 1008 paddd xmm13,XMMWORD[((160+112))+rbp] 1009 paddd xmm0,XMMWORD[$L$chacha20_consts] 1010 paddd xmm4,XMMWORD[((160+48))+rbp] 1011 paddd xmm8,XMMWORD[((160+64))+rbp] 1012 paddd xmm12,XMMWORD[((160+96))+rbp] 1013 movdqu xmm3,XMMWORD[((0 + 0))+rsi] 1014 movdqu xmm7,XMMWORD[((16 + 0))+rsi] 1015 movdqu xmm11,XMMWORD[((32 + 0))+rsi] 1016 movdqu xmm15,XMMWORD[((48 + 0))+rsi] 1017 pxor xmm1,xmm3 1018 pxor xmm5,xmm7 1019 pxor xmm9,xmm11 1020 pxor xmm15,xmm13 1021 movdqu XMMWORD[(0 + 0)+rdi],xmm1 1022 movdqu XMMWORD[(16 + 0)+rdi],xmm5 1023 movdqu XMMWORD[(32 + 0)+rdi],xmm9 1024 movdqu XMMWORD[(48 + 0)+rdi],xmm15 1025 1026 sub rbx,4*16 1027 lea rsi,[64+rsi] 1028 lea rdi,[64+rdi] 1029 jmp NEAR $L$open_sse_tail_64_dec_loop 1030 1031$L$open_sse_tail_192: 1032 movdqa xmm0,XMMWORD[$L$chacha20_consts] 1033 movdqa xmm4,XMMWORD[((160+48))+rbp] 1034 movdqa xmm8,XMMWORD[((160+64))+rbp] 1035 movdqa 
xmm1,xmm0 1036 movdqa xmm5,xmm4 1037 movdqa xmm9,xmm8 1038 movdqa xmm2,xmm0 1039 movdqa xmm6,xmm4 1040 movdqa xmm10,xmm8 1041 movdqa xmm14,XMMWORD[((160+96))+rbp] 1042 paddd xmm14,XMMWORD[$L$sse_inc] 1043 movdqa xmm13,xmm14 1044 paddd xmm13,XMMWORD[$L$sse_inc] 1045 movdqa xmm12,xmm13 1046 paddd xmm12,XMMWORD[$L$sse_inc] 1047 movdqa XMMWORD[(160+96)+rbp],xmm12 1048 movdqa XMMWORD[(160+112)+rbp],xmm13 1049 movdqa XMMWORD[(160+128)+rbp],xmm14 1050 1051 mov rcx,rbx 1052 mov r8,10*16 1053 cmp rcx,10*16 1054 cmovg rcx,r8 1055 and rcx,-16 1056 xor r8,r8 1057$L$open_sse_tail_192_rounds_and_x1hash: 1058 add r10,QWORD[((0+0))+r8*1+rsi] 1059 adc r11,QWORD[((8+0))+r8*1+rsi] 1060 adc r12,1 1061 mov rax,QWORD[((0+160+0))+rbp] 1062 mov r15,rax 1063 mul r10 1064 mov r13,rax 1065 mov r14,rdx 1066 mov rax,QWORD[((0+160+0))+rbp] 1067 mul r11 1068 imul r15,r12 1069 add r14,rax 1070 adc r15,rdx 1071 mov rax,QWORD[((8+160+0))+rbp] 1072 mov r9,rax 1073 mul r10 1074 add r14,rax 1075 adc rdx,0 1076 mov r10,rdx 1077 mov rax,QWORD[((8+160+0))+rbp] 1078 mul r11 1079 add r15,rax 1080 adc rdx,0 1081 imul r9,r12 1082 add r15,r10 1083 adc r9,rdx 1084 mov r10,r13 1085 mov r11,r14 1086 mov r12,r15 1087 and r12,3 1088 mov r13,r15 1089 and r13,-4 1090 mov r14,r9 1091 shrd r15,r9,2 1092 shr r9,2 1093 add r15,r13 1094 adc r9,r14 1095 add r10,r15 1096 adc r11,r9 1097 adc r12,0 1098 1099$L$open_sse_tail_192_rounds: 1100 add r8,16 1101 paddd xmm0,xmm4 1102 pxor xmm12,xmm0 1103 pshufb xmm12,XMMWORD[$L$rol16] 1104 paddd xmm8,xmm12 1105 pxor xmm4,xmm8 1106 movdqa xmm3,xmm4 1107 pslld xmm3,12 1108 psrld xmm4,20 1109 pxor xmm4,xmm3 1110 paddd xmm0,xmm4 1111 pxor xmm12,xmm0 1112 pshufb xmm12,XMMWORD[$L$rol8] 1113 paddd xmm8,xmm12 1114 pxor xmm4,xmm8 1115 movdqa xmm3,xmm4 1116 pslld xmm3,7 1117 psrld xmm4,25 1118 pxor xmm4,xmm3 1119DB 102,15,58,15,228,4 1120DB 102,69,15,58,15,192,8 1121DB 102,69,15,58,15,228,12 1122 paddd xmm1,xmm5 1123 pxor xmm13,xmm1 1124 pshufb xmm13,XMMWORD[$L$rol16] 1125 paddd xmm9,xmm13 
1126 pxor xmm5,xmm9 1127 movdqa xmm3,xmm5 1128 pslld xmm3,12 1129 psrld xmm5,20 1130 pxor xmm5,xmm3 1131 paddd xmm1,xmm5 1132 pxor xmm13,xmm1 1133 pshufb xmm13,XMMWORD[$L$rol8] 1134 paddd xmm9,xmm13 1135 pxor xmm5,xmm9 1136 movdqa xmm3,xmm5 1137 pslld xmm3,7 1138 psrld xmm5,25 1139 pxor xmm5,xmm3 1140DB 102,15,58,15,237,4 1141DB 102,69,15,58,15,201,8 1142DB 102,69,15,58,15,237,12 1143 paddd xmm2,xmm6 1144 pxor xmm14,xmm2 1145 pshufb xmm14,XMMWORD[$L$rol16] 1146 paddd xmm10,xmm14 1147 pxor xmm6,xmm10 1148 movdqa xmm3,xmm6 1149 pslld xmm3,12 1150 psrld xmm6,20 1151 pxor xmm6,xmm3 1152 paddd xmm2,xmm6 1153 pxor xmm14,xmm2 1154 pshufb xmm14,XMMWORD[$L$rol8] 1155 paddd xmm10,xmm14 1156 pxor xmm6,xmm10 1157 movdqa xmm3,xmm6 1158 pslld xmm3,7 1159 psrld xmm6,25 1160 pxor xmm6,xmm3 1161DB 102,15,58,15,246,4 1162DB 102,69,15,58,15,210,8 1163DB 102,69,15,58,15,246,12 1164 paddd xmm0,xmm4 1165 pxor xmm12,xmm0 1166 pshufb xmm12,XMMWORD[$L$rol16] 1167 paddd xmm8,xmm12 1168 pxor xmm4,xmm8 1169 movdqa xmm3,xmm4 1170 pslld xmm3,12 1171 psrld xmm4,20 1172 pxor xmm4,xmm3 1173 paddd xmm0,xmm4 1174 pxor xmm12,xmm0 1175 pshufb xmm12,XMMWORD[$L$rol8] 1176 paddd xmm8,xmm12 1177 pxor xmm4,xmm8 1178 movdqa xmm3,xmm4 1179 pslld xmm3,7 1180 psrld xmm4,25 1181 pxor xmm4,xmm3 1182DB 102,15,58,15,228,12 1183DB 102,69,15,58,15,192,8 1184DB 102,69,15,58,15,228,4 1185 paddd xmm1,xmm5 1186 pxor xmm13,xmm1 1187 pshufb xmm13,XMMWORD[$L$rol16] 1188 paddd xmm9,xmm13 1189 pxor xmm5,xmm9 1190 movdqa xmm3,xmm5 1191 pslld xmm3,12 1192 psrld xmm5,20 1193 pxor xmm5,xmm3 1194 paddd xmm1,xmm5 1195 pxor xmm13,xmm1 1196 pshufb xmm13,XMMWORD[$L$rol8] 1197 paddd xmm9,xmm13 1198 pxor xmm5,xmm9 1199 movdqa xmm3,xmm5 1200 pslld xmm3,7 1201 psrld xmm5,25 1202 pxor xmm5,xmm3 1203DB 102,15,58,15,237,12 1204DB 102,69,15,58,15,201,8 1205DB 102,69,15,58,15,237,4 1206 paddd xmm2,xmm6 1207 pxor xmm14,xmm2 1208 pshufb xmm14,XMMWORD[$L$rol16] 1209 paddd xmm10,xmm14 1210 pxor xmm6,xmm10 1211 movdqa xmm3,xmm6 1212 pslld xmm3,12 
1213 psrld xmm6,20 1214 pxor xmm6,xmm3 1215 paddd xmm2,xmm6 1216 pxor xmm14,xmm2 1217 pshufb xmm14,XMMWORD[$L$rol8] 1218 paddd xmm10,xmm14 1219 pxor xmm6,xmm10 1220 movdqa xmm3,xmm6 1221 pslld xmm3,7 1222 psrld xmm6,25 1223 pxor xmm6,xmm3 1224DB 102,15,58,15,246,12 1225DB 102,69,15,58,15,210,8 1226DB 102,69,15,58,15,246,4 1227 1228 cmp r8,rcx 1229 jb NEAR $L$open_sse_tail_192_rounds_and_x1hash 1230 cmp r8,10*16 1231 jne NEAR $L$open_sse_tail_192_rounds 1232 cmp rbx,11*16 1233 jb NEAR $L$open_sse_tail_192_finish 1234 add r10,QWORD[((0+160))+rsi] 1235 adc r11,QWORD[((8+160))+rsi] 1236 adc r12,1 1237 mov rax,QWORD[((0+160+0))+rbp] 1238 mov r15,rax 1239 mul r10 1240 mov r13,rax 1241 mov r14,rdx 1242 mov rax,QWORD[((0+160+0))+rbp] 1243 mul r11 1244 imul r15,r12 1245 add r14,rax 1246 adc r15,rdx 1247 mov rax,QWORD[((8+160+0))+rbp] 1248 mov r9,rax 1249 mul r10 1250 add r14,rax 1251 adc rdx,0 1252 mov r10,rdx 1253 mov rax,QWORD[((8+160+0))+rbp] 1254 mul r11 1255 add r15,rax 1256 adc rdx,0 1257 imul r9,r12 1258 add r15,r10 1259 adc r9,rdx 1260 mov r10,r13 1261 mov r11,r14 1262 mov r12,r15 1263 and r12,3 1264 mov r13,r15 1265 and r13,-4 1266 mov r14,r9 1267 shrd r15,r9,2 1268 shr r9,2 1269 add r15,r13 1270 adc r9,r14 1271 add r10,r15 1272 adc r11,r9 1273 adc r12,0 1274 1275 cmp rbx,12*16 1276 jb NEAR $L$open_sse_tail_192_finish 1277 add r10,QWORD[((0+176))+rsi] 1278 adc r11,QWORD[((8+176))+rsi] 1279 adc r12,1 1280 mov rax,QWORD[((0+160+0))+rbp] 1281 mov r15,rax 1282 mul r10 1283 mov r13,rax 1284 mov r14,rdx 1285 mov rax,QWORD[((0+160+0))+rbp] 1286 mul r11 1287 imul r15,r12 1288 add r14,rax 1289 adc r15,rdx 1290 mov rax,QWORD[((8+160+0))+rbp] 1291 mov r9,rax 1292 mul r10 1293 add r14,rax 1294 adc rdx,0 1295 mov r10,rdx 1296 mov rax,QWORD[((8+160+0))+rbp] 1297 mul r11 1298 add r15,rax 1299 adc rdx,0 1300 imul r9,r12 1301 add r15,r10 1302 adc r9,rdx 1303 mov r10,r13 1304 mov r11,r14 1305 mov r12,r15 1306 and r12,3 1307 mov r13,r15 1308 and r13,-4 1309 mov r14,r9 1310 shrd 
r15,r9,2 1311 shr r9,2 1312 add r15,r13 1313 adc r9,r14 1314 add r10,r15 1315 adc r11,r9 1316 adc r12,0 1317 1318$L$open_sse_tail_192_finish: 1319 paddd xmm2,XMMWORD[$L$chacha20_consts] 1320 paddd xmm6,XMMWORD[((160+48))+rbp] 1321 paddd xmm10,XMMWORD[((160+64))+rbp] 1322 paddd xmm14,XMMWORD[((160+128))+rbp] 1323 paddd xmm1,XMMWORD[$L$chacha20_consts] 1324 paddd xmm5,XMMWORD[((160+48))+rbp] 1325 paddd xmm9,XMMWORD[((160+64))+rbp] 1326 paddd xmm13,XMMWORD[((160+112))+rbp] 1327 paddd xmm0,XMMWORD[$L$chacha20_consts] 1328 paddd xmm4,XMMWORD[((160+48))+rbp] 1329 paddd xmm8,XMMWORD[((160+64))+rbp] 1330 paddd xmm12,XMMWORD[((160+96))+rbp] 1331 movdqu xmm3,XMMWORD[((0 + 0))+rsi] 1332 movdqu xmm7,XMMWORD[((16 + 0))+rsi] 1333 movdqu xmm11,XMMWORD[((32 + 0))+rsi] 1334 movdqu xmm15,XMMWORD[((48 + 0))+rsi] 1335 pxor xmm2,xmm3 1336 pxor xmm6,xmm7 1337 pxor xmm10,xmm11 1338 pxor xmm15,xmm14 1339 movdqu XMMWORD[(0 + 0)+rdi],xmm2 1340 movdqu XMMWORD[(16 + 0)+rdi],xmm6 1341 movdqu XMMWORD[(32 + 0)+rdi],xmm10 1342 movdqu XMMWORD[(48 + 0)+rdi],xmm15 1343 movdqu xmm3,XMMWORD[((0 + 64))+rsi] 1344 movdqu xmm7,XMMWORD[((16 + 64))+rsi] 1345 movdqu xmm11,XMMWORD[((32 + 64))+rsi] 1346 movdqu xmm15,XMMWORD[((48 + 64))+rsi] 1347 pxor xmm1,xmm3 1348 pxor xmm5,xmm7 1349 pxor xmm9,xmm11 1350 pxor xmm15,xmm13 1351 movdqu XMMWORD[(0 + 64)+rdi],xmm1 1352 movdqu XMMWORD[(16 + 64)+rdi],xmm5 1353 movdqu XMMWORD[(32 + 64)+rdi],xmm9 1354 movdqu XMMWORD[(48 + 64)+rdi],xmm15 1355 1356 sub rbx,8*16 1357 lea rsi,[128+rsi] 1358 lea rdi,[128+rdi] 1359 jmp NEAR $L$open_sse_tail_64_dec_loop 1360 1361$L$open_sse_tail_256: 1362 movdqa xmm0,XMMWORD[$L$chacha20_consts] 1363 movdqa xmm4,XMMWORD[((160+48))+rbp] 1364 movdqa xmm8,XMMWORD[((160+64))+rbp] 1365 movdqa xmm1,xmm0 1366 movdqa xmm5,xmm4 1367 movdqa xmm9,xmm8 1368 movdqa xmm2,xmm0 1369 movdqa xmm6,xmm4 1370 movdqa xmm10,xmm8 1371 movdqa xmm3,xmm0 1372 movdqa xmm7,xmm4 1373 movdqa xmm11,xmm8 1374 movdqa xmm15,XMMWORD[((160+96))+rbp] 1375 paddd 
xmm15,XMMWORD[$L$sse_inc] 1376 movdqa xmm14,xmm15 1377 paddd xmm14,XMMWORD[$L$sse_inc] 1378 movdqa xmm13,xmm14 1379 paddd xmm13,XMMWORD[$L$sse_inc] 1380 movdqa xmm12,xmm13 1381 paddd xmm12,XMMWORD[$L$sse_inc] 1382 movdqa XMMWORD[(160+96)+rbp],xmm12 1383 movdqa XMMWORD[(160+112)+rbp],xmm13 1384 movdqa XMMWORD[(160+128)+rbp],xmm14 1385 movdqa XMMWORD[(160+144)+rbp],xmm15 1386 1387 xor r8,r8 1388$L$open_sse_tail_256_rounds_and_x1hash: 1389 add r10,QWORD[((0+0))+r8*1+rsi] 1390 adc r11,QWORD[((8+0))+r8*1+rsi] 1391 adc r12,1 1392 movdqa XMMWORD[(160+80)+rbp],xmm11 1393 paddd xmm0,xmm4 1394 pxor xmm12,xmm0 1395 pshufb xmm12,XMMWORD[$L$rol16] 1396 paddd xmm8,xmm12 1397 pxor xmm4,xmm8 1398 movdqa xmm11,xmm4 1399 pslld xmm11,12 1400 psrld xmm4,20 1401 pxor xmm4,xmm11 1402 paddd xmm0,xmm4 1403 pxor xmm12,xmm0 1404 pshufb xmm12,XMMWORD[$L$rol8] 1405 paddd xmm8,xmm12 1406 pxor xmm4,xmm8 1407 movdqa xmm11,xmm4 1408 pslld xmm11,7 1409 psrld xmm4,25 1410 pxor xmm4,xmm11 1411DB 102,15,58,15,228,4 1412DB 102,69,15,58,15,192,8 1413DB 102,69,15,58,15,228,12 1414 paddd xmm1,xmm5 1415 pxor xmm13,xmm1 1416 pshufb xmm13,XMMWORD[$L$rol16] 1417 paddd xmm9,xmm13 1418 pxor xmm5,xmm9 1419 movdqa xmm11,xmm5 1420 pslld xmm11,12 1421 psrld xmm5,20 1422 pxor xmm5,xmm11 1423 paddd xmm1,xmm5 1424 pxor xmm13,xmm1 1425 pshufb xmm13,XMMWORD[$L$rol8] 1426 paddd xmm9,xmm13 1427 pxor xmm5,xmm9 1428 movdqa xmm11,xmm5 1429 pslld xmm11,7 1430 psrld xmm5,25 1431 pxor xmm5,xmm11 1432DB 102,15,58,15,237,4 1433DB 102,69,15,58,15,201,8 1434DB 102,69,15,58,15,237,12 1435 paddd xmm2,xmm6 1436 pxor xmm14,xmm2 1437 pshufb xmm14,XMMWORD[$L$rol16] 1438 paddd xmm10,xmm14 1439 pxor xmm6,xmm10 1440 movdqa xmm11,xmm6 1441 pslld xmm11,12 1442 psrld xmm6,20 1443 pxor xmm6,xmm11 1444 paddd xmm2,xmm6 1445 pxor xmm14,xmm2 1446 pshufb xmm14,XMMWORD[$L$rol8] 1447 paddd xmm10,xmm14 1448 pxor xmm6,xmm10 1449 movdqa xmm11,xmm6 1450 pslld xmm11,7 1451 psrld xmm6,25 1452 pxor xmm6,xmm11 1453DB 102,15,58,15,246,4 1454DB 
102,69,15,58,15,210,8 1455DB 102,69,15,58,15,246,12 1456 movdqa xmm11,XMMWORD[((160+80))+rbp] 1457 mov rax,QWORD[((0+160+0))+rbp] 1458 mov r15,rax 1459 mul r10 1460 mov r13,rax 1461 mov r14,rdx 1462 mov rax,QWORD[((0+160+0))+rbp] 1463 mul r11 1464 imul r15,r12 1465 add r14,rax 1466 adc r15,rdx 1467 movdqa XMMWORD[(160+80)+rbp],xmm9 1468 paddd xmm3,xmm7 1469 pxor xmm15,xmm3 1470 pshufb xmm15,XMMWORD[$L$rol16] 1471 paddd xmm11,xmm15 1472 pxor xmm7,xmm11 1473 movdqa xmm9,xmm7 1474 pslld xmm9,12 1475 psrld xmm7,20 1476 pxor xmm7,xmm9 1477 paddd xmm3,xmm7 1478 pxor xmm15,xmm3 1479 pshufb xmm15,XMMWORD[$L$rol8] 1480 paddd xmm11,xmm15 1481 pxor xmm7,xmm11 1482 movdqa xmm9,xmm7 1483 pslld xmm9,7 1484 psrld xmm7,25 1485 pxor xmm7,xmm9 1486DB 102,15,58,15,255,4 1487DB 102,69,15,58,15,219,8 1488DB 102,69,15,58,15,255,12 1489 movdqa xmm9,XMMWORD[((160+80))+rbp] 1490 mov rax,QWORD[((8+160+0))+rbp] 1491 mov r9,rax 1492 mul r10 1493 add r14,rax 1494 adc rdx,0 1495 mov r10,rdx 1496 mov rax,QWORD[((8+160+0))+rbp] 1497 mul r11 1498 add r15,rax 1499 adc rdx,0 1500 movdqa XMMWORD[(160+80)+rbp],xmm11 1501 paddd xmm0,xmm4 1502 pxor xmm12,xmm0 1503 pshufb xmm12,XMMWORD[$L$rol16] 1504 paddd xmm8,xmm12 1505 pxor xmm4,xmm8 1506 movdqa xmm11,xmm4 1507 pslld xmm11,12 1508 psrld xmm4,20 1509 pxor xmm4,xmm11 1510 paddd xmm0,xmm4 1511 pxor xmm12,xmm0 1512 pshufb xmm12,XMMWORD[$L$rol8] 1513 paddd xmm8,xmm12 1514 pxor xmm4,xmm8 1515 movdqa xmm11,xmm4 1516 pslld xmm11,7 1517 psrld xmm4,25 1518 pxor xmm4,xmm11 1519DB 102,15,58,15,228,12 1520DB 102,69,15,58,15,192,8 1521DB 102,69,15,58,15,228,4 1522 paddd xmm1,xmm5 1523 pxor xmm13,xmm1 1524 pshufb xmm13,XMMWORD[$L$rol16] 1525 paddd xmm9,xmm13 1526 pxor xmm5,xmm9 1527 movdqa xmm11,xmm5 1528 pslld xmm11,12 1529 psrld xmm5,20 1530 pxor xmm5,xmm11 1531 paddd xmm1,xmm5 1532 pxor xmm13,xmm1 1533 pshufb xmm13,XMMWORD[$L$rol8] 1534 paddd xmm9,xmm13 1535 pxor xmm5,xmm9 1536 movdqa xmm11,xmm5 1537 pslld xmm11,7 1538 psrld xmm5,25 1539 pxor xmm5,xmm11 1540DB 
102,15,58,15,237,12 1541DB 102,69,15,58,15,201,8 1542DB 102,69,15,58,15,237,4 1543 imul r9,r12 1544 add r15,r10 1545 adc r9,rdx 1546 paddd xmm2,xmm6 1547 pxor xmm14,xmm2 1548 pshufb xmm14,XMMWORD[$L$rol16] 1549 paddd xmm10,xmm14 1550 pxor xmm6,xmm10 1551 movdqa xmm11,xmm6 1552 pslld xmm11,12 1553 psrld xmm6,20 1554 pxor xmm6,xmm11 1555 paddd xmm2,xmm6 1556 pxor xmm14,xmm2 1557 pshufb xmm14,XMMWORD[$L$rol8] 1558 paddd xmm10,xmm14 1559 pxor xmm6,xmm10 1560 movdqa xmm11,xmm6 1561 pslld xmm11,7 1562 psrld xmm6,25 1563 pxor xmm6,xmm11 1564DB 102,15,58,15,246,12 1565DB 102,69,15,58,15,210,8 1566DB 102,69,15,58,15,246,4 1567 movdqa xmm11,XMMWORD[((160+80))+rbp] 1568 mov r10,r13 1569 mov r11,r14 1570 mov r12,r15 1571 and r12,3 1572 mov r13,r15 1573 and r13,-4 1574 mov r14,r9 1575 shrd r15,r9,2 1576 shr r9,2 1577 add r15,r13 1578 adc r9,r14 1579 add r10,r15 1580 adc r11,r9 1581 adc r12,0 1582 movdqa XMMWORD[(160+80)+rbp],xmm9 1583 paddd xmm3,xmm7 1584 pxor xmm15,xmm3 1585 pshufb xmm15,XMMWORD[$L$rol16] 1586 paddd xmm11,xmm15 1587 pxor xmm7,xmm11 1588 movdqa xmm9,xmm7 1589 pslld xmm9,12 1590 psrld xmm7,20 1591 pxor xmm7,xmm9 1592 paddd xmm3,xmm7 1593 pxor xmm15,xmm3 1594 pshufb xmm15,XMMWORD[$L$rol8] 1595 paddd xmm11,xmm15 1596 pxor xmm7,xmm11 1597 movdqa xmm9,xmm7 1598 pslld xmm9,7 1599 psrld xmm7,25 1600 pxor xmm7,xmm9 1601DB 102,15,58,15,255,12 1602DB 102,69,15,58,15,219,8 1603DB 102,69,15,58,15,255,4 1604 movdqa xmm9,XMMWORD[((160+80))+rbp] 1605 1606 add r8,16 1607 cmp r8,10*16 1608 jb NEAR $L$open_sse_tail_256_rounds_and_x1hash 1609 1610 mov rcx,rbx 1611 and rcx,-16 1612$L$open_sse_tail_256_hash: 1613 add r10,QWORD[((0+0))+r8*1+rsi] 1614 adc r11,QWORD[((8+0))+r8*1+rsi] 1615 adc r12,1 1616 mov rax,QWORD[((0+160+0))+rbp] 1617 mov r15,rax 1618 mul r10 1619 mov r13,rax 1620 mov r14,rdx 1621 mov rax,QWORD[((0+160+0))+rbp] 1622 mul r11 1623 imul r15,r12 1624 add r14,rax 1625 adc r15,rdx 1626 mov rax,QWORD[((8+160+0))+rbp] 1627 mov r9,rax 1628 mul r10 1629 add r14,rax 1630 adc 
rdx,0 1631 mov r10,rdx 1632 mov rax,QWORD[((8+160+0))+rbp] 1633 mul r11 1634 add r15,rax 1635 adc rdx,0 1636 imul r9,r12 1637 add r15,r10 1638 adc r9,rdx 1639 mov r10,r13 1640 mov r11,r14 1641 mov r12,r15 1642 and r12,3 1643 mov r13,r15 1644 and r13,-4 1645 mov r14,r9 1646 shrd r15,r9,2 1647 shr r9,2 1648 add r15,r13 1649 adc r9,r14 1650 add r10,r15 1651 adc r11,r9 1652 adc r12,0 1653 1654 add r8,16 1655 cmp r8,rcx 1656 jb NEAR $L$open_sse_tail_256_hash 1657 paddd xmm3,XMMWORD[$L$chacha20_consts] 1658 paddd xmm7,XMMWORD[((160+48))+rbp] 1659 paddd xmm11,XMMWORD[((160+64))+rbp] 1660 paddd xmm15,XMMWORD[((160+144))+rbp] 1661 paddd xmm2,XMMWORD[$L$chacha20_consts] 1662 paddd xmm6,XMMWORD[((160+48))+rbp] 1663 paddd xmm10,XMMWORD[((160+64))+rbp] 1664 paddd xmm14,XMMWORD[((160+128))+rbp] 1665 paddd xmm1,XMMWORD[$L$chacha20_consts] 1666 paddd xmm5,XMMWORD[((160+48))+rbp] 1667 paddd xmm9,XMMWORD[((160+64))+rbp] 1668 paddd xmm13,XMMWORD[((160+112))+rbp] 1669 paddd xmm0,XMMWORD[$L$chacha20_consts] 1670 paddd xmm4,XMMWORD[((160+48))+rbp] 1671 paddd xmm8,XMMWORD[((160+64))+rbp] 1672 paddd xmm12,XMMWORD[((160+96))+rbp] 1673 movdqa XMMWORD[(160+80)+rbp],xmm12 1674 movdqu xmm12,XMMWORD[((0 + 0))+rsi] 1675 pxor xmm12,xmm3 1676 movdqu XMMWORD[(0 + 0)+rdi],xmm12 1677 movdqu xmm12,XMMWORD[((16 + 0))+rsi] 1678 pxor xmm12,xmm7 1679 movdqu XMMWORD[(16 + 0)+rdi],xmm12 1680 movdqu xmm12,XMMWORD[((32 + 0))+rsi] 1681 pxor xmm12,xmm11 1682 movdqu XMMWORD[(32 + 0)+rdi],xmm12 1683 movdqu xmm12,XMMWORD[((48 + 0))+rsi] 1684 pxor xmm12,xmm15 1685 movdqu XMMWORD[(48 + 0)+rdi],xmm12 1686 movdqu xmm3,XMMWORD[((0 + 64))+rsi] 1687 movdqu xmm7,XMMWORD[((16 + 64))+rsi] 1688 movdqu xmm11,XMMWORD[((32 + 64))+rsi] 1689 movdqu xmm15,XMMWORD[((48 + 64))+rsi] 1690 pxor xmm2,xmm3 1691 pxor xmm6,xmm7 1692 pxor xmm10,xmm11 1693 pxor xmm15,xmm14 1694 movdqu XMMWORD[(0 + 64)+rdi],xmm2 1695 movdqu XMMWORD[(16 + 64)+rdi],xmm6 1696 movdqu XMMWORD[(32 + 64)+rdi],xmm10 1697 movdqu XMMWORD[(48 + 64)+rdi],xmm15 1698 
movdqu xmm3,XMMWORD[((0 + 128))+rsi] 1699 movdqu xmm7,XMMWORD[((16 + 128))+rsi] 1700 movdqu xmm11,XMMWORD[((32 + 128))+rsi] 1701 movdqu xmm15,XMMWORD[((48 + 128))+rsi] 1702 pxor xmm1,xmm3 1703 pxor xmm5,xmm7 1704 pxor xmm9,xmm11 1705 pxor xmm15,xmm13 1706 movdqu XMMWORD[(0 + 128)+rdi],xmm1 1707 movdqu XMMWORD[(16 + 128)+rdi],xmm5 1708 movdqu XMMWORD[(32 + 128)+rdi],xmm9 1709 movdqu XMMWORD[(48 + 128)+rdi],xmm15 1710 1711 movdqa xmm12,XMMWORD[((160+80))+rbp] 1712 sub rbx,12*16 1713 lea rsi,[192+rsi] 1714 lea rdi,[192+rdi] 1715 1716 1717$L$open_sse_tail_64_dec_loop: 1718 cmp rbx,16 1719 jb NEAR $L$open_sse_tail_16_init 1720 sub rbx,16 1721 movdqu xmm3,XMMWORD[rsi] 1722 pxor xmm0,xmm3 1723 movdqu XMMWORD[rdi],xmm0 1724 lea rsi,[16+rsi] 1725 lea rdi,[16+rdi] 1726 movdqa xmm0,xmm4 1727 movdqa xmm4,xmm8 1728 movdqa xmm8,xmm12 1729 jmp NEAR $L$open_sse_tail_64_dec_loop 1730$L$open_sse_tail_16_init: 1731 movdqa xmm1,xmm0 1732 1733 1734$L$open_sse_tail_16: 1735 test rbx,rbx 1736 jz NEAR $L$open_sse_finalize 1737 1738 1739 1740 pxor xmm3,xmm3 1741 lea rsi,[((-1))+rbx*1+rsi] 1742 mov r8,rbx 1743$L$open_sse_tail_16_compose: 1744 pslldq xmm3,1 1745 pinsrb xmm3,BYTE[rsi],0 1746 sub rsi,1 1747 sub r8,1 1748 jnz NEAR $L$open_sse_tail_16_compose 1749 1750DB 102,73,15,126,221 1751 pextrq r14,xmm3,1 1752 1753 pxor xmm3,xmm1 1754 1755 1756$L$open_sse_tail_16_extract: 1757 pextrb XMMWORD[rdi],xmm3,0 1758 psrldq xmm3,1 1759 add rdi,1 1760 sub rbx,1 1761 jne NEAR $L$open_sse_tail_16_extract 1762 1763 add r10,r13 1764 adc r11,r14 1765 adc r12,1 1766 mov rax,QWORD[((0+160+0))+rbp] 1767 mov r15,rax 1768 mul r10 1769 mov r13,rax 1770 mov r14,rdx 1771 mov rax,QWORD[((0+160+0))+rbp] 1772 mul r11 1773 imul r15,r12 1774 add r14,rax 1775 adc r15,rdx 1776 mov rax,QWORD[((8+160+0))+rbp] 1777 mov r9,rax 1778 mul r10 1779 add r14,rax 1780 adc rdx,0 1781 mov r10,rdx 1782 mov rax,QWORD[((8+160+0))+rbp] 1783 mul r11 1784 add r15,rax 1785 adc rdx,0 1786 imul r9,r12 1787 add r15,r10 1788 adc r9,rdx 1789 
mov r10,r13 1790 mov r11,r14 1791 mov r12,r15 1792 and r12,3 1793 mov r13,r15 1794 and r13,-4 1795 mov r14,r9 1796 shrd r15,r9,2 1797 shr r9,2 1798 add r15,r13 1799 adc r9,r14 1800 add r10,r15 1801 adc r11,r9 1802 adc r12,0 1803 1804 1805$L$open_sse_finalize: 1806 add r10,QWORD[((0+160+32))+rbp] 1807 adc r11,QWORD[((8+160+32))+rbp] 1808 adc r12,1 1809 mov rax,QWORD[((0+160+0))+rbp] 1810 mov r15,rax 1811 mul r10 1812 mov r13,rax 1813 mov r14,rdx 1814 mov rax,QWORD[((0+160+0))+rbp] 1815 mul r11 1816 imul r15,r12 1817 add r14,rax 1818 adc r15,rdx 1819 mov rax,QWORD[((8+160+0))+rbp] 1820 mov r9,rax 1821 mul r10 1822 add r14,rax 1823 adc rdx,0 1824 mov r10,rdx 1825 mov rax,QWORD[((8+160+0))+rbp] 1826 mul r11 1827 add r15,rax 1828 adc rdx,0 1829 imul r9,r12 1830 add r15,r10 1831 adc r9,rdx 1832 mov r10,r13 1833 mov r11,r14 1834 mov r12,r15 1835 and r12,3 1836 mov r13,r15 1837 and r13,-4 1838 mov r14,r9 1839 shrd r15,r9,2 1840 shr r9,2 1841 add r15,r13 1842 adc r9,r14 1843 add r10,r15 1844 adc r11,r9 1845 adc r12,0 1846 1847 1848 mov r13,r10 1849 mov r14,r11 1850 mov r15,r12 1851 sub r10,-5 1852 sbb r11,-1 1853 sbb r12,3 1854 cmovc r10,r13 1855 cmovc r11,r14 1856 cmovc r12,r15 1857 1858 add r10,QWORD[((0+160+16))+rbp] 1859 adc r11,QWORD[((8+160+16))+rbp] 1860 1861 movaps xmm6,XMMWORD[((0+0))+rbp] 1862 movaps xmm7,XMMWORD[((16+0))+rbp] 1863 movaps xmm8,XMMWORD[((32+0))+rbp] 1864 movaps xmm9,XMMWORD[((48+0))+rbp] 1865 movaps xmm10,XMMWORD[((64+0))+rbp] 1866 movaps xmm11,XMMWORD[((80+0))+rbp] 1867 movaps xmm12,XMMWORD[((96+0))+rbp] 1868 movaps xmm13,XMMWORD[((112+0))+rbp] 1869 movaps xmm14,XMMWORD[((128+0))+rbp] 1870 movaps xmm15,XMMWORD[((144+0))+rbp] 1871 1872 1873 add rsp,288 + 160 + 32 1874 1875 1876 pop r9 1877 1878 mov QWORD[r9],r10 1879 mov QWORD[8+r9],r11 1880 pop r15 1881 1882 pop r14 1883 1884 pop r13 1885 1886 pop r12 1887 1888 pop rbx 1889 1890 pop rbp 1891 1892 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 1893 mov rsi,QWORD[16+rsp] 1894 ret 1895 1896$L$open_sse_128: 
1897 1898 movdqu xmm0,XMMWORD[$L$chacha20_consts] 1899 movdqa xmm1,xmm0 1900 movdqa xmm2,xmm0 1901 movdqu xmm4,XMMWORD[r9] 1902 movdqa xmm5,xmm4 1903 movdqa xmm6,xmm4 1904 movdqu xmm8,XMMWORD[16+r9] 1905 movdqa xmm9,xmm8 1906 movdqa xmm10,xmm8 1907 movdqu xmm12,XMMWORD[32+r9] 1908 movdqa xmm13,xmm12 1909 paddd xmm13,XMMWORD[$L$sse_inc] 1910 movdqa xmm14,xmm13 1911 paddd xmm14,XMMWORD[$L$sse_inc] 1912 movdqa xmm7,xmm4 1913 movdqa xmm11,xmm8 1914 movdqa xmm15,xmm13 1915 mov r10,10 1916 1917$L$open_sse_128_rounds: 1918 paddd xmm0,xmm4 1919 pxor xmm12,xmm0 1920 pshufb xmm12,XMMWORD[$L$rol16] 1921 paddd xmm8,xmm12 1922 pxor xmm4,xmm8 1923 movdqa xmm3,xmm4 1924 pslld xmm3,12 1925 psrld xmm4,20 1926 pxor xmm4,xmm3 1927 paddd xmm0,xmm4 1928 pxor xmm12,xmm0 1929 pshufb xmm12,XMMWORD[$L$rol8] 1930 paddd xmm8,xmm12 1931 pxor xmm4,xmm8 1932 movdqa xmm3,xmm4 1933 pslld xmm3,7 1934 psrld xmm4,25 1935 pxor xmm4,xmm3 1936DB 102,15,58,15,228,4 1937DB 102,69,15,58,15,192,8 1938DB 102,69,15,58,15,228,12 1939 paddd xmm1,xmm5 1940 pxor xmm13,xmm1 1941 pshufb xmm13,XMMWORD[$L$rol16] 1942 paddd xmm9,xmm13 1943 pxor xmm5,xmm9 1944 movdqa xmm3,xmm5 1945 pslld xmm3,12 1946 psrld xmm5,20 1947 pxor xmm5,xmm3 1948 paddd xmm1,xmm5 1949 pxor xmm13,xmm1 1950 pshufb xmm13,XMMWORD[$L$rol8] 1951 paddd xmm9,xmm13 1952 pxor xmm5,xmm9 1953 movdqa xmm3,xmm5 1954 pslld xmm3,7 1955 psrld xmm5,25 1956 pxor xmm5,xmm3 1957DB 102,15,58,15,237,4 1958DB 102,69,15,58,15,201,8 1959DB 102,69,15,58,15,237,12 1960 paddd xmm2,xmm6 1961 pxor xmm14,xmm2 1962 pshufb xmm14,XMMWORD[$L$rol16] 1963 paddd xmm10,xmm14 1964 pxor xmm6,xmm10 1965 movdqa xmm3,xmm6 1966 pslld xmm3,12 1967 psrld xmm6,20 1968 pxor xmm6,xmm3 1969 paddd xmm2,xmm6 1970 pxor xmm14,xmm2 1971 pshufb xmm14,XMMWORD[$L$rol8] 1972 paddd xmm10,xmm14 1973 pxor xmm6,xmm10 1974 movdqa xmm3,xmm6 1975 pslld xmm3,7 1976 psrld xmm6,25 1977 pxor xmm6,xmm3 1978DB 102,15,58,15,246,4 1979DB 102,69,15,58,15,210,8 1980DB 102,69,15,58,15,246,12 1981 paddd xmm0,xmm4 1982 
pxor xmm12,xmm0 1983 pshufb xmm12,XMMWORD[$L$rol16] 1984 paddd xmm8,xmm12 1985 pxor xmm4,xmm8 1986 movdqa xmm3,xmm4 1987 pslld xmm3,12 1988 psrld xmm4,20 1989 pxor xmm4,xmm3 1990 paddd xmm0,xmm4 1991 pxor xmm12,xmm0 1992 pshufb xmm12,XMMWORD[$L$rol8] 1993 paddd xmm8,xmm12 1994 pxor xmm4,xmm8 1995 movdqa xmm3,xmm4 1996 pslld xmm3,7 1997 psrld xmm4,25 1998 pxor xmm4,xmm3 1999DB 102,15,58,15,228,12 2000DB 102,69,15,58,15,192,8 2001DB 102,69,15,58,15,228,4 2002 paddd xmm1,xmm5 2003 pxor xmm13,xmm1 2004 pshufb xmm13,XMMWORD[$L$rol16] 2005 paddd xmm9,xmm13 2006 pxor xmm5,xmm9 2007 movdqa xmm3,xmm5 2008 pslld xmm3,12 2009 psrld xmm5,20 2010 pxor xmm5,xmm3 2011 paddd xmm1,xmm5 2012 pxor xmm13,xmm1 2013 pshufb xmm13,XMMWORD[$L$rol8] 2014 paddd xmm9,xmm13 2015 pxor xmm5,xmm9 2016 movdqa xmm3,xmm5 2017 pslld xmm3,7 2018 psrld xmm5,25 2019 pxor xmm5,xmm3 2020DB 102,15,58,15,237,12 2021DB 102,69,15,58,15,201,8 2022DB 102,69,15,58,15,237,4 2023 paddd xmm2,xmm6 2024 pxor xmm14,xmm2 2025 pshufb xmm14,XMMWORD[$L$rol16] 2026 paddd xmm10,xmm14 2027 pxor xmm6,xmm10 2028 movdqa xmm3,xmm6 2029 pslld xmm3,12 2030 psrld xmm6,20 2031 pxor xmm6,xmm3 2032 paddd xmm2,xmm6 2033 pxor xmm14,xmm2 2034 pshufb xmm14,XMMWORD[$L$rol8] 2035 paddd xmm10,xmm14 2036 pxor xmm6,xmm10 2037 movdqa xmm3,xmm6 2038 pslld xmm3,7 2039 psrld xmm6,25 2040 pxor xmm6,xmm3 2041DB 102,15,58,15,246,12 2042DB 102,69,15,58,15,210,8 2043DB 102,69,15,58,15,246,4 2044 2045 dec r10 2046 jnz NEAR $L$open_sse_128_rounds 2047 paddd xmm0,XMMWORD[$L$chacha20_consts] 2048 paddd xmm1,XMMWORD[$L$chacha20_consts] 2049 paddd xmm2,XMMWORD[$L$chacha20_consts] 2050 paddd xmm4,xmm7 2051 paddd xmm5,xmm7 2052 paddd xmm6,xmm7 2053 paddd xmm9,xmm11 2054 paddd xmm10,xmm11 2055 paddd xmm13,xmm15 2056 paddd xmm15,XMMWORD[$L$sse_inc] 2057 paddd xmm14,xmm15 2058 2059 pand xmm0,XMMWORD[$L$clamp] 2060 movdqa XMMWORD[(160+0)+rbp],xmm0 2061 movdqa XMMWORD[(160+16)+rbp],xmm4 2062 2063 mov r8,r8 2064 call poly_hash_ad_internal 
2065$L$open_sse_128_xor_hash: 2066 cmp rbx,16 2067 jb NEAR $L$open_sse_tail_16 2068 sub rbx,16 2069 add r10,QWORD[((0+0))+rsi] 2070 adc r11,QWORD[((8+0))+rsi] 2071 adc r12,1 2072 2073 2074 movdqu xmm3,XMMWORD[rsi] 2075 pxor xmm1,xmm3 2076 movdqu XMMWORD[rdi],xmm1 2077 lea rsi,[16+rsi] 2078 lea rdi,[16+rdi] 2079 mov rax,QWORD[((0+160+0))+rbp] 2080 mov r15,rax 2081 mul r10 2082 mov r13,rax 2083 mov r14,rdx 2084 mov rax,QWORD[((0+160+0))+rbp] 2085 mul r11 2086 imul r15,r12 2087 add r14,rax 2088 adc r15,rdx 2089 mov rax,QWORD[((8+160+0))+rbp] 2090 mov r9,rax 2091 mul r10 2092 add r14,rax 2093 adc rdx,0 2094 mov r10,rdx 2095 mov rax,QWORD[((8+160+0))+rbp] 2096 mul r11 2097 add r15,rax 2098 adc rdx,0 2099 imul r9,r12 2100 add r15,r10 2101 adc r9,rdx 2102 mov r10,r13 2103 mov r11,r14 2104 mov r12,r15 2105 and r12,3 2106 mov r13,r15 2107 and r13,-4 2108 mov r14,r9 2109 shrd r15,r9,2 2110 shr r9,2 2111 add r15,r13 2112 adc r9,r14 2113 add r10,r15 2114 adc r11,r9 2115 adc r12,0 2116 2117 2118 movdqa xmm1,xmm5 2119 movdqa xmm5,xmm9 2120 movdqa xmm9,xmm13 2121 movdqa xmm13,xmm2 2122 movdqa xmm2,xmm6 2123 movdqa xmm6,xmm10 2124 movdqa xmm10,xmm14 2125 jmp NEAR $L$open_sse_128_xor_hash 2126$L$SEH_end_chacha20_poly1305_open: 2127 2128 2129 2130 2131 2132 2133 2134 2135global chacha20_poly1305_seal 2136 2137ALIGN 64 2138chacha20_poly1305_seal: 2139 mov QWORD[8+rsp],rdi ;WIN64 prologue 2140 mov QWORD[16+rsp],rsi 2141 mov rax,rsp 2142$L$SEH_begin_chacha20_poly1305_seal: 2143 mov rdi,rcx 2144 mov rsi,rdx 2145 mov rdx,r8 2146 mov rcx,r9 2147 mov r8,QWORD[40+rsp] 2148 mov r9,QWORD[48+rsp] 2149 2150 2151 2152_CET_ENDBR 2153 push rbp 2154 2155 push rbx 2156 2157 push r12 2158 2159 push r13 2160 2161 push r14 2162 2163 push r15 2164 2165 2166 2167 push r9 2168 2169 sub rsp,288 + 160 + 32 2170 2171 lea rbp,[32+rsp] 2172 and rbp,-32 2173 2174 movaps XMMWORD[(0+0)+rbp],xmm6 2175 movaps XMMWORD[(16+0)+rbp],xmm7 2176 movaps XMMWORD[(32+0)+rbp],xmm8 2177 movaps XMMWORD[(48+0)+rbp],xmm9 2178 
movaps XMMWORD[(64+0)+rbp],xmm10 2179 movaps XMMWORD[(80+0)+rbp],xmm11 2180 movaps XMMWORD[(96+0)+rbp],xmm12 2181 movaps XMMWORD[(112+0)+rbp],xmm13 2182 movaps XMMWORD[(128+0)+rbp],xmm14 2183 movaps XMMWORD[(144+0)+rbp],xmm15 2184 2185 mov rbx,QWORD[56+r9] 2186 add rbx,rdx 2187 mov QWORD[((0+160+32))+rbp],r8 2188 mov QWORD[((8+160+32))+rbp],rbx 2189 mov rbx,rdx 2190 2191 mov eax,DWORD[((OPENSSL_ia32cap_P+8))] 2192 and eax,288 2193 xor eax,288 2194 jz NEAR chacha20_poly1305_seal_avx2 2195 2196 cmp rbx,128 2197 jbe NEAR $L$seal_sse_128 2198 2199 movdqa xmm0,XMMWORD[$L$chacha20_consts] 2200 movdqu xmm4,XMMWORD[r9] 2201 movdqu xmm8,XMMWORD[16+r9] 2202 movdqu xmm12,XMMWORD[32+r9] 2203 2204 movdqa xmm1,xmm0 2205 movdqa xmm2,xmm0 2206 movdqa xmm3,xmm0 2207 movdqa xmm5,xmm4 2208 movdqa xmm6,xmm4 2209 movdqa xmm7,xmm4 2210 movdqa xmm9,xmm8 2211 movdqa xmm10,xmm8 2212 movdqa xmm11,xmm8 2213 movdqa xmm15,xmm12 2214 paddd xmm12,XMMWORD[$L$sse_inc] 2215 movdqa xmm14,xmm12 2216 paddd xmm12,XMMWORD[$L$sse_inc] 2217 movdqa xmm13,xmm12 2218 paddd xmm12,XMMWORD[$L$sse_inc] 2219 2220 movdqa XMMWORD[(160+48)+rbp],xmm4 2221 movdqa XMMWORD[(160+64)+rbp],xmm8 2222 movdqa XMMWORD[(160+96)+rbp],xmm12 2223 movdqa XMMWORD[(160+112)+rbp],xmm13 2224 movdqa XMMWORD[(160+128)+rbp],xmm14 2225 movdqa XMMWORD[(160+144)+rbp],xmm15 2226 mov r10,10 2227$L$seal_sse_init_rounds: 2228 movdqa XMMWORD[(160+80)+rbp],xmm8 2229 movdqa xmm8,XMMWORD[$L$rol16] 2230 paddd xmm3,xmm7 2231 paddd xmm2,xmm6 2232 paddd xmm1,xmm5 2233 paddd xmm0,xmm4 2234 pxor xmm15,xmm3 2235 pxor xmm14,xmm2 2236 pxor xmm13,xmm1 2237 pxor xmm12,xmm0 2238DB 102,69,15,56,0,248 2239DB 102,69,15,56,0,240 2240DB 102,69,15,56,0,232 2241DB 102,69,15,56,0,224 2242 movdqa xmm8,XMMWORD[((160+80))+rbp] 2243 paddd xmm11,xmm15 2244 paddd xmm10,xmm14 2245 paddd xmm9,xmm13 2246 paddd xmm8,xmm12 2247 pxor xmm7,xmm11 2248 pxor xmm6,xmm10 2249 pxor xmm5,xmm9 2250 pxor xmm4,xmm8 2251 movdqa XMMWORD[(160+80)+rbp],xmm8 2252 movdqa xmm8,xmm7 2253 psrld 
xmm8,20 2254 pslld xmm7,32-20 2255 pxor xmm7,xmm8 2256 movdqa xmm8,xmm6 2257 psrld xmm8,20 2258 pslld xmm6,32-20 2259 pxor xmm6,xmm8 2260 movdqa xmm8,xmm5 2261 psrld xmm8,20 2262 pslld xmm5,32-20 2263 pxor xmm5,xmm8 2264 movdqa xmm8,xmm4 2265 psrld xmm8,20 2266 pslld xmm4,32-20 2267 pxor xmm4,xmm8 2268 movdqa xmm8,XMMWORD[$L$rol8] 2269 paddd xmm3,xmm7 2270 paddd xmm2,xmm6 2271 paddd xmm1,xmm5 2272 paddd xmm0,xmm4 2273 pxor xmm15,xmm3 2274 pxor xmm14,xmm2 2275 pxor xmm13,xmm1 2276 pxor xmm12,xmm0 2277DB 102,69,15,56,0,248 2278DB 102,69,15,56,0,240 2279DB 102,69,15,56,0,232 2280DB 102,69,15,56,0,224 2281 movdqa xmm8,XMMWORD[((160+80))+rbp] 2282 paddd xmm11,xmm15 2283 paddd xmm10,xmm14 2284 paddd xmm9,xmm13 2285 paddd xmm8,xmm12 2286 pxor xmm7,xmm11 2287 pxor xmm6,xmm10 2288 pxor xmm5,xmm9 2289 pxor xmm4,xmm8 2290 movdqa XMMWORD[(160+80)+rbp],xmm8 2291 movdqa xmm8,xmm7 2292 psrld xmm8,25 2293 pslld xmm7,32-25 2294 pxor xmm7,xmm8 2295 movdqa xmm8,xmm6 2296 psrld xmm8,25 2297 pslld xmm6,32-25 2298 pxor xmm6,xmm8 2299 movdqa xmm8,xmm5 2300 psrld xmm8,25 2301 pslld xmm5,32-25 2302 pxor xmm5,xmm8 2303 movdqa xmm8,xmm4 2304 psrld xmm8,25 2305 pslld xmm4,32-25 2306 pxor xmm4,xmm8 2307 movdqa xmm8,XMMWORD[((160+80))+rbp] 2308DB 102,15,58,15,255,4 2309DB 102,69,15,58,15,219,8 2310DB 102,69,15,58,15,255,12 2311DB 102,15,58,15,246,4 2312DB 102,69,15,58,15,210,8 2313DB 102,69,15,58,15,246,12 2314DB 102,15,58,15,237,4 2315DB 102,69,15,58,15,201,8 2316DB 102,69,15,58,15,237,12 2317DB 102,15,58,15,228,4 2318DB 102,69,15,58,15,192,8 2319DB 102,69,15,58,15,228,12 2320 movdqa XMMWORD[(160+80)+rbp],xmm8 2321 movdqa xmm8,XMMWORD[$L$rol16] 2322 paddd xmm3,xmm7 2323 paddd xmm2,xmm6 2324 paddd xmm1,xmm5 2325 paddd xmm0,xmm4 2326 pxor xmm15,xmm3 2327 pxor xmm14,xmm2 2328 pxor xmm13,xmm1 2329 pxor xmm12,xmm0 2330DB 102,69,15,56,0,248 2331DB 102,69,15,56,0,240 2332DB 102,69,15,56,0,232 2333DB 102,69,15,56,0,224 2334 movdqa xmm8,XMMWORD[((160+80))+rbp] 2335 paddd xmm11,xmm15 2336 paddd 
xmm10,xmm14 2337 paddd xmm9,xmm13 2338 paddd xmm8,xmm12 2339 pxor xmm7,xmm11 2340 pxor xmm6,xmm10 2341 pxor xmm5,xmm9 2342 pxor xmm4,xmm8 2343 movdqa XMMWORD[(160+80)+rbp],xmm8 2344 movdqa xmm8,xmm7 2345 psrld xmm8,20 2346 pslld xmm7,32-20 2347 pxor xmm7,xmm8 2348 movdqa xmm8,xmm6 2349 psrld xmm8,20 2350 pslld xmm6,32-20 2351 pxor xmm6,xmm8 2352 movdqa xmm8,xmm5 2353 psrld xmm8,20 2354 pslld xmm5,32-20 2355 pxor xmm5,xmm8 2356 movdqa xmm8,xmm4 2357 psrld xmm8,20 2358 pslld xmm4,32-20 2359 pxor xmm4,xmm8 2360 movdqa xmm8,XMMWORD[$L$rol8] 2361 paddd xmm3,xmm7 2362 paddd xmm2,xmm6 2363 paddd xmm1,xmm5 2364 paddd xmm0,xmm4 2365 pxor xmm15,xmm3 2366 pxor xmm14,xmm2 2367 pxor xmm13,xmm1 2368 pxor xmm12,xmm0 2369DB 102,69,15,56,0,248 2370DB 102,69,15,56,0,240 2371DB 102,69,15,56,0,232 2372DB 102,69,15,56,0,224 2373 movdqa xmm8,XMMWORD[((160+80))+rbp] 2374 paddd xmm11,xmm15 2375 paddd xmm10,xmm14 2376 paddd xmm9,xmm13 2377 paddd xmm8,xmm12 2378 pxor xmm7,xmm11 2379 pxor xmm6,xmm10 2380 pxor xmm5,xmm9 2381 pxor xmm4,xmm8 2382 movdqa XMMWORD[(160+80)+rbp],xmm8 2383 movdqa xmm8,xmm7 2384 psrld xmm8,25 2385 pslld xmm7,32-25 2386 pxor xmm7,xmm8 2387 movdqa xmm8,xmm6 2388 psrld xmm8,25 2389 pslld xmm6,32-25 2390 pxor xmm6,xmm8 2391 movdqa xmm8,xmm5 2392 psrld xmm8,25 2393 pslld xmm5,32-25 2394 pxor xmm5,xmm8 2395 movdqa xmm8,xmm4 2396 psrld xmm8,25 2397 pslld xmm4,32-25 2398 pxor xmm4,xmm8 2399 movdqa xmm8,XMMWORD[((160+80))+rbp] 2400DB 102,15,58,15,255,12 2401DB 102,69,15,58,15,219,8 2402DB 102,69,15,58,15,255,4 2403DB 102,15,58,15,246,12 2404DB 102,69,15,58,15,210,8 2405DB 102,69,15,58,15,246,4 2406DB 102,15,58,15,237,12 2407DB 102,69,15,58,15,201,8 2408DB 102,69,15,58,15,237,4 2409DB 102,15,58,15,228,12 2410DB 102,69,15,58,15,192,8 2411DB 102,69,15,58,15,228,4 2412 2413 dec r10 2414 jnz NEAR $L$seal_sse_init_rounds 2415 paddd xmm3,XMMWORD[$L$chacha20_consts] 2416 paddd xmm7,XMMWORD[((160+48))+rbp] 2417 paddd xmm11,XMMWORD[((160+64))+rbp] 2418 paddd 
xmm15,XMMWORD[((160+144))+rbp] 2419 paddd xmm2,XMMWORD[$L$chacha20_consts] 2420 paddd xmm6,XMMWORD[((160+48))+rbp] 2421 paddd xmm10,XMMWORD[((160+64))+rbp] 2422 paddd xmm14,XMMWORD[((160+128))+rbp] 2423 paddd xmm1,XMMWORD[$L$chacha20_consts] 2424 paddd xmm5,XMMWORD[((160+48))+rbp] 2425 paddd xmm9,XMMWORD[((160+64))+rbp] 2426 paddd xmm13,XMMWORD[((160+112))+rbp] 2427 paddd xmm0,XMMWORD[$L$chacha20_consts] 2428 paddd xmm4,XMMWORD[((160+48))+rbp] 2429 paddd xmm8,XMMWORD[((160+64))+rbp] 2430 paddd xmm12,XMMWORD[((160+96))+rbp] 2431 2432 2433 pand xmm3,XMMWORD[$L$clamp] 2434 movdqa XMMWORD[(160+0)+rbp],xmm3 2435 movdqa XMMWORD[(160+16)+rbp],xmm7 2436 2437 mov r8,r8 2438 call poly_hash_ad_internal 2439 movdqu xmm3,XMMWORD[((0 + 0))+rsi] 2440 movdqu xmm7,XMMWORD[((16 + 0))+rsi] 2441 movdqu xmm11,XMMWORD[((32 + 0))+rsi] 2442 movdqu xmm15,XMMWORD[((48 + 0))+rsi] 2443 pxor xmm2,xmm3 2444 pxor xmm6,xmm7 2445 pxor xmm10,xmm11 2446 pxor xmm15,xmm14 2447 movdqu XMMWORD[(0 + 0)+rdi],xmm2 2448 movdqu XMMWORD[(16 + 0)+rdi],xmm6 2449 movdqu XMMWORD[(32 + 0)+rdi],xmm10 2450 movdqu XMMWORD[(48 + 0)+rdi],xmm15 2451 movdqu xmm3,XMMWORD[((0 + 64))+rsi] 2452 movdqu xmm7,XMMWORD[((16 + 64))+rsi] 2453 movdqu xmm11,XMMWORD[((32 + 64))+rsi] 2454 movdqu xmm15,XMMWORD[((48 + 64))+rsi] 2455 pxor xmm1,xmm3 2456 pxor xmm5,xmm7 2457 pxor xmm9,xmm11 2458 pxor xmm15,xmm13 2459 movdqu XMMWORD[(0 + 64)+rdi],xmm1 2460 movdqu XMMWORD[(16 + 64)+rdi],xmm5 2461 movdqu XMMWORD[(32 + 64)+rdi],xmm9 2462 movdqu XMMWORD[(48 + 64)+rdi],xmm15 2463 2464 cmp rbx,12*16 2465 ja NEAR $L$seal_sse_main_init 2466 mov rcx,8*16 2467 sub rbx,8*16 2468 lea rsi,[128+rsi] 2469 jmp NEAR $L$seal_sse_128_tail_hash 2470$L$seal_sse_main_init: 2471 movdqu xmm3,XMMWORD[((0 + 128))+rsi] 2472 movdqu xmm7,XMMWORD[((16 + 128))+rsi] 2473 movdqu xmm11,XMMWORD[((32 + 128))+rsi] 2474 movdqu xmm15,XMMWORD[((48 + 128))+rsi] 2475 pxor xmm0,xmm3 2476 pxor xmm4,xmm7 2477 pxor xmm8,xmm11 2478 pxor xmm15,xmm12 2479 movdqu XMMWORD[(0 + 128)+rdi],xmm0 
2480 movdqu XMMWORD[(16 + 128)+rdi],xmm4 2481 movdqu XMMWORD[(32 + 128)+rdi],xmm8 2482 movdqu XMMWORD[(48 + 128)+rdi],xmm15 2483 2484 mov rcx,12*16 2485 sub rbx,12*16 2486 lea rsi,[192+rsi] 2487 mov rcx,2 2488 mov r8,8 2489 cmp rbx,4*16 2490 jbe NEAR $L$seal_sse_tail_64 2491 cmp rbx,8*16 2492 jbe NEAR $L$seal_sse_tail_128 2493 cmp rbx,12*16 2494 jbe NEAR $L$seal_sse_tail_192 2495 2496$L$seal_sse_main_loop: 2497 movdqa xmm0,XMMWORD[$L$chacha20_consts] 2498 movdqa xmm4,XMMWORD[((160+48))+rbp] 2499 movdqa xmm8,XMMWORD[((160+64))+rbp] 2500 movdqa xmm1,xmm0 2501 movdqa xmm5,xmm4 2502 movdqa xmm9,xmm8 2503 movdqa xmm2,xmm0 2504 movdqa xmm6,xmm4 2505 movdqa xmm10,xmm8 2506 movdqa xmm3,xmm0 2507 movdqa xmm7,xmm4 2508 movdqa xmm11,xmm8 2509 movdqa xmm15,XMMWORD[((160+96))+rbp] 2510 paddd xmm15,XMMWORD[$L$sse_inc] 2511 movdqa xmm14,xmm15 2512 paddd xmm14,XMMWORD[$L$sse_inc] 2513 movdqa xmm13,xmm14 2514 paddd xmm13,XMMWORD[$L$sse_inc] 2515 movdqa xmm12,xmm13 2516 paddd xmm12,XMMWORD[$L$sse_inc] 2517 movdqa XMMWORD[(160+96)+rbp],xmm12 2518 movdqa XMMWORD[(160+112)+rbp],xmm13 2519 movdqa XMMWORD[(160+128)+rbp],xmm14 2520 movdqa XMMWORD[(160+144)+rbp],xmm15 2521 2522ALIGN 32 2523$L$seal_sse_main_rounds: 2524 movdqa XMMWORD[(160+80)+rbp],xmm8 2525 movdqa xmm8,XMMWORD[$L$rol16] 2526 paddd xmm3,xmm7 2527 paddd xmm2,xmm6 2528 paddd xmm1,xmm5 2529 paddd xmm0,xmm4 2530 pxor xmm15,xmm3 2531 pxor xmm14,xmm2 2532 pxor xmm13,xmm1 2533 pxor xmm12,xmm0 2534DB 102,69,15,56,0,248 2535DB 102,69,15,56,0,240 2536DB 102,69,15,56,0,232 2537DB 102,69,15,56,0,224 2538 movdqa xmm8,XMMWORD[((160+80))+rbp] 2539 paddd xmm11,xmm15 2540 paddd xmm10,xmm14 2541 paddd xmm9,xmm13 2542 paddd xmm8,xmm12 2543 pxor xmm7,xmm11 2544 add r10,QWORD[((0+0))+rdi] 2545 adc r11,QWORD[((8+0))+rdi] 2546 adc r12,1 2547 pxor xmm6,xmm10 2548 pxor xmm5,xmm9 2549 pxor xmm4,xmm8 2550 movdqa XMMWORD[(160+80)+rbp],xmm8 2551 movdqa xmm8,xmm7 2552 psrld xmm8,20 2553 pslld xmm7,32-20 2554 pxor xmm7,xmm8 2555 movdqa xmm8,xmm6 2556 
psrld xmm8,20 2557 pslld xmm6,32-20 2558 pxor xmm6,xmm8 2559 movdqa xmm8,xmm5 2560 psrld xmm8,20 2561 pslld xmm5,32-20 2562 pxor xmm5,xmm8 2563 movdqa xmm8,xmm4 2564 psrld xmm8,20 2565 pslld xmm4,32-20 2566 pxor xmm4,xmm8 2567 mov rax,QWORD[((0+160+0))+rbp] 2568 mov r15,rax 2569 mul r10 2570 mov r13,rax 2571 mov r14,rdx 2572 mov rax,QWORD[((0+160+0))+rbp] 2573 mul r11 2574 imul r15,r12 2575 add r14,rax 2576 adc r15,rdx 2577 movdqa xmm8,XMMWORD[$L$rol8] 2578 paddd xmm3,xmm7 2579 paddd xmm2,xmm6 2580 paddd xmm1,xmm5 2581 paddd xmm0,xmm4 2582 pxor xmm15,xmm3 2583 pxor xmm14,xmm2 2584 pxor xmm13,xmm1 2585 pxor xmm12,xmm0 2586DB 102,69,15,56,0,248 2587DB 102,69,15,56,0,240 2588DB 102,69,15,56,0,232 2589DB 102,69,15,56,0,224 2590 movdqa xmm8,XMMWORD[((160+80))+rbp] 2591 paddd xmm11,xmm15 2592 paddd xmm10,xmm14 2593 paddd xmm9,xmm13 2594 paddd xmm8,xmm12 2595 pxor xmm7,xmm11 2596 pxor xmm6,xmm10 2597 mov rax,QWORD[((8+160+0))+rbp] 2598 mov r9,rax 2599 mul r10 2600 add r14,rax 2601 adc rdx,0 2602 mov r10,rdx 2603 mov rax,QWORD[((8+160+0))+rbp] 2604 mul r11 2605 add r15,rax 2606 adc rdx,0 2607 pxor xmm5,xmm9 2608 pxor xmm4,xmm8 2609 movdqa XMMWORD[(160+80)+rbp],xmm8 2610 movdqa xmm8,xmm7 2611 psrld xmm8,25 2612 pslld xmm7,32-25 2613 pxor xmm7,xmm8 2614 movdqa xmm8,xmm6 2615 psrld xmm8,25 2616 pslld xmm6,32-25 2617 pxor xmm6,xmm8 2618 movdqa xmm8,xmm5 2619 psrld xmm8,25 2620 pslld xmm5,32-25 2621 pxor xmm5,xmm8 2622 movdqa xmm8,xmm4 2623 psrld xmm8,25 2624 pslld xmm4,32-25 2625 pxor xmm4,xmm8 2626 movdqa xmm8,XMMWORD[((160+80))+rbp] 2627 imul r9,r12 2628 add r15,r10 2629 adc r9,rdx 2630DB 102,15,58,15,255,4 2631DB 102,69,15,58,15,219,8 2632DB 102,69,15,58,15,255,12 2633DB 102,15,58,15,246,4 2634DB 102,69,15,58,15,210,8 2635DB 102,69,15,58,15,246,12 2636DB 102,15,58,15,237,4 2637DB 102,69,15,58,15,201,8 2638DB 102,69,15,58,15,237,12 2639DB 102,15,58,15,228,4 2640DB 102,69,15,58,15,192,8 2641DB 102,69,15,58,15,228,12 2642 movdqa XMMWORD[(160+80)+rbp],xmm8 2643 movdqa 
xmm8,XMMWORD[$L$rol16] 2644 paddd xmm3,xmm7 2645 paddd xmm2,xmm6 2646 paddd xmm1,xmm5 2647 paddd xmm0,xmm4 2648 pxor xmm15,xmm3 2649 pxor xmm14,xmm2 2650 mov r10,r13 2651 mov r11,r14 2652 mov r12,r15 2653 and r12,3 2654 mov r13,r15 2655 and r13,-4 2656 mov r14,r9 2657 shrd r15,r9,2 2658 shr r9,2 2659 add r15,r13 2660 adc r9,r14 2661 add r10,r15 2662 adc r11,r9 2663 adc r12,0 2664 pxor xmm13,xmm1 2665 pxor xmm12,xmm0 2666DB 102,69,15,56,0,248 2667DB 102,69,15,56,0,240 2668DB 102,69,15,56,0,232 2669DB 102,69,15,56,0,224 2670 movdqa xmm8,XMMWORD[((160+80))+rbp] 2671 paddd xmm11,xmm15 2672 paddd xmm10,xmm14 2673 paddd xmm9,xmm13 2674 paddd xmm8,xmm12 2675 pxor xmm7,xmm11 2676 pxor xmm6,xmm10 2677 pxor xmm5,xmm9 2678 pxor xmm4,xmm8 2679 movdqa XMMWORD[(160+80)+rbp],xmm8 2680 movdqa xmm8,xmm7 2681 psrld xmm8,20 2682 pslld xmm7,32-20 2683 pxor xmm7,xmm8 2684 movdqa xmm8,xmm6 2685 psrld xmm8,20 2686 pslld xmm6,32-20 2687 pxor xmm6,xmm8 2688 movdqa xmm8,xmm5 2689 psrld xmm8,20 2690 pslld xmm5,32-20 2691 pxor xmm5,xmm8 2692 movdqa xmm8,xmm4 2693 psrld xmm8,20 2694 pslld xmm4,32-20 2695 pxor xmm4,xmm8 2696 movdqa xmm8,XMMWORD[$L$rol8] 2697 paddd xmm3,xmm7 2698 paddd xmm2,xmm6 2699 paddd xmm1,xmm5 2700 paddd xmm0,xmm4 2701 pxor xmm15,xmm3 2702 pxor xmm14,xmm2 2703 pxor xmm13,xmm1 2704 pxor xmm12,xmm0 2705DB 102,69,15,56,0,248 2706DB 102,69,15,56,0,240 2707DB 102,69,15,56,0,232 2708DB 102,69,15,56,0,224 2709 movdqa xmm8,XMMWORD[((160+80))+rbp] 2710 paddd xmm11,xmm15 2711 paddd xmm10,xmm14 2712 paddd xmm9,xmm13 2713 paddd xmm8,xmm12 2714 pxor xmm7,xmm11 2715 pxor xmm6,xmm10 2716 pxor xmm5,xmm9 2717 pxor xmm4,xmm8 2718 movdqa XMMWORD[(160+80)+rbp],xmm8 2719 movdqa xmm8,xmm7 2720 psrld xmm8,25 2721 pslld xmm7,32-25 2722 pxor xmm7,xmm8 2723 movdqa xmm8,xmm6 2724 psrld xmm8,25 2725 pslld xmm6,32-25 2726 pxor xmm6,xmm8 2727 movdqa xmm8,xmm5 2728 psrld xmm8,25 2729 pslld xmm5,32-25 2730 pxor xmm5,xmm8 2731 movdqa xmm8,xmm4 2732 psrld xmm8,25 2733 pslld xmm4,32-25 2734 pxor xmm4,xmm8 
2735 movdqa xmm8,XMMWORD[((160+80))+rbp] 2736DB 102,15,58,15,255,12 2737DB 102,69,15,58,15,219,8 2738DB 102,69,15,58,15,255,4 2739DB 102,15,58,15,246,12 2740DB 102,69,15,58,15,210,8 2741DB 102,69,15,58,15,246,4 2742DB 102,15,58,15,237,12 2743DB 102,69,15,58,15,201,8 2744DB 102,69,15,58,15,237,4 2745DB 102,15,58,15,228,12 2746DB 102,69,15,58,15,192,8 2747DB 102,69,15,58,15,228,4 2748 2749 lea rdi,[16+rdi] 2750 dec r8 2751 jge NEAR $L$seal_sse_main_rounds 2752 add r10,QWORD[((0+0))+rdi] 2753 adc r11,QWORD[((8+0))+rdi] 2754 adc r12,1 2755 mov rax,QWORD[((0+160+0))+rbp] 2756 mov r15,rax 2757 mul r10 2758 mov r13,rax 2759 mov r14,rdx 2760 mov rax,QWORD[((0+160+0))+rbp] 2761 mul r11 2762 imul r15,r12 2763 add r14,rax 2764 adc r15,rdx 2765 mov rax,QWORD[((8+160+0))+rbp] 2766 mov r9,rax 2767 mul r10 2768 add r14,rax 2769 adc rdx,0 2770 mov r10,rdx 2771 mov rax,QWORD[((8+160+0))+rbp] 2772 mul r11 2773 add r15,rax 2774 adc rdx,0 2775 imul r9,r12 2776 add r15,r10 2777 adc r9,rdx 2778 mov r10,r13 2779 mov r11,r14 2780 mov r12,r15 2781 and r12,3 2782 mov r13,r15 2783 and r13,-4 2784 mov r14,r9 2785 shrd r15,r9,2 2786 shr r9,2 2787 add r15,r13 2788 adc r9,r14 2789 add r10,r15 2790 adc r11,r9 2791 adc r12,0 2792 2793 lea rdi,[16+rdi] 2794 dec rcx 2795 jg NEAR $L$seal_sse_main_rounds 2796 paddd xmm3,XMMWORD[$L$chacha20_consts] 2797 paddd xmm7,XMMWORD[((160+48))+rbp] 2798 paddd xmm11,XMMWORD[((160+64))+rbp] 2799 paddd xmm15,XMMWORD[((160+144))+rbp] 2800 paddd xmm2,XMMWORD[$L$chacha20_consts] 2801 paddd xmm6,XMMWORD[((160+48))+rbp] 2802 paddd xmm10,XMMWORD[((160+64))+rbp] 2803 paddd xmm14,XMMWORD[((160+128))+rbp] 2804 paddd xmm1,XMMWORD[$L$chacha20_consts] 2805 paddd xmm5,XMMWORD[((160+48))+rbp] 2806 paddd xmm9,XMMWORD[((160+64))+rbp] 2807 paddd xmm13,XMMWORD[((160+112))+rbp] 2808 paddd xmm0,XMMWORD[$L$chacha20_consts] 2809 paddd xmm4,XMMWORD[((160+48))+rbp] 2810 paddd xmm8,XMMWORD[((160+64))+rbp] 2811 paddd xmm12,XMMWORD[((160+96))+rbp] 2812 2813 movdqa XMMWORD[(160+80)+rbp],xmm14 
2814 movdqa XMMWORD[(160+80)+rbp],xmm14 2815 movdqu xmm14,XMMWORD[((0 + 0))+rsi] 2816 pxor xmm14,xmm3 2817 movdqu XMMWORD[(0 + 0)+rdi],xmm14 2818 movdqu xmm14,XMMWORD[((16 + 0))+rsi] 2819 pxor xmm14,xmm7 2820 movdqu XMMWORD[(16 + 0)+rdi],xmm14 2821 movdqu xmm14,XMMWORD[((32 + 0))+rsi] 2822 pxor xmm14,xmm11 2823 movdqu XMMWORD[(32 + 0)+rdi],xmm14 2824 movdqu xmm14,XMMWORD[((48 + 0))+rsi] 2825 pxor xmm14,xmm15 2826 movdqu XMMWORD[(48 + 0)+rdi],xmm14 2827 2828 movdqa xmm14,XMMWORD[((160+80))+rbp] 2829 movdqu xmm3,XMMWORD[((0 + 64))+rsi] 2830 movdqu xmm7,XMMWORD[((16 + 64))+rsi] 2831 movdqu xmm11,XMMWORD[((32 + 64))+rsi] 2832 movdqu xmm15,XMMWORD[((48 + 64))+rsi] 2833 pxor xmm2,xmm3 2834 pxor xmm6,xmm7 2835 pxor xmm10,xmm11 2836 pxor xmm15,xmm14 2837 movdqu XMMWORD[(0 + 64)+rdi],xmm2 2838 movdqu XMMWORD[(16 + 64)+rdi],xmm6 2839 movdqu XMMWORD[(32 + 64)+rdi],xmm10 2840 movdqu XMMWORD[(48 + 64)+rdi],xmm15 2841 movdqu xmm3,XMMWORD[((0 + 128))+rsi] 2842 movdqu xmm7,XMMWORD[((16 + 128))+rsi] 2843 movdqu xmm11,XMMWORD[((32 + 128))+rsi] 2844 movdqu xmm15,XMMWORD[((48 + 128))+rsi] 2845 pxor xmm1,xmm3 2846 pxor xmm5,xmm7 2847 pxor xmm9,xmm11 2848 pxor xmm15,xmm13 2849 movdqu XMMWORD[(0 + 128)+rdi],xmm1 2850 movdqu XMMWORD[(16 + 128)+rdi],xmm5 2851 movdqu XMMWORD[(32 + 128)+rdi],xmm9 2852 movdqu XMMWORD[(48 + 128)+rdi],xmm15 2853 2854 cmp rbx,16*16 2855 ja NEAR $L$seal_sse_main_loop_xor 2856 2857 mov rcx,12*16 2858 sub rbx,12*16 2859 lea rsi,[192+rsi] 2860 jmp NEAR $L$seal_sse_128_tail_hash 2861$L$seal_sse_main_loop_xor: 2862 movdqu xmm3,XMMWORD[((0 + 192))+rsi] 2863 movdqu xmm7,XMMWORD[((16 + 192))+rsi] 2864 movdqu xmm11,XMMWORD[((32 + 192))+rsi] 2865 movdqu xmm15,XMMWORD[((48 + 192))+rsi] 2866 pxor xmm0,xmm3 2867 pxor xmm4,xmm7 2868 pxor xmm8,xmm11 2869 pxor xmm15,xmm12 2870 movdqu XMMWORD[(0 + 192)+rdi],xmm0 2871 movdqu XMMWORD[(16 + 192)+rdi],xmm4 2872 movdqu XMMWORD[(32 + 192)+rdi],xmm8 2873 movdqu XMMWORD[(48 + 192)+rdi],xmm15 2874 2875 lea rsi,[256+rsi] 2876 sub rbx,16*16 
2877 mov rcx,6 2878 mov r8,4 2879 cmp rbx,12*16 2880 jg NEAR $L$seal_sse_main_loop 2881 mov rcx,rbx 2882 test rbx,rbx 2883 je NEAR $L$seal_sse_128_tail_hash 2884 mov rcx,6 2885 cmp rbx,8*16 2886 ja NEAR $L$seal_sse_tail_192 2887 cmp rbx,4*16 2888 ja NEAR $L$seal_sse_tail_128 2889 2890$L$seal_sse_tail_64: 2891 movdqa xmm0,XMMWORD[$L$chacha20_consts] 2892 movdqa xmm4,XMMWORD[((160+48))+rbp] 2893 movdqa xmm8,XMMWORD[((160+64))+rbp] 2894 movdqa xmm12,XMMWORD[((160+96))+rbp] 2895 paddd xmm12,XMMWORD[$L$sse_inc] 2896 movdqa XMMWORD[(160+96)+rbp],xmm12 2897 2898$L$seal_sse_tail_64_rounds_and_x2hash: 2899 add r10,QWORD[((0+0))+rdi] 2900 adc r11,QWORD[((8+0))+rdi] 2901 adc r12,1 2902 mov rax,QWORD[((0+160+0))+rbp] 2903 mov r15,rax 2904 mul r10 2905 mov r13,rax 2906 mov r14,rdx 2907 mov rax,QWORD[((0+160+0))+rbp] 2908 mul r11 2909 imul r15,r12 2910 add r14,rax 2911 adc r15,rdx 2912 mov rax,QWORD[((8+160+0))+rbp] 2913 mov r9,rax 2914 mul r10 2915 add r14,rax 2916 adc rdx,0 2917 mov r10,rdx 2918 mov rax,QWORD[((8+160+0))+rbp] 2919 mul r11 2920 add r15,rax 2921 adc rdx,0 2922 imul r9,r12 2923 add r15,r10 2924 adc r9,rdx 2925 mov r10,r13 2926 mov r11,r14 2927 mov r12,r15 2928 and r12,3 2929 mov r13,r15 2930 and r13,-4 2931 mov r14,r9 2932 shrd r15,r9,2 2933 shr r9,2 2934 add r15,r13 2935 adc r9,r14 2936 add r10,r15 2937 adc r11,r9 2938 adc r12,0 2939 2940 lea rdi,[16+rdi] 2941$L$seal_sse_tail_64_rounds_and_x1hash: 2942 paddd xmm0,xmm4 2943 pxor xmm12,xmm0 2944 pshufb xmm12,XMMWORD[$L$rol16] 2945 paddd xmm8,xmm12 2946 pxor xmm4,xmm8 2947 movdqa xmm3,xmm4 2948 pslld xmm3,12 2949 psrld xmm4,20 2950 pxor xmm4,xmm3 2951 paddd xmm0,xmm4 2952 pxor xmm12,xmm0 2953 pshufb xmm12,XMMWORD[$L$rol8] 2954 paddd xmm8,xmm12 2955 pxor xmm4,xmm8 2956 movdqa xmm3,xmm4 2957 pslld xmm3,7 2958 psrld xmm4,25 2959 pxor xmm4,xmm3 2960DB 102,15,58,15,228,4 2961DB 102,69,15,58,15,192,8 2962DB 102,69,15,58,15,228,12 2963 paddd xmm0,xmm4 2964 pxor xmm12,xmm0 2965 pshufb xmm12,XMMWORD[$L$rol16] 2966 paddd 
xmm8,xmm12 2967 pxor xmm4,xmm8 2968 movdqa xmm3,xmm4 2969 pslld xmm3,12 2970 psrld xmm4,20 2971 pxor xmm4,xmm3 2972 paddd xmm0,xmm4 2973 pxor xmm12,xmm0 2974 pshufb xmm12,XMMWORD[$L$rol8] 2975 paddd xmm8,xmm12 2976 pxor xmm4,xmm8 2977 movdqa xmm3,xmm4 2978 pslld xmm3,7 2979 psrld xmm4,25 2980 pxor xmm4,xmm3 2981DB 102,15,58,15,228,12 2982DB 102,69,15,58,15,192,8 2983DB 102,69,15,58,15,228,4 2984 add r10,QWORD[((0+0))+rdi] 2985 adc r11,QWORD[((8+0))+rdi] 2986 adc r12,1 2987 mov rax,QWORD[((0+160+0))+rbp] 2988 mov r15,rax 2989 mul r10 2990 mov r13,rax 2991 mov r14,rdx 2992 mov rax,QWORD[((0+160+0))+rbp] 2993 mul r11 2994 imul r15,r12 2995 add r14,rax 2996 adc r15,rdx 2997 mov rax,QWORD[((8+160+0))+rbp] 2998 mov r9,rax 2999 mul r10 3000 add r14,rax 3001 adc rdx,0 3002 mov r10,rdx 3003 mov rax,QWORD[((8+160+0))+rbp] 3004 mul r11 3005 add r15,rax 3006 adc rdx,0 3007 imul r9,r12 3008 add r15,r10 3009 adc r9,rdx 3010 mov r10,r13 3011 mov r11,r14 3012 mov r12,r15 3013 and r12,3 3014 mov r13,r15 3015 and r13,-4 3016 mov r14,r9 3017 shrd r15,r9,2 3018 shr r9,2 3019 add r15,r13 3020 adc r9,r14 3021 add r10,r15 3022 adc r11,r9 3023 adc r12,0 3024 3025 lea rdi,[16+rdi] 3026 dec rcx 3027 jg NEAR $L$seal_sse_tail_64_rounds_and_x2hash 3028 dec r8 3029 jge NEAR $L$seal_sse_tail_64_rounds_and_x1hash 3030 paddd xmm0,XMMWORD[$L$chacha20_consts] 3031 paddd xmm4,XMMWORD[((160+48))+rbp] 3032 paddd xmm8,XMMWORD[((160+64))+rbp] 3033 paddd xmm12,XMMWORD[((160+96))+rbp] 3034 3035 jmp NEAR $L$seal_sse_128_tail_xor 3036 3037$L$seal_sse_tail_128: 3038 movdqa xmm0,XMMWORD[$L$chacha20_consts] 3039 movdqa xmm4,XMMWORD[((160+48))+rbp] 3040 movdqa xmm8,XMMWORD[((160+64))+rbp] 3041 movdqa xmm1,xmm0 3042 movdqa xmm5,xmm4 3043 movdqa xmm9,xmm8 3044 movdqa xmm13,XMMWORD[((160+96))+rbp] 3045 paddd xmm13,XMMWORD[$L$sse_inc] 3046 movdqa xmm12,xmm13 3047 paddd xmm12,XMMWORD[$L$sse_inc] 3048 movdqa XMMWORD[(160+96)+rbp],xmm12 3049 movdqa XMMWORD[(160+112)+rbp],xmm13 3050 
3051$L$seal_sse_tail_128_rounds_and_x2hash: 3052 add r10,QWORD[((0+0))+rdi] 3053 adc r11,QWORD[((8+0))+rdi] 3054 adc r12,1 3055 mov rax,QWORD[((0+160+0))+rbp] 3056 mov r15,rax 3057 mul r10 3058 mov r13,rax 3059 mov r14,rdx 3060 mov rax,QWORD[((0+160+0))+rbp] 3061 mul r11 3062 imul r15,r12 3063 add r14,rax 3064 adc r15,rdx 3065 mov rax,QWORD[((8+160+0))+rbp] 3066 mov r9,rax 3067 mul r10 3068 add r14,rax 3069 adc rdx,0 3070 mov r10,rdx 3071 mov rax,QWORD[((8+160+0))+rbp] 3072 mul r11 3073 add r15,rax 3074 adc rdx,0 3075 imul r9,r12 3076 add r15,r10 3077 adc r9,rdx 3078 mov r10,r13 3079 mov r11,r14 3080 mov r12,r15 3081 and r12,3 3082 mov r13,r15 3083 and r13,-4 3084 mov r14,r9 3085 shrd r15,r9,2 3086 shr r9,2 3087 add r15,r13 3088 adc r9,r14 3089 add r10,r15 3090 adc r11,r9 3091 adc r12,0 3092 3093 lea rdi,[16+rdi] 3094$L$seal_sse_tail_128_rounds_and_x1hash: 3095 paddd xmm0,xmm4 3096 pxor xmm12,xmm0 3097 pshufb xmm12,XMMWORD[$L$rol16] 3098 paddd xmm8,xmm12 3099 pxor xmm4,xmm8 3100 movdqa xmm3,xmm4 3101 pslld xmm3,12 3102 psrld xmm4,20 3103 pxor xmm4,xmm3 3104 paddd xmm0,xmm4 3105 pxor xmm12,xmm0 3106 pshufb xmm12,XMMWORD[$L$rol8] 3107 paddd xmm8,xmm12 3108 pxor xmm4,xmm8 3109 movdqa xmm3,xmm4 3110 pslld xmm3,7 3111 psrld xmm4,25 3112 pxor xmm4,xmm3 3113DB 102,15,58,15,228,4 3114DB 102,69,15,58,15,192,8 3115DB 102,69,15,58,15,228,12 3116 paddd xmm1,xmm5 3117 pxor xmm13,xmm1 3118 pshufb xmm13,XMMWORD[$L$rol16] 3119 paddd xmm9,xmm13 3120 pxor xmm5,xmm9 3121 movdqa xmm3,xmm5 3122 pslld xmm3,12 3123 psrld xmm5,20 3124 pxor xmm5,xmm3 3125 paddd xmm1,xmm5 3126 pxor xmm13,xmm1 3127 pshufb xmm13,XMMWORD[$L$rol8] 3128 paddd xmm9,xmm13 3129 pxor xmm5,xmm9 3130 movdqa xmm3,xmm5 3131 pslld xmm3,7 3132 psrld xmm5,25 3133 pxor xmm5,xmm3 3134DB 102,15,58,15,237,4 3135DB 102,69,15,58,15,201,8 3136DB 102,69,15,58,15,237,12 3137 add r10,QWORD[((0+0))+rdi] 3138 adc r11,QWORD[((8+0))+rdi] 3139 adc r12,1 3140 mov rax,QWORD[((0+160+0))+rbp] 3141 mov r15,rax 3142 mul r10 3143 mov r13,rax 
3144 mov r14,rdx 3145 mov rax,QWORD[((0+160+0))+rbp] 3146 mul r11 3147 imul r15,r12 3148 add r14,rax 3149 adc r15,rdx 3150 mov rax,QWORD[((8+160+0))+rbp] 3151 mov r9,rax 3152 mul r10 3153 add r14,rax 3154 adc rdx,0 3155 mov r10,rdx 3156 mov rax,QWORD[((8+160+0))+rbp] 3157 mul r11 3158 add r15,rax 3159 adc rdx,0 3160 imul r9,r12 3161 add r15,r10 3162 adc r9,rdx 3163 mov r10,r13 3164 mov r11,r14 3165 mov r12,r15 3166 and r12,3 3167 mov r13,r15 3168 and r13,-4 3169 mov r14,r9 3170 shrd r15,r9,2 3171 shr r9,2 3172 add r15,r13 3173 adc r9,r14 3174 add r10,r15 3175 adc r11,r9 3176 adc r12,0 3177 paddd xmm0,xmm4 3178 pxor xmm12,xmm0 3179 pshufb xmm12,XMMWORD[$L$rol16] 3180 paddd xmm8,xmm12 3181 pxor xmm4,xmm8 3182 movdqa xmm3,xmm4 3183 pslld xmm3,12 3184 psrld xmm4,20 3185 pxor xmm4,xmm3 3186 paddd xmm0,xmm4 3187 pxor xmm12,xmm0 3188 pshufb xmm12,XMMWORD[$L$rol8] 3189 paddd xmm8,xmm12 3190 pxor xmm4,xmm8 3191 movdqa xmm3,xmm4 3192 pslld xmm3,7 3193 psrld xmm4,25 3194 pxor xmm4,xmm3 3195DB 102,15,58,15,228,12 3196DB 102,69,15,58,15,192,8 3197DB 102,69,15,58,15,228,4 3198 paddd xmm1,xmm5 3199 pxor xmm13,xmm1 3200 pshufb xmm13,XMMWORD[$L$rol16] 3201 paddd xmm9,xmm13 3202 pxor xmm5,xmm9 3203 movdqa xmm3,xmm5 3204 pslld xmm3,12 3205 psrld xmm5,20 3206 pxor xmm5,xmm3 3207 paddd xmm1,xmm5 3208 pxor xmm13,xmm1 3209 pshufb xmm13,XMMWORD[$L$rol8] 3210 paddd xmm9,xmm13 3211 pxor xmm5,xmm9 3212 movdqa xmm3,xmm5 3213 pslld xmm3,7 3214 psrld xmm5,25 3215 pxor xmm5,xmm3 3216DB 102,15,58,15,237,12 3217DB 102,69,15,58,15,201,8 3218DB 102,69,15,58,15,237,4 3219 3220 lea rdi,[16+rdi] 3221 dec rcx 3222 jg NEAR $L$seal_sse_tail_128_rounds_and_x2hash 3223 dec r8 3224 jge NEAR $L$seal_sse_tail_128_rounds_and_x1hash 3225 paddd xmm1,XMMWORD[$L$chacha20_consts] 3226 paddd xmm5,XMMWORD[((160+48))+rbp] 3227 paddd xmm9,XMMWORD[((160+64))+rbp] 3228 paddd xmm13,XMMWORD[((160+112))+rbp] 3229 paddd xmm0,XMMWORD[$L$chacha20_consts] 3230 paddd xmm4,XMMWORD[((160+48))+rbp] 3231 paddd 
xmm8,XMMWORD[((160+64))+rbp] 3232 paddd xmm12,XMMWORD[((160+96))+rbp] 3233 movdqu xmm3,XMMWORD[((0 + 0))+rsi] 3234 movdqu xmm7,XMMWORD[((16 + 0))+rsi] 3235 movdqu xmm11,XMMWORD[((32 + 0))+rsi] 3236 movdqu xmm15,XMMWORD[((48 + 0))+rsi] 3237 pxor xmm1,xmm3 3238 pxor xmm5,xmm7 3239 pxor xmm9,xmm11 3240 pxor xmm15,xmm13 3241 movdqu XMMWORD[(0 + 0)+rdi],xmm1 3242 movdqu XMMWORD[(16 + 0)+rdi],xmm5 3243 movdqu XMMWORD[(32 + 0)+rdi],xmm9 3244 movdqu XMMWORD[(48 + 0)+rdi],xmm15 3245 3246 mov rcx,4*16 3247 sub rbx,4*16 3248 lea rsi,[64+rsi] 3249 jmp NEAR $L$seal_sse_128_tail_hash 3250 3251$L$seal_sse_tail_192: 3252 movdqa xmm0,XMMWORD[$L$chacha20_consts] 3253 movdqa xmm4,XMMWORD[((160+48))+rbp] 3254 movdqa xmm8,XMMWORD[((160+64))+rbp] 3255 movdqa xmm1,xmm0 3256 movdqa xmm5,xmm4 3257 movdqa xmm9,xmm8 3258 movdqa xmm2,xmm0 3259 movdqa xmm6,xmm4 3260 movdqa xmm10,xmm8 3261 movdqa xmm14,XMMWORD[((160+96))+rbp] 3262 paddd xmm14,XMMWORD[$L$sse_inc] 3263 movdqa xmm13,xmm14 3264 paddd xmm13,XMMWORD[$L$sse_inc] 3265 movdqa xmm12,xmm13 3266 paddd xmm12,XMMWORD[$L$sse_inc] 3267 movdqa XMMWORD[(160+96)+rbp],xmm12 3268 movdqa XMMWORD[(160+112)+rbp],xmm13 3269 movdqa XMMWORD[(160+128)+rbp],xmm14 3270 3271$L$seal_sse_tail_192_rounds_and_x2hash: 3272 add r10,QWORD[((0+0))+rdi] 3273 adc r11,QWORD[((8+0))+rdi] 3274 adc r12,1 3275 mov rax,QWORD[((0+160+0))+rbp] 3276 mov r15,rax 3277 mul r10 3278 mov r13,rax 3279 mov r14,rdx 3280 mov rax,QWORD[((0+160+0))+rbp] 3281 mul r11 3282 imul r15,r12 3283 add r14,rax 3284 adc r15,rdx 3285 mov rax,QWORD[((8+160+0))+rbp] 3286 mov r9,rax 3287 mul r10 3288 add r14,rax 3289 adc rdx,0 3290 mov r10,rdx 3291 mov rax,QWORD[((8+160+0))+rbp] 3292 mul r11 3293 add r15,rax 3294 adc rdx,0 3295 imul r9,r12 3296 add r15,r10 3297 adc r9,rdx 3298 mov r10,r13 3299 mov r11,r14 3300 mov r12,r15 3301 and r12,3 3302 mov r13,r15 3303 and r13,-4 3304 mov r14,r9 3305 shrd r15,r9,2 3306 shr r9,2 3307 add r15,r13 3308 adc r9,r14 3309 add r10,r15 3310 adc r11,r9 3311 adc r12,0 3312 
3313 lea rdi,[16+rdi] 3314$L$seal_sse_tail_192_rounds_and_x1hash: 3315 paddd xmm0,xmm4 3316 pxor xmm12,xmm0 3317 pshufb xmm12,XMMWORD[$L$rol16] 3318 paddd xmm8,xmm12 3319 pxor xmm4,xmm8 3320 movdqa xmm3,xmm4 3321 pslld xmm3,12 3322 psrld xmm4,20 3323 pxor xmm4,xmm3 3324 paddd xmm0,xmm4 3325 pxor xmm12,xmm0 3326 pshufb xmm12,XMMWORD[$L$rol8] 3327 paddd xmm8,xmm12 3328 pxor xmm4,xmm8 3329 movdqa xmm3,xmm4 3330 pslld xmm3,7 3331 psrld xmm4,25 3332 pxor xmm4,xmm3 3333DB 102,15,58,15,228,4 3334DB 102,69,15,58,15,192,8 3335DB 102,69,15,58,15,228,12 3336 paddd xmm1,xmm5 3337 pxor xmm13,xmm1 3338 pshufb xmm13,XMMWORD[$L$rol16] 3339 paddd xmm9,xmm13 3340 pxor xmm5,xmm9 3341 movdqa xmm3,xmm5 3342 pslld xmm3,12 3343 psrld xmm5,20 3344 pxor xmm5,xmm3 3345 paddd xmm1,xmm5 3346 pxor xmm13,xmm1 3347 pshufb xmm13,XMMWORD[$L$rol8] 3348 paddd xmm9,xmm13 3349 pxor xmm5,xmm9 3350 movdqa xmm3,xmm5 3351 pslld xmm3,7 3352 psrld xmm5,25 3353 pxor xmm5,xmm3 3354DB 102,15,58,15,237,4 3355DB 102,69,15,58,15,201,8 3356DB 102,69,15,58,15,237,12 3357 paddd xmm2,xmm6 3358 pxor xmm14,xmm2 3359 pshufb xmm14,XMMWORD[$L$rol16] 3360 paddd xmm10,xmm14 3361 pxor xmm6,xmm10 3362 movdqa xmm3,xmm6 3363 pslld xmm3,12 3364 psrld xmm6,20 3365 pxor xmm6,xmm3 3366 paddd xmm2,xmm6 3367 pxor xmm14,xmm2 3368 pshufb xmm14,XMMWORD[$L$rol8] 3369 paddd xmm10,xmm14 3370 pxor xmm6,xmm10 3371 movdqa xmm3,xmm6 3372 pslld xmm3,7 3373 psrld xmm6,25 3374 pxor xmm6,xmm3 3375DB 102,15,58,15,246,4 3376DB 102,69,15,58,15,210,8 3377DB 102,69,15,58,15,246,12 3378 add r10,QWORD[((0+0))+rdi] 3379 adc r11,QWORD[((8+0))+rdi] 3380 adc r12,1 3381 mov rax,QWORD[((0+160+0))+rbp] 3382 mov r15,rax 3383 mul r10 3384 mov r13,rax 3385 mov r14,rdx 3386 mov rax,QWORD[((0+160+0))+rbp] 3387 mul r11 3388 imul r15,r12 3389 add r14,rax 3390 adc r15,rdx 3391 mov rax,QWORD[((8+160+0))+rbp] 3392 mov r9,rax 3393 mul r10 3394 add r14,rax 3395 adc rdx,0 3396 mov r10,rdx 3397 mov rax,QWORD[((8+160+0))+rbp] 3398 mul r11 3399 add r15,rax 3400 adc rdx,0 3401 
imul r9,r12 3402 add r15,r10 3403 adc r9,rdx 3404 mov r10,r13 3405 mov r11,r14 3406 mov r12,r15 3407 and r12,3 3408 mov r13,r15 3409 and r13,-4 3410 mov r14,r9 3411 shrd r15,r9,2 3412 shr r9,2 3413 add r15,r13 3414 adc r9,r14 3415 add r10,r15 3416 adc r11,r9 3417 adc r12,0 3418 paddd xmm0,xmm4 3419 pxor xmm12,xmm0 3420 pshufb xmm12,XMMWORD[$L$rol16] 3421 paddd xmm8,xmm12 3422 pxor xmm4,xmm8 3423 movdqa xmm3,xmm4 3424 pslld xmm3,12 3425 psrld xmm4,20 3426 pxor xmm4,xmm3 3427 paddd xmm0,xmm4 3428 pxor xmm12,xmm0 3429 pshufb xmm12,XMMWORD[$L$rol8] 3430 paddd xmm8,xmm12 3431 pxor xmm4,xmm8 3432 movdqa xmm3,xmm4 3433 pslld xmm3,7 3434 psrld xmm4,25 3435 pxor xmm4,xmm3 3436DB 102,15,58,15,228,12 3437DB 102,69,15,58,15,192,8 3438DB 102,69,15,58,15,228,4 3439 paddd xmm1,xmm5 3440 pxor xmm13,xmm1 3441 pshufb xmm13,XMMWORD[$L$rol16] 3442 paddd xmm9,xmm13 3443 pxor xmm5,xmm9 3444 movdqa xmm3,xmm5 3445 pslld xmm3,12 3446 psrld xmm5,20 3447 pxor xmm5,xmm3 3448 paddd xmm1,xmm5 3449 pxor xmm13,xmm1 3450 pshufb xmm13,XMMWORD[$L$rol8] 3451 paddd xmm9,xmm13 3452 pxor xmm5,xmm9 3453 movdqa xmm3,xmm5 3454 pslld xmm3,7 3455 psrld xmm5,25 3456 pxor xmm5,xmm3 3457DB 102,15,58,15,237,12 3458DB 102,69,15,58,15,201,8 3459DB 102,69,15,58,15,237,4 3460 paddd xmm2,xmm6 3461 pxor xmm14,xmm2 3462 pshufb xmm14,XMMWORD[$L$rol16] 3463 paddd xmm10,xmm14 3464 pxor xmm6,xmm10 3465 movdqa xmm3,xmm6 3466 pslld xmm3,12 3467 psrld xmm6,20 3468 pxor xmm6,xmm3 3469 paddd xmm2,xmm6 3470 pxor xmm14,xmm2 3471 pshufb xmm14,XMMWORD[$L$rol8] 3472 paddd xmm10,xmm14 3473 pxor xmm6,xmm10 3474 movdqa xmm3,xmm6 3475 pslld xmm3,7 3476 psrld xmm6,25 3477 pxor xmm6,xmm3 3478DB 102,15,58,15,246,12 3479DB 102,69,15,58,15,210,8 3480DB 102,69,15,58,15,246,4 3481 3482 lea rdi,[16+rdi] 3483 dec rcx 3484 jg NEAR $L$seal_sse_tail_192_rounds_and_x2hash 3485 dec r8 3486 jge NEAR $L$seal_sse_tail_192_rounds_and_x1hash 3487 paddd xmm2,XMMWORD[$L$chacha20_consts] 3488 paddd xmm6,XMMWORD[((160+48))+rbp] 3489 paddd 
xmm10,XMMWORD[((160+64))+rbp] 3490 paddd xmm14,XMMWORD[((160+128))+rbp] 3491 paddd xmm1,XMMWORD[$L$chacha20_consts] 3492 paddd xmm5,XMMWORD[((160+48))+rbp] 3493 paddd xmm9,XMMWORD[((160+64))+rbp] 3494 paddd xmm13,XMMWORD[((160+112))+rbp] 3495 paddd xmm0,XMMWORD[$L$chacha20_consts] 3496 paddd xmm4,XMMWORD[((160+48))+rbp] 3497 paddd xmm8,XMMWORD[((160+64))+rbp] 3498 paddd xmm12,XMMWORD[((160+96))+rbp] 3499 movdqu xmm3,XMMWORD[((0 + 0))+rsi] 3500 movdqu xmm7,XMMWORD[((16 + 0))+rsi] 3501 movdqu xmm11,XMMWORD[((32 + 0))+rsi] 3502 movdqu xmm15,XMMWORD[((48 + 0))+rsi] 3503 pxor xmm2,xmm3 3504 pxor xmm6,xmm7 3505 pxor xmm10,xmm11 3506 pxor xmm15,xmm14 3507 movdqu XMMWORD[(0 + 0)+rdi],xmm2 3508 movdqu XMMWORD[(16 + 0)+rdi],xmm6 3509 movdqu XMMWORD[(32 + 0)+rdi],xmm10 3510 movdqu XMMWORD[(48 + 0)+rdi],xmm15 3511 movdqu xmm3,XMMWORD[((0 + 64))+rsi] 3512 movdqu xmm7,XMMWORD[((16 + 64))+rsi] 3513 movdqu xmm11,XMMWORD[((32 + 64))+rsi] 3514 movdqu xmm15,XMMWORD[((48 + 64))+rsi] 3515 pxor xmm1,xmm3 3516 pxor xmm5,xmm7 3517 pxor xmm9,xmm11 3518 pxor xmm15,xmm13 3519 movdqu XMMWORD[(0 + 64)+rdi],xmm1 3520 movdqu XMMWORD[(16 + 64)+rdi],xmm5 3521 movdqu XMMWORD[(32 + 64)+rdi],xmm9 3522 movdqu XMMWORD[(48 + 64)+rdi],xmm15 3523 3524 mov rcx,8*16 3525 sub rbx,8*16 3526 lea rsi,[128+rsi] 3527 3528$L$seal_sse_128_tail_hash: 3529 cmp rcx,16 3530 jb NEAR $L$seal_sse_128_tail_xor 3531 add r10,QWORD[((0+0))+rdi] 3532 adc r11,QWORD[((8+0))+rdi] 3533 adc r12,1 3534 mov rax,QWORD[((0+160+0))+rbp] 3535 mov r15,rax 3536 mul r10 3537 mov r13,rax 3538 mov r14,rdx 3539 mov rax,QWORD[((0+160+0))+rbp] 3540 mul r11 3541 imul r15,r12 3542 add r14,rax 3543 adc r15,rdx 3544 mov rax,QWORD[((8+160+0))+rbp] 3545 mov r9,rax 3546 mul r10 3547 add r14,rax 3548 adc rdx,0 3549 mov r10,rdx 3550 mov rax,QWORD[((8+160+0))+rbp] 3551 mul r11 3552 add r15,rax 3553 adc rdx,0 3554 imul r9,r12 3555 add r15,r10 3556 adc r9,rdx 3557 mov r10,r13 3558 mov r11,r14 3559 mov r12,r15 3560 and r12,3 3561 mov r13,r15 3562 and r13,-4 
3563 mov r14,r9 3564 shrd r15,r9,2 3565 shr r9,2 3566 add r15,r13 3567 adc r9,r14 3568 add r10,r15 3569 adc r11,r9 3570 adc r12,0 3571 3572 sub rcx,16 3573 lea rdi,[16+rdi] 3574 jmp NEAR $L$seal_sse_128_tail_hash 3575 3576$L$seal_sse_128_tail_xor: 3577 cmp rbx,16 3578 jb NEAR $L$seal_sse_tail_16 3579 sub rbx,16 3580 3581 movdqu xmm3,XMMWORD[rsi] 3582 pxor xmm0,xmm3 3583 movdqu XMMWORD[rdi],xmm0 3584 3585 add r10,QWORD[rdi] 3586 adc r11,QWORD[8+rdi] 3587 adc r12,1 3588 lea rsi,[16+rsi] 3589 lea rdi,[16+rdi] 3590 mov rax,QWORD[((0+160+0))+rbp] 3591 mov r15,rax 3592 mul r10 3593 mov r13,rax 3594 mov r14,rdx 3595 mov rax,QWORD[((0+160+0))+rbp] 3596 mul r11 3597 imul r15,r12 3598 add r14,rax 3599 adc r15,rdx 3600 mov rax,QWORD[((8+160+0))+rbp] 3601 mov r9,rax 3602 mul r10 3603 add r14,rax 3604 adc rdx,0 3605 mov r10,rdx 3606 mov rax,QWORD[((8+160+0))+rbp] 3607 mul r11 3608 add r15,rax 3609 adc rdx,0 3610 imul r9,r12 3611 add r15,r10 3612 adc r9,rdx 3613 mov r10,r13 3614 mov r11,r14 3615 mov r12,r15 3616 and r12,3 3617 mov r13,r15 3618 and r13,-4 3619 mov r14,r9 3620 shrd r15,r9,2 3621 shr r9,2 3622 add r15,r13 3623 adc r9,r14 3624 add r10,r15 3625 adc r11,r9 3626 adc r12,0 3627 3628 3629 movdqa xmm0,xmm4 3630 movdqa xmm4,xmm8 3631 movdqa xmm8,xmm12 3632 movdqa xmm12,xmm1 3633 movdqa xmm1,xmm5 3634 movdqa xmm5,xmm9 3635 movdqa xmm9,xmm13 3636 jmp NEAR $L$seal_sse_128_tail_xor 3637 3638$L$seal_sse_tail_16: 3639 test rbx,rbx 3640 jz NEAR $L$process_blocks_of_extra_in 3641 3642 mov r8,rbx 3643 mov rcx,rbx 3644 lea rsi,[((-1))+rbx*1+rsi] 3645 pxor xmm15,xmm15 3646$L$seal_sse_tail_16_compose: 3647 pslldq xmm15,1 3648 pinsrb xmm15,BYTE[rsi],0 3649 lea rsi,[((-1))+rsi] 3650 dec rcx 3651 jne NEAR $L$seal_sse_tail_16_compose 3652 3653 3654 pxor xmm15,xmm0 3655 3656 3657 mov rcx,rbx 3658 movdqu xmm0,xmm15 3659$L$seal_sse_tail_16_extract: 3660 pextrb XMMWORD[rdi],xmm0,0 3661 psrldq xmm0,1 3662 add rdi,1 3663 sub rcx,1 3664 jnz NEAR $L$seal_sse_tail_16_extract 3665 3666 3667 3668 
3669 3670 3671 3672 3673 mov r9,QWORD[((288 + 160 + 32))+rsp] 3674 mov r14,QWORD[56+r9] 3675 mov r13,QWORD[48+r9] 3676 test r14,r14 3677 jz NEAR $L$process_partial_block 3678 3679 mov r15,16 3680 sub r15,rbx 3681 cmp r14,r15 3682 3683 jge NEAR $L$load_extra_in 3684 mov r15,r14 3685 3686$L$load_extra_in: 3687 3688 3689 lea rsi,[((-1))+r15*1+r13] 3690 3691 3692 add r13,r15 3693 sub r14,r15 3694 mov QWORD[48+r9],r13 3695 mov QWORD[56+r9],r14 3696 3697 3698 3699 add r8,r15 3700 3701 3702 pxor xmm11,xmm11 3703$L$load_extra_load_loop: 3704 pslldq xmm11,1 3705 pinsrb xmm11,BYTE[rsi],0 3706 lea rsi,[((-1))+rsi] 3707 sub r15,1 3708 jnz NEAR $L$load_extra_load_loop 3709 3710 3711 3712 3713 mov r15,rbx 3714 3715$L$load_extra_shift_loop: 3716 pslldq xmm11,1 3717 sub r15,1 3718 jnz NEAR $L$load_extra_shift_loop 3719 3720 3721 3722 3723 lea r15,[$L$and_masks] 3724 shl rbx,4 3725 pand xmm15,XMMWORD[((-16))+rbx*1+r15] 3726 3727 3728 por xmm15,xmm11 3729 3730 3731 3732DB 102,77,15,126,253 3733 pextrq r14,xmm15,1 3734 add r10,r13 3735 adc r11,r14 3736 adc r12,1 3737 mov rax,QWORD[((0+160+0))+rbp] 3738 mov r15,rax 3739 mul r10 3740 mov r13,rax 3741 mov r14,rdx 3742 mov rax,QWORD[((0+160+0))+rbp] 3743 mul r11 3744 imul r15,r12 3745 add r14,rax 3746 adc r15,rdx 3747 mov rax,QWORD[((8+160+0))+rbp] 3748 mov r9,rax 3749 mul r10 3750 add r14,rax 3751 adc rdx,0 3752 mov r10,rdx 3753 mov rax,QWORD[((8+160+0))+rbp] 3754 mul r11 3755 add r15,rax 3756 adc rdx,0 3757 imul r9,r12 3758 add r15,r10 3759 adc r9,rdx 3760 mov r10,r13 3761 mov r11,r14 3762 mov r12,r15 3763 and r12,3 3764 mov r13,r15 3765 and r13,-4 3766 mov r14,r9 3767 shrd r15,r9,2 3768 shr r9,2 3769 add r15,r13 3770 adc r9,r14 3771 add r10,r15 3772 adc r11,r9 3773 adc r12,0 3774 3775 3776$L$process_blocks_of_extra_in: 3777 3778 mov r9,QWORD[((288+32+160 ))+rsp] 3779 mov rsi,QWORD[48+r9] 3780 mov r8,QWORD[56+r9] 3781 mov rcx,r8 3782 shr r8,4 3783 3784$L$process_extra_hash_loop: 3785 jz NEAR process_extra_in_trailer 3786 add 
r10,QWORD[((0+0))+rsi] 3787 adc r11,QWORD[((8+0))+rsi] 3788 adc r12,1 3789 mov rax,QWORD[((0+160+0))+rbp] 3790 mov r15,rax 3791 mul r10 3792 mov r13,rax 3793 mov r14,rdx 3794 mov rax,QWORD[((0+160+0))+rbp] 3795 mul r11 3796 imul r15,r12 3797 add r14,rax 3798 adc r15,rdx 3799 mov rax,QWORD[((8+160+0))+rbp] 3800 mov r9,rax 3801 mul r10 3802 add r14,rax 3803 adc rdx,0 3804 mov r10,rdx 3805 mov rax,QWORD[((8+160+0))+rbp] 3806 mul r11 3807 add r15,rax 3808 adc rdx,0 3809 imul r9,r12 3810 add r15,r10 3811 adc r9,rdx 3812 mov r10,r13 3813 mov r11,r14 3814 mov r12,r15 3815 and r12,3 3816 mov r13,r15 3817 and r13,-4 3818 mov r14,r9 3819 shrd r15,r9,2 3820 shr r9,2 3821 add r15,r13 3822 adc r9,r14 3823 add r10,r15 3824 adc r11,r9 3825 adc r12,0 3826 3827 lea rsi,[16+rsi] 3828 sub r8,1 3829 jmp NEAR $L$process_extra_hash_loop 3830process_extra_in_trailer: 3831 and rcx,15 3832 mov rbx,rcx 3833 jz NEAR $L$do_length_block 3834 lea rsi,[((-1))+rcx*1+rsi] 3835 3836$L$process_extra_in_trailer_load: 3837 pslldq xmm15,1 3838 pinsrb xmm15,BYTE[rsi],0 3839 lea rsi,[((-1))+rsi] 3840 sub rcx,1 3841 jnz NEAR $L$process_extra_in_trailer_load 3842 3843$L$process_partial_block: 3844 3845 lea r15,[$L$and_masks] 3846 shl rbx,4 3847 pand xmm15,XMMWORD[((-16))+rbx*1+r15] 3848DB 102,77,15,126,253 3849 pextrq r14,xmm15,1 3850 add r10,r13 3851 adc r11,r14 3852 adc r12,1 3853 mov rax,QWORD[((0+160+0))+rbp] 3854 mov r15,rax 3855 mul r10 3856 mov r13,rax 3857 mov r14,rdx 3858 mov rax,QWORD[((0+160+0))+rbp] 3859 mul r11 3860 imul r15,r12 3861 add r14,rax 3862 adc r15,rdx 3863 mov rax,QWORD[((8+160+0))+rbp] 3864 mov r9,rax 3865 mul r10 3866 add r14,rax 3867 adc rdx,0 3868 mov r10,rdx 3869 mov rax,QWORD[((8+160+0))+rbp] 3870 mul r11 3871 add r15,rax 3872 adc rdx,0 3873 imul r9,r12 3874 add r15,r10 3875 adc r9,rdx 3876 mov r10,r13 3877 mov r11,r14 3878 mov r12,r15 3879 and r12,3 3880 mov r13,r15 3881 and r13,-4 3882 mov r14,r9 3883 shrd r15,r9,2 3884 shr r9,2 3885 add r15,r13 3886 adc r9,r14 3887 add 
r10,r15 3888 adc r11,r9 3889 adc r12,0 3890 3891 3892$L$do_length_block: 3893 add r10,QWORD[((0+160+32))+rbp] 3894 adc r11,QWORD[((8+160+32))+rbp] 3895 adc r12,1 3896 mov rax,QWORD[((0+160+0))+rbp] 3897 mov r15,rax 3898 mul r10 3899 mov r13,rax 3900 mov r14,rdx 3901 mov rax,QWORD[((0+160+0))+rbp] 3902 mul r11 3903 imul r15,r12 3904 add r14,rax 3905 adc r15,rdx 3906 mov rax,QWORD[((8+160+0))+rbp] 3907 mov r9,rax 3908 mul r10 3909 add r14,rax 3910 adc rdx,0 3911 mov r10,rdx 3912 mov rax,QWORD[((8+160+0))+rbp] 3913 mul r11 3914 add r15,rax 3915 adc rdx,0 3916 imul r9,r12 3917 add r15,r10 3918 adc r9,rdx 3919 mov r10,r13 3920 mov r11,r14 3921 mov r12,r15 3922 and r12,3 3923 mov r13,r15 3924 and r13,-4 3925 mov r14,r9 3926 shrd r15,r9,2 3927 shr r9,2 3928 add r15,r13 3929 adc r9,r14 3930 add r10,r15 3931 adc r11,r9 3932 adc r12,0 3933 3934 3935 mov r13,r10 3936 mov r14,r11 3937 mov r15,r12 3938 sub r10,-5 3939 sbb r11,-1 3940 sbb r12,3 3941 cmovc r10,r13 3942 cmovc r11,r14 3943 cmovc r12,r15 3944 3945 add r10,QWORD[((0+160+16))+rbp] 3946 adc r11,QWORD[((8+160+16))+rbp] 3947 3948 movaps xmm6,XMMWORD[((0+0))+rbp] 3949 movaps xmm7,XMMWORD[((16+0))+rbp] 3950 movaps xmm8,XMMWORD[((32+0))+rbp] 3951 movaps xmm9,XMMWORD[((48+0))+rbp] 3952 movaps xmm10,XMMWORD[((64+0))+rbp] 3953 movaps xmm11,XMMWORD[((80+0))+rbp] 3954 movaps xmm12,XMMWORD[((96+0))+rbp] 3955 movaps xmm13,XMMWORD[((112+0))+rbp] 3956 movaps xmm14,XMMWORD[((128+0))+rbp] 3957 movaps xmm15,XMMWORD[((144+0))+rbp] 3958 3959 3960 add rsp,288 + 160 + 32 3961 3962 3963 pop r9 3964 3965 mov QWORD[r9],r10 3966 mov QWORD[8+r9],r11 3967 pop r15 3968 3969 pop r14 3970 3971 pop r13 3972 3973 pop r12 3974 3975 pop rbx 3976 3977 pop rbp 3978 3979 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 3980 mov rsi,QWORD[16+rsp] 3981 ret 3982 3983$L$seal_sse_128: 3984 3985 movdqu xmm0,XMMWORD[$L$chacha20_consts] 3986 movdqa xmm1,xmm0 3987 movdqa xmm2,xmm0 3988 movdqu xmm4,XMMWORD[r9] 3989 movdqa xmm5,xmm4 3990 movdqa xmm6,xmm4 3991 movdqu 
xmm8,XMMWORD[16+r9] 3992 movdqa xmm9,xmm8 3993 movdqa xmm10,xmm8 3994 movdqu xmm14,XMMWORD[32+r9] 3995 movdqa xmm12,xmm14 3996 paddd xmm12,XMMWORD[$L$sse_inc] 3997 movdqa xmm13,xmm12 3998 paddd xmm13,XMMWORD[$L$sse_inc] 3999 movdqa xmm7,xmm4 4000 movdqa xmm11,xmm8 4001 movdqa xmm15,xmm12 4002 mov r10,10 4003 4004$L$seal_sse_128_rounds: 4005 paddd xmm0,xmm4 4006 pxor xmm12,xmm0 4007 pshufb xmm12,XMMWORD[$L$rol16] 4008 paddd xmm8,xmm12 4009 pxor xmm4,xmm8 4010 movdqa xmm3,xmm4 4011 pslld xmm3,12 4012 psrld xmm4,20 4013 pxor xmm4,xmm3 4014 paddd xmm0,xmm4 4015 pxor xmm12,xmm0 4016 pshufb xmm12,XMMWORD[$L$rol8] 4017 paddd xmm8,xmm12 4018 pxor xmm4,xmm8 4019 movdqa xmm3,xmm4 4020 pslld xmm3,7 4021 psrld xmm4,25 4022 pxor xmm4,xmm3 4023DB 102,15,58,15,228,4 4024DB 102,69,15,58,15,192,8 4025DB 102,69,15,58,15,228,12 4026 paddd xmm1,xmm5 4027 pxor xmm13,xmm1 4028 pshufb xmm13,XMMWORD[$L$rol16] 4029 paddd xmm9,xmm13 4030 pxor xmm5,xmm9 4031 movdqa xmm3,xmm5 4032 pslld xmm3,12 4033 psrld xmm5,20 4034 pxor xmm5,xmm3 4035 paddd xmm1,xmm5 4036 pxor xmm13,xmm1 4037 pshufb xmm13,XMMWORD[$L$rol8] 4038 paddd xmm9,xmm13 4039 pxor xmm5,xmm9 4040 movdqa xmm3,xmm5 4041 pslld xmm3,7 4042 psrld xmm5,25 4043 pxor xmm5,xmm3 4044DB 102,15,58,15,237,4 4045DB 102,69,15,58,15,201,8 4046DB 102,69,15,58,15,237,12 4047 paddd xmm2,xmm6 4048 pxor xmm14,xmm2 4049 pshufb xmm14,XMMWORD[$L$rol16] 4050 paddd xmm10,xmm14 4051 pxor xmm6,xmm10 4052 movdqa xmm3,xmm6 4053 pslld xmm3,12 4054 psrld xmm6,20 4055 pxor xmm6,xmm3 4056 paddd xmm2,xmm6 4057 pxor xmm14,xmm2 4058 pshufb xmm14,XMMWORD[$L$rol8] 4059 paddd xmm10,xmm14 4060 pxor xmm6,xmm10 4061 movdqa xmm3,xmm6 4062 pslld xmm3,7 4063 psrld xmm6,25 4064 pxor xmm6,xmm3 4065DB 102,15,58,15,246,4 4066DB 102,69,15,58,15,210,8 4067DB 102,69,15,58,15,246,12 4068 paddd xmm0,xmm4 4069 pxor xmm12,xmm0 4070 pshufb xmm12,XMMWORD[$L$rol16] 4071 paddd xmm8,xmm12 4072 pxor xmm4,xmm8 4073 movdqa xmm3,xmm4 4074 pslld xmm3,12 4075 psrld xmm4,20 4076 pxor xmm4,xmm3 4077 
paddd xmm0,xmm4 4078 pxor xmm12,xmm0 4079 pshufb xmm12,XMMWORD[$L$rol8] 4080 paddd xmm8,xmm12 4081 pxor xmm4,xmm8 4082 movdqa xmm3,xmm4 4083 pslld xmm3,7 4084 psrld xmm4,25 4085 pxor xmm4,xmm3 4086DB 102,15,58,15,228,12 4087DB 102,69,15,58,15,192,8 4088DB 102,69,15,58,15,228,4 4089 paddd xmm1,xmm5 4090 pxor xmm13,xmm1 4091 pshufb xmm13,XMMWORD[$L$rol16] 4092 paddd xmm9,xmm13 4093 pxor xmm5,xmm9 4094 movdqa xmm3,xmm5 4095 pslld xmm3,12 4096 psrld xmm5,20 4097 pxor xmm5,xmm3 4098 paddd xmm1,xmm5 4099 pxor xmm13,xmm1 4100 pshufb xmm13,XMMWORD[$L$rol8] 4101 paddd xmm9,xmm13 4102 pxor xmm5,xmm9 4103 movdqa xmm3,xmm5 4104 pslld xmm3,7 4105 psrld xmm5,25 4106 pxor xmm5,xmm3 4107DB 102,15,58,15,237,12 4108DB 102,69,15,58,15,201,8 4109DB 102,69,15,58,15,237,4 4110 paddd xmm2,xmm6 4111 pxor xmm14,xmm2 4112 pshufb xmm14,XMMWORD[$L$rol16] 4113 paddd xmm10,xmm14 4114 pxor xmm6,xmm10 4115 movdqa xmm3,xmm6 4116 pslld xmm3,12 4117 psrld xmm6,20 4118 pxor xmm6,xmm3 4119 paddd xmm2,xmm6 4120 pxor xmm14,xmm2 4121 pshufb xmm14,XMMWORD[$L$rol8] 4122 paddd xmm10,xmm14 4123 pxor xmm6,xmm10 4124 movdqa xmm3,xmm6 4125 pslld xmm3,7 4126 psrld xmm6,25 4127 pxor xmm6,xmm3 4128DB 102,15,58,15,246,12 4129DB 102,69,15,58,15,210,8 4130DB 102,69,15,58,15,246,4 4131 4132 dec r10 4133 jnz NEAR $L$seal_sse_128_rounds 4134 paddd xmm0,XMMWORD[$L$chacha20_consts] 4135 paddd xmm1,XMMWORD[$L$chacha20_consts] 4136 paddd xmm2,XMMWORD[$L$chacha20_consts] 4137 paddd xmm4,xmm7 4138 paddd xmm5,xmm7 4139 paddd xmm6,xmm7 4140 paddd xmm8,xmm11 4141 paddd xmm9,xmm11 4142 paddd xmm12,xmm15 4143 paddd xmm15,XMMWORD[$L$sse_inc] 4144 paddd xmm13,xmm15 4145 4146 pand xmm2,XMMWORD[$L$clamp] 4147 movdqa XMMWORD[(160+0)+rbp],xmm2 4148 movdqa XMMWORD[(160+16)+rbp],xmm6 4149 4150 mov r8,r8 4151 call poly_hash_ad_internal 4152 jmp NEAR $L$seal_sse_128_tail_xor 4153$L$SEH_end_chacha20_poly1305_seal: 4154 4155 4156 4157 4158ALIGN 64 4159chacha20_poly1305_open_avx2: 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 
4172 vzeroupper 4173 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 4174 vbroadcasti128 ymm4,XMMWORD[r9] 4175 vbroadcasti128 ymm8,XMMWORD[16+r9] 4176 vbroadcasti128 ymm12,XMMWORD[32+r9] 4177 vpaddd ymm12,ymm12,YMMWORD[$L$avx2_init] 4178 cmp rbx,6*32 4179 jbe NEAR $L$open_avx2_192 4180 cmp rbx,10*32 4181 jbe NEAR $L$open_avx2_320 4182 4183 vmovdqa YMMWORD[(160+64)+rbp],ymm4 4184 vmovdqa YMMWORD[(160+96)+rbp],ymm8 4185 vmovdqa YMMWORD[(160+160)+rbp],ymm12 4186 mov r10,10 4187$L$open_avx2_init_rounds: 4188 vpaddd ymm0,ymm0,ymm4 4189 vpxor ymm12,ymm12,ymm0 4190 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 4191 vpaddd ymm8,ymm8,ymm12 4192 vpxor ymm4,ymm4,ymm8 4193 vpsrld ymm3,ymm4,20 4194 vpslld ymm4,ymm4,12 4195 vpxor ymm4,ymm4,ymm3 4196 vpaddd ymm0,ymm0,ymm4 4197 vpxor ymm12,ymm12,ymm0 4198 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 4199 vpaddd ymm8,ymm8,ymm12 4200 vpxor ymm4,ymm4,ymm8 4201 vpslld ymm3,ymm4,7 4202 vpsrld ymm4,ymm4,25 4203 vpxor ymm4,ymm4,ymm3 4204 vpalignr ymm12,ymm12,ymm12,12 4205 vpalignr ymm8,ymm8,ymm8,8 4206 vpalignr ymm4,ymm4,ymm4,4 4207 vpaddd ymm0,ymm0,ymm4 4208 vpxor ymm12,ymm12,ymm0 4209 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 4210 vpaddd ymm8,ymm8,ymm12 4211 vpxor ymm4,ymm4,ymm8 4212 vpsrld ymm3,ymm4,20 4213 vpslld ymm4,ymm4,12 4214 vpxor ymm4,ymm4,ymm3 4215 vpaddd ymm0,ymm0,ymm4 4216 vpxor ymm12,ymm12,ymm0 4217 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 4218 vpaddd ymm8,ymm8,ymm12 4219 vpxor ymm4,ymm4,ymm8 4220 vpslld ymm3,ymm4,7 4221 vpsrld ymm4,ymm4,25 4222 vpxor ymm4,ymm4,ymm3 4223 vpalignr ymm12,ymm12,ymm12,4 4224 vpalignr ymm8,ymm8,ymm8,8 4225 vpalignr ymm4,ymm4,ymm4,12 4226 4227 dec r10 4228 jne NEAR $L$open_avx2_init_rounds 4229 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 4230 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 4231 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 4232 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 4233 4234 vperm2i128 ymm3,ymm4,ymm0,0x02 4235 4236 vpand ymm3,ymm3,YMMWORD[$L$clamp] 4237 vmovdqa YMMWORD[(160+0)+rbp],ymm3 4238 4239 vperm2i128 
ymm0,ymm4,ymm0,0x13 4240 vperm2i128 ymm4,ymm12,ymm8,0x13 4241 4242 mov r8,r8 4243 call poly_hash_ad_internal 4244 4245 xor rcx,rcx 4246$L$open_avx2_init_hash: 4247 add r10,QWORD[((0+0))+rcx*1+rsi] 4248 adc r11,QWORD[((8+0))+rcx*1+rsi] 4249 adc r12,1 4250 mov rax,QWORD[((0+160+0))+rbp] 4251 mov r15,rax 4252 mul r10 4253 mov r13,rax 4254 mov r14,rdx 4255 mov rax,QWORD[((0+160+0))+rbp] 4256 mul r11 4257 imul r15,r12 4258 add r14,rax 4259 adc r15,rdx 4260 mov rax,QWORD[((8+160+0))+rbp] 4261 mov r9,rax 4262 mul r10 4263 add r14,rax 4264 adc rdx,0 4265 mov r10,rdx 4266 mov rax,QWORD[((8+160+0))+rbp] 4267 mul r11 4268 add r15,rax 4269 adc rdx,0 4270 imul r9,r12 4271 add r15,r10 4272 adc r9,rdx 4273 mov r10,r13 4274 mov r11,r14 4275 mov r12,r15 4276 and r12,3 4277 mov r13,r15 4278 and r13,-4 4279 mov r14,r9 4280 shrd r15,r9,2 4281 shr r9,2 4282 add r15,r13 4283 adc r9,r14 4284 add r10,r15 4285 adc r11,r9 4286 adc r12,0 4287 4288 add rcx,16 4289 cmp rcx,2*32 4290 jne NEAR $L$open_avx2_init_hash 4291 4292 vpxor ymm0,ymm0,YMMWORD[rsi] 4293 vpxor ymm4,ymm4,YMMWORD[32+rsi] 4294 4295 vmovdqu YMMWORD[rdi],ymm0 4296 vmovdqu YMMWORD[32+rdi],ymm4 4297 lea rsi,[64+rsi] 4298 lea rdi,[64+rdi] 4299 sub rbx,2*32 4300$L$open_avx2_main_loop: 4301 4302 cmp rbx,16*32 4303 jb NEAR $L$open_avx2_main_loop_done 4304 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 4305 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 4306 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 4307 vmovdqa ymm1,ymm0 4308 vmovdqa ymm5,ymm4 4309 vmovdqa ymm9,ymm8 4310 vmovdqa ymm2,ymm0 4311 vmovdqa ymm6,ymm4 4312 vmovdqa ymm10,ymm8 4313 vmovdqa ymm3,ymm0 4314 vmovdqa ymm7,ymm4 4315 vmovdqa ymm11,ymm8 4316 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 4317 vpaddd ymm15,ymm12,YMMWORD[((160+160))+rbp] 4318 vpaddd ymm14,ymm12,ymm15 4319 vpaddd ymm13,ymm12,ymm14 4320 vpaddd ymm12,ymm12,ymm13 4321 vmovdqa YMMWORD[(160+256)+rbp],ymm15 4322 vmovdqa YMMWORD[(160+224)+rbp],ymm14 4323 vmovdqa YMMWORD[(160+192)+rbp],ymm13 4324 vmovdqa YMMWORD[(160+160)+rbp],ymm12 4325 4326 
xor rcx,rcx 4327$L$open_avx2_main_loop_rounds: 4328 add r10,QWORD[((0+0))+rcx*1+rsi] 4329 adc r11,QWORD[((8+0))+rcx*1+rsi] 4330 adc r12,1 4331 vmovdqa YMMWORD[(160+128)+rbp],ymm8 4332 vmovdqa ymm8,YMMWORD[$L$rol16] 4333 vpaddd ymm3,ymm3,ymm7 4334 vpaddd ymm2,ymm2,ymm6 4335 vpaddd ymm1,ymm1,ymm5 4336 vpaddd ymm0,ymm0,ymm4 4337 vpxor ymm15,ymm15,ymm3 4338 vpxor ymm14,ymm14,ymm2 4339 vpxor ymm13,ymm13,ymm1 4340 vpxor ymm12,ymm12,ymm0 4341 mov rdx,QWORD[((0+160+0))+rbp] 4342 mov r15,rdx 4343 mulx r14,r13,r10 4344 mulx rdx,rax,r11 4345 imul r15,r12 4346 add r14,rax 4347 adc r15,rdx 4348 vpshufb ymm15,ymm15,ymm8 4349 vpshufb ymm14,ymm14,ymm8 4350 vpshufb ymm13,ymm13,ymm8 4351 vpshufb ymm12,ymm12,ymm8 4352 vpaddd ymm11,ymm11,ymm15 4353 vpaddd ymm10,ymm10,ymm14 4354 vpaddd ymm9,ymm9,ymm13 4355 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 4356 vpxor ymm7,ymm7,ymm11 4357 mov rdx,QWORD[((8+160+0))+rbp] 4358 mulx rax,r10,r10 4359 add r14,r10 4360 mulx r9,r11,r11 4361 adc r15,r11 4362 adc r9,0 4363 imul rdx,r12 4364 vpxor ymm6,ymm6,ymm10 4365 vpxor ymm5,ymm5,ymm9 4366 vpxor ymm4,ymm4,ymm8 4367 vmovdqa YMMWORD[(160+128)+rbp],ymm8 4368 vpsrld ymm8,ymm7,20 4369 vpslld ymm7,ymm7,32-20 4370 vpxor ymm7,ymm7,ymm8 4371 vpsrld ymm8,ymm6,20 4372 vpslld ymm6,ymm6,32-20 4373 vpxor ymm6,ymm6,ymm8 4374 vpsrld ymm8,ymm5,20 4375 vpslld ymm5,ymm5,32-20 4376 add r15,rax 4377 adc r9,rdx 4378 vpxor ymm5,ymm5,ymm8 4379 vpsrld ymm8,ymm4,20 4380 vpslld ymm4,ymm4,32-20 4381 vpxor ymm4,ymm4,ymm8 4382 vmovdqa ymm8,YMMWORD[$L$rol8] 4383 vpaddd ymm3,ymm3,ymm7 4384 vpaddd ymm2,ymm2,ymm6 4385 vpaddd ymm1,ymm1,ymm5 4386 vpaddd ymm0,ymm0,ymm4 4387 vpxor ymm15,ymm15,ymm3 4388 mov r10,r13 4389 mov r11,r14 4390 mov r12,r15 4391 and r12,3 4392 mov r13,r15 4393 and r13,-4 4394 mov r14,r9 4395 shrd r15,r9,2 4396 shr r9,2 4397 add r15,r13 4398 adc r9,r14 4399 add r10,r15 4400 adc r11,r9 4401 adc r12,0 4402 vpxor ymm14,ymm14,ymm2 4403 vpxor ymm13,ymm13,ymm1 4404 vpxor ymm12,ymm12,ymm0 4405 vpshufb ymm15,ymm15,ymm8 4406 
vpshufb ymm14,ymm14,ymm8 4407 vpshufb ymm13,ymm13,ymm8 4408 vpshufb ymm12,ymm12,ymm8 4409 vpaddd ymm11,ymm11,ymm15 4410 vpaddd ymm10,ymm10,ymm14 4411 add r10,QWORD[((0+16))+rcx*1+rsi] 4412 adc r11,QWORD[((8+16))+rcx*1+rsi] 4413 adc r12,1 4414 vpaddd ymm9,ymm9,ymm13 4415 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 4416 vpxor ymm7,ymm7,ymm11 4417 vpxor ymm6,ymm6,ymm10 4418 vpxor ymm5,ymm5,ymm9 4419 vpxor ymm4,ymm4,ymm8 4420 vmovdqa YMMWORD[(160+128)+rbp],ymm8 4421 vpsrld ymm8,ymm7,25 4422 mov rdx,QWORD[((0+160+0))+rbp] 4423 mov r15,rdx 4424 mulx r14,r13,r10 4425 mulx rdx,rax,r11 4426 imul r15,r12 4427 add r14,rax 4428 adc r15,rdx 4429 vpslld ymm7,ymm7,32-25 4430 vpxor ymm7,ymm7,ymm8 4431 vpsrld ymm8,ymm6,25 4432 vpslld ymm6,ymm6,32-25 4433 vpxor ymm6,ymm6,ymm8 4434 vpsrld ymm8,ymm5,25 4435 vpslld ymm5,ymm5,32-25 4436 vpxor ymm5,ymm5,ymm8 4437 vpsrld ymm8,ymm4,25 4438 vpslld ymm4,ymm4,32-25 4439 vpxor ymm4,ymm4,ymm8 4440 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 4441 vpalignr ymm7,ymm7,ymm7,4 4442 vpalignr ymm11,ymm11,ymm11,8 4443 vpalignr ymm15,ymm15,ymm15,12 4444 vpalignr ymm6,ymm6,ymm6,4 4445 vpalignr ymm10,ymm10,ymm10,8 4446 vpalignr ymm14,ymm14,ymm14,12 4447 mov rdx,QWORD[((8+160+0))+rbp] 4448 mulx rax,r10,r10 4449 add r14,r10 4450 mulx r9,r11,r11 4451 adc r15,r11 4452 adc r9,0 4453 imul rdx,r12 4454 vpalignr ymm5,ymm5,ymm5,4 4455 vpalignr ymm9,ymm9,ymm9,8 4456 vpalignr ymm13,ymm13,ymm13,12 4457 vpalignr ymm4,ymm4,ymm4,4 4458 vpalignr ymm8,ymm8,ymm8,8 4459 vpalignr ymm12,ymm12,ymm12,12 4460 vmovdqa YMMWORD[(160+128)+rbp],ymm8 4461 vmovdqa ymm8,YMMWORD[$L$rol16] 4462 vpaddd ymm3,ymm3,ymm7 4463 vpaddd ymm2,ymm2,ymm6 4464 vpaddd ymm1,ymm1,ymm5 4465 vpaddd ymm0,ymm0,ymm4 4466 vpxor ymm15,ymm15,ymm3 4467 vpxor ymm14,ymm14,ymm2 4468 vpxor ymm13,ymm13,ymm1 4469 vpxor ymm12,ymm12,ymm0 4470 vpshufb ymm15,ymm15,ymm8 4471 vpshufb ymm14,ymm14,ymm8 4472 add r15,rax 4473 adc r9,rdx 4474 vpshufb ymm13,ymm13,ymm8 4475 vpshufb ymm12,ymm12,ymm8 4476 vpaddd ymm11,ymm11,ymm15 4477 vpaddd 
ymm10,ymm10,ymm14 4478 vpaddd ymm9,ymm9,ymm13 4479 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 4480 vpxor ymm7,ymm7,ymm11 4481 vpxor ymm6,ymm6,ymm10 4482 vpxor ymm5,ymm5,ymm9 4483 mov r10,r13 4484 mov r11,r14 4485 mov r12,r15 4486 and r12,3 4487 mov r13,r15 4488 and r13,-4 4489 mov r14,r9 4490 shrd r15,r9,2 4491 shr r9,2 4492 add r15,r13 4493 adc r9,r14 4494 add r10,r15 4495 adc r11,r9 4496 adc r12,0 4497 vpxor ymm4,ymm4,ymm8 4498 vmovdqa YMMWORD[(160+128)+rbp],ymm8 4499 vpsrld ymm8,ymm7,20 4500 vpslld ymm7,ymm7,32-20 4501 vpxor ymm7,ymm7,ymm8 4502 vpsrld ymm8,ymm6,20 4503 vpslld ymm6,ymm6,32-20 4504 vpxor ymm6,ymm6,ymm8 4505 add r10,QWORD[((0+32))+rcx*1+rsi] 4506 adc r11,QWORD[((8+32))+rcx*1+rsi] 4507 adc r12,1 4508 4509 lea rcx,[48+rcx] 4510 vpsrld ymm8,ymm5,20 4511 vpslld ymm5,ymm5,32-20 4512 vpxor ymm5,ymm5,ymm8 4513 vpsrld ymm8,ymm4,20 4514 vpslld ymm4,ymm4,32-20 4515 vpxor ymm4,ymm4,ymm8 4516 vmovdqa ymm8,YMMWORD[$L$rol8] 4517 vpaddd ymm3,ymm3,ymm7 4518 vpaddd ymm2,ymm2,ymm6 4519 vpaddd ymm1,ymm1,ymm5 4520 vpaddd ymm0,ymm0,ymm4 4521 vpxor ymm15,ymm15,ymm3 4522 vpxor ymm14,ymm14,ymm2 4523 vpxor ymm13,ymm13,ymm1 4524 vpxor ymm12,ymm12,ymm0 4525 vpshufb ymm15,ymm15,ymm8 4526 vpshufb ymm14,ymm14,ymm8 4527 vpshufb ymm13,ymm13,ymm8 4528 mov rdx,QWORD[((0+160+0))+rbp] 4529 mov r15,rdx 4530 mulx r14,r13,r10 4531 mulx rdx,rax,r11 4532 imul r15,r12 4533 add r14,rax 4534 adc r15,rdx 4535 vpshufb ymm12,ymm12,ymm8 4536 vpaddd ymm11,ymm11,ymm15 4537 vpaddd ymm10,ymm10,ymm14 4538 vpaddd ymm9,ymm9,ymm13 4539 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 4540 vpxor ymm7,ymm7,ymm11 4541 vpxor ymm6,ymm6,ymm10 4542 vpxor ymm5,ymm5,ymm9 4543 mov rdx,QWORD[((8+160+0))+rbp] 4544 mulx rax,r10,r10 4545 add r14,r10 4546 mulx r9,r11,r11 4547 adc r15,r11 4548 adc r9,0 4549 imul rdx,r12 4550 vpxor ymm4,ymm4,ymm8 4551 vmovdqa YMMWORD[(160+128)+rbp],ymm8 4552 vpsrld ymm8,ymm7,25 4553 vpslld ymm7,ymm7,32-25 4554 vpxor ymm7,ymm7,ymm8 4555 vpsrld ymm8,ymm6,25 4556 vpslld ymm6,ymm6,32-25 4557 
vpxor ymm6,ymm6,ymm8 4558 add r15,rax 4559 adc r9,rdx 4560 vpsrld ymm8,ymm5,25 4561 vpslld ymm5,ymm5,32-25 4562 vpxor ymm5,ymm5,ymm8 4563 vpsrld ymm8,ymm4,25 4564 vpslld ymm4,ymm4,32-25 4565 vpxor ymm4,ymm4,ymm8 4566 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 4567 vpalignr ymm7,ymm7,ymm7,12 4568 vpalignr ymm11,ymm11,ymm11,8 4569 vpalignr ymm15,ymm15,ymm15,4 4570 vpalignr ymm6,ymm6,ymm6,12 4571 vpalignr ymm10,ymm10,ymm10,8 4572 vpalignr ymm14,ymm14,ymm14,4 4573 vpalignr ymm5,ymm5,ymm5,12 4574 vpalignr ymm9,ymm9,ymm9,8 4575 vpalignr ymm13,ymm13,ymm13,4 4576 vpalignr ymm4,ymm4,ymm4,12 4577 vpalignr ymm8,ymm8,ymm8,8 4578 mov r10,r13 4579 mov r11,r14 4580 mov r12,r15 4581 and r12,3 4582 mov r13,r15 4583 and r13,-4 4584 mov r14,r9 4585 shrd r15,r9,2 4586 shr r9,2 4587 add r15,r13 4588 adc r9,r14 4589 add r10,r15 4590 adc r11,r9 4591 adc r12,0 4592 vpalignr ymm12,ymm12,ymm12,4 4593 4594 cmp rcx,10*6*8 4595 jne NEAR $L$open_avx2_main_loop_rounds 4596 vpaddd ymm3,ymm3,YMMWORD[$L$chacha20_consts] 4597 vpaddd ymm7,ymm7,YMMWORD[((160+64))+rbp] 4598 vpaddd ymm11,ymm11,YMMWORD[((160+96))+rbp] 4599 vpaddd ymm15,ymm15,YMMWORD[((160+256))+rbp] 4600 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] 4601 vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] 4602 vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] 4603 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] 4604 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 4605 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] 4606 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] 4607 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 4608 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 4609 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 4610 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 4611 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 4612 4613 vmovdqa YMMWORD[(160+128)+rbp],ymm0 4614 add r10,QWORD[((0+480))+rsi] 4615 adc r11,QWORD[((8+480))+rsi] 4616 adc r12,1 4617 vperm2i128 ymm0,ymm7,ymm3,0x02 4618 vperm2i128 ymm7,ymm7,ymm3,0x13 4619 vperm2i128 ymm3,ymm15,ymm11,0x02 4620 vperm2i128 ymm11,ymm15,ymm11,0x13 4621 
vpxor ymm0,ymm0,YMMWORD[((0+0))+rsi] 4622 vpxor ymm3,ymm3,YMMWORD[((32+0))+rsi] 4623 vpxor ymm7,ymm7,YMMWORD[((64+0))+rsi] 4624 vpxor ymm11,ymm11,YMMWORD[((96+0))+rsi] 4625 vmovdqu YMMWORD[(0+0)+rdi],ymm0 4626 vmovdqu YMMWORD[(32+0)+rdi],ymm3 4627 vmovdqu YMMWORD[(64+0)+rdi],ymm7 4628 vmovdqu YMMWORD[(96+0)+rdi],ymm11 4629 4630 vmovdqa ymm0,YMMWORD[((160+128))+rbp] 4631 mov rax,QWORD[((0+160+0))+rbp] 4632 mov r15,rax 4633 mul r10 4634 mov r13,rax 4635 mov r14,rdx 4636 mov rax,QWORD[((0+160+0))+rbp] 4637 mul r11 4638 imul r15,r12 4639 add r14,rax 4640 adc r15,rdx 4641 mov rax,QWORD[((8+160+0))+rbp] 4642 mov r9,rax 4643 mul r10 4644 add r14,rax 4645 adc rdx,0 4646 mov r10,rdx 4647 mov rax,QWORD[((8+160+0))+rbp] 4648 mul r11 4649 add r15,rax 4650 adc rdx,0 4651 imul r9,r12 4652 add r15,r10 4653 adc r9,rdx 4654 mov r10,r13 4655 mov r11,r14 4656 mov r12,r15 4657 and r12,3 4658 mov r13,r15 4659 and r13,-4 4660 mov r14,r9 4661 shrd r15,r9,2 4662 shr r9,2 4663 add r15,r13 4664 adc r9,r14 4665 add r10,r15 4666 adc r11,r9 4667 adc r12,0 4668 vperm2i128 ymm3,ymm6,ymm2,0x02 4669 vperm2i128 ymm6,ymm6,ymm2,0x13 4670 vperm2i128 ymm2,ymm14,ymm10,0x02 4671 vperm2i128 ymm10,ymm14,ymm10,0x13 4672 vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi] 4673 vpxor ymm2,ymm2,YMMWORD[((32+128))+rsi] 4674 vpxor ymm6,ymm6,YMMWORD[((64+128))+rsi] 4675 vpxor ymm10,ymm10,YMMWORD[((96+128))+rsi] 4676 vmovdqu YMMWORD[(0+128)+rdi],ymm3 4677 vmovdqu YMMWORD[(32+128)+rdi],ymm2 4678 vmovdqu YMMWORD[(64+128)+rdi],ymm6 4679 vmovdqu YMMWORD[(96+128)+rdi],ymm10 4680 add r10,QWORD[((0+480+16))+rsi] 4681 adc r11,QWORD[((8+480+16))+rsi] 4682 adc r12,1 4683 vperm2i128 ymm3,ymm5,ymm1,0x02 4684 vperm2i128 ymm5,ymm5,ymm1,0x13 4685 vperm2i128 ymm1,ymm13,ymm9,0x02 4686 vperm2i128 ymm9,ymm13,ymm9,0x13 4687 vpxor ymm3,ymm3,YMMWORD[((0+256))+rsi] 4688 vpxor ymm1,ymm1,YMMWORD[((32+256))+rsi] 4689 vpxor ymm5,ymm5,YMMWORD[((64+256))+rsi] 4690 vpxor ymm9,ymm9,YMMWORD[((96+256))+rsi] 4691 vmovdqu YMMWORD[(0+256)+rdi],ymm3 4692 vmovdqu 
YMMWORD[(32+256)+rdi],ymm1 4693 vmovdqu YMMWORD[(64+256)+rdi],ymm5 4694 vmovdqu YMMWORD[(96+256)+rdi],ymm9 4695 mov rax,QWORD[((0+160+0))+rbp] 4696 mov r15,rax 4697 mul r10 4698 mov r13,rax 4699 mov r14,rdx 4700 mov rax,QWORD[((0+160+0))+rbp] 4701 mul r11 4702 imul r15,r12 4703 add r14,rax 4704 adc r15,rdx 4705 mov rax,QWORD[((8+160+0))+rbp] 4706 mov r9,rax 4707 mul r10 4708 add r14,rax 4709 adc rdx,0 4710 mov r10,rdx 4711 mov rax,QWORD[((8+160+0))+rbp] 4712 mul r11 4713 add r15,rax 4714 adc rdx,0 4715 imul r9,r12 4716 add r15,r10 4717 adc r9,rdx 4718 mov r10,r13 4719 mov r11,r14 4720 mov r12,r15 4721 and r12,3 4722 mov r13,r15 4723 and r13,-4 4724 mov r14,r9 4725 shrd r15,r9,2 4726 shr r9,2 4727 add r15,r13 4728 adc r9,r14 4729 add r10,r15 4730 adc r11,r9 4731 adc r12,0 4732 vperm2i128 ymm3,ymm4,ymm0,0x02 4733 vperm2i128 ymm4,ymm4,ymm0,0x13 4734 vperm2i128 ymm0,ymm12,ymm8,0x02 4735 vperm2i128 ymm8,ymm12,ymm8,0x13 4736 vpxor ymm3,ymm3,YMMWORD[((0+384))+rsi] 4737 vpxor ymm0,ymm0,YMMWORD[((32+384))+rsi] 4738 vpxor ymm4,ymm4,YMMWORD[((64+384))+rsi] 4739 vpxor ymm8,ymm8,YMMWORD[((96+384))+rsi] 4740 vmovdqu YMMWORD[(0+384)+rdi],ymm3 4741 vmovdqu YMMWORD[(32+384)+rdi],ymm0 4742 vmovdqu YMMWORD[(64+384)+rdi],ymm4 4743 vmovdqu YMMWORD[(96+384)+rdi],ymm8 4744 4745 lea rsi,[512+rsi] 4746 lea rdi,[512+rdi] 4747 sub rbx,16*32 4748 jmp NEAR $L$open_avx2_main_loop 4749$L$open_avx2_main_loop_done: 4750 test rbx,rbx 4751 vzeroupper 4752 je NEAR $L$open_sse_finalize 4753 4754 cmp rbx,12*32 4755 ja NEAR $L$open_avx2_tail_512 4756 cmp rbx,8*32 4757 ja NEAR $L$open_avx2_tail_384 4758 cmp rbx,4*32 4759 ja NEAR $L$open_avx2_tail_256 4760 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 4761 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 4762 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 4763 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 4764 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 4765 vmovdqa YMMWORD[(160+160)+rbp],ymm12 4766 4767 xor r8,r8 4768 mov rcx,rbx 4769 and rcx,-16 4770 test rcx,rcx 4771 je NEAR 
$L$open_avx2_tail_128_rounds 4772$L$open_avx2_tail_128_rounds_and_x1hash: 4773 add r10,QWORD[((0+0))+r8*1+rsi] 4774 adc r11,QWORD[((8+0))+r8*1+rsi] 4775 adc r12,1 4776 mov rax,QWORD[((0+160+0))+rbp] 4777 mov r15,rax 4778 mul r10 4779 mov r13,rax 4780 mov r14,rdx 4781 mov rax,QWORD[((0+160+0))+rbp] 4782 mul r11 4783 imul r15,r12 4784 add r14,rax 4785 adc r15,rdx 4786 mov rax,QWORD[((8+160+0))+rbp] 4787 mov r9,rax 4788 mul r10 4789 add r14,rax 4790 adc rdx,0 4791 mov r10,rdx 4792 mov rax,QWORD[((8+160+0))+rbp] 4793 mul r11 4794 add r15,rax 4795 adc rdx,0 4796 imul r9,r12 4797 add r15,r10 4798 adc r9,rdx 4799 mov r10,r13 4800 mov r11,r14 4801 mov r12,r15 4802 and r12,3 4803 mov r13,r15 4804 and r13,-4 4805 mov r14,r9 4806 shrd r15,r9,2 4807 shr r9,2 4808 add r15,r13 4809 adc r9,r14 4810 add r10,r15 4811 adc r11,r9 4812 adc r12,0 4813 4814$L$open_avx2_tail_128_rounds: 4815 add r8,16 4816 vpaddd ymm0,ymm0,ymm4 4817 vpxor ymm12,ymm12,ymm0 4818 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 4819 vpaddd ymm8,ymm8,ymm12 4820 vpxor ymm4,ymm4,ymm8 4821 vpsrld ymm3,ymm4,20 4822 vpslld ymm4,ymm4,12 4823 vpxor ymm4,ymm4,ymm3 4824 vpaddd ymm0,ymm0,ymm4 4825 vpxor ymm12,ymm12,ymm0 4826 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 4827 vpaddd ymm8,ymm8,ymm12 4828 vpxor ymm4,ymm4,ymm8 4829 vpslld ymm3,ymm4,7 4830 vpsrld ymm4,ymm4,25 4831 vpxor ymm4,ymm4,ymm3 4832 vpalignr ymm12,ymm12,ymm12,12 4833 vpalignr ymm8,ymm8,ymm8,8 4834 vpalignr ymm4,ymm4,ymm4,4 4835 vpaddd ymm0,ymm0,ymm4 4836 vpxor ymm12,ymm12,ymm0 4837 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 4838 vpaddd ymm8,ymm8,ymm12 4839 vpxor ymm4,ymm4,ymm8 4840 vpsrld ymm3,ymm4,20 4841 vpslld ymm4,ymm4,12 4842 vpxor ymm4,ymm4,ymm3 4843 vpaddd ymm0,ymm0,ymm4 4844 vpxor ymm12,ymm12,ymm0 4845 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 4846 vpaddd ymm8,ymm8,ymm12 4847 vpxor ymm4,ymm4,ymm8 4848 vpslld ymm3,ymm4,7 4849 vpsrld ymm4,ymm4,25 4850 vpxor ymm4,ymm4,ymm3 4851 vpalignr ymm12,ymm12,ymm12,4 4852 vpalignr ymm8,ymm8,ymm8,8 4853 vpalignr ymm4,ymm4,ymm4,12 4854 
4855 cmp r8,rcx 4856 jb NEAR $L$open_avx2_tail_128_rounds_and_x1hash 4857 cmp r8,160 4858 jne NEAR $L$open_avx2_tail_128_rounds 4859 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 4860 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 4861 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 4862 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 4863 vperm2i128 ymm3,ymm4,ymm0,0x13 4864 vperm2i128 ymm0,ymm4,ymm0,0x02 4865 vperm2i128 ymm4,ymm12,ymm8,0x02 4866 vperm2i128 ymm12,ymm12,ymm8,0x13 4867 vmovdqa ymm8,ymm3 4868 4869 jmp NEAR $L$open_avx2_tail_128_xor 4870 4871$L$open_avx2_tail_256: 4872 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 4873 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 4874 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 4875 vmovdqa ymm1,ymm0 4876 vmovdqa ymm5,ymm4 4877 vmovdqa ymm9,ymm8 4878 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 4879 vpaddd ymm13,ymm12,YMMWORD[((160+160))+rbp] 4880 vpaddd ymm12,ymm12,ymm13 4881 vmovdqa YMMWORD[(160+160)+rbp],ymm12 4882 vmovdqa YMMWORD[(160+192)+rbp],ymm13 4883 4884 mov QWORD[((160+128))+rbp],rbx 4885 mov rcx,rbx 4886 sub rcx,4*32 4887 shr rcx,4 4888 mov r8,10 4889 cmp rcx,10 4890 cmovg rcx,r8 4891 mov rbx,rsi 4892 xor r8,r8 4893$L$open_avx2_tail_256_rounds_and_x1hash: 4894 add r10,QWORD[((0+0))+rbx] 4895 adc r11,QWORD[((8+0))+rbx] 4896 adc r12,1 4897 mov rdx,QWORD[((0+160+0))+rbp] 4898 mov r15,rdx 4899 mulx r14,r13,r10 4900 mulx rdx,rax,r11 4901 imul r15,r12 4902 add r14,rax 4903 adc r15,rdx 4904 mov rdx,QWORD[((8+160+0))+rbp] 4905 mulx rax,r10,r10 4906 add r14,r10 4907 mulx r9,r11,r11 4908 adc r15,r11 4909 adc r9,0 4910 imul rdx,r12 4911 add r15,rax 4912 adc r9,rdx 4913 mov r10,r13 4914 mov r11,r14 4915 mov r12,r15 4916 and r12,3 4917 mov r13,r15 4918 and r13,-4 4919 mov r14,r9 4920 shrd r15,r9,2 4921 shr r9,2 4922 add r15,r13 4923 adc r9,r14 4924 add r10,r15 4925 adc r11,r9 4926 adc r12,0 4927 4928 lea rbx,[16+rbx] 4929$L$open_avx2_tail_256_rounds: 4930 vpaddd ymm0,ymm0,ymm4 4931 vpxor ymm12,ymm12,ymm0 4932 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 4933 vpaddd 
ymm8,ymm8,ymm12 4934 vpxor ymm4,ymm4,ymm8 4935 vpsrld ymm3,ymm4,20 4936 vpslld ymm4,ymm4,12 4937 vpxor ymm4,ymm4,ymm3 4938 vpaddd ymm0,ymm0,ymm4 4939 vpxor ymm12,ymm12,ymm0 4940 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 4941 vpaddd ymm8,ymm8,ymm12 4942 vpxor ymm4,ymm4,ymm8 4943 vpslld ymm3,ymm4,7 4944 vpsrld ymm4,ymm4,25 4945 vpxor ymm4,ymm4,ymm3 4946 vpalignr ymm12,ymm12,ymm12,12 4947 vpalignr ymm8,ymm8,ymm8,8 4948 vpalignr ymm4,ymm4,ymm4,4 4949 vpaddd ymm1,ymm1,ymm5 4950 vpxor ymm13,ymm13,ymm1 4951 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 4952 vpaddd ymm9,ymm9,ymm13 4953 vpxor ymm5,ymm5,ymm9 4954 vpsrld ymm3,ymm5,20 4955 vpslld ymm5,ymm5,12 4956 vpxor ymm5,ymm5,ymm3 4957 vpaddd ymm1,ymm1,ymm5 4958 vpxor ymm13,ymm13,ymm1 4959 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 4960 vpaddd ymm9,ymm9,ymm13 4961 vpxor ymm5,ymm5,ymm9 4962 vpslld ymm3,ymm5,7 4963 vpsrld ymm5,ymm5,25 4964 vpxor ymm5,ymm5,ymm3 4965 vpalignr ymm13,ymm13,ymm13,12 4966 vpalignr ymm9,ymm9,ymm9,8 4967 vpalignr ymm5,ymm5,ymm5,4 4968 4969 inc r8 4970 vpaddd ymm0,ymm0,ymm4 4971 vpxor ymm12,ymm12,ymm0 4972 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 4973 vpaddd ymm8,ymm8,ymm12 4974 vpxor ymm4,ymm4,ymm8 4975 vpsrld ymm3,ymm4,20 4976 vpslld ymm4,ymm4,12 4977 vpxor ymm4,ymm4,ymm3 4978 vpaddd ymm0,ymm0,ymm4 4979 vpxor ymm12,ymm12,ymm0 4980 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 4981 vpaddd ymm8,ymm8,ymm12 4982 vpxor ymm4,ymm4,ymm8 4983 vpslld ymm3,ymm4,7 4984 vpsrld ymm4,ymm4,25 4985 vpxor ymm4,ymm4,ymm3 4986 vpalignr ymm12,ymm12,ymm12,4 4987 vpalignr ymm8,ymm8,ymm8,8 4988 vpalignr ymm4,ymm4,ymm4,12 4989 vpaddd ymm1,ymm1,ymm5 4990 vpxor ymm13,ymm13,ymm1 4991 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 4992 vpaddd ymm9,ymm9,ymm13 4993 vpxor ymm5,ymm5,ymm9 4994 vpsrld ymm3,ymm5,20 4995 vpslld ymm5,ymm5,12 4996 vpxor ymm5,ymm5,ymm3 4997 vpaddd ymm1,ymm1,ymm5 4998 vpxor ymm13,ymm13,ymm1 4999 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 5000 vpaddd ymm9,ymm9,ymm13 5001 vpxor ymm5,ymm5,ymm9 5002 vpslld ymm3,ymm5,7 5003 vpsrld ymm5,ymm5,25 5004 vpxor 
ymm5,ymm5,ymm3 5005 vpalignr ymm13,ymm13,ymm13,4 5006 vpalignr ymm9,ymm9,ymm9,8 5007 vpalignr ymm5,ymm5,ymm5,12 5008 vpaddd ymm2,ymm2,ymm6 5009 vpxor ymm14,ymm14,ymm2 5010 vpshufb ymm14,ymm14,YMMWORD[$L$rol16] 5011 vpaddd ymm10,ymm10,ymm14 5012 vpxor ymm6,ymm6,ymm10 5013 vpsrld ymm3,ymm6,20 5014 vpslld ymm6,ymm6,12 5015 vpxor ymm6,ymm6,ymm3 5016 vpaddd ymm2,ymm2,ymm6 5017 vpxor ymm14,ymm14,ymm2 5018 vpshufb ymm14,ymm14,YMMWORD[$L$rol8] 5019 vpaddd ymm10,ymm10,ymm14 5020 vpxor ymm6,ymm6,ymm10 5021 vpslld ymm3,ymm6,7 5022 vpsrld ymm6,ymm6,25 5023 vpxor ymm6,ymm6,ymm3 5024 vpalignr ymm14,ymm14,ymm14,4 5025 vpalignr ymm10,ymm10,ymm10,8 5026 vpalignr ymm6,ymm6,ymm6,12 5027 5028 cmp r8,rcx 5029 jb NEAR $L$open_avx2_tail_256_rounds_and_x1hash 5030 cmp r8,10 5031 jne NEAR $L$open_avx2_tail_256_rounds 5032 mov r8,rbx 5033 sub rbx,rsi 5034 mov rcx,rbx 5035 mov rbx,QWORD[((160+128))+rbp] 5036$L$open_avx2_tail_256_hash: 5037 add rcx,16 5038 cmp rcx,rbx 5039 jg NEAR $L$open_avx2_tail_256_done 5040 add r10,QWORD[((0+0))+r8] 5041 adc r11,QWORD[((8+0))+r8] 5042 adc r12,1 5043 mov rdx,QWORD[((0+160+0))+rbp] 5044 mov r15,rdx 5045 mulx r14,r13,r10 5046 mulx rdx,rax,r11 5047 imul r15,r12 5048 add r14,rax 5049 adc r15,rdx 5050 mov rdx,QWORD[((8+160+0))+rbp] 5051 mulx rax,r10,r10 5052 add r14,r10 5053 mulx r9,r11,r11 5054 adc r15,r11 5055 adc r9,0 5056 imul rdx,r12 5057 add r15,rax 5058 adc r9,rdx 5059 mov r10,r13 5060 mov r11,r14 5061 mov r12,r15 5062 and r12,3 5063 mov r13,r15 5064 and r13,-4 5065 mov r14,r9 5066 shrd r15,r9,2 5067 shr r9,2 5068 add r15,r13 5069 adc r9,r14 5070 add r10,r15 5071 adc r11,r9 5072 adc r12,0 5073 5074 lea r8,[16+r8] 5075 jmp NEAR $L$open_avx2_tail_256_hash 5076$L$open_avx2_tail_256_done: 5077 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 5078 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] 5079 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] 5080 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 5081 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 5082 vpaddd 
ymm4,ymm4,YMMWORD[((160+64))+rbp] 5083 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 5084 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 5085 vperm2i128 ymm3,ymm5,ymm1,0x02 5086 vperm2i128 ymm5,ymm5,ymm1,0x13 5087 vperm2i128 ymm1,ymm13,ymm9,0x02 5088 vperm2i128 ymm9,ymm13,ymm9,0x13 5089 vpxor ymm3,ymm3,YMMWORD[((0+0))+rsi] 5090 vpxor ymm1,ymm1,YMMWORD[((32+0))+rsi] 5091 vpxor ymm5,ymm5,YMMWORD[((64+0))+rsi] 5092 vpxor ymm9,ymm9,YMMWORD[((96+0))+rsi] 5093 vmovdqu YMMWORD[(0+0)+rdi],ymm3 5094 vmovdqu YMMWORD[(32+0)+rdi],ymm1 5095 vmovdqu YMMWORD[(64+0)+rdi],ymm5 5096 vmovdqu YMMWORD[(96+0)+rdi],ymm9 5097 vperm2i128 ymm3,ymm4,ymm0,0x13 5098 vperm2i128 ymm0,ymm4,ymm0,0x02 5099 vperm2i128 ymm4,ymm12,ymm8,0x02 5100 vperm2i128 ymm12,ymm12,ymm8,0x13 5101 vmovdqa ymm8,ymm3 5102 5103 lea rsi,[128+rsi] 5104 lea rdi,[128+rdi] 5105 sub rbx,4*32 5106 jmp NEAR $L$open_avx2_tail_128_xor 5107 5108$L$open_avx2_tail_384: 5109 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 5110 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 5111 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 5112 vmovdqa ymm1,ymm0 5113 vmovdqa ymm5,ymm4 5114 vmovdqa ymm9,ymm8 5115 vmovdqa ymm2,ymm0 5116 vmovdqa ymm6,ymm4 5117 vmovdqa ymm10,ymm8 5118 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 5119 vpaddd ymm14,ymm12,YMMWORD[((160+160))+rbp] 5120 vpaddd ymm13,ymm12,ymm14 5121 vpaddd ymm12,ymm12,ymm13 5122 vmovdqa YMMWORD[(160+160)+rbp],ymm12 5123 vmovdqa YMMWORD[(160+192)+rbp],ymm13 5124 vmovdqa YMMWORD[(160+224)+rbp],ymm14 5125 5126 mov QWORD[((160+128))+rbp],rbx 5127 mov rcx,rbx 5128 sub rcx,8*32 5129 shr rcx,4 5130 add rcx,6 5131 mov r8,10 5132 cmp rcx,10 5133 cmovg rcx,r8 5134 mov rbx,rsi 5135 xor r8,r8 5136$L$open_avx2_tail_384_rounds_and_x2hash: 5137 add r10,QWORD[((0+0))+rbx] 5138 adc r11,QWORD[((8+0))+rbx] 5139 adc r12,1 5140 mov rdx,QWORD[((0+160+0))+rbp] 5141 mov r15,rdx 5142 mulx r14,r13,r10 5143 mulx rdx,rax,r11 5144 imul r15,r12 5145 add r14,rax 5146 adc r15,rdx 5147 mov rdx,QWORD[((8+160+0))+rbp] 5148 mulx rax,r10,r10 5149 add r14,r10 5150 mulx 
r9,r11,r11 5151 adc r15,r11 5152 adc r9,0 5153 imul rdx,r12 5154 add r15,rax 5155 adc r9,rdx 5156 mov r10,r13 5157 mov r11,r14 5158 mov r12,r15 5159 and r12,3 5160 mov r13,r15 5161 and r13,-4 5162 mov r14,r9 5163 shrd r15,r9,2 5164 shr r9,2 5165 add r15,r13 5166 adc r9,r14 5167 add r10,r15 5168 adc r11,r9 5169 adc r12,0 5170 5171 lea rbx,[16+rbx] 5172$L$open_avx2_tail_384_rounds_and_x1hash: 5173 vpaddd ymm2,ymm2,ymm6 5174 vpxor ymm14,ymm14,ymm2 5175 vpshufb ymm14,ymm14,YMMWORD[$L$rol16] 5176 vpaddd ymm10,ymm10,ymm14 5177 vpxor ymm6,ymm6,ymm10 5178 vpsrld ymm3,ymm6,20 5179 vpslld ymm6,ymm6,12 5180 vpxor ymm6,ymm6,ymm3 5181 vpaddd ymm2,ymm2,ymm6 5182 vpxor ymm14,ymm14,ymm2 5183 vpshufb ymm14,ymm14,YMMWORD[$L$rol8] 5184 vpaddd ymm10,ymm10,ymm14 5185 vpxor ymm6,ymm6,ymm10 5186 vpslld ymm3,ymm6,7 5187 vpsrld ymm6,ymm6,25 5188 vpxor ymm6,ymm6,ymm3 5189 vpalignr ymm14,ymm14,ymm14,12 5190 vpalignr ymm10,ymm10,ymm10,8 5191 vpalignr ymm6,ymm6,ymm6,4 5192 vpaddd ymm1,ymm1,ymm5 5193 vpxor ymm13,ymm13,ymm1 5194 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 5195 vpaddd ymm9,ymm9,ymm13 5196 vpxor ymm5,ymm5,ymm9 5197 vpsrld ymm3,ymm5,20 5198 vpslld ymm5,ymm5,12 5199 vpxor ymm5,ymm5,ymm3 5200 vpaddd ymm1,ymm1,ymm5 5201 vpxor ymm13,ymm13,ymm1 5202 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 5203 vpaddd ymm9,ymm9,ymm13 5204 vpxor ymm5,ymm5,ymm9 5205 vpslld ymm3,ymm5,7 5206 vpsrld ymm5,ymm5,25 5207 vpxor ymm5,ymm5,ymm3 5208 vpalignr ymm13,ymm13,ymm13,12 5209 vpalignr ymm9,ymm9,ymm9,8 5210 vpalignr ymm5,ymm5,ymm5,4 5211 vpaddd ymm0,ymm0,ymm4 5212 vpxor ymm12,ymm12,ymm0 5213 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 5214 vpaddd ymm8,ymm8,ymm12 5215 vpxor ymm4,ymm4,ymm8 5216 vpsrld ymm3,ymm4,20 5217 vpslld ymm4,ymm4,12 5218 vpxor ymm4,ymm4,ymm3 5219 vpaddd ymm0,ymm0,ymm4 5220 vpxor ymm12,ymm12,ymm0 5221 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 5222 vpaddd ymm8,ymm8,ymm12 5223 vpxor ymm4,ymm4,ymm8 5224 vpslld ymm3,ymm4,7 5225 vpsrld ymm4,ymm4,25 5226 vpxor ymm4,ymm4,ymm3 5227 vpalignr ymm12,ymm12,ymm12,12 5228 
vpalignr ymm8,ymm8,ymm8,8 5229 vpalignr ymm4,ymm4,ymm4,4 5230 add r10,QWORD[((0+0))+rbx] 5231 adc r11,QWORD[((8+0))+rbx] 5232 adc r12,1 5233 mov rax,QWORD[((0+160+0))+rbp] 5234 mov r15,rax 5235 mul r10 5236 mov r13,rax 5237 mov r14,rdx 5238 mov rax,QWORD[((0+160+0))+rbp] 5239 mul r11 5240 imul r15,r12 5241 add r14,rax 5242 adc r15,rdx 5243 mov rax,QWORD[((8+160+0))+rbp] 5244 mov r9,rax 5245 mul r10 5246 add r14,rax 5247 adc rdx,0 5248 mov r10,rdx 5249 mov rax,QWORD[((8+160+0))+rbp] 5250 mul r11 5251 add r15,rax 5252 adc rdx,0 5253 imul r9,r12 5254 add r15,r10 5255 adc r9,rdx 5256 mov r10,r13 5257 mov r11,r14 5258 mov r12,r15 5259 and r12,3 5260 mov r13,r15 5261 and r13,-4 5262 mov r14,r9 5263 shrd r15,r9,2 5264 shr r9,2 5265 add r15,r13 5266 adc r9,r14 5267 add r10,r15 5268 adc r11,r9 5269 adc r12,0 5270 5271 lea rbx,[16+rbx] 5272 inc r8 5273 vpaddd ymm2,ymm2,ymm6 5274 vpxor ymm14,ymm14,ymm2 5275 vpshufb ymm14,ymm14,YMMWORD[$L$rol16] 5276 vpaddd ymm10,ymm10,ymm14 5277 vpxor ymm6,ymm6,ymm10 5278 vpsrld ymm3,ymm6,20 5279 vpslld ymm6,ymm6,12 5280 vpxor ymm6,ymm6,ymm3 5281 vpaddd ymm2,ymm2,ymm6 5282 vpxor ymm14,ymm14,ymm2 5283 vpshufb ymm14,ymm14,YMMWORD[$L$rol8] 5284 vpaddd ymm10,ymm10,ymm14 5285 vpxor ymm6,ymm6,ymm10 5286 vpslld ymm3,ymm6,7 5287 vpsrld ymm6,ymm6,25 5288 vpxor ymm6,ymm6,ymm3 5289 vpalignr ymm14,ymm14,ymm14,4 5290 vpalignr ymm10,ymm10,ymm10,8 5291 vpalignr ymm6,ymm6,ymm6,12 5292 vpaddd ymm1,ymm1,ymm5 5293 vpxor ymm13,ymm13,ymm1 5294 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 5295 vpaddd ymm9,ymm9,ymm13 5296 vpxor ymm5,ymm5,ymm9 5297 vpsrld ymm3,ymm5,20 5298 vpslld ymm5,ymm5,12 5299 vpxor ymm5,ymm5,ymm3 5300 vpaddd ymm1,ymm1,ymm5 5301 vpxor ymm13,ymm13,ymm1 5302 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 5303 vpaddd ymm9,ymm9,ymm13 5304 vpxor ymm5,ymm5,ymm9 5305 vpslld ymm3,ymm5,7 5306 vpsrld ymm5,ymm5,25 5307 vpxor ymm5,ymm5,ymm3 5308 vpalignr ymm13,ymm13,ymm13,4 5309 vpalignr ymm9,ymm9,ymm9,8 5310 vpalignr ymm5,ymm5,ymm5,12 5311 vpaddd ymm0,ymm0,ymm4 5312 vpxor 
ymm12,ymm12,ymm0 5313 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 5314 vpaddd ymm8,ymm8,ymm12 5315 vpxor ymm4,ymm4,ymm8 5316 vpsrld ymm3,ymm4,20 5317 vpslld ymm4,ymm4,12 5318 vpxor ymm4,ymm4,ymm3 5319 vpaddd ymm0,ymm0,ymm4 5320 vpxor ymm12,ymm12,ymm0 5321 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 5322 vpaddd ymm8,ymm8,ymm12 5323 vpxor ymm4,ymm4,ymm8 5324 vpslld ymm3,ymm4,7 5325 vpsrld ymm4,ymm4,25 5326 vpxor ymm4,ymm4,ymm3 5327 vpalignr ymm12,ymm12,ymm12,4 5328 vpalignr ymm8,ymm8,ymm8,8 5329 vpalignr ymm4,ymm4,ymm4,12 5330 5331 cmp r8,rcx 5332 jb NEAR $L$open_avx2_tail_384_rounds_and_x2hash 5333 cmp r8,10 5334 jne NEAR $L$open_avx2_tail_384_rounds_and_x1hash 5335 mov r8,rbx 5336 sub rbx,rsi 5337 mov rcx,rbx 5338 mov rbx,QWORD[((160+128))+rbp] 5339$L$open_avx2_384_tail_hash: 5340 add rcx,16 5341 cmp rcx,rbx 5342 jg NEAR $L$open_avx2_384_tail_done 5343 add r10,QWORD[((0+0))+r8] 5344 adc r11,QWORD[((8+0))+r8] 5345 adc r12,1 5346 mov rdx,QWORD[((0+160+0))+rbp] 5347 mov r15,rdx 5348 mulx r14,r13,r10 5349 mulx rdx,rax,r11 5350 imul r15,r12 5351 add r14,rax 5352 adc r15,rdx 5353 mov rdx,QWORD[((8+160+0))+rbp] 5354 mulx rax,r10,r10 5355 add r14,r10 5356 mulx r9,r11,r11 5357 adc r15,r11 5358 adc r9,0 5359 imul rdx,r12 5360 add r15,rax 5361 adc r9,rdx 5362 mov r10,r13 5363 mov r11,r14 5364 mov r12,r15 5365 and r12,3 5366 mov r13,r15 5367 and r13,-4 5368 mov r14,r9 5369 shrd r15,r9,2 5370 shr r9,2 5371 add r15,r13 5372 adc r9,r14 5373 add r10,r15 5374 adc r11,r9 5375 adc r12,0 5376 5377 lea r8,[16+r8] 5378 jmp NEAR $L$open_avx2_384_tail_hash 5379$L$open_avx2_384_tail_done: 5380 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] 5381 vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] 5382 vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] 5383 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] 5384 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 5385 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] 5386 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] 5387 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 5388 vpaddd 
ymm0,ymm0,YMMWORD[$L$chacha20_consts] 5389 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 5390 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 5391 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 5392 vperm2i128 ymm3,ymm6,ymm2,0x02 5393 vperm2i128 ymm6,ymm6,ymm2,0x13 5394 vperm2i128 ymm2,ymm14,ymm10,0x02 5395 vperm2i128 ymm10,ymm14,ymm10,0x13 5396 vpxor ymm3,ymm3,YMMWORD[((0+0))+rsi] 5397 vpxor ymm2,ymm2,YMMWORD[((32+0))+rsi] 5398 vpxor ymm6,ymm6,YMMWORD[((64+0))+rsi] 5399 vpxor ymm10,ymm10,YMMWORD[((96+0))+rsi] 5400 vmovdqu YMMWORD[(0+0)+rdi],ymm3 5401 vmovdqu YMMWORD[(32+0)+rdi],ymm2 5402 vmovdqu YMMWORD[(64+0)+rdi],ymm6 5403 vmovdqu YMMWORD[(96+0)+rdi],ymm10 5404 vperm2i128 ymm3,ymm5,ymm1,0x02 5405 vperm2i128 ymm5,ymm5,ymm1,0x13 5406 vperm2i128 ymm1,ymm13,ymm9,0x02 5407 vperm2i128 ymm9,ymm13,ymm9,0x13 5408 vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi] 5409 vpxor ymm1,ymm1,YMMWORD[((32+128))+rsi] 5410 vpxor ymm5,ymm5,YMMWORD[((64+128))+rsi] 5411 vpxor ymm9,ymm9,YMMWORD[((96+128))+rsi] 5412 vmovdqu YMMWORD[(0+128)+rdi],ymm3 5413 vmovdqu YMMWORD[(32+128)+rdi],ymm1 5414 vmovdqu YMMWORD[(64+128)+rdi],ymm5 5415 vmovdqu YMMWORD[(96+128)+rdi],ymm9 5416 vperm2i128 ymm3,ymm4,ymm0,0x13 5417 vperm2i128 ymm0,ymm4,ymm0,0x02 5418 vperm2i128 ymm4,ymm12,ymm8,0x02 5419 vperm2i128 ymm12,ymm12,ymm8,0x13 5420 vmovdqa ymm8,ymm3 5421 5422 lea rsi,[256+rsi] 5423 lea rdi,[256+rdi] 5424 sub rbx,8*32 5425 jmp NEAR $L$open_avx2_tail_128_xor 5426 5427$L$open_avx2_tail_512: 5428 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 5429 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 5430 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 5431 vmovdqa ymm1,ymm0 5432 vmovdqa ymm5,ymm4 5433 vmovdqa ymm9,ymm8 5434 vmovdqa ymm2,ymm0 5435 vmovdqa ymm6,ymm4 5436 vmovdqa ymm10,ymm8 5437 vmovdqa ymm3,ymm0 5438 vmovdqa ymm7,ymm4 5439 vmovdqa ymm11,ymm8 5440 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 5441 vpaddd ymm15,ymm12,YMMWORD[((160+160))+rbp] 5442 vpaddd ymm14,ymm12,ymm15 5443 vpaddd ymm13,ymm12,ymm14 5444 vpaddd ymm12,ymm12,ymm13 5445 vmovdqa 
YMMWORD[(160+256)+rbp],ymm15 5446 vmovdqa YMMWORD[(160+224)+rbp],ymm14 5447 vmovdqa YMMWORD[(160+192)+rbp],ymm13 5448 vmovdqa YMMWORD[(160+160)+rbp],ymm12 5449 5450 xor rcx,rcx 5451 mov r8,rsi 5452$L$open_avx2_tail_512_rounds_and_x2hash: 5453 add r10,QWORD[((0+0))+r8] 5454 adc r11,QWORD[((8+0))+r8] 5455 adc r12,1 5456 mov rax,QWORD[((0+160+0))+rbp] 5457 mov r15,rax 5458 mul r10 5459 mov r13,rax 5460 mov r14,rdx 5461 mov rax,QWORD[((0+160+0))+rbp] 5462 mul r11 5463 imul r15,r12 5464 add r14,rax 5465 adc r15,rdx 5466 mov rax,QWORD[((8+160+0))+rbp] 5467 mov r9,rax 5468 mul r10 5469 add r14,rax 5470 adc rdx,0 5471 mov r10,rdx 5472 mov rax,QWORD[((8+160+0))+rbp] 5473 mul r11 5474 add r15,rax 5475 adc rdx,0 5476 imul r9,r12 5477 add r15,r10 5478 adc r9,rdx 5479 mov r10,r13 5480 mov r11,r14 5481 mov r12,r15 5482 and r12,3 5483 mov r13,r15 5484 and r13,-4 5485 mov r14,r9 5486 shrd r15,r9,2 5487 shr r9,2 5488 add r15,r13 5489 adc r9,r14 5490 add r10,r15 5491 adc r11,r9 5492 adc r12,0 5493 5494 lea r8,[16+r8] 5495$L$open_avx2_tail_512_rounds_and_x1hash: 5496 vmovdqa YMMWORD[(160+128)+rbp],ymm8 5497 vmovdqa ymm8,YMMWORD[$L$rol16] 5498 vpaddd ymm3,ymm3,ymm7 5499 vpaddd ymm2,ymm2,ymm6 5500 vpaddd ymm1,ymm1,ymm5 5501 vpaddd ymm0,ymm0,ymm4 5502 vpxor ymm15,ymm15,ymm3 5503 vpxor ymm14,ymm14,ymm2 5504 vpxor ymm13,ymm13,ymm1 5505 vpxor ymm12,ymm12,ymm0 5506 vpshufb ymm15,ymm15,ymm8 5507 vpshufb ymm14,ymm14,ymm8 5508 vpshufb ymm13,ymm13,ymm8 5509 vpshufb ymm12,ymm12,ymm8 5510 vpaddd ymm11,ymm11,ymm15 5511 vpaddd ymm10,ymm10,ymm14 5512 vpaddd ymm9,ymm9,ymm13 5513 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 5514 vpxor ymm7,ymm7,ymm11 5515 vpxor ymm6,ymm6,ymm10 5516 vpxor ymm5,ymm5,ymm9 5517 vpxor ymm4,ymm4,ymm8 5518 vmovdqa YMMWORD[(160+128)+rbp],ymm8 5519 vpsrld ymm8,ymm7,20 5520 vpslld ymm7,ymm7,32-20 5521 vpxor ymm7,ymm7,ymm8 5522 vpsrld ymm8,ymm6,20 5523 vpslld ymm6,ymm6,32-20 5524 vpxor ymm6,ymm6,ymm8 5525 vpsrld ymm8,ymm5,20 5526 vpslld ymm5,ymm5,32-20 5527 vpxor ymm5,ymm5,ymm8 
5528 vpsrld ymm8,ymm4,20 5529 vpslld ymm4,ymm4,32-20 5530 vpxor ymm4,ymm4,ymm8 5531 vmovdqa ymm8,YMMWORD[$L$rol8] 5532 vpaddd ymm3,ymm3,ymm7 5533 add r10,QWORD[((0+0))+r8] 5534 adc r11,QWORD[((8+0))+r8] 5535 adc r12,1 5536 mov rdx,QWORD[((0+160+0))+rbp] 5537 mov r15,rdx 5538 mulx r14,r13,r10 5539 mulx rdx,rax,r11 5540 imul r15,r12 5541 add r14,rax 5542 adc r15,rdx 5543 mov rdx,QWORD[((8+160+0))+rbp] 5544 mulx rax,r10,r10 5545 add r14,r10 5546 mulx r9,r11,r11 5547 adc r15,r11 5548 adc r9,0 5549 imul rdx,r12 5550 add r15,rax 5551 adc r9,rdx 5552 mov r10,r13 5553 mov r11,r14 5554 mov r12,r15 5555 and r12,3 5556 mov r13,r15 5557 and r13,-4 5558 mov r14,r9 5559 shrd r15,r9,2 5560 shr r9,2 5561 add r15,r13 5562 adc r9,r14 5563 add r10,r15 5564 adc r11,r9 5565 adc r12,0 5566 vpaddd ymm2,ymm2,ymm6 5567 vpaddd ymm1,ymm1,ymm5 5568 vpaddd ymm0,ymm0,ymm4 5569 vpxor ymm15,ymm15,ymm3 5570 vpxor ymm14,ymm14,ymm2 5571 vpxor ymm13,ymm13,ymm1 5572 vpxor ymm12,ymm12,ymm0 5573 vpshufb ymm15,ymm15,ymm8 5574 vpshufb ymm14,ymm14,ymm8 5575 vpshufb ymm13,ymm13,ymm8 5576 vpshufb ymm12,ymm12,ymm8 5577 vpaddd ymm11,ymm11,ymm15 5578 vpaddd ymm10,ymm10,ymm14 5579 vpaddd ymm9,ymm9,ymm13 5580 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 5581 vpxor ymm7,ymm7,ymm11 5582 vpxor ymm6,ymm6,ymm10 5583 vpxor ymm5,ymm5,ymm9 5584 vpxor ymm4,ymm4,ymm8 5585 vmovdqa YMMWORD[(160+128)+rbp],ymm8 5586 vpsrld ymm8,ymm7,25 5587 vpslld ymm7,ymm7,32-25 5588 vpxor ymm7,ymm7,ymm8 5589 vpsrld ymm8,ymm6,25 5590 vpslld ymm6,ymm6,32-25 5591 vpxor ymm6,ymm6,ymm8 5592 vpsrld ymm8,ymm5,25 5593 vpslld ymm5,ymm5,32-25 5594 vpxor ymm5,ymm5,ymm8 5595 vpsrld ymm8,ymm4,25 5596 vpslld ymm4,ymm4,32-25 5597 vpxor ymm4,ymm4,ymm8 5598 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 5599 vpalignr ymm7,ymm7,ymm7,4 5600 vpalignr ymm11,ymm11,ymm11,8 5601 vpalignr ymm15,ymm15,ymm15,12 5602 vpalignr ymm6,ymm6,ymm6,4 5603 vpalignr ymm10,ymm10,ymm10,8 5604 vpalignr ymm14,ymm14,ymm14,12 5605 vpalignr ymm5,ymm5,ymm5,4 5606 vpalignr ymm9,ymm9,ymm9,8 5607 
vpalignr ymm13,ymm13,ymm13,12 5608 vpalignr ymm4,ymm4,ymm4,4 5609 vpalignr ymm8,ymm8,ymm8,8 5610 vpalignr ymm12,ymm12,ymm12,12 5611 vmovdqa YMMWORD[(160+128)+rbp],ymm8 5612 vmovdqa ymm8,YMMWORD[$L$rol16] 5613 vpaddd ymm3,ymm3,ymm7 5614 add r10,QWORD[((0+16))+r8] 5615 adc r11,QWORD[((8+16))+r8] 5616 adc r12,1 5617 mov rdx,QWORD[((0+160+0))+rbp] 5618 mov r15,rdx 5619 mulx r14,r13,r10 5620 mulx rdx,rax,r11 5621 imul r15,r12 5622 add r14,rax 5623 adc r15,rdx 5624 mov rdx,QWORD[((8+160+0))+rbp] 5625 mulx rax,r10,r10 5626 add r14,r10 5627 mulx r9,r11,r11 5628 adc r15,r11 5629 adc r9,0 5630 imul rdx,r12 5631 add r15,rax 5632 adc r9,rdx 5633 mov r10,r13 5634 mov r11,r14 5635 mov r12,r15 5636 and r12,3 5637 mov r13,r15 5638 and r13,-4 5639 mov r14,r9 5640 shrd r15,r9,2 5641 shr r9,2 5642 add r15,r13 5643 adc r9,r14 5644 add r10,r15 5645 adc r11,r9 5646 adc r12,0 5647 5648 lea r8,[32+r8] 5649 vpaddd ymm2,ymm2,ymm6 5650 vpaddd ymm1,ymm1,ymm5 5651 vpaddd ymm0,ymm0,ymm4 5652 vpxor ymm15,ymm15,ymm3 5653 vpxor ymm14,ymm14,ymm2 5654 vpxor ymm13,ymm13,ymm1 5655 vpxor ymm12,ymm12,ymm0 5656 vpshufb ymm15,ymm15,ymm8 5657 vpshufb ymm14,ymm14,ymm8 5658 vpshufb ymm13,ymm13,ymm8 5659 vpshufb ymm12,ymm12,ymm8 5660 vpaddd ymm11,ymm11,ymm15 5661 vpaddd ymm10,ymm10,ymm14 5662 vpaddd ymm9,ymm9,ymm13 5663 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 5664 vpxor ymm7,ymm7,ymm11 5665 vpxor ymm6,ymm6,ymm10 5666 vpxor ymm5,ymm5,ymm9 5667 vpxor ymm4,ymm4,ymm8 5668 vmovdqa YMMWORD[(160+128)+rbp],ymm8 5669 vpsrld ymm8,ymm7,20 5670 vpslld ymm7,ymm7,32-20 5671 vpxor ymm7,ymm7,ymm8 5672 vpsrld ymm8,ymm6,20 5673 vpslld ymm6,ymm6,32-20 5674 vpxor ymm6,ymm6,ymm8 5675 vpsrld ymm8,ymm5,20 5676 vpslld ymm5,ymm5,32-20 5677 vpxor ymm5,ymm5,ymm8 5678 vpsrld ymm8,ymm4,20 5679 vpslld ymm4,ymm4,32-20 5680 vpxor ymm4,ymm4,ymm8 5681 vmovdqa ymm8,YMMWORD[$L$rol8] 5682 vpaddd ymm3,ymm3,ymm7 5683 vpaddd ymm2,ymm2,ymm6 5684 vpaddd ymm1,ymm1,ymm5 5685 vpaddd ymm0,ymm0,ymm4 5686 vpxor ymm15,ymm15,ymm3 5687 vpxor 
ymm14,ymm14,ymm2 5688 vpxor ymm13,ymm13,ymm1 5689 vpxor ymm12,ymm12,ymm0 5690 vpshufb ymm15,ymm15,ymm8 5691 vpshufb ymm14,ymm14,ymm8 5692 vpshufb ymm13,ymm13,ymm8 5693 vpshufb ymm12,ymm12,ymm8 5694 vpaddd ymm11,ymm11,ymm15 5695 vpaddd ymm10,ymm10,ymm14 5696 vpaddd ymm9,ymm9,ymm13 5697 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 5698 vpxor ymm7,ymm7,ymm11 5699 vpxor ymm6,ymm6,ymm10 5700 vpxor ymm5,ymm5,ymm9 5701 vpxor ymm4,ymm4,ymm8 5702 vmovdqa YMMWORD[(160+128)+rbp],ymm8 5703 vpsrld ymm8,ymm7,25 5704 vpslld ymm7,ymm7,32-25 5705 vpxor ymm7,ymm7,ymm8 5706 vpsrld ymm8,ymm6,25 5707 vpslld ymm6,ymm6,32-25 5708 vpxor ymm6,ymm6,ymm8 5709 vpsrld ymm8,ymm5,25 5710 vpslld ymm5,ymm5,32-25 5711 vpxor ymm5,ymm5,ymm8 5712 vpsrld ymm8,ymm4,25 5713 vpslld ymm4,ymm4,32-25 5714 vpxor ymm4,ymm4,ymm8 5715 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 5716 vpalignr ymm7,ymm7,ymm7,12 5717 vpalignr ymm11,ymm11,ymm11,8 5718 vpalignr ymm15,ymm15,ymm15,4 5719 vpalignr ymm6,ymm6,ymm6,12 5720 vpalignr ymm10,ymm10,ymm10,8 5721 vpalignr ymm14,ymm14,ymm14,4 5722 vpalignr ymm5,ymm5,ymm5,12 5723 vpalignr ymm9,ymm9,ymm9,8 5724 vpalignr ymm13,ymm13,ymm13,4 5725 vpalignr ymm4,ymm4,ymm4,12 5726 vpalignr ymm8,ymm8,ymm8,8 5727 vpalignr ymm12,ymm12,ymm12,4 5728 5729 inc rcx 5730 cmp rcx,4 5731 jl NEAR $L$open_avx2_tail_512_rounds_and_x2hash 5732 cmp rcx,10 5733 jne NEAR $L$open_avx2_tail_512_rounds_and_x1hash 5734 mov rcx,rbx 5735 sub rcx,12*32 5736 and rcx,-16 5737$L$open_avx2_tail_512_hash: 5738 test rcx,rcx 5739 je NEAR $L$open_avx2_tail_512_done 5740 add r10,QWORD[((0+0))+r8] 5741 adc r11,QWORD[((8+0))+r8] 5742 adc r12,1 5743 mov rdx,QWORD[((0+160+0))+rbp] 5744 mov r15,rdx 5745 mulx r14,r13,r10 5746 mulx rdx,rax,r11 5747 imul r15,r12 5748 add r14,rax 5749 adc r15,rdx 5750 mov rdx,QWORD[((8+160+0))+rbp] 5751 mulx rax,r10,r10 5752 add r14,r10 5753 mulx r9,r11,r11 5754 adc r15,r11 5755 adc r9,0 5756 imul rdx,r12 5757 add r15,rax 5758 adc r9,rdx 5759 mov r10,r13 5760 mov r11,r14 5761 mov r12,r15 5762 and r12,3 
5763 mov r13,r15 5764 and r13,-4 5765 mov r14,r9 5766 shrd r15,r9,2 5767 shr r9,2 5768 add r15,r13 5769 adc r9,r14 5770 add r10,r15 5771 adc r11,r9 5772 adc r12,0 5773 5774 lea r8,[16+r8] 5775 sub rcx,2*8 5776 jmp NEAR $L$open_avx2_tail_512_hash 5777$L$open_avx2_tail_512_done: 5778 vpaddd ymm3,ymm3,YMMWORD[$L$chacha20_consts] 5779 vpaddd ymm7,ymm7,YMMWORD[((160+64))+rbp] 5780 vpaddd ymm11,ymm11,YMMWORD[((160+96))+rbp] 5781 vpaddd ymm15,ymm15,YMMWORD[((160+256))+rbp] 5782 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] 5783 vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] 5784 vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] 5785 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] 5786 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 5787 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] 5788 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] 5789 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 5790 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 5791 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 5792 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 5793 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 5794 5795 vmovdqa YMMWORD[(160+128)+rbp],ymm0 5796 vperm2i128 ymm0,ymm7,ymm3,0x02 5797 vperm2i128 ymm7,ymm7,ymm3,0x13 5798 vperm2i128 ymm3,ymm15,ymm11,0x02 5799 vperm2i128 ymm11,ymm15,ymm11,0x13 5800 vpxor ymm0,ymm0,YMMWORD[((0+0))+rsi] 5801 vpxor ymm3,ymm3,YMMWORD[((32+0))+rsi] 5802 vpxor ymm7,ymm7,YMMWORD[((64+0))+rsi] 5803 vpxor ymm11,ymm11,YMMWORD[((96+0))+rsi] 5804 vmovdqu YMMWORD[(0+0)+rdi],ymm0 5805 vmovdqu YMMWORD[(32+0)+rdi],ymm3 5806 vmovdqu YMMWORD[(64+0)+rdi],ymm7 5807 vmovdqu YMMWORD[(96+0)+rdi],ymm11 5808 5809 vmovdqa ymm0,YMMWORD[((160+128))+rbp] 5810 vperm2i128 ymm3,ymm6,ymm2,0x02 5811 vperm2i128 ymm6,ymm6,ymm2,0x13 5812 vperm2i128 ymm2,ymm14,ymm10,0x02 5813 vperm2i128 ymm10,ymm14,ymm10,0x13 5814 vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi] 5815 vpxor ymm2,ymm2,YMMWORD[((32+128))+rsi] 5816 vpxor ymm6,ymm6,YMMWORD[((64+128))+rsi] 5817 vpxor ymm10,ymm10,YMMWORD[((96+128))+rsi] 5818 vmovdqu YMMWORD[(0+128)+rdi],ymm3 5819 vmovdqu 
YMMWORD[(32+128)+rdi],ymm2 5820 vmovdqu YMMWORD[(64+128)+rdi],ymm6 5821 vmovdqu YMMWORD[(96+128)+rdi],ymm10 5822 vperm2i128 ymm3,ymm5,ymm1,0x02 5823 vperm2i128 ymm5,ymm5,ymm1,0x13 5824 vperm2i128 ymm1,ymm13,ymm9,0x02 5825 vperm2i128 ymm9,ymm13,ymm9,0x13 5826 vpxor ymm3,ymm3,YMMWORD[((0+256))+rsi] 5827 vpxor ymm1,ymm1,YMMWORD[((32+256))+rsi] 5828 vpxor ymm5,ymm5,YMMWORD[((64+256))+rsi] 5829 vpxor ymm9,ymm9,YMMWORD[((96+256))+rsi] 5830 vmovdqu YMMWORD[(0+256)+rdi],ymm3 5831 vmovdqu YMMWORD[(32+256)+rdi],ymm1 5832 vmovdqu YMMWORD[(64+256)+rdi],ymm5 5833 vmovdqu YMMWORD[(96+256)+rdi],ymm9 5834 vperm2i128 ymm3,ymm4,ymm0,0x13 5835 vperm2i128 ymm0,ymm4,ymm0,0x02 5836 vperm2i128 ymm4,ymm12,ymm8,0x02 5837 vperm2i128 ymm12,ymm12,ymm8,0x13 5838 vmovdqa ymm8,ymm3 5839 5840 lea rsi,[384+rsi] 5841 lea rdi,[384+rdi] 5842 sub rbx,12*32 5843$L$open_avx2_tail_128_xor: 5844 cmp rbx,32 5845 jb NEAR $L$open_avx2_tail_32_xor 5846 sub rbx,32 5847 vpxor ymm0,ymm0,YMMWORD[rsi] 5848 vmovdqu YMMWORD[rdi],ymm0 5849 lea rsi,[32+rsi] 5850 lea rdi,[32+rdi] 5851 vmovdqa ymm0,ymm4 5852 vmovdqa ymm4,ymm8 5853 vmovdqa ymm8,ymm12 5854 jmp NEAR $L$open_avx2_tail_128_xor 5855$L$open_avx2_tail_32_xor: 5856 cmp rbx,16 5857 vmovdqa xmm1,xmm0 5858 jb NEAR $L$open_avx2_exit 5859 sub rbx,16 5860 5861 vpxor xmm1,xmm0,XMMWORD[rsi] 5862 vmovdqu XMMWORD[rdi],xmm1 5863 lea rsi,[16+rsi] 5864 lea rdi,[16+rdi] 5865 vperm2i128 ymm0,ymm0,ymm0,0x11 5866 vmovdqa xmm1,xmm0 5867$L$open_avx2_exit: 5868 vzeroupper 5869 jmp NEAR $L$open_sse_tail_16 5870 5871$L$open_avx2_192: 5872 vmovdqa ymm1,ymm0 5873 vmovdqa ymm2,ymm0 5874 vmovdqa ymm5,ymm4 5875 vmovdqa ymm6,ymm4 5876 vmovdqa ymm9,ymm8 5877 vmovdqa ymm10,ymm8 5878 vpaddd ymm13,ymm12,YMMWORD[$L$avx2_inc] 5879 vmovdqa ymm11,ymm12 5880 vmovdqa ymm15,ymm13 5881 mov r10,10 5882$L$open_avx2_192_rounds: 5883 vpaddd ymm0,ymm0,ymm4 5884 vpxor ymm12,ymm12,ymm0 5885 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 5886 vpaddd ymm8,ymm8,ymm12 5887 vpxor ymm4,ymm4,ymm8 5888 vpsrld ymm3,ymm4,20 
5889 vpslld ymm4,ymm4,12 5890 vpxor ymm4,ymm4,ymm3 5891 vpaddd ymm0,ymm0,ymm4 5892 vpxor ymm12,ymm12,ymm0 5893 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 5894 vpaddd ymm8,ymm8,ymm12 5895 vpxor ymm4,ymm4,ymm8 5896 vpslld ymm3,ymm4,7 5897 vpsrld ymm4,ymm4,25 5898 vpxor ymm4,ymm4,ymm3 5899 vpalignr ymm12,ymm12,ymm12,12 5900 vpalignr ymm8,ymm8,ymm8,8 5901 vpalignr ymm4,ymm4,ymm4,4 5902 vpaddd ymm1,ymm1,ymm5 5903 vpxor ymm13,ymm13,ymm1 5904 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 5905 vpaddd ymm9,ymm9,ymm13 5906 vpxor ymm5,ymm5,ymm9 5907 vpsrld ymm3,ymm5,20 5908 vpslld ymm5,ymm5,12 5909 vpxor ymm5,ymm5,ymm3 5910 vpaddd ymm1,ymm1,ymm5 5911 vpxor ymm13,ymm13,ymm1 5912 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 5913 vpaddd ymm9,ymm9,ymm13 5914 vpxor ymm5,ymm5,ymm9 5915 vpslld ymm3,ymm5,7 5916 vpsrld ymm5,ymm5,25 5917 vpxor ymm5,ymm5,ymm3 5918 vpalignr ymm13,ymm13,ymm13,12 5919 vpalignr ymm9,ymm9,ymm9,8 5920 vpalignr ymm5,ymm5,ymm5,4 5921 vpaddd ymm0,ymm0,ymm4 5922 vpxor ymm12,ymm12,ymm0 5923 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 5924 vpaddd ymm8,ymm8,ymm12 5925 vpxor ymm4,ymm4,ymm8 5926 vpsrld ymm3,ymm4,20 5927 vpslld ymm4,ymm4,12 5928 vpxor ymm4,ymm4,ymm3 5929 vpaddd ymm0,ymm0,ymm4 5930 vpxor ymm12,ymm12,ymm0 5931 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 5932 vpaddd ymm8,ymm8,ymm12 5933 vpxor ymm4,ymm4,ymm8 5934 vpslld ymm3,ymm4,7 5935 vpsrld ymm4,ymm4,25 5936 vpxor ymm4,ymm4,ymm3 5937 vpalignr ymm12,ymm12,ymm12,4 5938 vpalignr ymm8,ymm8,ymm8,8 5939 vpalignr ymm4,ymm4,ymm4,12 5940 vpaddd ymm1,ymm1,ymm5 5941 vpxor ymm13,ymm13,ymm1 5942 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 5943 vpaddd ymm9,ymm9,ymm13 5944 vpxor ymm5,ymm5,ymm9 5945 vpsrld ymm3,ymm5,20 5946 vpslld ymm5,ymm5,12 5947 vpxor ymm5,ymm5,ymm3 5948 vpaddd ymm1,ymm1,ymm5 5949 vpxor ymm13,ymm13,ymm1 5950 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 5951 vpaddd ymm9,ymm9,ymm13 5952 vpxor ymm5,ymm5,ymm9 5953 vpslld ymm3,ymm5,7 5954 vpsrld ymm5,ymm5,25 5955 vpxor ymm5,ymm5,ymm3 5956 vpalignr ymm13,ymm13,ymm13,4 5957 vpalignr ymm9,ymm9,ymm9,8 5958 
vpalignr ymm5,ymm5,ymm5,12 5959 5960 dec r10 5961 jne NEAR $L$open_avx2_192_rounds 5962 vpaddd ymm0,ymm0,ymm2 5963 vpaddd ymm1,ymm1,ymm2 5964 vpaddd ymm4,ymm4,ymm6 5965 vpaddd ymm5,ymm5,ymm6 5966 vpaddd ymm8,ymm8,ymm10 5967 vpaddd ymm9,ymm9,ymm10 5968 vpaddd ymm12,ymm12,ymm11 5969 vpaddd ymm13,ymm13,ymm15 5970 vperm2i128 ymm3,ymm4,ymm0,0x02 5971 5972 vpand ymm3,ymm3,YMMWORD[$L$clamp] 5973 vmovdqa YMMWORD[(160+0)+rbp],ymm3 5974 5975 vperm2i128 ymm0,ymm4,ymm0,0x13 5976 vperm2i128 ymm4,ymm12,ymm8,0x13 5977 vperm2i128 ymm8,ymm5,ymm1,0x02 5978 vperm2i128 ymm12,ymm13,ymm9,0x02 5979 vperm2i128 ymm1,ymm5,ymm1,0x13 5980 vperm2i128 ymm5,ymm13,ymm9,0x13 5981$L$open_avx2_short: 5982 mov r8,r8 5983 call poly_hash_ad_internal 5984$L$open_avx2_short_hash_and_xor_loop: 5985 cmp rbx,32 5986 jb NEAR $L$open_avx2_short_tail_32 5987 sub rbx,32 5988 add r10,QWORD[((0+0))+rsi] 5989 adc r11,QWORD[((8+0))+rsi] 5990 adc r12,1 5991 mov rax,QWORD[((0+160+0))+rbp] 5992 mov r15,rax 5993 mul r10 5994 mov r13,rax 5995 mov r14,rdx 5996 mov rax,QWORD[((0+160+0))+rbp] 5997 mul r11 5998 imul r15,r12 5999 add r14,rax 6000 adc r15,rdx 6001 mov rax,QWORD[((8+160+0))+rbp] 6002 mov r9,rax 6003 mul r10 6004 add r14,rax 6005 adc rdx,0 6006 mov r10,rdx 6007 mov rax,QWORD[((8+160+0))+rbp] 6008 mul r11 6009 add r15,rax 6010 adc rdx,0 6011 imul r9,r12 6012 add r15,r10 6013 adc r9,rdx 6014 mov r10,r13 6015 mov r11,r14 6016 mov r12,r15 6017 and r12,3 6018 mov r13,r15 6019 and r13,-4 6020 mov r14,r9 6021 shrd r15,r9,2 6022 shr r9,2 6023 add r15,r13 6024 adc r9,r14 6025 add r10,r15 6026 adc r11,r9 6027 adc r12,0 6028 add r10,QWORD[((0+16))+rsi] 6029 adc r11,QWORD[((8+16))+rsi] 6030 adc r12,1 6031 mov rax,QWORD[((0+160+0))+rbp] 6032 mov r15,rax 6033 mul r10 6034 mov r13,rax 6035 mov r14,rdx 6036 mov rax,QWORD[((0+160+0))+rbp] 6037 mul r11 6038 imul r15,r12 6039 add r14,rax 6040 adc r15,rdx 6041 mov rax,QWORD[((8+160+0))+rbp] 6042 mov r9,rax 6043 mul r10 6044 add r14,rax 6045 adc rdx,0 6046 mov r10,rdx 6047 mov 
rax,QWORD[((8+160+0))+rbp] 6048 mul r11 6049 add r15,rax 6050 adc rdx,0 6051 imul r9,r12 6052 add r15,r10 6053 adc r9,rdx 6054 mov r10,r13 6055 mov r11,r14 6056 mov r12,r15 6057 and r12,3 6058 mov r13,r15 6059 and r13,-4 6060 mov r14,r9 6061 shrd r15,r9,2 6062 shr r9,2 6063 add r15,r13 6064 adc r9,r14 6065 add r10,r15 6066 adc r11,r9 6067 adc r12,0 6068 6069 6070 vpxor ymm0,ymm0,YMMWORD[rsi] 6071 vmovdqu YMMWORD[rdi],ymm0 6072 lea rsi,[32+rsi] 6073 lea rdi,[32+rdi] 6074 6075 vmovdqa ymm0,ymm4 6076 vmovdqa ymm4,ymm8 6077 vmovdqa ymm8,ymm12 6078 vmovdqa ymm12,ymm1 6079 vmovdqa ymm1,ymm5 6080 vmovdqa ymm5,ymm9 6081 vmovdqa ymm9,ymm13 6082 vmovdqa ymm13,ymm2 6083 vmovdqa ymm2,ymm6 6084 jmp NEAR $L$open_avx2_short_hash_and_xor_loop 6085$L$open_avx2_short_tail_32: 6086 cmp rbx,16 6087 vmovdqa xmm1,xmm0 6088 jb NEAR $L$open_avx2_short_tail_32_exit 6089 sub rbx,16 6090 add r10,QWORD[((0+0))+rsi] 6091 adc r11,QWORD[((8+0))+rsi] 6092 adc r12,1 6093 mov rax,QWORD[((0+160+0))+rbp] 6094 mov r15,rax 6095 mul r10 6096 mov r13,rax 6097 mov r14,rdx 6098 mov rax,QWORD[((0+160+0))+rbp] 6099 mul r11 6100 imul r15,r12 6101 add r14,rax 6102 adc r15,rdx 6103 mov rax,QWORD[((8+160+0))+rbp] 6104 mov r9,rax 6105 mul r10 6106 add r14,rax 6107 adc rdx,0 6108 mov r10,rdx 6109 mov rax,QWORD[((8+160+0))+rbp] 6110 mul r11 6111 add r15,rax 6112 adc rdx,0 6113 imul r9,r12 6114 add r15,r10 6115 adc r9,rdx 6116 mov r10,r13 6117 mov r11,r14 6118 mov r12,r15 6119 and r12,3 6120 mov r13,r15 6121 and r13,-4 6122 mov r14,r9 6123 shrd r15,r9,2 6124 shr r9,2 6125 add r15,r13 6126 adc r9,r14 6127 add r10,r15 6128 adc r11,r9 6129 adc r12,0 6130 6131 vpxor xmm3,xmm0,XMMWORD[rsi] 6132 vmovdqu XMMWORD[rdi],xmm3 6133 lea rsi,[16+rsi] 6134 lea rdi,[16+rdi] 6135 vextracti128 xmm1,ymm0,1 6136$L$open_avx2_short_tail_32_exit: 6137 vzeroupper 6138 jmp NEAR $L$open_sse_tail_16 6139 6140$L$open_avx2_320: 6141 vmovdqa ymm1,ymm0 6142 vmovdqa ymm2,ymm0 6143 vmovdqa ymm5,ymm4 6144 vmovdqa ymm6,ymm4 6145 vmovdqa ymm9,ymm8 
6146 vmovdqa ymm10,ymm8 6147 vpaddd ymm13,ymm12,YMMWORD[$L$avx2_inc] 6148 vpaddd ymm14,ymm13,YMMWORD[$L$avx2_inc] 6149 vmovdqa ymm7,ymm4 6150 vmovdqa ymm11,ymm8 6151 vmovdqa YMMWORD[(160+160)+rbp],ymm12 6152 vmovdqa YMMWORD[(160+192)+rbp],ymm13 6153 vmovdqa YMMWORD[(160+224)+rbp],ymm14 6154 mov r10,10 6155$L$open_avx2_320_rounds: 6156 vpaddd ymm0,ymm0,ymm4 6157 vpxor ymm12,ymm12,ymm0 6158 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 6159 vpaddd ymm8,ymm8,ymm12 6160 vpxor ymm4,ymm4,ymm8 6161 vpsrld ymm3,ymm4,20 6162 vpslld ymm4,ymm4,12 6163 vpxor ymm4,ymm4,ymm3 6164 vpaddd ymm0,ymm0,ymm4 6165 vpxor ymm12,ymm12,ymm0 6166 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 6167 vpaddd ymm8,ymm8,ymm12 6168 vpxor ymm4,ymm4,ymm8 6169 vpslld ymm3,ymm4,7 6170 vpsrld ymm4,ymm4,25 6171 vpxor ymm4,ymm4,ymm3 6172 vpalignr ymm12,ymm12,ymm12,12 6173 vpalignr ymm8,ymm8,ymm8,8 6174 vpalignr ymm4,ymm4,ymm4,4 6175 vpaddd ymm1,ymm1,ymm5 6176 vpxor ymm13,ymm13,ymm1 6177 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 6178 vpaddd ymm9,ymm9,ymm13 6179 vpxor ymm5,ymm5,ymm9 6180 vpsrld ymm3,ymm5,20 6181 vpslld ymm5,ymm5,12 6182 vpxor ymm5,ymm5,ymm3 6183 vpaddd ymm1,ymm1,ymm5 6184 vpxor ymm13,ymm13,ymm1 6185 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 6186 vpaddd ymm9,ymm9,ymm13 6187 vpxor ymm5,ymm5,ymm9 6188 vpslld ymm3,ymm5,7 6189 vpsrld ymm5,ymm5,25 6190 vpxor ymm5,ymm5,ymm3 6191 vpalignr ymm13,ymm13,ymm13,12 6192 vpalignr ymm9,ymm9,ymm9,8 6193 vpalignr ymm5,ymm5,ymm5,4 6194 vpaddd ymm2,ymm2,ymm6 6195 vpxor ymm14,ymm14,ymm2 6196 vpshufb ymm14,ymm14,YMMWORD[$L$rol16] 6197 vpaddd ymm10,ymm10,ymm14 6198 vpxor ymm6,ymm6,ymm10 6199 vpsrld ymm3,ymm6,20 6200 vpslld ymm6,ymm6,12 6201 vpxor ymm6,ymm6,ymm3 6202 vpaddd ymm2,ymm2,ymm6 6203 vpxor ymm14,ymm14,ymm2 6204 vpshufb ymm14,ymm14,YMMWORD[$L$rol8] 6205 vpaddd ymm10,ymm10,ymm14 6206 vpxor ymm6,ymm6,ymm10 6207 vpslld ymm3,ymm6,7 6208 vpsrld ymm6,ymm6,25 6209 vpxor ymm6,ymm6,ymm3 6210 vpalignr ymm14,ymm14,ymm14,12 6211 vpalignr ymm10,ymm10,ymm10,8 6212 vpalignr ymm6,ymm6,ymm6,4 6213 
vpaddd ymm0,ymm0,ymm4 6214 vpxor ymm12,ymm12,ymm0 6215 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 6216 vpaddd ymm8,ymm8,ymm12 6217 vpxor ymm4,ymm4,ymm8 6218 vpsrld ymm3,ymm4,20 6219 vpslld ymm4,ymm4,12 6220 vpxor ymm4,ymm4,ymm3 6221 vpaddd ymm0,ymm0,ymm4 6222 vpxor ymm12,ymm12,ymm0 6223 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 6224 vpaddd ymm8,ymm8,ymm12 6225 vpxor ymm4,ymm4,ymm8 6226 vpslld ymm3,ymm4,7 6227 vpsrld ymm4,ymm4,25 6228 vpxor ymm4,ymm4,ymm3 6229 vpalignr ymm12,ymm12,ymm12,4 6230 vpalignr ymm8,ymm8,ymm8,8 6231 vpalignr ymm4,ymm4,ymm4,12 6232 vpaddd ymm1,ymm1,ymm5 6233 vpxor ymm13,ymm13,ymm1 6234 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 6235 vpaddd ymm9,ymm9,ymm13 6236 vpxor ymm5,ymm5,ymm9 6237 vpsrld ymm3,ymm5,20 6238 vpslld ymm5,ymm5,12 6239 vpxor ymm5,ymm5,ymm3 6240 vpaddd ymm1,ymm1,ymm5 6241 vpxor ymm13,ymm13,ymm1 6242 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 6243 vpaddd ymm9,ymm9,ymm13 6244 vpxor ymm5,ymm5,ymm9 6245 vpslld ymm3,ymm5,7 6246 vpsrld ymm5,ymm5,25 6247 vpxor ymm5,ymm5,ymm3 6248 vpalignr ymm13,ymm13,ymm13,4 6249 vpalignr ymm9,ymm9,ymm9,8 6250 vpalignr ymm5,ymm5,ymm5,12 6251 vpaddd ymm2,ymm2,ymm6 6252 vpxor ymm14,ymm14,ymm2 6253 vpshufb ymm14,ymm14,YMMWORD[$L$rol16] 6254 vpaddd ymm10,ymm10,ymm14 6255 vpxor ymm6,ymm6,ymm10 6256 vpsrld ymm3,ymm6,20 6257 vpslld ymm6,ymm6,12 6258 vpxor ymm6,ymm6,ymm3 6259 vpaddd ymm2,ymm2,ymm6 6260 vpxor ymm14,ymm14,ymm2 6261 vpshufb ymm14,ymm14,YMMWORD[$L$rol8] 6262 vpaddd ymm10,ymm10,ymm14 6263 vpxor ymm6,ymm6,ymm10 6264 vpslld ymm3,ymm6,7 6265 vpsrld ymm6,ymm6,25 6266 vpxor ymm6,ymm6,ymm3 6267 vpalignr ymm14,ymm14,ymm14,4 6268 vpalignr ymm10,ymm10,ymm10,8 6269 vpalignr ymm6,ymm6,ymm6,12 6270 6271 dec r10 6272 jne NEAR $L$open_avx2_320_rounds 6273 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 6274 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 6275 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] 6276 vpaddd ymm4,ymm4,ymm7 6277 vpaddd ymm5,ymm5,ymm7 6278 vpaddd ymm6,ymm6,ymm7 6279 vpaddd ymm8,ymm8,ymm11 6280 vpaddd ymm9,ymm9,ymm11 
6281 vpaddd ymm10,ymm10,ymm11 6282 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 6283 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 6284 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] 6285 vperm2i128 ymm3,ymm4,ymm0,0x02 6286 6287 vpand ymm3,ymm3,YMMWORD[$L$clamp] 6288 vmovdqa YMMWORD[(160+0)+rbp],ymm3 6289 6290 vperm2i128 ymm0,ymm4,ymm0,0x13 6291 vperm2i128 ymm4,ymm12,ymm8,0x13 6292 vperm2i128 ymm8,ymm5,ymm1,0x02 6293 vperm2i128 ymm12,ymm13,ymm9,0x02 6294 vperm2i128 ymm1,ymm5,ymm1,0x13 6295 vperm2i128 ymm5,ymm13,ymm9,0x13 6296 vperm2i128 ymm9,ymm6,ymm2,0x02 6297 vperm2i128 ymm13,ymm14,ymm10,0x02 6298 vperm2i128 ymm2,ymm6,ymm2,0x13 6299 vperm2i128 ymm6,ymm14,ymm10,0x13 6300 jmp NEAR $L$open_avx2_short 6301 6302 6303 6304 6305 6306ALIGN 64 6307chacha20_poly1305_seal_avx2: 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 vzeroupper 6321 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 6322 vbroadcasti128 ymm4,XMMWORD[r9] 6323 vbroadcasti128 ymm8,XMMWORD[16+r9] 6324 vbroadcasti128 ymm12,XMMWORD[32+r9] 6325 vpaddd ymm12,ymm12,YMMWORD[$L$avx2_init] 6326 cmp rbx,6*32 6327 jbe NEAR $L$seal_avx2_192 6328 cmp rbx,10*32 6329 jbe NEAR $L$seal_avx2_320 6330 vmovdqa ymm1,ymm0 6331 vmovdqa ymm2,ymm0 6332 vmovdqa ymm3,ymm0 6333 vmovdqa ymm5,ymm4 6334 vmovdqa ymm6,ymm4 6335 vmovdqa ymm7,ymm4 6336 vmovdqa YMMWORD[(160+64)+rbp],ymm4 6337 vmovdqa ymm9,ymm8 6338 vmovdqa ymm10,ymm8 6339 vmovdqa ymm11,ymm8 6340 vmovdqa YMMWORD[(160+96)+rbp],ymm8 6341 vmovdqa ymm15,ymm12 6342 vpaddd ymm14,ymm15,YMMWORD[$L$avx2_inc] 6343 vpaddd ymm13,ymm14,YMMWORD[$L$avx2_inc] 6344 vpaddd ymm12,ymm13,YMMWORD[$L$avx2_inc] 6345 vmovdqa YMMWORD[(160+160)+rbp],ymm12 6346 vmovdqa YMMWORD[(160+192)+rbp],ymm13 6347 vmovdqa YMMWORD[(160+224)+rbp],ymm14 6348 vmovdqa YMMWORD[(160+256)+rbp],ymm15 6349 mov r10,10 6350$L$seal_avx2_init_rounds: 6351 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6352 vmovdqa ymm8,YMMWORD[$L$rol16] 6353 vpaddd ymm3,ymm3,ymm7 6354 vpaddd ymm2,ymm2,ymm6 6355 vpaddd ymm1,ymm1,ymm5 6356 vpaddd 
ymm0,ymm0,ymm4 6357 vpxor ymm15,ymm15,ymm3 6358 vpxor ymm14,ymm14,ymm2 6359 vpxor ymm13,ymm13,ymm1 6360 vpxor ymm12,ymm12,ymm0 6361 vpshufb ymm15,ymm15,ymm8 6362 vpshufb ymm14,ymm14,ymm8 6363 vpshufb ymm13,ymm13,ymm8 6364 vpshufb ymm12,ymm12,ymm8 6365 vpaddd ymm11,ymm11,ymm15 6366 vpaddd ymm10,ymm10,ymm14 6367 vpaddd ymm9,ymm9,ymm13 6368 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6369 vpxor ymm7,ymm7,ymm11 6370 vpxor ymm6,ymm6,ymm10 6371 vpxor ymm5,ymm5,ymm9 6372 vpxor ymm4,ymm4,ymm8 6373 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6374 vpsrld ymm8,ymm7,20 6375 vpslld ymm7,ymm7,32-20 6376 vpxor ymm7,ymm7,ymm8 6377 vpsrld ymm8,ymm6,20 6378 vpslld ymm6,ymm6,32-20 6379 vpxor ymm6,ymm6,ymm8 6380 vpsrld ymm8,ymm5,20 6381 vpslld ymm5,ymm5,32-20 6382 vpxor ymm5,ymm5,ymm8 6383 vpsrld ymm8,ymm4,20 6384 vpslld ymm4,ymm4,32-20 6385 vpxor ymm4,ymm4,ymm8 6386 vmovdqa ymm8,YMMWORD[$L$rol8] 6387 vpaddd ymm3,ymm3,ymm7 6388 vpaddd ymm2,ymm2,ymm6 6389 vpaddd ymm1,ymm1,ymm5 6390 vpaddd ymm0,ymm0,ymm4 6391 vpxor ymm15,ymm15,ymm3 6392 vpxor ymm14,ymm14,ymm2 6393 vpxor ymm13,ymm13,ymm1 6394 vpxor ymm12,ymm12,ymm0 6395 vpshufb ymm15,ymm15,ymm8 6396 vpshufb ymm14,ymm14,ymm8 6397 vpshufb ymm13,ymm13,ymm8 6398 vpshufb ymm12,ymm12,ymm8 6399 vpaddd ymm11,ymm11,ymm15 6400 vpaddd ymm10,ymm10,ymm14 6401 vpaddd ymm9,ymm9,ymm13 6402 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6403 vpxor ymm7,ymm7,ymm11 6404 vpxor ymm6,ymm6,ymm10 6405 vpxor ymm5,ymm5,ymm9 6406 vpxor ymm4,ymm4,ymm8 6407 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6408 vpsrld ymm8,ymm7,25 6409 vpslld ymm7,ymm7,32-25 6410 vpxor ymm7,ymm7,ymm8 6411 vpsrld ymm8,ymm6,25 6412 vpslld ymm6,ymm6,32-25 6413 vpxor ymm6,ymm6,ymm8 6414 vpsrld ymm8,ymm5,25 6415 vpslld ymm5,ymm5,32-25 6416 vpxor ymm5,ymm5,ymm8 6417 vpsrld ymm8,ymm4,25 6418 vpslld ymm4,ymm4,32-25 6419 vpxor ymm4,ymm4,ymm8 6420 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 6421 vpalignr ymm7,ymm7,ymm7,4 6422 vpalignr ymm11,ymm11,ymm11,8 6423 vpalignr ymm15,ymm15,ymm15,12 6424 vpalignr ymm6,ymm6,ymm6,4 6425 
vpalignr ymm10,ymm10,ymm10,8 6426 vpalignr ymm14,ymm14,ymm14,12 6427 vpalignr ymm5,ymm5,ymm5,4 6428 vpalignr ymm9,ymm9,ymm9,8 6429 vpalignr ymm13,ymm13,ymm13,12 6430 vpalignr ymm4,ymm4,ymm4,4 6431 vpalignr ymm8,ymm8,ymm8,8 6432 vpalignr ymm12,ymm12,ymm12,12 6433 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6434 vmovdqa ymm8,YMMWORD[$L$rol16] 6435 vpaddd ymm3,ymm3,ymm7 6436 vpaddd ymm2,ymm2,ymm6 6437 vpaddd ymm1,ymm1,ymm5 6438 vpaddd ymm0,ymm0,ymm4 6439 vpxor ymm15,ymm15,ymm3 6440 vpxor ymm14,ymm14,ymm2 6441 vpxor ymm13,ymm13,ymm1 6442 vpxor ymm12,ymm12,ymm0 6443 vpshufb ymm15,ymm15,ymm8 6444 vpshufb ymm14,ymm14,ymm8 6445 vpshufb ymm13,ymm13,ymm8 6446 vpshufb ymm12,ymm12,ymm8 6447 vpaddd ymm11,ymm11,ymm15 6448 vpaddd ymm10,ymm10,ymm14 6449 vpaddd ymm9,ymm9,ymm13 6450 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6451 vpxor ymm7,ymm7,ymm11 6452 vpxor ymm6,ymm6,ymm10 6453 vpxor ymm5,ymm5,ymm9 6454 vpxor ymm4,ymm4,ymm8 6455 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6456 vpsrld ymm8,ymm7,20 6457 vpslld ymm7,ymm7,32-20 6458 vpxor ymm7,ymm7,ymm8 6459 vpsrld ymm8,ymm6,20 6460 vpslld ymm6,ymm6,32-20 6461 vpxor ymm6,ymm6,ymm8 6462 vpsrld ymm8,ymm5,20 6463 vpslld ymm5,ymm5,32-20 6464 vpxor ymm5,ymm5,ymm8 6465 vpsrld ymm8,ymm4,20 6466 vpslld ymm4,ymm4,32-20 6467 vpxor ymm4,ymm4,ymm8 6468 vmovdqa ymm8,YMMWORD[$L$rol8] 6469 vpaddd ymm3,ymm3,ymm7 6470 vpaddd ymm2,ymm2,ymm6 6471 vpaddd ymm1,ymm1,ymm5 6472 vpaddd ymm0,ymm0,ymm4 6473 vpxor ymm15,ymm15,ymm3 6474 vpxor ymm14,ymm14,ymm2 6475 vpxor ymm13,ymm13,ymm1 6476 vpxor ymm12,ymm12,ymm0 6477 vpshufb ymm15,ymm15,ymm8 6478 vpshufb ymm14,ymm14,ymm8 6479 vpshufb ymm13,ymm13,ymm8 6480 vpshufb ymm12,ymm12,ymm8 6481 vpaddd ymm11,ymm11,ymm15 6482 vpaddd ymm10,ymm10,ymm14 6483 vpaddd ymm9,ymm9,ymm13 6484 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6485 vpxor ymm7,ymm7,ymm11 6486 vpxor ymm6,ymm6,ymm10 6487 vpxor ymm5,ymm5,ymm9 6488 vpxor ymm4,ymm4,ymm8 6489 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6490 vpsrld ymm8,ymm7,25 6491 vpslld ymm7,ymm7,32-25 6492 vpxor 
ymm7,ymm7,ymm8 6493 vpsrld ymm8,ymm6,25 6494 vpslld ymm6,ymm6,32-25 6495 vpxor ymm6,ymm6,ymm8 6496 vpsrld ymm8,ymm5,25 6497 vpslld ymm5,ymm5,32-25 6498 vpxor ymm5,ymm5,ymm8 6499 vpsrld ymm8,ymm4,25 6500 vpslld ymm4,ymm4,32-25 6501 vpxor ymm4,ymm4,ymm8 6502 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 6503 vpalignr ymm7,ymm7,ymm7,12 6504 vpalignr ymm11,ymm11,ymm11,8 6505 vpalignr ymm15,ymm15,ymm15,4 6506 vpalignr ymm6,ymm6,ymm6,12 6507 vpalignr ymm10,ymm10,ymm10,8 6508 vpalignr ymm14,ymm14,ymm14,4 6509 vpalignr ymm5,ymm5,ymm5,12 6510 vpalignr ymm9,ymm9,ymm9,8 6511 vpalignr ymm13,ymm13,ymm13,4 6512 vpalignr ymm4,ymm4,ymm4,12 6513 vpalignr ymm8,ymm8,ymm8,8 6514 vpalignr ymm12,ymm12,ymm12,4 6515 6516 dec r10 6517 jnz NEAR $L$seal_avx2_init_rounds 6518 vpaddd ymm3,ymm3,YMMWORD[$L$chacha20_consts] 6519 vpaddd ymm7,ymm7,YMMWORD[((160+64))+rbp] 6520 vpaddd ymm11,ymm11,YMMWORD[((160+96))+rbp] 6521 vpaddd ymm15,ymm15,YMMWORD[((160+256))+rbp] 6522 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] 6523 vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] 6524 vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] 6525 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] 6526 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 6527 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] 6528 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] 6529 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 6530 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 6531 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 6532 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 6533 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 6534 6535 vperm2i128 ymm11,ymm15,ymm11,0x13 6536 vperm2i128 ymm15,ymm7,ymm3,0x02 6537 vperm2i128 ymm3,ymm7,ymm3,0x13 6538 vpand ymm15,ymm15,YMMWORD[$L$clamp] 6539 vmovdqa YMMWORD[(160+0)+rbp],ymm15 6540 mov r8,r8 6541 call poly_hash_ad_internal 6542 6543 vpxor ymm3,ymm3,YMMWORD[rsi] 6544 vpxor ymm11,ymm11,YMMWORD[32+rsi] 6545 vmovdqu YMMWORD[rdi],ymm3 6546 vmovdqu YMMWORD[32+rdi],ymm11 6547 vperm2i128 ymm15,ymm6,ymm2,0x02 6548 vperm2i128 ymm6,ymm6,ymm2,0x13 6549 vperm2i128 
ymm2,ymm14,ymm10,0x02 6550 vperm2i128 ymm10,ymm14,ymm10,0x13 6551 vpxor ymm15,ymm15,YMMWORD[((0+64))+rsi] 6552 vpxor ymm2,ymm2,YMMWORD[((32+64))+rsi] 6553 vpxor ymm6,ymm6,YMMWORD[((64+64))+rsi] 6554 vpxor ymm10,ymm10,YMMWORD[((96+64))+rsi] 6555 vmovdqu YMMWORD[(0+64)+rdi],ymm15 6556 vmovdqu YMMWORD[(32+64)+rdi],ymm2 6557 vmovdqu YMMWORD[(64+64)+rdi],ymm6 6558 vmovdqu YMMWORD[(96+64)+rdi],ymm10 6559 vperm2i128 ymm15,ymm5,ymm1,0x02 6560 vperm2i128 ymm5,ymm5,ymm1,0x13 6561 vperm2i128 ymm1,ymm13,ymm9,0x02 6562 vperm2i128 ymm9,ymm13,ymm9,0x13 6563 vpxor ymm15,ymm15,YMMWORD[((0+192))+rsi] 6564 vpxor ymm1,ymm1,YMMWORD[((32+192))+rsi] 6565 vpxor ymm5,ymm5,YMMWORD[((64+192))+rsi] 6566 vpxor ymm9,ymm9,YMMWORD[((96+192))+rsi] 6567 vmovdqu YMMWORD[(0+192)+rdi],ymm15 6568 vmovdqu YMMWORD[(32+192)+rdi],ymm1 6569 vmovdqu YMMWORD[(64+192)+rdi],ymm5 6570 vmovdqu YMMWORD[(96+192)+rdi],ymm9 6571 vperm2i128 ymm15,ymm4,ymm0,0x13 6572 vperm2i128 ymm0,ymm4,ymm0,0x02 6573 vperm2i128 ymm4,ymm12,ymm8,0x02 6574 vperm2i128 ymm12,ymm12,ymm8,0x13 6575 vmovdqa ymm8,ymm15 6576 6577 lea rsi,[320+rsi] 6578 sub rbx,10*32 6579 mov rcx,10*32 6580 cmp rbx,4*32 6581 jbe NEAR $L$seal_avx2_short_hash_remainder 6582 vpxor ymm0,ymm0,YMMWORD[rsi] 6583 vpxor ymm4,ymm4,YMMWORD[32+rsi] 6584 vpxor ymm8,ymm8,YMMWORD[64+rsi] 6585 vpxor ymm12,ymm12,YMMWORD[96+rsi] 6586 vmovdqu YMMWORD[320+rdi],ymm0 6587 vmovdqu YMMWORD[352+rdi],ymm4 6588 vmovdqu YMMWORD[384+rdi],ymm8 6589 vmovdqu YMMWORD[416+rdi],ymm12 6590 lea rsi,[128+rsi] 6591 sub rbx,4*32 6592 mov rcx,8 6593 mov r8,2 6594 cmp rbx,4*32 6595 jbe NEAR $L$seal_avx2_tail_128 6596 cmp rbx,8*32 6597 jbe NEAR $L$seal_avx2_tail_256 6598 cmp rbx,12*32 6599 jbe NEAR $L$seal_avx2_tail_384 6600 cmp rbx,16*32 6601 jbe NEAR $L$seal_avx2_tail_512 6602 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 6603 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 6604 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 6605 vmovdqa ymm1,ymm0 6606 vmovdqa ymm5,ymm4 6607 vmovdqa ymm9,ymm8 6608 vmovdqa ymm2,ymm0 6609 vmovdqa 
ymm6,ymm4 6610 vmovdqa ymm10,ymm8 6611 vmovdqa ymm3,ymm0 6612 vmovdqa ymm7,ymm4 6613 vmovdqa ymm11,ymm8 6614 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 6615 vpaddd ymm15,ymm12,YMMWORD[((160+160))+rbp] 6616 vpaddd ymm14,ymm12,ymm15 6617 vpaddd ymm13,ymm12,ymm14 6618 vpaddd ymm12,ymm12,ymm13 6619 vmovdqa YMMWORD[(160+256)+rbp],ymm15 6620 vmovdqa YMMWORD[(160+224)+rbp],ymm14 6621 vmovdqa YMMWORD[(160+192)+rbp],ymm13 6622 vmovdqa YMMWORD[(160+160)+rbp],ymm12 6623 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6624 vmovdqa ymm8,YMMWORD[$L$rol16] 6625 vpaddd ymm3,ymm3,ymm7 6626 vpaddd ymm2,ymm2,ymm6 6627 vpaddd ymm1,ymm1,ymm5 6628 vpaddd ymm0,ymm0,ymm4 6629 vpxor ymm15,ymm15,ymm3 6630 vpxor ymm14,ymm14,ymm2 6631 vpxor ymm13,ymm13,ymm1 6632 vpxor ymm12,ymm12,ymm0 6633 vpshufb ymm15,ymm15,ymm8 6634 vpshufb ymm14,ymm14,ymm8 6635 vpshufb ymm13,ymm13,ymm8 6636 vpshufb ymm12,ymm12,ymm8 6637 vpaddd ymm11,ymm11,ymm15 6638 vpaddd ymm10,ymm10,ymm14 6639 vpaddd ymm9,ymm9,ymm13 6640 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6641 vpxor ymm7,ymm7,ymm11 6642 vpxor ymm6,ymm6,ymm10 6643 vpxor ymm5,ymm5,ymm9 6644 vpxor ymm4,ymm4,ymm8 6645 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6646 vpsrld ymm8,ymm7,20 6647 vpslld ymm7,ymm7,32-20 6648 vpxor ymm7,ymm7,ymm8 6649 vpsrld ymm8,ymm6,20 6650 vpslld ymm6,ymm6,32-20 6651 vpxor ymm6,ymm6,ymm8 6652 vpsrld ymm8,ymm5,20 6653 vpslld ymm5,ymm5,32-20 6654 vpxor ymm5,ymm5,ymm8 6655 vpsrld ymm8,ymm4,20 6656 vpslld ymm4,ymm4,32-20 6657 vpxor ymm4,ymm4,ymm8 6658 vmovdqa ymm8,YMMWORD[$L$rol8] 6659 vpaddd ymm3,ymm3,ymm7 6660 vpaddd ymm2,ymm2,ymm6 6661 vpaddd ymm1,ymm1,ymm5 6662 vpaddd ymm0,ymm0,ymm4 6663 vpxor ymm15,ymm15,ymm3 6664 vpxor ymm14,ymm14,ymm2 6665 vpxor ymm13,ymm13,ymm1 6666 vpxor ymm12,ymm12,ymm0 6667 vpshufb ymm15,ymm15,ymm8 6668 vpshufb ymm14,ymm14,ymm8 6669 vpshufb ymm13,ymm13,ymm8 6670 vpshufb ymm12,ymm12,ymm8 6671 vpaddd ymm11,ymm11,ymm15 6672 vpaddd ymm10,ymm10,ymm14 6673 vpaddd ymm9,ymm9,ymm13 6674 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6675 vpxor 
ymm7,ymm7,ymm11 6676 vpxor ymm6,ymm6,ymm10 6677 vpxor ymm5,ymm5,ymm9 6678 vpxor ymm4,ymm4,ymm8 6679 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6680 vpsrld ymm8,ymm7,25 6681 vpslld ymm7,ymm7,32-25 6682 vpxor ymm7,ymm7,ymm8 6683 vpsrld ymm8,ymm6,25 6684 vpslld ymm6,ymm6,32-25 6685 vpxor ymm6,ymm6,ymm8 6686 vpsrld ymm8,ymm5,25 6687 vpslld ymm5,ymm5,32-25 6688 vpxor ymm5,ymm5,ymm8 6689 vpsrld ymm8,ymm4,25 6690 vpslld ymm4,ymm4,32-25 6691 vpxor ymm4,ymm4,ymm8 6692 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 6693 vpalignr ymm7,ymm7,ymm7,4 6694 vpalignr ymm11,ymm11,ymm11,8 6695 vpalignr ymm15,ymm15,ymm15,12 6696 vpalignr ymm6,ymm6,ymm6,4 6697 vpalignr ymm10,ymm10,ymm10,8 6698 vpalignr ymm14,ymm14,ymm14,12 6699 vpalignr ymm5,ymm5,ymm5,4 6700 vpalignr ymm9,ymm9,ymm9,8 6701 vpalignr ymm13,ymm13,ymm13,12 6702 vpalignr ymm4,ymm4,ymm4,4 6703 vpalignr ymm8,ymm8,ymm8,8 6704 vpalignr ymm12,ymm12,ymm12,12 6705 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6706 vmovdqa ymm8,YMMWORD[$L$rol16] 6707 vpaddd ymm3,ymm3,ymm7 6708 vpaddd ymm2,ymm2,ymm6 6709 vpaddd ymm1,ymm1,ymm5 6710 vpaddd ymm0,ymm0,ymm4 6711 vpxor ymm15,ymm15,ymm3 6712 vpxor ymm14,ymm14,ymm2 6713 vpxor ymm13,ymm13,ymm1 6714 vpxor ymm12,ymm12,ymm0 6715 vpshufb ymm15,ymm15,ymm8 6716 vpshufb ymm14,ymm14,ymm8 6717 vpshufb ymm13,ymm13,ymm8 6718 vpshufb ymm12,ymm12,ymm8 6719 vpaddd ymm11,ymm11,ymm15 6720 vpaddd ymm10,ymm10,ymm14 6721 vpaddd ymm9,ymm9,ymm13 6722 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6723 vpxor ymm7,ymm7,ymm11 6724 vpxor ymm6,ymm6,ymm10 6725 vpxor ymm5,ymm5,ymm9 6726 vpxor ymm4,ymm4,ymm8 6727 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6728 vpsrld ymm8,ymm7,20 6729 vpslld ymm7,ymm7,32-20 6730 vpxor ymm7,ymm7,ymm8 6731 vpsrld ymm8,ymm6,20 6732 vpslld ymm6,ymm6,32-20 6733 vpxor ymm6,ymm6,ymm8 6734 vpsrld ymm8,ymm5,20 6735 vpslld ymm5,ymm5,32-20 6736 vpxor ymm5,ymm5,ymm8 6737 vpsrld ymm8,ymm4,20 6738 vpslld ymm4,ymm4,32-20 6739 vpxor ymm4,ymm4,ymm8 6740 vmovdqa ymm8,YMMWORD[$L$rol8] 6741 vpaddd ymm3,ymm3,ymm7 6742 vpaddd ymm2,ymm2,ymm6 6743 
vpaddd ymm1,ymm1,ymm5 6744 vpaddd ymm0,ymm0,ymm4 6745 vpxor ymm15,ymm15,ymm3 6746 vpxor ymm14,ymm14,ymm2 6747 vpxor ymm13,ymm13,ymm1 6748 vpxor ymm12,ymm12,ymm0 6749 vpshufb ymm15,ymm15,ymm8 6750 vpshufb ymm14,ymm14,ymm8 6751 vpshufb ymm13,ymm13,ymm8 6752 vpshufb ymm12,ymm12,ymm8 6753 vpaddd ymm11,ymm11,ymm15 6754 vpaddd ymm10,ymm10,ymm14 6755 vpaddd ymm9,ymm9,ymm13 6756 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6757 vpxor ymm7,ymm7,ymm11 6758 vpxor ymm6,ymm6,ymm10 6759 vpxor ymm5,ymm5,ymm9 6760 vpxor ymm4,ymm4,ymm8 6761 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6762 vpsrld ymm8,ymm7,25 6763 vpslld ymm7,ymm7,32-25 6764 vpxor ymm7,ymm7,ymm8 6765 vpsrld ymm8,ymm6,25 6766 vpslld ymm6,ymm6,32-25 6767 vpxor ymm6,ymm6,ymm8 6768 vpsrld ymm8,ymm5,25 6769 vpslld ymm5,ymm5,32-25 6770 vpxor ymm5,ymm5,ymm8 6771 vpsrld ymm8,ymm4,25 6772 vpslld ymm4,ymm4,32-25 6773 vpxor ymm4,ymm4,ymm8 6774 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 6775 vpalignr ymm7,ymm7,ymm7,12 6776 vpalignr ymm11,ymm11,ymm11,8 6777 vpalignr ymm15,ymm15,ymm15,4 6778 vpalignr ymm6,ymm6,ymm6,12 6779 vpalignr ymm10,ymm10,ymm10,8 6780 vpalignr ymm14,ymm14,ymm14,4 6781 vpalignr ymm5,ymm5,ymm5,12 6782 vpalignr ymm9,ymm9,ymm9,8 6783 vpalignr ymm13,ymm13,ymm13,4 6784 vpalignr ymm4,ymm4,ymm4,12 6785 vpalignr ymm8,ymm8,ymm8,8 6786 vpalignr ymm12,ymm12,ymm12,4 6787 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6788 vmovdqa ymm8,YMMWORD[$L$rol16] 6789 vpaddd ymm3,ymm3,ymm7 6790 vpaddd ymm2,ymm2,ymm6 6791 vpaddd ymm1,ymm1,ymm5 6792 vpaddd ymm0,ymm0,ymm4 6793 vpxor ymm15,ymm15,ymm3 6794 vpxor ymm14,ymm14,ymm2 6795 vpxor ymm13,ymm13,ymm1 6796 vpxor ymm12,ymm12,ymm0 6797 vpshufb ymm15,ymm15,ymm8 6798 vpshufb ymm14,ymm14,ymm8 6799 vpshufb ymm13,ymm13,ymm8 6800 vpshufb ymm12,ymm12,ymm8 6801 vpaddd ymm11,ymm11,ymm15 6802 vpaddd ymm10,ymm10,ymm14 6803 vpaddd ymm9,ymm9,ymm13 6804 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6805 vpxor ymm7,ymm7,ymm11 6806 vpxor ymm6,ymm6,ymm10 6807 vpxor ymm5,ymm5,ymm9 6808 vpxor ymm4,ymm4,ymm8 6809 vmovdqa 
YMMWORD[(160+128)+rbp],ymm8 6810 vpsrld ymm8,ymm7,20 6811 vpslld ymm7,ymm7,32-20 6812 vpxor ymm7,ymm7,ymm8 6813 vpsrld ymm8,ymm6,20 6814 vpslld ymm6,ymm6,32-20 6815 vpxor ymm6,ymm6,ymm8 6816 vpsrld ymm8,ymm5,20 6817 vpslld ymm5,ymm5,32-20 6818 vpxor ymm5,ymm5,ymm8 6819 vpsrld ymm8,ymm4,20 6820 vpslld ymm4,ymm4,32-20 6821 vpxor ymm4,ymm4,ymm8 6822 vmovdqa ymm8,YMMWORD[$L$rol8] 6823 vpaddd ymm3,ymm3,ymm7 6824 vpaddd ymm2,ymm2,ymm6 6825 vpaddd ymm1,ymm1,ymm5 6826 vpaddd ymm0,ymm0,ymm4 6827 vpxor ymm15,ymm15,ymm3 6828 6829 sub rdi,16 6830 mov rcx,9 6831 jmp NEAR $L$seal_avx2_main_loop_rounds_entry 6832ALIGN 32 6833$L$seal_avx2_main_loop: 6834 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 6835 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 6836 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 6837 vmovdqa ymm1,ymm0 6838 vmovdqa ymm5,ymm4 6839 vmovdqa ymm9,ymm8 6840 vmovdqa ymm2,ymm0 6841 vmovdqa ymm6,ymm4 6842 vmovdqa ymm10,ymm8 6843 vmovdqa ymm3,ymm0 6844 vmovdqa ymm7,ymm4 6845 vmovdqa ymm11,ymm8 6846 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 6847 vpaddd ymm15,ymm12,YMMWORD[((160+160))+rbp] 6848 vpaddd ymm14,ymm12,ymm15 6849 vpaddd ymm13,ymm12,ymm14 6850 vpaddd ymm12,ymm12,ymm13 6851 vmovdqa YMMWORD[(160+256)+rbp],ymm15 6852 vmovdqa YMMWORD[(160+224)+rbp],ymm14 6853 vmovdqa YMMWORD[(160+192)+rbp],ymm13 6854 vmovdqa YMMWORD[(160+160)+rbp],ymm12 6855 6856 mov rcx,10 6857ALIGN 32 6858$L$seal_avx2_main_loop_rounds: 6859 add r10,QWORD[((0+0))+rdi] 6860 adc r11,QWORD[((8+0))+rdi] 6861 adc r12,1 6862 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6863 vmovdqa ymm8,YMMWORD[$L$rol16] 6864 vpaddd ymm3,ymm3,ymm7 6865 vpaddd ymm2,ymm2,ymm6 6866 vpaddd ymm1,ymm1,ymm5 6867 vpaddd ymm0,ymm0,ymm4 6868 vpxor ymm15,ymm15,ymm3 6869 vpxor ymm14,ymm14,ymm2 6870 vpxor ymm13,ymm13,ymm1 6871 vpxor ymm12,ymm12,ymm0 6872 mov rdx,QWORD[((0+160+0))+rbp] 6873 mov r15,rdx 6874 mulx r14,r13,r10 6875 mulx rdx,rax,r11 6876 imul r15,r12 6877 add r14,rax 6878 adc r15,rdx 6879 vpshufb ymm15,ymm15,ymm8 6880 vpshufb ymm14,ymm14,ymm8 6881 vpshufb 
ymm13,ymm13,ymm8 6882 vpshufb ymm12,ymm12,ymm8 6883 vpaddd ymm11,ymm11,ymm15 6884 vpaddd ymm10,ymm10,ymm14 6885 vpaddd ymm9,ymm9,ymm13 6886 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6887 vpxor ymm7,ymm7,ymm11 6888 mov rdx,QWORD[((8+160+0))+rbp] 6889 mulx rax,r10,r10 6890 add r14,r10 6891 mulx r9,r11,r11 6892 adc r15,r11 6893 adc r9,0 6894 imul rdx,r12 6895 vpxor ymm6,ymm6,ymm10 6896 vpxor ymm5,ymm5,ymm9 6897 vpxor ymm4,ymm4,ymm8 6898 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6899 vpsrld ymm8,ymm7,20 6900 vpslld ymm7,ymm7,32-20 6901 vpxor ymm7,ymm7,ymm8 6902 vpsrld ymm8,ymm6,20 6903 vpslld ymm6,ymm6,32-20 6904 vpxor ymm6,ymm6,ymm8 6905 vpsrld ymm8,ymm5,20 6906 vpslld ymm5,ymm5,32-20 6907 add r15,rax 6908 adc r9,rdx 6909 vpxor ymm5,ymm5,ymm8 6910 vpsrld ymm8,ymm4,20 6911 vpslld ymm4,ymm4,32-20 6912 vpxor ymm4,ymm4,ymm8 6913 vmovdqa ymm8,YMMWORD[$L$rol8] 6914 vpaddd ymm3,ymm3,ymm7 6915 vpaddd ymm2,ymm2,ymm6 6916 vpaddd ymm1,ymm1,ymm5 6917 vpaddd ymm0,ymm0,ymm4 6918 vpxor ymm15,ymm15,ymm3 6919 mov r10,r13 6920 mov r11,r14 6921 mov r12,r15 6922 and r12,3 6923 mov r13,r15 6924 and r13,-4 6925 mov r14,r9 6926 shrd r15,r9,2 6927 shr r9,2 6928 add r15,r13 6929 adc r9,r14 6930 add r10,r15 6931 adc r11,r9 6932 adc r12,0 6933 6934$L$seal_avx2_main_loop_rounds_entry: 6935 vpxor ymm14,ymm14,ymm2 6936 vpxor ymm13,ymm13,ymm1 6937 vpxor ymm12,ymm12,ymm0 6938 vpshufb ymm15,ymm15,ymm8 6939 vpshufb ymm14,ymm14,ymm8 6940 vpshufb ymm13,ymm13,ymm8 6941 vpshufb ymm12,ymm12,ymm8 6942 vpaddd ymm11,ymm11,ymm15 6943 vpaddd ymm10,ymm10,ymm14 6944 add r10,QWORD[((0+16))+rdi] 6945 adc r11,QWORD[((8+16))+rdi] 6946 adc r12,1 6947 vpaddd ymm9,ymm9,ymm13 6948 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6949 vpxor ymm7,ymm7,ymm11 6950 vpxor ymm6,ymm6,ymm10 6951 vpxor ymm5,ymm5,ymm9 6952 vpxor ymm4,ymm4,ymm8 6953 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6954 vpsrld ymm8,ymm7,25 6955 mov rdx,QWORD[((0+160+0))+rbp] 6956 mov r15,rdx 6957 mulx r14,r13,r10 6958 mulx rdx,rax,r11 6959 imul r15,r12 6960 add r14,rax 6961 
adc r15,rdx 6962 vpslld ymm7,ymm7,32-25 6963 vpxor ymm7,ymm7,ymm8 6964 vpsrld ymm8,ymm6,25 6965 vpslld ymm6,ymm6,32-25 6966 vpxor ymm6,ymm6,ymm8 6967 vpsrld ymm8,ymm5,25 6968 vpslld ymm5,ymm5,32-25 6969 vpxor ymm5,ymm5,ymm8 6970 vpsrld ymm8,ymm4,25 6971 vpslld ymm4,ymm4,32-25 6972 vpxor ymm4,ymm4,ymm8 6973 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 6974 vpalignr ymm7,ymm7,ymm7,4 6975 vpalignr ymm11,ymm11,ymm11,8 6976 vpalignr ymm15,ymm15,ymm15,12 6977 vpalignr ymm6,ymm6,ymm6,4 6978 vpalignr ymm10,ymm10,ymm10,8 6979 vpalignr ymm14,ymm14,ymm14,12 6980 mov rdx,QWORD[((8+160+0))+rbp] 6981 mulx rax,r10,r10 6982 add r14,r10 6983 mulx r9,r11,r11 6984 adc r15,r11 6985 adc r9,0 6986 imul rdx,r12 6987 vpalignr ymm5,ymm5,ymm5,4 6988 vpalignr ymm9,ymm9,ymm9,8 6989 vpalignr ymm13,ymm13,ymm13,12 6990 vpalignr ymm4,ymm4,ymm4,4 6991 vpalignr ymm8,ymm8,ymm8,8 6992 vpalignr ymm12,ymm12,ymm12,12 6993 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6994 vmovdqa ymm8,YMMWORD[$L$rol16] 6995 vpaddd ymm3,ymm3,ymm7 6996 vpaddd ymm2,ymm2,ymm6 6997 vpaddd ymm1,ymm1,ymm5 6998 vpaddd ymm0,ymm0,ymm4 6999 vpxor ymm15,ymm15,ymm3 7000 vpxor ymm14,ymm14,ymm2 7001 vpxor ymm13,ymm13,ymm1 7002 vpxor ymm12,ymm12,ymm0 7003 vpshufb ymm15,ymm15,ymm8 7004 vpshufb ymm14,ymm14,ymm8 7005 add r15,rax 7006 adc r9,rdx 7007 vpshufb ymm13,ymm13,ymm8 7008 vpshufb ymm12,ymm12,ymm8 7009 vpaddd ymm11,ymm11,ymm15 7010 vpaddd ymm10,ymm10,ymm14 7011 vpaddd ymm9,ymm9,ymm13 7012 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 7013 vpxor ymm7,ymm7,ymm11 7014 vpxor ymm6,ymm6,ymm10 7015 vpxor ymm5,ymm5,ymm9 7016 mov r10,r13 7017 mov r11,r14 7018 mov r12,r15 7019 and r12,3 7020 mov r13,r15 7021 and r13,-4 7022 mov r14,r9 7023 shrd r15,r9,2 7024 shr r9,2 7025 add r15,r13 7026 adc r9,r14 7027 add r10,r15 7028 adc r11,r9 7029 adc r12,0 7030 vpxor ymm4,ymm4,ymm8 7031 vmovdqa YMMWORD[(160+128)+rbp],ymm8 7032 vpsrld ymm8,ymm7,20 7033 vpslld ymm7,ymm7,32-20 7034 vpxor ymm7,ymm7,ymm8 7035 vpsrld ymm8,ymm6,20 7036 vpslld ymm6,ymm6,32-20 7037 vpxor 
ymm6,ymm6,ymm8 7038 add r10,QWORD[((0+32))+rdi] 7039 adc r11,QWORD[((8+32))+rdi] 7040 adc r12,1 7041 7042 lea rdi,[48+rdi] 7043 vpsrld ymm8,ymm5,20 7044 vpslld ymm5,ymm5,32-20 7045 vpxor ymm5,ymm5,ymm8 7046 vpsrld ymm8,ymm4,20 7047 vpslld ymm4,ymm4,32-20 7048 vpxor ymm4,ymm4,ymm8 7049 vmovdqa ymm8,YMMWORD[$L$rol8] 7050 vpaddd ymm3,ymm3,ymm7 7051 vpaddd ymm2,ymm2,ymm6 7052 vpaddd ymm1,ymm1,ymm5 7053 vpaddd ymm0,ymm0,ymm4 7054 vpxor ymm15,ymm15,ymm3 7055 vpxor ymm14,ymm14,ymm2 7056 vpxor ymm13,ymm13,ymm1 7057 vpxor ymm12,ymm12,ymm0 7058 vpshufb ymm15,ymm15,ymm8 7059 vpshufb ymm14,ymm14,ymm8 7060 vpshufb ymm13,ymm13,ymm8 7061 mov rdx,QWORD[((0+160+0))+rbp] 7062 mov r15,rdx 7063 mulx r14,r13,r10 7064 mulx rdx,rax,r11 7065 imul r15,r12 7066 add r14,rax 7067 adc r15,rdx 7068 vpshufb ymm12,ymm12,ymm8 7069 vpaddd ymm11,ymm11,ymm15 7070 vpaddd ymm10,ymm10,ymm14 7071 vpaddd ymm9,ymm9,ymm13 7072 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 7073 vpxor ymm7,ymm7,ymm11 7074 vpxor ymm6,ymm6,ymm10 7075 vpxor ymm5,ymm5,ymm9 7076 mov rdx,QWORD[((8+160+0))+rbp] 7077 mulx rax,r10,r10 7078 add r14,r10 7079 mulx r9,r11,r11 7080 adc r15,r11 7081 adc r9,0 7082 imul rdx,r12 7083 vpxor ymm4,ymm4,ymm8 7084 vmovdqa YMMWORD[(160+128)+rbp],ymm8 7085 vpsrld ymm8,ymm7,25 7086 vpslld ymm7,ymm7,32-25 7087 vpxor ymm7,ymm7,ymm8 7088 vpsrld ymm8,ymm6,25 7089 vpslld ymm6,ymm6,32-25 7090 vpxor ymm6,ymm6,ymm8 7091 add r15,rax 7092 adc r9,rdx 7093 vpsrld ymm8,ymm5,25 7094 vpslld ymm5,ymm5,32-25 7095 vpxor ymm5,ymm5,ymm8 7096 vpsrld ymm8,ymm4,25 7097 vpslld ymm4,ymm4,32-25 7098 vpxor ymm4,ymm4,ymm8 7099 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 7100 vpalignr ymm7,ymm7,ymm7,12 7101 vpalignr ymm11,ymm11,ymm11,8 7102 vpalignr ymm15,ymm15,ymm15,4 7103 vpalignr ymm6,ymm6,ymm6,12 7104 vpalignr ymm10,ymm10,ymm10,8 7105 vpalignr ymm14,ymm14,ymm14,4 7106 vpalignr ymm5,ymm5,ymm5,12 7107 vpalignr ymm9,ymm9,ymm9,8 7108 vpalignr ymm13,ymm13,ymm13,4 7109 vpalignr ymm4,ymm4,ymm4,12 7110 vpalignr ymm8,ymm8,ymm8,8 7111 mov 
r10,r13 7112 mov r11,r14 7113 mov r12,r15 7114 and r12,3 7115 mov r13,r15 7116 and r13,-4 7117 mov r14,r9 7118 shrd r15,r9,2 7119 shr r9,2 7120 add r15,r13 7121 adc r9,r14 7122 add r10,r15 7123 adc r11,r9 7124 adc r12,0 7125 vpalignr ymm12,ymm12,ymm12,4 7126 7127 dec rcx 7128 jne NEAR $L$seal_avx2_main_loop_rounds 7129 vpaddd ymm3,ymm3,YMMWORD[$L$chacha20_consts] 7130 vpaddd ymm7,ymm7,YMMWORD[((160+64))+rbp] 7131 vpaddd ymm11,ymm11,YMMWORD[((160+96))+rbp] 7132 vpaddd ymm15,ymm15,YMMWORD[((160+256))+rbp] 7133 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] 7134 vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] 7135 vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] 7136 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] 7137 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 7138 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] 7139 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] 7140 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 7141 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 7142 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 7143 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 7144 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 7145 7146 vmovdqa YMMWORD[(160+128)+rbp],ymm0 7147 add r10,QWORD[((0+0))+rdi] 7148 adc r11,QWORD[((8+0))+rdi] 7149 adc r12,1 7150 mov rdx,QWORD[((0+160+0))+rbp] 7151 mov r15,rdx 7152 mulx r14,r13,r10 7153 mulx rdx,rax,r11 7154 imul r15,r12 7155 add r14,rax 7156 adc r15,rdx 7157 mov rdx,QWORD[((8+160+0))+rbp] 7158 mulx rax,r10,r10 7159 add r14,r10 7160 mulx r9,r11,r11 7161 adc r15,r11 7162 adc r9,0 7163 imul rdx,r12 7164 add r15,rax 7165 adc r9,rdx 7166 mov r10,r13 7167 mov r11,r14 7168 mov r12,r15 7169 and r12,3 7170 mov r13,r15 7171 and r13,-4 7172 mov r14,r9 7173 shrd r15,r9,2 7174 shr r9,2 7175 add r15,r13 7176 adc r9,r14 7177 add r10,r15 7178 adc r11,r9 7179 adc r12,0 7180 add r10,QWORD[((0+16))+rdi] 7181 adc r11,QWORD[((8+16))+rdi] 7182 adc r12,1 7183 mov rdx,QWORD[((0+160+0))+rbp] 7184 mov r15,rdx 7185 mulx r14,r13,r10 7186 mulx rdx,rax,r11 7187 imul r15,r12 7188 add r14,rax 7189 adc r15,rdx 7190 
mov rdx,QWORD[((8+160+0))+rbp] 7191 mulx rax,r10,r10 7192 add r14,r10 7193 mulx r9,r11,r11 7194 adc r15,r11 7195 adc r9,0 7196 imul rdx,r12 7197 add r15,rax 7198 adc r9,rdx 7199 mov r10,r13 7200 mov r11,r14 7201 mov r12,r15 7202 and r12,3 7203 mov r13,r15 7204 and r13,-4 7205 mov r14,r9 7206 shrd r15,r9,2 7207 shr r9,2 7208 add r15,r13 7209 adc r9,r14 7210 add r10,r15 7211 adc r11,r9 7212 adc r12,0 7213 7214 lea rdi,[32+rdi] 7215 vperm2i128 ymm0,ymm7,ymm3,0x02 7216 vperm2i128 ymm7,ymm7,ymm3,0x13 7217 vperm2i128 ymm3,ymm15,ymm11,0x02 7218 vperm2i128 ymm11,ymm15,ymm11,0x13 7219 vpxor ymm0,ymm0,YMMWORD[((0+0))+rsi] 7220 vpxor ymm3,ymm3,YMMWORD[((32+0))+rsi] 7221 vpxor ymm7,ymm7,YMMWORD[((64+0))+rsi] 7222 vpxor ymm11,ymm11,YMMWORD[((96+0))+rsi] 7223 vmovdqu YMMWORD[(0+0)+rdi],ymm0 7224 vmovdqu YMMWORD[(32+0)+rdi],ymm3 7225 vmovdqu YMMWORD[(64+0)+rdi],ymm7 7226 vmovdqu YMMWORD[(96+0)+rdi],ymm11 7227 7228 vmovdqa ymm0,YMMWORD[((160+128))+rbp] 7229 vperm2i128 ymm3,ymm6,ymm2,0x02 7230 vperm2i128 ymm6,ymm6,ymm2,0x13 7231 vperm2i128 ymm2,ymm14,ymm10,0x02 7232 vperm2i128 ymm10,ymm14,ymm10,0x13 7233 vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi] 7234 vpxor ymm2,ymm2,YMMWORD[((32+128))+rsi] 7235 vpxor ymm6,ymm6,YMMWORD[((64+128))+rsi] 7236 vpxor ymm10,ymm10,YMMWORD[((96+128))+rsi] 7237 vmovdqu YMMWORD[(0+128)+rdi],ymm3 7238 vmovdqu YMMWORD[(32+128)+rdi],ymm2 7239 vmovdqu YMMWORD[(64+128)+rdi],ymm6 7240 vmovdqu YMMWORD[(96+128)+rdi],ymm10 7241 vperm2i128 ymm3,ymm5,ymm1,0x02 7242 vperm2i128 ymm5,ymm5,ymm1,0x13 7243 vperm2i128 ymm1,ymm13,ymm9,0x02 7244 vperm2i128 ymm9,ymm13,ymm9,0x13 7245 vpxor ymm3,ymm3,YMMWORD[((0+256))+rsi] 7246 vpxor ymm1,ymm1,YMMWORD[((32+256))+rsi] 7247 vpxor ymm5,ymm5,YMMWORD[((64+256))+rsi] 7248 vpxor ymm9,ymm9,YMMWORD[((96+256))+rsi] 7249 vmovdqu YMMWORD[(0+256)+rdi],ymm3 7250 vmovdqu YMMWORD[(32+256)+rdi],ymm1 7251 vmovdqu YMMWORD[(64+256)+rdi],ymm5 7252 vmovdqu YMMWORD[(96+256)+rdi],ymm9 7253 vperm2i128 ymm3,ymm4,ymm0,0x02 7254 vperm2i128 ymm4,ymm4,ymm0,0x13 
7255 vperm2i128 ymm0,ymm12,ymm8,0x02 7256 vperm2i128 ymm8,ymm12,ymm8,0x13 7257 vpxor ymm3,ymm3,YMMWORD[((0+384))+rsi] 7258 vpxor ymm0,ymm0,YMMWORD[((32+384))+rsi] 7259 vpxor ymm4,ymm4,YMMWORD[((64+384))+rsi] 7260 vpxor ymm8,ymm8,YMMWORD[((96+384))+rsi] 7261 vmovdqu YMMWORD[(0+384)+rdi],ymm3 7262 vmovdqu YMMWORD[(32+384)+rdi],ymm0 7263 vmovdqu YMMWORD[(64+384)+rdi],ymm4 7264 vmovdqu YMMWORD[(96+384)+rdi],ymm8 7265 7266 lea rsi,[512+rsi] 7267 sub rbx,16*32 7268 cmp rbx,16*32 7269 jg NEAR $L$seal_avx2_main_loop 7270 7271 add r10,QWORD[((0+0))+rdi] 7272 adc r11,QWORD[((8+0))+rdi] 7273 adc r12,1 7274 mov rdx,QWORD[((0+160+0))+rbp] 7275 mov r15,rdx 7276 mulx r14,r13,r10 7277 mulx rdx,rax,r11 7278 imul r15,r12 7279 add r14,rax 7280 adc r15,rdx 7281 mov rdx,QWORD[((8+160+0))+rbp] 7282 mulx rax,r10,r10 7283 add r14,r10 7284 mulx r9,r11,r11 7285 adc r15,r11 7286 adc r9,0 7287 imul rdx,r12 7288 add r15,rax 7289 adc r9,rdx 7290 mov r10,r13 7291 mov r11,r14 7292 mov r12,r15 7293 and r12,3 7294 mov r13,r15 7295 and r13,-4 7296 mov r14,r9 7297 shrd r15,r9,2 7298 shr r9,2 7299 add r15,r13 7300 adc r9,r14 7301 add r10,r15 7302 adc r11,r9 7303 adc r12,0 7304 add r10,QWORD[((0+16))+rdi] 7305 adc r11,QWORD[((8+16))+rdi] 7306 adc r12,1 7307 mov rdx,QWORD[((0+160+0))+rbp] 7308 mov r15,rdx 7309 mulx r14,r13,r10 7310 mulx rdx,rax,r11 7311 imul r15,r12 7312 add r14,rax 7313 adc r15,rdx 7314 mov rdx,QWORD[((8+160+0))+rbp] 7315 mulx rax,r10,r10 7316 add r14,r10 7317 mulx r9,r11,r11 7318 adc r15,r11 7319 adc r9,0 7320 imul rdx,r12 7321 add r15,rax 7322 adc r9,rdx 7323 mov r10,r13 7324 mov r11,r14 7325 mov r12,r15 7326 and r12,3 7327 mov r13,r15 7328 and r13,-4 7329 mov r14,r9 7330 shrd r15,r9,2 7331 shr r9,2 7332 add r15,r13 7333 adc r9,r14 7334 add r10,r15 7335 adc r11,r9 7336 adc r12,0 7337 7338 lea rdi,[32+rdi] 7339 mov rcx,10 7340 xor r8,r8 7341 7342 cmp rbx,12*32 7343 ja NEAR $L$seal_avx2_tail_512 7344 cmp rbx,8*32 7345 ja NEAR $L$seal_avx2_tail_384 7346 cmp rbx,4*32 7347 ja NEAR 
$L$seal_avx2_tail_256 7348 7349$L$seal_avx2_tail_128: 7350 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 7351 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 7352 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 7353 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 7354 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 7355 vmovdqa YMMWORD[(160+160)+rbp],ymm12 7356 7357$L$seal_avx2_tail_128_rounds_and_3xhash: 7358 add r10,QWORD[((0+0))+rdi] 7359 adc r11,QWORD[((8+0))+rdi] 7360 adc r12,1 7361 mov rdx,QWORD[((0+160+0))+rbp] 7362 mov r15,rdx 7363 mulx r14,r13,r10 7364 mulx rdx,rax,r11 7365 imul r15,r12 7366 add r14,rax 7367 adc r15,rdx 7368 mov rdx,QWORD[((8+160+0))+rbp] 7369 mulx rax,r10,r10 7370 add r14,r10 7371 mulx r9,r11,r11 7372 adc r15,r11 7373 adc r9,0 7374 imul rdx,r12 7375 add r15,rax 7376 adc r9,rdx 7377 mov r10,r13 7378 mov r11,r14 7379 mov r12,r15 7380 and r12,3 7381 mov r13,r15 7382 and r13,-4 7383 mov r14,r9 7384 shrd r15,r9,2 7385 shr r9,2 7386 add r15,r13 7387 adc r9,r14 7388 add r10,r15 7389 adc r11,r9 7390 adc r12,0 7391 7392 lea rdi,[16+rdi] 7393$L$seal_avx2_tail_128_rounds_and_2xhash: 7394 vpaddd ymm0,ymm0,ymm4 7395 vpxor ymm12,ymm12,ymm0 7396 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 7397 vpaddd ymm8,ymm8,ymm12 7398 vpxor ymm4,ymm4,ymm8 7399 vpsrld ymm3,ymm4,20 7400 vpslld ymm4,ymm4,12 7401 vpxor ymm4,ymm4,ymm3 7402 vpaddd ymm0,ymm0,ymm4 7403 vpxor ymm12,ymm12,ymm0 7404 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 7405 vpaddd ymm8,ymm8,ymm12 7406 vpxor ymm4,ymm4,ymm8 7407 vpslld ymm3,ymm4,7 7408 vpsrld ymm4,ymm4,25 7409 vpxor ymm4,ymm4,ymm3 7410 vpalignr ymm12,ymm12,ymm12,12 7411 vpalignr ymm8,ymm8,ymm8,8 7412 vpalignr ymm4,ymm4,ymm4,4 7413 add r10,QWORD[((0+0))+rdi] 7414 adc r11,QWORD[((8+0))+rdi] 7415 adc r12,1 7416 mov rdx,QWORD[((0+160+0))+rbp] 7417 mov r15,rdx 7418 mulx r14,r13,r10 7419 mulx rdx,rax,r11 7420 imul r15,r12 7421 add r14,rax 7422 adc r15,rdx 7423 mov rdx,QWORD[((8+160+0))+rbp] 7424 mulx rax,r10,r10 7425 add r14,r10 7426 mulx r9,r11,r11 7427 adc r15,r11 7428 adc r9,0 7429 imul rdx,r12 
7430 add r15,rax 7431 adc r9,rdx 7432 mov r10,r13 7433 mov r11,r14 7434 mov r12,r15 7435 and r12,3 7436 mov r13,r15 7437 and r13,-4 7438 mov r14,r9 7439 shrd r15,r9,2 7440 shr r9,2 7441 add r15,r13 7442 adc r9,r14 7443 add r10,r15 7444 adc r11,r9 7445 adc r12,0 7446 vpaddd ymm0,ymm0,ymm4 7447 vpxor ymm12,ymm12,ymm0 7448 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 7449 vpaddd ymm8,ymm8,ymm12 7450 vpxor ymm4,ymm4,ymm8 7451 vpsrld ymm3,ymm4,20 7452 vpslld ymm4,ymm4,12 7453 vpxor ymm4,ymm4,ymm3 7454 vpaddd ymm0,ymm0,ymm4 7455 vpxor ymm12,ymm12,ymm0 7456 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 7457 vpaddd ymm8,ymm8,ymm12 7458 vpxor ymm4,ymm4,ymm8 7459 vpslld ymm3,ymm4,7 7460 vpsrld ymm4,ymm4,25 7461 vpxor ymm4,ymm4,ymm3 7462 vpalignr ymm12,ymm12,ymm12,4 7463 vpalignr ymm8,ymm8,ymm8,8 7464 vpalignr ymm4,ymm4,ymm4,12 7465 add r10,QWORD[((0+16))+rdi] 7466 adc r11,QWORD[((8+16))+rdi] 7467 adc r12,1 7468 mov rdx,QWORD[((0+160+0))+rbp] 7469 mov r15,rdx 7470 mulx r14,r13,r10 7471 mulx rdx,rax,r11 7472 imul r15,r12 7473 add r14,rax 7474 adc r15,rdx 7475 mov rdx,QWORD[((8+160+0))+rbp] 7476 mulx rax,r10,r10 7477 add r14,r10 7478 mulx r9,r11,r11 7479 adc r15,r11 7480 adc r9,0 7481 imul rdx,r12 7482 add r15,rax 7483 adc r9,rdx 7484 mov r10,r13 7485 mov r11,r14 7486 mov r12,r15 7487 and r12,3 7488 mov r13,r15 7489 and r13,-4 7490 mov r14,r9 7491 shrd r15,r9,2 7492 shr r9,2 7493 add r15,r13 7494 adc r9,r14 7495 add r10,r15 7496 adc r11,r9 7497 adc r12,0 7498 7499 lea rdi,[32+rdi] 7500 dec rcx 7501 jg NEAR $L$seal_avx2_tail_128_rounds_and_3xhash 7502 dec r8 7503 jge NEAR $L$seal_avx2_tail_128_rounds_and_2xhash 7504 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 7505 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 7506 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 7507 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 7508 vperm2i128 ymm3,ymm4,ymm0,0x13 7509 vperm2i128 ymm0,ymm4,ymm0,0x02 7510 vperm2i128 ymm4,ymm12,ymm8,0x02 7511 vperm2i128 ymm12,ymm12,ymm8,0x13 7512 vmovdqa ymm8,ymm3 7513 7514 jmp NEAR 
$L$seal_avx2_short_loop 7515 7516$L$seal_avx2_tail_256: 7517 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 7518 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 7519 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 7520 vmovdqa ymm1,ymm0 7521 vmovdqa ymm5,ymm4 7522 vmovdqa ymm9,ymm8 7523 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 7524 vpaddd ymm13,ymm12,YMMWORD[((160+160))+rbp] 7525 vpaddd ymm12,ymm12,ymm13 7526 vmovdqa YMMWORD[(160+160)+rbp],ymm12 7527 vmovdqa YMMWORD[(160+192)+rbp],ymm13 7528 7529$L$seal_avx2_tail_256_rounds_and_3xhash: 7530 add r10,QWORD[((0+0))+rdi] 7531 adc r11,QWORD[((8+0))+rdi] 7532 adc r12,1 7533 mov rax,QWORD[((0+160+0))+rbp] 7534 mov r15,rax 7535 mul r10 7536 mov r13,rax 7537 mov r14,rdx 7538 mov rax,QWORD[((0+160+0))+rbp] 7539 mul r11 7540 imul r15,r12 7541 add r14,rax 7542 adc r15,rdx 7543 mov rax,QWORD[((8+160+0))+rbp] 7544 mov r9,rax 7545 mul r10 7546 add r14,rax 7547 adc rdx,0 7548 mov r10,rdx 7549 mov rax,QWORD[((8+160+0))+rbp] 7550 mul r11 7551 add r15,rax 7552 adc rdx,0 7553 imul r9,r12 7554 add r15,r10 7555 adc r9,rdx 7556 mov r10,r13 7557 mov r11,r14 7558 mov r12,r15 7559 and r12,3 7560 mov r13,r15 7561 and r13,-4 7562 mov r14,r9 7563 shrd r15,r9,2 7564 shr r9,2 7565 add r15,r13 7566 adc r9,r14 7567 add r10,r15 7568 adc r11,r9 7569 adc r12,0 7570 7571 lea rdi,[16+rdi] 7572$L$seal_avx2_tail_256_rounds_and_2xhash: 7573 vpaddd ymm0,ymm0,ymm4 7574 vpxor ymm12,ymm12,ymm0 7575 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 7576 vpaddd ymm8,ymm8,ymm12 7577 vpxor ymm4,ymm4,ymm8 7578 vpsrld ymm3,ymm4,20 7579 vpslld ymm4,ymm4,12 7580 vpxor ymm4,ymm4,ymm3 7581 vpaddd ymm0,ymm0,ymm4 7582 vpxor ymm12,ymm12,ymm0 7583 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 7584 vpaddd ymm8,ymm8,ymm12 7585 vpxor ymm4,ymm4,ymm8 7586 vpslld ymm3,ymm4,7 7587 vpsrld ymm4,ymm4,25 7588 vpxor ymm4,ymm4,ymm3 7589 vpalignr ymm12,ymm12,ymm12,12 7590 vpalignr ymm8,ymm8,ymm8,8 7591 vpalignr ymm4,ymm4,ymm4,4 7592 vpaddd ymm1,ymm1,ymm5 7593 vpxor ymm13,ymm13,ymm1 7594 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 7595 vpaddd 
ymm9,ymm9,ymm13 7596 vpxor ymm5,ymm5,ymm9 7597 vpsrld ymm3,ymm5,20 7598 vpslld ymm5,ymm5,12 7599 vpxor ymm5,ymm5,ymm3 7600 vpaddd ymm1,ymm1,ymm5 7601 vpxor ymm13,ymm13,ymm1 7602 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 7603 vpaddd ymm9,ymm9,ymm13 7604 vpxor ymm5,ymm5,ymm9 7605 vpslld ymm3,ymm5,7 7606 vpsrld ymm5,ymm5,25 7607 vpxor ymm5,ymm5,ymm3 7608 vpalignr ymm13,ymm13,ymm13,12 7609 vpalignr ymm9,ymm9,ymm9,8 7610 vpalignr ymm5,ymm5,ymm5,4 7611 add r10,QWORD[((0+0))+rdi] 7612 adc r11,QWORD[((8+0))+rdi] 7613 adc r12,1 7614 mov rax,QWORD[((0+160+0))+rbp] 7615 mov r15,rax 7616 mul r10 7617 mov r13,rax 7618 mov r14,rdx 7619 mov rax,QWORD[((0+160+0))+rbp] 7620 mul r11 7621 imul r15,r12 7622 add r14,rax 7623 adc r15,rdx 7624 mov rax,QWORD[((8+160+0))+rbp] 7625 mov r9,rax 7626 mul r10 7627 add r14,rax 7628 adc rdx,0 7629 mov r10,rdx 7630 mov rax,QWORD[((8+160+0))+rbp] 7631 mul r11 7632 add r15,rax 7633 adc rdx,0 7634 imul r9,r12 7635 add r15,r10 7636 adc r9,rdx 7637 mov r10,r13 7638 mov r11,r14 7639 mov r12,r15 7640 and r12,3 7641 mov r13,r15 7642 and r13,-4 7643 mov r14,r9 7644 shrd r15,r9,2 7645 shr r9,2 7646 add r15,r13 7647 adc r9,r14 7648 add r10,r15 7649 adc r11,r9 7650 adc r12,0 7651 vpaddd ymm0,ymm0,ymm4 7652 vpxor ymm12,ymm12,ymm0 7653 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 7654 vpaddd ymm8,ymm8,ymm12 7655 vpxor ymm4,ymm4,ymm8 7656 vpsrld ymm3,ymm4,20 7657 vpslld ymm4,ymm4,12 7658 vpxor ymm4,ymm4,ymm3 7659 vpaddd ymm0,ymm0,ymm4 7660 vpxor ymm12,ymm12,ymm0 7661 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 7662 vpaddd ymm8,ymm8,ymm12 7663 vpxor ymm4,ymm4,ymm8 7664 vpslld ymm3,ymm4,7 7665 vpsrld ymm4,ymm4,25 7666 vpxor ymm4,ymm4,ymm3 7667 vpalignr ymm12,ymm12,ymm12,4 7668 vpalignr ymm8,ymm8,ymm8,8 7669 vpalignr ymm4,ymm4,ymm4,12 7670 vpaddd ymm1,ymm1,ymm5 7671 vpxor ymm13,ymm13,ymm1 7672 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 7673 vpaddd ymm9,ymm9,ymm13 7674 vpxor ymm5,ymm5,ymm9 7675 vpsrld ymm3,ymm5,20 7676 vpslld ymm5,ymm5,12 7677 vpxor ymm5,ymm5,ymm3 7678 vpaddd 
ymm1,ymm1,ymm5 7679 vpxor ymm13,ymm13,ymm1 7680 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 7681 vpaddd ymm9,ymm9,ymm13 7682 vpxor ymm5,ymm5,ymm9 7683 vpslld ymm3,ymm5,7 7684 vpsrld ymm5,ymm5,25 7685 vpxor ymm5,ymm5,ymm3 7686 vpalignr ymm13,ymm13,ymm13,4 7687 vpalignr ymm9,ymm9,ymm9,8 7688 vpalignr ymm5,ymm5,ymm5,12 7689 add r10,QWORD[((0+16))+rdi] 7690 adc r11,QWORD[((8+16))+rdi] 7691 adc r12,1 7692 mov rax,QWORD[((0+160+0))+rbp] 7693 mov r15,rax 7694 mul r10 7695 mov r13,rax 7696 mov r14,rdx 7697 mov rax,QWORD[((0+160+0))+rbp] 7698 mul r11 7699 imul r15,r12 7700 add r14,rax 7701 adc r15,rdx 7702 mov rax,QWORD[((8+160+0))+rbp] 7703 mov r9,rax 7704 mul r10 7705 add r14,rax 7706 adc rdx,0 7707 mov r10,rdx 7708 mov rax,QWORD[((8+160+0))+rbp] 7709 mul r11 7710 add r15,rax 7711 adc rdx,0 7712 imul r9,r12 7713 add r15,r10 7714 adc r9,rdx 7715 mov r10,r13 7716 mov r11,r14 7717 mov r12,r15 7718 and r12,3 7719 mov r13,r15 7720 and r13,-4 7721 mov r14,r9 7722 shrd r15,r9,2 7723 shr r9,2 7724 add r15,r13 7725 adc r9,r14 7726 add r10,r15 7727 adc r11,r9 7728 adc r12,0 7729 7730 lea rdi,[32+rdi] 7731 dec rcx 7732 jg NEAR $L$seal_avx2_tail_256_rounds_and_3xhash 7733 dec r8 7734 jge NEAR $L$seal_avx2_tail_256_rounds_and_2xhash 7735 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 7736 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] 7737 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] 7738 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 7739 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 7740 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 7741 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 7742 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 7743 vperm2i128 ymm3,ymm5,ymm1,0x02 7744 vperm2i128 ymm5,ymm5,ymm1,0x13 7745 vperm2i128 ymm1,ymm13,ymm9,0x02 7746 vperm2i128 ymm9,ymm13,ymm9,0x13 7747 vpxor ymm3,ymm3,YMMWORD[((0+0))+rsi] 7748 vpxor ymm1,ymm1,YMMWORD[((32+0))+rsi] 7749 vpxor ymm5,ymm5,YMMWORD[((64+0))+rsi] 7750 vpxor ymm9,ymm9,YMMWORD[((96+0))+rsi] 7751 vmovdqu YMMWORD[(0+0)+rdi],ymm3 7752 vmovdqu YMMWORD[(32+0)+rdi],ymm1 
7753 vmovdqu YMMWORD[(64+0)+rdi],ymm5 7754 vmovdqu YMMWORD[(96+0)+rdi],ymm9 7755 vperm2i128 ymm3,ymm4,ymm0,0x13 7756 vperm2i128 ymm0,ymm4,ymm0,0x02 7757 vperm2i128 ymm4,ymm12,ymm8,0x02 7758 vperm2i128 ymm12,ymm12,ymm8,0x13 7759 vmovdqa ymm8,ymm3 7760 7761 mov rcx,4*32 7762 lea rsi,[128+rsi] 7763 sub rbx,4*32 7764 jmp NEAR $L$seal_avx2_short_hash_remainder 7765 7766$L$seal_avx2_tail_384: 7767 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 7768 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 7769 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 7770 vmovdqa ymm1,ymm0 7771 vmovdqa ymm5,ymm4 7772 vmovdqa ymm9,ymm8 7773 vmovdqa ymm2,ymm0 7774 vmovdqa ymm6,ymm4 7775 vmovdqa ymm10,ymm8 7776 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 7777 vpaddd ymm14,ymm12,YMMWORD[((160+160))+rbp] 7778 vpaddd ymm13,ymm12,ymm14 7779 vpaddd ymm12,ymm12,ymm13 7780 vmovdqa YMMWORD[(160+160)+rbp],ymm12 7781 vmovdqa YMMWORD[(160+192)+rbp],ymm13 7782 vmovdqa YMMWORD[(160+224)+rbp],ymm14 7783 7784$L$seal_avx2_tail_384_rounds_and_3xhash: 7785 add r10,QWORD[((0+0))+rdi] 7786 adc r11,QWORD[((8+0))+rdi] 7787 adc r12,1 7788 mov rax,QWORD[((0+160+0))+rbp] 7789 mov r15,rax 7790 mul r10 7791 mov r13,rax 7792 mov r14,rdx 7793 mov rax,QWORD[((0+160+0))+rbp] 7794 mul r11 7795 imul r15,r12 7796 add r14,rax 7797 adc r15,rdx 7798 mov rax,QWORD[((8+160+0))+rbp] 7799 mov r9,rax 7800 mul r10 7801 add r14,rax 7802 adc rdx,0 7803 mov r10,rdx 7804 mov rax,QWORD[((8+160+0))+rbp] 7805 mul r11 7806 add r15,rax 7807 adc rdx,0 7808 imul r9,r12 7809 add r15,r10 7810 adc r9,rdx 7811 mov r10,r13 7812 mov r11,r14 7813 mov r12,r15 7814 and r12,3 7815 mov r13,r15 7816 and r13,-4 7817 mov r14,r9 7818 shrd r15,r9,2 7819 shr r9,2 7820 add r15,r13 7821 adc r9,r14 7822 add r10,r15 7823 adc r11,r9 7824 adc r12,0 7825 7826 lea rdi,[16+rdi] 7827$L$seal_avx2_tail_384_rounds_and_2xhash: 7828 vpaddd ymm0,ymm0,ymm4 7829 vpxor ymm12,ymm12,ymm0 7830 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 7831 vpaddd ymm8,ymm8,ymm12 7832 vpxor ymm4,ymm4,ymm8 7833 vpsrld ymm3,ymm4,20 7834 vpslld 
ymm4,ymm4,12 7835 vpxor ymm4,ymm4,ymm3 7836 vpaddd ymm0,ymm0,ymm4 7837 vpxor ymm12,ymm12,ymm0 7838 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 7839 vpaddd ymm8,ymm8,ymm12 7840 vpxor ymm4,ymm4,ymm8 7841 vpslld ymm3,ymm4,7 7842 vpsrld ymm4,ymm4,25 7843 vpxor ymm4,ymm4,ymm3 7844 vpalignr ymm12,ymm12,ymm12,12 7845 vpalignr ymm8,ymm8,ymm8,8 7846 vpalignr ymm4,ymm4,ymm4,4 7847 vpaddd ymm1,ymm1,ymm5 7848 vpxor ymm13,ymm13,ymm1 7849 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 7850 vpaddd ymm9,ymm9,ymm13 7851 vpxor ymm5,ymm5,ymm9 7852 vpsrld ymm3,ymm5,20 7853 vpslld ymm5,ymm5,12 7854 vpxor ymm5,ymm5,ymm3 7855 vpaddd ymm1,ymm1,ymm5 7856 vpxor ymm13,ymm13,ymm1 7857 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 7858 vpaddd ymm9,ymm9,ymm13 7859 vpxor ymm5,ymm5,ymm9 7860 vpslld ymm3,ymm5,7 7861 vpsrld ymm5,ymm5,25 7862 vpxor ymm5,ymm5,ymm3 7863 vpalignr ymm13,ymm13,ymm13,12 7864 vpalignr ymm9,ymm9,ymm9,8 7865 vpalignr ymm5,ymm5,ymm5,4 7866 add r10,QWORD[((0+0))+rdi] 7867 adc r11,QWORD[((8+0))+rdi] 7868 adc r12,1 7869 mov rax,QWORD[((0+160+0))+rbp] 7870 mov r15,rax 7871 mul r10 7872 mov r13,rax 7873 mov r14,rdx 7874 mov rax,QWORD[((0+160+0))+rbp] 7875 mul r11 7876 imul r15,r12 7877 add r14,rax 7878 adc r15,rdx 7879 mov rax,QWORD[((8+160+0))+rbp] 7880 mov r9,rax 7881 mul r10 7882 add r14,rax 7883 adc rdx,0 7884 mov r10,rdx 7885 mov rax,QWORD[((8+160+0))+rbp] 7886 mul r11 7887 add r15,rax 7888 adc rdx,0 7889 imul r9,r12 7890 add r15,r10 7891 adc r9,rdx 7892 mov r10,r13 7893 mov r11,r14 7894 mov r12,r15 7895 and r12,3 7896 mov r13,r15 7897 and r13,-4 7898 mov r14,r9 7899 shrd r15,r9,2 7900 shr r9,2 7901 add r15,r13 7902 adc r9,r14 7903 add r10,r15 7904 adc r11,r9 7905 adc r12,0 7906 vpaddd ymm2,ymm2,ymm6 7907 vpxor ymm14,ymm14,ymm2 7908 vpshufb ymm14,ymm14,YMMWORD[$L$rol16] 7909 vpaddd ymm10,ymm10,ymm14 7910 vpxor ymm6,ymm6,ymm10 7911 vpsrld ymm3,ymm6,20 7912 vpslld ymm6,ymm6,12 7913 vpxor ymm6,ymm6,ymm3 7914 vpaddd ymm2,ymm2,ymm6 7915 vpxor ymm14,ymm14,ymm2 7916 vpshufb ymm14,ymm14,YMMWORD[$L$rol8] 7917 
vpaddd ymm10,ymm10,ymm14 7918 vpxor ymm6,ymm6,ymm10 7919 vpslld ymm3,ymm6,7 7920 vpsrld ymm6,ymm6,25 7921 vpxor ymm6,ymm6,ymm3 7922 vpalignr ymm14,ymm14,ymm14,12 7923 vpalignr ymm10,ymm10,ymm10,8 7924 vpalignr ymm6,ymm6,ymm6,4 7925 vpaddd ymm0,ymm0,ymm4 7926 vpxor ymm12,ymm12,ymm0 7927 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 7928 vpaddd ymm8,ymm8,ymm12 7929 vpxor ymm4,ymm4,ymm8 7930 vpsrld ymm3,ymm4,20 7931 vpslld ymm4,ymm4,12 7932 vpxor ymm4,ymm4,ymm3 7933 vpaddd ymm0,ymm0,ymm4 7934 vpxor ymm12,ymm12,ymm0 7935 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 7936 vpaddd ymm8,ymm8,ymm12 7937 vpxor ymm4,ymm4,ymm8 7938 vpslld ymm3,ymm4,7 7939 vpsrld ymm4,ymm4,25 7940 vpxor ymm4,ymm4,ymm3 7941 vpalignr ymm12,ymm12,ymm12,4 7942 vpalignr ymm8,ymm8,ymm8,8 7943 vpalignr ymm4,ymm4,ymm4,12 7944 add r10,QWORD[((0+16))+rdi] 7945 adc r11,QWORD[((8+16))+rdi] 7946 adc r12,1 7947 mov rax,QWORD[((0+160+0))+rbp] 7948 mov r15,rax 7949 mul r10 7950 mov r13,rax 7951 mov r14,rdx 7952 mov rax,QWORD[((0+160+0))+rbp] 7953 mul r11 7954 imul r15,r12 7955 add r14,rax 7956 adc r15,rdx 7957 mov rax,QWORD[((8+160+0))+rbp] 7958 mov r9,rax 7959 mul r10 7960 add r14,rax 7961 adc rdx,0 7962 mov r10,rdx 7963 mov rax,QWORD[((8+160+0))+rbp] 7964 mul r11 7965 add r15,rax 7966 adc rdx,0 7967 imul r9,r12 7968 add r15,r10 7969 adc r9,rdx 7970 mov r10,r13 7971 mov r11,r14 7972 mov r12,r15 7973 and r12,3 7974 mov r13,r15 7975 and r13,-4 7976 mov r14,r9 7977 shrd r15,r9,2 7978 shr r9,2 7979 add r15,r13 7980 adc r9,r14 7981 add r10,r15 7982 adc r11,r9 7983 adc r12,0 7984 vpaddd ymm1,ymm1,ymm5 7985 vpxor ymm13,ymm13,ymm1 7986 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 7987 vpaddd ymm9,ymm9,ymm13 7988 vpxor ymm5,ymm5,ymm9 7989 vpsrld ymm3,ymm5,20 7990 vpslld ymm5,ymm5,12 7991 vpxor ymm5,ymm5,ymm3 7992 vpaddd ymm1,ymm1,ymm5 7993 vpxor ymm13,ymm13,ymm1 7994 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 7995 vpaddd ymm9,ymm9,ymm13 7996 vpxor ymm5,ymm5,ymm9 7997 vpslld ymm3,ymm5,7 7998 vpsrld ymm5,ymm5,25 7999 vpxor ymm5,ymm5,ymm3 8000 
vpalignr ymm13,ymm13,ymm13,4 8001 vpalignr ymm9,ymm9,ymm9,8 8002 vpalignr ymm5,ymm5,ymm5,12 8003 vpaddd ymm2,ymm2,ymm6 8004 vpxor ymm14,ymm14,ymm2 8005 vpshufb ymm14,ymm14,YMMWORD[$L$rol16] 8006 vpaddd ymm10,ymm10,ymm14 8007 vpxor ymm6,ymm6,ymm10 8008 vpsrld ymm3,ymm6,20 8009 vpslld ymm6,ymm6,12 8010 vpxor ymm6,ymm6,ymm3 8011 vpaddd ymm2,ymm2,ymm6 8012 vpxor ymm14,ymm14,ymm2 8013 vpshufb ymm14,ymm14,YMMWORD[$L$rol8] 8014 vpaddd ymm10,ymm10,ymm14 8015 vpxor ymm6,ymm6,ymm10 8016 vpslld ymm3,ymm6,7 8017 vpsrld ymm6,ymm6,25 8018 vpxor ymm6,ymm6,ymm3 8019 vpalignr ymm14,ymm14,ymm14,4 8020 vpalignr ymm10,ymm10,ymm10,8 8021 vpalignr ymm6,ymm6,ymm6,12 8022 8023 lea rdi,[32+rdi] 8024 dec rcx 8025 jg NEAR $L$seal_avx2_tail_384_rounds_and_3xhash 8026 dec r8 8027 jge NEAR $L$seal_avx2_tail_384_rounds_and_2xhash 8028 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] 8029 vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] 8030 vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] 8031 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] 8032 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 8033 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] 8034 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] 8035 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 8036 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 8037 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 8038 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 8039 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 8040 vperm2i128 ymm3,ymm6,ymm2,0x02 8041 vperm2i128 ymm6,ymm6,ymm2,0x13 8042 vperm2i128 ymm2,ymm14,ymm10,0x02 8043 vperm2i128 ymm10,ymm14,ymm10,0x13 8044 vpxor ymm3,ymm3,YMMWORD[((0+0))+rsi] 8045 vpxor ymm2,ymm2,YMMWORD[((32+0))+rsi] 8046 vpxor ymm6,ymm6,YMMWORD[((64+0))+rsi] 8047 vpxor ymm10,ymm10,YMMWORD[((96+0))+rsi] 8048 vmovdqu YMMWORD[(0+0)+rdi],ymm3 8049 vmovdqu YMMWORD[(32+0)+rdi],ymm2 8050 vmovdqu YMMWORD[(64+0)+rdi],ymm6 8051 vmovdqu YMMWORD[(96+0)+rdi],ymm10 8052 vperm2i128 ymm3,ymm5,ymm1,0x02 8053 vperm2i128 ymm5,ymm5,ymm1,0x13 8054 vperm2i128 ymm1,ymm13,ymm9,0x02 8055 vperm2i128 
ymm9,ymm13,ymm9,0x13
; NOTE: the operands above complete an instruction that starts on the
; previous line.  The statements down to the `jmp` finish a routine whose
; label lies before this block.  Visible behavior: XOR 128 input bytes at
; [rsi+128] with ymm3/ymm1/ymm5/ymm9 and store them at [rdi+128], regroup
; the lanes of ymm0/ymm4/ymm8/ymm12, set rcx = 8*32, advance rsi and
; shrink rbx by the same amount, then jump to the hash-remainder loop.
8056 vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi]
8057 vpxor ymm1,ymm1,YMMWORD[((32+128))+rsi]
8058 vpxor ymm5,ymm5,YMMWORD[((64+128))+rsi]
8059 vpxor ymm9,ymm9,YMMWORD[((96+128))+rsi]
8060 vmovdqu YMMWORD[(0+128)+rdi],ymm3
8061 vmovdqu YMMWORD[(32+128)+rdi],ymm1
8062 vmovdqu YMMWORD[(64+128)+rdi],ymm5
8063 vmovdqu YMMWORD[(96+128)+rdi],ymm9
8064 vperm2i128 ymm3,ymm4,ymm0,0x13
8065 vperm2i128 ymm0,ymm4,ymm0,0x02
8066 vperm2i128 ymm4,ymm12,ymm8,0x02
8067 vperm2i128 ymm12,ymm12,ymm8,0x13
8068 vmovdqa ymm8,ymm3
8069
8070 mov rcx,8*32
8071 lea rsi,[256+rsi]
8072 sub rbx,8*32
8073 jmp NEAR $L$seal_avx2_short_hash_remainder
8074
; ----------------------------------------------------------------------
; $L$seal_avx2_tail_512
; Builds four ChaCha20 state copies, one per register set
;   (ymm0,ymm4,ymm8,ymm12) (ymm1,ymm5,ymm9,ymm13)
;   (ymm2,ymm6,ymm10,ymm14) (ymm3,ymm7,ymm11,ymm15).
; Row a comes from $L$chacha20_consts, rows b and c from the state kept
; at [rbp+160+64] and [rbp+160+96], and row d from the counter row at
; [rbp+160+160], stepped by $L$avx2_inc once more for each copy.
; The four counter rows are written to [rbp+160+160 .. rbp+160+256] so
; they can be added back after the rounds; slot 160+160 ends up holding
; the highest counter.
; Register roles seen in the loop below:
;   rdi            data being hashed (advanced as it is absorbed)
;   r10:r11:r12    Poly1305 accumulator (three limbs)
;   [rbp+160],[rbp+168]  the two 64-bit multiplier words
;   rcx, r8        loop counters, set by code outside this chunk
;                  (TODO confirm their values at the call sites)
; ----------------------------------------------------------------------
8075$L$seal_avx2_tail_512:
8076 vmovdqa ymm0,YMMWORD[$L$chacha20_consts]
8077 vmovdqa ymm4,YMMWORD[((160+64))+rbp]
8078 vmovdqa ymm8,YMMWORD[((160+96))+rbp]
8079 vmovdqa ymm1,ymm0
8080 vmovdqa ymm5,ymm4
8081 vmovdqa ymm9,ymm8
8082 vmovdqa ymm2,ymm0
8083 vmovdqa ymm6,ymm4
8084 vmovdqa ymm10,ymm8
8085 vmovdqa ymm3,ymm0
8086 vmovdqa ymm7,ymm4
8087 vmovdqa ymm11,ymm8
8088 vmovdqa ymm12,YMMWORD[$L$avx2_inc]
8089 vpaddd ymm15,ymm12,YMMWORD[((160+160))+rbp]
8090 vpaddd ymm14,ymm12,ymm15
8091 vpaddd ymm13,ymm12,ymm14
8092 vpaddd ymm12,ymm12,ymm13
8093 vmovdqa YMMWORD[(160+256)+rbp],ymm15
8094 vmovdqa YMMWORD[(160+224)+rbp],ymm14
8095 vmovdqa YMMWORD[(160+192)+rbp],ymm13
8096 vmovdqa YMMWORD[(160+160)+rbp],ymm12
8097
; ----------------------------------------------------------------------
; Loop entry "3xhash": one stand-alone Poly1305 block (16 bytes at [rdi])
; before falling into the double-round body, which hashes two more.
; ----------------------------------------------------------------------
8098$L$seal_avx2_tail_512_rounds_and_3xhash:
; Absorb 16 bytes; `adc r12,1` adds 1 at bit 128 plus the carry.
8099 add r10,QWORD[((0+0))+rdi]
8100 adc r11,QWORD[((8+0))+rdi]
8101 adc r12,1
; Multiply the accumulator by the word at [rbp+160] (mulx leaves flags
; untouched, so the add/adc chain is not disturbed).
8102 mov rdx,QWORD[((0+160+0))+rbp]
8103 mov r15,rdx
8104 mulx r14,r13,r10
8105 mulx rdx,rax,r11
8106 imul r15,r12
8107 add r14,rax
8108 adc r15,rdx
; Multiply by the word at [rbp+168] and accumulate into r9:r15:r14:r13.
8109 mov rdx,QWORD[((8+160+0))+rbp]
8110 mulx rax,r10,r10
8111 add r14,r10
8112 mulx r9,r11,r11
8113 adc r15,r11
8114 adc r9,0
8115 imul rdx,r12
8116 add r15,rax
8117 adc r9,rdx
; Reduce: keep the low 130 bits in r10:r11:r12 and add back the part
; above bit 130 times 5 (as 4x via `and -4` plus 1x via the 2-bit shift).
8118 mov r10,r13
8119 mov r11,r14
8120 mov r12,r15
8121 and r12,3
8122 mov r13,r15
8123 and r13,-4
8124 mov r14,r9
8125 shrd r15,r9,2
8126 shr r9,2
8127 add r15,r13
8128 adc r9,r14
8129 add r10,r15
8130 adc r11,r9
8131 adc r12,0
8132
; Step past the 16 bytes just hashed.
8133 lea rdi,[16+rdi]
; ----------------------------------------------------------------------
; Loop entry "2xhash": one ChaCha20 double round (column round, then
; diagonal round) over all four state copies, interleaved with two
; Poly1305 blocks read from [rdi] and [rdi+16].
; ymm8 doubles as scratch (rotation mask / shift temporary), so row c of
; the first copy lives in the spill slot [rbp+160+128] in between.
; ----------------------------------------------------------------------
8134$L$seal_avx2_tail_512_rounds_and_2xhash:
8135 vmovdqa YMMWORD[(160+128)+rbp],ymm8
8136 vmovdqa ymm8,YMMWORD[$L$rol16]
; Column round: a += b; d ^= a; d <<<= 16 (byte shuffle with $L$rol16).
8137 vpaddd ymm3,ymm3,ymm7
8138 vpaddd ymm2,ymm2,ymm6
8139 vpaddd ymm1,ymm1,ymm5
8140 vpaddd ymm0,ymm0,ymm4
8141 vpxor ymm15,ymm15,ymm3
8142 vpxor ymm14,ymm14,ymm2
8143 vpxor ymm13,ymm13,ymm1
8144 vpxor ymm12,ymm12,ymm0
8145 vpshufb ymm15,ymm15,ymm8
8146 vpshufb ymm14,ymm14,ymm8
8147 vpshufb ymm13,ymm13,ymm8
8148 vpshufb ymm12,ymm12,ymm8
; c += d (first copy's c is rebuilt from the spill slot); b ^= c.
8149 vpaddd ymm11,ymm11,ymm15
8150 vpaddd ymm10,ymm10,ymm14
8151 vpaddd ymm9,ymm9,ymm13
8152 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp]
8153 vpxor ymm7,ymm7,ymm11
8154 vpxor ymm6,ymm6,ymm10
; Poly1305 block 1 of 2: absorb 16 bytes at [rdi].
8155 add r10,QWORD[((0+0))+rdi]
8156 adc r11,QWORD[((8+0))+rdi]
8157 adc r12,1
8158 vpxor ymm5,ymm5,ymm9
8159 vpxor ymm4,ymm4,ymm8
; Re-spill c; ymm8 becomes the shift temporary.  b <<<= 12.
8160 vmovdqa YMMWORD[(160+128)+rbp],ymm8
8161 vpsrld ymm8,ymm7,20
8162 vpslld ymm7,ymm7,32-20
8163 vpxor ymm7,ymm7,ymm8
8164 vpsrld ymm8,ymm6,20
8165 vpslld ymm6,ymm6,32-20
8166 vpxor ymm6,ymm6,ymm8
8167 vpsrld ymm8,ymm5,20
8168 vpslld ymm5,ymm5,32-20
8169 vpxor ymm5,ymm5,ymm8
8170 vpsrld ymm8,ymm4,20
8171 vpslld ymm4,ymm4,32-20
8172 vpxor ymm4,ymm4,ymm8
; a += b; d ^= a; d <<<= 8 (byte shuffle with $L$rol8).
8173 vmovdqa ymm8,YMMWORD[$L$rol8]
8174 vpaddd ymm3,ymm3,ymm7
8175 vpaddd ymm2,ymm2,ymm6
8176 vpaddd ymm1,ymm1,ymm5
8177 vpaddd ymm0,ymm0,ymm4
; Poly1305 block 1: multiply by the word at [rbp+160].
8178 mov rdx,QWORD[((0+160+0))+rbp]
8179 mov r15,rdx
8180 mulx r14,r13,r10
8181 mulx rdx,rax,r11
8182 imul r15,r12
8183 add r14,rax
8184 adc r15,rdx
8185 vpxor ymm15,ymm15,ymm3
8186 vpxor ymm14,ymm14,ymm2
8187 vpxor ymm13,ymm13,ymm1
8188 vpxor ymm12,ymm12,ymm0
8189 vpshufb ymm15,ymm15,ymm8
8190 vpshufb ymm14,ymm14,ymm8
8191 vpshufb ymm13,ymm13,ymm8
8192 vpshufb ymm12,ymm12,ymm8
; c += d; b ^= c; b <<<= 7.
8193 vpaddd ymm11,ymm11,ymm15
8194 vpaddd ymm10,ymm10,ymm14
8195 vpaddd ymm9,ymm9,ymm13
8196 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp]
8197 vpxor ymm7,ymm7,ymm11
8198 vpxor ymm6,ymm6,ymm10
8199 vpxor ymm5,ymm5,ymm9
8200 vpxor ymm4,ymm4,ymm8
8201 vmovdqa YMMWORD[(160+128)+rbp],ymm8
8202 vpsrld ymm8,ymm7,25
8203 vpslld ymm7,ymm7,32-25
8204 vpxor ymm7,ymm7,ymm8
; Poly1305 block 1: multiply by the word at [rbp+168].
8205 mov rdx,QWORD[((8+160+0))+rbp]
8206 mulx rax,r10,r10
8207 add r14,r10
8208 mulx r9,r11,r11
8209 adc r15,r11
8210 adc r9,0
8211 imul rdx,r12
8212 vpsrld ymm8,ymm6,25
8213 vpslld ymm6,ymm6,32-25
8214 vpxor ymm6,ymm6,ymm8
8215 vpsrld ymm8,ymm5,25
8216 vpslld ymm5,ymm5,32-25
8217 vpxor ymm5,ymm5,ymm8
8218 vpsrld ymm8,ymm4,25
8219 vpslld ymm4,ymm4,32-25
8220 vpxor ymm4,ymm4,ymm8
; Reload c, then rotate rows b/c/d by 1/2/3 dwords inside each 128-bit
; lane so the next four steps operate on the diagonals.
8221 vmovdqa ymm8,YMMWORD[((160+128))+rbp]
8222 vpalignr ymm7,ymm7,ymm7,4
8223 vpalignr ymm11,ymm11,ymm11,8
8224 vpalignr ymm15,ymm15,ymm15,12
8225 vpalignr ymm6,ymm6,ymm6,4
8226 vpalignr ymm10,ymm10,ymm10,8
8227 vpalignr ymm14,ymm14,ymm14,12
8228 vpalignr ymm5,ymm5,ymm5,4
8229 vpalignr ymm9,ymm9,ymm9,8
8230 vpalignr ymm13,ymm13,ymm13,12
8231 vpalignr ymm4,ymm4,ymm4,4
8232 add r15,rax
8233 adc r9,rdx
8234 vpalignr ymm8,ymm8,ymm8,8
8235 vpalignr ymm12,ymm12,ymm12,12
; Diagonal round: same four steps as the column round above.
8236 vmovdqa YMMWORD[(160+128)+rbp],ymm8
8237 vmovdqa ymm8,YMMWORD[$L$rol16]
8238 vpaddd ymm3,ymm3,ymm7
8239 vpaddd ymm2,ymm2,ymm6
8240 vpaddd ymm1,ymm1,ymm5
8241 vpaddd ymm0,ymm0,ymm4
8242 vpxor ymm15,ymm15,ymm3
8243 vpxor ymm14,ymm14,ymm2
8244 vpxor ymm13,ymm13,ymm1
8245 vpxor ymm12,ymm12,ymm0
8246 vpshufb ymm15,ymm15,ymm8
8247 vpshufb ymm14,ymm14,ymm8
8248 vpshufb ymm13,ymm13,ymm8
8249 vpshufb ymm12,ymm12,ymm8
8250 vpaddd ymm11,ymm11,ymm15
8251 vpaddd ymm10,ymm10,ymm14
8252 vpaddd ymm9,ymm9,ymm13
8253 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp]
; Poly1305 block 1: reduction (low 130 bits + 5 x the bits above).
8254 mov r10,r13
8255 mov r11,r14
8256 mov r12,r15
8257 and r12,3
8258 mov r13,r15
8259 and r13,-4
8260 mov r14,r9
8261 shrd r15,r9,2
8262 shr r9,2
8263 add r15,r13
8264 adc r9,r14
8265 add r10,r15
8266 adc r11,r9
8267 adc r12,0
8268 vpxor ymm7,ymm7,ymm11
8269 vpxor ymm6,ymm6,ymm10
8270 vpxor ymm5,ymm5,ymm9
8271 vpxor ymm4,ymm4,ymm8
8272 vmovdqa YMMWORD[(160+128)+rbp],ymm8
8273 vpsrld ymm8,ymm7,20
8274 vpslld ymm7,ymm7,32-20
8275 vpxor ymm7,ymm7,ymm8
8276 vpsrld ymm8,ymm6,20
8277 vpslld ymm6,ymm6,32-20
8278 vpxor ymm6,ymm6,ymm8
8279 vpsrld ymm8,ymm5,20
8280 vpslld ymm5,ymm5,32-20
8281 vpxor ymm5,ymm5,ymm8
8282 vpsrld ymm8,ymm4,20
8283 vpslld ymm4,ymm4,32-20
8284 vpxor ymm4,ymm4,ymm8
8285 vmovdqa ymm8,YMMWORD[$L$rol8]
8286 vpaddd ymm3,ymm3,ymm7
8287 vpaddd ymm2,ymm2,ymm6
; Poly1305 block 2 of 2: absorb 16 bytes at [rdi+16].
8288 add r10,QWORD[((0+16))+rdi]
8289 adc r11,QWORD[((8+16))+rdi]
8290 adc r12,1
8291 vpaddd ymm1,ymm1,ymm5
8292 vpaddd ymm0,ymm0,ymm4
8293 vpxor ymm15,ymm15,ymm3
8294 vpxor ymm14,ymm14,ymm2
8295 vpxor ymm13,ymm13,ymm1
8296 vpxor ymm12,ymm12,ymm0
8297 vpshufb ymm15,ymm15,ymm8
8298 vpshufb ymm14,ymm14,ymm8
8299 vpshufb ymm13,ymm13,ymm8
8300 vpshufb ymm12,ymm12,ymm8
8301 vpaddd ymm11,ymm11,ymm15
8302 vpaddd ymm10,ymm10,ymm14
8303 vpaddd ymm9,ymm9,ymm13
8304 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp]
8305 vpxor ymm7,ymm7,ymm11
8306 vpxor ymm6,ymm6,ymm10
8307 vpxor ymm5,ymm5,ymm9
8308 vpxor ymm4,ymm4,ymm8
8309 vmovdqa YMMWORD[(160+128)+rbp],ymm8
8310 vpsrld ymm8,ymm7,25
; Poly1305 block 2: multiply by the word at [rbp+160].
8311 mov rdx,QWORD[((0+160+0))+rbp]
8312 mov r15,rdx
8313 mulx r14,r13,r10
8314 mulx rdx,rax,r11
8315 imul r15,r12
8316 add r14,rax
8317 adc r15,rdx
8318 vpslld ymm7,ymm7,32-25
8319 vpxor ymm7,ymm7,ymm8
8320 vpsrld ymm8,ymm6,25
8321 vpslld ymm6,ymm6,32-25
8322 vpxor ymm6,ymm6,ymm8
8323 vpsrld ymm8,ymm5,25
8324 vpslld ymm5,ymm5,32-25
8325 vpxor ymm5,ymm5,ymm8
8326 vpsrld ymm8,ymm4,25
8327 vpslld ymm4,ymm4,32-25
8328 vpxor ymm4,ymm4,ymm8
; Reload c and undo the lane rotation (b/c/d by 3/2/1 dwords), returning
; the state to column order.
8329 vmovdqa ymm8,YMMWORD[((160+128))+rbp]
8330 vpalignr ymm7,ymm7,ymm7,12
8331 vpalignr ymm11,ymm11,ymm11,8
8332 vpalignr ymm15,ymm15,ymm15,4
8333 vpalignr ymm6,ymm6,ymm6,12
8334 vpalignr ymm10,ymm10,ymm10,8
8335 vpalignr ymm14,ymm14,ymm14,4
8336 vpalignr ymm5,ymm5,ymm5,12
8337 vpalignr ymm9,ymm9,ymm9,8
; Poly1305 block 2: multiply by the word at [rbp+168].
8338 mov rdx,QWORD[((8+160+0))+rbp]
8339 mulx rax,r10,r10
8340 add r14,r10
8341 mulx r9,r11,r11
8342 adc r15,r11
8343 adc r9,0
8344 imul rdx,r12
8345 vpalignr ymm13,ymm13,ymm13,4
8346 vpalignr ymm4,ymm4,ymm4,12
8347 vpalignr ymm8,ymm8,ymm8,8
8348 vpalignr ymm12,ymm12,ymm12,4
8349
8350
8351
8352
8353
8354
8355
8356
8357
8358
8359
8360
8361
8362
8363
8364
8365 add r15,rax
8366 adc r9,rdx
8367
8368
8369
8370
8371
8372
8373
8374
8375
8376
8377
8378
8379
8380
8381
8382
8383
8384
8385
8386
; Poly1305 block 2: reduction.
8387 mov r10,r13
8388 mov r11,r14
8389 mov r12,r15
8390 and r12,3
8391 mov r13,r15
8392 and r13,-4
8393 mov r14,r9
8394 shrd r15,r9,2
8395 shr r9,2
8396 add r15,r13
8397 adc r9,r14
8398 add r10,r15
8399 adc r11,r9
8400 adc r12,0
8401
; Step past the 32 bytes hashed in the body.  Loop control: re-enter at
; the 3xhash label while --rcx > 0 (signed), otherwise re-enter at the
; 2xhash label while --r8 >= 0 (signed).
8402 lea rdi,[32+rdi]
8403 dec rcx
8404 jg NEAR $L$seal_avx2_tail_512_rounds_and_3xhash
8405 dec r8
8406 jge NEAR $L$seal_avx2_tail_512_rounds_and_2xhash
; Rounds done: add the input state back into every copy (constants,
; rows b/c from the stack, and each copy's saved counter row).
8407 vpaddd ymm3,ymm3,YMMWORD[$L$chacha20_consts]
8408 vpaddd ymm7,ymm7,YMMWORD[((160+64))+rbp]
8409 vpaddd ymm11,ymm11,YMMWORD[((160+96))+rbp]
8410 vpaddd ymm15,ymm15,YMMWORD[((160+256))+rbp]
8411 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts]
8412 vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp]
8413 vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp]
8414 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp]
8415 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts]
8416 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp]
8417 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp]
8418 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp]
8419 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts]
8420 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp]
8421 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp]
8422 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp]
8423
; Output stage.  ymm0 is parked at [rbp+160+128] so it can serve as a
; temporary.  Each vperm2i128 pair gathers the low lanes (imm 0x02) or
; the high lanes (imm 0x13) of two rows into one register, giving 32
; contiguous keystream bytes.  Those are XORed with the input at rsi and
; stored at rdi.
8424 vmovdqa YMMWORD[(160+128)+rbp],ymm0
8425 vperm2i128 ymm0,ymm7,ymm3,0x02
8426 vperm2i128 ymm7,ymm7,ymm3,0x13
8427 vperm2i128 ymm3,ymm15,ymm11,0x02
8428 vperm2i128 ymm11,ymm15,ymm11,0x13
8429 vpxor ymm0,ymm0,YMMWORD[((0+0))+rsi]
8430 vpxor ymm3,ymm3,YMMWORD[((32+0))+rsi]
8431 vpxor ymm7,ymm7,YMMWORD[((64+0))+rsi]
8432 vpxor ymm11,ymm11,YMMWORD[((96+0))+rsi]
8433 vmovdqu YMMWORD[(0+0)+rdi],ymm0
8434 vmovdqu YMMWORD[(32+0)+rdi],ymm3
8435 vmovdqu YMMWORD[(64+0)+rdi],ymm7
8436 vmovdqu YMMWORD[(96+0)+rdi],ymm11
8437
; Restore the parked ymm0 and continue with the next state copy.
8438 vmovdqa ymm0,YMMWORD[((160+128))+rbp]
8439 vperm2i128 ymm3,ymm6,ymm2,0x02
8440 vperm2i128
ymm6,ymm6,ymm2,0x13
; NOTE: the operands above complete a vperm2i128 that starts on the
; previous line.  The statements down to the `jmp` are the rest of an
; output stage that begins before this block: two more 128-byte groups
; are XORed with the input at rsi+128 / rsi+256 and stored at rdi+128 /
; rdi+256.
8441 vperm2i128 ymm2,ymm14,ymm10,0x02
8442 vperm2i128 ymm10,ymm14,ymm10,0x13
8443 vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi]
8444 vpxor ymm2,ymm2,YMMWORD[((32+128))+rsi]
8445 vpxor ymm6,ymm6,YMMWORD[((64+128))+rsi]
8446 vpxor ymm10,ymm10,YMMWORD[((96+128))+rsi]
8447 vmovdqu YMMWORD[(0+128)+rdi],ymm3
8448 vmovdqu YMMWORD[(32+128)+rdi],ymm2
8449 vmovdqu YMMWORD[(64+128)+rdi],ymm6
8450 vmovdqu YMMWORD[(96+128)+rdi],ymm10
8451 vperm2i128 ymm3,ymm5,ymm1,0x02
8452 vperm2i128 ymm5,ymm5,ymm1,0x13
8453 vperm2i128 ymm1,ymm13,ymm9,0x02
8454 vperm2i128 ymm9,ymm13,ymm9,0x13
8455 vpxor ymm3,ymm3,YMMWORD[((0+256))+rsi]
8456 vpxor ymm1,ymm1,YMMWORD[((32+256))+rsi]
8457 vpxor ymm5,ymm5,YMMWORD[((64+256))+rsi]
8458 vpxor ymm9,ymm9,YMMWORD[((96+256))+rsi]
8459 vmovdqu YMMWORD[(0+256)+rdi],ymm3
8460 vmovdqu YMMWORD[(32+256)+rdi],ymm1
8461 vmovdqu YMMWORD[(64+256)+rdi],ymm5
8462 vmovdqu YMMWORD[(96+256)+rdi],ymm9
; The last state copy is not written out here.  Its lanes are regrouped
; into ymm0, ymm4, ymm8, ymm12 (32 contiguous keystream bytes each, in
; that order) and left in registers for the code jumped to below.
8463 vperm2i128 ymm3,ymm4,ymm0,0x13
8464 vperm2i128 ymm0,ymm4,ymm0,0x02
8465 vperm2i128 ymm4,ymm12,ymm8,0x02
8466 vperm2i128 ymm12,ymm12,ymm8,0x13
8467 vmovdqa ymm8,ymm3
8468
; rcx = 12*32 is passed to $L$seal_avx2_short_hash_remainder, which
; hashes rcx bytes starting at rdi in 16-byte steps.  rsi is advanced
; past the 384 input bytes consumed and rbx is reduced by the same
; amount.
8469 mov rcx,12*32
8470 lea rsi,[384+rsi]
8471 sub rbx,12*32
8472 jmp NEAR $L$seal_avx2_short_hash_remainder
8473
; ----------------------------------------------------------------------
; $L$seal_avx2_320
; Entered with one ChaCha20 state in ymm0 (row a), ymm4 (row b),
; ymm8 (row c) and ymm12 (row d); the caller is outside this chunk.
; Clones it into three copies:
;   (ymm0,ymm4,ymm8,ymm12) (ymm1,ymm5,ymm9,ymm13) (ymm2,ymm6,ymm10,ymm14)
; with row d stepped by $L$avx2_inc per copy.  Rows b and c are kept in
; ymm7 / ymm11 and the three d rows at [rbp+160+160], [rbp+160+192] and
; [rbp+160+224] for the final add.  r10 counts 10 double rounds.
; ymm3 is the shift temporary inside the loop.
; ----------------------------------------------------------------------
8474$L$seal_avx2_320:
8475 vmovdqa ymm1,ymm0
8476 vmovdqa ymm2,ymm0
8477 vmovdqa ymm5,ymm4
8478 vmovdqa ymm6,ymm4
8479 vmovdqa ymm9,ymm8
8480 vmovdqa ymm10,ymm8
8481 vpaddd ymm13,ymm12,YMMWORD[$L$avx2_inc]
8482 vpaddd ymm14,ymm13,YMMWORD[$L$avx2_inc]
8483 vmovdqa ymm7,ymm4
8484 vmovdqa ymm11,ymm8
8485 vmovdqa YMMWORD[(160+160)+rbp],ymm12
8486 vmovdqa YMMWORD[(160+192)+rbp],ymm13
8487 vmovdqa YMMWORD[(160+224)+rbp],ymm14
8488 mov r10,10
8489$L$seal_avx2_320_rounds:
; Column round, copy 1: a+=b; d^=a; d<<<=16; c+=d; b^=c; b<<<=12;
; a+=b; d^=a; d<<<=8; c+=d; b^=c; b<<<=7.  Then rows b/c/d are rotated
; by 1/2/3 dwords per lane, ready for the diagonal round.
8490 vpaddd ymm0,ymm0,ymm4
8491 vpxor ymm12,ymm12,ymm0
8492 vpshufb ymm12,ymm12,YMMWORD[$L$rol16]
8493 vpaddd ymm8,ymm8,ymm12
8494 vpxor ymm4,ymm4,ymm8
8495 vpsrld ymm3,ymm4,20
8496 vpslld ymm4,ymm4,12
8497 vpxor ymm4,ymm4,ymm3
8498 vpaddd ymm0,ymm0,ymm4
8499 vpxor ymm12,ymm12,ymm0
8500 vpshufb ymm12,ymm12,YMMWORD[$L$rol8]
8501 vpaddd ymm8,ymm8,ymm12
8502 vpxor ymm4,ymm4,ymm8
8503 vpslld ymm3,ymm4,7
8504 vpsrld ymm4,ymm4,25
8505 vpxor ymm4,ymm4,ymm3
8506 vpalignr ymm12,ymm12,ymm12,12
8507 vpalignr ymm8,ymm8,ymm8,8
8508 vpalignr ymm4,ymm4,ymm4,4
; Column round, copy 2.
8509 vpaddd ymm1,ymm1,ymm5
8510 vpxor ymm13,ymm13,ymm1
8511 vpshufb ymm13,ymm13,YMMWORD[$L$rol16]
8512 vpaddd ymm9,ymm9,ymm13
8513 vpxor ymm5,ymm5,ymm9
8514 vpsrld ymm3,ymm5,20
8515 vpslld ymm5,ymm5,12
8516 vpxor ymm5,ymm5,ymm3
8517 vpaddd ymm1,ymm1,ymm5
8518 vpxor ymm13,ymm13,ymm1
8519 vpshufb ymm13,ymm13,YMMWORD[$L$rol8]
8520 vpaddd ymm9,ymm9,ymm13
8521 vpxor ymm5,ymm5,ymm9
8522 vpslld ymm3,ymm5,7
8523 vpsrld ymm5,ymm5,25
8524 vpxor ymm5,ymm5,ymm3
8525 vpalignr ymm13,ymm13,ymm13,12
8526 vpalignr ymm9,ymm9,ymm9,8
8527 vpalignr ymm5,ymm5,ymm5,4
; Column round, copy 3.
8528 vpaddd ymm2,ymm2,ymm6
8529 vpxor ymm14,ymm14,ymm2
8530 vpshufb ymm14,ymm14,YMMWORD[$L$rol16]
8531 vpaddd ymm10,ymm10,ymm14
8532 vpxor ymm6,ymm6,ymm10
8533 vpsrld ymm3,ymm6,20
8534 vpslld ymm6,ymm6,12
8535 vpxor ymm6,ymm6,ymm3
8536 vpaddd ymm2,ymm2,ymm6
8537 vpxor ymm14,ymm14,ymm2
8538 vpshufb ymm14,ymm14,YMMWORD[$L$rol8]
8539 vpaddd ymm10,ymm10,ymm14
8540 vpxor ymm6,ymm6,ymm10
8541 vpslld ymm3,ymm6,7
8542 vpsrld ymm6,ymm6,25
8543 vpxor ymm6,ymm6,ymm3
8544 vpalignr ymm14,ymm14,ymm14,12
8545 vpalignr ymm10,ymm10,ymm10,8
8546 vpalignr ymm6,ymm6,ymm6,4
; Diagonal round, copy 1; the closing vpalignr triple undoes the lane
; rotation.
8547 vpaddd ymm0,ymm0,ymm4
8548 vpxor ymm12,ymm12,ymm0
8549 vpshufb ymm12,ymm12,YMMWORD[$L$rol16]
8550 vpaddd ymm8,ymm8,ymm12
8551 vpxor ymm4,ymm4,ymm8
8552 vpsrld ymm3,ymm4,20
8553 vpslld ymm4,ymm4,12
8554 vpxor ymm4,ymm4,ymm3
8555 vpaddd ymm0,ymm0,ymm4
8556 vpxor ymm12,ymm12,ymm0
8557 vpshufb ymm12,ymm12,YMMWORD[$L$rol8]
8558 vpaddd ymm8,ymm8,ymm12
8559 vpxor ymm4,ymm4,ymm8
8560 vpslld ymm3,ymm4,7
8561 vpsrld ymm4,ymm4,25
8562 vpxor ymm4,ymm4,ymm3
8563 vpalignr ymm12,ymm12,ymm12,4
8564 vpalignr ymm8,ymm8,ymm8,8
8565 vpalignr ymm4,ymm4,ymm4,12
; Diagonal round, copy 2.
8566 vpaddd ymm1,ymm1,ymm5
8567 vpxor ymm13,ymm13,ymm1
8568 vpshufb ymm13,ymm13,YMMWORD[$L$rol16]
8569 vpaddd ymm9,ymm9,ymm13
8570 vpxor ymm5,ymm5,ymm9
8571 vpsrld ymm3,ymm5,20
8572 vpslld ymm5,ymm5,12
8573 vpxor ymm5,ymm5,ymm3
8574 vpaddd ymm1,ymm1,ymm5
8575 vpxor ymm13,ymm13,ymm1
8576 vpshufb ymm13,ymm13,YMMWORD[$L$rol8]
8577 vpaddd ymm9,ymm9,ymm13
8578 vpxor ymm5,ymm5,ymm9
8579 vpslld ymm3,ymm5,7
8580 vpsrld ymm5,ymm5,25
8581 vpxor ymm5,ymm5,ymm3
8582 vpalignr ymm13,ymm13,ymm13,4
8583 vpalignr ymm9,ymm9,ymm9,8
8584 vpalignr ymm5,ymm5,ymm5,12
; Diagonal round, copy 3.
8585 vpaddd ymm2,ymm2,ymm6
8586 vpxor ymm14,ymm14,ymm2
8587 vpshufb ymm14,ymm14,YMMWORD[$L$rol16]
8588 vpaddd ymm10,ymm10,ymm14
8589 vpxor ymm6,ymm6,ymm10
8590 vpsrld ymm3,ymm6,20
8591 vpslld ymm6,ymm6,12
8592 vpxor ymm6,ymm6,ymm3
8593 vpaddd ymm2,ymm2,ymm6
8594 vpxor ymm14,ymm14,ymm2
8595 vpshufb ymm14,ymm14,YMMWORD[$L$rol8]
8596 vpaddd ymm10,ymm10,ymm14
8597 vpxor ymm6,ymm6,ymm10
8598 vpslld ymm3,ymm6,7
8599 vpsrld ymm6,ymm6,25
8600 vpxor ymm6,ymm6,ymm3
8601 vpalignr ymm14,ymm14,ymm14,4
8602 vpalignr ymm10,ymm10,ymm10,8
8603 vpalignr ymm6,ymm6,ymm6,12
8604
8605 dec r10
8606 jne NEAR $L$seal_avx2_320_rounds
; Add the input state back: constants, rows b/c from ymm7/ymm11, and
; each copy's own saved counter row.
8607 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts]
8608 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts]
8609 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts]
8610 vpaddd ymm4,ymm4,ymm7
8611 vpaddd ymm5,ymm5,ymm7
8612 vpaddd ymm6,ymm6,ymm7
8613 vpaddd ymm8,ymm8,ymm11
8614 vpaddd ymm9,ymm9,ymm11
8615 vpaddd ymm10,ymm10,ymm11
8616 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp]
8617 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp]
8618 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp]
; ymm3 = low lanes of rows a and b of the first copy, i.e. the first 32
; output bytes.  They are ANDed with $L$clamp (only the first 16 bytes
; are altered; the mask's upper half is all ones) and stored at
; [rbp+160], where the Poly1305 multiply code reads its two words.
8619 vperm2i128 ymm3,ymm4,ymm0,0x02
8620
8621 vpand ymm3,ymm3,YMMWORD[$L$clamp]
8622 vmovdqa YMMWORD[(160+0)+rbp],ymm3
8623
; Regroup the remaining lanes into 32-byte keystream chunks (low-lane
; pairs with imm 0x02, high-lane pairs with imm 0x13).
8624 vperm2i128 ymm0,ymm4,ymm0,0x13
8625 vperm2i128 ymm4,ymm12,ymm8,0x13
8626 vperm2i128 ymm8,ymm5,ymm1,0x02
8627 vperm2i128 ymm12,ymm13,ymm9,0x02
8628 vperm2i128 ymm1,ymm5,ymm1,0x13
8629 vperm2i128 ymm5,ymm13,ymm9,0x13
8630 vperm2i128 ymm9,ymm6,ymm2,0x02
8631 vperm2i128 ymm13,ymm14,ymm10,0x02
8632 vperm2i128
ymm2,ymm6,ymm2,0x13
; NOTE: the operands above complete a vperm2i128 that starts on the
; previous line.  This and the next instruction finish a lane regroup
; begun before this block; control then jumps to $L$seal_avx2_short.
8633 vperm2i128 ymm6,ymm14,ymm10,0x13
8634 jmp NEAR $L$seal_avx2_short
8635
; ----------------------------------------------------------------------
; $L$seal_avx2_192
; Entered with one ChaCha20 state in ymm0 (row a), ymm4 (row b),
; ymm8 (row c) and ymm12 (row d); the caller is outside this chunk.
; Clones it into two copies, (ymm0,ymm4,ymm8,ymm12) and
; (ymm1,ymm5,ymm9,ymm13), the second with row d stepped by $L$avx2_inc.
; The input state is kept for the final add in ymm2 (a), ymm6 (b),
; ymm10 (c), ymm11 (d of copy 1) and ymm15 (d of copy 2).
; r10 counts 10 double rounds; ymm3 is the shift temporary.
; ----------------------------------------------------------------------
8636$L$seal_avx2_192:
8637 vmovdqa ymm1,ymm0
8638 vmovdqa ymm2,ymm0
8639 vmovdqa ymm5,ymm4
8640 vmovdqa ymm6,ymm4
8641 vmovdqa ymm9,ymm8
8642 vmovdqa ymm10,ymm8
8643 vpaddd ymm13,ymm12,YMMWORD[$L$avx2_inc]
8644 vmovdqa ymm11,ymm12
8645 vmovdqa ymm15,ymm13
8646 mov r10,10
8647$L$seal_avx2_192_rounds:
; Column round, copy 1: a+=b; d^=a; d<<<=16; c+=d; b^=c; b<<<=12;
; a+=b; d^=a; d<<<=8; c+=d; b^=c; b<<<=7.  Then rows b/c/d are rotated
; by 1/2/3 dwords per lane, ready for the diagonal round.
8648 vpaddd ymm0,ymm0,ymm4
8649 vpxor ymm12,ymm12,ymm0
8650 vpshufb ymm12,ymm12,YMMWORD[$L$rol16]
8651 vpaddd ymm8,ymm8,ymm12
8652 vpxor ymm4,ymm4,ymm8
8653 vpsrld ymm3,ymm4,20
8654 vpslld ymm4,ymm4,12
8655 vpxor ymm4,ymm4,ymm3
8656 vpaddd ymm0,ymm0,ymm4
8657 vpxor ymm12,ymm12,ymm0
8658 vpshufb ymm12,ymm12,YMMWORD[$L$rol8]
8659 vpaddd ymm8,ymm8,ymm12
8660 vpxor ymm4,ymm4,ymm8
8661 vpslld ymm3,ymm4,7
8662 vpsrld ymm4,ymm4,25
8663 vpxor ymm4,ymm4,ymm3
8664 vpalignr ymm12,ymm12,ymm12,12
8665 vpalignr ymm8,ymm8,ymm8,8
8666 vpalignr ymm4,ymm4,ymm4,4
; Column round, copy 2.
8667 vpaddd ymm1,ymm1,ymm5
8668 vpxor ymm13,ymm13,ymm1
8669 vpshufb ymm13,ymm13,YMMWORD[$L$rol16]
8670 vpaddd ymm9,ymm9,ymm13
8671 vpxor ymm5,ymm5,ymm9
8672 vpsrld ymm3,ymm5,20
8673 vpslld ymm5,ymm5,12
8674 vpxor ymm5,ymm5,ymm3
8675 vpaddd ymm1,ymm1,ymm5
8676 vpxor ymm13,ymm13,ymm1
8677 vpshufb ymm13,ymm13,YMMWORD[$L$rol8]
8678 vpaddd ymm9,ymm9,ymm13
8679 vpxor ymm5,ymm5,ymm9
8680 vpslld ymm3,ymm5,7
8681 vpsrld ymm5,ymm5,25
8682 vpxor ymm5,ymm5,ymm3
8683 vpalignr ymm13,ymm13,ymm13,12
8684 vpalignr ymm9,ymm9,ymm9,8
8685 vpalignr ymm5,ymm5,ymm5,4
; Diagonal round, copy 1; the closing vpalignr triple undoes the lane
; rotation.
8686 vpaddd ymm0,ymm0,ymm4
8687 vpxor ymm12,ymm12,ymm0
8688 vpshufb ymm12,ymm12,YMMWORD[$L$rol16]
8689 vpaddd ymm8,ymm8,ymm12
8690 vpxor ymm4,ymm4,ymm8
8691 vpsrld ymm3,ymm4,20
8692 vpslld ymm4,ymm4,12
8693 vpxor ymm4,ymm4,ymm3
8694 vpaddd ymm0,ymm0,ymm4
8695 vpxor ymm12,ymm12,ymm0
8696 vpshufb ymm12,ymm12,YMMWORD[$L$rol8]
8697 vpaddd ymm8,ymm8,ymm12
8698 vpxor ymm4,ymm4,ymm8
8699 vpslld ymm3,ymm4,7
8700 vpsrld ymm4,ymm4,25
8701 vpxor ymm4,ymm4,ymm3
8702 vpalignr ymm12,ymm12,ymm12,4
8703 vpalignr ymm8,ymm8,ymm8,8
8704 vpalignr ymm4,ymm4,ymm4,12
; Diagonal round, copy 2.
8705 vpaddd ymm1,ymm1,ymm5
8706 vpxor ymm13,ymm13,ymm1
8707 vpshufb ymm13,ymm13,YMMWORD[$L$rol16]
8708 vpaddd ymm9,ymm9,ymm13
8709 vpxor ymm5,ymm5,ymm9
8710 vpsrld ymm3,ymm5,20
8711 vpslld ymm5,ymm5,12
8712 vpxor ymm5,ymm5,ymm3
8713 vpaddd ymm1,ymm1,ymm5
8714 vpxor ymm13,ymm13,ymm1
8715 vpshufb ymm13,ymm13,YMMWORD[$L$rol8]
8716 vpaddd ymm9,ymm9,ymm13
8717 vpxor ymm5,ymm5,ymm9
8718 vpslld ymm3,ymm5,7
8719 vpsrld ymm5,ymm5,25
8720 vpxor ymm5,ymm5,ymm3
8721 vpalignr ymm13,ymm13,ymm13,4
8722 vpalignr ymm9,ymm9,ymm9,8
8723 vpalignr ymm5,ymm5,ymm5,12
8724
8725 dec r10
8726 jne NEAR $L$seal_avx2_192_rounds
; Add the saved input state back into both copies.
8727 vpaddd ymm0,ymm0,ymm2
8728 vpaddd ymm1,ymm1,ymm2
8729 vpaddd ymm4,ymm4,ymm6
8730 vpaddd ymm5,ymm5,ymm6
8731 vpaddd ymm8,ymm8,ymm10
8732 vpaddd ymm9,ymm9,ymm10
8733 vpaddd ymm12,ymm12,ymm11
8734 vpaddd ymm13,ymm13,ymm15
; ymm3 = low lanes of rows a and b of the first copy, i.e. the first 32
; output bytes.  They are ANDed with $L$clamp (only the first 16 bytes
; are altered; the mask's upper half is all ones) and stored at
; [rbp+160], where the Poly1305 multiply code reads its two words.
8735 vperm2i128 ymm3,ymm4,ymm0,0x02
8736
8737 vpand ymm3,ymm3,YMMWORD[$L$clamp]
8738 vmovdqa YMMWORD[(160+0)+rbp],ymm3
8739
; Regroup the remaining lanes into 32-byte keystream chunks, consumed in
; the order ymm0, ymm4, ymm8, ymm12, ymm1, ymm5.
8740 vperm2i128 ymm0,ymm4,ymm0,0x13
8741 vperm2i128 ymm4,ymm12,ymm8,0x13
8742 vperm2i128 ymm8,ymm5,ymm1,0x02
8743 vperm2i128 ymm12,ymm13,ymm9,0x02
8744 vperm2i128 ymm1,ymm5,ymm1,0x13
8745 vperm2i128 ymm5,ymm13,ymm9,0x13
; ----------------------------------------------------------------------
; $L$seal_avx2_short
; Shared continuation for the short-input paths (the 192 path falls
; through; another path reaches it by jmp).  `mov r8,r8` has no effect
; on register contents or flags.  poly_hash_ad_internal is called with
; its inputs already in place -- presumably r8 = length and rcx =
; pointer of the additional data, judging from that routine's opening
; lines; confirm against its definition.  rcx is then cleared so the
; hash-remainder loop below has nothing pending.
; ----------------------------------------------------------------------
8746$L$seal_avx2_short:
8747 mov r8,r8
8748 call poly_hash_ad_internal
8749 xor rcx,rcx
; ----------------------------------------------------------------------
; $L$seal_avx2_short_hash_remainder
; While rcx >= 16 (unsigned): absorb 16 bytes at [rdi] into the Poly1305
; accumulator r10:r11:r12, multiply by the words at [rbp+160] and
; [rbp+168] using `mul`, and reduce.
; ----------------------------------------------------------------------
8750$L$seal_avx2_short_hash_remainder:
8751 cmp rcx,16
8752 jb NEAR $L$seal_avx2_short_loop
; Absorb; `adc r12,1` adds 1 at bit 128 plus the carry.
8753 add r10,QWORD[((0+0))+rdi]
8754 adc r11,QWORD[((8+0))+rdi]
8755 adc r12,1
; Multiply by the word at [rbp+160].
8756 mov rax,QWORD[((0+160+0))+rbp]
8757 mov r15,rax
8758 mul r10
8759 mov r13,rax
8760 mov r14,rdx
8761 mov rax,QWORD[((0+160+0))+rbp]
8762 mul r11
8763 imul r15,r12
8764 add r14,rax
8765 adc r15,rdx
; Multiply by the word at [rbp+168].
8766 mov rax,QWORD[((8+160+0))+rbp]
8767 mov r9,rax
8768 mul r10
8769 add r14,rax
8770 adc rdx,0
8771 mov r10,rdx
8772 mov rax,QWORD[((8+160+0))+rbp]
8773 mul r11
8774 add r15,rax
8775 adc rdx,0
8776 imul r9,r12
8777 add r15,r10
8778 adc r9,rdx
; Reduction starts here and continues on the following line.
8779 mov r10,r13
8780 mov r11,r14
8781 mov r12,r15
8782 and
r12,3
; NOTE: the operands above complete an `and` that starts on the previous
; line.  The statements down to the `jmp` finish one pass of the
; hash-remainder loop: the Poly1305 reduction (keep the low 130 bits in
; r10:r11:r12, add back 5 x the bits above), then rcx -= 16, rdi += 16.
8783 mov r13,r15
8784 and r13,-4
8785 mov r14,r9
8786 shrd r15,r9,2
8787 shr r9,2
8788 add r15,r13
8789 adc r9,r14
8790 add r10,r15
8791 adc r11,r9
8792 adc r12,0
8793
8794 sub rcx,16
8795 add rdi,16
8796 jmp NEAR $L$seal_avx2_short_hash_remainder
; ----------------------------------------------------------------------
; $L$seal_avx2_short_loop
; While rbx >= 32 (unsigned): XOR 32 input bytes at [rsi] with the
; keystream in ymm0, store the result at [rdi], then hash the 32 bytes
; just written as two 16-byte Poly1305 blocks.
;   rbx = bytes still to process     rsi = input pointer
;   rdi = output pointer, also the source of the hashed data
; ----------------------------------------------------------------------
8797$L$seal_avx2_short_loop:
8798 cmp rbx,32
8799 jb NEAR $L$seal_avx2_short_tail
8800 sub rbx,32
8801
8802 vpxor ymm0,ymm0,YMMWORD[rsi]
8803 vmovdqu YMMWORD[rdi],ymm0
8804 lea rsi,[32+rsi]
8805
; Poly1305 block 1: bytes [rdi .. rdi+15].
8806 add r10,QWORD[((0+0))+rdi]
8807 adc r11,QWORD[((8+0))+rdi]
8808 adc r12,1
8809 mov rax,QWORD[((0+160+0))+rbp]
8810 mov r15,rax
8811 mul r10
8812 mov r13,rax
8813 mov r14,rdx
8814 mov rax,QWORD[((0+160+0))+rbp]
8815 mul r11
8816 imul r15,r12
8817 add r14,rax
8818 adc r15,rdx
8819 mov rax,QWORD[((8+160+0))+rbp]
8820 mov r9,rax
8821 mul r10
8822 add r14,rax
8823 adc rdx,0
8824 mov r10,rdx
8825 mov rax,QWORD[((8+160+0))+rbp]
8826 mul r11
8827 add r15,rax
8828 adc rdx,0
8829 imul r9,r12
8830 add r15,r10
8831 adc r9,rdx
8832 mov r10,r13
8833 mov r11,r14
8834 mov r12,r15
8835 and r12,3
8836 mov r13,r15
8837 and r13,-4
8838 mov r14,r9
8839 shrd r15,r9,2
8840 shr r9,2
8841 add r15,r13
8842 adc r9,r14
8843 add r10,r15
8844 adc r11,r9
8845 adc r12,0
; Poly1305 block 2: bytes [rdi+16 .. rdi+31].
8846 add r10,QWORD[((0+16))+rdi]
8847 adc r11,QWORD[((8+16))+rdi]
8848 adc r12,1
8849 mov rax,QWORD[((0+160+0))+rbp]
8850 mov r15,rax
8851 mul r10
8852 mov r13,rax
8853 mov r14,rdx
8854 mov rax,QWORD[((0+160+0))+rbp]
8855 mul r11
8856 imul r15,r12
8857 add r14,rax
8858 adc r15,rdx
8859 mov rax,QWORD[((8+160+0))+rbp]
8860 mov r9,rax
8861 mul r10
8862 add r14,rax
8863 adc rdx,0
8864 mov r10,rdx
8865 mov rax,QWORD[((8+160+0))+rbp]
8866 mul r11
8867 add r15,rax
8868 adc rdx,0
8869 imul r9,r12
8870 add r15,r10
8871 adc r9,rdx
8872 mov r10,r13
8873 mov r11,r14
8874 mov r12,r15
8875 and r12,3
8876 mov r13,r15
8877 and r13,-4
8878 mov r14,r9
8879 shrd r15,r9,2
8880 shr r9,2
8881 add r15,r13
8882 adc r9,r14
8883 add r10,r15
8884 adc r11,r9
8885 adc r12,0
8886
; Step past the 32 bytes written and hashed.
8887 lea rdi,[32+rdi]
8888
8889
vmovdqa ymm0,ymm4
; The move above and the eight below shift the keystream register queue
; down by one 32-byte slot:
;   ymm0 <- ymm4 <- ymm8 <- ymm12 <- ymm1 <- ymm5 <- ymm9 <- ymm13
;        <- ymm2 <- ymm6
; so ymm0 holds the next keystream chunk when the short loop repeats.
8890 vmovdqa ymm4,ymm8
8891 vmovdqa ymm8,ymm12
8892 vmovdqa ymm12,ymm1
8893 vmovdqa ymm1,ymm5
8894 vmovdqa ymm5,ymm9
8895 vmovdqa ymm9,ymm13
8896 vmovdqa ymm13,ymm2
8897 vmovdqa ymm2,ymm6
8898 jmp NEAR $L$seal_avx2_short_loop
; ----------------------------------------------------------------------
; $L$seal_avx2_short_tail
; Fewer than 32 bytes remain (rbx < 32).  If at least 16 remain: XOR 16
; input bytes at [rsi] with xmm0 into xmm3, store them at [rdi], hash
; that block into the Poly1305 accumulator r10:r11:r12, advance both
; pointers by 16, and move the upper 128-bit lane of ymm0 down into xmm0
; as the next keystream.
; ----------------------------------------------------------------------
8899$L$seal_avx2_short_tail:
8900 cmp rbx,16
8901 jb NEAR $L$seal_avx2_exit
8902 sub rbx,16
8903 vpxor xmm3,xmm0,XMMWORD[rsi]
8904 vmovdqu XMMWORD[rdi],xmm3
8905 lea rsi,[16+rsi]
; Absorb the 16 bytes just written; `adc r12,1` adds 1 at bit 128.
8906 add r10,QWORD[((0+0))+rdi]
8907 adc r11,QWORD[((8+0))+rdi]
8908 adc r12,1
; Multiply by the word at [rbp+160].
8909 mov rax,QWORD[((0+160+0))+rbp]
8910 mov r15,rax
8911 mul r10
8912 mov r13,rax
8913 mov r14,rdx
8914 mov rax,QWORD[((0+160+0))+rbp]
8915 mul r11
8916 imul r15,r12
8917 add r14,rax
8918 adc r15,rdx
; Multiply by the word at [rbp+168].
8919 mov rax,QWORD[((8+160+0))+rbp]
8920 mov r9,rax
8921 mul r10
8922 add r14,rax
8923 adc rdx,0
8924 mov r10,rdx
8925 mov rax,QWORD[((8+160+0))+rbp]
8926 mul r11
8927 add r15,rax
8928 adc rdx,0
8929 imul r9,r12
8930 add r15,r10
8931 adc r9,rdx
; Reduce: keep the low 130 bits, add back 5 x the bits above.
8932 mov r10,r13
8933 mov r11,r14
8934 mov r12,r15
8935 and r12,3
8936 mov r13,r15
8937 and r13,-4
8938 mov r14,r9
8939 shrd r15,r9,2
8940 shr r9,2
8941 add r15,r13
8942 adc r9,r14
8943 add r10,r15
8944 adc r11,r9
8945 adc r12,0
8946
8947 lea rdi,[16+rdi]
8948 vextracti128 xmm0,ymm0,1
; ----------------------------------------------------------------------
; $L$seal_avx2_exit
; Clears the upper ymm halves (vzeroupper) before handing over to
; $L$seal_sse_tail_16, which is defined outside this chunk -- presumably
; the SSE code for the final partial block; confirm at its definition.
; ----------------------------------------------------------------------
8949$L$seal_avx2_exit:
8950 vzeroupper
8951 jmp NEAR $L$seal_sse_tail_16
8952
8953
; NOTE(review): everything from `%else` on is kept on one physical line
; exactly as found.  In that form the text after ';' -- including `ret`
; and `%endif` -- is comment text; confirm against the generated
; original before relying on it.
8954%else 8955; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 8956ret 8957%endif 8958