; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.

; Use RIP-relative addressing for every memory operand without an explicit base.
default	rel
; The size keywords are defined as empty macros, so `XMMWORD[...]` below
; expands to a plain `[...]` memory operand.
%define XMMWORD
%define YMMWORD
%define ZMMWORD

%include "ring_core_generated/prefix_symbols_nasm.inc"
section	.text code align=64

EXTERN	OPENSSL_ia32cap_P

;-----------------------------------------------------------------------
; Constant tables shared by the ChaCha20-Poly1305 routines in this file.
;-----------------------------------------------------------------------
chacha20_poly1305_constants:

ALIGN	64
; The 16 ASCII bytes "expand 32-byte k", stored twice (32 bytes).
$L$chacha20_consts:
DB	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
DB	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
; pshufb control bytes: within every dword, output byte i takes input byte
; (i+3) mod 4, i.e. each 32-bit lane is rotated left by 8 bits. Stored twice.
$L$rol8:
DB	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
DB	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
; pshufb control bytes: within every dword, output byte i takes input byte
; (i+2) mod 4, i.e. each 32-bit lane is rotated left by 16 bits. Stored twice.
$L$rol16:
DB	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
DB	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
; Four zero dwords.
$L$avx2_init:
	DD	0,0,0,0
; Dword vector {1,0,0,0}: a paddd with it bumps the lowest dword by one.
$L$sse_inc:
	DD	1,0,0,0
; Dword vector {2,0,0,0} repeated for two 128-bit halves.
$L$avx2_inc:
	DD	2,0,0,0,2,0,0,0
; 32-byte AND mask. The first 16 bytes clear the top four bits of every
; dword and the low two bits of dwords 1..3 (this matches the Poly1305 "r"
; clamp); the second 16 bytes are all ones and leave their half untouched.
$L$clamp:
	DQ	0x0FFFFFFC0FFFFFFF,0x0FFFFFFC0FFFFFFC
	DQ	0xFFFFFFFFFFFFFFFF,0xFFFFFFFFFFFFFFFF
ALIGN	16
; Sixteen 16-byte rows; row n (n = 1..16) has its first n bytes set to 0xff
; and the remaining bytes zero.
$L$and_masks:
DB	0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
DB	0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
DB	0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
DB	0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
DB	0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
DB	0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00
DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00
DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00
DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00
DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00
DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00
DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00
DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff


;-----------------------------------------------------------------------
; poly_hash_ad_internal
;
; Internal helper with a private register contract (not a C-callable ABI).
; Starts a fresh accumulator and absorbs r8 bytes at [rcx] into it, 16 bytes
; at a time; a trailing partial block is zero-padded to 16 bytes.
;
; In:    rcx = pointer to the data to absorb
;        r8  = number of bytes at rcx
;        rbp = frame base; [rbp+160] and [rbp+168] hold the low and high
;              64-bit words of the multiplier r (the caller stores the
;              $L$clamp-masked key block there)
; Out:   r12:r11:r10 = accumulator h, partially reduced mod 2^130 - 5
; Clobb: rax, rdx, r9, r13, r14, r15, flags; rcx and r8 are consumed on the
;        loop and tail paths
;
; Every block is processed as h = (h + block + 2^128) * r, followed by a
; partial reduction that folds bits >= 130 back in multiplied by 5
; (2^130 = 5 mod 2^130 - 5).
;-----------------------------------------------------------------------
ALIGN	64
poly_hash_ad_internal:


	xor	r10,r10			; h = 0
	xor	r11,r11
	xor	r12,r12
	cmp	r8,13
	jne	NEAR $L$hash_ad_loop
$L$poly_fast_tls_ad:

	; Exactly 13 bytes: build the single zero-padded block with two
	; overlapping 8-byte loads that stay inside the 13-byte input.
	mov	r10,QWORD[rcx]		; bytes 0..7
	mov	r11,QWORD[5+rcx]	; bytes 5..12
	shr	r11,24			; keep bytes 8..12 in the low 40 bits
	mov	r12,1			; the 2^128 padding bit
	; --- h *= r : schoolbook multiply into r9:r15:r14:r13 ---
	mov	rax,QWORD[((0+160+0))+rbp]	; r0
	mov	r15,rax
	mul	r10			; rdx:rax = r0*h0
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11			; rdx:rax = r0*h1
	imul	r15,r12			; r15 = r0*h2
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]	; r1
	mov	r9,rax
	mul	r10			; rdx:rax = r1*h0
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx			; r10 temporarily carries the high half
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11			; rdx:rax = r1*h1
	add	r15,rax
	adc	rdx,0
	imul	r9,r12			; r9 = r1*h2
	add	r15,r10
	adc	r9,rdx
	; --- partial reduction: h = (p mod 2^130) + 5*(p >> 130) ---
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3			; bits 128..129 of the product
	mov	r13,r15
	and	r13,-4			; r14:r13 = 4*(p >> 130)
	mov	r14,r9
	shrd	r15,r9,2		; r9:r15 = p >> 130
	shr	r9,2
	add	r15,r13
	adc	r9,r14			; r9:r15 = 5*(p >> 130)
	add	r10,r15
	adc	r11,r9
	adc	r12,0

	DB	0F3h,0C3h		;repret
$L$hash_ad_loop:

	; Absorb full 16-byte blocks while at least 16 bytes remain.
	cmp	r8,16
	jb	NEAR $L$hash_ad_tail
	add	r10,QWORD[((0+0))+rcx]	; h += block
	adc	r11,QWORD[((8+0))+rcx]
	adc	r12,1			; ... + 2^128
	; --- h *= r (same multiply as in the fast path above) ---
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	; --- partial reduction mod 2^130 - 5 ---
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0

	lea	rcx,[16+rcx]		; advance input pointer
	sub	r8,16
	jmp	NEAR $L$hash_ad_loop
$L$hash_ad_tail:
	cmp	r8,0
	je	NEAR $L$hash_ad_done

	; 1..15 bytes remain: gather them into r14:r13, zero-padded.
	xor	r13,r13
	xor	r14,r14
	xor	r15,r15
	add	rcx,r8			; rcx = one past the last byte
$L$hash_ad_tail_loop:
	; Walk backwards so the first remaining byte ends up in the low
	; byte of r13 (little-endian block layout).
	shld	r14,r13,8
	shl	r13,8
	movzx	r15,BYTE[((-1))+rcx]
	xor	r13,r15			; low byte is zero after shl, so xor inserts
	dec	rcx
	dec	r8
	jne	NEAR $L$hash_ad_tail_loop

	add	r10,r13			; h += padded block
	adc	r11,r14
	adc	r12,1			; ... + 2^128
	; --- h *= r (same multiply as in the fast path above) ---
	mov	rax,QWORD[((0+160+0))+rbp]
	mov	r15,rax
	mul	r10
	mov	r13,rax
	mov	r14,rdx
	mov	rax,QWORD[((0+160+0))+rbp]
	mul	r11
	imul	r15,r12
	add	r14,rax
	adc	r15,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mov	r9,rax
	mul	r10
	add	r14,rax
	adc	rdx,0
	mov	r10,rdx
	mov	rax,QWORD[((8+160+0))+rbp]
	mul	r11
	add	r15,rax
	adc	rdx,0
	imul	r9,r12
	add	r15,r10
	adc	r9,rdx
	; --- partial reduction mod 2^130 - 5 ---
	mov	r10,r13
	mov	r11,r14
	mov	r12,r15
	and	r12,3
	mov	r13,r15
	and	r13,-4
	mov	r14,r9
	shrd	r15,r9,2
	shr	r9,2
	add	r15,r13
	adc	r9,r14
	add	r10,r15
	adc	r11,r9
	adc	r12,0


$L$hash_ad_done:
	DB	0F3h,0C3h		;repret



global	chacha20_poly1305_open

; Entry point. Only the start of the prologue is annotated here; the body of
; the routine continues below.
ALIGN	64
chacha20_poly1305_open:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi	; rdi/rsi are callee-saved on Win64: park them in the caller's home space
	mov	rax,rsp			; rax = rsp as it was on entry
$L$SEH_begin_chacha20_poly1305_open:
	; Move the six Win64 arguments (rcx, rdx, r8, r9, [rsp+40], [rsp+48])
	; into rdi, rsi, rdx, rcx, r8, r9, which the rest of the code uses.
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]



	; Preserve the callee-saved general-purpose registers.
	push	rbp

	push	rbx

	push	r12

	push	r13

	push	r14

	push	r15



	push	r9			; keep a copy of the sixth argument on the stack

	sub	rsp,288 + 160 + 32	; reserve the local frame


	lea	rbp,[32+rsp]
	and	rbp,-32			; rbp = 32-byte-aligned base inside the frame

	; xmm6-xmm15 are callee-saved on Win64: spill them to rbp+0 .. rbp+159.
	movaps	XMMWORD[(0+0)+rbp],xmm6
	movaps	XMMWORD[(16+0)+rbp],xmm7
	movaps	XMMWORD[(32+0)+rbp],xmm8
	movaps	XMMWORD[(48+0)+rbp],xmm9
	movaps	XMMWORD[(64+0)+rbp],xmm10
	movaps	XMMWORD[(80+0)+rbp],xmm11
	movaps	XMMWORD[(96+0)+rbp],xmm12
	movaps	XMMWORD[(112+0)+rbp],xmm13
	movaps	XMMWORD[(128+0)+rbp],xmm14
	movaps	XMMWORD[(144+0)+rbp],xmm15

mov rbx,rdx 272 mov QWORD[((0+160+32))+rbp],r8 273 mov QWORD[((8+160+32))+rbp],rbx 274 275 mov eax,DWORD[((OPENSSL_ia32cap_P+8))] 276 and eax,288 277 xor eax,288 278 jz NEAR chacha20_poly1305_open_avx2 279 280 cmp rbx,128 281 jbe NEAR $L$open_sse_128 282 283 movdqa xmm0,XMMWORD[$L$chacha20_consts] 284 movdqu xmm4,XMMWORD[r9] 285 movdqu xmm8,XMMWORD[16+r9] 286 movdqu xmm12,XMMWORD[32+r9] 287 288 movdqa xmm7,xmm12 289 290 movdqa XMMWORD[(160+48)+rbp],xmm4 291 movdqa XMMWORD[(160+64)+rbp],xmm8 292 movdqa XMMWORD[(160+96)+rbp],xmm12 293 mov r10,10 294$L$open_sse_init_rounds: 295 paddd xmm0,xmm4 296 pxor xmm12,xmm0 297 pshufb xmm12,XMMWORD[$L$rol16] 298 paddd xmm8,xmm12 299 pxor xmm4,xmm8 300 movdqa xmm3,xmm4 301 pslld xmm3,12 302 psrld xmm4,20 303 pxor xmm4,xmm3 304 paddd xmm0,xmm4 305 pxor xmm12,xmm0 306 pshufb xmm12,XMMWORD[$L$rol8] 307 paddd xmm8,xmm12 308 pxor xmm4,xmm8 309 movdqa xmm3,xmm4 310 pslld xmm3,7 311 psrld xmm4,25 312 pxor xmm4,xmm3 313DB 102,15,58,15,228,4 314DB 102,69,15,58,15,192,8 315DB 102,69,15,58,15,228,12 316 paddd xmm0,xmm4 317 pxor xmm12,xmm0 318 pshufb xmm12,XMMWORD[$L$rol16] 319 paddd xmm8,xmm12 320 pxor xmm4,xmm8 321 movdqa xmm3,xmm4 322 pslld xmm3,12 323 psrld xmm4,20 324 pxor xmm4,xmm3 325 paddd xmm0,xmm4 326 pxor xmm12,xmm0 327 pshufb xmm12,XMMWORD[$L$rol8] 328 paddd xmm8,xmm12 329 pxor xmm4,xmm8 330 movdqa xmm3,xmm4 331 pslld xmm3,7 332 psrld xmm4,25 333 pxor xmm4,xmm3 334DB 102,15,58,15,228,12 335DB 102,69,15,58,15,192,8 336DB 102,69,15,58,15,228,4 337 338 dec r10 339 jne NEAR $L$open_sse_init_rounds 340 341 paddd xmm0,XMMWORD[$L$chacha20_consts] 342 paddd xmm4,XMMWORD[((160+48))+rbp] 343 344 pand xmm0,XMMWORD[$L$clamp] 345 movdqa XMMWORD[(160+0)+rbp],xmm0 346 movdqa XMMWORD[(160+16)+rbp],xmm4 347 348 mov r8,r8 349 call poly_hash_ad_internal 350$L$open_sse_main_loop: 351 cmp rbx,16*16 352 jb NEAR $L$open_sse_tail 353 354 movdqa xmm0,XMMWORD[$L$chacha20_consts] 355 movdqa xmm4,XMMWORD[((160+48))+rbp] 356 movdqa 
xmm8,XMMWORD[((160+64))+rbp] 357 movdqa xmm1,xmm0 358 movdqa xmm5,xmm4 359 movdqa xmm9,xmm8 360 movdqa xmm2,xmm0 361 movdqa xmm6,xmm4 362 movdqa xmm10,xmm8 363 movdqa xmm3,xmm0 364 movdqa xmm7,xmm4 365 movdqa xmm11,xmm8 366 movdqa xmm15,XMMWORD[((160+96))+rbp] 367 paddd xmm15,XMMWORD[$L$sse_inc] 368 movdqa xmm14,xmm15 369 paddd xmm14,XMMWORD[$L$sse_inc] 370 movdqa xmm13,xmm14 371 paddd xmm13,XMMWORD[$L$sse_inc] 372 movdqa xmm12,xmm13 373 paddd xmm12,XMMWORD[$L$sse_inc] 374 movdqa XMMWORD[(160+96)+rbp],xmm12 375 movdqa XMMWORD[(160+112)+rbp],xmm13 376 movdqa XMMWORD[(160+128)+rbp],xmm14 377 movdqa XMMWORD[(160+144)+rbp],xmm15 378 379 380 381 mov rcx,4 382 mov r8,rsi 383$L$open_sse_main_loop_rounds: 384 movdqa XMMWORD[(160+80)+rbp],xmm8 385 movdqa xmm8,XMMWORD[$L$rol16] 386 paddd xmm3,xmm7 387 paddd xmm2,xmm6 388 paddd xmm1,xmm5 389 paddd xmm0,xmm4 390 pxor xmm15,xmm3 391 pxor xmm14,xmm2 392 pxor xmm13,xmm1 393 pxor xmm12,xmm0 394DB 102,69,15,56,0,248 395DB 102,69,15,56,0,240 396DB 102,69,15,56,0,232 397DB 102,69,15,56,0,224 398 movdqa xmm8,XMMWORD[((160+80))+rbp] 399 paddd xmm11,xmm15 400 paddd xmm10,xmm14 401 paddd xmm9,xmm13 402 paddd xmm8,xmm12 403 pxor xmm7,xmm11 404 add r10,QWORD[((0+0))+r8] 405 adc r11,QWORD[((8+0))+r8] 406 adc r12,1 407 408 lea r8,[16+r8] 409 pxor xmm6,xmm10 410 pxor xmm5,xmm9 411 pxor xmm4,xmm8 412 movdqa XMMWORD[(160+80)+rbp],xmm8 413 movdqa xmm8,xmm7 414 psrld xmm8,20 415 pslld xmm7,32-20 416 pxor xmm7,xmm8 417 movdqa xmm8,xmm6 418 psrld xmm8,20 419 pslld xmm6,32-20 420 pxor xmm6,xmm8 421 movdqa xmm8,xmm5 422 psrld xmm8,20 423 pslld xmm5,32-20 424 pxor xmm5,xmm8 425 movdqa xmm8,xmm4 426 psrld xmm8,20 427 pslld xmm4,32-20 428 pxor xmm4,xmm8 429 mov rax,QWORD[((0+160+0))+rbp] 430 mov r15,rax 431 mul r10 432 mov r13,rax 433 mov r14,rdx 434 mov rax,QWORD[((0+160+0))+rbp] 435 mul r11 436 imul r15,r12 437 add r14,rax 438 adc r15,rdx 439 movdqa xmm8,XMMWORD[$L$rol8] 440 paddd xmm3,xmm7 441 paddd xmm2,xmm6 442 paddd xmm1,xmm5 443 paddd xmm0,xmm4 
444 pxor xmm15,xmm3 445 pxor xmm14,xmm2 446 pxor xmm13,xmm1 447 pxor xmm12,xmm0 448DB 102,69,15,56,0,248 449DB 102,69,15,56,0,240 450DB 102,69,15,56,0,232 451DB 102,69,15,56,0,224 452 movdqa xmm8,XMMWORD[((160+80))+rbp] 453 paddd xmm11,xmm15 454 paddd xmm10,xmm14 455 paddd xmm9,xmm13 456 paddd xmm8,xmm12 457 pxor xmm7,xmm11 458 pxor xmm6,xmm10 459 mov rax,QWORD[((8+160+0))+rbp] 460 mov r9,rax 461 mul r10 462 add r14,rax 463 adc rdx,0 464 mov r10,rdx 465 mov rax,QWORD[((8+160+0))+rbp] 466 mul r11 467 add r15,rax 468 adc rdx,0 469 pxor xmm5,xmm9 470 pxor xmm4,xmm8 471 movdqa XMMWORD[(160+80)+rbp],xmm8 472 movdqa xmm8,xmm7 473 psrld xmm8,25 474 pslld xmm7,32-25 475 pxor xmm7,xmm8 476 movdqa xmm8,xmm6 477 psrld xmm8,25 478 pslld xmm6,32-25 479 pxor xmm6,xmm8 480 movdqa xmm8,xmm5 481 psrld xmm8,25 482 pslld xmm5,32-25 483 pxor xmm5,xmm8 484 movdqa xmm8,xmm4 485 psrld xmm8,25 486 pslld xmm4,32-25 487 pxor xmm4,xmm8 488 movdqa xmm8,XMMWORD[((160+80))+rbp] 489 imul r9,r12 490 add r15,r10 491 adc r9,rdx 492DB 102,15,58,15,255,4 493DB 102,69,15,58,15,219,8 494DB 102,69,15,58,15,255,12 495DB 102,15,58,15,246,4 496DB 102,69,15,58,15,210,8 497DB 102,69,15,58,15,246,12 498DB 102,15,58,15,237,4 499DB 102,69,15,58,15,201,8 500DB 102,69,15,58,15,237,12 501DB 102,15,58,15,228,4 502DB 102,69,15,58,15,192,8 503DB 102,69,15,58,15,228,12 504 movdqa XMMWORD[(160+80)+rbp],xmm8 505 movdqa xmm8,XMMWORD[$L$rol16] 506 paddd xmm3,xmm7 507 paddd xmm2,xmm6 508 paddd xmm1,xmm5 509 paddd xmm0,xmm4 510 pxor xmm15,xmm3 511 pxor xmm14,xmm2 512 mov r10,r13 513 mov r11,r14 514 mov r12,r15 515 and r12,3 516 mov r13,r15 517 and r13,-4 518 mov r14,r9 519 shrd r15,r9,2 520 shr r9,2 521 add r15,r13 522 adc r9,r14 523 add r10,r15 524 adc r11,r9 525 adc r12,0 526 pxor xmm13,xmm1 527 pxor xmm12,xmm0 528DB 102,69,15,56,0,248 529DB 102,69,15,56,0,240 530DB 102,69,15,56,0,232 531DB 102,69,15,56,0,224 532 movdqa xmm8,XMMWORD[((160+80))+rbp] 533 paddd xmm11,xmm15 534 paddd xmm10,xmm14 535 paddd xmm9,xmm13 536 paddd 
xmm8,xmm12 537 pxor xmm7,xmm11 538 pxor xmm6,xmm10 539 pxor xmm5,xmm9 540 pxor xmm4,xmm8 541 movdqa XMMWORD[(160+80)+rbp],xmm8 542 movdqa xmm8,xmm7 543 psrld xmm8,20 544 pslld xmm7,32-20 545 pxor xmm7,xmm8 546 movdqa xmm8,xmm6 547 psrld xmm8,20 548 pslld xmm6,32-20 549 pxor xmm6,xmm8 550 movdqa xmm8,xmm5 551 psrld xmm8,20 552 pslld xmm5,32-20 553 pxor xmm5,xmm8 554 movdqa xmm8,xmm4 555 psrld xmm8,20 556 pslld xmm4,32-20 557 pxor xmm4,xmm8 558 movdqa xmm8,XMMWORD[$L$rol8] 559 paddd xmm3,xmm7 560 paddd xmm2,xmm6 561 paddd xmm1,xmm5 562 paddd xmm0,xmm4 563 pxor xmm15,xmm3 564 pxor xmm14,xmm2 565 pxor xmm13,xmm1 566 pxor xmm12,xmm0 567DB 102,69,15,56,0,248 568DB 102,69,15,56,0,240 569DB 102,69,15,56,0,232 570DB 102,69,15,56,0,224 571 movdqa xmm8,XMMWORD[((160+80))+rbp] 572 paddd xmm11,xmm15 573 paddd xmm10,xmm14 574 paddd xmm9,xmm13 575 paddd xmm8,xmm12 576 pxor xmm7,xmm11 577 pxor xmm6,xmm10 578 pxor xmm5,xmm9 579 pxor xmm4,xmm8 580 movdqa XMMWORD[(160+80)+rbp],xmm8 581 movdqa xmm8,xmm7 582 psrld xmm8,25 583 pslld xmm7,32-25 584 pxor xmm7,xmm8 585 movdqa xmm8,xmm6 586 psrld xmm8,25 587 pslld xmm6,32-25 588 pxor xmm6,xmm8 589 movdqa xmm8,xmm5 590 psrld xmm8,25 591 pslld xmm5,32-25 592 pxor xmm5,xmm8 593 movdqa xmm8,xmm4 594 psrld xmm8,25 595 pslld xmm4,32-25 596 pxor xmm4,xmm8 597 movdqa xmm8,XMMWORD[((160+80))+rbp] 598DB 102,15,58,15,255,12 599DB 102,69,15,58,15,219,8 600DB 102,69,15,58,15,255,4 601DB 102,15,58,15,246,12 602DB 102,69,15,58,15,210,8 603DB 102,69,15,58,15,246,4 604DB 102,15,58,15,237,12 605DB 102,69,15,58,15,201,8 606DB 102,69,15,58,15,237,4 607DB 102,15,58,15,228,12 608DB 102,69,15,58,15,192,8 609DB 102,69,15,58,15,228,4 610 611 dec rcx 612 jge NEAR $L$open_sse_main_loop_rounds 613 add r10,QWORD[((0+0))+r8] 614 adc r11,QWORD[((8+0))+r8] 615 adc r12,1 616 mov rax,QWORD[((0+160+0))+rbp] 617 mov r15,rax 618 mul r10 619 mov r13,rax 620 mov r14,rdx 621 mov rax,QWORD[((0+160+0))+rbp] 622 mul r11 623 imul r15,r12 624 add r14,rax 625 adc r15,rdx 626 mov 
rax,QWORD[((8+160+0))+rbp] 627 mov r9,rax 628 mul r10 629 add r14,rax 630 adc rdx,0 631 mov r10,rdx 632 mov rax,QWORD[((8+160+0))+rbp] 633 mul r11 634 add r15,rax 635 adc rdx,0 636 imul r9,r12 637 add r15,r10 638 adc r9,rdx 639 mov r10,r13 640 mov r11,r14 641 mov r12,r15 642 and r12,3 643 mov r13,r15 644 and r13,-4 645 mov r14,r9 646 shrd r15,r9,2 647 shr r9,2 648 add r15,r13 649 adc r9,r14 650 add r10,r15 651 adc r11,r9 652 adc r12,0 653 654 lea r8,[16+r8] 655 cmp rcx,-6 656 jg NEAR $L$open_sse_main_loop_rounds 657 paddd xmm3,XMMWORD[$L$chacha20_consts] 658 paddd xmm7,XMMWORD[((160+48))+rbp] 659 paddd xmm11,XMMWORD[((160+64))+rbp] 660 paddd xmm15,XMMWORD[((160+144))+rbp] 661 paddd xmm2,XMMWORD[$L$chacha20_consts] 662 paddd xmm6,XMMWORD[((160+48))+rbp] 663 paddd xmm10,XMMWORD[((160+64))+rbp] 664 paddd xmm14,XMMWORD[((160+128))+rbp] 665 paddd xmm1,XMMWORD[$L$chacha20_consts] 666 paddd xmm5,XMMWORD[((160+48))+rbp] 667 paddd xmm9,XMMWORD[((160+64))+rbp] 668 paddd xmm13,XMMWORD[((160+112))+rbp] 669 paddd xmm0,XMMWORD[$L$chacha20_consts] 670 paddd xmm4,XMMWORD[((160+48))+rbp] 671 paddd xmm8,XMMWORD[((160+64))+rbp] 672 paddd xmm12,XMMWORD[((160+96))+rbp] 673 movdqa XMMWORD[(160+80)+rbp],xmm12 674 movdqu xmm12,XMMWORD[((0 + 0))+rsi] 675 pxor xmm12,xmm3 676 movdqu XMMWORD[(0 + 0)+rdi],xmm12 677 movdqu xmm12,XMMWORD[((16 + 0))+rsi] 678 pxor xmm12,xmm7 679 movdqu XMMWORD[(16 + 0)+rdi],xmm12 680 movdqu xmm12,XMMWORD[((32 + 0))+rsi] 681 pxor xmm12,xmm11 682 movdqu XMMWORD[(32 + 0)+rdi],xmm12 683 movdqu xmm12,XMMWORD[((48 + 0))+rsi] 684 pxor xmm12,xmm15 685 movdqu XMMWORD[(48 + 0)+rdi],xmm12 686 movdqu xmm3,XMMWORD[((0 + 64))+rsi] 687 movdqu xmm7,XMMWORD[((16 + 64))+rsi] 688 movdqu xmm11,XMMWORD[((32 + 64))+rsi] 689 movdqu xmm15,XMMWORD[((48 + 64))+rsi] 690 pxor xmm2,xmm3 691 pxor xmm6,xmm7 692 pxor xmm10,xmm11 693 pxor xmm15,xmm14 694 movdqu XMMWORD[(0 + 64)+rdi],xmm2 695 movdqu XMMWORD[(16 + 64)+rdi],xmm6 696 movdqu XMMWORD[(32 + 64)+rdi],xmm10 697 movdqu XMMWORD[(48 + 
64)+rdi],xmm15 698 movdqu xmm3,XMMWORD[((0 + 128))+rsi] 699 movdqu xmm7,XMMWORD[((16 + 128))+rsi] 700 movdqu xmm11,XMMWORD[((32 + 128))+rsi] 701 movdqu xmm15,XMMWORD[((48 + 128))+rsi] 702 pxor xmm1,xmm3 703 pxor xmm5,xmm7 704 pxor xmm9,xmm11 705 pxor xmm15,xmm13 706 movdqu XMMWORD[(0 + 128)+rdi],xmm1 707 movdqu XMMWORD[(16 + 128)+rdi],xmm5 708 movdqu XMMWORD[(32 + 128)+rdi],xmm9 709 movdqu XMMWORD[(48 + 128)+rdi],xmm15 710 movdqu xmm3,XMMWORD[((0 + 192))+rsi] 711 movdqu xmm7,XMMWORD[((16 + 192))+rsi] 712 movdqu xmm11,XMMWORD[((32 + 192))+rsi] 713 movdqu xmm15,XMMWORD[((48 + 192))+rsi] 714 pxor xmm0,xmm3 715 pxor xmm4,xmm7 716 pxor xmm8,xmm11 717 pxor xmm15,XMMWORD[((160+80))+rbp] 718 movdqu XMMWORD[(0 + 192)+rdi],xmm0 719 movdqu XMMWORD[(16 + 192)+rdi],xmm4 720 movdqu XMMWORD[(32 + 192)+rdi],xmm8 721 movdqu XMMWORD[(48 + 192)+rdi],xmm15 722 723 lea rsi,[256+rsi] 724 lea rdi,[256+rdi] 725 sub rbx,16*16 726 jmp NEAR $L$open_sse_main_loop 727$L$open_sse_tail: 728 729 test rbx,rbx 730 jz NEAR $L$open_sse_finalize 731 cmp rbx,12*16 732 ja NEAR $L$open_sse_tail_256 733 cmp rbx,8*16 734 ja NEAR $L$open_sse_tail_192 735 cmp rbx,4*16 736 ja NEAR $L$open_sse_tail_128 737 movdqa xmm0,XMMWORD[$L$chacha20_consts] 738 movdqa xmm4,XMMWORD[((160+48))+rbp] 739 movdqa xmm8,XMMWORD[((160+64))+rbp] 740 movdqa xmm12,XMMWORD[((160+96))+rbp] 741 paddd xmm12,XMMWORD[$L$sse_inc] 742 movdqa XMMWORD[(160+96)+rbp],xmm12 743 744 xor r8,r8 745 mov rcx,rbx 746 cmp rcx,16 747 jb NEAR $L$open_sse_tail_64_rounds 748$L$open_sse_tail_64_rounds_and_x1hash: 749 add r10,QWORD[((0+0))+r8*1+rsi] 750 adc r11,QWORD[((8+0))+r8*1+rsi] 751 adc r12,1 752 mov rax,QWORD[((0+160+0))+rbp] 753 mov r15,rax 754 mul r10 755 mov r13,rax 756 mov r14,rdx 757 mov rax,QWORD[((0+160+0))+rbp] 758 mul r11 759 imul r15,r12 760 add r14,rax 761 adc r15,rdx 762 mov rax,QWORD[((8+160+0))+rbp] 763 mov r9,rax 764 mul r10 765 add r14,rax 766 adc rdx,0 767 mov r10,rdx 768 mov rax,QWORD[((8+160+0))+rbp] 769 mul r11 770 add r15,rax 771 
adc rdx,0 772 imul r9,r12 773 add r15,r10 774 adc r9,rdx 775 mov r10,r13 776 mov r11,r14 777 mov r12,r15 778 and r12,3 779 mov r13,r15 780 and r13,-4 781 mov r14,r9 782 shrd r15,r9,2 783 shr r9,2 784 add r15,r13 785 adc r9,r14 786 add r10,r15 787 adc r11,r9 788 adc r12,0 789 790 sub rcx,16 791$L$open_sse_tail_64_rounds: 792 add r8,16 793 paddd xmm0,xmm4 794 pxor xmm12,xmm0 795 pshufb xmm12,XMMWORD[$L$rol16] 796 paddd xmm8,xmm12 797 pxor xmm4,xmm8 798 movdqa xmm3,xmm4 799 pslld xmm3,12 800 psrld xmm4,20 801 pxor xmm4,xmm3 802 paddd xmm0,xmm4 803 pxor xmm12,xmm0 804 pshufb xmm12,XMMWORD[$L$rol8] 805 paddd xmm8,xmm12 806 pxor xmm4,xmm8 807 movdqa xmm3,xmm4 808 pslld xmm3,7 809 psrld xmm4,25 810 pxor xmm4,xmm3 811DB 102,15,58,15,228,4 812DB 102,69,15,58,15,192,8 813DB 102,69,15,58,15,228,12 814 paddd xmm0,xmm4 815 pxor xmm12,xmm0 816 pshufb xmm12,XMMWORD[$L$rol16] 817 paddd xmm8,xmm12 818 pxor xmm4,xmm8 819 movdqa xmm3,xmm4 820 pslld xmm3,12 821 psrld xmm4,20 822 pxor xmm4,xmm3 823 paddd xmm0,xmm4 824 pxor xmm12,xmm0 825 pshufb xmm12,XMMWORD[$L$rol8] 826 paddd xmm8,xmm12 827 pxor xmm4,xmm8 828 movdqa xmm3,xmm4 829 pslld xmm3,7 830 psrld xmm4,25 831 pxor xmm4,xmm3 832DB 102,15,58,15,228,12 833DB 102,69,15,58,15,192,8 834DB 102,69,15,58,15,228,4 835 836 cmp rcx,16 837 jae NEAR $L$open_sse_tail_64_rounds_and_x1hash 838 cmp r8,10*16 839 jne NEAR $L$open_sse_tail_64_rounds 840 paddd xmm0,XMMWORD[$L$chacha20_consts] 841 paddd xmm4,XMMWORD[((160+48))+rbp] 842 paddd xmm8,XMMWORD[((160+64))+rbp] 843 paddd xmm12,XMMWORD[((160+96))+rbp] 844 845 jmp NEAR $L$open_sse_tail_64_dec_loop 846 847$L$open_sse_tail_128: 848 movdqa xmm0,XMMWORD[$L$chacha20_consts] 849 movdqa xmm4,XMMWORD[((160+48))+rbp] 850 movdqa xmm8,XMMWORD[((160+64))+rbp] 851 movdqa xmm1,xmm0 852 movdqa xmm5,xmm4 853 movdqa xmm9,xmm8 854 movdqa xmm13,XMMWORD[((160+96))+rbp] 855 paddd xmm13,XMMWORD[$L$sse_inc] 856 movdqa xmm12,xmm13 857 paddd xmm12,XMMWORD[$L$sse_inc] 858 movdqa XMMWORD[(160+96)+rbp],xmm12 859 movdqa 
XMMWORD[(160+112)+rbp],xmm13 860 861 mov rcx,rbx 862 and rcx,-16 863 xor r8,r8 864$L$open_sse_tail_128_rounds_and_x1hash: 865 add r10,QWORD[((0+0))+r8*1+rsi] 866 adc r11,QWORD[((8+0))+r8*1+rsi] 867 adc r12,1 868 mov rax,QWORD[((0+160+0))+rbp] 869 mov r15,rax 870 mul r10 871 mov r13,rax 872 mov r14,rdx 873 mov rax,QWORD[((0+160+0))+rbp] 874 mul r11 875 imul r15,r12 876 add r14,rax 877 adc r15,rdx 878 mov rax,QWORD[((8+160+0))+rbp] 879 mov r9,rax 880 mul r10 881 add r14,rax 882 adc rdx,0 883 mov r10,rdx 884 mov rax,QWORD[((8+160+0))+rbp] 885 mul r11 886 add r15,rax 887 adc rdx,0 888 imul r9,r12 889 add r15,r10 890 adc r9,rdx 891 mov r10,r13 892 mov r11,r14 893 mov r12,r15 894 and r12,3 895 mov r13,r15 896 and r13,-4 897 mov r14,r9 898 shrd r15,r9,2 899 shr r9,2 900 add r15,r13 901 adc r9,r14 902 add r10,r15 903 adc r11,r9 904 adc r12,0 905 906$L$open_sse_tail_128_rounds: 907 add r8,16 908 paddd xmm0,xmm4 909 pxor xmm12,xmm0 910 pshufb xmm12,XMMWORD[$L$rol16] 911 paddd xmm8,xmm12 912 pxor xmm4,xmm8 913 movdqa xmm3,xmm4 914 pslld xmm3,12 915 psrld xmm4,20 916 pxor xmm4,xmm3 917 paddd xmm0,xmm4 918 pxor xmm12,xmm0 919 pshufb xmm12,XMMWORD[$L$rol8] 920 paddd xmm8,xmm12 921 pxor xmm4,xmm8 922 movdqa xmm3,xmm4 923 pslld xmm3,7 924 psrld xmm4,25 925 pxor xmm4,xmm3 926DB 102,15,58,15,228,4 927DB 102,69,15,58,15,192,8 928DB 102,69,15,58,15,228,12 929 paddd xmm1,xmm5 930 pxor xmm13,xmm1 931 pshufb xmm13,XMMWORD[$L$rol16] 932 paddd xmm9,xmm13 933 pxor xmm5,xmm9 934 movdqa xmm3,xmm5 935 pslld xmm3,12 936 psrld xmm5,20 937 pxor xmm5,xmm3 938 paddd xmm1,xmm5 939 pxor xmm13,xmm1 940 pshufb xmm13,XMMWORD[$L$rol8] 941 paddd xmm9,xmm13 942 pxor xmm5,xmm9 943 movdqa xmm3,xmm5 944 pslld xmm3,7 945 psrld xmm5,25 946 pxor xmm5,xmm3 947DB 102,15,58,15,237,4 948DB 102,69,15,58,15,201,8 949DB 102,69,15,58,15,237,12 950 paddd xmm0,xmm4 951 pxor xmm12,xmm0 952 pshufb xmm12,XMMWORD[$L$rol16] 953 paddd xmm8,xmm12 954 pxor xmm4,xmm8 955 movdqa xmm3,xmm4 956 pslld xmm3,12 957 psrld xmm4,20 958 
pxor xmm4,xmm3 959 paddd xmm0,xmm4 960 pxor xmm12,xmm0 961 pshufb xmm12,XMMWORD[$L$rol8] 962 paddd xmm8,xmm12 963 pxor xmm4,xmm8 964 movdqa xmm3,xmm4 965 pslld xmm3,7 966 psrld xmm4,25 967 pxor xmm4,xmm3 968DB 102,15,58,15,228,12 969DB 102,69,15,58,15,192,8 970DB 102,69,15,58,15,228,4 971 paddd xmm1,xmm5 972 pxor xmm13,xmm1 973 pshufb xmm13,XMMWORD[$L$rol16] 974 paddd xmm9,xmm13 975 pxor xmm5,xmm9 976 movdqa xmm3,xmm5 977 pslld xmm3,12 978 psrld xmm5,20 979 pxor xmm5,xmm3 980 paddd xmm1,xmm5 981 pxor xmm13,xmm1 982 pshufb xmm13,XMMWORD[$L$rol8] 983 paddd xmm9,xmm13 984 pxor xmm5,xmm9 985 movdqa xmm3,xmm5 986 pslld xmm3,7 987 psrld xmm5,25 988 pxor xmm5,xmm3 989DB 102,15,58,15,237,12 990DB 102,69,15,58,15,201,8 991DB 102,69,15,58,15,237,4 992 993 cmp r8,rcx 994 jb NEAR $L$open_sse_tail_128_rounds_and_x1hash 995 cmp r8,10*16 996 jne NEAR $L$open_sse_tail_128_rounds 997 paddd xmm1,XMMWORD[$L$chacha20_consts] 998 paddd xmm5,XMMWORD[((160+48))+rbp] 999 paddd xmm9,XMMWORD[((160+64))+rbp] 1000 paddd xmm13,XMMWORD[((160+112))+rbp] 1001 paddd xmm0,XMMWORD[$L$chacha20_consts] 1002 paddd xmm4,XMMWORD[((160+48))+rbp] 1003 paddd xmm8,XMMWORD[((160+64))+rbp] 1004 paddd xmm12,XMMWORD[((160+96))+rbp] 1005 movdqu xmm3,XMMWORD[((0 + 0))+rsi] 1006 movdqu xmm7,XMMWORD[((16 + 0))+rsi] 1007 movdqu xmm11,XMMWORD[((32 + 0))+rsi] 1008 movdqu xmm15,XMMWORD[((48 + 0))+rsi] 1009 pxor xmm1,xmm3 1010 pxor xmm5,xmm7 1011 pxor xmm9,xmm11 1012 pxor xmm15,xmm13 1013 movdqu XMMWORD[(0 + 0)+rdi],xmm1 1014 movdqu XMMWORD[(16 + 0)+rdi],xmm5 1015 movdqu XMMWORD[(32 + 0)+rdi],xmm9 1016 movdqu XMMWORD[(48 + 0)+rdi],xmm15 1017 1018 sub rbx,4*16 1019 lea rsi,[64+rsi] 1020 lea rdi,[64+rdi] 1021 jmp NEAR $L$open_sse_tail_64_dec_loop 1022 1023$L$open_sse_tail_192: 1024 movdqa xmm0,XMMWORD[$L$chacha20_consts] 1025 movdqa xmm4,XMMWORD[((160+48))+rbp] 1026 movdqa xmm8,XMMWORD[((160+64))+rbp] 1027 movdqa xmm1,xmm0 1028 movdqa xmm5,xmm4 1029 movdqa xmm9,xmm8 1030 movdqa xmm2,xmm0 1031 movdqa xmm6,xmm4 1032 movdqa 
xmm10,xmm8 1033 movdqa xmm14,XMMWORD[((160+96))+rbp] 1034 paddd xmm14,XMMWORD[$L$sse_inc] 1035 movdqa xmm13,xmm14 1036 paddd xmm13,XMMWORD[$L$sse_inc] 1037 movdqa xmm12,xmm13 1038 paddd xmm12,XMMWORD[$L$sse_inc] 1039 movdqa XMMWORD[(160+96)+rbp],xmm12 1040 movdqa XMMWORD[(160+112)+rbp],xmm13 1041 movdqa XMMWORD[(160+128)+rbp],xmm14 1042 1043 mov rcx,rbx 1044 mov r8,10*16 1045 cmp rcx,10*16 1046 cmovg rcx,r8 1047 and rcx,-16 1048 xor r8,r8 1049$L$open_sse_tail_192_rounds_and_x1hash: 1050 add r10,QWORD[((0+0))+r8*1+rsi] 1051 adc r11,QWORD[((8+0))+r8*1+rsi] 1052 adc r12,1 1053 mov rax,QWORD[((0+160+0))+rbp] 1054 mov r15,rax 1055 mul r10 1056 mov r13,rax 1057 mov r14,rdx 1058 mov rax,QWORD[((0+160+0))+rbp] 1059 mul r11 1060 imul r15,r12 1061 add r14,rax 1062 adc r15,rdx 1063 mov rax,QWORD[((8+160+0))+rbp] 1064 mov r9,rax 1065 mul r10 1066 add r14,rax 1067 adc rdx,0 1068 mov r10,rdx 1069 mov rax,QWORD[((8+160+0))+rbp] 1070 mul r11 1071 add r15,rax 1072 adc rdx,0 1073 imul r9,r12 1074 add r15,r10 1075 adc r9,rdx 1076 mov r10,r13 1077 mov r11,r14 1078 mov r12,r15 1079 and r12,3 1080 mov r13,r15 1081 and r13,-4 1082 mov r14,r9 1083 shrd r15,r9,2 1084 shr r9,2 1085 add r15,r13 1086 adc r9,r14 1087 add r10,r15 1088 adc r11,r9 1089 adc r12,0 1090 1091$L$open_sse_tail_192_rounds: 1092 add r8,16 1093 paddd xmm0,xmm4 1094 pxor xmm12,xmm0 1095 pshufb xmm12,XMMWORD[$L$rol16] 1096 paddd xmm8,xmm12 1097 pxor xmm4,xmm8 1098 movdqa xmm3,xmm4 1099 pslld xmm3,12 1100 psrld xmm4,20 1101 pxor xmm4,xmm3 1102 paddd xmm0,xmm4 1103 pxor xmm12,xmm0 1104 pshufb xmm12,XMMWORD[$L$rol8] 1105 paddd xmm8,xmm12 1106 pxor xmm4,xmm8 1107 movdqa xmm3,xmm4 1108 pslld xmm3,7 1109 psrld xmm4,25 1110 pxor xmm4,xmm3 1111DB 102,15,58,15,228,4 1112DB 102,69,15,58,15,192,8 1113DB 102,69,15,58,15,228,12 1114 paddd xmm1,xmm5 1115 pxor xmm13,xmm1 1116 pshufb xmm13,XMMWORD[$L$rol16] 1117 paddd xmm9,xmm13 1118 pxor xmm5,xmm9 1119 movdqa xmm3,xmm5 1120 pslld xmm3,12 1121 psrld xmm5,20 1122 pxor xmm5,xmm3 1123 paddd 
xmm1,xmm5 1124 pxor xmm13,xmm1 1125 pshufb xmm13,XMMWORD[$L$rol8] 1126 paddd xmm9,xmm13 1127 pxor xmm5,xmm9 1128 movdqa xmm3,xmm5 1129 pslld xmm3,7 1130 psrld xmm5,25 1131 pxor xmm5,xmm3 1132DB 102,15,58,15,237,4 1133DB 102,69,15,58,15,201,8 1134DB 102,69,15,58,15,237,12 1135 paddd xmm2,xmm6 1136 pxor xmm14,xmm2 1137 pshufb xmm14,XMMWORD[$L$rol16] 1138 paddd xmm10,xmm14 1139 pxor xmm6,xmm10 1140 movdqa xmm3,xmm6 1141 pslld xmm3,12 1142 psrld xmm6,20 1143 pxor xmm6,xmm3 1144 paddd xmm2,xmm6 1145 pxor xmm14,xmm2 1146 pshufb xmm14,XMMWORD[$L$rol8] 1147 paddd xmm10,xmm14 1148 pxor xmm6,xmm10 1149 movdqa xmm3,xmm6 1150 pslld xmm3,7 1151 psrld xmm6,25 1152 pxor xmm6,xmm3 1153DB 102,15,58,15,246,4 1154DB 102,69,15,58,15,210,8 1155DB 102,69,15,58,15,246,12 1156 paddd xmm0,xmm4 1157 pxor xmm12,xmm0 1158 pshufb xmm12,XMMWORD[$L$rol16] 1159 paddd xmm8,xmm12 1160 pxor xmm4,xmm8 1161 movdqa xmm3,xmm4 1162 pslld xmm3,12 1163 psrld xmm4,20 1164 pxor xmm4,xmm3 1165 paddd xmm0,xmm4 1166 pxor xmm12,xmm0 1167 pshufb xmm12,XMMWORD[$L$rol8] 1168 paddd xmm8,xmm12 1169 pxor xmm4,xmm8 1170 movdqa xmm3,xmm4 1171 pslld xmm3,7 1172 psrld xmm4,25 1173 pxor xmm4,xmm3 1174DB 102,15,58,15,228,12 1175DB 102,69,15,58,15,192,8 1176DB 102,69,15,58,15,228,4 1177 paddd xmm1,xmm5 1178 pxor xmm13,xmm1 1179 pshufb xmm13,XMMWORD[$L$rol16] 1180 paddd xmm9,xmm13 1181 pxor xmm5,xmm9 1182 movdqa xmm3,xmm5 1183 pslld xmm3,12 1184 psrld xmm5,20 1185 pxor xmm5,xmm3 1186 paddd xmm1,xmm5 1187 pxor xmm13,xmm1 1188 pshufb xmm13,XMMWORD[$L$rol8] 1189 paddd xmm9,xmm13 1190 pxor xmm5,xmm9 1191 movdqa xmm3,xmm5 1192 pslld xmm3,7 1193 psrld xmm5,25 1194 pxor xmm5,xmm3 1195DB 102,15,58,15,237,12 1196DB 102,69,15,58,15,201,8 1197DB 102,69,15,58,15,237,4 1198 paddd xmm2,xmm6 1199 pxor xmm14,xmm2 1200 pshufb xmm14,XMMWORD[$L$rol16] 1201 paddd xmm10,xmm14 1202 pxor xmm6,xmm10 1203 movdqa xmm3,xmm6 1204 pslld xmm3,12 1205 psrld xmm6,20 1206 pxor xmm6,xmm3 1207 paddd xmm2,xmm6 1208 pxor xmm14,xmm2 1209 pshufb 
xmm14,XMMWORD[$L$rol8] 1210 paddd xmm10,xmm14 1211 pxor xmm6,xmm10 1212 movdqa xmm3,xmm6 1213 pslld xmm3,7 1214 psrld xmm6,25 1215 pxor xmm6,xmm3 1216DB 102,15,58,15,246,12 1217DB 102,69,15,58,15,210,8 1218DB 102,69,15,58,15,246,4 1219 1220 cmp r8,rcx 1221 jb NEAR $L$open_sse_tail_192_rounds_and_x1hash 1222 cmp r8,10*16 1223 jne NEAR $L$open_sse_tail_192_rounds 1224 cmp rbx,11*16 1225 jb NEAR $L$open_sse_tail_192_finish 1226 add r10,QWORD[((0+160))+rsi] 1227 adc r11,QWORD[((8+160))+rsi] 1228 adc r12,1 1229 mov rax,QWORD[((0+160+0))+rbp] 1230 mov r15,rax 1231 mul r10 1232 mov r13,rax 1233 mov r14,rdx 1234 mov rax,QWORD[((0+160+0))+rbp] 1235 mul r11 1236 imul r15,r12 1237 add r14,rax 1238 adc r15,rdx 1239 mov rax,QWORD[((8+160+0))+rbp] 1240 mov r9,rax 1241 mul r10 1242 add r14,rax 1243 adc rdx,0 1244 mov r10,rdx 1245 mov rax,QWORD[((8+160+0))+rbp] 1246 mul r11 1247 add r15,rax 1248 adc rdx,0 1249 imul r9,r12 1250 add r15,r10 1251 adc r9,rdx 1252 mov r10,r13 1253 mov r11,r14 1254 mov r12,r15 1255 and r12,3 1256 mov r13,r15 1257 and r13,-4 1258 mov r14,r9 1259 shrd r15,r9,2 1260 shr r9,2 1261 add r15,r13 1262 adc r9,r14 1263 add r10,r15 1264 adc r11,r9 1265 adc r12,0 1266 1267 cmp rbx,12*16 1268 jb NEAR $L$open_sse_tail_192_finish 1269 add r10,QWORD[((0+176))+rsi] 1270 adc r11,QWORD[((8+176))+rsi] 1271 adc r12,1 1272 mov rax,QWORD[((0+160+0))+rbp] 1273 mov r15,rax 1274 mul r10 1275 mov r13,rax 1276 mov r14,rdx 1277 mov rax,QWORD[((0+160+0))+rbp] 1278 mul r11 1279 imul r15,r12 1280 add r14,rax 1281 adc r15,rdx 1282 mov rax,QWORD[((8+160+0))+rbp] 1283 mov r9,rax 1284 mul r10 1285 add r14,rax 1286 adc rdx,0 1287 mov r10,rdx 1288 mov rax,QWORD[((8+160+0))+rbp] 1289 mul r11 1290 add r15,rax 1291 adc rdx,0 1292 imul r9,r12 1293 add r15,r10 1294 adc r9,rdx 1295 mov r10,r13 1296 mov r11,r14 1297 mov r12,r15 1298 and r12,3 1299 mov r13,r15 1300 and r13,-4 1301 mov r14,r9 1302 shrd r15,r9,2 1303 shr r9,2 1304 add r15,r13 1305 adc r9,r14 1306 add r10,r15 1307 adc r11,r9 1308 adc 
r12,0 1309 1310$L$open_sse_tail_192_finish: 1311 paddd xmm2,XMMWORD[$L$chacha20_consts] 1312 paddd xmm6,XMMWORD[((160+48))+rbp] 1313 paddd xmm10,XMMWORD[((160+64))+rbp] 1314 paddd xmm14,XMMWORD[((160+128))+rbp] 1315 paddd xmm1,XMMWORD[$L$chacha20_consts] 1316 paddd xmm5,XMMWORD[((160+48))+rbp] 1317 paddd xmm9,XMMWORD[((160+64))+rbp] 1318 paddd xmm13,XMMWORD[((160+112))+rbp] 1319 paddd xmm0,XMMWORD[$L$chacha20_consts] 1320 paddd xmm4,XMMWORD[((160+48))+rbp] 1321 paddd xmm8,XMMWORD[((160+64))+rbp] 1322 paddd xmm12,XMMWORD[((160+96))+rbp] 1323 movdqu xmm3,XMMWORD[((0 + 0))+rsi] 1324 movdqu xmm7,XMMWORD[((16 + 0))+rsi] 1325 movdqu xmm11,XMMWORD[((32 + 0))+rsi] 1326 movdqu xmm15,XMMWORD[((48 + 0))+rsi] 1327 pxor xmm2,xmm3 1328 pxor xmm6,xmm7 1329 pxor xmm10,xmm11 1330 pxor xmm15,xmm14 1331 movdqu XMMWORD[(0 + 0)+rdi],xmm2 1332 movdqu XMMWORD[(16 + 0)+rdi],xmm6 1333 movdqu XMMWORD[(32 + 0)+rdi],xmm10 1334 movdqu XMMWORD[(48 + 0)+rdi],xmm15 1335 movdqu xmm3,XMMWORD[((0 + 64))+rsi] 1336 movdqu xmm7,XMMWORD[((16 + 64))+rsi] 1337 movdqu xmm11,XMMWORD[((32 + 64))+rsi] 1338 movdqu xmm15,XMMWORD[((48 + 64))+rsi] 1339 pxor xmm1,xmm3 1340 pxor xmm5,xmm7 1341 pxor xmm9,xmm11 1342 pxor xmm15,xmm13 1343 movdqu XMMWORD[(0 + 64)+rdi],xmm1 1344 movdqu XMMWORD[(16 + 64)+rdi],xmm5 1345 movdqu XMMWORD[(32 + 64)+rdi],xmm9 1346 movdqu XMMWORD[(48 + 64)+rdi],xmm15 1347 1348 sub rbx,8*16 1349 lea rsi,[128+rsi] 1350 lea rdi,[128+rdi] 1351 jmp NEAR $L$open_sse_tail_64_dec_loop 1352 1353$L$open_sse_tail_256: 1354 movdqa xmm0,XMMWORD[$L$chacha20_consts] 1355 movdqa xmm4,XMMWORD[((160+48))+rbp] 1356 movdqa xmm8,XMMWORD[((160+64))+rbp] 1357 movdqa xmm1,xmm0 1358 movdqa xmm5,xmm4 1359 movdqa xmm9,xmm8 1360 movdqa xmm2,xmm0 1361 movdqa xmm6,xmm4 1362 movdqa xmm10,xmm8 1363 movdqa xmm3,xmm0 1364 movdqa xmm7,xmm4 1365 movdqa xmm11,xmm8 1366 movdqa xmm15,XMMWORD[((160+96))+rbp] 1367 paddd xmm15,XMMWORD[$L$sse_inc] 1368 movdqa xmm14,xmm15 1369 paddd xmm14,XMMWORD[$L$sse_inc] 1370 movdqa xmm13,xmm14 1371 
paddd xmm13,XMMWORD[$L$sse_inc] 1372 movdqa xmm12,xmm13 1373 paddd xmm12,XMMWORD[$L$sse_inc] 1374 movdqa XMMWORD[(160+96)+rbp],xmm12 1375 movdqa XMMWORD[(160+112)+rbp],xmm13 1376 movdqa XMMWORD[(160+128)+rbp],xmm14 1377 movdqa XMMWORD[(160+144)+rbp],xmm15 1378 1379 xor r8,r8 1380$L$open_sse_tail_256_rounds_and_x1hash: 1381 add r10,QWORD[((0+0))+r8*1+rsi] 1382 adc r11,QWORD[((8+0))+r8*1+rsi] 1383 adc r12,1 1384 movdqa XMMWORD[(160+80)+rbp],xmm11 1385 paddd xmm0,xmm4 1386 pxor xmm12,xmm0 1387 pshufb xmm12,XMMWORD[$L$rol16] 1388 paddd xmm8,xmm12 1389 pxor xmm4,xmm8 1390 movdqa xmm11,xmm4 1391 pslld xmm11,12 1392 psrld xmm4,20 1393 pxor xmm4,xmm11 1394 paddd xmm0,xmm4 1395 pxor xmm12,xmm0 1396 pshufb xmm12,XMMWORD[$L$rol8] 1397 paddd xmm8,xmm12 1398 pxor xmm4,xmm8 1399 movdqa xmm11,xmm4 1400 pslld xmm11,7 1401 psrld xmm4,25 1402 pxor xmm4,xmm11 1403DB 102,15,58,15,228,4 1404DB 102,69,15,58,15,192,8 1405DB 102,69,15,58,15,228,12 1406 paddd xmm1,xmm5 1407 pxor xmm13,xmm1 1408 pshufb xmm13,XMMWORD[$L$rol16] 1409 paddd xmm9,xmm13 1410 pxor xmm5,xmm9 1411 movdqa xmm11,xmm5 1412 pslld xmm11,12 1413 psrld xmm5,20 1414 pxor xmm5,xmm11 1415 paddd xmm1,xmm5 1416 pxor xmm13,xmm1 1417 pshufb xmm13,XMMWORD[$L$rol8] 1418 paddd xmm9,xmm13 1419 pxor xmm5,xmm9 1420 movdqa xmm11,xmm5 1421 pslld xmm11,7 1422 psrld xmm5,25 1423 pxor xmm5,xmm11 1424DB 102,15,58,15,237,4 1425DB 102,69,15,58,15,201,8 1426DB 102,69,15,58,15,237,12 1427 paddd xmm2,xmm6 1428 pxor xmm14,xmm2 1429 pshufb xmm14,XMMWORD[$L$rol16] 1430 paddd xmm10,xmm14 1431 pxor xmm6,xmm10 1432 movdqa xmm11,xmm6 1433 pslld xmm11,12 1434 psrld xmm6,20 1435 pxor xmm6,xmm11 1436 paddd xmm2,xmm6 1437 pxor xmm14,xmm2 1438 pshufb xmm14,XMMWORD[$L$rol8] 1439 paddd xmm10,xmm14 1440 pxor xmm6,xmm10 1441 movdqa xmm11,xmm6 1442 pslld xmm11,7 1443 psrld xmm6,25 1444 pxor xmm6,xmm11 1445DB 102,15,58,15,246,4 1446DB 102,69,15,58,15,210,8 1447DB 102,69,15,58,15,246,12 1448 movdqa xmm11,XMMWORD[((160+80))+rbp] 1449 mov rax,QWORD[((0+160+0))+rbp] 
1450 mov r15,rax 1451 mul r10 1452 mov r13,rax 1453 mov r14,rdx 1454 mov rax,QWORD[((0+160+0))+rbp] 1455 mul r11 1456 imul r15,r12 1457 add r14,rax 1458 adc r15,rdx 1459 movdqa XMMWORD[(160+80)+rbp],xmm9 1460 paddd xmm3,xmm7 1461 pxor xmm15,xmm3 1462 pshufb xmm15,XMMWORD[$L$rol16] 1463 paddd xmm11,xmm15 1464 pxor xmm7,xmm11 1465 movdqa xmm9,xmm7 1466 pslld xmm9,12 1467 psrld xmm7,20 1468 pxor xmm7,xmm9 1469 paddd xmm3,xmm7 1470 pxor xmm15,xmm3 1471 pshufb xmm15,XMMWORD[$L$rol8] 1472 paddd xmm11,xmm15 1473 pxor xmm7,xmm11 1474 movdqa xmm9,xmm7 1475 pslld xmm9,7 1476 psrld xmm7,25 1477 pxor xmm7,xmm9 1478DB 102,15,58,15,255,4 1479DB 102,69,15,58,15,219,8 1480DB 102,69,15,58,15,255,12 1481 movdqa xmm9,XMMWORD[((160+80))+rbp] 1482 mov rax,QWORD[((8+160+0))+rbp] 1483 mov r9,rax 1484 mul r10 1485 add r14,rax 1486 adc rdx,0 1487 mov r10,rdx 1488 mov rax,QWORD[((8+160+0))+rbp] 1489 mul r11 1490 add r15,rax 1491 adc rdx,0 1492 movdqa XMMWORD[(160+80)+rbp],xmm11 1493 paddd xmm0,xmm4 1494 pxor xmm12,xmm0 1495 pshufb xmm12,XMMWORD[$L$rol16] 1496 paddd xmm8,xmm12 1497 pxor xmm4,xmm8 1498 movdqa xmm11,xmm4 1499 pslld xmm11,12 1500 psrld xmm4,20 1501 pxor xmm4,xmm11 1502 paddd xmm0,xmm4 1503 pxor xmm12,xmm0 1504 pshufb xmm12,XMMWORD[$L$rol8] 1505 paddd xmm8,xmm12 1506 pxor xmm4,xmm8 1507 movdqa xmm11,xmm4 1508 pslld xmm11,7 1509 psrld xmm4,25 1510 pxor xmm4,xmm11 1511DB 102,15,58,15,228,12 1512DB 102,69,15,58,15,192,8 1513DB 102,69,15,58,15,228,4 1514 paddd xmm1,xmm5 1515 pxor xmm13,xmm1 1516 pshufb xmm13,XMMWORD[$L$rol16] 1517 paddd xmm9,xmm13 1518 pxor xmm5,xmm9 1519 movdqa xmm11,xmm5 1520 pslld xmm11,12 1521 psrld xmm5,20 1522 pxor xmm5,xmm11 1523 paddd xmm1,xmm5 1524 pxor xmm13,xmm1 1525 pshufb xmm13,XMMWORD[$L$rol8] 1526 paddd xmm9,xmm13 1527 pxor xmm5,xmm9 1528 movdqa xmm11,xmm5 1529 pslld xmm11,7 1530 psrld xmm5,25 1531 pxor xmm5,xmm11 1532DB 102,15,58,15,237,12 1533DB 102,69,15,58,15,201,8 1534DB 102,69,15,58,15,237,4 1535 imul r9,r12 1536 add r15,r10 1537 adc r9,rdx 1538 
paddd xmm2,xmm6 1539 pxor xmm14,xmm2 1540 pshufb xmm14,XMMWORD[$L$rol16] 1541 paddd xmm10,xmm14 1542 pxor xmm6,xmm10 1543 movdqa xmm11,xmm6 1544 pslld xmm11,12 1545 psrld xmm6,20 1546 pxor xmm6,xmm11 1547 paddd xmm2,xmm6 1548 pxor xmm14,xmm2 1549 pshufb xmm14,XMMWORD[$L$rol8] 1550 paddd xmm10,xmm14 1551 pxor xmm6,xmm10 1552 movdqa xmm11,xmm6 1553 pslld xmm11,7 1554 psrld xmm6,25 1555 pxor xmm6,xmm11 1556DB 102,15,58,15,246,12 1557DB 102,69,15,58,15,210,8 1558DB 102,69,15,58,15,246,4 1559 movdqa xmm11,XMMWORD[((160+80))+rbp] 1560 mov r10,r13 1561 mov r11,r14 1562 mov r12,r15 1563 and r12,3 1564 mov r13,r15 1565 and r13,-4 1566 mov r14,r9 1567 shrd r15,r9,2 1568 shr r9,2 1569 add r15,r13 1570 adc r9,r14 1571 add r10,r15 1572 adc r11,r9 1573 adc r12,0 1574 movdqa XMMWORD[(160+80)+rbp],xmm9 1575 paddd xmm3,xmm7 1576 pxor xmm15,xmm3 1577 pshufb xmm15,XMMWORD[$L$rol16] 1578 paddd xmm11,xmm15 1579 pxor xmm7,xmm11 1580 movdqa xmm9,xmm7 1581 pslld xmm9,12 1582 psrld xmm7,20 1583 pxor xmm7,xmm9 1584 paddd xmm3,xmm7 1585 pxor xmm15,xmm3 1586 pshufb xmm15,XMMWORD[$L$rol8] 1587 paddd xmm11,xmm15 1588 pxor xmm7,xmm11 1589 movdqa xmm9,xmm7 1590 pslld xmm9,7 1591 psrld xmm7,25 1592 pxor xmm7,xmm9 1593DB 102,15,58,15,255,12 1594DB 102,69,15,58,15,219,8 1595DB 102,69,15,58,15,255,4 1596 movdqa xmm9,XMMWORD[((160+80))+rbp] 1597 1598 add r8,16 1599 cmp r8,10*16 1600 jb NEAR $L$open_sse_tail_256_rounds_and_x1hash 1601 1602 mov rcx,rbx 1603 and rcx,-16 1604$L$open_sse_tail_256_hash: 1605 add r10,QWORD[((0+0))+r8*1+rsi] 1606 adc r11,QWORD[((8+0))+r8*1+rsi] 1607 adc r12,1 1608 mov rax,QWORD[((0+160+0))+rbp] 1609 mov r15,rax 1610 mul r10 1611 mov r13,rax 1612 mov r14,rdx 1613 mov rax,QWORD[((0+160+0))+rbp] 1614 mul r11 1615 imul r15,r12 1616 add r14,rax 1617 adc r15,rdx 1618 mov rax,QWORD[((8+160+0))+rbp] 1619 mov r9,rax 1620 mul r10 1621 add r14,rax 1622 adc rdx,0 1623 mov r10,rdx 1624 mov rax,QWORD[((8+160+0))+rbp] 1625 mul r11 1626 add r15,rax 1627 adc rdx,0 1628 imul r9,r12 1629 add 
r15,r10 1630 adc r9,rdx 1631 mov r10,r13 1632 mov r11,r14 1633 mov r12,r15 1634 and r12,3 1635 mov r13,r15 1636 and r13,-4 1637 mov r14,r9 1638 shrd r15,r9,2 1639 shr r9,2 1640 add r15,r13 1641 adc r9,r14 1642 add r10,r15 1643 adc r11,r9 1644 adc r12,0 1645 1646 add r8,16 1647 cmp r8,rcx 1648 jb NEAR $L$open_sse_tail_256_hash 1649 paddd xmm3,XMMWORD[$L$chacha20_consts] 1650 paddd xmm7,XMMWORD[((160+48))+rbp] 1651 paddd xmm11,XMMWORD[((160+64))+rbp] 1652 paddd xmm15,XMMWORD[((160+144))+rbp] 1653 paddd xmm2,XMMWORD[$L$chacha20_consts] 1654 paddd xmm6,XMMWORD[((160+48))+rbp] 1655 paddd xmm10,XMMWORD[((160+64))+rbp] 1656 paddd xmm14,XMMWORD[((160+128))+rbp] 1657 paddd xmm1,XMMWORD[$L$chacha20_consts] 1658 paddd xmm5,XMMWORD[((160+48))+rbp] 1659 paddd xmm9,XMMWORD[((160+64))+rbp] 1660 paddd xmm13,XMMWORD[((160+112))+rbp] 1661 paddd xmm0,XMMWORD[$L$chacha20_consts] 1662 paddd xmm4,XMMWORD[((160+48))+rbp] 1663 paddd xmm8,XMMWORD[((160+64))+rbp] 1664 paddd xmm12,XMMWORD[((160+96))+rbp] 1665 movdqa XMMWORD[(160+80)+rbp],xmm12 1666 movdqu xmm12,XMMWORD[((0 + 0))+rsi] 1667 pxor xmm12,xmm3 1668 movdqu XMMWORD[(0 + 0)+rdi],xmm12 1669 movdqu xmm12,XMMWORD[((16 + 0))+rsi] 1670 pxor xmm12,xmm7 1671 movdqu XMMWORD[(16 + 0)+rdi],xmm12 1672 movdqu xmm12,XMMWORD[((32 + 0))+rsi] 1673 pxor xmm12,xmm11 1674 movdqu XMMWORD[(32 + 0)+rdi],xmm12 1675 movdqu xmm12,XMMWORD[((48 + 0))+rsi] 1676 pxor xmm12,xmm15 1677 movdqu XMMWORD[(48 + 0)+rdi],xmm12 1678 movdqu xmm3,XMMWORD[((0 + 64))+rsi] 1679 movdqu xmm7,XMMWORD[((16 + 64))+rsi] 1680 movdqu xmm11,XMMWORD[((32 + 64))+rsi] 1681 movdqu xmm15,XMMWORD[((48 + 64))+rsi] 1682 pxor xmm2,xmm3 1683 pxor xmm6,xmm7 1684 pxor xmm10,xmm11 1685 pxor xmm15,xmm14 1686 movdqu XMMWORD[(0 + 64)+rdi],xmm2 1687 movdqu XMMWORD[(16 + 64)+rdi],xmm6 1688 movdqu XMMWORD[(32 + 64)+rdi],xmm10 1689 movdqu XMMWORD[(48 + 64)+rdi],xmm15 1690 movdqu xmm3,XMMWORD[((0 + 128))+rsi] 1691 movdqu xmm7,XMMWORD[((16 + 128))+rsi] 1692 movdqu xmm11,XMMWORD[((32 + 128))+rsi] 1693 movdqu 
xmm15,XMMWORD[((48 + 128))+rsi] 1694 pxor xmm1,xmm3 1695 pxor xmm5,xmm7 1696 pxor xmm9,xmm11 1697 pxor xmm15,xmm13 1698 movdqu XMMWORD[(0 + 128)+rdi],xmm1 1699 movdqu XMMWORD[(16 + 128)+rdi],xmm5 1700 movdqu XMMWORD[(32 + 128)+rdi],xmm9 1701 movdqu XMMWORD[(48 + 128)+rdi],xmm15 1702 1703 movdqa xmm12,XMMWORD[((160+80))+rbp] 1704 sub rbx,12*16 1705 lea rsi,[192+rsi] 1706 lea rdi,[192+rdi] 1707 1708 1709$L$open_sse_tail_64_dec_loop: 1710 cmp rbx,16 1711 jb NEAR $L$open_sse_tail_16_init 1712 sub rbx,16 1713 movdqu xmm3,XMMWORD[rsi] 1714 pxor xmm0,xmm3 1715 movdqu XMMWORD[rdi],xmm0 1716 lea rsi,[16+rsi] 1717 lea rdi,[16+rdi] 1718 movdqa xmm0,xmm4 1719 movdqa xmm4,xmm8 1720 movdqa xmm8,xmm12 1721 jmp NEAR $L$open_sse_tail_64_dec_loop 1722$L$open_sse_tail_16_init: 1723 movdqa xmm1,xmm0 1724 1725 1726$L$open_sse_tail_16: 1727 test rbx,rbx 1728 jz NEAR $L$open_sse_finalize 1729 1730 1731 1732 pxor xmm3,xmm3 1733 lea rsi,[((-1))+rbx*1+rsi] 1734 mov r8,rbx 1735$L$open_sse_tail_16_compose: 1736 pslldq xmm3,1 1737 pinsrb xmm3,BYTE[rsi],0 1738 sub rsi,1 1739 sub r8,1 1740 jnz NEAR $L$open_sse_tail_16_compose 1741 1742DB 102,73,15,126,221 1743 pextrq r14,xmm3,1 1744 1745 pxor xmm3,xmm1 1746 1747 1748$L$open_sse_tail_16_extract: 1749 pextrb XMMWORD[rdi],xmm3,0 1750 psrldq xmm3,1 1751 add rdi,1 1752 sub rbx,1 1753 jne NEAR $L$open_sse_tail_16_extract 1754 1755 add r10,r13 1756 adc r11,r14 1757 adc r12,1 1758 mov rax,QWORD[((0+160+0))+rbp] 1759 mov r15,rax 1760 mul r10 1761 mov r13,rax 1762 mov r14,rdx 1763 mov rax,QWORD[((0+160+0))+rbp] 1764 mul r11 1765 imul r15,r12 1766 add r14,rax 1767 adc r15,rdx 1768 mov rax,QWORD[((8+160+0))+rbp] 1769 mov r9,rax 1770 mul r10 1771 add r14,rax 1772 adc rdx,0 1773 mov r10,rdx 1774 mov rax,QWORD[((8+160+0))+rbp] 1775 mul r11 1776 add r15,rax 1777 adc rdx,0 1778 imul r9,r12 1779 add r15,r10 1780 adc r9,rdx 1781 mov r10,r13 1782 mov r11,r14 1783 mov r12,r15 1784 and r12,3 1785 mov r13,r15 1786 and r13,-4 1787 mov r14,r9 1788 shrd r15,r9,2 1789 shr 
r9,2 1790 add r15,r13 1791 adc r9,r14 1792 add r10,r15 1793 adc r11,r9 1794 adc r12,0 1795 1796 1797$L$open_sse_finalize: 1798 add r10,QWORD[((0+160+32))+rbp] 1799 adc r11,QWORD[((8+160+32))+rbp] 1800 adc r12,1 1801 mov rax,QWORD[((0+160+0))+rbp] 1802 mov r15,rax 1803 mul r10 1804 mov r13,rax 1805 mov r14,rdx 1806 mov rax,QWORD[((0+160+0))+rbp] 1807 mul r11 1808 imul r15,r12 1809 add r14,rax 1810 adc r15,rdx 1811 mov rax,QWORD[((8+160+0))+rbp] 1812 mov r9,rax 1813 mul r10 1814 add r14,rax 1815 adc rdx,0 1816 mov r10,rdx 1817 mov rax,QWORD[((8+160+0))+rbp] 1818 mul r11 1819 add r15,rax 1820 adc rdx,0 1821 imul r9,r12 1822 add r15,r10 1823 adc r9,rdx 1824 mov r10,r13 1825 mov r11,r14 1826 mov r12,r15 1827 and r12,3 1828 mov r13,r15 1829 and r13,-4 1830 mov r14,r9 1831 shrd r15,r9,2 1832 shr r9,2 1833 add r15,r13 1834 adc r9,r14 1835 add r10,r15 1836 adc r11,r9 1837 adc r12,0 1838 1839 1840 mov r13,r10 1841 mov r14,r11 1842 mov r15,r12 1843 sub r10,-5 1844 sbb r11,-1 1845 sbb r12,3 1846 cmovc r10,r13 1847 cmovc r11,r14 1848 cmovc r12,r15 1849 1850 add r10,QWORD[((0+160+16))+rbp] 1851 adc r11,QWORD[((8+160+16))+rbp] 1852 1853 movaps xmm6,XMMWORD[((0+0))+rbp] 1854 movaps xmm7,XMMWORD[((16+0))+rbp] 1855 movaps xmm8,XMMWORD[((32+0))+rbp] 1856 movaps xmm9,XMMWORD[((48+0))+rbp] 1857 movaps xmm10,XMMWORD[((64+0))+rbp] 1858 movaps xmm11,XMMWORD[((80+0))+rbp] 1859 movaps xmm12,XMMWORD[((96+0))+rbp] 1860 movaps xmm13,XMMWORD[((112+0))+rbp] 1861 movaps xmm14,XMMWORD[((128+0))+rbp] 1862 movaps xmm15,XMMWORD[((144+0))+rbp] 1863 1864 1865 add rsp,288 + 160 + 32 1866 1867 1868 pop r9 1869 1870 mov QWORD[r9],r10 1871 mov QWORD[8+r9],r11 1872 pop r15 1873 1874 pop r14 1875 1876 pop r13 1877 1878 pop r12 1879 1880 pop rbx 1881 1882 pop rbp 1883 1884 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 1885 mov rsi,QWORD[16+rsp] 1886 DB 0F3h,0C3h ;repret 1887 1888$L$open_sse_128: 1889 1890 movdqu xmm0,XMMWORD[$L$chacha20_consts] 1891 movdqa xmm1,xmm0 1892 movdqa xmm2,xmm0 1893 movdqu xmm4,XMMWORD[r9] 
1894 movdqa xmm5,xmm4 1895 movdqa xmm6,xmm4 1896 movdqu xmm8,XMMWORD[16+r9] 1897 movdqa xmm9,xmm8 1898 movdqa xmm10,xmm8 1899 movdqu xmm12,XMMWORD[32+r9] 1900 movdqa xmm13,xmm12 1901 paddd xmm13,XMMWORD[$L$sse_inc] 1902 movdqa xmm14,xmm13 1903 paddd xmm14,XMMWORD[$L$sse_inc] 1904 movdqa xmm7,xmm4 1905 movdqa xmm11,xmm8 1906 movdqa xmm15,xmm13 1907 mov r10,10 1908 1909$L$open_sse_128_rounds: 1910 paddd xmm0,xmm4 1911 pxor xmm12,xmm0 1912 pshufb xmm12,XMMWORD[$L$rol16] 1913 paddd xmm8,xmm12 1914 pxor xmm4,xmm8 1915 movdqa xmm3,xmm4 1916 pslld xmm3,12 1917 psrld xmm4,20 1918 pxor xmm4,xmm3 1919 paddd xmm0,xmm4 1920 pxor xmm12,xmm0 1921 pshufb xmm12,XMMWORD[$L$rol8] 1922 paddd xmm8,xmm12 1923 pxor xmm4,xmm8 1924 movdqa xmm3,xmm4 1925 pslld xmm3,7 1926 psrld xmm4,25 1927 pxor xmm4,xmm3 1928DB 102,15,58,15,228,4 1929DB 102,69,15,58,15,192,8 1930DB 102,69,15,58,15,228,12 1931 paddd xmm1,xmm5 1932 pxor xmm13,xmm1 1933 pshufb xmm13,XMMWORD[$L$rol16] 1934 paddd xmm9,xmm13 1935 pxor xmm5,xmm9 1936 movdqa xmm3,xmm5 1937 pslld xmm3,12 1938 psrld xmm5,20 1939 pxor xmm5,xmm3 1940 paddd xmm1,xmm5 1941 pxor xmm13,xmm1 1942 pshufb xmm13,XMMWORD[$L$rol8] 1943 paddd xmm9,xmm13 1944 pxor xmm5,xmm9 1945 movdqa xmm3,xmm5 1946 pslld xmm3,7 1947 psrld xmm5,25 1948 pxor xmm5,xmm3 1949DB 102,15,58,15,237,4 1950DB 102,69,15,58,15,201,8 1951DB 102,69,15,58,15,237,12 1952 paddd xmm2,xmm6 1953 pxor xmm14,xmm2 1954 pshufb xmm14,XMMWORD[$L$rol16] 1955 paddd xmm10,xmm14 1956 pxor xmm6,xmm10 1957 movdqa xmm3,xmm6 1958 pslld xmm3,12 1959 psrld xmm6,20 1960 pxor xmm6,xmm3 1961 paddd xmm2,xmm6 1962 pxor xmm14,xmm2 1963 pshufb xmm14,XMMWORD[$L$rol8] 1964 paddd xmm10,xmm14 1965 pxor xmm6,xmm10 1966 movdqa xmm3,xmm6 1967 pslld xmm3,7 1968 psrld xmm6,25 1969 pxor xmm6,xmm3 1970DB 102,15,58,15,246,4 1971DB 102,69,15,58,15,210,8 1972DB 102,69,15,58,15,246,12 1973 paddd xmm0,xmm4 1974 pxor xmm12,xmm0 1975 pshufb xmm12,XMMWORD[$L$rol16] 1976 paddd xmm8,xmm12 1977 pxor xmm4,xmm8 1978 movdqa xmm3,xmm4 1979 pslld 
xmm3,12 1980 psrld xmm4,20 1981 pxor xmm4,xmm3 1982 paddd xmm0,xmm4 1983 pxor xmm12,xmm0 1984 pshufb xmm12,XMMWORD[$L$rol8] 1985 paddd xmm8,xmm12 1986 pxor xmm4,xmm8 1987 movdqa xmm3,xmm4 1988 pslld xmm3,7 1989 psrld xmm4,25 1990 pxor xmm4,xmm3 1991DB 102,15,58,15,228,12 1992DB 102,69,15,58,15,192,8 1993DB 102,69,15,58,15,228,4 1994 paddd xmm1,xmm5 1995 pxor xmm13,xmm1 1996 pshufb xmm13,XMMWORD[$L$rol16] 1997 paddd xmm9,xmm13 1998 pxor xmm5,xmm9 1999 movdqa xmm3,xmm5 2000 pslld xmm3,12 2001 psrld xmm5,20 2002 pxor xmm5,xmm3 2003 paddd xmm1,xmm5 2004 pxor xmm13,xmm1 2005 pshufb xmm13,XMMWORD[$L$rol8] 2006 paddd xmm9,xmm13 2007 pxor xmm5,xmm9 2008 movdqa xmm3,xmm5 2009 pslld xmm3,7 2010 psrld xmm5,25 2011 pxor xmm5,xmm3 2012DB 102,15,58,15,237,12 2013DB 102,69,15,58,15,201,8 2014DB 102,69,15,58,15,237,4 2015 paddd xmm2,xmm6 2016 pxor xmm14,xmm2 2017 pshufb xmm14,XMMWORD[$L$rol16] 2018 paddd xmm10,xmm14 2019 pxor xmm6,xmm10 2020 movdqa xmm3,xmm6 2021 pslld xmm3,12 2022 psrld xmm6,20 2023 pxor xmm6,xmm3 2024 paddd xmm2,xmm6 2025 pxor xmm14,xmm2 2026 pshufb xmm14,XMMWORD[$L$rol8] 2027 paddd xmm10,xmm14 2028 pxor xmm6,xmm10 2029 movdqa xmm3,xmm6 2030 pslld xmm3,7 2031 psrld xmm6,25 2032 pxor xmm6,xmm3 2033DB 102,15,58,15,246,12 2034DB 102,69,15,58,15,210,8 2035DB 102,69,15,58,15,246,4 2036 2037 dec r10 2038 jnz NEAR $L$open_sse_128_rounds 2039 paddd xmm0,XMMWORD[$L$chacha20_consts] 2040 paddd xmm1,XMMWORD[$L$chacha20_consts] 2041 paddd xmm2,XMMWORD[$L$chacha20_consts] 2042 paddd xmm4,xmm7 2043 paddd xmm5,xmm7 2044 paddd xmm6,xmm7 2045 paddd xmm9,xmm11 2046 paddd xmm10,xmm11 2047 paddd xmm13,xmm15 2048 paddd xmm15,XMMWORD[$L$sse_inc] 2049 paddd xmm14,xmm15 2050 2051 pand xmm0,XMMWORD[$L$clamp] 2052 movdqa XMMWORD[(160+0)+rbp],xmm0 2053 movdqa XMMWORD[(160+16)+rbp],xmm4 2054 2055 mov r8,r8 2056 call poly_hash_ad_internal 2057$L$open_sse_128_xor_hash: 2058 cmp rbx,16 2059 jb NEAR $L$open_sse_tail_16 2060 sub rbx,16 2061 add r10,QWORD[((0+0))+rsi] 2062 adc 
r11,QWORD[((8+0))+rsi] 2063 adc r12,1 2064 2065 2066 movdqu xmm3,XMMWORD[rsi] 2067 pxor xmm1,xmm3 2068 movdqu XMMWORD[rdi],xmm1 2069 lea rsi,[16+rsi] 2070 lea rdi,[16+rdi] 2071 mov rax,QWORD[((0+160+0))+rbp] 2072 mov r15,rax 2073 mul r10 2074 mov r13,rax 2075 mov r14,rdx 2076 mov rax,QWORD[((0+160+0))+rbp] 2077 mul r11 2078 imul r15,r12 2079 add r14,rax 2080 adc r15,rdx 2081 mov rax,QWORD[((8+160+0))+rbp] 2082 mov r9,rax 2083 mul r10 2084 add r14,rax 2085 adc rdx,0 2086 mov r10,rdx 2087 mov rax,QWORD[((8+160+0))+rbp] 2088 mul r11 2089 add r15,rax 2090 adc rdx,0 2091 imul r9,r12 2092 add r15,r10 2093 adc r9,rdx 2094 mov r10,r13 2095 mov r11,r14 2096 mov r12,r15 2097 and r12,3 2098 mov r13,r15 2099 and r13,-4 2100 mov r14,r9 2101 shrd r15,r9,2 2102 shr r9,2 2103 add r15,r13 2104 adc r9,r14 2105 add r10,r15 2106 adc r11,r9 2107 adc r12,0 2108 2109 2110 movdqa xmm1,xmm5 2111 movdqa xmm5,xmm9 2112 movdqa xmm9,xmm13 2113 movdqa xmm13,xmm2 2114 movdqa xmm2,xmm6 2115 movdqa xmm6,xmm10 2116 movdqa xmm10,xmm14 2117 jmp NEAR $L$open_sse_128_xor_hash 2118$L$SEH_end_chacha20_poly1305_open: 2119 2120 2121 2122 2123 2124 2125 2126 2127global chacha20_poly1305_seal 2128 2129ALIGN 64 2130chacha20_poly1305_seal: 2131 mov QWORD[8+rsp],rdi ;WIN64 prologue 2132 mov QWORD[16+rsp],rsi 2133 mov rax,rsp 2134$L$SEH_begin_chacha20_poly1305_seal: 2135 mov rdi,rcx 2136 mov rsi,rdx 2137 mov rdx,r8 2138 mov rcx,r9 2139 mov r8,QWORD[40+rsp] 2140 mov r9,QWORD[48+rsp] 2141 2142 2143 2144 push rbp 2145 2146 push rbx 2147 2148 push r12 2149 2150 push r13 2151 2152 push r14 2153 2154 push r15 2155 2156 2157 2158 push r9 2159 2160 sub rsp,288 + 160 + 32 2161 2162 lea rbp,[32+rsp] 2163 and rbp,-32 2164 2165 movaps XMMWORD[(0+0)+rbp],xmm6 2166 movaps XMMWORD[(16+0)+rbp],xmm7 2167 movaps XMMWORD[(32+0)+rbp],xmm8 2168 movaps XMMWORD[(48+0)+rbp],xmm9 2169 movaps XMMWORD[(64+0)+rbp],xmm10 2170 movaps XMMWORD[(80+0)+rbp],xmm11 2171 movaps XMMWORD[(96+0)+rbp],xmm12 2172 movaps XMMWORD[(112+0)+rbp],xmm13 2173 
movaps XMMWORD[(128+0)+rbp],xmm14 2174 movaps XMMWORD[(144+0)+rbp],xmm15 2175 2176 mov rbx,QWORD[56+r9] 2177 add rbx,rdx 2178 mov QWORD[((0+160+32))+rbp],r8 2179 mov QWORD[((8+160+32))+rbp],rbx 2180 mov rbx,rdx 2181 2182 mov eax,DWORD[((OPENSSL_ia32cap_P+8))] 2183 and eax,288 2184 xor eax,288 2185 jz NEAR chacha20_poly1305_seal_avx2 2186 2187 cmp rbx,128 2188 jbe NEAR $L$seal_sse_128 2189 2190 movdqa xmm0,XMMWORD[$L$chacha20_consts] 2191 movdqu xmm4,XMMWORD[r9] 2192 movdqu xmm8,XMMWORD[16+r9] 2193 movdqu xmm12,XMMWORD[32+r9] 2194 2195 movdqa xmm1,xmm0 2196 movdqa xmm2,xmm0 2197 movdqa xmm3,xmm0 2198 movdqa xmm5,xmm4 2199 movdqa xmm6,xmm4 2200 movdqa xmm7,xmm4 2201 movdqa xmm9,xmm8 2202 movdqa xmm10,xmm8 2203 movdqa xmm11,xmm8 2204 movdqa xmm15,xmm12 2205 paddd xmm12,XMMWORD[$L$sse_inc] 2206 movdqa xmm14,xmm12 2207 paddd xmm12,XMMWORD[$L$sse_inc] 2208 movdqa xmm13,xmm12 2209 paddd xmm12,XMMWORD[$L$sse_inc] 2210 2211 movdqa XMMWORD[(160+48)+rbp],xmm4 2212 movdqa XMMWORD[(160+64)+rbp],xmm8 2213 movdqa XMMWORD[(160+96)+rbp],xmm12 2214 movdqa XMMWORD[(160+112)+rbp],xmm13 2215 movdqa XMMWORD[(160+128)+rbp],xmm14 2216 movdqa XMMWORD[(160+144)+rbp],xmm15 2217 mov r10,10 2218$L$seal_sse_init_rounds: 2219 movdqa XMMWORD[(160+80)+rbp],xmm8 2220 movdqa xmm8,XMMWORD[$L$rol16] 2221 paddd xmm3,xmm7 2222 paddd xmm2,xmm6 2223 paddd xmm1,xmm5 2224 paddd xmm0,xmm4 2225 pxor xmm15,xmm3 2226 pxor xmm14,xmm2 2227 pxor xmm13,xmm1 2228 pxor xmm12,xmm0 2229DB 102,69,15,56,0,248 2230DB 102,69,15,56,0,240 2231DB 102,69,15,56,0,232 2232DB 102,69,15,56,0,224 2233 movdqa xmm8,XMMWORD[((160+80))+rbp] 2234 paddd xmm11,xmm15 2235 paddd xmm10,xmm14 2236 paddd xmm9,xmm13 2237 paddd xmm8,xmm12 2238 pxor xmm7,xmm11 2239 pxor xmm6,xmm10 2240 pxor xmm5,xmm9 2241 pxor xmm4,xmm8 2242 movdqa XMMWORD[(160+80)+rbp],xmm8 2243 movdqa xmm8,xmm7 2244 psrld xmm8,20 2245 pslld xmm7,32-20 2246 pxor xmm7,xmm8 2247 movdqa xmm8,xmm6 2248 psrld xmm8,20 2249 pslld xmm6,32-20 2250 pxor xmm6,xmm8 2251 movdqa xmm8,xmm5 
2252 psrld xmm8,20 2253 pslld xmm5,32-20 2254 pxor xmm5,xmm8 2255 movdqa xmm8,xmm4 2256 psrld xmm8,20 2257 pslld xmm4,32-20 2258 pxor xmm4,xmm8 2259 movdqa xmm8,XMMWORD[$L$rol8] 2260 paddd xmm3,xmm7 2261 paddd xmm2,xmm6 2262 paddd xmm1,xmm5 2263 paddd xmm0,xmm4 2264 pxor xmm15,xmm3 2265 pxor xmm14,xmm2 2266 pxor xmm13,xmm1 2267 pxor xmm12,xmm0 2268DB 102,69,15,56,0,248 2269DB 102,69,15,56,0,240 2270DB 102,69,15,56,0,232 2271DB 102,69,15,56,0,224 2272 movdqa xmm8,XMMWORD[((160+80))+rbp] 2273 paddd xmm11,xmm15 2274 paddd xmm10,xmm14 2275 paddd xmm9,xmm13 2276 paddd xmm8,xmm12 2277 pxor xmm7,xmm11 2278 pxor xmm6,xmm10 2279 pxor xmm5,xmm9 2280 pxor xmm4,xmm8 2281 movdqa XMMWORD[(160+80)+rbp],xmm8 2282 movdqa xmm8,xmm7 2283 psrld xmm8,25 2284 pslld xmm7,32-25 2285 pxor xmm7,xmm8 2286 movdqa xmm8,xmm6 2287 psrld xmm8,25 2288 pslld xmm6,32-25 2289 pxor xmm6,xmm8 2290 movdqa xmm8,xmm5 2291 psrld xmm8,25 2292 pslld xmm5,32-25 2293 pxor xmm5,xmm8 2294 movdqa xmm8,xmm4 2295 psrld xmm8,25 2296 pslld xmm4,32-25 2297 pxor xmm4,xmm8 2298 movdqa xmm8,XMMWORD[((160+80))+rbp] 2299DB 102,15,58,15,255,4 2300DB 102,69,15,58,15,219,8 2301DB 102,69,15,58,15,255,12 2302DB 102,15,58,15,246,4 2303DB 102,69,15,58,15,210,8 2304DB 102,69,15,58,15,246,12 2305DB 102,15,58,15,237,4 2306DB 102,69,15,58,15,201,8 2307DB 102,69,15,58,15,237,12 2308DB 102,15,58,15,228,4 2309DB 102,69,15,58,15,192,8 2310DB 102,69,15,58,15,228,12 2311 movdqa XMMWORD[(160+80)+rbp],xmm8 2312 movdqa xmm8,XMMWORD[$L$rol16] 2313 paddd xmm3,xmm7 2314 paddd xmm2,xmm6 2315 paddd xmm1,xmm5 2316 paddd xmm0,xmm4 2317 pxor xmm15,xmm3 2318 pxor xmm14,xmm2 2319 pxor xmm13,xmm1 2320 pxor xmm12,xmm0 2321DB 102,69,15,56,0,248 2322DB 102,69,15,56,0,240 2323DB 102,69,15,56,0,232 2324DB 102,69,15,56,0,224 2325 movdqa xmm8,XMMWORD[((160+80))+rbp] 2326 paddd xmm11,xmm15 2327 paddd xmm10,xmm14 2328 paddd xmm9,xmm13 2329 paddd xmm8,xmm12 2330 pxor xmm7,xmm11 2331 pxor xmm6,xmm10 2332 pxor xmm5,xmm9 2333 pxor xmm4,xmm8 2334 movdqa 
XMMWORD[(160+80)+rbp],xmm8 2335 movdqa xmm8,xmm7 2336 psrld xmm8,20 2337 pslld xmm7,32-20 2338 pxor xmm7,xmm8 2339 movdqa xmm8,xmm6 2340 psrld xmm8,20 2341 pslld xmm6,32-20 2342 pxor xmm6,xmm8 2343 movdqa xmm8,xmm5 2344 psrld xmm8,20 2345 pslld xmm5,32-20 2346 pxor xmm5,xmm8 2347 movdqa xmm8,xmm4 2348 psrld xmm8,20 2349 pslld xmm4,32-20 2350 pxor xmm4,xmm8 2351 movdqa xmm8,XMMWORD[$L$rol8] 2352 paddd xmm3,xmm7 2353 paddd xmm2,xmm6 2354 paddd xmm1,xmm5 2355 paddd xmm0,xmm4 2356 pxor xmm15,xmm3 2357 pxor xmm14,xmm2 2358 pxor xmm13,xmm1 2359 pxor xmm12,xmm0 2360DB 102,69,15,56,0,248 2361DB 102,69,15,56,0,240 2362DB 102,69,15,56,0,232 2363DB 102,69,15,56,0,224 2364 movdqa xmm8,XMMWORD[((160+80))+rbp] 2365 paddd xmm11,xmm15 2366 paddd xmm10,xmm14 2367 paddd xmm9,xmm13 2368 paddd xmm8,xmm12 2369 pxor xmm7,xmm11 2370 pxor xmm6,xmm10 2371 pxor xmm5,xmm9 2372 pxor xmm4,xmm8 2373 movdqa XMMWORD[(160+80)+rbp],xmm8 2374 movdqa xmm8,xmm7 2375 psrld xmm8,25 2376 pslld xmm7,32-25 2377 pxor xmm7,xmm8 2378 movdqa xmm8,xmm6 2379 psrld xmm8,25 2380 pslld xmm6,32-25 2381 pxor xmm6,xmm8 2382 movdqa xmm8,xmm5 2383 psrld xmm8,25 2384 pslld xmm5,32-25 2385 pxor xmm5,xmm8 2386 movdqa xmm8,xmm4 2387 psrld xmm8,25 2388 pslld xmm4,32-25 2389 pxor xmm4,xmm8 2390 movdqa xmm8,XMMWORD[((160+80))+rbp] 2391DB 102,15,58,15,255,12 2392DB 102,69,15,58,15,219,8 2393DB 102,69,15,58,15,255,4 2394DB 102,15,58,15,246,12 2395DB 102,69,15,58,15,210,8 2396DB 102,69,15,58,15,246,4 2397DB 102,15,58,15,237,12 2398DB 102,69,15,58,15,201,8 2399DB 102,69,15,58,15,237,4 2400DB 102,15,58,15,228,12 2401DB 102,69,15,58,15,192,8 2402DB 102,69,15,58,15,228,4 2403 2404 dec r10 2405 jnz NEAR $L$seal_sse_init_rounds 2406 paddd xmm3,XMMWORD[$L$chacha20_consts] 2407 paddd xmm7,XMMWORD[((160+48))+rbp] 2408 paddd xmm11,XMMWORD[((160+64))+rbp] 2409 paddd xmm15,XMMWORD[((160+144))+rbp] 2410 paddd xmm2,XMMWORD[$L$chacha20_consts] 2411 paddd xmm6,XMMWORD[((160+48))+rbp] 2412 paddd xmm10,XMMWORD[((160+64))+rbp] 2413 paddd 
xmm14,XMMWORD[((160+128))+rbp] 2414 paddd xmm1,XMMWORD[$L$chacha20_consts] 2415 paddd xmm5,XMMWORD[((160+48))+rbp] 2416 paddd xmm9,XMMWORD[((160+64))+rbp] 2417 paddd xmm13,XMMWORD[((160+112))+rbp] 2418 paddd xmm0,XMMWORD[$L$chacha20_consts] 2419 paddd xmm4,XMMWORD[((160+48))+rbp] 2420 paddd xmm8,XMMWORD[((160+64))+rbp] 2421 paddd xmm12,XMMWORD[((160+96))+rbp] 2422 2423 2424 pand xmm3,XMMWORD[$L$clamp] 2425 movdqa XMMWORD[(160+0)+rbp],xmm3 2426 movdqa XMMWORD[(160+16)+rbp],xmm7 2427 2428 mov r8,r8 2429 call poly_hash_ad_internal 2430 movdqu xmm3,XMMWORD[((0 + 0))+rsi] 2431 movdqu xmm7,XMMWORD[((16 + 0))+rsi] 2432 movdqu xmm11,XMMWORD[((32 + 0))+rsi] 2433 movdqu xmm15,XMMWORD[((48 + 0))+rsi] 2434 pxor xmm2,xmm3 2435 pxor xmm6,xmm7 2436 pxor xmm10,xmm11 2437 pxor xmm15,xmm14 2438 movdqu XMMWORD[(0 + 0)+rdi],xmm2 2439 movdqu XMMWORD[(16 + 0)+rdi],xmm6 2440 movdqu XMMWORD[(32 + 0)+rdi],xmm10 2441 movdqu XMMWORD[(48 + 0)+rdi],xmm15 2442 movdqu xmm3,XMMWORD[((0 + 64))+rsi] 2443 movdqu xmm7,XMMWORD[((16 + 64))+rsi] 2444 movdqu xmm11,XMMWORD[((32 + 64))+rsi] 2445 movdqu xmm15,XMMWORD[((48 + 64))+rsi] 2446 pxor xmm1,xmm3 2447 pxor xmm5,xmm7 2448 pxor xmm9,xmm11 2449 pxor xmm15,xmm13 2450 movdqu XMMWORD[(0 + 64)+rdi],xmm1 2451 movdqu XMMWORD[(16 + 64)+rdi],xmm5 2452 movdqu XMMWORD[(32 + 64)+rdi],xmm9 2453 movdqu XMMWORD[(48 + 64)+rdi],xmm15 2454 2455 cmp rbx,12*16 2456 ja NEAR $L$seal_sse_main_init 2457 mov rcx,8*16 2458 sub rbx,8*16 2459 lea rsi,[128+rsi] 2460 jmp NEAR $L$seal_sse_128_tail_hash 2461$L$seal_sse_main_init: 2462 movdqu xmm3,XMMWORD[((0 + 128))+rsi] 2463 movdqu xmm7,XMMWORD[((16 + 128))+rsi] 2464 movdqu xmm11,XMMWORD[((32 + 128))+rsi] 2465 movdqu xmm15,XMMWORD[((48 + 128))+rsi] 2466 pxor xmm0,xmm3 2467 pxor xmm4,xmm7 2468 pxor xmm8,xmm11 2469 pxor xmm15,xmm12 2470 movdqu XMMWORD[(0 + 128)+rdi],xmm0 2471 movdqu XMMWORD[(16 + 128)+rdi],xmm4 2472 movdqu XMMWORD[(32 + 128)+rdi],xmm8 2473 movdqu XMMWORD[(48 + 128)+rdi],xmm15 2474 2475 mov rcx,12*16 2476 sub rbx,12*16 
2477 lea rsi,[192+rsi] 2478 mov rcx,2 2479 mov r8,8 2480 cmp rbx,4*16 2481 jbe NEAR $L$seal_sse_tail_64 2482 cmp rbx,8*16 2483 jbe NEAR $L$seal_sse_tail_128 2484 cmp rbx,12*16 2485 jbe NEAR $L$seal_sse_tail_192 2486 2487$L$seal_sse_main_loop: 2488 movdqa xmm0,XMMWORD[$L$chacha20_consts] 2489 movdqa xmm4,XMMWORD[((160+48))+rbp] 2490 movdqa xmm8,XMMWORD[((160+64))+rbp] 2491 movdqa xmm1,xmm0 2492 movdqa xmm5,xmm4 2493 movdqa xmm9,xmm8 2494 movdqa xmm2,xmm0 2495 movdqa xmm6,xmm4 2496 movdqa xmm10,xmm8 2497 movdqa xmm3,xmm0 2498 movdqa xmm7,xmm4 2499 movdqa xmm11,xmm8 2500 movdqa xmm15,XMMWORD[((160+96))+rbp] 2501 paddd xmm15,XMMWORD[$L$sse_inc] 2502 movdqa xmm14,xmm15 2503 paddd xmm14,XMMWORD[$L$sse_inc] 2504 movdqa xmm13,xmm14 2505 paddd xmm13,XMMWORD[$L$sse_inc] 2506 movdqa xmm12,xmm13 2507 paddd xmm12,XMMWORD[$L$sse_inc] 2508 movdqa XMMWORD[(160+96)+rbp],xmm12 2509 movdqa XMMWORD[(160+112)+rbp],xmm13 2510 movdqa XMMWORD[(160+128)+rbp],xmm14 2511 movdqa XMMWORD[(160+144)+rbp],xmm15 2512 2513ALIGN 32 2514$L$seal_sse_main_rounds: 2515 movdqa XMMWORD[(160+80)+rbp],xmm8 2516 movdqa xmm8,XMMWORD[$L$rol16] 2517 paddd xmm3,xmm7 2518 paddd xmm2,xmm6 2519 paddd xmm1,xmm5 2520 paddd xmm0,xmm4 2521 pxor xmm15,xmm3 2522 pxor xmm14,xmm2 2523 pxor xmm13,xmm1 2524 pxor xmm12,xmm0 2525DB 102,69,15,56,0,248 2526DB 102,69,15,56,0,240 2527DB 102,69,15,56,0,232 2528DB 102,69,15,56,0,224 2529 movdqa xmm8,XMMWORD[((160+80))+rbp] 2530 paddd xmm11,xmm15 2531 paddd xmm10,xmm14 2532 paddd xmm9,xmm13 2533 paddd xmm8,xmm12 2534 pxor xmm7,xmm11 2535 add r10,QWORD[((0+0))+rdi] 2536 adc r11,QWORD[((8+0))+rdi] 2537 adc r12,1 2538 pxor xmm6,xmm10 2539 pxor xmm5,xmm9 2540 pxor xmm4,xmm8 2541 movdqa XMMWORD[(160+80)+rbp],xmm8 2542 movdqa xmm8,xmm7 2543 psrld xmm8,20 2544 pslld xmm7,32-20 2545 pxor xmm7,xmm8 2546 movdqa xmm8,xmm6 2547 psrld xmm8,20 2548 pslld xmm6,32-20 2549 pxor xmm6,xmm8 2550 movdqa xmm8,xmm5 2551 psrld xmm8,20 2552 pslld xmm5,32-20 2553 pxor xmm5,xmm8 2554 movdqa xmm8,xmm4 2555 
psrld xmm8,20 2556 pslld xmm4,32-20 2557 pxor xmm4,xmm8 2558 mov rax,QWORD[((0+160+0))+rbp] 2559 mov r15,rax 2560 mul r10 2561 mov r13,rax 2562 mov r14,rdx 2563 mov rax,QWORD[((0+160+0))+rbp] 2564 mul r11 2565 imul r15,r12 2566 add r14,rax 2567 adc r15,rdx 2568 movdqa xmm8,XMMWORD[$L$rol8] 2569 paddd xmm3,xmm7 2570 paddd xmm2,xmm6 2571 paddd xmm1,xmm5 2572 paddd xmm0,xmm4 2573 pxor xmm15,xmm3 2574 pxor xmm14,xmm2 2575 pxor xmm13,xmm1 2576 pxor xmm12,xmm0 2577DB 102,69,15,56,0,248 2578DB 102,69,15,56,0,240 2579DB 102,69,15,56,0,232 2580DB 102,69,15,56,0,224 2581 movdqa xmm8,XMMWORD[((160+80))+rbp] 2582 paddd xmm11,xmm15 2583 paddd xmm10,xmm14 2584 paddd xmm9,xmm13 2585 paddd xmm8,xmm12 2586 pxor xmm7,xmm11 2587 pxor xmm6,xmm10 2588 mov rax,QWORD[((8+160+0))+rbp] 2589 mov r9,rax 2590 mul r10 2591 add r14,rax 2592 adc rdx,0 2593 mov r10,rdx 2594 mov rax,QWORD[((8+160+0))+rbp] 2595 mul r11 2596 add r15,rax 2597 adc rdx,0 2598 pxor xmm5,xmm9 2599 pxor xmm4,xmm8 2600 movdqa XMMWORD[(160+80)+rbp],xmm8 2601 movdqa xmm8,xmm7 2602 psrld xmm8,25 2603 pslld xmm7,32-25 2604 pxor xmm7,xmm8 2605 movdqa xmm8,xmm6 2606 psrld xmm8,25 2607 pslld xmm6,32-25 2608 pxor xmm6,xmm8 2609 movdqa xmm8,xmm5 2610 psrld xmm8,25 2611 pslld xmm5,32-25 2612 pxor xmm5,xmm8 2613 movdqa xmm8,xmm4 2614 psrld xmm8,25 2615 pslld xmm4,32-25 2616 pxor xmm4,xmm8 2617 movdqa xmm8,XMMWORD[((160+80))+rbp] 2618 imul r9,r12 2619 add r15,r10 2620 adc r9,rdx 2621DB 102,15,58,15,255,4 2622DB 102,69,15,58,15,219,8 2623DB 102,69,15,58,15,255,12 2624DB 102,15,58,15,246,4 2625DB 102,69,15,58,15,210,8 2626DB 102,69,15,58,15,246,12 2627DB 102,15,58,15,237,4 2628DB 102,69,15,58,15,201,8 2629DB 102,69,15,58,15,237,12 2630DB 102,15,58,15,228,4 2631DB 102,69,15,58,15,192,8 2632DB 102,69,15,58,15,228,12 2633 movdqa XMMWORD[(160+80)+rbp],xmm8 2634 movdqa xmm8,XMMWORD[$L$rol16] 2635 paddd xmm3,xmm7 2636 paddd xmm2,xmm6 2637 paddd xmm1,xmm5 2638 paddd xmm0,xmm4 2639 pxor xmm15,xmm3 2640 pxor xmm14,xmm2 2641 mov r10,r13 2642 mov 
r11,r14 2643 mov r12,r15 2644 and r12,3 2645 mov r13,r15 2646 and r13,-4 2647 mov r14,r9 2648 shrd r15,r9,2 2649 shr r9,2 2650 add r15,r13 2651 adc r9,r14 2652 add r10,r15 2653 adc r11,r9 2654 adc r12,0 2655 pxor xmm13,xmm1 2656 pxor xmm12,xmm0 2657DB 102,69,15,56,0,248 2658DB 102,69,15,56,0,240 2659DB 102,69,15,56,0,232 2660DB 102,69,15,56,0,224 2661 movdqa xmm8,XMMWORD[((160+80))+rbp] 2662 paddd xmm11,xmm15 2663 paddd xmm10,xmm14 2664 paddd xmm9,xmm13 2665 paddd xmm8,xmm12 2666 pxor xmm7,xmm11 2667 pxor xmm6,xmm10 2668 pxor xmm5,xmm9 2669 pxor xmm4,xmm8 2670 movdqa XMMWORD[(160+80)+rbp],xmm8 2671 movdqa xmm8,xmm7 2672 psrld xmm8,20 2673 pslld xmm7,32-20 2674 pxor xmm7,xmm8 2675 movdqa xmm8,xmm6 2676 psrld xmm8,20 2677 pslld xmm6,32-20 2678 pxor xmm6,xmm8 2679 movdqa xmm8,xmm5 2680 psrld xmm8,20 2681 pslld xmm5,32-20 2682 pxor xmm5,xmm8 2683 movdqa xmm8,xmm4 2684 psrld xmm8,20 2685 pslld xmm4,32-20 2686 pxor xmm4,xmm8 2687 movdqa xmm8,XMMWORD[$L$rol8] 2688 paddd xmm3,xmm7 2689 paddd xmm2,xmm6 2690 paddd xmm1,xmm5 2691 paddd xmm0,xmm4 2692 pxor xmm15,xmm3 2693 pxor xmm14,xmm2 2694 pxor xmm13,xmm1 2695 pxor xmm12,xmm0 2696DB 102,69,15,56,0,248 2697DB 102,69,15,56,0,240 2698DB 102,69,15,56,0,232 2699DB 102,69,15,56,0,224 2700 movdqa xmm8,XMMWORD[((160+80))+rbp] 2701 paddd xmm11,xmm15 2702 paddd xmm10,xmm14 2703 paddd xmm9,xmm13 2704 paddd xmm8,xmm12 2705 pxor xmm7,xmm11 2706 pxor xmm6,xmm10 2707 pxor xmm5,xmm9 2708 pxor xmm4,xmm8 2709 movdqa XMMWORD[(160+80)+rbp],xmm8 2710 movdqa xmm8,xmm7 2711 psrld xmm8,25 2712 pslld xmm7,32-25 2713 pxor xmm7,xmm8 2714 movdqa xmm8,xmm6 2715 psrld xmm8,25 2716 pslld xmm6,32-25 2717 pxor xmm6,xmm8 2718 movdqa xmm8,xmm5 2719 psrld xmm8,25 2720 pslld xmm5,32-25 2721 pxor xmm5,xmm8 2722 movdqa xmm8,xmm4 2723 psrld xmm8,25 2724 pslld xmm4,32-25 2725 pxor xmm4,xmm8 2726 movdqa xmm8,XMMWORD[((160+80))+rbp] 2727DB 102,15,58,15,255,12 2728DB 102,69,15,58,15,219,8 2729DB 102,69,15,58,15,255,4 2730DB 102,15,58,15,246,12 2731DB 
102,69,15,58,15,210,8 2732DB 102,69,15,58,15,246,4 2733DB 102,15,58,15,237,12 2734DB 102,69,15,58,15,201,8 2735DB 102,69,15,58,15,237,4 2736DB 102,15,58,15,228,12 2737DB 102,69,15,58,15,192,8 2738DB 102,69,15,58,15,228,4 2739 2740 lea rdi,[16+rdi] 2741 dec r8 2742 jge NEAR $L$seal_sse_main_rounds 2743 add r10,QWORD[((0+0))+rdi] 2744 adc r11,QWORD[((8+0))+rdi] 2745 adc r12,1 2746 mov rax,QWORD[((0+160+0))+rbp] 2747 mov r15,rax 2748 mul r10 2749 mov r13,rax 2750 mov r14,rdx 2751 mov rax,QWORD[((0+160+0))+rbp] 2752 mul r11 2753 imul r15,r12 2754 add r14,rax 2755 adc r15,rdx 2756 mov rax,QWORD[((8+160+0))+rbp] 2757 mov r9,rax 2758 mul r10 2759 add r14,rax 2760 adc rdx,0 2761 mov r10,rdx 2762 mov rax,QWORD[((8+160+0))+rbp] 2763 mul r11 2764 add r15,rax 2765 adc rdx,0 2766 imul r9,r12 2767 add r15,r10 2768 adc r9,rdx 2769 mov r10,r13 2770 mov r11,r14 2771 mov r12,r15 2772 and r12,3 2773 mov r13,r15 2774 and r13,-4 2775 mov r14,r9 2776 shrd r15,r9,2 2777 shr r9,2 2778 add r15,r13 2779 adc r9,r14 2780 add r10,r15 2781 adc r11,r9 2782 adc r12,0 2783 2784 lea rdi,[16+rdi] 2785 dec rcx 2786 jg NEAR $L$seal_sse_main_rounds 2787 paddd xmm3,XMMWORD[$L$chacha20_consts] 2788 paddd xmm7,XMMWORD[((160+48))+rbp] 2789 paddd xmm11,XMMWORD[((160+64))+rbp] 2790 paddd xmm15,XMMWORD[((160+144))+rbp] 2791 paddd xmm2,XMMWORD[$L$chacha20_consts] 2792 paddd xmm6,XMMWORD[((160+48))+rbp] 2793 paddd xmm10,XMMWORD[((160+64))+rbp] 2794 paddd xmm14,XMMWORD[((160+128))+rbp] 2795 paddd xmm1,XMMWORD[$L$chacha20_consts] 2796 paddd xmm5,XMMWORD[((160+48))+rbp] 2797 paddd xmm9,XMMWORD[((160+64))+rbp] 2798 paddd xmm13,XMMWORD[((160+112))+rbp] 2799 paddd xmm0,XMMWORD[$L$chacha20_consts] 2800 paddd xmm4,XMMWORD[((160+48))+rbp] 2801 paddd xmm8,XMMWORD[((160+64))+rbp] 2802 paddd xmm12,XMMWORD[((160+96))+rbp] 2803 2804 movdqa XMMWORD[(160+80)+rbp],xmm14 2805 movdqa XMMWORD[(160+80)+rbp],xmm14 2806 movdqu xmm14,XMMWORD[((0 + 0))+rsi] 2807 pxor xmm14,xmm3 2808 movdqu XMMWORD[(0 + 0)+rdi],xmm14 2809 movdqu 
xmm14,XMMWORD[((16 + 0))+rsi] 2810 pxor xmm14,xmm7 2811 movdqu XMMWORD[(16 + 0)+rdi],xmm14 2812 movdqu xmm14,XMMWORD[((32 + 0))+rsi] 2813 pxor xmm14,xmm11 2814 movdqu XMMWORD[(32 + 0)+rdi],xmm14 2815 movdqu xmm14,XMMWORD[((48 + 0))+rsi] 2816 pxor xmm14,xmm15 2817 movdqu XMMWORD[(48 + 0)+rdi],xmm14 2818 2819 movdqa xmm14,XMMWORD[((160+80))+rbp] 2820 movdqu xmm3,XMMWORD[((0 + 64))+rsi] 2821 movdqu xmm7,XMMWORD[((16 + 64))+rsi] 2822 movdqu xmm11,XMMWORD[((32 + 64))+rsi] 2823 movdqu xmm15,XMMWORD[((48 + 64))+rsi] 2824 pxor xmm2,xmm3 2825 pxor xmm6,xmm7 2826 pxor xmm10,xmm11 2827 pxor xmm15,xmm14 2828 movdqu XMMWORD[(0 + 64)+rdi],xmm2 2829 movdqu XMMWORD[(16 + 64)+rdi],xmm6 2830 movdqu XMMWORD[(32 + 64)+rdi],xmm10 2831 movdqu XMMWORD[(48 + 64)+rdi],xmm15 2832 movdqu xmm3,XMMWORD[((0 + 128))+rsi] 2833 movdqu xmm7,XMMWORD[((16 + 128))+rsi] 2834 movdqu xmm11,XMMWORD[((32 + 128))+rsi] 2835 movdqu xmm15,XMMWORD[((48 + 128))+rsi] 2836 pxor xmm1,xmm3 2837 pxor xmm5,xmm7 2838 pxor xmm9,xmm11 2839 pxor xmm15,xmm13 2840 movdqu XMMWORD[(0 + 128)+rdi],xmm1 2841 movdqu XMMWORD[(16 + 128)+rdi],xmm5 2842 movdqu XMMWORD[(32 + 128)+rdi],xmm9 2843 movdqu XMMWORD[(48 + 128)+rdi],xmm15 2844 2845 cmp rbx,16*16 2846 ja NEAR $L$seal_sse_main_loop_xor 2847 2848 mov rcx,12*16 2849 sub rbx,12*16 2850 lea rsi,[192+rsi] 2851 jmp NEAR $L$seal_sse_128_tail_hash 2852$L$seal_sse_main_loop_xor: 2853 movdqu xmm3,XMMWORD[((0 + 192))+rsi] 2854 movdqu xmm7,XMMWORD[((16 + 192))+rsi] 2855 movdqu xmm11,XMMWORD[((32 + 192))+rsi] 2856 movdqu xmm15,XMMWORD[((48 + 192))+rsi] 2857 pxor xmm0,xmm3 2858 pxor xmm4,xmm7 2859 pxor xmm8,xmm11 2860 pxor xmm15,xmm12 2861 movdqu XMMWORD[(0 + 192)+rdi],xmm0 2862 movdqu XMMWORD[(16 + 192)+rdi],xmm4 2863 movdqu XMMWORD[(32 + 192)+rdi],xmm8 2864 movdqu XMMWORD[(48 + 192)+rdi],xmm15 2865 2866 lea rsi,[256+rsi] 2867 sub rbx,16*16 2868 mov rcx,6 2869 mov r8,4 2870 cmp rbx,12*16 2871 jg NEAR $L$seal_sse_main_loop 2872 mov rcx,rbx 2873 test rbx,rbx 2874 je NEAR 
$L$seal_sse_128_tail_hash 2875 mov rcx,6 2876 cmp rbx,8*16 2877 ja NEAR $L$seal_sse_tail_192 2878 cmp rbx,4*16 2879 ja NEAR $L$seal_sse_tail_128 2880 2881$L$seal_sse_tail_64: 2882 movdqa xmm0,XMMWORD[$L$chacha20_consts] 2883 movdqa xmm4,XMMWORD[((160+48))+rbp] 2884 movdqa xmm8,XMMWORD[((160+64))+rbp] 2885 movdqa xmm12,XMMWORD[((160+96))+rbp] 2886 paddd xmm12,XMMWORD[$L$sse_inc] 2887 movdqa XMMWORD[(160+96)+rbp],xmm12 2888 2889$L$seal_sse_tail_64_rounds_and_x2hash: 2890 add r10,QWORD[((0+0))+rdi] 2891 adc r11,QWORD[((8+0))+rdi] 2892 adc r12,1 2893 mov rax,QWORD[((0+160+0))+rbp] 2894 mov r15,rax 2895 mul r10 2896 mov r13,rax 2897 mov r14,rdx 2898 mov rax,QWORD[((0+160+0))+rbp] 2899 mul r11 2900 imul r15,r12 2901 add r14,rax 2902 adc r15,rdx 2903 mov rax,QWORD[((8+160+0))+rbp] 2904 mov r9,rax 2905 mul r10 2906 add r14,rax 2907 adc rdx,0 2908 mov r10,rdx 2909 mov rax,QWORD[((8+160+0))+rbp] 2910 mul r11 2911 add r15,rax 2912 adc rdx,0 2913 imul r9,r12 2914 add r15,r10 2915 adc r9,rdx 2916 mov r10,r13 2917 mov r11,r14 2918 mov r12,r15 2919 and r12,3 2920 mov r13,r15 2921 and r13,-4 2922 mov r14,r9 2923 shrd r15,r9,2 2924 shr r9,2 2925 add r15,r13 2926 adc r9,r14 2927 add r10,r15 2928 adc r11,r9 2929 adc r12,0 2930 2931 lea rdi,[16+rdi] 2932$L$seal_sse_tail_64_rounds_and_x1hash: 2933 paddd xmm0,xmm4 2934 pxor xmm12,xmm0 2935 pshufb xmm12,XMMWORD[$L$rol16] 2936 paddd xmm8,xmm12 2937 pxor xmm4,xmm8 2938 movdqa xmm3,xmm4 2939 pslld xmm3,12 2940 psrld xmm4,20 2941 pxor xmm4,xmm3 2942 paddd xmm0,xmm4 2943 pxor xmm12,xmm0 2944 pshufb xmm12,XMMWORD[$L$rol8] 2945 paddd xmm8,xmm12 2946 pxor xmm4,xmm8 2947 movdqa xmm3,xmm4 2948 pslld xmm3,7 2949 psrld xmm4,25 2950 pxor xmm4,xmm3 2951DB 102,15,58,15,228,4 2952DB 102,69,15,58,15,192,8 2953DB 102,69,15,58,15,228,12 2954 paddd xmm0,xmm4 2955 pxor xmm12,xmm0 2956 pshufb xmm12,XMMWORD[$L$rol16] 2957 paddd xmm8,xmm12 2958 pxor xmm4,xmm8 2959 movdqa xmm3,xmm4 2960 pslld xmm3,12 2961 psrld xmm4,20 2962 pxor xmm4,xmm3 2963 paddd xmm0,xmm4 
2964 pxor xmm12,xmm0 2965 pshufb xmm12,XMMWORD[$L$rol8] 2966 paddd xmm8,xmm12 2967 pxor xmm4,xmm8 2968 movdqa xmm3,xmm4 2969 pslld xmm3,7 2970 psrld xmm4,25 2971 pxor xmm4,xmm3 2972DB 102,15,58,15,228,12 2973DB 102,69,15,58,15,192,8 2974DB 102,69,15,58,15,228,4 2975 add r10,QWORD[((0+0))+rdi] 2976 adc r11,QWORD[((8+0))+rdi] 2977 adc r12,1 2978 mov rax,QWORD[((0+160+0))+rbp] 2979 mov r15,rax 2980 mul r10 2981 mov r13,rax 2982 mov r14,rdx 2983 mov rax,QWORD[((0+160+0))+rbp] 2984 mul r11 2985 imul r15,r12 2986 add r14,rax 2987 adc r15,rdx 2988 mov rax,QWORD[((8+160+0))+rbp] 2989 mov r9,rax 2990 mul r10 2991 add r14,rax 2992 adc rdx,0 2993 mov r10,rdx 2994 mov rax,QWORD[((8+160+0))+rbp] 2995 mul r11 2996 add r15,rax 2997 adc rdx,0 2998 imul r9,r12 2999 add r15,r10 3000 adc r9,rdx 3001 mov r10,r13 3002 mov r11,r14 3003 mov r12,r15 3004 and r12,3 3005 mov r13,r15 3006 and r13,-4 3007 mov r14,r9 3008 shrd r15,r9,2 3009 shr r9,2 3010 add r15,r13 3011 adc r9,r14 3012 add r10,r15 3013 adc r11,r9 3014 adc r12,0 3015 3016 lea rdi,[16+rdi] 3017 dec rcx 3018 jg NEAR $L$seal_sse_tail_64_rounds_and_x2hash 3019 dec r8 3020 jge NEAR $L$seal_sse_tail_64_rounds_and_x1hash 3021 paddd xmm0,XMMWORD[$L$chacha20_consts] 3022 paddd xmm4,XMMWORD[((160+48))+rbp] 3023 paddd xmm8,XMMWORD[((160+64))+rbp] 3024 paddd xmm12,XMMWORD[((160+96))+rbp] 3025 3026 jmp NEAR $L$seal_sse_128_tail_xor 3027 3028$L$seal_sse_tail_128: 3029 movdqa xmm0,XMMWORD[$L$chacha20_consts] 3030 movdqa xmm4,XMMWORD[((160+48))+rbp] 3031 movdqa xmm8,XMMWORD[((160+64))+rbp] 3032 movdqa xmm1,xmm0 3033 movdqa xmm5,xmm4 3034 movdqa xmm9,xmm8 3035 movdqa xmm13,XMMWORD[((160+96))+rbp] 3036 paddd xmm13,XMMWORD[$L$sse_inc] 3037 movdqa xmm12,xmm13 3038 paddd xmm12,XMMWORD[$L$sse_inc] 3039 movdqa XMMWORD[(160+96)+rbp],xmm12 3040 movdqa XMMWORD[(160+112)+rbp],xmm13 3041 3042$L$seal_sse_tail_128_rounds_and_x2hash: 3043 add r10,QWORD[((0+0))+rdi] 3044 adc r11,QWORD[((8+0))+rdi] 3045 adc r12,1 3046 mov rax,QWORD[((0+160+0))+rbp] 3047 mov 
r15,rax 3048 mul r10 3049 mov r13,rax 3050 mov r14,rdx 3051 mov rax,QWORD[((0+160+0))+rbp] 3052 mul r11 3053 imul r15,r12 3054 add r14,rax 3055 adc r15,rdx 3056 mov rax,QWORD[((8+160+0))+rbp] 3057 mov r9,rax 3058 mul r10 3059 add r14,rax 3060 adc rdx,0 3061 mov r10,rdx 3062 mov rax,QWORD[((8+160+0))+rbp] 3063 mul r11 3064 add r15,rax 3065 adc rdx,0 3066 imul r9,r12 3067 add r15,r10 3068 adc r9,rdx 3069 mov r10,r13 3070 mov r11,r14 3071 mov r12,r15 3072 and r12,3 3073 mov r13,r15 3074 and r13,-4 3075 mov r14,r9 3076 shrd r15,r9,2 3077 shr r9,2 3078 add r15,r13 3079 adc r9,r14 3080 add r10,r15 3081 adc r11,r9 3082 adc r12,0 3083 3084 lea rdi,[16+rdi] 3085$L$seal_sse_tail_128_rounds_and_x1hash: 3086 paddd xmm0,xmm4 3087 pxor xmm12,xmm0 3088 pshufb xmm12,XMMWORD[$L$rol16] 3089 paddd xmm8,xmm12 3090 pxor xmm4,xmm8 3091 movdqa xmm3,xmm4 3092 pslld xmm3,12 3093 psrld xmm4,20 3094 pxor xmm4,xmm3 3095 paddd xmm0,xmm4 3096 pxor xmm12,xmm0 3097 pshufb xmm12,XMMWORD[$L$rol8] 3098 paddd xmm8,xmm12 3099 pxor xmm4,xmm8 3100 movdqa xmm3,xmm4 3101 pslld xmm3,7 3102 psrld xmm4,25 3103 pxor xmm4,xmm3 3104DB 102,15,58,15,228,4 3105DB 102,69,15,58,15,192,8 3106DB 102,69,15,58,15,228,12 3107 paddd xmm1,xmm5 3108 pxor xmm13,xmm1 3109 pshufb xmm13,XMMWORD[$L$rol16] 3110 paddd xmm9,xmm13 3111 pxor xmm5,xmm9 3112 movdqa xmm3,xmm5 3113 pslld xmm3,12 3114 psrld xmm5,20 3115 pxor xmm5,xmm3 3116 paddd xmm1,xmm5 3117 pxor xmm13,xmm1 3118 pshufb xmm13,XMMWORD[$L$rol8] 3119 paddd xmm9,xmm13 3120 pxor xmm5,xmm9 3121 movdqa xmm3,xmm5 3122 pslld xmm3,7 3123 psrld xmm5,25 3124 pxor xmm5,xmm3 3125DB 102,15,58,15,237,4 3126DB 102,69,15,58,15,201,8 3127DB 102,69,15,58,15,237,12 3128 add r10,QWORD[((0+0))+rdi] 3129 adc r11,QWORD[((8+0))+rdi] 3130 adc r12,1 3131 mov rax,QWORD[((0+160+0))+rbp] 3132 mov r15,rax 3133 mul r10 3134 mov r13,rax 3135 mov r14,rdx 3136 mov rax,QWORD[((0+160+0))+rbp] 3137 mul r11 3138 imul r15,r12 3139 add r14,rax 3140 adc r15,rdx 3141 mov rax,QWORD[((8+160+0))+rbp] 3142 mov r9,rax 
3143 mul r10 3144 add r14,rax 3145 adc rdx,0 3146 mov r10,rdx 3147 mov rax,QWORD[((8+160+0))+rbp] 3148 mul r11 3149 add r15,rax 3150 adc rdx,0 3151 imul r9,r12 3152 add r15,r10 3153 adc r9,rdx 3154 mov r10,r13 3155 mov r11,r14 3156 mov r12,r15 3157 and r12,3 3158 mov r13,r15 3159 and r13,-4 3160 mov r14,r9 3161 shrd r15,r9,2 3162 shr r9,2 3163 add r15,r13 3164 adc r9,r14 3165 add r10,r15 3166 adc r11,r9 3167 adc r12,0 3168 paddd xmm0,xmm4 3169 pxor xmm12,xmm0 3170 pshufb xmm12,XMMWORD[$L$rol16] 3171 paddd xmm8,xmm12 3172 pxor xmm4,xmm8 3173 movdqa xmm3,xmm4 3174 pslld xmm3,12 3175 psrld xmm4,20 3176 pxor xmm4,xmm3 3177 paddd xmm0,xmm4 3178 pxor xmm12,xmm0 3179 pshufb xmm12,XMMWORD[$L$rol8] 3180 paddd xmm8,xmm12 3181 pxor xmm4,xmm8 3182 movdqa xmm3,xmm4 3183 pslld xmm3,7 3184 psrld xmm4,25 3185 pxor xmm4,xmm3 3186DB 102,15,58,15,228,12 3187DB 102,69,15,58,15,192,8 3188DB 102,69,15,58,15,228,4 3189 paddd xmm1,xmm5 3190 pxor xmm13,xmm1 3191 pshufb xmm13,XMMWORD[$L$rol16] 3192 paddd xmm9,xmm13 3193 pxor xmm5,xmm9 3194 movdqa xmm3,xmm5 3195 pslld xmm3,12 3196 psrld xmm5,20 3197 pxor xmm5,xmm3 3198 paddd xmm1,xmm5 3199 pxor xmm13,xmm1 3200 pshufb xmm13,XMMWORD[$L$rol8] 3201 paddd xmm9,xmm13 3202 pxor xmm5,xmm9 3203 movdqa xmm3,xmm5 3204 pslld xmm3,7 3205 psrld xmm5,25 3206 pxor xmm5,xmm3 3207DB 102,15,58,15,237,12 3208DB 102,69,15,58,15,201,8 3209DB 102,69,15,58,15,237,4 3210 3211 lea rdi,[16+rdi] 3212 dec rcx 3213 jg NEAR $L$seal_sse_tail_128_rounds_and_x2hash 3214 dec r8 3215 jge NEAR $L$seal_sse_tail_128_rounds_and_x1hash 3216 paddd xmm1,XMMWORD[$L$chacha20_consts] 3217 paddd xmm5,XMMWORD[((160+48))+rbp] 3218 paddd xmm9,XMMWORD[((160+64))+rbp] 3219 paddd xmm13,XMMWORD[((160+112))+rbp] 3220 paddd xmm0,XMMWORD[$L$chacha20_consts] 3221 paddd xmm4,XMMWORD[((160+48))+rbp] 3222 paddd xmm8,XMMWORD[((160+64))+rbp] 3223 paddd xmm12,XMMWORD[((160+96))+rbp] 3224 movdqu xmm3,XMMWORD[((0 + 0))+rsi] 3225 movdqu xmm7,XMMWORD[((16 + 0))+rsi] 3226 movdqu xmm11,XMMWORD[((32 + 0))+rsi] 
3227 movdqu xmm15,XMMWORD[((48 + 0))+rsi] 3228 pxor xmm1,xmm3 3229 pxor xmm5,xmm7 3230 pxor xmm9,xmm11 3231 pxor xmm15,xmm13 3232 movdqu XMMWORD[(0 + 0)+rdi],xmm1 3233 movdqu XMMWORD[(16 + 0)+rdi],xmm5 3234 movdqu XMMWORD[(32 + 0)+rdi],xmm9 3235 movdqu XMMWORD[(48 + 0)+rdi],xmm15 3236 3237 mov rcx,4*16 3238 sub rbx,4*16 3239 lea rsi,[64+rsi] 3240 jmp NEAR $L$seal_sse_128_tail_hash 3241 3242$L$seal_sse_tail_192: 3243 movdqa xmm0,XMMWORD[$L$chacha20_consts] 3244 movdqa xmm4,XMMWORD[((160+48))+rbp] 3245 movdqa xmm8,XMMWORD[((160+64))+rbp] 3246 movdqa xmm1,xmm0 3247 movdqa xmm5,xmm4 3248 movdqa xmm9,xmm8 3249 movdqa xmm2,xmm0 3250 movdqa xmm6,xmm4 3251 movdqa xmm10,xmm8 3252 movdqa xmm14,XMMWORD[((160+96))+rbp] 3253 paddd xmm14,XMMWORD[$L$sse_inc] 3254 movdqa xmm13,xmm14 3255 paddd xmm13,XMMWORD[$L$sse_inc] 3256 movdqa xmm12,xmm13 3257 paddd xmm12,XMMWORD[$L$sse_inc] 3258 movdqa XMMWORD[(160+96)+rbp],xmm12 3259 movdqa XMMWORD[(160+112)+rbp],xmm13 3260 movdqa XMMWORD[(160+128)+rbp],xmm14 3261 3262$L$seal_sse_tail_192_rounds_and_x2hash: 3263 add r10,QWORD[((0+0))+rdi] 3264 adc r11,QWORD[((8+0))+rdi] 3265 adc r12,1 3266 mov rax,QWORD[((0+160+0))+rbp] 3267 mov r15,rax 3268 mul r10 3269 mov r13,rax 3270 mov r14,rdx 3271 mov rax,QWORD[((0+160+0))+rbp] 3272 mul r11 3273 imul r15,r12 3274 add r14,rax 3275 adc r15,rdx 3276 mov rax,QWORD[((8+160+0))+rbp] 3277 mov r9,rax 3278 mul r10 3279 add r14,rax 3280 adc rdx,0 3281 mov r10,rdx 3282 mov rax,QWORD[((8+160+0))+rbp] 3283 mul r11 3284 add r15,rax 3285 adc rdx,0 3286 imul r9,r12 3287 add r15,r10 3288 adc r9,rdx 3289 mov r10,r13 3290 mov r11,r14 3291 mov r12,r15 3292 and r12,3 3293 mov r13,r15 3294 and r13,-4 3295 mov r14,r9 3296 shrd r15,r9,2 3297 shr r9,2 3298 add r15,r13 3299 adc r9,r14 3300 add r10,r15 3301 adc r11,r9 3302 adc r12,0 3303 3304 lea rdi,[16+rdi] 3305$L$seal_sse_tail_192_rounds_and_x1hash: 3306 paddd xmm0,xmm4 3307 pxor xmm12,xmm0 3308 pshufb xmm12,XMMWORD[$L$rol16] 3309 paddd xmm8,xmm12 3310 pxor xmm4,xmm8 3311 
movdqa xmm3,xmm4 3312 pslld xmm3,12 3313 psrld xmm4,20 3314 pxor xmm4,xmm3 3315 paddd xmm0,xmm4 3316 pxor xmm12,xmm0 3317 pshufb xmm12,XMMWORD[$L$rol8] 3318 paddd xmm8,xmm12 3319 pxor xmm4,xmm8 3320 movdqa xmm3,xmm4 3321 pslld xmm3,7 3322 psrld xmm4,25 3323 pxor xmm4,xmm3 3324DB 102,15,58,15,228,4 3325DB 102,69,15,58,15,192,8 3326DB 102,69,15,58,15,228,12 3327 paddd xmm1,xmm5 3328 pxor xmm13,xmm1 3329 pshufb xmm13,XMMWORD[$L$rol16] 3330 paddd xmm9,xmm13 3331 pxor xmm5,xmm9 3332 movdqa xmm3,xmm5 3333 pslld xmm3,12 3334 psrld xmm5,20 3335 pxor xmm5,xmm3 3336 paddd xmm1,xmm5 3337 pxor xmm13,xmm1 3338 pshufb xmm13,XMMWORD[$L$rol8] 3339 paddd xmm9,xmm13 3340 pxor xmm5,xmm9 3341 movdqa xmm3,xmm5 3342 pslld xmm3,7 3343 psrld xmm5,25 3344 pxor xmm5,xmm3 3345DB 102,15,58,15,237,4 3346DB 102,69,15,58,15,201,8 3347DB 102,69,15,58,15,237,12 3348 paddd xmm2,xmm6 3349 pxor xmm14,xmm2 3350 pshufb xmm14,XMMWORD[$L$rol16] 3351 paddd xmm10,xmm14 3352 pxor xmm6,xmm10 3353 movdqa xmm3,xmm6 3354 pslld xmm3,12 3355 psrld xmm6,20 3356 pxor xmm6,xmm3 3357 paddd xmm2,xmm6 3358 pxor xmm14,xmm2 3359 pshufb xmm14,XMMWORD[$L$rol8] 3360 paddd xmm10,xmm14 3361 pxor xmm6,xmm10 3362 movdqa xmm3,xmm6 3363 pslld xmm3,7 3364 psrld xmm6,25 3365 pxor xmm6,xmm3 3366DB 102,15,58,15,246,4 3367DB 102,69,15,58,15,210,8 3368DB 102,69,15,58,15,246,12 3369 add r10,QWORD[((0+0))+rdi] 3370 adc r11,QWORD[((8+0))+rdi] 3371 adc r12,1 3372 mov rax,QWORD[((0+160+0))+rbp] 3373 mov r15,rax 3374 mul r10 3375 mov r13,rax 3376 mov r14,rdx 3377 mov rax,QWORD[((0+160+0))+rbp] 3378 mul r11 3379 imul r15,r12 3380 add r14,rax 3381 adc r15,rdx 3382 mov rax,QWORD[((8+160+0))+rbp] 3383 mov r9,rax 3384 mul r10 3385 add r14,rax 3386 adc rdx,0 3387 mov r10,rdx 3388 mov rax,QWORD[((8+160+0))+rbp] 3389 mul r11 3390 add r15,rax 3391 adc rdx,0 3392 imul r9,r12 3393 add r15,r10 3394 adc r9,rdx 3395 mov r10,r13 3396 mov r11,r14 3397 mov r12,r15 3398 and r12,3 3399 mov r13,r15 3400 and r13,-4 3401 mov r14,r9 3402 shrd r15,r9,2 3403 shr 
r9,2 3404 add r15,r13 3405 adc r9,r14 3406 add r10,r15 3407 adc r11,r9 3408 adc r12,0 3409 paddd xmm0,xmm4 3410 pxor xmm12,xmm0 3411 pshufb xmm12,XMMWORD[$L$rol16] 3412 paddd xmm8,xmm12 3413 pxor xmm4,xmm8 3414 movdqa xmm3,xmm4 3415 pslld xmm3,12 3416 psrld xmm4,20 3417 pxor xmm4,xmm3 3418 paddd xmm0,xmm4 3419 pxor xmm12,xmm0 3420 pshufb xmm12,XMMWORD[$L$rol8] 3421 paddd xmm8,xmm12 3422 pxor xmm4,xmm8 3423 movdqa xmm3,xmm4 3424 pslld xmm3,7 3425 psrld xmm4,25 3426 pxor xmm4,xmm3 3427DB 102,15,58,15,228,12 3428DB 102,69,15,58,15,192,8 3429DB 102,69,15,58,15,228,4 3430 paddd xmm1,xmm5 3431 pxor xmm13,xmm1 3432 pshufb xmm13,XMMWORD[$L$rol16] 3433 paddd xmm9,xmm13 3434 pxor xmm5,xmm9 3435 movdqa xmm3,xmm5 3436 pslld xmm3,12 3437 psrld xmm5,20 3438 pxor xmm5,xmm3 3439 paddd xmm1,xmm5 3440 pxor xmm13,xmm1 3441 pshufb xmm13,XMMWORD[$L$rol8] 3442 paddd xmm9,xmm13 3443 pxor xmm5,xmm9 3444 movdqa xmm3,xmm5 3445 pslld xmm3,7 3446 psrld xmm5,25 3447 pxor xmm5,xmm3 3448DB 102,15,58,15,237,12 3449DB 102,69,15,58,15,201,8 3450DB 102,69,15,58,15,237,4 3451 paddd xmm2,xmm6 3452 pxor xmm14,xmm2 3453 pshufb xmm14,XMMWORD[$L$rol16] 3454 paddd xmm10,xmm14 3455 pxor xmm6,xmm10 3456 movdqa xmm3,xmm6 3457 pslld xmm3,12 3458 psrld xmm6,20 3459 pxor xmm6,xmm3 3460 paddd xmm2,xmm6 3461 pxor xmm14,xmm2 3462 pshufb xmm14,XMMWORD[$L$rol8] 3463 paddd xmm10,xmm14 3464 pxor xmm6,xmm10 3465 movdqa xmm3,xmm6 3466 pslld xmm3,7 3467 psrld xmm6,25 3468 pxor xmm6,xmm3 3469DB 102,15,58,15,246,12 3470DB 102,69,15,58,15,210,8 3471DB 102,69,15,58,15,246,4 3472 3473 lea rdi,[16+rdi] 3474 dec rcx 3475 jg NEAR $L$seal_sse_tail_192_rounds_and_x2hash 3476 dec r8 3477 jge NEAR $L$seal_sse_tail_192_rounds_and_x1hash 3478 paddd xmm2,XMMWORD[$L$chacha20_consts] 3479 paddd xmm6,XMMWORD[((160+48))+rbp] 3480 paddd xmm10,XMMWORD[((160+64))+rbp] 3481 paddd xmm14,XMMWORD[((160+128))+rbp] 3482 paddd xmm1,XMMWORD[$L$chacha20_consts] 3483 paddd xmm5,XMMWORD[((160+48))+rbp] 3484 paddd xmm9,XMMWORD[((160+64))+rbp] 3485 paddd 
xmm13,XMMWORD[((160+112))+rbp] 3486 paddd xmm0,XMMWORD[$L$chacha20_consts] 3487 paddd xmm4,XMMWORD[((160+48))+rbp] 3488 paddd xmm8,XMMWORD[((160+64))+rbp] 3489 paddd xmm12,XMMWORD[((160+96))+rbp] 3490 movdqu xmm3,XMMWORD[((0 + 0))+rsi] 3491 movdqu xmm7,XMMWORD[((16 + 0))+rsi] 3492 movdqu xmm11,XMMWORD[((32 + 0))+rsi] 3493 movdqu xmm15,XMMWORD[((48 + 0))+rsi] 3494 pxor xmm2,xmm3 3495 pxor xmm6,xmm7 3496 pxor xmm10,xmm11 3497 pxor xmm15,xmm14 3498 movdqu XMMWORD[(0 + 0)+rdi],xmm2 3499 movdqu XMMWORD[(16 + 0)+rdi],xmm6 3500 movdqu XMMWORD[(32 + 0)+rdi],xmm10 3501 movdqu XMMWORD[(48 + 0)+rdi],xmm15 3502 movdqu xmm3,XMMWORD[((0 + 64))+rsi] 3503 movdqu xmm7,XMMWORD[((16 + 64))+rsi] 3504 movdqu xmm11,XMMWORD[((32 + 64))+rsi] 3505 movdqu xmm15,XMMWORD[((48 + 64))+rsi] 3506 pxor xmm1,xmm3 3507 pxor xmm5,xmm7 3508 pxor xmm9,xmm11 3509 pxor xmm15,xmm13 3510 movdqu XMMWORD[(0 + 64)+rdi],xmm1 3511 movdqu XMMWORD[(16 + 64)+rdi],xmm5 3512 movdqu XMMWORD[(32 + 64)+rdi],xmm9 3513 movdqu XMMWORD[(48 + 64)+rdi],xmm15 3514 3515 mov rcx,8*16 3516 sub rbx,8*16 3517 lea rsi,[128+rsi] 3518 3519$L$seal_sse_128_tail_hash: 3520 cmp rcx,16 3521 jb NEAR $L$seal_sse_128_tail_xor 3522 add r10,QWORD[((0+0))+rdi] 3523 adc r11,QWORD[((8+0))+rdi] 3524 adc r12,1 3525 mov rax,QWORD[((0+160+0))+rbp] 3526 mov r15,rax 3527 mul r10 3528 mov r13,rax 3529 mov r14,rdx 3530 mov rax,QWORD[((0+160+0))+rbp] 3531 mul r11 3532 imul r15,r12 3533 add r14,rax 3534 adc r15,rdx 3535 mov rax,QWORD[((8+160+0))+rbp] 3536 mov r9,rax 3537 mul r10 3538 add r14,rax 3539 adc rdx,0 3540 mov r10,rdx 3541 mov rax,QWORD[((8+160+0))+rbp] 3542 mul r11 3543 add r15,rax 3544 adc rdx,0 3545 imul r9,r12 3546 add r15,r10 3547 adc r9,rdx 3548 mov r10,r13 3549 mov r11,r14 3550 mov r12,r15 3551 and r12,3 3552 mov r13,r15 3553 and r13,-4 3554 mov r14,r9 3555 shrd r15,r9,2 3556 shr r9,2 3557 add r15,r13 3558 adc r9,r14 3559 add r10,r15 3560 adc r11,r9 3561 adc r12,0 3562 3563 sub rcx,16 3564 lea rdi,[16+rdi] 3565 jmp NEAR 
$L$seal_sse_128_tail_hash 3566 3567$L$seal_sse_128_tail_xor: 3568 cmp rbx,16 3569 jb NEAR $L$seal_sse_tail_16 3570 sub rbx,16 3571 3572 movdqu xmm3,XMMWORD[rsi] 3573 pxor xmm0,xmm3 3574 movdqu XMMWORD[rdi],xmm0 3575 3576 add r10,QWORD[rdi] 3577 adc r11,QWORD[8+rdi] 3578 adc r12,1 3579 lea rsi,[16+rsi] 3580 lea rdi,[16+rdi] 3581 mov rax,QWORD[((0+160+0))+rbp] 3582 mov r15,rax 3583 mul r10 3584 mov r13,rax 3585 mov r14,rdx 3586 mov rax,QWORD[((0+160+0))+rbp] 3587 mul r11 3588 imul r15,r12 3589 add r14,rax 3590 adc r15,rdx 3591 mov rax,QWORD[((8+160+0))+rbp] 3592 mov r9,rax 3593 mul r10 3594 add r14,rax 3595 adc rdx,0 3596 mov r10,rdx 3597 mov rax,QWORD[((8+160+0))+rbp] 3598 mul r11 3599 add r15,rax 3600 adc rdx,0 3601 imul r9,r12 3602 add r15,r10 3603 adc r9,rdx 3604 mov r10,r13 3605 mov r11,r14 3606 mov r12,r15 3607 and r12,3 3608 mov r13,r15 3609 and r13,-4 3610 mov r14,r9 3611 shrd r15,r9,2 3612 shr r9,2 3613 add r15,r13 3614 adc r9,r14 3615 add r10,r15 3616 adc r11,r9 3617 adc r12,0 3618 3619 3620 movdqa xmm0,xmm4 3621 movdqa xmm4,xmm8 3622 movdqa xmm8,xmm12 3623 movdqa xmm12,xmm1 3624 movdqa xmm1,xmm5 3625 movdqa xmm5,xmm9 3626 movdqa xmm9,xmm13 3627 jmp NEAR $L$seal_sse_128_tail_xor 3628 3629$L$seal_sse_tail_16: 3630 test rbx,rbx 3631 jz NEAR $L$process_blocks_of_extra_in 3632 3633 mov r8,rbx 3634 mov rcx,rbx 3635 lea rsi,[((-1))+rbx*1+rsi] 3636 pxor xmm15,xmm15 3637$L$seal_sse_tail_16_compose: 3638 pslldq xmm15,1 3639 pinsrb xmm15,BYTE[rsi],0 3640 lea rsi,[((-1))+rsi] 3641 dec rcx 3642 jne NEAR $L$seal_sse_tail_16_compose 3643 3644 3645 pxor xmm15,xmm0 3646 3647 3648 mov rcx,rbx 3649 movdqu xmm0,xmm15 3650$L$seal_sse_tail_16_extract: 3651 pextrb XMMWORD[rdi],xmm0,0 3652 psrldq xmm0,1 3653 add rdi,1 3654 sub rcx,1 3655 jnz NEAR $L$seal_sse_tail_16_extract 3656 3657 3658 3659 3660 3661 3662 3663 3664 mov r9,QWORD[((288 + 160 + 32))+rsp] 3665 mov r14,QWORD[56+r9] 3666 mov r13,QWORD[48+r9] 3667 test r14,r14 3668 jz NEAR $L$process_partial_block 3669 3670 mov 
r15,16 3671 sub r15,rbx 3672 cmp r14,r15 3673 3674 jge NEAR $L$load_extra_in 3675 mov r15,r14 3676 3677$L$load_extra_in: 3678 3679 3680 lea rsi,[((-1))+r15*1+r13] 3681 3682 3683 add r13,r15 3684 sub r14,r15 3685 mov QWORD[48+r9],r13 3686 mov QWORD[56+r9],r14 3687 3688 3689 3690 add r8,r15 3691 3692 3693 pxor xmm11,xmm11 3694$L$load_extra_load_loop: 3695 pslldq xmm11,1 3696 pinsrb xmm11,BYTE[rsi],0 3697 lea rsi,[((-1))+rsi] 3698 sub r15,1 3699 jnz NEAR $L$load_extra_load_loop 3700 3701 3702 3703 3704 mov r15,rbx 3705 3706$L$load_extra_shift_loop: 3707 pslldq xmm11,1 3708 sub r15,1 3709 jnz NEAR $L$load_extra_shift_loop 3710 3711 3712 3713 3714 lea r15,[$L$and_masks] 3715 shl rbx,4 3716 pand xmm15,XMMWORD[((-16))+rbx*1+r15] 3717 3718 3719 por xmm15,xmm11 3720 3721 3722 3723DB 102,77,15,126,253 3724 pextrq r14,xmm15,1 3725 add r10,r13 3726 adc r11,r14 3727 adc r12,1 3728 mov rax,QWORD[((0+160+0))+rbp] 3729 mov r15,rax 3730 mul r10 3731 mov r13,rax 3732 mov r14,rdx 3733 mov rax,QWORD[((0+160+0))+rbp] 3734 mul r11 3735 imul r15,r12 3736 add r14,rax 3737 adc r15,rdx 3738 mov rax,QWORD[((8+160+0))+rbp] 3739 mov r9,rax 3740 mul r10 3741 add r14,rax 3742 adc rdx,0 3743 mov r10,rdx 3744 mov rax,QWORD[((8+160+0))+rbp] 3745 mul r11 3746 add r15,rax 3747 adc rdx,0 3748 imul r9,r12 3749 add r15,r10 3750 adc r9,rdx 3751 mov r10,r13 3752 mov r11,r14 3753 mov r12,r15 3754 and r12,3 3755 mov r13,r15 3756 and r13,-4 3757 mov r14,r9 3758 shrd r15,r9,2 3759 shr r9,2 3760 add r15,r13 3761 adc r9,r14 3762 add r10,r15 3763 adc r11,r9 3764 adc r12,0 3765 3766 3767$L$process_blocks_of_extra_in: 3768 3769 mov r9,QWORD[((288+32+160 ))+rsp] 3770 mov rsi,QWORD[48+r9] 3771 mov r8,QWORD[56+r9] 3772 mov rcx,r8 3773 shr r8,4 3774 3775$L$process_extra_hash_loop: 3776 jz NEAR process_extra_in_trailer 3777 add r10,QWORD[((0+0))+rsi] 3778 adc r11,QWORD[((8+0))+rsi] 3779 adc r12,1 3780 mov rax,QWORD[((0+160+0))+rbp] 3781 mov r15,rax 3782 mul r10 3783 mov r13,rax 3784 mov r14,rdx 3785 mov 
rax,QWORD[((0+160+0))+rbp] 3786 mul r11 3787 imul r15,r12 3788 add r14,rax 3789 adc r15,rdx 3790 mov rax,QWORD[((8+160+0))+rbp] 3791 mov r9,rax 3792 mul r10 3793 add r14,rax 3794 adc rdx,0 3795 mov r10,rdx 3796 mov rax,QWORD[((8+160+0))+rbp] 3797 mul r11 3798 add r15,rax 3799 adc rdx,0 3800 imul r9,r12 3801 add r15,r10 3802 adc r9,rdx 3803 mov r10,r13 3804 mov r11,r14 3805 mov r12,r15 3806 and r12,3 3807 mov r13,r15 3808 and r13,-4 3809 mov r14,r9 3810 shrd r15,r9,2 3811 shr r9,2 3812 add r15,r13 3813 adc r9,r14 3814 add r10,r15 3815 adc r11,r9 3816 adc r12,0 3817 3818 lea rsi,[16+rsi] 3819 sub r8,1 3820 jmp NEAR $L$process_extra_hash_loop 3821process_extra_in_trailer: 3822 and rcx,15 3823 mov rbx,rcx 3824 jz NEAR $L$do_length_block 3825 lea rsi,[((-1))+rcx*1+rsi] 3826 3827$L$process_extra_in_trailer_load: 3828 pslldq xmm15,1 3829 pinsrb xmm15,BYTE[rsi],0 3830 lea rsi,[((-1))+rsi] 3831 sub rcx,1 3832 jnz NEAR $L$process_extra_in_trailer_load 3833 3834$L$process_partial_block: 3835 3836 lea r15,[$L$and_masks] 3837 shl rbx,4 3838 pand xmm15,XMMWORD[((-16))+rbx*1+r15] 3839DB 102,77,15,126,253 3840 pextrq r14,xmm15,1 3841 add r10,r13 3842 adc r11,r14 3843 adc r12,1 3844 mov rax,QWORD[((0+160+0))+rbp] 3845 mov r15,rax 3846 mul r10 3847 mov r13,rax 3848 mov r14,rdx 3849 mov rax,QWORD[((0+160+0))+rbp] 3850 mul r11 3851 imul r15,r12 3852 add r14,rax 3853 adc r15,rdx 3854 mov rax,QWORD[((8+160+0))+rbp] 3855 mov r9,rax 3856 mul r10 3857 add r14,rax 3858 adc rdx,0 3859 mov r10,rdx 3860 mov rax,QWORD[((8+160+0))+rbp] 3861 mul r11 3862 add r15,rax 3863 adc rdx,0 3864 imul r9,r12 3865 add r15,r10 3866 adc r9,rdx 3867 mov r10,r13 3868 mov r11,r14 3869 mov r12,r15 3870 and r12,3 3871 mov r13,r15 3872 and r13,-4 3873 mov r14,r9 3874 shrd r15,r9,2 3875 shr r9,2 3876 add r15,r13 3877 adc r9,r14 3878 add r10,r15 3879 adc r11,r9 3880 adc r12,0 3881 3882 3883$L$do_length_block: 3884 add r10,QWORD[((0+160+32))+rbp] 3885 adc r11,QWORD[((8+160+32))+rbp] 3886 adc r12,1 3887 mov 
rax,QWORD[((0+160+0))+rbp] 3888 mov r15,rax 3889 mul r10 3890 mov r13,rax 3891 mov r14,rdx 3892 mov rax,QWORD[((0+160+0))+rbp] 3893 mul r11 3894 imul r15,r12 3895 add r14,rax 3896 adc r15,rdx 3897 mov rax,QWORD[((8+160+0))+rbp] 3898 mov r9,rax 3899 mul r10 3900 add r14,rax 3901 adc rdx,0 3902 mov r10,rdx 3903 mov rax,QWORD[((8+160+0))+rbp] 3904 mul r11 3905 add r15,rax 3906 adc rdx,0 3907 imul r9,r12 3908 add r15,r10 3909 adc r9,rdx 3910 mov r10,r13 3911 mov r11,r14 3912 mov r12,r15 3913 and r12,3 3914 mov r13,r15 3915 and r13,-4 3916 mov r14,r9 3917 shrd r15,r9,2 3918 shr r9,2 3919 add r15,r13 3920 adc r9,r14 3921 add r10,r15 3922 adc r11,r9 3923 adc r12,0 3924 3925 3926 mov r13,r10 3927 mov r14,r11 3928 mov r15,r12 3929 sub r10,-5 3930 sbb r11,-1 3931 sbb r12,3 3932 cmovc r10,r13 3933 cmovc r11,r14 3934 cmovc r12,r15 3935 3936 add r10,QWORD[((0+160+16))+rbp] 3937 adc r11,QWORD[((8+160+16))+rbp] 3938 3939 movaps xmm6,XMMWORD[((0+0))+rbp] 3940 movaps xmm7,XMMWORD[((16+0))+rbp] 3941 movaps xmm8,XMMWORD[((32+0))+rbp] 3942 movaps xmm9,XMMWORD[((48+0))+rbp] 3943 movaps xmm10,XMMWORD[((64+0))+rbp] 3944 movaps xmm11,XMMWORD[((80+0))+rbp] 3945 movaps xmm12,XMMWORD[((96+0))+rbp] 3946 movaps xmm13,XMMWORD[((112+0))+rbp] 3947 movaps xmm14,XMMWORD[((128+0))+rbp] 3948 movaps xmm15,XMMWORD[((144+0))+rbp] 3949 3950 3951 add rsp,288 + 160 + 32 3952 3953 3954 pop r9 3955 3956 mov QWORD[r9],r10 3957 mov QWORD[8+r9],r11 3958 pop r15 3959 3960 pop r14 3961 3962 pop r13 3963 3964 pop r12 3965 3966 pop rbx 3967 3968 pop rbp 3969 3970 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 3971 mov rsi,QWORD[16+rsp] 3972 DB 0F3h,0C3h ;repret 3973 3974$L$seal_sse_128: 3975 3976 movdqu xmm0,XMMWORD[$L$chacha20_consts] 3977 movdqa xmm1,xmm0 3978 movdqa xmm2,xmm0 3979 movdqu xmm4,XMMWORD[r9] 3980 movdqa xmm5,xmm4 3981 movdqa xmm6,xmm4 3982 movdqu xmm8,XMMWORD[16+r9] 3983 movdqa xmm9,xmm8 3984 movdqa xmm10,xmm8 3985 movdqu xmm14,XMMWORD[32+r9] 3986 movdqa xmm12,xmm14 3987 paddd xmm12,XMMWORD[$L$sse_inc] 3988 
movdqa xmm13,xmm12 3989 paddd xmm13,XMMWORD[$L$sse_inc] 3990 movdqa xmm7,xmm4 3991 movdqa xmm11,xmm8 3992 movdqa xmm15,xmm12 3993 mov r10,10 3994 3995$L$seal_sse_128_rounds: 3996 paddd xmm0,xmm4 3997 pxor xmm12,xmm0 3998 pshufb xmm12,XMMWORD[$L$rol16] 3999 paddd xmm8,xmm12 4000 pxor xmm4,xmm8 4001 movdqa xmm3,xmm4 4002 pslld xmm3,12 4003 psrld xmm4,20 4004 pxor xmm4,xmm3 4005 paddd xmm0,xmm4 4006 pxor xmm12,xmm0 4007 pshufb xmm12,XMMWORD[$L$rol8] 4008 paddd xmm8,xmm12 4009 pxor xmm4,xmm8 4010 movdqa xmm3,xmm4 4011 pslld xmm3,7 4012 psrld xmm4,25 4013 pxor xmm4,xmm3 4014DB 102,15,58,15,228,4 4015DB 102,69,15,58,15,192,8 4016DB 102,69,15,58,15,228,12 4017 paddd xmm1,xmm5 4018 pxor xmm13,xmm1 4019 pshufb xmm13,XMMWORD[$L$rol16] 4020 paddd xmm9,xmm13 4021 pxor xmm5,xmm9 4022 movdqa xmm3,xmm5 4023 pslld xmm3,12 4024 psrld xmm5,20 4025 pxor xmm5,xmm3 4026 paddd xmm1,xmm5 4027 pxor xmm13,xmm1 4028 pshufb xmm13,XMMWORD[$L$rol8] 4029 paddd xmm9,xmm13 4030 pxor xmm5,xmm9 4031 movdqa xmm3,xmm5 4032 pslld xmm3,7 4033 psrld xmm5,25 4034 pxor xmm5,xmm3 4035DB 102,15,58,15,237,4 4036DB 102,69,15,58,15,201,8 4037DB 102,69,15,58,15,237,12 4038 paddd xmm2,xmm6 4039 pxor xmm14,xmm2 4040 pshufb xmm14,XMMWORD[$L$rol16] 4041 paddd xmm10,xmm14 4042 pxor xmm6,xmm10 4043 movdqa xmm3,xmm6 4044 pslld xmm3,12 4045 psrld xmm6,20 4046 pxor xmm6,xmm3 4047 paddd xmm2,xmm6 4048 pxor xmm14,xmm2 4049 pshufb xmm14,XMMWORD[$L$rol8] 4050 paddd xmm10,xmm14 4051 pxor xmm6,xmm10 4052 movdqa xmm3,xmm6 4053 pslld xmm3,7 4054 psrld xmm6,25 4055 pxor xmm6,xmm3 4056DB 102,15,58,15,246,4 4057DB 102,69,15,58,15,210,8 4058DB 102,69,15,58,15,246,12 4059 paddd xmm0,xmm4 4060 pxor xmm12,xmm0 4061 pshufb xmm12,XMMWORD[$L$rol16] 4062 paddd xmm8,xmm12 4063 pxor xmm4,xmm8 4064 movdqa xmm3,xmm4 4065 pslld xmm3,12 4066 psrld xmm4,20 4067 pxor xmm4,xmm3 4068 paddd xmm0,xmm4 4069 pxor xmm12,xmm0 4070 pshufb xmm12,XMMWORD[$L$rol8] 4071 paddd xmm8,xmm12 4072 pxor xmm4,xmm8 4073 movdqa xmm3,xmm4 4074 pslld xmm3,7 4075 psrld 
xmm4,25 4076 pxor xmm4,xmm3 4077DB 102,15,58,15,228,12 4078DB 102,69,15,58,15,192,8 4079DB 102,69,15,58,15,228,4 4080 paddd xmm1,xmm5 4081 pxor xmm13,xmm1 4082 pshufb xmm13,XMMWORD[$L$rol16] 4083 paddd xmm9,xmm13 4084 pxor xmm5,xmm9 4085 movdqa xmm3,xmm5 4086 pslld xmm3,12 4087 psrld xmm5,20 4088 pxor xmm5,xmm3 4089 paddd xmm1,xmm5 4090 pxor xmm13,xmm1 4091 pshufb xmm13,XMMWORD[$L$rol8] 4092 paddd xmm9,xmm13 4093 pxor xmm5,xmm9 4094 movdqa xmm3,xmm5 4095 pslld xmm3,7 4096 psrld xmm5,25 4097 pxor xmm5,xmm3 4098DB 102,15,58,15,237,12 4099DB 102,69,15,58,15,201,8 4100DB 102,69,15,58,15,237,4 4101 paddd xmm2,xmm6 4102 pxor xmm14,xmm2 4103 pshufb xmm14,XMMWORD[$L$rol16] 4104 paddd xmm10,xmm14 4105 pxor xmm6,xmm10 4106 movdqa xmm3,xmm6 4107 pslld xmm3,12 4108 psrld xmm6,20 4109 pxor xmm6,xmm3 4110 paddd xmm2,xmm6 4111 pxor xmm14,xmm2 4112 pshufb xmm14,XMMWORD[$L$rol8] 4113 paddd xmm10,xmm14 4114 pxor xmm6,xmm10 4115 movdqa xmm3,xmm6 4116 pslld xmm3,7 4117 psrld xmm6,25 4118 pxor xmm6,xmm3 4119DB 102,15,58,15,246,12 4120DB 102,69,15,58,15,210,8 4121DB 102,69,15,58,15,246,4 4122 4123 dec r10 4124 jnz NEAR $L$seal_sse_128_rounds 4125 paddd xmm0,XMMWORD[$L$chacha20_consts] 4126 paddd xmm1,XMMWORD[$L$chacha20_consts] 4127 paddd xmm2,XMMWORD[$L$chacha20_consts] 4128 paddd xmm4,xmm7 4129 paddd xmm5,xmm7 4130 paddd xmm6,xmm7 4131 paddd xmm8,xmm11 4132 paddd xmm9,xmm11 4133 paddd xmm12,xmm15 4134 paddd xmm15,XMMWORD[$L$sse_inc] 4135 paddd xmm13,xmm15 4136 4137 pand xmm2,XMMWORD[$L$clamp] 4138 movdqa XMMWORD[(160+0)+rbp],xmm2 4139 movdqa XMMWORD[(160+16)+rbp],xmm6 4140 4141 mov r8,r8 4142 call poly_hash_ad_internal 4143 jmp NEAR $L$seal_sse_128_tail_xor 4144$L$SEH_end_chacha20_poly1305_seal: 4145 4146 4147 4148 4149ALIGN 64 4150chacha20_poly1305_open_avx2: 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 vzeroupper 4164 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 4165 vbroadcasti128 ymm4,XMMWORD[r9] 4166 vbroadcasti128 ymm8,XMMWORD[16+r9] 4167 vbroadcasti128 
ymm12,XMMWORD[32+r9] 4168 vpaddd ymm12,ymm12,YMMWORD[$L$avx2_init] 4169 cmp rbx,6*32 4170 jbe NEAR $L$open_avx2_192 4171 cmp rbx,10*32 4172 jbe NEAR $L$open_avx2_320 4173 4174 vmovdqa YMMWORD[(160+64)+rbp],ymm4 4175 vmovdqa YMMWORD[(160+96)+rbp],ymm8 4176 vmovdqa YMMWORD[(160+160)+rbp],ymm12 4177 mov r10,10 4178$L$open_avx2_init_rounds: 4179 vpaddd ymm0,ymm0,ymm4 4180 vpxor ymm12,ymm12,ymm0 4181 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 4182 vpaddd ymm8,ymm8,ymm12 4183 vpxor ymm4,ymm4,ymm8 4184 vpsrld ymm3,ymm4,20 4185 vpslld ymm4,ymm4,12 4186 vpxor ymm4,ymm4,ymm3 4187 vpaddd ymm0,ymm0,ymm4 4188 vpxor ymm12,ymm12,ymm0 4189 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 4190 vpaddd ymm8,ymm8,ymm12 4191 vpxor ymm4,ymm4,ymm8 4192 vpslld ymm3,ymm4,7 4193 vpsrld ymm4,ymm4,25 4194 vpxor ymm4,ymm4,ymm3 4195 vpalignr ymm12,ymm12,ymm12,12 4196 vpalignr ymm8,ymm8,ymm8,8 4197 vpalignr ymm4,ymm4,ymm4,4 4198 vpaddd ymm0,ymm0,ymm4 4199 vpxor ymm12,ymm12,ymm0 4200 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 4201 vpaddd ymm8,ymm8,ymm12 4202 vpxor ymm4,ymm4,ymm8 4203 vpsrld ymm3,ymm4,20 4204 vpslld ymm4,ymm4,12 4205 vpxor ymm4,ymm4,ymm3 4206 vpaddd ymm0,ymm0,ymm4 4207 vpxor ymm12,ymm12,ymm0 4208 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 4209 vpaddd ymm8,ymm8,ymm12 4210 vpxor ymm4,ymm4,ymm8 4211 vpslld ymm3,ymm4,7 4212 vpsrld ymm4,ymm4,25 4213 vpxor ymm4,ymm4,ymm3 4214 vpalignr ymm12,ymm12,ymm12,4 4215 vpalignr ymm8,ymm8,ymm8,8 4216 vpalignr ymm4,ymm4,ymm4,12 4217 4218 dec r10 4219 jne NEAR $L$open_avx2_init_rounds 4220 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 4221 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 4222 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 4223 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 4224 4225 vperm2i128 ymm3,ymm4,ymm0,0x02 4226 4227 vpand ymm3,ymm3,YMMWORD[$L$clamp] 4228 vmovdqa YMMWORD[(160+0)+rbp],ymm3 4229 4230 vperm2i128 ymm0,ymm4,ymm0,0x13 4231 vperm2i128 ymm4,ymm12,ymm8,0x13 4232 4233 mov r8,r8 4234 call poly_hash_ad_internal 4235 4236 xor rcx,rcx 4237$L$open_avx2_init_hash: 4238 
add r10,QWORD[((0+0))+rcx*1+rsi] 4239 adc r11,QWORD[((8+0))+rcx*1+rsi] 4240 adc r12,1 4241 mov rax,QWORD[((0+160+0))+rbp] 4242 mov r15,rax 4243 mul r10 4244 mov r13,rax 4245 mov r14,rdx 4246 mov rax,QWORD[((0+160+0))+rbp] 4247 mul r11 4248 imul r15,r12 4249 add r14,rax 4250 adc r15,rdx 4251 mov rax,QWORD[((8+160+0))+rbp] 4252 mov r9,rax 4253 mul r10 4254 add r14,rax 4255 adc rdx,0 4256 mov r10,rdx 4257 mov rax,QWORD[((8+160+0))+rbp] 4258 mul r11 4259 add r15,rax 4260 adc rdx,0 4261 imul r9,r12 4262 add r15,r10 4263 adc r9,rdx 4264 mov r10,r13 4265 mov r11,r14 4266 mov r12,r15 4267 and r12,3 4268 mov r13,r15 4269 and r13,-4 4270 mov r14,r9 4271 shrd r15,r9,2 4272 shr r9,2 4273 add r15,r13 4274 adc r9,r14 4275 add r10,r15 4276 adc r11,r9 4277 adc r12,0 4278 4279 add rcx,16 4280 cmp rcx,2*32 4281 jne NEAR $L$open_avx2_init_hash 4282 4283 vpxor ymm0,ymm0,YMMWORD[rsi] 4284 vpxor ymm4,ymm4,YMMWORD[32+rsi] 4285 4286 vmovdqu YMMWORD[rdi],ymm0 4287 vmovdqu YMMWORD[32+rdi],ymm4 4288 lea rsi,[64+rsi] 4289 lea rdi,[64+rdi] 4290 sub rbx,2*32 4291$L$open_avx2_main_loop: 4292 4293 cmp rbx,16*32 4294 jb NEAR $L$open_avx2_main_loop_done 4295 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 4296 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 4297 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 4298 vmovdqa ymm1,ymm0 4299 vmovdqa ymm5,ymm4 4300 vmovdqa ymm9,ymm8 4301 vmovdqa ymm2,ymm0 4302 vmovdqa ymm6,ymm4 4303 vmovdqa ymm10,ymm8 4304 vmovdqa ymm3,ymm0 4305 vmovdqa ymm7,ymm4 4306 vmovdqa ymm11,ymm8 4307 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 4308 vpaddd ymm15,ymm12,YMMWORD[((160+160))+rbp] 4309 vpaddd ymm14,ymm12,ymm15 4310 vpaddd ymm13,ymm12,ymm14 4311 vpaddd ymm12,ymm12,ymm13 4312 vmovdqa YMMWORD[(160+256)+rbp],ymm15 4313 vmovdqa YMMWORD[(160+224)+rbp],ymm14 4314 vmovdqa YMMWORD[(160+192)+rbp],ymm13 4315 vmovdqa YMMWORD[(160+160)+rbp],ymm12 4316 4317 xor rcx,rcx 4318$L$open_avx2_main_loop_rounds: 4319 add r10,QWORD[((0+0))+rcx*1+rsi] 4320 adc r11,QWORD[((8+0))+rcx*1+rsi] 4321 adc r12,1 4322 vmovdqa 
YMMWORD[(160+128)+rbp],ymm8 4323 vmovdqa ymm8,YMMWORD[$L$rol16] 4324 vpaddd ymm3,ymm3,ymm7 4325 vpaddd ymm2,ymm2,ymm6 4326 vpaddd ymm1,ymm1,ymm5 4327 vpaddd ymm0,ymm0,ymm4 4328 vpxor ymm15,ymm15,ymm3 4329 vpxor ymm14,ymm14,ymm2 4330 vpxor ymm13,ymm13,ymm1 4331 vpxor ymm12,ymm12,ymm0 4332 mov rdx,QWORD[((0+160+0))+rbp] 4333 mov r15,rdx 4334 mulx r14,r13,r10 4335 mulx rdx,rax,r11 4336 imul r15,r12 4337 add r14,rax 4338 adc r15,rdx 4339 vpshufb ymm15,ymm15,ymm8 4340 vpshufb ymm14,ymm14,ymm8 4341 vpshufb ymm13,ymm13,ymm8 4342 vpshufb ymm12,ymm12,ymm8 4343 vpaddd ymm11,ymm11,ymm15 4344 vpaddd ymm10,ymm10,ymm14 4345 vpaddd ymm9,ymm9,ymm13 4346 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 4347 vpxor ymm7,ymm7,ymm11 4348 mov rdx,QWORD[((8+160+0))+rbp] 4349 mulx rax,r10,r10 4350 add r14,r10 4351 mulx r9,r11,r11 4352 adc r15,r11 4353 adc r9,0 4354 imul rdx,r12 4355 vpxor ymm6,ymm6,ymm10 4356 vpxor ymm5,ymm5,ymm9 4357 vpxor ymm4,ymm4,ymm8 4358 vmovdqa YMMWORD[(160+128)+rbp],ymm8 4359 vpsrld ymm8,ymm7,20 4360 vpslld ymm7,ymm7,32-20 4361 vpxor ymm7,ymm7,ymm8 4362 vpsrld ymm8,ymm6,20 4363 vpslld ymm6,ymm6,32-20 4364 vpxor ymm6,ymm6,ymm8 4365 vpsrld ymm8,ymm5,20 4366 vpslld ymm5,ymm5,32-20 4367 add r15,rax 4368 adc r9,rdx 4369 vpxor ymm5,ymm5,ymm8 4370 vpsrld ymm8,ymm4,20 4371 vpslld ymm4,ymm4,32-20 4372 vpxor ymm4,ymm4,ymm8 4373 vmovdqa ymm8,YMMWORD[$L$rol8] 4374 vpaddd ymm3,ymm3,ymm7 4375 vpaddd ymm2,ymm2,ymm6 4376 vpaddd ymm1,ymm1,ymm5 4377 vpaddd ymm0,ymm0,ymm4 4378 vpxor ymm15,ymm15,ymm3 4379 mov r10,r13 4380 mov r11,r14 4381 mov r12,r15 4382 and r12,3 4383 mov r13,r15 4384 and r13,-4 4385 mov r14,r9 4386 shrd r15,r9,2 4387 shr r9,2 4388 add r15,r13 4389 adc r9,r14 4390 add r10,r15 4391 adc r11,r9 4392 adc r12,0 4393 vpxor ymm14,ymm14,ymm2 4394 vpxor ymm13,ymm13,ymm1 4395 vpxor ymm12,ymm12,ymm0 4396 vpshufb ymm15,ymm15,ymm8 4397 vpshufb ymm14,ymm14,ymm8 4398 vpshufb ymm13,ymm13,ymm8 4399 vpshufb ymm12,ymm12,ymm8 4400 vpaddd ymm11,ymm11,ymm15 4401 vpaddd ymm10,ymm10,ymm14 4402 
add r10,QWORD[((0+16))+rcx*1+rsi] 4403 adc r11,QWORD[((8+16))+rcx*1+rsi] 4404 adc r12,1 4405 vpaddd ymm9,ymm9,ymm13 4406 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 4407 vpxor ymm7,ymm7,ymm11 4408 vpxor ymm6,ymm6,ymm10 4409 vpxor ymm5,ymm5,ymm9 4410 vpxor ymm4,ymm4,ymm8 4411 vmovdqa YMMWORD[(160+128)+rbp],ymm8 4412 vpsrld ymm8,ymm7,25 4413 mov rdx,QWORD[((0+160+0))+rbp] 4414 mov r15,rdx 4415 mulx r14,r13,r10 4416 mulx rdx,rax,r11 4417 imul r15,r12 4418 add r14,rax 4419 adc r15,rdx 4420 vpslld ymm7,ymm7,32-25 4421 vpxor ymm7,ymm7,ymm8 4422 vpsrld ymm8,ymm6,25 4423 vpslld ymm6,ymm6,32-25 4424 vpxor ymm6,ymm6,ymm8 4425 vpsrld ymm8,ymm5,25 4426 vpslld ymm5,ymm5,32-25 4427 vpxor ymm5,ymm5,ymm8 4428 vpsrld ymm8,ymm4,25 4429 vpslld ymm4,ymm4,32-25 4430 vpxor ymm4,ymm4,ymm8 4431 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 4432 vpalignr ymm7,ymm7,ymm7,4 4433 vpalignr ymm11,ymm11,ymm11,8 4434 vpalignr ymm15,ymm15,ymm15,12 4435 vpalignr ymm6,ymm6,ymm6,4 4436 vpalignr ymm10,ymm10,ymm10,8 4437 vpalignr ymm14,ymm14,ymm14,12 4438 mov rdx,QWORD[((8+160+0))+rbp] 4439 mulx rax,r10,r10 4440 add r14,r10 4441 mulx r9,r11,r11 4442 adc r15,r11 4443 adc r9,0 4444 imul rdx,r12 4445 vpalignr ymm5,ymm5,ymm5,4 4446 vpalignr ymm9,ymm9,ymm9,8 4447 vpalignr ymm13,ymm13,ymm13,12 4448 vpalignr ymm4,ymm4,ymm4,4 4449 vpalignr ymm8,ymm8,ymm8,8 4450 vpalignr ymm12,ymm12,ymm12,12 4451 vmovdqa YMMWORD[(160+128)+rbp],ymm8 4452 vmovdqa ymm8,YMMWORD[$L$rol16] 4453 vpaddd ymm3,ymm3,ymm7 4454 vpaddd ymm2,ymm2,ymm6 4455 vpaddd ymm1,ymm1,ymm5 4456 vpaddd ymm0,ymm0,ymm4 4457 vpxor ymm15,ymm15,ymm3 4458 vpxor ymm14,ymm14,ymm2 4459 vpxor ymm13,ymm13,ymm1 4460 vpxor ymm12,ymm12,ymm0 4461 vpshufb ymm15,ymm15,ymm8 4462 vpshufb ymm14,ymm14,ymm8 4463 add r15,rax 4464 adc r9,rdx 4465 vpshufb ymm13,ymm13,ymm8 4466 vpshufb ymm12,ymm12,ymm8 4467 vpaddd ymm11,ymm11,ymm15 4468 vpaddd ymm10,ymm10,ymm14 4469 vpaddd ymm9,ymm9,ymm13 4470 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 4471 vpxor ymm7,ymm7,ymm11 4472 vpxor ymm6,ymm6,ymm10 
4473 vpxor ymm5,ymm5,ymm9 4474 mov r10,r13 4475 mov r11,r14 4476 mov r12,r15 4477 and r12,3 4478 mov r13,r15 4479 and r13,-4 4480 mov r14,r9 4481 shrd r15,r9,2 4482 shr r9,2 4483 add r15,r13 4484 adc r9,r14 4485 add r10,r15 4486 adc r11,r9 4487 adc r12,0 4488 vpxor ymm4,ymm4,ymm8 4489 vmovdqa YMMWORD[(160+128)+rbp],ymm8 4490 vpsrld ymm8,ymm7,20 4491 vpslld ymm7,ymm7,32-20 4492 vpxor ymm7,ymm7,ymm8 4493 vpsrld ymm8,ymm6,20 4494 vpslld ymm6,ymm6,32-20 4495 vpxor ymm6,ymm6,ymm8 4496 add r10,QWORD[((0+32))+rcx*1+rsi] 4497 adc r11,QWORD[((8+32))+rcx*1+rsi] 4498 adc r12,1 4499 4500 lea rcx,[48+rcx] 4501 vpsrld ymm8,ymm5,20 4502 vpslld ymm5,ymm5,32-20 4503 vpxor ymm5,ymm5,ymm8 4504 vpsrld ymm8,ymm4,20 4505 vpslld ymm4,ymm4,32-20 4506 vpxor ymm4,ymm4,ymm8 4507 vmovdqa ymm8,YMMWORD[$L$rol8] 4508 vpaddd ymm3,ymm3,ymm7 4509 vpaddd ymm2,ymm2,ymm6 4510 vpaddd ymm1,ymm1,ymm5 4511 vpaddd ymm0,ymm0,ymm4 4512 vpxor ymm15,ymm15,ymm3 4513 vpxor ymm14,ymm14,ymm2 4514 vpxor ymm13,ymm13,ymm1 4515 vpxor ymm12,ymm12,ymm0 4516 vpshufb ymm15,ymm15,ymm8 4517 vpshufb ymm14,ymm14,ymm8 4518 vpshufb ymm13,ymm13,ymm8 4519 mov rdx,QWORD[((0+160+0))+rbp] 4520 mov r15,rdx 4521 mulx r14,r13,r10 4522 mulx rdx,rax,r11 4523 imul r15,r12 4524 add r14,rax 4525 adc r15,rdx 4526 vpshufb ymm12,ymm12,ymm8 4527 vpaddd ymm11,ymm11,ymm15 4528 vpaddd ymm10,ymm10,ymm14 4529 vpaddd ymm9,ymm9,ymm13 4530 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 4531 vpxor ymm7,ymm7,ymm11 4532 vpxor ymm6,ymm6,ymm10 4533 vpxor ymm5,ymm5,ymm9 4534 mov rdx,QWORD[((8+160+0))+rbp] 4535 mulx rax,r10,r10 4536 add r14,r10 4537 mulx r9,r11,r11 4538 adc r15,r11 4539 adc r9,0 4540 imul rdx,r12 4541 vpxor ymm4,ymm4,ymm8 4542 vmovdqa YMMWORD[(160+128)+rbp],ymm8 4543 vpsrld ymm8,ymm7,25 4544 vpslld ymm7,ymm7,32-25 4545 vpxor ymm7,ymm7,ymm8 4546 vpsrld ymm8,ymm6,25 4547 vpslld ymm6,ymm6,32-25 4548 vpxor ymm6,ymm6,ymm8 4549 add r15,rax 4550 adc r9,rdx 4551 vpsrld ymm8,ymm5,25 4552 vpslld ymm5,ymm5,32-25 4553 vpxor ymm5,ymm5,ymm8 4554 vpsrld 
ymm8,ymm4,25 4555 vpslld ymm4,ymm4,32-25 4556 vpxor ymm4,ymm4,ymm8 4557 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 4558 vpalignr ymm7,ymm7,ymm7,12 4559 vpalignr ymm11,ymm11,ymm11,8 4560 vpalignr ymm15,ymm15,ymm15,4 4561 vpalignr ymm6,ymm6,ymm6,12 4562 vpalignr ymm10,ymm10,ymm10,8 4563 vpalignr ymm14,ymm14,ymm14,4 4564 vpalignr ymm5,ymm5,ymm5,12 4565 vpalignr ymm9,ymm9,ymm9,8 4566 vpalignr ymm13,ymm13,ymm13,4 4567 vpalignr ymm4,ymm4,ymm4,12 4568 vpalignr ymm8,ymm8,ymm8,8 4569 mov r10,r13 4570 mov r11,r14 4571 mov r12,r15 4572 and r12,3 4573 mov r13,r15 4574 and r13,-4 4575 mov r14,r9 4576 shrd r15,r9,2 4577 shr r9,2 4578 add r15,r13 4579 adc r9,r14 4580 add r10,r15 4581 adc r11,r9 4582 adc r12,0 4583 vpalignr ymm12,ymm12,ymm12,4 4584 4585 cmp rcx,10*6*8 4586 jne NEAR $L$open_avx2_main_loop_rounds 4587 vpaddd ymm3,ymm3,YMMWORD[$L$chacha20_consts] 4588 vpaddd ymm7,ymm7,YMMWORD[((160+64))+rbp] 4589 vpaddd ymm11,ymm11,YMMWORD[((160+96))+rbp] 4590 vpaddd ymm15,ymm15,YMMWORD[((160+256))+rbp] 4591 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] 4592 vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] 4593 vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] 4594 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] 4595 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 4596 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] 4597 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] 4598 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 4599 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 4600 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 4601 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 4602 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 4603 4604 vmovdqa YMMWORD[(160+128)+rbp],ymm0 4605 add r10,QWORD[((0+480))+rsi] 4606 adc r11,QWORD[((8+480))+rsi] 4607 adc r12,1 4608 vperm2i128 ymm0,ymm7,ymm3,0x02 4609 vperm2i128 ymm7,ymm7,ymm3,0x13 4610 vperm2i128 ymm3,ymm15,ymm11,0x02 4611 vperm2i128 ymm11,ymm15,ymm11,0x13 4612 vpxor ymm0,ymm0,YMMWORD[((0+0))+rsi] 4613 vpxor ymm3,ymm3,YMMWORD[((32+0))+rsi] 4614 vpxor ymm7,ymm7,YMMWORD[((64+0))+rsi] 4615 vpxor 
ymm11,ymm11,YMMWORD[((96+0))+rsi] 4616 vmovdqu YMMWORD[(0+0)+rdi],ymm0 4617 vmovdqu YMMWORD[(32+0)+rdi],ymm3 4618 vmovdqu YMMWORD[(64+0)+rdi],ymm7 4619 vmovdqu YMMWORD[(96+0)+rdi],ymm11 4620 4621 vmovdqa ymm0,YMMWORD[((160+128))+rbp] 4622 mov rax,QWORD[((0+160+0))+rbp] 4623 mov r15,rax 4624 mul r10 4625 mov r13,rax 4626 mov r14,rdx 4627 mov rax,QWORD[((0+160+0))+rbp] 4628 mul r11 4629 imul r15,r12 4630 add r14,rax 4631 adc r15,rdx 4632 mov rax,QWORD[((8+160+0))+rbp] 4633 mov r9,rax 4634 mul r10 4635 add r14,rax 4636 adc rdx,0 4637 mov r10,rdx 4638 mov rax,QWORD[((8+160+0))+rbp] 4639 mul r11 4640 add r15,rax 4641 adc rdx,0 4642 imul r9,r12 4643 add r15,r10 4644 adc r9,rdx 4645 mov r10,r13 4646 mov r11,r14 4647 mov r12,r15 4648 and r12,3 4649 mov r13,r15 4650 and r13,-4 4651 mov r14,r9 4652 shrd r15,r9,2 4653 shr r9,2 4654 add r15,r13 4655 adc r9,r14 4656 add r10,r15 4657 adc r11,r9 4658 adc r12,0 4659 vperm2i128 ymm3,ymm6,ymm2,0x02 4660 vperm2i128 ymm6,ymm6,ymm2,0x13 4661 vperm2i128 ymm2,ymm14,ymm10,0x02 4662 vperm2i128 ymm10,ymm14,ymm10,0x13 4663 vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi] 4664 vpxor ymm2,ymm2,YMMWORD[((32+128))+rsi] 4665 vpxor ymm6,ymm6,YMMWORD[((64+128))+rsi] 4666 vpxor ymm10,ymm10,YMMWORD[((96+128))+rsi] 4667 vmovdqu YMMWORD[(0+128)+rdi],ymm3 4668 vmovdqu YMMWORD[(32+128)+rdi],ymm2 4669 vmovdqu YMMWORD[(64+128)+rdi],ymm6 4670 vmovdqu YMMWORD[(96+128)+rdi],ymm10 4671 add r10,QWORD[((0+480+16))+rsi] 4672 adc r11,QWORD[((8+480+16))+rsi] 4673 adc r12,1 4674 vperm2i128 ymm3,ymm5,ymm1,0x02 4675 vperm2i128 ymm5,ymm5,ymm1,0x13 4676 vperm2i128 ymm1,ymm13,ymm9,0x02 4677 vperm2i128 ymm9,ymm13,ymm9,0x13 4678 vpxor ymm3,ymm3,YMMWORD[((0+256))+rsi] 4679 vpxor ymm1,ymm1,YMMWORD[((32+256))+rsi] 4680 vpxor ymm5,ymm5,YMMWORD[((64+256))+rsi] 4681 vpxor ymm9,ymm9,YMMWORD[((96+256))+rsi] 4682 vmovdqu YMMWORD[(0+256)+rdi],ymm3 4683 vmovdqu YMMWORD[(32+256)+rdi],ymm1 4684 vmovdqu YMMWORD[(64+256)+rdi],ymm5 4685 vmovdqu YMMWORD[(96+256)+rdi],ymm9 4686 mov 
rax,QWORD[((0+160+0))+rbp] 4687 mov r15,rax 4688 mul r10 4689 mov r13,rax 4690 mov r14,rdx 4691 mov rax,QWORD[((0+160+0))+rbp] 4692 mul r11 4693 imul r15,r12 4694 add r14,rax 4695 adc r15,rdx 4696 mov rax,QWORD[((8+160+0))+rbp] 4697 mov r9,rax 4698 mul r10 4699 add r14,rax 4700 adc rdx,0 4701 mov r10,rdx 4702 mov rax,QWORD[((8+160+0))+rbp] 4703 mul r11 4704 add r15,rax 4705 adc rdx,0 4706 imul r9,r12 4707 add r15,r10 4708 adc r9,rdx 4709 mov r10,r13 4710 mov r11,r14 4711 mov r12,r15 4712 and r12,3 4713 mov r13,r15 4714 and r13,-4 4715 mov r14,r9 4716 shrd r15,r9,2 4717 shr r9,2 4718 add r15,r13 4719 adc r9,r14 4720 add r10,r15 4721 adc r11,r9 4722 adc r12,0 4723 vperm2i128 ymm3,ymm4,ymm0,0x02 4724 vperm2i128 ymm4,ymm4,ymm0,0x13 4725 vperm2i128 ymm0,ymm12,ymm8,0x02 4726 vperm2i128 ymm8,ymm12,ymm8,0x13 4727 vpxor ymm3,ymm3,YMMWORD[((0+384))+rsi] 4728 vpxor ymm0,ymm0,YMMWORD[((32+384))+rsi] 4729 vpxor ymm4,ymm4,YMMWORD[((64+384))+rsi] 4730 vpxor ymm8,ymm8,YMMWORD[((96+384))+rsi] 4731 vmovdqu YMMWORD[(0+384)+rdi],ymm3 4732 vmovdqu YMMWORD[(32+384)+rdi],ymm0 4733 vmovdqu YMMWORD[(64+384)+rdi],ymm4 4734 vmovdqu YMMWORD[(96+384)+rdi],ymm8 4735 4736 lea rsi,[512+rsi] 4737 lea rdi,[512+rdi] 4738 sub rbx,16*32 4739 jmp NEAR $L$open_avx2_main_loop 4740$L$open_avx2_main_loop_done: 4741 test rbx,rbx 4742 vzeroupper 4743 je NEAR $L$open_sse_finalize 4744 4745 cmp rbx,12*32 4746 ja NEAR $L$open_avx2_tail_512 4747 cmp rbx,8*32 4748 ja NEAR $L$open_avx2_tail_384 4749 cmp rbx,4*32 4750 ja NEAR $L$open_avx2_tail_256 4751 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 4752 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 4753 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 4754 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 4755 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 4756 vmovdqa YMMWORD[(160+160)+rbp],ymm12 4757 4758 xor r8,r8 4759 mov rcx,rbx 4760 and rcx,-16 4761 test rcx,rcx 4762 je NEAR $L$open_avx2_tail_128_rounds 4763$L$open_avx2_tail_128_rounds_and_x1hash: 4764 add r10,QWORD[((0+0))+r8*1+rsi] 4765 adc 
r11,QWORD[((8+0))+r8*1+rsi] 4766 adc r12,1 4767 mov rax,QWORD[((0+160+0))+rbp] 4768 mov r15,rax 4769 mul r10 4770 mov r13,rax 4771 mov r14,rdx 4772 mov rax,QWORD[((0+160+0))+rbp] 4773 mul r11 4774 imul r15,r12 4775 add r14,rax 4776 adc r15,rdx 4777 mov rax,QWORD[((8+160+0))+rbp] 4778 mov r9,rax 4779 mul r10 4780 add r14,rax 4781 adc rdx,0 4782 mov r10,rdx 4783 mov rax,QWORD[((8+160+0))+rbp] 4784 mul r11 4785 add r15,rax 4786 adc rdx,0 4787 imul r9,r12 4788 add r15,r10 4789 adc r9,rdx 4790 mov r10,r13 4791 mov r11,r14 4792 mov r12,r15 4793 and r12,3 4794 mov r13,r15 4795 and r13,-4 4796 mov r14,r9 4797 shrd r15,r9,2 4798 shr r9,2 4799 add r15,r13 4800 adc r9,r14 4801 add r10,r15 4802 adc r11,r9 4803 adc r12,0 4804 4805$L$open_avx2_tail_128_rounds: 4806 add r8,16 4807 vpaddd ymm0,ymm0,ymm4 4808 vpxor ymm12,ymm12,ymm0 4809 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 4810 vpaddd ymm8,ymm8,ymm12 4811 vpxor ymm4,ymm4,ymm8 4812 vpsrld ymm3,ymm4,20 4813 vpslld ymm4,ymm4,12 4814 vpxor ymm4,ymm4,ymm3 4815 vpaddd ymm0,ymm0,ymm4 4816 vpxor ymm12,ymm12,ymm0 4817 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 4818 vpaddd ymm8,ymm8,ymm12 4819 vpxor ymm4,ymm4,ymm8 4820 vpslld ymm3,ymm4,7 4821 vpsrld ymm4,ymm4,25 4822 vpxor ymm4,ymm4,ymm3 4823 vpalignr ymm12,ymm12,ymm12,12 4824 vpalignr ymm8,ymm8,ymm8,8 4825 vpalignr ymm4,ymm4,ymm4,4 4826 vpaddd ymm0,ymm0,ymm4 4827 vpxor ymm12,ymm12,ymm0 4828 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 4829 vpaddd ymm8,ymm8,ymm12 4830 vpxor ymm4,ymm4,ymm8 4831 vpsrld ymm3,ymm4,20 4832 vpslld ymm4,ymm4,12 4833 vpxor ymm4,ymm4,ymm3 4834 vpaddd ymm0,ymm0,ymm4 4835 vpxor ymm12,ymm12,ymm0 4836 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 4837 vpaddd ymm8,ymm8,ymm12 4838 vpxor ymm4,ymm4,ymm8 4839 vpslld ymm3,ymm4,7 4840 vpsrld ymm4,ymm4,25 4841 vpxor ymm4,ymm4,ymm3 4842 vpalignr ymm12,ymm12,ymm12,4 4843 vpalignr ymm8,ymm8,ymm8,8 4844 vpalignr ymm4,ymm4,ymm4,12 4845 4846 cmp r8,rcx 4847 jb NEAR $L$open_avx2_tail_128_rounds_and_x1hash 4848 cmp r8,160 4849 jne NEAR 
$L$open_avx2_tail_128_rounds 4850 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 4851 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 4852 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 4853 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 4854 vperm2i128 ymm3,ymm4,ymm0,0x13 4855 vperm2i128 ymm0,ymm4,ymm0,0x02 4856 vperm2i128 ymm4,ymm12,ymm8,0x02 4857 vperm2i128 ymm12,ymm12,ymm8,0x13 4858 vmovdqa ymm8,ymm3 4859 4860 jmp NEAR $L$open_avx2_tail_128_xor 4861 4862$L$open_avx2_tail_256: 4863 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 4864 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 4865 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 4866 vmovdqa ymm1,ymm0 4867 vmovdqa ymm5,ymm4 4868 vmovdqa ymm9,ymm8 4869 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 4870 vpaddd ymm13,ymm12,YMMWORD[((160+160))+rbp] 4871 vpaddd ymm12,ymm12,ymm13 4872 vmovdqa YMMWORD[(160+160)+rbp],ymm12 4873 vmovdqa YMMWORD[(160+192)+rbp],ymm13 4874 4875 mov QWORD[((160+128))+rbp],rbx 4876 mov rcx,rbx 4877 sub rcx,4*32 4878 shr rcx,4 4879 mov r8,10 4880 cmp rcx,10 4881 cmovg rcx,r8 4882 mov rbx,rsi 4883 xor r8,r8 4884$L$open_avx2_tail_256_rounds_and_x1hash: 4885 add r10,QWORD[((0+0))+rbx] 4886 adc r11,QWORD[((8+0))+rbx] 4887 adc r12,1 4888 mov rdx,QWORD[((0+160+0))+rbp] 4889 mov r15,rdx 4890 mulx r14,r13,r10 4891 mulx rdx,rax,r11 4892 imul r15,r12 4893 add r14,rax 4894 adc r15,rdx 4895 mov rdx,QWORD[((8+160+0))+rbp] 4896 mulx rax,r10,r10 4897 add r14,r10 4898 mulx r9,r11,r11 4899 adc r15,r11 4900 adc r9,0 4901 imul rdx,r12 4902 add r15,rax 4903 adc r9,rdx 4904 mov r10,r13 4905 mov r11,r14 4906 mov r12,r15 4907 and r12,3 4908 mov r13,r15 4909 and r13,-4 4910 mov r14,r9 4911 shrd r15,r9,2 4912 shr r9,2 4913 add r15,r13 4914 adc r9,r14 4915 add r10,r15 4916 adc r11,r9 4917 adc r12,0 4918 4919 lea rbx,[16+rbx] 4920$L$open_avx2_tail_256_rounds: 4921 vpaddd ymm0,ymm0,ymm4 4922 vpxor ymm12,ymm12,ymm0 4923 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 4924 vpaddd ymm8,ymm8,ymm12 4925 vpxor ymm4,ymm4,ymm8 4926 vpsrld ymm3,ymm4,20 4927 vpslld ymm4,ymm4,12 4928 vpxor 
ymm4,ymm4,ymm3 4929 vpaddd ymm0,ymm0,ymm4 4930 vpxor ymm12,ymm12,ymm0 4931 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 4932 vpaddd ymm8,ymm8,ymm12 4933 vpxor ymm4,ymm4,ymm8 4934 vpslld ymm3,ymm4,7 4935 vpsrld ymm4,ymm4,25 4936 vpxor ymm4,ymm4,ymm3 4937 vpalignr ymm12,ymm12,ymm12,12 4938 vpalignr ymm8,ymm8,ymm8,8 4939 vpalignr ymm4,ymm4,ymm4,4 4940 vpaddd ymm1,ymm1,ymm5 4941 vpxor ymm13,ymm13,ymm1 4942 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 4943 vpaddd ymm9,ymm9,ymm13 4944 vpxor ymm5,ymm5,ymm9 4945 vpsrld ymm3,ymm5,20 4946 vpslld ymm5,ymm5,12 4947 vpxor ymm5,ymm5,ymm3 4948 vpaddd ymm1,ymm1,ymm5 4949 vpxor ymm13,ymm13,ymm1 4950 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 4951 vpaddd ymm9,ymm9,ymm13 4952 vpxor ymm5,ymm5,ymm9 4953 vpslld ymm3,ymm5,7 4954 vpsrld ymm5,ymm5,25 4955 vpxor ymm5,ymm5,ymm3 4956 vpalignr ymm13,ymm13,ymm13,12 4957 vpalignr ymm9,ymm9,ymm9,8 4958 vpalignr ymm5,ymm5,ymm5,4 4959 4960 inc r8 4961 vpaddd ymm0,ymm0,ymm4 4962 vpxor ymm12,ymm12,ymm0 4963 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 4964 vpaddd ymm8,ymm8,ymm12 4965 vpxor ymm4,ymm4,ymm8 4966 vpsrld ymm3,ymm4,20 4967 vpslld ymm4,ymm4,12 4968 vpxor ymm4,ymm4,ymm3 4969 vpaddd ymm0,ymm0,ymm4 4970 vpxor ymm12,ymm12,ymm0 4971 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 4972 vpaddd ymm8,ymm8,ymm12 4973 vpxor ymm4,ymm4,ymm8 4974 vpslld ymm3,ymm4,7 4975 vpsrld ymm4,ymm4,25 4976 vpxor ymm4,ymm4,ymm3 4977 vpalignr ymm12,ymm12,ymm12,4 4978 vpalignr ymm8,ymm8,ymm8,8 4979 vpalignr ymm4,ymm4,ymm4,12 4980 vpaddd ymm1,ymm1,ymm5 4981 vpxor ymm13,ymm13,ymm1 4982 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 4983 vpaddd ymm9,ymm9,ymm13 4984 vpxor ymm5,ymm5,ymm9 4985 vpsrld ymm3,ymm5,20 4986 vpslld ymm5,ymm5,12 4987 vpxor ymm5,ymm5,ymm3 4988 vpaddd ymm1,ymm1,ymm5 4989 vpxor ymm13,ymm13,ymm1 4990 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 4991 vpaddd ymm9,ymm9,ymm13 4992 vpxor ymm5,ymm5,ymm9 4993 vpslld ymm3,ymm5,7 4994 vpsrld ymm5,ymm5,25 4995 vpxor ymm5,ymm5,ymm3 4996 vpalignr ymm13,ymm13,ymm13,4 4997 vpalignr ymm9,ymm9,ymm9,8 4998 vpalignr 
ymm5,ymm5,ymm5,12 4999 vpaddd ymm2,ymm2,ymm6 5000 vpxor ymm14,ymm14,ymm2 5001 vpshufb ymm14,ymm14,YMMWORD[$L$rol16] 5002 vpaddd ymm10,ymm10,ymm14 5003 vpxor ymm6,ymm6,ymm10 5004 vpsrld ymm3,ymm6,20 5005 vpslld ymm6,ymm6,12 5006 vpxor ymm6,ymm6,ymm3 5007 vpaddd ymm2,ymm2,ymm6 5008 vpxor ymm14,ymm14,ymm2 5009 vpshufb ymm14,ymm14,YMMWORD[$L$rol8] 5010 vpaddd ymm10,ymm10,ymm14 5011 vpxor ymm6,ymm6,ymm10 5012 vpslld ymm3,ymm6,7 5013 vpsrld ymm6,ymm6,25 5014 vpxor ymm6,ymm6,ymm3 5015 vpalignr ymm14,ymm14,ymm14,4 5016 vpalignr ymm10,ymm10,ymm10,8 5017 vpalignr ymm6,ymm6,ymm6,12 5018 5019 cmp r8,rcx 5020 jb NEAR $L$open_avx2_tail_256_rounds_and_x1hash 5021 cmp r8,10 5022 jne NEAR $L$open_avx2_tail_256_rounds 5023 mov r8,rbx 5024 sub rbx,rsi 5025 mov rcx,rbx 5026 mov rbx,QWORD[((160+128))+rbp] 5027$L$open_avx2_tail_256_hash: 5028 add rcx,16 5029 cmp rcx,rbx 5030 jg NEAR $L$open_avx2_tail_256_done 5031 add r10,QWORD[((0+0))+r8] 5032 adc r11,QWORD[((8+0))+r8] 5033 adc r12,1 5034 mov rdx,QWORD[((0+160+0))+rbp] 5035 mov r15,rdx 5036 mulx r14,r13,r10 5037 mulx rdx,rax,r11 5038 imul r15,r12 5039 add r14,rax 5040 adc r15,rdx 5041 mov rdx,QWORD[((8+160+0))+rbp] 5042 mulx rax,r10,r10 5043 add r14,r10 5044 mulx r9,r11,r11 5045 adc r15,r11 5046 adc r9,0 5047 imul rdx,r12 5048 add r15,rax 5049 adc r9,rdx 5050 mov r10,r13 5051 mov r11,r14 5052 mov r12,r15 5053 and r12,3 5054 mov r13,r15 5055 and r13,-4 5056 mov r14,r9 5057 shrd r15,r9,2 5058 shr r9,2 5059 add r15,r13 5060 adc r9,r14 5061 add r10,r15 5062 adc r11,r9 5063 adc r12,0 5064 5065 lea r8,[16+r8] 5066 jmp NEAR $L$open_avx2_tail_256_hash 5067$L$open_avx2_tail_256_done: 5068 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 5069 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] 5070 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] 5071 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 5072 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 5073 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 5074 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 5075 vpaddd 
ymm12,ymm12,YMMWORD[((160+160))+rbp] 5076 vperm2i128 ymm3,ymm5,ymm1,0x02 5077 vperm2i128 ymm5,ymm5,ymm1,0x13 5078 vperm2i128 ymm1,ymm13,ymm9,0x02 5079 vperm2i128 ymm9,ymm13,ymm9,0x13 5080 vpxor ymm3,ymm3,YMMWORD[((0+0))+rsi] 5081 vpxor ymm1,ymm1,YMMWORD[((32+0))+rsi] 5082 vpxor ymm5,ymm5,YMMWORD[((64+0))+rsi] 5083 vpxor ymm9,ymm9,YMMWORD[((96+0))+rsi] 5084 vmovdqu YMMWORD[(0+0)+rdi],ymm3 5085 vmovdqu YMMWORD[(32+0)+rdi],ymm1 5086 vmovdqu YMMWORD[(64+0)+rdi],ymm5 5087 vmovdqu YMMWORD[(96+0)+rdi],ymm9 5088 vperm2i128 ymm3,ymm4,ymm0,0x13 5089 vperm2i128 ymm0,ymm4,ymm0,0x02 5090 vperm2i128 ymm4,ymm12,ymm8,0x02 5091 vperm2i128 ymm12,ymm12,ymm8,0x13 5092 vmovdqa ymm8,ymm3 5093 5094 lea rsi,[128+rsi] 5095 lea rdi,[128+rdi] 5096 sub rbx,4*32 5097 jmp NEAR $L$open_avx2_tail_128_xor 5098 5099$L$open_avx2_tail_384: 5100 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 5101 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 5102 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 5103 vmovdqa ymm1,ymm0 5104 vmovdqa ymm5,ymm4 5105 vmovdqa ymm9,ymm8 5106 vmovdqa ymm2,ymm0 5107 vmovdqa ymm6,ymm4 5108 vmovdqa ymm10,ymm8 5109 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 5110 vpaddd ymm14,ymm12,YMMWORD[((160+160))+rbp] 5111 vpaddd ymm13,ymm12,ymm14 5112 vpaddd ymm12,ymm12,ymm13 5113 vmovdqa YMMWORD[(160+160)+rbp],ymm12 5114 vmovdqa YMMWORD[(160+192)+rbp],ymm13 5115 vmovdqa YMMWORD[(160+224)+rbp],ymm14 5116 5117 mov QWORD[((160+128))+rbp],rbx 5118 mov rcx,rbx 5119 sub rcx,8*32 5120 shr rcx,4 5121 add rcx,6 5122 mov r8,10 5123 cmp rcx,10 5124 cmovg rcx,r8 5125 mov rbx,rsi 5126 xor r8,r8 5127$L$open_avx2_tail_384_rounds_and_x2hash: 5128 add r10,QWORD[((0+0))+rbx] 5129 adc r11,QWORD[((8+0))+rbx] 5130 adc r12,1 5131 mov rdx,QWORD[((0+160+0))+rbp] 5132 mov r15,rdx 5133 mulx r14,r13,r10 5134 mulx rdx,rax,r11 5135 imul r15,r12 5136 add r14,rax 5137 adc r15,rdx 5138 mov rdx,QWORD[((8+160+0))+rbp] 5139 mulx rax,r10,r10 5140 add r14,r10 5141 mulx r9,r11,r11 5142 adc r15,r11 5143 adc r9,0 5144 imul rdx,r12 5145 add r15,rax 5146 adc r9,rdx 
5147 mov r10,r13 5148 mov r11,r14 5149 mov r12,r15 5150 and r12,3 5151 mov r13,r15 5152 and r13,-4 5153 mov r14,r9 5154 shrd r15,r9,2 5155 shr r9,2 5156 add r15,r13 5157 adc r9,r14 5158 add r10,r15 5159 adc r11,r9 5160 adc r12,0 5161 5162 lea rbx,[16+rbx] 5163$L$open_avx2_tail_384_rounds_and_x1hash: 5164 vpaddd ymm2,ymm2,ymm6 5165 vpxor ymm14,ymm14,ymm2 5166 vpshufb ymm14,ymm14,YMMWORD[$L$rol16] 5167 vpaddd ymm10,ymm10,ymm14 5168 vpxor ymm6,ymm6,ymm10 5169 vpsrld ymm3,ymm6,20 5170 vpslld ymm6,ymm6,12 5171 vpxor ymm6,ymm6,ymm3 5172 vpaddd ymm2,ymm2,ymm6 5173 vpxor ymm14,ymm14,ymm2 5174 vpshufb ymm14,ymm14,YMMWORD[$L$rol8] 5175 vpaddd ymm10,ymm10,ymm14 5176 vpxor ymm6,ymm6,ymm10 5177 vpslld ymm3,ymm6,7 5178 vpsrld ymm6,ymm6,25 5179 vpxor ymm6,ymm6,ymm3 5180 vpalignr ymm14,ymm14,ymm14,12 5181 vpalignr ymm10,ymm10,ymm10,8 5182 vpalignr ymm6,ymm6,ymm6,4 5183 vpaddd ymm1,ymm1,ymm5 5184 vpxor ymm13,ymm13,ymm1 5185 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 5186 vpaddd ymm9,ymm9,ymm13 5187 vpxor ymm5,ymm5,ymm9 5188 vpsrld ymm3,ymm5,20 5189 vpslld ymm5,ymm5,12 5190 vpxor ymm5,ymm5,ymm3 5191 vpaddd ymm1,ymm1,ymm5 5192 vpxor ymm13,ymm13,ymm1 5193 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 5194 vpaddd ymm9,ymm9,ymm13 5195 vpxor ymm5,ymm5,ymm9 5196 vpslld ymm3,ymm5,7 5197 vpsrld ymm5,ymm5,25 5198 vpxor ymm5,ymm5,ymm3 5199 vpalignr ymm13,ymm13,ymm13,12 5200 vpalignr ymm9,ymm9,ymm9,8 5201 vpalignr ymm5,ymm5,ymm5,4 5202 vpaddd ymm0,ymm0,ymm4 5203 vpxor ymm12,ymm12,ymm0 5204 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 5205 vpaddd ymm8,ymm8,ymm12 5206 vpxor ymm4,ymm4,ymm8 5207 vpsrld ymm3,ymm4,20 5208 vpslld ymm4,ymm4,12 5209 vpxor ymm4,ymm4,ymm3 5210 vpaddd ymm0,ymm0,ymm4 5211 vpxor ymm12,ymm12,ymm0 5212 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 5213 vpaddd ymm8,ymm8,ymm12 5214 vpxor ymm4,ymm4,ymm8 5215 vpslld ymm3,ymm4,7 5216 vpsrld ymm4,ymm4,25 5217 vpxor ymm4,ymm4,ymm3 5218 vpalignr ymm12,ymm12,ymm12,12 5219 vpalignr ymm8,ymm8,ymm8,8 5220 vpalignr ymm4,ymm4,ymm4,4 5221 add r10,QWORD[((0+0))+rbx] 5222 
adc r11,QWORD[((8+0))+rbx] 5223 adc r12,1 5224 mov rax,QWORD[((0+160+0))+rbp] 5225 mov r15,rax 5226 mul r10 5227 mov r13,rax 5228 mov r14,rdx 5229 mov rax,QWORD[((0+160+0))+rbp] 5230 mul r11 5231 imul r15,r12 5232 add r14,rax 5233 adc r15,rdx 5234 mov rax,QWORD[((8+160+0))+rbp] 5235 mov r9,rax 5236 mul r10 5237 add r14,rax 5238 adc rdx,0 5239 mov r10,rdx 5240 mov rax,QWORD[((8+160+0))+rbp] 5241 mul r11 5242 add r15,rax 5243 adc rdx,0 5244 imul r9,r12 5245 add r15,r10 5246 adc r9,rdx 5247 mov r10,r13 5248 mov r11,r14 5249 mov r12,r15 5250 and r12,3 5251 mov r13,r15 5252 and r13,-4 5253 mov r14,r9 5254 shrd r15,r9,2 5255 shr r9,2 5256 add r15,r13 5257 adc r9,r14 5258 add r10,r15 5259 adc r11,r9 5260 adc r12,0 5261 5262 lea rbx,[16+rbx] 5263 inc r8 5264 vpaddd ymm2,ymm2,ymm6 5265 vpxor ymm14,ymm14,ymm2 5266 vpshufb ymm14,ymm14,YMMWORD[$L$rol16] 5267 vpaddd ymm10,ymm10,ymm14 5268 vpxor ymm6,ymm6,ymm10 5269 vpsrld ymm3,ymm6,20 5270 vpslld ymm6,ymm6,12 5271 vpxor ymm6,ymm6,ymm3 5272 vpaddd ymm2,ymm2,ymm6 5273 vpxor ymm14,ymm14,ymm2 5274 vpshufb ymm14,ymm14,YMMWORD[$L$rol8] 5275 vpaddd ymm10,ymm10,ymm14 5276 vpxor ymm6,ymm6,ymm10 5277 vpslld ymm3,ymm6,7 5278 vpsrld ymm6,ymm6,25 5279 vpxor ymm6,ymm6,ymm3 5280 vpalignr ymm14,ymm14,ymm14,4 5281 vpalignr ymm10,ymm10,ymm10,8 5282 vpalignr ymm6,ymm6,ymm6,12 5283 vpaddd ymm1,ymm1,ymm5 5284 vpxor ymm13,ymm13,ymm1 5285 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 5286 vpaddd ymm9,ymm9,ymm13 5287 vpxor ymm5,ymm5,ymm9 5288 vpsrld ymm3,ymm5,20 5289 vpslld ymm5,ymm5,12 5290 vpxor ymm5,ymm5,ymm3 5291 vpaddd ymm1,ymm1,ymm5 5292 vpxor ymm13,ymm13,ymm1 5293 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 5294 vpaddd ymm9,ymm9,ymm13 5295 vpxor ymm5,ymm5,ymm9 5296 vpslld ymm3,ymm5,7 5297 vpsrld ymm5,ymm5,25 5298 vpxor ymm5,ymm5,ymm3 5299 vpalignr ymm13,ymm13,ymm13,4 5300 vpalignr ymm9,ymm9,ymm9,8 5301 vpalignr ymm5,ymm5,ymm5,12 5302 vpaddd ymm0,ymm0,ymm4 5303 vpxor ymm12,ymm12,ymm0 5304 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 5305 vpaddd ymm8,ymm8,ymm12 5306 
vpxor ymm4,ymm4,ymm8 5307 vpsrld ymm3,ymm4,20 5308 vpslld ymm4,ymm4,12 5309 vpxor ymm4,ymm4,ymm3 5310 vpaddd ymm0,ymm0,ymm4 5311 vpxor ymm12,ymm12,ymm0 5312 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 5313 vpaddd ymm8,ymm8,ymm12 5314 vpxor ymm4,ymm4,ymm8 5315 vpslld ymm3,ymm4,7 5316 vpsrld ymm4,ymm4,25 5317 vpxor ymm4,ymm4,ymm3 5318 vpalignr ymm12,ymm12,ymm12,4 5319 vpalignr ymm8,ymm8,ymm8,8 5320 vpalignr ymm4,ymm4,ymm4,12 5321 5322 cmp r8,rcx 5323 jb NEAR $L$open_avx2_tail_384_rounds_and_x2hash 5324 cmp r8,10 5325 jne NEAR $L$open_avx2_tail_384_rounds_and_x1hash 5326 mov r8,rbx 5327 sub rbx,rsi 5328 mov rcx,rbx 5329 mov rbx,QWORD[((160+128))+rbp] 5330$L$open_avx2_384_tail_hash: 5331 add rcx,16 5332 cmp rcx,rbx 5333 jg NEAR $L$open_avx2_384_tail_done 5334 add r10,QWORD[((0+0))+r8] 5335 adc r11,QWORD[((8+0))+r8] 5336 adc r12,1 5337 mov rdx,QWORD[((0+160+0))+rbp] 5338 mov r15,rdx 5339 mulx r14,r13,r10 5340 mulx rdx,rax,r11 5341 imul r15,r12 5342 add r14,rax 5343 adc r15,rdx 5344 mov rdx,QWORD[((8+160+0))+rbp] 5345 mulx rax,r10,r10 5346 add r14,r10 5347 mulx r9,r11,r11 5348 adc r15,r11 5349 adc r9,0 5350 imul rdx,r12 5351 add r15,rax 5352 adc r9,rdx 5353 mov r10,r13 5354 mov r11,r14 5355 mov r12,r15 5356 and r12,3 5357 mov r13,r15 5358 and r13,-4 5359 mov r14,r9 5360 shrd r15,r9,2 5361 shr r9,2 5362 add r15,r13 5363 adc r9,r14 5364 add r10,r15 5365 adc r11,r9 5366 adc r12,0 5367 5368 lea r8,[16+r8] 5369 jmp NEAR $L$open_avx2_384_tail_hash 5370$L$open_avx2_384_tail_done: 5371 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] 5372 vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] 5373 vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] 5374 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] 5375 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 5376 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] 5377 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] 5378 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 5379 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 5380 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 5381 vpaddd 
ymm8,ymm8,YMMWORD[((160+96))+rbp] 5382 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 5383 vperm2i128 ymm3,ymm6,ymm2,0x02 5384 vperm2i128 ymm6,ymm6,ymm2,0x13 5385 vperm2i128 ymm2,ymm14,ymm10,0x02 5386 vperm2i128 ymm10,ymm14,ymm10,0x13 5387 vpxor ymm3,ymm3,YMMWORD[((0+0))+rsi] 5388 vpxor ymm2,ymm2,YMMWORD[((32+0))+rsi] 5389 vpxor ymm6,ymm6,YMMWORD[((64+0))+rsi] 5390 vpxor ymm10,ymm10,YMMWORD[((96+0))+rsi] 5391 vmovdqu YMMWORD[(0+0)+rdi],ymm3 5392 vmovdqu YMMWORD[(32+0)+rdi],ymm2 5393 vmovdqu YMMWORD[(64+0)+rdi],ymm6 5394 vmovdqu YMMWORD[(96+0)+rdi],ymm10 5395 vperm2i128 ymm3,ymm5,ymm1,0x02 5396 vperm2i128 ymm5,ymm5,ymm1,0x13 5397 vperm2i128 ymm1,ymm13,ymm9,0x02 5398 vperm2i128 ymm9,ymm13,ymm9,0x13 5399 vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi] 5400 vpxor ymm1,ymm1,YMMWORD[((32+128))+rsi] 5401 vpxor ymm5,ymm5,YMMWORD[((64+128))+rsi] 5402 vpxor ymm9,ymm9,YMMWORD[((96+128))+rsi] 5403 vmovdqu YMMWORD[(0+128)+rdi],ymm3 5404 vmovdqu YMMWORD[(32+128)+rdi],ymm1 5405 vmovdqu YMMWORD[(64+128)+rdi],ymm5 5406 vmovdqu YMMWORD[(96+128)+rdi],ymm9 5407 vperm2i128 ymm3,ymm4,ymm0,0x13 5408 vperm2i128 ymm0,ymm4,ymm0,0x02 5409 vperm2i128 ymm4,ymm12,ymm8,0x02 5410 vperm2i128 ymm12,ymm12,ymm8,0x13 5411 vmovdqa ymm8,ymm3 5412 5413 lea rsi,[256+rsi] 5414 lea rdi,[256+rdi] 5415 sub rbx,8*32 5416 jmp NEAR $L$open_avx2_tail_128_xor 5417 5418$L$open_avx2_tail_512: 5419 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 5420 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 5421 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 5422 vmovdqa ymm1,ymm0 5423 vmovdqa ymm5,ymm4 5424 vmovdqa ymm9,ymm8 5425 vmovdqa ymm2,ymm0 5426 vmovdqa ymm6,ymm4 5427 vmovdqa ymm10,ymm8 5428 vmovdqa ymm3,ymm0 5429 vmovdqa ymm7,ymm4 5430 vmovdqa ymm11,ymm8 5431 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 5432 vpaddd ymm15,ymm12,YMMWORD[((160+160))+rbp] 5433 vpaddd ymm14,ymm12,ymm15 5434 vpaddd ymm13,ymm12,ymm14 5435 vpaddd ymm12,ymm12,ymm13 5436 vmovdqa YMMWORD[(160+256)+rbp],ymm15 5437 vmovdqa YMMWORD[(160+224)+rbp],ymm14 5438 vmovdqa YMMWORD[(160+192)+rbp],ymm13 
5439 vmovdqa YMMWORD[(160+160)+rbp],ymm12 5440 5441 xor rcx,rcx 5442 mov r8,rsi 5443$L$open_avx2_tail_512_rounds_and_x2hash: 5444 add r10,QWORD[((0+0))+r8] 5445 adc r11,QWORD[((8+0))+r8] 5446 adc r12,1 5447 mov rax,QWORD[((0+160+0))+rbp] 5448 mov r15,rax 5449 mul r10 5450 mov r13,rax 5451 mov r14,rdx 5452 mov rax,QWORD[((0+160+0))+rbp] 5453 mul r11 5454 imul r15,r12 5455 add r14,rax 5456 adc r15,rdx 5457 mov rax,QWORD[((8+160+0))+rbp] 5458 mov r9,rax 5459 mul r10 5460 add r14,rax 5461 adc rdx,0 5462 mov r10,rdx 5463 mov rax,QWORD[((8+160+0))+rbp] 5464 mul r11 5465 add r15,rax 5466 adc rdx,0 5467 imul r9,r12 5468 add r15,r10 5469 adc r9,rdx 5470 mov r10,r13 5471 mov r11,r14 5472 mov r12,r15 5473 and r12,3 5474 mov r13,r15 5475 and r13,-4 5476 mov r14,r9 5477 shrd r15,r9,2 5478 shr r9,2 5479 add r15,r13 5480 adc r9,r14 5481 add r10,r15 5482 adc r11,r9 5483 adc r12,0 5484 5485 lea r8,[16+r8] 5486$L$open_avx2_tail_512_rounds_and_x1hash: 5487 vmovdqa YMMWORD[(160+128)+rbp],ymm8 5488 vmovdqa ymm8,YMMWORD[$L$rol16] 5489 vpaddd ymm3,ymm3,ymm7 5490 vpaddd ymm2,ymm2,ymm6 5491 vpaddd ymm1,ymm1,ymm5 5492 vpaddd ymm0,ymm0,ymm4 5493 vpxor ymm15,ymm15,ymm3 5494 vpxor ymm14,ymm14,ymm2 5495 vpxor ymm13,ymm13,ymm1 5496 vpxor ymm12,ymm12,ymm0 5497 vpshufb ymm15,ymm15,ymm8 5498 vpshufb ymm14,ymm14,ymm8 5499 vpshufb ymm13,ymm13,ymm8 5500 vpshufb ymm12,ymm12,ymm8 5501 vpaddd ymm11,ymm11,ymm15 5502 vpaddd ymm10,ymm10,ymm14 5503 vpaddd ymm9,ymm9,ymm13 5504 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 5505 vpxor ymm7,ymm7,ymm11 5506 vpxor ymm6,ymm6,ymm10 5507 vpxor ymm5,ymm5,ymm9 5508 vpxor ymm4,ymm4,ymm8 5509 vmovdqa YMMWORD[(160+128)+rbp],ymm8 5510 vpsrld ymm8,ymm7,20 5511 vpslld ymm7,ymm7,32-20 5512 vpxor ymm7,ymm7,ymm8 5513 vpsrld ymm8,ymm6,20 5514 vpslld ymm6,ymm6,32-20 5515 vpxor ymm6,ymm6,ymm8 5516 vpsrld ymm8,ymm5,20 5517 vpslld ymm5,ymm5,32-20 5518 vpxor ymm5,ymm5,ymm8 5519 vpsrld ymm8,ymm4,20 5520 vpslld ymm4,ymm4,32-20 5521 vpxor ymm4,ymm4,ymm8 5522 vmovdqa ymm8,YMMWORD[$L$rol8] 
5523 vpaddd ymm3,ymm3,ymm7 5524 add r10,QWORD[((0+0))+r8] 5525 adc r11,QWORD[((8+0))+r8] 5526 adc r12,1 5527 mov rdx,QWORD[((0+160+0))+rbp] 5528 mov r15,rdx 5529 mulx r14,r13,r10 5530 mulx rdx,rax,r11 5531 imul r15,r12 5532 add r14,rax 5533 adc r15,rdx 5534 mov rdx,QWORD[((8+160+0))+rbp] 5535 mulx rax,r10,r10 5536 add r14,r10 5537 mulx r9,r11,r11 5538 adc r15,r11 5539 adc r9,0 5540 imul rdx,r12 5541 add r15,rax 5542 adc r9,rdx 5543 mov r10,r13 5544 mov r11,r14 5545 mov r12,r15 5546 and r12,3 5547 mov r13,r15 5548 and r13,-4 5549 mov r14,r9 5550 shrd r15,r9,2 5551 shr r9,2 5552 add r15,r13 5553 adc r9,r14 5554 add r10,r15 5555 adc r11,r9 5556 adc r12,0 5557 vpaddd ymm2,ymm2,ymm6 5558 vpaddd ymm1,ymm1,ymm5 5559 vpaddd ymm0,ymm0,ymm4 5560 vpxor ymm15,ymm15,ymm3 5561 vpxor ymm14,ymm14,ymm2 5562 vpxor ymm13,ymm13,ymm1 5563 vpxor ymm12,ymm12,ymm0 5564 vpshufb ymm15,ymm15,ymm8 5565 vpshufb ymm14,ymm14,ymm8 5566 vpshufb ymm13,ymm13,ymm8 5567 vpshufb ymm12,ymm12,ymm8 5568 vpaddd ymm11,ymm11,ymm15 5569 vpaddd ymm10,ymm10,ymm14 5570 vpaddd ymm9,ymm9,ymm13 5571 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 5572 vpxor ymm7,ymm7,ymm11 5573 vpxor ymm6,ymm6,ymm10 5574 vpxor ymm5,ymm5,ymm9 5575 vpxor ymm4,ymm4,ymm8 5576 vmovdqa YMMWORD[(160+128)+rbp],ymm8 5577 vpsrld ymm8,ymm7,25 5578 vpslld ymm7,ymm7,32-25 5579 vpxor ymm7,ymm7,ymm8 5580 vpsrld ymm8,ymm6,25 5581 vpslld ymm6,ymm6,32-25 5582 vpxor ymm6,ymm6,ymm8 5583 vpsrld ymm8,ymm5,25 5584 vpslld ymm5,ymm5,32-25 5585 vpxor ymm5,ymm5,ymm8 5586 vpsrld ymm8,ymm4,25 5587 vpslld ymm4,ymm4,32-25 5588 vpxor ymm4,ymm4,ymm8 5589 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 5590 vpalignr ymm7,ymm7,ymm7,4 5591 vpalignr ymm11,ymm11,ymm11,8 5592 vpalignr ymm15,ymm15,ymm15,12 5593 vpalignr ymm6,ymm6,ymm6,4 5594 vpalignr ymm10,ymm10,ymm10,8 5595 vpalignr ymm14,ymm14,ymm14,12 5596 vpalignr ymm5,ymm5,ymm5,4 5597 vpalignr ymm9,ymm9,ymm9,8 5598 vpalignr ymm13,ymm13,ymm13,12 5599 vpalignr ymm4,ymm4,ymm4,4 5600 vpalignr ymm8,ymm8,ymm8,8 5601 vpalignr 
ymm12,ymm12,ymm12,12 5602 vmovdqa YMMWORD[(160+128)+rbp],ymm8 5603 vmovdqa ymm8,YMMWORD[$L$rol16] 5604 vpaddd ymm3,ymm3,ymm7 5605 add r10,QWORD[((0+16))+r8] 5606 adc r11,QWORD[((8+16))+r8] 5607 adc r12,1 5608 mov rdx,QWORD[((0+160+0))+rbp] 5609 mov r15,rdx 5610 mulx r14,r13,r10 5611 mulx rdx,rax,r11 5612 imul r15,r12 5613 add r14,rax 5614 adc r15,rdx 5615 mov rdx,QWORD[((8+160+0))+rbp] 5616 mulx rax,r10,r10 5617 add r14,r10 5618 mulx r9,r11,r11 5619 adc r15,r11 5620 adc r9,0 5621 imul rdx,r12 5622 add r15,rax 5623 adc r9,rdx 5624 mov r10,r13 5625 mov r11,r14 5626 mov r12,r15 5627 and r12,3 5628 mov r13,r15 5629 and r13,-4 5630 mov r14,r9 5631 shrd r15,r9,2 5632 shr r9,2 5633 add r15,r13 5634 adc r9,r14 5635 add r10,r15 5636 adc r11,r9 5637 adc r12,0 5638 5639 lea r8,[32+r8] 5640 vpaddd ymm2,ymm2,ymm6 5641 vpaddd ymm1,ymm1,ymm5 5642 vpaddd ymm0,ymm0,ymm4 5643 vpxor ymm15,ymm15,ymm3 5644 vpxor ymm14,ymm14,ymm2 5645 vpxor ymm13,ymm13,ymm1 5646 vpxor ymm12,ymm12,ymm0 5647 vpshufb ymm15,ymm15,ymm8 5648 vpshufb ymm14,ymm14,ymm8 5649 vpshufb ymm13,ymm13,ymm8 5650 vpshufb ymm12,ymm12,ymm8 5651 vpaddd ymm11,ymm11,ymm15 5652 vpaddd ymm10,ymm10,ymm14 5653 vpaddd ymm9,ymm9,ymm13 5654 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 5655 vpxor ymm7,ymm7,ymm11 5656 vpxor ymm6,ymm6,ymm10 5657 vpxor ymm5,ymm5,ymm9 5658 vpxor ymm4,ymm4,ymm8 5659 vmovdqa YMMWORD[(160+128)+rbp],ymm8 5660 vpsrld ymm8,ymm7,20 5661 vpslld ymm7,ymm7,32-20 5662 vpxor ymm7,ymm7,ymm8 5663 vpsrld ymm8,ymm6,20 5664 vpslld ymm6,ymm6,32-20 5665 vpxor ymm6,ymm6,ymm8 5666 vpsrld ymm8,ymm5,20 5667 vpslld ymm5,ymm5,32-20 5668 vpxor ymm5,ymm5,ymm8 5669 vpsrld ymm8,ymm4,20 5670 vpslld ymm4,ymm4,32-20 5671 vpxor ymm4,ymm4,ymm8 5672 vmovdqa ymm8,YMMWORD[$L$rol8] 5673 vpaddd ymm3,ymm3,ymm7 5674 vpaddd ymm2,ymm2,ymm6 5675 vpaddd ymm1,ymm1,ymm5 5676 vpaddd ymm0,ymm0,ymm4 5677 vpxor ymm15,ymm15,ymm3 5678 vpxor ymm14,ymm14,ymm2 5679 vpxor ymm13,ymm13,ymm1 5680 vpxor ymm12,ymm12,ymm0 5681 vpshufb ymm15,ymm15,ymm8 5682 vpshufb 
ymm14,ymm14,ymm8 5683 vpshufb ymm13,ymm13,ymm8 5684 vpshufb ymm12,ymm12,ymm8 5685 vpaddd ymm11,ymm11,ymm15 5686 vpaddd ymm10,ymm10,ymm14 5687 vpaddd ymm9,ymm9,ymm13 5688 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 5689 vpxor ymm7,ymm7,ymm11 5690 vpxor ymm6,ymm6,ymm10 5691 vpxor ymm5,ymm5,ymm9 5692 vpxor ymm4,ymm4,ymm8 5693 vmovdqa YMMWORD[(160+128)+rbp],ymm8 5694 vpsrld ymm8,ymm7,25 5695 vpslld ymm7,ymm7,32-25 5696 vpxor ymm7,ymm7,ymm8 5697 vpsrld ymm8,ymm6,25 5698 vpslld ymm6,ymm6,32-25 5699 vpxor ymm6,ymm6,ymm8 5700 vpsrld ymm8,ymm5,25 5701 vpslld ymm5,ymm5,32-25 5702 vpxor ymm5,ymm5,ymm8 5703 vpsrld ymm8,ymm4,25 5704 vpslld ymm4,ymm4,32-25 5705 vpxor ymm4,ymm4,ymm8 5706 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 5707 vpalignr ymm7,ymm7,ymm7,12 5708 vpalignr ymm11,ymm11,ymm11,8 5709 vpalignr ymm15,ymm15,ymm15,4 5710 vpalignr ymm6,ymm6,ymm6,12 5711 vpalignr ymm10,ymm10,ymm10,8 5712 vpalignr ymm14,ymm14,ymm14,4 5713 vpalignr ymm5,ymm5,ymm5,12 5714 vpalignr ymm9,ymm9,ymm9,8 5715 vpalignr ymm13,ymm13,ymm13,4 5716 vpalignr ymm4,ymm4,ymm4,12 5717 vpalignr ymm8,ymm8,ymm8,8 5718 vpalignr ymm12,ymm12,ymm12,4 5719 5720 inc rcx 5721 cmp rcx,4 5722 jl NEAR $L$open_avx2_tail_512_rounds_and_x2hash 5723 cmp rcx,10 5724 jne NEAR $L$open_avx2_tail_512_rounds_and_x1hash 5725 mov rcx,rbx 5726 sub rcx,12*32 5727 and rcx,-16 5728$L$open_avx2_tail_512_hash: 5729 test rcx,rcx 5730 je NEAR $L$open_avx2_tail_512_done 5731 add r10,QWORD[((0+0))+r8] 5732 adc r11,QWORD[((8+0))+r8] 5733 adc r12,1 5734 mov rdx,QWORD[((0+160+0))+rbp] 5735 mov r15,rdx 5736 mulx r14,r13,r10 5737 mulx rdx,rax,r11 5738 imul r15,r12 5739 add r14,rax 5740 adc r15,rdx 5741 mov rdx,QWORD[((8+160+0))+rbp] 5742 mulx rax,r10,r10 5743 add r14,r10 5744 mulx r9,r11,r11 5745 adc r15,r11 5746 adc r9,0 5747 imul rdx,r12 5748 add r15,rax 5749 adc r9,rdx 5750 mov r10,r13 5751 mov r11,r14 5752 mov r12,r15 5753 and r12,3 5754 mov r13,r15 5755 and r13,-4 5756 mov r14,r9 5757 shrd r15,r9,2 5758 shr r9,2 5759 add r15,r13 5760 adc r9,r14 
5761 add r10,r15 5762 adc r11,r9 5763 adc r12,0 5764 5765 lea r8,[16+r8] 5766 sub rcx,2*8 5767 jmp NEAR $L$open_avx2_tail_512_hash 5768$L$open_avx2_tail_512_done: 5769 vpaddd ymm3,ymm3,YMMWORD[$L$chacha20_consts] 5770 vpaddd ymm7,ymm7,YMMWORD[((160+64))+rbp] 5771 vpaddd ymm11,ymm11,YMMWORD[((160+96))+rbp] 5772 vpaddd ymm15,ymm15,YMMWORD[((160+256))+rbp] 5773 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] 5774 vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] 5775 vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] 5776 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] 5777 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 5778 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] 5779 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] 5780 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 5781 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 5782 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 5783 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 5784 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 5785 5786 vmovdqa YMMWORD[(160+128)+rbp],ymm0 5787 vperm2i128 ymm0,ymm7,ymm3,0x02 5788 vperm2i128 ymm7,ymm7,ymm3,0x13 5789 vperm2i128 ymm3,ymm15,ymm11,0x02 5790 vperm2i128 ymm11,ymm15,ymm11,0x13 5791 vpxor ymm0,ymm0,YMMWORD[((0+0))+rsi] 5792 vpxor ymm3,ymm3,YMMWORD[((32+0))+rsi] 5793 vpxor ymm7,ymm7,YMMWORD[((64+0))+rsi] 5794 vpxor ymm11,ymm11,YMMWORD[((96+0))+rsi] 5795 vmovdqu YMMWORD[(0+0)+rdi],ymm0 5796 vmovdqu YMMWORD[(32+0)+rdi],ymm3 5797 vmovdqu YMMWORD[(64+0)+rdi],ymm7 5798 vmovdqu YMMWORD[(96+0)+rdi],ymm11 5799 5800 vmovdqa ymm0,YMMWORD[((160+128))+rbp] 5801 vperm2i128 ymm3,ymm6,ymm2,0x02 5802 vperm2i128 ymm6,ymm6,ymm2,0x13 5803 vperm2i128 ymm2,ymm14,ymm10,0x02 5804 vperm2i128 ymm10,ymm14,ymm10,0x13 5805 vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi] 5806 vpxor ymm2,ymm2,YMMWORD[((32+128))+rsi] 5807 vpxor ymm6,ymm6,YMMWORD[((64+128))+rsi] 5808 vpxor ymm10,ymm10,YMMWORD[((96+128))+rsi] 5809 vmovdqu YMMWORD[(0+128)+rdi],ymm3 5810 vmovdqu YMMWORD[(32+128)+rdi],ymm2 5811 vmovdqu YMMWORD[(64+128)+rdi],ymm6 5812 vmovdqu YMMWORD[(96+128)+rdi],ymm10 5813 
vperm2i128 ymm3,ymm5,ymm1,0x02 5814 vperm2i128 ymm5,ymm5,ymm1,0x13 5815 vperm2i128 ymm1,ymm13,ymm9,0x02 5816 vperm2i128 ymm9,ymm13,ymm9,0x13 5817 vpxor ymm3,ymm3,YMMWORD[((0+256))+rsi] 5818 vpxor ymm1,ymm1,YMMWORD[((32+256))+rsi] 5819 vpxor ymm5,ymm5,YMMWORD[((64+256))+rsi] 5820 vpxor ymm9,ymm9,YMMWORD[((96+256))+rsi] 5821 vmovdqu YMMWORD[(0+256)+rdi],ymm3 5822 vmovdqu YMMWORD[(32+256)+rdi],ymm1 5823 vmovdqu YMMWORD[(64+256)+rdi],ymm5 5824 vmovdqu YMMWORD[(96+256)+rdi],ymm9 5825 vperm2i128 ymm3,ymm4,ymm0,0x13 5826 vperm2i128 ymm0,ymm4,ymm0,0x02 5827 vperm2i128 ymm4,ymm12,ymm8,0x02 5828 vperm2i128 ymm12,ymm12,ymm8,0x13 5829 vmovdqa ymm8,ymm3 5830 5831 lea rsi,[384+rsi] 5832 lea rdi,[384+rdi] 5833 sub rbx,12*32 5834$L$open_avx2_tail_128_xor: 5835 cmp rbx,32 5836 jb NEAR $L$open_avx2_tail_32_xor 5837 sub rbx,32 5838 vpxor ymm0,ymm0,YMMWORD[rsi] 5839 vmovdqu YMMWORD[rdi],ymm0 5840 lea rsi,[32+rsi] 5841 lea rdi,[32+rdi] 5842 vmovdqa ymm0,ymm4 5843 vmovdqa ymm4,ymm8 5844 vmovdqa ymm8,ymm12 5845 jmp NEAR $L$open_avx2_tail_128_xor 5846$L$open_avx2_tail_32_xor: 5847 cmp rbx,16 5848 vmovdqa xmm1,xmm0 5849 jb NEAR $L$open_avx2_exit 5850 sub rbx,16 5851 5852 vpxor xmm1,xmm0,XMMWORD[rsi] 5853 vmovdqu XMMWORD[rdi],xmm1 5854 lea rsi,[16+rsi] 5855 lea rdi,[16+rdi] 5856 vperm2i128 ymm0,ymm0,ymm0,0x11 5857 vmovdqa xmm1,xmm0 5858$L$open_avx2_exit: 5859 vzeroupper 5860 jmp NEAR $L$open_sse_tail_16 5861 5862$L$open_avx2_192: 5863 vmovdqa ymm1,ymm0 5864 vmovdqa ymm2,ymm0 5865 vmovdqa ymm5,ymm4 5866 vmovdqa ymm6,ymm4 5867 vmovdqa ymm9,ymm8 5868 vmovdqa ymm10,ymm8 5869 vpaddd ymm13,ymm12,YMMWORD[$L$avx2_inc] 5870 vmovdqa ymm11,ymm12 5871 vmovdqa ymm15,ymm13 5872 mov r10,10 5873$L$open_avx2_192_rounds: 5874 vpaddd ymm0,ymm0,ymm4 5875 vpxor ymm12,ymm12,ymm0 5876 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 5877 vpaddd ymm8,ymm8,ymm12 5878 vpxor ymm4,ymm4,ymm8 5879 vpsrld ymm3,ymm4,20 5880 vpslld ymm4,ymm4,12 5881 vpxor ymm4,ymm4,ymm3 5882 vpaddd ymm0,ymm0,ymm4 5883 vpxor ymm12,ymm12,ymm0 5884 
vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 5885 vpaddd ymm8,ymm8,ymm12 5886 vpxor ymm4,ymm4,ymm8 5887 vpslld ymm3,ymm4,7 5888 vpsrld ymm4,ymm4,25 5889 vpxor ymm4,ymm4,ymm3 5890 vpalignr ymm12,ymm12,ymm12,12 5891 vpalignr ymm8,ymm8,ymm8,8 5892 vpalignr ymm4,ymm4,ymm4,4 5893 vpaddd ymm1,ymm1,ymm5 5894 vpxor ymm13,ymm13,ymm1 5895 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 5896 vpaddd ymm9,ymm9,ymm13 5897 vpxor ymm5,ymm5,ymm9 5898 vpsrld ymm3,ymm5,20 5899 vpslld ymm5,ymm5,12 5900 vpxor ymm5,ymm5,ymm3 5901 vpaddd ymm1,ymm1,ymm5 5902 vpxor ymm13,ymm13,ymm1 5903 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 5904 vpaddd ymm9,ymm9,ymm13 5905 vpxor ymm5,ymm5,ymm9 5906 vpslld ymm3,ymm5,7 5907 vpsrld ymm5,ymm5,25 5908 vpxor ymm5,ymm5,ymm3 5909 vpalignr ymm13,ymm13,ymm13,12 5910 vpalignr ymm9,ymm9,ymm9,8 5911 vpalignr ymm5,ymm5,ymm5,4 5912 vpaddd ymm0,ymm0,ymm4 5913 vpxor ymm12,ymm12,ymm0 5914 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 5915 vpaddd ymm8,ymm8,ymm12 5916 vpxor ymm4,ymm4,ymm8 5917 vpsrld ymm3,ymm4,20 5918 vpslld ymm4,ymm4,12 5919 vpxor ymm4,ymm4,ymm3 5920 vpaddd ymm0,ymm0,ymm4 5921 vpxor ymm12,ymm12,ymm0 5922 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 5923 vpaddd ymm8,ymm8,ymm12 5924 vpxor ymm4,ymm4,ymm8 5925 vpslld ymm3,ymm4,7 5926 vpsrld ymm4,ymm4,25 5927 vpxor ymm4,ymm4,ymm3 5928 vpalignr ymm12,ymm12,ymm12,4 5929 vpalignr ymm8,ymm8,ymm8,8 5930 vpalignr ymm4,ymm4,ymm4,12 5931 vpaddd ymm1,ymm1,ymm5 5932 vpxor ymm13,ymm13,ymm1 5933 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 5934 vpaddd ymm9,ymm9,ymm13 5935 vpxor ymm5,ymm5,ymm9 5936 vpsrld ymm3,ymm5,20 5937 vpslld ymm5,ymm5,12 5938 vpxor ymm5,ymm5,ymm3 5939 vpaddd ymm1,ymm1,ymm5 5940 vpxor ymm13,ymm13,ymm1 5941 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 5942 vpaddd ymm9,ymm9,ymm13 5943 vpxor ymm5,ymm5,ymm9 5944 vpslld ymm3,ymm5,7 5945 vpsrld ymm5,ymm5,25 5946 vpxor ymm5,ymm5,ymm3 5947 vpalignr ymm13,ymm13,ymm13,4 5948 vpalignr ymm9,ymm9,ymm9,8 5949 vpalignr ymm5,ymm5,ymm5,12 5950 5951 dec r10 5952 jne NEAR $L$open_avx2_192_rounds 5953 vpaddd ymm0,ymm0,ymm2 
5954 vpaddd ymm1,ymm1,ymm2 5955 vpaddd ymm4,ymm4,ymm6 5956 vpaddd ymm5,ymm5,ymm6 5957 vpaddd ymm8,ymm8,ymm10 5958 vpaddd ymm9,ymm9,ymm10 5959 vpaddd ymm12,ymm12,ymm11 5960 vpaddd ymm13,ymm13,ymm15 5961 vperm2i128 ymm3,ymm4,ymm0,0x02 5962 5963 vpand ymm3,ymm3,YMMWORD[$L$clamp] 5964 vmovdqa YMMWORD[(160+0)+rbp],ymm3 5965 5966 vperm2i128 ymm0,ymm4,ymm0,0x13 5967 vperm2i128 ymm4,ymm12,ymm8,0x13 5968 vperm2i128 ymm8,ymm5,ymm1,0x02 5969 vperm2i128 ymm12,ymm13,ymm9,0x02 5970 vperm2i128 ymm1,ymm5,ymm1,0x13 5971 vperm2i128 ymm5,ymm13,ymm9,0x13 5972$L$open_avx2_short: 5973 mov r8,r8 5974 call poly_hash_ad_internal 5975$L$open_avx2_short_hash_and_xor_loop: 5976 cmp rbx,32 5977 jb NEAR $L$open_avx2_short_tail_32 5978 sub rbx,32 5979 add r10,QWORD[((0+0))+rsi] 5980 adc r11,QWORD[((8+0))+rsi] 5981 adc r12,1 5982 mov rax,QWORD[((0+160+0))+rbp] 5983 mov r15,rax 5984 mul r10 5985 mov r13,rax 5986 mov r14,rdx 5987 mov rax,QWORD[((0+160+0))+rbp] 5988 mul r11 5989 imul r15,r12 5990 add r14,rax 5991 adc r15,rdx 5992 mov rax,QWORD[((8+160+0))+rbp] 5993 mov r9,rax 5994 mul r10 5995 add r14,rax 5996 adc rdx,0 5997 mov r10,rdx 5998 mov rax,QWORD[((8+160+0))+rbp] 5999 mul r11 6000 add r15,rax 6001 adc rdx,0 6002 imul r9,r12 6003 add r15,r10 6004 adc r9,rdx 6005 mov r10,r13 6006 mov r11,r14 6007 mov r12,r15 6008 and r12,3 6009 mov r13,r15 6010 and r13,-4 6011 mov r14,r9 6012 shrd r15,r9,2 6013 shr r9,2 6014 add r15,r13 6015 adc r9,r14 6016 add r10,r15 6017 adc r11,r9 6018 adc r12,0 6019 add r10,QWORD[((0+16))+rsi] 6020 adc r11,QWORD[((8+16))+rsi] 6021 adc r12,1 6022 mov rax,QWORD[((0+160+0))+rbp] 6023 mov r15,rax 6024 mul r10 6025 mov r13,rax 6026 mov r14,rdx 6027 mov rax,QWORD[((0+160+0))+rbp] 6028 mul r11 6029 imul r15,r12 6030 add r14,rax 6031 adc r15,rdx 6032 mov rax,QWORD[((8+160+0))+rbp] 6033 mov r9,rax 6034 mul r10 6035 add r14,rax 6036 adc rdx,0 6037 mov r10,rdx 6038 mov rax,QWORD[((8+160+0))+rbp] 6039 mul r11 6040 add r15,rax 6041 adc rdx,0 6042 imul r9,r12 6043 add r15,r10 6044 adc 
r9,rdx 6045 mov r10,r13 6046 mov r11,r14 6047 mov r12,r15 6048 and r12,3 6049 mov r13,r15 6050 and r13,-4 6051 mov r14,r9 6052 shrd r15,r9,2 6053 shr r9,2 6054 add r15,r13 6055 adc r9,r14 6056 add r10,r15 6057 adc r11,r9 6058 adc r12,0 6059 6060 6061 vpxor ymm0,ymm0,YMMWORD[rsi] 6062 vmovdqu YMMWORD[rdi],ymm0 6063 lea rsi,[32+rsi] 6064 lea rdi,[32+rdi] 6065 6066 vmovdqa ymm0,ymm4 6067 vmovdqa ymm4,ymm8 6068 vmovdqa ymm8,ymm12 6069 vmovdqa ymm12,ymm1 6070 vmovdqa ymm1,ymm5 6071 vmovdqa ymm5,ymm9 6072 vmovdqa ymm9,ymm13 6073 vmovdqa ymm13,ymm2 6074 vmovdqa ymm2,ymm6 6075 jmp NEAR $L$open_avx2_short_hash_and_xor_loop 6076$L$open_avx2_short_tail_32: 6077 cmp rbx,16 6078 vmovdqa xmm1,xmm0 6079 jb NEAR $L$open_avx2_short_tail_32_exit 6080 sub rbx,16 6081 add r10,QWORD[((0+0))+rsi] 6082 adc r11,QWORD[((8+0))+rsi] 6083 adc r12,1 6084 mov rax,QWORD[((0+160+0))+rbp] 6085 mov r15,rax 6086 mul r10 6087 mov r13,rax 6088 mov r14,rdx 6089 mov rax,QWORD[((0+160+0))+rbp] 6090 mul r11 6091 imul r15,r12 6092 add r14,rax 6093 adc r15,rdx 6094 mov rax,QWORD[((8+160+0))+rbp] 6095 mov r9,rax 6096 mul r10 6097 add r14,rax 6098 adc rdx,0 6099 mov r10,rdx 6100 mov rax,QWORD[((8+160+0))+rbp] 6101 mul r11 6102 add r15,rax 6103 adc rdx,0 6104 imul r9,r12 6105 add r15,r10 6106 adc r9,rdx 6107 mov r10,r13 6108 mov r11,r14 6109 mov r12,r15 6110 and r12,3 6111 mov r13,r15 6112 and r13,-4 6113 mov r14,r9 6114 shrd r15,r9,2 6115 shr r9,2 6116 add r15,r13 6117 adc r9,r14 6118 add r10,r15 6119 adc r11,r9 6120 adc r12,0 6121 6122 vpxor xmm3,xmm0,XMMWORD[rsi] 6123 vmovdqu XMMWORD[rdi],xmm3 6124 lea rsi,[16+rsi] 6125 lea rdi,[16+rdi] 6126 vextracti128 xmm1,ymm0,1 6127$L$open_avx2_short_tail_32_exit: 6128 vzeroupper 6129 jmp NEAR $L$open_sse_tail_16 6130 6131$L$open_avx2_320: 6132 vmovdqa ymm1,ymm0 6133 vmovdqa ymm2,ymm0 6134 vmovdqa ymm5,ymm4 6135 vmovdqa ymm6,ymm4 6136 vmovdqa ymm9,ymm8 6137 vmovdqa ymm10,ymm8 6138 vpaddd ymm13,ymm12,YMMWORD[$L$avx2_inc] 6139 vpaddd ymm14,ymm13,YMMWORD[$L$avx2_inc] 6140 
vmovdqa ymm7,ymm4 6141 vmovdqa ymm11,ymm8 6142 vmovdqa YMMWORD[(160+160)+rbp],ymm12 6143 vmovdqa YMMWORD[(160+192)+rbp],ymm13 6144 vmovdqa YMMWORD[(160+224)+rbp],ymm14 6145 mov r10,10 6146$L$open_avx2_320_rounds: 6147 vpaddd ymm0,ymm0,ymm4 6148 vpxor ymm12,ymm12,ymm0 6149 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 6150 vpaddd ymm8,ymm8,ymm12 6151 vpxor ymm4,ymm4,ymm8 6152 vpsrld ymm3,ymm4,20 6153 vpslld ymm4,ymm4,12 6154 vpxor ymm4,ymm4,ymm3 6155 vpaddd ymm0,ymm0,ymm4 6156 vpxor ymm12,ymm12,ymm0 6157 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 6158 vpaddd ymm8,ymm8,ymm12 6159 vpxor ymm4,ymm4,ymm8 6160 vpslld ymm3,ymm4,7 6161 vpsrld ymm4,ymm4,25 6162 vpxor ymm4,ymm4,ymm3 6163 vpalignr ymm12,ymm12,ymm12,12 6164 vpalignr ymm8,ymm8,ymm8,8 6165 vpalignr ymm4,ymm4,ymm4,4 6166 vpaddd ymm1,ymm1,ymm5 6167 vpxor ymm13,ymm13,ymm1 6168 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 6169 vpaddd ymm9,ymm9,ymm13 6170 vpxor ymm5,ymm5,ymm9 6171 vpsrld ymm3,ymm5,20 6172 vpslld ymm5,ymm5,12 6173 vpxor ymm5,ymm5,ymm3 6174 vpaddd ymm1,ymm1,ymm5 6175 vpxor ymm13,ymm13,ymm1 6176 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 6177 vpaddd ymm9,ymm9,ymm13 6178 vpxor ymm5,ymm5,ymm9 6179 vpslld ymm3,ymm5,7 6180 vpsrld ymm5,ymm5,25 6181 vpxor ymm5,ymm5,ymm3 6182 vpalignr ymm13,ymm13,ymm13,12 6183 vpalignr ymm9,ymm9,ymm9,8 6184 vpalignr ymm5,ymm5,ymm5,4 6185 vpaddd ymm2,ymm2,ymm6 6186 vpxor ymm14,ymm14,ymm2 6187 vpshufb ymm14,ymm14,YMMWORD[$L$rol16] 6188 vpaddd ymm10,ymm10,ymm14 6189 vpxor ymm6,ymm6,ymm10 6190 vpsrld ymm3,ymm6,20 6191 vpslld ymm6,ymm6,12 6192 vpxor ymm6,ymm6,ymm3 6193 vpaddd ymm2,ymm2,ymm6 6194 vpxor ymm14,ymm14,ymm2 6195 vpshufb ymm14,ymm14,YMMWORD[$L$rol8] 6196 vpaddd ymm10,ymm10,ymm14 6197 vpxor ymm6,ymm6,ymm10 6198 vpslld ymm3,ymm6,7 6199 vpsrld ymm6,ymm6,25 6200 vpxor ymm6,ymm6,ymm3 6201 vpalignr ymm14,ymm14,ymm14,12 6202 vpalignr ymm10,ymm10,ymm10,8 6203 vpalignr ymm6,ymm6,ymm6,4 6204 vpaddd ymm0,ymm0,ymm4 6205 vpxor ymm12,ymm12,ymm0 6206 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 6207 vpaddd 
ymm8,ymm8,ymm12 6208 vpxor ymm4,ymm4,ymm8 6209 vpsrld ymm3,ymm4,20 6210 vpslld ymm4,ymm4,12 6211 vpxor ymm4,ymm4,ymm3 6212 vpaddd ymm0,ymm0,ymm4 6213 vpxor ymm12,ymm12,ymm0 6214 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 6215 vpaddd ymm8,ymm8,ymm12 6216 vpxor ymm4,ymm4,ymm8 6217 vpslld ymm3,ymm4,7 6218 vpsrld ymm4,ymm4,25 6219 vpxor ymm4,ymm4,ymm3 6220 vpalignr ymm12,ymm12,ymm12,4 6221 vpalignr ymm8,ymm8,ymm8,8 6222 vpalignr ymm4,ymm4,ymm4,12 6223 vpaddd ymm1,ymm1,ymm5 6224 vpxor ymm13,ymm13,ymm1 6225 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 6226 vpaddd ymm9,ymm9,ymm13 6227 vpxor ymm5,ymm5,ymm9 6228 vpsrld ymm3,ymm5,20 6229 vpslld ymm5,ymm5,12 6230 vpxor ymm5,ymm5,ymm3 6231 vpaddd ymm1,ymm1,ymm5 6232 vpxor ymm13,ymm13,ymm1 6233 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 6234 vpaddd ymm9,ymm9,ymm13 6235 vpxor ymm5,ymm5,ymm9 6236 vpslld ymm3,ymm5,7 6237 vpsrld ymm5,ymm5,25 6238 vpxor ymm5,ymm5,ymm3 6239 vpalignr ymm13,ymm13,ymm13,4 6240 vpalignr ymm9,ymm9,ymm9,8 6241 vpalignr ymm5,ymm5,ymm5,12 6242 vpaddd ymm2,ymm2,ymm6 6243 vpxor ymm14,ymm14,ymm2 6244 vpshufb ymm14,ymm14,YMMWORD[$L$rol16] 6245 vpaddd ymm10,ymm10,ymm14 6246 vpxor ymm6,ymm6,ymm10 6247 vpsrld ymm3,ymm6,20 6248 vpslld ymm6,ymm6,12 6249 vpxor ymm6,ymm6,ymm3 6250 vpaddd ymm2,ymm2,ymm6 6251 vpxor ymm14,ymm14,ymm2 6252 vpshufb ymm14,ymm14,YMMWORD[$L$rol8] 6253 vpaddd ymm10,ymm10,ymm14 6254 vpxor ymm6,ymm6,ymm10 6255 vpslld ymm3,ymm6,7 6256 vpsrld ymm6,ymm6,25 6257 vpxor ymm6,ymm6,ymm3 6258 vpalignr ymm14,ymm14,ymm14,4 6259 vpalignr ymm10,ymm10,ymm10,8 6260 vpalignr ymm6,ymm6,ymm6,12 6261 6262 dec r10 6263 jne NEAR $L$open_avx2_320_rounds 6264 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 6265 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 6266 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] 6267 vpaddd ymm4,ymm4,ymm7 6268 vpaddd ymm5,ymm5,ymm7 6269 vpaddd ymm6,ymm6,ymm7 6270 vpaddd ymm8,ymm8,ymm11 6271 vpaddd ymm9,ymm9,ymm11 6272 vpaddd ymm10,ymm10,ymm11 6273 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 6274 vpaddd 
ymm13,ymm13,YMMWORD[((160+192))+rbp] 6275 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] 6276 vperm2i128 ymm3,ymm4,ymm0,0x02 6277 6278 vpand ymm3,ymm3,YMMWORD[$L$clamp] 6279 vmovdqa YMMWORD[(160+0)+rbp],ymm3 6280 6281 vperm2i128 ymm0,ymm4,ymm0,0x13 6282 vperm2i128 ymm4,ymm12,ymm8,0x13 6283 vperm2i128 ymm8,ymm5,ymm1,0x02 6284 vperm2i128 ymm12,ymm13,ymm9,0x02 6285 vperm2i128 ymm1,ymm5,ymm1,0x13 6286 vperm2i128 ymm5,ymm13,ymm9,0x13 6287 vperm2i128 ymm9,ymm6,ymm2,0x02 6288 vperm2i128 ymm13,ymm14,ymm10,0x02 6289 vperm2i128 ymm2,ymm6,ymm2,0x13 6290 vperm2i128 ymm6,ymm14,ymm10,0x13 6291 jmp NEAR $L$open_avx2_short 6292 6293 6294 6295 6296 6297ALIGN 64 6298chacha20_poly1305_seal_avx2: 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 vzeroupper 6312 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 6313 vbroadcasti128 ymm4,XMMWORD[r9] 6314 vbroadcasti128 ymm8,XMMWORD[16+r9] 6315 vbroadcasti128 ymm12,XMMWORD[32+r9] 6316 vpaddd ymm12,ymm12,YMMWORD[$L$avx2_init] 6317 cmp rbx,6*32 6318 jbe NEAR $L$seal_avx2_192 6319 cmp rbx,10*32 6320 jbe NEAR $L$seal_avx2_320 6321 vmovdqa ymm1,ymm0 6322 vmovdqa ymm2,ymm0 6323 vmovdqa ymm3,ymm0 6324 vmovdqa ymm5,ymm4 6325 vmovdqa ymm6,ymm4 6326 vmovdqa ymm7,ymm4 6327 vmovdqa YMMWORD[(160+64)+rbp],ymm4 6328 vmovdqa ymm9,ymm8 6329 vmovdqa ymm10,ymm8 6330 vmovdqa ymm11,ymm8 6331 vmovdqa YMMWORD[(160+96)+rbp],ymm8 6332 vmovdqa ymm15,ymm12 6333 vpaddd ymm14,ymm15,YMMWORD[$L$avx2_inc] 6334 vpaddd ymm13,ymm14,YMMWORD[$L$avx2_inc] 6335 vpaddd ymm12,ymm13,YMMWORD[$L$avx2_inc] 6336 vmovdqa YMMWORD[(160+160)+rbp],ymm12 6337 vmovdqa YMMWORD[(160+192)+rbp],ymm13 6338 vmovdqa YMMWORD[(160+224)+rbp],ymm14 6339 vmovdqa YMMWORD[(160+256)+rbp],ymm15 6340 mov r10,10 6341$L$seal_avx2_init_rounds: 6342 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6343 vmovdqa ymm8,YMMWORD[$L$rol16] 6344 vpaddd ymm3,ymm3,ymm7 6345 vpaddd ymm2,ymm2,ymm6 6346 vpaddd ymm1,ymm1,ymm5 6347 vpaddd ymm0,ymm0,ymm4 6348 vpxor ymm15,ymm15,ymm3 6349 vpxor ymm14,ymm14,ymm2 6350 vpxor ymm13,ymm13,ymm1 
6351 vpxor ymm12,ymm12,ymm0 6352 vpshufb ymm15,ymm15,ymm8 6353 vpshufb ymm14,ymm14,ymm8 6354 vpshufb ymm13,ymm13,ymm8 6355 vpshufb ymm12,ymm12,ymm8 6356 vpaddd ymm11,ymm11,ymm15 6357 vpaddd ymm10,ymm10,ymm14 6358 vpaddd ymm9,ymm9,ymm13 6359 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6360 vpxor ymm7,ymm7,ymm11 6361 vpxor ymm6,ymm6,ymm10 6362 vpxor ymm5,ymm5,ymm9 6363 vpxor ymm4,ymm4,ymm8 6364 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6365 vpsrld ymm8,ymm7,20 6366 vpslld ymm7,ymm7,32-20 6367 vpxor ymm7,ymm7,ymm8 6368 vpsrld ymm8,ymm6,20 6369 vpslld ymm6,ymm6,32-20 6370 vpxor ymm6,ymm6,ymm8 6371 vpsrld ymm8,ymm5,20 6372 vpslld ymm5,ymm5,32-20 6373 vpxor ymm5,ymm5,ymm8 6374 vpsrld ymm8,ymm4,20 6375 vpslld ymm4,ymm4,32-20 6376 vpxor ymm4,ymm4,ymm8 6377 vmovdqa ymm8,YMMWORD[$L$rol8] 6378 vpaddd ymm3,ymm3,ymm7 6379 vpaddd ymm2,ymm2,ymm6 6380 vpaddd ymm1,ymm1,ymm5 6381 vpaddd ymm0,ymm0,ymm4 6382 vpxor ymm15,ymm15,ymm3 6383 vpxor ymm14,ymm14,ymm2 6384 vpxor ymm13,ymm13,ymm1 6385 vpxor ymm12,ymm12,ymm0 6386 vpshufb ymm15,ymm15,ymm8 6387 vpshufb ymm14,ymm14,ymm8 6388 vpshufb ymm13,ymm13,ymm8 6389 vpshufb ymm12,ymm12,ymm8 6390 vpaddd ymm11,ymm11,ymm15 6391 vpaddd ymm10,ymm10,ymm14 6392 vpaddd ymm9,ymm9,ymm13 6393 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6394 vpxor ymm7,ymm7,ymm11 6395 vpxor ymm6,ymm6,ymm10 6396 vpxor ymm5,ymm5,ymm9 6397 vpxor ymm4,ymm4,ymm8 6398 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6399 vpsrld ymm8,ymm7,25 6400 vpslld ymm7,ymm7,32-25 6401 vpxor ymm7,ymm7,ymm8 6402 vpsrld ymm8,ymm6,25 6403 vpslld ymm6,ymm6,32-25 6404 vpxor ymm6,ymm6,ymm8 6405 vpsrld ymm8,ymm5,25 6406 vpslld ymm5,ymm5,32-25 6407 vpxor ymm5,ymm5,ymm8 6408 vpsrld ymm8,ymm4,25 6409 vpslld ymm4,ymm4,32-25 6410 vpxor ymm4,ymm4,ymm8 6411 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 6412 vpalignr ymm7,ymm7,ymm7,4 6413 vpalignr ymm11,ymm11,ymm11,8 6414 vpalignr ymm15,ymm15,ymm15,12 6415 vpalignr ymm6,ymm6,ymm6,4 6416 vpalignr ymm10,ymm10,ymm10,8 6417 vpalignr ymm14,ymm14,ymm14,12 6418 vpalignr ymm5,ymm5,ymm5,4 6419 
vpalignr ymm9,ymm9,ymm9,8 6420 vpalignr ymm13,ymm13,ymm13,12 6421 vpalignr ymm4,ymm4,ymm4,4 6422 vpalignr ymm8,ymm8,ymm8,8 6423 vpalignr ymm12,ymm12,ymm12,12 6424 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6425 vmovdqa ymm8,YMMWORD[$L$rol16] 6426 vpaddd ymm3,ymm3,ymm7 6427 vpaddd ymm2,ymm2,ymm6 6428 vpaddd ymm1,ymm1,ymm5 6429 vpaddd ymm0,ymm0,ymm4 6430 vpxor ymm15,ymm15,ymm3 6431 vpxor ymm14,ymm14,ymm2 6432 vpxor ymm13,ymm13,ymm1 6433 vpxor ymm12,ymm12,ymm0 6434 vpshufb ymm15,ymm15,ymm8 6435 vpshufb ymm14,ymm14,ymm8 6436 vpshufb ymm13,ymm13,ymm8 6437 vpshufb ymm12,ymm12,ymm8 6438 vpaddd ymm11,ymm11,ymm15 6439 vpaddd ymm10,ymm10,ymm14 6440 vpaddd ymm9,ymm9,ymm13 6441 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6442 vpxor ymm7,ymm7,ymm11 6443 vpxor ymm6,ymm6,ymm10 6444 vpxor ymm5,ymm5,ymm9 6445 vpxor ymm4,ymm4,ymm8 6446 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6447 vpsrld ymm8,ymm7,20 6448 vpslld ymm7,ymm7,32-20 6449 vpxor ymm7,ymm7,ymm8 6450 vpsrld ymm8,ymm6,20 6451 vpslld ymm6,ymm6,32-20 6452 vpxor ymm6,ymm6,ymm8 6453 vpsrld ymm8,ymm5,20 6454 vpslld ymm5,ymm5,32-20 6455 vpxor ymm5,ymm5,ymm8 6456 vpsrld ymm8,ymm4,20 6457 vpslld ymm4,ymm4,32-20 6458 vpxor ymm4,ymm4,ymm8 6459 vmovdqa ymm8,YMMWORD[$L$rol8] 6460 vpaddd ymm3,ymm3,ymm7 6461 vpaddd ymm2,ymm2,ymm6 6462 vpaddd ymm1,ymm1,ymm5 6463 vpaddd ymm0,ymm0,ymm4 6464 vpxor ymm15,ymm15,ymm3 6465 vpxor ymm14,ymm14,ymm2 6466 vpxor ymm13,ymm13,ymm1 6467 vpxor ymm12,ymm12,ymm0 6468 vpshufb ymm15,ymm15,ymm8 6469 vpshufb ymm14,ymm14,ymm8 6470 vpshufb ymm13,ymm13,ymm8 6471 vpshufb ymm12,ymm12,ymm8 6472 vpaddd ymm11,ymm11,ymm15 6473 vpaddd ymm10,ymm10,ymm14 6474 vpaddd ymm9,ymm9,ymm13 6475 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6476 vpxor ymm7,ymm7,ymm11 6477 vpxor ymm6,ymm6,ymm10 6478 vpxor ymm5,ymm5,ymm9 6479 vpxor ymm4,ymm4,ymm8 6480 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6481 vpsrld ymm8,ymm7,25 6482 vpslld ymm7,ymm7,32-25 6483 vpxor ymm7,ymm7,ymm8 6484 vpsrld ymm8,ymm6,25 6485 vpslld ymm6,ymm6,32-25 6486 vpxor ymm6,ymm6,ymm8 6487 
vpsrld ymm8,ymm5,25 6488 vpslld ymm5,ymm5,32-25 6489 vpxor ymm5,ymm5,ymm8 6490 vpsrld ymm8,ymm4,25 6491 vpslld ymm4,ymm4,32-25 6492 vpxor ymm4,ymm4,ymm8 6493 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 6494 vpalignr ymm7,ymm7,ymm7,12 6495 vpalignr ymm11,ymm11,ymm11,8 6496 vpalignr ymm15,ymm15,ymm15,4 6497 vpalignr ymm6,ymm6,ymm6,12 6498 vpalignr ymm10,ymm10,ymm10,8 6499 vpalignr ymm14,ymm14,ymm14,4 6500 vpalignr ymm5,ymm5,ymm5,12 6501 vpalignr ymm9,ymm9,ymm9,8 6502 vpalignr ymm13,ymm13,ymm13,4 6503 vpalignr ymm4,ymm4,ymm4,12 6504 vpalignr ymm8,ymm8,ymm8,8 6505 vpalignr ymm12,ymm12,ymm12,4 6506 6507 dec r10 6508 jnz NEAR $L$seal_avx2_init_rounds 6509 vpaddd ymm3,ymm3,YMMWORD[$L$chacha20_consts] 6510 vpaddd ymm7,ymm7,YMMWORD[((160+64))+rbp] 6511 vpaddd ymm11,ymm11,YMMWORD[((160+96))+rbp] 6512 vpaddd ymm15,ymm15,YMMWORD[((160+256))+rbp] 6513 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] 6514 vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] 6515 vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] 6516 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] 6517 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 6518 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] 6519 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] 6520 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 6521 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 6522 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 6523 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 6524 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 6525 6526 vperm2i128 ymm11,ymm15,ymm11,0x13 6527 vperm2i128 ymm15,ymm7,ymm3,0x02 6528 vperm2i128 ymm3,ymm7,ymm3,0x13 6529 vpand ymm15,ymm15,YMMWORD[$L$clamp] 6530 vmovdqa YMMWORD[(160+0)+rbp],ymm15 6531 mov r8,r8 6532 call poly_hash_ad_internal 6533 6534 vpxor ymm3,ymm3,YMMWORD[rsi] 6535 vpxor ymm11,ymm11,YMMWORD[32+rsi] 6536 vmovdqu YMMWORD[rdi],ymm3 6537 vmovdqu YMMWORD[32+rdi],ymm11 6538 vperm2i128 ymm15,ymm6,ymm2,0x02 6539 vperm2i128 ymm6,ymm6,ymm2,0x13 6540 vperm2i128 ymm2,ymm14,ymm10,0x02 6541 vperm2i128 ymm10,ymm14,ymm10,0x13 6542 vpxor ymm15,ymm15,YMMWORD[((0+64))+rsi] 
6543 vpxor ymm2,ymm2,YMMWORD[((32+64))+rsi] 6544 vpxor ymm6,ymm6,YMMWORD[((64+64))+rsi] 6545 vpxor ymm10,ymm10,YMMWORD[((96+64))+rsi] 6546 vmovdqu YMMWORD[(0+64)+rdi],ymm15 6547 vmovdqu YMMWORD[(32+64)+rdi],ymm2 6548 vmovdqu YMMWORD[(64+64)+rdi],ymm6 6549 vmovdqu YMMWORD[(96+64)+rdi],ymm10 6550 vperm2i128 ymm15,ymm5,ymm1,0x02 6551 vperm2i128 ymm5,ymm5,ymm1,0x13 6552 vperm2i128 ymm1,ymm13,ymm9,0x02 6553 vperm2i128 ymm9,ymm13,ymm9,0x13 6554 vpxor ymm15,ymm15,YMMWORD[((0+192))+rsi] 6555 vpxor ymm1,ymm1,YMMWORD[((32+192))+rsi] 6556 vpxor ymm5,ymm5,YMMWORD[((64+192))+rsi] 6557 vpxor ymm9,ymm9,YMMWORD[((96+192))+rsi] 6558 vmovdqu YMMWORD[(0+192)+rdi],ymm15 6559 vmovdqu YMMWORD[(32+192)+rdi],ymm1 6560 vmovdqu YMMWORD[(64+192)+rdi],ymm5 6561 vmovdqu YMMWORD[(96+192)+rdi],ymm9 6562 vperm2i128 ymm15,ymm4,ymm0,0x13 6563 vperm2i128 ymm0,ymm4,ymm0,0x02 6564 vperm2i128 ymm4,ymm12,ymm8,0x02 6565 vperm2i128 ymm12,ymm12,ymm8,0x13 6566 vmovdqa ymm8,ymm15 6567 6568 lea rsi,[320+rsi] 6569 sub rbx,10*32 6570 mov rcx,10*32 6571 cmp rbx,4*32 6572 jbe NEAR $L$seal_avx2_short_hash_remainder 6573 vpxor ymm0,ymm0,YMMWORD[rsi] 6574 vpxor ymm4,ymm4,YMMWORD[32+rsi] 6575 vpxor ymm8,ymm8,YMMWORD[64+rsi] 6576 vpxor ymm12,ymm12,YMMWORD[96+rsi] 6577 vmovdqu YMMWORD[320+rdi],ymm0 6578 vmovdqu YMMWORD[352+rdi],ymm4 6579 vmovdqu YMMWORD[384+rdi],ymm8 6580 vmovdqu YMMWORD[416+rdi],ymm12 6581 lea rsi,[128+rsi] 6582 sub rbx,4*32 6583 mov rcx,8 6584 mov r8,2 6585 cmp rbx,4*32 6586 jbe NEAR $L$seal_avx2_tail_128 6587 cmp rbx,8*32 6588 jbe NEAR $L$seal_avx2_tail_256 6589 cmp rbx,12*32 6590 jbe NEAR $L$seal_avx2_tail_384 6591 cmp rbx,16*32 6592 jbe NEAR $L$seal_avx2_tail_512 6593 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 6594 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 6595 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 6596 vmovdqa ymm1,ymm0 6597 vmovdqa ymm5,ymm4 6598 vmovdqa ymm9,ymm8 6599 vmovdqa ymm2,ymm0 6600 vmovdqa ymm6,ymm4 6601 vmovdqa ymm10,ymm8 6602 vmovdqa ymm3,ymm0 6603 vmovdqa ymm7,ymm4 6604 vmovdqa ymm11,ymm8 
6605 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 6606 vpaddd ymm15,ymm12,YMMWORD[((160+160))+rbp] 6607 vpaddd ymm14,ymm12,ymm15 6608 vpaddd ymm13,ymm12,ymm14 6609 vpaddd ymm12,ymm12,ymm13 6610 vmovdqa YMMWORD[(160+256)+rbp],ymm15 6611 vmovdqa YMMWORD[(160+224)+rbp],ymm14 6612 vmovdqa YMMWORD[(160+192)+rbp],ymm13 6613 vmovdqa YMMWORD[(160+160)+rbp],ymm12 6614 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6615 vmovdqa ymm8,YMMWORD[$L$rol16] 6616 vpaddd ymm3,ymm3,ymm7 6617 vpaddd ymm2,ymm2,ymm6 6618 vpaddd ymm1,ymm1,ymm5 6619 vpaddd ymm0,ymm0,ymm4 6620 vpxor ymm15,ymm15,ymm3 6621 vpxor ymm14,ymm14,ymm2 6622 vpxor ymm13,ymm13,ymm1 6623 vpxor ymm12,ymm12,ymm0 6624 vpshufb ymm15,ymm15,ymm8 6625 vpshufb ymm14,ymm14,ymm8 6626 vpshufb ymm13,ymm13,ymm8 6627 vpshufb ymm12,ymm12,ymm8 6628 vpaddd ymm11,ymm11,ymm15 6629 vpaddd ymm10,ymm10,ymm14 6630 vpaddd ymm9,ymm9,ymm13 6631 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6632 vpxor ymm7,ymm7,ymm11 6633 vpxor ymm6,ymm6,ymm10 6634 vpxor ymm5,ymm5,ymm9 6635 vpxor ymm4,ymm4,ymm8 6636 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6637 vpsrld ymm8,ymm7,20 6638 vpslld ymm7,ymm7,32-20 6639 vpxor ymm7,ymm7,ymm8 6640 vpsrld ymm8,ymm6,20 6641 vpslld ymm6,ymm6,32-20 6642 vpxor ymm6,ymm6,ymm8 6643 vpsrld ymm8,ymm5,20 6644 vpslld ymm5,ymm5,32-20 6645 vpxor ymm5,ymm5,ymm8 6646 vpsrld ymm8,ymm4,20 6647 vpslld ymm4,ymm4,32-20 6648 vpxor ymm4,ymm4,ymm8 6649 vmovdqa ymm8,YMMWORD[$L$rol8] 6650 vpaddd ymm3,ymm3,ymm7 6651 vpaddd ymm2,ymm2,ymm6 6652 vpaddd ymm1,ymm1,ymm5 6653 vpaddd ymm0,ymm0,ymm4 6654 vpxor ymm15,ymm15,ymm3 6655 vpxor ymm14,ymm14,ymm2 6656 vpxor ymm13,ymm13,ymm1 6657 vpxor ymm12,ymm12,ymm0 6658 vpshufb ymm15,ymm15,ymm8 6659 vpshufb ymm14,ymm14,ymm8 6660 vpshufb ymm13,ymm13,ymm8 6661 vpshufb ymm12,ymm12,ymm8 6662 vpaddd ymm11,ymm11,ymm15 6663 vpaddd ymm10,ymm10,ymm14 6664 vpaddd ymm9,ymm9,ymm13 6665 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6666 vpxor ymm7,ymm7,ymm11 6667 vpxor ymm6,ymm6,ymm10 6668 vpxor ymm5,ymm5,ymm9 6669 vpxor ymm4,ymm4,ymm8 6670 vmovdqa 
YMMWORD[(160+128)+rbp],ymm8 6671 vpsrld ymm8,ymm7,25 6672 vpslld ymm7,ymm7,32-25 6673 vpxor ymm7,ymm7,ymm8 6674 vpsrld ymm8,ymm6,25 6675 vpslld ymm6,ymm6,32-25 6676 vpxor ymm6,ymm6,ymm8 6677 vpsrld ymm8,ymm5,25 6678 vpslld ymm5,ymm5,32-25 6679 vpxor ymm5,ymm5,ymm8 6680 vpsrld ymm8,ymm4,25 6681 vpslld ymm4,ymm4,32-25 6682 vpxor ymm4,ymm4,ymm8 6683 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 6684 vpalignr ymm7,ymm7,ymm7,4 6685 vpalignr ymm11,ymm11,ymm11,8 6686 vpalignr ymm15,ymm15,ymm15,12 6687 vpalignr ymm6,ymm6,ymm6,4 6688 vpalignr ymm10,ymm10,ymm10,8 6689 vpalignr ymm14,ymm14,ymm14,12 6690 vpalignr ymm5,ymm5,ymm5,4 6691 vpalignr ymm9,ymm9,ymm9,8 6692 vpalignr ymm13,ymm13,ymm13,12 6693 vpalignr ymm4,ymm4,ymm4,4 6694 vpalignr ymm8,ymm8,ymm8,8 6695 vpalignr ymm12,ymm12,ymm12,12 6696 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6697 vmovdqa ymm8,YMMWORD[$L$rol16] 6698 vpaddd ymm3,ymm3,ymm7 6699 vpaddd ymm2,ymm2,ymm6 6700 vpaddd ymm1,ymm1,ymm5 6701 vpaddd ymm0,ymm0,ymm4 6702 vpxor ymm15,ymm15,ymm3 6703 vpxor ymm14,ymm14,ymm2 6704 vpxor ymm13,ymm13,ymm1 6705 vpxor ymm12,ymm12,ymm0 6706 vpshufb ymm15,ymm15,ymm8 6707 vpshufb ymm14,ymm14,ymm8 6708 vpshufb ymm13,ymm13,ymm8 6709 vpshufb ymm12,ymm12,ymm8 6710 vpaddd ymm11,ymm11,ymm15 6711 vpaddd ymm10,ymm10,ymm14 6712 vpaddd ymm9,ymm9,ymm13 6713 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6714 vpxor ymm7,ymm7,ymm11 6715 vpxor ymm6,ymm6,ymm10 6716 vpxor ymm5,ymm5,ymm9 6717 vpxor ymm4,ymm4,ymm8 6718 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6719 vpsrld ymm8,ymm7,20 6720 vpslld ymm7,ymm7,32-20 6721 vpxor ymm7,ymm7,ymm8 6722 vpsrld ymm8,ymm6,20 6723 vpslld ymm6,ymm6,32-20 6724 vpxor ymm6,ymm6,ymm8 6725 vpsrld ymm8,ymm5,20 6726 vpslld ymm5,ymm5,32-20 6727 vpxor ymm5,ymm5,ymm8 6728 vpsrld ymm8,ymm4,20 6729 vpslld ymm4,ymm4,32-20 6730 vpxor ymm4,ymm4,ymm8 6731 vmovdqa ymm8,YMMWORD[$L$rol8] 6732 vpaddd ymm3,ymm3,ymm7 6733 vpaddd ymm2,ymm2,ymm6 6734 vpaddd ymm1,ymm1,ymm5 6735 vpaddd ymm0,ymm0,ymm4 6736 vpxor ymm15,ymm15,ymm3 6737 vpxor ymm14,ymm14,ymm2 
6738 vpxor ymm13,ymm13,ymm1 6739 vpxor ymm12,ymm12,ymm0 6740 vpshufb ymm15,ymm15,ymm8 6741 vpshufb ymm14,ymm14,ymm8 6742 vpshufb ymm13,ymm13,ymm8 6743 vpshufb ymm12,ymm12,ymm8 6744 vpaddd ymm11,ymm11,ymm15 6745 vpaddd ymm10,ymm10,ymm14 6746 vpaddd ymm9,ymm9,ymm13 6747 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6748 vpxor ymm7,ymm7,ymm11 6749 vpxor ymm6,ymm6,ymm10 6750 vpxor ymm5,ymm5,ymm9 6751 vpxor ymm4,ymm4,ymm8 6752 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6753 vpsrld ymm8,ymm7,25 6754 vpslld ymm7,ymm7,32-25 6755 vpxor ymm7,ymm7,ymm8 6756 vpsrld ymm8,ymm6,25 6757 vpslld ymm6,ymm6,32-25 6758 vpxor ymm6,ymm6,ymm8 6759 vpsrld ymm8,ymm5,25 6760 vpslld ymm5,ymm5,32-25 6761 vpxor ymm5,ymm5,ymm8 6762 vpsrld ymm8,ymm4,25 6763 vpslld ymm4,ymm4,32-25 6764 vpxor ymm4,ymm4,ymm8 6765 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 6766 vpalignr ymm7,ymm7,ymm7,12 6767 vpalignr ymm11,ymm11,ymm11,8 6768 vpalignr ymm15,ymm15,ymm15,4 6769 vpalignr ymm6,ymm6,ymm6,12 6770 vpalignr ymm10,ymm10,ymm10,8 6771 vpalignr ymm14,ymm14,ymm14,4 6772 vpalignr ymm5,ymm5,ymm5,12 6773 vpalignr ymm9,ymm9,ymm9,8 6774 vpalignr ymm13,ymm13,ymm13,4 6775 vpalignr ymm4,ymm4,ymm4,12 6776 vpalignr ymm8,ymm8,ymm8,8 6777 vpalignr ymm12,ymm12,ymm12,4 6778 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6779 vmovdqa ymm8,YMMWORD[$L$rol16] 6780 vpaddd ymm3,ymm3,ymm7 6781 vpaddd ymm2,ymm2,ymm6 6782 vpaddd ymm1,ymm1,ymm5 6783 vpaddd ymm0,ymm0,ymm4 6784 vpxor ymm15,ymm15,ymm3 6785 vpxor ymm14,ymm14,ymm2 6786 vpxor ymm13,ymm13,ymm1 6787 vpxor ymm12,ymm12,ymm0 6788 vpshufb ymm15,ymm15,ymm8 6789 vpshufb ymm14,ymm14,ymm8 6790 vpshufb ymm13,ymm13,ymm8 6791 vpshufb ymm12,ymm12,ymm8 6792 vpaddd ymm11,ymm11,ymm15 6793 vpaddd ymm10,ymm10,ymm14 6794 vpaddd ymm9,ymm9,ymm13 6795 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6796 vpxor ymm7,ymm7,ymm11 6797 vpxor ymm6,ymm6,ymm10 6798 vpxor ymm5,ymm5,ymm9 6799 vpxor ymm4,ymm4,ymm8 6800 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6801 vpsrld ymm8,ymm7,20 6802 vpslld ymm7,ymm7,32-20 6803 vpxor ymm7,ymm7,ymm8 6804 
vpsrld ymm8,ymm6,20 6805 vpslld ymm6,ymm6,32-20 6806 vpxor ymm6,ymm6,ymm8 6807 vpsrld ymm8,ymm5,20 6808 vpslld ymm5,ymm5,32-20 6809 vpxor ymm5,ymm5,ymm8 6810 vpsrld ymm8,ymm4,20 6811 vpslld ymm4,ymm4,32-20 6812 vpxor ymm4,ymm4,ymm8 6813 vmovdqa ymm8,YMMWORD[$L$rol8] 6814 vpaddd ymm3,ymm3,ymm7 6815 vpaddd ymm2,ymm2,ymm6 6816 vpaddd ymm1,ymm1,ymm5 6817 vpaddd ymm0,ymm0,ymm4 6818 vpxor ymm15,ymm15,ymm3 6819 6820 sub rdi,16 6821 mov rcx,9 6822 jmp NEAR $L$seal_avx2_main_loop_rounds_entry 6823ALIGN 32 6824$L$seal_avx2_main_loop: 6825 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 6826 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 6827 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 6828 vmovdqa ymm1,ymm0 6829 vmovdqa ymm5,ymm4 6830 vmovdqa ymm9,ymm8 6831 vmovdqa ymm2,ymm0 6832 vmovdqa ymm6,ymm4 6833 vmovdqa ymm10,ymm8 6834 vmovdqa ymm3,ymm0 6835 vmovdqa ymm7,ymm4 6836 vmovdqa ymm11,ymm8 6837 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 6838 vpaddd ymm15,ymm12,YMMWORD[((160+160))+rbp] 6839 vpaddd ymm14,ymm12,ymm15 6840 vpaddd ymm13,ymm12,ymm14 6841 vpaddd ymm12,ymm12,ymm13 6842 vmovdqa YMMWORD[(160+256)+rbp],ymm15 6843 vmovdqa YMMWORD[(160+224)+rbp],ymm14 6844 vmovdqa YMMWORD[(160+192)+rbp],ymm13 6845 vmovdqa YMMWORD[(160+160)+rbp],ymm12 6846 6847 mov rcx,10 6848ALIGN 32 6849$L$seal_avx2_main_loop_rounds: 6850 add r10,QWORD[((0+0))+rdi] 6851 adc r11,QWORD[((8+0))+rdi] 6852 adc r12,1 6853 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6854 vmovdqa ymm8,YMMWORD[$L$rol16] 6855 vpaddd ymm3,ymm3,ymm7 6856 vpaddd ymm2,ymm2,ymm6 6857 vpaddd ymm1,ymm1,ymm5 6858 vpaddd ymm0,ymm0,ymm4 6859 vpxor ymm15,ymm15,ymm3 6860 vpxor ymm14,ymm14,ymm2 6861 vpxor ymm13,ymm13,ymm1 6862 vpxor ymm12,ymm12,ymm0 6863 mov rdx,QWORD[((0+160+0))+rbp] 6864 mov r15,rdx 6865 mulx r14,r13,r10 6866 mulx rdx,rax,r11 6867 imul r15,r12 6868 add r14,rax 6869 adc r15,rdx 6870 vpshufb ymm15,ymm15,ymm8 6871 vpshufb ymm14,ymm14,ymm8 6872 vpshufb ymm13,ymm13,ymm8 6873 vpshufb ymm12,ymm12,ymm8 6874 vpaddd ymm11,ymm11,ymm15 6875 vpaddd ymm10,ymm10,ymm14 6876 
vpaddd ymm9,ymm9,ymm13 6877 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6878 vpxor ymm7,ymm7,ymm11 6879 mov rdx,QWORD[((8+160+0))+rbp] 6880 mulx rax,r10,r10 6881 add r14,r10 6882 mulx r9,r11,r11 6883 adc r15,r11 6884 adc r9,0 6885 imul rdx,r12 6886 vpxor ymm6,ymm6,ymm10 6887 vpxor ymm5,ymm5,ymm9 6888 vpxor ymm4,ymm4,ymm8 6889 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6890 vpsrld ymm8,ymm7,20 6891 vpslld ymm7,ymm7,32-20 6892 vpxor ymm7,ymm7,ymm8 6893 vpsrld ymm8,ymm6,20 6894 vpslld ymm6,ymm6,32-20 6895 vpxor ymm6,ymm6,ymm8 6896 vpsrld ymm8,ymm5,20 6897 vpslld ymm5,ymm5,32-20 6898 add r15,rax 6899 adc r9,rdx 6900 vpxor ymm5,ymm5,ymm8 6901 vpsrld ymm8,ymm4,20 6902 vpslld ymm4,ymm4,32-20 6903 vpxor ymm4,ymm4,ymm8 6904 vmovdqa ymm8,YMMWORD[$L$rol8] 6905 vpaddd ymm3,ymm3,ymm7 6906 vpaddd ymm2,ymm2,ymm6 6907 vpaddd ymm1,ymm1,ymm5 6908 vpaddd ymm0,ymm0,ymm4 6909 vpxor ymm15,ymm15,ymm3 6910 mov r10,r13 6911 mov r11,r14 6912 mov r12,r15 6913 and r12,3 6914 mov r13,r15 6915 and r13,-4 6916 mov r14,r9 6917 shrd r15,r9,2 6918 shr r9,2 6919 add r15,r13 6920 adc r9,r14 6921 add r10,r15 6922 adc r11,r9 6923 adc r12,0 6924 6925$L$seal_avx2_main_loop_rounds_entry: 6926 vpxor ymm14,ymm14,ymm2 6927 vpxor ymm13,ymm13,ymm1 6928 vpxor ymm12,ymm12,ymm0 6929 vpshufb ymm15,ymm15,ymm8 6930 vpshufb ymm14,ymm14,ymm8 6931 vpshufb ymm13,ymm13,ymm8 6932 vpshufb ymm12,ymm12,ymm8 6933 vpaddd ymm11,ymm11,ymm15 6934 vpaddd ymm10,ymm10,ymm14 6935 add r10,QWORD[((0+16))+rdi] 6936 adc r11,QWORD[((8+16))+rdi] 6937 adc r12,1 6938 vpaddd ymm9,ymm9,ymm13 6939 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 6940 vpxor ymm7,ymm7,ymm11 6941 vpxor ymm6,ymm6,ymm10 6942 vpxor ymm5,ymm5,ymm9 6943 vpxor ymm4,ymm4,ymm8 6944 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6945 vpsrld ymm8,ymm7,25 6946 mov rdx,QWORD[((0+160+0))+rbp] 6947 mov r15,rdx 6948 mulx r14,r13,r10 6949 mulx rdx,rax,r11 6950 imul r15,r12 6951 add r14,rax 6952 adc r15,rdx 6953 vpslld ymm7,ymm7,32-25 6954 vpxor ymm7,ymm7,ymm8 6955 vpsrld ymm8,ymm6,25 6956 vpslld 
ymm6,ymm6,32-25 6957 vpxor ymm6,ymm6,ymm8 6958 vpsrld ymm8,ymm5,25 6959 vpslld ymm5,ymm5,32-25 6960 vpxor ymm5,ymm5,ymm8 6961 vpsrld ymm8,ymm4,25 6962 vpslld ymm4,ymm4,32-25 6963 vpxor ymm4,ymm4,ymm8 6964 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 6965 vpalignr ymm7,ymm7,ymm7,4 6966 vpalignr ymm11,ymm11,ymm11,8 6967 vpalignr ymm15,ymm15,ymm15,12 6968 vpalignr ymm6,ymm6,ymm6,4 6969 vpalignr ymm10,ymm10,ymm10,8 6970 vpalignr ymm14,ymm14,ymm14,12 6971 mov rdx,QWORD[((8+160+0))+rbp] 6972 mulx rax,r10,r10 6973 add r14,r10 6974 mulx r9,r11,r11 6975 adc r15,r11 6976 adc r9,0 6977 imul rdx,r12 6978 vpalignr ymm5,ymm5,ymm5,4 6979 vpalignr ymm9,ymm9,ymm9,8 6980 vpalignr ymm13,ymm13,ymm13,12 6981 vpalignr ymm4,ymm4,ymm4,4 6982 vpalignr ymm8,ymm8,ymm8,8 6983 vpalignr ymm12,ymm12,ymm12,12 6984 vmovdqa YMMWORD[(160+128)+rbp],ymm8 6985 vmovdqa ymm8,YMMWORD[$L$rol16] 6986 vpaddd ymm3,ymm3,ymm7 6987 vpaddd ymm2,ymm2,ymm6 6988 vpaddd ymm1,ymm1,ymm5 6989 vpaddd ymm0,ymm0,ymm4 6990 vpxor ymm15,ymm15,ymm3 6991 vpxor ymm14,ymm14,ymm2 6992 vpxor ymm13,ymm13,ymm1 6993 vpxor ymm12,ymm12,ymm0 6994 vpshufb ymm15,ymm15,ymm8 6995 vpshufb ymm14,ymm14,ymm8 6996 add r15,rax 6997 adc r9,rdx 6998 vpshufb ymm13,ymm13,ymm8 6999 vpshufb ymm12,ymm12,ymm8 7000 vpaddd ymm11,ymm11,ymm15 7001 vpaddd ymm10,ymm10,ymm14 7002 vpaddd ymm9,ymm9,ymm13 7003 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 7004 vpxor ymm7,ymm7,ymm11 7005 vpxor ymm6,ymm6,ymm10 7006 vpxor ymm5,ymm5,ymm9 7007 mov r10,r13 7008 mov r11,r14 7009 mov r12,r15 7010 and r12,3 7011 mov r13,r15 7012 and r13,-4 7013 mov r14,r9 7014 shrd r15,r9,2 7015 shr r9,2 7016 add r15,r13 7017 adc r9,r14 7018 add r10,r15 7019 adc r11,r9 7020 adc r12,0 7021 vpxor ymm4,ymm4,ymm8 7022 vmovdqa YMMWORD[(160+128)+rbp],ymm8 7023 vpsrld ymm8,ymm7,20 7024 vpslld ymm7,ymm7,32-20 7025 vpxor ymm7,ymm7,ymm8 7026 vpsrld ymm8,ymm6,20 7027 vpslld ymm6,ymm6,32-20 7028 vpxor ymm6,ymm6,ymm8 7029 add r10,QWORD[((0+32))+rdi] 7030 adc r11,QWORD[((8+32))+rdi] 7031 adc r12,1 7032 7033 lea 
rdi,[48+rdi] 7034 vpsrld ymm8,ymm5,20 7035 vpslld ymm5,ymm5,32-20 7036 vpxor ymm5,ymm5,ymm8 7037 vpsrld ymm8,ymm4,20 7038 vpslld ymm4,ymm4,32-20 7039 vpxor ymm4,ymm4,ymm8 7040 vmovdqa ymm8,YMMWORD[$L$rol8] 7041 vpaddd ymm3,ymm3,ymm7 7042 vpaddd ymm2,ymm2,ymm6 7043 vpaddd ymm1,ymm1,ymm5 7044 vpaddd ymm0,ymm0,ymm4 7045 vpxor ymm15,ymm15,ymm3 7046 vpxor ymm14,ymm14,ymm2 7047 vpxor ymm13,ymm13,ymm1 7048 vpxor ymm12,ymm12,ymm0 7049 vpshufb ymm15,ymm15,ymm8 7050 vpshufb ymm14,ymm14,ymm8 7051 vpshufb ymm13,ymm13,ymm8 7052 mov rdx,QWORD[((0+160+0))+rbp] 7053 mov r15,rdx 7054 mulx r14,r13,r10 7055 mulx rdx,rax,r11 7056 imul r15,r12 7057 add r14,rax 7058 adc r15,rdx 7059 vpshufb ymm12,ymm12,ymm8 7060 vpaddd ymm11,ymm11,ymm15 7061 vpaddd ymm10,ymm10,ymm14 7062 vpaddd ymm9,ymm9,ymm13 7063 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 7064 vpxor ymm7,ymm7,ymm11 7065 vpxor ymm6,ymm6,ymm10 7066 vpxor ymm5,ymm5,ymm9 7067 mov rdx,QWORD[((8+160+0))+rbp] 7068 mulx rax,r10,r10 7069 add r14,r10 7070 mulx r9,r11,r11 7071 adc r15,r11 7072 adc r9,0 7073 imul rdx,r12 7074 vpxor ymm4,ymm4,ymm8 7075 vmovdqa YMMWORD[(160+128)+rbp],ymm8 7076 vpsrld ymm8,ymm7,25 7077 vpslld ymm7,ymm7,32-25 7078 vpxor ymm7,ymm7,ymm8 7079 vpsrld ymm8,ymm6,25 7080 vpslld ymm6,ymm6,32-25 7081 vpxor ymm6,ymm6,ymm8 7082 add r15,rax 7083 adc r9,rdx 7084 vpsrld ymm8,ymm5,25 7085 vpslld ymm5,ymm5,32-25 7086 vpxor ymm5,ymm5,ymm8 7087 vpsrld ymm8,ymm4,25 7088 vpslld ymm4,ymm4,32-25 7089 vpxor ymm4,ymm4,ymm8 7090 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 7091 vpalignr ymm7,ymm7,ymm7,12 7092 vpalignr ymm11,ymm11,ymm11,8 7093 vpalignr ymm15,ymm15,ymm15,4 7094 vpalignr ymm6,ymm6,ymm6,12 7095 vpalignr ymm10,ymm10,ymm10,8 7096 vpalignr ymm14,ymm14,ymm14,4 7097 vpalignr ymm5,ymm5,ymm5,12 7098 vpalignr ymm9,ymm9,ymm9,8 7099 vpalignr ymm13,ymm13,ymm13,4 7100 vpalignr ymm4,ymm4,ymm4,12 7101 vpalignr ymm8,ymm8,ymm8,8 7102 mov r10,r13 7103 mov r11,r14 7104 mov r12,r15 7105 and r12,3 7106 mov r13,r15 7107 and r13,-4 7108 mov r14,r9 7109 shrd 
r15,r9,2 7110 shr r9,2 7111 add r15,r13 7112 adc r9,r14 7113 add r10,r15 7114 adc r11,r9 7115 adc r12,0 7116 vpalignr ymm12,ymm12,ymm12,4 7117 7118 dec rcx 7119 jne NEAR $L$seal_avx2_main_loop_rounds 7120 vpaddd ymm3,ymm3,YMMWORD[$L$chacha20_consts] 7121 vpaddd ymm7,ymm7,YMMWORD[((160+64))+rbp] 7122 vpaddd ymm11,ymm11,YMMWORD[((160+96))+rbp] 7123 vpaddd ymm15,ymm15,YMMWORD[((160+256))+rbp] 7124 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] 7125 vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] 7126 vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] 7127 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] 7128 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 7129 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] 7130 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] 7131 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 7132 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 7133 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 7134 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 7135 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 7136 7137 vmovdqa YMMWORD[(160+128)+rbp],ymm0 7138 add r10,QWORD[((0+0))+rdi] 7139 adc r11,QWORD[((8+0))+rdi] 7140 adc r12,1 7141 mov rdx,QWORD[((0+160+0))+rbp] 7142 mov r15,rdx 7143 mulx r14,r13,r10 7144 mulx rdx,rax,r11 7145 imul r15,r12 7146 add r14,rax 7147 adc r15,rdx 7148 mov rdx,QWORD[((8+160+0))+rbp] 7149 mulx rax,r10,r10 7150 add r14,r10 7151 mulx r9,r11,r11 7152 adc r15,r11 7153 adc r9,0 7154 imul rdx,r12 7155 add r15,rax 7156 adc r9,rdx 7157 mov r10,r13 7158 mov r11,r14 7159 mov r12,r15 7160 and r12,3 7161 mov r13,r15 7162 and r13,-4 7163 mov r14,r9 7164 shrd r15,r9,2 7165 shr r9,2 7166 add r15,r13 7167 adc r9,r14 7168 add r10,r15 7169 adc r11,r9 7170 adc r12,0 7171 add r10,QWORD[((0+16))+rdi] 7172 adc r11,QWORD[((8+16))+rdi] 7173 adc r12,1 7174 mov rdx,QWORD[((0+160+0))+rbp] 7175 mov r15,rdx 7176 mulx r14,r13,r10 7177 mulx rdx,rax,r11 7178 imul r15,r12 7179 add r14,rax 7180 adc r15,rdx 7181 mov rdx,QWORD[((8+160+0))+rbp] 7182 mulx rax,r10,r10 7183 add r14,r10 7184 mulx r9,r11,r11 7185 adc r15,r11 7186 
adc r9,0 7187 imul rdx,r12 7188 add r15,rax 7189 adc r9,rdx 7190 mov r10,r13 7191 mov r11,r14 7192 mov r12,r15 7193 and r12,3 7194 mov r13,r15 7195 and r13,-4 7196 mov r14,r9 7197 shrd r15,r9,2 7198 shr r9,2 7199 add r15,r13 7200 adc r9,r14 7201 add r10,r15 7202 adc r11,r9 7203 adc r12,0 7204 7205 lea rdi,[32+rdi] 7206 vperm2i128 ymm0,ymm7,ymm3,0x02 7207 vperm2i128 ymm7,ymm7,ymm3,0x13 7208 vperm2i128 ymm3,ymm15,ymm11,0x02 7209 vperm2i128 ymm11,ymm15,ymm11,0x13 7210 vpxor ymm0,ymm0,YMMWORD[((0+0))+rsi] 7211 vpxor ymm3,ymm3,YMMWORD[((32+0))+rsi] 7212 vpxor ymm7,ymm7,YMMWORD[((64+0))+rsi] 7213 vpxor ymm11,ymm11,YMMWORD[((96+0))+rsi] 7214 vmovdqu YMMWORD[(0+0)+rdi],ymm0 7215 vmovdqu YMMWORD[(32+0)+rdi],ymm3 7216 vmovdqu YMMWORD[(64+0)+rdi],ymm7 7217 vmovdqu YMMWORD[(96+0)+rdi],ymm11 7218 7219 vmovdqa ymm0,YMMWORD[((160+128))+rbp] 7220 vperm2i128 ymm3,ymm6,ymm2,0x02 7221 vperm2i128 ymm6,ymm6,ymm2,0x13 7222 vperm2i128 ymm2,ymm14,ymm10,0x02 7223 vperm2i128 ymm10,ymm14,ymm10,0x13 7224 vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi] 7225 vpxor ymm2,ymm2,YMMWORD[((32+128))+rsi] 7226 vpxor ymm6,ymm6,YMMWORD[((64+128))+rsi] 7227 vpxor ymm10,ymm10,YMMWORD[((96+128))+rsi] 7228 vmovdqu YMMWORD[(0+128)+rdi],ymm3 7229 vmovdqu YMMWORD[(32+128)+rdi],ymm2 7230 vmovdqu YMMWORD[(64+128)+rdi],ymm6 7231 vmovdqu YMMWORD[(96+128)+rdi],ymm10 7232 vperm2i128 ymm3,ymm5,ymm1,0x02 7233 vperm2i128 ymm5,ymm5,ymm1,0x13 7234 vperm2i128 ymm1,ymm13,ymm9,0x02 7235 vperm2i128 ymm9,ymm13,ymm9,0x13 7236 vpxor ymm3,ymm3,YMMWORD[((0+256))+rsi] 7237 vpxor ymm1,ymm1,YMMWORD[((32+256))+rsi] 7238 vpxor ymm5,ymm5,YMMWORD[((64+256))+rsi] 7239 vpxor ymm9,ymm9,YMMWORD[((96+256))+rsi] 7240 vmovdqu YMMWORD[(0+256)+rdi],ymm3 7241 vmovdqu YMMWORD[(32+256)+rdi],ymm1 7242 vmovdqu YMMWORD[(64+256)+rdi],ymm5 7243 vmovdqu YMMWORD[(96+256)+rdi],ymm9 7244 vperm2i128 ymm3,ymm4,ymm0,0x02 7245 vperm2i128 ymm4,ymm4,ymm0,0x13 7246 vperm2i128 ymm0,ymm12,ymm8,0x02 7247 vperm2i128 ymm8,ymm12,ymm8,0x13 7248 vpxor 
ymm3,ymm3,YMMWORD[((0+384))+rsi] 7249 vpxor ymm0,ymm0,YMMWORD[((32+384))+rsi] 7250 vpxor ymm4,ymm4,YMMWORD[((64+384))+rsi] 7251 vpxor ymm8,ymm8,YMMWORD[((96+384))+rsi] 7252 vmovdqu YMMWORD[(0+384)+rdi],ymm3 7253 vmovdqu YMMWORD[(32+384)+rdi],ymm0 7254 vmovdqu YMMWORD[(64+384)+rdi],ymm4 7255 vmovdqu YMMWORD[(96+384)+rdi],ymm8 7256 7257 lea rsi,[512+rsi] 7258 sub rbx,16*32 7259 cmp rbx,16*32 7260 jg NEAR $L$seal_avx2_main_loop 7261 7262 add r10,QWORD[((0+0))+rdi] 7263 adc r11,QWORD[((8+0))+rdi] 7264 adc r12,1 7265 mov rdx,QWORD[((0+160+0))+rbp] 7266 mov r15,rdx 7267 mulx r14,r13,r10 7268 mulx rdx,rax,r11 7269 imul r15,r12 7270 add r14,rax 7271 adc r15,rdx 7272 mov rdx,QWORD[((8+160+0))+rbp] 7273 mulx rax,r10,r10 7274 add r14,r10 7275 mulx r9,r11,r11 7276 adc r15,r11 7277 adc r9,0 7278 imul rdx,r12 7279 add r15,rax 7280 adc r9,rdx 7281 mov r10,r13 7282 mov r11,r14 7283 mov r12,r15 7284 and r12,3 7285 mov r13,r15 7286 and r13,-4 7287 mov r14,r9 7288 shrd r15,r9,2 7289 shr r9,2 7290 add r15,r13 7291 adc r9,r14 7292 add r10,r15 7293 adc r11,r9 7294 adc r12,0 7295 add r10,QWORD[((0+16))+rdi] 7296 adc r11,QWORD[((8+16))+rdi] 7297 adc r12,1 7298 mov rdx,QWORD[((0+160+0))+rbp] 7299 mov r15,rdx 7300 mulx r14,r13,r10 7301 mulx rdx,rax,r11 7302 imul r15,r12 7303 add r14,rax 7304 adc r15,rdx 7305 mov rdx,QWORD[((8+160+0))+rbp] 7306 mulx rax,r10,r10 7307 add r14,r10 7308 mulx r9,r11,r11 7309 adc r15,r11 7310 adc r9,0 7311 imul rdx,r12 7312 add r15,rax 7313 adc r9,rdx 7314 mov r10,r13 7315 mov r11,r14 7316 mov r12,r15 7317 and r12,3 7318 mov r13,r15 7319 and r13,-4 7320 mov r14,r9 7321 shrd r15,r9,2 7322 shr r9,2 7323 add r15,r13 7324 adc r9,r14 7325 add r10,r15 7326 adc r11,r9 7327 adc r12,0 7328 7329 lea rdi,[32+rdi] 7330 mov rcx,10 7331 xor r8,r8 7332 7333 cmp rbx,12*32 7334 ja NEAR $L$seal_avx2_tail_512 7335 cmp rbx,8*32 7336 ja NEAR $L$seal_avx2_tail_384 7337 cmp rbx,4*32 7338 ja NEAR $L$seal_avx2_tail_256 7339 7340$L$seal_avx2_tail_128: 7341 vmovdqa 
ymm0,YMMWORD[$L$chacha20_consts] 7342 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 7343 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 7344 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 7345 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 7346 vmovdqa YMMWORD[(160+160)+rbp],ymm12 7347 7348$L$seal_avx2_tail_128_rounds_and_3xhash: 7349 add r10,QWORD[((0+0))+rdi] 7350 adc r11,QWORD[((8+0))+rdi] 7351 adc r12,1 7352 mov rdx,QWORD[((0+160+0))+rbp] 7353 mov r15,rdx 7354 mulx r14,r13,r10 7355 mulx rdx,rax,r11 7356 imul r15,r12 7357 add r14,rax 7358 adc r15,rdx 7359 mov rdx,QWORD[((8+160+0))+rbp] 7360 mulx rax,r10,r10 7361 add r14,r10 7362 mulx r9,r11,r11 7363 adc r15,r11 7364 adc r9,0 7365 imul rdx,r12 7366 add r15,rax 7367 adc r9,rdx 7368 mov r10,r13 7369 mov r11,r14 7370 mov r12,r15 7371 and r12,3 7372 mov r13,r15 7373 and r13,-4 7374 mov r14,r9 7375 shrd r15,r9,2 7376 shr r9,2 7377 add r15,r13 7378 adc r9,r14 7379 add r10,r15 7380 adc r11,r9 7381 adc r12,0 7382 7383 lea rdi,[16+rdi] 7384$L$seal_avx2_tail_128_rounds_and_2xhash: 7385 vpaddd ymm0,ymm0,ymm4 7386 vpxor ymm12,ymm12,ymm0 7387 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 7388 vpaddd ymm8,ymm8,ymm12 7389 vpxor ymm4,ymm4,ymm8 7390 vpsrld ymm3,ymm4,20 7391 vpslld ymm4,ymm4,12 7392 vpxor ymm4,ymm4,ymm3 7393 vpaddd ymm0,ymm0,ymm4 7394 vpxor ymm12,ymm12,ymm0 7395 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 7396 vpaddd ymm8,ymm8,ymm12 7397 vpxor ymm4,ymm4,ymm8 7398 vpslld ymm3,ymm4,7 7399 vpsrld ymm4,ymm4,25 7400 vpxor ymm4,ymm4,ymm3 7401 vpalignr ymm12,ymm12,ymm12,12 7402 vpalignr ymm8,ymm8,ymm8,8 7403 vpalignr ymm4,ymm4,ymm4,4 7404 add r10,QWORD[((0+0))+rdi] 7405 adc r11,QWORD[((8+0))+rdi] 7406 adc r12,1 7407 mov rdx,QWORD[((0+160+0))+rbp] 7408 mov r15,rdx 7409 mulx r14,r13,r10 7410 mulx rdx,rax,r11 7411 imul r15,r12 7412 add r14,rax 7413 adc r15,rdx 7414 mov rdx,QWORD[((8+160+0))+rbp] 7415 mulx rax,r10,r10 7416 add r14,r10 7417 mulx r9,r11,r11 7418 adc r15,r11 7419 adc r9,0 7420 imul rdx,r12 7421 add r15,rax 7422 adc r9,rdx 7423 mov r10,r13 7424 mov r11,r14 
7425 mov r12,r15 7426 and r12,3 7427 mov r13,r15 7428 and r13,-4 7429 mov r14,r9 7430 shrd r15,r9,2 7431 shr r9,2 7432 add r15,r13 7433 adc r9,r14 7434 add r10,r15 7435 adc r11,r9 7436 adc r12,0 7437 vpaddd ymm0,ymm0,ymm4 7438 vpxor ymm12,ymm12,ymm0 7439 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 7440 vpaddd ymm8,ymm8,ymm12 7441 vpxor ymm4,ymm4,ymm8 7442 vpsrld ymm3,ymm4,20 7443 vpslld ymm4,ymm4,12 7444 vpxor ymm4,ymm4,ymm3 7445 vpaddd ymm0,ymm0,ymm4 7446 vpxor ymm12,ymm12,ymm0 7447 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 7448 vpaddd ymm8,ymm8,ymm12 7449 vpxor ymm4,ymm4,ymm8 7450 vpslld ymm3,ymm4,7 7451 vpsrld ymm4,ymm4,25 7452 vpxor ymm4,ymm4,ymm3 7453 vpalignr ymm12,ymm12,ymm12,4 7454 vpalignr ymm8,ymm8,ymm8,8 7455 vpalignr ymm4,ymm4,ymm4,12 7456 add r10,QWORD[((0+16))+rdi] 7457 adc r11,QWORD[((8+16))+rdi] 7458 adc r12,1 7459 mov rdx,QWORD[((0+160+0))+rbp] 7460 mov r15,rdx 7461 mulx r14,r13,r10 7462 mulx rdx,rax,r11 7463 imul r15,r12 7464 add r14,rax 7465 adc r15,rdx 7466 mov rdx,QWORD[((8+160+0))+rbp] 7467 mulx rax,r10,r10 7468 add r14,r10 7469 mulx r9,r11,r11 7470 adc r15,r11 7471 adc r9,0 7472 imul rdx,r12 7473 add r15,rax 7474 adc r9,rdx 7475 mov r10,r13 7476 mov r11,r14 7477 mov r12,r15 7478 and r12,3 7479 mov r13,r15 7480 and r13,-4 7481 mov r14,r9 7482 shrd r15,r9,2 7483 shr r9,2 7484 add r15,r13 7485 adc r9,r14 7486 add r10,r15 7487 adc r11,r9 7488 adc r12,0 7489 7490 lea rdi,[32+rdi] 7491 dec rcx 7492 jg NEAR $L$seal_avx2_tail_128_rounds_and_3xhash 7493 dec r8 7494 jge NEAR $L$seal_avx2_tail_128_rounds_and_2xhash 7495 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 7496 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 7497 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 7498 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 7499 vperm2i128 ymm3,ymm4,ymm0,0x13 7500 vperm2i128 ymm0,ymm4,ymm0,0x02 7501 vperm2i128 ymm4,ymm12,ymm8,0x02 7502 vperm2i128 ymm12,ymm12,ymm8,0x13 7503 vmovdqa ymm8,ymm3 7504 7505 jmp NEAR $L$seal_avx2_short_loop 7506 7507$L$seal_avx2_tail_256: 7508 vmovdqa 
ymm0,YMMWORD[$L$chacha20_consts] 7509 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 7510 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 7511 vmovdqa ymm1,ymm0 7512 vmovdqa ymm5,ymm4 7513 vmovdqa ymm9,ymm8 7514 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 7515 vpaddd ymm13,ymm12,YMMWORD[((160+160))+rbp] 7516 vpaddd ymm12,ymm12,ymm13 7517 vmovdqa YMMWORD[(160+160)+rbp],ymm12 7518 vmovdqa YMMWORD[(160+192)+rbp],ymm13 7519 7520$L$seal_avx2_tail_256_rounds_and_3xhash: 7521 add r10,QWORD[((0+0))+rdi] 7522 adc r11,QWORD[((8+0))+rdi] 7523 adc r12,1 7524 mov rax,QWORD[((0+160+0))+rbp] 7525 mov r15,rax 7526 mul r10 7527 mov r13,rax 7528 mov r14,rdx 7529 mov rax,QWORD[((0+160+0))+rbp] 7530 mul r11 7531 imul r15,r12 7532 add r14,rax 7533 adc r15,rdx 7534 mov rax,QWORD[((8+160+0))+rbp] 7535 mov r9,rax 7536 mul r10 7537 add r14,rax 7538 adc rdx,0 7539 mov r10,rdx 7540 mov rax,QWORD[((8+160+0))+rbp] 7541 mul r11 7542 add r15,rax 7543 adc rdx,0 7544 imul r9,r12 7545 add r15,r10 7546 adc r9,rdx 7547 mov r10,r13 7548 mov r11,r14 7549 mov r12,r15 7550 and r12,3 7551 mov r13,r15 7552 and r13,-4 7553 mov r14,r9 7554 shrd r15,r9,2 7555 shr r9,2 7556 add r15,r13 7557 adc r9,r14 7558 add r10,r15 7559 adc r11,r9 7560 adc r12,0 7561 7562 lea rdi,[16+rdi] 7563$L$seal_avx2_tail_256_rounds_and_2xhash: 7564 vpaddd ymm0,ymm0,ymm4 7565 vpxor ymm12,ymm12,ymm0 7566 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 7567 vpaddd ymm8,ymm8,ymm12 7568 vpxor ymm4,ymm4,ymm8 7569 vpsrld ymm3,ymm4,20 7570 vpslld ymm4,ymm4,12 7571 vpxor ymm4,ymm4,ymm3 7572 vpaddd ymm0,ymm0,ymm4 7573 vpxor ymm12,ymm12,ymm0 7574 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 7575 vpaddd ymm8,ymm8,ymm12 7576 vpxor ymm4,ymm4,ymm8 7577 vpslld ymm3,ymm4,7 7578 vpsrld ymm4,ymm4,25 7579 vpxor ymm4,ymm4,ymm3 7580 vpalignr ymm12,ymm12,ymm12,12 7581 vpalignr ymm8,ymm8,ymm8,8 7582 vpalignr ymm4,ymm4,ymm4,4 7583 vpaddd ymm1,ymm1,ymm5 7584 vpxor ymm13,ymm13,ymm1 7585 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 7586 vpaddd ymm9,ymm9,ymm13 7587 vpxor ymm5,ymm5,ymm9 7588 vpsrld ymm3,ymm5,20 7589 
vpslld ymm5,ymm5,12 7590 vpxor ymm5,ymm5,ymm3 7591 vpaddd ymm1,ymm1,ymm5 7592 vpxor ymm13,ymm13,ymm1 7593 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 7594 vpaddd ymm9,ymm9,ymm13 7595 vpxor ymm5,ymm5,ymm9 7596 vpslld ymm3,ymm5,7 7597 vpsrld ymm5,ymm5,25 7598 vpxor ymm5,ymm5,ymm3 7599 vpalignr ymm13,ymm13,ymm13,12 7600 vpalignr ymm9,ymm9,ymm9,8 7601 vpalignr ymm5,ymm5,ymm5,4 7602 add r10,QWORD[((0+0))+rdi] 7603 adc r11,QWORD[((8+0))+rdi] 7604 adc r12,1 7605 mov rax,QWORD[((0+160+0))+rbp] 7606 mov r15,rax 7607 mul r10 7608 mov r13,rax 7609 mov r14,rdx 7610 mov rax,QWORD[((0+160+0))+rbp] 7611 mul r11 7612 imul r15,r12 7613 add r14,rax 7614 adc r15,rdx 7615 mov rax,QWORD[((8+160+0))+rbp] 7616 mov r9,rax 7617 mul r10 7618 add r14,rax 7619 adc rdx,0 7620 mov r10,rdx 7621 mov rax,QWORD[((8+160+0))+rbp] 7622 mul r11 7623 add r15,rax 7624 adc rdx,0 7625 imul r9,r12 7626 add r15,r10 7627 adc r9,rdx 7628 mov r10,r13 7629 mov r11,r14 7630 mov r12,r15 7631 and r12,3 7632 mov r13,r15 7633 and r13,-4 7634 mov r14,r9 7635 shrd r15,r9,2 7636 shr r9,2 7637 add r15,r13 7638 adc r9,r14 7639 add r10,r15 7640 adc r11,r9 7641 adc r12,0 7642 vpaddd ymm0,ymm0,ymm4 7643 vpxor ymm12,ymm12,ymm0 7644 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 7645 vpaddd ymm8,ymm8,ymm12 7646 vpxor ymm4,ymm4,ymm8 7647 vpsrld ymm3,ymm4,20 7648 vpslld ymm4,ymm4,12 7649 vpxor ymm4,ymm4,ymm3 7650 vpaddd ymm0,ymm0,ymm4 7651 vpxor ymm12,ymm12,ymm0 7652 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 7653 vpaddd ymm8,ymm8,ymm12 7654 vpxor ymm4,ymm4,ymm8 7655 vpslld ymm3,ymm4,7 7656 vpsrld ymm4,ymm4,25 7657 vpxor ymm4,ymm4,ymm3 7658 vpalignr ymm12,ymm12,ymm12,4 7659 vpalignr ymm8,ymm8,ymm8,8 7660 vpalignr ymm4,ymm4,ymm4,12 7661 vpaddd ymm1,ymm1,ymm5 7662 vpxor ymm13,ymm13,ymm1 7663 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 7664 vpaddd ymm9,ymm9,ymm13 7665 vpxor ymm5,ymm5,ymm9 7666 vpsrld ymm3,ymm5,20 7667 vpslld ymm5,ymm5,12 7668 vpxor ymm5,ymm5,ymm3 7669 vpaddd ymm1,ymm1,ymm5 7670 vpxor ymm13,ymm13,ymm1 7671 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 
7672 vpaddd ymm9,ymm9,ymm13 7673 vpxor ymm5,ymm5,ymm9 7674 vpslld ymm3,ymm5,7 7675 vpsrld ymm5,ymm5,25 7676 vpxor ymm5,ymm5,ymm3 7677 vpalignr ymm13,ymm13,ymm13,4 7678 vpalignr ymm9,ymm9,ymm9,8 7679 vpalignr ymm5,ymm5,ymm5,12 7680 add r10,QWORD[((0+16))+rdi] 7681 adc r11,QWORD[((8+16))+rdi] 7682 adc r12,1 7683 mov rax,QWORD[((0+160+0))+rbp] 7684 mov r15,rax 7685 mul r10 7686 mov r13,rax 7687 mov r14,rdx 7688 mov rax,QWORD[((0+160+0))+rbp] 7689 mul r11 7690 imul r15,r12 7691 add r14,rax 7692 adc r15,rdx 7693 mov rax,QWORD[((8+160+0))+rbp] 7694 mov r9,rax 7695 mul r10 7696 add r14,rax 7697 adc rdx,0 7698 mov r10,rdx 7699 mov rax,QWORD[((8+160+0))+rbp] 7700 mul r11 7701 add r15,rax 7702 adc rdx,0 7703 imul r9,r12 7704 add r15,r10 7705 adc r9,rdx 7706 mov r10,r13 7707 mov r11,r14 7708 mov r12,r15 7709 and r12,3 7710 mov r13,r15 7711 and r13,-4 7712 mov r14,r9 7713 shrd r15,r9,2 7714 shr r9,2 7715 add r15,r13 7716 adc r9,r14 7717 add r10,r15 7718 adc r11,r9 7719 adc r12,0 7720 7721 lea rdi,[32+rdi] 7722 dec rcx 7723 jg NEAR $L$seal_avx2_tail_256_rounds_and_3xhash 7724 dec r8 7725 jge NEAR $L$seal_avx2_tail_256_rounds_and_2xhash 7726 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 7727 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] 7728 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] 7729 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 7730 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 7731 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 7732 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 7733 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 7734 vperm2i128 ymm3,ymm5,ymm1,0x02 7735 vperm2i128 ymm5,ymm5,ymm1,0x13 7736 vperm2i128 ymm1,ymm13,ymm9,0x02 7737 vperm2i128 ymm9,ymm13,ymm9,0x13 7738 vpxor ymm3,ymm3,YMMWORD[((0+0))+rsi] 7739 vpxor ymm1,ymm1,YMMWORD[((32+0))+rsi] 7740 vpxor ymm5,ymm5,YMMWORD[((64+0))+rsi] 7741 vpxor ymm9,ymm9,YMMWORD[((96+0))+rsi] 7742 vmovdqu YMMWORD[(0+0)+rdi],ymm3 7743 vmovdqu YMMWORD[(32+0)+rdi],ymm1 7744 vmovdqu YMMWORD[(64+0)+rdi],ymm5 7745 vmovdqu YMMWORD[(96+0)+rdi],ymm9 7746 
vperm2i128 ymm3,ymm4,ymm0,0x13 7747 vperm2i128 ymm0,ymm4,ymm0,0x02 7748 vperm2i128 ymm4,ymm12,ymm8,0x02 7749 vperm2i128 ymm12,ymm12,ymm8,0x13 7750 vmovdqa ymm8,ymm3 7751 7752 mov rcx,4*32 7753 lea rsi,[128+rsi] 7754 sub rbx,4*32 7755 jmp NEAR $L$seal_avx2_short_hash_remainder 7756 7757$L$seal_avx2_tail_384: 7758 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 7759 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 7760 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 7761 vmovdqa ymm1,ymm0 7762 vmovdqa ymm5,ymm4 7763 vmovdqa ymm9,ymm8 7764 vmovdqa ymm2,ymm0 7765 vmovdqa ymm6,ymm4 7766 vmovdqa ymm10,ymm8 7767 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 7768 vpaddd ymm14,ymm12,YMMWORD[((160+160))+rbp] 7769 vpaddd ymm13,ymm12,ymm14 7770 vpaddd ymm12,ymm12,ymm13 7771 vmovdqa YMMWORD[(160+160)+rbp],ymm12 7772 vmovdqa YMMWORD[(160+192)+rbp],ymm13 7773 vmovdqa YMMWORD[(160+224)+rbp],ymm14 7774 7775$L$seal_avx2_tail_384_rounds_and_3xhash: 7776 add r10,QWORD[((0+0))+rdi] 7777 adc r11,QWORD[((8+0))+rdi] 7778 adc r12,1 7779 mov rax,QWORD[((0+160+0))+rbp] 7780 mov r15,rax 7781 mul r10 7782 mov r13,rax 7783 mov r14,rdx 7784 mov rax,QWORD[((0+160+0))+rbp] 7785 mul r11 7786 imul r15,r12 7787 add r14,rax 7788 adc r15,rdx 7789 mov rax,QWORD[((8+160+0))+rbp] 7790 mov r9,rax 7791 mul r10 7792 add r14,rax 7793 adc rdx,0 7794 mov r10,rdx 7795 mov rax,QWORD[((8+160+0))+rbp] 7796 mul r11 7797 add r15,rax 7798 adc rdx,0 7799 imul r9,r12 7800 add r15,r10 7801 adc r9,rdx 7802 mov r10,r13 7803 mov r11,r14 7804 mov r12,r15 7805 and r12,3 7806 mov r13,r15 7807 and r13,-4 7808 mov r14,r9 7809 shrd r15,r9,2 7810 shr r9,2 7811 add r15,r13 7812 adc r9,r14 7813 add r10,r15 7814 adc r11,r9 7815 adc r12,0 7816 7817 lea rdi,[16+rdi] 7818$L$seal_avx2_tail_384_rounds_and_2xhash: 7819 vpaddd ymm0,ymm0,ymm4 7820 vpxor ymm12,ymm12,ymm0 7821 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 7822 vpaddd ymm8,ymm8,ymm12 7823 vpxor ymm4,ymm4,ymm8 7824 vpsrld ymm3,ymm4,20 7825 vpslld ymm4,ymm4,12 7826 vpxor ymm4,ymm4,ymm3 7827 vpaddd ymm0,ymm0,ymm4 7828 vpxor 
ymm12,ymm12,ymm0 7829 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 7830 vpaddd ymm8,ymm8,ymm12 7831 vpxor ymm4,ymm4,ymm8 7832 vpslld ymm3,ymm4,7 7833 vpsrld ymm4,ymm4,25 7834 vpxor ymm4,ymm4,ymm3 7835 vpalignr ymm12,ymm12,ymm12,12 7836 vpalignr ymm8,ymm8,ymm8,8 7837 vpalignr ymm4,ymm4,ymm4,4 7838 vpaddd ymm1,ymm1,ymm5 7839 vpxor ymm13,ymm13,ymm1 7840 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 7841 vpaddd ymm9,ymm9,ymm13 7842 vpxor ymm5,ymm5,ymm9 7843 vpsrld ymm3,ymm5,20 7844 vpslld ymm5,ymm5,12 7845 vpxor ymm5,ymm5,ymm3 7846 vpaddd ymm1,ymm1,ymm5 7847 vpxor ymm13,ymm13,ymm1 7848 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 7849 vpaddd ymm9,ymm9,ymm13 7850 vpxor ymm5,ymm5,ymm9 7851 vpslld ymm3,ymm5,7 7852 vpsrld ymm5,ymm5,25 7853 vpxor ymm5,ymm5,ymm3 7854 vpalignr ymm13,ymm13,ymm13,12 7855 vpalignr ymm9,ymm9,ymm9,8 7856 vpalignr ymm5,ymm5,ymm5,4 7857 add r10,QWORD[((0+0))+rdi] 7858 adc r11,QWORD[((8+0))+rdi] 7859 adc r12,1 7860 mov rax,QWORD[((0+160+0))+rbp] 7861 mov r15,rax 7862 mul r10 7863 mov r13,rax 7864 mov r14,rdx 7865 mov rax,QWORD[((0+160+0))+rbp] 7866 mul r11 7867 imul r15,r12 7868 add r14,rax 7869 adc r15,rdx 7870 mov rax,QWORD[((8+160+0))+rbp] 7871 mov r9,rax 7872 mul r10 7873 add r14,rax 7874 adc rdx,0 7875 mov r10,rdx 7876 mov rax,QWORD[((8+160+0))+rbp] 7877 mul r11 7878 add r15,rax 7879 adc rdx,0 7880 imul r9,r12 7881 add r15,r10 7882 adc r9,rdx 7883 mov r10,r13 7884 mov r11,r14 7885 mov r12,r15 7886 and r12,3 7887 mov r13,r15 7888 and r13,-4 7889 mov r14,r9 7890 shrd r15,r9,2 7891 shr r9,2 7892 add r15,r13 7893 adc r9,r14 7894 add r10,r15 7895 adc r11,r9 7896 adc r12,0 7897 vpaddd ymm2,ymm2,ymm6 7898 vpxor ymm14,ymm14,ymm2 7899 vpshufb ymm14,ymm14,YMMWORD[$L$rol16] 7900 vpaddd ymm10,ymm10,ymm14 7901 vpxor ymm6,ymm6,ymm10 7902 vpsrld ymm3,ymm6,20 7903 vpslld ymm6,ymm6,12 7904 vpxor ymm6,ymm6,ymm3 7905 vpaddd ymm2,ymm2,ymm6 7906 vpxor ymm14,ymm14,ymm2 7907 vpshufb ymm14,ymm14,YMMWORD[$L$rol8] 7908 vpaddd ymm10,ymm10,ymm14 7909 vpxor ymm6,ymm6,ymm10 7910 vpslld ymm3,ymm6,7 
7911 vpsrld ymm6,ymm6,25 7912 vpxor ymm6,ymm6,ymm3 7913 vpalignr ymm14,ymm14,ymm14,12 7914 vpalignr ymm10,ymm10,ymm10,8 7915 vpalignr ymm6,ymm6,ymm6,4 7916 vpaddd ymm0,ymm0,ymm4 7917 vpxor ymm12,ymm12,ymm0 7918 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 7919 vpaddd ymm8,ymm8,ymm12 7920 vpxor ymm4,ymm4,ymm8 7921 vpsrld ymm3,ymm4,20 7922 vpslld ymm4,ymm4,12 7923 vpxor ymm4,ymm4,ymm3 7924 vpaddd ymm0,ymm0,ymm4 7925 vpxor ymm12,ymm12,ymm0 7926 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 7927 vpaddd ymm8,ymm8,ymm12 7928 vpxor ymm4,ymm4,ymm8 7929 vpslld ymm3,ymm4,7 7930 vpsrld ymm4,ymm4,25 7931 vpxor ymm4,ymm4,ymm3 7932 vpalignr ymm12,ymm12,ymm12,4 7933 vpalignr ymm8,ymm8,ymm8,8 7934 vpalignr ymm4,ymm4,ymm4,12 7935 add r10,QWORD[((0+16))+rdi] 7936 adc r11,QWORD[((8+16))+rdi] 7937 adc r12,1 7938 mov rax,QWORD[((0+160+0))+rbp] 7939 mov r15,rax 7940 mul r10 7941 mov r13,rax 7942 mov r14,rdx 7943 mov rax,QWORD[((0+160+0))+rbp] 7944 mul r11 7945 imul r15,r12 7946 add r14,rax 7947 adc r15,rdx 7948 mov rax,QWORD[((8+160+0))+rbp] 7949 mov r9,rax 7950 mul r10 7951 add r14,rax 7952 adc rdx,0 7953 mov r10,rdx 7954 mov rax,QWORD[((8+160+0))+rbp] 7955 mul r11 7956 add r15,rax 7957 adc rdx,0 7958 imul r9,r12 7959 add r15,r10 7960 adc r9,rdx 7961 mov r10,r13 7962 mov r11,r14 7963 mov r12,r15 7964 and r12,3 7965 mov r13,r15 7966 and r13,-4 7967 mov r14,r9 7968 shrd r15,r9,2 7969 shr r9,2 7970 add r15,r13 7971 adc r9,r14 7972 add r10,r15 7973 adc r11,r9 7974 adc r12,0 7975 vpaddd ymm1,ymm1,ymm5 7976 vpxor ymm13,ymm13,ymm1 7977 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 7978 vpaddd ymm9,ymm9,ymm13 7979 vpxor ymm5,ymm5,ymm9 7980 vpsrld ymm3,ymm5,20 7981 vpslld ymm5,ymm5,12 7982 vpxor ymm5,ymm5,ymm3 7983 vpaddd ymm1,ymm1,ymm5 7984 vpxor ymm13,ymm13,ymm1 7985 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 7986 vpaddd ymm9,ymm9,ymm13 7987 vpxor ymm5,ymm5,ymm9 7988 vpslld ymm3,ymm5,7 7989 vpsrld ymm5,ymm5,25 7990 vpxor ymm5,ymm5,ymm3 7991 vpalignr ymm13,ymm13,ymm13,4 7992 vpalignr ymm9,ymm9,ymm9,8 7993 vpalignr 
ymm5,ymm5,ymm5,12 7994 vpaddd ymm2,ymm2,ymm6 7995 vpxor ymm14,ymm14,ymm2 7996 vpshufb ymm14,ymm14,YMMWORD[$L$rol16] 7997 vpaddd ymm10,ymm10,ymm14 7998 vpxor ymm6,ymm6,ymm10 7999 vpsrld ymm3,ymm6,20 8000 vpslld ymm6,ymm6,12 8001 vpxor ymm6,ymm6,ymm3 8002 vpaddd ymm2,ymm2,ymm6 8003 vpxor ymm14,ymm14,ymm2 8004 vpshufb ymm14,ymm14,YMMWORD[$L$rol8] 8005 vpaddd ymm10,ymm10,ymm14 8006 vpxor ymm6,ymm6,ymm10 8007 vpslld ymm3,ymm6,7 8008 vpsrld ymm6,ymm6,25 8009 vpxor ymm6,ymm6,ymm3 8010 vpalignr ymm14,ymm14,ymm14,4 8011 vpalignr ymm10,ymm10,ymm10,8 8012 vpalignr ymm6,ymm6,ymm6,12 8013 8014 lea rdi,[32+rdi] 8015 dec rcx 8016 jg NEAR $L$seal_avx2_tail_384_rounds_and_3xhash 8017 dec r8 8018 jge NEAR $L$seal_avx2_tail_384_rounds_and_2xhash 8019 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] 8020 vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] 8021 vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] 8022 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] 8023 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 8024 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] 8025 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] 8026 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 8027 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 8028 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 8029 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 8030 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 8031 vperm2i128 ymm3,ymm6,ymm2,0x02 8032 vperm2i128 ymm6,ymm6,ymm2,0x13 8033 vperm2i128 ymm2,ymm14,ymm10,0x02 8034 vperm2i128 ymm10,ymm14,ymm10,0x13 8035 vpxor ymm3,ymm3,YMMWORD[((0+0))+rsi] 8036 vpxor ymm2,ymm2,YMMWORD[((32+0))+rsi] 8037 vpxor ymm6,ymm6,YMMWORD[((64+0))+rsi] 8038 vpxor ymm10,ymm10,YMMWORD[((96+0))+rsi] 8039 vmovdqu YMMWORD[(0+0)+rdi],ymm3 8040 vmovdqu YMMWORD[(32+0)+rdi],ymm2 8041 vmovdqu YMMWORD[(64+0)+rdi],ymm6 8042 vmovdqu YMMWORD[(96+0)+rdi],ymm10 8043 vperm2i128 ymm3,ymm5,ymm1,0x02 8044 vperm2i128 ymm5,ymm5,ymm1,0x13 8045 vperm2i128 ymm1,ymm13,ymm9,0x02 8046 vperm2i128 ymm9,ymm13,ymm9,0x13 8047 vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi] 8048 vpxor 
ymm1,ymm1,YMMWORD[((32+128))+rsi] 8049 vpxor ymm5,ymm5,YMMWORD[((64+128))+rsi] 8050 vpxor ymm9,ymm9,YMMWORD[((96+128))+rsi] 8051 vmovdqu YMMWORD[(0+128)+rdi],ymm3 8052 vmovdqu YMMWORD[(32+128)+rdi],ymm1 8053 vmovdqu YMMWORD[(64+128)+rdi],ymm5 8054 vmovdqu YMMWORD[(96+128)+rdi],ymm9 8055 vperm2i128 ymm3,ymm4,ymm0,0x13 8056 vperm2i128 ymm0,ymm4,ymm0,0x02 8057 vperm2i128 ymm4,ymm12,ymm8,0x02 8058 vperm2i128 ymm12,ymm12,ymm8,0x13 8059 vmovdqa ymm8,ymm3 8060 8061 mov rcx,8*32 8062 lea rsi,[256+rsi] 8063 sub rbx,8*32 8064 jmp NEAR $L$seal_avx2_short_hash_remainder 8065 8066$L$seal_avx2_tail_512: 8067 vmovdqa ymm0,YMMWORD[$L$chacha20_consts] 8068 vmovdqa ymm4,YMMWORD[((160+64))+rbp] 8069 vmovdqa ymm8,YMMWORD[((160+96))+rbp] 8070 vmovdqa ymm1,ymm0 8071 vmovdqa ymm5,ymm4 8072 vmovdqa ymm9,ymm8 8073 vmovdqa ymm2,ymm0 8074 vmovdqa ymm6,ymm4 8075 vmovdqa ymm10,ymm8 8076 vmovdqa ymm3,ymm0 8077 vmovdqa ymm7,ymm4 8078 vmovdqa ymm11,ymm8 8079 vmovdqa ymm12,YMMWORD[$L$avx2_inc] 8080 vpaddd ymm15,ymm12,YMMWORD[((160+160))+rbp] 8081 vpaddd ymm14,ymm12,ymm15 8082 vpaddd ymm13,ymm12,ymm14 8083 vpaddd ymm12,ymm12,ymm13 8084 vmovdqa YMMWORD[(160+256)+rbp],ymm15 8085 vmovdqa YMMWORD[(160+224)+rbp],ymm14 8086 vmovdqa YMMWORD[(160+192)+rbp],ymm13 8087 vmovdqa YMMWORD[(160+160)+rbp],ymm12 8088 8089$L$seal_avx2_tail_512_rounds_and_3xhash: 8090 add r10,QWORD[((0+0))+rdi] 8091 adc r11,QWORD[((8+0))+rdi] 8092 adc r12,1 8093 mov rdx,QWORD[((0+160+0))+rbp] 8094 mov r15,rdx 8095 mulx r14,r13,r10 8096 mulx rdx,rax,r11 8097 imul r15,r12 8098 add r14,rax 8099 adc r15,rdx 8100 mov rdx,QWORD[((8+160+0))+rbp] 8101 mulx rax,r10,r10 8102 add r14,r10 8103 mulx r9,r11,r11 8104 adc r15,r11 8105 adc r9,0 8106 imul rdx,r12 8107 add r15,rax 8108 adc r9,rdx 8109 mov r10,r13 8110 mov r11,r14 8111 mov r12,r15 8112 and r12,3 8113 mov r13,r15 8114 and r13,-4 8115 mov r14,r9 8116 shrd r15,r9,2 8117 shr r9,2 8118 add r15,r13 8119 adc r9,r14 8120 add r10,r15 8121 adc r11,r9 8122 adc r12,0 8123 8124 lea rdi,[16+rdi] 
; $L$seal_avx2_tail_512_rounds_and_2xhash -- loop body; its setup and the companion
; entry label $L$seal_avx2_tail_512_rounds_and_3xhash are defined above this point.
; Each pass runs one double round (add/xor/rotate with rotations 16, 12, 8, 7, a vpalignr
; lane shuffle by 4/8/12, the same steps again, then the shuffle undone by 12/8/4) over
; four register groups:
;   (ymm0,ymm4,ymm8,ymm12) (ymm1,ymm5,ymm9,ymm13) (ymm2,ymm6,ymm10,ymm14) (ymm3,ymm7,ymm11,ymm15)
; All sixteen ymm registers carry state, so ymm8 is spilled to [160+128+rbp] whenever a
; scratch register is needed for the $L$rol16 / $L$rol8 shuffle masks or shift temporaries.
; Interleaved scalar code absorbs two 16-byte blocks, [rdi] and [16+rdi], into the
; accumulator r12:r11:r10 (adding 1 into r12 each time) and multiplies it with mulx by the
; two qwords at [160+rbp] and [168+rbp].
; NOTE(review): presumably ChaCha20 rounds interleaved with the Poly1305 update, going by
; the chacha20_poly1305_* / poly_hash_* names at the top of the file -- confirm against the
; function prologue, which is not visible here.
8125$L$seal_avx2_tail_512_rounds_and_2xhash: 8126 vmovdqa YMMWORD[(160+128)+rbp],ymm8 8127 vmovdqa ymm8,YMMWORD[$L$rol16] 8128 vpaddd ymm3,ymm3,ymm7 8129 vpaddd ymm2,ymm2,ymm6 8130 vpaddd ymm1,ymm1,ymm5 8131 vpaddd ymm0,ymm0,ymm4 8132 vpxor ymm15,ymm15,ymm3 8133 vpxor ymm14,ymm14,ymm2 8134 vpxor ymm13,ymm13,ymm1 8135 vpxor ymm12,ymm12,ymm0 8136 vpshufb ymm15,ymm15,ymm8 8137 vpshufb ymm14,ymm14,ymm8 8138 vpshufb ymm13,ymm13,ymm8 8139 vpshufb ymm12,ymm12,ymm8 8140 vpaddd ymm11,ymm11,ymm15 8141 vpaddd ymm10,ymm10,ymm14 8142 vpaddd ymm9,ymm9,ymm13 8143 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 8144 vpxor ymm7,ymm7,ymm11 8145 vpxor ymm6,ymm6,ymm10 8146 add r10,QWORD[((0+0))+rdi] 8147 adc r11,QWORD[((8+0))+rdi] 8148 adc r12,1 8149 vpxor ymm5,ymm5,ymm9 8150 vpxor ymm4,ymm4,ymm8 8151 vmovdqa YMMWORD[(160+128)+rbp],ymm8 8152 vpsrld ymm8,ymm7,20 8153 vpslld ymm7,ymm7,32-20 8154 vpxor ymm7,ymm7,ymm8 8155 vpsrld ymm8,ymm6,20 8156 vpslld ymm6,ymm6,32-20 8157 vpxor ymm6,ymm6,ymm8 8158 vpsrld ymm8,ymm5,20 8159 vpslld ymm5,ymm5,32-20 8160 vpxor ymm5,ymm5,ymm8 8161 vpsrld ymm8,ymm4,20 8162 vpslld ymm4,ymm4,32-20 8163 vpxor ymm4,ymm4,ymm8 8164 vmovdqa ymm8,YMMWORD[$L$rol8] 8165 vpaddd ymm3,ymm3,ymm7 8166 vpaddd ymm2,ymm2,ymm6 8167 vpaddd ymm1,ymm1,ymm5 8168 vpaddd ymm0,ymm0,ymm4 8169 mov rdx,QWORD[((0+160+0))+rbp] 8170 mov r15,rdx 8171 mulx r14,r13,r10 8172 mulx rdx,rax,r11 8173 imul r15,r12 8174 add r14,rax 8175 adc r15,rdx 8176 vpxor ymm15,ymm15,ymm3 8177 vpxor ymm14,ymm14,ymm2 8178 vpxor ymm13,ymm13,ymm1 8179 vpxor ymm12,ymm12,ymm0 8180 vpshufb ymm15,ymm15,ymm8 8181 vpshufb ymm14,ymm14,ymm8 8182 vpshufb ymm13,ymm13,ymm8 8183 vpshufb ymm12,ymm12,ymm8 8184 vpaddd ymm11,ymm11,ymm15 8185 vpaddd ymm10,ymm10,ymm14 8186 vpaddd ymm9,ymm9,ymm13 8187 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 8188 vpxor ymm7,ymm7,ymm11 8189 vpxor ymm6,ymm6,ymm10 8190 vpxor ymm5,ymm5,ymm9 8191 vpxor ymm4,ymm4,ymm8 8192 vmovdqa YMMWORD[(160+128)+rbp],ymm8 8193 vpsrld ymm8,ymm7,25 8194 vpslld ymm7,ymm7,32-25
; First half round, continued: finish the rotate-by-7, multiply the accumulator by the
; qword at [168+rbp], and shuffle lanes with vpalignr (second rows by 4, third by 8,
; fourth by 12). The second half round then starts while the scalar code reduces the
; product: with x = r9:r15 holding the bits from 2^128 up, r12 keeps x & 3 and
; (x & -4) + (x >> 2), i.e. 5 * (x >> 2), is added back into r12:r11:r10.
8195 vpxor ymm7,ymm7,ymm8 8196 mov rdx,QWORD[((8+160+0))+rbp] 8197 mulx rax,r10,r10 8198 add r14,r10 8199 mulx r9,r11,r11 8200 adc r15,r11 8201 adc r9,0 8202 imul rdx,r12 8203 vpsrld ymm8,ymm6,25 8204 vpslld ymm6,ymm6,32-25 8205 vpxor ymm6,ymm6,ymm8 8206 vpsrld ymm8,ymm5,25 8207 vpslld ymm5,ymm5,32-25 8208 vpxor ymm5,ymm5,ymm8 8209 vpsrld ymm8,ymm4,25 8210 vpslld ymm4,ymm4,32-25 8211 vpxor ymm4,ymm4,ymm8 8212 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 8213 vpalignr ymm7,ymm7,ymm7,4 8214 vpalignr ymm11,ymm11,ymm11,8 8215 vpalignr ymm15,ymm15,ymm15,12 8216 vpalignr ymm6,ymm6,ymm6,4 8217 vpalignr ymm10,ymm10,ymm10,8 8218 vpalignr ymm14,ymm14,ymm14,12 8219 vpalignr ymm5,ymm5,ymm5,4 8220 vpalignr ymm9,ymm9,ymm9,8 8221 vpalignr ymm13,ymm13,ymm13,12 8222 vpalignr ymm4,ymm4,ymm4,4 8223 add r15,rax 8224 adc r9,rdx 8225 vpalignr ymm8,ymm8,ymm8,8 8226 vpalignr ymm12,ymm12,ymm12,12 8227 vmovdqa YMMWORD[(160+128)+rbp],ymm8 8228 vmovdqa ymm8,YMMWORD[$L$rol16] 8229 vpaddd ymm3,ymm3,ymm7 8230 vpaddd ymm2,ymm2,ymm6 8231 vpaddd ymm1,ymm1,ymm5 8232 vpaddd ymm0,ymm0,ymm4 8233 vpxor ymm15,ymm15,ymm3 8234 vpxor ymm14,ymm14,ymm2 8235 vpxor ymm13,ymm13,ymm1 8236 vpxor ymm12,ymm12,ymm0 8237 vpshufb ymm15,ymm15,ymm8 8238 vpshufb ymm14,ymm14,ymm8 8239 vpshufb ymm13,ymm13,ymm8 8240 vpshufb ymm12,ymm12,ymm8 8241 vpaddd ymm11,ymm11,ymm15 8242 vpaddd ymm10,ymm10,ymm14 8243 vpaddd ymm9,ymm9,ymm13 8244 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 8245 mov r10,r13 8246 mov r11,r14 8247 mov r12,r15 8248 and r12,3 8249 mov r13,r15 8250 and r13,-4 8251 mov r14,r9 8252 shrd r15,r9,2 8253 shr r9,2 8254 add r15,r13 8255 adc r9,r14 8256 add r10,r15 8257 adc r11,r9 8258 adc r12,0 8259 vpxor ymm7,ymm7,ymm11 8260 vpxor ymm6,ymm6,ymm10 8261 vpxor ymm5,ymm5,ymm9 8262 vpxor ymm4,ymm4,ymm8 8263 vmovdqa YMMWORD[(160+128)+rbp],ymm8 8264 vpsrld ymm8,ymm7,20 8265 vpslld ymm7,ymm7,32-20 8266 vpxor ymm7,ymm7,ymm8 8267 vpsrld ymm8,ymm6,20 8268 vpslld ymm6,ymm6,32-20 8269 vpxor ymm6,ymm6,ymm8 8270 vpsrld ymm8,ymm5,20 8271
; Second half round, continued: finish the rotate-by-12, absorb the second 16-byte block
; at [16+rdi], apply the $L$rol8 shuffle and the rotate-by-7, and run both mulx
; multiplications for that block. The vpalignr amounts are now 12/8/4, undoing the
; earlier lane shuffle.
vpslld ymm5,ymm5,32-20 8272 vpxor ymm5,ymm5,ymm8 8273 vpsrld ymm8,ymm4,20 8274 vpslld ymm4,ymm4,32-20 8275 vpxor ymm4,ymm4,ymm8 8276 vmovdqa ymm8,YMMWORD[$L$rol8] 8277 vpaddd ymm3,ymm3,ymm7 8278 vpaddd ymm2,ymm2,ymm6 8279 add r10,QWORD[((0+16))+rdi] 8280 adc r11,QWORD[((8+16))+rdi] 8281 adc r12,1 8282 vpaddd ymm1,ymm1,ymm5 8283 vpaddd ymm0,ymm0,ymm4 8284 vpxor ymm15,ymm15,ymm3 8285 vpxor ymm14,ymm14,ymm2 8286 vpxor ymm13,ymm13,ymm1 8287 vpxor ymm12,ymm12,ymm0 8288 vpshufb ymm15,ymm15,ymm8 8289 vpshufb ymm14,ymm14,ymm8 8290 vpshufb ymm13,ymm13,ymm8 8291 vpshufb ymm12,ymm12,ymm8 8292 vpaddd ymm11,ymm11,ymm15 8293 vpaddd ymm10,ymm10,ymm14 8294 vpaddd ymm9,ymm9,ymm13 8295 vpaddd ymm8,ymm12,YMMWORD[((160+128))+rbp] 8296 vpxor ymm7,ymm7,ymm11 8297 vpxor ymm6,ymm6,ymm10 8298 vpxor ymm5,ymm5,ymm9 8299 vpxor ymm4,ymm4,ymm8 8300 vmovdqa YMMWORD[(160+128)+rbp],ymm8 8301 vpsrld ymm8,ymm7,25 8302 mov rdx,QWORD[((0+160+0))+rbp] 8303 mov r15,rdx 8304 mulx r14,r13,r10 8305 mulx rdx,rax,r11 8306 imul r15,r12 8307 add r14,rax 8308 adc r15,rdx 8309 vpslld ymm7,ymm7,32-25 8310 vpxor ymm7,ymm7,ymm8 8311 vpsrld ymm8,ymm6,25 8312 vpslld ymm6,ymm6,32-25 8313 vpxor ymm6,ymm6,ymm8 8314 vpsrld ymm8,ymm5,25 8315 vpslld ymm5,ymm5,32-25 8316 vpxor ymm5,ymm5,ymm8 8317 vpsrld ymm8,ymm4,25 8318 vpslld ymm4,ymm4,32-25 8319 vpxor ymm4,ymm4,ymm8 8320 vmovdqa ymm8,YMMWORD[((160+128))+rbp] 8321 vpalignr ymm7,ymm7,ymm7,12 8322 vpalignr ymm11,ymm11,ymm11,8 8323 vpalignr ymm15,ymm15,ymm15,4 8324 vpalignr ymm6,ymm6,ymm6,12 8325 vpalignr ymm10,ymm10,ymm10,8 8326 vpalignr ymm14,ymm14,ymm14,4 8327 vpalignr ymm5,ymm5,ymm5,12 8328 vpalignr ymm9,ymm9,ymm9,8 8329 mov rdx,QWORD[((8+160+0))+rbp] 8330 mulx rax,r10,r10 8331 add r14,r10 8332 mulx r9,r11,r11 8333 adc r15,r11 8334 adc r9,0 8335 imul rdx,r12 8336 vpalignr ymm13,ymm13,ymm13,4 8337 vpalignr ymm4,ymm4,ymm4,12 8338 vpalignr ymm8,ymm8,ymm8,8 8339 vpalignr ymm12,ymm12,ymm12,4 8340 8341 8342 8343 8344 8345 8346 8347 8348 8349 8350 8351 8352 8353 8354 8355 8356
; End of the loop body: finish the second reduction and advance rdi by 32, then
;   dec rcx / jg  -> $L$seal_avx2_tail_512_rounds_and_3xhash (defined above this excerpt)
;   dec r8  / jge -> $L$seal_avx2_tail_512_rounds_and_2xhash
; After the loop the starting state is added back: $L$chacha20_consts, the rows saved at
; [160+64+rbp] and [160+96+rbp], and one counter row per group at [160+160+rbp],
; [160+192+rbp], [160+224+rbp] and [160+256+rbp]. vperm2i128 regroups the 128-bit lanes,
; and three groups (3 * 128 bytes) are XORed with [rsi..rsi+383] and stored at
; [rdi..rdi+383]. The fourth group's 128 bytes are left in ymm0, ymm4, ymm8, ymm12.
; Finally rcx = 384, rsi += 384, rbx -= 384 and control jumps to
; $L$seal_avx2_short_hash_remainder, which hashes rcx bytes starting at rdi.
;
; $L$seal_avx2_320 begins part-way through the text below (three-group variant):
; (ymm0,ymm4,ymm8,ymm12) is duplicated into (ymm1,ymm5,ymm9,ymm13) and
; (ymm2,ymm6,ymm10,ymm14), with ymm13 = ymm12 + $L$avx2_inc and ymm14 = ymm13 + $L$avx2_inc.
; ymm7 and ymm11 keep copies of ymm4 and ymm8, the three counter rows are saved at
; [160+160+rbp], [160+192+rbp], [160+224+rbp], and r10 = 10 counts the passes of
; $L$seal_avx2_320_rounds. ymm3 is the shift temporary inside that loop.
add r15,rax 8357 adc r9,rdx 8358 8359 8360 8361 8362 8363 8364 8365 8366 8367 8368 8369 8370 8371 8372 8373 8374 8375 8376 8377 8378 mov r10,r13 8379 mov r11,r14 8380 mov r12,r15 8381 and r12,3 8382 mov r13,r15 8383 and r13,-4 8384 mov r14,r9 8385 shrd r15,r9,2 8386 shr r9,2 8387 add r15,r13 8388 adc r9,r14 8389 add r10,r15 8390 adc r11,r9 8391 adc r12,0 8392 8393 lea rdi,[32+rdi] 8394 dec rcx 8395 jg NEAR $L$seal_avx2_tail_512_rounds_and_3xhash 8396 dec r8 8397 jge NEAR $L$seal_avx2_tail_512_rounds_and_2xhash 8398 vpaddd ymm3,ymm3,YMMWORD[$L$chacha20_consts] 8399 vpaddd ymm7,ymm7,YMMWORD[((160+64))+rbp] 8400 vpaddd ymm11,ymm11,YMMWORD[((160+96))+rbp] 8401 vpaddd ymm15,ymm15,YMMWORD[((160+256))+rbp] 8402 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] 8403 vpaddd ymm6,ymm6,YMMWORD[((160+64))+rbp] 8404 vpaddd ymm10,ymm10,YMMWORD[((160+96))+rbp] 8405 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] 8406 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 8407 vpaddd ymm5,ymm5,YMMWORD[((160+64))+rbp] 8408 vpaddd ymm9,ymm9,YMMWORD[((160+96))+rbp] 8409 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 8410 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 8411 vpaddd ymm4,ymm4,YMMWORD[((160+64))+rbp] 8412 vpaddd ymm8,ymm8,YMMWORD[((160+96))+rbp] 8413 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 8414 8415 vmovdqa YMMWORD[(160+128)+rbp],ymm0 8416 vperm2i128 ymm0,ymm7,ymm3,0x02 8417 vperm2i128 ymm7,ymm7,ymm3,0x13 8418 vperm2i128 ymm3,ymm15,ymm11,0x02 8419 vperm2i128 ymm11,ymm15,ymm11,0x13 8420 vpxor ymm0,ymm0,YMMWORD[((0+0))+rsi] 8421 vpxor ymm3,ymm3,YMMWORD[((32+0))+rsi] 8422 vpxor ymm7,ymm7,YMMWORD[((64+0))+rsi] 8423 vpxor ymm11,ymm11,YMMWORD[((96+0))+rsi] 8424 vmovdqu YMMWORD[(0+0)+rdi],ymm0 8425 vmovdqu YMMWORD[(32+0)+rdi],ymm3 8426 vmovdqu YMMWORD[(64+0)+rdi],ymm7 8427 vmovdqu YMMWORD[(96+0)+rdi],ymm11 8428 8429 vmovdqa ymm0,YMMWORD[((160+128))+rbp] 8430 vperm2i128 ymm3,ymm6,ymm2,0x02 8431 vperm2i128 ymm6,ymm6,ymm2,0x13 8432 vperm2i128 ymm2,ymm14,ymm10,0x02 8433 vperm2i128
ymm10,ymm14,ymm10,0x13 8434 vpxor ymm3,ymm3,YMMWORD[((0+128))+rsi] 8435 vpxor ymm2,ymm2,YMMWORD[((32+128))+rsi] 8436 vpxor ymm6,ymm6,YMMWORD[((64+128))+rsi] 8437 vpxor ymm10,ymm10,YMMWORD[((96+128))+rsi] 8438 vmovdqu YMMWORD[(0+128)+rdi],ymm3 8439 vmovdqu YMMWORD[(32+128)+rdi],ymm2 8440 vmovdqu YMMWORD[(64+128)+rdi],ymm6 8441 vmovdqu YMMWORD[(96+128)+rdi],ymm10 8442 vperm2i128 ymm3,ymm5,ymm1,0x02 8443 vperm2i128 ymm5,ymm5,ymm1,0x13 8444 vperm2i128 ymm1,ymm13,ymm9,0x02 8445 vperm2i128 ymm9,ymm13,ymm9,0x13 8446 vpxor ymm3,ymm3,YMMWORD[((0+256))+rsi] 8447 vpxor ymm1,ymm1,YMMWORD[((32+256))+rsi] 8448 vpxor ymm5,ymm5,YMMWORD[((64+256))+rsi] 8449 vpxor ymm9,ymm9,YMMWORD[((96+256))+rsi] 8450 vmovdqu YMMWORD[(0+256)+rdi],ymm3 8451 vmovdqu YMMWORD[(32+256)+rdi],ymm1 8452 vmovdqu YMMWORD[(64+256)+rdi],ymm5 8453 vmovdqu YMMWORD[(96+256)+rdi],ymm9 8454 vperm2i128 ymm3,ymm4,ymm0,0x13 8455 vperm2i128 ymm0,ymm4,ymm0,0x02 8456 vperm2i128 ymm4,ymm12,ymm8,0x02 8457 vperm2i128 ymm12,ymm12,ymm8,0x13 8458 vmovdqa ymm8,ymm3 8459 8460 mov rcx,12*32 8461 lea rsi,[384+rsi] 8462 sub rbx,12*32 8463 jmp NEAR $L$seal_avx2_short_hash_remainder 8464 8465$L$seal_avx2_320: 8466 vmovdqa ymm1,ymm0 8467 vmovdqa ymm2,ymm0 8468 vmovdqa ymm5,ymm4 8469 vmovdqa ymm6,ymm4 8470 vmovdqa ymm9,ymm8 8471 vmovdqa ymm10,ymm8 8472 vpaddd ymm13,ymm12,YMMWORD[$L$avx2_inc] 8473 vpaddd ymm14,ymm13,YMMWORD[$L$avx2_inc] 8474 vmovdqa ymm7,ymm4 8475 vmovdqa ymm11,ymm8 8476 vmovdqa YMMWORD[(160+160)+rbp],ymm12 8477 vmovdqa YMMWORD[(160+192)+rbp],ymm13 8478 vmovdqa YMMWORD[(160+224)+rbp],ymm14 8479 mov r10,10 8480$L$seal_avx2_320_rounds: 8481 vpaddd ymm0,ymm0,ymm4 8482 vpxor ymm12,ymm12,ymm0 8483 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 8484 vpaddd ymm8,ymm8,ymm12 8485 vpxor ymm4,ymm4,ymm8 8486 vpsrld ymm3,ymm4,20 8487 vpslld ymm4,ymm4,12 8488 vpxor ymm4,ymm4,ymm3 8489 vpaddd ymm0,ymm0,ymm4 8490 vpxor ymm12,ymm12,ymm0 8491 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 8492 vpaddd ymm8,ymm8,ymm12 8493 vpxor ymm4,ymm4,ymm8 8494 vpslld
ymm3,ymm4,7 8495 vpsrld ymm4,ymm4,25 8496 vpxor ymm4,ymm4,ymm3 8497 vpalignr ymm12,ymm12,ymm12,12 8498 vpalignr ymm8,ymm8,ymm8,8 8499 vpalignr ymm4,ymm4,ymm4,4 8500 vpaddd ymm1,ymm1,ymm5 8501 vpxor ymm13,ymm13,ymm1 8502 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 8503 vpaddd ymm9,ymm9,ymm13 8504 vpxor ymm5,ymm5,ymm9 8505 vpsrld ymm3,ymm5,20 8506 vpslld ymm5,ymm5,12 8507 vpxor ymm5,ymm5,ymm3 8508 vpaddd ymm1,ymm1,ymm5 8509 vpxor ymm13,ymm13,ymm1 8510 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 8511 vpaddd ymm9,ymm9,ymm13 8512 vpxor ymm5,ymm5,ymm9 8513 vpslld ymm3,ymm5,7 8514 vpsrld ymm5,ymm5,25 8515 vpxor ymm5,ymm5,ymm3 8516 vpalignr ymm13,ymm13,ymm13,12 8517 vpalignr ymm9,ymm9,ymm9,8 8518 vpalignr ymm5,ymm5,ymm5,4 8519 vpaddd ymm2,ymm2,ymm6 8520 vpxor ymm14,ymm14,ymm2 8521 vpshufb ymm14,ymm14,YMMWORD[$L$rol16] 8522 vpaddd ymm10,ymm10,ymm14 8523 vpxor ymm6,ymm6,ymm10 8524 vpsrld ymm3,ymm6,20 8525 vpslld ymm6,ymm6,12 8526 vpxor ymm6,ymm6,ymm3 8527 vpaddd ymm2,ymm2,ymm6 8528 vpxor ymm14,ymm14,ymm2 8529 vpshufb ymm14,ymm14,YMMWORD[$L$rol8] 8530 vpaddd ymm10,ymm10,ymm14 8531 vpxor ymm6,ymm6,ymm10 8532 vpslld ymm3,ymm6,7 8533 vpsrld ymm6,ymm6,25 8534 vpxor ymm6,ymm6,ymm3 8535 vpalignr ymm14,ymm14,ymm14,12 8536 vpalignr ymm10,ymm10,ymm10,8 8537 vpalignr ymm6,ymm6,ymm6,4 8538 vpaddd ymm0,ymm0,ymm4 8539 vpxor ymm12,ymm12,ymm0 8540 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 8541 vpaddd ymm8,ymm8,ymm12 8542 vpxor ymm4,ymm4,ymm8 8543 vpsrld ymm3,ymm4,20 8544 vpslld ymm4,ymm4,12 8545 vpxor ymm4,ymm4,ymm3 8546 vpaddd ymm0,ymm0,ymm4 8547 vpxor ymm12,ymm12,ymm0 8548 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 8549 vpaddd ymm8,ymm8,ymm12 8550 vpxor ymm4,ymm4,ymm8 8551 vpslld ymm3,ymm4,7 8552 vpsrld ymm4,ymm4,25 8553 vpxor ymm4,ymm4,ymm3 8554 vpalignr ymm12,ymm12,ymm12,4 8555 vpalignr ymm8,ymm8,ymm8,8 8556 vpalignr ymm4,ymm4,ymm4,12 8557 vpaddd ymm1,ymm1,ymm5 8558 vpxor ymm13,ymm13,ymm1 8559 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 8560 vpaddd ymm9,ymm9,ymm13 8561 vpxor ymm5,ymm5,ymm9 8562 vpsrld ymm3,ymm5,20
; Remainder of the $L$seal_avx2_320_rounds body (second half round for the ymm1 and ymm2
; groups), then `dec r10 / jne` repeats it until ten passes are done.
; Finalisation: $L$chacha20_consts, ymm7, ymm11 and the three saved counter rows are added
; back. vperm2i128 ymm3,ymm4,ymm0,0x02 gathers the low 128-bit lanes of ymm0 and ymm4;
; that value is ANDed with $L$clamp and stored at [160+rbp], where the hashing code reads
; its two multiplier qwords. The remaining lanes are regrouped so that consecutive 32-byte
; pieces sit in ymm0, ymm4, ymm8, ymm12, ymm1, ymm5, ymm9, ymm13, ymm2, ymm6 -- the order
; in which $L$seal_avx2_short_loop consumes them -- and control jumps to
; $L$seal_avx2_short.
8563 vpslld ymm5,ymm5,12 8564 vpxor ymm5,ymm5,ymm3 8565 vpaddd ymm1,ymm1,ymm5 8566 vpxor ymm13,ymm13,ymm1 8567 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 8568 vpaddd ymm9,ymm9,ymm13 8569 vpxor ymm5,ymm5,ymm9 8570 vpslld ymm3,ymm5,7 8571 vpsrld ymm5,ymm5,25 8572 vpxor ymm5,ymm5,ymm3 8573 vpalignr ymm13,ymm13,ymm13,4 8574 vpalignr ymm9,ymm9,ymm9,8 8575 vpalignr ymm5,ymm5,ymm5,12 8576 vpaddd ymm2,ymm2,ymm6 8577 vpxor ymm14,ymm14,ymm2 8578 vpshufb ymm14,ymm14,YMMWORD[$L$rol16] 8579 vpaddd ymm10,ymm10,ymm14 8580 vpxor ymm6,ymm6,ymm10 8581 vpsrld ymm3,ymm6,20 8582 vpslld ymm6,ymm6,12 8583 vpxor ymm6,ymm6,ymm3 8584 vpaddd ymm2,ymm2,ymm6 8585 vpxor ymm14,ymm14,ymm2 8586 vpshufb ymm14,ymm14,YMMWORD[$L$rol8] 8587 vpaddd ymm10,ymm10,ymm14 8588 vpxor ymm6,ymm6,ymm10 8589 vpslld ymm3,ymm6,7 8590 vpsrld ymm6,ymm6,25 8591 vpxor ymm6,ymm6,ymm3 8592 vpalignr ymm14,ymm14,ymm14,4 8593 vpalignr ymm10,ymm10,ymm10,8 8594 vpalignr ymm6,ymm6,ymm6,12 8595 8596 dec r10 8597 jne NEAR $L$seal_avx2_320_rounds 8598 vpaddd ymm0,ymm0,YMMWORD[$L$chacha20_consts] 8599 vpaddd ymm1,ymm1,YMMWORD[$L$chacha20_consts] 8600 vpaddd ymm2,ymm2,YMMWORD[$L$chacha20_consts] 8601 vpaddd ymm4,ymm4,ymm7 8602 vpaddd ymm5,ymm5,ymm7 8603 vpaddd ymm6,ymm6,ymm7 8604 vpaddd ymm8,ymm8,ymm11 8605 vpaddd ymm9,ymm9,ymm11 8606 vpaddd ymm10,ymm10,ymm11 8607 vpaddd ymm12,ymm12,YMMWORD[((160+160))+rbp] 8608 vpaddd ymm13,ymm13,YMMWORD[((160+192))+rbp] 8609 vpaddd ymm14,ymm14,YMMWORD[((160+224))+rbp] 8610 vperm2i128 ymm3,ymm4,ymm0,0x02 8611 8612 vpand ymm3,ymm3,YMMWORD[$L$clamp] 8613 vmovdqa YMMWORD[(160+0)+rbp],ymm3 8614 8615 vperm2i128 ymm0,ymm4,ymm0,0x13 8616 vperm2i128 ymm4,ymm12,ymm8,0x13 8617 vperm2i128 ymm8,ymm5,ymm1,0x02 8618 vperm2i128 ymm12,ymm13,ymm9,0x02 8619 vperm2i128 ymm1,ymm5,ymm1,0x13 8620 vperm2i128 ymm5,ymm13,ymm9,0x13 8621 vperm2i128 ymm9,ymm6,ymm2,0x02 8622 vperm2i128 ymm13,ymm14,ymm10,0x02 8623 vperm2i128 ymm2,ymm6,ymm2,0x13 8624 vperm2i128 ymm6,ymm14,ymm10,0x13 8625 jmp NEAR $L$seal_avx2_short 8626
; $L$seal_avx2_192 -- two-group variant (setup, ten double rounds, key extraction) that
; falls through into the shared short path below.
;
; Setup: (ymm0,ymm4,ymm8,ymm12) is duplicated into (ymm1,ymm5,ymm9,ymm13) with
; ymm13 = ymm12 + $L$avx2_inc. ymm2, ymm6, ymm10 keep untouched copies of ymm0, ymm4,
; ymm8, and ymm11 / ymm15 keep the two counter rows. r10 = 10 is the loop counter.
;
; $L$seal_avx2_192_rounds: each pass applies add/xor/rotate steps (rotations 16, 12, 8, 7;
; ymm3 is the shift temporary) to both groups, shuffles lanes with vpalignr by 4/8/12,
; repeats the steps, and undoes the shuffle with 12/8/4.
; After the loop the saved copies are added back. vperm2i128 ymm3,ymm4,ymm0,0x02 gathers
; the low 128-bit lanes of ymm0 and ymm4; that value is ANDed with $L$clamp and stored at
; [160+rbp], where the hashing code below reads its multipliers ([160+rbp], [168+rbp]).
; The remaining lanes are regrouped as consecutive 32-byte pieces in ymm0, ymm4, ymm8,
; ymm12, ymm1, ymm5.
;
; $L$seal_avx2_short: `mov r8,r8` has no effect as written. poly_hash_ad_internal (top of
; the file) zeroes r10/r11/r12 and hashes r8 bytes starting at rcx; rcx is then cleared.
; NOTE(review): rcx/r8 are set up by code outside this excerpt -- confirm at the callers.
;
; $L$seal_avx2_short_hash_remainder: while rcx >= 16, absorb the 16 bytes at [rdi] into
; r12:r11:r10 (adding 1 into r12), multiply by the qwords at [160+rbp] and [168+rbp], and
; reduce: with x = r9:r15 holding the bits from 2^128 up, r12 keeps x & 3 and
; (x & -4) + (x >> 2), i.e. 5 * (x >> 2), is added back. Then rcx -= 16, rdi += 16.
;
; $L$seal_avx2_short_loop: while rbx >= 32, XOR the 32 bytes at rsi with ymm0, store them
; at rdi, hash those 32 output bytes as two 16-byte blocks, advance rsi and rdi by 32, and
; shift the register queue down by one (ymm0 <- ymm4 <- ymm8 <- ymm12 <- ymm1 <- ...).
8627$L$seal_avx2_192: 8628 vmovdqa ymm1,ymm0 8629 vmovdqa ymm2,ymm0 8630 vmovdqa ymm5,ymm4 8631 vmovdqa ymm6,ymm4 8632 vmovdqa ymm9,ymm8 8633 vmovdqa ymm10,ymm8 8634 vpaddd ymm13,ymm12,YMMWORD[$L$avx2_inc] 8635 vmovdqa ymm11,ymm12 8636 vmovdqa ymm15,ymm13 8637 mov r10,10 8638$L$seal_avx2_192_rounds: 8639 vpaddd ymm0,ymm0,ymm4 8640 vpxor ymm12,ymm12,ymm0 8641 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 8642 vpaddd ymm8,ymm8,ymm12 8643 vpxor ymm4,ymm4,ymm8 8644 vpsrld ymm3,ymm4,20 8645 vpslld ymm4,ymm4,12 8646 vpxor ymm4,ymm4,ymm3 8647 vpaddd ymm0,ymm0,ymm4 8648 vpxor ymm12,ymm12,ymm0 8649 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 8650 vpaddd ymm8,ymm8,ymm12 8651 vpxor ymm4,ymm4,ymm8 8652 vpslld ymm3,ymm4,7 8653 vpsrld ymm4,ymm4,25 8654 vpxor ymm4,ymm4,ymm3 8655 vpalignr ymm12,ymm12,ymm12,12 8656 vpalignr ymm8,ymm8,ymm8,8 8657 vpalignr ymm4,ymm4,ymm4,4 8658 vpaddd ymm1,ymm1,ymm5 8659 vpxor ymm13,ymm13,ymm1 8660 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 8661 vpaddd ymm9,ymm9,ymm13 8662 vpxor ymm5,ymm5,ymm9 8663 vpsrld ymm3,ymm5,20 8664 vpslld ymm5,ymm5,12 8665 vpxor ymm5,ymm5,ymm3 8666 vpaddd ymm1,ymm1,ymm5 8667 vpxor ymm13,ymm13,ymm1 8668 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 8669 vpaddd ymm9,ymm9,ymm13 8670 vpxor ymm5,ymm5,ymm9 8671 vpslld ymm3,ymm5,7 8672 vpsrld ymm5,ymm5,25 8673 vpxor ymm5,ymm5,ymm3 8674 vpalignr ymm13,ymm13,ymm13,12 8675 vpalignr ymm9,ymm9,ymm9,8 8676 vpalignr ymm5,ymm5,ymm5,4 8677 vpaddd ymm0,ymm0,ymm4 8678 vpxor ymm12,ymm12,ymm0 8679 vpshufb ymm12,ymm12,YMMWORD[$L$rol16] 8680 vpaddd ymm8,ymm8,ymm12 8681 vpxor ymm4,ymm4,ymm8 8682 vpsrld ymm3,ymm4,20 8683 vpslld ymm4,ymm4,12 8684 vpxor ymm4,ymm4,ymm3 8685 vpaddd ymm0,ymm0,ymm4 8686 vpxor ymm12,ymm12,ymm0 8687 vpshufb ymm12,ymm12,YMMWORD[$L$rol8] 8688 vpaddd ymm8,ymm8,ymm12 8689 vpxor ymm4,ymm4,ymm8 8690 vpslld ymm3,ymm4,7 8691 vpsrld ymm4,ymm4,25 8692 vpxor ymm4,ymm4,ymm3 8693 vpalignr ymm12,ymm12,ymm12,4 8694 vpalignr ymm8,ymm8,ymm8,8 8695 vpalignr ymm4,ymm4,ymm4,12 8696 vpaddd ymm1,ymm1,ymm5 8697 vpxor
ymm13,ymm13,ymm1 8698 vpshufb ymm13,ymm13,YMMWORD[$L$rol16] 8699 vpaddd ymm9,ymm9,ymm13 8700 vpxor ymm5,ymm5,ymm9 8701 vpsrld ymm3,ymm5,20 8702 vpslld ymm5,ymm5,12 8703 vpxor ymm5,ymm5,ymm3 8704 vpaddd ymm1,ymm1,ymm5 8705 vpxor ymm13,ymm13,ymm1 8706 vpshufb ymm13,ymm13,YMMWORD[$L$rol8] 8707 vpaddd ymm9,ymm9,ymm13 8708 vpxor ymm5,ymm5,ymm9 8709 vpslld ymm3,ymm5,7 8710 vpsrld ymm5,ymm5,25 8711 vpxor ymm5,ymm5,ymm3 8712 vpalignr ymm13,ymm13,ymm13,4 8713 vpalignr ymm9,ymm9,ymm9,8 8714 vpalignr ymm5,ymm5,ymm5,12 8715 8716 dec r10 8717 jne NEAR $L$seal_avx2_192_rounds 8718 vpaddd ymm0,ymm0,ymm2 8719 vpaddd ymm1,ymm1,ymm2 8720 vpaddd ymm4,ymm4,ymm6 8721 vpaddd ymm5,ymm5,ymm6 8722 vpaddd ymm8,ymm8,ymm10 8723 vpaddd ymm9,ymm9,ymm10 8724 vpaddd ymm12,ymm12,ymm11 8725 vpaddd ymm13,ymm13,ymm15 8726 vperm2i128 ymm3,ymm4,ymm0,0x02 8727 8728 vpand ymm3,ymm3,YMMWORD[$L$clamp] 8729 vmovdqa YMMWORD[(160+0)+rbp],ymm3 8730 8731 vperm2i128 ymm0,ymm4,ymm0,0x13 8732 vperm2i128 ymm4,ymm12,ymm8,0x13 8733 vperm2i128 ymm8,ymm5,ymm1,0x02 8734 vperm2i128 ymm12,ymm13,ymm9,0x02 8735 vperm2i128 ymm1,ymm5,ymm1,0x13 8736 vperm2i128 ymm5,ymm13,ymm9,0x13 8737$L$seal_avx2_short: 8738 mov r8,r8 8739 call poly_hash_ad_internal 8740 xor rcx,rcx 8741$L$seal_avx2_short_hash_remainder: 8742 cmp rcx,16 8743 jb NEAR $L$seal_avx2_short_loop 8744 add r10,QWORD[((0+0))+rdi] 8745 adc r11,QWORD[((8+0))+rdi] 8746 adc r12,1 8747 mov rax,QWORD[((0+160+0))+rbp] 8748 mov r15,rax 8749 mul r10 8750 mov r13,rax 8751 mov r14,rdx 8752 mov rax,QWORD[((0+160+0))+rbp] 8753 mul r11 8754 imul r15,r12 8755 add r14,rax 8756 adc r15,rdx 8757 mov rax,QWORD[((8+160+0))+rbp] 8758 mov r9,rax 8759 mul r10 8760 add r14,rax 8761 adc rdx,0 8762 mov r10,rdx 8763 mov rax,QWORD[((8+160+0))+rbp] 8764 mul r11 8765 add r15,rax 8766 adc rdx,0 8767 imul r9,r12 8768 add r15,r10 8769 adc r9,rdx 8770 mov r10,r13 8771 mov r11,r14 8772 mov r12,r15 8773 and r12,3 8774 mov r13,r15 8775 and r13,-4 8776 mov r14,r9 8777 shrd r15,r9,2 8778 shr r9,2 8779 add
r15,r13 8780 adc r9,r14 8781 add r10,r15 8782 adc r11,r9 8783 adc r12,0 8784 8785 sub rcx,16 8786 add rdi,16 8787 jmp NEAR $L$seal_avx2_short_hash_remainder 8788$L$seal_avx2_short_loop: 8789 cmp rbx,32 8790 jb NEAR $L$seal_avx2_short_tail 8791 sub rbx,32 8792 8793 vpxor ymm0,ymm0,YMMWORD[rsi] 8794 vmovdqu YMMWORD[rdi],ymm0 8795 lea rsi,[32+rsi] 8796 8797 add r10,QWORD[((0+0))+rdi] 8798 adc r11,QWORD[((8+0))+rdi] 8799 adc r12,1 8800 mov rax,QWORD[((0+160+0))+rbp] 8801 mov r15,rax 8802 mul r10 8803 mov r13,rax 8804 mov r14,rdx 8805 mov rax,QWORD[((0+160+0))+rbp] 8806 mul r11 8807 imul r15,r12 8808 add r14,rax 8809 adc r15,rdx 8810 mov rax,QWORD[((8+160+0))+rbp] 8811 mov r9,rax 8812 mul r10 8813 add r14,rax 8814 adc rdx,0 8815 mov r10,rdx 8816 mov rax,QWORD[((8+160+0))+rbp] 8817 mul r11 8818 add r15,rax 8819 adc rdx,0 8820 imul r9,r12 8821 add r15,r10 8822 adc r9,rdx 8823 mov r10,r13 8824 mov r11,r14 8825 mov r12,r15 8826 and r12,3 8827 mov r13,r15 8828 and r13,-4 8829 mov r14,r9 8830 shrd r15,r9,2 8831 shr r9,2 8832 add r15,r13 8833 adc r9,r14 8834 add r10,r15 8835 adc r11,r9 8836 adc r12,0 8837 add r10,QWORD[((0+16))+rdi] 8838 adc r11,QWORD[((8+16))+rdi] 8839 adc r12,1 8840 mov rax,QWORD[((0+160+0))+rbp] 8841 mov r15,rax 8842 mul r10 8843 mov r13,rax 8844 mov r14,rdx 8845 mov rax,QWORD[((0+160+0))+rbp] 8846 mul r11 8847 imul r15,r12 8848 add r14,rax 8849 adc r15,rdx 8850 mov rax,QWORD[((8+160+0))+rbp] 8851 mov r9,rax 8852 mul r10 8853 add r14,rax 8854 adc rdx,0 8855 mov r10,rdx 8856 mov rax,QWORD[((8+160+0))+rbp] 8857 mul r11 8858 add r15,rax 8859 adc rdx,0 8860 imul r9,r12 8861 add r15,r10 8862 adc r9,rdx 8863 mov r10,r13 8864 mov r11,r14 8865 mov r12,r15 8866 and r12,3 8867 mov r13,r15 8868 and r13,-4 8869 mov r14,r9 8870 shrd r15,r9,2 8871 shr r9,2 8872 add r15,r13 8873 adc r9,r14 8874 add r10,r15 8875 adc r11,r9 8876 adc r12,0 8877 8878 lea rdi,[32+rdi] 8879 8880 vmovdqa ymm0,ymm4 8881 vmovdqa ymm4,ymm8 8882 vmovdqa ymm8,ymm12 8883 vmovdqa ymm12,ymm1 8884
; Rest of the queue shift (ymm1 <- ymm5 <- ymm9 <- ymm13 <- ymm2 <- ymm6), then back to
; $L$seal_avx2_short_loop.
; $L$seal_avx2_short_tail: when rbx >= 16, one more 16-byte block is XORed with xmm0,
; written to rdi and hashed; rsi and rdi advance by 16, rbx drops by 16, and the upper
; 128 bits of ymm0 are moved into xmm0.
; $L$seal_avx2_exit: vzeroupper, then jump to $L$seal_sse_tail_16 (defined outside this
; excerpt). NOTE(review): presumably that code finishes the last <16 bytes using xmm0 --
; confirm there.
vmovdqa ymm1,ymm5 8885 vmovdqa ymm5,ymm9 8886 vmovdqa ymm9,ymm13 8887 vmovdqa ymm13,ymm2 8888 vmovdqa ymm2,ymm6 8889 jmp NEAR $L$seal_avx2_short_loop 8890$L$seal_avx2_short_tail: 8891 cmp rbx,16 8892 jb NEAR $L$seal_avx2_exit 8893 sub rbx,16 8894 vpxor xmm3,xmm0,XMMWORD[rsi] 8895 vmovdqu XMMWORD[rdi],xmm3 8896 lea rsi,[16+rsi] 8897 add r10,QWORD[((0+0))+rdi] 8898 adc r11,QWORD[((8+0))+rdi] 8899 adc r12,1 8900 mov rax,QWORD[((0+160+0))+rbp] 8901 mov r15,rax 8902 mul r10 8903 mov r13,rax 8904 mov r14,rdx 8905 mov rax,QWORD[((0+160+0))+rbp] 8906 mul r11 8907 imul r15,r12 8908 add r14,rax 8909 adc r15,rdx 8910 mov rax,QWORD[((8+160+0))+rbp] 8911 mov r9,rax 8912 mul r10 8913 add r14,rax 8914 adc rdx,0 8915 mov r10,rdx 8916 mov rax,QWORD[((8+160+0))+rbp] 8917 mul r11 8918 add r15,rax 8919 adc rdx,0 8920 imul r9,r12 8921 add r15,r10 8922 adc r9,rdx 8923 mov r10,r13 8924 mov r11,r14 8925 mov r12,r15 8926 and r12,3 8927 mov r13,r15 8928 and r13,-4 8929 mov r14,r9 8930 shrd r15,r9,2 8931 shr r9,2 8932 add r15,r13 8933 adc r9,r14 8934 add r10,r15 8935 adc r11,r9 8936 adc r12,0 8937 8938 lea rdi,[16+rdi] 8939 vextracti128 xmm0,ymm0,1 8940$L$seal_avx2_exit: 8941 vzeroupper 8942 jmp NEAR $L$seal_sse_tail_16 8943 8944 8945