# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text
.extern GFp_ia32cap_P
.hidden GFp_ia32cap_P

chacha20_poly1305_constants:

.align 64
.Lchacha20_consts:
.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
.Lrol8:
.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
.Lrol16:
.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
.Lavx2_init:
.long 0,0,0,0
.Lsse_inc:
.long 1,0,0,0
.Lavx2_inc:
.long 2,0,0,0,2,0,0,0
.Lclamp:
.quad 0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC
.quad 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
.align 16
.Land_masks:
.byte 0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff

.type poly_hash_ad_internal,@function
.align 64
poly_hash_ad_internal:
.cfi_startproc
.cfi_def_cfa rsp, 8
	xorq %r10,%r10
	xorq %r11,%r11
	xorq %r12,%r12
	cmpq $13,%r8
	jne .Lhash_ad_loop
.Lpoly_fast_tls_ad:

	movq (%rcx),%r10
	movq 5(%rcx),%r11
	shrq $24,%r11
	movq $1,%r12
	movq 0+0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r15
	adcq %r14,%r9
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	.byte 0xf3,0xc3
.Lhash_ad_loop:

	cmpq $16,%r8
	jb .Lhash_ad_tail
	addq 0+0(%rcx),%r10
	adcq 8+0(%rcx),%r11
	adcq
$1,%r12 117 movq 0+0+0(%rbp),%rax 118 movq %rax,%r15 119 mulq %r10 120 movq %rax,%r13 121 movq %rdx,%r14 122 movq 0+0+0(%rbp),%rax 123 mulq %r11 124 imulq %r12,%r15 125 addq %rax,%r14 126 adcq %rdx,%r15 127 movq 8+0+0(%rbp),%rax 128 movq %rax,%r9 129 mulq %r10 130 addq %rax,%r14 131 adcq $0,%rdx 132 movq %rdx,%r10 133 movq 8+0+0(%rbp),%rax 134 mulq %r11 135 addq %rax,%r15 136 adcq $0,%rdx 137 imulq %r12,%r9 138 addq %r10,%r15 139 adcq %rdx,%r9 140 movq %r13,%r10 141 movq %r14,%r11 142 movq %r15,%r12 143 andq $3,%r12 144 movq %r15,%r13 145 andq $-4,%r13 146 movq %r9,%r14 147 shrdq $2,%r9,%r15 148 shrq $2,%r9 149 addq %r13,%r15 150 adcq %r14,%r9 151 addq %r15,%r10 152 adcq %r9,%r11 153 adcq $0,%r12 154 155 leaq 16(%rcx),%rcx 156 subq $16,%r8 157 jmp .Lhash_ad_loop 158.Lhash_ad_tail: 159 cmpq $0,%r8 160 je .Lhash_ad_done 161 162 xorq %r13,%r13 163 xorq %r14,%r14 164 xorq %r15,%r15 165 addq %r8,%rcx 166.Lhash_ad_tail_loop: 167 shldq $8,%r13,%r14 168 shlq $8,%r13 169 movzbq -1(%rcx),%r15 170 xorq %r15,%r13 171 decq %rcx 172 decq %r8 173 jne .Lhash_ad_tail_loop 174 175 addq %r13,%r10 176 adcq %r14,%r11 177 adcq $1,%r12 178 movq 0+0+0(%rbp),%rax 179 movq %rax,%r15 180 mulq %r10 181 movq %rax,%r13 182 movq %rdx,%r14 183 movq 0+0+0(%rbp),%rax 184 mulq %r11 185 imulq %r12,%r15 186 addq %rax,%r14 187 adcq %rdx,%r15 188 movq 8+0+0(%rbp),%rax 189 movq %rax,%r9 190 mulq %r10 191 addq %rax,%r14 192 adcq $0,%rdx 193 movq %rdx,%r10 194 movq 8+0+0(%rbp),%rax 195 mulq %r11 196 addq %rax,%r15 197 adcq $0,%rdx 198 imulq %r12,%r9 199 addq %r10,%r15 200 adcq %rdx,%r9 201 movq %r13,%r10 202 movq %r14,%r11 203 movq %r15,%r12 204 andq $3,%r12 205 movq %r15,%r13 206 andq $-4,%r13 207 movq %r9,%r14 208 shrdq $2,%r9,%r15 209 shrq $2,%r9 210 addq %r13,%r15 211 adcq %r14,%r9 212 addq %r15,%r10 213 adcq %r9,%r11 214 adcq $0,%r12 215 216 217.Lhash_ad_done: 218 .byte 0xf3,0xc3 219.cfi_endproc 220.size poly_hash_ad_internal, .-poly_hash_ad_internal 221 222.globl GFp_chacha20_poly1305_open 223.hidden GFp_chacha20_poly1305_open 224.type GFp_chacha20_poly1305_open,@function 225.align 64 226GFp_chacha20_poly1305_open: 227.cfi_startproc 228 pushq %rbp 229.cfi_adjust_cfa_offset 8 230.cfi_offset %rbp,-16 231 pushq %rbx 232.cfi_adjust_cfa_offset 8 233.cfi_offset %rbx,-24 234 pushq %r12 235.cfi_adjust_cfa_offset 8 236.cfi_offset %r12,-32 237 pushq %r13 238.cfi_adjust_cfa_offset 8 239.cfi_offset %r13,-40 240 pushq %r14 241.cfi_adjust_cfa_offset 8 242.cfi_offset %r14,-48 243 pushq %r15 244.cfi_adjust_cfa_offset 8 245.cfi_offset %r15,-56 246 247 248 pushq %r9 249.cfi_adjust_cfa_offset 8 250.cfi_offset %r9,-64 251 subq $288 + 0 + 32,%rsp 252.cfi_adjust_cfa_offset 288 + 32 253 254 leaq 32(%rsp),%rbp 255 andq $-32,%rbp 256 257 movq %rdx,%rbx 258 movq %r8,0+0+32(%rbp) 259 movq %rbx,8+0+32(%rbp) 260 261 movl GFp_ia32cap_P+8(%rip),%eax 262 andl $288,%eax 263 xorl $288,%eax 264 jz chacha20_poly1305_open_avx2 265 266 cmpq $128,%rbx 267 jbe .Lopen_sse_128 268 269 movdqa .Lchacha20_consts(%rip),%xmm0 270 movdqu 0(%r9),%xmm4 271 movdqu 16(%r9),%xmm8 272 movdqu 32(%r9),%xmm12 273 274 movdqa %xmm12,%xmm7 275 276 movdqa %xmm4,0+48(%rbp) 277 movdqa %xmm8,0+64(%rbp) 278 movdqa %xmm12,0+96(%rbp) 279 movq $10,%r10 280.Lopen_sse_init_rounds: 281 paddd %xmm4,%xmm0 282 pxor %xmm0,%xmm12 283 pshufb .Lrol16(%rip),%xmm12 284 paddd %xmm12,%xmm8 285 pxor %xmm8,%xmm4 286 movdqa %xmm4,%xmm3 287 pslld $12,%xmm3 288 psrld $20,%xmm4 289 pxor %xmm3,%xmm4 290 paddd %xmm4,%xmm0 291 pxor %xmm0,%xmm12 292 pshufb .Lrol8(%rip),%xmm12 293 paddd %xmm12,%xmm8 294 pxor 
%xmm8,%xmm4 295 movdqa %xmm4,%xmm3 296 pslld $7,%xmm3 297 psrld $25,%xmm4 298 pxor %xmm3,%xmm4 299.byte 102,15,58,15,228,4 300.byte 102,69,15,58,15,192,8 301.byte 102,69,15,58,15,228,12 302 paddd %xmm4,%xmm0 303 pxor %xmm0,%xmm12 304 pshufb .Lrol16(%rip),%xmm12 305 paddd %xmm12,%xmm8 306 pxor %xmm8,%xmm4 307 movdqa %xmm4,%xmm3 308 pslld $12,%xmm3 309 psrld $20,%xmm4 310 pxor %xmm3,%xmm4 311 paddd %xmm4,%xmm0 312 pxor %xmm0,%xmm12 313 pshufb .Lrol8(%rip),%xmm12 314 paddd %xmm12,%xmm8 315 pxor %xmm8,%xmm4 316 movdqa %xmm4,%xmm3 317 pslld $7,%xmm3 318 psrld $25,%xmm4 319 pxor %xmm3,%xmm4 320.byte 102,15,58,15,228,12 321.byte 102,69,15,58,15,192,8 322.byte 102,69,15,58,15,228,4 323 324 decq %r10 325 jne .Lopen_sse_init_rounds 326 327 paddd .Lchacha20_consts(%rip),%xmm0 328 paddd 0+48(%rbp),%xmm4 329 330 pand .Lclamp(%rip),%xmm0 331 movdqa %xmm0,0+0(%rbp) 332 movdqa %xmm4,0+16(%rbp) 333 334 movq %r8,%r8 335 call poly_hash_ad_internal 336.Lopen_sse_main_loop: 337 cmpq $256,%rbx 338 jb .Lopen_sse_tail 339 340 movdqa .Lchacha20_consts(%rip),%xmm0 341 movdqa 0+48(%rbp),%xmm4 342 movdqa 0+64(%rbp),%xmm8 343 movdqa %xmm0,%xmm1 344 movdqa %xmm4,%xmm5 345 movdqa %xmm8,%xmm9 346 movdqa %xmm0,%xmm2 347 movdqa %xmm4,%xmm6 348 movdqa %xmm8,%xmm10 349 movdqa %xmm0,%xmm3 350 movdqa %xmm4,%xmm7 351 movdqa %xmm8,%xmm11 352 movdqa 0+96(%rbp),%xmm15 353 paddd .Lsse_inc(%rip),%xmm15 354 movdqa %xmm15,%xmm14 355 paddd .Lsse_inc(%rip),%xmm14 356 movdqa %xmm14,%xmm13 357 paddd .Lsse_inc(%rip),%xmm13 358 movdqa %xmm13,%xmm12 359 paddd .Lsse_inc(%rip),%xmm12 360 movdqa %xmm12,0+96(%rbp) 361 movdqa %xmm13,0+112(%rbp) 362 movdqa %xmm14,0+128(%rbp) 363 movdqa %xmm15,0+144(%rbp) 364 365 366 367 movq $4,%rcx 368 movq %rsi,%r8 369.Lopen_sse_main_loop_rounds: 370 movdqa %xmm8,0+80(%rbp) 371 movdqa .Lrol16(%rip),%xmm8 372 paddd %xmm7,%xmm3 373 paddd %xmm6,%xmm2 374 paddd %xmm5,%xmm1 375 paddd %xmm4,%xmm0 376 pxor %xmm3,%xmm15 377 pxor %xmm2,%xmm14 378 pxor %xmm1,%xmm13 379 pxor %xmm0,%xmm12 380.byte 102,69,15,56,0,248 381.byte 102,69,15,56,0,240 382.byte 102,69,15,56,0,232 383.byte 102,69,15,56,0,224 384 movdqa 0+80(%rbp),%xmm8 385 paddd %xmm15,%xmm11 386 paddd %xmm14,%xmm10 387 paddd %xmm13,%xmm9 388 paddd %xmm12,%xmm8 389 pxor %xmm11,%xmm7 390 addq 0+0(%r8),%r10 391 adcq 8+0(%r8),%r11 392 adcq $1,%r12 393 394 leaq 16(%r8),%r8 395 pxor %xmm10,%xmm6 396 pxor %xmm9,%xmm5 397 pxor %xmm8,%xmm4 398 movdqa %xmm8,0+80(%rbp) 399 movdqa %xmm7,%xmm8 400 psrld $20,%xmm8 401 pslld $32-20,%xmm7 402 pxor %xmm8,%xmm7 403 movdqa %xmm6,%xmm8 404 psrld $20,%xmm8 405 pslld $32-20,%xmm6 406 pxor %xmm8,%xmm6 407 movdqa %xmm5,%xmm8 408 psrld $20,%xmm8 409 pslld $32-20,%xmm5 410 pxor %xmm8,%xmm5 411 movdqa %xmm4,%xmm8 412 psrld $20,%xmm8 413 pslld $32-20,%xmm4 414 pxor %xmm8,%xmm4 415 movq 0+0+0(%rbp),%rax 416 movq %rax,%r15 417 mulq %r10 418 movq %rax,%r13 419 movq %rdx,%r14 420 movq 0+0+0(%rbp),%rax 421 mulq %r11 422 imulq %r12,%r15 423 addq %rax,%r14 424 adcq %rdx,%r15 425 movdqa .Lrol8(%rip),%xmm8 426 paddd %xmm7,%xmm3 427 paddd %xmm6,%xmm2 428 paddd %xmm5,%xmm1 429 paddd %xmm4,%xmm0 430 pxor %xmm3,%xmm15 431 pxor %xmm2,%xmm14 432 pxor %xmm1,%xmm13 433 pxor %xmm0,%xmm12 434.byte 102,69,15,56,0,248 435.byte 102,69,15,56,0,240 436.byte 102,69,15,56,0,232 437.byte 102,69,15,56,0,224 438 movdqa 0+80(%rbp),%xmm8 439 paddd %xmm15,%xmm11 440 paddd %xmm14,%xmm10 441 paddd %xmm13,%xmm9 442 paddd %xmm12,%xmm8 443 pxor %xmm11,%xmm7 444 pxor %xmm10,%xmm6 445 movq 8+0+0(%rbp),%rax 446 movq %rax,%r9 447 mulq %r10 448 addq %rax,%r14 449 adcq $0,%rdx 450 movq 
%rdx,%r10 451 movq 8+0+0(%rbp),%rax 452 mulq %r11 453 addq %rax,%r15 454 adcq $0,%rdx 455 pxor %xmm9,%xmm5 456 pxor %xmm8,%xmm4 457 movdqa %xmm8,0+80(%rbp) 458 movdqa %xmm7,%xmm8 459 psrld $25,%xmm8 460 pslld $32-25,%xmm7 461 pxor %xmm8,%xmm7 462 movdqa %xmm6,%xmm8 463 psrld $25,%xmm8 464 pslld $32-25,%xmm6 465 pxor %xmm8,%xmm6 466 movdqa %xmm5,%xmm8 467 psrld $25,%xmm8 468 pslld $32-25,%xmm5 469 pxor %xmm8,%xmm5 470 movdqa %xmm4,%xmm8 471 psrld $25,%xmm8 472 pslld $32-25,%xmm4 473 pxor %xmm8,%xmm4 474 movdqa 0+80(%rbp),%xmm8 475 imulq %r12,%r9 476 addq %r10,%r15 477 adcq %rdx,%r9 478.byte 102,15,58,15,255,4 479.byte 102,69,15,58,15,219,8 480.byte 102,69,15,58,15,255,12 481.byte 102,15,58,15,246,4 482.byte 102,69,15,58,15,210,8 483.byte 102,69,15,58,15,246,12 484.byte 102,15,58,15,237,4 485.byte 102,69,15,58,15,201,8 486.byte 102,69,15,58,15,237,12 487.byte 102,15,58,15,228,4 488.byte 102,69,15,58,15,192,8 489.byte 102,69,15,58,15,228,12 490 movdqa %xmm8,0+80(%rbp) 491 movdqa .Lrol16(%rip),%xmm8 492 paddd %xmm7,%xmm3 493 paddd %xmm6,%xmm2 494 paddd %xmm5,%xmm1 495 paddd %xmm4,%xmm0 496 pxor %xmm3,%xmm15 497 pxor %xmm2,%xmm14 498 movq %r13,%r10 499 movq %r14,%r11 500 movq %r15,%r12 501 andq $3,%r12 502 movq %r15,%r13 503 andq $-4,%r13 504 movq %r9,%r14 505 shrdq $2,%r9,%r15 506 shrq $2,%r9 507 addq %r13,%r15 508 adcq %r14,%r9 509 addq %r15,%r10 510 adcq %r9,%r11 511 adcq $0,%r12 512 pxor %xmm1,%xmm13 513 pxor %xmm0,%xmm12 514.byte 102,69,15,56,0,248 515.byte 102,69,15,56,0,240 516.byte 102,69,15,56,0,232 517.byte 102,69,15,56,0,224 518 movdqa 0+80(%rbp),%xmm8 519 paddd %xmm15,%xmm11 520 paddd %xmm14,%xmm10 521 paddd %xmm13,%xmm9 522 paddd %xmm12,%xmm8 523 pxor %xmm11,%xmm7 524 pxor %xmm10,%xmm6 525 pxor %xmm9,%xmm5 526 pxor %xmm8,%xmm4 527 movdqa %xmm8,0+80(%rbp) 528 movdqa %xmm7,%xmm8 529 psrld $20,%xmm8 530 pslld $32-20,%xmm7 531 pxor %xmm8,%xmm7 532 movdqa %xmm6,%xmm8 533 psrld $20,%xmm8 534 pslld $32-20,%xmm6 535 pxor %xmm8,%xmm6 536 movdqa %xmm5,%xmm8 537 psrld $20,%xmm8 538 pslld $32-20,%xmm5 539 pxor %xmm8,%xmm5 540 movdqa %xmm4,%xmm8 541 psrld $20,%xmm8 542 pslld $32-20,%xmm4 543 pxor %xmm8,%xmm4 544 movdqa .Lrol8(%rip),%xmm8 545 paddd %xmm7,%xmm3 546 paddd %xmm6,%xmm2 547 paddd %xmm5,%xmm1 548 paddd %xmm4,%xmm0 549 pxor %xmm3,%xmm15 550 pxor %xmm2,%xmm14 551 pxor %xmm1,%xmm13 552 pxor %xmm0,%xmm12 553.byte 102,69,15,56,0,248 554.byte 102,69,15,56,0,240 555.byte 102,69,15,56,0,232 556.byte 102,69,15,56,0,224 557 movdqa 0+80(%rbp),%xmm8 558 paddd %xmm15,%xmm11 559 paddd %xmm14,%xmm10 560 paddd %xmm13,%xmm9 561 paddd %xmm12,%xmm8 562 pxor %xmm11,%xmm7 563 pxor %xmm10,%xmm6 564 pxor %xmm9,%xmm5 565 pxor %xmm8,%xmm4 566 movdqa %xmm8,0+80(%rbp) 567 movdqa %xmm7,%xmm8 568 psrld $25,%xmm8 569 pslld $32-25,%xmm7 570 pxor %xmm8,%xmm7 571 movdqa %xmm6,%xmm8 572 psrld $25,%xmm8 573 pslld $32-25,%xmm6 574 pxor %xmm8,%xmm6 575 movdqa %xmm5,%xmm8 576 psrld $25,%xmm8 577 pslld $32-25,%xmm5 578 pxor %xmm8,%xmm5 579 movdqa %xmm4,%xmm8 580 psrld $25,%xmm8 581 pslld $32-25,%xmm4 582 pxor %xmm8,%xmm4 583 movdqa 0+80(%rbp),%xmm8 584.byte 102,15,58,15,255,12 585.byte 102,69,15,58,15,219,8 586.byte 102,69,15,58,15,255,4 587.byte 102,15,58,15,246,12 588.byte 102,69,15,58,15,210,8 589.byte 102,69,15,58,15,246,4 590.byte 102,15,58,15,237,12 591.byte 102,69,15,58,15,201,8 592.byte 102,69,15,58,15,237,4 593.byte 102,15,58,15,228,12 594.byte 102,69,15,58,15,192,8 595.byte 102,69,15,58,15,228,4 596 597 decq %rcx 598 jge .Lopen_sse_main_loop_rounds 599 addq 0+0(%r8),%r10 600 adcq 8+0(%r8),%r11 601 adcq $1,%r12 
602 movq 0+0+0(%rbp),%rax 603 movq %rax,%r15 604 mulq %r10 605 movq %rax,%r13 606 movq %rdx,%r14 607 movq 0+0+0(%rbp),%rax 608 mulq %r11 609 imulq %r12,%r15 610 addq %rax,%r14 611 adcq %rdx,%r15 612 movq 8+0+0(%rbp),%rax 613 movq %rax,%r9 614 mulq %r10 615 addq %rax,%r14 616 adcq $0,%rdx 617 movq %rdx,%r10 618 movq 8+0+0(%rbp),%rax 619 mulq %r11 620 addq %rax,%r15 621 adcq $0,%rdx 622 imulq %r12,%r9 623 addq %r10,%r15 624 adcq %rdx,%r9 625 movq %r13,%r10 626 movq %r14,%r11 627 movq %r15,%r12 628 andq $3,%r12 629 movq %r15,%r13 630 andq $-4,%r13 631 movq %r9,%r14 632 shrdq $2,%r9,%r15 633 shrq $2,%r9 634 addq %r13,%r15 635 adcq %r14,%r9 636 addq %r15,%r10 637 adcq %r9,%r11 638 adcq $0,%r12 639 640 leaq 16(%r8),%r8 641 cmpq $-6,%rcx 642 jg .Lopen_sse_main_loop_rounds 643 paddd .Lchacha20_consts(%rip),%xmm3 644 paddd 0+48(%rbp),%xmm7 645 paddd 0+64(%rbp),%xmm11 646 paddd 0+144(%rbp),%xmm15 647 paddd .Lchacha20_consts(%rip),%xmm2 648 paddd 0+48(%rbp),%xmm6 649 paddd 0+64(%rbp),%xmm10 650 paddd 0+128(%rbp),%xmm14 651 paddd .Lchacha20_consts(%rip),%xmm1 652 paddd 0+48(%rbp),%xmm5 653 paddd 0+64(%rbp),%xmm9 654 paddd 0+112(%rbp),%xmm13 655 paddd .Lchacha20_consts(%rip),%xmm0 656 paddd 0+48(%rbp),%xmm4 657 paddd 0+64(%rbp),%xmm8 658 paddd 0+96(%rbp),%xmm12 659 movdqa %xmm12,0+80(%rbp) 660 movdqu 0 + 0(%rsi),%xmm12 661 pxor %xmm3,%xmm12 662 movdqu %xmm12,0 + 0(%rdi) 663 movdqu 16 + 0(%rsi),%xmm12 664 pxor %xmm7,%xmm12 665 movdqu %xmm12,16 + 0(%rdi) 666 movdqu 32 + 0(%rsi),%xmm12 667 pxor %xmm11,%xmm12 668 movdqu %xmm12,32 + 0(%rdi) 669 movdqu 48 + 0(%rsi),%xmm12 670 pxor %xmm15,%xmm12 671 movdqu %xmm12,48 + 0(%rdi) 672 movdqu 0 + 64(%rsi),%xmm3 673 movdqu 16 + 64(%rsi),%xmm7 674 movdqu 32 + 64(%rsi),%xmm11 675 movdqu 48 + 64(%rsi),%xmm15 676 pxor %xmm3,%xmm2 677 pxor %xmm7,%xmm6 678 pxor %xmm11,%xmm10 679 pxor %xmm14,%xmm15 680 movdqu %xmm2,0 + 64(%rdi) 681 movdqu %xmm6,16 + 64(%rdi) 682 movdqu %xmm10,32 + 64(%rdi) 683 movdqu %xmm15,48 + 64(%rdi) 684 movdqu 0 + 128(%rsi),%xmm3 685 movdqu 16 + 128(%rsi),%xmm7 686 movdqu 32 + 128(%rsi),%xmm11 687 movdqu 48 + 128(%rsi),%xmm15 688 pxor %xmm3,%xmm1 689 pxor %xmm7,%xmm5 690 pxor %xmm11,%xmm9 691 pxor %xmm13,%xmm15 692 movdqu %xmm1,0 + 128(%rdi) 693 movdqu %xmm5,16 + 128(%rdi) 694 movdqu %xmm9,32 + 128(%rdi) 695 movdqu %xmm15,48 + 128(%rdi) 696 movdqu 0 + 192(%rsi),%xmm3 697 movdqu 16 + 192(%rsi),%xmm7 698 movdqu 32 + 192(%rsi),%xmm11 699 movdqu 48 + 192(%rsi),%xmm15 700 pxor %xmm3,%xmm0 701 pxor %xmm7,%xmm4 702 pxor %xmm11,%xmm8 703 pxor 0+80(%rbp),%xmm15 704 movdqu %xmm0,0 + 192(%rdi) 705 movdqu %xmm4,16 + 192(%rdi) 706 movdqu %xmm8,32 + 192(%rdi) 707 movdqu %xmm15,48 + 192(%rdi) 708 709 leaq 256(%rsi),%rsi 710 leaq 256(%rdi),%rdi 711 subq $256,%rbx 712 jmp .Lopen_sse_main_loop 713.Lopen_sse_tail: 714 715 testq %rbx,%rbx 716 jz .Lopen_sse_finalize 717 cmpq $192,%rbx 718 ja .Lopen_sse_tail_256 719 cmpq $128,%rbx 720 ja .Lopen_sse_tail_192 721 cmpq $64,%rbx 722 ja .Lopen_sse_tail_128 723 movdqa .Lchacha20_consts(%rip),%xmm0 724 movdqa 0+48(%rbp),%xmm4 725 movdqa 0+64(%rbp),%xmm8 726 movdqa 0+96(%rbp),%xmm12 727 paddd .Lsse_inc(%rip),%xmm12 728 movdqa %xmm12,0+96(%rbp) 729 730 xorq %r8,%r8 731 movq %rbx,%rcx 732 cmpq $16,%rcx 733 jb .Lopen_sse_tail_64_rounds 734.Lopen_sse_tail_64_rounds_and_x1hash: 735 addq 0+0(%rsi,%r8,1),%r10 736 adcq 8+0(%rsi,%r8,1),%r11 737 adcq $1,%r12 738 movq 0+0+0(%rbp),%rax 739 movq %rax,%r15 740 mulq %r10 741 movq %rax,%r13 742 movq %rdx,%r14 743 movq 0+0+0(%rbp),%rax 744 mulq %r11 745 imulq %r12,%r15 746 addq %rax,%r14 747 adcq 
%rdx,%r15 748 movq 8+0+0(%rbp),%rax 749 movq %rax,%r9 750 mulq %r10 751 addq %rax,%r14 752 adcq $0,%rdx 753 movq %rdx,%r10 754 movq 8+0+0(%rbp),%rax 755 mulq %r11 756 addq %rax,%r15 757 adcq $0,%rdx 758 imulq %r12,%r9 759 addq %r10,%r15 760 adcq %rdx,%r9 761 movq %r13,%r10 762 movq %r14,%r11 763 movq %r15,%r12 764 andq $3,%r12 765 movq %r15,%r13 766 andq $-4,%r13 767 movq %r9,%r14 768 shrdq $2,%r9,%r15 769 shrq $2,%r9 770 addq %r13,%r15 771 adcq %r14,%r9 772 addq %r15,%r10 773 adcq %r9,%r11 774 adcq $0,%r12 775 776 subq $16,%rcx 777.Lopen_sse_tail_64_rounds: 778 addq $16,%r8 779 paddd %xmm4,%xmm0 780 pxor %xmm0,%xmm12 781 pshufb .Lrol16(%rip),%xmm12 782 paddd %xmm12,%xmm8 783 pxor %xmm8,%xmm4 784 movdqa %xmm4,%xmm3 785 pslld $12,%xmm3 786 psrld $20,%xmm4 787 pxor %xmm3,%xmm4 788 paddd %xmm4,%xmm0 789 pxor %xmm0,%xmm12 790 pshufb .Lrol8(%rip),%xmm12 791 paddd %xmm12,%xmm8 792 pxor %xmm8,%xmm4 793 movdqa %xmm4,%xmm3 794 pslld $7,%xmm3 795 psrld $25,%xmm4 796 pxor %xmm3,%xmm4 797.byte 102,15,58,15,228,4 798.byte 102,69,15,58,15,192,8 799.byte 102,69,15,58,15,228,12 800 paddd %xmm4,%xmm0 801 pxor %xmm0,%xmm12 802 pshufb .Lrol16(%rip),%xmm12 803 paddd %xmm12,%xmm8 804 pxor %xmm8,%xmm4 805 movdqa %xmm4,%xmm3 806 pslld $12,%xmm3 807 psrld $20,%xmm4 808 pxor %xmm3,%xmm4 809 paddd %xmm4,%xmm0 810 pxor %xmm0,%xmm12 811 pshufb .Lrol8(%rip),%xmm12 812 paddd %xmm12,%xmm8 813 pxor %xmm8,%xmm4 814 movdqa %xmm4,%xmm3 815 pslld $7,%xmm3 816 psrld $25,%xmm4 817 pxor %xmm3,%xmm4 818.byte 102,15,58,15,228,12 819.byte 102,69,15,58,15,192,8 820.byte 102,69,15,58,15,228,4 821 822 cmpq $16,%rcx 823 jae .Lopen_sse_tail_64_rounds_and_x1hash 824 cmpq $160,%r8 825 jne .Lopen_sse_tail_64_rounds 826 paddd .Lchacha20_consts(%rip),%xmm0 827 paddd 0+48(%rbp),%xmm4 828 paddd 0+64(%rbp),%xmm8 829 paddd 0+96(%rbp),%xmm12 830 831 jmp .Lopen_sse_tail_64_dec_loop 832 833.Lopen_sse_tail_128: 834 movdqa .Lchacha20_consts(%rip),%xmm0 835 movdqa 0+48(%rbp),%xmm4 836 movdqa 0+64(%rbp),%xmm8 837 movdqa %xmm0,%xmm1 838 movdqa %xmm4,%xmm5 839 movdqa %xmm8,%xmm9 840 movdqa 0+96(%rbp),%xmm13 841 paddd .Lsse_inc(%rip),%xmm13 842 movdqa %xmm13,%xmm12 843 paddd .Lsse_inc(%rip),%xmm12 844 movdqa %xmm12,0+96(%rbp) 845 movdqa %xmm13,0+112(%rbp) 846 847 movq %rbx,%rcx 848 andq $-16,%rcx 849 xorq %r8,%r8 850.Lopen_sse_tail_128_rounds_and_x1hash: 851 addq 0+0(%rsi,%r8,1),%r10 852 adcq 8+0(%rsi,%r8,1),%r11 853 adcq $1,%r12 854 movq 0+0+0(%rbp),%rax 855 movq %rax,%r15 856 mulq %r10 857 movq %rax,%r13 858 movq %rdx,%r14 859 movq 0+0+0(%rbp),%rax 860 mulq %r11 861 imulq %r12,%r15 862 addq %rax,%r14 863 adcq %rdx,%r15 864 movq 8+0+0(%rbp),%rax 865 movq %rax,%r9 866 mulq %r10 867 addq %rax,%r14 868 adcq $0,%rdx 869 movq %rdx,%r10 870 movq 8+0+0(%rbp),%rax 871 mulq %r11 872 addq %rax,%r15 873 adcq $0,%rdx 874 imulq %r12,%r9 875 addq %r10,%r15 876 adcq %rdx,%r9 877 movq %r13,%r10 878 movq %r14,%r11 879 movq %r15,%r12 880 andq $3,%r12 881 movq %r15,%r13 882 andq $-4,%r13 883 movq %r9,%r14 884 shrdq $2,%r9,%r15 885 shrq $2,%r9 886 addq %r13,%r15 887 adcq %r14,%r9 888 addq %r15,%r10 889 adcq %r9,%r11 890 adcq $0,%r12 891 892.Lopen_sse_tail_128_rounds: 893 addq $16,%r8 894 paddd %xmm4,%xmm0 895 pxor %xmm0,%xmm12 896 pshufb .Lrol16(%rip),%xmm12 897 paddd %xmm12,%xmm8 898 pxor %xmm8,%xmm4 899 movdqa %xmm4,%xmm3 900 pslld $12,%xmm3 901 psrld $20,%xmm4 902 pxor %xmm3,%xmm4 903 paddd %xmm4,%xmm0 904 pxor %xmm0,%xmm12 905 pshufb .Lrol8(%rip),%xmm12 906 paddd %xmm12,%xmm8 907 pxor %xmm8,%xmm4 908 movdqa %xmm4,%xmm3 909 pslld $7,%xmm3 910 psrld $25,%xmm4 911 pxor 
%xmm3,%xmm4 912.byte 102,15,58,15,228,4 913.byte 102,69,15,58,15,192,8 914.byte 102,69,15,58,15,228,12 915 paddd %xmm5,%xmm1 916 pxor %xmm1,%xmm13 917 pshufb .Lrol16(%rip),%xmm13 918 paddd %xmm13,%xmm9 919 pxor %xmm9,%xmm5 920 movdqa %xmm5,%xmm3 921 pslld $12,%xmm3 922 psrld $20,%xmm5 923 pxor %xmm3,%xmm5 924 paddd %xmm5,%xmm1 925 pxor %xmm1,%xmm13 926 pshufb .Lrol8(%rip),%xmm13 927 paddd %xmm13,%xmm9 928 pxor %xmm9,%xmm5 929 movdqa %xmm5,%xmm3 930 pslld $7,%xmm3 931 psrld $25,%xmm5 932 pxor %xmm3,%xmm5 933.byte 102,15,58,15,237,4 934.byte 102,69,15,58,15,201,8 935.byte 102,69,15,58,15,237,12 936 paddd %xmm4,%xmm0 937 pxor %xmm0,%xmm12 938 pshufb .Lrol16(%rip),%xmm12 939 paddd %xmm12,%xmm8 940 pxor %xmm8,%xmm4 941 movdqa %xmm4,%xmm3 942 pslld $12,%xmm3 943 psrld $20,%xmm4 944 pxor %xmm3,%xmm4 945 paddd %xmm4,%xmm0 946 pxor %xmm0,%xmm12 947 pshufb .Lrol8(%rip),%xmm12 948 paddd %xmm12,%xmm8 949 pxor %xmm8,%xmm4 950 movdqa %xmm4,%xmm3 951 pslld $7,%xmm3 952 psrld $25,%xmm4 953 pxor %xmm3,%xmm4 954.byte 102,15,58,15,228,12 955.byte 102,69,15,58,15,192,8 956.byte 102,69,15,58,15,228,4 957 paddd %xmm5,%xmm1 958 pxor %xmm1,%xmm13 959 pshufb .Lrol16(%rip),%xmm13 960 paddd %xmm13,%xmm9 961 pxor %xmm9,%xmm5 962 movdqa %xmm5,%xmm3 963 pslld $12,%xmm3 964 psrld $20,%xmm5 965 pxor %xmm3,%xmm5 966 paddd %xmm5,%xmm1 967 pxor %xmm1,%xmm13 968 pshufb .Lrol8(%rip),%xmm13 969 paddd %xmm13,%xmm9 970 pxor %xmm9,%xmm5 971 movdqa %xmm5,%xmm3 972 pslld $7,%xmm3 973 psrld $25,%xmm5 974 pxor %xmm3,%xmm5 975.byte 102,15,58,15,237,12 976.byte 102,69,15,58,15,201,8 977.byte 102,69,15,58,15,237,4 978 979 cmpq %rcx,%r8 980 jb .Lopen_sse_tail_128_rounds_and_x1hash 981 cmpq $160,%r8 982 jne .Lopen_sse_tail_128_rounds 983 paddd .Lchacha20_consts(%rip),%xmm1 984 paddd 0+48(%rbp),%xmm5 985 paddd 0+64(%rbp),%xmm9 986 paddd 0+112(%rbp),%xmm13 987 paddd .Lchacha20_consts(%rip),%xmm0 988 paddd 0+48(%rbp),%xmm4 989 paddd 0+64(%rbp),%xmm8 990 paddd 0+96(%rbp),%xmm12 991 movdqu 0 + 0(%rsi),%xmm3 992 movdqu 16 + 0(%rsi),%xmm7 993 movdqu 32 + 0(%rsi),%xmm11 994 movdqu 48 + 0(%rsi),%xmm15 995 pxor %xmm3,%xmm1 996 pxor %xmm7,%xmm5 997 pxor %xmm11,%xmm9 998 pxor %xmm13,%xmm15 999 movdqu %xmm1,0 + 0(%rdi) 1000 movdqu %xmm5,16 + 0(%rdi) 1001 movdqu %xmm9,32 + 0(%rdi) 1002 movdqu %xmm15,48 + 0(%rdi) 1003 1004 subq $64,%rbx 1005 leaq 64(%rsi),%rsi 1006 leaq 64(%rdi),%rdi 1007 jmp .Lopen_sse_tail_64_dec_loop 1008 1009.Lopen_sse_tail_192: 1010 movdqa .Lchacha20_consts(%rip),%xmm0 1011 movdqa 0+48(%rbp),%xmm4 1012 movdqa 0+64(%rbp),%xmm8 1013 movdqa %xmm0,%xmm1 1014 movdqa %xmm4,%xmm5 1015 movdqa %xmm8,%xmm9 1016 movdqa %xmm0,%xmm2 1017 movdqa %xmm4,%xmm6 1018 movdqa %xmm8,%xmm10 1019 movdqa 0+96(%rbp),%xmm14 1020 paddd .Lsse_inc(%rip),%xmm14 1021 movdqa %xmm14,%xmm13 1022 paddd .Lsse_inc(%rip),%xmm13 1023 movdqa %xmm13,%xmm12 1024 paddd .Lsse_inc(%rip),%xmm12 1025 movdqa %xmm12,0+96(%rbp) 1026 movdqa %xmm13,0+112(%rbp) 1027 movdqa %xmm14,0+128(%rbp) 1028 1029 movq %rbx,%rcx 1030 movq $160,%r8 1031 cmpq $160,%rcx 1032 cmovgq %r8,%rcx 1033 andq $-16,%rcx 1034 xorq %r8,%r8 1035.Lopen_sse_tail_192_rounds_and_x1hash: 1036 addq 0+0(%rsi,%r8,1),%r10 1037 adcq 8+0(%rsi,%r8,1),%r11 1038 adcq $1,%r12 1039 movq 0+0+0(%rbp),%rax 1040 movq %rax,%r15 1041 mulq %r10 1042 movq %rax,%r13 1043 movq %rdx,%r14 1044 movq 0+0+0(%rbp),%rax 1045 mulq %r11 1046 imulq %r12,%r15 1047 addq %rax,%r14 1048 adcq %rdx,%r15 1049 movq 8+0+0(%rbp),%rax 1050 movq %rax,%r9 1051 mulq %r10 1052 addq %rax,%r14 1053 adcq $0,%rdx 1054 movq %rdx,%r10 1055 movq 8+0+0(%rbp),%rax 1056 
mulq %r11 1057 addq %rax,%r15 1058 adcq $0,%rdx 1059 imulq %r12,%r9 1060 addq %r10,%r15 1061 adcq %rdx,%r9 1062 movq %r13,%r10 1063 movq %r14,%r11 1064 movq %r15,%r12 1065 andq $3,%r12 1066 movq %r15,%r13 1067 andq $-4,%r13 1068 movq %r9,%r14 1069 shrdq $2,%r9,%r15 1070 shrq $2,%r9 1071 addq %r13,%r15 1072 adcq %r14,%r9 1073 addq %r15,%r10 1074 adcq %r9,%r11 1075 adcq $0,%r12 1076 1077.Lopen_sse_tail_192_rounds: 1078 addq $16,%r8 1079 paddd %xmm4,%xmm0 1080 pxor %xmm0,%xmm12 1081 pshufb .Lrol16(%rip),%xmm12 1082 paddd %xmm12,%xmm8 1083 pxor %xmm8,%xmm4 1084 movdqa %xmm4,%xmm3 1085 pslld $12,%xmm3 1086 psrld $20,%xmm4 1087 pxor %xmm3,%xmm4 1088 paddd %xmm4,%xmm0 1089 pxor %xmm0,%xmm12 1090 pshufb .Lrol8(%rip),%xmm12 1091 paddd %xmm12,%xmm8 1092 pxor %xmm8,%xmm4 1093 movdqa %xmm4,%xmm3 1094 pslld $7,%xmm3 1095 psrld $25,%xmm4 1096 pxor %xmm3,%xmm4 1097.byte 102,15,58,15,228,4 1098.byte 102,69,15,58,15,192,8 1099.byte 102,69,15,58,15,228,12 1100 paddd %xmm5,%xmm1 1101 pxor %xmm1,%xmm13 1102 pshufb .Lrol16(%rip),%xmm13 1103 paddd %xmm13,%xmm9 1104 pxor %xmm9,%xmm5 1105 movdqa %xmm5,%xmm3 1106 pslld $12,%xmm3 1107 psrld $20,%xmm5 1108 pxor %xmm3,%xmm5 1109 paddd %xmm5,%xmm1 1110 pxor %xmm1,%xmm13 1111 pshufb .Lrol8(%rip),%xmm13 1112 paddd %xmm13,%xmm9 1113 pxor %xmm9,%xmm5 1114 movdqa %xmm5,%xmm3 1115 pslld $7,%xmm3 1116 psrld $25,%xmm5 1117 pxor %xmm3,%xmm5 1118.byte 102,15,58,15,237,4 1119.byte 102,69,15,58,15,201,8 1120.byte 102,69,15,58,15,237,12 1121 paddd %xmm6,%xmm2 1122 pxor %xmm2,%xmm14 1123 pshufb .Lrol16(%rip),%xmm14 1124 paddd %xmm14,%xmm10 1125 pxor %xmm10,%xmm6 1126 movdqa %xmm6,%xmm3 1127 pslld $12,%xmm3 1128 psrld $20,%xmm6 1129 pxor %xmm3,%xmm6 1130 paddd %xmm6,%xmm2 1131 pxor %xmm2,%xmm14 1132 pshufb .Lrol8(%rip),%xmm14 1133 paddd %xmm14,%xmm10 1134 pxor %xmm10,%xmm6 1135 movdqa %xmm6,%xmm3 1136 pslld $7,%xmm3 1137 psrld $25,%xmm6 1138 pxor %xmm3,%xmm6 1139.byte 102,15,58,15,246,4 1140.byte 102,69,15,58,15,210,8 1141.byte 102,69,15,58,15,246,12 1142 paddd %xmm4,%xmm0 1143 pxor %xmm0,%xmm12 1144 pshufb .Lrol16(%rip),%xmm12 1145 paddd %xmm12,%xmm8 1146 pxor %xmm8,%xmm4 1147 movdqa %xmm4,%xmm3 1148 pslld $12,%xmm3 1149 psrld $20,%xmm4 1150 pxor %xmm3,%xmm4 1151 paddd %xmm4,%xmm0 1152 pxor %xmm0,%xmm12 1153 pshufb .Lrol8(%rip),%xmm12 1154 paddd %xmm12,%xmm8 1155 pxor %xmm8,%xmm4 1156 movdqa %xmm4,%xmm3 1157 pslld $7,%xmm3 1158 psrld $25,%xmm4 1159 pxor %xmm3,%xmm4 1160.byte 102,15,58,15,228,12 1161.byte 102,69,15,58,15,192,8 1162.byte 102,69,15,58,15,228,4 1163 paddd %xmm5,%xmm1 1164 pxor %xmm1,%xmm13 1165 pshufb .Lrol16(%rip),%xmm13 1166 paddd %xmm13,%xmm9 1167 pxor %xmm9,%xmm5 1168 movdqa %xmm5,%xmm3 1169 pslld $12,%xmm3 1170 psrld $20,%xmm5 1171 pxor %xmm3,%xmm5 1172 paddd %xmm5,%xmm1 1173 pxor %xmm1,%xmm13 1174 pshufb .Lrol8(%rip),%xmm13 1175 paddd %xmm13,%xmm9 1176 pxor %xmm9,%xmm5 1177 movdqa %xmm5,%xmm3 1178 pslld $7,%xmm3 1179 psrld $25,%xmm5 1180 pxor %xmm3,%xmm5 1181.byte 102,15,58,15,237,12 1182.byte 102,69,15,58,15,201,8 1183.byte 102,69,15,58,15,237,4 1184 paddd %xmm6,%xmm2 1185 pxor %xmm2,%xmm14 1186 pshufb .Lrol16(%rip),%xmm14 1187 paddd %xmm14,%xmm10 1188 pxor %xmm10,%xmm6 1189 movdqa %xmm6,%xmm3 1190 pslld $12,%xmm3 1191 psrld $20,%xmm6 1192 pxor %xmm3,%xmm6 1193 paddd %xmm6,%xmm2 1194 pxor %xmm2,%xmm14 1195 pshufb .Lrol8(%rip),%xmm14 1196 paddd %xmm14,%xmm10 1197 pxor %xmm10,%xmm6 1198 movdqa %xmm6,%xmm3 1199 pslld $7,%xmm3 1200 psrld $25,%xmm6 1201 pxor %xmm3,%xmm6 1202.byte 102,15,58,15,246,12 1203.byte 102,69,15,58,15,210,8 1204.byte 102,69,15,58,15,246,4 1205 
1206 cmpq %rcx,%r8 1207 jb .Lopen_sse_tail_192_rounds_and_x1hash 1208 cmpq $160,%r8 1209 jne .Lopen_sse_tail_192_rounds 1210 cmpq $176,%rbx 1211 jb .Lopen_sse_tail_192_finish 1212 addq 0+160(%rsi),%r10 1213 adcq 8+160(%rsi),%r11 1214 adcq $1,%r12 1215 movq 0+0+0(%rbp),%rax 1216 movq %rax,%r15 1217 mulq %r10 1218 movq %rax,%r13 1219 movq %rdx,%r14 1220 movq 0+0+0(%rbp),%rax 1221 mulq %r11 1222 imulq %r12,%r15 1223 addq %rax,%r14 1224 adcq %rdx,%r15 1225 movq 8+0+0(%rbp),%rax 1226 movq %rax,%r9 1227 mulq %r10 1228 addq %rax,%r14 1229 adcq $0,%rdx 1230 movq %rdx,%r10 1231 movq 8+0+0(%rbp),%rax 1232 mulq %r11 1233 addq %rax,%r15 1234 adcq $0,%rdx 1235 imulq %r12,%r9 1236 addq %r10,%r15 1237 adcq %rdx,%r9 1238 movq %r13,%r10 1239 movq %r14,%r11 1240 movq %r15,%r12 1241 andq $3,%r12 1242 movq %r15,%r13 1243 andq $-4,%r13 1244 movq %r9,%r14 1245 shrdq $2,%r9,%r15 1246 shrq $2,%r9 1247 addq %r13,%r15 1248 adcq %r14,%r9 1249 addq %r15,%r10 1250 adcq %r9,%r11 1251 adcq $0,%r12 1252 1253 cmpq $192,%rbx 1254 jb .Lopen_sse_tail_192_finish 1255 addq 0+176(%rsi),%r10 1256 adcq 8+176(%rsi),%r11 1257 adcq $1,%r12 1258 movq 0+0+0(%rbp),%rax 1259 movq %rax,%r15 1260 mulq %r10 1261 movq %rax,%r13 1262 movq %rdx,%r14 1263 movq 0+0+0(%rbp),%rax 1264 mulq %r11 1265 imulq %r12,%r15 1266 addq %rax,%r14 1267 adcq %rdx,%r15 1268 movq 8+0+0(%rbp),%rax 1269 movq %rax,%r9 1270 mulq %r10 1271 addq %rax,%r14 1272 adcq $0,%rdx 1273 movq %rdx,%r10 1274 movq 8+0+0(%rbp),%rax 1275 mulq %r11 1276 addq %rax,%r15 1277 adcq $0,%rdx 1278 imulq %r12,%r9 1279 addq %r10,%r15 1280 adcq %rdx,%r9 1281 movq %r13,%r10 1282 movq %r14,%r11 1283 movq %r15,%r12 1284 andq $3,%r12 1285 movq %r15,%r13 1286 andq $-4,%r13 1287 movq %r9,%r14 1288 shrdq $2,%r9,%r15 1289 shrq $2,%r9 1290 addq %r13,%r15 1291 adcq %r14,%r9 1292 addq %r15,%r10 1293 adcq %r9,%r11 1294 adcq $0,%r12 1295 1296.Lopen_sse_tail_192_finish: 1297 paddd .Lchacha20_consts(%rip),%xmm2 1298 paddd 0+48(%rbp),%xmm6 1299 paddd 0+64(%rbp),%xmm10 1300 paddd 0+128(%rbp),%xmm14 1301 paddd .Lchacha20_consts(%rip),%xmm1 1302 paddd 0+48(%rbp),%xmm5 1303 paddd 0+64(%rbp),%xmm9 1304 paddd 0+112(%rbp),%xmm13 1305 paddd .Lchacha20_consts(%rip),%xmm0 1306 paddd 0+48(%rbp),%xmm4 1307 paddd 0+64(%rbp),%xmm8 1308 paddd 0+96(%rbp),%xmm12 1309 movdqu 0 + 0(%rsi),%xmm3 1310 movdqu 16 + 0(%rsi),%xmm7 1311 movdqu 32 + 0(%rsi),%xmm11 1312 movdqu 48 + 0(%rsi),%xmm15 1313 pxor %xmm3,%xmm2 1314 pxor %xmm7,%xmm6 1315 pxor %xmm11,%xmm10 1316 pxor %xmm14,%xmm15 1317 movdqu %xmm2,0 + 0(%rdi) 1318 movdqu %xmm6,16 + 0(%rdi) 1319 movdqu %xmm10,32 + 0(%rdi) 1320 movdqu %xmm15,48 + 0(%rdi) 1321 movdqu 0 + 64(%rsi),%xmm3 1322 movdqu 16 + 64(%rsi),%xmm7 1323 movdqu 32 + 64(%rsi),%xmm11 1324 movdqu 48 + 64(%rsi),%xmm15 1325 pxor %xmm3,%xmm1 1326 pxor %xmm7,%xmm5 1327 pxor %xmm11,%xmm9 1328 pxor %xmm13,%xmm15 1329 movdqu %xmm1,0 + 64(%rdi) 1330 movdqu %xmm5,16 + 64(%rdi) 1331 movdqu %xmm9,32 + 64(%rdi) 1332 movdqu %xmm15,48 + 64(%rdi) 1333 1334 subq $128,%rbx 1335 leaq 128(%rsi),%rsi 1336 leaq 128(%rdi),%rdi 1337 jmp .Lopen_sse_tail_64_dec_loop 1338 1339.Lopen_sse_tail_256: 1340 movdqa .Lchacha20_consts(%rip),%xmm0 1341 movdqa 0+48(%rbp),%xmm4 1342 movdqa 0+64(%rbp),%xmm8 1343 movdqa %xmm0,%xmm1 1344 movdqa %xmm4,%xmm5 1345 movdqa %xmm8,%xmm9 1346 movdqa %xmm0,%xmm2 1347 movdqa %xmm4,%xmm6 1348 movdqa %xmm8,%xmm10 1349 movdqa %xmm0,%xmm3 1350 movdqa %xmm4,%xmm7 1351 movdqa %xmm8,%xmm11 1352 movdqa 0+96(%rbp),%xmm15 1353 paddd .Lsse_inc(%rip),%xmm15 1354 movdqa %xmm15,%xmm14 1355 paddd .Lsse_inc(%rip),%xmm14 1356 movdqa 
%xmm14,%xmm13 1357 paddd .Lsse_inc(%rip),%xmm13 1358 movdqa %xmm13,%xmm12 1359 paddd .Lsse_inc(%rip),%xmm12 1360 movdqa %xmm12,0+96(%rbp) 1361 movdqa %xmm13,0+112(%rbp) 1362 movdqa %xmm14,0+128(%rbp) 1363 movdqa %xmm15,0+144(%rbp) 1364 1365 xorq %r8,%r8 1366.Lopen_sse_tail_256_rounds_and_x1hash: 1367 addq 0+0(%rsi,%r8,1),%r10 1368 adcq 8+0(%rsi,%r8,1),%r11 1369 adcq $1,%r12 1370 movdqa %xmm11,0+80(%rbp) 1371 paddd %xmm4,%xmm0 1372 pxor %xmm0,%xmm12 1373 pshufb .Lrol16(%rip),%xmm12 1374 paddd %xmm12,%xmm8 1375 pxor %xmm8,%xmm4 1376 movdqa %xmm4,%xmm11 1377 pslld $12,%xmm11 1378 psrld $20,%xmm4 1379 pxor %xmm11,%xmm4 1380 paddd %xmm4,%xmm0 1381 pxor %xmm0,%xmm12 1382 pshufb .Lrol8(%rip),%xmm12 1383 paddd %xmm12,%xmm8 1384 pxor %xmm8,%xmm4 1385 movdqa %xmm4,%xmm11 1386 pslld $7,%xmm11 1387 psrld $25,%xmm4 1388 pxor %xmm11,%xmm4 1389.byte 102,15,58,15,228,4 1390.byte 102,69,15,58,15,192,8 1391.byte 102,69,15,58,15,228,12 1392 paddd %xmm5,%xmm1 1393 pxor %xmm1,%xmm13 1394 pshufb .Lrol16(%rip),%xmm13 1395 paddd %xmm13,%xmm9 1396 pxor %xmm9,%xmm5 1397 movdqa %xmm5,%xmm11 1398 pslld $12,%xmm11 1399 psrld $20,%xmm5 1400 pxor %xmm11,%xmm5 1401 paddd %xmm5,%xmm1 1402 pxor %xmm1,%xmm13 1403 pshufb .Lrol8(%rip),%xmm13 1404 paddd %xmm13,%xmm9 1405 pxor %xmm9,%xmm5 1406 movdqa %xmm5,%xmm11 1407 pslld $7,%xmm11 1408 psrld $25,%xmm5 1409 pxor %xmm11,%xmm5 1410.byte 102,15,58,15,237,4 1411.byte 102,69,15,58,15,201,8 1412.byte 102,69,15,58,15,237,12 1413 paddd %xmm6,%xmm2 1414 pxor %xmm2,%xmm14 1415 pshufb .Lrol16(%rip),%xmm14 1416 paddd %xmm14,%xmm10 1417 pxor %xmm10,%xmm6 1418 movdqa %xmm6,%xmm11 1419 pslld $12,%xmm11 1420 psrld $20,%xmm6 1421 pxor %xmm11,%xmm6 1422 paddd %xmm6,%xmm2 1423 pxor %xmm2,%xmm14 1424 pshufb .Lrol8(%rip),%xmm14 1425 paddd %xmm14,%xmm10 1426 pxor %xmm10,%xmm6 1427 movdqa %xmm6,%xmm11 1428 pslld $7,%xmm11 1429 psrld $25,%xmm6 1430 pxor %xmm11,%xmm6 1431.byte 102,15,58,15,246,4 1432.byte 102,69,15,58,15,210,8 1433.byte 102,69,15,58,15,246,12 1434 movdqa 0+80(%rbp),%xmm11 1435 movq 0+0+0(%rbp),%rax 1436 movq %rax,%r15 1437 mulq %r10 1438 movq %rax,%r13 1439 movq %rdx,%r14 1440 movq 0+0+0(%rbp),%rax 1441 mulq %r11 1442 imulq %r12,%r15 1443 addq %rax,%r14 1444 adcq %rdx,%r15 1445 movdqa %xmm9,0+80(%rbp) 1446 paddd %xmm7,%xmm3 1447 pxor %xmm3,%xmm15 1448 pshufb .Lrol16(%rip),%xmm15 1449 paddd %xmm15,%xmm11 1450 pxor %xmm11,%xmm7 1451 movdqa %xmm7,%xmm9 1452 pslld $12,%xmm9 1453 psrld $20,%xmm7 1454 pxor %xmm9,%xmm7 1455 paddd %xmm7,%xmm3 1456 pxor %xmm3,%xmm15 1457 pshufb .Lrol8(%rip),%xmm15 1458 paddd %xmm15,%xmm11 1459 pxor %xmm11,%xmm7 1460 movdqa %xmm7,%xmm9 1461 pslld $7,%xmm9 1462 psrld $25,%xmm7 1463 pxor %xmm9,%xmm7 1464.byte 102,15,58,15,255,4 1465.byte 102,69,15,58,15,219,8 1466.byte 102,69,15,58,15,255,12 1467 movdqa 0+80(%rbp),%xmm9 1468 movq 8+0+0(%rbp),%rax 1469 movq %rax,%r9 1470 mulq %r10 1471 addq %rax,%r14 1472 adcq $0,%rdx 1473 movq %rdx,%r10 1474 movq 8+0+0(%rbp),%rax 1475 mulq %r11 1476 addq %rax,%r15 1477 adcq $0,%rdx 1478 movdqa %xmm11,0+80(%rbp) 1479 paddd %xmm4,%xmm0 1480 pxor %xmm0,%xmm12 1481 pshufb .Lrol16(%rip),%xmm12 1482 paddd %xmm12,%xmm8 1483 pxor %xmm8,%xmm4 1484 movdqa %xmm4,%xmm11 1485 pslld $12,%xmm11 1486 psrld $20,%xmm4 1487 pxor %xmm11,%xmm4 1488 paddd %xmm4,%xmm0 1489 pxor %xmm0,%xmm12 1490 pshufb .Lrol8(%rip),%xmm12 1491 paddd %xmm12,%xmm8 1492 pxor %xmm8,%xmm4 1493 movdqa %xmm4,%xmm11 1494 pslld $7,%xmm11 1495 psrld $25,%xmm4 1496 pxor %xmm11,%xmm4 1497.byte 102,15,58,15,228,12 1498.byte 102,69,15,58,15,192,8 1499.byte 102,69,15,58,15,228,4 
1500 paddd %xmm5,%xmm1 1501 pxor %xmm1,%xmm13 1502 pshufb .Lrol16(%rip),%xmm13 1503 paddd %xmm13,%xmm9 1504 pxor %xmm9,%xmm5 1505 movdqa %xmm5,%xmm11 1506 pslld $12,%xmm11 1507 psrld $20,%xmm5 1508 pxor %xmm11,%xmm5 1509 paddd %xmm5,%xmm1 1510 pxor %xmm1,%xmm13 1511 pshufb .Lrol8(%rip),%xmm13 1512 paddd %xmm13,%xmm9 1513 pxor %xmm9,%xmm5 1514 movdqa %xmm5,%xmm11 1515 pslld $7,%xmm11 1516 psrld $25,%xmm5 1517 pxor %xmm11,%xmm5 1518.byte 102,15,58,15,237,12 1519.byte 102,69,15,58,15,201,8 1520.byte 102,69,15,58,15,237,4 1521 imulq %r12,%r9 1522 addq %r10,%r15 1523 adcq %rdx,%r9 1524 paddd %xmm6,%xmm2 1525 pxor %xmm2,%xmm14 1526 pshufb .Lrol16(%rip),%xmm14 1527 paddd %xmm14,%xmm10 1528 pxor %xmm10,%xmm6 1529 movdqa %xmm6,%xmm11 1530 pslld $12,%xmm11 1531 psrld $20,%xmm6 1532 pxor %xmm11,%xmm6 1533 paddd %xmm6,%xmm2 1534 pxor %xmm2,%xmm14 1535 pshufb .Lrol8(%rip),%xmm14 1536 paddd %xmm14,%xmm10 1537 pxor %xmm10,%xmm6 1538 movdqa %xmm6,%xmm11 1539 pslld $7,%xmm11 1540 psrld $25,%xmm6 1541 pxor %xmm11,%xmm6 1542.byte 102,15,58,15,246,12 1543.byte 102,69,15,58,15,210,8 1544.byte 102,69,15,58,15,246,4 1545 movdqa 0+80(%rbp),%xmm11 1546 movq %r13,%r10 1547 movq %r14,%r11 1548 movq %r15,%r12 1549 andq $3,%r12 1550 movq %r15,%r13 1551 andq $-4,%r13 1552 movq %r9,%r14 1553 shrdq $2,%r9,%r15 1554 shrq $2,%r9 1555 addq %r13,%r15 1556 adcq %r14,%r9 1557 addq %r15,%r10 1558 adcq %r9,%r11 1559 adcq $0,%r12 1560 movdqa %xmm9,0+80(%rbp) 1561 paddd %xmm7,%xmm3 1562 pxor %xmm3,%xmm15 1563 pshufb .Lrol16(%rip),%xmm15 1564 paddd %xmm15,%xmm11 1565 pxor %xmm11,%xmm7 1566 movdqa %xmm7,%xmm9 1567 pslld $12,%xmm9 1568 psrld $20,%xmm7 1569 pxor %xmm9,%xmm7 1570 paddd %xmm7,%xmm3 1571 pxor %xmm3,%xmm15 1572 pshufb .Lrol8(%rip),%xmm15 1573 paddd %xmm15,%xmm11 1574 pxor %xmm11,%xmm7 1575 movdqa %xmm7,%xmm9 1576 pslld $7,%xmm9 1577 psrld $25,%xmm7 1578 pxor %xmm9,%xmm7 1579.byte 102,15,58,15,255,12 1580.byte 102,69,15,58,15,219,8 1581.byte 102,69,15,58,15,255,4 1582 movdqa 0+80(%rbp),%xmm9 1583 1584 addq $16,%r8 1585 cmpq $160,%r8 1586 jb .Lopen_sse_tail_256_rounds_and_x1hash 1587 1588 movq %rbx,%rcx 1589 andq $-16,%rcx 1590.Lopen_sse_tail_256_hash: 1591 addq 0+0(%rsi,%r8,1),%r10 1592 adcq 8+0(%rsi,%r8,1),%r11 1593 adcq $1,%r12 1594 movq 0+0+0(%rbp),%rax 1595 movq %rax,%r15 1596 mulq %r10 1597 movq %rax,%r13 1598 movq %rdx,%r14 1599 movq 0+0+0(%rbp),%rax 1600 mulq %r11 1601 imulq %r12,%r15 1602 addq %rax,%r14 1603 adcq %rdx,%r15 1604 movq 8+0+0(%rbp),%rax 1605 movq %rax,%r9 1606 mulq %r10 1607 addq %rax,%r14 1608 adcq $0,%rdx 1609 movq %rdx,%r10 1610 movq 8+0+0(%rbp),%rax 1611 mulq %r11 1612 addq %rax,%r15 1613 adcq $0,%rdx 1614 imulq %r12,%r9 1615 addq %r10,%r15 1616 adcq %rdx,%r9 1617 movq %r13,%r10 1618 movq %r14,%r11 1619 movq %r15,%r12 1620 andq $3,%r12 1621 movq %r15,%r13 1622 andq $-4,%r13 1623 movq %r9,%r14 1624 shrdq $2,%r9,%r15 1625 shrq $2,%r9 1626 addq %r13,%r15 1627 adcq %r14,%r9 1628 addq %r15,%r10 1629 adcq %r9,%r11 1630 adcq $0,%r12 1631 1632 addq $16,%r8 1633 cmpq %rcx,%r8 1634 jb .Lopen_sse_tail_256_hash 1635 paddd .Lchacha20_consts(%rip),%xmm3 1636 paddd 0+48(%rbp),%xmm7 1637 paddd 0+64(%rbp),%xmm11 1638 paddd 0+144(%rbp),%xmm15 1639 paddd .Lchacha20_consts(%rip),%xmm2 1640 paddd 0+48(%rbp),%xmm6 1641 paddd 0+64(%rbp),%xmm10 1642 paddd 0+128(%rbp),%xmm14 1643 paddd .Lchacha20_consts(%rip),%xmm1 1644 paddd 0+48(%rbp),%xmm5 1645 paddd 0+64(%rbp),%xmm9 1646 paddd 0+112(%rbp),%xmm13 1647 paddd .Lchacha20_consts(%rip),%xmm0 1648 paddd 0+48(%rbp),%xmm4 1649 paddd 0+64(%rbp),%xmm8 1650 paddd 0+96(%rbp),%xmm12 
1651 movdqa %xmm12,0+80(%rbp) 1652 movdqu 0 + 0(%rsi),%xmm12 1653 pxor %xmm3,%xmm12 1654 movdqu %xmm12,0 + 0(%rdi) 1655 movdqu 16 + 0(%rsi),%xmm12 1656 pxor %xmm7,%xmm12 1657 movdqu %xmm12,16 + 0(%rdi) 1658 movdqu 32 + 0(%rsi),%xmm12 1659 pxor %xmm11,%xmm12 1660 movdqu %xmm12,32 + 0(%rdi) 1661 movdqu 48 + 0(%rsi),%xmm12 1662 pxor %xmm15,%xmm12 1663 movdqu %xmm12,48 + 0(%rdi) 1664 movdqu 0 + 64(%rsi),%xmm3 1665 movdqu 16 + 64(%rsi),%xmm7 1666 movdqu 32 + 64(%rsi),%xmm11 1667 movdqu 48 + 64(%rsi),%xmm15 1668 pxor %xmm3,%xmm2 1669 pxor %xmm7,%xmm6 1670 pxor %xmm11,%xmm10 1671 pxor %xmm14,%xmm15 1672 movdqu %xmm2,0 + 64(%rdi) 1673 movdqu %xmm6,16 + 64(%rdi) 1674 movdqu %xmm10,32 + 64(%rdi) 1675 movdqu %xmm15,48 + 64(%rdi) 1676 movdqu 0 + 128(%rsi),%xmm3 1677 movdqu 16 + 128(%rsi),%xmm7 1678 movdqu 32 + 128(%rsi),%xmm11 1679 movdqu 48 + 128(%rsi),%xmm15 1680 pxor %xmm3,%xmm1 1681 pxor %xmm7,%xmm5 1682 pxor %xmm11,%xmm9 1683 pxor %xmm13,%xmm15 1684 movdqu %xmm1,0 + 128(%rdi) 1685 movdqu %xmm5,16 + 128(%rdi) 1686 movdqu %xmm9,32 + 128(%rdi) 1687 movdqu %xmm15,48 + 128(%rdi) 1688 1689 movdqa 0+80(%rbp),%xmm12 1690 subq $192,%rbx 1691 leaq 192(%rsi),%rsi 1692 leaq 192(%rdi),%rdi 1693 1694 1695.Lopen_sse_tail_64_dec_loop: 1696 cmpq $16,%rbx 1697 jb .Lopen_sse_tail_16_init 1698 subq $16,%rbx 1699 movdqu (%rsi),%xmm3 1700 pxor %xmm3,%xmm0 1701 movdqu %xmm0,(%rdi) 1702 leaq 16(%rsi),%rsi 1703 leaq 16(%rdi),%rdi 1704 movdqa %xmm4,%xmm0 1705 movdqa %xmm8,%xmm4 1706 movdqa %xmm12,%xmm8 1707 jmp .Lopen_sse_tail_64_dec_loop 1708.Lopen_sse_tail_16_init: 1709 movdqa %xmm0,%xmm1 1710 1711 1712.Lopen_sse_tail_16: 1713 testq %rbx,%rbx 1714 jz .Lopen_sse_finalize 1715 1716 1717 1718 pxor %xmm3,%xmm3 1719 leaq -1(%rsi,%rbx,1),%rsi 1720 movq %rbx,%r8 1721.Lopen_sse_tail_16_compose: 1722 pslldq $1,%xmm3 1723 pinsrb $0,(%rsi),%xmm3 1724 subq $1,%rsi 1725 subq $1,%r8 1726 jnz .Lopen_sse_tail_16_compose 1727 1728.byte 102,73,15,126,221 1729 pextrq $1,%xmm3,%r14 1730 1731 pxor %xmm1,%xmm3 1732 1733 1734.Lopen_sse_tail_16_extract: 1735 pextrb $0,%xmm3,(%rdi) 1736 psrldq $1,%xmm3 1737 addq $1,%rdi 1738 subq $1,%rbx 1739 jne .Lopen_sse_tail_16_extract 1740 1741 addq %r13,%r10 1742 adcq %r14,%r11 1743 adcq $1,%r12 1744 movq 0+0+0(%rbp),%rax 1745 movq %rax,%r15 1746 mulq %r10 1747 movq %rax,%r13 1748 movq %rdx,%r14 1749 movq 0+0+0(%rbp),%rax 1750 mulq %r11 1751 imulq %r12,%r15 1752 addq %rax,%r14 1753 adcq %rdx,%r15 1754 movq 8+0+0(%rbp),%rax 1755 movq %rax,%r9 1756 mulq %r10 1757 addq %rax,%r14 1758 adcq $0,%rdx 1759 movq %rdx,%r10 1760 movq 8+0+0(%rbp),%rax 1761 mulq %r11 1762 addq %rax,%r15 1763 adcq $0,%rdx 1764 imulq %r12,%r9 1765 addq %r10,%r15 1766 adcq %rdx,%r9 1767 movq %r13,%r10 1768 movq %r14,%r11 1769 movq %r15,%r12 1770 andq $3,%r12 1771 movq %r15,%r13 1772 andq $-4,%r13 1773 movq %r9,%r14 1774 shrdq $2,%r9,%r15 1775 shrq $2,%r9 1776 addq %r13,%r15 1777 adcq %r14,%r9 1778 addq %r15,%r10 1779 adcq %r9,%r11 1780 adcq $0,%r12 1781 1782 1783.Lopen_sse_finalize: 1784 addq 0+0+32(%rbp),%r10 1785 adcq 8+0+32(%rbp),%r11 1786 adcq $1,%r12 1787 movq 0+0+0(%rbp),%rax 1788 movq %rax,%r15 1789 mulq %r10 1790 movq %rax,%r13 1791 movq %rdx,%r14 1792 movq 0+0+0(%rbp),%rax 1793 mulq %r11 1794 imulq %r12,%r15 1795 addq %rax,%r14 1796 adcq %rdx,%r15 1797 movq 8+0+0(%rbp),%rax 1798 movq %rax,%r9 1799 mulq %r10 1800 addq %rax,%r14 1801 adcq $0,%rdx 1802 movq %rdx,%r10 1803 movq 8+0+0(%rbp),%rax 1804 mulq %r11 1805 addq %rax,%r15 1806 adcq $0,%rdx 1807 imulq %r12,%r9 1808 addq %r10,%r15 1809 adcq %rdx,%r9 1810 movq %r13,%r10 1811 
movq %r14,%r11 1812 movq %r15,%r12 1813 andq $3,%r12 1814 movq %r15,%r13 1815 andq $-4,%r13 1816 movq %r9,%r14 1817 shrdq $2,%r9,%r15 1818 shrq $2,%r9 1819 addq %r13,%r15 1820 adcq %r14,%r9 1821 addq %r15,%r10 1822 adcq %r9,%r11 1823 adcq $0,%r12 1824 1825 1826 movq %r10,%r13 1827 movq %r11,%r14 1828 movq %r12,%r15 1829 subq $-5,%r10 1830 sbbq $-1,%r11 1831 sbbq $3,%r12 1832 cmovcq %r13,%r10 1833 cmovcq %r14,%r11 1834 cmovcq %r15,%r12 1835 1836 addq 0+0+16(%rbp),%r10 1837 adcq 8+0+16(%rbp),%r11 1838 1839.cfi_remember_state 1840 addq $288 + 0 + 32,%rsp 1841.cfi_adjust_cfa_offset -(288 + 32) 1842 1843 popq %r9 1844.cfi_adjust_cfa_offset -8 1845.cfi_restore %r9 1846 movq %r10,(%r9) 1847 movq %r11,8(%r9) 1848 popq %r15 1849.cfi_adjust_cfa_offset -8 1850.cfi_restore %r15 1851 popq %r14 1852.cfi_adjust_cfa_offset -8 1853.cfi_restore %r14 1854 popq %r13 1855.cfi_adjust_cfa_offset -8 1856.cfi_restore %r13 1857 popq %r12 1858.cfi_adjust_cfa_offset -8 1859.cfi_restore %r12 1860 popq %rbx 1861.cfi_adjust_cfa_offset -8 1862.cfi_restore %rbx 1863 popq %rbp 1864.cfi_adjust_cfa_offset -8 1865.cfi_restore %rbp 1866 .byte 0xf3,0xc3 1867 1868.Lopen_sse_128: 1869.cfi_restore_state 1870 movdqu .Lchacha20_consts(%rip),%xmm0 1871 movdqa %xmm0,%xmm1 1872 movdqa %xmm0,%xmm2 1873 movdqu 0(%r9),%xmm4 1874 movdqa %xmm4,%xmm5 1875 movdqa %xmm4,%xmm6 1876 movdqu 16(%r9),%xmm8 1877 movdqa %xmm8,%xmm9 1878 movdqa %xmm8,%xmm10 1879 movdqu 32(%r9),%xmm12 1880 movdqa %xmm12,%xmm13 1881 paddd .Lsse_inc(%rip),%xmm13 1882 movdqa %xmm13,%xmm14 1883 paddd .Lsse_inc(%rip),%xmm14 1884 movdqa %xmm4,%xmm7 1885 movdqa %xmm8,%xmm11 1886 movdqa %xmm13,%xmm15 1887 movq $10,%r10 1888 1889.Lopen_sse_128_rounds: 1890 paddd %xmm4,%xmm0 1891 pxor %xmm0,%xmm12 1892 pshufb .Lrol16(%rip),%xmm12 1893 paddd %xmm12,%xmm8 1894 pxor %xmm8,%xmm4 1895 movdqa %xmm4,%xmm3 1896 pslld $12,%xmm3 1897 psrld $20,%xmm4 1898 pxor %xmm3,%xmm4 1899 paddd %xmm4,%xmm0 1900 pxor %xmm0,%xmm12 1901 pshufb .Lrol8(%rip),%xmm12 1902 paddd %xmm12,%xmm8 1903 pxor %xmm8,%xmm4 1904 movdqa %xmm4,%xmm3 1905 pslld $7,%xmm3 1906 psrld $25,%xmm4 1907 pxor %xmm3,%xmm4 1908.byte 102,15,58,15,228,4 1909.byte 102,69,15,58,15,192,8 1910.byte 102,69,15,58,15,228,12 1911 paddd %xmm5,%xmm1 1912 pxor %xmm1,%xmm13 1913 pshufb .Lrol16(%rip),%xmm13 1914 paddd %xmm13,%xmm9 1915 pxor %xmm9,%xmm5 1916 movdqa %xmm5,%xmm3 1917 pslld $12,%xmm3 1918 psrld $20,%xmm5 1919 pxor %xmm3,%xmm5 1920 paddd %xmm5,%xmm1 1921 pxor %xmm1,%xmm13 1922 pshufb .Lrol8(%rip),%xmm13 1923 paddd %xmm13,%xmm9 1924 pxor %xmm9,%xmm5 1925 movdqa %xmm5,%xmm3 1926 pslld $7,%xmm3 1927 psrld $25,%xmm5 1928 pxor %xmm3,%xmm5 1929.byte 102,15,58,15,237,4 1930.byte 102,69,15,58,15,201,8 1931.byte 102,69,15,58,15,237,12 1932 paddd %xmm6,%xmm2 1933 pxor %xmm2,%xmm14 1934 pshufb .Lrol16(%rip),%xmm14 1935 paddd %xmm14,%xmm10 1936 pxor %xmm10,%xmm6 1937 movdqa %xmm6,%xmm3 1938 pslld $12,%xmm3 1939 psrld $20,%xmm6 1940 pxor %xmm3,%xmm6 1941 paddd %xmm6,%xmm2 1942 pxor %xmm2,%xmm14 1943 pshufb .Lrol8(%rip),%xmm14 1944 paddd %xmm14,%xmm10 1945 pxor %xmm10,%xmm6 1946 movdqa %xmm6,%xmm3 1947 pslld $7,%xmm3 1948 psrld $25,%xmm6 1949 pxor %xmm3,%xmm6 1950.byte 102,15,58,15,246,4 1951.byte 102,69,15,58,15,210,8 1952.byte 102,69,15,58,15,246,12 1953 paddd %xmm4,%xmm0 1954 pxor %xmm0,%xmm12 1955 pshufb .Lrol16(%rip),%xmm12 1956 paddd %xmm12,%xmm8 1957 pxor %xmm8,%xmm4 1958 movdqa %xmm4,%xmm3 1959 pslld $12,%xmm3 1960 psrld $20,%xmm4 1961 pxor %xmm3,%xmm4 1962 paddd %xmm4,%xmm0 1963 pxor %xmm0,%xmm12 1964 pshufb .Lrol8(%rip),%xmm12 1965 paddd 
%xmm12,%xmm8 1966 pxor %xmm8,%xmm4 1967 movdqa %xmm4,%xmm3 1968 pslld $7,%xmm3 1969 psrld $25,%xmm4 1970 pxor %xmm3,%xmm4 1971.byte 102,15,58,15,228,12 1972.byte 102,69,15,58,15,192,8 1973.byte 102,69,15,58,15,228,4 1974 paddd %xmm5,%xmm1 1975 pxor %xmm1,%xmm13 1976 pshufb .Lrol16(%rip),%xmm13 1977 paddd %xmm13,%xmm9 1978 pxor %xmm9,%xmm5 1979 movdqa %xmm5,%xmm3 1980 pslld $12,%xmm3 1981 psrld $20,%xmm5 1982 pxor %xmm3,%xmm5 1983 paddd %xmm5,%xmm1 1984 pxor %xmm1,%xmm13 1985 pshufb .Lrol8(%rip),%xmm13 1986 paddd %xmm13,%xmm9 1987 pxor %xmm9,%xmm5 1988 movdqa %xmm5,%xmm3 1989 pslld $7,%xmm3 1990 psrld $25,%xmm5 1991 pxor %xmm3,%xmm5 1992.byte 102,15,58,15,237,12 1993.byte 102,69,15,58,15,201,8 1994.byte 102,69,15,58,15,237,4 1995 paddd %xmm6,%xmm2 1996 pxor %xmm2,%xmm14 1997 pshufb .Lrol16(%rip),%xmm14 1998 paddd %xmm14,%xmm10 1999 pxor %xmm10,%xmm6 2000 movdqa %xmm6,%xmm3 2001 pslld $12,%xmm3 2002 psrld $20,%xmm6 2003 pxor %xmm3,%xmm6 2004 paddd %xmm6,%xmm2 2005 pxor %xmm2,%xmm14 2006 pshufb .Lrol8(%rip),%xmm14 2007 paddd %xmm14,%xmm10 2008 pxor %xmm10,%xmm6 2009 movdqa %xmm6,%xmm3 2010 pslld $7,%xmm3 2011 psrld $25,%xmm6 2012 pxor %xmm3,%xmm6 2013.byte 102,15,58,15,246,12 2014.byte 102,69,15,58,15,210,8 2015.byte 102,69,15,58,15,246,4 2016 2017 decq %r10 2018 jnz .Lopen_sse_128_rounds 2019 paddd .Lchacha20_consts(%rip),%xmm0 2020 paddd .Lchacha20_consts(%rip),%xmm1 2021 paddd .Lchacha20_consts(%rip),%xmm2 2022 paddd %xmm7,%xmm4 2023 paddd %xmm7,%xmm5 2024 paddd %xmm7,%xmm6 2025 paddd %xmm11,%xmm9 2026 paddd %xmm11,%xmm10 2027 paddd %xmm15,%xmm13 2028 paddd .Lsse_inc(%rip),%xmm15 2029 paddd %xmm15,%xmm14 2030 2031 pand .Lclamp(%rip),%xmm0 2032 movdqa %xmm0,0+0(%rbp) 2033 movdqa %xmm4,0+16(%rbp) 2034 2035 movq %r8,%r8 2036 call poly_hash_ad_internal 2037.Lopen_sse_128_xor_hash: 2038 cmpq $16,%rbx 2039 jb .Lopen_sse_tail_16 2040 subq $16,%rbx 2041 addq 0+0(%rsi),%r10 2042 adcq 8+0(%rsi),%r11 2043 adcq $1,%r12 2044 2045 2046 movdqu 0(%rsi),%xmm3 2047 pxor %xmm3,%xmm1 2048 movdqu %xmm1,0(%rdi) 2049 leaq 16(%rsi),%rsi 2050 leaq 16(%rdi),%rdi 2051 movq 0+0+0(%rbp),%rax 2052 movq %rax,%r15 2053 mulq %r10 2054 movq %rax,%r13 2055 movq %rdx,%r14 2056 movq 0+0+0(%rbp),%rax 2057 mulq %r11 2058 imulq %r12,%r15 2059 addq %rax,%r14 2060 adcq %rdx,%r15 2061 movq 8+0+0(%rbp),%rax 2062 movq %rax,%r9 2063 mulq %r10 2064 addq %rax,%r14 2065 adcq $0,%rdx 2066 movq %rdx,%r10 2067 movq 8+0+0(%rbp),%rax 2068 mulq %r11 2069 addq %rax,%r15 2070 adcq $0,%rdx 2071 imulq %r12,%r9 2072 addq %r10,%r15 2073 adcq %rdx,%r9 2074 movq %r13,%r10 2075 movq %r14,%r11 2076 movq %r15,%r12 2077 andq $3,%r12 2078 movq %r15,%r13 2079 andq $-4,%r13 2080 movq %r9,%r14 2081 shrdq $2,%r9,%r15 2082 shrq $2,%r9 2083 addq %r13,%r15 2084 adcq %r14,%r9 2085 addq %r15,%r10 2086 adcq %r9,%r11 2087 adcq $0,%r12 2088 2089 2090 movdqa %xmm5,%xmm1 2091 movdqa %xmm9,%xmm5 2092 movdqa %xmm13,%xmm9 2093 movdqa %xmm2,%xmm13 2094 movdqa %xmm6,%xmm2 2095 movdqa %xmm10,%xmm6 2096 movdqa %xmm14,%xmm10 2097 jmp .Lopen_sse_128_xor_hash 2098.size GFp_chacha20_poly1305_open, .-GFp_chacha20_poly1305_open 2099.cfi_endproc 2100 2101 2102 2103 2104 2105 2106.globl GFp_chacha20_poly1305_seal 2107.hidden GFp_chacha20_poly1305_seal 2108.type GFp_chacha20_poly1305_seal,@function 2109.align 64 2110GFp_chacha20_poly1305_seal: 2111.cfi_startproc 2112 pushq %rbp 2113.cfi_adjust_cfa_offset 8 2114.cfi_offset %rbp,-16 2115 pushq %rbx 2116.cfi_adjust_cfa_offset 8 2117.cfi_offset %rbx,-24 2118 pushq %r12 2119.cfi_adjust_cfa_offset 8 2120.cfi_offset %r12,-32 2121 pushq %r13 
2122.cfi_adjust_cfa_offset 8 2123.cfi_offset %r13,-40 2124 pushq %r14 2125.cfi_adjust_cfa_offset 8 2126.cfi_offset %r14,-48 2127 pushq %r15 2128.cfi_adjust_cfa_offset 8 2129.cfi_offset %r15,-56 2130 2131 2132 pushq %r9 2133.cfi_adjust_cfa_offset 8 2134.cfi_offset %r9,-64 2135 subq $288 + 0 + 32,%rsp 2136.cfi_adjust_cfa_offset 288 + 32 2137 leaq 32(%rsp),%rbp 2138 andq $-32,%rbp 2139 2140 movq 56(%r9),%rbx 2141 addq %rdx,%rbx 2142 movq %r8,0+0+32(%rbp) 2143 movq %rbx,8+0+32(%rbp) 2144 movq %rdx,%rbx 2145 2146 movl GFp_ia32cap_P+8(%rip),%eax 2147 andl $288,%eax 2148 xorl $288,%eax 2149 jz chacha20_poly1305_seal_avx2 2150 2151 cmpq $128,%rbx 2152 jbe .Lseal_sse_128 2153 2154 movdqa .Lchacha20_consts(%rip),%xmm0 2155 movdqu 0(%r9),%xmm4 2156 movdqu 16(%r9),%xmm8 2157 movdqu 32(%r9),%xmm12 2158 2159 movdqa %xmm0,%xmm1 2160 movdqa %xmm0,%xmm2 2161 movdqa %xmm0,%xmm3 2162 movdqa %xmm4,%xmm5 2163 movdqa %xmm4,%xmm6 2164 movdqa %xmm4,%xmm7 2165 movdqa %xmm8,%xmm9 2166 movdqa %xmm8,%xmm10 2167 movdqa %xmm8,%xmm11 2168 movdqa %xmm12,%xmm15 2169 paddd .Lsse_inc(%rip),%xmm12 2170 movdqa %xmm12,%xmm14 2171 paddd .Lsse_inc(%rip),%xmm12 2172 movdqa %xmm12,%xmm13 2173 paddd .Lsse_inc(%rip),%xmm12 2174 2175 movdqa %xmm4,0+48(%rbp) 2176 movdqa %xmm8,0+64(%rbp) 2177 movdqa %xmm12,0+96(%rbp) 2178 movdqa %xmm13,0+112(%rbp) 2179 movdqa %xmm14,0+128(%rbp) 2180 movdqa %xmm15,0+144(%rbp) 2181 movq $10,%r10 2182.Lseal_sse_init_rounds: 2183 movdqa %xmm8,0+80(%rbp) 2184 movdqa .Lrol16(%rip),%xmm8 2185 paddd %xmm7,%xmm3 2186 paddd %xmm6,%xmm2 2187 paddd %xmm5,%xmm1 2188 paddd %xmm4,%xmm0 2189 pxor %xmm3,%xmm15 2190 pxor %xmm2,%xmm14 2191 pxor %xmm1,%xmm13 2192 pxor %xmm0,%xmm12 2193.byte 102,69,15,56,0,248 2194.byte 102,69,15,56,0,240 2195.byte 102,69,15,56,0,232 2196.byte 102,69,15,56,0,224 2197 movdqa 0+80(%rbp),%xmm8 2198 paddd %xmm15,%xmm11 2199 paddd %xmm14,%xmm10 2200 paddd %xmm13,%xmm9 2201 paddd %xmm12,%xmm8 2202 pxor %xmm11,%xmm7 2203 pxor %xmm10,%xmm6 2204 pxor %xmm9,%xmm5 2205 pxor %xmm8,%xmm4 2206 movdqa %xmm8,0+80(%rbp) 2207 movdqa %xmm7,%xmm8 2208 psrld $20,%xmm8 2209 pslld $32-20,%xmm7 2210 pxor %xmm8,%xmm7 2211 movdqa %xmm6,%xmm8 2212 psrld $20,%xmm8 2213 pslld $32-20,%xmm6 2214 pxor %xmm8,%xmm6 2215 movdqa %xmm5,%xmm8 2216 psrld $20,%xmm8 2217 pslld $32-20,%xmm5 2218 pxor %xmm8,%xmm5 2219 movdqa %xmm4,%xmm8 2220 psrld $20,%xmm8 2221 pslld $32-20,%xmm4 2222 pxor %xmm8,%xmm4 2223 movdqa .Lrol8(%rip),%xmm8 2224 paddd %xmm7,%xmm3 2225 paddd %xmm6,%xmm2 2226 paddd %xmm5,%xmm1 2227 paddd %xmm4,%xmm0 2228 pxor %xmm3,%xmm15 2229 pxor %xmm2,%xmm14 2230 pxor %xmm1,%xmm13 2231 pxor %xmm0,%xmm12 2232.byte 102,69,15,56,0,248 2233.byte 102,69,15,56,0,240 2234.byte 102,69,15,56,0,232 2235.byte 102,69,15,56,0,224 2236 movdqa 0+80(%rbp),%xmm8 2237 paddd %xmm15,%xmm11 2238 paddd %xmm14,%xmm10 2239 paddd %xmm13,%xmm9 2240 paddd %xmm12,%xmm8 2241 pxor %xmm11,%xmm7 2242 pxor %xmm10,%xmm6 2243 pxor %xmm9,%xmm5 2244 pxor %xmm8,%xmm4 2245 movdqa %xmm8,0+80(%rbp) 2246 movdqa %xmm7,%xmm8 2247 psrld $25,%xmm8 2248 pslld $32-25,%xmm7 2249 pxor %xmm8,%xmm7 2250 movdqa %xmm6,%xmm8 2251 psrld $25,%xmm8 2252 pslld $32-25,%xmm6 2253 pxor %xmm8,%xmm6 2254 movdqa %xmm5,%xmm8 2255 psrld $25,%xmm8 2256 pslld $32-25,%xmm5 2257 pxor %xmm8,%xmm5 2258 movdqa %xmm4,%xmm8 2259 psrld $25,%xmm8 2260 pslld $32-25,%xmm4 2261 pxor %xmm8,%xmm4 2262 movdqa 0+80(%rbp),%xmm8 2263.byte 102,15,58,15,255,4 2264.byte 102,69,15,58,15,219,8 2265.byte 102,69,15,58,15,255,12 2266.byte 102,15,58,15,246,4 2267.byte 102,69,15,58,15,210,8 2268.byte 
102,69,15,58,15,246,12 2269.byte 102,15,58,15,237,4 2270.byte 102,69,15,58,15,201,8 2271.byte 102,69,15,58,15,237,12 2272.byte 102,15,58,15,228,4 2273.byte 102,69,15,58,15,192,8 2274.byte 102,69,15,58,15,228,12 2275 movdqa %xmm8,0+80(%rbp) 2276 movdqa .Lrol16(%rip),%xmm8 2277 paddd %xmm7,%xmm3 2278 paddd %xmm6,%xmm2 2279 paddd %xmm5,%xmm1 2280 paddd %xmm4,%xmm0 2281 pxor %xmm3,%xmm15 2282 pxor %xmm2,%xmm14 2283 pxor %xmm1,%xmm13 2284 pxor %xmm0,%xmm12 2285.byte 102,69,15,56,0,248 2286.byte 102,69,15,56,0,240 2287.byte 102,69,15,56,0,232 2288.byte 102,69,15,56,0,224 2289 movdqa 0+80(%rbp),%xmm8 2290 paddd %xmm15,%xmm11 2291 paddd %xmm14,%xmm10 2292 paddd %xmm13,%xmm9 2293 paddd %xmm12,%xmm8 2294 pxor %xmm11,%xmm7 2295 pxor %xmm10,%xmm6 2296 pxor %xmm9,%xmm5 2297 pxor %xmm8,%xmm4 2298 movdqa %xmm8,0+80(%rbp) 2299 movdqa %xmm7,%xmm8 2300 psrld $20,%xmm8 2301 pslld $32-20,%xmm7 2302 pxor %xmm8,%xmm7 2303 movdqa %xmm6,%xmm8 2304 psrld $20,%xmm8 2305 pslld $32-20,%xmm6 2306 pxor %xmm8,%xmm6 2307 movdqa %xmm5,%xmm8 2308 psrld $20,%xmm8 2309 pslld $32-20,%xmm5 2310 pxor %xmm8,%xmm5 2311 movdqa %xmm4,%xmm8 2312 psrld $20,%xmm8 2313 pslld $32-20,%xmm4 2314 pxor %xmm8,%xmm4 2315 movdqa .Lrol8(%rip),%xmm8 2316 paddd %xmm7,%xmm3 2317 paddd %xmm6,%xmm2 2318 paddd %xmm5,%xmm1 2319 paddd %xmm4,%xmm0 2320 pxor %xmm3,%xmm15 2321 pxor %xmm2,%xmm14 2322 pxor %xmm1,%xmm13 2323 pxor %xmm0,%xmm12 2324.byte 102,69,15,56,0,248 2325.byte 102,69,15,56,0,240 2326.byte 102,69,15,56,0,232 2327.byte 102,69,15,56,0,224 2328 movdqa 0+80(%rbp),%xmm8 2329 paddd %xmm15,%xmm11 2330 paddd %xmm14,%xmm10 2331 paddd %xmm13,%xmm9 2332 paddd %xmm12,%xmm8 2333 pxor %xmm11,%xmm7 2334 pxor %xmm10,%xmm6 2335 pxor %xmm9,%xmm5 2336 pxor %xmm8,%xmm4 2337 movdqa %xmm8,0+80(%rbp) 2338 movdqa %xmm7,%xmm8 2339 psrld $25,%xmm8 2340 pslld $32-25,%xmm7 2341 pxor %xmm8,%xmm7 2342 movdqa %xmm6,%xmm8 2343 psrld $25,%xmm8 2344 pslld $32-25,%xmm6 2345 pxor %xmm8,%xmm6 2346 movdqa %xmm5,%xmm8 2347 psrld $25,%xmm8 2348 pslld $32-25,%xmm5 2349 pxor %xmm8,%xmm5 2350 movdqa %xmm4,%xmm8 2351 psrld $25,%xmm8 2352 pslld $32-25,%xmm4 2353 pxor %xmm8,%xmm4 2354 movdqa 0+80(%rbp),%xmm8 2355.byte 102,15,58,15,255,12 2356.byte 102,69,15,58,15,219,8 2357.byte 102,69,15,58,15,255,4 2358.byte 102,15,58,15,246,12 2359.byte 102,69,15,58,15,210,8 2360.byte 102,69,15,58,15,246,4 2361.byte 102,15,58,15,237,12 2362.byte 102,69,15,58,15,201,8 2363.byte 102,69,15,58,15,237,4 2364.byte 102,15,58,15,228,12 2365.byte 102,69,15,58,15,192,8 2366.byte 102,69,15,58,15,228,4 2367 2368 decq %r10 2369 jnz .Lseal_sse_init_rounds 2370 paddd .Lchacha20_consts(%rip),%xmm3 2371 paddd 0+48(%rbp),%xmm7 2372 paddd 0+64(%rbp),%xmm11 2373 paddd 0+144(%rbp),%xmm15 2374 paddd .Lchacha20_consts(%rip),%xmm2 2375 paddd 0+48(%rbp),%xmm6 2376 paddd 0+64(%rbp),%xmm10 2377 paddd 0+128(%rbp),%xmm14 2378 paddd .Lchacha20_consts(%rip),%xmm1 2379 paddd 0+48(%rbp),%xmm5 2380 paddd 0+64(%rbp),%xmm9 2381 paddd 0+112(%rbp),%xmm13 2382 paddd .Lchacha20_consts(%rip),%xmm0 2383 paddd 0+48(%rbp),%xmm4 2384 paddd 0+64(%rbp),%xmm8 2385 paddd 0+96(%rbp),%xmm12 2386 2387 2388 pand .Lclamp(%rip),%xmm3 2389 movdqa %xmm3,0+0(%rbp) 2390 movdqa %xmm7,0+16(%rbp) 2391 2392 movq %r8,%r8 2393 call poly_hash_ad_internal 2394 movdqu 0 + 0(%rsi),%xmm3 2395 movdqu 16 + 0(%rsi),%xmm7 2396 movdqu 32 + 0(%rsi),%xmm11 2397 movdqu 48 + 0(%rsi),%xmm15 2398 pxor %xmm3,%xmm2 2399 pxor %xmm7,%xmm6 2400 pxor %xmm11,%xmm10 2401 pxor %xmm14,%xmm15 2402 movdqu %xmm2,0 + 0(%rdi) 2403 movdqu %xmm6,16 + 0(%rdi) 2404 movdqu %xmm10,32 + 0(%rdi) 
2405 movdqu %xmm15,48 + 0(%rdi) 2406 movdqu 0 + 64(%rsi),%xmm3 2407 movdqu 16 + 64(%rsi),%xmm7 2408 movdqu 32 + 64(%rsi),%xmm11 2409 movdqu 48 + 64(%rsi),%xmm15 2410 pxor %xmm3,%xmm1 2411 pxor %xmm7,%xmm5 2412 pxor %xmm11,%xmm9 2413 pxor %xmm13,%xmm15 2414 movdqu %xmm1,0 + 64(%rdi) 2415 movdqu %xmm5,16 + 64(%rdi) 2416 movdqu %xmm9,32 + 64(%rdi) 2417 movdqu %xmm15,48 + 64(%rdi) 2418 2419 cmpq $192,%rbx 2420 ja .Lseal_sse_main_init 2421 movq $128,%rcx 2422 subq $128,%rbx 2423 leaq 128(%rsi),%rsi 2424 jmp .Lseal_sse_128_tail_hash 2425.Lseal_sse_main_init: 2426 movdqu 0 + 128(%rsi),%xmm3 2427 movdqu 16 + 128(%rsi),%xmm7 2428 movdqu 32 + 128(%rsi),%xmm11 2429 movdqu 48 + 128(%rsi),%xmm15 2430 pxor %xmm3,%xmm0 2431 pxor %xmm7,%xmm4 2432 pxor %xmm11,%xmm8 2433 pxor %xmm12,%xmm15 2434 movdqu %xmm0,0 + 128(%rdi) 2435 movdqu %xmm4,16 + 128(%rdi) 2436 movdqu %xmm8,32 + 128(%rdi) 2437 movdqu %xmm15,48 + 128(%rdi) 2438 2439 movq $192,%rcx 2440 subq $192,%rbx 2441 leaq 192(%rsi),%rsi 2442 movq $2,%rcx 2443 movq $8,%r8 2444 cmpq $64,%rbx 2445 jbe .Lseal_sse_tail_64 2446 cmpq $128,%rbx 2447 jbe .Lseal_sse_tail_128 2448 cmpq $192,%rbx 2449 jbe .Lseal_sse_tail_192 2450 2451.Lseal_sse_main_loop: 2452 movdqa .Lchacha20_consts(%rip),%xmm0 2453 movdqa 0+48(%rbp),%xmm4 2454 movdqa 0+64(%rbp),%xmm8 2455 movdqa %xmm0,%xmm1 2456 movdqa %xmm4,%xmm5 2457 movdqa %xmm8,%xmm9 2458 movdqa %xmm0,%xmm2 2459 movdqa %xmm4,%xmm6 2460 movdqa %xmm8,%xmm10 2461 movdqa %xmm0,%xmm3 2462 movdqa %xmm4,%xmm7 2463 movdqa %xmm8,%xmm11 2464 movdqa 0+96(%rbp),%xmm15 2465 paddd .Lsse_inc(%rip),%xmm15 2466 movdqa %xmm15,%xmm14 2467 paddd .Lsse_inc(%rip),%xmm14 2468 movdqa %xmm14,%xmm13 2469 paddd .Lsse_inc(%rip),%xmm13 2470 movdqa %xmm13,%xmm12 2471 paddd .Lsse_inc(%rip),%xmm12 2472 movdqa %xmm12,0+96(%rbp) 2473 movdqa %xmm13,0+112(%rbp) 2474 movdqa %xmm14,0+128(%rbp) 2475 movdqa %xmm15,0+144(%rbp) 2476 2477.align 32 2478.Lseal_sse_main_rounds: 2479 movdqa %xmm8,0+80(%rbp) 2480 movdqa .Lrol16(%rip),%xmm8 2481 paddd %xmm7,%xmm3 2482 paddd %xmm6,%xmm2 2483 paddd %xmm5,%xmm1 2484 paddd %xmm4,%xmm0 2485 pxor %xmm3,%xmm15 2486 pxor %xmm2,%xmm14 2487 pxor %xmm1,%xmm13 2488 pxor %xmm0,%xmm12 2489.byte 102,69,15,56,0,248 2490.byte 102,69,15,56,0,240 2491.byte 102,69,15,56,0,232 2492.byte 102,69,15,56,0,224 2493 movdqa 0+80(%rbp),%xmm8 2494 paddd %xmm15,%xmm11 2495 paddd %xmm14,%xmm10 2496 paddd %xmm13,%xmm9 2497 paddd %xmm12,%xmm8 2498 pxor %xmm11,%xmm7 2499 addq 0+0(%rdi),%r10 2500 adcq 8+0(%rdi),%r11 2501 adcq $1,%r12 2502 pxor %xmm10,%xmm6 2503 pxor %xmm9,%xmm5 2504 pxor %xmm8,%xmm4 2505 movdqa %xmm8,0+80(%rbp) 2506 movdqa %xmm7,%xmm8 2507 psrld $20,%xmm8 2508 pslld $32-20,%xmm7 2509 pxor %xmm8,%xmm7 2510 movdqa %xmm6,%xmm8 2511 psrld $20,%xmm8 2512 pslld $32-20,%xmm6 2513 pxor %xmm8,%xmm6 2514 movdqa %xmm5,%xmm8 2515 psrld $20,%xmm8 2516 pslld $32-20,%xmm5 2517 pxor %xmm8,%xmm5 2518 movdqa %xmm4,%xmm8 2519 psrld $20,%xmm8 2520 pslld $32-20,%xmm4 2521 pxor %xmm8,%xmm4 2522 movq 0+0+0(%rbp),%rax 2523 movq %rax,%r15 2524 mulq %r10 2525 movq %rax,%r13 2526 movq %rdx,%r14 2527 movq 0+0+0(%rbp),%rax 2528 mulq %r11 2529 imulq %r12,%r15 2530 addq %rax,%r14 2531 adcq %rdx,%r15 2532 movdqa .Lrol8(%rip),%xmm8 2533 paddd %xmm7,%xmm3 2534 paddd %xmm6,%xmm2 2535 paddd %xmm5,%xmm1 2536 paddd %xmm4,%xmm0 2537 pxor %xmm3,%xmm15 2538 pxor %xmm2,%xmm14 2539 pxor %xmm1,%xmm13 2540 pxor %xmm0,%xmm12 2541.byte 102,69,15,56,0,248 2542.byte 102,69,15,56,0,240 2543.byte 102,69,15,56,0,232 2544.byte 102,69,15,56,0,224 2545 movdqa 0+80(%rbp),%xmm8 2546 paddd 
%xmm15,%xmm11 2547 paddd %xmm14,%xmm10 2548 paddd %xmm13,%xmm9 2549 paddd %xmm12,%xmm8 2550 pxor %xmm11,%xmm7 2551 pxor %xmm10,%xmm6 2552 movq 8+0+0(%rbp),%rax 2553 movq %rax,%r9 2554 mulq %r10 2555 addq %rax,%r14 2556 adcq $0,%rdx 2557 movq %rdx,%r10 2558 movq 8+0+0(%rbp),%rax 2559 mulq %r11 2560 addq %rax,%r15 2561 adcq $0,%rdx 2562 pxor %xmm9,%xmm5 2563 pxor %xmm8,%xmm4 2564 movdqa %xmm8,0+80(%rbp) 2565 movdqa %xmm7,%xmm8 2566 psrld $25,%xmm8 2567 pslld $32-25,%xmm7 2568 pxor %xmm8,%xmm7 2569 movdqa %xmm6,%xmm8 2570 psrld $25,%xmm8 2571 pslld $32-25,%xmm6 2572 pxor %xmm8,%xmm6 2573 movdqa %xmm5,%xmm8 2574 psrld $25,%xmm8 2575 pslld $32-25,%xmm5 2576 pxor %xmm8,%xmm5 2577 movdqa %xmm4,%xmm8 2578 psrld $25,%xmm8 2579 pslld $32-25,%xmm4 2580 pxor %xmm8,%xmm4 2581 movdqa 0+80(%rbp),%xmm8 2582 imulq %r12,%r9 2583 addq %r10,%r15 2584 adcq %rdx,%r9 2585.byte 102,15,58,15,255,4 2586.byte 102,69,15,58,15,219,8 2587.byte 102,69,15,58,15,255,12 2588.byte 102,15,58,15,246,4 2589.byte 102,69,15,58,15,210,8 2590.byte 102,69,15,58,15,246,12 2591.byte 102,15,58,15,237,4 2592.byte 102,69,15,58,15,201,8 2593.byte 102,69,15,58,15,237,12 2594.byte 102,15,58,15,228,4 2595.byte 102,69,15,58,15,192,8 2596.byte 102,69,15,58,15,228,12 2597 movdqa %xmm8,0+80(%rbp) 2598 movdqa .Lrol16(%rip),%xmm8 2599 paddd %xmm7,%xmm3 2600 paddd %xmm6,%xmm2 2601 paddd %xmm5,%xmm1 2602 paddd %xmm4,%xmm0 2603 pxor %xmm3,%xmm15 2604 pxor %xmm2,%xmm14 2605 movq %r13,%r10 2606 movq %r14,%r11 2607 movq %r15,%r12 2608 andq $3,%r12 2609 movq %r15,%r13 2610 andq $-4,%r13 2611 movq %r9,%r14 2612 shrdq $2,%r9,%r15 2613 shrq $2,%r9 2614 addq %r13,%r15 2615 adcq %r14,%r9 2616 addq %r15,%r10 2617 adcq %r9,%r11 2618 adcq $0,%r12 2619 pxor %xmm1,%xmm13 2620 pxor %xmm0,%xmm12 2621.byte 102,69,15,56,0,248 2622.byte 102,69,15,56,0,240 2623.byte 102,69,15,56,0,232 2624.byte 102,69,15,56,0,224 2625 movdqa 0+80(%rbp),%xmm8 2626 paddd %xmm15,%xmm11 2627 paddd %xmm14,%xmm10 2628 paddd %xmm13,%xmm9 2629 paddd %xmm12,%xmm8 2630 pxor %xmm11,%xmm7 2631 pxor %xmm10,%xmm6 2632 pxor %xmm9,%xmm5 2633 pxor %xmm8,%xmm4 2634 movdqa %xmm8,0+80(%rbp) 2635 movdqa %xmm7,%xmm8 2636 psrld $20,%xmm8 2637 pslld $32-20,%xmm7 2638 pxor %xmm8,%xmm7 2639 movdqa %xmm6,%xmm8 2640 psrld $20,%xmm8 2641 pslld $32-20,%xmm6 2642 pxor %xmm8,%xmm6 2643 movdqa %xmm5,%xmm8 2644 psrld $20,%xmm8 2645 pslld $32-20,%xmm5 2646 pxor %xmm8,%xmm5 2647 movdqa %xmm4,%xmm8 2648 psrld $20,%xmm8 2649 pslld $32-20,%xmm4 2650 pxor %xmm8,%xmm4 2651 movdqa .Lrol8(%rip),%xmm8 2652 paddd %xmm7,%xmm3 2653 paddd %xmm6,%xmm2 2654 paddd %xmm5,%xmm1 2655 paddd %xmm4,%xmm0 2656 pxor %xmm3,%xmm15 2657 pxor %xmm2,%xmm14 2658 pxor %xmm1,%xmm13 2659 pxor %xmm0,%xmm12 2660.byte 102,69,15,56,0,248 2661.byte 102,69,15,56,0,240 2662.byte 102,69,15,56,0,232 2663.byte 102,69,15,56,0,224 2664 movdqa 0+80(%rbp),%xmm8 2665 paddd %xmm15,%xmm11 2666 paddd %xmm14,%xmm10 2667 paddd %xmm13,%xmm9 2668 paddd %xmm12,%xmm8 2669 pxor %xmm11,%xmm7 2670 pxor %xmm10,%xmm6 2671 pxor %xmm9,%xmm5 2672 pxor %xmm8,%xmm4 2673 movdqa %xmm8,0+80(%rbp) 2674 movdqa %xmm7,%xmm8 2675 psrld $25,%xmm8 2676 pslld $32-25,%xmm7 2677 pxor %xmm8,%xmm7 2678 movdqa %xmm6,%xmm8 2679 psrld $25,%xmm8 2680 pslld $32-25,%xmm6 2681 pxor %xmm8,%xmm6 2682 movdqa %xmm5,%xmm8 2683 psrld $25,%xmm8 2684 pslld $32-25,%xmm5 2685 pxor %xmm8,%xmm5 2686 movdqa %xmm4,%xmm8 2687 psrld $25,%xmm8 2688 pslld $32-25,%xmm4 2689 pxor %xmm8,%xmm4 2690 movdqa 0+80(%rbp),%xmm8 2691.byte 102,15,58,15,255,12 2692.byte 102,69,15,58,15,219,8 2693.byte 102,69,15,58,15,255,4 2694.byte 
102,15,58,15,246,12 2695.byte 102,69,15,58,15,210,8 2696.byte 102,69,15,58,15,246,4 2697.byte 102,15,58,15,237,12 2698.byte 102,69,15,58,15,201,8 2699.byte 102,69,15,58,15,237,4 2700.byte 102,15,58,15,228,12 2701.byte 102,69,15,58,15,192,8 2702.byte 102,69,15,58,15,228,4 2703 2704 leaq 16(%rdi),%rdi 2705 decq %r8 2706 jge .Lseal_sse_main_rounds 2707 addq 0+0(%rdi),%r10 2708 adcq 8+0(%rdi),%r11 2709 adcq $1,%r12 2710 movq 0+0+0(%rbp),%rax 2711 movq %rax,%r15 2712 mulq %r10 2713 movq %rax,%r13 2714 movq %rdx,%r14 2715 movq 0+0+0(%rbp),%rax 2716 mulq %r11 2717 imulq %r12,%r15 2718 addq %rax,%r14 2719 adcq %rdx,%r15 2720 movq 8+0+0(%rbp),%rax 2721 movq %rax,%r9 2722 mulq %r10 2723 addq %rax,%r14 2724 adcq $0,%rdx 2725 movq %rdx,%r10 2726 movq 8+0+0(%rbp),%rax 2727 mulq %r11 2728 addq %rax,%r15 2729 adcq $0,%rdx 2730 imulq %r12,%r9 2731 addq %r10,%r15 2732 adcq %rdx,%r9 2733 movq %r13,%r10 2734 movq %r14,%r11 2735 movq %r15,%r12 2736 andq $3,%r12 2737 movq %r15,%r13 2738 andq $-4,%r13 2739 movq %r9,%r14 2740 shrdq $2,%r9,%r15 2741 shrq $2,%r9 2742 addq %r13,%r15 2743 adcq %r14,%r9 2744 addq %r15,%r10 2745 adcq %r9,%r11 2746 adcq $0,%r12 2747 2748 leaq 16(%rdi),%rdi 2749 decq %rcx 2750 jg .Lseal_sse_main_rounds 2751 paddd .Lchacha20_consts(%rip),%xmm3 2752 paddd 0+48(%rbp),%xmm7 2753 paddd 0+64(%rbp),%xmm11 2754 paddd 0+144(%rbp),%xmm15 2755 paddd .Lchacha20_consts(%rip),%xmm2 2756 paddd 0+48(%rbp),%xmm6 2757 paddd 0+64(%rbp),%xmm10 2758 paddd 0+128(%rbp),%xmm14 2759 paddd .Lchacha20_consts(%rip),%xmm1 2760 paddd 0+48(%rbp),%xmm5 2761 paddd 0+64(%rbp),%xmm9 2762 paddd 0+112(%rbp),%xmm13 2763 paddd .Lchacha20_consts(%rip),%xmm0 2764 paddd 0+48(%rbp),%xmm4 2765 paddd 0+64(%rbp),%xmm8 2766 paddd 0+96(%rbp),%xmm12 2767 2768 movdqa %xmm14,0+80(%rbp) 2769 movdqa %xmm14,0+80(%rbp) 2770 movdqu 0 + 0(%rsi),%xmm14 2771 pxor %xmm3,%xmm14 2772 movdqu %xmm14,0 + 0(%rdi) 2773 movdqu 16 + 0(%rsi),%xmm14 2774 pxor %xmm7,%xmm14 2775 movdqu %xmm14,16 + 0(%rdi) 2776 movdqu 32 + 0(%rsi),%xmm14 2777 pxor %xmm11,%xmm14 2778 movdqu %xmm14,32 + 0(%rdi) 2779 movdqu 48 + 0(%rsi),%xmm14 2780 pxor %xmm15,%xmm14 2781 movdqu %xmm14,48 + 0(%rdi) 2782 2783 movdqa 0+80(%rbp),%xmm14 2784 movdqu 0 + 64(%rsi),%xmm3 2785 movdqu 16 + 64(%rsi),%xmm7 2786 movdqu 32 + 64(%rsi),%xmm11 2787 movdqu 48 + 64(%rsi),%xmm15 2788 pxor %xmm3,%xmm2 2789 pxor %xmm7,%xmm6 2790 pxor %xmm11,%xmm10 2791 pxor %xmm14,%xmm15 2792 movdqu %xmm2,0 + 64(%rdi) 2793 movdqu %xmm6,16 + 64(%rdi) 2794 movdqu %xmm10,32 + 64(%rdi) 2795 movdqu %xmm15,48 + 64(%rdi) 2796 movdqu 0 + 128(%rsi),%xmm3 2797 movdqu 16 + 128(%rsi),%xmm7 2798 movdqu 32 + 128(%rsi),%xmm11 2799 movdqu 48 + 128(%rsi),%xmm15 2800 pxor %xmm3,%xmm1 2801 pxor %xmm7,%xmm5 2802 pxor %xmm11,%xmm9 2803 pxor %xmm13,%xmm15 2804 movdqu %xmm1,0 + 128(%rdi) 2805 movdqu %xmm5,16 + 128(%rdi) 2806 movdqu %xmm9,32 + 128(%rdi) 2807 movdqu %xmm15,48 + 128(%rdi) 2808 2809 cmpq $256,%rbx 2810 ja .Lseal_sse_main_loop_xor 2811 2812 movq $192,%rcx 2813 subq $192,%rbx 2814 leaq 192(%rsi),%rsi 2815 jmp .Lseal_sse_128_tail_hash 2816.Lseal_sse_main_loop_xor: 2817 movdqu 0 + 192(%rsi),%xmm3 2818 movdqu 16 + 192(%rsi),%xmm7 2819 movdqu 32 + 192(%rsi),%xmm11 2820 movdqu 48 + 192(%rsi),%xmm15 2821 pxor %xmm3,%xmm0 2822 pxor %xmm7,%xmm4 2823 pxor %xmm11,%xmm8 2824 pxor %xmm12,%xmm15 2825 movdqu %xmm0,0 + 192(%rdi) 2826 movdqu %xmm4,16 + 192(%rdi) 2827 movdqu %xmm8,32 + 192(%rdi) 2828 movdqu %xmm15,48 + 192(%rdi) 2829 2830 leaq 256(%rsi),%rsi 2831 subq $256,%rbx 2832 movq $6,%rcx 2833 movq $4,%r8 2834 cmpq $192,%rbx 2835 jg 
.Lseal_sse_main_loop 2836 movq %rbx,%rcx 2837 testq %rbx,%rbx 2838 je .Lseal_sse_128_tail_hash 2839 movq $6,%rcx 2840 cmpq $128,%rbx 2841 ja .Lseal_sse_tail_192 2842 cmpq $64,%rbx 2843 ja .Lseal_sse_tail_128 2844 2845.Lseal_sse_tail_64: 2846 movdqa .Lchacha20_consts(%rip),%xmm0 2847 movdqa 0+48(%rbp),%xmm4 2848 movdqa 0+64(%rbp),%xmm8 2849 movdqa 0+96(%rbp),%xmm12 2850 paddd .Lsse_inc(%rip),%xmm12 2851 movdqa %xmm12,0+96(%rbp) 2852 2853.Lseal_sse_tail_64_rounds_and_x2hash: 2854 addq 0+0(%rdi),%r10 2855 adcq 8+0(%rdi),%r11 2856 adcq $1,%r12 2857 movq 0+0+0(%rbp),%rax 2858 movq %rax,%r15 2859 mulq %r10 2860 movq %rax,%r13 2861 movq %rdx,%r14 2862 movq 0+0+0(%rbp),%rax 2863 mulq %r11 2864 imulq %r12,%r15 2865 addq %rax,%r14 2866 adcq %rdx,%r15 2867 movq 8+0+0(%rbp),%rax 2868 movq %rax,%r9 2869 mulq %r10 2870 addq %rax,%r14 2871 adcq $0,%rdx 2872 movq %rdx,%r10 2873 movq 8+0+0(%rbp),%rax 2874 mulq %r11 2875 addq %rax,%r15 2876 adcq $0,%rdx 2877 imulq %r12,%r9 2878 addq %r10,%r15 2879 adcq %rdx,%r9 2880 movq %r13,%r10 2881 movq %r14,%r11 2882 movq %r15,%r12 2883 andq $3,%r12 2884 movq %r15,%r13 2885 andq $-4,%r13 2886 movq %r9,%r14 2887 shrdq $2,%r9,%r15 2888 shrq $2,%r9 2889 addq %r13,%r15 2890 adcq %r14,%r9 2891 addq %r15,%r10 2892 adcq %r9,%r11 2893 adcq $0,%r12 2894 2895 leaq 16(%rdi),%rdi 2896.Lseal_sse_tail_64_rounds_and_x1hash: 2897 paddd %xmm4,%xmm0 2898 pxor %xmm0,%xmm12 2899 pshufb .Lrol16(%rip),%xmm12 2900 paddd %xmm12,%xmm8 2901 pxor %xmm8,%xmm4 2902 movdqa %xmm4,%xmm3 2903 pslld $12,%xmm3 2904 psrld $20,%xmm4 2905 pxor %xmm3,%xmm4 2906 paddd %xmm4,%xmm0 2907 pxor %xmm0,%xmm12 2908 pshufb .Lrol8(%rip),%xmm12 2909 paddd %xmm12,%xmm8 2910 pxor %xmm8,%xmm4 2911 movdqa %xmm4,%xmm3 2912 pslld $7,%xmm3 2913 psrld $25,%xmm4 2914 pxor %xmm3,%xmm4 2915.byte 102,15,58,15,228,4 2916.byte 102,69,15,58,15,192,8 2917.byte 102,69,15,58,15,228,12 2918 paddd %xmm4,%xmm0 2919 pxor %xmm0,%xmm12 2920 pshufb .Lrol16(%rip),%xmm12 2921 paddd %xmm12,%xmm8 2922 pxor %xmm8,%xmm4 2923 movdqa %xmm4,%xmm3 2924 pslld $12,%xmm3 2925 psrld $20,%xmm4 2926 pxor %xmm3,%xmm4 2927 paddd %xmm4,%xmm0 2928 pxor %xmm0,%xmm12 2929 pshufb .Lrol8(%rip),%xmm12 2930 paddd %xmm12,%xmm8 2931 pxor %xmm8,%xmm4 2932 movdqa %xmm4,%xmm3 2933 pslld $7,%xmm3 2934 psrld $25,%xmm4 2935 pxor %xmm3,%xmm4 2936.byte 102,15,58,15,228,12 2937.byte 102,69,15,58,15,192,8 2938.byte 102,69,15,58,15,228,4 2939 addq 0+0(%rdi),%r10 2940 adcq 8+0(%rdi),%r11 2941 adcq $1,%r12 2942 movq 0+0+0(%rbp),%rax 2943 movq %rax,%r15 2944 mulq %r10 2945 movq %rax,%r13 2946 movq %rdx,%r14 2947 movq 0+0+0(%rbp),%rax 2948 mulq %r11 2949 imulq %r12,%r15 2950 addq %rax,%r14 2951 adcq %rdx,%r15 2952 movq 8+0+0(%rbp),%rax 2953 movq %rax,%r9 2954 mulq %r10 2955 addq %rax,%r14 2956 adcq $0,%rdx 2957 movq %rdx,%r10 2958 movq 8+0+0(%rbp),%rax 2959 mulq %r11 2960 addq %rax,%r15 2961 adcq $0,%rdx 2962 imulq %r12,%r9 2963 addq %r10,%r15 2964 adcq %rdx,%r9 2965 movq %r13,%r10 2966 movq %r14,%r11 2967 movq %r15,%r12 2968 andq $3,%r12 2969 movq %r15,%r13 2970 andq $-4,%r13 2971 movq %r9,%r14 2972 shrdq $2,%r9,%r15 2973 shrq $2,%r9 2974 addq %r13,%r15 2975 adcq %r14,%r9 2976 addq %r15,%r10 2977 adcq %r9,%r11 2978 adcq $0,%r12 2979 2980 leaq 16(%rdi),%rdi 2981 decq %rcx 2982 jg .Lseal_sse_tail_64_rounds_and_x2hash 2983 decq %r8 2984 jge .Lseal_sse_tail_64_rounds_and_x1hash 2985 paddd .Lchacha20_consts(%rip),%xmm0 2986 paddd 0+48(%rbp),%xmm4 2987 paddd 0+64(%rbp),%xmm8 2988 paddd 0+96(%rbp),%xmm12 2989 2990 jmp .Lseal_sse_128_tail_xor 2991 2992.Lseal_sse_tail_128: 2993 movdqa 
.Lchacha20_consts(%rip),%xmm0 2994 movdqa 0+48(%rbp),%xmm4 2995 movdqa 0+64(%rbp),%xmm8 2996 movdqa %xmm0,%xmm1 2997 movdqa %xmm4,%xmm5 2998 movdqa %xmm8,%xmm9 2999 movdqa 0+96(%rbp),%xmm13 3000 paddd .Lsse_inc(%rip),%xmm13 3001 movdqa %xmm13,%xmm12 3002 paddd .Lsse_inc(%rip),%xmm12 3003 movdqa %xmm12,0+96(%rbp) 3004 movdqa %xmm13,0+112(%rbp) 3005 3006.Lseal_sse_tail_128_rounds_and_x2hash: 3007 addq 0+0(%rdi),%r10 3008 adcq 8+0(%rdi),%r11 3009 adcq $1,%r12 3010 movq 0+0+0(%rbp),%rax 3011 movq %rax,%r15 3012 mulq %r10 3013 movq %rax,%r13 3014 movq %rdx,%r14 3015 movq 0+0+0(%rbp),%rax 3016 mulq %r11 3017 imulq %r12,%r15 3018 addq %rax,%r14 3019 adcq %rdx,%r15 3020 movq 8+0+0(%rbp),%rax 3021 movq %rax,%r9 3022 mulq %r10 3023 addq %rax,%r14 3024 adcq $0,%rdx 3025 movq %rdx,%r10 3026 movq 8+0+0(%rbp),%rax 3027 mulq %r11 3028 addq %rax,%r15 3029 adcq $0,%rdx 3030 imulq %r12,%r9 3031 addq %r10,%r15 3032 adcq %rdx,%r9 3033 movq %r13,%r10 3034 movq %r14,%r11 3035 movq %r15,%r12 3036 andq $3,%r12 3037 movq %r15,%r13 3038 andq $-4,%r13 3039 movq %r9,%r14 3040 shrdq $2,%r9,%r15 3041 shrq $2,%r9 3042 addq %r13,%r15 3043 adcq %r14,%r9 3044 addq %r15,%r10 3045 adcq %r9,%r11 3046 adcq $0,%r12 3047 3048 leaq 16(%rdi),%rdi 3049.Lseal_sse_tail_128_rounds_and_x1hash: 3050 paddd %xmm4,%xmm0 3051 pxor %xmm0,%xmm12 3052 pshufb .Lrol16(%rip),%xmm12 3053 paddd %xmm12,%xmm8 3054 pxor %xmm8,%xmm4 3055 movdqa %xmm4,%xmm3 3056 pslld $12,%xmm3 3057 psrld $20,%xmm4 3058 pxor %xmm3,%xmm4 3059 paddd %xmm4,%xmm0 3060 pxor %xmm0,%xmm12 3061 pshufb .Lrol8(%rip),%xmm12 3062 paddd %xmm12,%xmm8 3063 pxor %xmm8,%xmm4 3064 movdqa %xmm4,%xmm3 3065 pslld $7,%xmm3 3066 psrld $25,%xmm4 3067 pxor %xmm3,%xmm4 3068.byte 102,15,58,15,228,4 3069.byte 102,69,15,58,15,192,8 3070.byte 102,69,15,58,15,228,12 3071 paddd %xmm5,%xmm1 3072 pxor %xmm1,%xmm13 3073 pshufb .Lrol16(%rip),%xmm13 3074 paddd %xmm13,%xmm9 3075 pxor %xmm9,%xmm5 3076 movdqa %xmm5,%xmm3 3077 pslld $12,%xmm3 3078 psrld $20,%xmm5 3079 pxor %xmm3,%xmm5 3080 paddd %xmm5,%xmm1 3081 pxor %xmm1,%xmm13 3082 pshufb .Lrol8(%rip),%xmm13 3083 paddd %xmm13,%xmm9 3084 pxor %xmm9,%xmm5 3085 movdqa %xmm5,%xmm3 3086 pslld $7,%xmm3 3087 psrld $25,%xmm5 3088 pxor %xmm3,%xmm5 3089.byte 102,15,58,15,237,4 3090.byte 102,69,15,58,15,201,8 3091.byte 102,69,15,58,15,237,12 3092 addq 0+0(%rdi),%r10 3093 adcq 8+0(%rdi),%r11 3094 adcq $1,%r12 3095 movq 0+0+0(%rbp),%rax 3096 movq %rax,%r15 3097 mulq %r10 3098 movq %rax,%r13 3099 movq %rdx,%r14 3100 movq 0+0+0(%rbp),%rax 3101 mulq %r11 3102 imulq %r12,%r15 3103 addq %rax,%r14 3104 adcq %rdx,%r15 3105 movq 8+0+0(%rbp),%rax 3106 movq %rax,%r9 3107 mulq %r10 3108 addq %rax,%r14 3109 adcq $0,%rdx 3110 movq %rdx,%r10 3111 movq 8+0+0(%rbp),%rax 3112 mulq %r11 3113 addq %rax,%r15 3114 adcq $0,%rdx 3115 imulq %r12,%r9 3116 addq %r10,%r15 3117 adcq %rdx,%r9 3118 movq %r13,%r10 3119 movq %r14,%r11 3120 movq %r15,%r12 3121 andq $3,%r12 3122 movq %r15,%r13 3123 andq $-4,%r13 3124 movq %r9,%r14 3125 shrdq $2,%r9,%r15 3126 shrq $2,%r9 3127 addq %r13,%r15 3128 adcq %r14,%r9 3129 addq %r15,%r10 3130 adcq %r9,%r11 3131 adcq $0,%r12 3132 paddd %xmm4,%xmm0 3133 pxor %xmm0,%xmm12 3134 pshufb .Lrol16(%rip),%xmm12 3135 paddd %xmm12,%xmm8 3136 pxor %xmm8,%xmm4 3137 movdqa %xmm4,%xmm3 3138 pslld $12,%xmm3 3139 psrld $20,%xmm4 3140 pxor %xmm3,%xmm4 3141 paddd %xmm4,%xmm0 3142 pxor %xmm0,%xmm12 3143 pshufb .Lrol8(%rip),%xmm12 3144 paddd %xmm12,%xmm8 3145 pxor %xmm8,%xmm4 3146 movdqa %xmm4,%xmm3 3147 pslld $7,%xmm3 3148 psrld $25,%xmm4 3149 pxor %xmm3,%xmm4 3150.byte 
102,15,58,15,228,12 3151.byte 102,69,15,58,15,192,8 3152.byte 102,69,15,58,15,228,4 3153 paddd %xmm5,%xmm1 3154 pxor %xmm1,%xmm13 3155 pshufb .Lrol16(%rip),%xmm13 3156 paddd %xmm13,%xmm9 3157 pxor %xmm9,%xmm5 3158 movdqa %xmm5,%xmm3 3159 pslld $12,%xmm3 3160 psrld $20,%xmm5 3161 pxor %xmm3,%xmm5 3162 paddd %xmm5,%xmm1 3163 pxor %xmm1,%xmm13 3164 pshufb .Lrol8(%rip),%xmm13 3165 paddd %xmm13,%xmm9 3166 pxor %xmm9,%xmm5 3167 movdqa %xmm5,%xmm3 3168 pslld $7,%xmm3 3169 psrld $25,%xmm5 3170 pxor %xmm3,%xmm5 3171.byte 102,15,58,15,237,12 3172.byte 102,69,15,58,15,201,8 3173.byte 102,69,15,58,15,237,4 3174 3175 leaq 16(%rdi),%rdi 3176 decq %rcx 3177 jg .Lseal_sse_tail_128_rounds_and_x2hash 3178 decq %r8 3179 jge .Lseal_sse_tail_128_rounds_and_x1hash 3180 paddd .Lchacha20_consts(%rip),%xmm1 3181 paddd 0+48(%rbp),%xmm5 3182 paddd 0+64(%rbp),%xmm9 3183 paddd 0+112(%rbp),%xmm13 3184 paddd .Lchacha20_consts(%rip),%xmm0 3185 paddd 0+48(%rbp),%xmm4 3186 paddd 0+64(%rbp),%xmm8 3187 paddd 0+96(%rbp),%xmm12 3188 movdqu 0 + 0(%rsi),%xmm3 3189 movdqu 16 + 0(%rsi),%xmm7 3190 movdqu 32 + 0(%rsi),%xmm11 3191 movdqu 48 + 0(%rsi),%xmm15 3192 pxor %xmm3,%xmm1 3193 pxor %xmm7,%xmm5 3194 pxor %xmm11,%xmm9 3195 pxor %xmm13,%xmm15 3196 movdqu %xmm1,0 + 0(%rdi) 3197 movdqu %xmm5,16 + 0(%rdi) 3198 movdqu %xmm9,32 + 0(%rdi) 3199 movdqu %xmm15,48 + 0(%rdi) 3200 3201 movq $64,%rcx 3202 subq $64,%rbx 3203 leaq 64(%rsi),%rsi 3204 jmp .Lseal_sse_128_tail_hash 3205 3206.Lseal_sse_tail_192: 3207 movdqa .Lchacha20_consts(%rip),%xmm0 3208 movdqa 0+48(%rbp),%xmm4 3209 movdqa 0+64(%rbp),%xmm8 3210 movdqa %xmm0,%xmm1 3211 movdqa %xmm4,%xmm5 3212 movdqa %xmm8,%xmm9 3213 movdqa %xmm0,%xmm2 3214 movdqa %xmm4,%xmm6 3215 movdqa %xmm8,%xmm10 3216 movdqa 0+96(%rbp),%xmm14 3217 paddd .Lsse_inc(%rip),%xmm14 3218 movdqa %xmm14,%xmm13 3219 paddd .Lsse_inc(%rip),%xmm13 3220 movdqa %xmm13,%xmm12 3221 paddd .Lsse_inc(%rip),%xmm12 3222 movdqa %xmm12,0+96(%rbp) 3223 movdqa %xmm13,0+112(%rbp) 3224 movdqa %xmm14,0+128(%rbp) 3225 3226.Lseal_sse_tail_192_rounds_and_x2hash: 3227 addq 0+0(%rdi),%r10 3228 adcq 8+0(%rdi),%r11 3229 adcq $1,%r12 3230 movq 0+0+0(%rbp),%rax 3231 movq %rax,%r15 3232 mulq %r10 3233 movq %rax,%r13 3234 movq %rdx,%r14 3235 movq 0+0+0(%rbp),%rax 3236 mulq %r11 3237 imulq %r12,%r15 3238 addq %rax,%r14 3239 adcq %rdx,%r15 3240 movq 8+0+0(%rbp),%rax 3241 movq %rax,%r9 3242 mulq %r10 3243 addq %rax,%r14 3244 adcq $0,%rdx 3245 movq %rdx,%r10 3246 movq 8+0+0(%rbp),%rax 3247 mulq %r11 3248 addq %rax,%r15 3249 adcq $0,%rdx 3250 imulq %r12,%r9 3251 addq %r10,%r15 3252 adcq %rdx,%r9 3253 movq %r13,%r10 3254 movq %r14,%r11 3255 movq %r15,%r12 3256 andq $3,%r12 3257 movq %r15,%r13 3258 andq $-4,%r13 3259 movq %r9,%r14 3260 shrdq $2,%r9,%r15 3261 shrq $2,%r9 3262 addq %r13,%r15 3263 adcq %r14,%r9 3264 addq %r15,%r10 3265 adcq %r9,%r11 3266 adcq $0,%r12 3267 3268 leaq 16(%rdi),%rdi 3269.Lseal_sse_tail_192_rounds_and_x1hash: 3270 paddd %xmm4,%xmm0 3271 pxor %xmm0,%xmm12 3272 pshufb .Lrol16(%rip),%xmm12 3273 paddd %xmm12,%xmm8 3274 pxor %xmm8,%xmm4 3275 movdqa %xmm4,%xmm3 3276 pslld $12,%xmm3 3277 psrld $20,%xmm4 3278 pxor %xmm3,%xmm4 3279 paddd %xmm4,%xmm0 3280 pxor %xmm0,%xmm12 3281 pshufb .Lrol8(%rip),%xmm12 3282 paddd %xmm12,%xmm8 3283 pxor %xmm8,%xmm4 3284 movdqa %xmm4,%xmm3 3285 pslld $7,%xmm3 3286 psrld $25,%xmm4 3287 pxor %xmm3,%xmm4 3288.byte 102,15,58,15,228,4 3289.byte 102,69,15,58,15,192,8 3290.byte 102,69,15,58,15,228,12 3291 paddd %xmm5,%xmm1 3292 pxor %xmm1,%xmm13 3293 pshufb .Lrol16(%rip),%xmm13 3294 paddd %xmm13,%xmm9 3295 pxor 
%xmm9,%xmm5 3296 movdqa %xmm5,%xmm3 3297 pslld $12,%xmm3 3298 psrld $20,%xmm5 3299 pxor %xmm3,%xmm5 3300 paddd %xmm5,%xmm1 3301 pxor %xmm1,%xmm13 3302 pshufb .Lrol8(%rip),%xmm13 3303 paddd %xmm13,%xmm9 3304 pxor %xmm9,%xmm5 3305 movdqa %xmm5,%xmm3 3306 pslld $7,%xmm3 3307 psrld $25,%xmm5 3308 pxor %xmm3,%xmm5 3309.byte 102,15,58,15,237,4 3310.byte 102,69,15,58,15,201,8 3311.byte 102,69,15,58,15,237,12 3312 paddd %xmm6,%xmm2 3313 pxor %xmm2,%xmm14 3314 pshufb .Lrol16(%rip),%xmm14 3315 paddd %xmm14,%xmm10 3316 pxor %xmm10,%xmm6 3317 movdqa %xmm6,%xmm3 3318 pslld $12,%xmm3 3319 psrld $20,%xmm6 3320 pxor %xmm3,%xmm6 3321 paddd %xmm6,%xmm2 3322 pxor %xmm2,%xmm14 3323 pshufb .Lrol8(%rip),%xmm14 3324 paddd %xmm14,%xmm10 3325 pxor %xmm10,%xmm6 3326 movdqa %xmm6,%xmm3 3327 pslld $7,%xmm3 3328 psrld $25,%xmm6 3329 pxor %xmm3,%xmm6 3330.byte 102,15,58,15,246,4 3331.byte 102,69,15,58,15,210,8 3332.byte 102,69,15,58,15,246,12 3333 addq 0+0(%rdi),%r10 3334 adcq 8+0(%rdi),%r11 3335 adcq $1,%r12 3336 movq 0+0+0(%rbp),%rax 3337 movq %rax,%r15 3338 mulq %r10 3339 movq %rax,%r13 3340 movq %rdx,%r14 3341 movq 0+0+0(%rbp),%rax 3342 mulq %r11 3343 imulq %r12,%r15 3344 addq %rax,%r14 3345 adcq %rdx,%r15 3346 movq 8+0+0(%rbp),%rax 3347 movq %rax,%r9 3348 mulq %r10 3349 addq %rax,%r14 3350 adcq $0,%rdx 3351 movq %rdx,%r10 3352 movq 8+0+0(%rbp),%rax 3353 mulq %r11 3354 addq %rax,%r15 3355 adcq $0,%rdx 3356 imulq %r12,%r9 3357 addq %r10,%r15 3358 adcq %rdx,%r9 3359 movq %r13,%r10 3360 movq %r14,%r11 3361 movq %r15,%r12 3362 andq $3,%r12 3363 movq %r15,%r13 3364 andq $-4,%r13 3365 movq %r9,%r14 3366 shrdq $2,%r9,%r15 3367 shrq $2,%r9 3368 addq %r13,%r15 3369 adcq %r14,%r9 3370 addq %r15,%r10 3371 adcq %r9,%r11 3372 adcq $0,%r12 3373 paddd %xmm4,%xmm0 3374 pxor %xmm0,%xmm12 3375 pshufb .Lrol16(%rip),%xmm12 3376 paddd %xmm12,%xmm8 3377 pxor %xmm8,%xmm4 3378 movdqa %xmm4,%xmm3 3379 pslld $12,%xmm3 3380 psrld $20,%xmm4 3381 pxor %xmm3,%xmm4 3382 paddd %xmm4,%xmm0 3383 pxor %xmm0,%xmm12 3384 pshufb .Lrol8(%rip),%xmm12 3385 paddd %xmm12,%xmm8 3386 pxor %xmm8,%xmm4 3387 movdqa %xmm4,%xmm3 3388 pslld $7,%xmm3 3389 psrld $25,%xmm4 3390 pxor %xmm3,%xmm4 3391.byte 102,15,58,15,228,12 3392.byte 102,69,15,58,15,192,8 3393.byte 102,69,15,58,15,228,4 3394 paddd %xmm5,%xmm1 3395 pxor %xmm1,%xmm13 3396 pshufb .Lrol16(%rip),%xmm13 3397 paddd %xmm13,%xmm9 3398 pxor %xmm9,%xmm5 3399 movdqa %xmm5,%xmm3 3400 pslld $12,%xmm3 3401 psrld $20,%xmm5 3402 pxor %xmm3,%xmm5 3403 paddd %xmm5,%xmm1 3404 pxor %xmm1,%xmm13 3405 pshufb .Lrol8(%rip),%xmm13 3406 paddd %xmm13,%xmm9 3407 pxor %xmm9,%xmm5 3408 movdqa %xmm5,%xmm3 3409 pslld $7,%xmm3 3410 psrld $25,%xmm5 3411 pxor %xmm3,%xmm5 3412.byte 102,15,58,15,237,12 3413.byte 102,69,15,58,15,201,8 3414.byte 102,69,15,58,15,237,4 3415 paddd %xmm6,%xmm2 3416 pxor %xmm2,%xmm14 3417 pshufb .Lrol16(%rip),%xmm14 3418 paddd %xmm14,%xmm10 3419 pxor %xmm10,%xmm6 3420 movdqa %xmm6,%xmm3 3421 pslld $12,%xmm3 3422 psrld $20,%xmm6 3423 pxor %xmm3,%xmm6 3424 paddd %xmm6,%xmm2 3425 pxor %xmm2,%xmm14 3426 pshufb .Lrol8(%rip),%xmm14 3427 paddd %xmm14,%xmm10 3428 pxor %xmm10,%xmm6 3429 movdqa %xmm6,%xmm3 3430 pslld $7,%xmm3 3431 psrld $25,%xmm6 3432 pxor %xmm3,%xmm6 3433.byte 102,15,58,15,246,12 3434.byte 102,69,15,58,15,210,8 3435.byte 102,69,15,58,15,246,4 3436 3437 leaq 16(%rdi),%rdi 3438 decq %rcx 3439 jg .Lseal_sse_tail_192_rounds_and_x2hash 3440 decq %r8 3441 jge .Lseal_sse_tail_192_rounds_and_x1hash 3442 paddd .Lchacha20_consts(%rip),%xmm2 3443 paddd 0+48(%rbp),%xmm6 3444 paddd 0+64(%rbp),%xmm10 3445 paddd 
0+128(%rbp),%xmm14 3446 paddd .Lchacha20_consts(%rip),%xmm1 3447 paddd 0+48(%rbp),%xmm5 3448 paddd 0+64(%rbp),%xmm9 3449 paddd 0+112(%rbp),%xmm13 3450 paddd .Lchacha20_consts(%rip),%xmm0 3451 paddd 0+48(%rbp),%xmm4 3452 paddd 0+64(%rbp),%xmm8 3453 paddd 0+96(%rbp),%xmm12 3454 movdqu 0 + 0(%rsi),%xmm3 3455 movdqu 16 + 0(%rsi),%xmm7 3456 movdqu 32 + 0(%rsi),%xmm11 3457 movdqu 48 + 0(%rsi),%xmm15 3458 pxor %xmm3,%xmm2 3459 pxor %xmm7,%xmm6 3460 pxor %xmm11,%xmm10 3461 pxor %xmm14,%xmm15 3462 movdqu %xmm2,0 + 0(%rdi) 3463 movdqu %xmm6,16 + 0(%rdi) 3464 movdqu %xmm10,32 + 0(%rdi) 3465 movdqu %xmm15,48 + 0(%rdi) 3466 movdqu 0 + 64(%rsi),%xmm3 3467 movdqu 16 + 64(%rsi),%xmm7 3468 movdqu 32 + 64(%rsi),%xmm11 3469 movdqu 48 + 64(%rsi),%xmm15 3470 pxor %xmm3,%xmm1 3471 pxor %xmm7,%xmm5 3472 pxor %xmm11,%xmm9 3473 pxor %xmm13,%xmm15 3474 movdqu %xmm1,0 + 64(%rdi) 3475 movdqu %xmm5,16 + 64(%rdi) 3476 movdqu %xmm9,32 + 64(%rdi) 3477 movdqu %xmm15,48 + 64(%rdi) 3478 3479 movq $128,%rcx 3480 subq $128,%rbx 3481 leaq 128(%rsi),%rsi 3482 3483.Lseal_sse_128_tail_hash: 3484 cmpq $16,%rcx 3485 jb .Lseal_sse_128_tail_xor 3486 addq 0+0(%rdi),%r10 3487 adcq 8+0(%rdi),%r11 3488 adcq $1,%r12 3489 movq 0+0+0(%rbp),%rax 3490 movq %rax,%r15 3491 mulq %r10 3492 movq %rax,%r13 3493 movq %rdx,%r14 3494 movq 0+0+0(%rbp),%rax 3495 mulq %r11 3496 imulq %r12,%r15 3497 addq %rax,%r14 3498 adcq %rdx,%r15 3499 movq 8+0+0(%rbp),%rax 3500 movq %rax,%r9 3501 mulq %r10 3502 addq %rax,%r14 3503 adcq $0,%rdx 3504 movq %rdx,%r10 3505 movq 8+0+0(%rbp),%rax 3506 mulq %r11 3507 addq %rax,%r15 3508 adcq $0,%rdx 3509 imulq %r12,%r9 3510 addq %r10,%r15 3511 adcq %rdx,%r9 3512 movq %r13,%r10 3513 movq %r14,%r11 3514 movq %r15,%r12 3515 andq $3,%r12 3516 movq %r15,%r13 3517 andq $-4,%r13 3518 movq %r9,%r14 3519 shrdq $2,%r9,%r15 3520 shrq $2,%r9 3521 addq %r13,%r15 3522 adcq %r14,%r9 3523 addq %r15,%r10 3524 adcq %r9,%r11 3525 adcq $0,%r12 3526 3527 subq $16,%rcx 3528 leaq 16(%rdi),%rdi 3529 jmp .Lseal_sse_128_tail_hash 3530 3531.Lseal_sse_128_tail_xor: 3532 cmpq $16,%rbx 3533 jb .Lseal_sse_tail_16 3534 subq $16,%rbx 3535 3536 movdqu 0(%rsi),%xmm3 3537 pxor %xmm3,%xmm0 3538 movdqu %xmm0,0(%rdi) 3539 3540 addq 0(%rdi),%r10 3541 adcq 8(%rdi),%r11 3542 adcq $1,%r12 3543 leaq 16(%rsi),%rsi 3544 leaq 16(%rdi),%rdi 3545 movq 0+0+0(%rbp),%rax 3546 movq %rax,%r15 3547 mulq %r10 3548 movq %rax,%r13 3549 movq %rdx,%r14 3550 movq 0+0+0(%rbp),%rax 3551 mulq %r11 3552 imulq %r12,%r15 3553 addq %rax,%r14 3554 adcq %rdx,%r15 3555 movq 8+0+0(%rbp),%rax 3556 movq %rax,%r9 3557 mulq %r10 3558 addq %rax,%r14 3559 adcq $0,%rdx 3560 movq %rdx,%r10 3561 movq 8+0+0(%rbp),%rax 3562 mulq %r11 3563 addq %rax,%r15 3564 adcq $0,%rdx 3565 imulq %r12,%r9 3566 addq %r10,%r15 3567 adcq %rdx,%r9 3568 movq %r13,%r10 3569 movq %r14,%r11 3570 movq %r15,%r12 3571 andq $3,%r12 3572 movq %r15,%r13 3573 andq $-4,%r13 3574 movq %r9,%r14 3575 shrdq $2,%r9,%r15 3576 shrq $2,%r9 3577 addq %r13,%r15 3578 adcq %r14,%r9 3579 addq %r15,%r10 3580 adcq %r9,%r11 3581 adcq $0,%r12 3582 3583 3584 movdqa %xmm4,%xmm0 3585 movdqa %xmm8,%xmm4 3586 movdqa %xmm12,%xmm8 3587 movdqa %xmm1,%xmm12 3588 movdqa %xmm5,%xmm1 3589 movdqa %xmm9,%xmm5 3590 movdqa %xmm13,%xmm9 3591 jmp .Lseal_sse_128_tail_xor 3592 3593.Lseal_sse_tail_16: 3594 testq %rbx,%rbx 3595 jz .Lprocess_blocks_of_extra_in 3596 3597 movq %rbx,%r8 3598 movq %rbx,%rcx 3599 leaq -1(%rsi,%rbx,1),%rsi 3600 pxor %xmm15,%xmm15 3601.Lseal_sse_tail_16_compose: 3602 pslldq $1,%xmm15 3603 pinsrb $0,(%rsi),%xmm15 3604 leaq -1(%rsi),%rsi 3605 decq %rcx 
3606 jne .Lseal_sse_tail_16_compose 3607 3608 3609 pxor %xmm0,%xmm15 3610 3611 3612 movq %rbx,%rcx 3613 movdqu %xmm15,%xmm0 3614.Lseal_sse_tail_16_extract: 3615 pextrb $0,%xmm0,(%rdi) 3616 psrldq $1,%xmm0 3617 addq $1,%rdi 3618 subq $1,%rcx 3619 jnz .Lseal_sse_tail_16_extract 3620 3621 3622 3623 3624 3625 3626 3627 3628 movq 288 + 0 + 32(%rsp),%r9 3629 movq 56(%r9),%r14 3630 movq 48(%r9),%r13 3631 testq %r14,%r14 3632 jz .Lprocess_partial_block 3633 3634 movq $16,%r15 3635 subq %rbx,%r15 3636 cmpq %r15,%r14 3637 3638 jge .Lload_extra_in 3639 movq %r14,%r15 3640 3641.Lload_extra_in: 3642 3643 3644 leaq -1(%r13,%r15,1),%rsi 3645 3646 3647 addq %r15,%r13 3648 subq %r15,%r14 3649 movq %r13,48(%r9) 3650 movq %r14,56(%r9) 3651 3652 3653 3654 addq %r15,%r8 3655 3656 3657 pxor %xmm11,%xmm11 3658.Lload_extra_load_loop: 3659 pslldq $1,%xmm11 3660 pinsrb $0,(%rsi),%xmm11 3661 leaq -1(%rsi),%rsi 3662 subq $1,%r15 3663 jnz .Lload_extra_load_loop 3664 3665 3666 3667 3668 movq %rbx,%r15 3669 3670.Lload_extra_shift_loop: 3671 pslldq $1,%xmm11 3672 subq $1,%r15 3673 jnz .Lload_extra_shift_loop 3674 3675 3676 3677 3678 leaq .Land_masks(%rip),%r15 3679 shlq $4,%rbx 3680 pand -16(%r15,%rbx,1),%xmm15 3681 3682 3683 por %xmm11,%xmm15 3684 3685 3686 3687.byte 102,77,15,126,253 3688 pextrq $1,%xmm15,%r14 3689 addq %r13,%r10 3690 adcq %r14,%r11 3691 adcq $1,%r12 3692 movq 0+0+0(%rbp),%rax 3693 movq %rax,%r15 3694 mulq %r10 3695 movq %rax,%r13 3696 movq %rdx,%r14 3697 movq 0+0+0(%rbp),%rax 3698 mulq %r11 3699 imulq %r12,%r15 3700 addq %rax,%r14 3701 adcq %rdx,%r15 3702 movq 8+0+0(%rbp),%rax 3703 movq %rax,%r9 3704 mulq %r10 3705 addq %rax,%r14 3706 adcq $0,%rdx 3707 movq %rdx,%r10 3708 movq 8+0+0(%rbp),%rax 3709 mulq %r11 3710 addq %rax,%r15 3711 adcq $0,%rdx 3712 imulq %r12,%r9 3713 addq %r10,%r15 3714 adcq %rdx,%r9 3715 movq %r13,%r10 3716 movq %r14,%r11 3717 movq %r15,%r12 3718 andq $3,%r12 3719 movq %r15,%r13 3720 andq $-4,%r13 3721 movq %r9,%r14 3722 shrdq $2,%r9,%r15 3723 shrq $2,%r9 3724 addq %r13,%r15 3725 adcq %r14,%r9 3726 addq %r15,%r10 3727 adcq %r9,%r11 3728 adcq $0,%r12 3729 3730 3731.Lprocess_blocks_of_extra_in: 3732 3733 movq 288+32+0 (%rsp),%r9 3734 movq 48(%r9),%rsi 3735 movq 56(%r9),%r8 3736 movq %r8,%rcx 3737 shrq $4,%r8 3738 3739.Lprocess_extra_hash_loop: 3740 jz process_extra_in_trailer 3741 addq 0+0(%rsi),%r10 3742 adcq 8+0(%rsi),%r11 3743 adcq $1,%r12 3744 movq 0+0+0(%rbp),%rax 3745 movq %rax,%r15 3746 mulq %r10 3747 movq %rax,%r13 3748 movq %rdx,%r14 3749 movq 0+0+0(%rbp),%rax 3750 mulq %r11 3751 imulq %r12,%r15 3752 addq %rax,%r14 3753 adcq %rdx,%r15 3754 movq 8+0+0(%rbp),%rax 3755 movq %rax,%r9 3756 mulq %r10 3757 addq %rax,%r14 3758 adcq $0,%rdx 3759 movq %rdx,%r10 3760 movq 8+0+0(%rbp),%rax 3761 mulq %r11 3762 addq %rax,%r15 3763 adcq $0,%rdx 3764 imulq %r12,%r9 3765 addq %r10,%r15 3766 adcq %rdx,%r9 3767 movq %r13,%r10 3768 movq %r14,%r11 3769 movq %r15,%r12 3770 andq $3,%r12 3771 movq %r15,%r13 3772 andq $-4,%r13 3773 movq %r9,%r14 3774 shrdq $2,%r9,%r15 3775 shrq $2,%r9 3776 addq %r13,%r15 3777 adcq %r14,%r9 3778 addq %r15,%r10 3779 adcq %r9,%r11 3780 adcq $0,%r12 3781 3782 leaq 16(%rsi),%rsi 3783 subq $1,%r8 3784 jmp .Lprocess_extra_hash_loop 3785process_extra_in_trailer: 3786 andq $15,%rcx 3787 movq %rcx,%rbx 3788 jz .Ldo_length_block 3789 leaq -1(%rsi,%rcx,1),%rsi 3790 3791.Lprocess_extra_in_trailer_load: 3792 pslldq $1,%xmm15 3793 pinsrb $0,(%rsi),%xmm15 3794 leaq -1(%rsi),%rsi 3795 subq $1,%rcx 3796 jnz .Lprocess_extra_in_trailer_load 3797 3798.Lprocess_partial_block: 3799 3800 
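# .Lprocess_partial_block: any remaining 1-15 bytes have been gathered into
# %xmm15 with their count in %rbx.  The .Land_masks lookup below keeps exactly
# that many bytes (the AEAD zero-pads partial blocks to 16 bytes), the two
# quadwords are extracted into %r13:%r14 and added into the Poly1305
# accumulator %r10:%r11:%r12 together with the 2^128 block bit, and a single
# multiply-by-r / mod 2^130-5 reduction step follows.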
leaq .Land_masks(%rip),%r15 3801 shlq $4,%rbx 3802 pand -16(%r15,%rbx,1),%xmm15 3803.byte 102,77,15,126,253 3804 pextrq $1,%xmm15,%r14 3805 addq %r13,%r10 3806 adcq %r14,%r11 3807 adcq $1,%r12 3808 movq 0+0+0(%rbp),%rax 3809 movq %rax,%r15 3810 mulq %r10 3811 movq %rax,%r13 3812 movq %rdx,%r14 3813 movq 0+0+0(%rbp),%rax 3814 mulq %r11 3815 imulq %r12,%r15 3816 addq %rax,%r14 3817 adcq %rdx,%r15 3818 movq 8+0+0(%rbp),%rax 3819 movq %rax,%r9 3820 mulq %r10 3821 addq %rax,%r14 3822 adcq $0,%rdx 3823 movq %rdx,%r10 3824 movq 8+0+0(%rbp),%rax 3825 mulq %r11 3826 addq %rax,%r15 3827 adcq $0,%rdx 3828 imulq %r12,%r9 3829 addq %r10,%r15 3830 adcq %rdx,%r9 3831 movq %r13,%r10 3832 movq %r14,%r11 3833 movq %r15,%r12 3834 andq $3,%r12 3835 movq %r15,%r13 3836 andq $-4,%r13 3837 movq %r9,%r14 3838 shrdq $2,%r9,%r15 3839 shrq $2,%r9 3840 addq %r13,%r15 3841 adcq %r14,%r9 3842 addq %r15,%r10 3843 adcq %r9,%r11 3844 adcq $0,%r12 3845 3846 3847.Ldo_length_block: 3848 addq 0+0+32(%rbp),%r10 3849 adcq 8+0+32(%rbp),%r11 3850 adcq $1,%r12 3851 movq 0+0+0(%rbp),%rax 3852 movq %rax,%r15 3853 mulq %r10 3854 movq %rax,%r13 3855 movq %rdx,%r14 3856 movq 0+0+0(%rbp),%rax 3857 mulq %r11 3858 imulq %r12,%r15 3859 addq %rax,%r14 3860 adcq %rdx,%r15 3861 movq 8+0+0(%rbp),%rax 3862 movq %rax,%r9 3863 mulq %r10 3864 addq %rax,%r14 3865 adcq $0,%rdx 3866 movq %rdx,%r10 3867 movq 8+0+0(%rbp),%rax 3868 mulq %r11 3869 addq %rax,%r15 3870 adcq $0,%rdx 3871 imulq %r12,%r9 3872 addq %r10,%r15 3873 adcq %rdx,%r9 3874 movq %r13,%r10 3875 movq %r14,%r11 3876 movq %r15,%r12 3877 andq $3,%r12 3878 movq %r15,%r13 3879 andq $-4,%r13 3880 movq %r9,%r14 3881 shrdq $2,%r9,%r15 3882 shrq $2,%r9 3883 addq %r13,%r15 3884 adcq %r14,%r9 3885 addq %r15,%r10 3886 adcq %r9,%r11 3887 adcq $0,%r12 3888 3889 3890 movq %r10,%r13 3891 movq %r11,%r14 3892 movq %r12,%r15 3893 subq $-5,%r10 3894 sbbq $-1,%r11 3895 sbbq $3,%r12 3896 cmovcq %r13,%r10 3897 cmovcq %r14,%r11 3898 cmovcq %r15,%r12 3899 3900 addq 0+0+16(%rbp),%r10 3901 adcq 8+0+16(%rbp),%r11 3902 3903.cfi_remember_state 3904 addq $288 + 0 + 32,%rsp 3905.cfi_adjust_cfa_offset -(288 + 32) 3906 3907 popq %r9 3908.cfi_adjust_cfa_offset -8 3909.cfi_restore %r9 3910 movq %r10,(%r9) 3911 movq %r11,8(%r9) 3912 popq %r15 3913.cfi_adjust_cfa_offset -8 3914.cfi_restore %r15 3915 popq %r14 3916.cfi_adjust_cfa_offset -8 3917.cfi_restore %r14 3918 popq %r13 3919.cfi_adjust_cfa_offset -8 3920.cfi_restore %r13 3921 popq %r12 3922.cfi_adjust_cfa_offset -8 3923.cfi_restore %r12 3924 popq %rbx 3925.cfi_adjust_cfa_offset -8 3926.cfi_restore %rbx 3927 popq %rbp 3928.cfi_adjust_cfa_offset -8 3929.cfi_restore %rbp 3930 .byte 0xf3,0xc3 3931 3932.Lseal_sse_128: 3933.cfi_restore_state 3934 movdqu .Lchacha20_consts(%rip),%xmm0 3935 movdqa %xmm0,%xmm1 3936 movdqa %xmm0,%xmm2 3937 movdqu 0(%r9),%xmm4 3938 movdqa %xmm4,%xmm5 3939 movdqa %xmm4,%xmm6 3940 movdqu 16(%r9),%xmm8 3941 movdqa %xmm8,%xmm9 3942 movdqa %xmm8,%xmm10 3943 movdqu 32(%r9),%xmm14 3944 movdqa %xmm14,%xmm12 3945 paddd .Lsse_inc(%rip),%xmm12 3946 movdqa %xmm12,%xmm13 3947 paddd .Lsse_inc(%rip),%xmm13 3948 movdqa %xmm4,%xmm7 3949 movdqa %xmm8,%xmm11 3950 movdqa %xmm12,%xmm15 3951 movq $10,%r10 3952 3953.Lseal_sse_128_rounds: 3954 paddd %xmm4,%xmm0 3955 pxor %xmm0,%xmm12 3956 pshufb .Lrol16(%rip),%xmm12 3957 paddd %xmm12,%xmm8 3958 pxor %xmm8,%xmm4 3959 movdqa %xmm4,%xmm3 3960 pslld $12,%xmm3 3961 psrld $20,%xmm4 3962 pxor %xmm3,%xmm4 3963 paddd %xmm4,%xmm0 3964 pxor %xmm0,%xmm12 3965 pshufb .Lrol8(%rip),%xmm12 3966 paddd %xmm12,%xmm8 3967 pxor %xmm8,%xmm4 
3968 movdqa %xmm4,%xmm3 3969 pslld $7,%xmm3 3970 psrld $25,%xmm4 3971 pxor %xmm3,%xmm4 3972.byte 102,15,58,15,228,4 3973.byte 102,69,15,58,15,192,8 3974.byte 102,69,15,58,15,228,12 3975 paddd %xmm5,%xmm1 3976 pxor %xmm1,%xmm13 3977 pshufb .Lrol16(%rip),%xmm13 3978 paddd %xmm13,%xmm9 3979 pxor %xmm9,%xmm5 3980 movdqa %xmm5,%xmm3 3981 pslld $12,%xmm3 3982 psrld $20,%xmm5 3983 pxor %xmm3,%xmm5 3984 paddd %xmm5,%xmm1 3985 pxor %xmm1,%xmm13 3986 pshufb .Lrol8(%rip),%xmm13 3987 paddd %xmm13,%xmm9 3988 pxor %xmm9,%xmm5 3989 movdqa %xmm5,%xmm3 3990 pslld $7,%xmm3 3991 psrld $25,%xmm5 3992 pxor %xmm3,%xmm5 3993.byte 102,15,58,15,237,4 3994.byte 102,69,15,58,15,201,8 3995.byte 102,69,15,58,15,237,12 3996 paddd %xmm6,%xmm2 3997 pxor %xmm2,%xmm14 3998 pshufb .Lrol16(%rip),%xmm14 3999 paddd %xmm14,%xmm10 4000 pxor %xmm10,%xmm6 4001 movdqa %xmm6,%xmm3 4002 pslld $12,%xmm3 4003 psrld $20,%xmm6 4004 pxor %xmm3,%xmm6 4005 paddd %xmm6,%xmm2 4006 pxor %xmm2,%xmm14 4007 pshufb .Lrol8(%rip),%xmm14 4008 paddd %xmm14,%xmm10 4009 pxor %xmm10,%xmm6 4010 movdqa %xmm6,%xmm3 4011 pslld $7,%xmm3 4012 psrld $25,%xmm6 4013 pxor %xmm3,%xmm6 4014.byte 102,15,58,15,246,4 4015.byte 102,69,15,58,15,210,8 4016.byte 102,69,15,58,15,246,12 4017 paddd %xmm4,%xmm0 4018 pxor %xmm0,%xmm12 4019 pshufb .Lrol16(%rip),%xmm12 4020 paddd %xmm12,%xmm8 4021 pxor %xmm8,%xmm4 4022 movdqa %xmm4,%xmm3 4023 pslld $12,%xmm3 4024 psrld $20,%xmm4 4025 pxor %xmm3,%xmm4 4026 paddd %xmm4,%xmm0 4027 pxor %xmm0,%xmm12 4028 pshufb .Lrol8(%rip),%xmm12 4029 paddd %xmm12,%xmm8 4030 pxor %xmm8,%xmm4 4031 movdqa %xmm4,%xmm3 4032 pslld $7,%xmm3 4033 psrld $25,%xmm4 4034 pxor %xmm3,%xmm4 4035.byte 102,15,58,15,228,12 4036.byte 102,69,15,58,15,192,8 4037.byte 102,69,15,58,15,228,4 4038 paddd %xmm5,%xmm1 4039 pxor %xmm1,%xmm13 4040 pshufb .Lrol16(%rip),%xmm13 4041 paddd %xmm13,%xmm9 4042 pxor %xmm9,%xmm5 4043 movdqa %xmm5,%xmm3 4044 pslld $12,%xmm3 4045 psrld $20,%xmm5 4046 pxor %xmm3,%xmm5 4047 paddd %xmm5,%xmm1 4048 pxor %xmm1,%xmm13 4049 pshufb .Lrol8(%rip),%xmm13 4050 paddd %xmm13,%xmm9 4051 pxor %xmm9,%xmm5 4052 movdqa %xmm5,%xmm3 4053 pslld $7,%xmm3 4054 psrld $25,%xmm5 4055 pxor %xmm3,%xmm5 4056.byte 102,15,58,15,237,12 4057.byte 102,69,15,58,15,201,8 4058.byte 102,69,15,58,15,237,4 4059 paddd %xmm6,%xmm2 4060 pxor %xmm2,%xmm14 4061 pshufb .Lrol16(%rip),%xmm14 4062 paddd %xmm14,%xmm10 4063 pxor %xmm10,%xmm6 4064 movdqa %xmm6,%xmm3 4065 pslld $12,%xmm3 4066 psrld $20,%xmm6 4067 pxor %xmm3,%xmm6 4068 paddd %xmm6,%xmm2 4069 pxor %xmm2,%xmm14 4070 pshufb .Lrol8(%rip),%xmm14 4071 paddd %xmm14,%xmm10 4072 pxor %xmm10,%xmm6 4073 movdqa %xmm6,%xmm3 4074 pslld $7,%xmm3 4075 psrld $25,%xmm6 4076 pxor %xmm3,%xmm6 4077.byte 102,15,58,15,246,12 4078.byte 102,69,15,58,15,210,8 4079.byte 102,69,15,58,15,246,4 4080 4081 decq %r10 4082 jnz .Lseal_sse_128_rounds 4083 paddd .Lchacha20_consts(%rip),%xmm0 4084 paddd .Lchacha20_consts(%rip),%xmm1 4085 paddd .Lchacha20_consts(%rip),%xmm2 4086 paddd %xmm7,%xmm4 4087 paddd %xmm7,%xmm5 4088 paddd %xmm7,%xmm6 4089 paddd %xmm11,%xmm8 4090 paddd %xmm11,%xmm9 4091 paddd %xmm15,%xmm12 4092 paddd .Lsse_inc(%rip),%xmm15 4093 paddd %xmm15,%xmm13 4094 4095 pand .Lclamp(%rip),%xmm2 4096 movdqa %xmm2,0+0(%rbp) 4097 movdqa %xmm6,0+16(%rbp) 4098 4099 movq %r8,%r8 4100 call poly_hash_ad_internal 4101 jmp .Lseal_sse_128_tail_xor 4102.size GFp_chacha20_poly1305_seal, .-GFp_chacha20_poly1305_seal 4103.cfi_endproc 4104 4105 4106.type chacha20_poly1305_open_avx2,@function 4107.align 64 4108chacha20_poly1305_open_avx2: 4109.cfi_startproc 4110 4111 
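# chacha20_poly1305_open_avx2 is reached by a direct jump from
# GFp_chacha20_poly1305_open after that function has already pushed the
# callee-saved registers, saved %r9 and reserved the 288+32-byte frame; the
# CFI directives that follow only re-describe that existing frame, which is
# why no prologue instructions appear here.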
4112.cfi_adjust_cfa_offset 8 4113.cfi_offset %rbp,-16 4114.cfi_adjust_cfa_offset 8 4115.cfi_offset %rbx,-24 4116.cfi_adjust_cfa_offset 8 4117.cfi_offset %r12,-32 4118.cfi_adjust_cfa_offset 8 4119.cfi_offset %r13,-40 4120.cfi_adjust_cfa_offset 8 4121.cfi_offset %r14,-48 4122.cfi_adjust_cfa_offset 8 4123.cfi_offset %r15,-56 4124.cfi_adjust_cfa_offset 8 4125.cfi_offset %r9,-64 4126.cfi_adjust_cfa_offset 288 + 32 4127 4128 vzeroupper 4129 vmovdqa .Lchacha20_consts(%rip),%ymm0 4130 vbroadcasti128 0(%r9),%ymm4 4131 vbroadcasti128 16(%r9),%ymm8 4132 vbroadcasti128 32(%r9),%ymm12 4133 vpaddd .Lavx2_init(%rip),%ymm12,%ymm12 4134 cmpq $192,%rbx 4135 jbe .Lopen_avx2_192 4136 cmpq $320,%rbx 4137 jbe .Lopen_avx2_320 4138 4139 vmovdqa %ymm4,0+64(%rbp) 4140 vmovdqa %ymm8,0+96(%rbp) 4141 vmovdqa %ymm12,0+160(%rbp) 4142 movq $10,%r10 4143.Lopen_avx2_init_rounds: 4144 vpaddd %ymm4,%ymm0,%ymm0 4145 vpxor %ymm0,%ymm12,%ymm12 4146 vpshufb .Lrol16(%rip),%ymm12,%ymm12 4147 vpaddd %ymm12,%ymm8,%ymm8 4148 vpxor %ymm8,%ymm4,%ymm4 4149 vpsrld $20,%ymm4,%ymm3 4150 vpslld $12,%ymm4,%ymm4 4151 vpxor %ymm3,%ymm4,%ymm4 4152 vpaddd %ymm4,%ymm0,%ymm0 4153 vpxor %ymm0,%ymm12,%ymm12 4154 vpshufb .Lrol8(%rip),%ymm12,%ymm12 4155 vpaddd %ymm12,%ymm8,%ymm8 4156 vpxor %ymm8,%ymm4,%ymm4 4157 vpslld $7,%ymm4,%ymm3 4158 vpsrld $25,%ymm4,%ymm4 4159 vpxor %ymm3,%ymm4,%ymm4 4160 vpalignr $12,%ymm12,%ymm12,%ymm12 4161 vpalignr $8,%ymm8,%ymm8,%ymm8 4162 vpalignr $4,%ymm4,%ymm4,%ymm4 4163 vpaddd %ymm4,%ymm0,%ymm0 4164 vpxor %ymm0,%ymm12,%ymm12 4165 vpshufb .Lrol16(%rip),%ymm12,%ymm12 4166 vpaddd %ymm12,%ymm8,%ymm8 4167 vpxor %ymm8,%ymm4,%ymm4 4168 vpsrld $20,%ymm4,%ymm3 4169 vpslld $12,%ymm4,%ymm4 4170 vpxor %ymm3,%ymm4,%ymm4 4171 vpaddd %ymm4,%ymm0,%ymm0 4172 vpxor %ymm0,%ymm12,%ymm12 4173 vpshufb .Lrol8(%rip),%ymm12,%ymm12 4174 vpaddd %ymm12,%ymm8,%ymm8 4175 vpxor %ymm8,%ymm4,%ymm4 4176 vpslld $7,%ymm4,%ymm3 4177 vpsrld $25,%ymm4,%ymm4 4178 vpxor %ymm3,%ymm4,%ymm4 4179 vpalignr $4,%ymm12,%ymm12,%ymm12 4180 vpalignr $8,%ymm8,%ymm8,%ymm8 4181 vpalignr $12,%ymm4,%ymm4,%ymm4 4182 4183 decq %r10 4184 jne .Lopen_avx2_init_rounds 4185 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 4186 vpaddd 0+64(%rbp),%ymm4,%ymm4 4187 vpaddd 0+96(%rbp),%ymm8,%ymm8 4188 vpaddd 0+160(%rbp),%ymm12,%ymm12 4189 4190 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 4191 4192 vpand .Lclamp(%rip),%ymm3,%ymm3 4193 vmovdqa %ymm3,0+0(%rbp) 4194 4195 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 4196 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 4197 4198 movq %r8,%r8 4199 call poly_hash_ad_internal 4200 4201 xorq %rcx,%rcx 4202.Lopen_avx2_init_hash: 4203 addq 0+0(%rsi,%rcx,1),%r10 4204 adcq 8+0(%rsi,%rcx,1),%r11 4205 adcq $1,%r12 4206 movq 0+0+0(%rbp),%rax 4207 movq %rax,%r15 4208 mulq %r10 4209 movq %rax,%r13 4210 movq %rdx,%r14 4211 movq 0+0+0(%rbp),%rax 4212 mulq %r11 4213 imulq %r12,%r15 4214 addq %rax,%r14 4215 adcq %rdx,%r15 4216 movq 8+0+0(%rbp),%rax 4217 movq %rax,%r9 4218 mulq %r10 4219 addq %rax,%r14 4220 adcq $0,%rdx 4221 movq %rdx,%r10 4222 movq 8+0+0(%rbp),%rax 4223 mulq %r11 4224 addq %rax,%r15 4225 adcq $0,%rdx 4226 imulq %r12,%r9 4227 addq %r10,%r15 4228 adcq %rdx,%r9 4229 movq %r13,%r10 4230 movq %r14,%r11 4231 movq %r15,%r12 4232 andq $3,%r12 4233 movq %r15,%r13 4234 andq $-4,%r13 4235 movq %r9,%r14 4236 shrdq $2,%r9,%r15 4237 shrq $2,%r9 4238 addq %r13,%r15 4239 adcq %r14,%r9 4240 addq %r15,%r10 4241 adcq %r9,%r11 4242 adcq $0,%r12 4243 4244 addq $16,%rcx 4245 cmpq $64,%rcx 4246 jne .Lopen_avx2_init_hash 4247 4248 vpxor 0(%rsi),%ymm0,%ymm0 4249 vpxor 32(%rsi),%ymm4,%ymm4 4250 4251 vmovdqu 
%ymm0,0(%rdi) 4252 vmovdqu %ymm4,32(%rdi) 4253 leaq 64(%rsi),%rsi 4254 leaq 64(%rdi),%rdi 4255 subq $64,%rbx 4256.Lopen_avx2_main_loop: 4257 4258 cmpq $512,%rbx 4259 jb .Lopen_avx2_main_loop_done 4260 vmovdqa .Lchacha20_consts(%rip),%ymm0 4261 vmovdqa 0+64(%rbp),%ymm4 4262 vmovdqa 0+96(%rbp),%ymm8 4263 vmovdqa %ymm0,%ymm1 4264 vmovdqa %ymm4,%ymm5 4265 vmovdqa %ymm8,%ymm9 4266 vmovdqa %ymm0,%ymm2 4267 vmovdqa %ymm4,%ymm6 4268 vmovdqa %ymm8,%ymm10 4269 vmovdqa %ymm0,%ymm3 4270 vmovdqa %ymm4,%ymm7 4271 vmovdqa %ymm8,%ymm11 4272 vmovdqa .Lavx2_inc(%rip),%ymm12 4273 vpaddd 0+160(%rbp),%ymm12,%ymm15 4274 vpaddd %ymm15,%ymm12,%ymm14 4275 vpaddd %ymm14,%ymm12,%ymm13 4276 vpaddd %ymm13,%ymm12,%ymm12 4277 vmovdqa %ymm15,0+256(%rbp) 4278 vmovdqa %ymm14,0+224(%rbp) 4279 vmovdqa %ymm13,0+192(%rbp) 4280 vmovdqa %ymm12,0+160(%rbp) 4281 4282 xorq %rcx,%rcx 4283.Lopen_avx2_main_loop_rounds: 4284 addq 0+0(%rsi,%rcx,1),%r10 4285 adcq 8+0(%rsi,%rcx,1),%r11 4286 adcq $1,%r12 4287 vmovdqa %ymm8,0+128(%rbp) 4288 vmovdqa .Lrol16(%rip),%ymm8 4289 vpaddd %ymm7,%ymm3,%ymm3 4290 vpaddd %ymm6,%ymm2,%ymm2 4291 vpaddd %ymm5,%ymm1,%ymm1 4292 vpaddd %ymm4,%ymm0,%ymm0 4293 vpxor %ymm3,%ymm15,%ymm15 4294 vpxor %ymm2,%ymm14,%ymm14 4295 vpxor %ymm1,%ymm13,%ymm13 4296 vpxor %ymm0,%ymm12,%ymm12 4297 movq 0+0+0(%rbp),%rdx 4298 movq %rdx,%r15 4299 mulxq %r10,%r13,%r14 4300 mulxq %r11,%rax,%rdx 4301 imulq %r12,%r15 4302 addq %rax,%r14 4303 adcq %rdx,%r15 4304 vpshufb %ymm8,%ymm15,%ymm15 4305 vpshufb %ymm8,%ymm14,%ymm14 4306 vpshufb %ymm8,%ymm13,%ymm13 4307 vpshufb %ymm8,%ymm12,%ymm12 4308 vpaddd %ymm15,%ymm11,%ymm11 4309 vpaddd %ymm14,%ymm10,%ymm10 4310 vpaddd %ymm13,%ymm9,%ymm9 4311 vpaddd 0+128(%rbp),%ymm12,%ymm8 4312 vpxor %ymm11,%ymm7,%ymm7 4313 movq 8+0+0(%rbp),%rdx 4314 mulxq %r10,%r10,%rax 4315 addq %r10,%r14 4316 mulxq %r11,%r11,%r9 4317 adcq %r11,%r15 4318 adcq $0,%r9 4319 imulq %r12,%rdx 4320 vpxor %ymm10,%ymm6,%ymm6 4321 vpxor %ymm9,%ymm5,%ymm5 4322 vpxor %ymm8,%ymm4,%ymm4 4323 vmovdqa %ymm8,0+128(%rbp) 4324 vpsrld $20,%ymm7,%ymm8 4325 vpslld $32-20,%ymm7,%ymm7 4326 vpxor %ymm8,%ymm7,%ymm7 4327 vpsrld $20,%ymm6,%ymm8 4328 vpslld $32-20,%ymm6,%ymm6 4329 vpxor %ymm8,%ymm6,%ymm6 4330 vpsrld $20,%ymm5,%ymm8 4331 vpslld $32-20,%ymm5,%ymm5 4332 addq %rax,%r15 4333 adcq %rdx,%r9 4334 vpxor %ymm8,%ymm5,%ymm5 4335 vpsrld $20,%ymm4,%ymm8 4336 vpslld $32-20,%ymm4,%ymm4 4337 vpxor %ymm8,%ymm4,%ymm4 4338 vmovdqa .Lrol8(%rip),%ymm8 4339 vpaddd %ymm7,%ymm3,%ymm3 4340 vpaddd %ymm6,%ymm2,%ymm2 4341 vpaddd %ymm5,%ymm1,%ymm1 4342 vpaddd %ymm4,%ymm0,%ymm0 4343 vpxor %ymm3,%ymm15,%ymm15 4344 movq %r13,%r10 4345 movq %r14,%r11 4346 movq %r15,%r12 4347 andq $3,%r12 4348 movq %r15,%r13 4349 andq $-4,%r13 4350 movq %r9,%r14 4351 shrdq $2,%r9,%r15 4352 shrq $2,%r9 4353 addq %r13,%r15 4354 adcq %r14,%r9 4355 addq %r15,%r10 4356 adcq %r9,%r11 4357 adcq $0,%r12 4358 vpxor %ymm2,%ymm14,%ymm14 4359 vpxor %ymm1,%ymm13,%ymm13 4360 vpxor %ymm0,%ymm12,%ymm12 4361 vpshufb %ymm8,%ymm15,%ymm15 4362 vpshufb %ymm8,%ymm14,%ymm14 4363 vpshufb %ymm8,%ymm13,%ymm13 4364 vpshufb %ymm8,%ymm12,%ymm12 4365 vpaddd %ymm15,%ymm11,%ymm11 4366 vpaddd %ymm14,%ymm10,%ymm10 4367 addq 0+16(%rsi,%rcx,1),%r10 4368 adcq 8+16(%rsi,%rcx,1),%r11 4369 adcq $1,%r12 4370 vpaddd %ymm13,%ymm9,%ymm9 4371 vpaddd 0+128(%rbp),%ymm12,%ymm8 4372 vpxor %ymm11,%ymm7,%ymm7 4373 vpxor %ymm10,%ymm6,%ymm6 4374 vpxor %ymm9,%ymm5,%ymm5 4375 vpxor %ymm8,%ymm4,%ymm4 4376 vmovdqa %ymm8,0+128(%rbp) 4377 vpsrld $25,%ymm7,%ymm8 4378 movq 0+0+0(%rbp),%rdx 4379 movq %rdx,%r15 4380 mulxq %r10,%r13,%r14 4381 
mulxq %r11,%rax,%rdx 4382 imulq %r12,%r15 4383 addq %rax,%r14 4384 adcq %rdx,%r15 4385 vpslld $32-25,%ymm7,%ymm7 4386 vpxor %ymm8,%ymm7,%ymm7 4387 vpsrld $25,%ymm6,%ymm8 4388 vpslld $32-25,%ymm6,%ymm6 4389 vpxor %ymm8,%ymm6,%ymm6 4390 vpsrld $25,%ymm5,%ymm8 4391 vpslld $32-25,%ymm5,%ymm5 4392 vpxor %ymm8,%ymm5,%ymm5 4393 vpsrld $25,%ymm4,%ymm8 4394 vpslld $32-25,%ymm4,%ymm4 4395 vpxor %ymm8,%ymm4,%ymm4 4396 vmovdqa 0+128(%rbp),%ymm8 4397 vpalignr $4,%ymm7,%ymm7,%ymm7 4398 vpalignr $8,%ymm11,%ymm11,%ymm11 4399 vpalignr $12,%ymm15,%ymm15,%ymm15 4400 vpalignr $4,%ymm6,%ymm6,%ymm6 4401 vpalignr $8,%ymm10,%ymm10,%ymm10 4402 vpalignr $12,%ymm14,%ymm14,%ymm14 4403 movq 8+0+0(%rbp),%rdx 4404 mulxq %r10,%r10,%rax 4405 addq %r10,%r14 4406 mulxq %r11,%r11,%r9 4407 adcq %r11,%r15 4408 adcq $0,%r9 4409 imulq %r12,%rdx 4410 vpalignr $4,%ymm5,%ymm5,%ymm5 4411 vpalignr $8,%ymm9,%ymm9,%ymm9 4412 vpalignr $12,%ymm13,%ymm13,%ymm13 4413 vpalignr $4,%ymm4,%ymm4,%ymm4 4414 vpalignr $8,%ymm8,%ymm8,%ymm8 4415 vpalignr $12,%ymm12,%ymm12,%ymm12 4416 vmovdqa %ymm8,0+128(%rbp) 4417 vmovdqa .Lrol16(%rip),%ymm8 4418 vpaddd %ymm7,%ymm3,%ymm3 4419 vpaddd %ymm6,%ymm2,%ymm2 4420 vpaddd %ymm5,%ymm1,%ymm1 4421 vpaddd %ymm4,%ymm0,%ymm0 4422 vpxor %ymm3,%ymm15,%ymm15 4423 vpxor %ymm2,%ymm14,%ymm14 4424 vpxor %ymm1,%ymm13,%ymm13 4425 vpxor %ymm0,%ymm12,%ymm12 4426 vpshufb %ymm8,%ymm15,%ymm15 4427 vpshufb %ymm8,%ymm14,%ymm14 4428 addq %rax,%r15 4429 adcq %rdx,%r9 4430 vpshufb %ymm8,%ymm13,%ymm13 4431 vpshufb %ymm8,%ymm12,%ymm12 4432 vpaddd %ymm15,%ymm11,%ymm11 4433 vpaddd %ymm14,%ymm10,%ymm10 4434 vpaddd %ymm13,%ymm9,%ymm9 4435 vpaddd 0+128(%rbp),%ymm12,%ymm8 4436 vpxor %ymm11,%ymm7,%ymm7 4437 vpxor %ymm10,%ymm6,%ymm6 4438 vpxor %ymm9,%ymm5,%ymm5 4439 movq %r13,%r10 4440 movq %r14,%r11 4441 movq %r15,%r12 4442 andq $3,%r12 4443 movq %r15,%r13 4444 andq $-4,%r13 4445 movq %r9,%r14 4446 shrdq $2,%r9,%r15 4447 shrq $2,%r9 4448 addq %r13,%r15 4449 adcq %r14,%r9 4450 addq %r15,%r10 4451 adcq %r9,%r11 4452 adcq $0,%r12 4453 vpxor %ymm8,%ymm4,%ymm4 4454 vmovdqa %ymm8,0+128(%rbp) 4455 vpsrld $20,%ymm7,%ymm8 4456 vpslld $32-20,%ymm7,%ymm7 4457 vpxor %ymm8,%ymm7,%ymm7 4458 vpsrld $20,%ymm6,%ymm8 4459 vpslld $32-20,%ymm6,%ymm6 4460 vpxor %ymm8,%ymm6,%ymm6 4461 addq 0+32(%rsi,%rcx,1),%r10 4462 adcq 8+32(%rsi,%rcx,1),%r11 4463 adcq $1,%r12 4464 4465 leaq 48(%rcx),%rcx 4466 vpsrld $20,%ymm5,%ymm8 4467 vpslld $32-20,%ymm5,%ymm5 4468 vpxor %ymm8,%ymm5,%ymm5 4469 vpsrld $20,%ymm4,%ymm8 4470 vpslld $32-20,%ymm4,%ymm4 4471 vpxor %ymm8,%ymm4,%ymm4 4472 vmovdqa .Lrol8(%rip),%ymm8 4473 vpaddd %ymm7,%ymm3,%ymm3 4474 vpaddd %ymm6,%ymm2,%ymm2 4475 vpaddd %ymm5,%ymm1,%ymm1 4476 vpaddd %ymm4,%ymm0,%ymm0 4477 vpxor %ymm3,%ymm15,%ymm15 4478 vpxor %ymm2,%ymm14,%ymm14 4479 vpxor %ymm1,%ymm13,%ymm13 4480 vpxor %ymm0,%ymm12,%ymm12 4481 vpshufb %ymm8,%ymm15,%ymm15 4482 vpshufb %ymm8,%ymm14,%ymm14 4483 vpshufb %ymm8,%ymm13,%ymm13 4484 movq 0+0+0(%rbp),%rdx 4485 movq %rdx,%r15 4486 mulxq %r10,%r13,%r14 4487 mulxq %r11,%rax,%rdx 4488 imulq %r12,%r15 4489 addq %rax,%r14 4490 adcq %rdx,%r15 4491 vpshufb %ymm8,%ymm12,%ymm12 4492 vpaddd %ymm15,%ymm11,%ymm11 4493 vpaddd %ymm14,%ymm10,%ymm10 4494 vpaddd %ymm13,%ymm9,%ymm9 4495 vpaddd 0+128(%rbp),%ymm12,%ymm8 4496 vpxor %ymm11,%ymm7,%ymm7 4497 vpxor %ymm10,%ymm6,%ymm6 4498 vpxor %ymm9,%ymm5,%ymm5 4499 movq 8+0+0(%rbp),%rdx 4500 mulxq %r10,%r10,%rax 4501 addq %r10,%r14 4502 mulxq %r11,%r11,%r9 4503 adcq %r11,%r15 4504 adcq $0,%r9 4505 imulq %r12,%rdx 4506 vpxor %ymm8,%ymm4,%ymm4 4507 vmovdqa %ymm8,0+128(%rbp) 4508 vpsrld 
$25,%ymm7,%ymm8 4509 vpslld $32-25,%ymm7,%ymm7 4510 vpxor %ymm8,%ymm7,%ymm7 4511 vpsrld $25,%ymm6,%ymm8 4512 vpslld $32-25,%ymm6,%ymm6 4513 vpxor %ymm8,%ymm6,%ymm6 4514 addq %rax,%r15 4515 adcq %rdx,%r9 4516 vpsrld $25,%ymm5,%ymm8 4517 vpslld $32-25,%ymm5,%ymm5 4518 vpxor %ymm8,%ymm5,%ymm5 4519 vpsrld $25,%ymm4,%ymm8 4520 vpslld $32-25,%ymm4,%ymm4 4521 vpxor %ymm8,%ymm4,%ymm4 4522 vmovdqa 0+128(%rbp),%ymm8 4523 vpalignr $12,%ymm7,%ymm7,%ymm7 4524 vpalignr $8,%ymm11,%ymm11,%ymm11 4525 vpalignr $4,%ymm15,%ymm15,%ymm15 4526 vpalignr $12,%ymm6,%ymm6,%ymm6 4527 vpalignr $8,%ymm10,%ymm10,%ymm10 4528 vpalignr $4,%ymm14,%ymm14,%ymm14 4529 vpalignr $12,%ymm5,%ymm5,%ymm5 4530 vpalignr $8,%ymm9,%ymm9,%ymm9 4531 vpalignr $4,%ymm13,%ymm13,%ymm13 4532 vpalignr $12,%ymm4,%ymm4,%ymm4 4533 vpalignr $8,%ymm8,%ymm8,%ymm8 4534 movq %r13,%r10 4535 movq %r14,%r11 4536 movq %r15,%r12 4537 andq $3,%r12 4538 movq %r15,%r13 4539 andq $-4,%r13 4540 movq %r9,%r14 4541 shrdq $2,%r9,%r15 4542 shrq $2,%r9 4543 addq %r13,%r15 4544 adcq %r14,%r9 4545 addq %r15,%r10 4546 adcq %r9,%r11 4547 adcq $0,%r12 4548 vpalignr $4,%ymm12,%ymm12,%ymm12 4549 4550 cmpq $60*8,%rcx 4551 jne .Lopen_avx2_main_loop_rounds 4552 vpaddd .Lchacha20_consts(%rip),%ymm3,%ymm3 4553 vpaddd 0+64(%rbp),%ymm7,%ymm7 4554 vpaddd 0+96(%rbp),%ymm11,%ymm11 4555 vpaddd 0+256(%rbp),%ymm15,%ymm15 4556 vpaddd .Lchacha20_consts(%rip),%ymm2,%ymm2 4557 vpaddd 0+64(%rbp),%ymm6,%ymm6 4558 vpaddd 0+96(%rbp),%ymm10,%ymm10 4559 vpaddd 0+224(%rbp),%ymm14,%ymm14 4560 vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 4561 vpaddd 0+64(%rbp),%ymm5,%ymm5 4562 vpaddd 0+96(%rbp),%ymm9,%ymm9 4563 vpaddd 0+192(%rbp),%ymm13,%ymm13 4564 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 4565 vpaddd 0+64(%rbp),%ymm4,%ymm4 4566 vpaddd 0+96(%rbp),%ymm8,%ymm8 4567 vpaddd 0+160(%rbp),%ymm12,%ymm12 4568 4569 vmovdqa %ymm0,0+128(%rbp) 4570 addq 0+60*8(%rsi),%r10 4571 adcq 8+60*8(%rsi),%r11 4572 adcq $1,%r12 4573 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 4574 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 4575 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 4576 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 4577 vpxor 0+0(%rsi),%ymm0,%ymm0 4578 vpxor 32+0(%rsi),%ymm3,%ymm3 4579 vpxor 64+0(%rsi),%ymm7,%ymm7 4580 vpxor 96+0(%rsi),%ymm11,%ymm11 4581 vmovdqu %ymm0,0+0(%rdi) 4582 vmovdqu %ymm3,32+0(%rdi) 4583 vmovdqu %ymm7,64+0(%rdi) 4584 vmovdqu %ymm11,96+0(%rdi) 4585 4586 vmovdqa 0+128(%rbp),%ymm0 4587 movq 0+0+0(%rbp),%rax 4588 movq %rax,%r15 4589 mulq %r10 4590 movq %rax,%r13 4591 movq %rdx,%r14 4592 movq 0+0+0(%rbp),%rax 4593 mulq %r11 4594 imulq %r12,%r15 4595 addq %rax,%r14 4596 adcq %rdx,%r15 4597 movq 8+0+0(%rbp),%rax 4598 movq %rax,%r9 4599 mulq %r10 4600 addq %rax,%r14 4601 adcq $0,%rdx 4602 movq %rdx,%r10 4603 movq 8+0+0(%rbp),%rax 4604 mulq %r11 4605 addq %rax,%r15 4606 adcq $0,%rdx 4607 imulq %r12,%r9 4608 addq %r10,%r15 4609 adcq %rdx,%r9 4610 movq %r13,%r10 4611 movq %r14,%r11 4612 movq %r15,%r12 4613 andq $3,%r12 4614 movq %r15,%r13 4615 andq $-4,%r13 4616 movq %r9,%r14 4617 shrdq $2,%r9,%r15 4618 shrq $2,%r9 4619 addq %r13,%r15 4620 adcq %r14,%r9 4621 addq %r15,%r10 4622 adcq %r9,%r11 4623 adcq $0,%r12 4624 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 4625 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 4626 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 4627 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 4628 vpxor 0+128(%rsi),%ymm3,%ymm3 4629 vpxor 32+128(%rsi),%ymm2,%ymm2 4630 vpxor 64+128(%rsi),%ymm6,%ymm6 4631 vpxor 96+128(%rsi),%ymm10,%ymm10 4632 vmovdqu %ymm3,0+128(%rdi) 4633 vmovdqu %ymm2,32+128(%rdi) 4634 vmovdqu %ymm6,64+128(%rdi) 4635 vmovdqu 
%ymm10,96+128(%rdi) 4636 addq 0+60*8+16(%rsi),%r10 4637 adcq 8+60*8+16(%rsi),%r11 4638 adcq $1,%r12 4639 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 4640 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 4641 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 4642 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 4643 vpxor 0+256(%rsi),%ymm3,%ymm3 4644 vpxor 32+256(%rsi),%ymm1,%ymm1 4645 vpxor 64+256(%rsi),%ymm5,%ymm5 4646 vpxor 96+256(%rsi),%ymm9,%ymm9 4647 vmovdqu %ymm3,0+256(%rdi) 4648 vmovdqu %ymm1,32+256(%rdi) 4649 vmovdqu %ymm5,64+256(%rdi) 4650 vmovdqu %ymm9,96+256(%rdi) 4651 movq 0+0+0(%rbp),%rax 4652 movq %rax,%r15 4653 mulq %r10 4654 movq %rax,%r13 4655 movq %rdx,%r14 4656 movq 0+0+0(%rbp),%rax 4657 mulq %r11 4658 imulq %r12,%r15 4659 addq %rax,%r14 4660 adcq %rdx,%r15 4661 movq 8+0+0(%rbp),%rax 4662 movq %rax,%r9 4663 mulq %r10 4664 addq %rax,%r14 4665 adcq $0,%rdx 4666 movq %rdx,%r10 4667 movq 8+0+0(%rbp),%rax 4668 mulq %r11 4669 addq %rax,%r15 4670 adcq $0,%rdx 4671 imulq %r12,%r9 4672 addq %r10,%r15 4673 adcq %rdx,%r9 4674 movq %r13,%r10 4675 movq %r14,%r11 4676 movq %r15,%r12 4677 andq $3,%r12 4678 movq %r15,%r13 4679 andq $-4,%r13 4680 movq %r9,%r14 4681 shrdq $2,%r9,%r15 4682 shrq $2,%r9 4683 addq %r13,%r15 4684 adcq %r14,%r9 4685 addq %r15,%r10 4686 adcq %r9,%r11 4687 adcq $0,%r12 4688 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 4689 vperm2i128 $0x13,%ymm0,%ymm4,%ymm4 4690 vperm2i128 $0x02,%ymm8,%ymm12,%ymm0 4691 vperm2i128 $0x13,%ymm8,%ymm12,%ymm8 4692 vpxor 0+384(%rsi),%ymm3,%ymm3 4693 vpxor 32+384(%rsi),%ymm0,%ymm0 4694 vpxor 64+384(%rsi),%ymm4,%ymm4 4695 vpxor 96+384(%rsi),%ymm8,%ymm8 4696 vmovdqu %ymm3,0+384(%rdi) 4697 vmovdqu %ymm0,32+384(%rdi) 4698 vmovdqu %ymm4,64+384(%rdi) 4699 vmovdqu %ymm8,96+384(%rdi) 4700 4701 leaq 512(%rsi),%rsi 4702 leaq 512(%rdi),%rdi 4703 subq $512,%rbx 4704 jmp .Lopen_avx2_main_loop 4705.Lopen_avx2_main_loop_done: 4706 testq %rbx,%rbx 4707 vzeroupper 4708 je .Lopen_sse_finalize 4709 4710 cmpq $384,%rbx 4711 ja .Lopen_avx2_tail_512 4712 cmpq $256,%rbx 4713 ja .Lopen_avx2_tail_384 4714 cmpq $128,%rbx 4715 ja .Lopen_avx2_tail_256 4716 vmovdqa .Lchacha20_consts(%rip),%ymm0 4717 vmovdqa 0+64(%rbp),%ymm4 4718 vmovdqa 0+96(%rbp),%ymm8 4719 vmovdqa .Lavx2_inc(%rip),%ymm12 4720 vpaddd 0+160(%rbp),%ymm12,%ymm12 4721 vmovdqa %ymm12,0+160(%rbp) 4722 4723 xorq %r8,%r8 4724 movq %rbx,%rcx 4725 andq $-16,%rcx 4726 testq %rcx,%rcx 4727 je .Lopen_avx2_tail_128_rounds 4728.Lopen_avx2_tail_128_rounds_and_x1hash: 4729 addq 0+0(%rsi,%r8,1),%r10 4730 adcq 8+0(%rsi,%r8,1),%r11 4731 adcq $1,%r12 4732 movq 0+0+0(%rbp),%rax 4733 movq %rax,%r15 4734 mulq %r10 4735 movq %rax,%r13 4736 movq %rdx,%r14 4737 movq 0+0+0(%rbp),%rax 4738 mulq %r11 4739 imulq %r12,%r15 4740 addq %rax,%r14 4741 adcq %rdx,%r15 4742 movq 8+0+0(%rbp),%rax 4743 movq %rax,%r9 4744 mulq %r10 4745 addq %rax,%r14 4746 adcq $0,%rdx 4747 movq %rdx,%r10 4748 movq 8+0+0(%rbp),%rax 4749 mulq %r11 4750 addq %rax,%r15 4751 adcq $0,%rdx 4752 imulq %r12,%r9 4753 addq %r10,%r15 4754 adcq %rdx,%r9 4755 movq %r13,%r10 4756 movq %r14,%r11 4757 movq %r15,%r12 4758 andq $3,%r12 4759 movq %r15,%r13 4760 andq $-4,%r13 4761 movq %r9,%r14 4762 shrdq $2,%r9,%r15 4763 shrq $2,%r9 4764 addq %r13,%r15 4765 adcq %r14,%r9 4766 addq %r15,%r10 4767 adcq %r9,%r11 4768 adcq $0,%r12 4769 4770.Lopen_avx2_tail_128_rounds: 4771 addq $16,%r8 4772 vpaddd %ymm4,%ymm0,%ymm0 4773 vpxor %ymm0,%ymm12,%ymm12 4774 vpshufb .Lrol16(%rip),%ymm12,%ymm12 4775 vpaddd %ymm12,%ymm8,%ymm8 4776 vpxor %ymm8,%ymm4,%ymm4 4777 vpsrld $20,%ymm4,%ymm3 4778 vpslld $12,%ymm4,%ymm4 4779 vpxor %ymm3,%ymm4,%ymm4 4780 
vpaddd %ymm4,%ymm0,%ymm0 4781 vpxor %ymm0,%ymm12,%ymm12 4782 vpshufb .Lrol8(%rip),%ymm12,%ymm12 4783 vpaddd %ymm12,%ymm8,%ymm8 4784 vpxor %ymm8,%ymm4,%ymm4 4785 vpslld $7,%ymm4,%ymm3 4786 vpsrld $25,%ymm4,%ymm4 4787 vpxor %ymm3,%ymm4,%ymm4 4788 vpalignr $12,%ymm12,%ymm12,%ymm12 4789 vpalignr $8,%ymm8,%ymm8,%ymm8 4790 vpalignr $4,%ymm4,%ymm4,%ymm4 4791 vpaddd %ymm4,%ymm0,%ymm0 4792 vpxor %ymm0,%ymm12,%ymm12 4793 vpshufb .Lrol16(%rip),%ymm12,%ymm12 4794 vpaddd %ymm12,%ymm8,%ymm8 4795 vpxor %ymm8,%ymm4,%ymm4 4796 vpsrld $20,%ymm4,%ymm3 4797 vpslld $12,%ymm4,%ymm4 4798 vpxor %ymm3,%ymm4,%ymm4 4799 vpaddd %ymm4,%ymm0,%ymm0 4800 vpxor %ymm0,%ymm12,%ymm12 4801 vpshufb .Lrol8(%rip),%ymm12,%ymm12 4802 vpaddd %ymm12,%ymm8,%ymm8 4803 vpxor %ymm8,%ymm4,%ymm4 4804 vpslld $7,%ymm4,%ymm3 4805 vpsrld $25,%ymm4,%ymm4 4806 vpxor %ymm3,%ymm4,%ymm4 4807 vpalignr $4,%ymm12,%ymm12,%ymm12 4808 vpalignr $8,%ymm8,%ymm8,%ymm8 4809 vpalignr $12,%ymm4,%ymm4,%ymm4 4810 4811 cmpq %rcx,%r8 4812 jb .Lopen_avx2_tail_128_rounds_and_x1hash 4813 cmpq $160,%r8 4814 jne .Lopen_avx2_tail_128_rounds 4815 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 4816 vpaddd 0+64(%rbp),%ymm4,%ymm4 4817 vpaddd 0+96(%rbp),%ymm8,%ymm8 4818 vpaddd 0+160(%rbp),%ymm12,%ymm12 4819 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 4820 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 4821 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 4822 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 4823 vmovdqa %ymm3,%ymm8 4824 4825 jmp .Lopen_avx2_tail_128_xor 4826 4827.Lopen_avx2_tail_256: 4828 vmovdqa .Lchacha20_consts(%rip),%ymm0 4829 vmovdqa 0+64(%rbp),%ymm4 4830 vmovdqa 0+96(%rbp),%ymm8 4831 vmovdqa %ymm0,%ymm1 4832 vmovdqa %ymm4,%ymm5 4833 vmovdqa %ymm8,%ymm9 4834 vmovdqa .Lavx2_inc(%rip),%ymm12 4835 vpaddd 0+160(%rbp),%ymm12,%ymm13 4836 vpaddd %ymm13,%ymm12,%ymm12 4837 vmovdqa %ymm12,0+160(%rbp) 4838 vmovdqa %ymm13,0+192(%rbp) 4839 4840 movq %rbx,0+128(%rbp) 4841 movq %rbx,%rcx 4842 subq $128,%rcx 4843 shrq $4,%rcx 4844 movq $10,%r8 4845 cmpq $10,%rcx 4846 cmovgq %r8,%rcx 4847 movq %rsi,%rbx 4848 xorq %r8,%r8 4849.Lopen_avx2_tail_256_rounds_and_x1hash: 4850 addq 0+0(%rbx),%r10 4851 adcq 8+0(%rbx),%r11 4852 adcq $1,%r12 4853 movq 0+0+0(%rbp),%rdx 4854 movq %rdx,%r15 4855 mulxq %r10,%r13,%r14 4856 mulxq %r11,%rax,%rdx 4857 imulq %r12,%r15 4858 addq %rax,%r14 4859 adcq %rdx,%r15 4860 movq 8+0+0(%rbp),%rdx 4861 mulxq %r10,%r10,%rax 4862 addq %r10,%r14 4863 mulxq %r11,%r11,%r9 4864 adcq %r11,%r15 4865 adcq $0,%r9 4866 imulq %r12,%rdx 4867 addq %rax,%r15 4868 adcq %rdx,%r9 4869 movq %r13,%r10 4870 movq %r14,%r11 4871 movq %r15,%r12 4872 andq $3,%r12 4873 movq %r15,%r13 4874 andq $-4,%r13 4875 movq %r9,%r14 4876 shrdq $2,%r9,%r15 4877 shrq $2,%r9 4878 addq %r13,%r15 4879 adcq %r14,%r9 4880 addq %r15,%r10 4881 adcq %r9,%r11 4882 adcq $0,%r12 4883 4884 leaq 16(%rbx),%rbx 4885.Lopen_avx2_tail_256_rounds: 4886 vpaddd %ymm4,%ymm0,%ymm0 4887 vpxor %ymm0,%ymm12,%ymm12 4888 vpshufb .Lrol16(%rip),%ymm12,%ymm12 4889 vpaddd %ymm12,%ymm8,%ymm8 4890 vpxor %ymm8,%ymm4,%ymm4 4891 vpsrld $20,%ymm4,%ymm3 4892 vpslld $12,%ymm4,%ymm4 4893 vpxor %ymm3,%ymm4,%ymm4 4894 vpaddd %ymm4,%ymm0,%ymm0 4895 vpxor %ymm0,%ymm12,%ymm12 4896 vpshufb .Lrol8(%rip),%ymm12,%ymm12 4897 vpaddd %ymm12,%ymm8,%ymm8 4898 vpxor %ymm8,%ymm4,%ymm4 4899 vpslld $7,%ymm4,%ymm3 4900 vpsrld $25,%ymm4,%ymm4 4901 vpxor %ymm3,%ymm4,%ymm4 4902 vpalignr $12,%ymm12,%ymm12,%ymm12 4903 vpalignr $8,%ymm8,%ymm8,%ymm8 4904 vpalignr $4,%ymm4,%ymm4,%ymm4 4905 vpaddd %ymm5,%ymm1,%ymm1 4906 vpxor %ymm1,%ymm13,%ymm13 4907 vpshufb .Lrol16(%rip),%ymm13,%ymm13 4908 vpaddd 
%ymm13,%ymm9,%ymm9 4909 vpxor %ymm9,%ymm5,%ymm5 4910 vpsrld $20,%ymm5,%ymm3 4911 vpslld $12,%ymm5,%ymm5 4912 vpxor %ymm3,%ymm5,%ymm5 4913 vpaddd %ymm5,%ymm1,%ymm1 4914 vpxor %ymm1,%ymm13,%ymm13 4915 vpshufb .Lrol8(%rip),%ymm13,%ymm13 4916 vpaddd %ymm13,%ymm9,%ymm9 4917 vpxor %ymm9,%ymm5,%ymm5 4918 vpslld $7,%ymm5,%ymm3 4919 vpsrld $25,%ymm5,%ymm5 4920 vpxor %ymm3,%ymm5,%ymm5 4921 vpalignr $12,%ymm13,%ymm13,%ymm13 4922 vpalignr $8,%ymm9,%ymm9,%ymm9 4923 vpalignr $4,%ymm5,%ymm5,%ymm5 4924 4925 incq %r8 4926 vpaddd %ymm4,%ymm0,%ymm0 4927 vpxor %ymm0,%ymm12,%ymm12 4928 vpshufb .Lrol16(%rip),%ymm12,%ymm12 4929 vpaddd %ymm12,%ymm8,%ymm8 4930 vpxor %ymm8,%ymm4,%ymm4 4931 vpsrld $20,%ymm4,%ymm3 4932 vpslld $12,%ymm4,%ymm4 4933 vpxor %ymm3,%ymm4,%ymm4 4934 vpaddd %ymm4,%ymm0,%ymm0 4935 vpxor %ymm0,%ymm12,%ymm12 4936 vpshufb .Lrol8(%rip),%ymm12,%ymm12 4937 vpaddd %ymm12,%ymm8,%ymm8 4938 vpxor %ymm8,%ymm4,%ymm4 4939 vpslld $7,%ymm4,%ymm3 4940 vpsrld $25,%ymm4,%ymm4 4941 vpxor %ymm3,%ymm4,%ymm4 4942 vpalignr $4,%ymm12,%ymm12,%ymm12 4943 vpalignr $8,%ymm8,%ymm8,%ymm8 4944 vpalignr $12,%ymm4,%ymm4,%ymm4 4945 vpaddd %ymm5,%ymm1,%ymm1 4946 vpxor %ymm1,%ymm13,%ymm13 4947 vpshufb .Lrol16(%rip),%ymm13,%ymm13 4948 vpaddd %ymm13,%ymm9,%ymm9 4949 vpxor %ymm9,%ymm5,%ymm5 4950 vpsrld $20,%ymm5,%ymm3 4951 vpslld $12,%ymm5,%ymm5 4952 vpxor %ymm3,%ymm5,%ymm5 4953 vpaddd %ymm5,%ymm1,%ymm1 4954 vpxor %ymm1,%ymm13,%ymm13 4955 vpshufb .Lrol8(%rip),%ymm13,%ymm13 4956 vpaddd %ymm13,%ymm9,%ymm9 4957 vpxor %ymm9,%ymm5,%ymm5 4958 vpslld $7,%ymm5,%ymm3 4959 vpsrld $25,%ymm5,%ymm5 4960 vpxor %ymm3,%ymm5,%ymm5 4961 vpalignr $4,%ymm13,%ymm13,%ymm13 4962 vpalignr $8,%ymm9,%ymm9,%ymm9 4963 vpalignr $12,%ymm5,%ymm5,%ymm5 4964 vpaddd %ymm6,%ymm2,%ymm2 4965 vpxor %ymm2,%ymm14,%ymm14 4966 vpshufb .Lrol16(%rip),%ymm14,%ymm14 4967 vpaddd %ymm14,%ymm10,%ymm10 4968 vpxor %ymm10,%ymm6,%ymm6 4969 vpsrld $20,%ymm6,%ymm3 4970 vpslld $12,%ymm6,%ymm6 4971 vpxor %ymm3,%ymm6,%ymm6 4972 vpaddd %ymm6,%ymm2,%ymm2 4973 vpxor %ymm2,%ymm14,%ymm14 4974 vpshufb .Lrol8(%rip),%ymm14,%ymm14 4975 vpaddd %ymm14,%ymm10,%ymm10 4976 vpxor %ymm10,%ymm6,%ymm6 4977 vpslld $7,%ymm6,%ymm3 4978 vpsrld $25,%ymm6,%ymm6 4979 vpxor %ymm3,%ymm6,%ymm6 4980 vpalignr $4,%ymm14,%ymm14,%ymm14 4981 vpalignr $8,%ymm10,%ymm10,%ymm10 4982 vpalignr $12,%ymm6,%ymm6,%ymm6 4983 4984 cmpq %rcx,%r8 4985 jb .Lopen_avx2_tail_256_rounds_and_x1hash 4986 cmpq $10,%r8 4987 jne .Lopen_avx2_tail_256_rounds 4988 movq %rbx,%r8 4989 subq %rsi,%rbx 4990 movq %rbx,%rcx 4991 movq 0+128(%rbp),%rbx 4992.Lopen_avx2_tail_256_hash: 4993 addq $16,%rcx 4994 cmpq %rbx,%rcx 4995 jg .Lopen_avx2_tail_256_done 4996 addq 0+0(%r8),%r10 4997 adcq 8+0(%r8),%r11 4998 adcq $1,%r12 4999 movq 0+0+0(%rbp),%rdx 5000 movq %rdx,%r15 5001 mulxq %r10,%r13,%r14 5002 mulxq %r11,%rax,%rdx 5003 imulq %r12,%r15 5004 addq %rax,%r14 5005 adcq %rdx,%r15 5006 movq 8+0+0(%rbp),%rdx 5007 mulxq %r10,%r10,%rax 5008 addq %r10,%r14 5009 mulxq %r11,%r11,%r9 5010 adcq %r11,%r15 5011 adcq $0,%r9 5012 imulq %r12,%rdx 5013 addq %rax,%r15 5014 adcq %rdx,%r9 5015 movq %r13,%r10 5016 movq %r14,%r11 5017 movq %r15,%r12 5018 andq $3,%r12 5019 movq %r15,%r13 5020 andq $-4,%r13 5021 movq %r9,%r14 5022 shrdq $2,%r9,%r15 5023 shrq $2,%r9 5024 addq %r13,%r15 5025 adcq %r14,%r9 5026 addq %r15,%r10 5027 adcq %r9,%r11 5028 adcq $0,%r12 5029 5030 leaq 16(%r8),%r8 5031 jmp .Lopen_avx2_tail_256_hash 5032.Lopen_avx2_tail_256_done: 5033 vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 5034 vpaddd 0+64(%rbp),%ymm5,%ymm5 5035 vpaddd 0+96(%rbp),%ymm9,%ymm9 5036 vpaddd 
0+192(%rbp),%ymm13,%ymm13 5037 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 5038 vpaddd 0+64(%rbp),%ymm4,%ymm4 5039 vpaddd 0+96(%rbp),%ymm8,%ymm8 5040 vpaddd 0+160(%rbp),%ymm12,%ymm12 5041 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 5042 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 5043 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 5044 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 5045 vpxor 0+0(%rsi),%ymm3,%ymm3 5046 vpxor 32+0(%rsi),%ymm1,%ymm1 5047 vpxor 64+0(%rsi),%ymm5,%ymm5 5048 vpxor 96+0(%rsi),%ymm9,%ymm9 5049 vmovdqu %ymm3,0+0(%rdi) 5050 vmovdqu %ymm1,32+0(%rdi) 5051 vmovdqu %ymm5,64+0(%rdi) 5052 vmovdqu %ymm9,96+0(%rdi) 5053 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 5054 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 5055 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 5056 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 5057 vmovdqa %ymm3,%ymm8 5058 5059 leaq 128(%rsi),%rsi 5060 leaq 128(%rdi),%rdi 5061 subq $128,%rbx 5062 jmp .Lopen_avx2_tail_128_xor 5063 5064.Lopen_avx2_tail_384: 5065 vmovdqa .Lchacha20_consts(%rip),%ymm0 5066 vmovdqa 0+64(%rbp),%ymm4 5067 vmovdqa 0+96(%rbp),%ymm8 5068 vmovdqa %ymm0,%ymm1 5069 vmovdqa %ymm4,%ymm5 5070 vmovdqa %ymm8,%ymm9 5071 vmovdqa %ymm0,%ymm2 5072 vmovdqa %ymm4,%ymm6 5073 vmovdqa %ymm8,%ymm10 5074 vmovdqa .Lavx2_inc(%rip),%ymm12 5075 vpaddd 0+160(%rbp),%ymm12,%ymm14 5076 vpaddd %ymm14,%ymm12,%ymm13 5077 vpaddd %ymm13,%ymm12,%ymm12 5078 vmovdqa %ymm12,0+160(%rbp) 5079 vmovdqa %ymm13,0+192(%rbp) 5080 vmovdqa %ymm14,0+224(%rbp) 5081 5082 movq %rbx,0+128(%rbp) 5083 movq %rbx,%rcx 5084 subq $256,%rcx 5085 shrq $4,%rcx 5086 addq $6,%rcx 5087 movq $10,%r8 5088 cmpq $10,%rcx 5089 cmovgq %r8,%rcx 5090 movq %rsi,%rbx 5091 xorq %r8,%r8 5092.Lopen_avx2_tail_384_rounds_and_x2hash: 5093 addq 0+0(%rbx),%r10 5094 adcq 8+0(%rbx),%r11 5095 adcq $1,%r12 5096 movq 0+0+0(%rbp),%rdx 5097 movq %rdx,%r15 5098 mulxq %r10,%r13,%r14 5099 mulxq %r11,%rax,%rdx 5100 imulq %r12,%r15 5101 addq %rax,%r14 5102 adcq %rdx,%r15 5103 movq 8+0+0(%rbp),%rdx 5104 mulxq %r10,%r10,%rax 5105 addq %r10,%r14 5106 mulxq %r11,%r11,%r9 5107 adcq %r11,%r15 5108 adcq $0,%r9 5109 imulq %r12,%rdx 5110 addq %rax,%r15 5111 adcq %rdx,%r9 5112 movq %r13,%r10 5113 movq %r14,%r11 5114 movq %r15,%r12 5115 andq $3,%r12 5116 movq %r15,%r13 5117 andq $-4,%r13 5118 movq %r9,%r14 5119 shrdq $2,%r9,%r15 5120 shrq $2,%r9 5121 addq %r13,%r15 5122 adcq %r14,%r9 5123 addq %r15,%r10 5124 adcq %r9,%r11 5125 adcq $0,%r12 5126 5127 leaq 16(%rbx),%rbx 5128.Lopen_avx2_tail_384_rounds_and_x1hash: 5129 vpaddd %ymm6,%ymm2,%ymm2 5130 vpxor %ymm2,%ymm14,%ymm14 5131 vpshufb .Lrol16(%rip),%ymm14,%ymm14 5132 vpaddd %ymm14,%ymm10,%ymm10 5133 vpxor %ymm10,%ymm6,%ymm6 5134 vpsrld $20,%ymm6,%ymm3 5135 vpslld $12,%ymm6,%ymm6 5136 vpxor %ymm3,%ymm6,%ymm6 5137 vpaddd %ymm6,%ymm2,%ymm2 5138 vpxor %ymm2,%ymm14,%ymm14 5139 vpshufb .Lrol8(%rip),%ymm14,%ymm14 5140 vpaddd %ymm14,%ymm10,%ymm10 5141 vpxor %ymm10,%ymm6,%ymm6 5142 vpslld $7,%ymm6,%ymm3 5143 vpsrld $25,%ymm6,%ymm6 5144 vpxor %ymm3,%ymm6,%ymm6 5145 vpalignr $12,%ymm14,%ymm14,%ymm14 5146 vpalignr $8,%ymm10,%ymm10,%ymm10 5147 vpalignr $4,%ymm6,%ymm6,%ymm6 5148 vpaddd %ymm5,%ymm1,%ymm1 5149 vpxor %ymm1,%ymm13,%ymm13 5150 vpshufb .Lrol16(%rip),%ymm13,%ymm13 5151 vpaddd %ymm13,%ymm9,%ymm9 5152 vpxor %ymm9,%ymm5,%ymm5 5153 vpsrld $20,%ymm5,%ymm3 5154 vpslld $12,%ymm5,%ymm5 5155 vpxor %ymm3,%ymm5,%ymm5 5156 vpaddd %ymm5,%ymm1,%ymm1 5157 vpxor %ymm1,%ymm13,%ymm13 5158 vpshufb .Lrol8(%rip),%ymm13,%ymm13 5159 vpaddd %ymm13,%ymm9,%ymm9 5160 vpxor %ymm9,%ymm5,%ymm5 5161 vpslld $7,%ymm5,%ymm3 5162 vpsrld $25,%ymm5,%ymm5 5163 vpxor %ymm3,%ymm5,%ymm5 5164 vpalignr 
$12,%ymm13,%ymm13,%ymm13 5165 vpalignr $8,%ymm9,%ymm9,%ymm9 5166 vpalignr $4,%ymm5,%ymm5,%ymm5 5167 vpaddd %ymm4,%ymm0,%ymm0 5168 vpxor %ymm0,%ymm12,%ymm12 5169 vpshufb .Lrol16(%rip),%ymm12,%ymm12 5170 vpaddd %ymm12,%ymm8,%ymm8 5171 vpxor %ymm8,%ymm4,%ymm4 5172 vpsrld $20,%ymm4,%ymm3 5173 vpslld $12,%ymm4,%ymm4 5174 vpxor %ymm3,%ymm4,%ymm4 5175 vpaddd %ymm4,%ymm0,%ymm0 5176 vpxor %ymm0,%ymm12,%ymm12 5177 vpshufb .Lrol8(%rip),%ymm12,%ymm12 5178 vpaddd %ymm12,%ymm8,%ymm8 5179 vpxor %ymm8,%ymm4,%ymm4 5180 vpslld $7,%ymm4,%ymm3 5181 vpsrld $25,%ymm4,%ymm4 5182 vpxor %ymm3,%ymm4,%ymm4 5183 vpalignr $12,%ymm12,%ymm12,%ymm12 5184 vpalignr $8,%ymm8,%ymm8,%ymm8 5185 vpalignr $4,%ymm4,%ymm4,%ymm4 5186 addq 0+0(%rbx),%r10 5187 adcq 8+0(%rbx),%r11 5188 adcq $1,%r12 5189 movq 0+0+0(%rbp),%rax 5190 movq %rax,%r15 5191 mulq %r10 5192 movq %rax,%r13 5193 movq %rdx,%r14 5194 movq 0+0+0(%rbp),%rax 5195 mulq %r11 5196 imulq %r12,%r15 5197 addq %rax,%r14 5198 adcq %rdx,%r15 5199 movq 8+0+0(%rbp),%rax 5200 movq %rax,%r9 5201 mulq %r10 5202 addq %rax,%r14 5203 adcq $0,%rdx 5204 movq %rdx,%r10 5205 movq 8+0+0(%rbp),%rax 5206 mulq %r11 5207 addq %rax,%r15 5208 adcq $0,%rdx 5209 imulq %r12,%r9 5210 addq %r10,%r15 5211 adcq %rdx,%r9 5212 movq %r13,%r10 5213 movq %r14,%r11 5214 movq %r15,%r12 5215 andq $3,%r12 5216 movq %r15,%r13 5217 andq $-4,%r13 5218 movq %r9,%r14 5219 shrdq $2,%r9,%r15 5220 shrq $2,%r9 5221 addq %r13,%r15 5222 adcq %r14,%r9 5223 addq %r15,%r10 5224 adcq %r9,%r11 5225 adcq $0,%r12 5226 5227 leaq 16(%rbx),%rbx 5228 incq %r8 5229 vpaddd %ymm6,%ymm2,%ymm2 5230 vpxor %ymm2,%ymm14,%ymm14 5231 vpshufb .Lrol16(%rip),%ymm14,%ymm14 5232 vpaddd %ymm14,%ymm10,%ymm10 5233 vpxor %ymm10,%ymm6,%ymm6 5234 vpsrld $20,%ymm6,%ymm3 5235 vpslld $12,%ymm6,%ymm6 5236 vpxor %ymm3,%ymm6,%ymm6 5237 vpaddd %ymm6,%ymm2,%ymm2 5238 vpxor %ymm2,%ymm14,%ymm14 5239 vpshufb .Lrol8(%rip),%ymm14,%ymm14 5240 vpaddd %ymm14,%ymm10,%ymm10 5241 vpxor %ymm10,%ymm6,%ymm6 5242 vpslld $7,%ymm6,%ymm3 5243 vpsrld $25,%ymm6,%ymm6 5244 vpxor %ymm3,%ymm6,%ymm6 5245 vpalignr $4,%ymm14,%ymm14,%ymm14 5246 vpalignr $8,%ymm10,%ymm10,%ymm10 5247 vpalignr $12,%ymm6,%ymm6,%ymm6 5248 vpaddd %ymm5,%ymm1,%ymm1 5249 vpxor %ymm1,%ymm13,%ymm13 5250 vpshufb .Lrol16(%rip),%ymm13,%ymm13 5251 vpaddd %ymm13,%ymm9,%ymm9 5252 vpxor %ymm9,%ymm5,%ymm5 5253 vpsrld $20,%ymm5,%ymm3 5254 vpslld $12,%ymm5,%ymm5 5255 vpxor %ymm3,%ymm5,%ymm5 5256 vpaddd %ymm5,%ymm1,%ymm1 5257 vpxor %ymm1,%ymm13,%ymm13 5258 vpshufb .Lrol8(%rip),%ymm13,%ymm13 5259 vpaddd %ymm13,%ymm9,%ymm9 5260 vpxor %ymm9,%ymm5,%ymm5 5261 vpslld $7,%ymm5,%ymm3 5262 vpsrld $25,%ymm5,%ymm5 5263 vpxor %ymm3,%ymm5,%ymm5 5264 vpalignr $4,%ymm13,%ymm13,%ymm13 5265 vpalignr $8,%ymm9,%ymm9,%ymm9 5266 vpalignr $12,%ymm5,%ymm5,%ymm5 5267 vpaddd %ymm4,%ymm0,%ymm0 5268 vpxor %ymm0,%ymm12,%ymm12 5269 vpshufb .Lrol16(%rip),%ymm12,%ymm12 5270 vpaddd %ymm12,%ymm8,%ymm8 5271 vpxor %ymm8,%ymm4,%ymm4 5272 vpsrld $20,%ymm4,%ymm3 5273 vpslld $12,%ymm4,%ymm4 5274 vpxor %ymm3,%ymm4,%ymm4 5275 vpaddd %ymm4,%ymm0,%ymm0 5276 vpxor %ymm0,%ymm12,%ymm12 5277 vpshufb .Lrol8(%rip),%ymm12,%ymm12 5278 vpaddd %ymm12,%ymm8,%ymm8 5279 vpxor %ymm8,%ymm4,%ymm4 5280 vpslld $7,%ymm4,%ymm3 5281 vpsrld $25,%ymm4,%ymm4 5282 vpxor %ymm3,%ymm4,%ymm4 5283 vpalignr $4,%ymm12,%ymm12,%ymm12 5284 vpalignr $8,%ymm8,%ymm8,%ymm8 5285 vpalignr $12,%ymm4,%ymm4,%ymm4 5286 5287 cmpq %rcx,%r8 5288 jb .Lopen_avx2_tail_384_rounds_and_x2hash 5289 cmpq $10,%r8 5290 jne .Lopen_avx2_tail_384_rounds_and_x1hash 5291 movq %rbx,%r8 5292 subq %rsi,%rbx 5293 movq %rbx,%rcx 5294 
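/* Poly1305 catch-up loop below: absorb the 16-byte ciphertext blocks not yet hashed during the interleaved rounds above, before the final keystream blocks are produced. */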
 movq 0+128(%rbp),%rbx
.Lopen_avx2_384_tail_hash:
 addq $16,%rcx
 cmpq %rbx,%rcx
 jg .Lopen_avx2_384_tail_done
 addq 0+0(%r8),%r10
 adcq 8+0(%r8),%r11
 adcq $1,%r12
 movq 0+0+0(%rbp),%rdx
 movq %rdx,%r15
 mulxq %r10,%r13,%r14
 mulxq %r11,%rax,%rdx
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 movq 8+0+0(%rbp),%rdx
 mulxq %r10,%r10,%rax
 addq %r10,%r14
 mulxq %r11,%r11,%r9
 adcq %r11,%r15
 adcq $0,%r9
 imulq %r12,%rdx
 addq %rax,%r15
 adcq %rdx,%r9
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r15
 adcq %r14,%r9
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12

 leaq 16(%r8),%r8
 jmp .Lopen_avx2_384_tail_hash
.Lopen_avx2_384_tail_done:
 vpaddd .Lchacha20_consts(%rip),%ymm2,%ymm2
 vpaddd 0+64(%rbp),%ymm6,%ymm6
 vpaddd 0+96(%rbp),%ymm10,%ymm10
 vpaddd 0+224(%rbp),%ymm14,%ymm14
 vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1
 vpaddd 0+64(%rbp),%ymm5,%ymm5
 vpaddd 0+96(%rbp),%ymm9,%ymm9
 vpaddd 0+192(%rbp),%ymm13,%ymm13
 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0
 vpaddd 0+64(%rbp),%ymm4,%ymm4
 vpaddd 0+96(%rbp),%ymm8,%ymm8
 vpaddd 0+160(%rbp),%ymm12,%ymm12
 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3
 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6
 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2
 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10
 vpxor 0+0(%rsi),%ymm3,%ymm3
 vpxor 32+0(%rsi),%ymm2,%ymm2
 vpxor 64+0(%rsi),%ymm6,%ymm6
 vpxor 96+0(%rsi),%ymm10,%ymm10
 vmovdqu %ymm3,0+0(%rdi)
 vmovdqu %ymm2,32+0(%rdi)
 vmovdqu %ymm6,64+0(%rdi)
 vmovdqu %ymm10,96+0(%rdi)
 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
 vpxor 0+128(%rsi),%ymm3,%ymm3
 vpxor 32+128(%rsi),%ymm1,%ymm1
 vpxor 64+128(%rsi),%ymm5,%ymm5
 vpxor 96+128(%rsi),%ymm9,%ymm9
 vmovdqu %ymm3,0+128(%rdi)
 vmovdqu %ymm1,32+128(%rdi)
 vmovdqu %ymm5,64+128(%rdi)
 vmovdqu %ymm9,96+128(%rdi)
 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
 vmovdqa %ymm3,%ymm8

 leaq 256(%rsi),%rsi
 leaq 256(%rdi),%rdi
 subq $256,%rbx
 jmp .Lopen_avx2_tail_128_xor

.Lopen_avx2_tail_512:
 vmovdqa .Lchacha20_consts(%rip),%ymm0
 vmovdqa 0+64(%rbp),%ymm4
 vmovdqa 0+96(%rbp),%ymm8
 vmovdqa %ymm0,%ymm1
 vmovdqa %ymm4,%ymm5
 vmovdqa %ymm8,%ymm9
 vmovdqa %ymm0,%ymm2
 vmovdqa %ymm4,%ymm6
 vmovdqa %ymm8,%ymm10
 vmovdqa %ymm0,%ymm3
 vmovdqa %ymm4,%ymm7
 vmovdqa %ymm8,%ymm11
 vmovdqa .Lavx2_inc(%rip),%ymm12
 vpaddd 0+160(%rbp),%ymm12,%ymm15
 vpaddd %ymm15,%ymm12,%ymm14
 vpaddd %ymm14,%ymm12,%ymm13
 vpaddd %ymm13,%ymm12,%ymm12
 vmovdqa %ymm15,0+256(%rbp)
 vmovdqa %ymm14,0+224(%rbp)
 vmovdqa %ymm13,0+192(%rbp)
 vmovdqa %ymm12,0+160(%rbp)

 xorq %rcx,%rcx
 movq %rsi,%r8
.Lopen_avx2_tail_512_rounds_and_x2hash:
 addq 0+0(%r8),%r10
 adcq 8+0(%r8),%r11
 adcq $1,%r12
 movq 0+0+0(%rbp),%rax
 movq %rax,%r15
 mulq %r10
 movq %rax,%r13
 movq %rdx,%r14
 movq 0+0+0(%rbp),%rax
 mulq %r11
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
5422 movq 8+0+0(%rbp),%rax 5423 movq %rax,%r9 5424 mulq %r10 5425 addq %rax,%r14 5426 adcq $0,%rdx 5427 movq %rdx,%r10 5428 movq 8+0+0(%rbp),%rax 5429 mulq %r11 5430 addq %rax,%r15 5431 adcq $0,%rdx 5432 imulq %r12,%r9 5433 addq %r10,%r15 5434 adcq %rdx,%r9 5435 movq %r13,%r10 5436 movq %r14,%r11 5437 movq %r15,%r12 5438 andq $3,%r12 5439 movq %r15,%r13 5440 andq $-4,%r13 5441 movq %r9,%r14 5442 shrdq $2,%r9,%r15 5443 shrq $2,%r9 5444 addq %r13,%r15 5445 adcq %r14,%r9 5446 addq %r15,%r10 5447 adcq %r9,%r11 5448 adcq $0,%r12 5449 5450 leaq 16(%r8),%r8 5451.Lopen_avx2_tail_512_rounds_and_x1hash: 5452 vmovdqa %ymm8,0+128(%rbp) 5453 vmovdqa .Lrol16(%rip),%ymm8 5454 vpaddd %ymm7,%ymm3,%ymm3 5455 vpaddd %ymm6,%ymm2,%ymm2 5456 vpaddd %ymm5,%ymm1,%ymm1 5457 vpaddd %ymm4,%ymm0,%ymm0 5458 vpxor %ymm3,%ymm15,%ymm15 5459 vpxor %ymm2,%ymm14,%ymm14 5460 vpxor %ymm1,%ymm13,%ymm13 5461 vpxor %ymm0,%ymm12,%ymm12 5462 vpshufb %ymm8,%ymm15,%ymm15 5463 vpshufb %ymm8,%ymm14,%ymm14 5464 vpshufb %ymm8,%ymm13,%ymm13 5465 vpshufb %ymm8,%ymm12,%ymm12 5466 vpaddd %ymm15,%ymm11,%ymm11 5467 vpaddd %ymm14,%ymm10,%ymm10 5468 vpaddd %ymm13,%ymm9,%ymm9 5469 vpaddd 0+128(%rbp),%ymm12,%ymm8 5470 vpxor %ymm11,%ymm7,%ymm7 5471 vpxor %ymm10,%ymm6,%ymm6 5472 vpxor %ymm9,%ymm5,%ymm5 5473 vpxor %ymm8,%ymm4,%ymm4 5474 vmovdqa %ymm8,0+128(%rbp) 5475 vpsrld $20,%ymm7,%ymm8 5476 vpslld $32-20,%ymm7,%ymm7 5477 vpxor %ymm8,%ymm7,%ymm7 5478 vpsrld $20,%ymm6,%ymm8 5479 vpslld $32-20,%ymm6,%ymm6 5480 vpxor %ymm8,%ymm6,%ymm6 5481 vpsrld $20,%ymm5,%ymm8 5482 vpslld $32-20,%ymm5,%ymm5 5483 vpxor %ymm8,%ymm5,%ymm5 5484 vpsrld $20,%ymm4,%ymm8 5485 vpslld $32-20,%ymm4,%ymm4 5486 vpxor %ymm8,%ymm4,%ymm4 5487 vmovdqa .Lrol8(%rip),%ymm8 5488 vpaddd %ymm7,%ymm3,%ymm3 5489 addq 0+0(%r8),%r10 5490 adcq 8+0(%r8),%r11 5491 adcq $1,%r12 5492 movq 0+0+0(%rbp),%rdx 5493 movq %rdx,%r15 5494 mulxq %r10,%r13,%r14 5495 mulxq %r11,%rax,%rdx 5496 imulq %r12,%r15 5497 addq %rax,%r14 5498 adcq %rdx,%r15 5499 movq 8+0+0(%rbp),%rdx 5500 mulxq %r10,%r10,%rax 5501 addq %r10,%r14 5502 mulxq %r11,%r11,%r9 5503 adcq %r11,%r15 5504 adcq $0,%r9 5505 imulq %r12,%rdx 5506 addq %rax,%r15 5507 adcq %rdx,%r9 5508 movq %r13,%r10 5509 movq %r14,%r11 5510 movq %r15,%r12 5511 andq $3,%r12 5512 movq %r15,%r13 5513 andq $-4,%r13 5514 movq %r9,%r14 5515 shrdq $2,%r9,%r15 5516 shrq $2,%r9 5517 addq %r13,%r15 5518 adcq %r14,%r9 5519 addq %r15,%r10 5520 adcq %r9,%r11 5521 adcq $0,%r12 5522 vpaddd %ymm6,%ymm2,%ymm2 5523 vpaddd %ymm5,%ymm1,%ymm1 5524 vpaddd %ymm4,%ymm0,%ymm0 5525 vpxor %ymm3,%ymm15,%ymm15 5526 vpxor %ymm2,%ymm14,%ymm14 5527 vpxor %ymm1,%ymm13,%ymm13 5528 vpxor %ymm0,%ymm12,%ymm12 5529 vpshufb %ymm8,%ymm15,%ymm15 5530 vpshufb %ymm8,%ymm14,%ymm14 5531 vpshufb %ymm8,%ymm13,%ymm13 5532 vpshufb %ymm8,%ymm12,%ymm12 5533 vpaddd %ymm15,%ymm11,%ymm11 5534 vpaddd %ymm14,%ymm10,%ymm10 5535 vpaddd %ymm13,%ymm9,%ymm9 5536 vpaddd 0+128(%rbp),%ymm12,%ymm8 5537 vpxor %ymm11,%ymm7,%ymm7 5538 vpxor %ymm10,%ymm6,%ymm6 5539 vpxor %ymm9,%ymm5,%ymm5 5540 vpxor %ymm8,%ymm4,%ymm4 5541 vmovdqa %ymm8,0+128(%rbp) 5542 vpsrld $25,%ymm7,%ymm8 5543 vpslld $32-25,%ymm7,%ymm7 5544 vpxor %ymm8,%ymm7,%ymm7 5545 vpsrld $25,%ymm6,%ymm8 5546 vpslld $32-25,%ymm6,%ymm6 5547 vpxor %ymm8,%ymm6,%ymm6 5548 vpsrld $25,%ymm5,%ymm8 5549 vpslld $32-25,%ymm5,%ymm5 5550 vpxor %ymm8,%ymm5,%ymm5 5551 vpsrld $25,%ymm4,%ymm8 5552 vpslld $32-25,%ymm4,%ymm4 5553 vpxor %ymm8,%ymm4,%ymm4 5554 vmovdqa 0+128(%rbp),%ymm8 5555 vpalignr $4,%ymm7,%ymm7,%ymm7 5556 vpalignr $8,%ymm11,%ymm11,%ymm11 5557 vpalignr $12,%ymm15,%ymm15,%ymm15 
5558 vpalignr $4,%ymm6,%ymm6,%ymm6 5559 vpalignr $8,%ymm10,%ymm10,%ymm10 5560 vpalignr $12,%ymm14,%ymm14,%ymm14 5561 vpalignr $4,%ymm5,%ymm5,%ymm5 5562 vpalignr $8,%ymm9,%ymm9,%ymm9 5563 vpalignr $12,%ymm13,%ymm13,%ymm13 5564 vpalignr $4,%ymm4,%ymm4,%ymm4 5565 vpalignr $8,%ymm8,%ymm8,%ymm8 5566 vpalignr $12,%ymm12,%ymm12,%ymm12 5567 vmovdqa %ymm8,0+128(%rbp) 5568 vmovdqa .Lrol16(%rip),%ymm8 5569 vpaddd %ymm7,%ymm3,%ymm3 5570 addq 0+16(%r8),%r10 5571 adcq 8+16(%r8),%r11 5572 adcq $1,%r12 5573 movq 0+0+0(%rbp),%rdx 5574 movq %rdx,%r15 5575 mulxq %r10,%r13,%r14 5576 mulxq %r11,%rax,%rdx 5577 imulq %r12,%r15 5578 addq %rax,%r14 5579 adcq %rdx,%r15 5580 movq 8+0+0(%rbp),%rdx 5581 mulxq %r10,%r10,%rax 5582 addq %r10,%r14 5583 mulxq %r11,%r11,%r9 5584 adcq %r11,%r15 5585 adcq $0,%r9 5586 imulq %r12,%rdx 5587 addq %rax,%r15 5588 adcq %rdx,%r9 5589 movq %r13,%r10 5590 movq %r14,%r11 5591 movq %r15,%r12 5592 andq $3,%r12 5593 movq %r15,%r13 5594 andq $-4,%r13 5595 movq %r9,%r14 5596 shrdq $2,%r9,%r15 5597 shrq $2,%r9 5598 addq %r13,%r15 5599 adcq %r14,%r9 5600 addq %r15,%r10 5601 adcq %r9,%r11 5602 adcq $0,%r12 5603 5604 leaq 32(%r8),%r8 5605 vpaddd %ymm6,%ymm2,%ymm2 5606 vpaddd %ymm5,%ymm1,%ymm1 5607 vpaddd %ymm4,%ymm0,%ymm0 5608 vpxor %ymm3,%ymm15,%ymm15 5609 vpxor %ymm2,%ymm14,%ymm14 5610 vpxor %ymm1,%ymm13,%ymm13 5611 vpxor %ymm0,%ymm12,%ymm12 5612 vpshufb %ymm8,%ymm15,%ymm15 5613 vpshufb %ymm8,%ymm14,%ymm14 5614 vpshufb %ymm8,%ymm13,%ymm13 5615 vpshufb %ymm8,%ymm12,%ymm12 5616 vpaddd %ymm15,%ymm11,%ymm11 5617 vpaddd %ymm14,%ymm10,%ymm10 5618 vpaddd %ymm13,%ymm9,%ymm9 5619 vpaddd 0+128(%rbp),%ymm12,%ymm8 5620 vpxor %ymm11,%ymm7,%ymm7 5621 vpxor %ymm10,%ymm6,%ymm6 5622 vpxor %ymm9,%ymm5,%ymm5 5623 vpxor %ymm8,%ymm4,%ymm4 5624 vmovdqa %ymm8,0+128(%rbp) 5625 vpsrld $20,%ymm7,%ymm8 5626 vpslld $32-20,%ymm7,%ymm7 5627 vpxor %ymm8,%ymm7,%ymm7 5628 vpsrld $20,%ymm6,%ymm8 5629 vpslld $32-20,%ymm6,%ymm6 5630 vpxor %ymm8,%ymm6,%ymm6 5631 vpsrld $20,%ymm5,%ymm8 5632 vpslld $32-20,%ymm5,%ymm5 5633 vpxor %ymm8,%ymm5,%ymm5 5634 vpsrld $20,%ymm4,%ymm8 5635 vpslld $32-20,%ymm4,%ymm4 5636 vpxor %ymm8,%ymm4,%ymm4 5637 vmovdqa .Lrol8(%rip),%ymm8 5638 vpaddd %ymm7,%ymm3,%ymm3 5639 vpaddd %ymm6,%ymm2,%ymm2 5640 vpaddd %ymm5,%ymm1,%ymm1 5641 vpaddd %ymm4,%ymm0,%ymm0 5642 vpxor %ymm3,%ymm15,%ymm15 5643 vpxor %ymm2,%ymm14,%ymm14 5644 vpxor %ymm1,%ymm13,%ymm13 5645 vpxor %ymm0,%ymm12,%ymm12 5646 vpshufb %ymm8,%ymm15,%ymm15 5647 vpshufb %ymm8,%ymm14,%ymm14 5648 vpshufb %ymm8,%ymm13,%ymm13 5649 vpshufb %ymm8,%ymm12,%ymm12 5650 vpaddd %ymm15,%ymm11,%ymm11 5651 vpaddd %ymm14,%ymm10,%ymm10 5652 vpaddd %ymm13,%ymm9,%ymm9 5653 vpaddd 0+128(%rbp),%ymm12,%ymm8 5654 vpxor %ymm11,%ymm7,%ymm7 5655 vpxor %ymm10,%ymm6,%ymm6 5656 vpxor %ymm9,%ymm5,%ymm5 5657 vpxor %ymm8,%ymm4,%ymm4 5658 vmovdqa %ymm8,0+128(%rbp) 5659 vpsrld $25,%ymm7,%ymm8 5660 vpslld $32-25,%ymm7,%ymm7 5661 vpxor %ymm8,%ymm7,%ymm7 5662 vpsrld $25,%ymm6,%ymm8 5663 vpslld $32-25,%ymm6,%ymm6 5664 vpxor %ymm8,%ymm6,%ymm6 5665 vpsrld $25,%ymm5,%ymm8 5666 vpslld $32-25,%ymm5,%ymm5 5667 vpxor %ymm8,%ymm5,%ymm5 5668 vpsrld $25,%ymm4,%ymm8 5669 vpslld $32-25,%ymm4,%ymm4 5670 vpxor %ymm8,%ymm4,%ymm4 5671 vmovdqa 0+128(%rbp),%ymm8 5672 vpalignr $12,%ymm7,%ymm7,%ymm7 5673 vpalignr $8,%ymm11,%ymm11,%ymm11 5674 vpalignr $4,%ymm15,%ymm15,%ymm15 5675 vpalignr $12,%ymm6,%ymm6,%ymm6 5676 vpalignr $8,%ymm10,%ymm10,%ymm10 5677 vpalignr $4,%ymm14,%ymm14,%ymm14 5678 vpalignr $12,%ymm5,%ymm5,%ymm5 5679 vpalignr $8,%ymm9,%ymm9,%ymm9 5680 vpalignr $4,%ymm13,%ymm13,%ymm13 5681 vpalignr 
$12,%ymm4,%ymm4,%ymm4 5682 vpalignr $8,%ymm8,%ymm8,%ymm8 5683 vpalignr $4,%ymm12,%ymm12,%ymm12 5684 5685 incq %rcx 5686 cmpq $4,%rcx 5687 jl .Lopen_avx2_tail_512_rounds_and_x2hash 5688 cmpq $10,%rcx 5689 jne .Lopen_avx2_tail_512_rounds_and_x1hash 5690 movq %rbx,%rcx 5691 subq $384,%rcx 5692 andq $-16,%rcx 5693.Lopen_avx2_tail_512_hash: 5694 testq %rcx,%rcx 5695 je .Lopen_avx2_tail_512_done 5696 addq 0+0(%r8),%r10 5697 adcq 8+0(%r8),%r11 5698 adcq $1,%r12 5699 movq 0+0+0(%rbp),%rdx 5700 movq %rdx,%r15 5701 mulxq %r10,%r13,%r14 5702 mulxq %r11,%rax,%rdx 5703 imulq %r12,%r15 5704 addq %rax,%r14 5705 adcq %rdx,%r15 5706 movq 8+0+0(%rbp),%rdx 5707 mulxq %r10,%r10,%rax 5708 addq %r10,%r14 5709 mulxq %r11,%r11,%r9 5710 adcq %r11,%r15 5711 adcq $0,%r9 5712 imulq %r12,%rdx 5713 addq %rax,%r15 5714 adcq %rdx,%r9 5715 movq %r13,%r10 5716 movq %r14,%r11 5717 movq %r15,%r12 5718 andq $3,%r12 5719 movq %r15,%r13 5720 andq $-4,%r13 5721 movq %r9,%r14 5722 shrdq $2,%r9,%r15 5723 shrq $2,%r9 5724 addq %r13,%r15 5725 adcq %r14,%r9 5726 addq %r15,%r10 5727 adcq %r9,%r11 5728 adcq $0,%r12 5729 5730 leaq 16(%r8),%r8 5731 subq $16,%rcx 5732 jmp .Lopen_avx2_tail_512_hash 5733.Lopen_avx2_tail_512_done: 5734 vpaddd .Lchacha20_consts(%rip),%ymm3,%ymm3 5735 vpaddd 0+64(%rbp),%ymm7,%ymm7 5736 vpaddd 0+96(%rbp),%ymm11,%ymm11 5737 vpaddd 0+256(%rbp),%ymm15,%ymm15 5738 vpaddd .Lchacha20_consts(%rip),%ymm2,%ymm2 5739 vpaddd 0+64(%rbp),%ymm6,%ymm6 5740 vpaddd 0+96(%rbp),%ymm10,%ymm10 5741 vpaddd 0+224(%rbp),%ymm14,%ymm14 5742 vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 5743 vpaddd 0+64(%rbp),%ymm5,%ymm5 5744 vpaddd 0+96(%rbp),%ymm9,%ymm9 5745 vpaddd 0+192(%rbp),%ymm13,%ymm13 5746 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 5747 vpaddd 0+64(%rbp),%ymm4,%ymm4 5748 vpaddd 0+96(%rbp),%ymm8,%ymm8 5749 vpaddd 0+160(%rbp),%ymm12,%ymm12 5750 5751 vmovdqa %ymm0,0+128(%rbp) 5752 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 5753 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 5754 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 5755 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 5756 vpxor 0+0(%rsi),%ymm0,%ymm0 5757 vpxor 32+0(%rsi),%ymm3,%ymm3 5758 vpxor 64+0(%rsi),%ymm7,%ymm7 5759 vpxor 96+0(%rsi),%ymm11,%ymm11 5760 vmovdqu %ymm0,0+0(%rdi) 5761 vmovdqu %ymm3,32+0(%rdi) 5762 vmovdqu %ymm7,64+0(%rdi) 5763 vmovdqu %ymm11,96+0(%rdi) 5764 5765 vmovdqa 0+128(%rbp),%ymm0 5766 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 5767 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 5768 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 5769 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 5770 vpxor 0+128(%rsi),%ymm3,%ymm3 5771 vpxor 32+128(%rsi),%ymm2,%ymm2 5772 vpxor 64+128(%rsi),%ymm6,%ymm6 5773 vpxor 96+128(%rsi),%ymm10,%ymm10 5774 vmovdqu %ymm3,0+128(%rdi) 5775 vmovdqu %ymm2,32+128(%rdi) 5776 vmovdqu %ymm6,64+128(%rdi) 5777 vmovdqu %ymm10,96+128(%rdi) 5778 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 5779 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 5780 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 5781 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 5782 vpxor 0+256(%rsi),%ymm3,%ymm3 5783 vpxor 32+256(%rsi),%ymm1,%ymm1 5784 vpxor 64+256(%rsi),%ymm5,%ymm5 5785 vpxor 96+256(%rsi),%ymm9,%ymm9 5786 vmovdqu %ymm3,0+256(%rdi) 5787 vmovdqu %ymm1,32+256(%rdi) 5788 vmovdqu %ymm5,64+256(%rdi) 5789 vmovdqu %ymm9,96+256(%rdi) 5790 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 5791 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 5792 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 5793 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 5794 vmovdqa %ymm3,%ymm8 5795 5796 leaq 384(%rsi),%rsi 5797 leaq 384(%rdi),%rdi 5798 subq $384,%rbx 5799.Lopen_avx2_tail_128_xor: 5800 cmpq $32,%rbx 5801 jb .Lopen_avx2_tail_32_xor 5802 subq $32,%rbx 
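/* Remaining-output loop: XOR full 32-byte keystream blocks (rotating through %ymm0, %ymm4, %ymm8, %ymm12) with the input at (%rsi) and store to (%rdi); a final 16-byte block and any sub-16-byte tail are handled below via the SSE tail path. */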
 vpxor (%rsi),%ymm0,%ymm0
 vmovdqu %ymm0,(%rdi)
 leaq 32(%rsi),%rsi
 leaq 32(%rdi),%rdi
 vmovdqa %ymm4,%ymm0
 vmovdqa %ymm8,%ymm4
 vmovdqa %ymm12,%ymm8
 jmp .Lopen_avx2_tail_128_xor
.Lopen_avx2_tail_32_xor:
 cmpq $16,%rbx
 vmovdqa %xmm0,%xmm1
 jb .Lopen_avx2_exit
 subq $16,%rbx

 vpxor (%rsi),%xmm0,%xmm1
 vmovdqu %xmm1,(%rdi)
 leaq 16(%rsi),%rsi
 leaq 16(%rdi),%rdi
 vperm2i128 $0x11,%ymm0,%ymm0,%ymm0
 vmovdqa %xmm0,%xmm1
.Lopen_avx2_exit:
 vzeroupper
 jmp .Lopen_sse_tail_16

.Lopen_avx2_192:
 vmovdqa %ymm0,%ymm1
 vmovdqa %ymm0,%ymm2
 vmovdqa %ymm4,%ymm5
 vmovdqa %ymm4,%ymm6
 vmovdqa %ymm8,%ymm9
 vmovdqa %ymm8,%ymm10
 vpaddd .Lavx2_inc(%rip),%ymm12,%ymm13
 vmovdqa %ymm12,%ymm11
 vmovdqa %ymm13,%ymm15
 movq $10,%r10
.Lopen_avx2_192_rounds:
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm0,%ymm12,%ymm12
 vpshufb .Lrol16(%rip),%ymm12,%ymm12
 vpaddd %ymm12,%ymm8,%ymm8
 vpxor %ymm8,%ymm4,%ymm4
 vpsrld $20,%ymm4,%ymm3
 vpslld $12,%ymm4,%ymm4
 vpxor %ymm3,%ymm4,%ymm4
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm0,%ymm12,%ymm12
 vpshufb .Lrol8(%rip),%ymm12,%ymm12
 vpaddd %ymm12,%ymm8,%ymm8
 vpxor %ymm8,%ymm4,%ymm4
 vpslld $7,%ymm4,%ymm3
 vpsrld $25,%ymm4,%ymm4
 vpxor %ymm3,%ymm4,%ymm4
 vpalignr $12,%ymm12,%ymm12,%ymm12
 vpalignr $8,%ymm8,%ymm8,%ymm8
 vpalignr $4,%ymm4,%ymm4,%ymm4
 vpaddd %ymm5,%ymm1,%ymm1
 vpxor %ymm1,%ymm13,%ymm13
 vpshufb .Lrol16(%rip),%ymm13,%ymm13
 vpaddd %ymm13,%ymm9,%ymm9
 vpxor %ymm9,%ymm5,%ymm5
 vpsrld $20,%ymm5,%ymm3
 vpslld $12,%ymm5,%ymm5
 vpxor %ymm3,%ymm5,%ymm5
 vpaddd %ymm5,%ymm1,%ymm1
 vpxor %ymm1,%ymm13,%ymm13
 vpshufb .Lrol8(%rip),%ymm13,%ymm13
 vpaddd %ymm13,%ymm9,%ymm9
 vpxor %ymm9,%ymm5,%ymm5
 vpslld $7,%ymm5,%ymm3
 vpsrld $25,%ymm5,%ymm5
 vpxor %ymm3,%ymm5,%ymm5
 vpalignr $12,%ymm13,%ymm13,%ymm13
 vpalignr $8,%ymm9,%ymm9,%ymm9
 vpalignr $4,%ymm5,%ymm5,%ymm5
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm0,%ymm12,%ymm12
 vpshufb .Lrol16(%rip),%ymm12,%ymm12
 vpaddd %ymm12,%ymm8,%ymm8
 vpxor %ymm8,%ymm4,%ymm4
 vpsrld $20,%ymm4,%ymm3
 vpslld $12,%ymm4,%ymm4
 vpxor %ymm3,%ymm4,%ymm4
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm0,%ymm12,%ymm12
 vpshufb .Lrol8(%rip),%ymm12,%ymm12
 vpaddd %ymm12,%ymm8,%ymm8
 vpxor %ymm8,%ymm4,%ymm4
 vpslld $7,%ymm4,%ymm3
 vpsrld $25,%ymm4,%ymm4
 vpxor %ymm3,%ymm4,%ymm4
 vpalignr $4,%ymm12,%ymm12,%ymm12
 vpalignr $8,%ymm8,%ymm8,%ymm8
 vpalignr $12,%ymm4,%ymm4,%ymm4
 vpaddd %ymm5,%ymm1,%ymm1
 vpxor %ymm1,%ymm13,%ymm13
 vpshufb .Lrol16(%rip),%ymm13,%ymm13
 vpaddd %ymm13,%ymm9,%ymm9
 vpxor %ymm9,%ymm5,%ymm5
 vpsrld $20,%ymm5,%ymm3
 vpslld $12,%ymm5,%ymm5
 vpxor %ymm3,%ymm5,%ymm5
 vpaddd %ymm5,%ymm1,%ymm1
 vpxor %ymm1,%ymm13,%ymm13
 vpshufb .Lrol8(%rip),%ymm13,%ymm13
 vpaddd %ymm13,%ymm9,%ymm9
 vpxor %ymm9,%ymm5,%ymm5
 vpslld $7,%ymm5,%ymm3
 vpsrld $25,%ymm5,%ymm5
 vpxor %ymm3,%ymm5,%ymm5
 vpalignr $4,%ymm13,%ymm13,%ymm13
 vpalignr $8,%ymm9,%ymm9,%ymm9
 vpalignr $12,%ymm5,%ymm5,%ymm5

 decq %r10
 jne .Lopen_avx2_192_rounds
 vpaddd %ymm2,%ymm0,%ymm0
 vpaddd %ymm2,%ymm1,%ymm1
 vpaddd %ymm6,%ymm4,%ymm4
 vpaddd %ymm6,%ymm5,%ymm5
 vpaddd %ymm10,%ymm8,%ymm8
 vpaddd %ymm10,%ymm9,%ymm9
 vpaddd %ymm11,%ymm12,%ymm12
 vpaddd %ymm15,%ymm13,%ymm13
 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3

 vpand .Lclamp(%rip),%ymm3,%ymm3
 vmovdqa %ymm3,0+0(%rbp)

 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0
 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4
 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8
 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12
 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1
 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5
.Lopen_avx2_short:
 movq %r8,%r8
 call poly_hash_ad_internal
.Lopen_avx2_short_hash_and_xor_loop:
 cmpq $32,%rbx
 jb .Lopen_avx2_short_tail_32
 subq $32,%rbx
 addq 0+0(%rsi),%r10
 adcq 8+0(%rsi),%r11
 adcq $1,%r12
 movq 0+0+0(%rbp),%rax
 movq %rax,%r15
 mulq %r10
 movq %rax,%r13
 movq %rdx,%r14
 movq 0+0+0(%rbp),%rax
 mulq %r11
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 movq 8+0+0(%rbp),%rax
 movq %rax,%r9
 mulq %r10
 addq %rax,%r14
 adcq $0,%rdx
 movq %rdx,%r10
 movq 8+0+0(%rbp),%rax
 mulq %r11
 addq %rax,%r15
 adcq $0,%rdx
 imulq %r12,%r9
 addq %r10,%r15
 adcq %rdx,%r9
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r15
 adcq %r14,%r9
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12
 addq 0+16(%rsi),%r10
 adcq 8+16(%rsi),%r11
 adcq $1,%r12
 movq 0+0+0(%rbp),%rax
 movq %rax,%r15
 mulq %r10
 movq %rax,%r13
 movq %rdx,%r14
 movq 0+0+0(%rbp),%rax
 mulq %r11
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 movq 8+0+0(%rbp),%rax
 movq %rax,%r9
 mulq %r10
 addq %rax,%r14
 adcq $0,%rdx
 movq %rdx,%r10
 movq 8+0+0(%rbp),%rax
 mulq %r11
 addq %rax,%r15
 adcq $0,%rdx
 imulq %r12,%r9
 addq %r10,%r15
 adcq %rdx,%r9
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r15
 adcq %r14,%r9
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12


 vpxor (%rsi),%ymm0,%ymm0
 vmovdqu %ymm0,(%rdi)
 leaq 32(%rsi),%rsi
 leaq 32(%rdi),%rdi

 vmovdqa %ymm4,%ymm0
 vmovdqa %ymm8,%ymm4
 vmovdqa %ymm12,%ymm8
 vmovdqa %ymm1,%ymm12
 vmovdqa %ymm5,%ymm1
 vmovdqa %ymm9,%ymm5
 vmovdqa %ymm13,%ymm9
 vmovdqa %ymm2,%ymm13
 vmovdqa %ymm6,%ymm2
 jmp .Lopen_avx2_short_hash_and_xor_loop
.Lopen_avx2_short_tail_32:
 cmpq $16,%rbx
 vmovdqa %xmm0,%xmm1
 jb .Lopen_avx2_short_tail_32_exit
 subq $16,%rbx
 addq 0+0(%rsi),%r10
 adcq 8+0(%rsi),%r11
 adcq $1,%r12
 movq 0+0+0(%rbp),%rax
 movq %rax,%r15
 mulq %r10
 movq %rax,%r13
 movq %rdx,%r14
 movq 0+0+0(%rbp),%rax
 mulq %r11
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 movq 8+0+0(%rbp),%rax
 movq %rax,%r9
 mulq %r10
 addq %rax,%r14
 adcq $0,%rdx
 movq %rdx,%r10
 movq 8+0+0(%rbp),%rax
 mulq %r11
 addq %rax,%r15
 adcq $0,%rdx
 imulq %r12,%r9
 addq %r10,%r15
 adcq %rdx,%r9
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r15
 adcq %r14,%r9
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12

 vpxor (%rsi),%xmm0,%xmm3
 vmovdqu %xmm3,(%rdi)
 leaq 16(%rsi),%rsi
 leaq 16(%rdi),%rdi
 vextracti128 $1,%ymm0,%xmm1
.Lopen_avx2_short_tail_32_exit:
 vzeroupper
 jmp .Lopen_sse_tail_16

.Lopen_avx2_320:
 vmovdqa %ymm0,%ymm1
 vmovdqa %ymm0,%ymm2
 vmovdqa %ymm4,%ymm5
 vmovdqa %ymm4,%ymm6
 vmovdqa %ymm8,%ymm9
 vmovdqa %ymm8,%ymm10
 vpaddd .Lavx2_inc(%rip),%ymm12,%ymm13
 vpaddd .Lavx2_inc(%rip),%ymm13,%ymm14
 vmovdqa %ymm4,%ymm7
 vmovdqa %ymm8,%ymm11
 vmovdqa %ymm12,0+160(%rbp)
 vmovdqa %ymm13,0+192(%rbp)
 vmovdqa %ymm14,0+224(%rbp)
 movq $10,%r10
.Lopen_avx2_320_rounds:
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm0,%ymm12,%ymm12
 vpshufb .Lrol16(%rip),%ymm12,%ymm12
 vpaddd %ymm12,%ymm8,%ymm8
 vpxor %ymm8,%ymm4,%ymm4
 vpsrld $20,%ymm4,%ymm3
 vpslld $12,%ymm4,%ymm4
 vpxor %ymm3,%ymm4,%ymm4
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm0,%ymm12,%ymm12
 vpshufb .Lrol8(%rip),%ymm12,%ymm12
 vpaddd %ymm12,%ymm8,%ymm8
 vpxor %ymm8,%ymm4,%ymm4
 vpslld $7,%ymm4,%ymm3
 vpsrld $25,%ymm4,%ymm4
 vpxor %ymm3,%ymm4,%ymm4
 vpalignr $12,%ymm12,%ymm12,%ymm12
 vpalignr $8,%ymm8,%ymm8,%ymm8
 vpalignr $4,%ymm4,%ymm4,%ymm4
 vpaddd %ymm5,%ymm1,%ymm1
 vpxor %ymm1,%ymm13,%ymm13
 vpshufb .Lrol16(%rip),%ymm13,%ymm13
 vpaddd %ymm13,%ymm9,%ymm9
 vpxor %ymm9,%ymm5,%ymm5
 vpsrld $20,%ymm5,%ymm3
 vpslld $12,%ymm5,%ymm5
 vpxor %ymm3,%ymm5,%ymm5
 vpaddd %ymm5,%ymm1,%ymm1
 vpxor %ymm1,%ymm13,%ymm13
 vpshufb .Lrol8(%rip),%ymm13,%ymm13
 vpaddd %ymm13,%ymm9,%ymm9
 vpxor %ymm9,%ymm5,%ymm5
 vpslld $7,%ymm5,%ymm3
 vpsrld $25,%ymm5,%ymm5
 vpxor %ymm3,%ymm5,%ymm5
 vpalignr $12,%ymm13,%ymm13,%ymm13
 vpalignr $8,%ymm9,%ymm9,%ymm9
 vpalignr $4,%ymm5,%ymm5,%ymm5
 vpaddd %ymm6,%ymm2,%ymm2
 vpxor %ymm2,%ymm14,%ymm14
 vpshufb .Lrol16(%rip),%ymm14,%ymm14
 vpaddd %ymm14,%ymm10,%ymm10
 vpxor %ymm10,%ymm6,%ymm6
 vpsrld $20,%ymm6,%ymm3
 vpslld $12,%ymm6,%ymm6
 vpxor %ymm3,%ymm6,%ymm6
 vpaddd %ymm6,%ymm2,%ymm2
 vpxor %ymm2,%ymm14,%ymm14
 vpshufb .Lrol8(%rip),%ymm14,%ymm14
 vpaddd %ymm14,%ymm10,%ymm10
 vpxor %ymm10,%ymm6,%ymm6
 vpslld $7,%ymm6,%ymm3
 vpsrld $25,%ymm6,%ymm6
 vpxor %ymm3,%ymm6,%ymm6
 vpalignr $12,%ymm14,%ymm14,%ymm14
 vpalignr $8,%ymm10,%ymm10,%ymm10
 vpalignr $4,%ymm6,%ymm6,%ymm6
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm0,%ymm12,%ymm12
 vpshufb .Lrol16(%rip),%ymm12,%ymm12
 vpaddd %ymm12,%ymm8,%ymm8
 vpxor %ymm8,%ymm4,%ymm4
 vpsrld $20,%ymm4,%ymm3
 vpslld $12,%ymm4,%ymm4
 vpxor %ymm3,%ymm4,%ymm4
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm0,%ymm12,%ymm12
 vpshufb .Lrol8(%rip),%ymm12,%ymm12
 vpaddd %ymm12,%ymm8,%ymm8
 vpxor %ymm8,%ymm4,%ymm4
 vpslld $7,%ymm4,%ymm3
 vpsrld $25,%ymm4,%ymm4
 vpxor %ymm3,%ymm4,%ymm4
 vpalignr $4,%ymm12,%ymm12,%ymm12
 vpalignr $8,%ymm8,%ymm8,%ymm8
 vpalignr $12,%ymm4,%ymm4,%ymm4
 vpaddd %ymm5,%ymm1,%ymm1
 vpxor %ymm1,%ymm13,%ymm13
 vpshufb .Lrol16(%rip),%ymm13,%ymm13
 vpaddd %ymm13,%ymm9,%ymm9
 vpxor %ymm9,%ymm5,%ymm5
 vpsrld $20,%ymm5,%ymm3
 vpslld $12,%ymm5,%ymm5
 vpxor %ymm3,%ymm5,%ymm5
 vpaddd %ymm5,%ymm1,%ymm1
 vpxor %ymm1,%ymm13,%ymm13
 vpshufb .Lrol8(%rip),%ymm13,%ymm13
 vpaddd %ymm13,%ymm9,%ymm9
 vpxor %ymm9,%ymm5,%ymm5
 vpslld $7,%ymm5,%ymm3
 vpsrld $25,%ymm5,%ymm5
 vpxor %ymm3,%ymm5,%ymm5
 vpalignr $4,%ymm13,%ymm13,%ymm13
 vpalignr $8,%ymm9,%ymm9,%ymm9
 vpalignr $12,%ymm5,%ymm5,%ymm5
 vpaddd %ymm6,%ymm2,%ymm2
 vpxor %ymm2,%ymm14,%ymm14
 vpshufb .Lrol16(%rip),%ymm14,%ymm14
 vpaddd %ymm14,%ymm10,%ymm10
 vpxor %ymm10,%ymm6,%ymm6
 vpsrld $20,%ymm6,%ymm3
 vpslld $12,%ymm6,%ymm6
 vpxor %ymm3,%ymm6,%ymm6
 vpaddd %ymm6,%ymm2,%ymm2
 vpxor %ymm2,%ymm14,%ymm14
 vpshufb .Lrol8(%rip),%ymm14,%ymm14
 vpaddd %ymm14,%ymm10,%ymm10
 vpxor %ymm10,%ymm6,%ymm6
 vpslld $7,%ymm6,%ymm3
 vpsrld $25,%ymm6,%ymm6
 vpxor %ymm3,%ymm6,%ymm6
 vpalignr $4,%ymm14,%ymm14,%ymm14
 vpalignr $8,%ymm10,%ymm10,%ymm10
 vpalignr $12,%ymm6,%ymm6,%ymm6

 decq %r10
 jne .Lopen_avx2_320_rounds
 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0
 vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1
 vpaddd .Lchacha20_consts(%rip),%ymm2,%ymm2
 vpaddd %ymm7,%ymm4,%ymm4
 vpaddd %ymm7,%ymm5,%ymm5
 vpaddd %ymm7,%ymm6,%ymm6
 vpaddd %ymm11,%ymm8,%ymm8
 vpaddd %ymm11,%ymm9,%ymm9
 vpaddd %ymm11,%ymm10,%ymm10
 vpaddd 0+160(%rbp),%ymm12,%ymm12
 vpaddd 0+192(%rbp),%ymm13,%ymm13
 vpaddd 0+224(%rbp),%ymm14,%ymm14
 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3

 vpand .Lclamp(%rip),%ymm3,%ymm3
 vmovdqa %ymm3,0+0(%rbp)

 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0
 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4
 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8
 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12
 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1
 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5
 vperm2i128 $0x02,%ymm2,%ymm6,%ymm9
 vperm2i128 $0x02,%ymm10,%ymm14,%ymm13
 vperm2i128 $0x13,%ymm2,%ymm6,%ymm2
 vperm2i128 $0x13,%ymm10,%ymm14,%ymm6
 jmp .Lopen_avx2_short
.size chacha20_poly1305_open_avx2, .-chacha20_poly1305_open_avx2
.cfi_endproc


.type chacha20_poly1305_seal_avx2,@function
.align 64
chacha20_poly1305_seal_avx2:
.cfi_startproc


.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-16
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-24
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
.cfi_adjust_cfa_offset 8
.cfi_offset %r9,-64
.cfi_adjust_cfa_offset 288 + 32

 vzeroupper
 vmovdqa .Lchacha20_consts(%rip),%ymm0
 vbroadcasti128 0(%r9),%ymm4
 vbroadcasti128 16(%r9),%ymm8
 vbroadcasti128 32(%r9),%ymm12
 vpaddd .Lavx2_init(%rip),%ymm12,%ymm12
 cmpq $192,%rbx
 jbe .Lseal_avx2_192
 cmpq $320,%rbx
 jbe .Lseal_avx2_320
 vmovdqa %ymm0,%ymm1
 vmovdqa %ymm0,%ymm2
 vmovdqa %ymm0,%ymm3
 vmovdqa %ymm4,%ymm5
 vmovdqa %ymm4,%ymm6
 vmovdqa %ymm4,%ymm7
 vmovdqa %ymm4,0+64(%rbp)
 vmovdqa %ymm8,%ymm9
 vmovdqa %ymm8,%ymm10
 vmovdqa %ymm8,%ymm11
 vmovdqa %ymm8,0+96(%rbp)
 vmovdqa %ymm12,%ymm15
 vpaddd .Lavx2_inc(%rip),%ymm15,%ymm14
 vpaddd .Lavx2_inc(%rip),%ymm14,%ymm13
 vpaddd .Lavx2_inc(%rip),%ymm13,%ymm12
 vmovdqa %ymm12,0+160(%rbp)
 vmovdqa %ymm13,0+192(%rbp)
 vmovdqa %ymm14,0+224(%rbp)
 vmovdqa %ymm15,0+256(%rbp)
 movq $10,%r10
.Lseal_avx2_init_rounds:
 vmovdqa %ymm8,0+128(%rbp)
 vmovdqa .Lrol16(%rip),%ymm8
 vpaddd %ymm7,%ymm3,%ymm3
 vpaddd %ymm6,%ymm2,%ymm2
 vpaddd %ymm5,%ymm1,%ymm1
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm3,%ymm15,%ymm15
6321 vpxor %ymm2,%ymm14,%ymm14 6322 vpxor %ymm1,%ymm13,%ymm13 6323 vpxor %ymm0,%ymm12,%ymm12 6324 vpshufb %ymm8,%ymm15,%ymm15 6325 vpshufb %ymm8,%ymm14,%ymm14 6326 vpshufb %ymm8,%ymm13,%ymm13 6327 vpshufb %ymm8,%ymm12,%ymm12 6328 vpaddd %ymm15,%ymm11,%ymm11 6329 vpaddd %ymm14,%ymm10,%ymm10 6330 vpaddd %ymm13,%ymm9,%ymm9 6331 vpaddd 0+128(%rbp),%ymm12,%ymm8 6332 vpxor %ymm11,%ymm7,%ymm7 6333 vpxor %ymm10,%ymm6,%ymm6 6334 vpxor %ymm9,%ymm5,%ymm5 6335 vpxor %ymm8,%ymm4,%ymm4 6336 vmovdqa %ymm8,0+128(%rbp) 6337 vpsrld $20,%ymm7,%ymm8 6338 vpslld $32-20,%ymm7,%ymm7 6339 vpxor %ymm8,%ymm7,%ymm7 6340 vpsrld $20,%ymm6,%ymm8 6341 vpslld $32-20,%ymm6,%ymm6 6342 vpxor %ymm8,%ymm6,%ymm6 6343 vpsrld $20,%ymm5,%ymm8 6344 vpslld $32-20,%ymm5,%ymm5 6345 vpxor %ymm8,%ymm5,%ymm5 6346 vpsrld $20,%ymm4,%ymm8 6347 vpslld $32-20,%ymm4,%ymm4 6348 vpxor %ymm8,%ymm4,%ymm4 6349 vmovdqa .Lrol8(%rip),%ymm8 6350 vpaddd %ymm7,%ymm3,%ymm3 6351 vpaddd %ymm6,%ymm2,%ymm2 6352 vpaddd %ymm5,%ymm1,%ymm1 6353 vpaddd %ymm4,%ymm0,%ymm0 6354 vpxor %ymm3,%ymm15,%ymm15 6355 vpxor %ymm2,%ymm14,%ymm14 6356 vpxor %ymm1,%ymm13,%ymm13 6357 vpxor %ymm0,%ymm12,%ymm12 6358 vpshufb %ymm8,%ymm15,%ymm15 6359 vpshufb %ymm8,%ymm14,%ymm14 6360 vpshufb %ymm8,%ymm13,%ymm13 6361 vpshufb %ymm8,%ymm12,%ymm12 6362 vpaddd %ymm15,%ymm11,%ymm11 6363 vpaddd %ymm14,%ymm10,%ymm10 6364 vpaddd %ymm13,%ymm9,%ymm9 6365 vpaddd 0+128(%rbp),%ymm12,%ymm8 6366 vpxor %ymm11,%ymm7,%ymm7 6367 vpxor %ymm10,%ymm6,%ymm6 6368 vpxor %ymm9,%ymm5,%ymm5 6369 vpxor %ymm8,%ymm4,%ymm4 6370 vmovdqa %ymm8,0+128(%rbp) 6371 vpsrld $25,%ymm7,%ymm8 6372 vpslld $32-25,%ymm7,%ymm7 6373 vpxor %ymm8,%ymm7,%ymm7 6374 vpsrld $25,%ymm6,%ymm8 6375 vpslld $32-25,%ymm6,%ymm6 6376 vpxor %ymm8,%ymm6,%ymm6 6377 vpsrld $25,%ymm5,%ymm8 6378 vpslld $32-25,%ymm5,%ymm5 6379 vpxor %ymm8,%ymm5,%ymm5 6380 vpsrld $25,%ymm4,%ymm8 6381 vpslld $32-25,%ymm4,%ymm4 6382 vpxor %ymm8,%ymm4,%ymm4 6383 vmovdqa 0+128(%rbp),%ymm8 6384 vpalignr $4,%ymm7,%ymm7,%ymm7 6385 vpalignr $8,%ymm11,%ymm11,%ymm11 6386 vpalignr $12,%ymm15,%ymm15,%ymm15 6387 vpalignr $4,%ymm6,%ymm6,%ymm6 6388 vpalignr $8,%ymm10,%ymm10,%ymm10 6389 vpalignr $12,%ymm14,%ymm14,%ymm14 6390 vpalignr $4,%ymm5,%ymm5,%ymm5 6391 vpalignr $8,%ymm9,%ymm9,%ymm9 6392 vpalignr $12,%ymm13,%ymm13,%ymm13 6393 vpalignr $4,%ymm4,%ymm4,%ymm4 6394 vpalignr $8,%ymm8,%ymm8,%ymm8 6395 vpalignr $12,%ymm12,%ymm12,%ymm12 6396 vmovdqa %ymm8,0+128(%rbp) 6397 vmovdqa .Lrol16(%rip),%ymm8 6398 vpaddd %ymm7,%ymm3,%ymm3 6399 vpaddd %ymm6,%ymm2,%ymm2 6400 vpaddd %ymm5,%ymm1,%ymm1 6401 vpaddd %ymm4,%ymm0,%ymm0 6402 vpxor %ymm3,%ymm15,%ymm15 6403 vpxor %ymm2,%ymm14,%ymm14 6404 vpxor %ymm1,%ymm13,%ymm13 6405 vpxor %ymm0,%ymm12,%ymm12 6406 vpshufb %ymm8,%ymm15,%ymm15 6407 vpshufb %ymm8,%ymm14,%ymm14 6408 vpshufb %ymm8,%ymm13,%ymm13 6409 vpshufb %ymm8,%ymm12,%ymm12 6410 vpaddd %ymm15,%ymm11,%ymm11 6411 vpaddd %ymm14,%ymm10,%ymm10 6412 vpaddd %ymm13,%ymm9,%ymm9 6413 vpaddd 0+128(%rbp),%ymm12,%ymm8 6414 vpxor %ymm11,%ymm7,%ymm7 6415 vpxor %ymm10,%ymm6,%ymm6 6416 vpxor %ymm9,%ymm5,%ymm5 6417 vpxor %ymm8,%ymm4,%ymm4 6418 vmovdqa %ymm8,0+128(%rbp) 6419 vpsrld $20,%ymm7,%ymm8 6420 vpslld $32-20,%ymm7,%ymm7 6421 vpxor %ymm8,%ymm7,%ymm7 6422 vpsrld $20,%ymm6,%ymm8 6423 vpslld $32-20,%ymm6,%ymm6 6424 vpxor %ymm8,%ymm6,%ymm6 6425 vpsrld $20,%ymm5,%ymm8 6426 vpslld $32-20,%ymm5,%ymm5 6427 vpxor %ymm8,%ymm5,%ymm5 6428 vpsrld $20,%ymm4,%ymm8 6429 vpslld $32-20,%ymm4,%ymm4 6430 vpxor %ymm8,%ymm4,%ymm4 6431 vmovdqa .Lrol8(%rip),%ymm8 6432 vpaddd %ymm7,%ymm3,%ymm3 6433 vpaddd %ymm6,%ymm2,%ymm2 6434 vpaddd 
%ymm5,%ymm1,%ymm1 6435 vpaddd %ymm4,%ymm0,%ymm0 6436 vpxor %ymm3,%ymm15,%ymm15 6437 vpxor %ymm2,%ymm14,%ymm14 6438 vpxor %ymm1,%ymm13,%ymm13 6439 vpxor %ymm0,%ymm12,%ymm12 6440 vpshufb %ymm8,%ymm15,%ymm15 6441 vpshufb %ymm8,%ymm14,%ymm14 6442 vpshufb %ymm8,%ymm13,%ymm13 6443 vpshufb %ymm8,%ymm12,%ymm12 6444 vpaddd %ymm15,%ymm11,%ymm11 6445 vpaddd %ymm14,%ymm10,%ymm10 6446 vpaddd %ymm13,%ymm9,%ymm9 6447 vpaddd 0+128(%rbp),%ymm12,%ymm8 6448 vpxor %ymm11,%ymm7,%ymm7 6449 vpxor %ymm10,%ymm6,%ymm6 6450 vpxor %ymm9,%ymm5,%ymm5 6451 vpxor %ymm8,%ymm4,%ymm4 6452 vmovdqa %ymm8,0+128(%rbp) 6453 vpsrld $25,%ymm7,%ymm8 6454 vpslld $32-25,%ymm7,%ymm7 6455 vpxor %ymm8,%ymm7,%ymm7 6456 vpsrld $25,%ymm6,%ymm8 6457 vpslld $32-25,%ymm6,%ymm6 6458 vpxor %ymm8,%ymm6,%ymm6 6459 vpsrld $25,%ymm5,%ymm8 6460 vpslld $32-25,%ymm5,%ymm5 6461 vpxor %ymm8,%ymm5,%ymm5 6462 vpsrld $25,%ymm4,%ymm8 6463 vpslld $32-25,%ymm4,%ymm4 6464 vpxor %ymm8,%ymm4,%ymm4 6465 vmovdqa 0+128(%rbp),%ymm8 6466 vpalignr $12,%ymm7,%ymm7,%ymm7 6467 vpalignr $8,%ymm11,%ymm11,%ymm11 6468 vpalignr $4,%ymm15,%ymm15,%ymm15 6469 vpalignr $12,%ymm6,%ymm6,%ymm6 6470 vpalignr $8,%ymm10,%ymm10,%ymm10 6471 vpalignr $4,%ymm14,%ymm14,%ymm14 6472 vpalignr $12,%ymm5,%ymm5,%ymm5 6473 vpalignr $8,%ymm9,%ymm9,%ymm9 6474 vpalignr $4,%ymm13,%ymm13,%ymm13 6475 vpalignr $12,%ymm4,%ymm4,%ymm4 6476 vpalignr $8,%ymm8,%ymm8,%ymm8 6477 vpalignr $4,%ymm12,%ymm12,%ymm12 6478 6479 decq %r10 6480 jnz .Lseal_avx2_init_rounds 6481 vpaddd .Lchacha20_consts(%rip),%ymm3,%ymm3 6482 vpaddd 0+64(%rbp),%ymm7,%ymm7 6483 vpaddd 0+96(%rbp),%ymm11,%ymm11 6484 vpaddd 0+256(%rbp),%ymm15,%ymm15 6485 vpaddd .Lchacha20_consts(%rip),%ymm2,%ymm2 6486 vpaddd 0+64(%rbp),%ymm6,%ymm6 6487 vpaddd 0+96(%rbp),%ymm10,%ymm10 6488 vpaddd 0+224(%rbp),%ymm14,%ymm14 6489 vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 6490 vpaddd 0+64(%rbp),%ymm5,%ymm5 6491 vpaddd 0+96(%rbp),%ymm9,%ymm9 6492 vpaddd 0+192(%rbp),%ymm13,%ymm13 6493 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 6494 vpaddd 0+64(%rbp),%ymm4,%ymm4 6495 vpaddd 0+96(%rbp),%ymm8,%ymm8 6496 vpaddd 0+160(%rbp),%ymm12,%ymm12 6497 6498 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 6499 vperm2i128 $0x02,%ymm3,%ymm7,%ymm15 6500 vperm2i128 $0x13,%ymm3,%ymm7,%ymm3 6501 vpand .Lclamp(%rip),%ymm15,%ymm15 6502 vmovdqa %ymm15,0+0(%rbp) 6503 movq %r8,%r8 6504 call poly_hash_ad_internal 6505 6506 vpxor 0(%rsi),%ymm3,%ymm3 6507 vpxor 32(%rsi),%ymm11,%ymm11 6508 vmovdqu %ymm3,0(%rdi) 6509 vmovdqu %ymm11,32(%rdi) 6510 vperm2i128 $0x02,%ymm2,%ymm6,%ymm15 6511 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 6512 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 6513 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 6514 vpxor 0+64(%rsi),%ymm15,%ymm15 6515 vpxor 32+64(%rsi),%ymm2,%ymm2 6516 vpxor 64+64(%rsi),%ymm6,%ymm6 6517 vpxor 96+64(%rsi),%ymm10,%ymm10 6518 vmovdqu %ymm15,0+64(%rdi) 6519 vmovdqu %ymm2,32+64(%rdi) 6520 vmovdqu %ymm6,64+64(%rdi) 6521 vmovdqu %ymm10,96+64(%rdi) 6522 vperm2i128 $0x02,%ymm1,%ymm5,%ymm15 6523 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 6524 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 6525 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 6526 vpxor 0+192(%rsi),%ymm15,%ymm15 6527 vpxor 32+192(%rsi),%ymm1,%ymm1 6528 vpxor 64+192(%rsi),%ymm5,%ymm5 6529 vpxor 96+192(%rsi),%ymm9,%ymm9 6530 vmovdqu %ymm15,0+192(%rdi) 6531 vmovdqu %ymm1,32+192(%rdi) 6532 vmovdqu %ymm5,64+192(%rdi) 6533 vmovdqu %ymm9,96+192(%rdi) 6534 vperm2i128 $0x13,%ymm0,%ymm4,%ymm15 6535 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 6536 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 6537 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 6538 vmovdqa %ymm15,%ymm8 6539 6540 leaq 
320(%rsi),%rsi 6541 subq $320,%rbx 6542 movq $320,%rcx 6543 cmpq $128,%rbx 6544 jbe .Lseal_avx2_short_hash_remainder 6545 vpxor 0(%rsi),%ymm0,%ymm0 6546 vpxor 32(%rsi),%ymm4,%ymm4 6547 vpxor 64(%rsi),%ymm8,%ymm8 6548 vpxor 96(%rsi),%ymm12,%ymm12 6549 vmovdqu %ymm0,320(%rdi) 6550 vmovdqu %ymm4,352(%rdi) 6551 vmovdqu %ymm8,384(%rdi) 6552 vmovdqu %ymm12,416(%rdi) 6553 leaq 128(%rsi),%rsi 6554 subq $128,%rbx 6555 movq $8,%rcx 6556 movq $2,%r8 6557 cmpq $128,%rbx 6558 jbe .Lseal_avx2_tail_128 6559 cmpq $256,%rbx 6560 jbe .Lseal_avx2_tail_256 6561 cmpq $384,%rbx 6562 jbe .Lseal_avx2_tail_384 6563 cmpq $512,%rbx 6564 jbe .Lseal_avx2_tail_512 6565 vmovdqa .Lchacha20_consts(%rip),%ymm0 6566 vmovdqa 0+64(%rbp),%ymm4 6567 vmovdqa 0+96(%rbp),%ymm8 6568 vmovdqa %ymm0,%ymm1 6569 vmovdqa %ymm4,%ymm5 6570 vmovdqa %ymm8,%ymm9 6571 vmovdqa %ymm0,%ymm2 6572 vmovdqa %ymm4,%ymm6 6573 vmovdqa %ymm8,%ymm10 6574 vmovdqa %ymm0,%ymm3 6575 vmovdqa %ymm4,%ymm7 6576 vmovdqa %ymm8,%ymm11 6577 vmovdqa .Lavx2_inc(%rip),%ymm12 6578 vpaddd 0+160(%rbp),%ymm12,%ymm15 6579 vpaddd %ymm15,%ymm12,%ymm14 6580 vpaddd %ymm14,%ymm12,%ymm13 6581 vpaddd %ymm13,%ymm12,%ymm12 6582 vmovdqa %ymm15,0+256(%rbp) 6583 vmovdqa %ymm14,0+224(%rbp) 6584 vmovdqa %ymm13,0+192(%rbp) 6585 vmovdqa %ymm12,0+160(%rbp) 6586 vmovdqa %ymm8,0+128(%rbp) 6587 vmovdqa .Lrol16(%rip),%ymm8 6588 vpaddd %ymm7,%ymm3,%ymm3 6589 vpaddd %ymm6,%ymm2,%ymm2 6590 vpaddd %ymm5,%ymm1,%ymm1 6591 vpaddd %ymm4,%ymm0,%ymm0 6592 vpxor %ymm3,%ymm15,%ymm15 6593 vpxor %ymm2,%ymm14,%ymm14 6594 vpxor %ymm1,%ymm13,%ymm13 6595 vpxor %ymm0,%ymm12,%ymm12 6596 vpshufb %ymm8,%ymm15,%ymm15 6597 vpshufb %ymm8,%ymm14,%ymm14 6598 vpshufb %ymm8,%ymm13,%ymm13 6599 vpshufb %ymm8,%ymm12,%ymm12 6600 vpaddd %ymm15,%ymm11,%ymm11 6601 vpaddd %ymm14,%ymm10,%ymm10 6602 vpaddd %ymm13,%ymm9,%ymm9 6603 vpaddd 0+128(%rbp),%ymm12,%ymm8 6604 vpxor %ymm11,%ymm7,%ymm7 6605 vpxor %ymm10,%ymm6,%ymm6 6606 vpxor %ymm9,%ymm5,%ymm5 6607 vpxor %ymm8,%ymm4,%ymm4 6608 vmovdqa %ymm8,0+128(%rbp) 6609 vpsrld $20,%ymm7,%ymm8 6610 vpslld $32-20,%ymm7,%ymm7 6611 vpxor %ymm8,%ymm7,%ymm7 6612 vpsrld $20,%ymm6,%ymm8 6613 vpslld $32-20,%ymm6,%ymm6 6614 vpxor %ymm8,%ymm6,%ymm6 6615 vpsrld $20,%ymm5,%ymm8 6616 vpslld $32-20,%ymm5,%ymm5 6617 vpxor %ymm8,%ymm5,%ymm5 6618 vpsrld $20,%ymm4,%ymm8 6619 vpslld $32-20,%ymm4,%ymm4 6620 vpxor %ymm8,%ymm4,%ymm4 6621 vmovdqa .Lrol8(%rip),%ymm8 6622 vpaddd %ymm7,%ymm3,%ymm3 6623 vpaddd %ymm6,%ymm2,%ymm2 6624 vpaddd %ymm5,%ymm1,%ymm1 6625 vpaddd %ymm4,%ymm0,%ymm0 6626 vpxor %ymm3,%ymm15,%ymm15 6627 vpxor %ymm2,%ymm14,%ymm14 6628 vpxor %ymm1,%ymm13,%ymm13 6629 vpxor %ymm0,%ymm12,%ymm12 6630 vpshufb %ymm8,%ymm15,%ymm15 6631 vpshufb %ymm8,%ymm14,%ymm14 6632 vpshufb %ymm8,%ymm13,%ymm13 6633 vpshufb %ymm8,%ymm12,%ymm12 6634 vpaddd %ymm15,%ymm11,%ymm11 6635 vpaddd %ymm14,%ymm10,%ymm10 6636 vpaddd %ymm13,%ymm9,%ymm9 6637 vpaddd 0+128(%rbp),%ymm12,%ymm8 6638 vpxor %ymm11,%ymm7,%ymm7 6639 vpxor %ymm10,%ymm6,%ymm6 6640 vpxor %ymm9,%ymm5,%ymm5 6641 vpxor %ymm8,%ymm4,%ymm4 6642 vmovdqa %ymm8,0+128(%rbp) 6643 vpsrld $25,%ymm7,%ymm8 6644 vpslld $32-25,%ymm7,%ymm7 6645 vpxor %ymm8,%ymm7,%ymm7 6646 vpsrld $25,%ymm6,%ymm8 6647 vpslld $32-25,%ymm6,%ymm6 6648 vpxor %ymm8,%ymm6,%ymm6 6649 vpsrld $25,%ymm5,%ymm8 6650 vpslld $32-25,%ymm5,%ymm5 6651 vpxor %ymm8,%ymm5,%ymm5 6652 vpsrld $25,%ymm4,%ymm8 6653 vpslld $32-25,%ymm4,%ymm4 6654 vpxor %ymm8,%ymm4,%ymm4 6655 vmovdqa 0+128(%rbp),%ymm8 6656 vpalignr $4,%ymm7,%ymm7,%ymm7 6657 vpalignr $8,%ymm11,%ymm11,%ymm11 6658 vpalignr $12,%ymm15,%ymm15,%ymm15 6659 vpalignr 
$4,%ymm6,%ymm6,%ymm6 6660 vpalignr $8,%ymm10,%ymm10,%ymm10 6661 vpalignr $12,%ymm14,%ymm14,%ymm14 6662 vpalignr $4,%ymm5,%ymm5,%ymm5 6663 vpalignr $8,%ymm9,%ymm9,%ymm9 6664 vpalignr $12,%ymm13,%ymm13,%ymm13 6665 vpalignr $4,%ymm4,%ymm4,%ymm4 6666 vpalignr $8,%ymm8,%ymm8,%ymm8 6667 vpalignr $12,%ymm12,%ymm12,%ymm12 6668 vmovdqa %ymm8,0+128(%rbp) 6669 vmovdqa .Lrol16(%rip),%ymm8 6670 vpaddd %ymm7,%ymm3,%ymm3 6671 vpaddd %ymm6,%ymm2,%ymm2 6672 vpaddd %ymm5,%ymm1,%ymm1 6673 vpaddd %ymm4,%ymm0,%ymm0 6674 vpxor %ymm3,%ymm15,%ymm15 6675 vpxor %ymm2,%ymm14,%ymm14 6676 vpxor %ymm1,%ymm13,%ymm13 6677 vpxor %ymm0,%ymm12,%ymm12 6678 vpshufb %ymm8,%ymm15,%ymm15 6679 vpshufb %ymm8,%ymm14,%ymm14 6680 vpshufb %ymm8,%ymm13,%ymm13 6681 vpshufb %ymm8,%ymm12,%ymm12 6682 vpaddd %ymm15,%ymm11,%ymm11 6683 vpaddd %ymm14,%ymm10,%ymm10 6684 vpaddd %ymm13,%ymm9,%ymm9 6685 vpaddd 0+128(%rbp),%ymm12,%ymm8 6686 vpxor %ymm11,%ymm7,%ymm7 6687 vpxor %ymm10,%ymm6,%ymm6 6688 vpxor %ymm9,%ymm5,%ymm5 6689 vpxor %ymm8,%ymm4,%ymm4 6690 vmovdqa %ymm8,0+128(%rbp) 6691 vpsrld $20,%ymm7,%ymm8 6692 vpslld $32-20,%ymm7,%ymm7 6693 vpxor %ymm8,%ymm7,%ymm7 6694 vpsrld $20,%ymm6,%ymm8 6695 vpslld $32-20,%ymm6,%ymm6 6696 vpxor %ymm8,%ymm6,%ymm6 6697 vpsrld $20,%ymm5,%ymm8 6698 vpslld $32-20,%ymm5,%ymm5 6699 vpxor %ymm8,%ymm5,%ymm5 6700 vpsrld $20,%ymm4,%ymm8 6701 vpslld $32-20,%ymm4,%ymm4 6702 vpxor %ymm8,%ymm4,%ymm4 6703 vmovdqa .Lrol8(%rip),%ymm8 6704 vpaddd %ymm7,%ymm3,%ymm3 6705 vpaddd %ymm6,%ymm2,%ymm2 6706 vpaddd %ymm5,%ymm1,%ymm1 6707 vpaddd %ymm4,%ymm0,%ymm0 6708 vpxor %ymm3,%ymm15,%ymm15 6709 vpxor %ymm2,%ymm14,%ymm14 6710 vpxor %ymm1,%ymm13,%ymm13 6711 vpxor %ymm0,%ymm12,%ymm12 6712 vpshufb %ymm8,%ymm15,%ymm15 6713 vpshufb %ymm8,%ymm14,%ymm14 6714 vpshufb %ymm8,%ymm13,%ymm13 6715 vpshufb %ymm8,%ymm12,%ymm12 6716 vpaddd %ymm15,%ymm11,%ymm11 6717 vpaddd %ymm14,%ymm10,%ymm10 6718 vpaddd %ymm13,%ymm9,%ymm9 6719 vpaddd 0+128(%rbp),%ymm12,%ymm8 6720 vpxor %ymm11,%ymm7,%ymm7 6721 vpxor %ymm10,%ymm6,%ymm6 6722 vpxor %ymm9,%ymm5,%ymm5 6723 vpxor %ymm8,%ymm4,%ymm4 6724 vmovdqa %ymm8,0+128(%rbp) 6725 vpsrld $25,%ymm7,%ymm8 6726 vpslld $32-25,%ymm7,%ymm7 6727 vpxor %ymm8,%ymm7,%ymm7 6728 vpsrld $25,%ymm6,%ymm8 6729 vpslld $32-25,%ymm6,%ymm6 6730 vpxor %ymm8,%ymm6,%ymm6 6731 vpsrld $25,%ymm5,%ymm8 6732 vpslld $32-25,%ymm5,%ymm5 6733 vpxor %ymm8,%ymm5,%ymm5 6734 vpsrld $25,%ymm4,%ymm8 6735 vpslld $32-25,%ymm4,%ymm4 6736 vpxor %ymm8,%ymm4,%ymm4 6737 vmovdqa 0+128(%rbp),%ymm8 6738 vpalignr $12,%ymm7,%ymm7,%ymm7 6739 vpalignr $8,%ymm11,%ymm11,%ymm11 6740 vpalignr $4,%ymm15,%ymm15,%ymm15 6741 vpalignr $12,%ymm6,%ymm6,%ymm6 6742 vpalignr $8,%ymm10,%ymm10,%ymm10 6743 vpalignr $4,%ymm14,%ymm14,%ymm14 6744 vpalignr $12,%ymm5,%ymm5,%ymm5 6745 vpalignr $8,%ymm9,%ymm9,%ymm9 6746 vpalignr $4,%ymm13,%ymm13,%ymm13 6747 vpalignr $12,%ymm4,%ymm4,%ymm4 6748 vpalignr $8,%ymm8,%ymm8,%ymm8 6749 vpalignr $4,%ymm12,%ymm12,%ymm12 6750 vmovdqa %ymm8,0+128(%rbp) 6751 vmovdqa .Lrol16(%rip),%ymm8 6752 vpaddd %ymm7,%ymm3,%ymm3 6753 vpaddd %ymm6,%ymm2,%ymm2 6754 vpaddd %ymm5,%ymm1,%ymm1 6755 vpaddd %ymm4,%ymm0,%ymm0 6756 vpxor %ymm3,%ymm15,%ymm15 6757 vpxor %ymm2,%ymm14,%ymm14 6758 vpxor %ymm1,%ymm13,%ymm13 6759 vpxor %ymm0,%ymm12,%ymm12 6760 vpshufb %ymm8,%ymm15,%ymm15 6761 vpshufb %ymm8,%ymm14,%ymm14 6762 vpshufb %ymm8,%ymm13,%ymm13 6763 vpshufb %ymm8,%ymm12,%ymm12 6764 vpaddd %ymm15,%ymm11,%ymm11 6765 vpaddd %ymm14,%ymm10,%ymm10 6766 vpaddd %ymm13,%ymm9,%ymm9 6767 vpaddd 0+128(%rbp),%ymm12,%ymm8 6768 vpxor %ymm11,%ymm7,%ymm7 6769 vpxor %ymm10,%ymm6,%ymm6 6770 vpxor 
%ymm9,%ymm5,%ymm5 6771 vpxor %ymm8,%ymm4,%ymm4 6772 vmovdqa %ymm8,0+128(%rbp) 6773 vpsrld $20,%ymm7,%ymm8 6774 vpslld $32-20,%ymm7,%ymm7 6775 vpxor %ymm8,%ymm7,%ymm7 6776 vpsrld $20,%ymm6,%ymm8 6777 vpslld $32-20,%ymm6,%ymm6 6778 vpxor %ymm8,%ymm6,%ymm6 6779 vpsrld $20,%ymm5,%ymm8 6780 vpslld $32-20,%ymm5,%ymm5 6781 vpxor %ymm8,%ymm5,%ymm5 6782 vpsrld $20,%ymm4,%ymm8 6783 vpslld $32-20,%ymm4,%ymm4 6784 vpxor %ymm8,%ymm4,%ymm4 6785 vmovdqa .Lrol8(%rip),%ymm8 6786 vpaddd %ymm7,%ymm3,%ymm3 6787 vpaddd %ymm6,%ymm2,%ymm2 6788 vpaddd %ymm5,%ymm1,%ymm1 6789 vpaddd %ymm4,%ymm0,%ymm0 6790 vpxor %ymm3,%ymm15,%ymm15 6791 6792 subq $16,%rdi 6793 movq $9,%rcx 6794 jmp .Lseal_avx2_main_loop_rounds_entry 6795.align 32 6796.Lseal_avx2_main_loop: 6797 vmovdqa .Lchacha20_consts(%rip),%ymm0 6798 vmovdqa 0+64(%rbp),%ymm4 6799 vmovdqa 0+96(%rbp),%ymm8 6800 vmovdqa %ymm0,%ymm1 6801 vmovdqa %ymm4,%ymm5 6802 vmovdqa %ymm8,%ymm9 6803 vmovdqa %ymm0,%ymm2 6804 vmovdqa %ymm4,%ymm6 6805 vmovdqa %ymm8,%ymm10 6806 vmovdqa %ymm0,%ymm3 6807 vmovdqa %ymm4,%ymm7 6808 vmovdqa %ymm8,%ymm11 6809 vmovdqa .Lavx2_inc(%rip),%ymm12 6810 vpaddd 0+160(%rbp),%ymm12,%ymm15 6811 vpaddd %ymm15,%ymm12,%ymm14 6812 vpaddd %ymm14,%ymm12,%ymm13 6813 vpaddd %ymm13,%ymm12,%ymm12 6814 vmovdqa %ymm15,0+256(%rbp) 6815 vmovdqa %ymm14,0+224(%rbp) 6816 vmovdqa %ymm13,0+192(%rbp) 6817 vmovdqa %ymm12,0+160(%rbp) 6818 6819 movq $10,%rcx 6820.align 32 6821.Lseal_avx2_main_loop_rounds: 6822 addq 0+0(%rdi),%r10 6823 adcq 8+0(%rdi),%r11 6824 adcq $1,%r12 6825 vmovdqa %ymm8,0+128(%rbp) 6826 vmovdqa .Lrol16(%rip),%ymm8 6827 vpaddd %ymm7,%ymm3,%ymm3 6828 vpaddd %ymm6,%ymm2,%ymm2 6829 vpaddd %ymm5,%ymm1,%ymm1 6830 vpaddd %ymm4,%ymm0,%ymm0 6831 vpxor %ymm3,%ymm15,%ymm15 6832 vpxor %ymm2,%ymm14,%ymm14 6833 vpxor %ymm1,%ymm13,%ymm13 6834 vpxor %ymm0,%ymm12,%ymm12 6835 movq 0+0+0(%rbp),%rdx 6836 movq %rdx,%r15 6837 mulxq %r10,%r13,%r14 6838 mulxq %r11,%rax,%rdx 6839 imulq %r12,%r15 6840 addq %rax,%r14 6841 adcq %rdx,%r15 6842 vpshufb %ymm8,%ymm15,%ymm15 6843 vpshufb %ymm8,%ymm14,%ymm14 6844 vpshufb %ymm8,%ymm13,%ymm13 6845 vpshufb %ymm8,%ymm12,%ymm12 6846 vpaddd %ymm15,%ymm11,%ymm11 6847 vpaddd %ymm14,%ymm10,%ymm10 6848 vpaddd %ymm13,%ymm9,%ymm9 6849 vpaddd 0+128(%rbp),%ymm12,%ymm8 6850 vpxor %ymm11,%ymm7,%ymm7 6851 movq 8+0+0(%rbp),%rdx 6852 mulxq %r10,%r10,%rax 6853 addq %r10,%r14 6854 mulxq %r11,%r11,%r9 6855 adcq %r11,%r15 6856 adcq $0,%r9 6857 imulq %r12,%rdx 6858 vpxor %ymm10,%ymm6,%ymm6 6859 vpxor %ymm9,%ymm5,%ymm5 6860 vpxor %ymm8,%ymm4,%ymm4 6861 vmovdqa %ymm8,0+128(%rbp) 6862 vpsrld $20,%ymm7,%ymm8 6863 vpslld $32-20,%ymm7,%ymm7 6864 vpxor %ymm8,%ymm7,%ymm7 6865 vpsrld $20,%ymm6,%ymm8 6866 vpslld $32-20,%ymm6,%ymm6 6867 vpxor %ymm8,%ymm6,%ymm6 6868 vpsrld $20,%ymm5,%ymm8 6869 vpslld $32-20,%ymm5,%ymm5 6870 addq %rax,%r15 6871 adcq %rdx,%r9 6872 vpxor %ymm8,%ymm5,%ymm5 6873 vpsrld $20,%ymm4,%ymm8 6874 vpslld $32-20,%ymm4,%ymm4 6875 vpxor %ymm8,%ymm4,%ymm4 6876 vmovdqa .Lrol8(%rip),%ymm8 6877 vpaddd %ymm7,%ymm3,%ymm3 6878 vpaddd %ymm6,%ymm2,%ymm2 6879 vpaddd %ymm5,%ymm1,%ymm1 6880 vpaddd %ymm4,%ymm0,%ymm0 6881 vpxor %ymm3,%ymm15,%ymm15 6882 movq %r13,%r10 6883 movq %r14,%r11 6884 movq %r15,%r12 6885 andq $3,%r12 6886 movq %r15,%r13 6887 andq $-4,%r13 6888 movq %r9,%r14 6889 shrdq $2,%r9,%r15 6890 shrq $2,%r9 6891 addq %r13,%r15 6892 adcq %r14,%r9 6893 addq %r15,%r10 6894 adcq %r9,%r11 6895 adcq $0,%r12 6896 6897.Lseal_avx2_main_loop_rounds_entry: 6898 vpxor %ymm2,%ymm14,%ymm14 6899 vpxor %ymm1,%ymm13,%ymm13 6900 vpxor %ymm0,%ymm12,%ymm12 6901 vpshufb 
%ymm8,%ymm15,%ymm15 6902 vpshufb %ymm8,%ymm14,%ymm14 6903 vpshufb %ymm8,%ymm13,%ymm13 6904 vpshufb %ymm8,%ymm12,%ymm12 6905 vpaddd %ymm15,%ymm11,%ymm11 6906 vpaddd %ymm14,%ymm10,%ymm10 6907 addq 0+16(%rdi),%r10 6908 adcq 8+16(%rdi),%r11 6909 adcq $1,%r12 6910 vpaddd %ymm13,%ymm9,%ymm9 6911 vpaddd 0+128(%rbp),%ymm12,%ymm8 6912 vpxor %ymm11,%ymm7,%ymm7 6913 vpxor %ymm10,%ymm6,%ymm6 6914 vpxor %ymm9,%ymm5,%ymm5 6915 vpxor %ymm8,%ymm4,%ymm4 6916 vmovdqa %ymm8,0+128(%rbp) 6917 vpsrld $25,%ymm7,%ymm8 6918 movq 0+0+0(%rbp),%rdx 6919 movq %rdx,%r15 6920 mulxq %r10,%r13,%r14 6921 mulxq %r11,%rax,%rdx 6922 imulq %r12,%r15 6923 addq %rax,%r14 6924 adcq %rdx,%r15 6925 vpslld $32-25,%ymm7,%ymm7 6926 vpxor %ymm8,%ymm7,%ymm7 6927 vpsrld $25,%ymm6,%ymm8 6928 vpslld $32-25,%ymm6,%ymm6 6929 vpxor %ymm8,%ymm6,%ymm6 6930 vpsrld $25,%ymm5,%ymm8 6931 vpslld $32-25,%ymm5,%ymm5 6932 vpxor %ymm8,%ymm5,%ymm5 6933 vpsrld $25,%ymm4,%ymm8 6934 vpslld $32-25,%ymm4,%ymm4 6935 vpxor %ymm8,%ymm4,%ymm4 6936 vmovdqa 0+128(%rbp),%ymm8 6937 vpalignr $4,%ymm7,%ymm7,%ymm7 6938 vpalignr $8,%ymm11,%ymm11,%ymm11 6939 vpalignr $12,%ymm15,%ymm15,%ymm15 6940 vpalignr $4,%ymm6,%ymm6,%ymm6 6941 vpalignr $8,%ymm10,%ymm10,%ymm10 6942 vpalignr $12,%ymm14,%ymm14,%ymm14 6943 movq 8+0+0(%rbp),%rdx 6944 mulxq %r10,%r10,%rax 6945 addq %r10,%r14 6946 mulxq %r11,%r11,%r9 6947 adcq %r11,%r15 6948 adcq $0,%r9 6949 imulq %r12,%rdx 6950 vpalignr $4,%ymm5,%ymm5,%ymm5 6951 vpalignr $8,%ymm9,%ymm9,%ymm9 6952 vpalignr $12,%ymm13,%ymm13,%ymm13 6953 vpalignr $4,%ymm4,%ymm4,%ymm4 6954 vpalignr $8,%ymm8,%ymm8,%ymm8 6955 vpalignr $12,%ymm12,%ymm12,%ymm12 6956 vmovdqa %ymm8,0+128(%rbp) 6957 vmovdqa .Lrol16(%rip),%ymm8 6958 vpaddd %ymm7,%ymm3,%ymm3 6959 vpaddd %ymm6,%ymm2,%ymm2 6960 vpaddd %ymm5,%ymm1,%ymm1 6961 vpaddd %ymm4,%ymm0,%ymm0 6962 vpxor %ymm3,%ymm15,%ymm15 6963 vpxor %ymm2,%ymm14,%ymm14 6964 vpxor %ymm1,%ymm13,%ymm13 6965 vpxor %ymm0,%ymm12,%ymm12 6966 vpshufb %ymm8,%ymm15,%ymm15 6967 vpshufb %ymm8,%ymm14,%ymm14 6968 addq %rax,%r15 6969 adcq %rdx,%r9 6970 vpshufb %ymm8,%ymm13,%ymm13 6971 vpshufb %ymm8,%ymm12,%ymm12 6972 vpaddd %ymm15,%ymm11,%ymm11 6973 vpaddd %ymm14,%ymm10,%ymm10 6974 vpaddd %ymm13,%ymm9,%ymm9 6975 vpaddd 0+128(%rbp),%ymm12,%ymm8 6976 vpxor %ymm11,%ymm7,%ymm7 6977 vpxor %ymm10,%ymm6,%ymm6 6978 vpxor %ymm9,%ymm5,%ymm5 6979 movq %r13,%r10 6980 movq %r14,%r11 6981 movq %r15,%r12 6982 andq $3,%r12 6983 movq %r15,%r13 6984 andq $-4,%r13 6985 movq %r9,%r14 6986 shrdq $2,%r9,%r15 6987 shrq $2,%r9 6988 addq %r13,%r15 6989 adcq %r14,%r9 6990 addq %r15,%r10 6991 adcq %r9,%r11 6992 adcq $0,%r12 6993 vpxor %ymm8,%ymm4,%ymm4 6994 vmovdqa %ymm8,0+128(%rbp) 6995 vpsrld $20,%ymm7,%ymm8 6996 vpslld $32-20,%ymm7,%ymm7 6997 vpxor %ymm8,%ymm7,%ymm7 6998 vpsrld $20,%ymm6,%ymm8 6999 vpslld $32-20,%ymm6,%ymm6 7000 vpxor %ymm8,%ymm6,%ymm6 7001 addq 0+32(%rdi),%r10 7002 adcq 8+32(%rdi),%r11 7003 adcq $1,%r12 7004 7005 leaq 48(%rdi),%rdi 7006 vpsrld $20,%ymm5,%ymm8 7007 vpslld $32-20,%ymm5,%ymm5 7008 vpxor %ymm8,%ymm5,%ymm5 7009 vpsrld $20,%ymm4,%ymm8 7010 vpslld $32-20,%ymm4,%ymm4 7011 vpxor %ymm8,%ymm4,%ymm4 7012 vmovdqa .Lrol8(%rip),%ymm8 7013 vpaddd %ymm7,%ymm3,%ymm3 7014 vpaddd %ymm6,%ymm2,%ymm2 7015 vpaddd %ymm5,%ymm1,%ymm1 7016 vpaddd %ymm4,%ymm0,%ymm0 7017 vpxor %ymm3,%ymm15,%ymm15 7018 vpxor %ymm2,%ymm14,%ymm14 7019 vpxor %ymm1,%ymm13,%ymm13 7020 vpxor %ymm0,%ymm12,%ymm12 7021 vpshufb %ymm8,%ymm15,%ymm15 7022 vpshufb %ymm8,%ymm14,%ymm14 7023 vpshufb %ymm8,%ymm13,%ymm13 7024 movq 0+0+0(%rbp),%rdx 7025 movq %rdx,%r15 7026 mulxq %r10,%r13,%r14 7027 
mulxq %r11,%rax,%rdx 7028 imulq %r12,%r15 7029 addq %rax,%r14 7030 adcq %rdx,%r15 7031 vpshufb %ymm8,%ymm12,%ymm12 7032 vpaddd %ymm15,%ymm11,%ymm11 7033 vpaddd %ymm14,%ymm10,%ymm10 7034 vpaddd %ymm13,%ymm9,%ymm9 7035 vpaddd 0+128(%rbp),%ymm12,%ymm8 7036 vpxor %ymm11,%ymm7,%ymm7 7037 vpxor %ymm10,%ymm6,%ymm6 7038 vpxor %ymm9,%ymm5,%ymm5 7039 movq 8+0+0(%rbp),%rdx 7040 mulxq %r10,%r10,%rax 7041 addq %r10,%r14 7042 mulxq %r11,%r11,%r9 7043 adcq %r11,%r15 7044 adcq $0,%r9 7045 imulq %r12,%rdx 7046 vpxor %ymm8,%ymm4,%ymm4 7047 vmovdqa %ymm8,0+128(%rbp) 7048 vpsrld $25,%ymm7,%ymm8 7049 vpslld $32-25,%ymm7,%ymm7 7050 vpxor %ymm8,%ymm7,%ymm7 7051 vpsrld $25,%ymm6,%ymm8 7052 vpslld $32-25,%ymm6,%ymm6 7053 vpxor %ymm8,%ymm6,%ymm6 7054 addq %rax,%r15 7055 adcq %rdx,%r9 7056 vpsrld $25,%ymm5,%ymm8 7057 vpslld $32-25,%ymm5,%ymm5 7058 vpxor %ymm8,%ymm5,%ymm5 7059 vpsrld $25,%ymm4,%ymm8 7060 vpslld $32-25,%ymm4,%ymm4 7061 vpxor %ymm8,%ymm4,%ymm4 7062 vmovdqa 0+128(%rbp),%ymm8 7063 vpalignr $12,%ymm7,%ymm7,%ymm7 7064 vpalignr $8,%ymm11,%ymm11,%ymm11 7065 vpalignr $4,%ymm15,%ymm15,%ymm15 7066 vpalignr $12,%ymm6,%ymm6,%ymm6 7067 vpalignr $8,%ymm10,%ymm10,%ymm10 7068 vpalignr $4,%ymm14,%ymm14,%ymm14 7069 vpalignr $12,%ymm5,%ymm5,%ymm5 7070 vpalignr $8,%ymm9,%ymm9,%ymm9 7071 vpalignr $4,%ymm13,%ymm13,%ymm13 7072 vpalignr $12,%ymm4,%ymm4,%ymm4 7073 vpalignr $8,%ymm8,%ymm8,%ymm8 7074 movq %r13,%r10 7075 movq %r14,%r11 7076 movq %r15,%r12 7077 andq $3,%r12 7078 movq %r15,%r13 7079 andq $-4,%r13 7080 movq %r9,%r14 7081 shrdq $2,%r9,%r15 7082 shrq $2,%r9 7083 addq %r13,%r15 7084 adcq %r14,%r9 7085 addq %r15,%r10 7086 adcq %r9,%r11 7087 adcq $0,%r12 7088 vpalignr $4,%ymm12,%ymm12,%ymm12 7089 7090 decq %rcx 7091 jne .Lseal_avx2_main_loop_rounds 7092 vpaddd .Lchacha20_consts(%rip),%ymm3,%ymm3 7093 vpaddd 0+64(%rbp),%ymm7,%ymm7 7094 vpaddd 0+96(%rbp),%ymm11,%ymm11 7095 vpaddd 0+256(%rbp),%ymm15,%ymm15 7096 vpaddd .Lchacha20_consts(%rip),%ymm2,%ymm2 7097 vpaddd 0+64(%rbp),%ymm6,%ymm6 7098 vpaddd 0+96(%rbp),%ymm10,%ymm10 7099 vpaddd 0+224(%rbp),%ymm14,%ymm14 7100 vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 7101 vpaddd 0+64(%rbp),%ymm5,%ymm5 7102 vpaddd 0+96(%rbp),%ymm9,%ymm9 7103 vpaddd 0+192(%rbp),%ymm13,%ymm13 7104 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 7105 vpaddd 0+64(%rbp),%ymm4,%ymm4 7106 vpaddd 0+96(%rbp),%ymm8,%ymm8 7107 vpaddd 0+160(%rbp),%ymm12,%ymm12 7108 7109 vmovdqa %ymm0,0+128(%rbp) 7110 addq 0+0(%rdi),%r10 7111 adcq 8+0(%rdi),%r11 7112 adcq $1,%r12 7113 movq 0+0+0(%rbp),%rdx 7114 movq %rdx,%r15 7115 mulxq %r10,%r13,%r14 7116 mulxq %r11,%rax,%rdx 7117 imulq %r12,%r15 7118 addq %rax,%r14 7119 adcq %rdx,%r15 7120 movq 8+0+0(%rbp),%rdx 7121 mulxq %r10,%r10,%rax 7122 addq %r10,%r14 7123 mulxq %r11,%r11,%r9 7124 adcq %r11,%r15 7125 adcq $0,%r9 7126 imulq %r12,%rdx 7127 addq %rax,%r15 7128 adcq %rdx,%r9 7129 movq %r13,%r10 7130 movq %r14,%r11 7131 movq %r15,%r12 7132 andq $3,%r12 7133 movq %r15,%r13 7134 andq $-4,%r13 7135 movq %r9,%r14 7136 shrdq $2,%r9,%r15 7137 shrq $2,%r9 7138 addq %r13,%r15 7139 adcq %r14,%r9 7140 addq %r15,%r10 7141 adcq %r9,%r11 7142 adcq $0,%r12 7143 addq 0+16(%rdi),%r10 7144 adcq 8+16(%rdi),%r11 7145 adcq $1,%r12 7146 movq 0+0+0(%rbp),%rdx 7147 movq %rdx,%r15 7148 mulxq %r10,%r13,%r14 7149 mulxq %r11,%rax,%rdx 7150 imulq %r12,%r15 7151 addq %rax,%r14 7152 adcq %rdx,%r15 7153 movq 8+0+0(%rbp),%rdx 7154 mulxq %r10,%r10,%rax 7155 addq %r10,%r14 7156 mulxq %r11,%r11,%r9 7157 adcq %r11,%r15 7158 adcq $0,%r9 7159 imulq %r12,%rdx 7160 addq %rax,%r15 7161 adcq %rdx,%r9 7162 movq %r13,%r10 
7163 movq %r14,%r11 7164 movq %r15,%r12 7165 andq $3,%r12 7166 movq %r15,%r13 7167 andq $-4,%r13 7168 movq %r9,%r14 7169 shrdq $2,%r9,%r15 7170 shrq $2,%r9 7171 addq %r13,%r15 7172 adcq %r14,%r9 7173 addq %r15,%r10 7174 adcq %r9,%r11 7175 adcq $0,%r12 7176 7177 leaq 32(%rdi),%rdi 7178 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 7179 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 7180 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 7181 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 7182 vpxor 0+0(%rsi),%ymm0,%ymm0 7183 vpxor 32+0(%rsi),%ymm3,%ymm3 7184 vpxor 64+0(%rsi),%ymm7,%ymm7 7185 vpxor 96+0(%rsi),%ymm11,%ymm11 7186 vmovdqu %ymm0,0+0(%rdi) 7187 vmovdqu %ymm3,32+0(%rdi) 7188 vmovdqu %ymm7,64+0(%rdi) 7189 vmovdqu %ymm11,96+0(%rdi) 7190 7191 vmovdqa 0+128(%rbp),%ymm0 7192 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 7193 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 7194 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 7195 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 7196 vpxor 0+128(%rsi),%ymm3,%ymm3 7197 vpxor 32+128(%rsi),%ymm2,%ymm2 7198 vpxor 64+128(%rsi),%ymm6,%ymm6 7199 vpxor 96+128(%rsi),%ymm10,%ymm10 7200 vmovdqu %ymm3,0+128(%rdi) 7201 vmovdqu %ymm2,32+128(%rdi) 7202 vmovdqu %ymm6,64+128(%rdi) 7203 vmovdqu %ymm10,96+128(%rdi) 7204 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 7205 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 7206 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 7207 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 7208 vpxor 0+256(%rsi),%ymm3,%ymm3 7209 vpxor 32+256(%rsi),%ymm1,%ymm1 7210 vpxor 64+256(%rsi),%ymm5,%ymm5 7211 vpxor 96+256(%rsi),%ymm9,%ymm9 7212 vmovdqu %ymm3,0+256(%rdi) 7213 vmovdqu %ymm1,32+256(%rdi) 7214 vmovdqu %ymm5,64+256(%rdi) 7215 vmovdqu %ymm9,96+256(%rdi) 7216 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 7217 vperm2i128 $0x13,%ymm0,%ymm4,%ymm4 7218 vperm2i128 $0x02,%ymm8,%ymm12,%ymm0 7219 vperm2i128 $0x13,%ymm8,%ymm12,%ymm8 7220 vpxor 0+384(%rsi),%ymm3,%ymm3 7221 vpxor 32+384(%rsi),%ymm0,%ymm0 7222 vpxor 64+384(%rsi),%ymm4,%ymm4 7223 vpxor 96+384(%rsi),%ymm8,%ymm8 7224 vmovdqu %ymm3,0+384(%rdi) 7225 vmovdqu %ymm0,32+384(%rdi) 7226 vmovdqu %ymm4,64+384(%rdi) 7227 vmovdqu %ymm8,96+384(%rdi) 7228 7229 leaq 512(%rsi),%rsi 7230 subq $512,%rbx 7231 cmpq $512,%rbx 7232 jg .Lseal_avx2_main_loop 7233 7234 addq 0+0(%rdi),%r10 7235 adcq 8+0(%rdi),%r11 7236 adcq $1,%r12 7237 movq 0+0+0(%rbp),%rdx 7238 movq %rdx,%r15 7239 mulxq %r10,%r13,%r14 7240 mulxq %r11,%rax,%rdx 7241 imulq %r12,%r15 7242 addq %rax,%r14 7243 adcq %rdx,%r15 7244 movq 8+0+0(%rbp),%rdx 7245 mulxq %r10,%r10,%rax 7246 addq %r10,%r14 7247 mulxq %r11,%r11,%r9 7248 adcq %r11,%r15 7249 adcq $0,%r9 7250 imulq %r12,%rdx 7251 addq %rax,%r15 7252 adcq %rdx,%r9 7253 movq %r13,%r10 7254 movq %r14,%r11 7255 movq %r15,%r12 7256 andq $3,%r12 7257 movq %r15,%r13 7258 andq $-4,%r13 7259 movq %r9,%r14 7260 shrdq $2,%r9,%r15 7261 shrq $2,%r9 7262 addq %r13,%r15 7263 adcq %r14,%r9 7264 addq %r15,%r10 7265 adcq %r9,%r11 7266 adcq $0,%r12 7267 addq 0+16(%rdi),%r10 7268 adcq 8+16(%rdi),%r11 7269 adcq $1,%r12 7270 movq 0+0+0(%rbp),%rdx 7271 movq %rdx,%r15 7272 mulxq %r10,%r13,%r14 7273 mulxq %r11,%rax,%rdx 7274 imulq %r12,%r15 7275 addq %rax,%r14 7276 adcq %rdx,%r15 7277 movq 8+0+0(%rbp),%rdx 7278 mulxq %r10,%r10,%rax 7279 addq %r10,%r14 7280 mulxq %r11,%r11,%r9 7281 adcq %r11,%r15 7282 adcq $0,%r9 7283 imulq %r12,%rdx 7284 addq %rax,%r15 7285 adcq %rdx,%r9 7286 movq %r13,%r10 7287 movq %r14,%r11 7288 movq %r15,%r12 7289 andq $3,%r12 7290 movq %r15,%r13 7291 andq $-4,%r13 7292 movq %r9,%r14 7293 shrdq $2,%r9,%r15 7294 shrq $2,%r9 7295 addq %r13,%r15 7296 adcq %r14,%r9 7297 addq %r15,%r10 7298 adcq %r9,%r11 7299 adcq $0,%r12 7300 7301 leaq 
32(%rdi),%rdi 7302 movq $10,%rcx 7303 xorq %r8,%r8 7304 7305 cmpq $384,%rbx 7306 ja .Lseal_avx2_tail_512 7307 cmpq $256,%rbx 7308 ja .Lseal_avx2_tail_384 7309 cmpq $128,%rbx 7310 ja .Lseal_avx2_tail_256 7311 7312.Lseal_avx2_tail_128: 7313 vmovdqa .Lchacha20_consts(%rip),%ymm0 7314 vmovdqa 0+64(%rbp),%ymm4 7315 vmovdqa 0+96(%rbp),%ymm8 7316 vmovdqa .Lavx2_inc(%rip),%ymm12 7317 vpaddd 0+160(%rbp),%ymm12,%ymm12 7318 vmovdqa %ymm12,0+160(%rbp) 7319 7320.Lseal_avx2_tail_128_rounds_and_3xhash: 7321 addq 0+0(%rdi),%r10 7322 adcq 8+0(%rdi),%r11 7323 adcq $1,%r12 7324 movq 0+0+0(%rbp),%rdx 7325 movq %rdx,%r15 7326 mulxq %r10,%r13,%r14 7327 mulxq %r11,%rax,%rdx 7328 imulq %r12,%r15 7329 addq %rax,%r14 7330 adcq %rdx,%r15 7331 movq 8+0+0(%rbp),%rdx 7332 mulxq %r10,%r10,%rax 7333 addq %r10,%r14 7334 mulxq %r11,%r11,%r9 7335 adcq %r11,%r15 7336 adcq $0,%r9 7337 imulq %r12,%rdx 7338 addq %rax,%r15 7339 adcq %rdx,%r9 7340 movq %r13,%r10 7341 movq %r14,%r11 7342 movq %r15,%r12 7343 andq $3,%r12 7344 movq %r15,%r13 7345 andq $-4,%r13 7346 movq %r9,%r14 7347 shrdq $2,%r9,%r15 7348 shrq $2,%r9 7349 addq %r13,%r15 7350 adcq %r14,%r9 7351 addq %r15,%r10 7352 adcq %r9,%r11 7353 adcq $0,%r12 7354 7355 leaq 16(%rdi),%rdi 7356.Lseal_avx2_tail_128_rounds_and_2xhash: 7357 vpaddd %ymm4,%ymm0,%ymm0 7358 vpxor %ymm0,%ymm12,%ymm12 7359 vpshufb .Lrol16(%rip),%ymm12,%ymm12 7360 vpaddd %ymm12,%ymm8,%ymm8 7361 vpxor %ymm8,%ymm4,%ymm4 7362 vpsrld $20,%ymm4,%ymm3 7363 vpslld $12,%ymm4,%ymm4 7364 vpxor %ymm3,%ymm4,%ymm4 7365 vpaddd %ymm4,%ymm0,%ymm0 7366 vpxor %ymm0,%ymm12,%ymm12 7367 vpshufb .Lrol8(%rip),%ymm12,%ymm12 7368 vpaddd %ymm12,%ymm8,%ymm8 7369 vpxor %ymm8,%ymm4,%ymm4 7370 vpslld $7,%ymm4,%ymm3 7371 vpsrld $25,%ymm4,%ymm4 7372 vpxor %ymm3,%ymm4,%ymm4 7373 vpalignr $12,%ymm12,%ymm12,%ymm12 7374 vpalignr $8,%ymm8,%ymm8,%ymm8 7375 vpalignr $4,%ymm4,%ymm4,%ymm4 7376 addq 0+0(%rdi),%r10 7377 adcq 8+0(%rdi),%r11 7378 adcq $1,%r12 7379 movq 0+0+0(%rbp),%rdx 7380 movq %rdx,%r15 7381 mulxq %r10,%r13,%r14 7382 mulxq %r11,%rax,%rdx 7383 imulq %r12,%r15 7384 addq %rax,%r14 7385 adcq %rdx,%r15 7386 movq 8+0+0(%rbp),%rdx 7387 mulxq %r10,%r10,%rax 7388 addq %r10,%r14 7389 mulxq %r11,%r11,%r9 7390 adcq %r11,%r15 7391 adcq $0,%r9 7392 imulq %r12,%rdx 7393 addq %rax,%r15 7394 adcq %rdx,%r9 7395 movq %r13,%r10 7396 movq %r14,%r11 7397 movq %r15,%r12 7398 andq $3,%r12 7399 movq %r15,%r13 7400 andq $-4,%r13 7401 movq %r9,%r14 7402 shrdq $2,%r9,%r15 7403 shrq $2,%r9 7404 addq %r13,%r15 7405 adcq %r14,%r9 7406 addq %r15,%r10 7407 adcq %r9,%r11 7408 adcq $0,%r12 7409 vpaddd %ymm4,%ymm0,%ymm0 7410 vpxor %ymm0,%ymm12,%ymm12 7411 vpshufb .Lrol16(%rip),%ymm12,%ymm12 7412 vpaddd %ymm12,%ymm8,%ymm8 7413 vpxor %ymm8,%ymm4,%ymm4 7414 vpsrld $20,%ymm4,%ymm3 7415 vpslld $12,%ymm4,%ymm4 7416 vpxor %ymm3,%ymm4,%ymm4 7417 vpaddd %ymm4,%ymm0,%ymm0 7418 vpxor %ymm0,%ymm12,%ymm12 7419 vpshufb .Lrol8(%rip),%ymm12,%ymm12 7420 vpaddd %ymm12,%ymm8,%ymm8 7421 vpxor %ymm8,%ymm4,%ymm4 7422 vpslld $7,%ymm4,%ymm3 7423 vpsrld $25,%ymm4,%ymm4 7424 vpxor %ymm3,%ymm4,%ymm4 7425 vpalignr $4,%ymm12,%ymm12,%ymm12 7426 vpalignr $8,%ymm8,%ymm8,%ymm8 7427 vpalignr $12,%ymm4,%ymm4,%ymm4 7428 addq 0+16(%rdi),%r10 7429 adcq 8+16(%rdi),%r11 7430 adcq $1,%r12 7431 movq 0+0+0(%rbp),%rdx 7432 movq %rdx,%r15 7433 mulxq %r10,%r13,%r14 7434 mulxq %r11,%rax,%rdx 7435 imulq %r12,%r15 7436 addq %rax,%r14 7437 adcq %rdx,%r15 7438 movq 8+0+0(%rbp),%rdx 7439 mulxq %r10,%r10,%rax 7440 addq %r10,%r14 7441 mulxq %r11,%r11,%r9 7442 adcq %r11,%r15 7443 adcq $0,%r9 7444 imulq %r12,%rdx 7445 addq 
%rax,%r15 7446 adcq %rdx,%r9 7447 movq %r13,%r10 7448 movq %r14,%r11 7449 movq %r15,%r12 7450 andq $3,%r12 7451 movq %r15,%r13 7452 andq $-4,%r13 7453 movq %r9,%r14 7454 shrdq $2,%r9,%r15 7455 shrq $2,%r9 7456 addq %r13,%r15 7457 adcq %r14,%r9 7458 addq %r15,%r10 7459 adcq %r9,%r11 7460 adcq $0,%r12 7461 7462 leaq 32(%rdi),%rdi 7463 decq %rcx 7464 jg .Lseal_avx2_tail_128_rounds_and_3xhash 7465 decq %r8 7466 jge .Lseal_avx2_tail_128_rounds_and_2xhash 7467 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 7468 vpaddd 0+64(%rbp),%ymm4,%ymm4 7469 vpaddd 0+96(%rbp),%ymm8,%ymm8 7470 vpaddd 0+160(%rbp),%ymm12,%ymm12 7471 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 7472 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 7473 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 7474 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 7475 vmovdqa %ymm3,%ymm8 7476 7477 jmp .Lseal_avx2_short_loop 7478 7479.Lseal_avx2_tail_256: 7480 vmovdqa .Lchacha20_consts(%rip),%ymm0 7481 vmovdqa 0+64(%rbp),%ymm4 7482 vmovdqa 0+96(%rbp),%ymm8 7483 vmovdqa %ymm0,%ymm1 7484 vmovdqa %ymm4,%ymm5 7485 vmovdqa %ymm8,%ymm9 7486 vmovdqa .Lavx2_inc(%rip),%ymm12 7487 vpaddd 0+160(%rbp),%ymm12,%ymm13 7488 vpaddd %ymm13,%ymm12,%ymm12 7489 vmovdqa %ymm12,0+160(%rbp) 7490 vmovdqa %ymm13,0+192(%rbp) 7491 7492.Lseal_avx2_tail_256_rounds_and_3xhash: 7493 addq 0+0(%rdi),%r10 7494 adcq 8+0(%rdi),%r11 7495 adcq $1,%r12 7496 movq 0+0+0(%rbp),%rax 7497 movq %rax,%r15 7498 mulq %r10 7499 movq %rax,%r13 7500 movq %rdx,%r14 7501 movq 0+0+0(%rbp),%rax 7502 mulq %r11 7503 imulq %r12,%r15 7504 addq %rax,%r14 7505 adcq %rdx,%r15 7506 movq 8+0+0(%rbp),%rax 7507 movq %rax,%r9 7508 mulq %r10 7509 addq %rax,%r14 7510 adcq $0,%rdx 7511 movq %rdx,%r10 7512 movq 8+0+0(%rbp),%rax 7513 mulq %r11 7514 addq %rax,%r15 7515 adcq $0,%rdx 7516 imulq %r12,%r9 7517 addq %r10,%r15 7518 adcq %rdx,%r9 7519 movq %r13,%r10 7520 movq %r14,%r11 7521 movq %r15,%r12 7522 andq $3,%r12 7523 movq %r15,%r13 7524 andq $-4,%r13 7525 movq %r9,%r14 7526 shrdq $2,%r9,%r15 7527 shrq $2,%r9 7528 addq %r13,%r15 7529 adcq %r14,%r9 7530 addq %r15,%r10 7531 adcq %r9,%r11 7532 adcq $0,%r12 7533 7534 leaq 16(%rdi),%rdi 7535.Lseal_avx2_tail_256_rounds_and_2xhash: 7536 vpaddd %ymm4,%ymm0,%ymm0 7537 vpxor %ymm0,%ymm12,%ymm12 7538 vpshufb .Lrol16(%rip),%ymm12,%ymm12 7539 vpaddd %ymm12,%ymm8,%ymm8 7540 vpxor %ymm8,%ymm4,%ymm4 7541 vpsrld $20,%ymm4,%ymm3 7542 vpslld $12,%ymm4,%ymm4 7543 vpxor %ymm3,%ymm4,%ymm4 7544 vpaddd %ymm4,%ymm0,%ymm0 7545 vpxor %ymm0,%ymm12,%ymm12 7546 vpshufb .Lrol8(%rip),%ymm12,%ymm12 7547 vpaddd %ymm12,%ymm8,%ymm8 7548 vpxor %ymm8,%ymm4,%ymm4 7549 vpslld $7,%ymm4,%ymm3 7550 vpsrld $25,%ymm4,%ymm4 7551 vpxor %ymm3,%ymm4,%ymm4 7552 vpalignr $12,%ymm12,%ymm12,%ymm12 7553 vpalignr $8,%ymm8,%ymm8,%ymm8 7554 vpalignr $4,%ymm4,%ymm4,%ymm4 7555 vpaddd %ymm5,%ymm1,%ymm1 7556 vpxor %ymm1,%ymm13,%ymm13 7557 vpshufb .Lrol16(%rip),%ymm13,%ymm13 7558 vpaddd %ymm13,%ymm9,%ymm9 7559 vpxor %ymm9,%ymm5,%ymm5 7560 vpsrld $20,%ymm5,%ymm3 7561 vpslld $12,%ymm5,%ymm5 7562 vpxor %ymm3,%ymm5,%ymm5 7563 vpaddd %ymm5,%ymm1,%ymm1 7564 vpxor %ymm1,%ymm13,%ymm13 7565 vpshufb .Lrol8(%rip),%ymm13,%ymm13 7566 vpaddd %ymm13,%ymm9,%ymm9 7567 vpxor %ymm9,%ymm5,%ymm5 7568 vpslld $7,%ymm5,%ymm3 7569 vpsrld $25,%ymm5,%ymm5 7570 vpxor %ymm3,%ymm5,%ymm5 7571 vpalignr $12,%ymm13,%ymm13,%ymm13 7572 vpalignr $8,%ymm9,%ymm9,%ymm9 7573 vpalignr $4,%ymm5,%ymm5,%ymm5 7574 addq 0+0(%rdi),%r10 7575 adcq 8+0(%rdi),%r11 7576 adcq $1,%r12 7577 movq 0+0+0(%rbp),%rax 7578 movq %rax,%r15 7579 mulq %r10 7580 movq %rax,%r13 7581 movq %rdx,%r14 7582 movq 0+0+0(%rbp),%rax 7583 mulq %r11 
7584 imulq %r12,%r15 7585 addq %rax,%r14 7586 adcq %rdx,%r15 7587 movq 8+0+0(%rbp),%rax 7588 movq %rax,%r9 7589 mulq %r10 7590 addq %rax,%r14 7591 adcq $0,%rdx 7592 movq %rdx,%r10 7593 movq 8+0+0(%rbp),%rax 7594 mulq %r11 7595 addq %rax,%r15 7596 adcq $0,%rdx 7597 imulq %r12,%r9 7598 addq %r10,%r15 7599 adcq %rdx,%r9 7600 movq %r13,%r10 7601 movq %r14,%r11 7602 movq %r15,%r12 7603 andq $3,%r12 7604 movq %r15,%r13 7605 andq $-4,%r13 7606 movq %r9,%r14 7607 shrdq $2,%r9,%r15 7608 shrq $2,%r9 7609 addq %r13,%r15 7610 adcq %r14,%r9 7611 addq %r15,%r10 7612 adcq %r9,%r11 7613 adcq $0,%r12 7614 vpaddd %ymm4,%ymm0,%ymm0 7615 vpxor %ymm0,%ymm12,%ymm12 7616 vpshufb .Lrol16(%rip),%ymm12,%ymm12 7617 vpaddd %ymm12,%ymm8,%ymm8 7618 vpxor %ymm8,%ymm4,%ymm4 7619 vpsrld $20,%ymm4,%ymm3 7620 vpslld $12,%ymm4,%ymm4 7621 vpxor %ymm3,%ymm4,%ymm4 7622 vpaddd %ymm4,%ymm0,%ymm0 7623 vpxor %ymm0,%ymm12,%ymm12 7624 vpshufb .Lrol8(%rip),%ymm12,%ymm12 7625 vpaddd %ymm12,%ymm8,%ymm8 7626 vpxor %ymm8,%ymm4,%ymm4 7627 vpslld $7,%ymm4,%ymm3 7628 vpsrld $25,%ymm4,%ymm4 7629 vpxor %ymm3,%ymm4,%ymm4 7630 vpalignr $4,%ymm12,%ymm12,%ymm12 7631 vpalignr $8,%ymm8,%ymm8,%ymm8 7632 vpalignr $12,%ymm4,%ymm4,%ymm4 7633 vpaddd %ymm5,%ymm1,%ymm1 7634 vpxor %ymm1,%ymm13,%ymm13 7635 vpshufb .Lrol16(%rip),%ymm13,%ymm13 7636 vpaddd %ymm13,%ymm9,%ymm9 7637 vpxor %ymm9,%ymm5,%ymm5 7638 vpsrld $20,%ymm5,%ymm3 7639 vpslld $12,%ymm5,%ymm5 7640 vpxor %ymm3,%ymm5,%ymm5 7641 vpaddd %ymm5,%ymm1,%ymm1 7642 vpxor %ymm1,%ymm13,%ymm13 7643 vpshufb .Lrol8(%rip),%ymm13,%ymm13 7644 vpaddd %ymm13,%ymm9,%ymm9 7645 vpxor %ymm9,%ymm5,%ymm5 7646 vpslld $7,%ymm5,%ymm3 7647 vpsrld $25,%ymm5,%ymm5 7648 vpxor %ymm3,%ymm5,%ymm5 7649 vpalignr $4,%ymm13,%ymm13,%ymm13 7650 vpalignr $8,%ymm9,%ymm9,%ymm9 7651 vpalignr $12,%ymm5,%ymm5,%ymm5 7652 addq 0+16(%rdi),%r10 7653 adcq 8+16(%rdi),%r11 7654 adcq $1,%r12 7655 movq 0+0+0(%rbp),%rax 7656 movq %rax,%r15 7657 mulq %r10 7658 movq %rax,%r13 7659 movq %rdx,%r14 7660 movq 0+0+0(%rbp),%rax 7661 mulq %r11 7662 imulq %r12,%r15 7663 addq %rax,%r14 7664 adcq %rdx,%r15 7665 movq 8+0+0(%rbp),%rax 7666 movq %rax,%r9 7667 mulq %r10 7668 addq %rax,%r14 7669 adcq $0,%rdx 7670 movq %rdx,%r10 7671 movq 8+0+0(%rbp),%rax 7672 mulq %r11 7673 addq %rax,%r15 7674 adcq $0,%rdx 7675 imulq %r12,%r9 7676 addq %r10,%r15 7677 adcq %rdx,%r9 7678 movq %r13,%r10 7679 movq %r14,%r11 7680 movq %r15,%r12 7681 andq $3,%r12 7682 movq %r15,%r13 7683 andq $-4,%r13 7684 movq %r9,%r14 7685 shrdq $2,%r9,%r15 7686 shrq $2,%r9 7687 addq %r13,%r15 7688 adcq %r14,%r9 7689 addq %r15,%r10 7690 adcq %r9,%r11 7691 adcq $0,%r12 7692 7693 leaq 32(%rdi),%rdi 7694 decq %rcx 7695 jg .Lseal_avx2_tail_256_rounds_and_3xhash 7696 decq %r8 7697 jge .Lseal_avx2_tail_256_rounds_and_2xhash 7698 vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 7699 vpaddd 0+64(%rbp),%ymm5,%ymm5 7700 vpaddd 0+96(%rbp),%ymm9,%ymm9 7701 vpaddd 0+192(%rbp),%ymm13,%ymm13 7702 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 7703 vpaddd 0+64(%rbp),%ymm4,%ymm4 7704 vpaddd 0+96(%rbp),%ymm8,%ymm8 7705 vpaddd 0+160(%rbp),%ymm12,%ymm12 7706 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 7707 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 7708 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 7709 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 7710 vpxor 0+0(%rsi),%ymm3,%ymm3 7711 vpxor 32+0(%rsi),%ymm1,%ymm1 7712 vpxor 64+0(%rsi),%ymm5,%ymm5 7713 vpxor 96+0(%rsi),%ymm9,%ymm9 7714 vmovdqu %ymm3,0+0(%rdi) 7715 vmovdqu %ymm1,32+0(%rdi) 7716 vmovdqu %ymm5,64+0(%rdi) 7717 vmovdqu %ymm9,96+0(%rdi) 7718 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 7719 vperm2i128 
$0x02,%ymm0,%ymm4,%ymm0 7720 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 7721 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 7722 vmovdqa %ymm3,%ymm8 7723 7724 movq $128,%rcx 7725 leaq 128(%rsi),%rsi 7726 subq $128,%rbx 7727 jmp .Lseal_avx2_short_hash_remainder 7728 7729.Lseal_avx2_tail_384: 7730 vmovdqa .Lchacha20_consts(%rip),%ymm0 7731 vmovdqa 0+64(%rbp),%ymm4 7732 vmovdqa 0+96(%rbp),%ymm8 7733 vmovdqa %ymm0,%ymm1 7734 vmovdqa %ymm4,%ymm5 7735 vmovdqa %ymm8,%ymm9 7736 vmovdqa %ymm0,%ymm2 7737 vmovdqa %ymm4,%ymm6 7738 vmovdqa %ymm8,%ymm10 7739 vmovdqa .Lavx2_inc(%rip),%ymm12 7740 vpaddd 0+160(%rbp),%ymm12,%ymm14 7741 vpaddd %ymm14,%ymm12,%ymm13 7742 vpaddd %ymm13,%ymm12,%ymm12 7743 vmovdqa %ymm12,0+160(%rbp) 7744 vmovdqa %ymm13,0+192(%rbp) 7745 vmovdqa %ymm14,0+224(%rbp) 7746 7747.Lseal_avx2_tail_384_rounds_and_3xhash: 7748 addq 0+0(%rdi),%r10 7749 adcq 8+0(%rdi),%r11 7750 adcq $1,%r12 7751 movq 0+0+0(%rbp),%rax 7752 movq %rax,%r15 7753 mulq %r10 7754 movq %rax,%r13 7755 movq %rdx,%r14 7756 movq 0+0+0(%rbp),%rax 7757 mulq %r11 7758 imulq %r12,%r15 7759 addq %rax,%r14 7760 adcq %rdx,%r15 7761 movq 8+0+0(%rbp),%rax 7762 movq %rax,%r9 7763 mulq %r10 7764 addq %rax,%r14 7765 adcq $0,%rdx 7766 movq %rdx,%r10 7767 movq 8+0+0(%rbp),%rax 7768 mulq %r11 7769 addq %rax,%r15 7770 adcq $0,%rdx 7771 imulq %r12,%r9 7772 addq %r10,%r15 7773 adcq %rdx,%r9 7774 movq %r13,%r10 7775 movq %r14,%r11 7776 movq %r15,%r12 7777 andq $3,%r12 7778 movq %r15,%r13 7779 andq $-4,%r13 7780 movq %r9,%r14 7781 shrdq $2,%r9,%r15 7782 shrq $2,%r9 7783 addq %r13,%r15 7784 adcq %r14,%r9 7785 addq %r15,%r10 7786 adcq %r9,%r11 7787 adcq $0,%r12 7788 7789 leaq 16(%rdi),%rdi 7790.Lseal_avx2_tail_384_rounds_and_2xhash: 7791 vpaddd %ymm4,%ymm0,%ymm0 7792 vpxor %ymm0,%ymm12,%ymm12 7793 vpshufb .Lrol16(%rip),%ymm12,%ymm12 7794 vpaddd %ymm12,%ymm8,%ymm8 7795 vpxor %ymm8,%ymm4,%ymm4 7796 vpsrld $20,%ymm4,%ymm3 7797 vpslld $12,%ymm4,%ymm4 7798 vpxor %ymm3,%ymm4,%ymm4 7799 vpaddd %ymm4,%ymm0,%ymm0 7800 vpxor %ymm0,%ymm12,%ymm12 7801 vpshufb .Lrol8(%rip),%ymm12,%ymm12 7802 vpaddd %ymm12,%ymm8,%ymm8 7803 vpxor %ymm8,%ymm4,%ymm4 7804 vpslld $7,%ymm4,%ymm3 7805 vpsrld $25,%ymm4,%ymm4 7806 vpxor %ymm3,%ymm4,%ymm4 7807 vpalignr $12,%ymm12,%ymm12,%ymm12 7808 vpalignr $8,%ymm8,%ymm8,%ymm8 7809 vpalignr $4,%ymm4,%ymm4,%ymm4 7810 vpaddd %ymm5,%ymm1,%ymm1 7811 vpxor %ymm1,%ymm13,%ymm13 7812 vpshufb .Lrol16(%rip),%ymm13,%ymm13 7813 vpaddd %ymm13,%ymm9,%ymm9 7814 vpxor %ymm9,%ymm5,%ymm5 7815 vpsrld $20,%ymm5,%ymm3 7816 vpslld $12,%ymm5,%ymm5 7817 vpxor %ymm3,%ymm5,%ymm5 7818 vpaddd %ymm5,%ymm1,%ymm1 7819 vpxor %ymm1,%ymm13,%ymm13 7820 vpshufb .Lrol8(%rip),%ymm13,%ymm13 7821 vpaddd %ymm13,%ymm9,%ymm9 7822 vpxor %ymm9,%ymm5,%ymm5 7823 vpslld $7,%ymm5,%ymm3 7824 vpsrld $25,%ymm5,%ymm5 7825 vpxor %ymm3,%ymm5,%ymm5 7826 vpalignr $12,%ymm13,%ymm13,%ymm13 7827 vpalignr $8,%ymm9,%ymm9,%ymm9 7828 vpalignr $4,%ymm5,%ymm5,%ymm5 7829 addq 0+0(%rdi),%r10 7830 adcq 8+0(%rdi),%r11 7831 adcq $1,%r12 7832 movq 0+0+0(%rbp),%rax 7833 movq %rax,%r15 7834 mulq %r10 7835 movq %rax,%r13 7836 movq %rdx,%r14 7837 movq 0+0+0(%rbp),%rax 7838 mulq %r11 7839 imulq %r12,%r15 7840 addq %rax,%r14 7841 adcq %rdx,%r15 7842 movq 8+0+0(%rbp),%rax 7843 movq %rax,%r9 7844 mulq %r10 7845 addq %rax,%r14 7846 adcq $0,%rdx 7847 movq %rdx,%r10 7848 movq 8+0+0(%rbp),%rax 7849 mulq %r11 7850 addq %rax,%r15 7851 adcq $0,%rdx 7852 imulq %r12,%r9 7853 addq %r10,%r15 7854 adcq %rdx,%r9 7855 movq %r13,%r10 7856 movq %r14,%r11 7857 movq %r15,%r12 7858 andq $3,%r12 7859 movq %r15,%r13 7860 andq $-4,%r13 7861 movq 
%r9,%r14 7862 shrdq $2,%r9,%r15 7863 shrq $2,%r9 7864 addq %r13,%r15 7865 adcq %r14,%r9 7866 addq %r15,%r10 7867 adcq %r9,%r11 7868 adcq $0,%r12 7869 vpaddd %ymm6,%ymm2,%ymm2 7870 vpxor %ymm2,%ymm14,%ymm14 7871 vpshufb .Lrol16(%rip),%ymm14,%ymm14 7872 vpaddd %ymm14,%ymm10,%ymm10 7873 vpxor %ymm10,%ymm6,%ymm6 7874 vpsrld $20,%ymm6,%ymm3 7875 vpslld $12,%ymm6,%ymm6 7876 vpxor %ymm3,%ymm6,%ymm6 7877 vpaddd %ymm6,%ymm2,%ymm2 7878 vpxor %ymm2,%ymm14,%ymm14 7879 vpshufb .Lrol8(%rip),%ymm14,%ymm14 7880 vpaddd %ymm14,%ymm10,%ymm10 7881 vpxor %ymm10,%ymm6,%ymm6 7882 vpslld $7,%ymm6,%ymm3 7883 vpsrld $25,%ymm6,%ymm6 7884 vpxor %ymm3,%ymm6,%ymm6 7885 vpalignr $12,%ymm14,%ymm14,%ymm14 7886 vpalignr $8,%ymm10,%ymm10,%ymm10 7887 vpalignr $4,%ymm6,%ymm6,%ymm6 7888 vpaddd %ymm4,%ymm0,%ymm0 7889 vpxor %ymm0,%ymm12,%ymm12 7890 vpshufb .Lrol16(%rip),%ymm12,%ymm12 7891 vpaddd %ymm12,%ymm8,%ymm8 7892 vpxor %ymm8,%ymm4,%ymm4 7893 vpsrld $20,%ymm4,%ymm3 7894 vpslld $12,%ymm4,%ymm4 7895 vpxor %ymm3,%ymm4,%ymm4 7896 vpaddd %ymm4,%ymm0,%ymm0 7897 vpxor %ymm0,%ymm12,%ymm12 7898 vpshufb .Lrol8(%rip),%ymm12,%ymm12 7899 vpaddd %ymm12,%ymm8,%ymm8 7900 vpxor %ymm8,%ymm4,%ymm4 7901 vpslld $7,%ymm4,%ymm3 7902 vpsrld $25,%ymm4,%ymm4 7903 vpxor %ymm3,%ymm4,%ymm4 7904 vpalignr $4,%ymm12,%ymm12,%ymm12 7905 vpalignr $8,%ymm8,%ymm8,%ymm8 7906 vpalignr $12,%ymm4,%ymm4,%ymm4 7907 addq 0+16(%rdi),%r10 7908 adcq 8+16(%rdi),%r11 7909 adcq $1,%r12 7910 movq 0+0+0(%rbp),%rax 7911 movq %rax,%r15 7912 mulq %r10 7913 movq %rax,%r13 7914 movq %rdx,%r14 7915 movq 0+0+0(%rbp),%rax 7916 mulq %r11 7917 imulq %r12,%r15 7918 addq %rax,%r14 7919 adcq %rdx,%r15 7920 movq 8+0+0(%rbp),%rax 7921 movq %rax,%r9 7922 mulq %r10 7923 addq %rax,%r14 7924 adcq $0,%rdx 7925 movq %rdx,%r10 7926 movq 8+0+0(%rbp),%rax 7927 mulq %r11 7928 addq %rax,%r15 7929 adcq $0,%rdx 7930 imulq %r12,%r9 7931 addq %r10,%r15 7932 adcq %rdx,%r9 7933 movq %r13,%r10 7934 movq %r14,%r11 7935 movq %r15,%r12 7936 andq $3,%r12 7937 movq %r15,%r13 7938 andq $-4,%r13 7939 movq %r9,%r14 7940 shrdq $2,%r9,%r15 7941 shrq $2,%r9 7942 addq %r13,%r15 7943 adcq %r14,%r9 7944 addq %r15,%r10 7945 adcq %r9,%r11 7946 adcq $0,%r12 7947 vpaddd %ymm5,%ymm1,%ymm1 7948 vpxor %ymm1,%ymm13,%ymm13 7949 vpshufb .Lrol16(%rip),%ymm13,%ymm13 7950 vpaddd %ymm13,%ymm9,%ymm9 7951 vpxor %ymm9,%ymm5,%ymm5 7952 vpsrld $20,%ymm5,%ymm3 7953 vpslld $12,%ymm5,%ymm5 7954 vpxor %ymm3,%ymm5,%ymm5 7955 vpaddd %ymm5,%ymm1,%ymm1 7956 vpxor %ymm1,%ymm13,%ymm13 7957 vpshufb .Lrol8(%rip),%ymm13,%ymm13 7958 vpaddd %ymm13,%ymm9,%ymm9 7959 vpxor %ymm9,%ymm5,%ymm5 7960 vpslld $7,%ymm5,%ymm3 7961 vpsrld $25,%ymm5,%ymm5 7962 vpxor %ymm3,%ymm5,%ymm5 7963 vpalignr $4,%ymm13,%ymm13,%ymm13 7964 vpalignr $8,%ymm9,%ymm9,%ymm9 7965 vpalignr $12,%ymm5,%ymm5,%ymm5 7966 vpaddd %ymm6,%ymm2,%ymm2 7967 vpxor %ymm2,%ymm14,%ymm14 7968 vpshufb .Lrol16(%rip),%ymm14,%ymm14 7969 vpaddd %ymm14,%ymm10,%ymm10 7970 vpxor %ymm10,%ymm6,%ymm6 7971 vpsrld $20,%ymm6,%ymm3 7972 vpslld $12,%ymm6,%ymm6 7973 vpxor %ymm3,%ymm6,%ymm6 7974 vpaddd %ymm6,%ymm2,%ymm2 7975 vpxor %ymm2,%ymm14,%ymm14 7976 vpshufb .Lrol8(%rip),%ymm14,%ymm14 7977 vpaddd %ymm14,%ymm10,%ymm10 7978 vpxor %ymm10,%ymm6,%ymm6 7979 vpslld $7,%ymm6,%ymm3 7980 vpsrld $25,%ymm6,%ymm6 7981 vpxor %ymm3,%ymm6,%ymm6 7982 vpalignr $4,%ymm14,%ymm14,%ymm14 7983 vpalignr $8,%ymm10,%ymm10,%ymm10 7984 vpalignr $12,%ymm6,%ymm6,%ymm6 7985 7986 leaq 32(%rdi),%rdi 7987 decq %rcx 7988 jg .Lseal_avx2_tail_384_rounds_and_3xhash 7989 decq %r8 7990 jge .Lseal_avx2_tail_384_rounds_and_2xhash 7991 vpaddd 
.Lchacha20_consts(%rip),%ymm2,%ymm2 7992 vpaddd 0+64(%rbp),%ymm6,%ymm6 7993 vpaddd 0+96(%rbp),%ymm10,%ymm10 7994 vpaddd 0+224(%rbp),%ymm14,%ymm14 7995 vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 7996 vpaddd 0+64(%rbp),%ymm5,%ymm5 7997 vpaddd 0+96(%rbp),%ymm9,%ymm9 7998 vpaddd 0+192(%rbp),%ymm13,%ymm13 7999 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 8000 vpaddd 0+64(%rbp),%ymm4,%ymm4 8001 vpaddd 0+96(%rbp),%ymm8,%ymm8 8002 vpaddd 0+160(%rbp),%ymm12,%ymm12 8003 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 8004 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 8005 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 8006 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 8007 vpxor 0+0(%rsi),%ymm3,%ymm3 8008 vpxor 32+0(%rsi),%ymm2,%ymm2 8009 vpxor 64+0(%rsi),%ymm6,%ymm6 8010 vpxor 96+0(%rsi),%ymm10,%ymm10 8011 vmovdqu %ymm3,0+0(%rdi) 8012 vmovdqu %ymm2,32+0(%rdi) 8013 vmovdqu %ymm6,64+0(%rdi) 8014 vmovdqu %ymm10,96+0(%rdi) 8015 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 8016 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 8017 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 8018 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 8019 vpxor 0+128(%rsi),%ymm3,%ymm3 8020 vpxor 32+128(%rsi),%ymm1,%ymm1 8021 vpxor 64+128(%rsi),%ymm5,%ymm5 8022 vpxor 96+128(%rsi),%ymm9,%ymm9 8023 vmovdqu %ymm3,0+128(%rdi) 8024 vmovdqu %ymm1,32+128(%rdi) 8025 vmovdqu %ymm5,64+128(%rdi) 8026 vmovdqu %ymm9,96+128(%rdi) 8027 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 8028 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 8029 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 8030 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 8031 vmovdqa %ymm3,%ymm8 8032 8033 movq $256,%rcx 8034 leaq 256(%rsi),%rsi 8035 subq $256,%rbx 8036 jmp .Lseal_avx2_short_hash_remainder 8037 8038.Lseal_avx2_tail_512: 8039 vmovdqa .Lchacha20_consts(%rip),%ymm0 8040 vmovdqa 0+64(%rbp),%ymm4 8041 vmovdqa 0+96(%rbp),%ymm8 8042 vmovdqa %ymm0,%ymm1 8043 vmovdqa %ymm4,%ymm5 8044 vmovdqa %ymm8,%ymm9 8045 vmovdqa %ymm0,%ymm2 8046 vmovdqa %ymm4,%ymm6 8047 vmovdqa %ymm8,%ymm10 8048 vmovdqa %ymm0,%ymm3 8049 vmovdqa %ymm4,%ymm7 8050 vmovdqa %ymm8,%ymm11 8051 vmovdqa .Lavx2_inc(%rip),%ymm12 8052 vpaddd 0+160(%rbp),%ymm12,%ymm15 8053 vpaddd %ymm15,%ymm12,%ymm14 8054 vpaddd %ymm14,%ymm12,%ymm13 8055 vpaddd %ymm13,%ymm12,%ymm12 8056 vmovdqa %ymm15,0+256(%rbp) 8057 vmovdqa %ymm14,0+224(%rbp) 8058 vmovdqa %ymm13,0+192(%rbp) 8059 vmovdqa %ymm12,0+160(%rbp) 8060 8061.Lseal_avx2_tail_512_rounds_and_3xhash: 8062 addq 0+0(%rdi),%r10 8063 adcq 8+0(%rdi),%r11 8064 adcq $1,%r12 8065 movq 0+0+0(%rbp),%rdx 8066 movq %rdx,%r15 8067 mulxq %r10,%r13,%r14 8068 mulxq %r11,%rax,%rdx 8069 imulq %r12,%r15 8070 addq %rax,%r14 8071 adcq %rdx,%r15 8072 movq 8+0+0(%rbp),%rdx 8073 mulxq %r10,%r10,%rax 8074 addq %r10,%r14 8075 mulxq %r11,%r11,%r9 8076 adcq %r11,%r15 8077 adcq $0,%r9 8078 imulq %r12,%rdx 8079 addq %rax,%r15 8080 adcq %rdx,%r9 8081 movq %r13,%r10 8082 movq %r14,%r11 8083 movq %r15,%r12 8084 andq $3,%r12 8085 movq %r15,%r13 8086 andq $-4,%r13 8087 movq %r9,%r14 8088 shrdq $2,%r9,%r15 8089 shrq $2,%r9 8090 addq %r13,%r15 8091 adcq %r14,%r9 8092 addq %r15,%r10 8093 adcq %r9,%r11 8094 adcq $0,%r12 8095 8096 leaq 16(%rdi),%rdi 8097.Lseal_avx2_tail_512_rounds_and_2xhash: 8098 vmovdqa %ymm8,0+128(%rbp) 8099 vmovdqa .Lrol16(%rip),%ymm8 8100 vpaddd %ymm7,%ymm3,%ymm3 8101 vpaddd %ymm6,%ymm2,%ymm2 8102 vpaddd %ymm5,%ymm1,%ymm1 8103 vpaddd %ymm4,%ymm0,%ymm0 8104 vpxor %ymm3,%ymm15,%ymm15 8105 vpxor %ymm2,%ymm14,%ymm14 8106 vpxor %ymm1,%ymm13,%ymm13 8107 vpxor %ymm0,%ymm12,%ymm12 8108 vpshufb %ymm8,%ymm15,%ymm15 8109 vpshufb %ymm8,%ymm14,%ymm14 8110 vpshufb %ymm8,%ymm13,%ymm13 8111 vpshufb %ymm8,%ymm12,%ymm12 8112 vpaddd 
%ymm15,%ymm11,%ymm11 8113 vpaddd %ymm14,%ymm10,%ymm10 8114 vpaddd %ymm13,%ymm9,%ymm9 8115 vpaddd 0+128(%rbp),%ymm12,%ymm8 8116 vpxor %ymm11,%ymm7,%ymm7 8117 vpxor %ymm10,%ymm6,%ymm6 8118 addq 0+0(%rdi),%r10 8119 adcq 8+0(%rdi),%r11 8120 adcq $1,%r12 8121 vpxor %ymm9,%ymm5,%ymm5 8122 vpxor %ymm8,%ymm4,%ymm4 8123 vmovdqa %ymm8,0+128(%rbp) 8124 vpsrld $20,%ymm7,%ymm8 8125 vpslld $32-20,%ymm7,%ymm7 8126 vpxor %ymm8,%ymm7,%ymm7 8127 vpsrld $20,%ymm6,%ymm8 8128 vpslld $32-20,%ymm6,%ymm6 8129 vpxor %ymm8,%ymm6,%ymm6 8130 vpsrld $20,%ymm5,%ymm8 8131 vpslld $32-20,%ymm5,%ymm5 8132 vpxor %ymm8,%ymm5,%ymm5 8133 vpsrld $20,%ymm4,%ymm8 8134 vpslld $32-20,%ymm4,%ymm4 8135 vpxor %ymm8,%ymm4,%ymm4 8136 vmovdqa .Lrol8(%rip),%ymm8 8137 vpaddd %ymm7,%ymm3,%ymm3 8138 vpaddd %ymm6,%ymm2,%ymm2 8139 vpaddd %ymm5,%ymm1,%ymm1 8140 vpaddd %ymm4,%ymm0,%ymm0 8141 movq 0+0+0(%rbp),%rdx 8142 movq %rdx,%r15 8143 mulxq %r10,%r13,%r14 8144 mulxq %r11,%rax,%rdx 8145 imulq %r12,%r15 8146 addq %rax,%r14 8147 adcq %rdx,%r15 8148 vpxor %ymm3,%ymm15,%ymm15 8149 vpxor %ymm2,%ymm14,%ymm14 8150 vpxor %ymm1,%ymm13,%ymm13 8151 vpxor %ymm0,%ymm12,%ymm12 8152 vpshufb %ymm8,%ymm15,%ymm15 8153 vpshufb %ymm8,%ymm14,%ymm14 8154 vpshufb %ymm8,%ymm13,%ymm13 8155 vpshufb %ymm8,%ymm12,%ymm12 8156 vpaddd %ymm15,%ymm11,%ymm11 8157 vpaddd %ymm14,%ymm10,%ymm10 8158 vpaddd %ymm13,%ymm9,%ymm9 8159 vpaddd 0+128(%rbp),%ymm12,%ymm8 8160 vpxor %ymm11,%ymm7,%ymm7 8161 vpxor %ymm10,%ymm6,%ymm6 8162 vpxor %ymm9,%ymm5,%ymm5 8163 vpxor %ymm8,%ymm4,%ymm4 8164 vmovdqa %ymm8,0+128(%rbp) 8165 vpsrld $25,%ymm7,%ymm8 8166 vpslld $32-25,%ymm7,%ymm7 8167 vpxor %ymm8,%ymm7,%ymm7 8168 movq 8+0+0(%rbp),%rdx 8169 mulxq %r10,%r10,%rax 8170 addq %r10,%r14 8171 mulxq %r11,%r11,%r9 8172 adcq %r11,%r15 8173 adcq $0,%r9 8174 imulq %r12,%rdx 8175 vpsrld $25,%ymm6,%ymm8 8176 vpslld $32-25,%ymm6,%ymm6 8177 vpxor %ymm8,%ymm6,%ymm6 8178 vpsrld $25,%ymm5,%ymm8 8179 vpslld $32-25,%ymm5,%ymm5 8180 vpxor %ymm8,%ymm5,%ymm5 8181 vpsrld $25,%ymm4,%ymm8 8182 vpslld $32-25,%ymm4,%ymm4 8183 vpxor %ymm8,%ymm4,%ymm4 8184 vmovdqa 0+128(%rbp),%ymm8 8185 vpalignr $4,%ymm7,%ymm7,%ymm7 8186 vpalignr $8,%ymm11,%ymm11,%ymm11 8187 vpalignr $12,%ymm15,%ymm15,%ymm15 8188 vpalignr $4,%ymm6,%ymm6,%ymm6 8189 vpalignr $8,%ymm10,%ymm10,%ymm10 8190 vpalignr $12,%ymm14,%ymm14,%ymm14 8191 vpalignr $4,%ymm5,%ymm5,%ymm5 8192 vpalignr $8,%ymm9,%ymm9,%ymm9 8193 vpalignr $12,%ymm13,%ymm13,%ymm13 8194 vpalignr $4,%ymm4,%ymm4,%ymm4 8195 addq %rax,%r15 8196 adcq %rdx,%r9 8197 vpalignr $8,%ymm8,%ymm8,%ymm8 8198 vpalignr $12,%ymm12,%ymm12,%ymm12 8199 vmovdqa %ymm8,0+128(%rbp) 8200 vmovdqa .Lrol16(%rip),%ymm8 8201 vpaddd %ymm7,%ymm3,%ymm3 8202 vpaddd %ymm6,%ymm2,%ymm2 8203 vpaddd %ymm5,%ymm1,%ymm1 8204 vpaddd %ymm4,%ymm0,%ymm0 8205 vpxor %ymm3,%ymm15,%ymm15 8206 vpxor %ymm2,%ymm14,%ymm14 8207 vpxor %ymm1,%ymm13,%ymm13 8208 vpxor %ymm0,%ymm12,%ymm12 8209 vpshufb %ymm8,%ymm15,%ymm15 8210 vpshufb %ymm8,%ymm14,%ymm14 8211 vpshufb %ymm8,%ymm13,%ymm13 8212 vpshufb %ymm8,%ymm12,%ymm12 8213 vpaddd %ymm15,%ymm11,%ymm11 8214 vpaddd %ymm14,%ymm10,%ymm10 8215 vpaddd %ymm13,%ymm9,%ymm9 8216 vpaddd 0+128(%rbp),%ymm12,%ymm8 8217 movq %r13,%r10 8218 movq %r14,%r11 8219 movq %r15,%r12 8220 andq $3,%r12 8221 movq %r15,%r13 8222 andq $-4,%r13 8223 movq %r9,%r14 8224 shrdq $2,%r9,%r15 8225 shrq $2,%r9 8226 addq %r13,%r15 8227 adcq %r14,%r9 8228 addq %r15,%r10 8229 adcq %r9,%r11 8230 adcq $0,%r12 8231 vpxor %ymm11,%ymm7,%ymm7 8232 vpxor %ymm10,%ymm6,%ymm6 8233 vpxor %ymm9,%ymm5,%ymm5 8234 vpxor %ymm8,%ymm4,%ymm4 8235 vmovdqa 
%ymm8,0+128(%rbp) 8236 vpsrld $20,%ymm7,%ymm8 8237 vpslld $32-20,%ymm7,%ymm7 8238 vpxor %ymm8,%ymm7,%ymm7 8239 vpsrld $20,%ymm6,%ymm8 8240 vpslld $32-20,%ymm6,%ymm6 8241 vpxor %ymm8,%ymm6,%ymm6 8242 vpsrld $20,%ymm5,%ymm8 8243 vpslld $32-20,%ymm5,%ymm5 8244 vpxor %ymm8,%ymm5,%ymm5 8245 vpsrld $20,%ymm4,%ymm8 8246 vpslld $32-20,%ymm4,%ymm4 8247 vpxor %ymm8,%ymm4,%ymm4 8248 vmovdqa .Lrol8(%rip),%ymm8 8249 vpaddd %ymm7,%ymm3,%ymm3 8250 vpaddd %ymm6,%ymm2,%ymm2 8251 addq 0+16(%rdi),%r10 8252 adcq 8+16(%rdi),%r11 8253 adcq $1,%r12 8254 vpaddd %ymm5,%ymm1,%ymm1 8255 vpaddd %ymm4,%ymm0,%ymm0 8256 vpxor %ymm3,%ymm15,%ymm15 8257 vpxor %ymm2,%ymm14,%ymm14 8258 vpxor %ymm1,%ymm13,%ymm13 8259 vpxor %ymm0,%ymm12,%ymm12 8260 vpshufb %ymm8,%ymm15,%ymm15 8261 vpshufb %ymm8,%ymm14,%ymm14 8262 vpshufb %ymm8,%ymm13,%ymm13 8263 vpshufb %ymm8,%ymm12,%ymm12 8264 vpaddd %ymm15,%ymm11,%ymm11 8265 vpaddd %ymm14,%ymm10,%ymm10 8266 vpaddd %ymm13,%ymm9,%ymm9 8267 vpaddd 0+128(%rbp),%ymm12,%ymm8 8268 vpxor %ymm11,%ymm7,%ymm7 8269 vpxor %ymm10,%ymm6,%ymm6 8270 vpxor %ymm9,%ymm5,%ymm5 8271 vpxor %ymm8,%ymm4,%ymm4 8272 vmovdqa %ymm8,0+128(%rbp) 8273 vpsrld $25,%ymm7,%ymm8 8274 movq 0+0+0(%rbp),%rdx 8275 movq %rdx,%r15 8276 mulxq %r10,%r13,%r14 8277 mulxq %r11,%rax,%rdx 8278 imulq %r12,%r15 8279 addq %rax,%r14 8280 adcq %rdx,%r15 8281 vpslld $32-25,%ymm7,%ymm7 8282 vpxor %ymm8,%ymm7,%ymm7 8283 vpsrld $25,%ymm6,%ymm8 8284 vpslld $32-25,%ymm6,%ymm6 8285 vpxor %ymm8,%ymm6,%ymm6 8286 vpsrld $25,%ymm5,%ymm8 8287 vpslld $32-25,%ymm5,%ymm5 8288 vpxor %ymm8,%ymm5,%ymm5 8289 vpsrld $25,%ymm4,%ymm8 8290 vpslld $32-25,%ymm4,%ymm4 8291 vpxor %ymm8,%ymm4,%ymm4 8292 vmovdqa 0+128(%rbp),%ymm8 8293 vpalignr $12,%ymm7,%ymm7,%ymm7 8294 vpalignr $8,%ymm11,%ymm11,%ymm11 8295 vpalignr $4,%ymm15,%ymm15,%ymm15 8296 vpalignr $12,%ymm6,%ymm6,%ymm6 8297 vpalignr $8,%ymm10,%ymm10,%ymm10 8298 vpalignr $4,%ymm14,%ymm14,%ymm14 8299 vpalignr $12,%ymm5,%ymm5,%ymm5 8300 vpalignr $8,%ymm9,%ymm9,%ymm9 8301 movq 8+0+0(%rbp),%rdx 8302 mulxq %r10,%r10,%rax 8303 addq %r10,%r14 8304 mulxq %r11,%r11,%r9 8305 adcq %r11,%r15 8306 adcq $0,%r9 8307 imulq %r12,%rdx 8308 vpalignr $4,%ymm13,%ymm13,%ymm13 8309 vpalignr $12,%ymm4,%ymm4,%ymm4 8310 vpalignr $8,%ymm8,%ymm8,%ymm8 8311 vpalignr $4,%ymm12,%ymm12,%ymm12 8312 8313 8314 8315 8316 8317 8318 8319 8320 8321 8322 8323 8324 8325 8326 8327 8328 addq %rax,%r15 8329 adcq %rdx,%r9 8330 8331 8332 8333 8334 8335 8336 8337 8338 8339 8340 8341 8342 8343 8344 8345 8346 8347 8348 8349 8350 movq %r13,%r10 8351 movq %r14,%r11 8352 movq %r15,%r12 8353 andq $3,%r12 8354 movq %r15,%r13 8355 andq $-4,%r13 8356 movq %r9,%r14 8357 shrdq $2,%r9,%r15 8358 shrq $2,%r9 8359 addq %r13,%r15 8360 adcq %r14,%r9 8361 addq %r15,%r10 8362 adcq %r9,%r11 8363 adcq $0,%r12 8364 8365 leaq 32(%rdi),%rdi 8366 decq %rcx 8367 jg .Lseal_avx2_tail_512_rounds_and_3xhash 8368 decq %r8 8369 jge .Lseal_avx2_tail_512_rounds_and_2xhash 8370 vpaddd .Lchacha20_consts(%rip),%ymm3,%ymm3 8371 vpaddd 0+64(%rbp),%ymm7,%ymm7 8372 vpaddd 0+96(%rbp),%ymm11,%ymm11 8373 vpaddd 0+256(%rbp),%ymm15,%ymm15 8374 vpaddd .Lchacha20_consts(%rip),%ymm2,%ymm2 8375 vpaddd 0+64(%rbp),%ymm6,%ymm6 8376 vpaddd 0+96(%rbp),%ymm10,%ymm10 8377 vpaddd 0+224(%rbp),%ymm14,%ymm14 8378 vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 8379 vpaddd 0+64(%rbp),%ymm5,%ymm5 8380 vpaddd 0+96(%rbp),%ymm9,%ymm9 8381 vpaddd 0+192(%rbp),%ymm13,%ymm13 8382 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 8383 vpaddd 0+64(%rbp),%ymm4,%ymm4 8384 vpaddd 0+96(%rbp),%ymm8,%ymm8 8385 vpaddd 0+160(%rbp),%ymm12,%ymm12 8386 8387 
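/* Tail of the 512-byte seal path: the feed-forward vpaddd instructions above
   add the saved ChaCha20 input state (constants, key and counter words kept
   on the stack at 64/96/160..256(%rbp)) back into the working registers, the
   standard final state addition. The vperm2i128/vpxor/vmovdqu sequence that
   follows appears to re-interleave the 128-bit lanes of the four-block AVX2
   state into contiguous 64-byte keystream blocks, XOR 384 bytes of plaintext
   from (%rsi) into (%rdi), and leave the last block of keystream in
   %ymm0/%ymm4/%ymm8/%ymm12 for the byte-granular .Lseal_avx2_short path. */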
vmovdqa %ymm0,0+128(%rbp) 8388 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 8389 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 8390 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 8391 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 8392 vpxor 0+0(%rsi),%ymm0,%ymm0 8393 vpxor 32+0(%rsi),%ymm3,%ymm3 8394 vpxor 64+0(%rsi),%ymm7,%ymm7 8395 vpxor 96+0(%rsi),%ymm11,%ymm11 8396 vmovdqu %ymm0,0+0(%rdi) 8397 vmovdqu %ymm3,32+0(%rdi) 8398 vmovdqu %ymm7,64+0(%rdi) 8399 vmovdqu %ymm11,96+0(%rdi) 8400 8401 vmovdqa 0+128(%rbp),%ymm0 8402 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 8403 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 8404 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 8405 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 8406 vpxor 0+128(%rsi),%ymm3,%ymm3 8407 vpxor 32+128(%rsi),%ymm2,%ymm2 8408 vpxor 64+128(%rsi),%ymm6,%ymm6 8409 vpxor 96+128(%rsi),%ymm10,%ymm10 8410 vmovdqu %ymm3,0+128(%rdi) 8411 vmovdqu %ymm2,32+128(%rdi) 8412 vmovdqu %ymm6,64+128(%rdi) 8413 vmovdqu %ymm10,96+128(%rdi) 8414 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 8415 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 8416 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 8417 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 8418 vpxor 0+256(%rsi),%ymm3,%ymm3 8419 vpxor 32+256(%rsi),%ymm1,%ymm1 8420 vpxor 64+256(%rsi),%ymm5,%ymm5 8421 vpxor 96+256(%rsi),%ymm9,%ymm9 8422 vmovdqu %ymm3,0+256(%rdi) 8423 vmovdqu %ymm1,32+256(%rdi) 8424 vmovdqu %ymm5,64+256(%rdi) 8425 vmovdqu %ymm9,96+256(%rdi) 8426 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 8427 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 8428 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 8429 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 8430 vmovdqa %ymm3,%ymm8 8431 8432 movq $384,%rcx 8433 leaq 384(%rsi),%rsi 8434 subq $384,%rbx 8435 jmp .Lseal_avx2_short_hash_remainder 8436 8437.Lseal_avx2_320: 8438 vmovdqa %ymm0,%ymm1 8439 vmovdqa %ymm0,%ymm2 8440 vmovdqa %ymm4,%ymm5 8441 vmovdqa %ymm4,%ymm6 8442 vmovdqa %ymm8,%ymm9 8443 vmovdqa %ymm8,%ymm10 8444 vpaddd .Lavx2_inc(%rip),%ymm12,%ymm13 8445 vpaddd .Lavx2_inc(%rip),%ymm13,%ymm14 8446 vmovdqa %ymm4,%ymm7 8447 vmovdqa %ymm8,%ymm11 8448 vmovdqa %ymm12,0+160(%rbp) 8449 vmovdqa %ymm13,0+192(%rbp) 8450 vmovdqa %ymm14,0+224(%rbp) 8451 movq $10,%r10 8452.Lseal_avx2_320_rounds: 8453 vpaddd %ymm4,%ymm0,%ymm0 8454 vpxor %ymm0,%ymm12,%ymm12 8455 vpshufb .Lrol16(%rip),%ymm12,%ymm12 8456 vpaddd %ymm12,%ymm8,%ymm8 8457 vpxor %ymm8,%ymm4,%ymm4 8458 vpsrld $20,%ymm4,%ymm3 8459 vpslld $12,%ymm4,%ymm4 8460 vpxor %ymm3,%ymm4,%ymm4 8461 vpaddd %ymm4,%ymm0,%ymm0 8462 vpxor %ymm0,%ymm12,%ymm12 8463 vpshufb .Lrol8(%rip),%ymm12,%ymm12 8464 vpaddd %ymm12,%ymm8,%ymm8 8465 vpxor %ymm8,%ymm4,%ymm4 8466 vpslld $7,%ymm4,%ymm3 8467 vpsrld $25,%ymm4,%ymm4 8468 vpxor %ymm3,%ymm4,%ymm4 8469 vpalignr $12,%ymm12,%ymm12,%ymm12 8470 vpalignr $8,%ymm8,%ymm8,%ymm8 8471 vpalignr $4,%ymm4,%ymm4,%ymm4 8472 vpaddd %ymm5,%ymm1,%ymm1 8473 vpxor %ymm1,%ymm13,%ymm13 8474 vpshufb .Lrol16(%rip),%ymm13,%ymm13 8475 vpaddd %ymm13,%ymm9,%ymm9 8476 vpxor %ymm9,%ymm5,%ymm5 8477 vpsrld $20,%ymm5,%ymm3 8478 vpslld $12,%ymm5,%ymm5 8479 vpxor %ymm3,%ymm5,%ymm5 8480 vpaddd %ymm5,%ymm1,%ymm1 8481 vpxor %ymm1,%ymm13,%ymm13 8482 vpshufb .Lrol8(%rip),%ymm13,%ymm13 8483 vpaddd %ymm13,%ymm9,%ymm9 8484 vpxor %ymm9,%ymm5,%ymm5 8485 vpslld $7,%ymm5,%ymm3 8486 vpsrld $25,%ymm5,%ymm5 8487 vpxor %ymm3,%ymm5,%ymm5 8488 vpalignr $12,%ymm13,%ymm13,%ymm13 8489 vpalignr $8,%ymm9,%ymm9,%ymm9 8490 vpalignr $4,%ymm5,%ymm5,%ymm5 8491 vpaddd %ymm6,%ymm2,%ymm2 8492 vpxor %ymm2,%ymm14,%ymm14 8493 vpshufb .Lrol16(%rip),%ymm14,%ymm14 8494 vpaddd %ymm14,%ymm10,%ymm10 8495 vpxor %ymm10,%ymm6,%ymm6 8496 vpsrld $20,%ymm6,%ymm3 8497 vpslld $12,%ymm6,%ymm6 8498 vpxor 
%ymm3,%ymm6,%ymm6 8499 vpaddd %ymm6,%ymm2,%ymm2 8500 vpxor %ymm2,%ymm14,%ymm14 8501 vpshufb .Lrol8(%rip),%ymm14,%ymm14 8502 vpaddd %ymm14,%ymm10,%ymm10 8503 vpxor %ymm10,%ymm6,%ymm6 8504 vpslld $7,%ymm6,%ymm3 8505 vpsrld $25,%ymm6,%ymm6 8506 vpxor %ymm3,%ymm6,%ymm6 8507 vpalignr $12,%ymm14,%ymm14,%ymm14 8508 vpalignr $8,%ymm10,%ymm10,%ymm10 8509 vpalignr $4,%ymm6,%ymm6,%ymm6 8510 vpaddd %ymm4,%ymm0,%ymm0 8511 vpxor %ymm0,%ymm12,%ymm12 8512 vpshufb .Lrol16(%rip),%ymm12,%ymm12 8513 vpaddd %ymm12,%ymm8,%ymm8 8514 vpxor %ymm8,%ymm4,%ymm4 8515 vpsrld $20,%ymm4,%ymm3 8516 vpslld $12,%ymm4,%ymm4 8517 vpxor %ymm3,%ymm4,%ymm4 8518 vpaddd %ymm4,%ymm0,%ymm0 8519 vpxor %ymm0,%ymm12,%ymm12 8520 vpshufb .Lrol8(%rip),%ymm12,%ymm12 8521 vpaddd %ymm12,%ymm8,%ymm8 8522 vpxor %ymm8,%ymm4,%ymm4 8523 vpslld $7,%ymm4,%ymm3 8524 vpsrld $25,%ymm4,%ymm4 8525 vpxor %ymm3,%ymm4,%ymm4 8526 vpalignr $4,%ymm12,%ymm12,%ymm12 8527 vpalignr $8,%ymm8,%ymm8,%ymm8 8528 vpalignr $12,%ymm4,%ymm4,%ymm4 8529 vpaddd %ymm5,%ymm1,%ymm1 8530 vpxor %ymm1,%ymm13,%ymm13 8531 vpshufb .Lrol16(%rip),%ymm13,%ymm13 8532 vpaddd %ymm13,%ymm9,%ymm9 8533 vpxor %ymm9,%ymm5,%ymm5 8534 vpsrld $20,%ymm5,%ymm3 8535 vpslld $12,%ymm5,%ymm5 8536 vpxor %ymm3,%ymm5,%ymm5 8537 vpaddd %ymm5,%ymm1,%ymm1 8538 vpxor %ymm1,%ymm13,%ymm13 8539 vpshufb .Lrol8(%rip),%ymm13,%ymm13 8540 vpaddd %ymm13,%ymm9,%ymm9 8541 vpxor %ymm9,%ymm5,%ymm5 8542 vpslld $7,%ymm5,%ymm3 8543 vpsrld $25,%ymm5,%ymm5 8544 vpxor %ymm3,%ymm5,%ymm5 8545 vpalignr $4,%ymm13,%ymm13,%ymm13 8546 vpalignr $8,%ymm9,%ymm9,%ymm9 8547 vpalignr $12,%ymm5,%ymm5,%ymm5 8548 vpaddd %ymm6,%ymm2,%ymm2 8549 vpxor %ymm2,%ymm14,%ymm14 8550 vpshufb .Lrol16(%rip),%ymm14,%ymm14 8551 vpaddd %ymm14,%ymm10,%ymm10 8552 vpxor %ymm10,%ymm6,%ymm6 8553 vpsrld $20,%ymm6,%ymm3 8554 vpslld $12,%ymm6,%ymm6 8555 vpxor %ymm3,%ymm6,%ymm6 8556 vpaddd %ymm6,%ymm2,%ymm2 8557 vpxor %ymm2,%ymm14,%ymm14 8558 vpshufb .Lrol8(%rip),%ymm14,%ymm14 8559 vpaddd %ymm14,%ymm10,%ymm10 8560 vpxor %ymm10,%ymm6,%ymm6 8561 vpslld $7,%ymm6,%ymm3 8562 vpsrld $25,%ymm6,%ymm6 8563 vpxor %ymm3,%ymm6,%ymm6 8564 vpalignr $4,%ymm14,%ymm14,%ymm14 8565 vpalignr $8,%ymm10,%ymm10,%ymm10 8566 vpalignr $12,%ymm6,%ymm6,%ymm6 8567 8568 decq %r10 8569 jne .Lseal_avx2_320_rounds 8570 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 8571 vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 8572 vpaddd .Lchacha20_consts(%rip),%ymm2,%ymm2 8573 vpaddd %ymm7,%ymm4,%ymm4 8574 vpaddd %ymm7,%ymm5,%ymm5 8575 vpaddd %ymm7,%ymm6,%ymm6 8576 vpaddd %ymm11,%ymm8,%ymm8 8577 vpaddd %ymm11,%ymm9,%ymm9 8578 vpaddd %ymm11,%ymm10,%ymm10 8579 vpaddd 0+160(%rbp),%ymm12,%ymm12 8580 vpaddd 0+192(%rbp),%ymm13,%ymm13 8581 vpaddd 0+224(%rbp),%ymm14,%ymm14 8582 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 8583 8584 vpand .Lclamp(%rip),%ymm3,%ymm3 8585 vmovdqa %ymm3,0+0(%rbp) 8586 8587 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 8588 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 8589 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 8590 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 8591 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 8592 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 8593 vperm2i128 $0x02,%ymm2,%ymm6,%ymm9 8594 vperm2i128 $0x02,%ymm10,%ymm14,%ymm13 8595 vperm2i128 $0x13,%ymm2,%ymm6,%ymm2 8596 vperm2i128 $0x13,%ymm10,%ymm14,%ymm6 8597 jmp .Lseal_avx2_short 8598 8599.Lseal_avx2_192: 8600 vmovdqa %ymm0,%ymm1 8601 vmovdqa %ymm0,%ymm2 8602 vmovdqa %ymm4,%ymm5 8603 vmovdqa %ymm4,%ymm6 8604 vmovdqa %ymm8,%ymm9 8605 vmovdqa %ymm8,%ymm10 8606 vpaddd .Lavx2_inc(%rip),%ymm12,%ymm13 8607 vmovdqa %ymm12,%ymm11 8608 vmovdqa %ymm13,%ymm15 8609 movq $10,%r10 
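/* Short-input key setup (.Lseal_avx2_192): the loop below runs 10 iterations
   of the ChaCha20 double round (a column pass, then a diagonal pass after the
   vpalignr lane rotations) on two ymm-register state copies, using vpshufb
   with .Lrol16/.Lrol8 for the 16- and 8-bit rotations and shift/xor pairs for
   the 12- and 7-bit rotations. On exit, the first 32 bytes of keystream are
   masked with .Lclamp and stored at 0(%rbp) as the Poly1305 key material, per
   the ChaCha20-Poly1305 construction, and the remaining words are repacked
   with vperm2i128 as keystream for .Lseal_avx2_short. The mulq/imulq/shrdq
   blocks further down implement the scalar Poly1305 multiply-and-reduce
   modulo 2^130-5, with the accumulator in %r10:%r11:%r12 and the key limbs
   loaded from 0(%rbp) and 8(%rbp). */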
.Lseal_avx2_192_rounds:
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm0,%ymm12,%ymm12
 vpshufb .Lrol16(%rip),%ymm12,%ymm12
 vpaddd %ymm12,%ymm8,%ymm8
 vpxor %ymm8,%ymm4,%ymm4
 vpsrld $20,%ymm4,%ymm3
 vpslld $12,%ymm4,%ymm4
 vpxor %ymm3,%ymm4,%ymm4
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm0,%ymm12,%ymm12
 vpshufb .Lrol8(%rip),%ymm12,%ymm12
 vpaddd %ymm12,%ymm8,%ymm8
 vpxor %ymm8,%ymm4,%ymm4
 vpslld $7,%ymm4,%ymm3
 vpsrld $25,%ymm4,%ymm4
 vpxor %ymm3,%ymm4,%ymm4
 vpalignr $12,%ymm12,%ymm12,%ymm12
 vpalignr $8,%ymm8,%ymm8,%ymm8
 vpalignr $4,%ymm4,%ymm4,%ymm4
 vpaddd %ymm5,%ymm1,%ymm1
 vpxor %ymm1,%ymm13,%ymm13
 vpshufb .Lrol16(%rip),%ymm13,%ymm13
 vpaddd %ymm13,%ymm9,%ymm9
 vpxor %ymm9,%ymm5,%ymm5
 vpsrld $20,%ymm5,%ymm3
 vpslld $12,%ymm5,%ymm5
 vpxor %ymm3,%ymm5,%ymm5
 vpaddd %ymm5,%ymm1,%ymm1
 vpxor %ymm1,%ymm13,%ymm13
 vpshufb .Lrol8(%rip),%ymm13,%ymm13
 vpaddd %ymm13,%ymm9,%ymm9
 vpxor %ymm9,%ymm5,%ymm5
 vpslld $7,%ymm5,%ymm3
 vpsrld $25,%ymm5,%ymm5
 vpxor %ymm3,%ymm5,%ymm5
 vpalignr $12,%ymm13,%ymm13,%ymm13
 vpalignr $8,%ymm9,%ymm9,%ymm9
 vpalignr $4,%ymm5,%ymm5,%ymm5
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm0,%ymm12,%ymm12
 vpshufb .Lrol16(%rip),%ymm12,%ymm12
 vpaddd %ymm12,%ymm8,%ymm8
 vpxor %ymm8,%ymm4,%ymm4
 vpsrld $20,%ymm4,%ymm3
 vpslld $12,%ymm4,%ymm4
 vpxor %ymm3,%ymm4,%ymm4
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm0,%ymm12,%ymm12
 vpshufb .Lrol8(%rip),%ymm12,%ymm12
 vpaddd %ymm12,%ymm8,%ymm8
 vpxor %ymm8,%ymm4,%ymm4
 vpslld $7,%ymm4,%ymm3
 vpsrld $25,%ymm4,%ymm4
 vpxor %ymm3,%ymm4,%ymm4
 vpalignr $4,%ymm12,%ymm12,%ymm12
 vpalignr $8,%ymm8,%ymm8,%ymm8
 vpalignr $12,%ymm4,%ymm4,%ymm4
 vpaddd %ymm5,%ymm1,%ymm1
 vpxor %ymm1,%ymm13,%ymm13
 vpshufb .Lrol16(%rip),%ymm13,%ymm13
 vpaddd %ymm13,%ymm9,%ymm9
 vpxor %ymm9,%ymm5,%ymm5
 vpsrld $20,%ymm5,%ymm3
 vpslld $12,%ymm5,%ymm5
 vpxor %ymm3,%ymm5,%ymm5
 vpaddd %ymm5,%ymm1,%ymm1
 vpxor %ymm1,%ymm13,%ymm13
 vpshufb .Lrol8(%rip),%ymm13,%ymm13
 vpaddd %ymm13,%ymm9,%ymm9
 vpxor %ymm9,%ymm5,%ymm5
 vpslld $7,%ymm5,%ymm3
 vpsrld $25,%ymm5,%ymm5
 vpxor %ymm3,%ymm5,%ymm5
 vpalignr $4,%ymm13,%ymm13,%ymm13
 vpalignr $8,%ymm9,%ymm9,%ymm9
 vpalignr $12,%ymm5,%ymm5,%ymm5

 decq %r10
 jne .Lseal_avx2_192_rounds
 vpaddd %ymm2,%ymm0,%ymm0
 vpaddd %ymm2,%ymm1,%ymm1
 vpaddd %ymm6,%ymm4,%ymm4
 vpaddd %ymm6,%ymm5,%ymm5
 vpaddd %ymm10,%ymm8,%ymm8
 vpaddd %ymm10,%ymm9,%ymm9
 vpaddd %ymm11,%ymm12,%ymm12
 vpaddd %ymm15,%ymm13,%ymm13
 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3

 vpand .Lclamp(%rip),%ymm3,%ymm3
 vmovdqa %ymm3,0+0(%rbp)

 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0
 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4
 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8
 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12
 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1
 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5
.Lseal_avx2_short:
 movq %r8,%r8
 call poly_hash_ad_internal
 xorq %rcx,%rcx
.Lseal_avx2_short_hash_remainder:
 cmpq $16,%rcx
 jb .Lseal_avx2_short_loop
 addq 0+0(%rdi),%r10
 adcq 8+0(%rdi),%r11
 adcq $1,%r12
 movq 0+0+0(%rbp),%rax
 movq %rax,%r15
 mulq %r10
 movq %rax,%r13
 movq %rdx,%r14
 movq 0+0+0(%rbp),%rax
 mulq %r11
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 movq 8+0+0(%rbp),%rax
 movq %rax,%r9
 mulq %r10
 addq %rax,%r14
 adcq $0,%rdx
 movq %rdx,%r10
 movq 8+0+0(%rbp),%rax
 mulq %r11
 addq %rax,%r15
 adcq $0,%rdx
 imulq %r12,%r9
 addq %r10,%r15
 adcq %rdx,%r9
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r15
 adcq %r14,%r9
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12

 subq $16,%rcx
 addq $16,%rdi
 jmp .Lseal_avx2_short_hash_remainder
.Lseal_avx2_short_loop:
 cmpq $32,%rbx
 jb .Lseal_avx2_short_tail
 subq $32,%rbx

 vpxor (%rsi),%ymm0,%ymm0
 vmovdqu %ymm0,(%rdi)
 leaq 32(%rsi),%rsi

 addq 0+0(%rdi),%r10
 adcq 8+0(%rdi),%r11
 adcq $1,%r12
 movq 0+0+0(%rbp),%rax
 movq %rax,%r15
 mulq %r10
 movq %rax,%r13
 movq %rdx,%r14
 movq 0+0+0(%rbp),%rax
 mulq %r11
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 movq 8+0+0(%rbp),%rax
 movq %rax,%r9
 mulq %r10
 addq %rax,%r14
 adcq $0,%rdx
 movq %rdx,%r10
 movq 8+0+0(%rbp),%rax
 mulq %r11
 addq %rax,%r15
 adcq $0,%rdx
 imulq %r12,%r9
 addq %r10,%r15
 adcq %rdx,%r9
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r15
 adcq %r14,%r9
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12
 addq 0+16(%rdi),%r10
 adcq 8+16(%rdi),%r11
 adcq $1,%r12
 movq 0+0+0(%rbp),%rax
 movq %rax,%r15
 mulq %r10
 movq %rax,%r13
 movq %rdx,%r14
 movq 0+0+0(%rbp),%rax
 mulq %r11
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 movq 8+0+0(%rbp),%rax
 movq %rax,%r9
 mulq %r10
 addq %rax,%r14
 adcq $0,%rdx
 movq %rdx,%r10
 movq 8+0+0(%rbp),%rax
 mulq %r11
 addq %rax,%r15
 adcq $0,%rdx
 imulq %r12,%r9
 addq %r10,%r15
 adcq %rdx,%r9
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r15
 adcq %r14,%r9
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12

 leaq 32(%rdi),%rdi

 vmovdqa %ymm4,%ymm0
 vmovdqa %ymm8,%ymm4
 vmovdqa %ymm12,%ymm8
 vmovdqa %ymm1,%ymm12
 vmovdqa %ymm5,%ymm1
 vmovdqa %ymm9,%ymm5
 vmovdqa %ymm13,%ymm9
 vmovdqa %ymm2,%ymm13
 vmovdqa %ymm6,%ymm2
 jmp .Lseal_avx2_short_loop
.Lseal_avx2_short_tail:
 cmpq $16,%rbx
 jb .Lseal_avx2_exit
 subq $16,%rbx
 vpxor (%rsi),%xmm0,%xmm3
 vmovdqu %xmm3,(%rdi)
 leaq 16(%rsi),%rsi
 addq 0+0(%rdi),%r10
 adcq 8+0(%rdi),%r11
 adcq $1,%r12
 movq 0+0+0(%rbp),%rax
 movq %rax,%r15
 mulq %r10
 movq %rax,%r13
 movq %rdx,%r14
 movq 0+0+0(%rbp),%rax
 mulq %r11
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 movq 8+0+0(%rbp),%rax
 movq %rax,%r9
 mulq %r10
 addq %rax,%r14
 adcq $0,%rdx
 movq %rdx,%r10
 movq 8+0+0(%rbp),%rax
 mulq %r11
 addq %rax,%r15
 adcq $0,%rdx
 imulq %r12,%r9
 addq %r10,%r15
 adcq %rdx,%r9
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r15
 adcq %r14,%r9
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12

 leaq 16(%rdi),%rdi
 vextracti128 $1,%ymm0,%xmm0
.Lseal_avx2_exit:
 vzeroupper
 jmp .Lseal_sse_tail_16
.cfi_endproc
.size chacha20_poly1305_seal_avx2, .-chacha20_poly1305_seal_avx2
#endif
.section .note.GNU-stack,"",@progbits