1# This file is generated from a similarly-named Perl script in the BoringSSL 2# source tree. Do not edit by hand. 3 4#if defined(__has_feature) 5#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) 6#define OPENSSL_NO_ASM 7#endif 8#endif 9 10#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) 11#include "ring_core_generated/prefix_symbols_asm.h" 12.text 13.extern OPENSSL_ia32cap_P 14.hidden OPENSSL_ia32cap_P 15 16chacha20_poly1305_constants: 17 18.align 64 19.Lchacha20_consts: 20.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' 21.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' 22.Lrol8: 23.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 24.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 25.Lrol16: 26.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13 27.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13 28.Lavx2_init: 29.long 0,0,0,0 30.Lsse_inc: 31.long 1,0,0,0 32.Lavx2_inc: 33.long 2,0,0,0,2,0,0,0 34.Lclamp: 35.quad 0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC 36.quad 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF 37.align 16 38.Land_masks: 39.byte 0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 40.byte 0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 41.byte 0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 42.byte 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 43.byte 0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 44.byte 0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 45.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 46.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 47.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00 48.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00 49.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00 50.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00 51.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00 52.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00 53.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00 54.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff 55 56.type poly_hash_ad_internal,@function 57.align 64 58poly_hash_ad_internal: 59.cfi_startproc 60.cfi_def_cfa rsp, 8 61 xorq %r10,%r10 62 xorq %r11,%r11 63 xorq %r12,%r12 64 cmpq $13,%r8 65 jne .Lhash_ad_loop 66.Lpoly_fast_tls_ad: 67 68 movq (%rcx),%r10 69 movq 5(%rcx),%r11 70 shrq $24,%r11 71 movq $1,%r12 72 movq 0+0+0(%rbp),%rax 73 movq %rax,%r15 74 mulq %r10 75 movq %rax,%r13 76 movq %rdx,%r14 77 movq 0+0+0(%rbp),%rax 78 mulq %r11 79 imulq %r12,%r15 80 addq %rax,%r14 81 adcq %rdx,%r15 82 movq 8+0+0(%rbp),%rax 83 movq %rax,%r9 84 mulq %r10 85 addq %rax,%r14 86 adcq $0,%rdx 87 movq %rdx,%r10 88 movq 8+0+0(%rbp),%rax 89 mulq %r11 90 addq %rax,%r15 91 adcq $0,%rdx 92 imulq %r12,%r9 93 addq %r10,%r15 94 adcq %rdx,%r9 95 movq %r13,%r10 96 movq %r14,%r11 97 movq %r15,%r12 98 andq $3,%r12 99 movq %r15,%r13 100 andq $-4,%r13 101 movq %r9,%r14 102 shrdq $2,%r9,%r15 103 shrq $2,%r9 104 addq %r13,%r15 105 adcq %r14,%r9 106 addq %r15,%r10 107 adcq %r9,%r11 108 adcq $0,%r12 109 110 .byte 0xf3,0xc3 111.Lhash_ad_loop: 112 113 cmpq $16,%r8 114 jb 
.Lhash_ad_tail 115 addq 0+0(%rcx),%r10 116 adcq 8+0(%rcx),%r11 117 adcq $1,%r12 118 movq 0+0+0(%rbp),%rax 119 movq %rax,%r15 120 mulq %r10 121 movq %rax,%r13 122 movq %rdx,%r14 123 movq 0+0+0(%rbp),%rax 124 mulq %r11 125 imulq %r12,%r15 126 addq %rax,%r14 127 adcq %rdx,%r15 128 movq 8+0+0(%rbp),%rax 129 movq %rax,%r9 130 mulq %r10 131 addq %rax,%r14 132 adcq $0,%rdx 133 movq %rdx,%r10 134 movq 8+0+0(%rbp),%rax 135 mulq %r11 136 addq %rax,%r15 137 adcq $0,%rdx 138 imulq %r12,%r9 139 addq %r10,%r15 140 adcq %rdx,%r9 141 movq %r13,%r10 142 movq %r14,%r11 143 movq %r15,%r12 144 andq $3,%r12 145 movq %r15,%r13 146 andq $-4,%r13 147 movq %r9,%r14 148 shrdq $2,%r9,%r15 149 shrq $2,%r9 150 addq %r13,%r15 151 adcq %r14,%r9 152 addq %r15,%r10 153 adcq %r9,%r11 154 adcq $0,%r12 155 156 leaq 16(%rcx),%rcx 157 subq $16,%r8 158 jmp .Lhash_ad_loop 159.Lhash_ad_tail: 160 cmpq $0,%r8 161 je .Lhash_ad_done 162 163 xorq %r13,%r13 164 xorq %r14,%r14 165 xorq %r15,%r15 166 addq %r8,%rcx 167.Lhash_ad_tail_loop: 168 shldq $8,%r13,%r14 169 shlq $8,%r13 170 movzbq -1(%rcx),%r15 171 xorq %r15,%r13 172 decq %rcx 173 decq %r8 174 jne .Lhash_ad_tail_loop 175 176 addq %r13,%r10 177 adcq %r14,%r11 178 adcq $1,%r12 179 movq 0+0+0(%rbp),%rax 180 movq %rax,%r15 181 mulq %r10 182 movq %rax,%r13 183 movq %rdx,%r14 184 movq 0+0+0(%rbp),%rax 185 mulq %r11 186 imulq %r12,%r15 187 addq %rax,%r14 188 adcq %rdx,%r15 189 movq 8+0+0(%rbp),%rax 190 movq %rax,%r9 191 mulq %r10 192 addq %rax,%r14 193 adcq $0,%rdx 194 movq %rdx,%r10 195 movq 8+0+0(%rbp),%rax 196 mulq %r11 197 addq %rax,%r15 198 adcq $0,%rdx 199 imulq %r12,%r9 200 addq %r10,%r15 201 adcq %rdx,%r9 202 movq %r13,%r10 203 movq %r14,%r11 204 movq %r15,%r12 205 andq $3,%r12 206 movq %r15,%r13 207 andq $-4,%r13 208 movq %r9,%r14 209 shrdq $2,%r9,%r15 210 shrq $2,%r9 211 addq %r13,%r15 212 adcq %r14,%r9 213 addq %r15,%r10 214 adcq %r9,%r11 215 adcq $0,%r12 216 217 218.Lhash_ad_done: 219 .byte 0xf3,0xc3 220.cfi_endproc 221.size poly_hash_ad_internal, .-poly_hash_ad_internal 222 223.globl chacha20_poly1305_open 224.hidden chacha20_poly1305_open 225.type chacha20_poly1305_open,@function 226.align 64 227chacha20_poly1305_open: 228.cfi_startproc 229 pushq %rbp 230.cfi_adjust_cfa_offset 8 231.cfi_offset %rbp,-16 232 pushq %rbx 233.cfi_adjust_cfa_offset 8 234.cfi_offset %rbx,-24 235 pushq %r12 236.cfi_adjust_cfa_offset 8 237.cfi_offset %r12,-32 238 pushq %r13 239.cfi_adjust_cfa_offset 8 240.cfi_offset %r13,-40 241 pushq %r14 242.cfi_adjust_cfa_offset 8 243.cfi_offset %r14,-48 244 pushq %r15 245.cfi_adjust_cfa_offset 8 246.cfi_offset %r15,-56 247 248 249 pushq %r9 250.cfi_adjust_cfa_offset 8 251.cfi_offset %r9,-64 252 subq $288 + 0 + 32,%rsp 253.cfi_adjust_cfa_offset 288 + 32 254 255 leaq 32(%rsp),%rbp 256 andq $-32,%rbp 257 258 movq %rdx,%rbx 259 movq %r8,0+0+32(%rbp) 260 movq %rbx,8+0+32(%rbp) 261 262 movl OPENSSL_ia32cap_P+8(%rip),%eax 263 andl $288,%eax 264 xorl $288,%eax 265 jz chacha20_poly1305_open_avx2 266 267 cmpq $128,%rbx 268 jbe .Lopen_sse_128 269 270 movdqa .Lchacha20_consts(%rip),%xmm0 271 movdqu 0(%r9),%xmm4 272 movdqu 16(%r9),%xmm8 273 movdqu 32(%r9),%xmm12 274 275 movdqa %xmm12,%xmm7 276 277 movdqa %xmm4,0+48(%rbp) 278 movdqa %xmm8,0+64(%rbp) 279 movdqa %xmm12,0+96(%rbp) 280 movq $10,%r10 281.Lopen_sse_init_rounds: 282 paddd %xmm4,%xmm0 283 pxor %xmm0,%xmm12 284 pshufb .Lrol16(%rip),%xmm12 285 paddd %xmm12,%xmm8 286 pxor %xmm8,%xmm4 287 movdqa %xmm4,%xmm3 288 pslld $12,%xmm3 289 psrld $20,%xmm4 290 pxor %xmm3,%xmm4 291 paddd %xmm4,%xmm0 292 pxor %xmm0,%xmm12 293 pshufb 
.Lrol8(%rip),%xmm12 294 paddd %xmm12,%xmm8 295 pxor %xmm8,%xmm4 296 movdqa %xmm4,%xmm3 297 pslld $7,%xmm3 298 psrld $25,%xmm4 299 pxor %xmm3,%xmm4 300.byte 102,15,58,15,228,4 301.byte 102,69,15,58,15,192,8 302.byte 102,69,15,58,15,228,12 303 paddd %xmm4,%xmm0 304 pxor %xmm0,%xmm12 305 pshufb .Lrol16(%rip),%xmm12 306 paddd %xmm12,%xmm8 307 pxor %xmm8,%xmm4 308 movdqa %xmm4,%xmm3 309 pslld $12,%xmm3 310 psrld $20,%xmm4 311 pxor %xmm3,%xmm4 312 paddd %xmm4,%xmm0 313 pxor %xmm0,%xmm12 314 pshufb .Lrol8(%rip),%xmm12 315 paddd %xmm12,%xmm8 316 pxor %xmm8,%xmm4 317 movdqa %xmm4,%xmm3 318 pslld $7,%xmm3 319 psrld $25,%xmm4 320 pxor %xmm3,%xmm4 321.byte 102,15,58,15,228,12 322.byte 102,69,15,58,15,192,8 323.byte 102,69,15,58,15,228,4 324 325 decq %r10 326 jne .Lopen_sse_init_rounds 327 328 paddd .Lchacha20_consts(%rip),%xmm0 329 paddd 0+48(%rbp),%xmm4 330 331 pand .Lclamp(%rip),%xmm0 332 movdqa %xmm0,0+0(%rbp) 333 movdqa %xmm4,0+16(%rbp) 334 335 movq %r8,%r8 336 call poly_hash_ad_internal 337.Lopen_sse_main_loop: 338 cmpq $256,%rbx 339 jb .Lopen_sse_tail 340 341 movdqa .Lchacha20_consts(%rip),%xmm0 342 movdqa 0+48(%rbp),%xmm4 343 movdqa 0+64(%rbp),%xmm8 344 movdqa %xmm0,%xmm1 345 movdqa %xmm4,%xmm5 346 movdqa %xmm8,%xmm9 347 movdqa %xmm0,%xmm2 348 movdqa %xmm4,%xmm6 349 movdqa %xmm8,%xmm10 350 movdqa %xmm0,%xmm3 351 movdqa %xmm4,%xmm7 352 movdqa %xmm8,%xmm11 353 movdqa 0+96(%rbp),%xmm15 354 paddd .Lsse_inc(%rip),%xmm15 355 movdqa %xmm15,%xmm14 356 paddd .Lsse_inc(%rip),%xmm14 357 movdqa %xmm14,%xmm13 358 paddd .Lsse_inc(%rip),%xmm13 359 movdqa %xmm13,%xmm12 360 paddd .Lsse_inc(%rip),%xmm12 361 movdqa %xmm12,0+96(%rbp) 362 movdqa %xmm13,0+112(%rbp) 363 movdqa %xmm14,0+128(%rbp) 364 movdqa %xmm15,0+144(%rbp) 365 366 367 368 movq $4,%rcx 369 movq %rsi,%r8 370.Lopen_sse_main_loop_rounds: 371 movdqa %xmm8,0+80(%rbp) 372 movdqa .Lrol16(%rip),%xmm8 373 paddd %xmm7,%xmm3 374 paddd %xmm6,%xmm2 375 paddd %xmm5,%xmm1 376 paddd %xmm4,%xmm0 377 pxor %xmm3,%xmm15 378 pxor %xmm2,%xmm14 379 pxor %xmm1,%xmm13 380 pxor %xmm0,%xmm12 381.byte 102,69,15,56,0,248 382.byte 102,69,15,56,0,240 383.byte 102,69,15,56,0,232 384.byte 102,69,15,56,0,224 385 movdqa 0+80(%rbp),%xmm8 386 paddd %xmm15,%xmm11 387 paddd %xmm14,%xmm10 388 paddd %xmm13,%xmm9 389 paddd %xmm12,%xmm8 390 pxor %xmm11,%xmm7 391 addq 0+0(%r8),%r10 392 adcq 8+0(%r8),%r11 393 adcq $1,%r12 394 395 leaq 16(%r8),%r8 396 pxor %xmm10,%xmm6 397 pxor %xmm9,%xmm5 398 pxor %xmm8,%xmm4 399 movdqa %xmm8,0+80(%rbp) 400 movdqa %xmm7,%xmm8 401 psrld $20,%xmm8 402 pslld $32-20,%xmm7 403 pxor %xmm8,%xmm7 404 movdqa %xmm6,%xmm8 405 psrld $20,%xmm8 406 pslld $32-20,%xmm6 407 pxor %xmm8,%xmm6 408 movdqa %xmm5,%xmm8 409 psrld $20,%xmm8 410 pslld $32-20,%xmm5 411 pxor %xmm8,%xmm5 412 movdqa %xmm4,%xmm8 413 psrld $20,%xmm8 414 pslld $32-20,%xmm4 415 pxor %xmm8,%xmm4 416 movq 0+0+0(%rbp),%rax 417 movq %rax,%r15 418 mulq %r10 419 movq %rax,%r13 420 movq %rdx,%r14 421 movq 0+0+0(%rbp),%rax 422 mulq %r11 423 imulq %r12,%r15 424 addq %rax,%r14 425 adcq %rdx,%r15 426 movdqa .Lrol8(%rip),%xmm8 427 paddd %xmm7,%xmm3 428 paddd %xmm6,%xmm2 429 paddd %xmm5,%xmm1 430 paddd %xmm4,%xmm0 431 pxor %xmm3,%xmm15 432 pxor %xmm2,%xmm14 433 pxor %xmm1,%xmm13 434 pxor %xmm0,%xmm12 435.byte 102,69,15,56,0,248 436.byte 102,69,15,56,0,240 437.byte 102,69,15,56,0,232 438.byte 102,69,15,56,0,224 439 movdqa 0+80(%rbp),%xmm8 440 paddd %xmm15,%xmm11 441 paddd %xmm14,%xmm10 442 paddd %xmm13,%xmm9 443 paddd %xmm12,%xmm8 444 pxor %xmm11,%xmm7 445 pxor %xmm10,%xmm6 446 movq 8+0+0(%rbp),%rax 447 movq %rax,%r9 448 
mulq %r10 449 addq %rax,%r14 450 adcq $0,%rdx 451 movq %rdx,%r10 452 movq 8+0+0(%rbp),%rax 453 mulq %r11 454 addq %rax,%r15 455 adcq $0,%rdx 456 pxor %xmm9,%xmm5 457 pxor %xmm8,%xmm4 458 movdqa %xmm8,0+80(%rbp) 459 movdqa %xmm7,%xmm8 460 psrld $25,%xmm8 461 pslld $32-25,%xmm7 462 pxor %xmm8,%xmm7 463 movdqa %xmm6,%xmm8 464 psrld $25,%xmm8 465 pslld $32-25,%xmm6 466 pxor %xmm8,%xmm6 467 movdqa %xmm5,%xmm8 468 psrld $25,%xmm8 469 pslld $32-25,%xmm5 470 pxor %xmm8,%xmm5 471 movdqa %xmm4,%xmm8 472 psrld $25,%xmm8 473 pslld $32-25,%xmm4 474 pxor %xmm8,%xmm4 475 movdqa 0+80(%rbp),%xmm8 476 imulq %r12,%r9 477 addq %r10,%r15 478 adcq %rdx,%r9 479.byte 102,15,58,15,255,4 480.byte 102,69,15,58,15,219,8 481.byte 102,69,15,58,15,255,12 482.byte 102,15,58,15,246,4 483.byte 102,69,15,58,15,210,8 484.byte 102,69,15,58,15,246,12 485.byte 102,15,58,15,237,4 486.byte 102,69,15,58,15,201,8 487.byte 102,69,15,58,15,237,12 488.byte 102,15,58,15,228,4 489.byte 102,69,15,58,15,192,8 490.byte 102,69,15,58,15,228,12 491 movdqa %xmm8,0+80(%rbp) 492 movdqa .Lrol16(%rip),%xmm8 493 paddd %xmm7,%xmm3 494 paddd %xmm6,%xmm2 495 paddd %xmm5,%xmm1 496 paddd %xmm4,%xmm0 497 pxor %xmm3,%xmm15 498 pxor %xmm2,%xmm14 499 movq %r13,%r10 500 movq %r14,%r11 501 movq %r15,%r12 502 andq $3,%r12 503 movq %r15,%r13 504 andq $-4,%r13 505 movq %r9,%r14 506 shrdq $2,%r9,%r15 507 shrq $2,%r9 508 addq %r13,%r15 509 adcq %r14,%r9 510 addq %r15,%r10 511 adcq %r9,%r11 512 adcq $0,%r12 513 pxor %xmm1,%xmm13 514 pxor %xmm0,%xmm12 515.byte 102,69,15,56,0,248 516.byte 102,69,15,56,0,240 517.byte 102,69,15,56,0,232 518.byte 102,69,15,56,0,224 519 movdqa 0+80(%rbp),%xmm8 520 paddd %xmm15,%xmm11 521 paddd %xmm14,%xmm10 522 paddd %xmm13,%xmm9 523 paddd %xmm12,%xmm8 524 pxor %xmm11,%xmm7 525 pxor %xmm10,%xmm6 526 pxor %xmm9,%xmm5 527 pxor %xmm8,%xmm4 528 movdqa %xmm8,0+80(%rbp) 529 movdqa %xmm7,%xmm8 530 psrld $20,%xmm8 531 pslld $32-20,%xmm7 532 pxor %xmm8,%xmm7 533 movdqa %xmm6,%xmm8 534 psrld $20,%xmm8 535 pslld $32-20,%xmm6 536 pxor %xmm8,%xmm6 537 movdqa %xmm5,%xmm8 538 psrld $20,%xmm8 539 pslld $32-20,%xmm5 540 pxor %xmm8,%xmm5 541 movdqa %xmm4,%xmm8 542 psrld $20,%xmm8 543 pslld $32-20,%xmm4 544 pxor %xmm8,%xmm4 545 movdqa .Lrol8(%rip),%xmm8 546 paddd %xmm7,%xmm3 547 paddd %xmm6,%xmm2 548 paddd %xmm5,%xmm1 549 paddd %xmm4,%xmm0 550 pxor %xmm3,%xmm15 551 pxor %xmm2,%xmm14 552 pxor %xmm1,%xmm13 553 pxor %xmm0,%xmm12 554.byte 102,69,15,56,0,248 555.byte 102,69,15,56,0,240 556.byte 102,69,15,56,0,232 557.byte 102,69,15,56,0,224 558 movdqa 0+80(%rbp),%xmm8 559 paddd %xmm15,%xmm11 560 paddd %xmm14,%xmm10 561 paddd %xmm13,%xmm9 562 paddd %xmm12,%xmm8 563 pxor %xmm11,%xmm7 564 pxor %xmm10,%xmm6 565 pxor %xmm9,%xmm5 566 pxor %xmm8,%xmm4 567 movdqa %xmm8,0+80(%rbp) 568 movdqa %xmm7,%xmm8 569 psrld $25,%xmm8 570 pslld $32-25,%xmm7 571 pxor %xmm8,%xmm7 572 movdqa %xmm6,%xmm8 573 psrld $25,%xmm8 574 pslld $32-25,%xmm6 575 pxor %xmm8,%xmm6 576 movdqa %xmm5,%xmm8 577 psrld $25,%xmm8 578 pslld $32-25,%xmm5 579 pxor %xmm8,%xmm5 580 movdqa %xmm4,%xmm8 581 psrld $25,%xmm8 582 pslld $32-25,%xmm4 583 pxor %xmm8,%xmm4 584 movdqa 0+80(%rbp),%xmm8 585.byte 102,15,58,15,255,12 586.byte 102,69,15,58,15,219,8 587.byte 102,69,15,58,15,255,4 588.byte 102,15,58,15,246,12 589.byte 102,69,15,58,15,210,8 590.byte 102,69,15,58,15,246,4 591.byte 102,15,58,15,237,12 592.byte 102,69,15,58,15,201,8 593.byte 102,69,15,58,15,237,4 594.byte 102,15,58,15,228,12 595.byte 102,69,15,58,15,192,8 596.byte 102,69,15,58,15,228,4 597 598 decq %rcx 599 jge .Lopen_sse_main_loop_rounds 600 addq 
0+0(%r8),%r10 601 adcq 8+0(%r8),%r11 602 adcq $1,%r12 603 movq 0+0+0(%rbp),%rax 604 movq %rax,%r15 605 mulq %r10 606 movq %rax,%r13 607 movq %rdx,%r14 608 movq 0+0+0(%rbp),%rax 609 mulq %r11 610 imulq %r12,%r15 611 addq %rax,%r14 612 adcq %rdx,%r15 613 movq 8+0+0(%rbp),%rax 614 movq %rax,%r9 615 mulq %r10 616 addq %rax,%r14 617 adcq $0,%rdx 618 movq %rdx,%r10 619 movq 8+0+0(%rbp),%rax 620 mulq %r11 621 addq %rax,%r15 622 adcq $0,%rdx 623 imulq %r12,%r9 624 addq %r10,%r15 625 adcq %rdx,%r9 626 movq %r13,%r10 627 movq %r14,%r11 628 movq %r15,%r12 629 andq $3,%r12 630 movq %r15,%r13 631 andq $-4,%r13 632 movq %r9,%r14 633 shrdq $2,%r9,%r15 634 shrq $2,%r9 635 addq %r13,%r15 636 adcq %r14,%r9 637 addq %r15,%r10 638 adcq %r9,%r11 639 adcq $0,%r12 640 641 leaq 16(%r8),%r8 642 cmpq $-6,%rcx 643 jg .Lopen_sse_main_loop_rounds 644 paddd .Lchacha20_consts(%rip),%xmm3 645 paddd 0+48(%rbp),%xmm7 646 paddd 0+64(%rbp),%xmm11 647 paddd 0+144(%rbp),%xmm15 648 paddd .Lchacha20_consts(%rip),%xmm2 649 paddd 0+48(%rbp),%xmm6 650 paddd 0+64(%rbp),%xmm10 651 paddd 0+128(%rbp),%xmm14 652 paddd .Lchacha20_consts(%rip),%xmm1 653 paddd 0+48(%rbp),%xmm5 654 paddd 0+64(%rbp),%xmm9 655 paddd 0+112(%rbp),%xmm13 656 paddd .Lchacha20_consts(%rip),%xmm0 657 paddd 0+48(%rbp),%xmm4 658 paddd 0+64(%rbp),%xmm8 659 paddd 0+96(%rbp),%xmm12 660 movdqa %xmm12,0+80(%rbp) 661 movdqu 0 + 0(%rsi),%xmm12 662 pxor %xmm3,%xmm12 663 movdqu %xmm12,0 + 0(%rdi) 664 movdqu 16 + 0(%rsi),%xmm12 665 pxor %xmm7,%xmm12 666 movdqu %xmm12,16 + 0(%rdi) 667 movdqu 32 + 0(%rsi),%xmm12 668 pxor %xmm11,%xmm12 669 movdqu %xmm12,32 + 0(%rdi) 670 movdqu 48 + 0(%rsi),%xmm12 671 pxor %xmm15,%xmm12 672 movdqu %xmm12,48 + 0(%rdi) 673 movdqu 0 + 64(%rsi),%xmm3 674 movdqu 16 + 64(%rsi),%xmm7 675 movdqu 32 + 64(%rsi),%xmm11 676 movdqu 48 + 64(%rsi),%xmm15 677 pxor %xmm3,%xmm2 678 pxor %xmm7,%xmm6 679 pxor %xmm11,%xmm10 680 pxor %xmm14,%xmm15 681 movdqu %xmm2,0 + 64(%rdi) 682 movdqu %xmm6,16 + 64(%rdi) 683 movdqu %xmm10,32 + 64(%rdi) 684 movdqu %xmm15,48 + 64(%rdi) 685 movdqu 0 + 128(%rsi),%xmm3 686 movdqu 16 + 128(%rsi),%xmm7 687 movdqu 32 + 128(%rsi),%xmm11 688 movdqu 48 + 128(%rsi),%xmm15 689 pxor %xmm3,%xmm1 690 pxor %xmm7,%xmm5 691 pxor %xmm11,%xmm9 692 pxor %xmm13,%xmm15 693 movdqu %xmm1,0 + 128(%rdi) 694 movdqu %xmm5,16 + 128(%rdi) 695 movdqu %xmm9,32 + 128(%rdi) 696 movdqu %xmm15,48 + 128(%rdi) 697 movdqu 0 + 192(%rsi),%xmm3 698 movdqu 16 + 192(%rsi),%xmm7 699 movdqu 32 + 192(%rsi),%xmm11 700 movdqu 48 + 192(%rsi),%xmm15 701 pxor %xmm3,%xmm0 702 pxor %xmm7,%xmm4 703 pxor %xmm11,%xmm8 704 pxor 0+80(%rbp),%xmm15 705 movdqu %xmm0,0 + 192(%rdi) 706 movdqu %xmm4,16 + 192(%rdi) 707 movdqu %xmm8,32 + 192(%rdi) 708 movdqu %xmm15,48 + 192(%rdi) 709 710 leaq 256(%rsi),%rsi 711 leaq 256(%rdi),%rdi 712 subq $256,%rbx 713 jmp .Lopen_sse_main_loop 714.Lopen_sse_tail: 715 716 testq %rbx,%rbx 717 jz .Lopen_sse_finalize 718 cmpq $192,%rbx 719 ja .Lopen_sse_tail_256 720 cmpq $128,%rbx 721 ja .Lopen_sse_tail_192 722 cmpq $64,%rbx 723 ja .Lopen_sse_tail_128 724 movdqa .Lchacha20_consts(%rip),%xmm0 725 movdqa 0+48(%rbp),%xmm4 726 movdqa 0+64(%rbp),%xmm8 727 movdqa 0+96(%rbp),%xmm12 728 paddd .Lsse_inc(%rip),%xmm12 729 movdqa %xmm12,0+96(%rbp) 730 731 xorq %r8,%r8 732 movq %rbx,%rcx 733 cmpq $16,%rcx 734 jb .Lopen_sse_tail_64_rounds 735.Lopen_sse_tail_64_rounds_and_x1hash: 736 addq 0+0(%rsi,%r8,1),%r10 737 adcq 8+0(%rsi,%r8,1),%r11 738 adcq $1,%r12 739 movq 0+0+0(%rbp),%rax 740 movq %rax,%r15 741 mulq %r10 742 movq %rax,%r13 743 movq %rdx,%r14 744 movq 0+0+0(%rbp),%rax 745 mulq 
%r11 746 imulq %r12,%r15 747 addq %rax,%r14 748 adcq %rdx,%r15 749 movq 8+0+0(%rbp),%rax 750 movq %rax,%r9 751 mulq %r10 752 addq %rax,%r14 753 adcq $0,%rdx 754 movq %rdx,%r10 755 movq 8+0+0(%rbp),%rax 756 mulq %r11 757 addq %rax,%r15 758 adcq $0,%rdx 759 imulq %r12,%r9 760 addq %r10,%r15 761 adcq %rdx,%r9 762 movq %r13,%r10 763 movq %r14,%r11 764 movq %r15,%r12 765 andq $3,%r12 766 movq %r15,%r13 767 andq $-4,%r13 768 movq %r9,%r14 769 shrdq $2,%r9,%r15 770 shrq $2,%r9 771 addq %r13,%r15 772 adcq %r14,%r9 773 addq %r15,%r10 774 adcq %r9,%r11 775 adcq $0,%r12 776 777 subq $16,%rcx 778.Lopen_sse_tail_64_rounds: 779 addq $16,%r8 780 paddd %xmm4,%xmm0 781 pxor %xmm0,%xmm12 782 pshufb .Lrol16(%rip),%xmm12 783 paddd %xmm12,%xmm8 784 pxor %xmm8,%xmm4 785 movdqa %xmm4,%xmm3 786 pslld $12,%xmm3 787 psrld $20,%xmm4 788 pxor %xmm3,%xmm4 789 paddd %xmm4,%xmm0 790 pxor %xmm0,%xmm12 791 pshufb .Lrol8(%rip),%xmm12 792 paddd %xmm12,%xmm8 793 pxor %xmm8,%xmm4 794 movdqa %xmm4,%xmm3 795 pslld $7,%xmm3 796 psrld $25,%xmm4 797 pxor %xmm3,%xmm4 798.byte 102,15,58,15,228,4 799.byte 102,69,15,58,15,192,8 800.byte 102,69,15,58,15,228,12 801 paddd %xmm4,%xmm0 802 pxor %xmm0,%xmm12 803 pshufb .Lrol16(%rip),%xmm12 804 paddd %xmm12,%xmm8 805 pxor %xmm8,%xmm4 806 movdqa %xmm4,%xmm3 807 pslld $12,%xmm3 808 psrld $20,%xmm4 809 pxor %xmm3,%xmm4 810 paddd %xmm4,%xmm0 811 pxor %xmm0,%xmm12 812 pshufb .Lrol8(%rip),%xmm12 813 paddd %xmm12,%xmm8 814 pxor %xmm8,%xmm4 815 movdqa %xmm4,%xmm3 816 pslld $7,%xmm3 817 psrld $25,%xmm4 818 pxor %xmm3,%xmm4 819.byte 102,15,58,15,228,12 820.byte 102,69,15,58,15,192,8 821.byte 102,69,15,58,15,228,4 822 823 cmpq $16,%rcx 824 jae .Lopen_sse_tail_64_rounds_and_x1hash 825 cmpq $160,%r8 826 jne .Lopen_sse_tail_64_rounds 827 paddd .Lchacha20_consts(%rip),%xmm0 828 paddd 0+48(%rbp),%xmm4 829 paddd 0+64(%rbp),%xmm8 830 paddd 0+96(%rbp),%xmm12 831 832 jmp .Lopen_sse_tail_64_dec_loop 833 834.Lopen_sse_tail_128: 835 movdqa .Lchacha20_consts(%rip),%xmm0 836 movdqa 0+48(%rbp),%xmm4 837 movdqa 0+64(%rbp),%xmm8 838 movdqa %xmm0,%xmm1 839 movdqa %xmm4,%xmm5 840 movdqa %xmm8,%xmm9 841 movdqa 0+96(%rbp),%xmm13 842 paddd .Lsse_inc(%rip),%xmm13 843 movdqa %xmm13,%xmm12 844 paddd .Lsse_inc(%rip),%xmm12 845 movdqa %xmm12,0+96(%rbp) 846 movdqa %xmm13,0+112(%rbp) 847 848 movq %rbx,%rcx 849 andq $-16,%rcx 850 xorq %r8,%r8 851.Lopen_sse_tail_128_rounds_and_x1hash: 852 addq 0+0(%rsi,%r8,1),%r10 853 adcq 8+0(%rsi,%r8,1),%r11 854 adcq $1,%r12 855 movq 0+0+0(%rbp),%rax 856 movq %rax,%r15 857 mulq %r10 858 movq %rax,%r13 859 movq %rdx,%r14 860 movq 0+0+0(%rbp),%rax 861 mulq %r11 862 imulq %r12,%r15 863 addq %rax,%r14 864 adcq %rdx,%r15 865 movq 8+0+0(%rbp),%rax 866 movq %rax,%r9 867 mulq %r10 868 addq %rax,%r14 869 adcq $0,%rdx 870 movq %rdx,%r10 871 movq 8+0+0(%rbp),%rax 872 mulq %r11 873 addq %rax,%r15 874 adcq $0,%rdx 875 imulq %r12,%r9 876 addq %r10,%r15 877 adcq %rdx,%r9 878 movq %r13,%r10 879 movq %r14,%r11 880 movq %r15,%r12 881 andq $3,%r12 882 movq %r15,%r13 883 andq $-4,%r13 884 movq %r9,%r14 885 shrdq $2,%r9,%r15 886 shrq $2,%r9 887 addq %r13,%r15 888 adcq %r14,%r9 889 addq %r15,%r10 890 adcq %r9,%r11 891 adcq $0,%r12 892 893.Lopen_sse_tail_128_rounds: 894 addq $16,%r8 895 paddd %xmm4,%xmm0 896 pxor %xmm0,%xmm12 897 pshufb .Lrol16(%rip),%xmm12 898 paddd %xmm12,%xmm8 899 pxor %xmm8,%xmm4 900 movdqa %xmm4,%xmm3 901 pslld $12,%xmm3 902 psrld $20,%xmm4 903 pxor %xmm3,%xmm4 904 paddd %xmm4,%xmm0 905 pxor %xmm0,%xmm12 906 pshufb .Lrol8(%rip),%xmm12 907 paddd %xmm12,%xmm8 908 pxor %xmm8,%xmm4 909 movdqa 
%xmm4,%xmm3 910 pslld $7,%xmm3 911 psrld $25,%xmm4 912 pxor %xmm3,%xmm4 913.byte 102,15,58,15,228,4 914.byte 102,69,15,58,15,192,8 915.byte 102,69,15,58,15,228,12 916 paddd %xmm5,%xmm1 917 pxor %xmm1,%xmm13 918 pshufb .Lrol16(%rip),%xmm13 919 paddd %xmm13,%xmm9 920 pxor %xmm9,%xmm5 921 movdqa %xmm5,%xmm3 922 pslld $12,%xmm3 923 psrld $20,%xmm5 924 pxor %xmm3,%xmm5 925 paddd %xmm5,%xmm1 926 pxor %xmm1,%xmm13 927 pshufb .Lrol8(%rip),%xmm13 928 paddd %xmm13,%xmm9 929 pxor %xmm9,%xmm5 930 movdqa %xmm5,%xmm3 931 pslld $7,%xmm3 932 psrld $25,%xmm5 933 pxor %xmm3,%xmm5 934.byte 102,15,58,15,237,4 935.byte 102,69,15,58,15,201,8 936.byte 102,69,15,58,15,237,12 937 paddd %xmm4,%xmm0 938 pxor %xmm0,%xmm12 939 pshufb .Lrol16(%rip),%xmm12 940 paddd %xmm12,%xmm8 941 pxor %xmm8,%xmm4 942 movdqa %xmm4,%xmm3 943 pslld $12,%xmm3 944 psrld $20,%xmm4 945 pxor %xmm3,%xmm4 946 paddd %xmm4,%xmm0 947 pxor %xmm0,%xmm12 948 pshufb .Lrol8(%rip),%xmm12 949 paddd %xmm12,%xmm8 950 pxor %xmm8,%xmm4 951 movdqa %xmm4,%xmm3 952 pslld $7,%xmm3 953 psrld $25,%xmm4 954 pxor %xmm3,%xmm4 955.byte 102,15,58,15,228,12 956.byte 102,69,15,58,15,192,8 957.byte 102,69,15,58,15,228,4 958 paddd %xmm5,%xmm1 959 pxor %xmm1,%xmm13 960 pshufb .Lrol16(%rip),%xmm13 961 paddd %xmm13,%xmm9 962 pxor %xmm9,%xmm5 963 movdqa %xmm5,%xmm3 964 pslld $12,%xmm3 965 psrld $20,%xmm5 966 pxor %xmm3,%xmm5 967 paddd %xmm5,%xmm1 968 pxor %xmm1,%xmm13 969 pshufb .Lrol8(%rip),%xmm13 970 paddd %xmm13,%xmm9 971 pxor %xmm9,%xmm5 972 movdqa %xmm5,%xmm3 973 pslld $7,%xmm3 974 psrld $25,%xmm5 975 pxor %xmm3,%xmm5 976.byte 102,15,58,15,237,12 977.byte 102,69,15,58,15,201,8 978.byte 102,69,15,58,15,237,4 979 980 cmpq %rcx,%r8 981 jb .Lopen_sse_tail_128_rounds_and_x1hash 982 cmpq $160,%r8 983 jne .Lopen_sse_tail_128_rounds 984 paddd .Lchacha20_consts(%rip),%xmm1 985 paddd 0+48(%rbp),%xmm5 986 paddd 0+64(%rbp),%xmm9 987 paddd 0+112(%rbp),%xmm13 988 paddd .Lchacha20_consts(%rip),%xmm0 989 paddd 0+48(%rbp),%xmm4 990 paddd 0+64(%rbp),%xmm8 991 paddd 0+96(%rbp),%xmm12 992 movdqu 0 + 0(%rsi),%xmm3 993 movdqu 16 + 0(%rsi),%xmm7 994 movdqu 32 + 0(%rsi),%xmm11 995 movdqu 48 + 0(%rsi),%xmm15 996 pxor %xmm3,%xmm1 997 pxor %xmm7,%xmm5 998 pxor %xmm11,%xmm9 999 pxor %xmm13,%xmm15 1000 movdqu %xmm1,0 + 0(%rdi) 1001 movdqu %xmm5,16 + 0(%rdi) 1002 movdqu %xmm9,32 + 0(%rdi) 1003 movdqu %xmm15,48 + 0(%rdi) 1004 1005 subq $64,%rbx 1006 leaq 64(%rsi),%rsi 1007 leaq 64(%rdi),%rdi 1008 jmp .Lopen_sse_tail_64_dec_loop 1009 1010.Lopen_sse_tail_192: 1011 movdqa .Lchacha20_consts(%rip),%xmm0 1012 movdqa 0+48(%rbp),%xmm4 1013 movdqa 0+64(%rbp),%xmm8 1014 movdqa %xmm0,%xmm1 1015 movdqa %xmm4,%xmm5 1016 movdqa %xmm8,%xmm9 1017 movdqa %xmm0,%xmm2 1018 movdqa %xmm4,%xmm6 1019 movdqa %xmm8,%xmm10 1020 movdqa 0+96(%rbp),%xmm14 1021 paddd .Lsse_inc(%rip),%xmm14 1022 movdqa %xmm14,%xmm13 1023 paddd .Lsse_inc(%rip),%xmm13 1024 movdqa %xmm13,%xmm12 1025 paddd .Lsse_inc(%rip),%xmm12 1026 movdqa %xmm12,0+96(%rbp) 1027 movdqa %xmm13,0+112(%rbp) 1028 movdqa %xmm14,0+128(%rbp) 1029 1030 movq %rbx,%rcx 1031 movq $160,%r8 1032 cmpq $160,%rcx 1033 cmovgq %r8,%rcx 1034 andq $-16,%rcx 1035 xorq %r8,%r8 1036.Lopen_sse_tail_192_rounds_and_x1hash: 1037 addq 0+0(%rsi,%r8,1),%r10 1038 adcq 8+0(%rsi,%r8,1),%r11 1039 adcq $1,%r12 1040 movq 0+0+0(%rbp),%rax 1041 movq %rax,%r15 1042 mulq %r10 1043 movq %rax,%r13 1044 movq %rdx,%r14 1045 movq 0+0+0(%rbp),%rax 1046 mulq %r11 1047 imulq %r12,%r15 1048 addq %rax,%r14 1049 adcq %rdx,%r15 1050 movq 8+0+0(%rbp),%rax 1051 movq %rax,%r9 1052 mulq %r10 1053 addq %rax,%r14 1054 adcq 
$0,%rdx 1055 movq %rdx,%r10 1056 movq 8+0+0(%rbp),%rax 1057 mulq %r11 1058 addq %rax,%r15 1059 adcq $0,%rdx 1060 imulq %r12,%r9 1061 addq %r10,%r15 1062 adcq %rdx,%r9 1063 movq %r13,%r10 1064 movq %r14,%r11 1065 movq %r15,%r12 1066 andq $3,%r12 1067 movq %r15,%r13 1068 andq $-4,%r13 1069 movq %r9,%r14 1070 shrdq $2,%r9,%r15 1071 shrq $2,%r9 1072 addq %r13,%r15 1073 adcq %r14,%r9 1074 addq %r15,%r10 1075 adcq %r9,%r11 1076 adcq $0,%r12 1077 1078.Lopen_sse_tail_192_rounds: 1079 addq $16,%r8 1080 paddd %xmm4,%xmm0 1081 pxor %xmm0,%xmm12 1082 pshufb .Lrol16(%rip),%xmm12 1083 paddd %xmm12,%xmm8 1084 pxor %xmm8,%xmm4 1085 movdqa %xmm4,%xmm3 1086 pslld $12,%xmm3 1087 psrld $20,%xmm4 1088 pxor %xmm3,%xmm4 1089 paddd %xmm4,%xmm0 1090 pxor %xmm0,%xmm12 1091 pshufb .Lrol8(%rip),%xmm12 1092 paddd %xmm12,%xmm8 1093 pxor %xmm8,%xmm4 1094 movdqa %xmm4,%xmm3 1095 pslld $7,%xmm3 1096 psrld $25,%xmm4 1097 pxor %xmm3,%xmm4 1098.byte 102,15,58,15,228,4 1099.byte 102,69,15,58,15,192,8 1100.byte 102,69,15,58,15,228,12 1101 paddd %xmm5,%xmm1 1102 pxor %xmm1,%xmm13 1103 pshufb .Lrol16(%rip),%xmm13 1104 paddd %xmm13,%xmm9 1105 pxor %xmm9,%xmm5 1106 movdqa %xmm5,%xmm3 1107 pslld $12,%xmm3 1108 psrld $20,%xmm5 1109 pxor %xmm3,%xmm5 1110 paddd %xmm5,%xmm1 1111 pxor %xmm1,%xmm13 1112 pshufb .Lrol8(%rip),%xmm13 1113 paddd %xmm13,%xmm9 1114 pxor %xmm9,%xmm5 1115 movdqa %xmm5,%xmm3 1116 pslld $7,%xmm3 1117 psrld $25,%xmm5 1118 pxor %xmm3,%xmm5 1119.byte 102,15,58,15,237,4 1120.byte 102,69,15,58,15,201,8 1121.byte 102,69,15,58,15,237,12 1122 paddd %xmm6,%xmm2 1123 pxor %xmm2,%xmm14 1124 pshufb .Lrol16(%rip),%xmm14 1125 paddd %xmm14,%xmm10 1126 pxor %xmm10,%xmm6 1127 movdqa %xmm6,%xmm3 1128 pslld $12,%xmm3 1129 psrld $20,%xmm6 1130 pxor %xmm3,%xmm6 1131 paddd %xmm6,%xmm2 1132 pxor %xmm2,%xmm14 1133 pshufb .Lrol8(%rip),%xmm14 1134 paddd %xmm14,%xmm10 1135 pxor %xmm10,%xmm6 1136 movdqa %xmm6,%xmm3 1137 pslld $7,%xmm3 1138 psrld $25,%xmm6 1139 pxor %xmm3,%xmm6 1140.byte 102,15,58,15,246,4 1141.byte 102,69,15,58,15,210,8 1142.byte 102,69,15,58,15,246,12 1143 paddd %xmm4,%xmm0 1144 pxor %xmm0,%xmm12 1145 pshufb .Lrol16(%rip),%xmm12 1146 paddd %xmm12,%xmm8 1147 pxor %xmm8,%xmm4 1148 movdqa %xmm4,%xmm3 1149 pslld $12,%xmm3 1150 psrld $20,%xmm4 1151 pxor %xmm3,%xmm4 1152 paddd %xmm4,%xmm0 1153 pxor %xmm0,%xmm12 1154 pshufb .Lrol8(%rip),%xmm12 1155 paddd %xmm12,%xmm8 1156 pxor %xmm8,%xmm4 1157 movdqa %xmm4,%xmm3 1158 pslld $7,%xmm3 1159 psrld $25,%xmm4 1160 pxor %xmm3,%xmm4 1161.byte 102,15,58,15,228,12 1162.byte 102,69,15,58,15,192,8 1163.byte 102,69,15,58,15,228,4 1164 paddd %xmm5,%xmm1 1165 pxor %xmm1,%xmm13 1166 pshufb .Lrol16(%rip),%xmm13 1167 paddd %xmm13,%xmm9 1168 pxor %xmm9,%xmm5 1169 movdqa %xmm5,%xmm3 1170 pslld $12,%xmm3 1171 psrld $20,%xmm5 1172 pxor %xmm3,%xmm5 1173 paddd %xmm5,%xmm1 1174 pxor %xmm1,%xmm13 1175 pshufb .Lrol8(%rip),%xmm13 1176 paddd %xmm13,%xmm9 1177 pxor %xmm9,%xmm5 1178 movdqa %xmm5,%xmm3 1179 pslld $7,%xmm3 1180 psrld $25,%xmm5 1181 pxor %xmm3,%xmm5 1182.byte 102,15,58,15,237,12 1183.byte 102,69,15,58,15,201,8 1184.byte 102,69,15,58,15,237,4 1185 paddd %xmm6,%xmm2 1186 pxor %xmm2,%xmm14 1187 pshufb .Lrol16(%rip),%xmm14 1188 paddd %xmm14,%xmm10 1189 pxor %xmm10,%xmm6 1190 movdqa %xmm6,%xmm3 1191 pslld $12,%xmm3 1192 psrld $20,%xmm6 1193 pxor %xmm3,%xmm6 1194 paddd %xmm6,%xmm2 1195 pxor %xmm2,%xmm14 1196 pshufb .Lrol8(%rip),%xmm14 1197 paddd %xmm14,%xmm10 1198 pxor %xmm10,%xmm6 1199 movdqa %xmm6,%xmm3 1200 pslld $7,%xmm3 1201 psrld $25,%xmm6 1202 pxor %xmm3,%xmm6 1203.byte 102,15,58,15,246,12 
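# Note on the ".byte 102,..." runs above and below: they encode palignr xmm,xmm,imm8
# (66 0F 3A 0F) with byte shifts of 4, 8 and 12, emitted as raw bytes for assembler
# compatibility; they rotate the b/c/d rows of the ChaCha20 state between the column
# rounds and the diagonal rounds.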
1204.byte 102,69,15,58,15,210,8 1205.byte 102,69,15,58,15,246,4 1206 1207 cmpq %rcx,%r8 1208 jb .Lopen_sse_tail_192_rounds_and_x1hash 1209 cmpq $160,%r8 1210 jne .Lopen_sse_tail_192_rounds 1211 cmpq $176,%rbx 1212 jb .Lopen_sse_tail_192_finish 1213 addq 0+160(%rsi),%r10 1214 adcq 8+160(%rsi),%r11 1215 adcq $1,%r12 1216 movq 0+0+0(%rbp),%rax 1217 movq %rax,%r15 1218 mulq %r10 1219 movq %rax,%r13 1220 movq %rdx,%r14 1221 movq 0+0+0(%rbp),%rax 1222 mulq %r11 1223 imulq %r12,%r15 1224 addq %rax,%r14 1225 adcq %rdx,%r15 1226 movq 8+0+0(%rbp),%rax 1227 movq %rax,%r9 1228 mulq %r10 1229 addq %rax,%r14 1230 adcq $0,%rdx 1231 movq %rdx,%r10 1232 movq 8+0+0(%rbp),%rax 1233 mulq %r11 1234 addq %rax,%r15 1235 adcq $0,%rdx 1236 imulq %r12,%r9 1237 addq %r10,%r15 1238 adcq %rdx,%r9 1239 movq %r13,%r10 1240 movq %r14,%r11 1241 movq %r15,%r12 1242 andq $3,%r12 1243 movq %r15,%r13 1244 andq $-4,%r13 1245 movq %r9,%r14 1246 shrdq $2,%r9,%r15 1247 shrq $2,%r9 1248 addq %r13,%r15 1249 adcq %r14,%r9 1250 addq %r15,%r10 1251 adcq %r9,%r11 1252 adcq $0,%r12 1253 1254 cmpq $192,%rbx 1255 jb .Lopen_sse_tail_192_finish 1256 addq 0+176(%rsi),%r10 1257 adcq 8+176(%rsi),%r11 1258 adcq $1,%r12 1259 movq 0+0+0(%rbp),%rax 1260 movq %rax,%r15 1261 mulq %r10 1262 movq %rax,%r13 1263 movq %rdx,%r14 1264 movq 0+0+0(%rbp),%rax 1265 mulq %r11 1266 imulq %r12,%r15 1267 addq %rax,%r14 1268 adcq %rdx,%r15 1269 movq 8+0+0(%rbp),%rax 1270 movq %rax,%r9 1271 mulq %r10 1272 addq %rax,%r14 1273 adcq $0,%rdx 1274 movq %rdx,%r10 1275 movq 8+0+0(%rbp),%rax 1276 mulq %r11 1277 addq %rax,%r15 1278 adcq $0,%rdx 1279 imulq %r12,%r9 1280 addq %r10,%r15 1281 adcq %rdx,%r9 1282 movq %r13,%r10 1283 movq %r14,%r11 1284 movq %r15,%r12 1285 andq $3,%r12 1286 movq %r15,%r13 1287 andq $-4,%r13 1288 movq %r9,%r14 1289 shrdq $2,%r9,%r15 1290 shrq $2,%r9 1291 addq %r13,%r15 1292 adcq %r14,%r9 1293 addq %r15,%r10 1294 adcq %r9,%r11 1295 adcq $0,%r12 1296 1297.Lopen_sse_tail_192_finish: 1298 paddd .Lchacha20_consts(%rip),%xmm2 1299 paddd 0+48(%rbp),%xmm6 1300 paddd 0+64(%rbp),%xmm10 1301 paddd 0+128(%rbp),%xmm14 1302 paddd .Lchacha20_consts(%rip),%xmm1 1303 paddd 0+48(%rbp),%xmm5 1304 paddd 0+64(%rbp),%xmm9 1305 paddd 0+112(%rbp),%xmm13 1306 paddd .Lchacha20_consts(%rip),%xmm0 1307 paddd 0+48(%rbp),%xmm4 1308 paddd 0+64(%rbp),%xmm8 1309 paddd 0+96(%rbp),%xmm12 1310 movdqu 0 + 0(%rsi),%xmm3 1311 movdqu 16 + 0(%rsi),%xmm7 1312 movdqu 32 + 0(%rsi),%xmm11 1313 movdqu 48 + 0(%rsi),%xmm15 1314 pxor %xmm3,%xmm2 1315 pxor %xmm7,%xmm6 1316 pxor %xmm11,%xmm10 1317 pxor %xmm14,%xmm15 1318 movdqu %xmm2,0 + 0(%rdi) 1319 movdqu %xmm6,16 + 0(%rdi) 1320 movdqu %xmm10,32 + 0(%rdi) 1321 movdqu %xmm15,48 + 0(%rdi) 1322 movdqu 0 + 64(%rsi),%xmm3 1323 movdqu 16 + 64(%rsi),%xmm7 1324 movdqu 32 + 64(%rsi),%xmm11 1325 movdqu 48 + 64(%rsi),%xmm15 1326 pxor %xmm3,%xmm1 1327 pxor %xmm7,%xmm5 1328 pxor %xmm11,%xmm9 1329 pxor %xmm13,%xmm15 1330 movdqu %xmm1,0 + 64(%rdi) 1331 movdqu %xmm5,16 + 64(%rdi) 1332 movdqu %xmm9,32 + 64(%rdi) 1333 movdqu %xmm15,48 + 64(%rdi) 1334 1335 subq $128,%rbx 1336 leaq 128(%rsi),%rsi 1337 leaq 128(%rdi),%rdi 1338 jmp .Lopen_sse_tail_64_dec_loop 1339 1340.Lopen_sse_tail_256: 1341 movdqa .Lchacha20_consts(%rip),%xmm0 1342 movdqa 0+48(%rbp),%xmm4 1343 movdqa 0+64(%rbp),%xmm8 1344 movdqa %xmm0,%xmm1 1345 movdqa %xmm4,%xmm5 1346 movdqa %xmm8,%xmm9 1347 movdqa %xmm0,%xmm2 1348 movdqa %xmm4,%xmm6 1349 movdqa %xmm8,%xmm10 1350 movdqa %xmm0,%xmm3 1351 movdqa %xmm4,%xmm7 1352 movdqa %xmm8,%xmm11 1353 movdqa 0+96(%rbp),%xmm15 1354 paddd .Lsse_inc(%rip),%xmm15 1355 
movdqa %xmm15,%xmm14 1356 paddd .Lsse_inc(%rip),%xmm14 1357 movdqa %xmm14,%xmm13 1358 paddd .Lsse_inc(%rip),%xmm13 1359 movdqa %xmm13,%xmm12 1360 paddd .Lsse_inc(%rip),%xmm12 1361 movdqa %xmm12,0+96(%rbp) 1362 movdqa %xmm13,0+112(%rbp) 1363 movdqa %xmm14,0+128(%rbp) 1364 movdqa %xmm15,0+144(%rbp) 1365 1366 xorq %r8,%r8 1367.Lopen_sse_tail_256_rounds_and_x1hash: 1368 addq 0+0(%rsi,%r8,1),%r10 1369 adcq 8+0(%rsi,%r8,1),%r11 1370 adcq $1,%r12 1371 movdqa %xmm11,0+80(%rbp) 1372 paddd %xmm4,%xmm0 1373 pxor %xmm0,%xmm12 1374 pshufb .Lrol16(%rip),%xmm12 1375 paddd %xmm12,%xmm8 1376 pxor %xmm8,%xmm4 1377 movdqa %xmm4,%xmm11 1378 pslld $12,%xmm11 1379 psrld $20,%xmm4 1380 pxor %xmm11,%xmm4 1381 paddd %xmm4,%xmm0 1382 pxor %xmm0,%xmm12 1383 pshufb .Lrol8(%rip),%xmm12 1384 paddd %xmm12,%xmm8 1385 pxor %xmm8,%xmm4 1386 movdqa %xmm4,%xmm11 1387 pslld $7,%xmm11 1388 psrld $25,%xmm4 1389 pxor %xmm11,%xmm4 1390.byte 102,15,58,15,228,4 1391.byte 102,69,15,58,15,192,8 1392.byte 102,69,15,58,15,228,12 1393 paddd %xmm5,%xmm1 1394 pxor %xmm1,%xmm13 1395 pshufb .Lrol16(%rip),%xmm13 1396 paddd %xmm13,%xmm9 1397 pxor %xmm9,%xmm5 1398 movdqa %xmm5,%xmm11 1399 pslld $12,%xmm11 1400 psrld $20,%xmm5 1401 pxor %xmm11,%xmm5 1402 paddd %xmm5,%xmm1 1403 pxor %xmm1,%xmm13 1404 pshufb .Lrol8(%rip),%xmm13 1405 paddd %xmm13,%xmm9 1406 pxor %xmm9,%xmm5 1407 movdqa %xmm5,%xmm11 1408 pslld $7,%xmm11 1409 psrld $25,%xmm5 1410 pxor %xmm11,%xmm5 1411.byte 102,15,58,15,237,4 1412.byte 102,69,15,58,15,201,8 1413.byte 102,69,15,58,15,237,12 1414 paddd %xmm6,%xmm2 1415 pxor %xmm2,%xmm14 1416 pshufb .Lrol16(%rip),%xmm14 1417 paddd %xmm14,%xmm10 1418 pxor %xmm10,%xmm6 1419 movdqa %xmm6,%xmm11 1420 pslld $12,%xmm11 1421 psrld $20,%xmm6 1422 pxor %xmm11,%xmm6 1423 paddd %xmm6,%xmm2 1424 pxor %xmm2,%xmm14 1425 pshufb .Lrol8(%rip),%xmm14 1426 paddd %xmm14,%xmm10 1427 pxor %xmm10,%xmm6 1428 movdqa %xmm6,%xmm11 1429 pslld $7,%xmm11 1430 psrld $25,%xmm6 1431 pxor %xmm11,%xmm6 1432.byte 102,15,58,15,246,4 1433.byte 102,69,15,58,15,210,8 1434.byte 102,69,15,58,15,246,12 1435 movdqa 0+80(%rbp),%xmm11 1436 movq 0+0+0(%rbp),%rax 1437 movq %rax,%r15 1438 mulq %r10 1439 movq %rax,%r13 1440 movq %rdx,%r14 1441 movq 0+0+0(%rbp),%rax 1442 mulq %r11 1443 imulq %r12,%r15 1444 addq %rax,%r14 1445 adcq %rdx,%r15 1446 movdqa %xmm9,0+80(%rbp) 1447 paddd %xmm7,%xmm3 1448 pxor %xmm3,%xmm15 1449 pshufb .Lrol16(%rip),%xmm15 1450 paddd %xmm15,%xmm11 1451 pxor %xmm11,%xmm7 1452 movdqa %xmm7,%xmm9 1453 pslld $12,%xmm9 1454 psrld $20,%xmm7 1455 pxor %xmm9,%xmm7 1456 paddd %xmm7,%xmm3 1457 pxor %xmm3,%xmm15 1458 pshufb .Lrol8(%rip),%xmm15 1459 paddd %xmm15,%xmm11 1460 pxor %xmm11,%xmm7 1461 movdqa %xmm7,%xmm9 1462 pslld $7,%xmm9 1463 psrld $25,%xmm7 1464 pxor %xmm9,%xmm7 1465.byte 102,15,58,15,255,4 1466.byte 102,69,15,58,15,219,8 1467.byte 102,69,15,58,15,255,12 1468 movdqa 0+80(%rbp),%xmm9 1469 movq 8+0+0(%rbp),%rax 1470 movq %rax,%r9 1471 mulq %r10 1472 addq %rax,%r14 1473 adcq $0,%rdx 1474 movq %rdx,%r10 1475 movq 8+0+0(%rbp),%rax 1476 mulq %r11 1477 addq %rax,%r15 1478 adcq $0,%rdx 1479 movdqa %xmm11,0+80(%rbp) 1480 paddd %xmm4,%xmm0 1481 pxor %xmm0,%xmm12 1482 pshufb .Lrol16(%rip),%xmm12 1483 paddd %xmm12,%xmm8 1484 pxor %xmm8,%xmm4 1485 movdqa %xmm4,%xmm11 1486 pslld $12,%xmm11 1487 psrld $20,%xmm4 1488 pxor %xmm11,%xmm4 1489 paddd %xmm4,%xmm0 1490 pxor %xmm0,%xmm12 1491 pshufb .Lrol8(%rip),%xmm12 1492 paddd %xmm12,%xmm8 1493 pxor %xmm8,%xmm4 1494 movdqa %xmm4,%xmm11 1495 pslld $7,%xmm11 1496 psrld $25,%xmm4 1497 pxor %xmm11,%xmm4 1498.byte 102,15,58,15,228,12 
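# Throughout the open (decrypt) path the Poly1305 update over the ciphertext is
# interleaved with the ChaCha20 rounds: the mulq/imulq sequences compute the
# multiply by r, and the following andq $3 / andq $-4 / shrdq $2 / shrq $2 /
# addq / adcq steps perform the partial reduction modulo 2^130 - 5.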
1499.byte 102,69,15,58,15,192,8 1500.byte 102,69,15,58,15,228,4 1501 paddd %xmm5,%xmm1 1502 pxor %xmm1,%xmm13 1503 pshufb .Lrol16(%rip),%xmm13 1504 paddd %xmm13,%xmm9 1505 pxor %xmm9,%xmm5 1506 movdqa %xmm5,%xmm11 1507 pslld $12,%xmm11 1508 psrld $20,%xmm5 1509 pxor %xmm11,%xmm5 1510 paddd %xmm5,%xmm1 1511 pxor %xmm1,%xmm13 1512 pshufb .Lrol8(%rip),%xmm13 1513 paddd %xmm13,%xmm9 1514 pxor %xmm9,%xmm5 1515 movdqa %xmm5,%xmm11 1516 pslld $7,%xmm11 1517 psrld $25,%xmm5 1518 pxor %xmm11,%xmm5 1519.byte 102,15,58,15,237,12 1520.byte 102,69,15,58,15,201,8 1521.byte 102,69,15,58,15,237,4 1522 imulq %r12,%r9 1523 addq %r10,%r15 1524 adcq %rdx,%r9 1525 paddd %xmm6,%xmm2 1526 pxor %xmm2,%xmm14 1527 pshufb .Lrol16(%rip),%xmm14 1528 paddd %xmm14,%xmm10 1529 pxor %xmm10,%xmm6 1530 movdqa %xmm6,%xmm11 1531 pslld $12,%xmm11 1532 psrld $20,%xmm6 1533 pxor %xmm11,%xmm6 1534 paddd %xmm6,%xmm2 1535 pxor %xmm2,%xmm14 1536 pshufb .Lrol8(%rip),%xmm14 1537 paddd %xmm14,%xmm10 1538 pxor %xmm10,%xmm6 1539 movdqa %xmm6,%xmm11 1540 pslld $7,%xmm11 1541 psrld $25,%xmm6 1542 pxor %xmm11,%xmm6 1543.byte 102,15,58,15,246,12 1544.byte 102,69,15,58,15,210,8 1545.byte 102,69,15,58,15,246,4 1546 movdqa 0+80(%rbp),%xmm11 1547 movq %r13,%r10 1548 movq %r14,%r11 1549 movq %r15,%r12 1550 andq $3,%r12 1551 movq %r15,%r13 1552 andq $-4,%r13 1553 movq %r9,%r14 1554 shrdq $2,%r9,%r15 1555 shrq $2,%r9 1556 addq %r13,%r15 1557 adcq %r14,%r9 1558 addq %r15,%r10 1559 adcq %r9,%r11 1560 adcq $0,%r12 1561 movdqa %xmm9,0+80(%rbp) 1562 paddd %xmm7,%xmm3 1563 pxor %xmm3,%xmm15 1564 pshufb .Lrol16(%rip),%xmm15 1565 paddd %xmm15,%xmm11 1566 pxor %xmm11,%xmm7 1567 movdqa %xmm7,%xmm9 1568 pslld $12,%xmm9 1569 psrld $20,%xmm7 1570 pxor %xmm9,%xmm7 1571 paddd %xmm7,%xmm3 1572 pxor %xmm3,%xmm15 1573 pshufb .Lrol8(%rip),%xmm15 1574 paddd %xmm15,%xmm11 1575 pxor %xmm11,%xmm7 1576 movdqa %xmm7,%xmm9 1577 pslld $7,%xmm9 1578 psrld $25,%xmm7 1579 pxor %xmm9,%xmm7 1580.byte 102,15,58,15,255,12 1581.byte 102,69,15,58,15,219,8 1582.byte 102,69,15,58,15,255,4 1583 movdqa 0+80(%rbp),%xmm9 1584 1585 addq $16,%r8 1586 cmpq $160,%r8 1587 jb .Lopen_sse_tail_256_rounds_and_x1hash 1588 1589 movq %rbx,%rcx 1590 andq $-16,%rcx 1591.Lopen_sse_tail_256_hash: 1592 addq 0+0(%rsi,%r8,1),%r10 1593 adcq 8+0(%rsi,%r8,1),%r11 1594 adcq $1,%r12 1595 movq 0+0+0(%rbp),%rax 1596 movq %rax,%r15 1597 mulq %r10 1598 movq %rax,%r13 1599 movq %rdx,%r14 1600 movq 0+0+0(%rbp),%rax 1601 mulq %r11 1602 imulq %r12,%r15 1603 addq %rax,%r14 1604 adcq %rdx,%r15 1605 movq 8+0+0(%rbp),%rax 1606 movq %rax,%r9 1607 mulq %r10 1608 addq %rax,%r14 1609 adcq $0,%rdx 1610 movq %rdx,%r10 1611 movq 8+0+0(%rbp),%rax 1612 mulq %r11 1613 addq %rax,%r15 1614 adcq $0,%rdx 1615 imulq %r12,%r9 1616 addq %r10,%r15 1617 adcq %rdx,%r9 1618 movq %r13,%r10 1619 movq %r14,%r11 1620 movq %r15,%r12 1621 andq $3,%r12 1622 movq %r15,%r13 1623 andq $-4,%r13 1624 movq %r9,%r14 1625 shrdq $2,%r9,%r15 1626 shrq $2,%r9 1627 addq %r13,%r15 1628 adcq %r14,%r9 1629 addq %r15,%r10 1630 adcq %r9,%r11 1631 adcq $0,%r12 1632 1633 addq $16,%r8 1634 cmpq %rcx,%r8 1635 jb .Lopen_sse_tail_256_hash 1636 paddd .Lchacha20_consts(%rip),%xmm3 1637 paddd 0+48(%rbp),%xmm7 1638 paddd 0+64(%rbp),%xmm11 1639 paddd 0+144(%rbp),%xmm15 1640 paddd .Lchacha20_consts(%rip),%xmm2 1641 paddd 0+48(%rbp),%xmm6 1642 paddd 0+64(%rbp),%xmm10 1643 paddd 0+128(%rbp),%xmm14 1644 paddd .Lchacha20_consts(%rip),%xmm1 1645 paddd 0+48(%rbp),%xmm5 1646 paddd 0+64(%rbp),%xmm9 1647 paddd 0+112(%rbp),%xmm13 1648 paddd .Lchacha20_consts(%rip),%xmm0 1649 paddd 
0+48(%rbp),%xmm4 1650 paddd 0+64(%rbp),%xmm8 1651 paddd 0+96(%rbp),%xmm12 1652 movdqa %xmm12,0+80(%rbp) 1653 movdqu 0 + 0(%rsi),%xmm12 1654 pxor %xmm3,%xmm12 1655 movdqu %xmm12,0 + 0(%rdi) 1656 movdqu 16 + 0(%rsi),%xmm12 1657 pxor %xmm7,%xmm12 1658 movdqu %xmm12,16 + 0(%rdi) 1659 movdqu 32 + 0(%rsi),%xmm12 1660 pxor %xmm11,%xmm12 1661 movdqu %xmm12,32 + 0(%rdi) 1662 movdqu 48 + 0(%rsi),%xmm12 1663 pxor %xmm15,%xmm12 1664 movdqu %xmm12,48 + 0(%rdi) 1665 movdqu 0 + 64(%rsi),%xmm3 1666 movdqu 16 + 64(%rsi),%xmm7 1667 movdqu 32 + 64(%rsi),%xmm11 1668 movdqu 48 + 64(%rsi),%xmm15 1669 pxor %xmm3,%xmm2 1670 pxor %xmm7,%xmm6 1671 pxor %xmm11,%xmm10 1672 pxor %xmm14,%xmm15 1673 movdqu %xmm2,0 + 64(%rdi) 1674 movdqu %xmm6,16 + 64(%rdi) 1675 movdqu %xmm10,32 + 64(%rdi) 1676 movdqu %xmm15,48 + 64(%rdi) 1677 movdqu 0 + 128(%rsi),%xmm3 1678 movdqu 16 + 128(%rsi),%xmm7 1679 movdqu 32 + 128(%rsi),%xmm11 1680 movdqu 48 + 128(%rsi),%xmm15 1681 pxor %xmm3,%xmm1 1682 pxor %xmm7,%xmm5 1683 pxor %xmm11,%xmm9 1684 pxor %xmm13,%xmm15 1685 movdqu %xmm1,0 + 128(%rdi) 1686 movdqu %xmm5,16 + 128(%rdi) 1687 movdqu %xmm9,32 + 128(%rdi) 1688 movdqu %xmm15,48 + 128(%rdi) 1689 1690 movdqa 0+80(%rbp),%xmm12 1691 subq $192,%rbx 1692 leaq 192(%rsi),%rsi 1693 leaq 192(%rdi),%rdi 1694 1695 1696.Lopen_sse_tail_64_dec_loop: 1697 cmpq $16,%rbx 1698 jb .Lopen_sse_tail_16_init 1699 subq $16,%rbx 1700 movdqu (%rsi),%xmm3 1701 pxor %xmm3,%xmm0 1702 movdqu %xmm0,(%rdi) 1703 leaq 16(%rsi),%rsi 1704 leaq 16(%rdi),%rdi 1705 movdqa %xmm4,%xmm0 1706 movdqa %xmm8,%xmm4 1707 movdqa %xmm12,%xmm8 1708 jmp .Lopen_sse_tail_64_dec_loop 1709.Lopen_sse_tail_16_init: 1710 movdqa %xmm0,%xmm1 1711 1712 1713.Lopen_sse_tail_16: 1714 testq %rbx,%rbx 1715 jz .Lopen_sse_finalize 1716 1717 1718 1719 pxor %xmm3,%xmm3 1720 leaq -1(%rsi,%rbx,1),%rsi 1721 movq %rbx,%r8 1722.Lopen_sse_tail_16_compose: 1723 pslldq $1,%xmm3 1724 pinsrb $0,(%rsi),%xmm3 1725 subq $1,%rsi 1726 subq $1,%r8 1727 jnz .Lopen_sse_tail_16_compose 1728 1729.byte 102,73,15,126,221 1730 pextrq $1,%xmm3,%r14 1731 1732 pxor %xmm1,%xmm3 1733 1734 1735.Lopen_sse_tail_16_extract: 1736 pextrb $0,%xmm3,(%rdi) 1737 psrldq $1,%xmm3 1738 addq $1,%rdi 1739 subq $1,%rbx 1740 jne .Lopen_sse_tail_16_extract 1741 1742 addq %r13,%r10 1743 adcq %r14,%r11 1744 adcq $1,%r12 1745 movq 0+0+0(%rbp),%rax 1746 movq %rax,%r15 1747 mulq %r10 1748 movq %rax,%r13 1749 movq %rdx,%r14 1750 movq 0+0+0(%rbp),%rax 1751 mulq %r11 1752 imulq %r12,%r15 1753 addq %rax,%r14 1754 adcq %rdx,%r15 1755 movq 8+0+0(%rbp),%rax 1756 movq %rax,%r9 1757 mulq %r10 1758 addq %rax,%r14 1759 adcq $0,%rdx 1760 movq %rdx,%r10 1761 movq 8+0+0(%rbp),%rax 1762 mulq %r11 1763 addq %rax,%r15 1764 adcq $0,%rdx 1765 imulq %r12,%r9 1766 addq %r10,%r15 1767 adcq %rdx,%r9 1768 movq %r13,%r10 1769 movq %r14,%r11 1770 movq %r15,%r12 1771 andq $3,%r12 1772 movq %r15,%r13 1773 andq $-4,%r13 1774 movq %r9,%r14 1775 shrdq $2,%r9,%r15 1776 shrq $2,%r9 1777 addq %r13,%r15 1778 adcq %r14,%r9 1779 addq %r15,%r10 1780 adcq %r9,%r11 1781 adcq $0,%r12 1782 1783 1784.Lopen_sse_finalize: 1785 addq 0+0+32(%rbp),%r10 1786 adcq 8+0+32(%rbp),%r11 1787 adcq $1,%r12 1788 movq 0+0+0(%rbp),%rax 1789 movq %rax,%r15 1790 mulq %r10 1791 movq %rax,%r13 1792 movq %rdx,%r14 1793 movq 0+0+0(%rbp),%rax 1794 mulq %r11 1795 imulq %r12,%r15 1796 addq %rax,%r14 1797 adcq %rdx,%r15 1798 movq 8+0+0(%rbp),%rax 1799 movq %rax,%r9 1800 mulq %r10 1801 addq %rax,%r14 1802 adcq $0,%rdx 1803 movq %rdx,%r10 1804 movq 8+0+0(%rbp),%rax 1805 mulq %r11 1806 addq %rax,%r15 1807 adcq $0,%rdx 1808 imulq 
%r12,%r9 1809 addq %r10,%r15 1810 adcq %rdx,%r9 1811 movq %r13,%r10 1812 movq %r14,%r11 1813 movq %r15,%r12 1814 andq $3,%r12 1815 movq %r15,%r13 1816 andq $-4,%r13 1817 movq %r9,%r14 1818 shrdq $2,%r9,%r15 1819 shrq $2,%r9 1820 addq %r13,%r15 1821 adcq %r14,%r9 1822 addq %r15,%r10 1823 adcq %r9,%r11 1824 adcq $0,%r12 1825 1826 1827 movq %r10,%r13 1828 movq %r11,%r14 1829 movq %r12,%r15 1830 subq $-5,%r10 1831 sbbq $-1,%r11 1832 sbbq $3,%r12 1833 cmovcq %r13,%r10 1834 cmovcq %r14,%r11 1835 cmovcq %r15,%r12 1836 1837 addq 0+0+16(%rbp),%r10 1838 adcq 8+0+16(%rbp),%r11 1839 1840.cfi_remember_state 1841 addq $288 + 0 + 32,%rsp 1842.cfi_adjust_cfa_offset -(288 + 32) 1843 1844 popq %r9 1845.cfi_adjust_cfa_offset -8 1846.cfi_restore %r9 1847 movq %r10,(%r9) 1848 movq %r11,8(%r9) 1849 popq %r15 1850.cfi_adjust_cfa_offset -8 1851.cfi_restore %r15 1852 popq %r14 1853.cfi_adjust_cfa_offset -8 1854.cfi_restore %r14 1855 popq %r13 1856.cfi_adjust_cfa_offset -8 1857.cfi_restore %r13 1858 popq %r12 1859.cfi_adjust_cfa_offset -8 1860.cfi_restore %r12 1861 popq %rbx 1862.cfi_adjust_cfa_offset -8 1863.cfi_restore %rbx 1864 popq %rbp 1865.cfi_adjust_cfa_offset -8 1866.cfi_restore %rbp 1867 .byte 0xf3,0xc3 1868 1869.Lopen_sse_128: 1870.cfi_restore_state 1871 movdqu .Lchacha20_consts(%rip),%xmm0 1872 movdqa %xmm0,%xmm1 1873 movdqa %xmm0,%xmm2 1874 movdqu 0(%r9),%xmm4 1875 movdqa %xmm4,%xmm5 1876 movdqa %xmm4,%xmm6 1877 movdqu 16(%r9),%xmm8 1878 movdqa %xmm8,%xmm9 1879 movdqa %xmm8,%xmm10 1880 movdqu 32(%r9),%xmm12 1881 movdqa %xmm12,%xmm13 1882 paddd .Lsse_inc(%rip),%xmm13 1883 movdqa %xmm13,%xmm14 1884 paddd .Lsse_inc(%rip),%xmm14 1885 movdqa %xmm4,%xmm7 1886 movdqa %xmm8,%xmm11 1887 movdqa %xmm13,%xmm15 1888 movq $10,%r10 1889 1890.Lopen_sse_128_rounds: 1891 paddd %xmm4,%xmm0 1892 pxor %xmm0,%xmm12 1893 pshufb .Lrol16(%rip),%xmm12 1894 paddd %xmm12,%xmm8 1895 pxor %xmm8,%xmm4 1896 movdqa %xmm4,%xmm3 1897 pslld $12,%xmm3 1898 psrld $20,%xmm4 1899 pxor %xmm3,%xmm4 1900 paddd %xmm4,%xmm0 1901 pxor %xmm0,%xmm12 1902 pshufb .Lrol8(%rip),%xmm12 1903 paddd %xmm12,%xmm8 1904 pxor %xmm8,%xmm4 1905 movdqa %xmm4,%xmm3 1906 pslld $7,%xmm3 1907 psrld $25,%xmm4 1908 pxor %xmm3,%xmm4 1909.byte 102,15,58,15,228,4 1910.byte 102,69,15,58,15,192,8 1911.byte 102,69,15,58,15,228,12 1912 paddd %xmm5,%xmm1 1913 pxor %xmm1,%xmm13 1914 pshufb .Lrol16(%rip),%xmm13 1915 paddd %xmm13,%xmm9 1916 pxor %xmm9,%xmm5 1917 movdqa %xmm5,%xmm3 1918 pslld $12,%xmm3 1919 psrld $20,%xmm5 1920 pxor %xmm3,%xmm5 1921 paddd %xmm5,%xmm1 1922 pxor %xmm1,%xmm13 1923 pshufb .Lrol8(%rip),%xmm13 1924 paddd %xmm13,%xmm9 1925 pxor %xmm9,%xmm5 1926 movdqa %xmm5,%xmm3 1927 pslld $7,%xmm3 1928 psrld $25,%xmm5 1929 pxor %xmm3,%xmm5 1930.byte 102,15,58,15,237,4 1931.byte 102,69,15,58,15,201,8 1932.byte 102,69,15,58,15,237,12 1933 paddd %xmm6,%xmm2 1934 pxor %xmm2,%xmm14 1935 pshufb .Lrol16(%rip),%xmm14 1936 paddd %xmm14,%xmm10 1937 pxor %xmm10,%xmm6 1938 movdqa %xmm6,%xmm3 1939 pslld $12,%xmm3 1940 psrld $20,%xmm6 1941 pxor %xmm3,%xmm6 1942 paddd %xmm6,%xmm2 1943 pxor %xmm2,%xmm14 1944 pshufb .Lrol8(%rip),%xmm14 1945 paddd %xmm14,%xmm10 1946 pxor %xmm10,%xmm6 1947 movdqa %xmm6,%xmm3 1948 pslld $7,%xmm3 1949 psrld $25,%xmm6 1950 pxor %xmm3,%xmm6 1951.byte 102,15,58,15,246,4 1952.byte 102,69,15,58,15,210,8 1953.byte 102,69,15,58,15,246,12 1954 paddd %xmm4,%xmm0 1955 pxor %xmm0,%xmm12 1956 pshufb .Lrol16(%rip),%xmm12 1957 paddd %xmm12,%xmm8 1958 pxor %xmm8,%xmm4 1959 movdqa %xmm4,%xmm3 1960 pslld $12,%xmm3 1961 psrld $20,%xmm4 1962 pxor %xmm3,%xmm4 1963 paddd 
%xmm4,%xmm0 1964 pxor %xmm0,%xmm12 1965 pshufb .Lrol8(%rip),%xmm12 1966 paddd %xmm12,%xmm8 1967 pxor %xmm8,%xmm4 1968 movdqa %xmm4,%xmm3 1969 pslld $7,%xmm3 1970 psrld $25,%xmm4 1971 pxor %xmm3,%xmm4 1972.byte 102,15,58,15,228,12 1973.byte 102,69,15,58,15,192,8 1974.byte 102,69,15,58,15,228,4 1975 paddd %xmm5,%xmm1 1976 pxor %xmm1,%xmm13 1977 pshufb .Lrol16(%rip),%xmm13 1978 paddd %xmm13,%xmm9 1979 pxor %xmm9,%xmm5 1980 movdqa %xmm5,%xmm3 1981 pslld $12,%xmm3 1982 psrld $20,%xmm5 1983 pxor %xmm3,%xmm5 1984 paddd %xmm5,%xmm1 1985 pxor %xmm1,%xmm13 1986 pshufb .Lrol8(%rip),%xmm13 1987 paddd %xmm13,%xmm9 1988 pxor %xmm9,%xmm5 1989 movdqa %xmm5,%xmm3 1990 pslld $7,%xmm3 1991 psrld $25,%xmm5 1992 pxor %xmm3,%xmm5 1993.byte 102,15,58,15,237,12 1994.byte 102,69,15,58,15,201,8 1995.byte 102,69,15,58,15,237,4 1996 paddd %xmm6,%xmm2 1997 pxor %xmm2,%xmm14 1998 pshufb .Lrol16(%rip),%xmm14 1999 paddd %xmm14,%xmm10 2000 pxor %xmm10,%xmm6 2001 movdqa %xmm6,%xmm3 2002 pslld $12,%xmm3 2003 psrld $20,%xmm6 2004 pxor %xmm3,%xmm6 2005 paddd %xmm6,%xmm2 2006 pxor %xmm2,%xmm14 2007 pshufb .Lrol8(%rip),%xmm14 2008 paddd %xmm14,%xmm10 2009 pxor %xmm10,%xmm6 2010 movdqa %xmm6,%xmm3 2011 pslld $7,%xmm3 2012 psrld $25,%xmm6 2013 pxor %xmm3,%xmm6 2014.byte 102,15,58,15,246,12 2015.byte 102,69,15,58,15,210,8 2016.byte 102,69,15,58,15,246,4 2017 2018 decq %r10 2019 jnz .Lopen_sse_128_rounds 2020 paddd .Lchacha20_consts(%rip),%xmm0 2021 paddd .Lchacha20_consts(%rip),%xmm1 2022 paddd .Lchacha20_consts(%rip),%xmm2 2023 paddd %xmm7,%xmm4 2024 paddd %xmm7,%xmm5 2025 paddd %xmm7,%xmm6 2026 paddd %xmm11,%xmm9 2027 paddd %xmm11,%xmm10 2028 paddd %xmm15,%xmm13 2029 paddd .Lsse_inc(%rip),%xmm15 2030 paddd %xmm15,%xmm14 2031 2032 pand .Lclamp(%rip),%xmm0 2033 movdqa %xmm0,0+0(%rbp) 2034 movdqa %xmm4,0+16(%rbp) 2035 2036 movq %r8,%r8 2037 call poly_hash_ad_internal 2038.Lopen_sse_128_xor_hash: 2039 cmpq $16,%rbx 2040 jb .Lopen_sse_tail_16 2041 subq $16,%rbx 2042 addq 0+0(%rsi),%r10 2043 adcq 8+0(%rsi),%r11 2044 adcq $1,%r12 2045 2046 2047 movdqu 0(%rsi),%xmm3 2048 pxor %xmm3,%xmm1 2049 movdqu %xmm1,0(%rdi) 2050 leaq 16(%rsi),%rsi 2051 leaq 16(%rdi),%rdi 2052 movq 0+0+0(%rbp),%rax 2053 movq %rax,%r15 2054 mulq %r10 2055 movq %rax,%r13 2056 movq %rdx,%r14 2057 movq 0+0+0(%rbp),%rax 2058 mulq %r11 2059 imulq %r12,%r15 2060 addq %rax,%r14 2061 adcq %rdx,%r15 2062 movq 8+0+0(%rbp),%rax 2063 movq %rax,%r9 2064 mulq %r10 2065 addq %rax,%r14 2066 adcq $0,%rdx 2067 movq %rdx,%r10 2068 movq 8+0+0(%rbp),%rax 2069 mulq %r11 2070 addq %rax,%r15 2071 adcq $0,%rdx 2072 imulq %r12,%r9 2073 addq %r10,%r15 2074 adcq %rdx,%r9 2075 movq %r13,%r10 2076 movq %r14,%r11 2077 movq %r15,%r12 2078 andq $3,%r12 2079 movq %r15,%r13 2080 andq $-4,%r13 2081 movq %r9,%r14 2082 shrdq $2,%r9,%r15 2083 shrq $2,%r9 2084 addq %r13,%r15 2085 adcq %r14,%r9 2086 addq %r15,%r10 2087 adcq %r9,%r11 2088 adcq $0,%r12 2089 2090 2091 movdqa %xmm5,%xmm1 2092 movdqa %xmm9,%xmm5 2093 movdqa %xmm13,%xmm9 2094 movdqa %xmm2,%xmm13 2095 movdqa %xmm6,%xmm2 2096 movdqa %xmm10,%xmm6 2097 movdqa %xmm14,%xmm10 2098 jmp .Lopen_sse_128_xor_hash 2099.size chacha20_poly1305_open, .-chacha20_poly1305_open 2100.cfi_endproc 2101 2102 2103 2104 2105 2106 2107 2108.globl chacha20_poly1305_seal 2109.hidden chacha20_poly1305_seal 2110.type chacha20_poly1305_seal,@function 2111.align 64 2112chacha20_poly1305_seal: 2113.cfi_startproc 2114 pushq %rbp 2115.cfi_adjust_cfa_offset 8 2116.cfi_offset %rbp,-16 2117 pushq %rbx 2118.cfi_adjust_cfa_offset 8 2119.cfi_offset %rbx,-24 2120 pushq %r12 
2121.cfi_adjust_cfa_offset 8 2122.cfi_offset %r12,-32 2123 pushq %r13 2124.cfi_adjust_cfa_offset 8 2125.cfi_offset %r13,-40 2126 pushq %r14 2127.cfi_adjust_cfa_offset 8 2128.cfi_offset %r14,-48 2129 pushq %r15 2130.cfi_adjust_cfa_offset 8 2131.cfi_offset %r15,-56 2132 2133 2134 pushq %r9 2135.cfi_adjust_cfa_offset 8 2136.cfi_offset %r9,-64 2137 subq $288 + 0 + 32,%rsp 2138.cfi_adjust_cfa_offset 288 + 32 2139 leaq 32(%rsp),%rbp 2140 andq $-32,%rbp 2141 2142 movq 56(%r9),%rbx 2143 addq %rdx,%rbx 2144 movq %r8,0+0+32(%rbp) 2145 movq %rbx,8+0+32(%rbp) 2146 movq %rdx,%rbx 2147 2148 movl OPENSSL_ia32cap_P+8(%rip),%eax 2149 andl $288,%eax 2150 xorl $288,%eax 2151 jz chacha20_poly1305_seal_avx2 2152 2153 cmpq $128,%rbx 2154 jbe .Lseal_sse_128 2155 2156 movdqa .Lchacha20_consts(%rip),%xmm0 2157 movdqu 0(%r9),%xmm4 2158 movdqu 16(%r9),%xmm8 2159 movdqu 32(%r9),%xmm12 2160 2161 movdqa %xmm0,%xmm1 2162 movdqa %xmm0,%xmm2 2163 movdqa %xmm0,%xmm3 2164 movdqa %xmm4,%xmm5 2165 movdqa %xmm4,%xmm6 2166 movdqa %xmm4,%xmm7 2167 movdqa %xmm8,%xmm9 2168 movdqa %xmm8,%xmm10 2169 movdqa %xmm8,%xmm11 2170 movdqa %xmm12,%xmm15 2171 paddd .Lsse_inc(%rip),%xmm12 2172 movdqa %xmm12,%xmm14 2173 paddd .Lsse_inc(%rip),%xmm12 2174 movdqa %xmm12,%xmm13 2175 paddd .Lsse_inc(%rip),%xmm12 2176 2177 movdqa %xmm4,0+48(%rbp) 2178 movdqa %xmm8,0+64(%rbp) 2179 movdqa %xmm12,0+96(%rbp) 2180 movdqa %xmm13,0+112(%rbp) 2181 movdqa %xmm14,0+128(%rbp) 2182 movdqa %xmm15,0+144(%rbp) 2183 movq $10,%r10 2184.Lseal_sse_init_rounds: 2185 movdqa %xmm8,0+80(%rbp) 2186 movdqa .Lrol16(%rip),%xmm8 2187 paddd %xmm7,%xmm3 2188 paddd %xmm6,%xmm2 2189 paddd %xmm5,%xmm1 2190 paddd %xmm4,%xmm0 2191 pxor %xmm3,%xmm15 2192 pxor %xmm2,%xmm14 2193 pxor %xmm1,%xmm13 2194 pxor %xmm0,%xmm12 2195.byte 102,69,15,56,0,248 2196.byte 102,69,15,56,0,240 2197.byte 102,69,15,56,0,232 2198.byte 102,69,15,56,0,224 2199 movdqa 0+80(%rbp),%xmm8 2200 paddd %xmm15,%xmm11 2201 paddd %xmm14,%xmm10 2202 paddd %xmm13,%xmm9 2203 paddd %xmm12,%xmm8 2204 pxor %xmm11,%xmm7 2205 pxor %xmm10,%xmm6 2206 pxor %xmm9,%xmm5 2207 pxor %xmm8,%xmm4 2208 movdqa %xmm8,0+80(%rbp) 2209 movdqa %xmm7,%xmm8 2210 psrld $20,%xmm8 2211 pslld $32-20,%xmm7 2212 pxor %xmm8,%xmm7 2213 movdqa %xmm6,%xmm8 2214 psrld $20,%xmm8 2215 pslld $32-20,%xmm6 2216 pxor %xmm8,%xmm6 2217 movdqa %xmm5,%xmm8 2218 psrld $20,%xmm8 2219 pslld $32-20,%xmm5 2220 pxor %xmm8,%xmm5 2221 movdqa %xmm4,%xmm8 2222 psrld $20,%xmm8 2223 pslld $32-20,%xmm4 2224 pxor %xmm8,%xmm4 2225 movdqa .Lrol8(%rip),%xmm8 2226 paddd %xmm7,%xmm3 2227 paddd %xmm6,%xmm2 2228 paddd %xmm5,%xmm1 2229 paddd %xmm4,%xmm0 2230 pxor %xmm3,%xmm15 2231 pxor %xmm2,%xmm14 2232 pxor %xmm1,%xmm13 2233 pxor %xmm0,%xmm12 2234.byte 102,69,15,56,0,248 2235.byte 102,69,15,56,0,240 2236.byte 102,69,15,56,0,232 2237.byte 102,69,15,56,0,224 2238 movdqa 0+80(%rbp),%xmm8 2239 paddd %xmm15,%xmm11 2240 paddd %xmm14,%xmm10 2241 paddd %xmm13,%xmm9 2242 paddd %xmm12,%xmm8 2243 pxor %xmm11,%xmm7 2244 pxor %xmm10,%xmm6 2245 pxor %xmm9,%xmm5 2246 pxor %xmm8,%xmm4 2247 movdqa %xmm8,0+80(%rbp) 2248 movdqa %xmm7,%xmm8 2249 psrld $25,%xmm8 2250 pslld $32-25,%xmm7 2251 pxor %xmm8,%xmm7 2252 movdqa %xmm6,%xmm8 2253 psrld $25,%xmm8 2254 pslld $32-25,%xmm6 2255 pxor %xmm8,%xmm6 2256 movdqa %xmm5,%xmm8 2257 psrld $25,%xmm8 2258 pslld $32-25,%xmm5 2259 pxor %xmm8,%xmm5 2260 movdqa %xmm4,%xmm8 2261 psrld $25,%xmm8 2262 pslld $32-25,%xmm4 2263 pxor %xmm8,%xmm4 2264 movdqa 0+80(%rbp),%xmm8 2265.byte 102,15,58,15,255,4 2266.byte 102,69,15,58,15,219,8 2267.byte 102,69,15,58,15,255,12 2268.byte 
102,15,58,15,246,4 2269.byte 102,69,15,58,15,210,8 2270.byte 102,69,15,58,15,246,12 2271.byte 102,15,58,15,237,4 2272.byte 102,69,15,58,15,201,8 2273.byte 102,69,15,58,15,237,12 2274.byte 102,15,58,15,228,4 2275.byte 102,69,15,58,15,192,8 2276.byte 102,69,15,58,15,228,12 2277 movdqa %xmm8,0+80(%rbp) 2278 movdqa .Lrol16(%rip),%xmm8 2279 paddd %xmm7,%xmm3 2280 paddd %xmm6,%xmm2 2281 paddd %xmm5,%xmm1 2282 paddd %xmm4,%xmm0 2283 pxor %xmm3,%xmm15 2284 pxor %xmm2,%xmm14 2285 pxor %xmm1,%xmm13 2286 pxor %xmm0,%xmm12 2287.byte 102,69,15,56,0,248 2288.byte 102,69,15,56,0,240 2289.byte 102,69,15,56,0,232 2290.byte 102,69,15,56,0,224 2291 movdqa 0+80(%rbp),%xmm8 2292 paddd %xmm15,%xmm11 2293 paddd %xmm14,%xmm10 2294 paddd %xmm13,%xmm9 2295 paddd %xmm12,%xmm8 2296 pxor %xmm11,%xmm7 2297 pxor %xmm10,%xmm6 2298 pxor %xmm9,%xmm5 2299 pxor %xmm8,%xmm4 2300 movdqa %xmm8,0+80(%rbp) 2301 movdqa %xmm7,%xmm8 2302 psrld $20,%xmm8 2303 pslld $32-20,%xmm7 2304 pxor %xmm8,%xmm7 2305 movdqa %xmm6,%xmm8 2306 psrld $20,%xmm8 2307 pslld $32-20,%xmm6 2308 pxor %xmm8,%xmm6 2309 movdqa %xmm5,%xmm8 2310 psrld $20,%xmm8 2311 pslld $32-20,%xmm5 2312 pxor %xmm8,%xmm5 2313 movdqa %xmm4,%xmm8 2314 psrld $20,%xmm8 2315 pslld $32-20,%xmm4 2316 pxor %xmm8,%xmm4 2317 movdqa .Lrol8(%rip),%xmm8 2318 paddd %xmm7,%xmm3 2319 paddd %xmm6,%xmm2 2320 paddd %xmm5,%xmm1 2321 paddd %xmm4,%xmm0 2322 pxor %xmm3,%xmm15 2323 pxor %xmm2,%xmm14 2324 pxor %xmm1,%xmm13 2325 pxor %xmm0,%xmm12 2326.byte 102,69,15,56,0,248 2327.byte 102,69,15,56,0,240 2328.byte 102,69,15,56,0,232 2329.byte 102,69,15,56,0,224 2330 movdqa 0+80(%rbp),%xmm8 2331 paddd %xmm15,%xmm11 2332 paddd %xmm14,%xmm10 2333 paddd %xmm13,%xmm9 2334 paddd %xmm12,%xmm8 2335 pxor %xmm11,%xmm7 2336 pxor %xmm10,%xmm6 2337 pxor %xmm9,%xmm5 2338 pxor %xmm8,%xmm4 2339 movdqa %xmm8,0+80(%rbp) 2340 movdqa %xmm7,%xmm8 2341 psrld $25,%xmm8 2342 pslld $32-25,%xmm7 2343 pxor %xmm8,%xmm7 2344 movdqa %xmm6,%xmm8 2345 psrld $25,%xmm8 2346 pslld $32-25,%xmm6 2347 pxor %xmm8,%xmm6 2348 movdqa %xmm5,%xmm8 2349 psrld $25,%xmm8 2350 pslld $32-25,%xmm5 2351 pxor %xmm8,%xmm5 2352 movdqa %xmm4,%xmm8 2353 psrld $25,%xmm8 2354 pslld $32-25,%xmm4 2355 pxor %xmm8,%xmm4 2356 movdqa 0+80(%rbp),%xmm8 2357.byte 102,15,58,15,255,12 2358.byte 102,69,15,58,15,219,8 2359.byte 102,69,15,58,15,255,4 2360.byte 102,15,58,15,246,12 2361.byte 102,69,15,58,15,210,8 2362.byte 102,69,15,58,15,246,4 2363.byte 102,15,58,15,237,12 2364.byte 102,69,15,58,15,201,8 2365.byte 102,69,15,58,15,237,4 2366.byte 102,15,58,15,228,12 2367.byte 102,69,15,58,15,192,8 2368.byte 102,69,15,58,15,228,4 2369 2370 decq %r10 2371 jnz .Lseal_sse_init_rounds 2372 paddd .Lchacha20_consts(%rip),%xmm3 2373 paddd 0+48(%rbp),%xmm7 2374 paddd 0+64(%rbp),%xmm11 2375 paddd 0+144(%rbp),%xmm15 2376 paddd .Lchacha20_consts(%rip),%xmm2 2377 paddd 0+48(%rbp),%xmm6 2378 paddd 0+64(%rbp),%xmm10 2379 paddd 0+128(%rbp),%xmm14 2380 paddd .Lchacha20_consts(%rip),%xmm1 2381 paddd 0+48(%rbp),%xmm5 2382 paddd 0+64(%rbp),%xmm9 2383 paddd 0+112(%rbp),%xmm13 2384 paddd .Lchacha20_consts(%rip),%xmm0 2385 paddd 0+48(%rbp),%xmm4 2386 paddd 0+64(%rbp),%xmm8 2387 paddd 0+96(%rbp),%xmm12 2388 2389 2390 pand .Lclamp(%rip),%xmm3 2391 movdqa %xmm3,0+0(%rbp) 2392 movdqa %xmm7,0+16(%rbp) 2393 2394 movq %r8,%r8 2395 call poly_hash_ad_internal 2396 movdqu 0 + 0(%rsi),%xmm3 2397 movdqu 16 + 0(%rsi),%xmm7 2398 movdqu 32 + 0(%rsi),%xmm11 2399 movdqu 48 + 0(%rsi),%xmm15 2400 pxor %xmm3,%xmm2 2401 pxor %xmm7,%xmm6 2402 pxor %xmm11,%xmm10 2403 pxor %xmm14,%xmm15 2404 movdqu %xmm2,0 + 0(%rdi) 2405 
movdqu %xmm6,16 + 0(%rdi) 2406 movdqu %xmm10,32 + 0(%rdi) 2407 movdqu %xmm15,48 + 0(%rdi) 2408 movdqu 0 + 64(%rsi),%xmm3 2409 movdqu 16 + 64(%rsi),%xmm7 2410 movdqu 32 + 64(%rsi),%xmm11 2411 movdqu 48 + 64(%rsi),%xmm15 2412 pxor %xmm3,%xmm1 2413 pxor %xmm7,%xmm5 2414 pxor %xmm11,%xmm9 2415 pxor %xmm13,%xmm15 2416 movdqu %xmm1,0 + 64(%rdi) 2417 movdqu %xmm5,16 + 64(%rdi) 2418 movdqu %xmm9,32 + 64(%rdi) 2419 movdqu %xmm15,48 + 64(%rdi) 2420 2421 cmpq $192,%rbx 2422 ja .Lseal_sse_main_init 2423 movq $128,%rcx 2424 subq $128,%rbx 2425 leaq 128(%rsi),%rsi 2426 jmp .Lseal_sse_128_tail_hash 2427.Lseal_sse_main_init: 2428 movdqu 0 + 128(%rsi),%xmm3 2429 movdqu 16 + 128(%rsi),%xmm7 2430 movdqu 32 + 128(%rsi),%xmm11 2431 movdqu 48 + 128(%rsi),%xmm15 2432 pxor %xmm3,%xmm0 2433 pxor %xmm7,%xmm4 2434 pxor %xmm11,%xmm8 2435 pxor %xmm12,%xmm15 2436 movdqu %xmm0,0 + 128(%rdi) 2437 movdqu %xmm4,16 + 128(%rdi) 2438 movdqu %xmm8,32 + 128(%rdi) 2439 movdqu %xmm15,48 + 128(%rdi) 2440 2441 movq $192,%rcx 2442 subq $192,%rbx 2443 leaq 192(%rsi),%rsi 2444 movq $2,%rcx 2445 movq $8,%r8 2446 cmpq $64,%rbx 2447 jbe .Lseal_sse_tail_64 2448 cmpq $128,%rbx 2449 jbe .Lseal_sse_tail_128 2450 cmpq $192,%rbx 2451 jbe .Lseal_sse_tail_192 2452 2453.Lseal_sse_main_loop: 2454 movdqa .Lchacha20_consts(%rip),%xmm0 2455 movdqa 0+48(%rbp),%xmm4 2456 movdqa 0+64(%rbp),%xmm8 2457 movdqa %xmm0,%xmm1 2458 movdqa %xmm4,%xmm5 2459 movdqa %xmm8,%xmm9 2460 movdqa %xmm0,%xmm2 2461 movdqa %xmm4,%xmm6 2462 movdqa %xmm8,%xmm10 2463 movdqa %xmm0,%xmm3 2464 movdqa %xmm4,%xmm7 2465 movdqa %xmm8,%xmm11 2466 movdqa 0+96(%rbp),%xmm15 2467 paddd .Lsse_inc(%rip),%xmm15 2468 movdqa %xmm15,%xmm14 2469 paddd .Lsse_inc(%rip),%xmm14 2470 movdqa %xmm14,%xmm13 2471 paddd .Lsse_inc(%rip),%xmm13 2472 movdqa %xmm13,%xmm12 2473 paddd .Lsse_inc(%rip),%xmm12 2474 movdqa %xmm12,0+96(%rbp) 2475 movdqa %xmm13,0+112(%rbp) 2476 movdqa %xmm14,0+128(%rbp) 2477 movdqa %xmm15,0+144(%rbp) 2478 2479.align 32 2480.Lseal_sse_main_rounds: 2481 movdqa %xmm8,0+80(%rbp) 2482 movdqa .Lrol16(%rip),%xmm8 2483 paddd %xmm7,%xmm3 2484 paddd %xmm6,%xmm2 2485 paddd %xmm5,%xmm1 2486 paddd %xmm4,%xmm0 2487 pxor %xmm3,%xmm15 2488 pxor %xmm2,%xmm14 2489 pxor %xmm1,%xmm13 2490 pxor %xmm0,%xmm12 2491.byte 102,69,15,56,0,248 2492.byte 102,69,15,56,0,240 2493.byte 102,69,15,56,0,232 2494.byte 102,69,15,56,0,224 2495 movdqa 0+80(%rbp),%xmm8 2496 paddd %xmm15,%xmm11 2497 paddd %xmm14,%xmm10 2498 paddd %xmm13,%xmm9 2499 paddd %xmm12,%xmm8 2500 pxor %xmm11,%xmm7 2501 addq 0+0(%rdi),%r10 2502 adcq 8+0(%rdi),%r11 2503 adcq $1,%r12 2504 pxor %xmm10,%xmm6 2505 pxor %xmm9,%xmm5 2506 pxor %xmm8,%xmm4 2507 movdqa %xmm8,0+80(%rbp) 2508 movdqa %xmm7,%xmm8 2509 psrld $20,%xmm8 2510 pslld $32-20,%xmm7 2511 pxor %xmm8,%xmm7 2512 movdqa %xmm6,%xmm8 2513 psrld $20,%xmm8 2514 pslld $32-20,%xmm6 2515 pxor %xmm8,%xmm6 2516 movdqa %xmm5,%xmm8 2517 psrld $20,%xmm8 2518 pslld $32-20,%xmm5 2519 pxor %xmm8,%xmm5 2520 movdqa %xmm4,%xmm8 2521 psrld $20,%xmm8 2522 pslld $32-20,%xmm4 2523 pxor %xmm8,%xmm4 2524 movq 0+0+0(%rbp),%rax 2525 movq %rax,%r15 2526 mulq %r10 2527 movq %rax,%r13 2528 movq %rdx,%r14 2529 movq 0+0+0(%rbp),%rax 2530 mulq %r11 2531 imulq %r12,%r15 2532 addq %rax,%r14 2533 adcq %rdx,%r15 2534 movdqa .Lrol8(%rip),%xmm8 2535 paddd %xmm7,%xmm3 2536 paddd %xmm6,%xmm2 2537 paddd %xmm5,%xmm1 2538 paddd %xmm4,%xmm0 2539 pxor %xmm3,%xmm15 2540 pxor %xmm2,%xmm14 2541 pxor %xmm1,%xmm13 2542 pxor %xmm0,%xmm12 2543.byte 102,69,15,56,0,248 2544.byte 102,69,15,56,0,240 2545.byte 102,69,15,56,0,232 2546.byte 
102,69,15,56,0,224 2547 movdqa 0+80(%rbp),%xmm8 2548 paddd %xmm15,%xmm11 2549 paddd %xmm14,%xmm10 2550 paddd %xmm13,%xmm9 2551 paddd %xmm12,%xmm8 2552 pxor %xmm11,%xmm7 2553 pxor %xmm10,%xmm6 2554 movq 8+0+0(%rbp),%rax 2555 movq %rax,%r9 2556 mulq %r10 2557 addq %rax,%r14 2558 adcq $0,%rdx 2559 movq %rdx,%r10 2560 movq 8+0+0(%rbp),%rax 2561 mulq %r11 2562 addq %rax,%r15 2563 adcq $0,%rdx 2564 pxor %xmm9,%xmm5 2565 pxor %xmm8,%xmm4 2566 movdqa %xmm8,0+80(%rbp) 2567 movdqa %xmm7,%xmm8 2568 psrld $25,%xmm8 2569 pslld $32-25,%xmm7 2570 pxor %xmm8,%xmm7 2571 movdqa %xmm6,%xmm8 2572 psrld $25,%xmm8 2573 pslld $32-25,%xmm6 2574 pxor %xmm8,%xmm6 2575 movdqa %xmm5,%xmm8 2576 psrld $25,%xmm8 2577 pslld $32-25,%xmm5 2578 pxor %xmm8,%xmm5 2579 movdqa %xmm4,%xmm8 2580 psrld $25,%xmm8 2581 pslld $32-25,%xmm4 2582 pxor %xmm8,%xmm4 2583 movdqa 0+80(%rbp),%xmm8 2584 imulq %r12,%r9 2585 addq %r10,%r15 2586 adcq %rdx,%r9 2587.byte 102,15,58,15,255,4 2588.byte 102,69,15,58,15,219,8 2589.byte 102,69,15,58,15,255,12 2590.byte 102,15,58,15,246,4 2591.byte 102,69,15,58,15,210,8 2592.byte 102,69,15,58,15,246,12 2593.byte 102,15,58,15,237,4 2594.byte 102,69,15,58,15,201,8 2595.byte 102,69,15,58,15,237,12 2596.byte 102,15,58,15,228,4 2597.byte 102,69,15,58,15,192,8 2598.byte 102,69,15,58,15,228,12 2599 movdqa %xmm8,0+80(%rbp) 2600 movdqa .Lrol16(%rip),%xmm8 2601 paddd %xmm7,%xmm3 2602 paddd %xmm6,%xmm2 2603 paddd %xmm5,%xmm1 2604 paddd %xmm4,%xmm0 2605 pxor %xmm3,%xmm15 2606 pxor %xmm2,%xmm14 2607 movq %r13,%r10 2608 movq %r14,%r11 2609 movq %r15,%r12 2610 andq $3,%r12 2611 movq %r15,%r13 2612 andq $-4,%r13 2613 movq %r9,%r14 2614 shrdq $2,%r9,%r15 2615 shrq $2,%r9 2616 addq %r13,%r15 2617 adcq %r14,%r9 2618 addq %r15,%r10 2619 adcq %r9,%r11 2620 adcq $0,%r12 2621 pxor %xmm1,%xmm13 2622 pxor %xmm0,%xmm12 2623.byte 102,69,15,56,0,248 2624.byte 102,69,15,56,0,240 2625.byte 102,69,15,56,0,232 2626.byte 102,69,15,56,0,224 2627 movdqa 0+80(%rbp),%xmm8 2628 paddd %xmm15,%xmm11 2629 paddd %xmm14,%xmm10 2630 paddd %xmm13,%xmm9 2631 paddd %xmm12,%xmm8 2632 pxor %xmm11,%xmm7 2633 pxor %xmm10,%xmm6 2634 pxor %xmm9,%xmm5 2635 pxor %xmm8,%xmm4 2636 movdqa %xmm8,0+80(%rbp) 2637 movdqa %xmm7,%xmm8 2638 psrld $20,%xmm8 2639 pslld $32-20,%xmm7 2640 pxor %xmm8,%xmm7 2641 movdqa %xmm6,%xmm8 2642 psrld $20,%xmm8 2643 pslld $32-20,%xmm6 2644 pxor %xmm8,%xmm6 2645 movdqa %xmm5,%xmm8 2646 psrld $20,%xmm8 2647 pslld $32-20,%xmm5 2648 pxor %xmm8,%xmm5 2649 movdqa %xmm4,%xmm8 2650 psrld $20,%xmm8 2651 pslld $32-20,%xmm4 2652 pxor %xmm8,%xmm4 2653 movdqa .Lrol8(%rip),%xmm8 2654 paddd %xmm7,%xmm3 2655 paddd %xmm6,%xmm2 2656 paddd %xmm5,%xmm1 2657 paddd %xmm4,%xmm0 2658 pxor %xmm3,%xmm15 2659 pxor %xmm2,%xmm14 2660 pxor %xmm1,%xmm13 2661 pxor %xmm0,%xmm12 2662.byte 102,69,15,56,0,248 2663.byte 102,69,15,56,0,240 2664.byte 102,69,15,56,0,232 2665.byte 102,69,15,56,0,224 2666 movdqa 0+80(%rbp),%xmm8 2667 paddd %xmm15,%xmm11 2668 paddd %xmm14,%xmm10 2669 paddd %xmm13,%xmm9 2670 paddd %xmm12,%xmm8 2671 pxor %xmm11,%xmm7 2672 pxor %xmm10,%xmm6 2673 pxor %xmm9,%xmm5 2674 pxor %xmm8,%xmm4 2675 movdqa %xmm8,0+80(%rbp) 2676 movdqa %xmm7,%xmm8 2677 psrld $25,%xmm8 2678 pslld $32-25,%xmm7 2679 pxor %xmm8,%xmm7 2680 movdqa %xmm6,%xmm8 2681 psrld $25,%xmm8 2682 pslld $32-25,%xmm6 2683 pxor %xmm8,%xmm6 2684 movdqa %xmm5,%xmm8 2685 psrld $25,%xmm8 2686 pslld $32-25,%xmm5 2687 pxor %xmm8,%xmm5 2688 movdqa %xmm4,%xmm8 2689 psrld $25,%xmm8 2690 pslld $32-25,%xmm4 2691 pxor %xmm8,%xmm4 2692 movdqa 0+80(%rbp),%xmm8 2693.byte 102,15,58,15,255,12 2694.byte 
102,69,15,58,15,219,8 2695.byte 102,69,15,58,15,255,4 2696.byte 102,15,58,15,246,12 2697.byte 102,69,15,58,15,210,8 2698.byte 102,69,15,58,15,246,4 2699.byte 102,15,58,15,237,12 2700.byte 102,69,15,58,15,201,8 2701.byte 102,69,15,58,15,237,4 2702.byte 102,15,58,15,228,12 2703.byte 102,69,15,58,15,192,8 2704.byte 102,69,15,58,15,228,4 2705 2706 leaq 16(%rdi),%rdi 2707 decq %r8 2708 jge .Lseal_sse_main_rounds 2709 addq 0+0(%rdi),%r10 2710 adcq 8+0(%rdi),%r11 2711 adcq $1,%r12 2712 movq 0+0+0(%rbp),%rax 2713 movq %rax,%r15 2714 mulq %r10 2715 movq %rax,%r13 2716 movq %rdx,%r14 2717 movq 0+0+0(%rbp),%rax 2718 mulq %r11 2719 imulq %r12,%r15 2720 addq %rax,%r14 2721 adcq %rdx,%r15 2722 movq 8+0+0(%rbp),%rax 2723 movq %rax,%r9 2724 mulq %r10 2725 addq %rax,%r14 2726 adcq $0,%rdx 2727 movq %rdx,%r10 2728 movq 8+0+0(%rbp),%rax 2729 mulq %r11 2730 addq %rax,%r15 2731 adcq $0,%rdx 2732 imulq %r12,%r9 2733 addq %r10,%r15 2734 adcq %rdx,%r9 2735 movq %r13,%r10 2736 movq %r14,%r11 2737 movq %r15,%r12 2738 andq $3,%r12 2739 movq %r15,%r13 2740 andq $-4,%r13 2741 movq %r9,%r14 2742 shrdq $2,%r9,%r15 2743 shrq $2,%r9 2744 addq %r13,%r15 2745 adcq %r14,%r9 2746 addq %r15,%r10 2747 adcq %r9,%r11 2748 adcq $0,%r12 2749 2750 leaq 16(%rdi),%rdi 2751 decq %rcx 2752 jg .Lseal_sse_main_rounds 2753 paddd .Lchacha20_consts(%rip),%xmm3 2754 paddd 0+48(%rbp),%xmm7 2755 paddd 0+64(%rbp),%xmm11 2756 paddd 0+144(%rbp),%xmm15 2757 paddd .Lchacha20_consts(%rip),%xmm2 2758 paddd 0+48(%rbp),%xmm6 2759 paddd 0+64(%rbp),%xmm10 2760 paddd 0+128(%rbp),%xmm14 2761 paddd .Lchacha20_consts(%rip),%xmm1 2762 paddd 0+48(%rbp),%xmm5 2763 paddd 0+64(%rbp),%xmm9 2764 paddd 0+112(%rbp),%xmm13 2765 paddd .Lchacha20_consts(%rip),%xmm0 2766 paddd 0+48(%rbp),%xmm4 2767 paddd 0+64(%rbp),%xmm8 2768 paddd 0+96(%rbp),%xmm12 2769 2770 movdqa %xmm14,0+80(%rbp) 2771 movdqa %xmm14,0+80(%rbp) 2772 movdqu 0 + 0(%rsi),%xmm14 2773 pxor %xmm3,%xmm14 2774 movdqu %xmm14,0 + 0(%rdi) 2775 movdqu 16 + 0(%rsi),%xmm14 2776 pxor %xmm7,%xmm14 2777 movdqu %xmm14,16 + 0(%rdi) 2778 movdqu 32 + 0(%rsi),%xmm14 2779 pxor %xmm11,%xmm14 2780 movdqu %xmm14,32 + 0(%rdi) 2781 movdqu 48 + 0(%rsi),%xmm14 2782 pxor %xmm15,%xmm14 2783 movdqu %xmm14,48 + 0(%rdi) 2784 2785 movdqa 0+80(%rbp),%xmm14 2786 movdqu 0 + 64(%rsi),%xmm3 2787 movdqu 16 + 64(%rsi),%xmm7 2788 movdqu 32 + 64(%rsi),%xmm11 2789 movdqu 48 + 64(%rsi),%xmm15 2790 pxor %xmm3,%xmm2 2791 pxor %xmm7,%xmm6 2792 pxor %xmm11,%xmm10 2793 pxor %xmm14,%xmm15 2794 movdqu %xmm2,0 + 64(%rdi) 2795 movdqu %xmm6,16 + 64(%rdi) 2796 movdqu %xmm10,32 + 64(%rdi) 2797 movdqu %xmm15,48 + 64(%rdi) 2798 movdqu 0 + 128(%rsi),%xmm3 2799 movdqu 16 + 128(%rsi),%xmm7 2800 movdqu 32 + 128(%rsi),%xmm11 2801 movdqu 48 + 128(%rsi),%xmm15 2802 pxor %xmm3,%xmm1 2803 pxor %xmm7,%xmm5 2804 pxor %xmm11,%xmm9 2805 pxor %xmm13,%xmm15 2806 movdqu %xmm1,0 + 128(%rdi) 2807 movdqu %xmm5,16 + 128(%rdi) 2808 movdqu %xmm9,32 + 128(%rdi) 2809 movdqu %xmm15,48 + 128(%rdi) 2810 2811 cmpq $256,%rbx 2812 ja .Lseal_sse_main_loop_xor 2813 2814 movq $192,%rcx 2815 subq $192,%rbx 2816 leaq 192(%rsi),%rsi 2817 jmp .Lseal_sse_128_tail_hash 2818.Lseal_sse_main_loop_xor: 2819 movdqu 0 + 192(%rsi),%xmm3 2820 movdqu 16 + 192(%rsi),%xmm7 2821 movdqu 32 + 192(%rsi),%xmm11 2822 movdqu 48 + 192(%rsi),%xmm15 2823 pxor %xmm3,%xmm0 2824 pxor %xmm7,%xmm4 2825 pxor %xmm11,%xmm8 2826 pxor %xmm12,%xmm15 2827 movdqu %xmm0,0 + 192(%rdi) 2828 movdqu %xmm4,16 + 192(%rdi) 2829 movdqu %xmm8,32 + 192(%rdi) 2830 movdqu %xmm15,48 + 192(%rdi) 2831 2832 leaq 256(%rsi),%rsi 2833 subq $256,%rbx 2834 
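/* Editorial annotation, not emitted by the generating script; a reading of the
   surrounding code, not authoritative: at this point 256 bytes of ciphertext
   have been produced and hashed for this pass of the seal SSE main loop. The
   immediates loaded below appear to re-arm the two loop counters consumed by
   .Lseal_sse_main_rounds (%r8 for the interleaved round-plus-hash iterations,
   %rcx for the extra hash-only iterations), so that the ChaCha20 rounds for the
   next four 64-byte blocks overlap with Poly1305 absorption of ciphertext that
   has already been written to (%rdi). */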
movq $6,%rcx 2835 movq $4,%r8 2836 cmpq $192,%rbx 2837 jg .Lseal_sse_main_loop 2838 movq %rbx,%rcx 2839 testq %rbx,%rbx 2840 je .Lseal_sse_128_tail_hash 2841 movq $6,%rcx 2842 cmpq $128,%rbx 2843 ja .Lseal_sse_tail_192 2844 cmpq $64,%rbx 2845 ja .Lseal_sse_tail_128 2846 2847.Lseal_sse_tail_64: 2848 movdqa .Lchacha20_consts(%rip),%xmm0 2849 movdqa 0+48(%rbp),%xmm4 2850 movdqa 0+64(%rbp),%xmm8 2851 movdqa 0+96(%rbp),%xmm12 2852 paddd .Lsse_inc(%rip),%xmm12 2853 movdqa %xmm12,0+96(%rbp) 2854 2855.Lseal_sse_tail_64_rounds_and_x2hash: 2856 addq 0+0(%rdi),%r10 2857 adcq 8+0(%rdi),%r11 2858 adcq $1,%r12 2859 movq 0+0+0(%rbp),%rax 2860 movq %rax,%r15 2861 mulq %r10 2862 movq %rax,%r13 2863 movq %rdx,%r14 2864 movq 0+0+0(%rbp),%rax 2865 mulq %r11 2866 imulq %r12,%r15 2867 addq %rax,%r14 2868 adcq %rdx,%r15 2869 movq 8+0+0(%rbp),%rax 2870 movq %rax,%r9 2871 mulq %r10 2872 addq %rax,%r14 2873 adcq $0,%rdx 2874 movq %rdx,%r10 2875 movq 8+0+0(%rbp),%rax 2876 mulq %r11 2877 addq %rax,%r15 2878 adcq $0,%rdx 2879 imulq %r12,%r9 2880 addq %r10,%r15 2881 adcq %rdx,%r9 2882 movq %r13,%r10 2883 movq %r14,%r11 2884 movq %r15,%r12 2885 andq $3,%r12 2886 movq %r15,%r13 2887 andq $-4,%r13 2888 movq %r9,%r14 2889 shrdq $2,%r9,%r15 2890 shrq $2,%r9 2891 addq %r13,%r15 2892 adcq %r14,%r9 2893 addq %r15,%r10 2894 adcq %r9,%r11 2895 adcq $0,%r12 2896 2897 leaq 16(%rdi),%rdi 2898.Lseal_sse_tail_64_rounds_and_x1hash: 2899 paddd %xmm4,%xmm0 2900 pxor %xmm0,%xmm12 2901 pshufb .Lrol16(%rip),%xmm12 2902 paddd %xmm12,%xmm8 2903 pxor %xmm8,%xmm4 2904 movdqa %xmm4,%xmm3 2905 pslld $12,%xmm3 2906 psrld $20,%xmm4 2907 pxor %xmm3,%xmm4 2908 paddd %xmm4,%xmm0 2909 pxor %xmm0,%xmm12 2910 pshufb .Lrol8(%rip),%xmm12 2911 paddd %xmm12,%xmm8 2912 pxor %xmm8,%xmm4 2913 movdqa %xmm4,%xmm3 2914 pslld $7,%xmm3 2915 psrld $25,%xmm4 2916 pxor %xmm3,%xmm4 2917.byte 102,15,58,15,228,4 2918.byte 102,69,15,58,15,192,8 2919.byte 102,69,15,58,15,228,12 2920 paddd %xmm4,%xmm0 2921 pxor %xmm0,%xmm12 2922 pshufb .Lrol16(%rip),%xmm12 2923 paddd %xmm12,%xmm8 2924 pxor %xmm8,%xmm4 2925 movdqa %xmm4,%xmm3 2926 pslld $12,%xmm3 2927 psrld $20,%xmm4 2928 pxor %xmm3,%xmm4 2929 paddd %xmm4,%xmm0 2930 pxor %xmm0,%xmm12 2931 pshufb .Lrol8(%rip),%xmm12 2932 paddd %xmm12,%xmm8 2933 pxor %xmm8,%xmm4 2934 movdqa %xmm4,%xmm3 2935 pslld $7,%xmm3 2936 psrld $25,%xmm4 2937 pxor %xmm3,%xmm4 2938.byte 102,15,58,15,228,12 2939.byte 102,69,15,58,15,192,8 2940.byte 102,69,15,58,15,228,4 2941 addq 0+0(%rdi),%r10 2942 adcq 8+0(%rdi),%r11 2943 adcq $1,%r12 2944 movq 0+0+0(%rbp),%rax 2945 movq %rax,%r15 2946 mulq %r10 2947 movq %rax,%r13 2948 movq %rdx,%r14 2949 movq 0+0+0(%rbp),%rax 2950 mulq %r11 2951 imulq %r12,%r15 2952 addq %rax,%r14 2953 adcq %rdx,%r15 2954 movq 8+0+0(%rbp),%rax 2955 movq %rax,%r9 2956 mulq %r10 2957 addq %rax,%r14 2958 adcq $0,%rdx 2959 movq %rdx,%r10 2960 movq 8+0+0(%rbp),%rax 2961 mulq %r11 2962 addq %rax,%r15 2963 adcq $0,%rdx 2964 imulq %r12,%r9 2965 addq %r10,%r15 2966 adcq %rdx,%r9 2967 movq %r13,%r10 2968 movq %r14,%r11 2969 movq %r15,%r12 2970 andq $3,%r12 2971 movq %r15,%r13 2972 andq $-4,%r13 2973 movq %r9,%r14 2974 shrdq $2,%r9,%r15 2975 shrq $2,%r9 2976 addq %r13,%r15 2977 adcq %r14,%r9 2978 addq %r15,%r10 2979 adcq %r9,%r11 2980 adcq $0,%r12 2981 2982 leaq 16(%rdi),%rdi 2983 decq %rcx 2984 jg .Lseal_sse_tail_64_rounds_and_x2hash 2985 decq %r8 2986 jge .Lseal_sse_tail_64_rounds_and_x1hash 2987 paddd .Lchacha20_consts(%rip),%xmm0 2988 paddd 0+48(%rbp),%xmm4 2989 paddd 0+64(%rbp),%xmm8 2990 paddd 0+96(%rbp),%xmm12 2991 2992 jmp 
.Lseal_sse_128_tail_xor 2993 2994.Lseal_sse_tail_128: 2995 movdqa .Lchacha20_consts(%rip),%xmm0 2996 movdqa 0+48(%rbp),%xmm4 2997 movdqa 0+64(%rbp),%xmm8 2998 movdqa %xmm0,%xmm1 2999 movdqa %xmm4,%xmm5 3000 movdqa %xmm8,%xmm9 3001 movdqa 0+96(%rbp),%xmm13 3002 paddd .Lsse_inc(%rip),%xmm13 3003 movdqa %xmm13,%xmm12 3004 paddd .Lsse_inc(%rip),%xmm12 3005 movdqa %xmm12,0+96(%rbp) 3006 movdqa %xmm13,0+112(%rbp) 3007 3008.Lseal_sse_tail_128_rounds_and_x2hash: 3009 addq 0+0(%rdi),%r10 3010 adcq 8+0(%rdi),%r11 3011 adcq $1,%r12 3012 movq 0+0+0(%rbp),%rax 3013 movq %rax,%r15 3014 mulq %r10 3015 movq %rax,%r13 3016 movq %rdx,%r14 3017 movq 0+0+0(%rbp),%rax 3018 mulq %r11 3019 imulq %r12,%r15 3020 addq %rax,%r14 3021 adcq %rdx,%r15 3022 movq 8+0+0(%rbp),%rax 3023 movq %rax,%r9 3024 mulq %r10 3025 addq %rax,%r14 3026 adcq $0,%rdx 3027 movq %rdx,%r10 3028 movq 8+0+0(%rbp),%rax 3029 mulq %r11 3030 addq %rax,%r15 3031 adcq $0,%rdx 3032 imulq %r12,%r9 3033 addq %r10,%r15 3034 adcq %rdx,%r9 3035 movq %r13,%r10 3036 movq %r14,%r11 3037 movq %r15,%r12 3038 andq $3,%r12 3039 movq %r15,%r13 3040 andq $-4,%r13 3041 movq %r9,%r14 3042 shrdq $2,%r9,%r15 3043 shrq $2,%r9 3044 addq %r13,%r15 3045 adcq %r14,%r9 3046 addq %r15,%r10 3047 adcq %r9,%r11 3048 adcq $0,%r12 3049 3050 leaq 16(%rdi),%rdi 3051.Lseal_sse_tail_128_rounds_and_x1hash: 3052 paddd %xmm4,%xmm0 3053 pxor %xmm0,%xmm12 3054 pshufb .Lrol16(%rip),%xmm12 3055 paddd %xmm12,%xmm8 3056 pxor %xmm8,%xmm4 3057 movdqa %xmm4,%xmm3 3058 pslld $12,%xmm3 3059 psrld $20,%xmm4 3060 pxor %xmm3,%xmm4 3061 paddd %xmm4,%xmm0 3062 pxor %xmm0,%xmm12 3063 pshufb .Lrol8(%rip),%xmm12 3064 paddd %xmm12,%xmm8 3065 pxor %xmm8,%xmm4 3066 movdqa %xmm4,%xmm3 3067 pslld $7,%xmm3 3068 psrld $25,%xmm4 3069 pxor %xmm3,%xmm4 3070.byte 102,15,58,15,228,4 3071.byte 102,69,15,58,15,192,8 3072.byte 102,69,15,58,15,228,12 3073 paddd %xmm5,%xmm1 3074 pxor %xmm1,%xmm13 3075 pshufb .Lrol16(%rip),%xmm13 3076 paddd %xmm13,%xmm9 3077 pxor %xmm9,%xmm5 3078 movdqa %xmm5,%xmm3 3079 pslld $12,%xmm3 3080 psrld $20,%xmm5 3081 pxor %xmm3,%xmm5 3082 paddd %xmm5,%xmm1 3083 pxor %xmm1,%xmm13 3084 pshufb .Lrol8(%rip),%xmm13 3085 paddd %xmm13,%xmm9 3086 pxor %xmm9,%xmm5 3087 movdqa %xmm5,%xmm3 3088 pslld $7,%xmm3 3089 psrld $25,%xmm5 3090 pxor %xmm3,%xmm5 3091.byte 102,15,58,15,237,4 3092.byte 102,69,15,58,15,201,8 3093.byte 102,69,15,58,15,237,12 3094 addq 0+0(%rdi),%r10 3095 adcq 8+0(%rdi),%r11 3096 adcq $1,%r12 3097 movq 0+0+0(%rbp),%rax 3098 movq %rax,%r15 3099 mulq %r10 3100 movq %rax,%r13 3101 movq %rdx,%r14 3102 movq 0+0+0(%rbp),%rax 3103 mulq %r11 3104 imulq %r12,%r15 3105 addq %rax,%r14 3106 adcq %rdx,%r15 3107 movq 8+0+0(%rbp),%rax 3108 movq %rax,%r9 3109 mulq %r10 3110 addq %rax,%r14 3111 adcq $0,%rdx 3112 movq %rdx,%r10 3113 movq 8+0+0(%rbp),%rax 3114 mulq %r11 3115 addq %rax,%r15 3116 adcq $0,%rdx 3117 imulq %r12,%r9 3118 addq %r10,%r15 3119 adcq %rdx,%r9 3120 movq %r13,%r10 3121 movq %r14,%r11 3122 movq %r15,%r12 3123 andq $3,%r12 3124 movq %r15,%r13 3125 andq $-4,%r13 3126 movq %r9,%r14 3127 shrdq $2,%r9,%r15 3128 shrq $2,%r9 3129 addq %r13,%r15 3130 adcq %r14,%r9 3131 addq %r15,%r10 3132 adcq %r9,%r11 3133 adcq $0,%r12 3134 paddd %xmm4,%xmm0 3135 pxor %xmm0,%xmm12 3136 pshufb .Lrol16(%rip),%xmm12 3137 paddd %xmm12,%xmm8 3138 pxor %xmm8,%xmm4 3139 movdqa %xmm4,%xmm3 3140 pslld $12,%xmm3 3141 psrld $20,%xmm4 3142 pxor %xmm3,%xmm4 3143 paddd %xmm4,%xmm0 3144 pxor %xmm0,%xmm12 3145 pshufb .Lrol8(%rip),%xmm12 3146 paddd %xmm12,%xmm8 3147 pxor %xmm8,%xmm4 3148 movdqa %xmm4,%xmm3 3149 pslld $7,%xmm3 
3150 psrld $25,%xmm4 3151 pxor %xmm3,%xmm4 3152.byte 102,15,58,15,228,12 3153.byte 102,69,15,58,15,192,8 3154.byte 102,69,15,58,15,228,4 3155 paddd %xmm5,%xmm1 3156 pxor %xmm1,%xmm13 3157 pshufb .Lrol16(%rip),%xmm13 3158 paddd %xmm13,%xmm9 3159 pxor %xmm9,%xmm5 3160 movdqa %xmm5,%xmm3 3161 pslld $12,%xmm3 3162 psrld $20,%xmm5 3163 pxor %xmm3,%xmm5 3164 paddd %xmm5,%xmm1 3165 pxor %xmm1,%xmm13 3166 pshufb .Lrol8(%rip),%xmm13 3167 paddd %xmm13,%xmm9 3168 pxor %xmm9,%xmm5 3169 movdqa %xmm5,%xmm3 3170 pslld $7,%xmm3 3171 psrld $25,%xmm5 3172 pxor %xmm3,%xmm5 3173.byte 102,15,58,15,237,12 3174.byte 102,69,15,58,15,201,8 3175.byte 102,69,15,58,15,237,4 3176 3177 leaq 16(%rdi),%rdi 3178 decq %rcx 3179 jg .Lseal_sse_tail_128_rounds_and_x2hash 3180 decq %r8 3181 jge .Lseal_sse_tail_128_rounds_and_x1hash 3182 paddd .Lchacha20_consts(%rip),%xmm1 3183 paddd 0+48(%rbp),%xmm5 3184 paddd 0+64(%rbp),%xmm9 3185 paddd 0+112(%rbp),%xmm13 3186 paddd .Lchacha20_consts(%rip),%xmm0 3187 paddd 0+48(%rbp),%xmm4 3188 paddd 0+64(%rbp),%xmm8 3189 paddd 0+96(%rbp),%xmm12 3190 movdqu 0 + 0(%rsi),%xmm3 3191 movdqu 16 + 0(%rsi),%xmm7 3192 movdqu 32 + 0(%rsi),%xmm11 3193 movdqu 48 + 0(%rsi),%xmm15 3194 pxor %xmm3,%xmm1 3195 pxor %xmm7,%xmm5 3196 pxor %xmm11,%xmm9 3197 pxor %xmm13,%xmm15 3198 movdqu %xmm1,0 + 0(%rdi) 3199 movdqu %xmm5,16 + 0(%rdi) 3200 movdqu %xmm9,32 + 0(%rdi) 3201 movdqu %xmm15,48 + 0(%rdi) 3202 3203 movq $64,%rcx 3204 subq $64,%rbx 3205 leaq 64(%rsi),%rsi 3206 jmp .Lseal_sse_128_tail_hash 3207 3208.Lseal_sse_tail_192: 3209 movdqa .Lchacha20_consts(%rip),%xmm0 3210 movdqa 0+48(%rbp),%xmm4 3211 movdqa 0+64(%rbp),%xmm8 3212 movdqa %xmm0,%xmm1 3213 movdqa %xmm4,%xmm5 3214 movdqa %xmm8,%xmm9 3215 movdqa %xmm0,%xmm2 3216 movdqa %xmm4,%xmm6 3217 movdqa %xmm8,%xmm10 3218 movdqa 0+96(%rbp),%xmm14 3219 paddd .Lsse_inc(%rip),%xmm14 3220 movdqa %xmm14,%xmm13 3221 paddd .Lsse_inc(%rip),%xmm13 3222 movdqa %xmm13,%xmm12 3223 paddd .Lsse_inc(%rip),%xmm12 3224 movdqa %xmm12,0+96(%rbp) 3225 movdqa %xmm13,0+112(%rbp) 3226 movdqa %xmm14,0+128(%rbp) 3227 3228.Lseal_sse_tail_192_rounds_and_x2hash: 3229 addq 0+0(%rdi),%r10 3230 adcq 8+0(%rdi),%r11 3231 adcq $1,%r12 3232 movq 0+0+0(%rbp),%rax 3233 movq %rax,%r15 3234 mulq %r10 3235 movq %rax,%r13 3236 movq %rdx,%r14 3237 movq 0+0+0(%rbp),%rax 3238 mulq %r11 3239 imulq %r12,%r15 3240 addq %rax,%r14 3241 adcq %rdx,%r15 3242 movq 8+0+0(%rbp),%rax 3243 movq %rax,%r9 3244 mulq %r10 3245 addq %rax,%r14 3246 adcq $0,%rdx 3247 movq %rdx,%r10 3248 movq 8+0+0(%rbp),%rax 3249 mulq %r11 3250 addq %rax,%r15 3251 adcq $0,%rdx 3252 imulq %r12,%r9 3253 addq %r10,%r15 3254 adcq %rdx,%r9 3255 movq %r13,%r10 3256 movq %r14,%r11 3257 movq %r15,%r12 3258 andq $3,%r12 3259 movq %r15,%r13 3260 andq $-4,%r13 3261 movq %r9,%r14 3262 shrdq $2,%r9,%r15 3263 shrq $2,%r9 3264 addq %r13,%r15 3265 adcq %r14,%r9 3266 addq %r15,%r10 3267 adcq %r9,%r11 3268 adcq $0,%r12 3269 3270 leaq 16(%rdi),%rdi 3271.Lseal_sse_tail_192_rounds_and_x1hash: 3272 paddd %xmm4,%xmm0 3273 pxor %xmm0,%xmm12 3274 pshufb .Lrol16(%rip),%xmm12 3275 paddd %xmm12,%xmm8 3276 pxor %xmm8,%xmm4 3277 movdqa %xmm4,%xmm3 3278 pslld $12,%xmm3 3279 psrld $20,%xmm4 3280 pxor %xmm3,%xmm4 3281 paddd %xmm4,%xmm0 3282 pxor %xmm0,%xmm12 3283 pshufb .Lrol8(%rip),%xmm12 3284 paddd %xmm12,%xmm8 3285 pxor %xmm8,%xmm4 3286 movdqa %xmm4,%xmm3 3287 pslld $7,%xmm3 3288 psrld $25,%xmm4 3289 pxor %xmm3,%xmm4 3290.byte 102,15,58,15,228,4 3291.byte 102,69,15,58,15,192,8 3292.byte 102,69,15,58,15,228,12 3293 paddd %xmm5,%xmm1 3294 pxor %xmm1,%xmm13 3295 pshufb 
.Lrol16(%rip),%xmm13 3296 paddd %xmm13,%xmm9 3297 pxor %xmm9,%xmm5 3298 movdqa %xmm5,%xmm3 3299 pslld $12,%xmm3 3300 psrld $20,%xmm5 3301 pxor %xmm3,%xmm5 3302 paddd %xmm5,%xmm1 3303 pxor %xmm1,%xmm13 3304 pshufb .Lrol8(%rip),%xmm13 3305 paddd %xmm13,%xmm9 3306 pxor %xmm9,%xmm5 3307 movdqa %xmm5,%xmm3 3308 pslld $7,%xmm3 3309 psrld $25,%xmm5 3310 pxor %xmm3,%xmm5 3311.byte 102,15,58,15,237,4 3312.byte 102,69,15,58,15,201,8 3313.byte 102,69,15,58,15,237,12 3314 paddd %xmm6,%xmm2 3315 pxor %xmm2,%xmm14 3316 pshufb .Lrol16(%rip),%xmm14 3317 paddd %xmm14,%xmm10 3318 pxor %xmm10,%xmm6 3319 movdqa %xmm6,%xmm3 3320 pslld $12,%xmm3 3321 psrld $20,%xmm6 3322 pxor %xmm3,%xmm6 3323 paddd %xmm6,%xmm2 3324 pxor %xmm2,%xmm14 3325 pshufb .Lrol8(%rip),%xmm14 3326 paddd %xmm14,%xmm10 3327 pxor %xmm10,%xmm6 3328 movdqa %xmm6,%xmm3 3329 pslld $7,%xmm3 3330 psrld $25,%xmm6 3331 pxor %xmm3,%xmm6 3332.byte 102,15,58,15,246,4 3333.byte 102,69,15,58,15,210,8 3334.byte 102,69,15,58,15,246,12 3335 addq 0+0(%rdi),%r10 3336 adcq 8+0(%rdi),%r11 3337 adcq $1,%r12 3338 movq 0+0+0(%rbp),%rax 3339 movq %rax,%r15 3340 mulq %r10 3341 movq %rax,%r13 3342 movq %rdx,%r14 3343 movq 0+0+0(%rbp),%rax 3344 mulq %r11 3345 imulq %r12,%r15 3346 addq %rax,%r14 3347 adcq %rdx,%r15 3348 movq 8+0+0(%rbp),%rax 3349 movq %rax,%r9 3350 mulq %r10 3351 addq %rax,%r14 3352 adcq $0,%rdx 3353 movq %rdx,%r10 3354 movq 8+0+0(%rbp),%rax 3355 mulq %r11 3356 addq %rax,%r15 3357 adcq $0,%rdx 3358 imulq %r12,%r9 3359 addq %r10,%r15 3360 adcq %rdx,%r9 3361 movq %r13,%r10 3362 movq %r14,%r11 3363 movq %r15,%r12 3364 andq $3,%r12 3365 movq %r15,%r13 3366 andq $-4,%r13 3367 movq %r9,%r14 3368 shrdq $2,%r9,%r15 3369 shrq $2,%r9 3370 addq %r13,%r15 3371 adcq %r14,%r9 3372 addq %r15,%r10 3373 adcq %r9,%r11 3374 adcq $0,%r12 3375 paddd %xmm4,%xmm0 3376 pxor %xmm0,%xmm12 3377 pshufb .Lrol16(%rip),%xmm12 3378 paddd %xmm12,%xmm8 3379 pxor %xmm8,%xmm4 3380 movdqa %xmm4,%xmm3 3381 pslld $12,%xmm3 3382 psrld $20,%xmm4 3383 pxor %xmm3,%xmm4 3384 paddd %xmm4,%xmm0 3385 pxor %xmm0,%xmm12 3386 pshufb .Lrol8(%rip),%xmm12 3387 paddd %xmm12,%xmm8 3388 pxor %xmm8,%xmm4 3389 movdqa %xmm4,%xmm3 3390 pslld $7,%xmm3 3391 psrld $25,%xmm4 3392 pxor %xmm3,%xmm4 3393.byte 102,15,58,15,228,12 3394.byte 102,69,15,58,15,192,8 3395.byte 102,69,15,58,15,228,4 3396 paddd %xmm5,%xmm1 3397 pxor %xmm1,%xmm13 3398 pshufb .Lrol16(%rip),%xmm13 3399 paddd %xmm13,%xmm9 3400 pxor %xmm9,%xmm5 3401 movdqa %xmm5,%xmm3 3402 pslld $12,%xmm3 3403 psrld $20,%xmm5 3404 pxor %xmm3,%xmm5 3405 paddd %xmm5,%xmm1 3406 pxor %xmm1,%xmm13 3407 pshufb .Lrol8(%rip),%xmm13 3408 paddd %xmm13,%xmm9 3409 pxor %xmm9,%xmm5 3410 movdqa %xmm5,%xmm3 3411 pslld $7,%xmm3 3412 psrld $25,%xmm5 3413 pxor %xmm3,%xmm5 3414.byte 102,15,58,15,237,12 3415.byte 102,69,15,58,15,201,8 3416.byte 102,69,15,58,15,237,4 3417 paddd %xmm6,%xmm2 3418 pxor %xmm2,%xmm14 3419 pshufb .Lrol16(%rip),%xmm14 3420 paddd %xmm14,%xmm10 3421 pxor %xmm10,%xmm6 3422 movdqa %xmm6,%xmm3 3423 pslld $12,%xmm3 3424 psrld $20,%xmm6 3425 pxor %xmm3,%xmm6 3426 paddd %xmm6,%xmm2 3427 pxor %xmm2,%xmm14 3428 pshufb .Lrol8(%rip),%xmm14 3429 paddd %xmm14,%xmm10 3430 pxor %xmm10,%xmm6 3431 movdqa %xmm6,%xmm3 3432 pslld $7,%xmm3 3433 psrld $25,%xmm6 3434 pxor %xmm3,%xmm6 3435.byte 102,15,58,15,246,12 3436.byte 102,69,15,58,15,210,8 3437.byte 102,69,15,58,15,246,4 3438 3439 leaq 16(%rdi),%rdi 3440 decq %rcx 3441 jg .Lseal_sse_tail_192_rounds_and_x2hash 3442 decq %r8 3443 jge .Lseal_sse_tail_192_rounds_and_x1hash 3444 paddd .Lchacha20_consts(%rip),%xmm2 3445 paddd 
0+48(%rbp),%xmm6 3446 paddd 0+64(%rbp),%xmm10 3447 paddd 0+128(%rbp),%xmm14 3448 paddd .Lchacha20_consts(%rip),%xmm1 3449 paddd 0+48(%rbp),%xmm5 3450 paddd 0+64(%rbp),%xmm9 3451 paddd 0+112(%rbp),%xmm13 3452 paddd .Lchacha20_consts(%rip),%xmm0 3453 paddd 0+48(%rbp),%xmm4 3454 paddd 0+64(%rbp),%xmm8 3455 paddd 0+96(%rbp),%xmm12 3456 movdqu 0 + 0(%rsi),%xmm3 3457 movdqu 16 + 0(%rsi),%xmm7 3458 movdqu 32 + 0(%rsi),%xmm11 3459 movdqu 48 + 0(%rsi),%xmm15 3460 pxor %xmm3,%xmm2 3461 pxor %xmm7,%xmm6 3462 pxor %xmm11,%xmm10 3463 pxor %xmm14,%xmm15 3464 movdqu %xmm2,0 + 0(%rdi) 3465 movdqu %xmm6,16 + 0(%rdi) 3466 movdqu %xmm10,32 + 0(%rdi) 3467 movdqu %xmm15,48 + 0(%rdi) 3468 movdqu 0 + 64(%rsi),%xmm3 3469 movdqu 16 + 64(%rsi),%xmm7 3470 movdqu 32 + 64(%rsi),%xmm11 3471 movdqu 48 + 64(%rsi),%xmm15 3472 pxor %xmm3,%xmm1 3473 pxor %xmm7,%xmm5 3474 pxor %xmm11,%xmm9 3475 pxor %xmm13,%xmm15 3476 movdqu %xmm1,0 + 64(%rdi) 3477 movdqu %xmm5,16 + 64(%rdi) 3478 movdqu %xmm9,32 + 64(%rdi) 3479 movdqu %xmm15,48 + 64(%rdi) 3480 3481 movq $128,%rcx 3482 subq $128,%rbx 3483 leaq 128(%rsi),%rsi 3484 3485.Lseal_sse_128_tail_hash: 3486 cmpq $16,%rcx 3487 jb .Lseal_sse_128_tail_xor 3488 addq 0+0(%rdi),%r10 3489 adcq 8+0(%rdi),%r11 3490 adcq $1,%r12 3491 movq 0+0+0(%rbp),%rax 3492 movq %rax,%r15 3493 mulq %r10 3494 movq %rax,%r13 3495 movq %rdx,%r14 3496 movq 0+0+0(%rbp),%rax 3497 mulq %r11 3498 imulq %r12,%r15 3499 addq %rax,%r14 3500 adcq %rdx,%r15 3501 movq 8+0+0(%rbp),%rax 3502 movq %rax,%r9 3503 mulq %r10 3504 addq %rax,%r14 3505 adcq $0,%rdx 3506 movq %rdx,%r10 3507 movq 8+0+0(%rbp),%rax 3508 mulq %r11 3509 addq %rax,%r15 3510 adcq $0,%rdx 3511 imulq %r12,%r9 3512 addq %r10,%r15 3513 adcq %rdx,%r9 3514 movq %r13,%r10 3515 movq %r14,%r11 3516 movq %r15,%r12 3517 andq $3,%r12 3518 movq %r15,%r13 3519 andq $-4,%r13 3520 movq %r9,%r14 3521 shrdq $2,%r9,%r15 3522 shrq $2,%r9 3523 addq %r13,%r15 3524 adcq %r14,%r9 3525 addq %r15,%r10 3526 adcq %r9,%r11 3527 adcq $0,%r12 3528 3529 subq $16,%rcx 3530 leaq 16(%rdi),%rdi 3531 jmp .Lseal_sse_128_tail_hash 3532 3533.Lseal_sse_128_tail_xor: 3534 cmpq $16,%rbx 3535 jb .Lseal_sse_tail_16 3536 subq $16,%rbx 3537 3538 movdqu 0(%rsi),%xmm3 3539 pxor %xmm3,%xmm0 3540 movdqu %xmm0,0(%rdi) 3541 3542 addq 0(%rdi),%r10 3543 adcq 8(%rdi),%r11 3544 adcq $1,%r12 3545 leaq 16(%rsi),%rsi 3546 leaq 16(%rdi),%rdi 3547 movq 0+0+0(%rbp),%rax 3548 movq %rax,%r15 3549 mulq %r10 3550 movq %rax,%r13 3551 movq %rdx,%r14 3552 movq 0+0+0(%rbp),%rax 3553 mulq %r11 3554 imulq %r12,%r15 3555 addq %rax,%r14 3556 adcq %rdx,%r15 3557 movq 8+0+0(%rbp),%rax 3558 movq %rax,%r9 3559 mulq %r10 3560 addq %rax,%r14 3561 adcq $0,%rdx 3562 movq %rdx,%r10 3563 movq 8+0+0(%rbp),%rax 3564 mulq %r11 3565 addq %rax,%r15 3566 adcq $0,%rdx 3567 imulq %r12,%r9 3568 addq %r10,%r15 3569 adcq %rdx,%r9 3570 movq %r13,%r10 3571 movq %r14,%r11 3572 movq %r15,%r12 3573 andq $3,%r12 3574 movq %r15,%r13 3575 andq $-4,%r13 3576 movq %r9,%r14 3577 shrdq $2,%r9,%r15 3578 shrq $2,%r9 3579 addq %r13,%r15 3580 adcq %r14,%r9 3581 addq %r15,%r10 3582 adcq %r9,%r11 3583 adcq $0,%r12 3584 3585 3586 movdqa %xmm4,%xmm0 3587 movdqa %xmm8,%xmm4 3588 movdqa %xmm12,%xmm8 3589 movdqa %xmm1,%xmm12 3590 movdqa %xmm5,%xmm1 3591 movdqa %xmm9,%xmm5 3592 movdqa %xmm13,%xmm9 3593 jmp .Lseal_sse_128_tail_xor 3594 3595.Lseal_sse_tail_16: 3596 testq %rbx,%rbx 3597 jz .Lprocess_blocks_of_extra_in 3598 3599 movq %rbx,%r8 3600 movq %rbx,%rcx 3601 leaq -1(%rsi,%rbx,1),%rsi 3602 pxor %xmm15,%xmm15 3603.Lseal_sse_tail_16_compose: 3604 pslldq $1,%xmm15 3605 
pinsrb $0,(%rsi),%xmm15 3606 leaq -1(%rsi),%rsi 3607 decq %rcx 3608 jne .Lseal_sse_tail_16_compose 3609 3610 3611 pxor %xmm0,%xmm15 3612 3613 3614 movq %rbx,%rcx 3615 movdqu %xmm15,%xmm0 3616.Lseal_sse_tail_16_extract: 3617 pextrb $0,%xmm0,(%rdi) 3618 psrldq $1,%xmm0 3619 addq $1,%rdi 3620 subq $1,%rcx 3621 jnz .Lseal_sse_tail_16_extract 3622 3623 3624 3625 3626 3627 3628 3629 3630 movq 288 + 0 + 32(%rsp),%r9 3631 movq 56(%r9),%r14 3632 movq 48(%r9),%r13 3633 testq %r14,%r14 3634 jz .Lprocess_partial_block 3635 3636 movq $16,%r15 3637 subq %rbx,%r15 3638 cmpq %r15,%r14 3639 3640 jge .Lload_extra_in 3641 movq %r14,%r15 3642 3643.Lload_extra_in: 3644 3645 3646 leaq -1(%r13,%r15,1),%rsi 3647 3648 3649 addq %r15,%r13 3650 subq %r15,%r14 3651 movq %r13,48(%r9) 3652 movq %r14,56(%r9) 3653 3654 3655 3656 addq %r15,%r8 3657 3658 3659 pxor %xmm11,%xmm11 3660.Lload_extra_load_loop: 3661 pslldq $1,%xmm11 3662 pinsrb $0,(%rsi),%xmm11 3663 leaq -1(%rsi),%rsi 3664 subq $1,%r15 3665 jnz .Lload_extra_load_loop 3666 3667 3668 3669 3670 movq %rbx,%r15 3671 3672.Lload_extra_shift_loop: 3673 pslldq $1,%xmm11 3674 subq $1,%r15 3675 jnz .Lload_extra_shift_loop 3676 3677 3678 3679 3680 leaq .Land_masks(%rip),%r15 3681 shlq $4,%rbx 3682 pand -16(%r15,%rbx,1),%xmm15 3683 3684 3685 por %xmm11,%xmm15 3686 3687 3688 3689.byte 102,77,15,126,253 3690 pextrq $1,%xmm15,%r14 3691 addq %r13,%r10 3692 adcq %r14,%r11 3693 adcq $1,%r12 3694 movq 0+0+0(%rbp),%rax 3695 movq %rax,%r15 3696 mulq %r10 3697 movq %rax,%r13 3698 movq %rdx,%r14 3699 movq 0+0+0(%rbp),%rax 3700 mulq %r11 3701 imulq %r12,%r15 3702 addq %rax,%r14 3703 adcq %rdx,%r15 3704 movq 8+0+0(%rbp),%rax 3705 movq %rax,%r9 3706 mulq %r10 3707 addq %rax,%r14 3708 adcq $0,%rdx 3709 movq %rdx,%r10 3710 movq 8+0+0(%rbp),%rax 3711 mulq %r11 3712 addq %rax,%r15 3713 adcq $0,%rdx 3714 imulq %r12,%r9 3715 addq %r10,%r15 3716 adcq %rdx,%r9 3717 movq %r13,%r10 3718 movq %r14,%r11 3719 movq %r15,%r12 3720 andq $3,%r12 3721 movq %r15,%r13 3722 andq $-4,%r13 3723 movq %r9,%r14 3724 shrdq $2,%r9,%r15 3725 shrq $2,%r9 3726 addq %r13,%r15 3727 adcq %r14,%r9 3728 addq %r15,%r10 3729 adcq %r9,%r11 3730 adcq $0,%r12 3731 3732 3733.Lprocess_blocks_of_extra_in: 3734 3735 movq 288+32+0 (%rsp),%r9 3736 movq 48(%r9),%rsi 3737 movq 56(%r9),%r8 3738 movq %r8,%rcx 3739 shrq $4,%r8 3740 3741.Lprocess_extra_hash_loop: 3742 jz process_extra_in_trailer 3743 addq 0+0(%rsi),%r10 3744 adcq 8+0(%rsi),%r11 3745 adcq $1,%r12 3746 movq 0+0+0(%rbp),%rax 3747 movq %rax,%r15 3748 mulq %r10 3749 movq %rax,%r13 3750 movq %rdx,%r14 3751 movq 0+0+0(%rbp),%rax 3752 mulq %r11 3753 imulq %r12,%r15 3754 addq %rax,%r14 3755 adcq %rdx,%r15 3756 movq 8+0+0(%rbp),%rax 3757 movq %rax,%r9 3758 mulq %r10 3759 addq %rax,%r14 3760 adcq $0,%rdx 3761 movq %rdx,%r10 3762 movq 8+0+0(%rbp),%rax 3763 mulq %r11 3764 addq %rax,%r15 3765 adcq $0,%rdx 3766 imulq %r12,%r9 3767 addq %r10,%r15 3768 adcq %rdx,%r9 3769 movq %r13,%r10 3770 movq %r14,%r11 3771 movq %r15,%r12 3772 andq $3,%r12 3773 movq %r15,%r13 3774 andq $-4,%r13 3775 movq %r9,%r14 3776 shrdq $2,%r9,%r15 3777 shrq $2,%r9 3778 addq %r13,%r15 3779 adcq %r14,%r9 3780 addq %r15,%r10 3781 adcq %r9,%r11 3782 adcq $0,%r12 3783 3784 leaq 16(%rsi),%rsi 3785 subq $1,%r8 3786 jmp .Lprocess_extra_hash_loop 3787process_extra_in_trailer: 3788 andq $15,%rcx 3789 movq %rcx,%rbx 3790 jz .Ldo_length_block 3791 leaq -1(%rsi,%rcx,1),%rsi 3792 3793.Lprocess_extra_in_trailer_load: 3794 pslldq $1,%xmm15 3795 pinsrb $0,(%rsi),%xmm15 3796 leaq -1(%rsi),%rsi 3797 subq $1,%rcx 3798 jnz 
.Lprocess_extra_in_trailer_load 3799 3800.Lprocess_partial_block: 3801 3802 leaq .Land_masks(%rip),%r15 3803 shlq $4,%rbx 3804 pand -16(%r15,%rbx,1),%xmm15 3805.byte 102,77,15,126,253 3806 pextrq $1,%xmm15,%r14 3807 addq %r13,%r10 3808 adcq %r14,%r11 3809 adcq $1,%r12 3810 movq 0+0+0(%rbp),%rax 3811 movq %rax,%r15 3812 mulq %r10 3813 movq %rax,%r13 3814 movq %rdx,%r14 3815 movq 0+0+0(%rbp),%rax 3816 mulq %r11 3817 imulq %r12,%r15 3818 addq %rax,%r14 3819 adcq %rdx,%r15 3820 movq 8+0+0(%rbp),%rax 3821 movq %rax,%r9 3822 mulq %r10 3823 addq %rax,%r14 3824 adcq $0,%rdx 3825 movq %rdx,%r10 3826 movq 8+0+0(%rbp),%rax 3827 mulq %r11 3828 addq %rax,%r15 3829 adcq $0,%rdx 3830 imulq %r12,%r9 3831 addq %r10,%r15 3832 adcq %rdx,%r9 3833 movq %r13,%r10 3834 movq %r14,%r11 3835 movq %r15,%r12 3836 andq $3,%r12 3837 movq %r15,%r13 3838 andq $-4,%r13 3839 movq %r9,%r14 3840 shrdq $2,%r9,%r15 3841 shrq $2,%r9 3842 addq %r13,%r15 3843 adcq %r14,%r9 3844 addq %r15,%r10 3845 adcq %r9,%r11 3846 adcq $0,%r12 3847 3848 3849.Ldo_length_block: 3850 addq 0+0+32(%rbp),%r10 3851 adcq 8+0+32(%rbp),%r11 3852 adcq $1,%r12 3853 movq 0+0+0(%rbp),%rax 3854 movq %rax,%r15 3855 mulq %r10 3856 movq %rax,%r13 3857 movq %rdx,%r14 3858 movq 0+0+0(%rbp),%rax 3859 mulq %r11 3860 imulq %r12,%r15 3861 addq %rax,%r14 3862 adcq %rdx,%r15 3863 movq 8+0+0(%rbp),%rax 3864 movq %rax,%r9 3865 mulq %r10 3866 addq %rax,%r14 3867 adcq $0,%rdx 3868 movq %rdx,%r10 3869 movq 8+0+0(%rbp),%rax 3870 mulq %r11 3871 addq %rax,%r15 3872 adcq $0,%rdx 3873 imulq %r12,%r9 3874 addq %r10,%r15 3875 adcq %rdx,%r9 3876 movq %r13,%r10 3877 movq %r14,%r11 3878 movq %r15,%r12 3879 andq $3,%r12 3880 movq %r15,%r13 3881 andq $-4,%r13 3882 movq %r9,%r14 3883 shrdq $2,%r9,%r15 3884 shrq $2,%r9 3885 addq %r13,%r15 3886 adcq %r14,%r9 3887 addq %r15,%r10 3888 adcq %r9,%r11 3889 adcq $0,%r12 3890 3891 3892 movq %r10,%r13 3893 movq %r11,%r14 3894 movq %r12,%r15 3895 subq $-5,%r10 3896 sbbq $-1,%r11 3897 sbbq $3,%r12 3898 cmovcq %r13,%r10 3899 cmovcq %r14,%r11 3900 cmovcq %r15,%r12 3901 3902 addq 0+0+16(%rbp),%r10 3903 adcq 8+0+16(%rbp),%r11 3904 3905.cfi_remember_state 3906 addq $288 + 0 + 32,%rsp 3907.cfi_adjust_cfa_offset -(288 + 32) 3908 3909 popq %r9 3910.cfi_adjust_cfa_offset -8 3911.cfi_restore %r9 3912 movq %r10,(%r9) 3913 movq %r11,8(%r9) 3914 popq %r15 3915.cfi_adjust_cfa_offset -8 3916.cfi_restore %r15 3917 popq %r14 3918.cfi_adjust_cfa_offset -8 3919.cfi_restore %r14 3920 popq %r13 3921.cfi_adjust_cfa_offset -8 3922.cfi_restore %r13 3923 popq %r12 3924.cfi_adjust_cfa_offset -8 3925.cfi_restore %r12 3926 popq %rbx 3927.cfi_adjust_cfa_offset -8 3928.cfi_restore %rbx 3929 popq %rbp 3930.cfi_adjust_cfa_offset -8 3931.cfi_restore %rbp 3932 .byte 0xf3,0xc3 3933 3934.Lseal_sse_128: 3935.cfi_restore_state 3936 movdqu .Lchacha20_consts(%rip),%xmm0 3937 movdqa %xmm0,%xmm1 3938 movdqa %xmm0,%xmm2 3939 movdqu 0(%r9),%xmm4 3940 movdqa %xmm4,%xmm5 3941 movdqa %xmm4,%xmm6 3942 movdqu 16(%r9),%xmm8 3943 movdqa %xmm8,%xmm9 3944 movdqa %xmm8,%xmm10 3945 movdqu 32(%r9),%xmm14 3946 movdqa %xmm14,%xmm12 3947 paddd .Lsse_inc(%rip),%xmm12 3948 movdqa %xmm12,%xmm13 3949 paddd .Lsse_inc(%rip),%xmm13 3950 movdqa %xmm4,%xmm7 3951 movdqa %xmm8,%xmm11 3952 movdqa %xmm12,%xmm15 3953 movq $10,%r10 3954 3955.Lseal_sse_128_rounds: 3956 paddd %xmm4,%xmm0 3957 pxor %xmm0,%xmm12 3958 pshufb .Lrol16(%rip),%xmm12 3959 paddd %xmm12,%xmm8 3960 pxor %xmm8,%xmm4 3961 movdqa %xmm4,%xmm3 3962 pslld $12,%xmm3 3963 psrld $20,%xmm4 3964 pxor %xmm3,%xmm4 3965 paddd %xmm4,%xmm0 3966 pxor %xmm0,%xmm12 3967 
pshufb .Lrol8(%rip),%xmm12 3968 paddd %xmm12,%xmm8 3969 pxor %xmm8,%xmm4 3970 movdqa %xmm4,%xmm3 3971 pslld $7,%xmm3 3972 psrld $25,%xmm4 3973 pxor %xmm3,%xmm4 3974.byte 102,15,58,15,228,4 3975.byte 102,69,15,58,15,192,8 3976.byte 102,69,15,58,15,228,12 3977 paddd %xmm5,%xmm1 3978 pxor %xmm1,%xmm13 3979 pshufb .Lrol16(%rip),%xmm13 3980 paddd %xmm13,%xmm9 3981 pxor %xmm9,%xmm5 3982 movdqa %xmm5,%xmm3 3983 pslld $12,%xmm3 3984 psrld $20,%xmm5 3985 pxor %xmm3,%xmm5 3986 paddd %xmm5,%xmm1 3987 pxor %xmm1,%xmm13 3988 pshufb .Lrol8(%rip),%xmm13 3989 paddd %xmm13,%xmm9 3990 pxor %xmm9,%xmm5 3991 movdqa %xmm5,%xmm3 3992 pslld $7,%xmm3 3993 psrld $25,%xmm5 3994 pxor %xmm3,%xmm5 3995.byte 102,15,58,15,237,4 3996.byte 102,69,15,58,15,201,8 3997.byte 102,69,15,58,15,237,12 3998 paddd %xmm6,%xmm2 3999 pxor %xmm2,%xmm14 4000 pshufb .Lrol16(%rip),%xmm14 4001 paddd %xmm14,%xmm10 4002 pxor %xmm10,%xmm6 4003 movdqa %xmm6,%xmm3 4004 pslld $12,%xmm3 4005 psrld $20,%xmm6 4006 pxor %xmm3,%xmm6 4007 paddd %xmm6,%xmm2 4008 pxor %xmm2,%xmm14 4009 pshufb .Lrol8(%rip),%xmm14 4010 paddd %xmm14,%xmm10 4011 pxor %xmm10,%xmm6 4012 movdqa %xmm6,%xmm3 4013 pslld $7,%xmm3 4014 psrld $25,%xmm6 4015 pxor %xmm3,%xmm6 4016.byte 102,15,58,15,246,4 4017.byte 102,69,15,58,15,210,8 4018.byte 102,69,15,58,15,246,12 4019 paddd %xmm4,%xmm0 4020 pxor %xmm0,%xmm12 4021 pshufb .Lrol16(%rip),%xmm12 4022 paddd %xmm12,%xmm8 4023 pxor %xmm8,%xmm4 4024 movdqa %xmm4,%xmm3 4025 pslld $12,%xmm3 4026 psrld $20,%xmm4 4027 pxor %xmm3,%xmm4 4028 paddd %xmm4,%xmm0 4029 pxor %xmm0,%xmm12 4030 pshufb .Lrol8(%rip),%xmm12 4031 paddd %xmm12,%xmm8 4032 pxor %xmm8,%xmm4 4033 movdqa %xmm4,%xmm3 4034 pslld $7,%xmm3 4035 psrld $25,%xmm4 4036 pxor %xmm3,%xmm4 4037.byte 102,15,58,15,228,12 4038.byte 102,69,15,58,15,192,8 4039.byte 102,69,15,58,15,228,4 4040 paddd %xmm5,%xmm1 4041 pxor %xmm1,%xmm13 4042 pshufb .Lrol16(%rip),%xmm13 4043 paddd %xmm13,%xmm9 4044 pxor %xmm9,%xmm5 4045 movdqa %xmm5,%xmm3 4046 pslld $12,%xmm3 4047 psrld $20,%xmm5 4048 pxor %xmm3,%xmm5 4049 paddd %xmm5,%xmm1 4050 pxor %xmm1,%xmm13 4051 pshufb .Lrol8(%rip),%xmm13 4052 paddd %xmm13,%xmm9 4053 pxor %xmm9,%xmm5 4054 movdqa %xmm5,%xmm3 4055 pslld $7,%xmm3 4056 psrld $25,%xmm5 4057 pxor %xmm3,%xmm5 4058.byte 102,15,58,15,237,12 4059.byte 102,69,15,58,15,201,8 4060.byte 102,69,15,58,15,237,4 4061 paddd %xmm6,%xmm2 4062 pxor %xmm2,%xmm14 4063 pshufb .Lrol16(%rip),%xmm14 4064 paddd %xmm14,%xmm10 4065 pxor %xmm10,%xmm6 4066 movdqa %xmm6,%xmm3 4067 pslld $12,%xmm3 4068 psrld $20,%xmm6 4069 pxor %xmm3,%xmm6 4070 paddd %xmm6,%xmm2 4071 pxor %xmm2,%xmm14 4072 pshufb .Lrol8(%rip),%xmm14 4073 paddd %xmm14,%xmm10 4074 pxor %xmm10,%xmm6 4075 movdqa %xmm6,%xmm3 4076 pslld $7,%xmm3 4077 psrld $25,%xmm6 4078 pxor %xmm3,%xmm6 4079.byte 102,15,58,15,246,12 4080.byte 102,69,15,58,15,210,8 4081.byte 102,69,15,58,15,246,4 4082 4083 decq %r10 4084 jnz .Lseal_sse_128_rounds 4085 paddd .Lchacha20_consts(%rip),%xmm0 4086 paddd .Lchacha20_consts(%rip),%xmm1 4087 paddd .Lchacha20_consts(%rip),%xmm2 4088 paddd %xmm7,%xmm4 4089 paddd %xmm7,%xmm5 4090 paddd %xmm7,%xmm6 4091 paddd %xmm11,%xmm8 4092 paddd %xmm11,%xmm9 4093 paddd %xmm15,%xmm12 4094 paddd .Lsse_inc(%rip),%xmm15 4095 paddd %xmm15,%xmm13 4096 4097 pand .Lclamp(%rip),%xmm2 4098 movdqa %xmm2,0+0(%rbp) 4099 movdqa %xmm6,0+16(%rbp) 4100 4101 movq %r8,%r8 4102 call poly_hash_ad_internal 4103 jmp .Lseal_sse_128_tail_xor 4104.size chacha20_poly1305_seal, .-chacha20_poly1305_seal 4105.cfi_endproc 4106 4107 4108.type chacha20_poly1305_open_avx2,@function 4109.align 64 
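/* Editorial annotation, not emitted by the generating script; a reading of the
   code that follows, not authoritative: chacha20_poly1305_open_avx2 is the
   AVX2 variant of the open (decrypt) path. It broadcasts the key/nonce words
   into %ymm registers (vbroadcasti128 with .Lavx2_init/.Lavx2_inc), hashes the
   AAD via poly_hash_ad_internal, and in its 512-byte main loop interleaves the
   vectorised ChaCha20 double rounds with scalar mulx-based Poly1305
   multiply-reduce steps kept in %r10-%r15/%r9, before handling sub-512-byte
   tails in the .Lopen_avx2_tail_* blocks. */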
4110chacha20_poly1305_open_avx2: 4111.cfi_startproc 4112 4113 4114.cfi_adjust_cfa_offset 8 4115.cfi_offset %rbp,-16 4116.cfi_adjust_cfa_offset 8 4117.cfi_offset %rbx,-24 4118.cfi_adjust_cfa_offset 8 4119.cfi_offset %r12,-32 4120.cfi_adjust_cfa_offset 8 4121.cfi_offset %r13,-40 4122.cfi_adjust_cfa_offset 8 4123.cfi_offset %r14,-48 4124.cfi_adjust_cfa_offset 8 4125.cfi_offset %r15,-56 4126.cfi_adjust_cfa_offset 8 4127.cfi_offset %r9,-64 4128.cfi_adjust_cfa_offset 288 + 32 4129 4130 vzeroupper 4131 vmovdqa .Lchacha20_consts(%rip),%ymm0 4132 vbroadcasti128 0(%r9),%ymm4 4133 vbroadcasti128 16(%r9),%ymm8 4134 vbroadcasti128 32(%r9),%ymm12 4135 vpaddd .Lavx2_init(%rip),%ymm12,%ymm12 4136 cmpq $192,%rbx 4137 jbe .Lopen_avx2_192 4138 cmpq $320,%rbx 4139 jbe .Lopen_avx2_320 4140 4141 vmovdqa %ymm4,0+64(%rbp) 4142 vmovdqa %ymm8,0+96(%rbp) 4143 vmovdqa %ymm12,0+160(%rbp) 4144 movq $10,%r10 4145.Lopen_avx2_init_rounds: 4146 vpaddd %ymm4,%ymm0,%ymm0 4147 vpxor %ymm0,%ymm12,%ymm12 4148 vpshufb .Lrol16(%rip),%ymm12,%ymm12 4149 vpaddd %ymm12,%ymm8,%ymm8 4150 vpxor %ymm8,%ymm4,%ymm4 4151 vpsrld $20,%ymm4,%ymm3 4152 vpslld $12,%ymm4,%ymm4 4153 vpxor %ymm3,%ymm4,%ymm4 4154 vpaddd %ymm4,%ymm0,%ymm0 4155 vpxor %ymm0,%ymm12,%ymm12 4156 vpshufb .Lrol8(%rip),%ymm12,%ymm12 4157 vpaddd %ymm12,%ymm8,%ymm8 4158 vpxor %ymm8,%ymm4,%ymm4 4159 vpslld $7,%ymm4,%ymm3 4160 vpsrld $25,%ymm4,%ymm4 4161 vpxor %ymm3,%ymm4,%ymm4 4162 vpalignr $12,%ymm12,%ymm12,%ymm12 4163 vpalignr $8,%ymm8,%ymm8,%ymm8 4164 vpalignr $4,%ymm4,%ymm4,%ymm4 4165 vpaddd %ymm4,%ymm0,%ymm0 4166 vpxor %ymm0,%ymm12,%ymm12 4167 vpshufb .Lrol16(%rip),%ymm12,%ymm12 4168 vpaddd %ymm12,%ymm8,%ymm8 4169 vpxor %ymm8,%ymm4,%ymm4 4170 vpsrld $20,%ymm4,%ymm3 4171 vpslld $12,%ymm4,%ymm4 4172 vpxor %ymm3,%ymm4,%ymm4 4173 vpaddd %ymm4,%ymm0,%ymm0 4174 vpxor %ymm0,%ymm12,%ymm12 4175 vpshufb .Lrol8(%rip),%ymm12,%ymm12 4176 vpaddd %ymm12,%ymm8,%ymm8 4177 vpxor %ymm8,%ymm4,%ymm4 4178 vpslld $7,%ymm4,%ymm3 4179 vpsrld $25,%ymm4,%ymm4 4180 vpxor %ymm3,%ymm4,%ymm4 4181 vpalignr $4,%ymm12,%ymm12,%ymm12 4182 vpalignr $8,%ymm8,%ymm8,%ymm8 4183 vpalignr $12,%ymm4,%ymm4,%ymm4 4184 4185 decq %r10 4186 jne .Lopen_avx2_init_rounds 4187 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 4188 vpaddd 0+64(%rbp),%ymm4,%ymm4 4189 vpaddd 0+96(%rbp),%ymm8,%ymm8 4190 vpaddd 0+160(%rbp),%ymm12,%ymm12 4191 4192 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 4193 4194 vpand .Lclamp(%rip),%ymm3,%ymm3 4195 vmovdqa %ymm3,0+0(%rbp) 4196 4197 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 4198 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 4199 4200 movq %r8,%r8 4201 call poly_hash_ad_internal 4202 4203 xorq %rcx,%rcx 4204.Lopen_avx2_init_hash: 4205 addq 0+0(%rsi,%rcx,1),%r10 4206 adcq 8+0(%rsi,%rcx,1),%r11 4207 adcq $1,%r12 4208 movq 0+0+0(%rbp),%rax 4209 movq %rax,%r15 4210 mulq %r10 4211 movq %rax,%r13 4212 movq %rdx,%r14 4213 movq 0+0+0(%rbp),%rax 4214 mulq %r11 4215 imulq %r12,%r15 4216 addq %rax,%r14 4217 adcq %rdx,%r15 4218 movq 8+0+0(%rbp),%rax 4219 movq %rax,%r9 4220 mulq %r10 4221 addq %rax,%r14 4222 adcq $0,%rdx 4223 movq %rdx,%r10 4224 movq 8+0+0(%rbp),%rax 4225 mulq %r11 4226 addq %rax,%r15 4227 adcq $0,%rdx 4228 imulq %r12,%r9 4229 addq %r10,%r15 4230 adcq %rdx,%r9 4231 movq %r13,%r10 4232 movq %r14,%r11 4233 movq %r15,%r12 4234 andq $3,%r12 4235 movq %r15,%r13 4236 andq $-4,%r13 4237 movq %r9,%r14 4238 shrdq $2,%r9,%r15 4239 shrq $2,%r9 4240 addq %r13,%r15 4241 adcq %r14,%r9 4242 addq %r15,%r10 4243 adcq %r9,%r11 4244 adcq $0,%r12 4245 4246 addq $16,%rcx 4247 cmpq $64,%rcx 4248 jne .Lopen_avx2_init_hash 4249 4250 vpxor 
0(%rsi),%ymm0,%ymm0 4251 vpxor 32(%rsi),%ymm4,%ymm4 4252 4253 vmovdqu %ymm0,0(%rdi) 4254 vmovdqu %ymm4,32(%rdi) 4255 leaq 64(%rsi),%rsi 4256 leaq 64(%rdi),%rdi 4257 subq $64,%rbx 4258.Lopen_avx2_main_loop: 4259 4260 cmpq $512,%rbx 4261 jb .Lopen_avx2_main_loop_done 4262 vmovdqa .Lchacha20_consts(%rip),%ymm0 4263 vmovdqa 0+64(%rbp),%ymm4 4264 vmovdqa 0+96(%rbp),%ymm8 4265 vmovdqa %ymm0,%ymm1 4266 vmovdqa %ymm4,%ymm5 4267 vmovdqa %ymm8,%ymm9 4268 vmovdqa %ymm0,%ymm2 4269 vmovdqa %ymm4,%ymm6 4270 vmovdqa %ymm8,%ymm10 4271 vmovdqa %ymm0,%ymm3 4272 vmovdqa %ymm4,%ymm7 4273 vmovdqa %ymm8,%ymm11 4274 vmovdqa .Lavx2_inc(%rip),%ymm12 4275 vpaddd 0+160(%rbp),%ymm12,%ymm15 4276 vpaddd %ymm15,%ymm12,%ymm14 4277 vpaddd %ymm14,%ymm12,%ymm13 4278 vpaddd %ymm13,%ymm12,%ymm12 4279 vmovdqa %ymm15,0+256(%rbp) 4280 vmovdqa %ymm14,0+224(%rbp) 4281 vmovdqa %ymm13,0+192(%rbp) 4282 vmovdqa %ymm12,0+160(%rbp) 4283 4284 xorq %rcx,%rcx 4285.Lopen_avx2_main_loop_rounds: 4286 addq 0+0(%rsi,%rcx,1),%r10 4287 adcq 8+0(%rsi,%rcx,1),%r11 4288 adcq $1,%r12 4289 vmovdqa %ymm8,0+128(%rbp) 4290 vmovdqa .Lrol16(%rip),%ymm8 4291 vpaddd %ymm7,%ymm3,%ymm3 4292 vpaddd %ymm6,%ymm2,%ymm2 4293 vpaddd %ymm5,%ymm1,%ymm1 4294 vpaddd %ymm4,%ymm0,%ymm0 4295 vpxor %ymm3,%ymm15,%ymm15 4296 vpxor %ymm2,%ymm14,%ymm14 4297 vpxor %ymm1,%ymm13,%ymm13 4298 vpxor %ymm0,%ymm12,%ymm12 4299 movq 0+0+0(%rbp),%rdx 4300 movq %rdx,%r15 4301 mulxq %r10,%r13,%r14 4302 mulxq %r11,%rax,%rdx 4303 imulq %r12,%r15 4304 addq %rax,%r14 4305 adcq %rdx,%r15 4306 vpshufb %ymm8,%ymm15,%ymm15 4307 vpshufb %ymm8,%ymm14,%ymm14 4308 vpshufb %ymm8,%ymm13,%ymm13 4309 vpshufb %ymm8,%ymm12,%ymm12 4310 vpaddd %ymm15,%ymm11,%ymm11 4311 vpaddd %ymm14,%ymm10,%ymm10 4312 vpaddd %ymm13,%ymm9,%ymm9 4313 vpaddd 0+128(%rbp),%ymm12,%ymm8 4314 vpxor %ymm11,%ymm7,%ymm7 4315 movq 8+0+0(%rbp),%rdx 4316 mulxq %r10,%r10,%rax 4317 addq %r10,%r14 4318 mulxq %r11,%r11,%r9 4319 adcq %r11,%r15 4320 adcq $0,%r9 4321 imulq %r12,%rdx 4322 vpxor %ymm10,%ymm6,%ymm6 4323 vpxor %ymm9,%ymm5,%ymm5 4324 vpxor %ymm8,%ymm4,%ymm4 4325 vmovdqa %ymm8,0+128(%rbp) 4326 vpsrld $20,%ymm7,%ymm8 4327 vpslld $32-20,%ymm7,%ymm7 4328 vpxor %ymm8,%ymm7,%ymm7 4329 vpsrld $20,%ymm6,%ymm8 4330 vpslld $32-20,%ymm6,%ymm6 4331 vpxor %ymm8,%ymm6,%ymm6 4332 vpsrld $20,%ymm5,%ymm8 4333 vpslld $32-20,%ymm5,%ymm5 4334 addq %rax,%r15 4335 adcq %rdx,%r9 4336 vpxor %ymm8,%ymm5,%ymm5 4337 vpsrld $20,%ymm4,%ymm8 4338 vpslld $32-20,%ymm4,%ymm4 4339 vpxor %ymm8,%ymm4,%ymm4 4340 vmovdqa .Lrol8(%rip),%ymm8 4341 vpaddd %ymm7,%ymm3,%ymm3 4342 vpaddd %ymm6,%ymm2,%ymm2 4343 vpaddd %ymm5,%ymm1,%ymm1 4344 vpaddd %ymm4,%ymm0,%ymm0 4345 vpxor %ymm3,%ymm15,%ymm15 4346 movq %r13,%r10 4347 movq %r14,%r11 4348 movq %r15,%r12 4349 andq $3,%r12 4350 movq %r15,%r13 4351 andq $-4,%r13 4352 movq %r9,%r14 4353 shrdq $2,%r9,%r15 4354 shrq $2,%r9 4355 addq %r13,%r15 4356 adcq %r14,%r9 4357 addq %r15,%r10 4358 adcq %r9,%r11 4359 adcq $0,%r12 4360 vpxor %ymm2,%ymm14,%ymm14 4361 vpxor %ymm1,%ymm13,%ymm13 4362 vpxor %ymm0,%ymm12,%ymm12 4363 vpshufb %ymm8,%ymm15,%ymm15 4364 vpshufb %ymm8,%ymm14,%ymm14 4365 vpshufb %ymm8,%ymm13,%ymm13 4366 vpshufb %ymm8,%ymm12,%ymm12 4367 vpaddd %ymm15,%ymm11,%ymm11 4368 vpaddd %ymm14,%ymm10,%ymm10 4369 addq 0+16(%rsi,%rcx,1),%r10 4370 adcq 8+16(%rsi,%rcx,1),%r11 4371 adcq $1,%r12 4372 vpaddd %ymm13,%ymm9,%ymm9 4373 vpaddd 0+128(%rbp),%ymm12,%ymm8 4374 vpxor %ymm11,%ymm7,%ymm7 4375 vpxor %ymm10,%ymm6,%ymm6 4376 vpxor %ymm9,%ymm5,%ymm5 4377 vpxor %ymm8,%ymm4,%ymm4 4378 vmovdqa %ymm8,0+128(%rbp) 4379 vpsrld $25,%ymm7,%ymm8 4380 movq 
0+0+0(%rbp),%rdx 4381 movq %rdx,%r15 4382 mulxq %r10,%r13,%r14 4383 mulxq %r11,%rax,%rdx 4384 imulq %r12,%r15 4385 addq %rax,%r14 4386 adcq %rdx,%r15 4387 vpslld $32-25,%ymm7,%ymm7 4388 vpxor %ymm8,%ymm7,%ymm7 4389 vpsrld $25,%ymm6,%ymm8 4390 vpslld $32-25,%ymm6,%ymm6 4391 vpxor %ymm8,%ymm6,%ymm6 4392 vpsrld $25,%ymm5,%ymm8 4393 vpslld $32-25,%ymm5,%ymm5 4394 vpxor %ymm8,%ymm5,%ymm5 4395 vpsrld $25,%ymm4,%ymm8 4396 vpslld $32-25,%ymm4,%ymm4 4397 vpxor %ymm8,%ymm4,%ymm4 4398 vmovdqa 0+128(%rbp),%ymm8 4399 vpalignr $4,%ymm7,%ymm7,%ymm7 4400 vpalignr $8,%ymm11,%ymm11,%ymm11 4401 vpalignr $12,%ymm15,%ymm15,%ymm15 4402 vpalignr $4,%ymm6,%ymm6,%ymm6 4403 vpalignr $8,%ymm10,%ymm10,%ymm10 4404 vpalignr $12,%ymm14,%ymm14,%ymm14 4405 movq 8+0+0(%rbp),%rdx 4406 mulxq %r10,%r10,%rax 4407 addq %r10,%r14 4408 mulxq %r11,%r11,%r9 4409 adcq %r11,%r15 4410 adcq $0,%r9 4411 imulq %r12,%rdx 4412 vpalignr $4,%ymm5,%ymm5,%ymm5 4413 vpalignr $8,%ymm9,%ymm9,%ymm9 4414 vpalignr $12,%ymm13,%ymm13,%ymm13 4415 vpalignr $4,%ymm4,%ymm4,%ymm4 4416 vpalignr $8,%ymm8,%ymm8,%ymm8 4417 vpalignr $12,%ymm12,%ymm12,%ymm12 4418 vmovdqa %ymm8,0+128(%rbp) 4419 vmovdqa .Lrol16(%rip),%ymm8 4420 vpaddd %ymm7,%ymm3,%ymm3 4421 vpaddd %ymm6,%ymm2,%ymm2 4422 vpaddd %ymm5,%ymm1,%ymm1 4423 vpaddd %ymm4,%ymm0,%ymm0 4424 vpxor %ymm3,%ymm15,%ymm15 4425 vpxor %ymm2,%ymm14,%ymm14 4426 vpxor %ymm1,%ymm13,%ymm13 4427 vpxor %ymm0,%ymm12,%ymm12 4428 vpshufb %ymm8,%ymm15,%ymm15 4429 vpshufb %ymm8,%ymm14,%ymm14 4430 addq %rax,%r15 4431 adcq %rdx,%r9 4432 vpshufb %ymm8,%ymm13,%ymm13 4433 vpshufb %ymm8,%ymm12,%ymm12 4434 vpaddd %ymm15,%ymm11,%ymm11 4435 vpaddd %ymm14,%ymm10,%ymm10 4436 vpaddd %ymm13,%ymm9,%ymm9 4437 vpaddd 0+128(%rbp),%ymm12,%ymm8 4438 vpxor %ymm11,%ymm7,%ymm7 4439 vpxor %ymm10,%ymm6,%ymm6 4440 vpxor %ymm9,%ymm5,%ymm5 4441 movq %r13,%r10 4442 movq %r14,%r11 4443 movq %r15,%r12 4444 andq $3,%r12 4445 movq %r15,%r13 4446 andq $-4,%r13 4447 movq %r9,%r14 4448 shrdq $2,%r9,%r15 4449 shrq $2,%r9 4450 addq %r13,%r15 4451 adcq %r14,%r9 4452 addq %r15,%r10 4453 adcq %r9,%r11 4454 adcq $0,%r12 4455 vpxor %ymm8,%ymm4,%ymm4 4456 vmovdqa %ymm8,0+128(%rbp) 4457 vpsrld $20,%ymm7,%ymm8 4458 vpslld $32-20,%ymm7,%ymm7 4459 vpxor %ymm8,%ymm7,%ymm7 4460 vpsrld $20,%ymm6,%ymm8 4461 vpslld $32-20,%ymm6,%ymm6 4462 vpxor %ymm8,%ymm6,%ymm6 4463 addq 0+32(%rsi,%rcx,1),%r10 4464 adcq 8+32(%rsi,%rcx,1),%r11 4465 adcq $1,%r12 4466 4467 leaq 48(%rcx),%rcx 4468 vpsrld $20,%ymm5,%ymm8 4469 vpslld $32-20,%ymm5,%ymm5 4470 vpxor %ymm8,%ymm5,%ymm5 4471 vpsrld $20,%ymm4,%ymm8 4472 vpslld $32-20,%ymm4,%ymm4 4473 vpxor %ymm8,%ymm4,%ymm4 4474 vmovdqa .Lrol8(%rip),%ymm8 4475 vpaddd %ymm7,%ymm3,%ymm3 4476 vpaddd %ymm6,%ymm2,%ymm2 4477 vpaddd %ymm5,%ymm1,%ymm1 4478 vpaddd %ymm4,%ymm0,%ymm0 4479 vpxor %ymm3,%ymm15,%ymm15 4480 vpxor %ymm2,%ymm14,%ymm14 4481 vpxor %ymm1,%ymm13,%ymm13 4482 vpxor %ymm0,%ymm12,%ymm12 4483 vpshufb %ymm8,%ymm15,%ymm15 4484 vpshufb %ymm8,%ymm14,%ymm14 4485 vpshufb %ymm8,%ymm13,%ymm13 4486 movq 0+0+0(%rbp),%rdx 4487 movq %rdx,%r15 4488 mulxq %r10,%r13,%r14 4489 mulxq %r11,%rax,%rdx 4490 imulq %r12,%r15 4491 addq %rax,%r14 4492 adcq %rdx,%r15 4493 vpshufb %ymm8,%ymm12,%ymm12 4494 vpaddd %ymm15,%ymm11,%ymm11 4495 vpaddd %ymm14,%ymm10,%ymm10 4496 vpaddd %ymm13,%ymm9,%ymm9 4497 vpaddd 0+128(%rbp),%ymm12,%ymm8 4498 vpxor %ymm11,%ymm7,%ymm7 4499 vpxor %ymm10,%ymm6,%ymm6 4500 vpxor %ymm9,%ymm5,%ymm5 4501 movq 8+0+0(%rbp),%rdx 4502 mulxq %r10,%r10,%rax 4503 addq %r10,%r14 4504 mulxq %r11,%r11,%r9 4505 adcq %r11,%r15 4506 adcq $0,%r9 4507 imulq %r12,%rdx 4508 
vpxor %ymm8,%ymm4,%ymm4 4509 vmovdqa %ymm8,0+128(%rbp) 4510 vpsrld $25,%ymm7,%ymm8 4511 vpslld $32-25,%ymm7,%ymm7 4512 vpxor %ymm8,%ymm7,%ymm7 4513 vpsrld $25,%ymm6,%ymm8 4514 vpslld $32-25,%ymm6,%ymm6 4515 vpxor %ymm8,%ymm6,%ymm6 4516 addq %rax,%r15 4517 adcq %rdx,%r9 4518 vpsrld $25,%ymm5,%ymm8 4519 vpslld $32-25,%ymm5,%ymm5 4520 vpxor %ymm8,%ymm5,%ymm5 4521 vpsrld $25,%ymm4,%ymm8 4522 vpslld $32-25,%ymm4,%ymm4 4523 vpxor %ymm8,%ymm4,%ymm4 4524 vmovdqa 0+128(%rbp),%ymm8 4525 vpalignr $12,%ymm7,%ymm7,%ymm7 4526 vpalignr $8,%ymm11,%ymm11,%ymm11 4527 vpalignr $4,%ymm15,%ymm15,%ymm15 4528 vpalignr $12,%ymm6,%ymm6,%ymm6 4529 vpalignr $8,%ymm10,%ymm10,%ymm10 4530 vpalignr $4,%ymm14,%ymm14,%ymm14 4531 vpalignr $12,%ymm5,%ymm5,%ymm5 4532 vpalignr $8,%ymm9,%ymm9,%ymm9 4533 vpalignr $4,%ymm13,%ymm13,%ymm13 4534 vpalignr $12,%ymm4,%ymm4,%ymm4 4535 vpalignr $8,%ymm8,%ymm8,%ymm8 4536 movq %r13,%r10 4537 movq %r14,%r11 4538 movq %r15,%r12 4539 andq $3,%r12 4540 movq %r15,%r13 4541 andq $-4,%r13 4542 movq %r9,%r14 4543 shrdq $2,%r9,%r15 4544 shrq $2,%r9 4545 addq %r13,%r15 4546 adcq %r14,%r9 4547 addq %r15,%r10 4548 adcq %r9,%r11 4549 adcq $0,%r12 4550 vpalignr $4,%ymm12,%ymm12,%ymm12 4551 4552 cmpq $60*8,%rcx 4553 jne .Lopen_avx2_main_loop_rounds 4554 vpaddd .Lchacha20_consts(%rip),%ymm3,%ymm3 4555 vpaddd 0+64(%rbp),%ymm7,%ymm7 4556 vpaddd 0+96(%rbp),%ymm11,%ymm11 4557 vpaddd 0+256(%rbp),%ymm15,%ymm15 4558 vpaddd .Lchacha20_consts(%rip),%ymm2,%ymm2 4559 vpaddd 0+64(%rbp),%ymm6,%ymm6 4560 vpaddd 0+96(%rbp),%ymm10,%ymm10 4561 vpaddd 0+224(%rbp),%ymm14,%ymm14 4562 vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 4563 vpaddd 0+64(%rbp),%ymm5,%ymm5 4564 vpaddd 0+96(%rbp),%ymm9,%ymm9 4565 vpaddd 0+192(%rbp),%ymm13,%ymm13 4566 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 4567 vpaddd 0+64(%rbp),%ymm4,%ymm4 4568 vpaddd 0+96(%rbp),%ymm8,%ymm8 4569 vpaddd 0+160(%rbp),%ymm12,%ymm12 4570 4571 vmovdqa %ymm0,0+128(%rbp) 4572 addq 0+60*8(%rsi),%r10 4573 adcq 8+60*8(%rsi),%r11 4574 adcq $1,%r12 4575 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 4576 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 4577 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 4578 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 4579 vpxor 0+0(%rsi),%ymm0,%ymm0 4580 vpxor 32+0(%rsi),%ymm3,%ymm3 4581 vpxor 64+0(%rsi),%ymm7,%ymm7 4582 vpxor 96+0(%rsi),%ymm11,%ymm11 4583 vmovdqu %ymm0,0+0(%rdi) 4584 vmovdqu %ymm3,32+0(%rdi) 4585 vmovdqu %ymm7,64+0(%rdi) 4586 vmovdqu %ymm11,96+0(%rdi) 4587 4588 vmovdqa 0+128(%rbp),%ymm0 4589 movq 0+0+0(%rbp),%rax 4590 movq %rax,%r15 4591 mulq %r10 4592 movq %rax,%r13 4593 movq %rdx,%r14 4594 movq 0+0+0(%rbp),%rax 4595 mulq %r11 4596 imulq %r12,%r15 4597 addq %rax,%r14 4598 adcq %rdx,%r15 4599 movq 8+0+0(%rbp),%rax 4600 movq %rax,%r9 4601 mulq %r10 4602 addq %rax,%r14 4603 adcq $0,%rdx 4604 movq %rdx,%r10 4605 movq 8+0+0(%rbp),%rax 4606 mulq %r11 4607 addq %rax,%r15 4608 adcq $0,%rdx 4609 imulq %r12,%r9 4610 addq %r10,%r15 4611 adcq %rdx,%r9 4612 movq %r13,%r10 4613 movq %r14,%r11 4614 movq %r15,%r12 4615 andq $3,%r12 4616 movq %r15,%r13 4617 andq $-4,%r13 4618 movq %r9,%r14 4619 shrdq $2,%r9,%r15 4620 shrq $2,%r9 4621 addq %r13,%r15 4622 adcq %r14,%r9 4623 addq %r15,%r10 4624 adcq %r9,%r11 4625 adcq $0,%r12 4626 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 4627 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 4628 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 4629 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 4630 vpxor 0+128(%rsi),%ymm3,%ymm3 4631 vpxor 32+128(%rsi),%ymm2,%ymm2 4632 vpxor 64+128(%rsi),%ymm6,%ymm6 4633 vpxor 96+128(%rsi),%ymm10,%ymm10 4634 vmovdqu %ymm3,0+128(%rdi) 4635 vmovdqu 
%ymm2,32+128(%rdi) 4636 vmovdqu %ymm6,64+128(%rdi) 4637 vmovdqu %ymm10,96+128(%rdi) 4638 addq 0+60*8+16(%rsi),%r10 4639 adcq 8+60*8+16(%rsi),%r11 4640 adcq $1,%r12 4641 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 4642 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 4643 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 4644 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 4645 vpxor 0+256(%rsi),%ymm3,%ymm3 4646 vpxor 32+256(%rsi),%ymm1,%ymm1 4647 vpxor 64+256(%rsi),%ymm5,%ymm5 4648 vpxor 96+256(%rsi),%ymm9,%ymm9 4649 vmovdqu %ymm3,0+256(%rdi) 4650 vmovdqu %ymm1,32+256(%rdi) 4651 vmovdqu %ymm5,64+256(%rdi) 4652 vmovdqu %ymm9,96+256(%rdi) 4653 movq 0+0+0(%rbp),%rax 4654 movq %rax,%r15 4655 mulq %r10 4656 movq %rax,%r13 4657 movq %rdx,%r14 4658 movq 0+0+0(%rbp),%rax 4659 mulq %r11 4660 imulq %r12,%r15 4661 addq %rax,%r14 4662 adcq %rdx,%r15 4663 movq 8+0+0(%rbp),%rax 4664 movq %rax,%r9 4665 mulq %r10 4666 addq %rax,%r14 4667 adcq $0,%rdx 4668 movq %rdx,%r10 4669 movq 8+0+0(%rbp),%rax 4670 mulq %r11 4671 addq %rax,%r15 4672 adcq $0,%rdx 4673 imulq %r12,%r9 4674 addq %r10,%r15 4675 adcq %rdx,%r9 4676 movq %r13,%r10 4677 movq %r14,%r11 4678 movq %r15,%r12 4679 andq $3,%r12 4680 movq %r15,%r13 4681 andq $-4,%r13 4682 movq %r9,%r14 4683 shrdq $2,%r9,%r15 4684 shrq $2,%r9 4685 addq %r13,%r15 4686 adcq %r14,%r9 4687 addq %r15,%r10 4688 adcq %r9,%r11 4689 adcq $0,%r12 4690 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 4691 vperm2i128 $0x13,%ymm0,%ymm4,%ymm4 4692 vperm2i128 $0x02,%ymm8,%ymm12,%ymm0 4693 vperm2i128 $0x13,%ymm8,%ymm12,%ymm8 4694 vpxor 0+384(%rsi),%ymm3,%ymm3 4695 vpxor 32+384(%rsi),%ymm0,%ymm0 4696 vpxor 64+384(%rsi),%ymm4,%ymm4 4697 vpxor 96+384(%rsi),%ymm8,%ymm8 4698 vmovdqu %ymm3,0+384(%rdi) 4699 vmovdqu %ymm0,32+384(%rdi) 4700 vmovdqu %ymm4,64+384(%rdi) 4701 vmovdqu %ymm8,96+384(%rdi) 4702 4703 leaq 512(%rsi),%rsi 4704 leaq 512(%rdi),%rdi 4705 subq $512,%rbx 4706 jmp .Lopen_avx2_main_loop 4707.Lopen_avx2_main_loop_done: 4708 testq %rbx,%rbx 4709 vzeroupper 4710 je .Lopen_sse_finalize 4711 4712 cmpq $384,%rbx 4713 ja .Lopen_avx2_tail_512 4714 cmpq $256,%rbx 4715 ja .Lopen_avx2_tail_384 4716 cmpq $128,%rbx 4717 ja .Lopen_avx2_tail_256 4718 vmovdqa .Lchacha20_consts(%rip),%ymm0 4719 vmovdqa 0+64(%rbp),%ymm4 4720 vmovdqa 0+96(%rbp),%ymm8 4721 vmovdqa .Lavx2_inc(%rip),%ymm12 4722 vpaddd 0+160(%rbp),%ymm12,%ymm12 4723 vmovdqa %ymm12,0+160(%rbp) 4724 4725 xorq %r8,%r8 4726 movq %rbx,%rcx 4727 andq $-16,%rcx 4728 testq %rcx,%rcx 4729 je .Lopen_avx2_tail_128_rounds 4730.Lopen_avx2_tail_128_rounds_and_x1hash: 4731 addq 0+0(%rsi,%r8,1),%r10 4732 adcq 8+0(%rsi,%r8,1),%r11 4733 adcq $1,%r12 4734 movq 0+0+0(%rbp),%rax 4735 movq %rax,%r15 4736 mulq %r10 4737 movq %rax,%r13 4738 movq %rdx,%r14 4739 movq 0+0+0(%rbp),%rax 4740 mulq %r11 4741 imulq %r12,%r15 4742 addq %rax,%r14 4743 adcq %rdx,%r15 4744 movq 8+0+0(%rbp),%rax 4745 movq %rax,%r9 4746 mulq %r10 4747 addq %rax,%r14 4748 adcq $0,%rdx 4749 movq %rdx,%r10 4750 movq 8+0+0(%rbp),%rax 4751 mulq %r11 4752 addq %rax,%r15 4753 adcq $0,%rdx 4754 imulq %r12,%r9 4755 addq %r10,%r15 4756 adcq %rdx,%r9 4757 movq %r13,%r10 4758 movq %r14,%r11 4759 movq %r15,%r12 4760 andq $3,%r12 4761 movq %r15,%r13 4762 andq $-4,%r13 4763 movq %r9,%r14 4764 shrdq $2,%r9,%r15 4765 shrq $2,%r9 4766 addq %r13,%r15 4767 adcq %r14,%r9 4768 addq %r15,%r10 4769 adcq %r9,%r11 4770 adcq $0,%r12 4771 4772.Lopen_avx2_tail_128_rounds: 4773 addq $16,%r8 4774 vpaddd %ymm4,%ymm0,%ymm0 4775 vpxor %ymm0,%ymm12,%ymm12 4776 vpshufb .Lrol16(%rip),%ymm12,%ymm12 4777 vpaddd %ymm12,%ymm8,%ymm8 4778 vpxor %ymm8,%ymm4,%ymm4 4779 vpsrld $20,%ymm4,%ymm3 
4780 vpslld $12,%ymm4,%ymm4 4781 vpxor %ymm3,%ymm4,%ymm4 4782 vpaddd %ymm4,%ymm0,%ymm0 4783 vpxor %ymm0,%ymm12,%ymm12 4784 vpshufb .Lrol8(%rip),%ymm12,%ymm12 4785 vpaddd %ymm12,%ymm8,%ymm8 4786 vpxor %ymm8,%ymm4,%ymm4 4787 vpslld $7,%ymm4,%ymm3 4788 vpsrld $25,%ymm4,%ymm4 4789 vpxor %ymm3,%ymm4,%ymm4 4790 vpalignr $12,%ymm12,%ymm12,%ymm12 4791 vpalignr $8,%ymm8,%ymm8,%ymm8 4792 vpalignr $4,%ymm4,%ymm4,%ymm4 4793 vpaddd %ymm4,%ymm0,%ymm0 4794 vpxor %ymm0,%ymm12,%ymm12 4795 vpshufb .Lrol16(%rip),%ymm12,%ymm12 4796 vpaddd %ymm12,%ymm8,%ymm8 4797 vpxor %ymm8,%ymm4,%ymm4 4798 vpsrld $20,%ymm4,%ymm3 4799 vpslld $12,%ymm4,%ymm4 4800 vpxor %ymm3,%ymm4,%ymm4 4801 vpaddd %ymm4,%ymm0,%ymm0 4802 vpxor %ymm0,%ymm12,%ymm12 4803 vpshufb .Lrol8(%rip),%ymm12,%ymm12 4804 vpaddd %ymm12,%ymm8,%ymm8 4805 vpxor %ymm8,%ymm4,%ymm4 4806 vpslld $7,%ymm4,%ymm3 4807 vpsrld $25,%ymm4,%ymm4 4808 vpxor %ymm3,%ymm4,%ymm4 4809 vpalignr $4,%ymm12,%ymm12,%ymm12 4810 vpalignr $8,%ymm8,%ymm8,%ymm8 4811 vpalignr $12,%ymm4,%ymm4,%ymm4 4812 4813 cmpq %rcx,%r8 4814 jb .Lopen_avx2_tail_128_rounds_and_x1hash 4815 cmpq $160,%r8 4816 jne .Lopen_avx2_tail_128_rounds 4817 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 4818 vpaddd 0+64(%rbp),%ymm4,%ymm4 4819 vpaddd 0+96(%rbp),%ymm8,%ymm8 4820 vpaddd 0+160(%rbp),%ymm12,%ymm12 4821 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 4822 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 4823 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 4824 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 4825 vmovdqa %ymm3,%ymm8 4826 4827 jmp .Lopen_avx2_tail_128_xor 4828 4829.Lopen_avx2_tail_256: 4830 vmovdqa .Lchacha20_consts(%rip),%ymm0 4831 vmovdqa 0+64(%rbp),%ymm4 4832 vmovdqa 0+96(%rbp),%ymm8 4833 vmovdqa %ymm0,%ymm1 4834 vmovdqa %ymm4,%ymm5 4835 vmovdqa %ymm8,%ymm9 4836 vmovdqa .Lavx2_inc(%rip),%ymm12 4837 vpaddd 0+160(%rbp),%ymm12,%ymm13 4838 vpaddd %ymm13,%ymm12,%ymm12 4839 vmovdqa %ymm12,0+160(%rbp) 4840 vmovdqa %ymm13,0+192(%rbp) 4841 4842 movq %rbx,0+128(%rbp) 4843 movq %rbx,%rcx 4844 subq $128,%rcx 4845 shrq $4,%rcx 4846 movq $10,%r8 4847 cmpq $10,%rcx 4848 cmovgq %r8,%rcx 4849 movq %rsi,%rbx 4850 xorq %r8,%r8 4851.Lopen_avx2_tail_256_rounds_and_x1hash: 4852 addq 0+0(%rbx),%r10 4853 adcq 8+0(%rbx),%r11 4854 adcq $1,%r12 4855 movq 0+0+0(%rbp),%rdx 4856 movq %rdx,%r15 4857 mulxq %r10,%r13,%r14 4858 mulxq %r11,%rax,%rdx 4859 imulq %r12,%r15 4860 addq %rax,%r14 4861 adcq %rdx,%r15 4862 movq 8+0+0(%rbp),%rdx 4863 mulxq %r10,%r10,%rax 4864 addq %r10,%r14 4865 mulxq %r11,%r11,%r9 4866 adcq %r11,%r15 4867 adcq $0,%r9 4868 imulq %r12,%rdx 4869 addq %rax,%r15 4870 adcq %rdx,%r9 4871 movq %r13,%r10 4872 movq %r14,%r11 4873 movq %r15,%r12 4874 andq $3,%r12 4875 movq %r15,%r13 4876 andq $-4,%r13 4877 movq %r9,%r14 4878 shrdq $2,%r9,%r15 4879 shrq $2,%r9 4880 addq %r13,%r15 4881 adcq %r14,%r9 4882 addq %r15,%r10 4883 adcq %r9,%r11 4884 adcq $0,%r12 4885 4886 leaq 16(%rbx),%rbx 4887.Lopen_avx2_tail_256_rounds: 4888 vpaddd %ymm4,%ymm0,%ymm0 4889 vpxor %ymm0,%ymm12,%ymm12 4890 vpshufb .Lrol16(%rip),%ymm12,%ymm12 4891 vpaddd %ymm12,%ymm8,%ymm8 4892 vpxor %ymm8,%ymm4,%ymm4 4893 vpsrld $20,%ymm4,%ymm3 4894 vpslld $12,%ymm4,%ymm4 4895 vpxor %ymm3,%ymm4,%ymm4 4896 vpaddd %ymm4,%ymm0,%ymm0 4897 vpxor %ymm0,%ymm12,%ymm12 4898 vpshufb .Lrol8(%rip),%ymm12,%ymm12 4899 vpaddd %ymm12,%ymm8,%ymm8 4900 vpxor %ymm8,%ymm4,%ymm4 4901 vpslld $7,%ymm4,%ymm3 4902 vpsrld $25,%ymm4,%ymm4 4903 vpxor %ymm3,%ymm4,%ymm4 4904 vpalignr $12,%ymm12,%ymm12,%ymm12 4905 vpalignr $8,%ymm8,%ymm8,%ymm8 4906 vpalignr $4,%ymm4,%ymm4,%ymm4 4907 vpaddd %ymm5,%ymm1,%ymm1 4908 vpxor %ymm1,%ymm13,%ymm13 
4909 vpshufb .Lrol16(%rip),%ymm13,%ymm13 4910 vpaddd %ymm13,%ymm9,%ymm9 4911 vpxor %ymm9,%ymm5,%ymm5 4912 vpsrld $20,%ymm5,%ymm3 4913 vpslld $12,%ymm5,%ymm5 4914 vpxor %ymm3,%ymm5,%ymm5 4915 vpaddd %ymm5,%ymm1,%ymm1 4916 vpxor %ymm1,%ymm13,%ymm13 4917 vpshufb .Lrol8(%rip),%ymm13,%ymm13 4918 vpaddd %ymm13,%ymm9,%ymm9 4919 vpxor %ymm9,%ymm5,%ymm5 4920 vpslld $7,%ymm5,%ymm3 4921 vpsrld $25,%ymm5,%ymm5 4922 vpxor %ymm3,%ymm5,%ymm5 4923 vpalignr $12,%ymm13,%ymm13,%ymm13 4924 vpalignr $8,%ymm9,%ymm9,%ymm9 4925 vpalignr $4,%ymm5,%ymm5,%ymm5 4926 4927 incq %r8 4928 vpaddd %ymm4,%ymm0,%ymm0 4929 vpxor %ymm0,%ymm12,%ymm12 4930 vpshufb .Lrol16(%rip),%ymm12,%ymm12 4931 vpaddd %ymm12,%ymm8,%ymm8 4932 vpxor %ymm8,%ymm4,%ymm4 4933 vpsrld $20,%ymm4,%ymm3 4934 vpslld $12,%ymm4,%ymm4 4935 vpxor %ymm3,%ymm4,%ymm4 4936 vpaddd %ymm4,%ymm0,%ymm0 4937 vpxor %ymm0,%ymm12,%ymm12 4938 vpshufb .Lrol8(%rip),%ymm12,%ymm12 4939 vpaddd %ymm12,%ymm8,%ymm8 4940 vpxor %ymm8,%ymm4,%ymm4 4941 vpslld $7,%ymm4,%ymm3 4942 vpsrld $25,%ymm4,%ymm4 4943 vpxor %ymm3,%ymm4,%ymm4 4944 vpalignr $4,%ymm12,%ymm12,%ymm12 4945 vpalignr $8,%ymm8,%ymm8,%ymm8 4946 vpalignr $12,%ymm4,%ymm4,%ymm4 4947 vpaddd %ymm5,%ymm1,%ymm1 4948 vpxor %ymm1,%ymm13,%ymm13 4949 vpshufb .Lrol16(%rip),%ymm13,%ymm13 4950 vpaddd %ymm13,%ymm9,%ymm9 4951 vpxor %ymm9,%ymm5,%ymm5 4952 vpsrld $20,%ymm5,%ymm3 4953 vpslld $12,%ymm5,%ymm5 4954 vpxor %ymm3,%ymm5,%ymm5 4955 vpaddd %ymm5,%ymm1,%ymm1 4956 vpxor %ymm1,%ymm13,%ymm13 4957 vpshufb .Lrol8(%rip),%ymm13,%ymm13 4958 vpaddd %ymm13,%ymm9,%ymm9 4959 vpxor %ymm9,%ymm5,%ymm5 4960 vpslld $7,%ymm5,%ymm3 4961 vpsrld $25,%ymm5,%ymm5 4962 vpxor %ymm3,%ymm5,%ymm5 4963 vpalignr $4,%ymm13,%ymm13,%ymm13 4964 vpalignr $8,%ymm9,%ymm9,%ymm9 4965 vpalignr $12,%ymm5,%ymm5,%ymm5 4966 vpaddd %ymm6,%ymm2,%ymm2 4967 vpxor %ymm2,%ymm14,%ymm14 4968 vpshufb .Lrol16(%rip),%ymm14,%ymm14 4969 vpaddd %ymm14,%ymm10,%ymm10 4970 vpxor %ymm10,%ymm6,%ymm6 4971 vpsrld $20,%ymm6,%ymm3 4972 vpslld $12,%ymm6,%ymm6 4973 vpxor %ymm3,%ymm6,%ymm6 4974 vpaddd %ymm6,%ymm2,%ymm2 4975 vpxor %ymm2,%ymm14,%ymm14 4976 vpshufb .Lrol8(%rip),%ymm14,%ymm14 4977 vpaddd %ymm14,%ymm10,%ymm10 4978 vpxor %ymm10,%ymm6,%ymm6 4979 vpslld $7,%ymm6,%ymm3 4980 vpsrld $25,%ymm6,%ymm6 4981 vpxor %ymm3,%ymm6,%ymm6 4982 vpalignr $4,%ymm14,%ymm14,%ymm14 4983 vpalignr $8,%ymm10,%ymm10,%ymm10 4984 vpalignr $12,%ymm6,%ymm6,%ymm6 4985 4986 cmpq %rcx,%r8 4987 jb .Lopen_avx2_tail_256_rounds_and_x1hash 4988 cmpq $10,%r8 4989 jne .Lopen_avx2_tail_256_rounds 4990 movq %rbx,%r8 4991 subq %rsi,%rbx 4992 movq %rbx,%rcx 4993 movq 0+128(%rbp),%rbx 4994.Lopen_avx2_tail_256_hash: 4995 addq $16,%rcx 4996 cmpq %rbx,%rcx 4997 jg .Lopen_avx2_tail_256_done 4998 addq 0+0(%r8),%r10 4999 adcq 8+0(%r8),%r11 5000 adcq $1,%r12 5001 movq 0+0+0(%rbp),%rdx 5002 movq %rdx,%r15 5003 mulxq %r10,%r13,%r14 5004 mulxq %r11,%rax,%rdx 5005 imulq %r12,%r15 5006 addq %rax,%r14 5007 adcq %rdx,%r15 5008 movq 8+0+0(%rbp),%rdx 5009 mulxq %r10,%r10,%rax 5010 addq %r10,%r14 5011 mulxq %r11,%r11,%r9 5012 adcq %r11,%r15 5013 adcq $0,%r9 5014 imulq %r12,%rdx 5015 addq %rax,%r15 5016 adcq %rdx,%r9 5017 movq %r13,%r10 5018 movq %r14,%r11 5019 movq %r15,%r12 5020 andq $3,%r12 5021 movq %r15,%r13 5022 andq $-4,%r13 5023 movq %r9,%r14 5024 shrdq $2,%r9,%r15 5025 shrq $2,%r9 5026 addq %r13,%r15 5027 adcq %r14,%r9 5028 addq %r15,%r10 5029 adcq %r9,%r11 5030 adcq $0,%r12 5031 5032 leaq 16(%r8),%r8 5033 jmp .Lopen_avx2_tail_256_hash 5034.Lopen_avx2_tail_256_done: 5035 vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 5036 vpaddd 
0+64(%rbp),%ymm5,%ymm5 5037 vpaddd 0+96(%rbp),%ymm9,%ymm9 5038 vpaddd 0+192(%rbp),%ymm13,%ymm13 5039 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 5040 vpaddd 0+64(%rbp),%ymm4,%ymm4 5041 vpaddd 0+96(%rbp),%ymm8,%ymm8 5042 vpaddd 0+160(%rbp),%ymm12,%ymm12 5043 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 5044 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 5045 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 5046 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 5047 vpxor 0+0(%rsi),%ymm3,%ymm3 5048 vpxor 32+0(%rsi),%ymm1,%ymm1 5049 vpxor 64+0(%rsi),%ymm5,%ymm5 5050 vpxor 96+0(%rsi),%ymm9,%ymm9 5051 vmovdqu %ymm3,0+0(%rdi) 5052 vmovdqu %ymm1,32+0(%rdi) 5053 vmovdqu %ymm5,64+0(%rdi) 5054 vmovdqu %ymm9,96+0(%rdi) 5055 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 5056 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 5057 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 5058 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 5059 vmovdqa %ymm3,%ymm8 5060 5061 leaq 128(%rsi),%rsi 5062 leaq 128(%rdi),%rdi 5063 subq $128,%rbx 5064 jmp .Lopen_avx2_tail_128_xor 5065 5066.Lopen_avx2_tail_384: 5067 vmovdqa .Lchacha20_consts(%rip),%ymm0 5068 vmovdqa 0+64(%rbp),%ymm4 5069 vmovdqa 0+96(%rbp),%ymm8 5070 vmovdqa %ymm0,%ymm1 5071 vmovdqa %ymm4,%ymm5 5072 vmovdqa %ymm8,%ymm9 5073 vmovdqa %ymm0,%ymm2 5074 vmovdqa %ymm4,%ymm6 5075 vmovdqa %ymm8,%ymm10 5076 vmovdqa .Lavx2_inc(%rip),%ymm12 5077 vpaddd 0+160(%rbp),%ymm12,%ymm14 5078 vpaddd %ymm14,%ymm12,%ymm13 5079 vpaddd %ymm13,%ymm12,%ymm12 5080 vmovdqa %ymm12,0+160(%rbp) 5081 vmovdqa %ymm13,0+192(%rbp) 5082 vmovdqa %ymm14,0+224(%rbp) 5083 5084 movq %rbx,0+128(%rbp) 5085 movq %rbx,%rcx 5086 subq $256,%rcx 5087 shrq $4,%rcx 5088 addq $6,%rcx 5089 movq $10,%r8 5090 cmpq $10,%rcx 5091 cmovgq %r8,%rcx 5092 movq %rsi,%rbx 5093 xorq %r8,%r8 5094.Lopen_avx2_tail_384_rounds_and_x2hash: 5095 addq 0+0(%rbx),%r10 5096 adcq 8+0(%rbx),%r11 5097 adcq $1,%r12 5098 movq 0+0+0(%rbp),%rdx 5099 movq %rdx,%r15 5100 mulxq %r10,%r13,%r14 5101 mulxq %r11,%rax,%rdx 5102 imulq %r12,%r15 5103 addq %rax,%r14 5104 adcq %rdx,%r15 5105 movq 8+0+0(%rbp),%rdx 5106 mulxq %r10,%r10,%rax 5107 addq %r10,%r14 5108 mulxq %r11,%r11,%r9 5109 adcq %r11,%r15 5110 adcq $0,%r9 5111 imulq %r12,%rdx 5112 addq %rax,%r15 5113 adcq %rdx,%r9 5114 movq %r13,%r10 5115 movq %r14,%r11 5116 movq %r15,%r12 5117 andq $3,%r12 5118 movq %r15,%r13 5119 andq $-4,%r13 5120 movq %r9,%r14 5121 shrdq $2,%r9,%r15 5122 shrq $2,%r9 5123 addq %r13,%r15 5124 adcq %r14,%r9 5125 addq %r15,%r10 5126 adcq %r9,%r11 5127 adcq $0,%r12 5128 5129 leaq 16(%rbx),%rbx 5130.Lopen_avx2_tail_384_rounds_and_x1hash: 5131 vpaddd %ymm6,%ymm2,%ymm2 5132 vpxor %ymm2,%ymm14,%ymm14 5133 vpshufb .Lrol16(%rip),%ymm14,%ymm14 5134 vpaddd %ymm14,%ymm10,%ymm10 5135 vpxor %ymm10,%ymm6,%ymm6 5136 vpsrld $20,%ymm6,%ymm3 5137 vpslld $12,%ymm6,%ymm6 5138 vpxor %ymm3,%ymm6,%ymm6 5139 vpaddd %ymm6,%ymm2,%ymm2 5140 vpxor %ymm2,%ymm14,%ymm14 5141 vpshufb .Lrol8(%rip),%ymm14,%ymm14 5142 vpaddd %ymm14,%ymm10,%ymm10 5143 vpxor %ymm10,%ymm6,%ymm6 5144 vpslld $7,%ymm6,%ymm3 5145 vpsrld $25,%ymm6,%ymm6 5146 vpxor %ymm3,%ymm6,%ymm6 5147 vpalignr $12,%ymm14,%ymm14,%ymm14 5148 vpalignr $8,%ymm10,%ymm10,%ymm10 5149 vpalignr $4,%ymm6,%ymm6,%ymm6 5150 vpaddd %ymm5,%ymm1,%ymm1 5151 vpxor %ymm1,%ymm13,%ymm13 5152 vpshufb .Lrol16(%rip),%ymm13,%ymm13 5153 vpaddd %ymm13,%ymm9,%ymm9 5154 vpxor %ymm9,%ymm5,%ymm5 5155 vpsrld $20,%ymm5,%ymm3 5156 vpslld $12,%ymm5,%ymm5 5157 vpxor %ymm3,%ymm5,%ymm5 5158 vpaddd %ymm5,%ymm1,%ymm1 5159 vpxor %ymm1,%ymm13,%ymm13 5160 vpshufb .Lrol8(%rip),%ymm13,%ymm13 5161 vpaddd %ymm13,%ymm9,%ymm9 5162 vpxor %ymm9,%ymm5,%ymm5 5163 vpslld $7,%ymm5,%ymm3 5164 
vpsrld $25,%ymm5,%ymm5 5165 vpxor %ymm3,%ymm5,%ymm5 5166 vpalignr $12,%ymm13,%ymm13,%ymm13 5167 vpalignr $8,%ymm9,%ymm9,%ymm9 5168 vpalignr $4,%ymm5,%ymm5,%ymm5 5169 vpaddd %ymm4,%ymm0,%ymm0 5170 vpxor %ymm0,%ymm12,%ymm12 5171 vpshufb .Lrol16(%rip),%ymm12,%ymm12 5172 vpaddd %ymm12,%ymm8,%ymm8 5173 vpxor %ymm8,%ymm4,%ymm4 5174 vpsrld $20,%ymm4,%ymm3 5175 vpslld $12,%ymm4,%ymm4 5176 vpxor %ymm3,%ymm4,%ymm4 5177 vpaddd %ymm4,%ymm0,%ymm0 5178 vpxor %ymm0,%ymm12,%ymm12 5179 vpshufb .Lrol8(%rip),%ymm12,%ymm12 5180 vpaddd %ymm12,%ymm8,%ymm8 5181 vpxor %ymm8,%ymm4,%ymm4 5182 vpslld $7,%ymm4,%ymm3 5183 vpsrld $25,%ymm4,%ymm4 5184 vpxor %ymm3,%ymm4,%ymm4 5185 vpalignr $12,%ymm12,%ymm12,%ymm12 5186 vpalignr $8,%ymm8,%ymm8,%ymm8 5187 vpalignr $4,%ymm4,%ymm4,%ymm4 5188 addq 0+0(%rbx),%r10 5189 adcq 8+0(%rbx),%r11 5190 adcq $1,%r12 5191 movq 0+0+0(%rbp),%rax 5192 movq %rax,%r15 5193 mulq %r10 5194 movq %rax,%r13 5195 movq %rdx,%r14 5196 movq 0+0+0(%rbp),%rax 5197 mulq %r11 5198 imulq %r12,%r15 5199 addq %rax,%r14 5200 adcq %rdx,%r15 5201 movq 8+0+0(%rbp),%rax 5202 movq %rax,%r9 5203 mulq %r10 5204 addq %rax,%r14 5205 adcq $0,%rdx 5206 movq %rdx,%r10 5207 movq 8+0+0(%rbp),%rax 5208 mulq %r11 5209 addq %rax,%r15 5210 adcq $0,%rdx 5211 imulq %r12,%r9 5212 addq %r10,%r15 5213 adcq %rdx,%r9 5214 movq %r13,%r10 5215 movq %r14,%r11 5216 movq %r15,%r12 5217 andq $3,%r12 5218 movq %r15,%r13 5219 andq $-4,%r13 5220 movq %r9,%r14 5221 shrdq $2,%r9,%r15 5222 shrq $2,%r9 5223 addq %r13,%r15 5224 adcq %r14,%r9 5225 addq %r15,%r10 5226 adcq %r9,%r11 5227 adcq $0,%r12 5228 5229 leaq 16(%rbx),%rbx 5230 incq %r8 5231 vpaddd %ymm6,%ymm2,%ymm2 5232 vpxor %ymm2,%ymm14,%ymm14 5233 vpshufb .Lrol16(%rip),%ymm14,%ymm14 5234 vpaddd %ymm14,%ymm10,%ymm10 5235 vpxor %ymm10,%ymm6,%ymm6 5236 vpsrld $20,%ymm6,%ymm3 5237 vpslld $12,%ymm6,%ymm6 5238 vpxor %ymm3,%ymm6,%ymm6 5239 vpaddd %ymm6,%ymm2,%ymm2 5240 vpxor %ymm2,%ymm14,%ymm14 5241 vpshufb .Lrol8(%rip),%ymm14,%ymm14 5242 vpaddd %ymm14,%ymm10,%ymm10 5243 vpxor %ymm10,%ymm6,%ymm6 5244 vpslld $7,%ymm6,%ymm3 5245 vpsrld $25,%ymm6,%ymm6 5246 vpxor %ymm3,%ymm6,%ymm6 5247 vpalignr $4,%ymm14,%ymm14,%ymm14 5248 vpalignr $8,%ymm10,%ymm10,%ymm10 5249 vpalignr $12,%ymm6,%ymm6,%ymm6 5250 vpaddd %ymm5,%ymm1,%ymm1 5251 vpxor %ymm1,%ymm13,%ymm13 5252 vpshufb .Lrol16(%rip),%ymm13,%ymm13 5253 vpaddd %ymm13,%ymm9,%ymm9 5254 vpxor %ymm9,%ymm5,%ymm5 5255 vpsrld $20,%ymm5,%ymm3 5256 vpslld $12,%ymm5,%ymm5 5257 vpxor %ymm3,%ymm5,%ymm5 5258 vpaddd %ymm5,%ymm1,%ymm1 5259 vpxor %ymm1,%ymm13,%ymm13 5260 vpshufb .Lrol8(%rip),%ymm13,%ymm13 5261 vpaddd %ymm13,%ymm9,%ymm9 5262 vpxor %ymm9,%ymm5,%ymm5 5263 vpslld $7,%ymm5,%ymm3 5264 vpsrld $25,%ymm5,%ymm5 5265 vpxor %ymm3,%ymm5,%ymm5 5266 vpalignr $4,%ymm13,%ymm13,%ymm13 5267 vpalignr $8,%ymm9,%ymm9,%ymm9 5268 vpalignr $12,%ymm5,%ymm5,%ymm5 5269 vpaddd %ymm4,%ymm0,%ymm0 5270 vpxor %ymm0,%ymm12,%ymm12 5271 vpshufb .Lrol16(%rip),%ymm12,%ymm12 5272 vpaddd %ymm12,%ymm8,%ymm8 5273 vpxor %ymm8,%ymm4,%ymm4 5274 vpsrld $20,%ymm4,%ymm3 5275 vpslld $12,%ymm4,%ymm4 5276 vpxor %ymm3,%ymm4,%ymm4 5277 vpaddd %ymm4,%ymm0,%ymm0 5278 vpxor %ymm0,%ymm12,%ymm12 5279 vpshufb .Lrol8(%rip),%ymm12,%ymm12 5280 vpaddd %ymm12,%ymm8,%ymm8 5281 vpxor %ymm8,%ymm4,%ymm4 5282 vpslld $7,%ymm4,%ymm3 5283 vpsrld $25,%ymm4,%ymm4 5284 vpxor %ymm3,%ymm4,%ymm4 5285 vpalignr $4,%ymm12,%ymm12,%ymm12 5286 vpalignr $8,%ymm8,%ymm8,%ymm8 5287 vpalignr $12,%ymm4,%ymm4,%ymm4 5288 5289 cmpq %rcx,%r8 5290 jb .Lopen_avx2_tail_384_rounds_and_x2hash 5291 cmpq $10,%r8 5292 jne 
.Lopen_avx2_tail_384_rounds_and_x1hash 5293 movq %rbx,%r8 5294 subq %rsi,%rbx 5295 movq %rbx,%rcx 5296 movq 0+128(%rbp),%rbx 5297.Lopen_avx2_384_tail_hash: 5298 addq $16,%rcx 5299 cmpq %rbx,%rcx 5300 jg .Lopen_avx2_384_tail_done 5301 addq 0+0(%r8),%r10 5302 adcq 8+0(%r8),%r11 5303 adcq $1,%r12 5304 movq 0+0+0(%rbp),%rdx 5305 movq %rdx,%r15 5306 mulxq %r10,%r13,%r14 5307 mulxq %r11,%rax,%rdx 5308 imulq %r12,%r15 5309 addq %rax,%r14 5310 adcq %rdx,%r15 5311 movq 8+0+0(%rbp),%rdx 5312 mulxq %r10,%r10,%rax 5313 addq %r10,%r14 5314 mulxq %r11,%r11,%r9 5315 adcq %r11,%r15 5316 adcq $0,%r9 5317 imulq %r12,%rdx 5318 addq %rax,%r15 5319 adcq %rdx,%r9 5320 movq %r13,%r10 5321 movq %r14,%r11 5322 movq %r15,%r12 5323 andq $3,%r12 5324 movq %r15,%r13 5325 andq $-4,%r13 5326 movq %r9,%r14 5327 shrdq $2,%r9,%r15 5328 shrq $2,%r9 5329 addq %r13,%r15 5330 adcq %r14,%r9 5331 addq %r15,%r10 5332 adcq %r9,%r11 5333 adcq $0,%r12 5334 5335 leaq 16(%r8),%r8 5336 jmp .Lopen_avx2_384_tail_hash 5337.Lopen_avx2_384_tail_done: 5338 vpaddd .Lchacha20_consts(%rip),%ymm2,%ymm2 5339 vpaddd 0+64(%rbp),%ymm6,%ymm6 5340 vpaddd 0+96(%rbp),%ymm10,%ymm10 5341 vpaddd 0+224(%rbp),%ymm14,%ymm14 5342 vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 5343 vpaddd 0+64(%rbp),%ymm5,%ymm5 5344 vpaddd 0+96(%rbp),%ymm9,%ymm9 5345 vpaddd 0+192(%rbp),%ymm13,%ymm13 5346 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 5347 vpaddd 0+64(%rbp),%ymm4,%ymm4 5348 vpaddd 0+96(%rbp),%ymm8,%ymm8 5349 vpaddd 0+160(%rbp),%ymm12,%ymm12 5350 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 5351 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 5352 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 5353 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 5354 vpxor 0+0(%rsi),%ymm3,%ymm3 5355 vpxor 32+0(%rsi),%ymm2,%ymm2 5356 vpxor 64+0(%rsi),%ymm6,%ymm6 5357 vpxor 96+0(%rsi),%ymm10,%ymm10 5358 vmovdqu %ymm3,0+0(%rdi) 5359 vmovdqu %ymm2,32+0(%rdi) 5360 vmovdqu %ymm6,64+0(%rdi) 5361 vmovdqu %ymm10,96+0(%rdi) 5362 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 5363 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 5364 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 5365 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 5366 vpxor 0+128(%rsi),%ymm3,%ymm3 5367 vpxor 32+128(%rsi),%ymm1,%ymm1 5368 vpxor 64+128(%rsi),%ymm5,%ymm5 5369 vpxor 96+128(%rsi),%ymm9,%ymm9 5370 vmovdqu %ymm3,0+128(%rdi) 5371 vmovdqu %ymm1,32+128(%rdi) 5372 vmovdqu %ymm5,64+128(%rdi) 5373 vmovdqu %ymm9,96+128(%rdi) 5374 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 5375 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 5376 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 5377 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 5378 vmovdqa %ymm3,%ymm8 5379 5380 leaq 256(%rsi),%rsi 5381 leaq 256(%rdi),%rdi 5382 subq $256,%rbx 5383 jmp .Lopen_avx2_tail_128_xor 5384 5385.Lopen_avx2_tail_512: 5386 vmovdqa .Lchacha20_consts(%rip),%ymm0 5387 vmovdqa 0+64(%rbp),%ymm4 5388 vmovdqa 0+96(%rbp),%ymm8 5389 vmovdqa %ymm0,%ymm1 5390 vmovdqa %ymm4,%ymm5 5391 vmovdqa %ymm8,%ymm9 5392 vmovdqa %ymm0,%ymm2 5393 vmovdqa %ymm4,%ymm6 5394 vmovdqa %ymm8,%ymm10 5395 vmovdqa %ymm0,%ymm3 5396 vmovdqa %ymm4,%ymm7 5397 vmovdqa %ymm8,%ymm11 5398 vmovdqa .Lavx2_inc(%rip),%ymm12 5399 vpaddd 0+160(%rbp),%ymm12,%ymm15 5400 vpaddd %ymm15,%ymm12,%ymm14 5401 vpaddd %ymm14,%ymm12,%ymm13 5402 vpaddd %ymm13,%ymm12,%ymm12 5403 vmovdqa %ymm15,0+256(%rbp) 5404 vmovdqa %ymm14,0+224(%rbp) 5405 vmovdqa %ymm13,0+192(%rbp) 5406 vmovdqa %ymm12,0+160(%rbp) 5407 5408 xorq %rcx,%rcx 5409 movq %rsi,%r8 5410.Lopen_avx2_tail_512_rounds_and_x2hash: 5411 addq 0+0(%r8),%r10 5412 adcq 8+0(%r8),%r11 5413 adcq $1,%r12 5414 movq 0+0+0(%rbp),%rax 5415 movq %rax,%r15 5416 mulq %r10 5417 movq %rax,%r13 5418 movq %rdx,%r14 
5419 movq 0+0+0(%rbp),%rax 5420 mulq %r11 5421 imulq %r12,%r15 5422 addq %rax,%r14 5423 adcq %rdx,%r15 5424 movq 8+0+0(%rbp),%rax 5425 movq %rax,%r9 5426 mulq %r10 5427 addq %rax,%r14 5428 adcq $0,%rdx 5429 movq %rdx,%r10 5430 movq 8+0+0(%rbp),%rax 5431 mulq %r11 5432 addq %rax,%r15 5433 adcq $0,%rdx 5434 imulq %r12,%r9 5435 addq %r10,%r15 5436 adcq %rdx,%r9 5437 movq %r13,%r10 5438 movq %r14,%r11 5439 movq %r15,%r12 5440 andq $3,%r12 5441 movq %r15,%r13 5442 andq $-4,%r13 5443 movq %r9,%r14 5444 shrdq $2,%r9,%r15 5445 shrq $2,%r9 5446 addq %r13,%r15 5447 adcq %r14,%r9 5448 addq %r15,%r10 5449 adcq %r9,%r11 5450 adcq $0,%r12 5451 5452 leaq 16(%r8),%r8 5453.Lopen_avx2_tail_512_rounds_and_x1hash: 5454 vmovdqa %ymm8,0+128(%rbp) 5455 vmovdqa .Lrol16(%rip),%ymm8 5456 vpaddd %ymm7,%ymm3,%ymm3 5457 vpaddd %ymm6,%ymm2,%ymm2 5458 vpaddd %ymm5,%ymm1,%ymm1 5459 vpaddd %ymm4,%ymm0,%ymm0 5460 vpxor %ymm3,%ymm15,%ymm15 5461 vpxor %ymm2,%ymm14,%ymm14 5462 vpxor %ymm1,%ymm13,%ymm13 5463 vpxor %ymm0,%ymm12,%ymm12 5464 vpshufb %ymm8,%ymm15,%ymm15 5465 vpshufb %ymm8,%ymm14,%ymm14 5466 vpshufb %ymm8,%ymm13,%ymm13 5467 vpshufb %ymm8,%ymm12,%ymm12 5468 vpaddd %ymm15,%ymm11,%ymm11 5469 vpaddd %ymm14,%ymm10,%ymm10 5470 vpaddd %ymm13,%ymm9,%ymm9 5471 vpaddd 0+128(%rbp),%ymm12,%ymm8 5472 vpxor %ymm11,%ymm7,%ymm7 5473 vpxor %ymm10,%ymm6,%ymm6 5474 vpxor %ymm9,%ymm5,%ymm5 5475 vpxor %ymm8,%ymm4,%ymm4 5476 vmovdqa %ymm8,0+128(%rbp) 5477 vpsrld $20,%ymm7,%ymm8 5478 vpslld $32-20,%ymm7,%ymm7 5479 vpxor %ymm8,%ymm7,%ymm7 5480 vpsrld $20,%ymm6,%ymm8 5481 vpslld $32-20,%ymm6,%ymm6 5482 vpxor %ymm8,%ymm6,%ymm6 5483 vpsrld $20,%ymm5,%ymm8 5484 vpslld $32-20,%ymm5,%ymm5 5485 vpxor %ymm8,%ymm5,%ymm5 5486 vpsrld $20,%ymm4,%ymm8 5487 vpslld $32-20,%ymm4,%ymm4 5488 vpxor %ymm8,%ymm4,%ymm4 5489 vmovdqa .Lrol8(%rip),%ymm8 5490 vpaddd %ymm7,%ymm3,%ymm3 5491 addq 0+0(%r8),%r10 5492 adcq 8+0(%r8),%r11 5493 adcq $1,%r12 5494 movq 0+0+0(%rbp),%rdx 5495 movq %rdx,%r15 5496 mulxq %r10,%r13,%r14 5497 mulxq %r11,%rax,%rdx 5498 imulq %r12,%r15 5499 addq %rax,%r14 5500 adcq %rdx,%r15 5501 movq 8+0+0(%rbp),%rdx 5502 mulxq %r10,%r10,%rax 5503 addq %r10,%r14 5504 mulxq %r11,%r11,%r9 5505 adcq %r11,%r15 5506 adcq $0,%r9 5507 imulq %r12,%rdx 5508 addq %rax,%r15 5509 adcq %rdx,%r9 5510 movq %r13,%r10 5511 movq %r14,%r11 5512 movq %r15,%r12 5513 andq $3,%r12 5514 movq %r15,%r13 5515 andq $-4,%r13 5516 movq %r9,%r14 5517 shrdq $2,%r9,%r15 5518 shrq $2,%r9 5519 addq %r13,%r15 5520 adcq %r14,%r9 5521 addq %r15,%r10 5522 adcq %r9,%r11 5523 adcq $0,%r12 5524 vpaddd %ymm6,%ymm2,%ymm2 5525 vpaddd %ymm5,%ymm1,%ymm1 5526 vpaddd %ymm4,%ymm0,%ymm0 5527 vpxor %ymm3,%ymm15,%ymm15 5528 vpxor %ymm2,%ymm14,%ymm14 5529 vpxor %ymm1,%ymm13,%ymm13 5530 vpxor %ymm0,%ymm12,%ymm12 5531 vpshufb %ymm8,%ymm15,%ymm15 5532 vpshufb %ymm8,%ymm14,%ymm14 5533 vpshufb %ymm8,%ymm13,%ymm13 5534 vpshufb %ymm8,%ymm12,%ymm12 5535 vpaddd %ymm15,%ymm11,%ymm11 5536 vpaddd %ymm14,%ymm10,%ymm10 5537 vpaddd %ymm13,%ymm9,%ymm9 5538 vpaddd 0+128(%rbp),%ymm12,%ymm8 5539 vpxor %ymm11,%ymm7,%ymm7 5540 vpxor %ymm10,%ymm6,%ymm6 5541 vpxor %ymm9,%ymm5,%ymm5 5542 vpxor %ymm8,%ymm4,%ymm4 5543 vmovdqa %ymm8,0+128(%rbp) 5544 vpsrld $25,%ymm7,%ymm8 5545 vpslld $32-25,%ymm7,%ymm7 5546 vpxor %ymm8,%ymm7,%ymm7 5547 vpsrld $25,%ymm6,%ymm8 5548 vpslld $32-25,%ymm6,%ymm6 5549 vpxor %ymm8,%ymm6,%ymm6 5550 vpsrld $25,%ymm5,%ymm8 5551 vpslld $32-25,%ymm5,%ymm5 5552 vpxor %ymm8,%ymm5,%ymm5 5553 vpsrld $25,%ymm4,%ymm8 5554 vpslld $32-25,%ymm4,%ymm4 5555 vpxor %ymm8,%ymm4,%ymm4 5556 vmovdqa 0+128(%rbp),%ymm8 5557 
vpalignr $4,%ymm7,%ymm7,%ymm7 5558 vpalignr $8,%ymm11,%ymm11,%ymm11 5559 vpalignr $12,%ymm15,%ymm15,%ymm15 5560 vpalignr $4,%ymm6,%ymm6,%ymm6 5561 vpalignr $8,%ymm10,%ymm10,%ymm10 5562 vpalignr $12,%ymm14,%ymm14,%ymm14 5563 vpalignr $4,%ymm5,%ymm5,%ymm5 5564 vpalignr $8,%ymm9,%ymm9,%ymm9 5565 vpalignr $12,%ymm13,%ymm13,%ymm13 5566 vpalignr $4,%ymm4,%ymm4,%ymm4 5567 vpalignr $8,%ymm8,%ymm8,%ymm8 5568 vpalignr $12,%ymm12,%ymm12,%ymm12 5569 vmovdqa %ymm8,0+128(%rbp) 5570 vmovdqa .Lrol16(%rip),%ymm8 5571 vpaddd %ymm7,%ymm3,%ymm3 5572 addq 0+16(%r8),%r10 5573 adcq 8+16(%r8),%r11 5574 adcq $1,%r12 5575 movq 0+0+0(%rbp),%rdx 5576 movq %rdx,%r15 5577 mulxq %r10,%r13,%r14 5578 mulxq %r11,%rax,%rdx 5579 imulq %r12,%r15 5580 addq %rax,%r14 5581 adcq %rdx,%r15 5582 movq 8+0+0(%rbp),%rdx 5583 mulxq %r10,%r10,%rax 5584 addq %r10,%r14 5585 mulxq %r11,%r11,%r9 5586 adcq %r11,%r15 5587 adcq $0,%r9 5588 imulq %r12,%rdx 5589 addq %rax,%r15 5590 adcq %rdx,%r9 5591 movq %r13,%r10 5592 movq %r14,%r11 5593 movq %r15,%r12 5594 andq $3,%r12 5595 movq %r15,%r13 5596 andq $-4,%r13 5597 movq %r9,%r14 5598 shrdq $2,%r9,%r15 5599 shrq $2,%r9 5600 addq %r13,%r15 5601 adcq %r14,%r9 5602 addq %r15,%r10 5603 adcq %r9,%r11 5604 adcq $0,%r12 5605 5606 leaq 32(%r8),%r8 5607 vpaddd %ymm6,%ymm2,%ymm2 5608 vpaddd %ymm5,%ymm1,%ymm1 5609 vpaddd %ymm4,%ymm0,%ymm0 5610 vpxor %ymm3,%ymm15,%ymm15 5611 vpxor %ymm2,%ymm14,%ymm14 5612 vpxor %ymm1,%ymm13,%ymm13 5613 vpxor %ymm0,%ymm12,%ymm12 5614 vpshufb %ymm8,%ymm15,%ymm15 5615 vpshufb %ymm8,%ymm14,%ymm14 5616 vpshufb %ymm8,%ymm13,%ymm13 5617 vpshufb %ymm8,%ymm12,%ymm12 5618 vpaddd %ymm15,%ymm11,%ymm11 5619 vpaddd %ymm14,%ymm10,%ymm10 5620 vpaddd %ymm13,%ymm9,%ymm9 5621 vpaddd 0+128(%rbp),%ymm12,%ymm8 5622 vpxor %ymm11,%ymm7,%ymm7 5623 vpxor %ymm10,%ymm6,%ymm6 5624 vpxor %ymm9,%ymm5,%ymm5 5625 vpxor %ymm8,%ymm4,%ymm4 5626 vmovdqa %ymm8,0+128(%rbp) 5627 vpsrld $20,%ymm7,%ymm8 5628 vpslld $32-20,%ymm7,%ymm7 5629 vpxor %ymm8,%ymm7,%ymm7 5630 vpsrld $20,%ymm6,%ymm8 5631 vpslld $32-20,%ymm6,%ymm6 5632 vpxor %ymm8,%ymm6,%ymm6 5633 vpsrld $20,%ymm5,%ymm8 5634 vpslld $32-20,%ymm5,%ymm5 5635 vpxor %ymm8,%ymm5,%ymm5 5636 vpsrld $20,%ymm4,%ymm8 5637 vpslld $32-20,%ymm4,%ymm4 5638 vpxor %ymm8,%ymm4,%ymm4 5639 vmovdqa .Lrol8(%rip),%ymm8 5640 vpaddd %ymm7,%ymm3,%ymm3 5641 vpaddd %ymm6,%ymm2,%ymm2 5642 vpaddd %ymm5,%ymm1,%ymm1 5643 vpaddd %ymm4,%ymm0,%ymm0 5644 vpxor %ymm3,%ymm15,%ymm15 5645 vpxor %ymm2,%ymm14,%ymm14 5646 vpxor %ymm1,%ymm13,%ymm13 5647 vpxor %ymm0,%ymm12,%ymm12 5648 vpshufb %ymm8,%ymm15,%ymm15 5649 vpshufb %ymm8,%ymm14,%ymm14 5650 vpshufb %ymm8,%ymm13,%ymm13 5651 vpshufb %ymm8,%ymm12,%ymm12 5652 vpaddd %ymm15,%ymm11,%ymm11 5653 vpaddd %ymm14,%ymm10,%ymm10 5654 vpaddd %ymm13,%ymm9,%ymm9 5655 vpaddd 0+128(%rbp),%ymm12,%ymm8 5656 vpxor %ymm11,%ymm7,%ymm7 5657 vpxor %ymm10,%ymm6,%ymm6 5658 vpxor %ymm9,%ymm5,%ymm5 5659 vpxor %ymm8,%ymm4,%ymm4 5660 vmovdqa %ymm8,0+128(%rbp) 5661 vpsrld $25,%ymm7,%ymm8 5662 vpslld $32-25,%ymm7,%ymm7 5663 vpxor %ymm8,%ymm7,%ymm7 5664 vpsrld $25,%ymm6,%ymm8 5665 vpslld $32-25,%ymm6,%ymm6 5666 vpxor %ymm8,%ymm6,%ymm6 5667 vpsrld $25,%ymm5,%ymm8 5668 vpslld $32-25,%ymm5,%ymm5 5669 vpxor %ymm8,%ymm5,%ymm5 5670 vpsrld $25,%ymm4,%ymm8 5671 vpslld $32-25,%ymm4,%ymm4 5672 vpxor %ymm8,%ymm4,%ymm4 5673 vmovdqa 0+128(%rbp),%ymm8 5674 vpalignr $12,%ymm7,%ymm7,%ymm7 5675 vpalignr $8,%ymm11,%ymm11,%ymm11 5676 vpalignr $4,%ymm15,%ymm15,%ymm15 5677 vpalignr $12,%ymm6,%ymm6,%ymm6 5678 vpalignr $8,%ymm10,%ymm10,%ymm10 5679 vpalignr $4,%ymm14,%ymm14,%ymm14 5680 vpalignr 
$12,%ymm5,%ymm5,%ymm5 5681 vpalignr $8,%ymm9,%ymm9,%ymm9 5682 vpalignr $4,%ymm13,%ymm13,%ymm13 5683 vpalignr $12,%ymm4,%ymm4,%ymm4 5684 vpalignr $8,%ymm8,%ymm8,%ymm8 5685 vpalignr $4,%ymm12,%ymm12,%ymm12 5686 5687 incq %rcx 5688 cmpq $4,%rcx 5689 jl .Lopen_avx2_tail_512_rounds_and_x2hash 5690 cmpq $10,%rcx 5691 jne .Lopen_avx2_tail_512_rounds_and_x1hash 5692 movq %rbx,%rcx 5693 subq $384,%rcx 5694 andq $-16,%rcx 5695.Lopen_avx2_tail_512_hash: 5696 testq %rcx,%rcx 5697 je .Lopen_avx2_tail_512_done 5698 addq 0+0(%r8),%r10 5699 adcq 8+0(%r8),%r11 5700 adcq $1,%r12 5701 movq 0+0+0(%rbp),%rdx 5702 movq %rdx,%r15 5703 mulxq %r10,%r13,%r14 5704 mulxq %r11,%rax,%rdx 5705 imulq %r12,%r15 5706 addq %rax,%r14 5707 adcq %rdx,%r15 5708 movq 8+0+0(%rbp),%rdx 5709 mulxq %r10,%r10,%rax 5710 addq %r10,%r14 5711 mulxq %r11,%r11,%r9 5712 adcq %r11,%r15 5713 adcq $0,%r9 5714 imulq %r12,%rdx 5715 addq %rax,%r15 5716 adcq %rdx,%r9 5717 movq %r13,%r10 5718 movq %r14,%r11 5719 movq %r15,%r12 5720 andq $3,%r12 5721 movq %r15,%r13 5722 andq $-4,%r13 5723 movq %r9,%r14 5724 shrdq $2,%r9,%r15 5725 shrq $2,%r9 5726 addq %r13,%r15 5727 adcq %r14,%r9 5728 addq %r15,%r10 5729 adcq %r9,%r11 5730 adcq $0,%r12 5731 5732 leaq 16(%r8),%r8 5733 subq $16,%rcx 5734 jmp .Lopen_avx2_tail_512_hash 5735.Lopen_avx2_tail_512_done: 5736 vpaddd .Lchacha20_consts(%rip),%ymm3,%ymm3 5737 vpaddd 0+64(%rbp),%ymm7,%ymm7 5738 vpaddd 0+96(%rbp),%ymm11,%ymm11 5739 vpaddd 0+256(%rbp),%ymm15,%ymm15 5740 vpaddd .Lchacha20_consts(%rip),%ymm2,%ymm2 5741 vpaddd 0+64(%rbp),%ymm6,%ymm6 5742 vpaddd 0+96(%rbp),%ymm10,%ymm10 5743 vpaddd 0+224(%rbp),%ymm14,%ymm14 5744 vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 5745 vpaddd 0+64(%rbp),%ymm5,%ymm5 5746 vpaddd 0+96(%rbp),%ymm9,%ymm9 5747 vpaddd 0+192(%rbp),%ymm13,%ymm13 5748 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 5749 vpaddd 0+64(%rbp),%ymm4,%ymm4 5750 vpaddd 0+96(%rbp),%ymm8,%ymm8 5751 vpaddd 0+160(%rbp),%ymm12,%ymm12 5752 5753 vmovdqa %ymm0,0+128(%rbp) 5754 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 5755 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 5756 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 5757 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 5758 vpxor 0+0(%rsi),%ymm0,%ymm0 5759 vpxor 32+0(%rsi),%ymm3,%ymm3 5760 vpxor 64+0(%rsi),%ymm7,%ymm7 5761 vpxor 96+0(%rsi),%ymm11,%ymm11 5762 vmovdqu %ymm0,0+0(%rdi) 5763 vmovdqu %ymm3,32+0(%rdi) 5764 vmovdqu %ymm7,64+0(%rdi) 5765 vmovdqu %ymm11,96+0(%rdi) 5766 5767 vmovdqa 0+128(%rbp),%ymm0 5768 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 5769 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 5770 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 5771 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 5772 vpxor 0+128(%rsi),%ymm3,%ymm3 5773 vpxor 32+128(%rsi),%ymm2,%ymm2 5774 vpxor 64+128(%rsi),%ymm6,%ymm6 5775 vpxor 96+128(%rsi),%ymm10,%ymm10 5776 vmovdqu %ymm3,0+128(%rdi) 5777 vmovdqu %ymm2,32+128(%rdi) 5778 vmovdqu %ymm6,64+128(%rdi) 5779 vmovdqu %ymm10,96+128(%rdi) 5780 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 5781 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 5782 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 5783 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 5784 vpxor 0+256(%rsi),%ymm3,%ymm3 5785 vpxor 32+256(%rsi),%ymm1,%ymm1 5786 vpxor 64+256(%rsi),%ymm5,%ymm5 5787 vpxor 96+256(%rsi),%ymm9,%ymm9 5788 vmovdqu %ymm3,0+256(%rdi) 5789 vmovdqu %ymm1,32+256(%rdi) 5790 vmovdqu %ymm5,64+256(%rdi) 5791 vmovdqu %ymm9,96+256(%rdi) 5792 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 5793 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 5794 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 5795 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 5796 vmovdqa %ymm3,%ymm8 5797 5798 leaq 384(%rsi),%rsi 5799 leaq 384(%rdi),%rdi 5800 subq 
$384,%rbx 5801.Lopen_avx2_tail_128_xor: 5802 cmpq $32,%rbx 5803 jb .Lopen_avx2_tail_32_xor 5804 subq $32,%rbx 5805 vpxor (%rsi),%ymm0,%ymm0 5806 vmovdqu %ymm0,(%rdi) 5807 leaq 32(%rsi),%rsi 5808 leaq 32(%rdi),%rdi 5809 vmovdqa %ymm4,%ymm0 5810 vmovdqa %ymm8,%ymm4 5811 vmovdqa %ymm12,%ymm8 5812 jmp .Lopen_avx2_tail_128_xor 5813.Lopen_avx2_tail_32_xor: 5814 cmpq $16,%rbx 5815 vmovdqa %xmm0,%xmm1 5816 jb .Lopen_avx2_exit 5817 subq $16,%rbx 5818 5819 vpxor (%rsi),%xmm0,%xmm1 5820 vmovdqu %xmm1,(%rdi) 5821 leaq 16(%rsi),%rsi 5822 leaq 16(%rdi),%rdi 5823 vperm2i128 $0x11,%ymm0,%ymm0,%ymm0 5824 vmovdqa %xmm0,%xmm1 5825.Lopen_avx2_exit: 5826 vzeroupper 5827 jmp .Lopen_sse_tail_16 5828 5829.Lopen_avx2_192: 5830 vmovdqa %ymm0,%ymm1 5831 vmovdqa %ymm0,%ymm2 5832 vmovdqa %ymm4,%ymm5 5833 vmovdqa %ymm4,%ymm6 5834 vmovdqa %ymm8,%ymm9 5835 vmovdqa %ymm8,%ymm10 5836 vpaddd .Lavx2_inc(%rip),%ymm12,%ymm13 5837 vmovdqa %ymm12,%ymm11 5838 vmovdqa %ymm13,%ymm15 5839 movq $10,%r10 5840.Lopen_avx2_192_rounds: 5841 vpaddd %ymm4,%ymm0,%ymm0 5842 vpxor %ymm0,%ymm12,%ymm12 5843 vpshufb .Lrol16(%rip),%ymm12,%ymm12 5844 vpaddd %ymm12,%ymm8,%ymm8 5845 vpxor %ymm8,%ymm4,%ymm4 5846 vpsrld $20,%ymm4,%ymm3 5847 vpslld $12,%ymm4,%ymm4 5848 vpxor %ymm3,%ymm4,%ymm4 5849 vpaddd %ymm4,%ymm0,%ymm0 5850 vpxor %ymm0,%ymm12,%ymm12 5851 vpshufb .Lrol8(%rip),%ymm12,%ymm12 5852 vpaddd %ymm12,%ymm8,%ymm8 5853 vpxor %ymm8,%ymm4,%ymm4 5854 vpslld $7,%ymm4,%ymm3 5855 vpsrld $25,%ymm4,%ymm4 5856 vpxor %ymm3,%ymm4,%ymm4 5857 vpalignr $12,%ymm12,%ymm12,%ymm12 5858 vpalignr $8,%ymm8,%ymm8,%ymm8 5859 vpalignr $4,%ymm4,%ymm4,%ymm4 5860 vpaddd %ymm5,%ymm1,%ymm1 5861 vpxor %ymm1,%ymm13,%ymm13 5862 vpshufb .Lrol16(%rip),%ymm13,%ymm13 5863 vpaddd %ymm13,%ymm9,%ymm9 5864 vpxor %ymm9,%ymm5,%ymm5 5865 vpsrld $20,%ymm5,%ymm3 5866 vpslld $12,%ymm5,%ymm5 5867 vpxor %ymm3,%ymm5,%ymm5 5868 vpaddd %ymm5,%ymm1,%ymm1 5869 vpxor %ymm1,%ymm13,%ymm13 5870 vpshufb .Lrol8(%rip),%ymm13,%ymm13 5871 vpaddd %ymm13,%ymm9,%ymm9 5872 vpxor %ymm9,%ymm5,%ymm5 5873 vpslld $7,%ymm5,%ymm3 5874 vpsrld $25,%ymm5,%ymm5 5875 vpxor %ymm3,%ymm5,%ymm5 5876 vpalignr $12,%ymm13,%ymm13,%ymm13 5877 vpalignr $8,%ymm9,%ymm9,%ymm9 5878 vpalignr $4,%ymm5,%ymm5,%ymm5 5879 vpaddd %ymm4,%ymm0,%ymm0 5880 vpxor %ymm0,%ymm12,%ymm12 5881 vpshufb .Lrol16(%rip),%ymm12,%ymm12 5882 vpaddd %ymm12,%ymm8,%ymm8 5883 vpxor %ymm8,%ymm4,%ymm4 5884 vpsrld $20,%ymm4,%ymm3 5885 vpslld $12,%ymm4,%ymm4 5886 vpxor %ymm3,%ymm4,%ymm4 5887 vpaddd %ymm4,%ymm0,%ymm0 5888 vpxor %ymm0,%ymm12,%ymm12 5889 vpshufb .Lrol8(%rip),%ymm12,%ymm12 5890 vpaddd %ymm12,%ymm8,%ymm8 5891 vpxor %ymm8,%ymm4,%ymm4 5892 vpslld $7,%ymm4,%ymm3 5893 vpsrld $25,%ymm4,%ymm4 5894 vpxor %ymm3,%ymm4,%ymm4 5895 vpalignr $4,%ymm12,%ymm12,%ymm12 5896 vpalignr $8,%ymm8,%ymm8,%ymm8 5897 vpalignr $12,%ymm4,%ymm4,%ymm4 5898 vpaddd %ymm5,%ymm1,%ymm1 5899 vpxor %ymm1,%ymm13,%ymm13 5900 vpshufb .Lrol16(%rip),%ymm13,%ymm13 5901 vpaddd %ymm13,%ymm9,%ymm9 5902 vpxor %ymm9,%ymm5,%ymm5 5903 vpsrld $20,%ymm5,%ymm3 5904 vpslld $12,%ymm5,%ymm5 5905 vpxor %ymm3,%ymm5,%ymm5 5906 vpaddd %ymm5,%ymm1,%ymm1 5907 vpxor %ymm1,%ymm13,%ymm13 5908 vpshufb .Lrol8(%rip),%ymm13,%ymm13 5909 vpaddd %ymm13,%ymm9,%ymm9 5910 vpxor %ymm9,%ymm5,%ymm5 5911 vpslld $7,%ymm5,%ymm3 5912 vpsrld $25,%ymm5,%ymm5 5913 vpxor %ymm3,%ymm5,%ymm5 5914 vpalignr $4,%ymm13,%ymm13,%ymm13 5915 vpalignr $8,%ymm9,%ymm9,%ymm9 5916 vpalignr $12,%ymm5,%ymm5,%ymm5 5917 5918 decq %r10 5919 jne .Lopen_avx2_192_rounds 5920 vpaddd %ymm2,%ymm0,%ymm0 5921 vpaddd %ymm2,%ymm1,%ymm1 5922 vpaddd %ymm6,%ymm4,%ymm4 5923 
vpaddd %ymm6,%ymm5,%ymm5 5924 vpaddd %ymm10,%ymm8,%ymm8 5925 vpaddd %ymm10,%ymm9,%ymm9 5926 vpaddd %ymm11,%ymm12,%ymm12 5927 vpaddd %ymm15,%ymm13,%ymm13 5928 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 5929 5930 vpand .Lclamp(%rip),%ymm3,%ymm3 5931 vmovdqa %ymm3,0+0(%rbp) 5932 5933 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 5934 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 5935 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 5936 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 5937 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 5938 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 5939.Lopen_avx2_short: 5940 movq %r8,%r8 5941 call poly_hash_ad_internal 5942.Lopen_avx2_short_hash_and_xor_loop: 5943 cmpq $32,%rbx 5944 jb .Lopen_avx2_short_tail_32 5945 subq $32,%rbx 5946 addq 0+0(%rsi),%r10 5947 adcq 8+0(%rsi),%r11 5948 adcq $1,%r12 5949 movq 0+0+0(%rbp),%rax 5950 movq %rax,%r15 5951 mulq %r10 5952 movq %rax,%r13 5953 movq %rdx,%r14 5954 movq 0+0+0(%rbp),%rax 5955 mulq %r11 5956 imulq %r12,%r15 5957 addq %rax,%r14 5958 adcq %rdx,%r15 5959 movq 8+0+0(%rbp),%rax 5960 movq %rax,%r9 5961 mulq %r10 5962 addq %rax,%r14 5963 adcq $0,%rdx 5964 movq %rdx,%r10 5965 movq 8+0+0(%rbp),%rax 5966 mulq %r11 5967 addq %rax,%r15 5968 adcq $0,%rdx 5969 imulq %r12,%r9 5970 addq %r10,%r15 5971 adcq %rdx,%r9 5972 movq %r13,%r10 5973 movq %r14,%r11 5974 movq %r15,%r12 5975 andq $3,%r12 5976 movq %r15,%r13 5977 andq $-4,%r13 5978 movq %r9,%r14 5979 shrdq $2,%r9,%r15 5980 shrq $2,%r9 5981 addq %r13,%r15 5982 adcq %r14,%r9 5983 addq %r15,%r10 5984 adcq %r9,%r11 5985 adcq $0,%r12 5986 addq 0+16(%rsi),%r10 5987 adcq 8+16(%rsi),%r11 5988 adcq $1,%r12 5989 movq 0+0+0(%rbp),%rax 5990 movq %rax,%r15 5991 mulq %r10 5992 movq %rax,%r13 5993 movq %rdx,%r14 5994 movq 0+0+0(%rbp),%rax 5995 mulq %r11 5996 imulq %r12,%r15 5997 addq %rax,%r14 5998 adcq %rdx,%r15 5999 movq 8+0+0(%rbp),%rax 6000 movq %rax,%r9 6001 mulq %r10 6002 addq %rax,%r14 6003 adcq $0,%rdx 6004 movq %rdx,%r10 6005 movq 8+0+0(%rbp),%rax 6006 mulq %r11 6007 addq %rax,%r15 6008 adcq $0,%rdx 6009 imulq %r12,%r9 6010 addq %r10,%r15 6011 adcq %rdx,%r9 6012 movq %r13,%r10 6013 movq %r14,%r11 6014 movq %r15,%r12 6015 andq $3,%r12 6016 movq %r15,%r13 6017 andq $-4,%r13 6018 movq %r9,%r14 6019 shrdq $2,%r9,%r15 6020 shrq $2,%r9 6021 addq %r13,%r15 6022 adcq %r14,%r9 6023 addq %r15,%r10 6024 adcq %r9,%r11 6025 adcq $0,%r12 6026 6027 6028 vpxor (%rsi),%ymm0,%ymm0 6029 vmovdqu %ymm0,(%rdi) 6030 leaq 32(%rsi),%rsi 6031 leaq 32(%rdi),%rdi 6032 6033 vmovdqa %ymm4,%ymm0 6034 vmovdqa %ymm8,%ymm4 6035 vmovdqa %ymm12,%ymm8 6036 vmovdqa %ymm1,%ymm12 6037 vmovdqa %ymm5,%ymm1 6038 vmovdqa %ymm9,%ymm5 6039 vmovdqa %ymm13,%ymm9 6040 vmovdqa %ymm2,%ymm13 6041 vmovdqa %ymm6,%ymm2 6042 jmp .Lopen_avx2_short_hash_and_xor_loop 6043.Lopen_avx2_short_tail_32: 6044 cmpq $16,%rbx 6045 vmovdqa %xmm0,%xmm1 6046 jb .Lopen_avx2_short_tail_32_exit 6047 subq $16,%rbx 6048 addq 0+0(%rsi),%r10 6049 adcq 8+0(%rsi),%r11 6050 adcq $1,%r12 6051 movq 0+0+0(%rbp),%rax 6052 movq %rax,%r15 6053 mulq %r10 6054 movq %rax,%r13 6055 movq %rdx,%r14 6056 movq 0+0+0(%rbp),%rax 6057 mulq %r11 6058 imulq %r12,%r15 6059 addq %rax,%r14 6060 adcq %rdx,%r15 6061 movq 8+0+0(%rbp),%rax 6062 movq %rax,%r9 6063 mulq %r10 6064 addq %rax,%r14 6065 adcq $0,%rdx 6066 movq %rdx,%r10 6067 movq 8+0+0(%rbp),%rax 6068 mulq %r11 6069 addq %rax,%r15 6070 adcq $0,%rdx 6071 imulq %r12,%r9 6072 addq %r10,%r15 6073 adcq %rdx,%r9 6074 movq %r13,%r10 6075 movq %r14,%r11 6076 movq %r15,%r12 6077 andq $3,%r12 6078 movq %r15,%r13 6079 andq $-4,%r13 6080 movq %r9,%r14 6081 shrdq $2,%r9,%r15 6082 shrq $2,%r9 6083 addq 
%r13,%r15 6084 adcq %r14,%r9 6085 addq %r15,%r10 6086 adcq %r9,%r11 6087 adcq $0,%r12 6088 6089 vpxor (%rsi),%xmm0,%xmm3 6090 vmovdqu %xmm3,(%rdi) 6091 leaq 16(%rsi),%rsi 6092 leaq 16(%rdi),%rdi 6093 vextracti128 $1,%ymm0,%xmm1 6094.Lopen_avx2_short_tail_32_exit: 6095 vzeroupper 6096 jmp .Lopen_sse_tail_16 6097 6098.Lopen_avx2_320: 6099 vmovdqa %ymm0,%ymm1 6100 vmovdqa %ymm0,%ymm2 6101 vmovdqa %ymm4,%ymm5 6102 vmovdqa %ymm4,%ymm6 6103 vmovdqa %ymm8,%ymm9 6104 vmovdqa %ymm8,%ymm10 6105 vpaddd .Lavx2_inc(%rip),%ymm12,%ymm13 6106 vpaddd .Lavx2_inc(%rip),%ymm13,%ymm14 6107 vmovdqa %ymm4,%ymm7 6108 vmovdqa %ymm8,%ymm11 6109 vmovdqa %ymm12,0+160(%rbp) 6110 vmovdqa %ymm13,0+192(%rbp) 6111 vmovdqa %ymm14,0+224(%rbp) 6112 movq $10,%r10 6113.Lopen_avx2_320_rounds: 6114 vpaddd %ymm4,%ymm0,%ymm0 6115 vpxor %ymm0,%ymm12,%ymm12 6116 vpshufb .Lrol16(%rip),%ymm12,%ymm12 6117 vpaddd %ymm12,%ymm8,%ymm8 6118 vpxor %ymm8,%ymm4,%ymm4 6119 vpsrld $20,%ymm4,%ymm3 6120 vpslld $12,%ymm4,%ymm4 6121 vpxor %ymm3,%ymm4,%ymm4 6122 vpaddd %ymm4,%ymm0,%ymm0 6123 vpxor %ymm0,%ymm12,%ymm12 6124 vpshufb .Lrol8(%rip),%ymm12,%ymm12 6125 vpaddd %ymm12,%ymm8,%ymm8 6126 vpxor %ymm8,%ymm4,%ymm4 6127 vpslld $7,%ymm4,%ymm3 6128 vpsrld $25,%ymm4,%ymm4 6129 vpxor %ymm3,%ymm4,%ymm4 6130 vpalignr $12,%ymm12,%ymm12,%ymm12 6131 vpalignr $8,%ymm8,%ymm8,%ymm8 6132 vpalignr $4,%ymm4,%ymm4,%ymm4 6133 vpaddd %ymm5,%ymm1,%ymm1 6134 vpxor %ymm1,%ymm13,%ymm13 6135 vpshufb .Lrol16(%rip),%ymm13,%ymm13 6136 vpaddd %ymm13,%ymm9,%ymm9 6137 vpxor %ymm9,%ymm5,%ymm5 6138 vpsrld $20,%ymm5,%ymm3 6139 vpslld $12,%ymm5,%ymm5 6140 vpxor %ymm3,%ymm5,%ymm5 6141 vpaddd %ymm5,%ymm1,%ymm1 6142 vpxor %ymm1,%ymm13,%ymm13 6143 vpshufb .Lrol8(%rip),%ymm13,%ymm13 6144 vpaddd %ymm13,%ymm9,%ymm9 6145 vpxor %ymm9,%ymm5,%ymm5 6146 vpslld $7,%ymm5,%ymm3 6147 vpsrld $25,%ymm5,%ymm5 6148 vpxor %ymm3,%ymm5,%ymm5 6149 vpalignr $12,%ymm13,%ymm13,%ymm13 6150 vpalignr $8,%ymm9,%ymm9,%ymm9 6151 vpalignr $4,%ymm5,%ymm5,%ymm5 6152 vpaddd %ymm6,%ymm2,%ymm2 6153 vpxor %ymm2,%ymm14,%ymm14 6154 vpshufb .Lrol16(%rip),%ymm14,%ymm14 6155 vpaddd %ymm14,%ymm10,%ymm10 6156 vpxor %ymm10,%ymm6,%ymm6 6157 vpsrld $20,%ymm6,%ymm3 6158 vpslld $12,%ymm6,%ymm6 6159 vpxor %ymm3,%ymm6,%ymm6 6160 vpaddd %ymm6,%ymm2,%ymm2 6161 vpxor %ymm2,%ymm14,%ymm14 6162 vpshufb .Lrol8(%rip),%ymm14,%ymm14 6163 vpaddd %ymm14,%ymm10,%ymm10 6164 vpxor %ymm10,%ymm6,%ymm6 6165 vpslld $7,%ymm6,%ymm3 6166 vpsrld $25,%ymm6,%ymm6 6167 vpxor %ymm3,%ymm6,%ymm6 6168 vpalignr $12,%ymm14,%ymm14,%ymm14 6169 vpalignr $8,%ymm10,%ymm10,%ymm10 6170 vpalignr $4,%ymm6,%ymm6,%ymm6 6171 vpaddd %ymm4,%ymm0,%ymm0 6172 vpxor %ymm0,%ymm12,%ymm12 6173 vpshufb .Lrol16(%rip),%ymm12,%ymm12 6174 vpaddd %ymm12,%ymm8,%ymm8 6175 vpxor %ymm8,%ymm4,%ymm4 6176 vpsrld $20,%ymm4,%ymm3 6177 vpslld $12,%ymm4,%ymm4 6178 vpxor %ymm3,%ymm4,%ymm4 6179 vpaddd %ymm4,%ymm0,%ymm0 6180 vpxor %ymm0,%ymm12,%ymm12 6181 vpshufb .Lrol8(%rip),%ymm12,%ymm12 6182 vpaddd %ymm12,%ymm8,%ymm8 6183 vpxor %ymm8,%ymm4,%ymm4 6184 vpslld $7,%ymm4,%ymm3 6185 vpsrld $25,%ymm4,%ymm4 6186 vpxor %ymm3,%ymm4,%ymm4 6187 vpalignr $4,%ymm12,%ymm12,%ymm12 6188 vpalignr $8,%ymm8,%ymm8,%ymm8 6189 vpalignr $12,%ymm4,%ymm4,%ymm4 6190 vpaddd %ymm5,%ymm1,%ymm1 6191 vpxor %ymm1,%ymm13,%ymm13 6192 vpshufb .Lrol16(%rip),%ymm13,%ymm13 6193 vpaddd %ymm13,%ymm9,%ymm9 6194 vpxor %ymm9,%ymm5,%ymm5 6195 vpsrld $20,%ymm5,%ymm3 6196 vpslld $12,%ymm5,%ymm5 6197 vpxor %ymm3,%ymm5,%ymm5 6198 vpaddd %ymm5,%ymm1,%ymm1 6199 vpxor %ymm1,%ymm13,%ymm13 6200 vpshufb .Lrol8(%rip),%ymm13,%ymm13 6201 vpaddd 
%ymm13,%ymm9,%ymm9 6202 vpxor %ymm9,%ymm5,%ymm5 6203 vpslld $7,%ymm5,%ymm3 6204 vpsrld $25,%ymm5,%ymm5 6205 vpxor %ymm3,%ymm5,%ymm5 6206 vpalignr $4,%ymm13,%ymm13,%ymm13 6207 vpalignr $8,%ymm9,%ymm9,%ymm9 6208 vpalignr $12,%ymm5,%ymm5,%ymm5 6209 vpaddd %ymm6,%ymm2,%ymm2 6210 vpxor %ymm2,%ymm14,%ymm14 6211 vpshufb .Lrol16(%rip),%ymm14,%ymm14 6212 vpaddd %ymm14,%ymm10,%ymm10 6213 vpxor %ymm10,%ymm6,%ymm6 6214 vpsrld $20,%ymm6,%ymm3 6215 vpslld $12,%ymm6,%ymm6 6216 vpxor %ymm3,%ymm6,%ymm6 6217 vpaddd %ymm6,%ymm2,%ymm2 6218 vpxor %ymm2,%ymm14,%ymm14 6219 vpshufb .Lrol8(%rip),%ymm14,%ymm14 6220 vpaddd %ymm14,%ymm10,%ymm10 6221 vpxor %ymm10,%ymm6,%ymm6 6222 vpslld $7,%ymm6,%ymm3 6223 vpsrld $25,%ymm6,%ymm6 6224 vpxor %ymm3,%ymm6,%ymm6 6225 vpalignr $4,%ymm14,%ymm14,%ymm14 6226 vpalignr $8,%ymm10,%ymm10,%ymm10 6227 vpalignr $12,%ymm6,%ymm6,%ymm6 6228 6229 decq %r10 6230 jne .Lopen_avx2_320_rounds 6231 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 6232 vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 6233 vpaddd .Lchacha20_consts(%rip),%ymm2,%ymm2 6234 vpaddd %ymm7,%ymm4,%ymm4 6235 vpaddd %ymm7,%ymm5,%ymm5 6236 vpaddd %ymm7,%ymm6,%ymm6 6237 vpaddd %ymm11,%ymm8,%ymm8 6238 vpaddd %ymm11,%ymm9,%ymm9 6239 vpaddd %ymm11,%ymm10,%ymm10 6240 vpaddd 0+160(%rbp),%ymm12,%ymm12 6241 vpaddd 0+192(%rbp),%ymm13,%ymm13 6242 vpaddd 0+224(%rbp),%ymm14,%ymm14 6243 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 6244 6245 vpand .Lclamp(%rip),%ymm3,%ymm3 6246 vmovdqa %ymm3,0+0(%rbp) 6247 6248 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 6249 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 6250 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 6251 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 6252 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 6253 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 6254 vperm2i128 $0x02,%ymm2,%ymm6,%ymm9 6255 vperm2i128 $0x02,%ymm10,%ymm14,%ymm13 6256 vperm2i128 $0x13,%ymm2,%ymm6,%ymm2 6257 vperm2i128 $0x13,%ymm10,%ymm14,%ymm6 6258 jmp .Lopen_avx2_short 6259.size chacha20_poly1305_open_avx2, .-chacha20_poly1305_open_avx2 6260.cfi_endproc 6261 6262 6263.type chacha20_poly1305_seal_avx2,@function 6264.align 64 6265chacha20_poly1305_seal_avx2: 6266.cfi_startproc 6267 6268 6269.cfi_adjust_cfa_offset 8 6270.cfi_offset %rbp,-16 6271.cfi_adjust_cfa_offset 8 6272.cfi_offset %rbx,-24 6273.cfi_adjust_cfa_offset 8 6274.cfi_offset %r12,-32 6275.cfi_adjust_cfa_offset 8 6276.cfi_offset %r13,-40 6277.cfi_adjust_cfa_offset 8 6278.cfi_offset %r14,-48 6279.cfi_adjust_cfa_offset 8 6280.cfi_offset %r15,-56 6281.cfi_adjust_cfa_offset 8 6282.cfi_offset %r9,-64 6283.cfi_adjust_cfa_offset 288 + 32 6284 6285 vzeroupper 6286 vmovdqa .Lchacha20_consts(%rip),%ymm0 6287 vbroadcasti128 0(%r9),%ymm4 6288 vbroadcasti128 16(%r9),%ymm8 6289 vbroadcasti128 32(%r9),%ymm12 6290 vpaddd .Lavx2_init(%rip),%ymm12,%ymm12 6291 cmpq $192,%rbx 6292 jbe .Lseal_avx2_192 6293 cmpq $320,%rbx 6294 jbe .Lseal_avx2_320 6295 vmovdqa %ymm0,%ymm1 6296 vmovdqa %ymm0,%ymm2 6297 vmovdqa %ymm0,%ymm3 6298 vmovdqa %ymm4,%ymm5 6299 vmovdqa %ymm4,%ymm6 6300 vmovdqa %ymm4,%ymm7 6301 vmovdqa %ymm4,0+64(%rbp) 6302 vmovdqa %ymm8,%ymm9 6303 vmovdqa %ymm8,%ymm10 6304 vmovdqa %ymm8,%ymm11 6305 vmovdqa %ymm8,0+96(%rbp) 6306 vmovdqa %ymm12,%ymm15 6307 vpaddd .Lavx2_inc(%rip),%ymm15,%ymm14 6308 vpaddd .Lavx2_inc(%rip),%ymm14,%ymm13 6309 vpaddd .Lavx2_inc(%rip),%ymm13,%ymm12 6310 vmovdqa %ymm12,0+160(%rbp) 6311 vmovdqa %ymm13,0+192(%rbp) 6312 vmovdqa %ymm14,0+224(%rbp) 6313 vmovdqa %ymm15,0+256(%rbp) 6314 movq $10,%r10 6315.Lseal_avx2_init_rounds: 6316 vmovdqa %ymm8,0+128(%rbp) 6317 vmovdqa .Lrol16(%rip),%ymm8 6318 vpaddd %ymm7,%ymm3,%ymm3 6319 
vpaddd %ymm6,%ymm2,%ymm2 6320 vpaddd %ymm5,%ymm1,%ymm1 6321 vpaddd %ymm4,%ymm0,%ymm0 6322 vpxor %ymm3,%ymm15,%ymm15 6323 vpxor %ymm2,%ymm14,%ymm14 6324 vpxor %ymm1,%ymm13,%ymm13 6325 vpxor %ymm0,%ymm12,%ymm12 6326 vpshufb %ymm8,%ymm15,%ymm15 6327 vpshufb %ymm8,%ymm14,%ymm14 6328 vpshufb %ymm8,%ymm13,%ymm13 6329 vpshufb %ymm8,%ymm12,%ymm12 6330 vpaddd %ymm15,%ymm11,%ymm11 6331 vpaddd %ymm14,%ymm10,%ymm10 6332 vpaddd %ymm13,%ymm9,%ymm9 6333 vpaddd 0+128(%rbp),%ymm12,%ymm8 6334 vpxor %ymm11,%ymm7,%ymm7 6335 vpxor %ymm10,%ymm6,%ymm6 6336 vpxor %ymm9,%ymm5,%ymm5 6337 vpxor %ymm8,%ymm4,%ymm4 6338 vmovdqa %ymm8,0+128(%rbp) 6339 vpsrld $20,%ymm7,%ymm8 6340 vpslld $32-20,%ymm7,%ymm7 6341 vpxor %ymm8,%ymm7,%ymm7 6342 vpsrld $20,%ymm6,%ymm8 6343 vpslld $32-20,%ymm6,%ymm6 6344 vpxor %ymm8,%ymm6,%ymm6 6345 vpsrld $20,%ymm5,%ymm8 6346 vpslld $32-20,%ymm5,%ymm5 6347 vpxor %ymm8,%ymm5,%ymm5 6348 vpsrld $20,%ymm4,%ymm8 6349 vpslld $32-20,%ymm4,%ymm4 6350 vpxor %ymm8,%ymm4,%ymm4 6351 vmovdqa .Lrol8(%rip),%ymm8 6352 vpaddd %ymm7,%ymm3,%ymm3 6353 vpaddd %ymm6,%ymm2,%ymm2 6354 vpaddd %ymm5,%ymm1,%ymm1 6355 vpaddd %ymm4,%ymm0,%ymm0 6356 vpxor %ymm3,%ymm15,%ymm15 6357 vpxor %ymm2,%ymm14,%ymm14 6358 vpxor %ymm1,%ymm13,%ymm13 6359 vpxor %ymm0,%ymm12,%ymm12 6360 vpshufb %ymm8,%ymm15,%ymm15 6361 vpshufb %ymm8,%ymm14,%ymm14 6362 vpshufb %ymm8,%ymm13,%ymm13 6363 vpshufb %ymm8,%ymm12,%ymm12 6364 vpaddd %ymm15,%ymm11,%ymm11 6365 vpaddd %ymm14,%ymm10,%ymm10 6366 vpaddd %ymm13,%ymm9,%ymm9 6367 vpaddd 0+128(%rbp),%ymm12,%ymm8 6368 vpxor %ymm11,%ymm7,%ymm7 6369 vpxor %ymm10,%ymm6,%ymm6 6370 vpxor %ymm9,%ymm5,%ymm5 6371 vpxor %ymm8,%ymm4,%ymm4 6372 vmovdqa %ymm8,0+128(%rbp) 6373 vpsrld $25,%ymm7,%ymm8 6374 vpslld $32-25,%ymm7,%ymm7 6375 vpxor %ymm8,%ymm7,%ymm7 6376 vpsrld $25,%ymm6,%ymm8 6377 vpslld $32-25,%ymm6,%ymm6 6378 vpxor %ymm8,%ymm6,%ymm6 6379 vpsrld $25,%ymm5,%ymm8 6380 vpslld $32-25,%ymm5,%ymm5 6381 vpxor %ymm8,%ymm5,%ymm5 6382 vpsrld $25,%ymm4,%ymm8 6383 vpslld $32-25,%ymm4,%ymm4 6384 vpxor %ymm8,%ymm4,%ymm4 6385 vmovdqa 0+128(%rbp),%ymm8 6386 vpalignr $4,%ymm7,%ymm7,%ymm7 6387 vpalignr $8,%ymm11,%ymm11,%ymm11 6388 vpalignr $12,%ymm15,%ymm15,%ymm15 6389 vpalignr $4,%ymm6,%ymm6,%ymm6 6390 vpalignr $8,%ymm10,%ymm10,%ymm10 6391 vpalignr $12,%ymm14,%ymm14,%ymm14 6392 vpalignr $4,%ymm5,%ymm5,%ymm5 6393 vpalignr $8,%ymm9,%ymm9,%ymm9 6394 vpalignr $12,%ymm13,%ymm13,%ymm13 6395 vpalignr $4,%ymm4,%ymm4,%ymm4 6396 vpalignr $8,%ymm8,%ymm8,%ymm8 6397 vpalignr $12,%ymm12,%ymm12,%ymm12 6398 vmovdqa %ymm8,0+128(%rbp) 6399 vmovdqa .Lrol16(%rip),%ymm8 6400 vpaddd %ymm7,%ymm3,%ymm3 6401 vpaddd %ymm6,%ymm2,%ymm2 6402 vpaddd %ymm5,%ymm1,%ymm1 6403 vpaddd %ymm4,%ymm0,%ymm0 6404 vpxor %ymm3,%ymm15,%ymm15 6405 vpxor %ymm2,%ymm14,%ymm14 6406 vpxor %ymm1,%ymm13,%ymm13 6407 vpxor %ymm0,%ymm12,%ymm12 6408 vpshufb %ymm8,%ymm15,%ymm15 6409 vpshufb %ymm8,%ymm14,%ymm14 6410 vpshufb %ymm8,%ymm13,%ymm13 6411 vpshufb %ymm8,%ymm12,%ymm12 6412 vpaddd %ymm15,%ymm11,%ymm11 6413 vpaddd %ymm14,%ymm10,%ymm10 6414 vpaddd %ymm13,%ymm9,%ymm9 6415 vpaddd 0+128(%rbp),%ymm12,%ymm8 6416 vpxor %ymm11,%ymm7,%ymm7 6417 vpxor %ymm10,%ymm6,%ymm6 6418 vpxor %ymm9,%ymm5,%ymm5 6419 vpxor %ymm8,%ymm4,%ymm4 6420 vmovdqa %ymm8,0+128(%rbp) 6421 vpsrld $20,%ymm7,%ymm8 6422 vpslld $32-20,%ymm7,%ymm7 6423 vpxor %ymm8,%ymm7,%ymm7 6424 vpsrld $20,%ymm6,%ymm8 6425 vpslld $32-20,%ymm6,%ymm6 6426 vpxor %ymm8,%ymm6,%ymm6 6427 vpsrld $20,%ymm5,%ymm8 6428 vpslld $32-20,%ymm5,%ymm5 6429 vpxor %ymm8,%ymm5,%ymm5 6430 vpsrld $20,%ymm4,%ymm8 6431 vpslld $32-20,%ymm4,%ymm4 6432 vpxor 
%ymm8,%ymm4,%ymm4 6433 vmovdqa .Lrol8(%rip),%ymm8 6434 vpaddd %ymm7,%ymm3,%ymm3 6435 vpaddd %ymm6,%ymm2,%ymm2 6436 vpaddd %ymm5,%ymm1,%ymm1 6437 vpaddd %ymm4,%ymm0,%ymm0 6438 vpxor %ymm3,%ymm15,%ymm15 6439 vpxor %ymm2,%ymm14,%ymm14 6440 vpxor %ymm1,%ymm13,%ymm13 6441 vpxor %ymm0,%ymm12,%ymm12 6442 vpshufb %ymm8,%ymm15,%ymm15 6443 vpshufb %ymm8,%ymm14,%ymm14 6444 vpshufb %ymm8,%ymm13,%ymm13 6445 vpshufb %ymm8,%ymm12,%ymm12 6446 vpaddd %ymm15,%ymm11,%ymm11 6447 vpaddd %ymm14,%ymm10,%ymm10 6448 vpaddd %ymm13,%ymm9,%ymm9 6449 vpaddd 0+128(%rbp),%ymm12,%ymm8 6450 vpxor %ymm11,%ymm7,%ymm7 6451 vpxor %ymm10,%ymm6,%ymm6 6452 vpxor %ymm9,%ymm5,%ymm5 6453 vpxor %ymm8,%ymm4,%ymm4 6454 vmovdqa %ymm8,0+128(%rbp) 6455 vpsrld $25,%ymm7,%ymm8 6456 vpslld $32-25,%ymm7,%ymm7 6457 vpxor %ymm8,%ymm7,%ymm7 6458 vpsrld $25,%ymm6,%ymm8 6459 vpslld $32-25,%ymm6,%ymm6 6460 vpxor %ymm8,%ymm6,%ymm6 6461 vpsrld $25,%ymm5,%ymm8 6462 vpslld $32-25,%ymm5,%ymm5 6463 vpxor %ymm8,%ymm5,%ymm5 6464 vpsrld $25,%ymm4,%ymm8 6465 vpslld $32-25,%ymm4,%ymm4 6466 vpxor %ymm8,%ymm4,%ymm4 6467 vmovdqa 0+128(%rbp),%ymm8 6468 vpalignr $12,%ymm7,%ymm7,%ymm7 6469 vpalignr $8,%ymm11,%ymm11,%ymm11 6470 vpalignr $4,%ymm15,%ymm15,%ymm15 6471 vpalignr $12,%ymm6,%ymm6,%ymm6 6472 vpalignr $8,%ymm10,%ymm10,%ymm10 6473 vpalignr $4,%ymm14,%ymm14,%ymm14 6474 vpalignr $12,%ymm5,%ymm5,%ymm5 6475 vpalignr $8,%ymm9,%ymm9,%ymm9 6476 vpalignr $4,%ymm13,%ymm13,%ymm13 6477 vpalignr $12,%ymm4,%ymm4,%ymm4 6478 vpalignr $8,%ymm8,%ymm8,%ymm8 6479 vpalignr $4,%ymm12,%ymm12,%ymm12 6480 6481 decq %r10 6482 jnz .Lseal_avx2_init_rounds 6483 vpaddd .Lchacha20_consts(%rip),%ymm3,%ymm3 6484 vpaddd 0+64(%rbp),%ymm7,%ymm7 6485 vpaddd 0+96(%rbp),%ymm11,%ymm11 6486 vpaddd 0+256(%rbp),%ymm15,%ymm15 6487 vpaddd .Lchacha20_consts(%rip),%ymm2,%ymm2 6488 vpaddd 0+64(%rbp),%ymm6,%ymm6 6489 vpaddd 0+96(%rbp),%ymm10,%ymm10 6490 vpaddd 0+224(%rbp),%ymm14,%ymm14 6491 vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 6492 vpaddd 0+64(%rbp),%ymm5,%ymm5 6493 vpaddd 0+96(%rbp),%ymm9,%ymm9 6494 vpaddd 0+192(%rbp),%ymm13,%ymm13 6495 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 6496 vpaddd 0+64(%rbp),%ymm4,%ymm4 6497 vpaddd 0+96(%rbp),%ymm8,%ymm8 6498 vpaddd 0+160(%rbp),%ymm12,%ymm12 6499 6500 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 6501 vperm2i128 $0x02,%ymm3,%ymm7,%ymm15 6502 vperm2i128 $0x13,%ymm3,%ymm7,%ymm3 6503 vpand .Lclamp(%rip),%ymm15,%ymm15 6504 vmovdqa %ymm15,0+0(%rbp) 6505 movq %r8,%r8 6506 call poly_hash_ad_internal 6507 6508 vpxor 0(%rsi),%ymm3,%ymm3 6509 vpxor 32(%rsi),%ymm11,%ymm11 6510 vmovdqu %ymm3,0(%rdi) 6511 vmovdqu %ymm11,32(%rdi) 6512 vperm2i128 $0x02,%ymm2,%ymm6,%ymm15 6513 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 6514 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 6515 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 6516 vpxor 0+64(%rsi),%ymm15,%ymm15 6517 vpxor 32+64(%rsi),%ymm2,%ymm2 6518 vpxor 64+64(%rsi),%ymm6,%ymm6 6519 vpxor 96+64(%rsi),%ymm10,%ymm10 6520 vmovdqu %ymm15,0+64(%rdi) 6521 vmovdqu %ymm2,32+64(%rdi) 6522 vmovdqu %ymm6,64+64(%rdi) 6523 vmovdqu %ymm10,96+64(%rdi) 6524 vperm2i128 $0x02,%ymm1,%ymm5,%ymm15 6525 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 6526 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 6527 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 6528 vpxor 0+192(%rsi),%ymm15,%ymm15 6529 vpxor 32+192(%rsi),%ymm1,%ymm1 6530 vpxor 64+192(%rsi),%ymm5,%ymm5 6531 vpxor 96+192(%rsi),%ymm9,%ymm9 6532 vmovdqu %ymm15,0+192(%rdi) 6533 vmovdqu %ymm1,32+192(%rdi) 6534 vmovdqu %ymm5,64+192(%rdi) 6535 vmovdqu %ymm9,96+192(%rdi) 6536 vperm2i128 $0x13,%ymm0,%ymm4,%ymm15 6537 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 6538 
vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 6539 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 6540 vmovdqa %ymm15,%ymm8 6541 6542 leaq 320(%rsi),%rsi 6543 subq $320,%rbx 6544 movq $320,%rcx 6545 cmpq $128,%rbx 6546 jbe .Lseal_avx2_short_hash_remainder 6547 vpxor 0(%rsi),%ymm0,%ymm0 6548 vpxor 32(%rsi),%ymm4,%ymm4 6549 vpxor 64(%rsi),%ymm8,%ymm8 6550 vpxor 96(%rsi),%ymm12,%ymm12 6551 vmovdqu %ymm0,320(%rdi) 6552 vmovdqu %ymm4,352(%rdi) 6553 vmovdqu %ymm8,384(%rdi) 6554 vmovdqu %ymm12,416(%rdi) 6555 leaq 128(%rsi),%rsi 6556 subq $128,%rbx 6557 movq $8,%rcx 6558 movq $2,%r8 6559 cmpq $128,%rbx 6560 jbe .Lseal_avx2_tail_128 6561 cmpq $256,%rbx 6562 jbe .Lseal_avx2_tail_256 6563 cmpq $384,%rbx 6564 jbe .Lseal_avx2_tail_384 6565 cmpq $512,%rbx 6566 jbe .Lseal_avx2_tail_512 6567 vmovdqa .Lchacha20_consts(%rip),%ymm0 6568 vmovdqa 0+64(%rbp),%ymm4 6569 vmovdqa 0+96(%rbp),%ymm8 6570 vmovdqa %ymm0,%ymm1 6571 vmovdqa %ymm4,%ymm5 6572 vmovdqa %ymm8,%ymm9 6573 vmovdqa %ymm0,%ymm2 6574 vmovdqa %ymm4,%ymm6 6575 vmovdqa %ymm8,%ymm10 6576 vmovdqa %ymm0,%ymm3 6577 vmovdqa %ymm4,%ymm7 6578 vmovdqa %ymm8,%ymm11 6579 vmovdqa .Lavx2_inc(%rip),%ymm12 6580 vpaddd 0+160(%rbp),%ymm12,%ymm15 6581 vpaddd %ymm15,%ymm12,%ymm14 6582 vpaddd %ymm14,%ymm12,%ymm13 6583 vpaddd %ymm13,%ymm12,%ymm12 6584 vmovdqa %ymm15,0+256(%rbp) 6585 vmovdqa %ymm14,0+224(%rbp) 6586 vmovdqa %ymm13,0+192(%rbp) 6587 vmovdqa %ymm12,0+160(%rbp) 6588 vmovdqa %ymm8,0+128(%rbp) 6589 vmovdqa .Lrol16(%rip),%ymm8 6590 vpaddd %ymm7,%ymm3,%ymm3 6591 vpaddd %ymm6,%ymm2,%ymm2 6592 vpaddd %ymm5,%ymm1,%ymm1 6593 vpaddd %ymm4,%ymm0,%ymm0 6594 vpxor %ymm3,%ymm15,%ymm15 6595 vpxor %ymm2,%ymm14,%ymm14 6596 vpxor %ymm1,%ymm13,%ymm13 6597 vpxor %ymm0,%ymm12,%ymm12 6598 vpshufb %ymm8,%ymm15,%ymm15 6599 vpshufb %ymm8,%ymm14,%ymm14 6600 vpshufb %ymm8,%ymm13,%ymm13 6601 vpshufb %ymm8,%ymm12,%ymm12 6602 vpaddd %ymm15,%ymm11,%ymm11 6603 vpaddd %ymm14,%ymm10,%ymm10 6604 vpaddd %ymm13,%ymm9,%ymm9 6605 vpaddd 0+128(%rbp),%ymm12,%ymm8 6606 vpxor %ymm11,%ymm7,%ymm7 6607 vpxor %ymm10,%ymm6,%ymm6 6608 vpxor %ymm9,%ymm5,%ymm5 6609 vpxor %ymm8,%ymm4,%ymm4 6610 vmovdqa %ymm8,0+128(%rbp) 6611 vpsrld $20,%ymm7,%ymm8 6612 vpslld $32-20,%ymm7,%ymm7 6613 vpxor %ymm8,%ymm7,%ymm7 6614 vpsrld $20,%ymm6,%ymm8 6615 vpslld $32-20,%ymm6,%ymm6 6616 vpxor %ymm8,%ymm6,%ymm6 6617 vpsrld $20,%ymm5,%ymm8 6618 vpslld $32-20,%ymm5,%ymm5 6619 vpxor %ymm8,%ymm5,%ymm5 6620 vpsrld $20,%ymm4,%ymm8 6621 vpslld $32-20,%ymm4,%ymm4 6622 vpxor %ymm8,%ymm4,%ymm4 6623 vmovdqa .Lrol8(%rip),%ymm8 6624 vpaddd %ymm7,%ymm3,%ymm3 6625 vpaddd %ymm6,%ymm2,%ymm2 6626 vpaddd %ymm5,%ymm1,%ymm1 6627 vpaddd %ymm4,%ymm0,%ymm0 6628 vpxor %ymm3,%ymm15,%ymm15 6629 vpxor %ymm2,%ymm14,%ymm14 6630 vpxor %ymm1,%ymm13,%ymm13 6631 vpxor %ymm0,%ymm12,%ymm12 6632 vpshufb %ymm8,%ymm15,%ymm15 6633 vpshufb %ymm8,%ymm14,%ymm14 6634 vpshufb %ymm8,%ymm13,%ymm13 6635 vpshufb %ymm8,%ymm12,%ymm12 6636 vpaddd %ymm15,%ymm11,%ymm11 6637 vpaddd %ymm14,%ymm10,%ymm10 6638 vpaddd %ymm13,%ymm9,%ymm9 6639 vpaddd 0+128(%rbp),%ymm12,%ymm8 6640 vpxor %ymm11,%ymm7,%ymm7 6641 vpxor %ymm10,%ymm6,%ymm6 6642 vpxor %ymm9,%ymm5,%ymm5 6643 vpxor %ymm8,%ymm4,%ymm4 6644 vmovdqa %ymm8,0+128(%rbp) 6645 vpsrld $25,%ymm7,%ymm8 6646 vpslld $32-25,%ymm7,%ymm7 6647 vpxor %ymm8,%ymm7,%ymm7 6648 vpsrld $25,%ymm6,%ymm8 6649 vpslld $32-25,%ymm6,%ymm6 6650 vpxor %ymm8,%ymm6,%ymm6 6651 vpsrld $25,%ymm5,%ymm8 6652 vpslld $32-25,%ymm5,%ymm5 6653 vpxor %ymm8,%ymm5,%ymm5 6654 vpsrld $25,%ymm4,%ymm8 6655 vpslld $32-25,%ymm4,%ymm4 6656 vpxor %ymm8,%ymm4,%ymm4 6657 vmovdqa 0+128(%rbp),%ymm8 6658 
vpalignr $4,%ymm7,%ymm7,%ymm7 6659 vpalignr $8,%ymm11,%ymm11,%ymm11 6660 vpalignr $12,%ymm15,%ymm15,%ymm15 6661 vpalignr $4,%ymm6,%ymm6,%ymm6 6662 vpalignr $8,%ymm10,%ymm10,%ymm10 6663 vpalignr $12,%ymm14,%ymm14,%ymm14 6664 vpalignr $4,%ymm5,%ymm5,%ymm5 6665 vpalignr $8,%ymm9,%ymm9,%ymm9 6666 vpalignr $12,%ymm13,%ymm13,%ymm13 6667 vpalignr $4,%ymm4,%ymm4,%ymm4 6668 vpalignr $8,%ymm8,%ymm8,%ymm8 6669 vpalignr $12,%ymm12,%ymm12,%ymm12 6670 vmovdqa %ymm8,0+128(%rbp) 6671 vmovdqa .Lrol16(%rip),%ymm8 6672 vpaddd %ymm7,%ymm3,%ymm3 6673 vpaddd %ymm6,%ymm2,%ymm2 6674 vpaddd %ymm5,%ymm1,%ymm1 6675 vpaddd %ymm4,%ymm0,%ymm0 6676 vpxor %ymm3,%ymm15,%ymm15 6677 vpxor %ymm2,%ymm14,%ymm14 6678 vpxor %ymm1,%ymm13,%ymm13 6679 vpxor %ymm0,%ymm12,%ymm12 6680 vpshufb %ymm8,%ymm15,%ymm15 6681 vpshufb %ymm8,%ymm14,%ymm14 6682 vpshufb %ymm8,%ymm13,%ymm13 6683 vpshufb %ymm8,%ymm12,%ymm12 6684 vpaddd %ymm15,%ymm11,%ymm11 6685 vpaddd %ymm14,%ymm10,%ymm10 6686 vpaddd %ymm13,%ymm9,%ymm9 6687 vpaddd 0+128(%rbp),%ymm12,%ymm8 6688 vpxor %ymm11,%ymm7,%ymm7 6689 vpxor %ymm10,%ymm6,%ymm6 6690 vpxor %ymm9,%ymm5,%ymm5 6691 vpxor %ymm8,%ymm4,%ymm4 6692 vmovdqa %ymm8,0+128(%rbp) 6693 vpsrld $20,%ymm7,%ymm8 6694 vpslld $32-20,%ymm7,%ymm7 6695 vpxor %ymm8,%ymm7,%ymm7 6696 vpsrld $20,%ymm6,%ymm8 6697 vpslld $32-20,%ymm6,%ymm6 6698 vpxor %ymm8,%ymm6,%ymm6 6699 vpsrld $20,%ymm5,%ymm8 6700 vpslld $32-20,%ymm5,%ymm5 6701 vpxor %ymm8,%ymm5,%ymm5 6702 vpsrld $20,%ymm4,%ymm8 6703 vpslld $32-20,%ymm4,%ymm4 6704 vpxor %ymm8,%ymm4,%ymm4 6705 vmovdqa .Lrol8(%rip),%ymm8 6706 vpaddd %ymm7,%ymm3,%ymm3 6707 vpaddd %ymm6,%ymm2,%ymm2 6708 vpaddd %ymm5,%ymm1,%ymm1 6709 vpaddd %ymm4,%ymm0,%ymm0 6710 vpxor %ymm3,%ymm15,%ymm15 6711 vpxor %ymm2,%ymm14,%ymm14 6712 vpxor %ymm1,%ymm13,%ymm13 6713 vpxor %ymm0,%ymm12,%ymm12 6714 vpshufb %ymm8,%ymm15,%ymm15 6715 vpshufb %ymm8,%ymm14,%ymm14 6716 vpshufb %ymm8,%ymm13,%ymm13 6717 vpshufb %ymm8,%ymm12,%ymm12 6718 vpaddd %ymm15,%ymm11,%ymm11 6719 vpaddd %ymm14,%ymm10,%ymm10 6720 vpaddd %ymm13,%ymm9,%ymm9 6721 vpaddd 0+128(%rbp),%ymm12,%ymm8 6722 vpxor %ymm11,%ymm7,%ymm7 6723 vpxor %ymm10,%ymm6,%ymm6 6724 vpxor %ymm9,%ymm5,%ymm5 6725 vpxor %ymm8,%ymm4,%ymm4 6726 vmovdqa %ymm8,0+128(%rbp) 6727 vpsrld $25,%ymm7,%ymm8 6728 vpslld $32-25,%ymm7,%ymm7 6729 vpxor %ymm8,%ymm7,%ymm7 6730 vpsrld $25,%ymm6,%ymm8 6731 vpslld $32-25,%ymm6,%ymm6 6732 vpxor %ymm8,%ymm6,%ymm6 6733 vpsrld $25,%ymm5,%ymm8 6734 vpslld $32-25,%ymm5,%ymm5 6735 vpxor %ymm8,%ymm5,%ymm5 6736 vpsrld $25,%ymm4,%ymm8 6737 vpslld $32-25,%ymm4,%ymm4 6738 vpxor %ymm8,%ymm4,%ymm4 6739 vmovdqa 0+128(%rbp),%ymm8 6740 vpalignr $12,%ymm7,%ymm7,%ymm7 6741 vpalignr $8,%ymm11,%ymm11,%ymm11 6742 vpalignr $4,%ymm15,%ymm15,%ymm15 6743 vpalignr $12,%ymm6,%ymm6,%ymm6 6744 vpalignr $8,%ymm10,%ymm10,%ymm10 6745 vpalignr $4,%ymm14,%ymm14,%ymm14 6746 vpalignr $12,%ymm5,%ymm5,%ymm5 6747 vpalignr $8,%ymm9,%ymm9,%ymm9 6748 vpalignr $4,%ymm13,%ymm13,%ymm13 6749 vpalignr $12,%ymm4,%ymm4,%ymm4 6750 vpalignr $8,%ymm8,%ymm8,%ymm8 6751 vpalignr $4,%ymm12,%ymm12,%ymm12 6752 vmovdqa %ymm8,0+128(%rbp) 6753 vmovdqa .Lrol16(%rip),%ymm8 6754 vpaddd %ymm7,%ymm3,%ymm3 6755 vpaddd %ymm6,%ymm2,%ymm2 6756 vpaddd %ymm5,%ymm1,%ymm1 6757 vpaddd %ymm4,%ymm0,%ymm0 6758 vpxor %ymm3,%ymm15,%ymm15 6759 vpxor %ymm2,%ymm14,%ymm14 6760 vpxor %ymm1,%ymm13,%ymm13 6761 vpxor %ymm0,%ymm12,%ymm12 6762 vpshufb %ymm8,%ymm15,%ymm15 6763 vpshufb %ymm8,%ymm14,%ymm14 6764 vpshufb %ymm8,%ymm13,%ymm13 6765 vpshufb %ymm8,%ymm12,%ymm12 6766 vpaddd %ymm15,%ymm11,%ymm11 6767 vpaddd %ymm14,%ymm10,%ymm10 6768 vpaddd 
%ymm13,%ymm9,%ymm9 6769 vpaddd 0+128(%rbp),%ymm12,%ymm8 6770 vpxor %ymm11,%ymm7,%ymm7 6771 vpxor %ymm10,%ymm6,%ymm6 6772 vpxor %ymm9,%ymm5,%ymm5 6773 vpxor %ymm8,%ymm4,%ymm4 6774 vmovdqa %ymm8,0+128(%rbp) 6775 vpsrld $20,%ymm7,%ymm8 6776 vpslld $32-20,%ymm7,%ymm7 6777 vpxor %ymm8,%ymm7,%ymm7 6778 vpsrld $20,%ymm6,%ymm8 6779 vpslld $32-20,%ymm6,%ymm6 6780 vpxor %ymm8,%ymm6,%ymm6 6781 vpsrld $20,%ymm5,%ymm8 6782 vpslld $32-20,%ymm5,%ymm5 6783 vpxor %ymm8,%ymm5,%ymm5 6784 vpsrld $20,%ymm4,%ymm8 6785 vpslld $32-20,%ymm4,%ymm4 6786 vpxor %ymm8,%ymm4,%ymm4 6787 vmovdqa .Lrol8(%rip),%ymm8 6788 vpaddd %ymm7,%ymm3,%ymm3 6789 vpaddd %ymm6,%ymm2,%ymm2 6790 vpaddd %ymm5,%ymm1,%ymm1 6791 vpaddd %ymm4,%ymm0,%ymm0 6792 vpxor %ymm3,%ymm15,%ymm15 6793 6794 subq $16,%rdi 6795 movq $9,%rcx 6796 jmp .Lseal_avx2_main_loop_rounds_entry 6797.align 32 6798.Lseal_avx2_main_loop: 6799 vmovdqa .Lchacha20_consts(%rip),%ymm0 6800 vmovdqa 0+64(%rbp),%ymm4 6801 vmovdqa 0+96(%rbp),%ymm8 6802 vmovdqa %ymm0,%ymm1 6803 vmovdqa %ymm4,%ymm5 6804 vmovdqa %ymm8,%ymm9 6805 vmovdqa %ymm0,%ymm2 6806 vmovdqa %ymm4,%ymm6 6807 vmovdqa %ymm8,%ymm10 6808 vmovdqa %ymm0,%ymm3 6809 vmovdqa %ymm4,%ymm7 6810 vmovdqa %ymm8,%ymm11 6811 vmovdqa .Lavx2_inc(%rip),%ymm12 6812 vpaddd 0+160(%rbp),%ymm12,%ymm15 6813 vpaddd %ymm15,%ymm12,%ymm14 6814 vpaddd %ymm14,%ymm12,%ymm13 6815 vpaddd %ymm13,%ymm12,%ymm12 6816 vmovdqa %ymm15,0+256(%rbp) 6817 vmovdqa %ymm14,0+224(%rbp) 6818 vmovdqa %ymm13,0+192(%rbp) 6819 vmovdqa %ymm12,0+160(%rbp) 6820 6821 movq $10,%rcx 6822.align 32 6823.Lseal_avx2_main_loop_rounds: 6824 addq 0+0(%rdi),%r10 6825 adcq 8+0(%rdi),%r11 6826 adcq $1,%r12 6827 vmovdqa %ymm8,0+128(%rbp) 6828 vmovdqa .Lrol16(%rip),%ymm8 6829 vpaddd %ymm7,%ymm3,%ymm3 6830 vpaddd %ymm6,%ymm2,%ymm2 6831 vpaddd %ymm5,%ymm1,%ymm1 6832 vpaddd %ymm4,%ymm0,%ymm0 6833 vpxor %ymm3,%ymm15,%ymm15 6834 vpxor %ymm2,%ymm14,%ymm14 6835 vpxor %ymm1,%ymm13,%ymm13 6836 vpxor %ymm0,%ymm12,%ymm12 6837 movq 0+0+0(%rbp),%rdx 6838 movq %rdx,%r15 6839 mulxq %r10,%r13,%r14 6840 mulxq %r11,%rax,%rdx 6841 imulq %r12,%r15 6842 addq %rax,%r14 6843 adcq %rdx,%r15 6844 vpshufb %ymm8,%ymm15,%ymm15 6845 vpshufb %ymm8,%ymm14,%ymm14 6846 vpshufb %ymm8,%ymm13,%ymm13 6847 vpshufb %ymm8,%ymm12,%ymm12 6848 vpaddd %ymm15,%ymm11,%ymm11 6849 vpaddd %ymm14,%ymm10,%ymm10 6850 vpaddd %ymm13,%ymm9,%ymm9 6851 vpaddd 0+128(%rbp),%ymm12,%ymm8 6852 vpxor %ymm11,%ymm7,%ymm7 6853 movq 8+0+0(%rbp),%rdx 6854 mulxq %r10,%r10,%rax 6855 addq %r10,%r14 6856 mulxq %r11,%r11,%r9 6857 adcq %r11,%r15 6858 adcq $0,%r9 6859 imulq %r12,%rdx 6860 vpxor %ymm10,%ymm6,%ymm6 6861 vpxor %ymm9,%ymm5,%ymm5 6862 vpxor %ymm8,%ymm4,%ymm4 6863 vmovdqa %ymm8,0+128(%rbp) 6864 vpsrld $20,%ymm7,%ymm8 6865 vpslld $32-20,%ymm7,%ymm7 6866 vpxor %ymm8,%ymm7,%ymm7 6867 vpsrld $20,%ymm6,%ymm8 6868 vpslld $32-20,%ymm6,%ymm6 6869 vpxor %ymm8,%ymm6,%ymm6 6870 vpsrld $20,%ymm5,%ymm8 6871 vpslld $32-20,%ymm5,%ymm5 6872 addq %rax,%r15 6873 adcq %rdx,%r9 6874 vpxor %ymm8,%ymm5,%ymm5 6875 vpsrld $20,%ymm4,%ymm8 6876 vpslld $32-20,%ymm4,%ymm4 6877 vpxor %ymm8,%ymm4,%ymm4 6878 vmovdqa .Lrol8(%rip),%ymm8 6879 vpaddd %ymm7,%ymm3,%ymm3 6880 vpaddd %ymm6,%ymm2,%ymm2 6881 vpaddd %ymm5,%ymm1,%ymm1 6882 vpaddd %ymm4,%ymm0,%ymm0 6883 vpxor %ymm3,%ymm15,%ymm15 6884 movq %r13,%r10 6885 movq %r14,%r11 6886 movq %r15,%r12 6887 andq $3,%r12 6888 movq %r15,%r13 6889 andq $-4,%r13 6890 movq %r9,%r14 6891 shrdq $2,%r9,%r15 6892 shrq $2,%r9 6893 addq %r13,%r15 6894 adcq %r14,%r9 6895 addq %r15,%r10 6896 adcq %r9,%r11 6897 adcq $0,%r12 6898 
6899.Lseal_avx2_main_loop_rounds_entry: 6900 vpxor %ymm2,%ymm14,%ymm14 6901 vpxor %ymm1,%ymm13,%ymm13 6902 vpxor %ymm0,%ymm12,%ymm12 6903 vpshufb %ymm8,%ymm15,%ymm15 6904 vpshufb %ymm8,%ymm14,%ymm14 6905 vpshufb %ymm8,%ymm13,%ymm13 6906 vpshufb %ymm8,%ymm12,%ymm12 6907 vpaddd %ymm15,%ymm11,%ymm11 6908 vpaddd %ymm14,%ymm10,%ymm10 6909 addq 0+16(%rdi),%r10 6910 adcq 8+16(%rdi),%r11 6911 adcq $1,%r12 6912 vpaddd %ymm13,%ymm9,%ymm9 6913 vpaddd 0+128(%rbp),%ymm12,%ymm8 6914 vpxor %ymm11,%ymm7,%ymm7 6915 vpxor %ymm10,%ymm6,%ymm6 6916 vpxor %ymm9,%ymm5,%ymm5 6917 vpxor %ymm8,%ymm4,%ymm4 6918 vmovdqa %ymm8,0+128(%rbp) 6919 vpsrld $25,%ymm7,%ymm8 6920 movq 0+0+0(%rbp),%rdx 6921 movq %rdx,%r15 6922 mulxq %r10,%r13,%r14 6923 mulxq %r11,%rax,%rdx 6924 imulq %r12,%r15 6925 addq %rax,%r14 6926 adcq %rdx,%r15 6927 vpslld $32-25,%ymm7,%ymm7 6928 vpxor %ymm8,%ymm7,%ymm7 6929 vpsrld $25,%ymm6,%ymm8 6930 vpslld $32-25,%ymm6,%ymm6 6931 vpxor %ymm8,%ymm6,%ymm6 6932 vpsrld $25,%ymm5,%ymm8 6933 vpslld $32-25,%ymm5,%ymm5 6934 vpxor %ymm8,%ymm5,%ymm5 6935 vpsrld $25,%ymm4,%ymm8 6936 vpslld $32-25,%ymm4,%ymm4 6937 vpxor %ymm8,%ymm4,%ymm4 6938 vmovdqa 0+128(%rbp),%ymm8 6939 vpalignr $4,%ymm7,%ymm7,%ymm7 6940 vpalignr $8,%ymm11,%ymm11,%ymm11 6941 vpalignr $12,%ymm15,%ymm15,%ymm15 6942 vpalignr $4,%ymm6,%ymm6,%ymm6 6943 vpalignr $8,%ymm10,%ymm10,%ymm10 6944 vpalignr $12,%ymm14,%ymm14,%ymm14 6945 movq 8+0+0(%rbp),%rdx 6946 mulxq %r10,%r10,%rax 6947 addq %r10,%r14 6948 mulxq %r11,%r11,%r9 6949 adcq %r11,%r15 6950 adcq $0,%r9 6951 imulq %r12,%rdx 6952 vpalignr $4,%ymm5,%ymm5,%ymm5 6953 vpalignr $8,%ymm9,%ymm9,%ymm9 6954 vpalignr $12,%ymm13,%ymm13,%ymm13 6955 vpalignr $4,%ymm4,%ymm4,%ymm4 6956 vpalignr $8,%ymm8,%ymm8,%ymm8 6957 vpalignr $12,%ymm12,%ymm12,%ymm12 6958 vmovdqa %ymm8,0+128(%rbp) 6959 vmovdqa .Lrol16(%rip),%ymm8 6960 vpaddd %ymm7,%ymm3,%ymm3 6961 vpaddd %ymm6,%ymm2,%ymm2 6962 vpaddd %ymm5,%ymm1,%ymm1 6963 vpaddd %ymm4,%ymm0,%ymm0 6964 vpxor %ymm3,%ymm15,%ymm15 6965 vpxor %ymm2,%ymm14,%ymm14 6966 vpxor %ymm1,%ymm13,%ymm13 6967 vpxor %ymm0,%ymm12,%ymm12 6968 vpshufb %ymm8,%ymm15,%ymm15 6969 vpshufb %ymm8,%ymm14,%ymm14 6970 addq %rax,%r15 6971 adcq %rdx,%r9 6972 vpshufb %ymm8,%ymm13,%ymm13 6973 vpshufb %ymm8,%ymm12,%ymm12 6974 vpaddd %ymm15,%ymm11,%ymm11 6975 vpaddd %ymm14,%ymm10,%ymm10 6976 vpaddd %ymm13,%ymm9,%ymm9 6977 vpaddd 0+128(%rbp),%ymm12,%ymm8 6978 vpxor %ymm11,%ymm7,%ymm7 6979 vpxor %ymm10,%ymm6,%ymm6 6980 vpxor %ymm9,%ymm5,%ymm5 6981 movq %r13,%r10 6982 movq %r14,%r11 6983 movq %r15,%r12 6984 andq $3,%r12 6985 movq %r15,%r13 6986 andq $-4,%r13 6987 movq %r9,%r14 6988 shrdq $2,%r9,%r15 6989 shrq $2,%r9 6990 addq %r13,%r15 6991 adcq %r14,%r9 6992 addq %r15,%r10 6993 adcq %r9,%r11 6994 adcq $0,%r12 6995 vpxor %ymm8,%ymm4,%ymm4 6996 vmovdqa %ymm8,0+128(%rbp) 6997 vpsrld $20,%ymm7,%ymm8 6998 vpslld $32-20,%ymm7,%ymm7 6999 vpxor %ymm8,%ymm7,%ymm7 7000 vpsrld $20,%ymm6,%ymm8 7001 vpslld $32-20,%ymm6,%ymm6 7002 vpxor %ymm8,%ymm6,%ymm6 7003 addq 0+32(%rdi),%r10 7004 adcq 8+32(%rdi),%r11 7005 adcq $1,%r12 7006 7007 leaq 48(%rdi),%rdi 7008 vpsrld $20,%ymm5,%ymm8 7009 vpslld $32-20,%ymm5,%ymm5 7010 vpxor %ymm8,%ymm5,%ymm5 7011 vpsrld $20,%ymm4,%ymm8 7012 vpslld $32-20,%ymm4,%ymm4 7013 vpxor %ymm8,%ymm4,%ymm4 7014 vmovdqa .Lrol8(%rip),%ymm8 7015 vpaddd %ymm7,%ymm3,%ymm3 7016 vpaddd %ymm6,%ymm2,%ymm2 7017 vpaddd %ymm5,%ymm1,%ymm1 7018 vpaddd %ymm4,%ymm0,%ymm0 7019 vpxor %ymm3,%ymm15,%ymm15 7020 vpxor %ymm2,%ymm14,%ymm14 7021 vpxor %ymm1,%ymm13,%ymm13 7022 vpxor %ymm0,%ymm12,%ymm12 7023 vpshufb %ymm8,%ymm15,%ymm15 
7024 vpshufb %ymm8,%ymm14,%ymm14 7025 vpshufb %ymm8,%ymm13,%ymm13 7026 movq 0+0+0(%rbp),%rdx 7027 movq %rdx,%r15 7028 mulxq %r10,%r13,%r14 7029 mulxq %r11,%rax,%rdx 7030 imulq %r12,%r15 7031 addq %rax,%r14 7032 adcq %rdx,%r15 7033 vpshufb %ymm8,%ymm12,%ymm12 7034 vpaddd %ymm15,%ymm11,%ymm11 7035 vpaddd %ymm14,%ymm10,%ymm10 7036 vpaddd %ymm13,%ymm9,%ymm9 7037 vpaddd 0+128(%rbp),%ymm12,%ymm8 7038 vpxor %ymm11,%ymm7,%ymm7 7039 vpxor %ymm10,%ymm6,%ymm6 7040 vpxor %ymm9,%ymm5,%ymm5 7041 movq 8+0+0(%rbp),%rdx 7042 mulxq %r10,%r10,%rax 7043 addq %r10,%r14 7044 mulxq %r11,%r11,%r9 7045 adcq %r11,%r15 7046 adcq $0,%r9 7047 imulq %r12,%rdx 7048 vpxor %ymm8,%ymm4,%ymm4 7049 vmovdqa %ymm8,0+128(%rbp) 7050 vpsrld $25,%ymm7,%ymm8 7051 vpslld $32-25,%ymm7,%ymm7 7052 vpxor %ymm8,%ymm7,%ymm7 7053 vpsrld $25,%ymm6,%ymm8 7054 vpslld $32-25,%ymm6,%ymm6 7055 vpxor %ymm8,%ymm6,%ymm6 7056 addq %rax,%r15 7057 adcq %rdx,%r9 7058 vpsrld $25,%ymm5,%ymm8 7059 vpslld $32-25,%ymm5,%ymm5 7060 vpxor %ymm8,%ymm5,%ymm5 7061 vpsrld $25,%ymm4,%ymm8 7062 vpslld $32-25,%ymm4,%ymm4 7063 vpxor %ymm8,%ymm4,%ymm4 7064 vmovdqa 0+128(%rbp),%ymm8 7065 vpalignr $12,%ymm7,%ymm7,%ymm7 7066 vpalignr $8,%ymm11,%ymm11,%ymm11 7067 vpalignr $4,%ymm15,%ymm15,%ymm15 7068 vpalignr $12,%ymm6,%ymm6,%ymm6 7069 vpalignr $8,%ymm10,%ymm10,%ymm10 7070 vpalignr $4,%ymm14,%ymm14,%ymm14 7071 vpalignr $12,%ymm5,%ymm5,%ymm5 7072 vpalignr $8,%ymm9,%ymm9,%ymm9 7073 vpalignr $4,%ymm13,%ymm13,%ymm13 7074 vpalignr $12,%ymm4,%ymm4,%ymm4 7075 vpalignr $8,%ymm8,%ymm8,%ymm8 7076 movq %r13,%r10 7077 movq %r14,%r11 7078 movq %r15,%r12 7079 andq $3,%r12 7080 movq %r15,%r13 7081 andq $-4,%r13 7082 movq %r9,%r14 7083 shrdq $2,%r9,%r15 7084 shrq $2,%r9 7085 addq %r13,%r15 7086 adcq %r14,%r9 7087 addq %r15,%r10 7088 adcq %r9,%r11 7089 adcq $0,%r12 7090 vpalignr $4,%ymm12,%ymm12,%ymm12 7091 7092 decq %rcx 7093 jne .Lseal_avx2_main_loop_rounds 7094 vpaddd .Lchacha20_consts(%rip),%ymm3,%ymm3 7095 vpaddd 0+64(%rbp),%ymm7,%ymm7 7096 vpaddd 0+96(%rbp),%ymm11,%ymm11 7097 vpaddd 0+256(%rbp),%ymm15,%ymm15 7098 vpaddd .Lchacha20_consts(%rip),%ymm2,%ymm2 7099 vpaddd 0+64(%rbp),%ymm6,%ymm6 7100 vpaddd 0+96(%rbp),%ymm10,%ymm10 7101 vpaddd 0+224(%rbp),%ymm14,%ymm14 7102 vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 7103 vpaddd 0+64(%rbp),%ymm5,%ymm5 7104 vpaddd 0+96(%rbp),%ymm9,%ymm9 7105 vpaddd 0+192(%rbp),%ymm13,%ymm13 7106 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 7107 vpaddd 0+64(%rbp),%ymm4,%ymm4 7108 vpaddd 0+96(%rbp),%ymm8,%ymm8 7109 vpaddd 0+160(%rbp),%ymm12,%ymm12 7110 7111 vmovdqa %ymm0,0+128(%rbp) 7112 addq 0+0(%rdi),%r10 7113 adcq 8+0(%rdi),%r11 7114 adcq $1,%r12 7115 movq 0+0+0(%rbp),%rdx 7116 movq %rdx,%r15 7117 mulxq %r10,%r13,%r14 7118 mulxq %r11,%rax,%rdx 7119 imulq %r12,%r15 7120 addq %rax,%r14 7121 adcq %rdx,%r15 7122 movq 8+0+0(%rbp),%rdx 7123 mulxq %r10,%r10,%rax 7124 addq %r10,%r14 7125 mulxq %r11,%r11,%r9 7126 adcq %r11,%r15 7127 adcq $0,%r9 7128 imulq %r12,%rdx 7129 addq %rax,%r15 7130 adcq %rdx,%r9 7131 movq %r13,%r10 7132 movq %r14,%r11 7133 movq %r15,%r12 7134 andq $3,%r12 7135 movq %r15,%r13 7136 andq $-4,%r13 7137 movq %r9,%r14 7138 shrdq $2,%r9,%r15 7139 shrq $2,%r9 7140 addq %r13,%r15 7141 adcq %r14,%r9 7142 addq %r15,%r10 7143 adcq %r9,%r11 7144 adcq $0,%r12 7145 addq 0+16(%rdi),%r10 7146 adcq 8+16(%rdi),%r11 7147 adcq $1,%r12 7148 movq 0+0+0(%rbp),%rdx 7149 movq %rdx,%r15 7150 mulxq %r10,%r13,%r14 7151 mulxq %r11,%rax,%rdx 7152 imulq %r12,%r15 7153 addq %rax,%r14 7154 adcq %rdx,%r15 7155 movq 8+0+0(%rbp),%rdx 7156 mulxq %r10,%r10,%rax 7157 addq %r10,%r14 
7158 mulxq %r11,%r11,%r9 7159 adcq %r11,%r15 7160 adcq $0,%r9 7161 imulq %r12,%rdx 7162 addq %rax,%r15 7163 adcq %rdx,%r9 7164 movq %r13,%r10 7165 movq %r14,%r11 7166 movq %r15,%r12 7167 andq $3,%r12 7168 movq %r15,%r13 7169 andq $-4,%r13 7170 movq %r9,%r14 7171 shrdq $2,%r9,%r15 7172 shrq $2,%r9 7173 addq %r13,%r15 7174 adcq %r14,%r9 7175 addq %r15,%r10 7176 adcq %r9,%r11 7177 adcq $0,%r12 7178 7179 leaq 32(%rdi),%rdi 7180 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 7181 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 7182 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 7183 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 7184 vpxor 0+0(%rsi),%ymm0,%ymm0 7185 vpxor 32+0(%rsi),%ymm3,%ymm3 7186 vpxor 64+0(%rsi),%ymm7,%ymm7 7187 vpxor 96+0(%rsi),%ymm11,%ymm11 7188 vmovdqu %ymm0,0+0(%rdi) 7189 vmovdqu %ymm3,32+0(%rdi) 7190 vmovdqu %ymm7,64+0(%rdi) 7191 vmovdqu %ymm11,96+0(%rdi) 7192 7193 vmovdqa 0+128(%rbp),%ymm0 7194 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 7195 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 7196 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 7197 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 7198 vpxor 0+128(%rsi),%ymm3,%ymm3 7199 vpxor 32+128(%rsi),%ymm2,%ymm2 7200 vpxor 64+128(%rsi),%ymm6,%ymm6 7201 vpxor 96+128(%rsi),%ymm10,%ymm10 7202 vmovdqu %ymm3,0+128(%rdi) 7203 vmovdqu %ymm2,32+128(%rdi) 7204 vmovdqu %ymm6,64+128(%rdi) 7205 vmovdqu %ymm10,96+128(%rdi) 7206 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 7207 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 7208 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 7209 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 7210 vpxor 0+256(%rsi),%ymm3,%ymm3 7211 vpxor 32+256(%rsi),%ymm1,%ymm1 7212 vpxor 64+256(%rsi),%ymm5,%ymm5 7213 vpxor 96+256(%rsi),%ymm9,%ymm9 7214 vmovdqu %ymm3,0+256(%rdi) 7215 vmovdqu %ymm1,32+256(%rdi) 7216 vmovdqu %ymm5,64+256(%rdi) 7217 vmovdqu %ymm9,96+256(%rdi) 7218 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 7219 vperm2i128 $0x13,%ymm0,%ymm4,%ymm4 7220 vperm2i128 $0x02,%ymm8,%ymm12,%ymm0 7221 vperm2i128 $0x13,%ymm8,%ymm12,%ymm8 7222 vpxor 0+384(%rsi),%ymm3,%ymm3 7223 vpxor 32+384(%rsi),%ymm0,%ymm0 7224 vpxor 64+384(%rsi),%ymm4,%ymm4 7225 vpxor 96+384(%rsi),%ymm8,%ymm8 7226 vmovdqu %ymm3,0+384(%rdi) 7227 vmovdqu %ymm0,32+384(%rdi) 7228 vmovdqu %ymm4,64+384(%rdi) 7229 vmovdqu %ymm8,96+384(%rdi) 7230 7231 leaq 512(%rsi),%rsi 7232 subq $512,%rbx 7233 cmpq $512,%rbx 7234 jg .Lseal_avx2_main_loop 7235 7236 addq 0+0(%rdi),%r10 7237 adcq 8+0(%rdi),%r11 7238 adcq $1,%r12 7239 movq 0+0+0(%rbp),%rdx 7240 movq %rdx,%r15 7241 mulxq %r10,%r13,%r14 7242 mulxq %r11,%rax,%rdx 7243 imulq %r12,%r15 7244 addq %rax,%r14 7245 adcq %rdx,%r15 7246 movq 8+0+0(%rbp),%rdx 7247 mulxq %r10,%r10,%rax 7248 addq %r10,%r14 7249 mulxq %r11,%r11,%r9 7250 adcq %r11,%r15 7251 adcq $0,%r9 7252 imulq %r12,%rdx 7253 addq %rax,%r15 7254 adcq %rdx,%r9 7255 movq %r13,%r10 7256 movq %r14,%r11 7257 movq %r15,%r12 7258 andq $3,%r12 7259 movq %r15,%r13 7260 andq $-4,%r13 7261 movq %r9,%r14 7262 shrdq $2,%r9,%r15 7263 shrq $2,%r9 7264 addq %r13,%r15 7265 adcq %r14,%r9 7266 addq %r15,%r10 7267 adcq %r9,%r11 7268 adcq $0,%r12 7269 addq 0+16(%rdi),%r10 7270 adcq 8+16(%rdi),%r11 7271 adcq $1,%r12 7272 movq 0+0+0(%rbp),%rdx 7273 movq %rdx,%r15 7274 mulxq %r10,%r13,%r14 7275 mulxq %r11,%rax,%rdx 7276 imulq %r12,%r15 7277 addq %rax,%r14 7278 adcq %rdx,%r15 7279 movq 8+0+0(%rbp),%rdx 7280 mulxq %r10,%r10,%rax 7281 addq %r10,%r14 7282 mulxq %r11,%r11,%r9 7283 adcq %r11,%r15 7284 adcq $0,%r9 7285 imulq %r12,%rdx 7286 addq %rax,%r15 7287 adcq %rdx,%r9 7288 movq %r13,%r10 7289 movq %r14,%r11 7290 movq %r15,%r12 7291 andq $3,%r12 7292 movq %r15,%r13 7293 andq $-4,%r13 7294 movq %r9,%r14 7295 shrdq 
$2,%r9,%r15 7296 shrq $2,%r9 7297 addq %r13,%r15 7298 adcq %r14,%r9 7299 addq %r15,%r10 7300 adcq %r9,%r11 7301 adcq $0,%r12 7302 7303 leaq 32(%rdi),%rdi 7304 movq $10,%rcx 7305 xorq %r8,%r8 7306 7307 cmpq $384,%rbx 7308 ja .Lseal_avx2_tail_512 7309 cmpq $256,%rbx 7310 ja .Lseal_avx2_tail_384 7311 cmpq $128,%rbx 7312 ja .Lseal_avx2_tail_256 7313 7314.Lseal_avx2_tail_128: 7315 vmovdqa .Lchacha20_consts(%rip),%ymm0 7316 vmovdqa 0+64(%rbp),%ymm4 7317 vmovdqa 0+96(%rbp),%ymm8 7318 vmovdqa .Lavx2_inc(%rip),%ymm12 7319 vpaddd 0+160(%rbp),%ymm12,%ymm12 7320 vmovdqa %ymm12,0+160(%rbp) 7321 7322.Lseal_avx2_tail_128_rounds_and_3xhash: 7323 addq 0+0(%rdi),%r10 7324 adcq 8+0(%rdi),%r11 7325 adcq $1,%r12 7326 movq 0+0+0(%rbp),%rdx 7327 movq %rdx,%r15 7328 mulxq %r10,%r13,%r14 7329 mulxq %r11,%rax,%rdx 7330 imulq %r12,%r15 7331 addq %rax,%r14 7332 adcq %rdx,%r15 7333 movq 8+0+0(%rbp),%rdx 7334 mulxq %r10,%r10,%rax 7335 addq %r10,%r14 7336 mulxq %r11,%r11,%r9 7337 adcq %r11,%r15 7338 adcq $0,%r9 7339 imulq %r12,%rdx 7340 addq %rax,%r15 7341 adcq %rdx,%r9 7342 movq %r13,%r10 7343 movq %r14,%r11 7344 movq %r15,%r12 7345 andq $3,%r12 7346 movq %r15,%r13 7347 andq $-4,%r13 7348 movq %r9,%r14 7349 shrdq $2,%r9,%r15 7350 shrq $2,%r9 7351 addq %r13,%r15 7352 adcq %r14,%r9 7353 addq %r15,%r10 7354 adcq %r9,%r11 7355 adcq $0,%r12 7356 7357 leaq 16(%rdi),%rdi 7358.Lseal_avx2_tail_128_rounds_and_2xhash: 7359 vpaddd %ymm4,%ymm0,%ymm0 7360 vpxor %ymm0,%ymm12,%ymm12 7361 vpshufb .Lrol16(%rip),%ymm12,%ymm12 7362 vpaddd %ymm12,%ymm8,%ymm8 7363 vpxor %ymm8,%ymm4,%ymm4 7364 vpsrld $20,%ymm4,%ymm3 7365 vpslld $12,%ymm4,%ymm4 7366 vpxor %ymm3,%ymm4,%ymm4 7367 vpaddd %ymm4,%ymm0,%ymm0 7368 vpxor %ymm0,%ymm12,%ymm12 7369 vpshufb .Lrol8(%rip),%ymm12,%ymm12 7370 vpaddd %ymm12,%ymm8,%ymm8 7371 vpxor %ymm8,%ymm4,%ymm4 7372 vpslld $7,%ymm4,%ymm3 7373 vpsrld $25,%ymm4,%ymm4 7374 vpxor %ymm3,%ymm4,%ymm4 7375 vpalignr $12,%ymm12,%ymm12,%ymm12 7376 vpalignr $8,%ymm8,%ymm8,%ymm8 7377 vpalignr $4,%ymm4,%ymm4,%ymm4 7378 addq 0+0(%rdi),%r10 7379 adcq 8+0(%rdi),%r11 7380 adcq $1,%r12 7381 movq 0+0+0(%rbp),%rdx 7382 movq %rdx,%r15 7383 mulxq %r10,%r13,%r14 7384 mulxq %r11,%rax,%rdx 7385 imulq %r12,%r15 7386 addq %rax,%r14 7387 adcq %rdx,%r15 7388 movq 8+0+0(%rbp),%rdx 7389 mulxq %r10,%r10,%rax 7390 addq %r10,%r14 7391 mulxq %r11,%r11,%r9 7392 adcq %r11,%r15 7393 adcq $0,%r9 7394 imulq %r12,%rdx 7395 addq %rax,%r15 7396 adcq %rdx,%r9 7397 movq %r13,%r10 7398 movq %r14,%r11 7399 movq %r15,%r12 7400 andq $3,%r12 7401 movq %r15,%r13 7402 andq $-4,%r13 7403 movq %r9,%r14 7404 shrdq $2,%r9,%r15 7405 shrq $2,%r9 7406 addq %r13,%r15 7407 adcq %r14,%r9 7408 addq %r15,%r10 7409 adcq %r9,%r11 7410 adcq $0,%r12 7411 vpaddd %ymm4,%ymm0,%ymm0 7412 vpxor %ymm0,%ymm12,%ymm12 7413 vpshufb .Lrol16(%rip),%ymm12,%ymm12 7414 vpaddd %ymm12,%ymm8,%ymm8 7415 vpxor %ymm8,%ymm4,%ymm4 7416 vpsrld $20,%ymm4,%ymm3 7417 vpslld $12,%ymm4,%ymm4 7418 vpxor %ymm3,%ymm4,%ymm4 7419 vpaddd %ymm4,%ymm0,%ymm0 7420 vpxor %ymm0,%ymm12,%ymm12 7421 vpshufb .Lrol8(%rip),%ymm12,%ymm12 7422 vpaddd %ymm12,%ymm8,%ymm8 7423 vpxor %ymm8,%ymm4,%ymm4 7424 vpslld $7,%ymm4,%ymm3 7425 vpsrld $25,%ymm4,%ymm4 7426 vpxor %ymm3,%ymm4,%ymm4 7427 vpalignr $4,%ymm12,%ymm12,%ymm12 7428 vpalignr $8,%ymm8,%ymm8,%ymm8 7429 vpalignr $12,%ymm4,%ymm4,%ymm4 7430 addq 0+16(%rdi),%r10 7431 adcq 8+16(%rdi),%r11 7432 adcq $1,%r12 7433 movq 0+0+0(%rbp),%rdx 7434 movq %rdx,%r15 7435 mulxq %r10,%r13,%r14 7436 mulxq %r11,%rax,%rdx 7437 imulq %r12,%r15 7438 addq %rax,%r14 7439 adcq %rdx,%r15 7440 movq 8+0+0(%rbp),%rdx 
7441 mulxq %r10,%r10,%rax 7442 addq %r10,%r14 7443 mulxq %r11,%r11,%r9 7444 adcq %r11,%r15 7445 adcq $0,%r9 7446 imulq %r12,%rdx 7447 addq %rax,%r15 7448 adcq %rdx,%r9 7449 movq %r13,%r10 7450 movq %r14,%r11 7451 movq %r15,%r12 7452 andq $3,%r12 7453 movq %r15,%r13 7454 andq $-4,%r13 7455 movq %r9,%r14 7456 shrdq $2,%r9,%r15 7457 shrq $2,%r9 7458 addq %r13,%r15 7459 adcq %r14,%r9 7460 addq %r15,%r10 7461 adcq %r9,%r11 7462 adcq $0,%r12 7463 7464 leaq 32(%rdi),%rdi 7465 decq %rcx 7466 jg .Lseal_avx2_tail_128_rounds_and_3xhash 7467 decq %r8 7468 jge .Lseal_avx2_tail_128_rounds_and_2xhash 7469 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 7470 vpaddd 0+64(%rbp),%ymm4,%ymm4 7471 vpaddd 0+96(%rbp),%ymm8,%ymm8 7472 vpaddd 0+160(%rbp),%ymm12,%ymm12 7473 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 7474 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 7475 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 7476 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 7477 vmovdqa %ymm3,%ymm8 7478 7479 jmp .Lseal_avx2_short_loop 7480 7481.Lseal_avx2_tail_256: 7482 vmovdqa .Lchacha20_consts(%rip),%ymm0 7483 vmovdqa 0+64(%rbp),%ymm4 7484 vmovdqa 0+96(%rbp),%ymm8 7485 vmovdqa %ymm0,%ymm1 7486 vmovdqa %ymm4,%ymm5 7487 vmovdqa %ymm8,%ymm9 7488 vmovdqa .Lavx2_inc(%rip),%ymm12 7489 vpaddd 0+160(%rbp),%ymm12,%ymm13 7490 vpaddd %ymm13,%ymm12,%ymm12 7491 vmovdqa %ymm12,0+160(%rbp) 7492 vmovdqa %ymm13,0+192(%rbp) 7493 7494.Lseal_avx2_tail_256_rounds_and_3xhash: 7495 addq 0+0(%rdi),%r10 7496 adcq 8+0(%rdi),%r11 7497 adcq $1,%r12 7498 movq 0+0+0(%rbp),%rax 7499 movq %rax,%r15 7500 mulq %r10 7501 movq %rax,%r13 7502 movq %rdx,%r14 7503 movq 0+0+0(%rbp),%rax 7504 mulq %r11 7505 imulq %r12,%r15 7506 addq %rax,%r14 7507 adcq %rdx,%r15 7508 movq 8+0+0(%rbp),%rax 7509 movq %rax,%r9 7510 mulq %r10 7511 addq %rax,%r14 7512 adcq $0,%rdx 7513 movq %rdx,%r10 7514 movq 8+0+0(%rbp),%rax 7515 mulq %r11 7516 addq %rax,%r15 7517 adcq $0,%rdx 7518 imulq %r12,%r9 7519 addq %r10,%r15 7520 adcq %rdx,%r9 7521 movq %r13,%r10 7522 movq %r14,%r11 7523 movq %r15,%r12 7524 andq $3,%r12 7525 movq %r15,%r13 7526 andq $-4,%r13 7527 movq %r9,%r14 7528 shrdq $2,%r9,%r15 7529 shrq $2,%r9 7530 addq %r13,%r15 7531 adcq %r14,%r9 7532 addq %r15,%r10 7533 adcq %r9,%r11 7534 adcq $0,%r12 7535 7536 leaq 16(%rdi),%rdi 7537.Lseal_avx2_tail_256_rounds_and_2xhash: 7538 vpaddd %ymm4,%ymm0,%ymm0 7539 vpxor %ymm0,%ymm12,%ymm12 7540 vpshufb .Lrol16(%rip),%ymm12,%ymm12 7541 vpaddd %ymm12,%ymm8,%ymm8 7542 vpxor %ymm8,%ymm4,%ymm4 7543 vpsrld $20,%ymm4,%ymm3 7544 vpslld $12,%ymm4,%ymm4 7545 vpxor %ymm3,%ymm4,%ymm4 7546 vpaddd %ymm4,%ymm0,%ymm0 7547 vpxor %ymm0,%ymm12,%ymm12 7548 vpshufb .Lrol8(%rip),%ymm12,%ymm12 7549 vpaddd %ymm12,%ymm8,%ymm8 7550 vpxor %ymm8,%ymm4,%ymm4 7551 vpslld $7,%ymm4,%ymm3 7552 vpsrld $25,%ymm4,%ymm4 7553 vpxor %ymm3,%ymm4,%ymm4 7554 vpalignr $12,%ymm12,%ymm12,%ymm12 7555 vpalignr $8,%ymm8,%ymm8,%ymm8 7556 vpalignr $4,%ymm4,%ymm4,%ymm4 7557 vpaddd %ymm5,%ymm1,%ymm1 7558 vpxor %ymm1,%ymm13,%ymm13 7559 vpshufb .Lrol16(%rip),%ymm13,%ymm13 7560 vpaddd %ymm13,%ymm9,%ymm9 7561 vpxor %ymm9,%ymm5,%ymm5 7562 vpsrld $20,%ymm5,%ymm3 7563 vpslld $12,%ymm5,%ymm5 7564 vpxor %ymm3,%ymm5,%ymm5 7565 vpaddd %ymm5,%ymm1,%ymm1 7566 vpxor %ymm1,%ymm13,%ymm13 7567 vpshufb .Lrol8(%rip),%ymm13,%ymm13 7568 vpaddd %ymm13,%ymm9,%ymm9 7569 vpxor %ymm9,%ymm5,%ymm5 7570 vpslld $7,%ymm5,%ymm3 7571 vpsrld $25,%ymm5,%ymm5 7572 vpxor %ymm3,%ymm5,%ymm5 7573 vpalignr $12,%ymm13,%ymm13,%ymm13 7574 vpalignr $8,%ymm9,%ymm9,%ymm9 7575 vpalignr $4,%ymm5,%ymm5,%ymm5 7576 addq 0+0(%rdi),%r10 7577 adcq 8+0(%rdi),%r11 7578 adcq $1,%r12 7579 
movq 0+0+0(%rbp),%rax 7580 movq %rax,%r15 7581 mulq %r10 7582 movq %rax,%r13 7583 movq %rdx,%r14 7584 movq 0+0+0(%rbp),%rax 7585 mulq %r11 7586 imulq %r12,%r15 7587 addq %rax,%r14 7588 adcq %rdx,%r15 7589 movq 8+0+0(%rbp),%rax 7590 movq %rax,%r9 7591 mulq %r10 7592 addq %rax,%r14 7593 adcq $0,%rdx 7594 movq %rdx,%r10 7595 movq 8+0+0(%rbp),%rax 7596 mulq %r11 7597 addq %rax,%r15 7598 adcq $0,%rdx 7599 imulq %r12,%r9 7600 addq %r10,%r15 7601 adcq %rdx,%r9 7602 movq %r13,%r10 7603 movq %r14,%r11 7604 movq %r15,%r12 7605 andq $3,%r12 7606 movq %r15,%r13 7607 andq $-4,%r13 7608 movq %r9,%r14 7609 shrdq $2,%r9,%r15 7610 shrq $2,%r9 7611 addq %r13,%r15 7612 adcq %r14,%r9 7613 addq %r15,%r10 7614 adcq %r9,%r11 7615 adcq $0,%r12 7616 vpaddd %ymm4,%ymm0,%ymm0 7617 vpxor %ymm0,%ymm12,%ymm12 7618 vpshufb .Lrol16(%rip),%ymm12,%ymm12 7619 vpaddd %ymm12,%ymm8,%ymm8 7620 vpxor %ymm8,%ymm4,%ymm4 7621 vpsrld $20,%ymm4,%ymm3 7622 vpslld $12,%ymm4,%ymm4 7623 vpxor %ymm3,%ymm4,%ymm4 7624 vpaddd %ymm4,%ymm0,%ymm0 7625 vpxor %ymm0,%ymm12,%ymm12 7626 vpshufb .Lrol8(%rip),%ymm12,%ymm12 7627 vpaddd %ymm12,%ymm8,%ymm8 7628 vpxor %ymm8,%ymm4,%ymm4 7629 vpslld $7,%ymm4,%ymm3 7630 vpsrld $25,%ymm4,%ymm4 7631 vpxor %ymm3,%ymm4,%ymm4 7632 vpalignr $4,%ymm12,%ymm12,%ymm12 7633 vpalignr $8,%ymm8,%ymm8,%ymm8 7634 vpalignr $12,%ymm4,%ymm4,%ymm4 7635 vpaddd %ymm5,%ymm1,%ymm1 7636 vpxor %ymm1,%ymm13,%ymm13 7637 vpshufb .Lrol16(%rip),%ymm13,%ymm13 7638 vpaddd %ymm13,%ymm9,%ymm9 7639 vpxor %ymm9,%ymm5,%ymm5 7640 vpsrld $20,%ymm5,%ymm3 7641 vpslld $12,%ymm5,%ymm5 7642 vpxor %ymm3,%ymm5,%ymm5 7643 vpaddd %ymm5,%ymm1,%ymm1 7644 vpxor %ymm1,%ymm13,%ymm13 7645 vpshufb .Lrol8(%rip),%ymm13,%ymm13 7646 vpaddd %ymm13,%ymm9,%ymm9 7647 vpxor %ymm9,%ymm5,%ymm5 7648 vpslld $7,%ymm5,%ymm3 7649 vpsrld $25,%ymm5,%ymm5 7650 vpxor %ymm3,%ymm5,%ymm5 7651 vpalignr $4,%ymm13,%ymm13,%ymm13 7652 vpalignr $8,%ymm9,%ymm9,%ymm9 7653 vpalignr $12,%ymm5,%ymm5,%ymm5 7654 addq 0+16(%rdi),%r10 7655 adcq 8+16(%rdi),%r11 7656 adcq $1,%r12 7657 movq 0+0+0(%rbp),%rax 7658 movq %rax,%r15 7659 mulq %r10 7660 movq %rax,%r13 7661 movq %rdx,%r14 7662 movq 0+0+0(%rbp),%rax 7663 mulq %r11 7664 imulq %r12,%r15 7665 addq %rax,%r14 7666 adcq %rdx,%r15 7667 movq 8+0+0(%rbp),%rax 7668 movq %rax,%r9 7669 mulq %r10 7670 addq %rax,%r14 7671 adcq $0,%rdx 7672 movq %rdx,%r10 7673 movq 8+0+0(%rbp),%rax 7674 mulq %r11 7675 addq %rax,%r15 7676 adcq $0,%rdx 7677 imulq %r12,%r9 7678 addq %r10,%r15 7679 adcq %rdx,%r9 7680 movq %r13,%r10 7681 movq %r14,%r11 7682 movq %r15,%r12 7683 andq $3,%r12 7684 movq %r15,%r13 7685 andq $-4,%r13 7686 movq %r9,%r14 7687 shrdq $2,%r9,%r15 7688 shrq $2,%r9 7689 addq %r13,%r15 7690 adcq %r14,%r9 7691 addq %r15,%r10 7692 adcq %r9,%r11 7693 adcq $0,%r12 7694 7695 leaq 32(%rdi),%rdi 7696 decq %rcx 7697 jg .Lseal_avx2_tail_256_rounds_and_3xhash 7698 decq %r8 7699 jge .Lseal_avx2_tail_256_rounds_and_2xhash 7700 vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 7701 vpaddd 0+64(%rbp),%ymm5,%ymm5 7702 vpaddd 0+96(%rbp),%ymm9,%ymm9 7703 vpaddd 0+192(%rbp),%ymm13,%ymm13 7704 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 7705 vpaddd 0+64(%rbp),%ymm4,%ymm4 7706 vpaddd 0+96(%rbp),%ymm8,%ymm8 7707 vpaddd 0+160(%rbp),%ymm12,%ymm12 7708 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 7709 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 7710 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 7711 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 7712 vpxor 0+0(%rsi),%ymm3,%ymm3 7713 vpxor 32+0(%rsi),%ymm1,%ymm1 7714 vpxor 64+0(%rsi),%ymm5,%ymm5 7715 vpxor 96+0(%rsi),%ymm9,%ymm9 7716 vmovdqu %ymm3,0+0(%rdi) 7717 vmovdqu 
%ymm1,32+0(%rdi) 7718 vmovdqu %ymm5,64+0(%rdi) 7719 vmovdqu %ymm9,96+0(%rdi) 7720 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 7721 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 7722 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 7723 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 7724 vmovdqa %ymm3,%ymm8 7725 7726 movq $128,%rcx 7727 leaq 128(%rsi),%rsi 7728 subq $128,%rbx 7729 jmp .Lseal_avx2_short_hash_remainder 7730 7731.Lseal_avx2_tail_384: 7732 vmovdqa .Lchacha20_consts(%rip),%ymm0 7733 vmovdqa 0+64(%rbp),%ymm4 7734 vmovdqa 0+96(%rbp),%ymm8 7735 vmovdqa %ymm0,%ymm1 7736 vmovdqa %ymm4,%ymm5 7737 vmovdqa %ymm8,%ymm9 7738 vmovdqa %ymm0,%ymm2 7739 vmovdqa %ymm4,%ymm6 7740 vmovdqa %ymm8,%ymm10 7741 vmovdqa .Lavx2_inc(%rip),%ymm12 7742 vpaddd 0+160(%rbp),%ymm12,%ymm14 7743 vpaddd %ymm14,%ymm12,%ymm13 7744 vpaddd %ymm13,%ymm12,%ymm12 7745 vmovdqa %ymm12,0+160(%rbp) 7746 vmovdqa %ymm13,0+192(%rbp) 7747 vmovdqa %ymm14,0+224(%rbp) 7748 7749.Lseal_avx2_tail_384_rounds_and_3xhash: 7750 addq 0+0(%rdi),%r10 7751 adcq 8+0(%rdi),%r11 7752 adcq $1,%r12 7753 movq 0+0+0(%rbp),%rax 7754 movq %rax,%r15 7755 mulq %r10 7756 movq %rax,%r13 7757 movq %rdx,%r14 7758 movq 0+0+0(%rbp),%rax 7759 mulq %r11 7760 imulq %r12,%r15 7761 addq %rax,%r14 7762 adcq %rdx,%r15 7763 movq 8+0+0(%rbp),%rax 7764 movq %rax,%r9 7765 mulq %r10 7766 addq %rax,%r14 7767 adcq $0,%rdx 7768 movq %rdx,%r10 7769 movq 8+0+0(%rbp),%rax 7770 mulq %r11 7771 addq %rax,%r15 7772 adcq $0,%rdx 7773 imulq %r12,%r9 7774 addq %r10,%r15 7775 adcq %rdx,%r9 7776 movq %r13,%r10 7777 movq %r14,%r11 7778 movq %r15,%r12 7779 andq $3,%r12 7780 movq %r15,%r13 7781 andq $-4,%r13 7782 movq %r9,%r14 7783 shrdq $2,%r9,%r15 7784 shrq $2,%r9 7785 addq %r13,%r15 7786 adcq %r14,%r9 7787 addq %r15,%r10 7788 adcq %r9,%r11 7789 adcq $0,%r12 7790 7791 leaq 16(%rdi),%rdi 7792.Lseal_avx2_tail_384_rounds_and_2xhash: 7793 vpaddd %ymm4,%ymm0,%ymm0 7794 vpxor %ymm0,%ymm12,%ymm12 7795 vpshufb .Lrol16(%rip),%ymm12,%ymm12 7796 vpaddd %ymm12,%ymm8,%ymm8 7797 vpxor %ymm8,%ymm4,%ymm4 7798 vpsrld $20,%ymm4,%ymm3 7799 vpslld $12,%ymm4,%ymm4 7800 vpxor %ymm3,%ymm4,%ymm4 7801 vpaddd %ymm4,%ymm0,%ymm0 7802 vpxor %ymm0,%ymm12,%ymm12 7803 vpshufb .Lrol8(%rip),%ymm12,%ymm12 7804 vpaddd %ymm12,%ymm8,%ymm8 7805 vpxor %ymm8,%ymm4,%ymm4 7806 vpslld $7,%ymm4,%ymm3 7807 vpsrld $25,%ymm4,%ymm4 7808 vpxor %ymm3,%ymm4,%ymm4 7809 vpalignr $12,%ymm12,%ymm12,%ymm12 7810 vpalignr $8,%ymm8,%ymm8,%ymm8 7811 vpalignr $4,%ymm4,%ymm4,%ymm4 7812 vpaddd %ymm5,%ymm1,%ymm1 7813 vpxor %ymm1,%ymm13,%ymm13 7814 vpshufb .Lrol16(%rip),%ymm13,%ymm13 7815 vpaddd %ymm13,%ymm9,%ymm9 7816 vpxor %ymm9,%ymm5,%ymm5 7817 vpsrld $20,%ymm5,%ymm3 7818 vpslld $12,%ymm5,%ymm5 7819 vpxor %ymm3,%ymm5,%ymm5 7820 vpaddd %ymm5,%ymm1,%ymm1 7821 vpxor %ymm1,%ymm13,%ymm13 7822 vpshufb .Lrol8(%rip),%ymm13,%ymm13 7823 vpaddd %ymm13,%ymm9,%ymm9 7824 vpxor %ymm9,%ymm5,%ymm5 7825 vpslld $7,%ymm5,%ymm3 7826 vpsrld $25,%ymm5,%ymm5 7827 vpxor %ymm3,%ymm5,%ymm5 7828 vpalignr $12,%ymm13,%ymm13,%ymm13 7829 vpalignr $8,%ymm9,%ymm9,%ymm9 7830 vpalignr $4,%ymm5,%ymm5,%ymm5 7831 addq 0+0(%rdi),%r10 7832 adcq 8+0(%rdi),%r11 7833 adcq $1,%r12 7834 movq 0+0+0(%rbp),%rax 7835 movq %rax,%r15 7836 mulq %r10 7837 movq %rax,%r13 7838 movq %rdx,%r14 7839 movq 0+0+0(%rbp),%rax 7840 mulq %r11 7841 imulq %r12,%r15 7842 addq %rax,%r14 7843 adcq %rdx,%r15 7844 movq 8+0+0(%rbp),%rax 7845 movq %rax,%r9 7846 mulq %r10 7847 addq %rax,%r14 7848 adcq $0,%rdx 7849 movq %rdx,%r10 7850 movq 8+0+0(%rbp),%rax 7851 mulq %r11 7852 addq %rax,%r15 7853 adcq $0,%rdx 7854 imulq %r12,%r9 7855 addq %r10,%r15 7856 adcq 
%rdx,%r9 7857 movq %r13,%r10 7858 movq %r14,%r11 7859 movq %r15,%r12 7860 andq $3,%r12 7861 movq %r15,%r13 7862 andq $-4,%r13 7863 movq %r9,%r14 7864 shrdq $2,%r9,%r15 7865 shrq $2,%r9 7866 addq %r13,%r15 7867 adcq %r14,%r9 7868 addq %r15,%r10 7869 adcq %r9,%r11 7870 adcq $0,%r12 7871 vpaddd %ymm6,%ymm2,%ymm2 7872 vpxor %ymm2,%ymm14,%ymm14 7873 vpshufb .Lrol16(%rip),%ymm14,%ymm14 7874 vpaddd %ymm14,%ymm10,%ymm10 7875 vpxor %ymm10,%ymm6,%ymm6 7876 vpsrld $20,%ymm6,%ymm3 7877 vpslld $12,%ymm6,%ymm6 7878 vpxor %ymm3,%ymm6,%ymm6 7879 vpaddd %ymm6,%ymm2,%ymm2 7880 vpxor %ymm2,%ymm14,%ymm14 7881 vpshufb .Lrol8(%rip),%ymm14,%ymm14 7882 vpaddd %ymm14,%ymm10,%ymm10 7883 vpxor %ymm10,%ymm6,%ymm6 7884 vpslld $7,%ymm6,%ymm3 7885 vpsrld $25,%ymm6,%ymm6 7886 vpxor %ymm3,%ymm6,%ymm6 7887 vpalignr $12,%ymm14,%ymm14,%ymm14 7888 vpalignr $8,%ymm10,%ymm10,%ymm10 7889 vpalignr $4,%ymm6,%ymm6,%ymm6 7890 vpaddd %ymm4,%ymm0,%ymm0 7891 vpxor %ymm0,%ymm12,%ymm12 7892 vpshufb .Lrol16(%rip),%ymm12,%ymm12 7893 vpaddd %ymm12,%ymm8,%ymm8 7894 vpxor %ymm8,%ymm4,%ymm4 7895 vpsrld $20,%ymm4,%ymm3 7896 vpslld $12,%ymm4,%ymm4 7897 vpxor %ymm3,%ymm4,%ymm4 7898 vpaddd %ymm4,%ymm0,%ymm0 7899 vpxor %ymm0,%ymm12,%ymm12 7900 vpshufb .Lrol8(%rip),%ymm12,%ymm12 7901 vpaddd %ymm12,%ymm8,%ymm8 7902 vpxor %ymm8,%ymm4,%ymm4 7903 vpslld $7,%ymm4,%ymm3 7904 vpsrld $25,%ymm4,%ymm4 7905 vpxor %ymm3,%ymm4,%ymm4 7906 vpalignr $4,%ymm12,%ymm12,%ymm12 7907 vpalignr $8,%ymm8,%ymm8,%ymm8 7908 vpalignr $12,%ymm4,%ymm4,%ymm4 7909 addq 0+16(%rdi),%r10 7910 adcq 8+16(%rdi),%r11 7911 adcq $1,%r12 7912 movq 0+0+0(%rbp),%rax 7913 movq %rax,%r15 7914 mulq %r10 7915 movq %rax,%r13 7916 movq %rdx,%r14 7917 movq 0+0+0(%rbp),%rax 7918 mulq %r11 7919 imulq %r12,%r15 7920 addq %rax,%r14 7921 adcq %rdx,%r15 7922 movq 8+0+0(%rbp),%rax 7923 movq %rax,%r9 7924 mulq %r10 7925 addq %rax,%r14 7926 adcq $0,%rdx 7927 movq %rdx,%r10 7928 movq 8+0+0(%rbp),%rax 7929 mulq %r11 7930 addq %rax,%r15 7931 adcq $0,%rdx 7932 imulq %r12,%r9 7933 addq %r10,%r15 7934 adcq %rdx,%r9 7935 movq %r13,%r10 7936 movq %r14,%r11 7937 movq %r15,%r12 7938 andq $3,%r12 7939 movq %r15,%r13 7940 andq $-4,%r13 7941 movq %r9,%r14 7942 shrdq $2,%r9,%r15 7943 shrq $2,%r9 7944 addq %r13,%r15 7945 adcq %r14,%r9 7946 addq %r15,%r10 7947 adcq %r9,%r11 7948 adcq $0,%r12 7949 vpaddd %ymm5,%ymm1,%ymm1 7950 vpxor %ymm1,%ymm13,%ymm13 7951 vpshufb .Lrol16(%rip),%ymm13,%ymm13 7952 vpaddd %ymm13,%ymm9,%ymm9 7953 vpxor %ymm9,%ymm5,%ymm5 7954 vpsrld $20,%ymm5,%ymm3 7955 vpslld $12,%ymm5,%ymm5 7956 vpxor %ymm3,%ymm5,%ymm5 7957 vpaddd %ymm5,%ymm1,%ymm1 7958 vpxor %ymm1,%ymm13,%ymm13 7959 vpshufb .Lrol8(%rip),%ymm13,%ymm13 7960 vpaddd %ymm13,%ymm9,%ymm9 7961 vpxor %ymm9,%ymm5,%ymm5 7962 vpslld $7,%ymm5,%ymm3 7963 vpsrld $25,%ymm5,%ymm5 7964 vpxor %ymm3,%ymm5,%ymm5 7965 vpalignr $4,%ymm13,%ymm13,%ymm13 7966 vpalignr $8,%ymm9,%ymm9,%ymm9 7967 vpalignr $12,%ymm5,%ymm5,%ymm5 7968 vpaddd %ymm6,%ymm2,%ymm2 7969 vpxor %ymm2,%ymm14,%ymm14 7970 vpshufb .Lrol16(%rip),%ymm14,%ymm14 7971 vpaddd %ymm14,%ymm10,%ymm10 7972 vpxor %ymm10,%ymm6,%ymm6 7973 vpsrld $20,%ymm6,%ymm3 7974 vpslld $12,%ymm6,%ymm6 7975 vpxor %ymm3,%ymm6,%ymm6 7976 vpaddd %ymm6,%ymm2,%ymm2 7977 vpxor %ymm2,%ymm14,%ymm14 7978 vpshufb .Lrol8(%rip),%ymm14,%ymm14 7979 vpaddd %ymm14,%ymm10,%ymm10 7980 vpxor %ymm10,%ymm6,%ymm6 7981 vpslld $7,%ymm6,%ymm3 7982 vpsrld $25,%ymm6,%ymm6 7983 vpxor %ymm3,%ymm6,%ymm6 7984 vpalignr $4,%ymm14,%ymm14,%ymm14 7985 vpalignr $8,%ymm10,%ymm10,%ymm10 7986 vpalignr $12,%ymm6,%ymm6,%ymm6 7987 7988 leaq 32(%rdi),%rdi 7989 decq %rcx 
7990 jg .Lseal_avx2_tail_384_rounds_and_3xhash 7991 decq %r8 7992 jge .Lseal_avx2_tail_384_rounds_and_2xhash 7993 vpaddd .Lchacha20_consts(%rip),%ymm2,%ymm2 7994 vpaddd 0+64(%rbp),%ymm6,%ymm6 7995 vpaddd 0+96(%rbp),%ymm10,%ymm10 7996 vpaddd 0+224(%rbp),%ymm14,%ymm14 7997 vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 7998 vpaddd 0+64(%rbp),%ymm5,%ymm5 7999 vpaddd 0+96(%rbp),%ymm9,%ymm9 8000 vpaddd 0+192(%rbp),%ymm13,%ymm13 8001 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 8002 vpaddd 0+64(%rbp),%ymm4,%ymm4 8003 vpaddd 0+96(%rbp),%ymm8,%ymm8 8004 vpaddd 0+160(%rbp),%ymm12,%ymm12 8005 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 8006 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 8007 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 8008 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 8009 vpxor 0+0(%rsi),%ymm3,%ymm3 8010 vpxor 32+0(%rsi),%ymm2,%ymm2 8011 vpxor 64+0(%rsi),%ymm6,%ymm6 8012 vpxor 96+0(%rsi),%ymm10,%ymm10 8013 vmovdqu %ymm3,0+0(%rdi) 8014 vmovdqu %ymm2,32+0(%rdi) 8015 vmovdqu %ymm6,64+0(%rdi) 8016 vmovdqu %ymm10,96+0(%rdi) 8017 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 8018 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 8019 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 8020 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 8021 vpxor 0+128(%rsi),%ymm3,%ymm3 8022 vpxor 32+128(%rsi),%ymm1,%ymm1 8023 vpxor 64+128(%rsi),%ymm5,%ymm5 8024 vpxor 96+128(%rsi),%ymm9,%ymm9 8025 vmovdqu %ymm3,0+128(%rdi) 8026 vmovdqu %ymm1,32+128(%rdi) 8027 vmovdqu %ymm5,64+128(%rdi) 8028 vmovdqu %ymm9,96+128(%rdi) 8029 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 8030 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 8031 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 8032 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 8033 vmovdqa %ymm3,%ymm8 8034 8035 movq $256,%rcx 8036 leaq 256(%rsi),%rsi 8037 subq $256,%rbx 8038 jmp .Lseal_avx2_short_hash_remainder 8039 8040.Lseal_avx2_tail_512: 8041 vmovdqa .Lchacha20_consts(%rip),%ymm0 8042 vmovdqa 0+64(%rbp),%ymm4 8043 vmovdqa 0+96(%rbp),%ymm8 8044 vmovdqa %ymm0,%ymm1 8045 vmovdqa %ymm4,%ymm5 8046 vmovdqa %ymm8,%ymm9 8047 vmovdqa %ymm0,%ymm2 8048 vmovdqa %ymm4,%ymm6 8049 vmovdqa %ymm8,%ymm10 8050 vmovdqa %ymm0,%ymm3 8051 vmovdqa %ymm4,%ymm7 8052 vmovdqa %ymm8,%ymm11 8053 vmovdqa .Lavx2_inc(%rip),%ymm12 8054 vpaddd 0+160(%rbp),%ymm12,%ymm15 8055 vpaddd %ymm15,%ymm12,%ymm14 8056 vpaddd %ymm14,%ymm12,%ymm13 8057 vpaddd %ymm13,%ymm12,%ymm12 8058 vmovdqa %ymm15,0+256(%rbp) 8059 vmovdqa %ymm14,0+224(%rbp) 8060 vmovdqa %ymm13,0+192(%rbp) 8061 vmovdqa %ymm12,0+160(%rbp) 8062 8063.Lseal_avx2_tail_512_rounds_and_3xhash: 8064 addq 0+0(%rdi),%r10 8065 adcq 8+0(%rdi),%r11 8066 adcq $1,%r12 8067 movq 0+0+0(%rbp),%rdx 8068 movq %rdx,%r15 8069 mulxq %r10,%r13,%r14 8070 mulxq %r11,%rax,%rdx 8071 imulq %r12,%r15 8072 addq %rax,%r14 8073 adcq %rdx,%r15 8074 movq 8+0+0(%rbp),%rdx 8075 mulxq %r10,%r10,%rax 8076 addq %r10,%r14 8077 mulxq %r11,%r11,%r9 8078 adcq %r11,%r15 8079 adcq $0,%r9 8080 imulq %r12,%rdx 8081 addq %rax,%r15 8082 adcq %rdx,%r9 8083 movq %r13,%r10 8084 movq %r14,%r11 8085 movq %r15,%r12 8086 andq $3,%r12 8087 movq %r15,%r13 8088 andq $-4,%r13 8089 movq %r9,%r14 8090 shrdq $2,%r9,%r15 8091 shrq $2,%r9 8092 addq %r13,%r15 8093 adcq %r14,%r9 8094 addq %r15,%r10 8095 adcq %r9,%r11 8096 adcq $0,%r12 8097 8098 leaq 16(%rdi),%rdi 8099.Lseal_avx2_tail_512_rounds_and_2xhash: 8100 vmovdqa %ymm8,0+128(%rbp) 8101 vmovdqa .Lrol16(%rip),%ymm8 8102 vpaddd %ymm7,%ymm3,%ymm3 8103 vpaddd %ymm6,%ymm2,%ymm2 8104 vpaddd %ymm5,%ymm1,%ymm1 8105 vpaddd %ymm4,%ymm0,%ymm0 8106 vpxor %ymm3,%ymm15,%ymm15 8107 vpxor %ymm2,%ymm14,%ymm14 8108 vpxor %ymm1,%ymm13,%ymm13 8109 vpxor %ymm0,%ymm12,%ymm12 8110 vpshufb %ymm8,%ymm15,%ymm15 
 vpshufb %ymm8,%ymm14,%ymm14
 vpshufb %ymm8,%ymm13,%ymm13
 vpshufb %ymm8,%ymm12,%ymm12
 vpaddd %ymm15,%ymm11,%ymm11
 vpaddd %ymm14,%ymm10,%ymm10
 vpaddd %ymm13,%ymm9,%ymm9
 vpaddd 0+128(%rbp),%ymm12,%ymm8
 vpxor %ymm11,%ymm7,%ymm7
 vpxor %ymm10,%ymm6,%ymm6
 addq 0+0(%rdi),%r10
 adcq 8+0(%rdi),%r11
 adcq $1,%r12
 vpxor %ymm9,%ymm5,%ymm5
 vpxor %ymm8,%ymm4,%ymm4
 vmovdqa %ymm8,0+128(%rbp)
 vpsrld $20,%ymm7,%ymm8
 vpslld $32-20,%ymm7,%ymm7
 vpxor %ymm8,%ymm7,%ymm7
 vpsrld $20,%ymm6,%ymm8
 vpslld $32-20,%ymm6,%ymm6
 vpxor %ymm8,%ymm6,%ymm6
 vpsrld $20,%ymm5,%ymm8
 vpslld $32-20,%ymm5,%ymm5
 vpxor %ymm8,%ymm5,%ymm5
 vpsrld $20,%ymm4,%ymm8
 vpslld $32-20,%ymm4,%ymm4
 vpxor %ymm8,%ymm4,%ymm4
 vmovdqa .Lrol8(%rip),%ymm8
 vpaddd %ymm7,%ymm3,%ymm3
 vpaddd %ymm6,%ymm2,%ymm2
 vpaddd %ymm5,%ymm1,%ymm1
 vpaddd %ymm4,%ymm0,%ymm0
 movq 0+0+0(%rbp),%rdx
 movq %rdx,%r15
 mulxq %r10,%r13,%r14
 mulxq %r11,%rax,%rdx
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 vpxor %ymm3,%ymm15,%ymm15
 vpxor %ymm2,%ymm14,%ymm14
 vpxor %ymm1,%ymm13,%ymm13
 vpxor %ymm0,%ymm12,%ymm12
 vpshufb %ymm8,%ymm15,%ymm15
 vpshufb %ymm8,%ymm14,%ymm14
 vpshufb %ymm8,%ymm13,%ymm13
 vpshufb %ymm8,%ymm12,%ymm12
 vpaddd %ymm15,%ymm11,%ymm11
 vpaddd %ymm14,%ymm10,%ymm10
 vpaddd %ymm13,%ymm9,%ymm9
 vpaddd 0+128(%rbp),%ymm12,%ymm8
 vpxor %ymm11,%ymm7,%ymm7
 vpxor %ymm10,%ymm6,%ymm6
 vpxor %ymm9,%ymm5,%ymm5
 vpxor %ymm8,%ymm4,%ymm4
 vmovdqa %ymm8,0+128(%rbp)
 vpsrld $25,%ymm7,%ymm8
 vpslld $32-25,%ymm7,%ymm7
 vpxor %ymm8,%ymm7,%ymm7
 movq 8+0+0(%rbp),%rdx
 mulxq %r10,%r10,%rax
 addq %r10,%r14
 mulxq %r11,%r11,%r9
 adcq %r11,%r15
 adcq $0,%r9
 imulq %r12,%rdx
 vpsrld $25,%ymm6,%ymm8
 vpslld $32-25,%ymm6,%ymm6
 vpxor %ymm8,%ymm6,%ymm6
 vpsrld $25,%ymm5,%ymm8
 vpslld $32-25,%ymm5,%ymm5
 vpxor %ymm8,%ymm5,%ymm5
 vpsrld $25,%ymm4,%ymm8
 vpslld $32-25,%ymm4,%ymm4
 vpxor %ymm8,%ymm4,%ymm4
 vmovdqa 0+128(%rbp),%ymm8
 vpalignr $4,%ymm7,%ymm7,%ymm7
 vpalignr $8,%ymm11,%ymm11,%ymm11
 vpalignr $12,%ymm15,%ymm15,%ymm15
 vpalignr $4,%ymm6,%ymm6,%ymm6
 vpalignr $8,%ymm10,%ymm10,%ymm10
 vpalignr $12,%ymm14,%ymm14,%ymm14
 vpalignr $4,%ymm5,%ymm5,%ymm5
 vpalignr $8,%ymm9,%ymm9,%ymm9
 vpalignr $12,%ymm13,%ymm13,%ymm13
 vpalignr $4,%ymm4,%ymm4,%ymm4
 addq %rax,%r15
 adcq %rdx,%r9
 vpalignr $8,%ymm8,%ymm8,%ymm8
 vpalignr $12,%ymm12,%ymm12,%ymm12
 vmovdqa %ymm8,0+128(%rbp)
 vmovdqa .Lrol16(%rip),%ymm8
 vpaddd %ymm7,%ymm3,%ymm3
 vpaddd %ymm6,%ymm2,%ymm2
 vpaddd %ymm5,%ymm1,%ymm1
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm3,%ymm15,%ymm15
 vpxor %ymm2,%ymm14,%ymm14
 vpxor %ymm1,%ymm13,%ymm13
 vpxor %ymm0,%ymm12,%ymm12
 vpshufb %ymm8,%ymm15,%ymm15
 vpshufb %ymm8,%ymm14,%ymm14
 vpshufb %ymm8,%ymm13,%ymm13
 vpshufb %ymm8,%ymm12,%ymm12
 vpaddd %ymm15,%ymm11,%ymm11
 vpaddd %ymm14,%ymm10,%ymm10
 vpaddd %ymm13,%ymm9,%ymm9
 vpaddd 0+128(%rbp),%ymm12,%ymm8
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r15
 adcq %r14,%r9
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12
 vpxor %ymm11,%ymm7,%ymm7
 vpxor %ymm10,%ymm6,%ymm6
 vpxor %ymm9,%ymm5,%ymm5
 vpxor %ymm8,%ymm4,%ymm4
 vmovdqa %ymm8,0+128(%rbp)
 vpsrld $20,%ymm7,%ymm8
 vpslld $32-20,%ymm7,%ymm7
 vpxor %ymm8,%ymm7,%ymm7
 vpsrld $20,%ymm6,%ymm8
 vpslld $32-20,%ymm6,%ymm6
 vpxor %ymm8,%ymm6,%ymm6
 vpsrld $20,%ymm5,%ymm8
 vpslld $32-20,%ymm5,%ymm5
 vpxor %ymm8,%ymm5,%ymm5
 vpsrld $20,%ymm4,%ymm8
 vpslld $32-20,%ymm4,%ymm4
 vpxor %ymm8,%ymm4,%ymm4
 vmovdqa .Lrol8(%rip),%ymm8
 vpaddd %ymm7,%ymm3,%ymm3
 vpaddd %ymm6,%ymm2,%ymm2
 addq 0+16(%rdi),%r10
 adcq 8+16(%rdi),%r11
 adcq $1,%r12
 vpaddd %ymm5,%ymm1,%ymm1
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm3,%ymm15,%ymm15
 vpxor %ymm2,%ymm14,%ymm14
 vpxor %ymm1,%ymm13,%ymm13
 vpxor %ymm0,%ymm12,%ymm12
 vpshufb %ymm8,%ymm15,%ymm15
 vpshufb %ymm8,%ymm14,%ymm14
 vpshufb %ymm8,%ymm13,%ymm13
 vpshufb %ymm8,%ymm12,%ymm12
 vpaddd %ymm15,%ymm11,%ymm11
 vpaddd %ymm14,%ymm10,%ymm10
 vpaddd %ymm13,%ymm9,%ymm9
 vpaddd 0+128(%rbp),%ymm12,%ymm8
 vpxor %ymm11,%ymm7,%ymm7
 vpxor %ymm10,%ymm6,%ymm6
 vpxor %ymm9,%ymm5,%ymm5
 vpxor %ymm8,%ymm4,%ymm4
 vmovdqa %ymm8,0+128(%rbp)
 vpsrld $25,%ymm7,%ymm8
 movq 0+0+0(%rbp),%rdx
 movq %rdx,%r15
 mulxq %r10,%r13,%r14
 mulxq %r11,%rax,%rdx
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 vpslld $32-25,%ymm7,%ymm7
 vpxor %ymm8,%ymm7,%ymm7
 vpsrld $25,%ymm6,%ymm8
 vpslld $32-25,%ymm6,%ymm6
 vpxor %ymm8,%ymm6,%ymm6
 vpsrld $25,%ymm5,%ymm8
 vpslld $32-25,%ymm5,%ymm5
 vpxor %ymm8,%ymm5,%ymm5
 vpsrld $25,%ymm4,%ymm8
 vpslld $32-25,%ymm4,%ymm4
 vpxor %ymm8,%ymm4,%ymm4
 vmovdqa 0+128(%rbp),%ymm8
 vpalignr $12,%ymm7,%ymm7,%ymm7
 vpalignr $8,%ymm11,%ymm11,%ymm11
 vpalignr $4,%ymm15,%ymm15,%ymm15
 vpalignr $12,%ymm6,%ymm6,%ymm6
 vpalignr $8,%ymm10,%ymm10,%ymm10
 vpalignr $4,%ymm14,%ymm14,%ymm14
 vpalignr $12,%ymm5,%ymm5,%ymm5
 vpalignr $8,%ymm9,%ymm9,%ymm9
 movq 8+0+0(%rbp),%rdx
 mulxq %r10,%r10,%rax
 addq %r10,%r14
 mulxq %r11,%r11,%r9
 adcq %r11,%r15
 adcq $0,%r9
 imulq %r12,%rdx
 vpalignr $4,%ymm13,%ymm13,%ymm13
 vpalignr $12,%ymm4,%ymm4,%ymm4
 vpalignr $8,%ymm8,%ymm8,%ymm8
 vpalignr $4,%ymm12,%ymm12,%ymm12

 addq %rax,%r15
 adcq %rdx,%r9

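// Poly1305 reduction for the block absorbed above: the double-width product
// of the accumulator and r (the key half kept at 0(%rbp)) now sits in
// r13:r14:r15:r9.  The part above bit 130 is folded back in multiplied by 5
// (computed as 4*c + c via the andq $-4 / shrdq $2 pair), leaving the
// reduced accumulator in r10:r11:r12.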
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r15
 adcq %r14,%r9
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12

 leaq 32(%rdi),%rdi
 decq %rcx
 jg .Lseal_avx2_tail_512_rounds_and_3xhash
 decq %r8
 jge .Lseal_avx2_tail_512_rounds_and_2xhash
 vpaddd .Lchacha20_consts(%rip),%ymm3,%ymm3
 vpaddd 0+64(%rbp),%ymm7,%ymm7
 vpaddd 0+96(%rbp),%ymm11,%ymm11
 vpaddd 0+256(%rbp),%ymm15,%ymm15
 vpaddd .Lchacha20_consts(%rip),%ymm2,%ymm2
 vpaddd 0+64(%rbp),%ymm6,%ymm6
 vpaddd 0+96(%rbp),%ymm10,%ymm10
 vpaddd 0+224(%rbp),%ymm14,%ymm14
 vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1
 vpaddd 0+64(%rbp),%ymm5,%ymm5
 vpaddd 0+96(%rbp),%ymm9,%ymm9
 vpaddd 0+192(%rbp),%ymm13,%ymm13
 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0
 vpaddd 0+64(%rbp),%ymm4,%ymm4
 vpaddd 0+96(%rbp),%ymm8,%ymm8
 vpaddd 0+160(%rbp),%ymm12,%ymm12

 vmovdqa %ymm0,0+128(%rbp)
 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0
 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7
 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3
 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11
 vpxor 0+0(%rsi),%ymm0,%ymm0
 vpxor 32+0(%rsi),%ymm3,%ymm3
 vpxor 64+0(%rsi),%ymm7,%ymm7
 vpxor 96+0(%rsi),%ymm11,%ymm11
 vmovdqu %ymm0,0+0(%rdi)
 vmovdqu %ymm3,32+0(%rdi)
 vmovdqu %ymm7,64+0(%rdi)
 vmovdqu %ymm11,96+0(%rdi)

 vmovdqa 0+128(%rbp),%ymm0
 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3
 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6
 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2
 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10
 vpxor 0+128(%rsi),%ymm3,%ymm3
 vpxor 32+128(%rsi),%ymm2,%ymm2
 vpxor 64+128(%rsi),%ymm6,%ymm6
 vpxor 96+128(%rsi),%ymm10,%ymm10
 vmovdqu %ymm3,0+128(%rdi)
 vmovdqu %ymm2,32+128(%rdi)
 vmovdqu %ymm6,64+128(%rdi)
 vmovdqu %ymm10,96+128(%rdi)
 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
 vpxor 0+256(%rsi),%ymm3,%ymm3
 vpxor 32+256(%rsi),%ymm1,%ymm1
 vpxor 64+256(%rsi),%ymm5,%ymm5
 vpxor 96+256(%rsi),%ymm9,%ymm9
 vmovdqu %ymm3,0+256(%rdi)
 vmovdqu %ymm1,32+256(%rdi)
 vmovdqu %ymm5,64+256(%rdi)
 vmovdqu %ymm9,96+256(%rdi)
 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
 vmovdqa %ymm3,%ymm8

 movq $384,%rcx
 leaq 384(%rsi),%rsi
 subq $384,%rbx
 jmp .Lseal_avx2_short_hash_remainder

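// Seal path for short messages: three interleaved two-block ChaCha20 states
// (counters pre-incremented via .Lavx2_inc) are run for ten double-rounds.
// The first 32 bytes of keystream are clamped with .Lclamp and stored at
// 0(%rbp) as the Poly1305 key; the rest is permuted into the register layout
// consumed by .Lseal_avx2_short.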
.Lseal_avx2_320:
 vmovdqa %ymm0,%ymm1
 vmovdqa %ymm0,%ymm2
 vmovdqa %ymm4,%ymm5
 vmovdqa %ymm4,%ymm6
 vmovdqa %ymm8,%ymm9
 vmovdqa %ymm8,%ymm10
 vpaddd .Lavx2_inc(%rip),%ymm12,%ymm13
 vpaddd .Lavx2_inc(%rip),%ymm13,%ymm14
 vmovdqa %ymm4,%ymm7
 vmovdqa %ymm8,%ymm11
 vmovdqa %ymm12,0+160(%rbp)
 vmovdqa %ymm13,0+192(%rbp)
 vmovdqa %ymm14,0+224(%rbp)
 movq $10,%r10
.Lseal_avx2_320_rounds:
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm0,%ymm12,%ymm12
 vpshufb .Lrol16(%rip),%ymm12,%ymm12
 vpaddd %ymm12,%ymm8,%ymm8
 vpxor %ymm8,%ymm4,%ymm4
 vpsrld $20,%ymm4,%ymm3
 vpslld $12,%ymm4,%ymm4
 vpxor %ymm3,%ymm4,%ymm4
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm0,%ymm12,%ymm12
 vpshufb .Lrol8(%rip),%ymm12,%ymm12
 vpaddd %ymm12,%ymm8,%ymm8
 vpxor %ymm8,%ymm4,%ymm4
 vpslld $7,%ymm4,%ymm3
 vpsrld $25,%ymm4,%ymm4
 vpxor %ymm3,%ymm4,%ymm4
 vpalignr $12,%ymm12,%ymm12,%ymm12
 vpalignr $8,%ymm8,%ymm8,%ymm8
 vpalignr $4,%ymm4,%ymm4,%ymm4
 vpaddd %ymm5,%ymm1,%ymm1
 vpxor %ymm1,%ymm13,%ymm13
 vpshufb .Lrol16(%rip),%ymm13,%ymm13
 vpaddd %ymm13,%ymm9,%ymm9
 vpxor %ymm9,%ymm5,%ymm5
 vpsrld $20,%ymm5,%ymm3
 vpslld $12,%ymm5,%ymm5
 vpxor %ymm3,%ymm5,%ymm5
 vpaddd %ymm5,%ymm1,%ymm1
 vpxor %ymm1,%ymm13,%ymm13
 vpshufb .Lrol8(%rip),%ymm13,%ymm13
 vpaddd %ymm13,%ymm9,%ymm9
 vpxor %ymm9,%ymm5,%ymm5
 vpslld $7,%ymm5,%ymm3
 vpsrld $25,%ymm5,%ymm5
 vpxor %ymm3,%ymm5,%ymm5
 vpalignr $12,%ymm13,%ymm13,%ymm13
 vpalignr $8,%ymm9,%ymm9,%ymm9
 vpalignr $4,%ymm5,%ymm5,%ymm5
 vpaddd %ymm6,%ymm2,%ymm2
 vpxor %ymm2,%ymm14,%ymm14
 vpshufb .Lrol16(%rip),%ymm14,%ymm14
 vpaddd %ymm14,%ymm10,%ymm10
8497 vpxor %ymm10,%ymm6,%ymm6 8498 vpsrld $20,%ymm6,%ymm3 8499 vpslld $12,%ymm6,%ymm6 8500 vpxor %ymm3,%ymm6,%ymm6 8501 vpaddd %ymm6,%ymm2,%ymm2 8502 vpxor %ymm2,%ymm14,%ymm14 8503 vpshufb .Lrol8(%rip),%ymm14,%ymm14 8504 vpaddd %ymm14,%ymm10,%ymm10 8505 vpxor %ymm10,%ymm6,%ymm6 8506 vpslld $7,%ymm6,%ymm3 8507 vpsrld $25,%ymm6,%ymm6 8508 vpxor %ymm3,%ymm6,%ymm6 8509 vpalignr $12,%ymm14,%ymm14,%ymm14 8510 vpalignr $8,%ymm10,%ymm10,%ymm10 8511 vpalignr $4,%ymm6,%ymm6,%ymm6 8512 vpaddd %ymm4,%ymm0,%ymm0 8513 vpxor %ymm0,%ymm12,%ymm12 8514 vpshufb .Lrol16(%rip),%ymm12,%ymm12 8515 vpaddd %ymm12,%ymm8,%ymm8 8516 vpxor %ymm8,%ymm4,%ymm4 8517 vpsrld $20,%ymm4,%ymm3 8518 vpslld $12,%ymm4,%ymm4 8519 vpxor %ymm3,%ymm4,%ymm4 8520 vpaddd %ymm4,%ymm0,%ymm0 8521 vpxor %ymm0,%ymm12,%ymm12 8522 vpshufb .Lrol8(%rip),%ymm12,%ymm12 8523 vpaddd %ymm12,%ymm8,%ymm8 8524 vpxor %ymm8,%ymm4,%ymm4 8525 vpslld $7,%ymm4,%ymm3 8526 vpsrld $25,%ymm4,%ymm4 8527 vpxor %ymm3,%ymm4,%ymm4 8528 vpalignr $4,%ymm12,%ymm12,%ymm12 8529 vpalignr $8,%ymm8,%ymm8,%ymm8 8530 vpalignr $12,%ymm4,%ymm4,%ymm4 8531 vpaddd %ymm5,%ymm1,%ymm1 8532 vpxor %ymm1,%ymm13,%ymm13 8533 vpshufb .Lrol16(%rip),%ymm13,%ymm13 8534 vpaddd %ymm13,%ymm9,%ymm9 8535 vpxor %ymm9,%ymm5,%ymm5 8536 vpsrld $20,%ymm5,%ymm3 8537 vpslld $12,%ymm5,%ymm5 8538 vpxor %ymm3,%ymm5,%ymm5 8539 vpaddd %ymm5,%ymm1,%ymm1 8540 vpxor %ymm1,%ymm13,%ymm13 8541 vpshufb .Lrol8(%rip),%ymm13,%ymm13 8542 vpaddd %ymm13,%ymm9,%ymm9 8543 vpxor %ymm9,%ymm5,%ymm5 8544 vpslld $7,%ymm5,%ymm3 8545 vpsrld $25,%ymm5,%ymm5 8546 vpxor %ymm3,%ymm5,%ymm5 8547 vpalignr $4,%ymm13,%ymm13,%ymm13 8548 vpalignr $8,%ymm9,%ymm9,%ymm9 8549 vpalignr $12,%ymm5,%ymm5,%ymm5 8550 vpaddd %ymm6,%ymm2,%ymm2 8551 vpxor %ymm2,%ymm14,%ymm14 8552 vpshufb .Lrol16(%rip),%ymm14,%ymm14 8553 vpaddd %ymm14,%ymm10,%ymm10 8554 vpxor %ymm10,%ymm6,%ymm6 8555 vpsrld $20,%ymm6,%ymm3 8556 vpslld $12,%ymm6,%ymm6 8557 vpxor %ymm3,%ymm6,%ymm6 8558 vpaddd %ymm6,%ymm2,%ymm2 8559 vpxor %ymm2,%ymm14,%ymm14 8560 vpshufb .Lrol8(%rip),%ymm14,%ymm14 8561 vpaddd %ymm14,%ymm10,%ymm10 8562 vpxor %ymm10,%ymm6,%ymm6 8563 vpslld $7,%ymm6,%ymm3 8564 vpsrld $25,%ymm6,%ymm6 8565 vpxor %ymm3,%ymm6,%ymm6 8566 vpalignr $4,%ymm14,%ymm14,%ymm14 8567 vpalignr $8,%ymm10,%ymm10,%ymm10 8568 vpalignr $12,%ymm6,%ymm6,%ymm6 8569 8570 decq %r10 8571 jne .Lseal_avx2_320_rounds 8572 vpaddd .Lchacha20_consts(%rip),%ymm0,%ymm0 8573 vpaddd .Lchacha20_consts(%rip),%ymm1,%ymm1 8574 vpaddd .Lchacha20_consts(%rip),%ymm2,%ymm2 8575 vpaddd %ymm7,%ymm4,%ymm4 8576 vpaddd %ymm7,%ymm5,%ymm5 8577 vpaddd %ymm7,%ymm6,%ymm6 8578 vpaddd %ymm11,%ymm8,%ymm8 8579 vpaddd %ymm11,%ymm9,%ymm9 8580 vpaddd %ymm11,%ymm10,%ymm10 8581 vpaddd 0+160(%rbp),%ymm12,%ymm12 8582 vpaddd 0+192(%rbp),%ymm13,%ymm13 8583 vpaddd 0+224(%rbp),%ymm14,%ymm14 8584 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 8585 8586 vpand .Lclamp(%rip),%ymm3,%ymm3 8587 vmovdqa %ymm3,0+0(%rbp) 8588 8589 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 8590 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 8591 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 8592 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 8593 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 8594 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 8595 vperm2i128 $0x02,%ymm2,%ymm6,%ymm9 8596 vperm2i128 $0x02,%ymm10,%ymm14,%ymm13 8597 vperm2i128 $0x13,%ymm2,%ymm6,%ymm2 8598 vperm2i128 $0x13,%ymm10,%ymm14,%ymm6 8599 jmp .Lseal_avx2_short 8600 8601.Lseal_avx2_192: 8602 vmovdqa %ymm0,%ymm1 8603 vmovdqa %ymm0,%ymm2 8604 vmovdqa %ymm4,%ymm5 8605 vmovdqa %ymm4,%ymm6 8606 vmovdqa %ymm8,%ymm9 8607 vmovdqa %ymm8,%ymm10 8608 vpaddd .Lavx2_inc(%rip),%ymm12,%ymm13 
8609 vmovdqa %ymm12,%ymm11 8610 vmovdqa %ymm13,%ymm15 8611 movq $10,%r10 8612.Lseal_avx2_192_rounds: 8613 vpaddd %ymm4,%ymm0,%ymm0 8614 vpxor %ymm0,%ymm12,%ymm12 8615 vpshufb .Lrol16(%rip),%ymm12,%ymm12 8616 vpaddd %ymm12,%ymm8,%ymm8 8617 vpxor %ymm8,%ymm4,%ymm4 8618 vpsrld $20,%ymm4,%ymm3 8619 vpslld $12,%ymm4,%ymm4 8620 vpxor %ymm3,%ymm4,%ymm4 8621 vpaddd %ymm4,%ymm0,%ymm0 8622 vpxor %ymm0,%ymm12,%ymm12 8623 vpshufb .Lrol8(%rip),%ymm12,%ymm12 8624 vpaddd %ymm12,%ymm8,%ymm8 8625 vpxor %ymm8,%ymm4,%ymm4 8626 vpslld $7,%ymm4,%ymm3 8627 vpsrld $25,%ymm4,%ymm4 8628 vpxor %ymm3,%ymm4,%ymm4 8629 vpalignr $12,%ymm12,%ymm12,%ymm12 8630 vpalignr $8,%ymm8,%ymm8,%ymm8 8631 vpalignr $4,%ymm4,%ymm4,%ymm4 8632 vpaddd %ymm5,%ymm1,%ymm1 8633 vpxor %ymm1,%ymm13,%ymm13 8634 vpshufb .Lrol16(%rip),%ymm13,%ymm13 8635 vpaddd %ymm13,%ymm9,%ymm9 8636 vpxor %ymm9,%ymm5,%ymm5 8637 vpsrld $20,%ymm5,%ymm3 8638 vpslld $12,%ymm5,%ymm5 8639 vpxor %ymm3,%ymm5,%ymm5 8640 vpaddd %ymm5,%ymm1,%ymm1 8641 vpxor %ymm1,%ymm13,%ymm13 8642 vpshufb .Lrol8(%rip),%ymm13,%ymm13 8643 vpaddd %ymm13,%ymm9,%ymm9 8644 vpxor %ymm9,%ymm5,%ymm5 8645 vpslld $7,%ymm5,%ymm3 8646 vpsrld $25,%ymm5,%ymm5 8647 vpxor %ymm3,%ymm5,%ymm5 8648 vpalignr $12,%ymm13,%ymm13,%ymm13 8649 vpalignr $8,%ymm9,%ymm9,%ymm9 8650 vpalignr $4,%ymm5,%ymm5,%ymm5 8651 vpaddd %ymm4,%ymm0,%ymm0 8652 vpxor %ymm0,%ymm12,%ymm12 8653 vpshufb .Lrol16(%rip),%ymm12,%ymm12 8654 vpaddd %ymm12,%ymm8,%ymm8 8655 vpxor %ymm8,%ymm4,%ymm4 8656 vpsrld $20,%ymm4,%ymm3 8657 vpslld $12,%ymm4,%ymm4 8658 vpxor %ymm3,%ymm4,%ymm4 8659 vpaddd %ymm4,%ymm0,%ymm0 8660 vpxor %ymm0,%ymm12,%ymm12 8661 vpshufb .Lrol8(%rip),%ymm12,%ymm12 8662 vpaddd %ymm12,%ymm8,%ymm8 8663 vpxor %ymm8,%ymm4,%ymm4 8664 vpslld $7,%ymm4,%ymm3 8665 vpsrld $25,%ymm4,%ymm4 8666 vpxor %ymm3,%ymm4,%ymm4 8667 vpalignr $4,%ymm12,%ymm12,%ymm12 8668 vpalignr $8,%ymm8,%ymm8,%ymm8 8669 vpalignr $12,%ymm4,%ymm4,%ymm4 8670 vpaddd %ymm5,%ymm1,%ymm1 8671 vpxor %ymm1,%ymm13,%ymm13 8672 vpshufb .Lrol16(%rip),%ymm13,%ymm13 8673 vpaddd %ymm13,%ymm9,%ymm9 8674 vpxor %ymm9,%ymm5,%ymm5 8675 vpsrld $20,%ymm5,%ymm3 8676 vpslld $12,%ymm5,%ymm5 8677 vpxor %ymm3,%ymm5,%ymm5 8678 vpaddd %ymm5,%ymm1,%ymm1 8679 vpxor %ymm1,%ymm13,%ymm13 8680 vpshufb .Lrol8(%rip),%ymm13,%ymm13 8681 vpaddd %ymm13,%ymm9,%ymm9 8682 vpxor %ymm9,%ymm5,%ymm5 8683 vpslld $7,%ymm5,%ymm3 8684 vpsrld $25,%ymm5,%ymm5 8685 vpxor %ymm3,%ymm5,%ymm5 8686 vpalignr $4,%ymm13,%ymm13,%ymm13 8687 vpalignr $8,%ymm9,%ymm9,%ymm9 8688 vpalignr $12,%ymm5,%ymm5,%ymm5 8689 8690 decq %r10 8691 jne .Lseal_avx2_192_rounds 8692 vpaddd %ymm2,%ymm0,%ymm0 8693 vpaddd %ymm2,%ymm1,%ymm1 8694 vpaddd %ymm6,%ymm4,%ymm4 8695 vpaddd %ymm6,%ymm5,%ymm5 8696 vpaddd %ymm10,%ymm8,%ymm8 8697 vpaddd %ymm10,%ymm9,%ymm9 8698 vpaddd %ymm11,%ymm12,%ymm12 8699 vpaddd %ymm15,%ymm13,%ymm13 8700 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 8701 8702 vpand .Lclamp(%rip),%ymm3,%ymm3 8703 vmovdqa %ymm3,0+0(%rbp) 8704 8705 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 8706 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 8707 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 8708 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 8709 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 8710 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 8711.Lseal_avx2_short: 8712 movq %r8,%r8 8713 call poly_hash_ad_internal 8714 xorq %rcx,%rcx 8715.Lseal_avx2_short_hash_remainder: 8716 cmpq $16,%rcx 8717 jb .Lseal_avx2_short_loop 8718 addq 0+0(%rdi),%r10 8719 adcq 8+0(%rdi),%r11 8720 adcq $1,%r12 8721 movq 0+0+0(%rbp),%rax 8722 movq %rax,%r15 8723 mulq %r10 8724 movq %rax,%r13 8725 movq %rdx,%r14 8726 movq 0+0+0(%rbp),%rax 8727 mulq 
%r11 8728 imulq %r12,%r15 8729 addq %rax,%r14 8730 adcq %rdx,%r15 8731 movq 8+0+0(%rbp),%rax 8732 movq %rax,%r9 8733 mulq %r10 8734 addq %rax,%r14 8735 adcq $0,%rdx 8736 movq %rdx,%r10 8737 movq 8+0+0(%rbp),%rax 8738 mulq %r11 8739 addq %rax,%r15 8740 adcq $0,%rdx 8741 imulq %r12,%r9 8742 addq %r10,%r15 8743 adcq %rdx,%r9 8744 movq %r13,%r10 8745 movq %r14,%r11 8746 movq %r15,%r12 8747 andq $3,%r12 8748 movq %r15,%r13 8749 andq $-4,%r13 8750 movq %r9,%r14 8751 shrdq $2,%r9,%r15 8752 shrq $2,%r9 8753 addq %r13,%r15 8754 adcq %r14,%r9 8755 addq %r15,%r10 8756 adcq %r9,%r11 8757 adcq $0,%r12 8758 8759 subq $16,%rcx 8760 addq $16,%rdi 8761 jmp .Lseal_avx2_short_hash_remainder 8762.Lseal_avx2_short_loop: 8763 cmpq $32,%rbx 8764 jb .Lseal_avx2_short_tail 8765 subq $32,%rbx 8766 8767 vpxor (%rsi),%ymm0,%ymm0 8768 vmovdqu %ymm0,(%rdi) 8769 leaq 32(%rsi),%rsi 8770 8771 addq 0+0(%rdi),%r10 8772 adcq 8+0(%rdi),%r11 8773 adcq $1,%r12 8774 movq 0+0+0(%rbp),%rax 8775 movq %rax,%r15 8776 mulq %r10 8777 movq %rax,%r13 8778 movq %rdx,%r14 8779 movq 0+0+0(%rbp),%rax 8780 mulq %r11 8781 imulq %r12,%r15 8782 addq %rax,%r14 8783 adcq %rdx,%r15 8784 movq 8+0+0(%rbp),%rax 8785 movq %rax,%r9 8786 mulq %r10 8787 addq %rax,%r14 8788 adcq $0,%rdx 8789 movq %rdx,%r10 8790 movq 8+0+0(%rbp),%rax 8791 mulq %r11 8792 addq %rax,%r15 8793 adcq $0,%rdx 8794 imulq %r12,%r9 8795 addq %r10,%r15 8796 adcq %rdx,%r9 8797 movq %r13,%r10 8798 movq %r14,%r11 8799 movq %r15,%r12 8800 andq $3,%r12 8801 movq %r15,%r13 8802 andq $-4,%r13 8803 movq %r9,%r14 8804 shrdq $2,%r9,%r15 8805 shrq $2,%r9 8806 addq %r13,%r15 8807 adcq %r14,%r9 8808 addq %r15,%r10 8809 adcq %r9,%r11 8810 adcq $0,%r12 8811 addq 0+16(%rdi),%r10 8812 adcq 8+16(%rdi),%r11 8813 adcq $1,%r12 8814 movq 0+0+0(%rbp),%rax 8815 movq %rax,%r15 8816 mulq %r10 8817 movq %rax,%r13 8818 movq %rdx,%r14 8819 movq 0+0+0(%rbp),%rax 8820 mulq %r11 8821 imulq %r12,%r15 8822 addq %rax,%r14 8823 adcq %rdx,%r15 8824 movq 8+0+0(%rbp),%rax 8825 movq %rax,%r9 8826 mulq %r10 8827 addq %rax,%r14 8828 adcq $0,%rdx 8829 movq %rdx,%r10 8830 movq 8+0+0(%rbp),%rax 8831 mulq %r11 8832 addq %rax,%r15 8833 adcq $0,%rdx 8834 imulq %r12,%r9 8835 addq %r10,%r15 8836 adcq %rdx,%r9 8837 movq %r13,%r10 8838 movq %r14,%r11 8839 movq %r15,%r12 8840 andq $3,%r12 8841 movq %r15,%r13 8842 andq $-4,%r13 8843 movq %r9,%r14 8844 shrdq $2,%r9,%r15 8845 shrq $2,%r9 8846 addq %r13,%r15 8847 adcq %r14,%r9 8848 addq %r15,%r10 8849 adcq %r9,%r11 8850 adcq $0,%r12 8851 8852 leaq 32(%rdi),%rdi 8853 8854 vmovdqa %ymm4,%ymm0 8855 vmovdqa %ymm8,%ymm4 8856 vmovdqa %ymm12,%ymm8 8857 vmovdqa %ymm1,%ymm12 8858 vmovdqa %ymm5,%ymm1 8859 vmovdqa %ymm9,%ymm5 8860 vmovdqa %ymm13,%ymm9 8861 vmovdqa %ymm2,%ymm13 8862 vmovdqa %ymm6,%ymm2 8863 jmp .Lseal_avx2_short_loop 8864.Lseal_avx2_short_tail: 8865 cmpq $16,%rbx 8866 jb .Lseal_avx2_exit 8867 subq $16,%rbx 8868 vpxor (%rsi),%xmm0,%xmm3 8869 vmovdqu %xmm3,(%rdi) 8870 leaq 16(%rsi),%rsi 8871 addq 0+0(%rdi),%r10 8872 adcq 8+0(%rdi),%r11 8873 adcq $1,%r12 8874 movq 0+0+0(%rbp),%rax 8875 movq %rax,%r15 8876 mulq %r10 8877 movq %rax,%r13 8878 movq %rdx,%r14 8879 movq 0+0+0(%rbp),%rax 8880 mulq %r11 8881 imulq %r12,%r15 8882 addq %rax,%r14 8883 adcq %rdx,%r15 8884 movq 8+0+0(%rbp),%rax 8885 movq %rax,%r9 8886 mulq %r10 8887 addq %rax,%r14 8888 adcq $0,%rdx 8889 movq %rdx,%r10 8890 movq 8+0+0(%rbp),%rax 8891 mulq %r11 8892 addq %rax,%r15 8893 adcq $0,%rdx 8894 imulq %r12,%r9 8895 addq %r10,%r15 8896 adcq %rdx,%r9 8897 movq %r13,%r10 8898 movq %r14,%r11 8899 movq %r15,%r12 8900 andq $3,%r12 
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r15
 adcq %r14,%r9
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12

 leaq 16(%rdi),%rdi
 vextracti128 $1,%ymm0,%xmm0
.Lseal_avx2_exit:
 vzeroupper
 jmp .Lseal_sse_tail_16
.cfi_endproc
.size chacha20_poly1305_seal_avx2, .-chacha20_poly1305_seal_avx2
#endif
.section .note.GNU-stack,"",@progbits