1# This file is generated from a similarly-named Perl script in the BoringSSL 2# source tree. Do not edit by hand. 3 4#if defined(__has_feature) 5#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) 6#define OPENSSL_NO_ASM 7#endif 8#endif 9 10#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) 11.text 12 13 14chacha20_poly1305_constants: 15 16.p2align 6 17L$chacha20_consts: 18.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' 19.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' 20L$rol8: 21.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 22.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 23L$rol16: 24.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13 25.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13 26L$avx2_init: 27.long 0,0,0,0 28L$sse_inc: 29.long 1,0,0,0 30L$avx2_inc: 31.long 2,0,0,0,2,0,0,0 32L$clamp: 33.quad 0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC 34.quad 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF 35.p2align 4 36L$and_masks: 37.byte 0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 38.byte 0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 39.byte 0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 40.byte 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 41.byte 0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 42.byte 0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 43.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 44.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 45.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00 46.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00 47.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00 48.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00 49.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00 50.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00 51.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00 52.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff 53 54 55.p2align 6 56poly_hash_ad_internal: 57 58 59 xorq %r10,%r10 60 xorq %r11,%r11 61 xorq %r12,%r12 62 cmpq $13,%r8 63 jne L$hash_ad_loop 64L$poly_fast_tls_ad: 65 66 movq (%rcx),%r10 67 movq 5(%rcx),%r11 68 shrq $24,%r11 69 movq $1,%r12 70 movq 0+0+0(%rbp),%rax 71 movq %rax,%r15 72 mulq %r10 73 movq %rax,%r13 74 movq %rdx,%r14 75 movq 0+0+0(%rbp),%rax 76 mulq %r11 77 imulq %r12,%r15 78 addq %rax,%r14 79 adcq %rdx,%r15 80 movq 8+0+0(%rbp),%rax 81 movq %rax,%r9 82 mulq %r10 83 addq %rax,%r14 84 adcq $0,%rdx 85 movq %rdx,%r10 86 movq 8+0+0(%rbp),%rax 87 mulq %r11 88 addq %rax,%r15 89 adcq $0,%rdx 90 imulq %r12,%r9 91 addq %r10,%r15 92 adcq %rdx,%r9 93 movq %r13,%r10 94 movq %r14,%r11 95 movq %r15,%r12 96 andq $3,%r12 97 movq %r15,%r13 98 andq $-4,%r13 99 movq %r9,%r14 100 shrdq $2,%r9,%r15 101 shrq $2,%r9 102 addq %r13,%r15 103 adcq %r14,%r9 104 addq %r15,%r10 105 adcq %r9,%r11 106 adcq $0,%r12 107 108 .byte 0xf3,0xc3 109L$hash_ad_loop: 110 111 cmpq $16,%r8 112 jb L$hash_ad_tail 113 addq 0+0(%rcx),%r10 114 adcq 8+0(%rcx),%r11 115 adcq $1,%r12 116 movq 0+0+0(%rbp),%rax 117 movq %rax,%r15 118 mulq %r10 119 movq %rax,%r13 120 movq %rdx,%r14 121 movq 
0+0+0(%rbp),%rax 122 mulq %r11 123 imulq %r12,%r15 124 addq %rax,%r14 125 adcq %rdx,%r15 126 movq 8+0+0(%rbp),%rax 127 movq %rax,%r9 128 mulq %r10 129 addq %rax,%r14 130 adcq $0,%rdx 131 movq %rdx,%r10 132 movq 8+0+0(%rbp),%rax 133 mulq %r11 134 addq %rax,%r15 135 adcq $0,%rdx 136 imulq %r12,%r9 137 addq %r10,%r15 138 adcq %rdx,%r9 139 movq %r13,%r10 140 movq %r14,%r11 141 movq %r15,%r12 142 andq $3,%r12 143 movq %r15,%r13 144 andq $-4,%r13 145 movq %r9,%r14 146 shrdq $2,%r9,%r15 147 shrq $2,%r9 148 addq %r13,%r15 149 adcq %r14,%r9 150 addq %r15,%r10 151 adcq %r9,%r11 152 adcq $0,%r12 153 154 leaq 16(%rcx),%rcx 155 subq $16,%r8 156 jmp L$hash_ad_loop 157L$hash_ad_tail: 158 cmpq $0,%r8 159 je L$hash_ad_done 160 161 xorq %r13,%r13 162 xorq %r14,%r14 163 xorq %r15,%r15 164 addq %r8,%rcx 165L$hash_ad_tail_loop: 166 shldq $8,%r13,%r14 167 shlq $8,%r13 168 movzbq -1(%rcx),%r15 169 xorq %r15,%r13 170 decq %rcx 171 decq %r8 172 jne L$hash_ad_tail_loop 173 174 addq %r13,%r10 175 adcq %r14,%r11 176 adcq $1,%r12 177 movq 0+0+0(%rbp),%rax 178 movq %rax,%r15 179 mulq %r10 180 movq %rax,%r13 181 movq %rdx,%r14 182 movq 0+0+0(%rbp),%rax 183 mulq %r11 184 imulq %r12,%r15 185 addq %rax,%r14 186 adcq %rdx,%r15 187 movq 8+0+0(%rbp),%rax 188 movq %rax,%r9 189 mulq %r10 190 addq %rax,%r14 191 adcq $0,%rdx 192 movq %rdx,%r10 193 movq 8+0+0(%rbp),%rax 194 mulq %r11 195 addq %rax,%r15 196 adcq $0,%rdx 197 imulq %r12,%r9 198 addq %r10,%r15 199 adcq %rdx,%r9 200 movq %r13,%r10 201 movq %r14,%r11 202 movq %r15,%r12 203 andq $3,%r12 204 movq %r15,%r13 205 andq $-4,%r13 206 movq %r9,%r14 207 shrdq $2,%r9,%r15 208 shrq $2,%r9 209 addq %r13,%r15 210 adcq %r14,%r9 211 addq %r15,%r10 212 adcq %r9,%r11 213 adcq $0,%r12 214 215 216L$hash_ad_done: 217 .byte 0xf3,0xc3 218 219 220 221.globl _GFp_chacha20_poly1305_open 222.private_extern _GFp_chacha20_poly1305_open 223 224.p2align 6 225_GFp_chacha20_poly1305_open: 226 227 pushq %rbp 228 229 pushq %rbx 230 231 pushq %r12 232 233 pushq %r13 234 235 pushq %r14 236 237 pushq %r15 238 239 240 241 pushq %r9 242 243 subq $288 + 0 + 32,%rsp 244 245 246 leaq 32(%rsp),%rbp 247 andq $-32,%rbp 248 249 movq %rdx,%rbx 250 movq %r8,0+0+32(%rbp) 251 movq %rbx,8+0+32(%rbp) 252 253 movl _GFp_ia32cap_P+8(%rip),%eax 254 andl $288,%eax 255 xorl $288,%eax 256 jz chacha20_poly1305_open_avx2 257 258 cmpq $128,%rbx 259 jbe L$open_sse_128 260 261 movdqa L$chacha20_consts(%rip),%xmm0 262 movdqu 0(%r9),%xmm4 263 movdqu 16(%r9),%xmm8 264 movdqu 32(%r9),%xmm12 265 266 movdqa %xmm12,%xmm7 267 268 movdqa %xmm4,0+48(%rbp) 269 movdqa %xmm8,0+64(%rbp) 270 movdqa %xmm12,0+96(%rbp) 271 movq $10,%r10 272L$open_sse_init_rounds: 273 paddd %xmm4,%xmm0 274 pxor %xmm0,%xmm12 275 pshufb L$rol16(%rip),%xmm12 276 paddd %xmm12,%xmm8 277 pxor %xmm8,%xmm4 278 movdqa %xmm4,%xmm3 279 pslld $12,%xmm3 280 psrld $20,%xmm4 281 pxor %xmm3,%xmm4 282 paddd %xmm4,%xmm0 283 pxor %xmm0,%xmm12 284 pshufb L$rol8(%rip),%xmm12 285 paddd %xmm12,%xmm8 286 pxor %xmm8,%xmm4 287 movdqa %xmm4,%xmm3 288 pslld $7,%xmm3 289 psrld $25,%xmm4 290 pxor %xmm3,%xmm4 291.byte 102,15,58,15,228,4 292.byte 102,69,15,58,15,192,8 293.byte 102,69,15,58,15,228,12 294 paddd %xmm4,%xmm0 295 pxor %xmm0,%xmm12 296 pshufb L$rol16(%rip),%xmm12 297 paddd %xmm12,%xmm8 298 pxor %xmm8,%xmm4 299 movdqa %xmm4,%xmm3 300 pslld $12,%xmm3 301 psrld $20,%xmm4 302 pxor %xmm3,%xmm4 303 paddd %xmm4,%xmm0 304 pxor %xmm0,%xmm12 305 pshufb L$rol8(%rip),%xmm12 306 paddd %xmm12,%xmm8 307 pxor %xmm8,%xmm4 308 movdqa %xmm4,%xmm3 309 pslld $7,%xmm3 310 psrld $25,%xmm4 311 pxor %xmm3,%xmm4 
312.byte 102,15,58,15,228,12 313.byte 102,69,15,58,15,192,8 314.byte 102,69,15,58,15,228,4 315 316 decq %r10 317 jne L$open_sse_init_rounds 318 319 paddd L$chacha20_consts(%rip),%xmm0 320 paddd 0+48(%rbp),%xmm4 321 322 pand L$clamp(%rip),%xmm0 323 movdqa %xmm0,0+0(%rbp) 324 movdqa %xmm4,0+16(%rbp) 325 326 movq %r8,%r8 327 call poly_hash_ad_internal 328L$open_sse_main_loop: 329 cmpq $256,%rbx 330 jb L$open_sse_tail 331 332 movdqa L$chacha20_consts(%rip),%xmm0 333 movdqa 0+48(%rbp),%xmm4 334 movdqa 0+64(%rbp),%xmm8 335 movdqa %xmm0,%xmm1 336 movdqa %xmm4,%xmm5 337 movdqa %xmm8,%xmm9 338 movdqa %xmm0,%xmm2 339 movdqa %xmm4,%xmm6 340 movdqa %xmm8,%xmm10 341 movdqa %xmm0,%xmm3 342 movdqa %xmm4,%xmm7 343 movdqa %xmm8,%xmm11 344 movdqa 0+96(%rbp),%xmm15 345 paddd L$sse_inc(%rip),%xmm15 346 movdqa %xmm15,%xmm14 347 paddd L$sse_inc(%rip),%xmm14 348 movdqa %xmm14,%xmm13 349 paddd L$sse_inc(%rip),%xmm13 350 movdqa %xmm13,%xmm12 351 paddd L$sse_inc(%rip),%xmm12 352 movdqa %xmm12,0+96(%rbp) 353 movdqa %xmm13,0+112(%rbp) 354 movdqa %xmm14,0+128(%rbp) 355 movdqa %xmm15,0+144(%rbp) 356 357 358 359 movq $4,%rcx 360 movq %rsi,%r8 361L$open_sse_main_loop_rounds: 362 movdqa %xmm8,0+80(%rbp) 363 movdqa L$rol16(%rip),%xmm8 364 paddd %xmm7,%xmm3 365 paddd %xmm6,%xmm2 366 paddd %xmm5,%xmm1 367 paddd %xmm4,%xmm0 368 pxor %xmm3,%xmm15 369 pxor %xmm2,%xmm14 370 pxor %xmm1,%xmm13 371 pxor %xmm0,%xmm12 372.byte 102,69,15,56,0,248 373.byte 102,69,15,56,0,240 374.byte 102,69,15,56,0,232 375.byte 102,69,15,56,0,224 376 movdqa 0+80(%rbp),%xmm8 377 paddd %xmm15,%xmm11 378 paddd %xmm14,%xmm10 379 paddd %xmm13,%xmm9 380 paddd %xmm12,%xmm8 381 pxor %xmm11,%xmm7 382 addq 0+0(%r8),%r10 383 adcq 8+0(%r8),%r11 384 adcq $1,%r12 385 386 leaq 16(%r8),%r8 387 pxor %xmm10,%xmm6 388 pxor %xmm9,%xmm5 389 pxor %xmm8,%xmm4 390 movdqa %xmm8,0+80(%rbp) 391 movdqa %xmm7,%xmm8 392 psrld $20,%xmm8 393 pslld $32-20,%xmm7 394 pxor %xmm8,%xmm7 395 movdqa %xmm6,%xmm8 396 psrld $20,%xmm8 397 pslld $32-20,%xmm6 398 pxor %xmm8,%xmm6 399 movdqa %xmm5,%xmm8 400 psrld $20,%xmm8 401 pslld $32-20,%xmm5 402 pxor %xmm8,%xmm5 403 movdqa %xmm4,%xmm8 404 psrld $20,%xmm8 405 pslld $32-20,%xmm4 406 pxor %xmm8,%xmm4 407 movq 0+0+0(%rbp),%rax 408 movq %rax,%r15 409 mulq %r10 410 movq %rax,%r13 411 movq %rdx,%r14 412 movq 0+0+0(%rbp),%rax 413 mulq %r11 414 imulq %r12,%r15 415 addq %rax,%r14 416 adcq %rdx,%r15 417 movdqa L$rol8(%rip),%xmm8 418 paddd %xmm7,%xmm3 419 paddd %xmm6,%xmm2 420 paddd %xmm5,%xmm1 421 paddd %xmm4,%xmm0 422 pxor %xmm3,%xmm15 423 pxor %xmm2,%xmm14 424 pxor %xmm1,%xmm13 425 pxor %xmm0,%xmm12 426.byte 102,69,15,56,0,248 427.byte 102,69,15,56,0,240 428.byte 102,69,15,56,0,232 429.byte 102,69,15,56,0,224 430 movdqa 0+80(%rbp),%xmm8 431 paddd %xmm15,%xmm11 432 paddd %xmm14,%xmm10 433 paddd %xmm13,%xmm9 434 paddd %xmm12,%xmm8 435 pxor %xmm11,%xmm7 436 pxor %xmm10,%xmm6 437 movq 8+0+0(%rbp),%rax 438 movq %rax,%r9 439 mulq %r10 440 addq %rax,%r14 441 adcq $0,%rdx 442 movq %rdx,%r10 443 movq 8+0+0(%rbp),%rax 444 mulq %r11 445 addq %rax,%r15 446 adcq $0,%rdx 447 pxor %xmm9,%xmm5 448 pxor %xmm8,%xmm4 449 movdqa %xmm8,0+80(%rbp) 450 movdqa %xmm7,%xmm8 451 psrld $25,%xmm8 452 pslld $32-25,%xmm7 453 pxor %xmm8,%xmm7 454 movdqa %xmm6,%xmm8 455 psrld $25,%xmm8 456 pslld $32-25,%xmm6 457 pxor %xmm8,%xmm6 458 movdqa %xmm5,%xmm8 459 psrld $25,%xmm8 460 pslld $32-25,%xmm5 461 pxor %xmm8,%xmm5 462 movdqa %xmm4,%xmm8 463 psrld $25,%xmm8 464 pslld $32-25,%xmm4 465 pxor %xmm8,%xmm4 466 movdqa 0+80(%rbp),%xmm8 467 imulq %r12,%r9 468 addq %r10,%r15 469 adcq %rdx,%r9 
470.byte 102,15,58,15,255,4 471.byte 102,69,15,58,15,219,8 472.byte 102,69,15,58,15,255,12 473.byte 102,15,58,15,246,4 474.byte 102,69,15,58,15,210,8 475.byte 102,69,15,58,15,246,12 476.byte 102,15,58,15,237,4 477.byte 102,69,15,58,15,201,8 478.byte 102,69,15,58,15,237,12 479.byte 102,15,58,15,228,4 480.byte 102,69,15,58,15,192,8 481.byte 102,69,15,58,15,228,12 482 movdqa %xmm8,0+80(%rbp) 483 movdqa L$rol16(%rip),%xmm8 484 paddd %xmm7,%xmm3 485 paddd %xmm6,%xmm2 486 paddd %xmm5,%xmm1 487 paddd %xmm4,%xmm0 488 pxor %xmm3,%xmm15 489 pxor %xmm2,%xmm14 490 movq %r13,%r10 491 movq %r14,%r11 492 movq %r15,%r12 493 andq $3,%r12 494 movq %r15,%r13 495 andq $-4,%r13 496 movq %r9,%r14 497 shrdq $2,%r9,%r15 498 shrq $2,%r9 499 addq %r13,%r15 500 adcq %r14,%r9 501 addq %r15,%r10 502 adcq %r9,%r11 503 adcq $0,%r12 504 pxor %xmm1,%xmm13 505 pxor %xmm0,%xmm12 506.byte 102,69,15,56,0,248 507.byte 102,69,15,56,0,240 508.byte 102,69,15,56,0,232 509.byte 102,69,15,56,0,224 510 movdqa 0+80(%rbp),%xmm8 511 paddd %xmm15,%xmm11 512 paddd %xmm14,%xmm10 513 paddd %xmm13,%xmm9 514 paddd %xmm12,%xmm8 515 pxor %xmm11,%xmm7 516 pxor %xmm10,%xmm6 517 pxor %xmm9,%xmm5 518 pxor %xmm8,%xmm4 519 movdqa %xmm8,0+80(%rbp) 520 movdqa %xmm7,%xmm8 521 psrld $20,%xmm8 522 pslld $32-20,%xmm7 523 pxor %xmm8,%xmm7 524 movdqa %xmm6,%xmm8 525 psrld $20,%xmm8 526 pslld $32-20,%xmm6 527 pxor %xmm8,%xmm6 528 movdqa %xmm5,%xmm8 529 psrld $20,%xmm8 530 pslld $32-20,%xmm5 531 pxor %xmm8,%xmm5 532 movdqa %xmm4,%xmm8 533 psrld $20,%xmm8 534 pslld $32-20,%xmm4 535 pxor %xmm8,%xmm4 536 movdqa L$rol8(%rip),%xmm8 537 paddd %xmm7,%xmm3 538 paddd %xmm6,%xmm2 539 paddd %xmm5,%xmm1 540 paddd %xmm4,%xmm0 541 pxor %xmm3,%xmm15 542 pxor %xmm2,%xmm14 543 pxor %xmm1,%xmm13 544 pxor %xmm0,%xmm12 545.byte 102,69,15,56,0,248 546.byte 102,69,15,56,0,240 547.byte 102,69,15,56,0,232 548.byte 102,69,15,56,0,224 549 movdqa 0+80(%rbp),%xmm8 550 paddd %xmm15,%xmm11 551 paddd %xmm14,%xmm10 552 paddd %xmm13,%xmm9 553 paddd %xmm12,%xmm8 554 pxor %xmm11,%xmm7 555 pxor %xmm10,%xmm6 556 pxor %xmm9,%xmm5 557 pxor %xmm8,%xmm4 558 movdqa %xmm8,0+80(%rbp) 559 movdqa %xmm7,%xmm8 560 psrld $25,%xmm8 561 pslld $32-25,%xmm7 562 pxor %xmm8,%xmm7 563 movdqa %xmm6,%xmm8 564 psrld $25,%xmm8 565 pslld $32-25,%xmm6 566 pxor %xmm8,%xmm6 567 movdqa %xmm5,%xmm8 568 psrld $25,%xmm8 569 pslld $32-25,%xmm5 570 pxor %xmm8,%xmm5 571 movdqa %xmm4,%xmm8 572 psrld $25,%xmm8 573 pslld $32-25,%xmm4 574 pxor %xmm8,%xmm4 575 movdqa 0+80(%rbp),%xmm8 576.byte 102,15,58,15,255,12 577.byte 102,69,15,58,15,219,8 578.byte 102,69,15,58,15,255,4 579.byte 102,15,58,15,246,12 580.byte 102,69,15,58,15,210,8 581.byte 102,69,15,58,15,246,4 582.byte 102,15,58,15,237,12 583.byte 102,69,15,58,15,201,8 584.byte 102,69,15,58,15,237,4 585.byte 102,15,58,15,228,12 586.byte 102,69,15,58,15,192,8 587.byte 102,69,15,58,15,228,4 588 589 decq %rcx 590 jge L$open_sse_main_loop_rounds 591 addq 0+0(%r8),%r10 592 adcq 8+0(%r8),%r11 593 adcq $1,%r12 594 movq 0+0+0(%rbp),%rax 595 movq %rax,%r15 596 mulq %r10 597 movq %rax,%r13 598 movq %rdx,%r14 599 movq 0+0+0(%rbp),%rax 600 mulq %r11 601 imulq %r12,%r15 602 addq %rax,%r14 603 adcq %rdx,%r15 604 movq 8+0+0(%rbp),%rax 605 movq %rax,%r9 606 mulq %r10 607 addq %rax,%r14 608 adcq $0,%rdx 609 movq %rdx,%r10 610 movq 8+0+0(%rbp),%rax 611 mulq %r11 612 addq %rax,%r15 613 adcq $0,%rdx 614 imulq %r12,%r9 615 addq %r10,%r15 616 adcq %rdx,%r9 617 movq %r13,%r10 618 movq %r14,%r11 619 movq %r15,%r12 620 andq $3,%r12 621 movq %r15,%r13 622 andq $-4,%r13 623 movq %r9,%r14 624 shrdq $2,%r9,%r15 
625 shrq $2,%r9 626 addq %r13,%r15 627 adcq %r14,%r9 628 addq %r15,%r10 629 adcq %r9,%r11 630 adcq $0,%r12 631 632 leaq 16(%r8),%r8 633 cmpq $-6,%rcx 634 jg L$open_sse_main_loop_rounds 635 paddd L$chacha20_consts(%rip),%xmm3 636 paddd 0+48(%rbp),%xmm7 637 paddd 0+64(%rbp),%xmm11 638 paddd 0+144(%rbp),%xmm15 639 paddd L$chacha20_consts(%rip),%xmm2 640 paddd 0+48(%rbp),%xmm6 641 paddd 0+64(%rbp),%xmm10 642 paddd 0+128(%rbp),%xmm14 643 paddd L$chacha20_consts(%rip),%xmm1 644 paddd 0+48(%rbp),%xmm5 645 paddd 0+64(%rbp),%xmm9 646 paddd 0+112(%rbp),%xmm13 647 paddd L$chacha20_consts(%rip),%xmm0 648 paddd 0+48(%rbp),%xmm4 649 paddd 0+64(%rbp),%xmm8 650 paddd 0+96(%rbp),%xmm12 651 movdqa %xmm12,0+80(%rbp) 652 movdqu 0 + 0(%rsi),%xmm12 653 pxor %xmm3,%xmm12 654 movdqu %xmm12,0 + 0(%rdi) 655 movdqu 16 + 0(%rsi),%xmm12 656 pxor %xmm7,%xmm12 657 movdqu %xmm12,16 + 0(%rdi) 658 movdqu 32 + 0(%rsi),%xmm12 659 pxor %xmm11,%xmm12 660 movdqu %xmm12,32 + 0(%rdi) 661 movdqu 48 + 0(%rsi),%xmm12 662 pxor %xmm15,%xmm12 663 movdqu %xmm12,48 + 0(%rdi) 664 movdqu 0 + 64(%rsi),%xmm3 665 movdqu 16 + 64(%rsi),%xmm7 666 movdqu 32 + 64(%rsi),%xmm11 667 movdqu 48 + 64(%rsi),%xmm15 668 pxor %xmm3,%xmm2 669 pxor %xmm7,%xmm6 670 pxor %xmm11,%xmm10 671 pxor %xmm14,%xmm15 672 movdqu %xmm2,0 + 64(%rdi) 673 movdqu %xmm6,16 + 64(%rdi) 674 movdqu %xmm10,32 + 64(%rdi) 675 movdqu %xmm15,48 + 64(%rdi) 676 movdqu 0 + 128(%rsi),%xmm3 677 movdqu 16 + 128(%rsi),%xmm7 678 movdqu 32 + 128(%rsi),%xmm11 679 movdqu 48 + 128(%rsi),%xmm15 680 pxor %xmm3,%xmm1 681 pxor %xmm7,%xmm5 682 pxor %xmm11,%xmm9 683 pxor %xmm13,%xmm15 684 movdqu %xmm1,0 + 128(%rdi) 685 movdqu %xmm5,16 + 128(%rdi) 686 movdqu %xmm9,32 + 128(%rdi) 687 movdqu %xmm15,48 + 128(%rdi) 688 movdqu 0 + 192(%rsi),%xmm3 689 movdqu 16 + 192(%rsi),%xmm7 690 movdqu 32 + 192(%rsi),%xmm11 691 movdqu 48 + 192(%rsi),%xmm15 692 pxor %xmm3,%xmm0 693 pxor %xmm7,%xmm4 694 pxor %xmm11,%xmm8 695 pxor 0+80(%rbp),%xmm15 696 movdqu %xmm0,0 + 192(%rdi) 697 movdqu %xmm4,16 + 192(%rdi) 698 movdqu %xmm8,32 + 192(%rdi) 699 movdqu %xmm15,48 + 192(%rdi) 700 701 leaq 256(%rsi),%rsi 702 leaq 256(%rdi),%rdi 703 subq $256,%rbx 704 jmp L$open_sse_main_loop 705L$open_sse_tail: 706 707 testq %rbx,%rbx 708 jz L$open_sse_finalize 709 cmpq $192,%rbx 710 ja L$open_sse_tail_256 711 cmpq $128,%rbx 712 ja L$open_sse_tail_192 713 cmpq $64,%rbx 714 ja L$open_sse_tail_128 715 movdqa L$chacha20_consts(%rip),%xmm0 716 movdqa 0+48(%rbp),%xmm4 717 movdqa 0+64(%rbp),%xmm8 718 movdqa 0+96(%rbp),%xmm12 719 paddd L$sse_inc(%rip),%xmm12 720 movdqa %xmm12,0+96(%rbp) 721 722 xorq %r8,%r8 723 movq %rbx,%rcx 724 cmpq $16,%rcx 725 jb L$open_sse_tail_64_rounds 726L$open_sse_tail_64_rounds_and_x1hash: 727 addq 0+0(%rsi,%r8,1),%r10 728 adcq 8+0(%rsi,%r8,1),%r11 729 adcq $1,%r12 730 movq 0+0+0(%rbp),%rax 731 movq %rax,%r15 732 mulq %r10 733 movq %rax,%r13 734 movq %rdx,%r14 735 movq 0+0+0(%rbp),%rax 736 mulq %r11 737 imulq %r12,%r15 738 addq %rax,%r14 739 adcq %rdx,%r15 740 movq 8+0+0(%rbp),%rax 741 movq %rax,%r9 742 mulq %r10 743 addq %rax,%r14 744 adcq $0,%rdx 745 movq %rdx,%r10 746 movq 8+0+0(%rbp),%rax 747 mulq %r11 748 addq %rax,%r15 749 adcq $0,%rdx 750 imulq %r12,%r9 751 addq %r10,%r15 752 adcq %rdx,%r9 753 movq %r13,%r10 754 movq %r14,%r11 755 movq %r15,%r12 756 andq $3,%r12 757 movq %r15,%r13 758 andq $-4,%r13 759 movq %r9,%r14 760 shrdq $2,%r9,%r15 761 shrq $2,%r9 762 addq %r13,%r15 763 adcq %r14,%r9 764 addq %r15,%r10 765 adcq %r9,%r11 766 adcq $0,%r12 767 768 subq $16,%rcx 769L$open_sse_tail_64_rounds: 770 addq $16,%r8 771 paddd 
%xmm4,%xmm0 772 pxor %xmm0,%xmm12 773 pshufb L$rol16(%rip),%xmm12 774 paddd %xmm12,%xmm8 775 pxor %xmm8,%xmm4 776 movdqa %xmm4,%xmm3 777 pslld $12,%xmm3 778 psrld $20,%xmm4 779 pxor %xmm3,%xmm4 780 paddd %xmm4,%xmm0 781 pxor %xmm0,%xmm12 782 pshufb L$rol8(%rip),%xmm12 783 paddd %xmm12,%xmm8 784 pxor %xmm8,%xmm4 785 movdqa %xmm4,%xmm3 786 pslld $7,%xmm3 787 psrld $25,%xmm4 788 pxor %xmm3,%xmm4 789.byte 102,15,58,15,228,4 790.byte 102,69,15,58,15,192,8 791.byte 102,69,15,58,15,228,12 792 paddd %xmm4,%xmm0 793 pxor %xmm0,%xmm12 794 pshufb L$rol16(%rip),%xmm12 795 paddd %xmm12,%xmm8 796 pxor %xmm8,%xmm4 797 movdqa %xmm4,%xmm3 798 pslld $12,%xmm3 799 psrld $20,%xmm4 800 pxor %xmm3,%xmm4 801 paddd %xmm4,%xmm0 802 pxor %xmm0,%xmm12 803 pshufb L$rol8(%rip),%xmm12 804 paddd %xmm12,%xmm8 805 pxor %xmm8,%xmm4 806 movdqa %xmm4,%xmm3 807 pslld $7,%xmm3 808 psrld $25,%xmm4 809 pxor %xmm3,%xmm4 810.byte 102,15,58,15,228,12 811.byte 102,69,15,58,15,192,8 812.byte 102,69,15,58,15,228,4 813 814 cmpq $16,%rcx 815 jae L$open_sse_tail_64_rounds_and_x1hash 816 cmpq $160,%r8 817 jne L$open_sse_tail_64_rounds 818 paddd L$chacha20_consts(%rip),%xmm0 819 paddd 0+48(%rbp),%xmm4 820 paddd 0+64(%rbp),%xmm8 821 paddd 0+96(%rbp),%xmm12 822 823 jmp L$open_sse_tail_64_dec_loop 824 825L$open_sse_tail_128: 826 movdqa L$chacha20_consts(%rip),%xmm0 827 movdqa 0+48(%rbp),%xmm4 828 movdqa 0+64(%rbp),%xmm8 829 movdqa %xmm0,%xmm1 830 movdqa %xmm4,%xmm5 831 movdqa %xmm8,%xmm9 832 movdqa 0+96(%rbp),%xmm13 833 paddd L$sse_inc(%rip),%xmm13 834 movdqa %xmm13,%xmm12 835 paddd L$sse_inc(%rip),%xmm12 836 movdqa %xmm12,0+96(%rbp) 837 movdqa %xmm13,0+112(%rbp) 838 839 movq %rbx,%rcx 840 andq $-16,%rcx 841 xorq %r8,%r8 842L$open_sse_tail_128_rounds_and_x1hash: 843 addq 0+0(%rsi,%r8,1),%r10 844 adcq 8+0(%rsi,%r8,1),%r11 845 adcq $1,%r12 846 movq 0+0+0(%rbp),%rax 847 movq %rax,%r15 848 mulq %r10 849 movq %rax,%r13 850 movq %rdx,%r14 851 movq 0+0+0(%rbp),%rax 852 mulq %r11 853 imulq %r12,%r15 854 addq %rax,%r14 855 adcq %rdx,%r15 856 movq 8+0+0(%rbp),%rax 857 movq %rax,%r9 858 mulq %r10 859 addq %rax,%r14 860 adcq $0,%rdx 861 movq %rdx,%r10 862 movq 8+0+0(%rbp),%rax 863 mulq %r11 864 addq %rax,%r15 865 adcq $0,%rdx 866 imulq %r12,%r9 867 addq %r10,%r15 868 adcq %rdx,%r9 869 movq %r13,%r10 870 movq %r14,%r11 871 movq %r15,%r12 872 andq $3,%r12 873 movq %r15,%r13 874 andq $-4,%r13 875 movq %r9,%r14 876 shrdq $2,%r9,%r15 877 shrq $2,%r9 878 addq %r13,%r15 879 adcq %r14,%r9 880 addq %r15,%r10 881 adcq %r9,%r11 882 adcq $0,%r12 883 884L$open_sse_tail_128_rounds: 885 addq $16,%r8 886 paddd %xmm4,%xmm0 887 pxor %xmm0,%xmm12 888 pshufb L$rol16(%rip),%xmm12 889 paddd %xmm12,%xmm8 890 pxor %xmm8,%xmm4 891 movdqa %xmm4,%xmm3 892 pslld $12,%xmm3 893 psrld $20,%xmm4 894 pxor %xmm3,%xmm4 895 paddd %xmm4,%xmm0 896 pxor %xmm0,%xmm12 897 pshufb L$rol8(%rip),%xmm12 898 paddd %xmm12,%xmm8 899 pxor %xmm8,%xmm4 900 movdqa %xmm4,%xmm3 901 pslld $7,%xmm3 902 psrld $25,%xmm4 903 pxor %xmm3,%xmm4 904.byte 102,15,58,15,228,4 905.byte 102,69,15,58,15,192,8 906.byte 102,69,15,58,15,228,12 907 paddd %xmm5,%xmm1 908 pxor %xmm1,%xmm13 909 pshufb L$rol16(%rip),%xmm13 910 paddd %xmm13,%xmm9 911 pxor %xmm9,%xmm5 912 movdqa %xmm5,%xmm3 913 pslld $12,%xmm3 914 psrld $20,%xmm5 915 pxor %xmm3,%xmm5 916 paddd %xmm5,%xmm1 917 pxor %xmm1,%xmm13 918 pshufb L$rol8(%rip),%xmm13 919 paddd %xmm13,%xmm9 920 pxor %xmm9,%xmm5 921 movdqa %xmm5,%xmm3 922 pslld $7,%xmm3 923 psrld $25,%xmm5 924 pxor %xmm3,%xmm5 925.byte 102,15,58,15,237,4 926.byte 102,69,15,58,15,201,8 927.byte 
102,69,15,58,15,237,12 928 paddd %xmm4,%xmm0 929 pxor %xmm0,%xmm12 930 pshufb L$rol16(%rip),%xmm12 931 paddd %xmm12,%xmm8 932 pxor %xmm8,%xmm4 933 movdqa %xmm4,%xmm3 934 pslld $12,%xmm3 935 psrld $20,%xmm4 936 pxor %xmm3,%xmm4 937 paddd %xmm4,%xmm0 938 pxor %xmm0,%xmm12 939 pshufb L$rol8(%rip),%xmm12 940 paddd %xmm12,%xmm8 941 pxor %xmm8,%xmm4 942 movdqa %xmm4,%xmm3 943 pslld $7,%xmm3 944 psrld $25,%xmm4 945 pxor %xmm3,%xmm4 946.byte 102,15,58,15,228,12 947.byte 102,69,15,58,15,192,8 948.byte 102,69,15,58,15,228,4 949 paddd %xmm5,%xmm1 950 pxor %xmm1,%xmm13 951 pshufb L$rol16(%rip),%xmm13 952 paddd %xmm13,%xmm9 953 pxor %xmm9,%xmm5 954 movdqa %xmm5,%xmm3 955 pslld $12,%xmm3 956 psrld $20,%xmm5 957 pxor %xmm3,%xmm5 958 paddd %xmm5,%xmm1 959 pxor %xmm1,%xmm13 960 pshufb L$rol8(%rip),%xmm13 961 paddd %xmm13,%xmm9 962 pxor %xmm9,%xmm5 963 movdqa %xmm5,%xmm3 964 pslld $7,%xmm3 965 psrld $25,%xmm5 966 pxor %xmm3,%xmm5 967.byte 102,15,58,15,237,12 968.byte 102,69,15,58,15,201,8 969.byte 102,69,15,58,15,237,4 970 971 cmpq %rcx,%r8 972 jb L$open_sse_tail_128_rounds_and_x1hash 973 cmpq $160,%r8 974 jne L$open_sse_tail_128_rounds 975 paddd L$chacha20_consts(%rip),%xmm1 976 paddd 0+48(%rbp),%xmm5 977 paddd 0+64(%rbp),%xmm9 978 paddd 0+112(%rbp),%xmm13 979 paddd L$chacha20_consts(%rip),%xmm0 980 paddd 0+48(%rbp),%xmm4 981 paddd 0+64(%rbp),%xmm8 982 paddd 0+96(%rbp),%xmm12 983 movdqu 0 + 0(%rsi),%xmm3 984 movdqu 16 + 0(%rsi),%xmm7 985 movdqu 32 + 0(%rsi),%xmm11 986 movdqu 48 + 0(%rsi),%xmm15 987 pxor %xmm3,%xmm1 988 pxor %xmm7,%xmm5 989 pxor %xmm11,%xmm9 990 pxor %xmm13,%xmm15 991 movdqu %xmm1,0 + 0(%rdi) 992 movdqu %xmm5,16 + 0(%rdi) 993 movdqu %xmm9,32 + 0(%rdi) 994 movdqu %xmm15,48 + 0(%rdi) 995 996 subq $64,%rbx 997 leaq 64(%rsi),%rsi 998 leaq 64(%rdi),%rdi 999 jmp L$open_sse_tail_64_dec_loop 1000 1001L$open_sse_tail_192: 1002 movdqa L$chacha20_consts(%rip),%xmm0 1003 movdqa 0+48(%rbp),%xmm4 1004 movdqa 0+64(%rbp),%xmm8 1005 movdqa %xmm0,%xmm1 1006 movdqa %xmm4,%xmm5 1007 movdqa %xmm8,%xmm9 1008 movdqa %xmm0,%xmm2 1009 movdqa %xmm4,%xmm6 1010 movdqa %xmm8,%xmm10 1011 movdqa 0+96(%rbp),%xmm14 1012 paddd L$sse_inc(%rip),%xmm14 1013 movdqa %xmm14,%xmm13 1014 paddd L$sse_inc(%rip),%xmm13 1015 movdqa %xmm13,%xmm12 1016 paddd L$sse_inc(%rip),%xmm12 1017 movdqa %xmm12,0+96(%rbp) 1018 movdqa %xmm13,0+112(%rbp) 1019 movdqa %xmm14,0+128(%rbp) 1020 1021 movq %rbx,%rcx 1022 movq $160,%r8 1023 cmpq $160,%rcx 1024 cmovgq %r8,%rcx 1025 andq $-16,%rcx 1026 xorq %r8,%r8 1027L$open_sse_tail_192_rounds_and_x1hash: 1028 addq 0+0(%rsi,%r8,1),%r10 1029 adcq 8+0(%rsi,%r8,1),%r11 1030 adcq $1,%r12 1031 movq 0+0+0(%rbp),%rax 1032 movq %rax,%r15 1033 mulq %r10 1034 movq %rax,%r13 1035 movq %rdx,%r14 1036 movq 0+0+0(%rbp),%rax 1037 mulq %r11 1038 imulq %r12,%r15 1039 addq %rax,%r14 1040 adcq %rdx,%r15 1041 movq 8+0+0(%rbp),%rax 1042 movq %rax,%r9 1043 mulq %r10 1044 addq %rax,%r14 1045 adcq $0,%rdx 1046 movq %rdx,%r10 1047 movq 8+0+0(%rbp),%rax 1048 mulq %r11 1049 addq %rax,%r15 1050 adcq $0,%rdx 1051 imulq %r12,%r9 1052 addq %r10,%r15 1053 adcq %rdx,%r9 1054 movq %r13,%r10 1055 movq %r14,%r11 1056 movq %r15,%r12 1057 andq $3,%r12 1058 movq %r15,%r13 1059 andq $-4,%r13 1060 movq %r9,%r14 1061 shrdq $2,%r9,%r15 1062 shrq $2,%r9 1063 addq %r13,%r15 1064 adcq %r14,%r9 1065 addq %r15,%r10 1066 adcq %r9,%r11 1067 adcq $0,%r12 1068 1069L$open_sse_tail_192_rounds: 1070 addq $16,%r8 1071 paddd %xmm4,%xmm0 1072 pxor %xmm0,%xmm12 1073 pshufb L$rol16(%rip),%xmm12 1074 paddd %xmm12,%xmm8 1075 pxor %xmm8,%xmm4 1076 movdqa %xmm4,%xmm3 1077 
pslld $12,%xmm3 1078 psrld $20,%xmm4 1079 pxor %xmm3,%xmm4 1080 paddd %xmm4,%xmm0 1081 pxor %xmm0,%xmm12 1082 pshufb L$rol8(%rip),%xmm12 1083 paddd %xmm12,%xmm8 1084 pxor %xmm8,%xmm4 1085 movdqa %xmm4,%xmm3 1086 pslld $7,%xmm3 1087 psrld $25,%xmm4 1088 pxor %xmm3,%xmm4 1089.byte 102,15,58,15,228,4 1090.byte 102,69,15,58,15,192,8 1091.byte 102,69,15,58,15,228,12 1092 paddd %xmm5,%xmm1 1093 pxor %xmm1,%xmm13 1094 pshufb L$rol16(%rip),%xmm13 1095 paddd %xmm13,%xmm9 1096 pxor %xmm9,%xmm5 1097 movdqa %xmm5,%xmm3 1098 pslld $12,%xmm3 1099 psrld $20,%xmm5 1100 pxor %xmm3,%xmm5 1101 paddd %xmm5,%xmm1 1102 pxor %xmm1,%xmm13 1103 pshufb L$rol8(%rip),%xmm13 1104 paddd %xmm13,%xmm9 1105 pxor %xmm9,%xmm5 1106 movdqa %xmm5,%xmm3 1107 pslld $7,%xmm3 1108 psrld $25,%xmm5 1109 pxor %xmm3,%xmm5 1110.byte 102,15,58,15,237,4 1111.byte 102,69,15,58,15,201,8 1112.byte 102,69,15,58,15,237,12 1113 paddd %xmm6,%xmm2 1114 pxor %xmm2,%xmm14 1115 pshufb L$rol16(%rip),%xmm14 1116 paddd %xmm14,%xmm10 1117 pxor %xmm10,%xmm6 1118 movdqa %xmm6,%xmm3 1119 pslld $12,%xmm3 1120 psrld $20,%xmm6 1121 pxor %xmm3,%xmm6 1122 paddd %xmm6,%xmm2 1123 pxor %xmm2,%xmm14 1124 pshufb L$rol8(%rip),%xmm14 1125 paddd %xmm14,%xmm10 1126 pxor %xmm10,%xmm6 1127 movdqa %xmm6,%xmm3 1128 pslld $7,%xmm3 1129 psrld $25,%xmm6 1130 pxor %xmm3,%xmm6 1131.byte 102,15,58,15,246,4 1132.byte 102,69,15,58,15,210,8 1133.byte 102,69,15,58,15,246,12 1134 paddd %xmm4,%xmm0 1135 pxor %xmm0,%xmm12 1136 pshufb L$rol16(%rip),%xmm12 1137 paddd %xmm12,%xmm8 1138 pxor %xmm8,%xmm4 1139 movdqa %xmm4,%xmm3 1140 pslld $12,%xmm3 1141 psrld $20,%xmm4 1142 pxor %xmm3,%xmm4 1143 paddd %xmm4,%xmm0 1144 pxor %xmm0,%xmm12 1145 pshufb L$rol8(%rip),%xmm12 1146 paddd %xmm12,%xmm8 1147 pxor %xmm8,%xmm4 1148 movdqa %xmm4,%xmm3 1149 pslld $7,%xmm3 1150 psrld $25,%xmm4 1151 pxor %xmm3,%xmm4 1152.byte 102,15,58,15,228,12 1153.byte 102,69,15,58,15,192,8 1154.byte 102,69,15,58,15,228,4 1155 paddd %xmm5,%xmm1 1156 pxor %xmm1,%xmm13 1157 pshufb L$rol16(%rip),%xmm13 1158 paddd %xmm13,%xmm9 1159 pxor %xmm9,%xmm5 1160 movdqa %xmm5,%xmm3 1161 pslld $12,%xmm3 1162 psrld $20,%xmm5 1163 pxor %xmm3,%xmm5 1164 paddd %xmm5,%xmm1 1165 pxor %xmm1,%xmm13 1166 pshufb L$rol8(%rip),%xmm13 1167 paddd %xmm13,%xmm9 1168 pxor %xmm9,%xmm5 1169 movdqa %xmm5,%xmm3 1170 pslld $7,%xmm3 1171 psrld $25,%xmm5 1172 pxor %xmm3,%xmm5 1173.byte 102,15,58,15,237,12 1174.byte 102,69,15,58,15,201,8 1175.byte 102,69,15,58,15,237,4 1176 paddd %xmm6,%xmm2 1177 pxor %xmm2,%xmm14 1178 pshufb L$rol16(%rip),%xmm14 1179 paddd %xmm14,%xmm10 1180 pxor %xmm10,%xmm6 1181 movdqa %xmm6,%xmm3 1182 pslld $12,%xmm3 1183 psrld $20,%xmm6 1184 pxor %xmm3,%xmm6 1185 paddd %xmm6,%xmm2 1186 pxor %xmm2,%xmm14 1187 pshufb L$rol8(%rip),%xmm14 1188 paddd %xmm14,%xmm10 1189 pxor %xmm10,%xmm6 1190 movdqa %xmm6,%xmm3 1191 pslld $7,%xmm3 1192 psrld $25,%xmm6 1193 pxor %xmm3,%xmm6 1194.byte 102,15,58,15,246,12 1195.byte 102,69,15,58,15,210,8 1196.byte 102,69,15,58,15,246,4 1197 1198 cmpq %rcx,%r8 1199 jb L$open_sse_tail_192_rounds_and_x1hash 1200 cmpq $160,%r8 1201 jne L$open_sse_tail_192_rounds 1202 cmpq $176,%rbx 1203 jb L$open_sse_tail_192_finish 1204 addq 0+160(%rsi),%r10 1205 adcq 8+160(%rsi),%r11 1206 adcq $1,%r12 1207 movq 0+0+0(%rbp),%rax 1208 movq %rax,%r15 1209 mulq %r10 1210 movq %rax,%r13 1211 movq %rdx,%r14 1212 movq 0+0+0(%rbp),%rax 1213 mulq %r11 1214 imulq %r12,%r15 1215 addq %rax,%r14 1216 adcq %rdx,%r15 1217 movq 8+0+0(%rbp),%rax 1218 movq %rax,%r9 1219 mulq %r10 1220 addq %rax,%r14 1221 adcq $0,%rdx 1222 movq %rdx,%r10 1223 movq 
8+0+0(%rbp),%rax 1224 mulq %r11 1225 addq %rax,%r15 1226 adcq $0,%rdx 1227 imulq %r12,%r9 1228 addq %r10,%r15 1229 adcq %rdx,%r9 1230 movq %r13,%r10 1231 movq %r14,%r11 1232 movq %r15,%r12 1233 andq $3,%r12 1234 movq %r15,%r13 1235 andq $-4,%r13 1236 movq %r9,%r14 1237 shrdq $2,%r9,%r15 1238 shrq $2,%r9 1239 addq %r13,%r15 1240 adcq %r14,%r9 1241 addq %r15,%r10 1242 adcq %r9,%r11 1243 adcq $0,%r12 1244 1245 cmpq $192,%rbx 1246 jb L$open_sse_tail_192_finish 1247 addq 0+176(%rsi),%r10 1248 adcq 8+176(%rsi),%r11 1249 adcq $1,%r12 1250 movq 0+0+0(%rbp),%rax 1251 movq %rax,%r15 1252 mulq %r10 1253 movq %rax,%r13 1254 movq %rdx,%r14 1255 movq 0+0+0(%rbp),%rax 1256 mulq %r11 1257 imulq %r12,%r15 1258 addq %rax,%r14 1259 adcq %rdx,%r15 1260 movq 8+0+0(%rbp),%rax 1261 movq %rax,%r9 1262 mulq %r10 1263 addq %rax,%r14 1264 adcq $0,%rdx 1265 movq %rdx,%r10 1266 movq 8+0+0(%rbp),%rax 1267 mulq %r11 1268 addq %rax,%r15 1269 adcq $0,%rdx 1270 imulq %r12,%r9 1271 addq %r10,%r15 1272 adcq %rdx,%r9 1273 movq %r13,%r10 1274 movq %r14,%r11 1275 movq %r15,%r12 1276 andq $3,%r12 1277 movq %r15,%r13 1278 andq $-4,%r13 1279 movq %r9,%r14 1280 shrdq $2,%r9,%r15 1281 shrq $2,%r9 1282 addq %r13,%r15 1283 adcq %r14,%r9 1284 addq %r15,%r10 1285 adcq %r9,%r11 1286 adcq $0,%r12 1287 1288L$open_sse_tail_192_finish: 1289 paddd L$chacha20_consts(%rip),%xmm2 1290 paddd 0+48(%rbp),%xmm6 1291 paddd 0+64(%rbp),%xmm10 1292 paddd 0+128(%rbp),%xmm14 1293 paddd L$chacha20_consts(%rip),%xmm1 1294 paddd 0+48(%rbp),%xmm5 1295 paddd 0+64(%rbp),%xmm9 1296 paddd 0+112(%rbp),%xmm13 1297 paddd L$chacha20_consts(%rip),%xmm0 1298 paddd 0+48(%rbp),%xmm4 1299 paddd 0+64(%rbp),%xmm8 1300 paddd 0+96(%rbp),%xmm12 1301 movdqu 0 + 0(%rsi),%xmm3 1302 movdqu 16 + 0(%rsi),%xmm7 1303 movdqu 32 + 0(%rsi),%xmm11 1304 movdqu 48 + 0(%rsi),%xmm15 1305 pxor %xmm3,%xmm2 1306 pxor %xmm7,%xmm6 1307 pxor %xmm11,%xmm10 1308 pxor %xmm14,%xmm15 1309 movdqu %xmm2,0 + 0(%rdi) 1310 movdqu %xmm6,16 + 0(%rdi) 1311 movdqu %xmm10,32 + 0(%rdi) 1312 movdqu %xmm15,48 + 0(%rdi) 1313 movdqu 0 + 64(%rsi),%xmm3 1314 movdqu 16 + 64(%rsi),%xmm7 1315 movdqu 32 + 64(%rsi),%xmm11 1316 movdqu 48 + 64(%rsi),%xmm15 1317 pxor %xmm3,%xmm1 1318 pxor %xmm7,%xmm5 1319 pxor %xmm11,%xmm9 1320 pxor %xmm13,%xmm15 1321 movdqu %xmm1,0 + 64(%rdi) 1322 movdqu %xmm5,16 + 64(%rdi) 1323 movdqu %xmm9,32 + 64(%rdi) 1324 movdqu %xmm15,48 + 64(%rdi) 1325 1326 subq $128,%rbx 1327 leaq 128(%rsi),%rsi 1328 leaq 128(%rdi),%rdi 1329 jmp L$open_sse_tail_64_dec_loop 1330 1331L$open_sse_tail_256: 1332 movdqa L$chacha20_consts(%rip),%xmm0 1333 movdqa 0+48(%rbp),%xmm4 1334 movdqa 0+64(%rbp),%xmm8 1335 movdqa %xmm0,%xmm1 1336 movdqa %xmm4,%xmm5 1337 movdqa %xmm8,%xmm9 1338 movdqa %xmm0,%xmm2 1339 movdqa %xmm4,%xmm6 1340 movdqa %xmm8,%xmm10 1341 movdqa %xmm0,%xmm3 1342 movdqa %xmm4,%xmm7 1343 movdqa %xmm8,%xmm11 1344 movdqa 0+96(%rbp),%xmm15 1345 paddd L$sse_inc(%rip),%xmm15 1346 movdqa %xmm15,%xmm14 1347 paddd L$sse_inc(%rip),%xmm14 1348 movdqa %xmm14,%xmm13 1349 paddd L$sse_inc(%rip),%xmm13 1350 movdqa %xmm13,%xmm12 1351 paddd L$sse_inc(%rip),%xmm12 1352 movdqa %xmm12,0+96(%rbp) 1353 movdqa %xmm13,0+112(%rbp) 1354 movdqa %xmm14,0+128(%rbp) 1355 movdqa %xmm15,0+144(%rbp) 1356 1357 xorq %r8,%r8 1358L$open_sse_tail_256_rounds_and_x1hash: 1359 addq 0+0(%rsi,%r8,1),%r10 1360 adcq 8+0(%rsi,%r8,1),%r11 1361 adcq $1,%r12 1362 movdqa %xmm11,0+80(%rbp) 1363 paddd %xmm4,%xmm0 1364 pxor %xmm0,%xmm12 1365 pshufb L$rol16(%rip),%xmm12 1366 paddd %xmm12,%xmm8 1367 pxor %xmm8,%xmm4 1368 movdqa %xmm4,%xmm11 1369 pslld $12,%xmm11 
1370 psrld $20,%xmm4 1371 pxor %xmm11,%xmm4 1372 paddd %xmm4,%xmm0 1373 pxor %xmm0,%xmm12 1374 pshufb L$rol8(%rip),%xmm12 1375 paddd %xmm12,%xmm8 1376 pxor %xmm8,%xmm4 1377 movdqa %xmm4,%xmm11 1378 pslld $7,%xmm11 1379 psrld $25,%xmm4 1380 pxor %xmm11,%xmm4 1381.byte 102,15,58,15,228,4 1382.byte 102,69,15,58,15,192,8 1383.byte 102,69,15,58,15,228,12 1384 paddd %xmm5,%xmm1 1385 pxor %xmm1,%xmm13 1386 pshufb L$rol16(%rip),%xmm13 1387 paddd %xmm13,%xmm9 1388 pxor %xmm9,%xmm5 1389 movdqa %xmm5,%xmm11 1390 pslld $12,%xmm11 1391 psrld $20,%xmm5 1392 pxor %xmm11,%xmm5 1393 paddd %xmm5,%xmm1 1394 pxor %xmm1,%xmm13 1395 pshufb L$rol8(%rip),%xmm13 1396 paddd %xmm13,%xmm9 1397 pxor %xmm9,%xmm5 1398 movdqa %xmm5,%xmm11 1399 pslld $7,%xmm11 1400 psrld $25,%xmm5 1401 pxor %xmm11,%xmm5 1402.byte 102,15,58,15,237,4 1403.byte 102,69,15,58,15,201,8 1404.byte 102,69,15,58,15,237,12 1405 paddd %xmm6,%xmm2 1406 pxor %xmm2,%xmm14 1407 pshufb L$rol16(%rip),%xmm14 1408 paddd %xmm14,%xmm10 1409 pxor %xmm10,%xmm6 1410 movdqa %xmm6,%xmm11 1411 pslld $12,%xmm11 1412 psrld $20,%xmm6 1413 pxor %xmm11,%xmm6 1414 paddd %xmm6,%xmm2 1415 pxor %xmm2,%xmm14 1416 pshufb L$rol8(%rip),%xmm14 1417 paddd %xmm14,%xmm10 1418 pxor %xmm10,%xmm6 1419 movdqa %xmm6,%xmm11 1420 pslld $7,%xmm11 1421 psrld $25,%xmm6 1422 pxor %xmm11,%xmm6 1423.byte 102,15,58,15,246,4 1424.byte 102,69,15,58,15,210,8 1425.byte 102,69,15,58,15,246,12 1426 movdqa 0+80(%rbp),%xmm11 1427 movq 0+0+0(%rbp),%rax 1428 movq %rax,%r15 1429 mulq %r10 1430 movq %rax,%r13 1431 movq %rdx,%r14 1432 movq 0+0+0(%rbp),%rax 1433 mulq %r11 1434 imulq %r12,%r15 1435 addq %rax,%r14 1436 adcq %rdx,%r15 1437 movdqa %xmm9,0+80(%rbp) 1438 paddd %xmm7,%xmm3 1439 pxor %xmm3,%xmm15 1440 pshufb L$rol16(%rip),%xmm15 1441 paddd %xmm15,%xmm11 1442 pxor %xmm11,%xmm7 1443 movdqa %xmm7,%xmm9 1444 pslld $12,%xmm9 1445 psrld $20,%xmm7 1446 pxor %xmm9,%xmm7 1447 paddd %xmm7,%xmm3 1448 pxor %xmm3,%xmm15 1449 pshufb L$rol8(%rip),%xmm15 1450 paddd %xmm15,%xmm11 1451 pxor %xmm11,%xmm7 1452 movdqa %xmm7,%xmm9 1453 pslld $7,%xmm9 1454 psrld $25,%xmm7 1455 pxor %xmm9,%xmm7 1456.byte 102,15,58,15,255,4 1457.byte 102,69,15,58,15,219,8 1458.byte 102,69,15,58,15,255,12 1459 movdqa 0+80(%rbp),%xmm9 1460 movq 8+0+0(%rbp),%rax 1461 movq %rax,%r9 1462 mulq %r10 1463 addq %rax,%r14 1464 adcq $0,%rdx 1465 movq %rdx,%r10 1466 movq 8+0+0(%rbp),%rax 1467 mulq %r11 1468 addq %rax,%r15 1469 adcq $0,%rdx 1470 movdqa %xmm11,0+80(%rbp) 1471 paddd %xmm4,%xmm0 1472 pxor %xmm0,%xmm12 1473 pshufb L$rol16(%rip),%xmm12 1474 paddd %xmm12,%xmm8 1475 pxor %xmm8,%xmm4 1476 movdqa %xmm4,%xmm11 1477 pslld $12,%xmm11 1478 psrld $20,%xmm4 1479 pxor %xmm11,%xmm4 1480 paddd %xmm4,%xmm0 1481 pxor %xmm0,%xmm12 1482 pshufb L$rol8(%rip),%xmm12 1483 paddd %xmm12,%xmm8 1484 pxor %xmm8,%xmm4 1485 movdqa %xmm4,%xmm11 1486 pslld $7,%xmm11 1487 psrld $25,%xmm4 1488 pxor %xmm11,%xmm4 1489.byte 102,15,58,15,228,12 1490.byte 102,69,15,58,15,192,8 1491.byte 102,69,15,58,15,228,4 1492 paddd %xmm5,%xmm1 1493 pxor %xmm1,%xmm13 1494 pshufb L$rol16(%rip),%xmm13 1495 paddd %xmm13,%xmm9 1496 pxor %xmm9,%xmm5 1497 movdqa %xmm5,%xmm11 1498 pslld $12,%xmm11 1499 psrld $20,%xmm5 1500 pxor %xmm11,%xmm5 1501 paddd %xmm5,%xmm1 1502 pxor %xmm1,%xmm13 1503 pshufb L$rol8(%rip),%xmm13 1504 paddd %xmm13,%xmm9 1505 pxor %xmm9,%xmm5 1506 movdqa %xmm5,%xmm11 1507 pslld $7,%xmm11 1508 psrld $25,%xmm5 1509 pxor %xmm11,%xmm5 1510.byte 102,15,58,15,237,12 1511.byte 102,69,15,58,15,201,8 1512.byte 102,69,15,58,15,237,4 1513 imulq %r12,%r9 1514 addq %r10,%r15 1515 adcq %rdx,%r9 
1516 paddd %xmm6,%xmm2 1517 pxor %xmm2,%xmm14 1518 pshufb L$rol16(%rip),%xmm14 1519 paddd %xmm14,%xmm10 1520 pxor %xmm10,%xmm6 1521 movdqa %xmm6,%xmm11 1522 pslld $12,%xmm11 1523 psrld $20,%xmm6 1524 pxor %xmm11,%xmm6 1525 paddd %xmm6,%xmm2 1526 pxor %xmm2,%xmm14 1527 pshufb L$rol8(%rip),%xmm14 1528 paddd %xmm14,%xmm10 1529 pxor %xmm10,%xmm6 1530 movdqa %xmm6,%xmm11 1531 pslld $7,%xmm11 1532 psrld $25,%xmm6 1533 pxor %xmm11,%xmm6 1534.byte 102,15,58,15,246,12 1535.byte 102,69,15,58,15,210,8 1536.byte 102,69,15,58,15,246,4 1537 movdqa 0+80(%rbp),%xmm11 1538 movq %r13,%r10 1539 movq %r14,%r11 1540 movq %r15,%r12 1541 andq $3,%r12 1542 movq %r15,%r13 1543 andq $-4,%r13 1544 movq %r9,%r14 1545 shrdq $2,%r9,%r15 1546 shrq $2,%r9 1547 addq %r13,%r15 1548 adcq %r14,%r9 1549 addq %r15,%r10 1550 adcq %r9,%r11 1551 adcq $0,%r12 1552 movdqa %xmm9,0+80(%rbp) 1553 paddd %xmm7,%xmm3 1554 pxor %xmm3,%xmm15 1555 pshufb L$rol16(%rip),%xmm15 1556 paddd %xmm15,%xmm11 1557 pxor %xmm11,%xmm7 1558 movdqa %xmm7,%xmm9 1559 pslld $12,%xmm9 1560 psrld $20,%xmm7 1561 pxor %xmm9,%xmm7 1562 paddd %xmm7,%xmm3 1563 pxor %xmm3,%xmm15 1564 pshufb L$rol8(%rip),%xmm15 1565 paddd %xmm15,%xmm11 1566 pxor %xmm11,%xmm7 1567 movdqa %xmm7,%xmm9 1568 pslld $7,%xmm9 1569 psrld $25,%xmm7 1570 pxor %xmm9,%xmm7 1571.byte 102,15,58,15,255,12 1572.byte 102,69,15,58,15,219,8 1573.byte 102,69,15,58,15,255,4 1574 movdqa 0+80(%rbp),%xmm9 1575 1576 addq $16,%r8 1577 cmpq $160,%r8 1578 jb L$open_sse_tail_256_rounds_and_x1hash 1579 1580 movq %rbx,%rcx 1581 andq $-16,%rcx 1582L$open_sse_tail_256_hash: 1583 addq 0+0(%rsi,%r8,1),%r10 1584 adcq 8+0(%rsi,%r8,1),%r11 1585 adcq $1,%r12 1586 movq 0+0+0(%rbp),%rax 1587 movq %rax,%r15 1588 mulq %r10 1589 movq %rax,%r13 1590 movq %rdx,%r14 1591 movq 0+0+0(%rbp),%rax 1592 mulq %r11 1593 imulq %r12,%r15 1594 addq %rax,%r14 1595 adcq %rdx,%r15 1596 movq 8+0+0(%rbp),%rax 1597 movq %rax,%r9 1598 mulq %r10 1599 addq %rax,%r14 1600 adcq $0,%rdx 1601 movq %rdx,%r10 1602 movq 8+0+0(%rbp),%rax 1603 mulq %r11 1604 addq %rax,%r15 1605 adcq $0,%rdx 1606 imulq %r12,%r9 1607 addq %r10,%r15 1608 adcq %rdx,%r9 1609 movq %r13,%r10 1610 movq %r14,%r11 1611 movq %r15,%r12 1612 andq $3,%r12 1613 movq %r15,%r13 1614 andq $-4,%r13 1615 movq %r9,%r14 1616 shrdq $2,%r9,%r15 1617 shrq $2,%r9 1618 addq %r13,%r15 1619 adcq %r14,%r9 1620 addq %r15,%r10 1621 adcq %r9,%r11 1622 adcq $0,%r12 1623 1624 addq $16,%r8 1625 cmpq %rcx,%r8 1626 jb L$open_sse_tail_256_hash 1627 paddd L$chacha20_consts(%rip),%xmm3 1628 paddd 0+48(%rbp),%xmm7 1629 paddd 0+64(%rbp),%xmm11 1630 paddd 0+144(%rbp),%xmm15 1631 paddd L$chacha20_consts(%rip),%xmm2 1632 paddd 0+48(%rbp),%xmm6 1633 paddd 0+64(%rbp),%xmm10 1634 paddd 0+128(%rbp),%xmm14 1635 paddd L$chacha20_consts(%rip),%xmm1 1636 paddd 0+48(%rbp),%xmm5 1637 paddd 0+64(%rbp),%xmm9 1638 paddd 0+112(%rbp),%xmm13 1639 paddd L$chacha20_consts(%rip),%xmm0 1640 paddd 0+48(%rbp),%xmm4 1641 paddd 0+64(%rbp),%xmm8 1642 paddd 0+96(%rbp),%xmm12 1643 movdqa %xmm12,0+80(%rbp) 1644 movdqu 0 + 0(%rsi),%xmm12 1645 pxor %xmm3,%xmm12 1646 movdqu %xmm12,0 + 0(%rdi) 1647 movdqu 16 + 0(%rsi),%xmm12 1648 pxor %xmm7,%xmm12 1649 movdqu %xmm12,16 + 0(%rdi) 1650 movdqu 32 + 0(%rsi),%xmm12 1651 pxor %xmm11,%xmm12 1652 movdqu %xmm12,32 + 0(%rdi) 1653 movdqu 48 + 0(%rsi),%xmm12 1654 pxor %xmm15,%xmm12 1655 movdqu %xmm12,48 + 0(%rdi) 1656 movdqu 0 + 64(%rsi),%xmm3 1657 movdqu 16 + 64(%rsi),%xmm7 1658 movdqu 32 + 64(%rsi),%xmm11 1659 movdqu 48 + 64(%rsi),%xmm15 1660 pxor %xmm3,%xmm2 1661 pxor %xmm7,%xmm6 1662 pxor %xmm11,%xmm10 1663 
pxor %xmm14,%xmm15 1664 movdqu %xmm2,0 + 64(%rdi) 1665 movdqu %xmm6,16 + 64(%rdi) 1666 movdqu %xmm10,32 + 64(%rdi) 1667 movdqu %xmm15,48 + 64(%rdi) 1668 movdqu 0 + 128(%rsi),%xmm3 1669 movdqu 16 + 128(%rsi),%xmm7 1670 movdqu 32 + 128(%rsi),%xmm11 1671 movdqu 48 + 128(%rsi),%xmm15 1672 pxor %xmm3,%xmm1 1673 pxor %xmm7,%xmm5 1674 pxor %xmm11,%xmm9 1675 pxor %xmm13,%xmm15 1676 movdqu %xmm1,0 + 128(%rdi) 1677 movdqu %xmm5,16 + 128(%rdi) 1678 movdqu %xmm9,32 + 128(%rdi) 1679 movdqu %xmm15,48 + 128(%rdi) 1680 1681 movdqa 0+80(%rbp),%xmm12 1682 subq $192,%rbx 1683 leaq 192(%rsi),%rsi 1684 leaq 192(%rdi),%rdi 1685 1686 1687L$open_sse_tail_64_dec_loop: 1688 cmpq $16,%rbx 1689 jb L$open_sse_tail_16_init 1690 subq $16,%rbx 1691 movdqu (%rsi),%xmm3 1692 pxor %xmm3,%xmm0 1693 movdqu %xmm0,(%rdi) 1694 leaq 16(%rsi),%rsi 1695 leaq 16(%rdi),%rdi 1696 movdqa %xmm4,%xmm0 1697 movdqa %xmm8,%xmm4 1698 movdqa %xmm12,%xmm8 1699 jmp L$open_sse_tail_64_dec_loop 1700L$open_sse_tail_16_init: 1701 movdqa %xmm0,%xmm1 1702 1703 1704L$open_sse_tail_16: 1705 testq %rbx,%rbx 1706 jz L$open_sse_finalize 1707 1708 1709 1710 pxor %xmm3,%xmm3 1711 leaq -1(%rsi,%rbx,1),%rsi 1712 movq %rbx,%r8 1713L$open_sse_tail_16_compose: 1714 pslldq $1,%xmm3 1715 pinsrb $0,(%rsi),%xmm3 1716 subq $1,%rsi 1717 subq $1,%r8 1718 jnz L$open_sse_tail_16_compose 1719 1720.byte 102,73,15,126,221 1721 pextrq $1,%xmm3,%r14 1722 1723 pxor %xmm1,%xmm3 1724 1725 1726L$open_sse_tail_16_extract: 1727 pextrb $0,%xmm3,(%rdi) 1728 psrldq $1,%xmm3 1729 addq $1,%rdi 1730 subq $1,%rbx 1731 jne L$open_sse_tail_16_extract 1732 1733 addq %r13,%r10 1734 adcq %r14,%r11 1735 adcq $1,%r12 1736 movq 0+0+0(%rbp),%rax 1737 movq %rax,%r15 1738 mulq %r10 1739 movq %rax,%r13 1740 movq %rdx,%r14 1741 movq 0+0+0(%rbp),%rax 1742 mulq %r11 1743 imulq %r12,%r15 1744 addq %rax,%r14 1745 adcq %rdx,%r15 1746 movq 8+0+0(%rbp),%rax 1747 movq %rax,%r9 1748 mulq %r10 1749 addq %rax,%r14 1750 adcq $0,%rdx 1751 movq %rdx,%r10 1752 movq 8+0+0(%rbp),%rax 1753 mulq %r11 1754 addq %rax,%r15 1755 adcq $0,%rdx 1756 imulq %r12,%r9 1757 addq %r10,%r15 1758 adcq %rdx,%r9 1759 movq %r13,%r10 1760 movq %r14,%r11 1761 movq %r15,%r12 1762 andq $3,%r12 1763 movq %r15,%r13 1764 andq $-4,%r13 1765 movq %r9,%r14 1766 shrdq $2,%r9,%r15 1767 shrq $2,%r9 1768 addq %r13,%r15 1769 adcq %r14,%r9 1770 addq %r15,%r10 1771 adcq %r9,%r11 1772 adcq $0,%r12 1773 1774 1775L$open_sse_finalize: 1776 addq 0+0+32(%rbp),%r10 1777 adcq 8+0+32(%rbp),%r11 1778 adcq $1,%r12 1779 movq 0+0+0(%rbp),%rax 1780 movq %rax,%r15 1781 mulq %r10 1782 movq %rax,%r13 1783 movq %rdx,%r14 1784 movq 0+0+0(%rbp),%rax 1785 mulq %r11 1786 imulq %r12,%r15 1787 addq %rax,%r14 1788 adcq %rdx,%r15 1789 movq 8+0+0(%rbp),%rax 1790 movq %rax,%r9 1791 mulq %r10 1792 addq %rax,%r14 1793 adcq $0,%rdx 1794 movq %rdx,%r10 1795 movq 8+0+0(%rbp),%rax 1796 mulq %r11 1797 addq %rax,%r15 1798 adcq $0,%rdx 1799 imulq %r12,%r9 1800 addq %r10,%r15 1801 adcq %rdx,%r9 1802 movq %r13,%r10 1803 movq %r14,%r11 1804 movq %r15,%r12 1805 andq $3,%r12 1806 movq %r15,%r13 1807 andq $-4,%r13 1808 movq %r9,%r14 1809 shrdq $2,%r9,%r15 1810 shrq $2,%r9 1811 addq %r13,%r15 1812 adcq %r14,%r9 1813 addq %r15,%r10 1814 adcq %r9,%r11 1815 adcq $0,%r12 1816 1817 1818 movq %r10,%r13 1819 movq %r11,%r14 1820 movq %r12,%r15 1821 subq $-5,%r10 1822 sbbq $-1,%r11 1823 sbbq $3,%r12 1824 cmovcq %r13,%r10 1825 cmovcq %r14,%r11 1826 cmovcq %r15,%r12 1827 1828 addq 0+0+16(%rbp),%r10 1829 adcq 8+0+16(%rbp),%r11 1830 1831 1832 addq $288 + 0 + 32,%rsp 1833 1834 1835 popq %r9 1836 1837 movq 
%r10,(%r9) 1838 movq %r11,8(%r9) 1839 popq %r15 1840 1841 popq %r14 1842 1843 popq %r13 1844 1845 popq %r12 1846 1847 popq %rbx 1848 1849 popq %rbp 1850 1851 .byte 0xf3,0xc3 1852 1853L$open_sse_128: 1854 1855 movdqu L$chacha20_consts(%rip),%xmm0 1856 movdqa %xmm0,%xmm1 1857 movdqa %xmm0,%xmm2 1858 movdqu 0(%r9),%xmm4 1859 movdqa %xmm4,%xmm5 1860 movdqa %xmm4,%xmm6 1861 movdqu 16(%r9),%xmm8 1862 movdqa %xmm8,%xmm9 1863 movdqa %xmm8,%xmm10 1864 movdqu 32(%r9),%xmm12 1865 movdqa %xmm12,%xmm13 1866 paddd L$sse_inc(%rip),%xmm13 1867 movdqa %xmm13,%xmm14 1868 paddd L$sse_inc(%rip),%xmm14 1869 movdqa %xmm4,%xmm7 1870 movdqa %xmm8,%xmm11 1871 movdqa %xmm13,%xmm15 1872 movq $10,%r10 1873 1874L$open_sse_128_rounds: 1875 paddd %xmm4,%xmm0 1876 pxor %xmm0,%xmm12 1877 pshufb L$rol16(%rip),%xmm12 1878 paddd %xmm12,%xmm8 1879 pxor %xmm8,%xmm4 1880 movdqa %xmm4,%xmm3 1881 pslld $12,%xmm3 1882 psrld $20,%xmm4 1883 pxor %xmm3,%xmm4 1884 paddd %xmm4,%xmm0 1885 pxor %xmm0,%xmm12 1886 pshufb L$rol8(%rip),%xmm12 1887 paddd %xmm12,%xmm8 1888 pxor %xmm8,%xmm4 1889 movdqa %xmm4,%xmm3 1890 pslld $7,%xmm3 1891 psrld $25,%xmm4 1892 pxor %xmm3,%xmm4 1893.byte 102,15,58,15,228,4 1894.byte 102,69,15,58,15,192,8 1895.byte 102,69,15,58,15,228,12 1896 paddd %xmm5,%xmm1 1897 pxor %xmm1,%xmm13 1898 pshufb L$rol16(%rip),%xmm13 1899 paddd %xmm13,%xmm9 1900 pxor %xmm9,%xmm5 1901 movdqa %xmm5,%xmm3 1902 pslld $12,%xmm3 1903 psrld $20,%xmm5 1904 pxor %xmm3,%xmm5 1905 paddd %xmm5,%xmm1 1906 pxor %xmm1,%xmm13 1907 pshufb L$rol8(%rip),%xmm13 1908 paddd %xmm13,%xmm9 1909 pxor %xmm9,%xmm5 1910 movdqa %xmm5,%xmm3 1911 pslld $7,%xmm3 1912 psrld $25,%xmm5 1913 pxor %xmm3,%xmm5 1914.byte 102,15,58,15,237,4 1915.byte 102,69,15,58,15,201,8 1916.byte 102,69,15,58,15,237,12 1917 paddd %xmm6,%xmm2 1918 pxor %xmm2,%xmm14 1919 pshufb L$rol16(%rip),%xmm14 1920 paddd %xmm14,%xmm10 1921 pxor %xmm10,%xmm6 1922 movdqa %xmm6,%xmm3 1923 pslld $12,%xmm3 1924 psrld $20,%xmm6 1925 pxor %xmm3,%xmm6 1926 paddd %xmm6,%xmm2 1927 pxor %xmm2,%xmm14 1928 pshufb L$rol8(%rip),%xmm14 1929 paddd %xmm14,%xmm10 1930 pxor %xmm10,%xmm6 1931 movdqa %xmm6,%xmm3 1932 pslld $7,%xmm3 1933 psrld $25,%xmm6 1934 pxor %xmm3,%xmm6 1935.byte 102,15,58,15,246,4 1936.byte 102,69,15,58,15,210,8 1937.byte 102,69,15,58,15,246,12 1938 paddd %xmm4,%xmm0 1939 pxor %xmm0,%xmm12 1940 pshufb L$rol16(%rip),%xmm12 1941 paddd %xmm12,%xmm8 1942 pxor %xmm8,%xmm4 1943 movdqa %xmm4,%xmm3 1944 pslld $12,%xmm3 1945 psrld $20,%xmm4 1946 pxor %xmm3,%xmm4 1947 paddd %xmm4,%xmm0 1948 pxor %xmm0,%xmm12 1949 pshufb L$rol8(%rip),%xmm12 1950 paddd %xmm12,%xmm8 1951 pxor %xmm8,%xmm4 1952 movdqa %xmm4,%xmm3 1953 pslld $7,%xmm3 1954 psrld $25,%xmm4 1955 pxor %xmm3,%xmm4 1956.byte 102,15,58,15,228,12 1957.byte 102,69,15,58,15,192,8 1958.byte 102,69,15,58,15,228,4 1959 paddd %xmm5,%xmm1 1960 pxor %xmm1,%xmm13 1961 pshufb L$rol16(%rip),%xmm13 1962 paddd %xmm13,%xmm9 1963 pxor %xmm9,%xmm5 1964 movdqa %xmm5,%xmm3 1965 pslld $12,%xmm3 1966 psrld $20,%xmm5 1967 pxor %xmm3,%xmm5 1968 paddd %xmm5,%xmm1 1969 pxor %xmm1,%xmm13 1970 pshufb L$rol8(%rip),%xmm13 1971 paddd %xmm13,%xmm9 1972 pxor %xmm9,%xmm5 1973 movdqa %xmm5,%xmm3 1974 pslld $7,%xmm3 1975 psrld $25,%xmm5 1976 pxor %xmm3,%xmm5 1977.byte 102,15,58,15,237,12 1978.byte 102,69,15,58,15,201,8 1979.byte 102,69,15,58,15,237,4 1980 paddd %xmm6,%xmm2 1981 pxor %xmm2,%xmm14 1982 pshufb L$rol16(%rip),%xmm14 1983 paddd %xmm14,%xmm10 1984 pxor %xmm10,%xmm6 1985 movdqa %xmm6,%xmm3 1986 pslld $12,%xmm3 1987 psrld $20,%xmm6 1988 pxor %xmm3,%xmm6 1989 paddd %xmm6,%xmm2 1990 
pxor %xmm2,%xmm14 1991 pshufb L$rol8(%rip),%xmm14 1992 paddd %xmm14,%xmm10 1993 pxor %xmm10,%xmm6 1994 movdqa %xmm6,%xmm3 1995 pslld $7,%xmm3 1996 psrld $25,%xmm6 1997 pxor %xmm3,%xmm6 1998.byte 102,15,58,15,246,12 1999.byte 102,69,15,58,15,210,8 2000.byte 102,69,15,58,15,246,4 2001 2002 decq %r10 2003 jnz L$open_sse_128_rounds 2004 paddd L$chacha20_consts(%rip),%xmm0 2005 paddd L$chacha20_consts(%rip),%xmm1 2006 paddd L$chacha20_consts(%rip),%xmm2 2007 paddd %xmm7,%xmm4 2008 paddd %xmm7,%xmm5 2009 paddd %xmm7,%xmm6 2010 paddd %xmm11,%xmm9 2011 paddd %xmm11,%xmm10 2012 paddd %xmm15,%xmm13 2013 paddd L$sse_inc(%rip),%xmm15 2014 paddd %xmm15,%xmm14 2015 2016 pand L$clamp(%rip),%xmm0 2017 movdqa %xmm0,0+0(%rbp) 2018 movdqa %xmm4,0+16(%rbp) 2019 2020 movq %r8,%r8 2021 call poly_hash_ad_internal 2022L$open_sse_128_xor_hash: 2023 cmpq $16,%rbx 2024 jb L$open_sse_tail_16 2025 subq $16,%rbx 2026 addq 0+0(%rsi),%r10 2027 adcq 8+0(%rsi),%r11 2028 adcq $1,%r12 2029 2030 2031 movdqu 0(%rsi),%xmm3 2032 pxor %xmm3,%xmm1 2033 movdqu %xmm1,0(%rdi) 2034 leaq 16(%rsi),%rsi 2035 leaq 16(%rdi),%rdi 2036 movq 0+0+0(%rbp),%rax 2037 movq %rax,%r15 2038 mulq %r10 2039 movq %rax,%r13 2040 movq %rdx,%r14 2041 movq 0+0+0(%rbp),%rax 2042 mulq %r11 2043 imulq %r12,%r15 2044 addq %rax,%r14 2045 adcq %rdx,%r15 2046 movq 8+0+0(%rbp),%rax 2047 movq %rax,%r9 2048 mulq %r10 2049 addq %rax,%r14 2050 adcq $0,%rdx 2051 movq %rdx,%r10 2052 movq 8+0+0(%rbp),%rax 2053 mulq %r11 2054 addq %rax,%r15 2055 adcq $0,%rdx 2056 imulq %r12,%r9 2057 addq %r10,%r15 2058 adcq %rdx,%r9 2059 movq %r13,%r10 2060 movq %r14,%r11 2061 movq %r15,%r12 2062 andq $3,%r12 2063 movq %r15,%r13 2064 andq $-4,%r13 2065 movq %r9,%r14 2066 shrdq $2,%r9,%r15 2067 shrq $2,%r9 2068 addq %r13,%r15 2069 adcq %r14,%r9 2070 addq %r15,%r10 2071 adcq %r9,%r11 2072 adcq $0,%r12 2073 2074 2075 movdqa %xmm5,%xmm1 2076 movdqa %xmm9,%xmm5 2077 movdqa %xmm13,%xmm9 2078 movdqa %xmm2,%xmm13 2079 movdqa %xmm6,%xmm2 2080 movdqa %xmm10,%xmm6 2081 movdqa %xmm14,%xmm10 2082 jmp L$open_sse_128_xor_hash 2083 2084 2085 2086 2087 2088 2089 2090 2091.globl _GFp_chacha20_poly1305_seal 2092.private_extern _GFp_chacha20_poly1305_seal 2093 2094.p2align 6 2095_GFp_chacha20_poly1305_seal: 2096 2097 pushq %rbp 2098 2099 pushq %rbx 2100 2101 pushq %r12 2102 2103 pushq %r13 2104 2105 pushq %r14 2106 2107 pushq %r15 2108 2109 2110 2111 pushq %r9 2112 2113 subq $288 + 0 + 32,%rsp 2114 2115 leaq 32(%rsp),%rbp 2116 andq $-32,%rbp 2117 2118 movq 56(%r9),%rbx 2119 addq %rdx,%rbx 2120 movq %r8,0+0+32(%rbp) 2121 movq %rbx,8+0+32(%rbp) 2122 movq %rdx,%rbx 2123 2124 movl _GFp_ia32cap_P+8(%rip),%eax 2125 andl $288,%eax 2126 xorl $288,%eax 2127 jz chacha20_poly1305_seal_avx2 2128 2129 cmpq $128,%rbx 2130 jbe L$seal_sse_128 2131 2132 movdqa L$chacha20_consts(%rip),%xmm0 2133 movdqu 0(%r9),%xmm4 2134 movdqu 16(%r9),%xmm8 2135 movdqu 32(%r9),%xmm12 2136 2137 movdqa %xmm0,%xmm1 2138 movdqa %xmm0,%xmm2 2139 movdqa %xmm0,%xmm3 2140 movdqa %xmm4,%xmm5 2141 movdqa %xmm4,%xmm6 2142 movdqa %xmm4,%xmm7 2143 movdqa %xmm8,%xmm9 2144 movdqa %xmm8,%xmm10 2145 movdqa %xmm8,%xmm11 2146 movdqa %xmm12,%xmm15 2147 paddd L$sse_inc(%rip),%xmm12 2148 movdqa %xmm12,%xmm14 2149 paddd L$sse_inc(%rip),%xmm12 2150 movdqa %xmm12,%xmm13 2151 paddd L$sse_inc(%rip),%xmm12 2152 2153 movdqa %xmm4,0+48(%rbp) 2154 movdqa %xmm8,0+64(%rbp) 2155 movdqa %xmm12,0+96(%rbp) 2156 movdqa %xmm13,0+112(%rbp) 2157 movdqa %xmm14,0+128(%rbp) 2158 movdqa %xmm15,0+144(%rbp) 2159 movq $10,%r10 2160L$seal_sse_init_rounds: 2161 movdqa %xmm8,0+80(%rbp) 2162 
movdqa L$rol16(%rip),%xmm8 2163 paddd %xmm7,%xmm3 2164 paddd %xmm6,%xmm2 2165 paddd %xmm5,%xmm1 2166 paddd %xmm4,%xmm0 2167 pxor %xmm3,%xmm15 2168 pxor %xmm2,%xmm14 2169 pxor %xmm1,%xmm13 2170 pxor %xmm0,%xmm12 2171.byte 102,69,15,56,0,248 2172.byte 102,69,15,56,0,240 2173.byte 102,69,15,56,0,232 2174.byte 102,69,15,56,0,224 2175 movdqa 0+80(%rbp),%xmm8 2176 paddd %xmm15,%xmm11 2177 paddd %xmm14,%xmm10 2178 paddd %xmm13,%xmm9 2179 paddd %xmm12,%xmm8 2180 pxor %xmm11,%xmm7 2181 pxor %xmm10,%xmm6 2182 pxor %xmm9,%xmm5 2183 pxor %xmm8,%xmm4 2184 movdqa %xmm8,0+80(%rbp) 2185 movdqa %xmm7,%xmm8 2186 psrld $20,%xmm8 2187 pslld $32-20,%xmm7 2188 pxor %xmm8,%xmm7 2189 movdqa %xmm6,%xmm8 2190 psrld $20,%xmm8 2191 pslld $32-20,%xmm6 2192 pxor %xmm8,%xmm6 2193 movdqa %xmm5,%xmm8 2194 psrld $20,%xmm8 2195 pslld $32-20,%xmm5 2196 pxor %xmm8,%xmm5 2197 movdqa %xmm4,%xmm8 2198 psrld $20,%xmm8 2199 pslld $32-20,%xmm4 2200 pxor %xmm8,%xmm4 2201 movdqa L$rol8(%rip),%xmm8 2202 paddd %xmm7,%xmm3 2203 paddd %xmm6,%xmm2 2204 paddd %xmm5,%xmm1 2205 paddd %xmm4,%xmm0 2206 pxor %xmm3,%xmm15 2207 pxor %xmm2,%xmm14 2208 pxor %xmm1,%xmm13 2209 pxor %xmm0,%xmm12 2210.byte 102,69,15,56,0,248 2211.byte 102,69,15,56,0,240 2212.byte 102,69,15,56,0,232 2213.byte 102,69,15,56,0,224 2214 movdqa 0+80(%rbp),%xmm8 2215 paddd %xmm15,%xmm11 2216 paddd %xmm14,%xmm10 2217 paddd %xmm13,%xmm9 2218 paddd %xmm12,%xmm8 2219 pxor %xmm11,%xmm7 2220 pxor %xmm10,%xmm6 2221 pxor %xmm9,%xmm5 2222 pxor %xmm8,%xmm4 2223 movdqa %xmm8,0+80(%rbp) 2224 movdqa %xmm7,%xmm8 2225 psrld $25,%xmm8 2226 pslld $32-25,%xmm7 2227 pxor %xmm8,%xmm7 2228 movdqa %xmm6,%xmm8 2229 psrld $25,%xmm8 2230 pslld $32-25,%xmm6 2231 pxor %xmm8,%xmm6 2232 movdqa %xmm5,%xmm8 2233 psrld $25,%xmm8 2234 pslld $32-25,%xmm5 2235 pxor %xmm8,%xmm5 2236 movdqa %xmm4,%xmm8 2237 psrld $25,%xmm8 2238 pslld $32-25,%xmm4 2239 pxor %xmm8,%xmm4 2240 movdqa 0+80(%rbp),%xmm8 2241.byte 102,15,58,15,255,4 2242.byte 102,69,15,58,15,219,8 2243.byte 102,69,15,58,15,255,12 2244.byte 102,15,58,15,246,4 2245.byte 102,69,15,58,15,210,8 2246.byte 102,69,15,58,15,246,12 2247.byte 102,15,58,15,237,4 2248.byte 102,69,15,58,15,201,8 2249.byte 102,69,15,58,15,237,12 2250.byte 102,15,58,15,228,4 2251.byte 102,69,15,58,15,192,8 2252.byte 102,69,15,58,15,228,12 2253 movdqa %xmm8,0+80(%rbp) 2254 movdqa L$rol16(%rip),%xmm8 2255 paddd %xmm7,%xmm3 2256 paddd %xmm6,%xmm2 2257 paddd %xmm5,%xmm1 2258 paddd %xmm4,%xmm0 2259 pxor %xmm3,%xmm15 2260 pxor %xmm2,%xmm14 2261 pxor %xmm1,%xmm13 2262 pxor %xmm0,%xmm12 2263.byte 102,69,15,56,0,248 2264.byte 102,69,15,56,0,240 2265.byte 102,69,15,56,0,232 2266.byte 102,69,15,56,0,224 2267 movdqa 0+80(%rbp),%xmm8 2268 paddd %xmm15,%xmm11 2269 paddd %xmm14,%xmm10 2270 paddd %xmm13,%xmm9 2271 paddd %xmm12,%xmm8 2272 pxor %xmm11,%xmm7 2273 pxor %xmm10,%xmm6 2274 pxor %xmm9,%xmm5 2275 pxor %xmm8,%xmm4 2276 movdqa %xmm8,0+80(%rbp) 2277 movdqa %xmm7,%xmm8 2278 psrld $20,%xmm8 2279 pslld $32-20,%xmm7 2280 pxor %xmm8,%xmm7 2281 movdqa %xmm6,%xmm8 2282 psrld $20,%xmm8 2283 pslld $32-20,%xmm6 2284 pxor %xmm8,%xmm6 2285 movdqa %xmm5,%xmm8 2286 psrld $20,%xmm8 2287 pslld $32-20,%xmm5 2288 pxor %xmm8,%xmm5 2289 movdqa %xmm4,%xmm8 2290 psrld $20,%xmm8 2291 pslld $32-20,%xmm4 2292 pxor %xmm8,%xmm4 2293 movdqa L$rol8(%rip),%xmm8 2294 paddd %xmm7,%xmm3 2295 paddd %xmm6,%xmm2 2296 paddd %xmm5,%xmm1 2297 paddd %xmm4,%xmm0 2298 pxor %xmm3,%xmm15 2299 pxor %xmm2,%xmm14 2300 pxor %xmm1,%xmm13 2301 pxor %xmm0,%xmm12 2302.byte 102,69,15,56,0,248 2303.byte 102,69,15,56,0,240 2304.byte 
102,69,15,56,0,232 2305.byte 102,69,15,56,0,224 2306 movdqa 0+80(%rbp),%xmm8 2307 paddd %xmm15,%xmm11 2308 paddd %xmm14,%xmm10 2309 paddd %xmm13,%xmm9 2310 paddd %xmm12,%xmm8 2311 pxor %xmm11,%xmm7 2312 pxor %xmm10,%xmm6 2313 pxor %xmm9,%xmm5 2314 pxor %xmm8,%xmm4 2315 movdqa %xmm8,0+80(%rbp) 2316 movdqa %xmm7,%xmm8 2317 psrld $25,%xmm8 2318 pslld $32-25,%xmm7 2319 pxor %xmm8,%xmm7 2320 movdqa %xmm6,%xmm8 2321 psrld $25,%xmm8 2322 pslld $32-25,%xmm6 2323 pxor %xmm8,%xmm6 2324 movdqa %xmm5,%xmm8 2325 psrld $25,%xmm8 2326 pslld $32-25,%xmm5 2327 pxor %xmm8,%xmm5 2328 movdqa %xmm4,%xmm8 2329 psrld $25,%xmm8 2330 pslld $32-25,%xmm4 2331 pxor %xmm8,%xmm4 2332 movdqa 0+80(%rbp),%xmm8 2333.byte 102,15,58,15,255,12 2334.byte 102,69,15,58,15,219,8 2335.byte 102,69,15,58,15,255,4 2336.byte 102,15,58,15,246,12 2337.byte 102,69,15,58,15,210,8 2338.byte 102,69,15,58,15,246,4 2339.byte 102,15,58,15,237,12 2340.byte 102,69,15,58,15,201,8 2341.byte 102,69,15,58,15,237,4 2342.byte 102,15,58,15,228,12 2343.byte 102,69,15,58,15,192,8 2344.byte 102,69,15,58,15,228,4 2345 2346 decq %r10 2347 jnz L$seal_sse_init_rounds 2348 paddd L$chacha20_consts(%rip),%xmm3 2349 paddd 0+48(%rbp),%xmm7 2350 paddd 0+64(%rbp),%xmm11 2351 paddd 0+144(%rbp),%xmm15 2352 paddd L$chacha20_consts(%rip),%xmm2 2353 paddd 0+48(%rbp),%xmm6 2354 paddd 0+64(%rbp),%xmm10 2355 paddd 0+128(%rbp),%xmm14 2356 paddd L$chacha20_consts(%rip),%xmm1 2357 paddd 0+48(%rbp),%xmm5 2358 paddd 0+64(%rbp),%xmm9 2359 paddd 0+112(%rbp),%xmm13 2360 paddd L$chacha20_consts(%rip),%xmm0 2361 paddd 0+48(%rbp),%xmm4 2362 paddd 0+64(%rbp),%xmm8 2363 paddd 0+96(%rbp),%xmm12 2364 2365 2366 pand L$clamp(%rip),%xmm3 2367 movdqa %xmm3,0+0(%rbp) 2368 movdqa %xmm7,0+16(%rbp) 2369 2370 movq %r8,%r8 2371 call poly_hash_ad_internal 2372 movdqu 0 + 0(%rsi),%xmm3 2373 movdqu 16 + 0(%rsi),%xmm7 2374 movdqu 32 + 0(%rsi),%xmm11 2375 movdqu 48 + 0(%rsi),%xmm15 2376 pxor %xmm3,%xmm2 2377 pxor %xmm7,%xmm6 2378 pxor %xmm11,%xmm10 2379 pxor %xmm14,%xmm15 2380 movdqu %xmm2,0 + 0(%rdi) 2381 movdqu %xmm6,16 + 0(%rdi) 2382 movdqu %xmm10,32 + 0(%rdi) 2383 movdqu %xmm15,48 + 0(%rdi) 2384 movdqu 0 + 64(%rsi),%xmm3 2385 movdqu 16 + 64(%rsi),%xmm7 2386 movdqu 32 + 64(%rsi),%xmm11 2387 movdqu 48 + 64(%rsi),%xmm15 2388 pxor %xmm3,%xmm1 2389 pxor %xmm7,%xmm5 2390 pxor %xmm11,%xmm9 2391 pxor %xmm13,%xmm15 2392 movdqu %xmm1,0 + 64(%rdi) 2393 movdqu %xmm5,16 + 64(%rdi) 2394 movdqu %xmm9,32 + 64(%rdi) 2395 movdqu %xmm15,48 + 64(%rdi) 2396 2397 cmpq $192,%rbx 2398 ja L$seal_sse_main_init 2399 movq $128,%rcx 2400 subq $128,%rbx 2401 leaq 128(%rsi),%rsi 2402 jmp L$seal_sse_128_tail_hash 2403L$seal_sse_main_init: 2404 movdqu 0 + 128(%rsi),%xmm3 2405 movdqu 16 + 128(%rsi),%xmm7 2406 movdqu 32 + 128(%rsi),%xmm11 2407 movdqu 48 + 128(%rsi),%xmm15 2408 pxor %xmm3,%xmm0 2409 pxor %xmm7,%xmm4 2410 pxor %xmm11,%xmm8 2411 pxor %xmm12,%xmm15 2412 movdqu %xmm0,0 + 128(%rdi) 2413 movdqu %xmm4,16 + 128(%rdi) 2414 movdqu %xmm8,32 + 128(%rdi) 2415 movdqu %xmm15,48 + 128(%rdi) 2416 2417 movq $192,%rcx 2418 subq $192,%rbx 2419 leaq 192(%rsi),%rsi 2420 movq $2,%rcx 2421 movq $8,%r8 2422 cmpq $64,%rbx 2423 jbe L$seal_sse_tail_64 2424 cmpq $128,%rbx 2425 jbe L$seal_sse_tail_128 2426 cmpq $192,%rbx 2427 jbe L$seal_sse_tail_192 2428 2429L$seal_sse_main_loop: 2430 movdqa L$chacha20_consts(%rip),%xmm0 2431 movdqa 0+48(%rbp),%xmm4 2432 movdqa 0+64(%rbp),%xmm8 2433 movdqa %xmm0,%xmm1 2434 movdqa %xmm4,%xmm5 2435 movdqa %xmm8,%xmm9 2436 movdqa %xmm0,%xmm2 2437 movdqa %xmm4,%xmm6 2438 movdqa %xmm8,%xmm10 2439 movdqa %xmm0,%xmm3 
2440 movdqa %xmm4,%xmm7 2441 movdqa %xmm8,%xmm11 2442 movdqa 0+96(%rbp),%xmm15 2443 paddd L$sse_inc(%rip),%xmm15 2444 movdqa %xmm15,%xmm14 2445 paddd L$sse_inc(%rip),%xmm14 2446 movdqa %xmm14,%xmm13 2447 paddd L$sse_inc(%rip),%xmm13 2448 movdqa %xmm13,%xmm12 2449 paddd L$sse_inc(%rip),%xmm12 2450 movdqa %xmm12,0+96(%rbp) 2451 movdqa %xmm13,0+112(%rbp) 2452 movdqa %xmm14,0+128(%rbp) 2453 movdqa %xmm15,0+144(%rbp) 2454 2455.p2align 5 2456L$seal_sse_main_rounds: 2457 movdqa %xmm8,0+80(%rbp) 2458 movdqa L$rol16(%rip),%xmm8 2459 paddd %xmm7,%xmm3 2460 paddd %xmm6,%xmm2 2461 paddd %xmm5,%xmm1 2462 paddd %xmm4,%xmm0 2463 pxor %xmm3,%xmm15 2464 pxor %xmm2,%xmm14 2465 pxor %xmm1,%xmm13 2466 pxor %xmm0,%xmm12 2467.byte 102,69,15,56,0,248 2468.byte 102,69,15,56,0,240 2469.byte 102,69,15,56,0,232 2470.byte 102,69,15,56,0,224 2471 movdqa 0+80(%rbp),%xmm8 2472 paddd %xmm15,%xmm11 2473 paddd %xmm14,%xmm10 2474 paddd %xmm13,%xmm9 2475 paddd %xmm12,%xmm8 2476 pxor %xmm11,%xmm7 2477 addq 0+0(%rdi),%r10 2478 adcq 8+0(%rdi),%r11 2479 adcq $1,%r12 2480 pxor %xmm10,%xmm6 2481 pxor %xmm9,%xmm5 2482 pxor %xmm8,%xmm4 2483 movdqa %xmm8,0+80(%rbp) 2484 movdqa %xmm7,%xmm8 2485 psrld $20,%xmm8 2486 pslld $32-20,%xmm7 2487 pxor %xmm8,%xmm7 2488 movdqa %xmm6,%xmm8 2489 psrld $20,%xmm8 2490 pslld $32-20,%xmm6 2491 pxor %xmm8,%xmm6 2492 movdqa %xmm5,%xmm8 2493 psrld $20,%xmm8 2494 pslld $32-20,%xmm5 2495 pxor %xmm8,%xmm5 2496 movdqa %xmm4,%xmm8 2497 psrld $20,%xmm8 2498 pslld $32-20,%xmm4 2499 pxor %xmm8,%xmm4 2500 movq 0+0+0(%rbp),%rax 2501 movq %rax,%r15 2502 mulq %r10 2503 movq %rax,%r13 2504 movq %rdx,%r14 2505 movq 0+0+0(%rbp),%rax 2506 mulq %r11 2507 imulq %r12,%r15 2508 addq %rax,%r14 2509 adcq %rdx,%r15 2510 movdqa L$rol8(%rip),%xmm8 2511 paddd %xmm7,%xmm3 2512 paddd %xmm6,%xmm2 2513 paddd %xmm5,%xmm1 2514 paddd %xmm4,%xmm0 2515 pxor %xmm3,%xmm15 2516 pxor %xmm2,%xmm14 2517 pxor %xmm1,%xmm13 2518 pxor %xmm0,%xmm12 2519.byte 102,69,15,56,0,248 2520.byte 102,69,15,56,0,240 2521.byte 102,69,15,56,0,232 2522.byte 102,69,15,56,0,224 2523 movdqa 0+80(%rbp),%xmm8 2524 paddd %xmm15,%xmm11 2525 paddd %xmm14,%xmm10 2526 paddd %xmm13,%xmm9 2527 paddd %xmm12,%xmm8 2528 pxor %xmm11,%xmm7 2529 pxor %xmm10,%xmm6 2530 movq 8+0+0(%rbp),%rax 2531 movq %rax,%r9 2532 mulq %r10 2533 addq %rax,%r14 2534 adcq $0,%rdx 2535 movq %rdx,%r10 2536 movq 8+0+0(%rbp),%rax 2537 mulq %r11 2538 addq %rax,%r15 2539 adcq $0,%rdx 2540 pxor %xmm9,%xmm5 2541 pxor %xmm8,%xmm4 2542 movdqa %xmm8,0+80(%rbp) 2543 movdqa %xmm7,%xmm8 2544 psrld $25,%xmm8 2545 pslld $32-25,%xmm7 2546 pxor %xmm8,%xmm7 2547 movdqa %xmm6,%xmm8 2548 psrld $25,%xmm8 2549 pslld $32-25,%xmm6 2550 pxor %xmm8,%xmm6 2551 movdqa %xmm5,%xmm8 2552 psrld $25,%xmm8 2553 pslld $32-25,%xmm5 2554 pxor %xmm8,%xmm5 2555 movdqa %xmm4,%xmm8 2556 psrld $25,%xmm8 2557 pslld $32-25,%xmm4 2558 pxor %xmm8,%xmm4 2559 movdqa 0+80(%rbp),%xmm8 2560 imulq %r12,%r9 2561 addq %r10,%r15 2562 adcq %rdx,%r9 2563.byte 102,15,58,15,255,4 2564.byte 102,69,15,58,15,219,8 2565.byte 102,69,15,58,15,255,12 2566.byte 102,15,58,15,246,4 2567.byte 102,69,15,58,15,210,8 2568.byte 102,69,15,58,15,246,12 2569.byte 102,15,58,15,237,4 2570.byte 102,69,15,58,15,201,8 2571.byte 102,69,15,58,15,237,12 2572.byte 102,15,58,15,228,4 2573.byte 102,69,15,58,15,192,8 2574.byte 102,69,15,58,15,228,12 2575 movdqa %xmm8,0+80(%rbp) 2576 movdqa L$rol16(%rip),%xmm8 2577 paddd %xmm7,%xmm3 2578 paddd %xmm6,%xmm2 2579 paddd %xmm5,%xmm1 2580 paddd %xmm4,%xmm0 2581 pxor %xmm3,%xmm15 2582 pxor %xmm2,%xmm14 2583 movq %r13,%r10 2584 movq %r14,%r11 
2585 movq %r15,%r12 2586 andq $3,%r12 2587 movq %r15,%r13 2588 andq $-4,%r13 2589 movq %r9,%r14 2590 shrdq $2,%r9,%r15 2591 shrq $2,%r9 2592 addq %r13,%r15 2593 adcq %r14,%r9 2594 addq %r15,%r10 2595 adcq %r9,%r11 2596 adcq $0,%r12 2597 pxor %xmm1,%xmm13 2598 pxor %xmm0,%xmm12 2599.byte 102,69,15,56,0,248 2600.byte 102,69,15,56,0,240 2601.byte 102,69,15,56,0,232 2602.byte 102,69,15,56,0,224 2603 movdqa 0+80(%rbp),%xmm8 2604 paddd %xmm15,%xmm11 2605 paddd %xmm14,%xmm10 2606 paddd %xmm13,%xmm9 2607 paddd %xmm12,%xmm8 2608 pxor %xmm11,%xmm7 2609 pxor %xmm10,%xmm6 2610 pxor %xmm9,%xmm5 2611 pxor %xmm8,%xmm4 2612 movdqa %xmm8,0+80(%rbp) 2613 movdqa %xmm7,%xmm8 2614 psrld $20,%xmm8 2615 pslld $32-20,%xmm7 2616 pxor %xmm8,%xmm7 2617 movdqa %xmm6,%xmm8 2618 psrld $20,%xmm8 2619 pslld $32-20,%xmm6 2620 pxor %xmm8,%xmm6 2621 movdqa %xmm5,%xmm8 2622 psrld $20,%xmm8 2623 pslld $32-20,%xmm5 2624 pxor %xmm8,%xmm5 2625 movdqa %xmm4,%xmm8 2626 psrld $20,%xmm8 2627 pslld $32-20,%xmm4 2628 pxor %xmm8,%xmm4 2629 movdqa L$rol8(%rip),%xmm8 2630 paddd %xmm7,%xmm3 2631 paddd %xmm6,%xmm2 2632 paddd %xmm5,%xmm1 2633 paddd %xmm4,%xmm0 2634 pxor %xmm3,%xmm15 2635 pxor %xmm2,%xmm14 2636 pxor %xmm1,%xmm13 2637 pxor %xmm0,%xmm12 2638.byte 102,69,15,56,0,248 2639.byte 102,69,15,56,0,240 2640.byte 102,69,15,56,0,232 2641.byte 102,69,15,56,0,224 2642 movdqa 0+80(%rbp),%xmm8 2643 paddd %xmm15,%xmm11 2644 paddd %xmm14,%xmm10 2645 paddd %xmm13,%xmm9 2646 paddd %xmm12,%xmm8 2647 pxor %xmm11,%xmm7 2648 pxor %xmm10,%xmm6 2649 pxor %xmm9,%xmm5 2650 pxor %xmm8,%xmm4 2651 movdqa %xmm8,0+80(%rbp) 2652 movdqa %xmm7,%xmm8 2653 psrld $25,%xmm8 2654 pslld $32-25,%xmm7 2655 pxor %xmm8,%xmm7 2656 movdqa %xmm6,%xmm8 2657 psrld $25,%xmm8 2658 pslld $32-25,%xmm6 2659 pxor %xmm8,%xmm6 2660 movdqa %xmm5,%xmm8 2661 psrld $25,%xmm8 2662 pslld $32-25,%xmm5 2663 pxor %xmm8,%xmm5 2664 movdqa %xmm4,%xmm8 2665 psrld $25,%xmm8 2666 pslld $32-25,%xmm4 2667 pxor %xmm8,%xmm4 2668 movdqa 0+80(%rbp),%xmm8 2669.byte 102,15,58,15,255,12 2670.byte 102,69,15,58,15,219,8 2671.byte 102,69,15,58,15,255,4 2672.byte 102,15,58,15,246,12 2673.byte 102,69,15,58,15,210,8 2674.byte 102,69,15,58,15,246,4 2675.byte 102,15,58,15,237,12 2676.byte 102,69,15,58,15,201,8 2677.byte 102,69,15,58,15,237,4 2678.byte 102,15,58,15,228,12 2679.byte 102,69,15,58,15,192,8 2680.byte 102,69,15,58,15,228,4 2681 2682 leaq 16(%rdi),%rdi 2683 decq %r8 2684 jge L$seal_sse_main_rounds 2685 addq 0+0(%rdi),%r10 2686 adcq 8+0(%rdi),%r11 2687 adcq $1,%r12 2688 movq 0+0+0(%rbp),%rax 2689 movq %rax,%r15 2690 mulq %r10 2691 movq %rax,%r13 2692 movq %rdx,%r14 2693 movq 0+0+0(%rbp),%rax 2694 mulq %r11 2695 imulq %r12,%r15 2696 addq %rax,%r14 2697 adcq %rdx,%r15 2698 movq 8+0+0(%rbp),%rax 2699 movq %rax,%r9 2700 mulq %r10 2701 addq %rax,%r14 2702 adcq $0,%rdx 2703 movq %rdx,%r10 2704 movq 8+0+0(%rbp),%rax 2705 mulq %r11 2706 addq %rax,%r15 2707 adcq $0,%rdx 2708 imulq %r12,%r9 2709 addq %r10,%r15 2710 adcq %rdx,%r9 2711 movq %r13,%r10 2712 movq %r14,%r11 2713 movq %r15,%r12 2714 andq $3,%r12 2715 movq %r15,%r13 2716 andq $-4,%r13 2717 movq %r9,%r14 2718 shrdq $2,%r9,%r15 2719 shrq $2,%r9 2720 addq %r13,%r15 2721 adcq %r14,%r9 2722 addq %r15,%r10 2723 adcq %r9,%r11 2724 adcq $0,%r12 2725 2726 leaq 16(%rdi),%rdi 2727 decq %rcx 2728 jg L$seal_sse_main_rounds 2729 paddd L$chacha20_consts(%rip),%xmm3 2730 paddd 0+48(%rbp),%xmm7 2731 paddd 0+64(%rbp),%xmm11 2732 paddd 0+144(%rbp),%xmm15 2733 paddd L$chacha20_consts(%rip),%xmm2 2734 paddd 0+48(%rbp),%xmm6 2735 paddd 0+64(%rbp),%xmm10 2736 paddd 
0+128(%rbp),%xmm14 2737 paddd L$chacha20_consts(%rip),%xmm1 2738 paddd 0+48(%rbp),%xmm5 2739 paddd 0+64(%rbp),%xmm9 2740 paddd 0+112(%rbp),%xmm13 2741 paddd L$chacha20_consts(%rip),%xmm0 2742 paddd 0+48(%rbp),%xmm4 2743 paddd 0+64(%rbp),%xmm8 2744 paddd 0+96(%rbp),%xmm12 2745 2746 movdqa %xmm14,0+80(%rbp) 2747 movdqa %xmm14,0+80(%rbp) 2748 movdqu 0 + 0(%rsi),%xmm14 2749 pxor %xmm3,%xmm14 2750 movdqu %xmm14,0 + 0(%rdi) 2751 movdqu 16 + 0(%rsi),%xmm14 2752 pxor %xmm7,%xmm14 2753 movdqu %xmm14,16 + 0(%rdi) 2754 movdqu 32 + 0(%rsi),%xmm14 2755 pxor %xmm11,%xmm14 2756 movdqu %xmm14,32 + 0(%rdi) 2757 movdqu 48 + 0(%rsi),%xmm14 2758 pxor %xmm15,%xmm14 2759 movdqu %xmm14,48 + 0(%rdi) 2760 2761 movdqa 0+80(%rbp),%xmm14 2762 movdqu 0 + 64(%rsi),%xmm3 2763 movdqu 16 + 64(%rsi),%xmm7 2764 movdqu 32 + 64(%rsi),%xmm11 2765 movdqu 48 + 64(%rsi),%xmm15 2766 pxor %xmm3,%xmm2 2767 pxor %xmm7,%xmm6 2768 pxor %xmm11,%xmm10 2769 pxor %xmm14,%xmm15 2770 movdqu %xmm2,0 + 64(%rdi) 2771 movdqu %xmm6,16 + 64(%rdi) 2772 movdqu %xmm10,32 + 64(%rdi) 2773 movdqu %xmm15,48 + 64(%rdi) 2774 movdqu 0 + 128(%rsi),%xmm3 2775 movdqu 16 + 128(%rsi),%xmm7 2776 movdqu 32 + 128(%rsi),%xmm11 2777 movdqu 48 + 128(%rsi),%xmm15 2778 pxor %xmm3,%xmm1 2779 pxor %xmm7,%xmm5 2780 pxor %xmm11,%xmm9 2781 pxor %xmm13,%xmm15 2782 movdqu %xmm1,0 + 128(%rdi) 2783 movdqu %xmm5,16 + 128(%rdi) 2784 movdqu %xmm9,32 + 128(%rdi) 2785 movdqu %xmm15,48 + 128(%rdi) 2786 2787 cmpq $256,%rbx 2788 ja L$seal_sse_main_loop_xor 2789 2790 movq $192,%rcx 2791 subq $192,%rbx 2792 leaq 192(%rsi),%rsi 2793 jmp L$seal_sse_128_tail_hash 2794L$seal_sse_main_loop_xor: 2795 movdqu 0 + 192(%rsi),%xmm3 2796 movdqu 16 + 192(%rsi),%xmm7 2797 movdqu 32 + 192(%rsi),%xmm11 2798 movdqu 48 + 192(%rsi),%xmm15 2799 pxor %xmm3,%xmm0 2800 pxor %xmm7,%xmm4 2801 pxor %xmm11,%xmm8 2802 pxor %xmm12,%xmm15 2803 movdqu %xmm0,0 + 192(%rdi) 2804 movdqu %xmm4,16 + 192(%rdi) 2805 movdqu %xmm8,32 + 192(%rdi) 2806 movdqu %xmm15,48 + 192(%rdi) 2807 2808 leaq 256(%rsi),%rsi 2809 subq $256,%rbx 2810 movq $6,%rcx 2811 movq $4,%r8 2812 cmpq $192,%rbx 2813 jg L$seal_sse_main_loop 2814 movq %rbx,%rcx 2815 testq %rbx,%rbx 2816 je L$seal_sse_128_tail_hash 2817 movq $6,%rcx 2818 cmpq $128,%rbx 2819 ja L$seal_sse_tail_192 2820 cmpq $64,%rbx 2821 ja L$seal_sse_tail_128 2822 2823L$seal_sse_tail_64: 2824 movdqa L$chacha20_consts(%rip),%xmm0 2825 movdqa 0+48(%rbp),%xmm4 2826 movdqa 0+64(%rbp),%xmm8 2827 movdqa 0+96(%rbp),%xmm12 2828 paddd L$sse_inc(%rip),%xmm12 2829 movdqa %xmm12,0+96(%rbp) 2830 2831L$seal_sse_tail_64_rounds_and_x2hash: 2832 addq 0+0(%rdi),%r10 2833 adcq 8+0(%rdi),%r11 2834 adcq $1,%r12 2835 movq 0+0+0(%rbp),%rax 2836 movq %rax,%r15 2837 mulq %r10 2838 movq %rax,%r13 2839 movq %rdx,%r14 2840 movq 0+0+0(%rbp),%rax 2841 mulq %r11 2842 imulq %r12,%r15 2843 addq %rax,%r14 2844 adcq %rdx,%r15 2845 movq 8+0+0(%rbp),%rax 2846 movq %rax,%r9 2847 mulq %r10 2848 addq %rax,%r14 2849 adcq $0,%rdx 2850 movq %rdx,%r10 2851 movq 8+0+0(%rbp),%rax 2852 mulq %r11 2853 addq %rax,%r15 2854 adcq $0,%rdx 2855 imulq %r12,%r9 2856 addq %r10,%r15 2857 adcq %rdx,%r9 2858 movq %r13,%r10 2859 movq %r14,%r11 2860 movq %r15,%r12 2861 andq $3,%r12 2862 movq %r15,%r13 2863 andq $-4,%r13 2864 movq %r9,%r14 2865 shrdq $2,%r9,%r15 2866 shrq $2,%r9 2867 addq %r13,%r15 2868 adcq %r14,%r9 2869 addq %r15,%r10 2870 adcq %r9,%r11 2871 adcq $0,%r12 2872 2873 leaq 16(%rdi),%rdi 2874L$seal_sse_tail_64_rounds_and_x1hash: 2875 paddd %xmm4,%xmm0 2876 pxor %xmm0,%xmm12 2877 pshufb L$rol16(%rip),%xmm12 2878 paddd %xmm12,%xmm8 2879 pxor 
%xmm8,%xmm4 2880 movdqa %xmm4,%xmm3 2881 pslld $12,%xmm3 2882 psrld $20,%xmm4 2883 pxor %xmm3,%xmm4 2884 paddd %xmm4,%xmm0 2885 pxor %xmm0,%xmm12 2886 pshufb L$rol8(%rip),%xmm12 2887 paddd %xmm12,%xmm8 2888 pxor %xmm8,%xmm4 2889 movdqa %xmm4,%xmm3 2890 pslld $7,%xmm3 2891 psrld $25,%xmm4 2892 pxor %xmm3,%xmm4 2893.byte 102,15,58,15,228,4 2894.byte 102,69,15,58,15,192,8 2895.byte 102,69,15,58,15,228,12 2896 paddd %xmm4,%xmm0 2897 pxor %xmm0,%xmm12 2898 pshufb L$rol16(%rip),%xmm12 2899 paddd %xmm12,%xmm8 2900 pxor %xmm8,%xmm4 2901 movdqa %xmm4,%xmm3 2902 pslld $12,%xmm3 2903 psrld $20,%xmm4 2904 pxor %xmm3,%xmm4 2905 paddd %xmm4,%xmm0 2906 pxor %xmm0,%xmm12 2907 pshufb L$rol8(%rip),%xmm12 2908 paddd %xmm12,%xmm8 2909 pxor %xmm8,%xmm4 2910 movdqa %xmm4,%xmm3 2911 pslld $7,%xmm3 2912 psrld $25,%xmm4 2913 pxor %xmm3,%xmm4 2914.byte 102,15,58,15,228,12 2915.byte 102,69,15,58,15,192,8 2916.byte 102,69,15,58,15,228,4 2917 addq 0+0(%rdi),%r10 2918 adcq 8+0(%rdi),%r11 2919 adcq $1,%r12 2920 movq 0+0+0(%rbp),%rax 2921 movq %rax,%r15 2922 mulq %r10 2923 movq %rax,%r13 2924 movq %rdx,%r14 2925 movq 0+0+0(%rbp),%rax 2926 mulq %r11 2927 imulq %r12,%r15 2928 addq %rax,%r14 2929 adcq %rdx,%r15 2930 movq 8+0+0(%rbp),%rax 2931 movq %rax,%r9 2932 mulq %r10 2933 addq %rax,%r14 2934 adcq $0,%rdx 2935 movq %rdx,%r10 2936 movq 8+0+0(%rbp),%rax 2937 mulq %r11 2938 addq %rax,%r15 2939 adcq $0,%rdx 2940 imulq %r12,%r9 2941 addq %r10,%r15 2942 adcq %rdx,%r9 2943 movq %r13,%r10 2944 movq %r14,%r11 2945 movq %r15,%r12 2946 andq $3,%r12 2947 movq %r15,%r13 2948 andq $-4,%r13 2949 movq %r9,%r14 2950 shrdq $2,%r9,%r15 2951 shrq $2,%r9 2952 addq %r13,%r15 2953 adcq %r14,%r9 2954 addq %r15,%r10 2955 adcq %r9,%r11 2956 adcq $0,%r12 2957 2958 leaq 16(%rdi),%rdi 2959 decq %rcx 2960 jg L$seal_sse_tail_64_rounds_and_x2hash 2961 decq %r8 2962 jge L$seal_sse_tail_64_rounds_and_x1hash 2963 paddd L$chacha20_consts(%rip),%xmm0 2964 paddd 0+48(%rbp),%xmm4 2965 paddd 0+64(%rbp),%xmm8 2966 paddd 0+96(%rbp),%xmm12 2967 2968 jmp L$seal_sse_128_tail_xor 2969 2970L$seal_sse_tail_128: 2971 movdqa L$chacha20_consts(%rip),%xmm0 2972 movdqa 0+48(%rbp),%xmm4 2973 movdqa 0+64(%rbp),%xmm8 2974 movdqa %xmm0,%xmm1 2975 movdqa %xmm4,%xmm5 2976 movdqa %xmm8,%xmm9 2977 movdqa 0+96(%rbp),%xmm13 2978 paddd L$sse_inc(%rip),%xmm13 2979 movdqa %xmm13,%xmm12 2980 paddd L$sse_inc(%rip),%xmm12 2981 movdqa %xmm12,0+96(%rbp) 2982 movdqa %xmm13,0+112(%rbp) 2983 2984L$seal_sse_tail_128_rounds_and_x2hash: 2985 addq 0+0(%rdi),%r10 2986 adcq 8+0(%rdi),%r11 2987 adcq $1,%r12 2988 movq 0+0+0(%rbp),%rax 2989 movq %rax,%r15 2990 mulq %r10 2991 movq %rax,%r13 2992 movq %rdx,%r14 2993 movq 0+0+0(%rbp),%rax 2994 mulq %r11 2995 imulq %r12,%r15 2996 addq %rax,%r14 2997 adcq %rdx,%r15 2998 movq 8+0+0(%rbp),%rax 2999 movq %rax,%r9 3000 mulq %r10 3001 addq %rax,%r14 3002 adcq $0,%rdx 3003 movq %rdx,%r10 3004 movq 8+0+0(%rbp),%rax 3005 mulq %r11 3006 addq %rax,%r15 3007 adcq $0,%rdx 3008 imulq %r12,%r9 3009 addq %r10,%r15 3010 adcq %rdx,%r9 3011 movq %r13,%r10 3012 movq %r14,%r11 3013 movq %r15,%r12 3014 andq $3,%r12 3015 movq %r15,%r13 3016 andq $-4,%r13 3017 movq %r9,%r14 3018 shrdq $2,%r9,%r15 3019 shrq $2,%r9 3020 addq %r13,%r15 3021 adcq %r14,%r9 3022 addq %r15,%r10 3023 adcq %r9,%r11 3024 adcq $0,%r12 3025 3026 leaq 16(%rdi),%rdi 3027L$seal_sse_tail_128_rounds_and_x1hash: 3028 paddd %xmm4,%xmm0 3029 pxor %xmm0,%xmm12 3030 pshufb L$rol16(%rip),%xmm12 3031 paddd %xmm12,%xmm8 3032 pxor %xmm8,%xmm4 3033 movdqa %xmm4,%xmm3 3034 pslld $12,%xmm3 3035 psrld $20,%xmm4 3036 pxor 
%xmm3,%xmm4 3037 paddd %xmm4,%xmm0 3038 pxor %xmm0,%xmm12 3039 pshufb L$rol8(%rip),%xmm12 3040 paddd %xmm12,%xmm8 3041 pxor %xmm8,%xmm4 3042 movdqa %xmm4,%xmm3 3043 pslld $7,%xmm3 3044 psrld $25,%xmm4 3045 pxor %xmm3,%xmm4 3046.byte 102,15,58,15,228,4 3047.byte 102,69,15,58,15,192,8 3048.byte 102,69,15,58,15,228,12 3049 paddd %xmm5,%xmm1 3050 pxor %xmm1,%xmm13 3051 pshufb L$rol16(%rip),%xmm13 3052 paddd %xmm13,%xmm9 3053 pxor %xmm9,%xmm5 3054 movdqa %xmm5,%xmm3 3055 pslld $12,%xmm3 3056 psrld $20,%xmm5 3057 pxor %xmm3,%xmm5 3058 paddd %xmm5,%xmm1 3059 pxor %xmm1,%xmm13 3060 pshufb L$rol8(%rip),%xmm13 3061 paddd %xmm13,%xmm9 3062 pxor %xmm9,%xmm5 3063 movdqa %xmm5,%xmm3 3064 pslld $7,%xmm3 3065 psrld $25,%xmm5 3066 pxor %xmm3,%xmm5 3067.byte 102,15,58,15,237,4 3068.byte 102,69,15,58,15,201,8 3069.byte 102,69,15,58,15,237,12 3070 addq 0+0(%rdi),%r10 3071 adcq 8+0(%rdi),%r11 3072 adcq $1,%r12 3073 movq 0+0+0(%rbp),%rax 3074 movq %rax,%r15 3075 mulq %r10 3076 movq %rax,%r13 3077 movq %rdx,%r14 3078 movq 0+0+0(%rbp),%rax 3079 mulq %r11 3080 imulq %r12,%r15 3081 addq %rax,%r14 3082 adcq %rdx,%r15 3083 movq 8+0+0(%rbp),%rax 3084 movq %rax,%r9 3085 mulq %r10 3086 addq %rax,%r14 3087 adcq $0,%rdx 3088 movq %rdx,%r10 3089 movq 8+0+0(%rbp),%rax 3090 mulq %r11 3091 addq %rax,%r15 3092 adcq $0,%rdx 3093 imulq %r12,%r9 3094 addq %r10,%r15 3095 adcq %rdx,%r9 3096 movq %r13,%r10 3097 movq %r14,%r11 3098 movq %r15,%r12 3099 andq $3,%r12 3100 movq %r15,%r13 3101 andq $-4,%r13 3102 movq %r9,%r14 3103 shrdq $2,%r9,%r15 3104 shrq $2,%r9 3105 addq %r13,%r15 3106 adcq %r14,%r9 3107 addq %r15,%r10 3108 adcq %r9,%r11 3109 adcq $0,%r12 3110 paddd %xmm4,%xmm0 3111 pxor %xmm0,%xmm12 3112 pshufb L$rol16(%rip),%xmm12 3113 paddd %xmm12,%xmm8 3114 pxor %xmm8,%xmm4 3115 movdqa %xmm4,%xmm3 3116 pslld $12,%xmm3 3117 psrld $20,%xmm4 3118 pxor %xmm3,%xmm4 3119 paddd %xmm4,%xmm0 3120 pxor %xmm0,%xmm12 3121 pshufb L$rol8(%rip),%xmm12 3122 paddd %xmm12,%xmm8 3123 pxor %xmm8,%xmm4 3124 movdqa %xmm4,%xmm3 3125 pslld $7,%xmm3 3126 psrld $25,%xmm4 3127 pxor %xmm3,%xmm4 3128.byte 102,15,58,15,228,12 3129.byte 102,69,15,58,15,192,8 3130.byte 102,69,15,58,15,228,4 3131 paddd %xmm5,%xmm1 3132 pxor %xmm1,%xmm13 3133 pshufb L$rol16(%rip),%xmm13 3134 paddd %xmm13,%xmm9 3135 pxor %xmm9,%xmm5 3136 movdqa %xmm5,%xmm3 3137 pslld $12,%xmm3 3138 psrld $20,%xmm5 3139 pxor %xmm3,%xmm5 3140 paddd %xmm5,%xmm1 3141 pxor %xmm1,%xmm13 3142 pshufb L$rol8(%rip),%xmm13 3143 paddd %xmm13,%xmm9 3144 pxor %xmm9,%xmm5 3145 movdqa %xmm5,%xmm3 3146 pslld $7,%xmm3 3147 psrld $25,%xmm5 3148 pxor %xmm3,%xmm5 3149.byte 102,15,58,15,237,12 3150.byte 102,69,15,58,15,201,8 3151.byte 102,69,15,58,15,237,4 3152 3153 leaq 16(%rdi),%rdi 3154 decq %rcx 3155 jg L$seal_sse_tail_128_rounds_and_x2hash 3156 decq %r8 3157 jge L$seal_sse_tail_128_rounds_and_x1hash 3158 paddd L$chacha20_consts(%rip),%xmm1 3159 paddd 0+48(%rbp),%xmm5 3160 paddd 0+64(%rbp),%xmm9 3161 paddd 0+112(%rbp),%xmm13 3162 paddd L$chacha20_consts(%rip),%xmm0 3163 paddd 0+48(%rbp),%xmm4 3164 paddd 0+64(%rbp),%xmm8 3165 paddd 0+96(%rbp),%xmm12 3166 movdqu 0 + 0(%rsi),%xmm3 3167 movdqu 16 + 0(%rsi),%xmm7 3168 movdqu 32 + 0(%rsi),%xmm11 3169 movdqu 48 + 0(%rsi),%xmm15 3170 pxor %xmm3,%xmm1 3171 pxor %xmm7,%xmm5 3172 pxor %xmm11,%xmm9 3173 pxor %xmm13,%xmm15 3174 movdqu %xmm1,0 + 0(%rdi) 3175 movdqu %xmm5,16 + 0(%rdi) 3176 movdqu %xmm9,32 + 0(%rdi) 3177 movdqu %xmm15,48 + 0(%rdi) 3178 3179 movq $64,%rcx 3180 subq $64,%rbx 3181 leaq 64(%rsi),%rsi 3182 jmp L$seal_sse_128_tail_hash 3183 3184L$seal_sse_tail_192: 3185 
movdqa L$chacha20_consts(%rip),%xmm0 3186 movdqa 0+48(%rbp),%xmm4 3187 movdqa 0+64(%rbp),%xmm8 3188 movdqa %xmm0,%xmm1 3189 movdqa %xmm4,%xmm5 3190 movdqa %xmm8,%xmm9 3191 movdqa %xmm0,%xmm2 3192 movdqa %xmm4,%xmm6 3193 movdqa %xmm8,%xmm10 3194 movdqa 0+96(%rbp),%xmm14 3195 paddd L$sse_inc(%rip),%xmm14 3196 movdqa %xmm14,%xmm13 3197 paddd L$sse_inc(%rip),%xmm13 3198 movdqa %xmm13,%xmm12 3199 paddd L$sse_inc(%rip),%xmm12 3200 movdqa %xmm12,0+96(%rbp) 3201 movdqa %xmm13,0+112(%rbp) 3202 movdqa %xmm14,0+128(%rbp) 3203 3204L$seal_sse_tail_192_rounds_and_x2hash: 3205 addq 0+0(%rdi),%r10 3206 adcq 8+0(%rdi),%r11 3207 adcq $1,%r12 3208 movq 0+0+0(%rbp),%rax 3209 movq %rax,%r15 3210 mulq %r10 3211 movq %rax,%r13 3212 movq %rdx,%r14 3213 movq 0+0+0(%rbp),%rax 3214 mulq %r11 3215 imulq %r12,%r15 3216 addq %rax,%r14 3217 adcq %rdx,%r15 3218 movq 8+0+0(%rbp),%rax 3219 movq %rax,%r9 3220 mulq %r10 3221 addq %rax,%r14 3222 adcq $0,%rdx 3223 movq %rdx,%r10 3224 movq 8+0+0(%rbp),%rax 3225 mulq %r11 3226 addq %rax,%r15 3227 adcq $0,%rdx 3228 imulq %r12,%r9 3229 addq %r10,%r15 3230 adcq %rdx,%r9 3231 movq %r13,%r10 3232 movq %r14,%r11 3233 movq %r15,%r12 3234 andq $3,%r12 3235 movq %r15,%r13 3236 andq $-4,%r13 3237 movq %r9,%r14 3238 shrdq $2,%r9,%r15 3239 shrq $2,%r9 3240 addq %r13,%r15 3241 adcq %r14,%r9 3242 addq %r15,%r10 3243 adcq %r9,%r11 3244 adcq $0,%r12 3245 3246 leaq 16(%rdi),%rdi 3247L$seal_sse_tail_192_rounds_and_x1hash: 3248 paddd %xmm4,%xmm0 3249 pxor %xmm0,%xmm12 3250 pshufb L$rol16(%rip),%xmm12 3251 paddd %xmm12,%xmm8 3252 pxor %xmm8,%xmm4 3253 movdqa %xmm4,%xmm3 3254 pslld $12,%xmm3 3255 psrld $20,%xmm4 3256 pxor %xmm3,%xmm4 3257 paddd %xmm4,%xmm0 3258 pxor %xmm0,%xmm12 3259 pshufb L$rol8(%rip),%xmm12 3260 paddd %xmm12,%xmm8 3261 pxor %xmm8,%xmm4 3262 movdqa %xmm4,%xmm3 3263 pslld $7,%xmm3 3264 psrld $25,%xmm4 3265 pxor %xmm3,%xmm4 3266.byte 102,15,58,15,228,4 3267.byte 102,69,15,58,15,192,8 3268.byte 102,69,15,58,15,228,12 3269 paddd %xmm5,%xmm1 3270 pxor %xmm1,%xmm13 3271 pshufb L$rol16(%rip),%xmm13 3272 paddd %xmm13,%xmm9 3273 pxor %xmm9,%xmm5 3274 movdqa %xmm5,%xmm3 3275 pslld $12,%xmm3 3276 psrld $20,%xmm5 3277 pxor %xmm3,%xmm5 3278 paddd %xmm5,%xmm1 3279 pxor %xmm1,%xmm13 3280 pshufb L$rol8(%rip),%xmm13 3281 paddd %xmm13,%xmm9 3282 pxor %xmm9,%xmm5 3283 movdqa %xmm5,%xmm3 3284 pslld $7,%xmm3 3285 psrld $25,%xmm5 3286 pxor %xmm3,%xmm5 3287.byte 102,15,58,15,237,4 3288.byte 102,69,15,58,15,201,8 3289.byte 102,69,15,58,15,237,12 3290 paddd %xmm6,%xmm2 3291 pxor %xmm2,%xmm14 3292 pshufb L$rol16(%rip),%xmm14 3293 paddd %xmm14,%xmm10 3294 pxor %xmm10,%xmm6 3295 movdqa %xmm6,%xmm3 3296 pslld $12,%xmm3 3297 psrld $20,%xmm6 3298 pxor %xmm3,%xmm6 3299 paddd %xmm6,%xmm2 3300 pxor %xmm2,%xmm14 3301 pshufb L$rol8(%rip),%xmm14 3302 paddd %xmm14,%xmm10 3303 pxor %xmm10,%xmm6 3304 movdqa %xmm6,%xmm3 3305 pslld $7,%xmm3 3306 psrld $25,%xmm6 3307 pxor %xmm3,%xmm6 3308.byte 102,15,58,15,246,4 3309.byte 102,69,15,58,15,210,8 3310.byte 102,69,15,58,15,246,12 3311 addq 0+0(%rdi),%r10 3312 adcq 8+0(%rdi),%r11 3313 adcq $1,%r12 3314 movq 0+0+0(%rbp),%rax 3315 movq %rax,%r15 3316 mulq %r10 3317 movq %rax,%r13 3318 movq %rdx,%r14 3319 movq 0+0+0(%rbp),%rax 3320 mulq %r11 3321 imulq %r12,%r15 3322 addq %rax,%r14 3323 adcq %rdx,%r15 3324 movq 8+0+0(%rbp),%rax 3325 movq %rax,%r9 3326 mulq %r10 3327 addq %rax,%r14 3328 adcq $0,%rdx 3329 movq %rdx,%r10 3330 movq 8+0+0(%rbp),%rax 3331 mulq %r11 3332 addq %rax,%r15 3333 adcq $0,%rdx 3334 imulq %r12,%r9 3335 addq %r10,%r15 3336 adcq %rdx,%r9 3337 movq %r13,%r10 3338 
movq %r14,%r11 3339 movq %r15,%r12 3340 andq $3,%r12 3341 movq %r15,%r13 3342 andq $-4,%r13 3343 movq %r9,%r14 3344 shrdq $2,%r9,%r15 3345 shrq $2,%r9 3346 addq %r13,%r15 3347 adcq %r14,%r9 3348 addq %r15,%r10 3349 adcq %r9,%r11 3350 adcq $0,%r12 3351 paddd %xmm4,%xmm0 3352 pxor %xmm0,%xmm12 3353 pshufb L$rol16(%rip),%xmm12 3354 paddd %xmm12,%xmm8 3355 pxor %xmm8,%xmm4 3356 movdqa %xmm4,%xmm3 3357 pslld $12,%xmm3 3358 psrld $20,%xmm4 3359 pxor %xmm3,%xmm4 3360 paddd %xmm4,%xmm0 3361 pxor %xmm0,%xmm12 3362 pshufb L$rol8(%rip),%xmm12 3363 paddd %xmm12,%xmm8 3364 pxor %xmm8,%xmm4 3365 movdqa %xmm4,%xmm3 3366 pslld $7,%xmm3 3367 psrld $25,%xmm4 3368 pxor %xmm3,%xmm4 3369.byte 102,15,58,15,228,12 3370.byte 102,69,15,58,15,192,8 3371.byte 102,69,15,58,15,228,4 3372 paddd %xmm5,%xmm1 3373 pxor %xmm1,%xmm13 3374 pshufb L$rol16(%rip),%xmm13 3375 paddd %xmm13,%xmm9 3376 pxor %xmm9,%xmm5 3377 movdqa %xmm5,%xmm3 3378 pslld $12,%xmm3 3379 psrld $20,%xmm5 3380 pxor %xmm3,%xmm5 3381 paddd %xmm5,%xmm1 3382 pxor %xmm1,%xmm13 3383 pshufb L$rol8(%rip),%xmm13 3384 paddd %xmm13,%xmm9 3385 pxor %xmm9,%xmm5 3386 movdqa %xmm5,%xmm3 3387 pslld $7,%xmm3 3388 psrld $25,%xmm5 3389 pxor %xmm3,%xmm5 3390.byte 102,15,58,15,237,12 3391.byte 102,69,15,58,15,201,8 3392.byte 102,69,15,58,15,237,4 3393 paddd %xmm6,%xmm2 3394 pxor %xmm2,%xmm14 3395 pshufb L$rol16(%rip),%xmm14 3396 paddd %xmm14,%xmm10 3397 pxor %xmm10,%xmm6 3398 movdqa %xmm6,%xmm3 3399 pslld $12,%xmm3 3400 psrld $20,%xmm6 3401 pxor %xmm3,%xmm6 3402 paddd %xmm6,%xmm2 3403 pxor %xmm2,%xmm14 3404 pshufb L$rol8(%rip),%xmm14 3405 paddd %xmm14,%xmm10 3406 pxor %xmm10,%xmm6 3407 movdqa %xmm6,%xmm3 3408 pslld $7,%xmm3 3409 psrld $25,%xmm6 3410 pxor %xmm3,%xmm6 3411.byte 102,15,58,15,246,12 3412.byte 102,69,15,58,15,210,8 3413.byte 102,69,15,58,15,246,4 3414 3415 leaq 16(%rdi),%rdi 3416 decq %rcx 3417 jg L$seal_sse_tail_192_rounds_and_x2hash 3418 decq %r8 3419 jge L$seal_sse_tail_192_rounds_and_x1hash 3420 paddd L$chacha20_consts(%rip),%xmm2 3421 paddd 0+48(%rbp),%xmm6 3422 paddd 0+64(%rbp),%xmm10 3423 paddd 0+128(%rbp),%xmm14 3424 paddd L$chacha20_consts(%rip),%xmm1 3425 paddd 0+48(%rbp),%xmm5 3426 paddd 0+64(%rbp),%xmm9 3427 paddd 0+112(%rbp),%xmm13 3428 paddd L$chacha20_consts(%rip),%xmm0 3429 paddd 0+48(%rbp),%xmm4 3430 paddd 0+64(%rbp),%xmm8 3431 paddd 0+96(%rbp),%xmm12 3432 movdqu 0 + 0(%rsi),%xmm3 3433 movdqu 16 + 0(%rsi),%xmm7 3434 movdqu 32 + 0(%rsi),%xmm11 3435 movdqu 48 + 0(%rsi),%xmm15 3436 pxor %xmm3,%xmm2 3437 pxor %xmm7,%xmm6 3438 pxor %xmm11,%xmm10 3439 pxor %xmm14,%xmm15 3440 movdqu %xmm2,0 + 0(%rdi) 3441 movdqu %xmm6,16 + 0(%rdi) 3442 movdqu %xmm10,32 + 0(%rdi) 3443 movdqu %xmm15,48 + 0(%rdi) 3444 movdqu 0 + 64(%rsi),%xmm3 3445 movdqu 16 + 64(%rsi),%xmm7 3446 movdqu 32 + 64(%rsi),%xmm11 3447 movdqu 48 + 64(%rsi),%xmm15 3448 pxor %xmm3,%xmm1 3449 pxor %xmm7,%xmm5 3450 pxor %xmm11,%xmm9 3451 pxor %xmm13,%xmm15 3452 movdqu %xmm1,0 + 64(%rdi) 3453 movdqu %xmm5,16 + 64(%rdi) 3454 movdqu %xmm9,32 + 64(%rdi) 3455 movdqu %xmm15,48 + 64(%rdi) 3456 3457 movq $128,%rcx 3458 subq $128,%rbx 3459 leaq 128(%rsi),%rsi 3460 3461L$seal_sse_128_tail_hash: 3462 cmpq $16,%rcx 3463 jb L$seal_sse_128_tail_xor 3464 addq 0+0(%rdi),%r10 3465 adcq 8+0(%rdi),%r11 3466 adcq $1,%r12 3467 movq 0+0+0(%rbp),%rax 3468 movq %rax,%r15 3469 mulq %r10 3470 movq %rax,%r13 3471 movq %rdx,%r14 3472 movq 0+0+0(%rbp),%rax 3473 mulq %r11 3474 imulq %r12,%r15 3475 addq %rax,%r14 3476 adcq %rdx,%r15 3477 movq 8+0+0(%rbp),%rax 3478 movq %rax,%r9 3479 mulq %r10 3480 addq %rax,%r14 3481 adcq $0,%rdx 
3482 movq %rdx,%r10 3483 movq 8+0+0(%rbp),%rax 3484 mulq %r11 3485 addq %rax,%r15 3486 adcq $0,%rdx 3487 imulq %r12,%r9 3488 addq %r10,%r15 3489 adcq %rdx,%r9 3490 movq %r13,%r10 3491 movq %r14,%r11 3492 movq %r15,%r12 3493 andq $3,%r12 3494 movq %r15,%r13 3495 andq $-4,%r13 3496 movq %r9,%r14 3497 shrdq $2,%r9,%r15 3498 shrq $2,%r9 3499 addq %r13,%r15 3500 adcq %r14,%r9 3501 addq %r15,%r10 3502 adcq %r9,%r11 3503 adcq $0,%r12 3504 3505 subq $16,%rcx 3506 leaq 16(%rdi),%rdi 3507 jmp L$seal_sse_128_tail_hash 3508 3509L$seal_sse_128_tail_xor: 3510 cmpq $16,%rbx 3511 jb L$seal_sse_tail_16 3512 subq $16,%rbx 3513 3514 movdqu 0(%rsi),%xmm3 3515 pxor %xmm3,%xmm0 3516 movdqu %xmm0,0(%rdi) 3517 3518 addq 0(%rdi),%r10 3519 adcq 8(%rdi),%r11 3520 adcq $1,%r12 3521 leaq 16(%rsi),%rsi 3522 leaq 16(%rdi),%rdi 3523 movq 0+0+0(%rbp),%rax 3524 movq %rax,%r15 3525 mulq %r10 3526 movq %rax,%r13 3527 movq %rdx,%r14 3528 movq 0+0+0(%rbp),%rax 3529 mulq %r11 3530 imulq %r12,%r15 3531 addq %rax,%r14 3532 adcq %rdx,%r15 3533 movq 8+0+0(%rbp),%rax 3534 movq %rax,%r9 3535 mulq %r10 3536 addq %rax,%r14 3537 adcq $0,%rdx 3538 movq %rdx,%r10 3539 movq 8+0+0(%rbp),%rax 3540 mulq %r11 3541 addq %rax,%r15 3542 adcq $0,%rdx 3543 imulq %r12,%r9 3544 addq %r10,%r15 3545 adcq %rdx,%r9 3546 movq %r13,%r10 3547 movq %r14,%r11 3548 movq %r15,%r12 3549 andq $3,%r12 3550 movq %r15,%r13 3551 andq $-4,%r13 3552 movq %r9,%r14 3553 shrdq $2,%r9,%r15 3554 shrq $2,%r9 3555 addq %r13,%r15 3556 adcq %r14,%r9 3557 addq %r15,%r10 3558 adcq %r9,%r11 3559 adcq $0,%r12 3560 3561 3562 movdqa %xmm4,%xmm0 3563 movdqa %xmm8,%xmm4 3564 movdqa %xmm12,%xmm8 3565 movdqa %xmm1,%xmm12 3566 movdqa %xmm5,%xmm1 3567 movdqa %xmm9,%xmm5 3568 movdqa %xmm13,%xmm9 3569 jmp L$seal_sse_128_tail_xor 3570 3571L$seal_sse_tail_16: 3572 testq %rbx,%rbx 3573 jz L$process_blocks_of_extra_in 3574 3575 movq %rbx,%r8 3576 movq %rbx,%rcx 3577 leaq -1(%rsi,%rbx,1),%rsi 3578 pxor %xmm15,%xmm15 3579L$seal_sse_tail_16_compose: 3580 pslldq $1,%xmm15 3581 pinsrb $0,(%rsi),%xmm15 3582 leaq -1(%rsi),%rsi 3583 decq %rcx 3584 jne L$seal_sse_tail_16_compose 3585 3586 3587 pxor %xmm0,%xmm15 3588 3589 3590 movq %rbx,%rcx 3591 movdqu %xmm15,%xmm0 3592L$seal_sse_tail_16_extract: 3593 pextrb $0,%xmm0,(%rdi) 3594 psrldq $1,%xmm0 3595 addq $1,%rdi 3596 subq $1,%rcx 3597 jnz L$seal_sse_tail_16_extract 3598 3599 3600 3601 3602 3603 3604 3605 3606 movq 288 + 0 + 32(%rsp),%r9 3607 movq 56(%r9),%r14 3608 movq 48(%r9),%r13 3609 testq %r14,%r14 3610 jz L$process_partial_block 3611 3612 movq $16,%r15 3613 subq %rbx,%r15 3614 cmpq %r15,%r14 3615 3616 jge L$load_extra_in 3617 movq %r14,%r15 3618 3619L$load_extra_in: 3620 3621 3622 leaq -1(%r13,%r15,1),%rsi 3623 3624 3625 addq %r15,%r13 3626 subq %r15,%r14 3627 movq %r13,48(%r9) 3628 movq %r14,56(%r9) 3629 3630 3631 3632 addq %r15,%r8 3633 3634 3635 pxor %xmm11,%xmm11 3636L$load_extra_load_loop: 3637 pslldq $1,%xmm11 3638 pinsrb $0,(%rsi),%xmm11 3639 leaq -1(%rsi),%rsi 3640 subq $1,%r15 3641 jnz L$load_extra_load_loop 3642 3643 3644 3645 3646 movq %rbx,%r15 3647 3648L$load_extra_shift_loop: 3649 pslldq $1,%xmm11 3650 subq $1,%r15 3651 jnz L$load_extra_shift_loop 3652 3653 3654 3655 3656 leaq L$and_masks(%rip),%r15 3657 shlq $4,%rbx 3658 pand -16(%r15,%rbx,1),%xmm15 3659 3660 3661 por %xmm11,%xmm15 3662 3663 3664 3665.byte 102,77,15,126,253 3666 pextrq $1,%xmm15,%r14 3667 addq %r13,%r10 3668 adcq %r14,%r11 3669 adcq $1,%r12 3670 movq 0+0+0(%rbp),%rax 3671 movq %rax,%r15 3672 mulq %r10 3673 movq %rax,%r13 3674 movq %rdx,%r14 3675 movq 0+0+0(%rbp),%rax 
3676 mulq %r11 3677 imulq %r12,%r15 3678 addq %rax,%r14 3679 adcq %rdx,%r15 3680 movq 8+0+0(%rbp),%rax 3681 movq %rax,%r9 3682 mulq %r10 3683 addq %rax,%r14 3684 adcq $0,%rdx 3685 movq %rdx,%r10 3686 movq 8+0+0(%rbp),%rax 3687 mulq %r11 3688 addq %rax,%r15 3689 adcq $0,%rdx 3690 imulq %r12,%r9 3691 addq %r10,%r15 3692 adcq %rdx,%r9 3693 movq %r13,%r10 3694 movq %r14,%r11 3695 movq %r15,%r12 3696 andq $3,%r12 3697 movq %r15,%r13 3698 andq $-4,%r13 3699 movq %r9,%r14 3700 shrdq $2,%r9,%r15 3701 shrq $2,%r9 3702 addq %r13,%r15 3703 adcq %r14,%r9 3704 addq %r15,%r10 3705 adcq %r9,%r11 3706 adcq $0,%r12 3707 3708 3709L$process_blocks_of_extra_in: 3710 3711 movq 288+32+0 (%rsp),%r9 3712 movq 48(%r9),%rsi 3713 movq 56(%r9),%r8 3714 movq %r8,%rcx 3715 shrq $4,%r8 3716 3717L$process_extra_hash_loop: 3718 jz process_extra_in_trailer 3719 addq 0+0(%rsi),%r10 3720 adcq 8+0(%rsi),%r11 3721 adcq $1,%r12 3722 movq 0+0+0(%rbp),%rax 3723 movq %rax,%r15 3724 mulq %r10 3725 movq %rax,%r13 3726 movq %rdx,%r14 3727 movq 0+0+0(%rbp),%rax 3728 mulq %r11 3729 imulq %r12,%r15 3730 addq %rax,%r14 3731 adcq %rdx,%r15 3732 movq 8+0+0(%rbp),%rax 3733 movq %rax,%r9 3734 mulq %r10 3735 addq %rax,%r14 3736 adcq $0,%rdx 3737 movq %rdx,%r10 3738 movq 8+0+0(%rbp),%rax 3739 mulq %r11 3740 addq %rax,%r15 3741 adcq $0,%rdx 3742 imulq %r12,%r9 3743 addq %r10,%r15 3744 adcq %rdx,%r9 3745 movq %r13,%r10 3746 movq %r14,%r11 3747 movq %r15,%r12 3748 andq $3,%r12 3749 movq %r15,%r13 3750 andq $-4,%r13 3751 movq %r9,%r14 3752 shrdq $2,%r9,%r15 3753 shrq $2,%r9 3754 addq %r13,%r15 3755 adcq %r14,%r9 3756 addq %r15,%r10 3757 adcq %r9,%r11 3758 adcq $0,%r12 3759 3760 leaq 16(%rsi),%rsi 3761 subq $1,%r8 3762 jmp L$process_extra_hash_loop 3763process_extra_in_trailer: 3764 andq $15,%rcx 3765 movq %rcx,%rbx 3766 jz L$do_length_block 3767 leaq -1(%rsi,%rcx,1),%rsi 3768 3769L$process_extra_in_trailer_load: 3770 pslldq $1,%xmm15 3771 pinsrb $0,(%rsi),%xmm15 3772 leaq -1(%rsi),%rsi 3773 subq $1,%rcx 3774 jnz L$process_extra_in_trailer_load 3775 3776L$process_partial_block: 3777 3778 leaq L$and_masks(%rip),%r15 3779 shlq $4,%rbx 3780 pand -16(%r15,%rbx,1),%xmm15 3781.byte 102,77,15,126,253 3782 pextrq $1,%xmm15,%r14 3783 addq %r13,%r10 3784 adcq %r14,%r11 3785 adcq $1,%r12 3786 movq 0+0+0(%rbp),%rax 3787 movq %rax,%r15 3788 mulq %r10 3789 movq %rax,%r13 3790 movq %rdx,%r14 3791 movq 0+0+0(%rbp),%rax 3792 mulq %r11 3793 imulq %r12,%r15 3794 addq %rax,%r14 3795 adcq %rdx,%r15 3796 movq 8+0+0(%rbp),%rax 3797 movq %rax,%r9 3798 mulq %r10 3799 addq %rax,%r14 3800 adcq $0,%rdx 3801 movq %rdx,%r10 3802 movq 8+0+0(%rbp),%rax 3803 mulq %r11 3804 addq %rax,%r15 3805 adcq $0,%rdx 3806 imulq %r12,%r9 3807 addq %r10,%r15 3808 adcq %rdx,%r9 3809 movq %r13,%r10 3810 movq %r14,%r11 3811 movq %r15,%r12 3812 andq $3,%r12 3813 movq %r15,%r13 3814 andq $-4,%r13 3815 movq %r9,%r14 3816 shrdq $2,%r9,%r15 3817 shrq $2,%r9 3818 addq %r13,%r15 3819 adcq %r14,%r9 3820 addq %r15,%r10 3821 adcq %r9,%r11 3822 adcq $0,%r12 3823 3824 3825L$do_length_block: 3826 addq 0+0+32(%rbp),%r10 3827 adcq 8+0+32(%rbp),%r11 3828 adcq $1,%r12 3829 movq 0+0+0(%rbp),%rax 3830 movq %rax,%r15 3831 mulq %r10 3832 movq %rax,%r13 3833 movq %rdx,%r14 3834 movq 0+0+0(%rbp),%rax 3835 mulq %r11 3836 imulq %r12,%r15 3837 addq %rax,%r14 3838 adcq %rdx,%r15 3839 movq 8+0+0(%rbp),%rax 3840 movq %rax,%r9 3841 mulq %r10 3842 addq %rax,%r14 3843 adcq $0,%rdx 3844 movq %rdx,%r10 3845 movq 8+0+0(%rbp),%rax 3846 mulq %r11 3847 addq %rax,%r15 3848 adcq $0,%rdx 3849 imulq %r12,%r9 3850 addq %r10,%r15 3851 adcq 
%rdx,%r9 3852 movq %r13,%r10 3853 movq %r14,%r11 3854 movq %r15,%r12 3855 andq $3,%r12 3856 movq %r15,%r13 3857 andq $-4,%r13 3858 movq %r9,%r14 3859 shrdq $2,%r9,%r15 3860 shrq $2,%r9 3861 addq %r13,%r15 3862 adcq %r14,%r9 3863 addq %r15,%r10 3864 adcq %r9,%r11 3865 adcq $0,%r12 3866 3867 3868 movq %r10,%r13 3869 movq %r11,%r14 3870 movq %r12,%r15 3871 subq $-5,%r10 3872 sbbq $-1,%r11 3873 sbbq $3,%r12 3874 cmovcq %r13,%r10 3875 cmovcq %r14,%r11 3876 cmovcq %r15,%r12 3877 3878 addq 0+0+16(%rbp),%r10 3879 adcq 8+0+16(%rbp),%r11 3880 3881 3882 addq $288 + 0 + 32,%rsp 3883 3884 3885 popq %r9 3886 3887 movq %r10,(%r9) 3888 movq %r11,8(%r9) 3889 popq %r15 3890 3891 popq %r14 3892 3893 popq %r13 3894 3895 popq %r12 3896 3897 popq %rbx 3898 3899 popq %rbp 3900 3901 .byte 0xf3,0xc3 3902 3903L$seal_sse_128: 3904 3905 movdqu L$chacha20_consts(%rip),%xmm0 3906 movdqa %xmm0,%xmm1 3907 movdqa %xmm0,%xmm2 3908 movdqu 0(%r9),%xmm4 3909 movdqa %xmm4,%xmm5 3910 movdqa %xmm4,%xmm6 3911 movdqu 16(%r9),%xmm8 3912 movdqa %xmm8,%xmm9 3913 movdqa %xmm8,%xmm10 3914 movdqu 32(%r9),%xmm14 3915 movdqa %xmm14,%xmm12 3916 paddd L$sse_inc(%rip),%xmm12 3917 movdqa %xmm12,%xmm13 3918 paddd L$sse_inc(%rip),%xmm13 3919 movdqa %xmm4,%xmm7 3920 movdqa %xmm8,%xmm11 3921 movdqa %xmm12,%xmm15 3922 movq $10,%r10 3923 3924L$seal_sse_128_rounds: 3925 paddd %xmm4,%xmm0 3926 pxor %xmm0,%xmm12 3927 pshufb L$rol16(%rip),%xmm12 3928 paddd %xmm12,%xmm8 3929 pxor %xmm8,%xmm4 3930 movdqa %xmm4,%xmm3 3931 pslld $12,%xmm3 3932 psrld $20,%xmm4 3933 pxor %xmm3,%xmm4 3934 paddd %xmm4,%xmm0 3935 pxor %xmm0,%xmm12 3936 pshufb L$rol8(%rip),%xmm12 3937 paddd %xmm12,%xmm8 3938 pxor %xmm8,%xmm4 3939 movdqa %xmm4,%xmm3 3940 pslld $7,%xmm3 3941 psrld $25,%xmm4 3942 pxor %xmm3,%xmm4 3943.byte 102,15,58,15,228,4 3944.byte 102,69,15,58,15,192,8 3945.byte 102,69,15,58,15,228,12 3946 paddd %xmm5,%xmm1 3947 pxor %xmm1,%xmm13 3948 pshufb L$rol16(%rip),%xmm13 3949 paddd %xmm13,%xmm9 3950 pxor %xmm9,%xmm5 3951 movdqa %xmm5,%xmm3 3952 pslld $12,%xmm3 3953 psrld $20,%xmm5 3954 pxor %xmm3,%xmm5 3955 paddd %xmm5,%xmm1 3956 pxor %xmm1,%xmm13 3957 pshufb L$rol8(%rip),%xmm13 3958 paddd %xmm13,%xmm9 3959 pxor %xmm9,%xmm5 3960 movdqa %xmm5,%xmm3 3961 pslld $7,%xmm3 3962 psrld $25,%xmm5 3963 pxor %xmm3,%xmm5 3964.byte 102,15,58,15,237,4 3965.byte 102,69,15,58,15,201,8 3966.byte 102,69,15,58,15,237,12 3967 paddd %xmm6,%xmm2 3968 pxor %xmm2,%xmm14 3969 pshufb L$rol16(%rip),%xmm14 3970 paddd %xmm14,%xmm10 3971 pxor %xmm10,%xmm6 3972 movdqa %xmm6,%xmm3 3973 pslld $12,%xmm3 3974 psrld $20,%xmm6 3975 pxor %xmm3,%xmm6 3976 paddd %xmm6,%xmm2 3977 pxor %xmm2,%xmm14 3978 pshufb L$rol8(%rip),%xmm14 3979 paddd %xmm14,%xmm10 3980 pxor %xmm10,%xmm6 3981 movdqa %xmm6,%xmm3 3982 pslld $7,%xmm3 3983 psrld $25,%xmm6 3984 pxor %xmm3,%xmm6 3985.byte 102,15,58,15,246,4 3986.byte 102,69,15,58,15,210,8 3987.byte 102,69,15,58,15,246,12 3988 paddd %xmm4,%xmm0 3989 pxor %xmm0,%xmm12 3990 pshufb L$rol16(%rip),%xmm12 3991 paddd %xmm12,%xmm8 3992 pxor %xmm8,%xmm4 3993 movdqa %xmm4,%xmm3 3994 pslld $12,%xmm3 3995 psrld $20,%xmm4 3996 pxor %xmm3,%xmm4 3997 paddd %xmm4,%xmm0 3998 pxor %xmm0,%xmm12 3999 pshufb L$rol8(%rip),%xmm12 4000 paddd %xmm12,%xmm8 4001 pxor %xmm8,%xmm4 4002 movdqa %xmm4,%xmm3 4003 pslld $7,%xmm3 4004 psrld $25,%xmm4 4005 pxor %xmm3,%xmm4 4006.byte 102,15,58,15,228,12 4007.byte 102,69,15,58,15,192,8 4008.byte 102,69,15,58,15,228,4 4009 paddd %xmm5,%xmm1 4010 pxor %xmm1,%xmm13 4011 pshufb L$rol16(%rip),%xmm13 4012 paddd %xmm13,%xmm9 4013 pxor %xmm9,%xmm5 4014 movdqa %xmm5,%xmm3 
4015 pslld $12,%xmm3 4016 psrld $20,%xmm5 4017 pxor %xmm3,%xmm5 4018 paddd %xmm5,%xmm1 4019 pxor %xmm1,%xmm13 4020 pshufb L$rol8(%rip),%xmm13 4021 paddd %xmm13,%xmm9 4022 pxor %xmm9,%xmm5 4023 movdqa %xmm5,%xmm3 4024 pslld $7,%xmm3 4025 psrld $25,%xmm5 4026 pxor %xmm3,%xmm5 4027.byte 102,15,58,15,237,12 4028.byte 102,69,15,58,15,201,8 4029.byte 102,69,15,58,15,237,4 4030 paddd %xmm6,%xmm2 4031 pxor %xmm2,%xmm14 4032 pshufb L$rol16(%rip),%xmm14 4033 paddd %xmm14,%xmm10 4034 pxor %xmm10,%xmm6 4035 movdqa %xmm6,%xmm3 4036 pslld $12,%xmm3 4037 psrld $20,%xmm6 4038 pxor %xmm3,%xmm6 4039 paddd %xmm6,%xmm2 4040 pxor %xmm2,%xmm14 4041 pshufb L$rol8(%rip),%xmm14 4042 paddd %xmm14,%xmm10 4043 pxor %xmm10,%xmm6 4044 movdqa %xmm6,%xmm3 4045 pslld $7,%xmm3 4046 psrld $25,%xmm6 4047 pxor %xmm3,%xmm6 4048.byte 102,15,58,15,246,12 4049.byte 102,69,15,58,15,210,8 4050.byte 102,69,15,58,15,246,4 4051 4052 decq %r10 4053 jnz L$seal_sse_128_rounds 4054 paddd L$chacha20_consts(%rip),%xmm0 4055 paddd L$chacha20_consts(%rip),%xmm1 4056 paddd L$chacha20_consts(%rip),%xmm2 4057 paddd %xmm7,%xmm4 4058 paddd %xmm7,%xmm5 4059 paddd %xmm7,%xmm6 4060 paddd %xmm11,%xmm8 4061 paddd %xmm11,%xmm9 4062 paddd %xmm15,%xmm12 4063 paddd L$sse_inc(%rip),%xmm15 4064 paddd %xmm15,%xmm13 4065 4066 pand L$clamp(%rip),%xmm2 4067 movdqa %xmm2,0+0(%rbp) 4068 movdqa %xmm6,0+16(%rbp) 4069 4070 movq %r8,%r8 4071 call poly_hash_ad_internal 4072 jmp L$seal_sse_128_tail_xor 4073 4074 4075 4076 4077 4078.p2align 6 4079chacha20_poly1305_open_avx2: 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 vzeroupper 4093 vmovdqa L$chacha20_consts(%rip),%ymm0 4094 vbroadcasti128 0(%r9),%ymm4 4095 vbroadcasti128 16(%r9),%ymm8 4096 vbroadcasti128 32(%r9),%ymm12 4097 vpaddd L$avx2_init(%rip),%ymm12,%ymm12 4098 cmpq $192,%rbx 4099 jbe L$open_avx2_192 4100 cmpq $320,%rbx 4101 jbe L$open_avx2_320 4102 4103 vmovdqa %ymm4,0+64(%rbp) 4104 vmovdqa %ymm8,0+96(%rbp) 4105 vmovdqa %ymm12,0+160(%rbp) 4106 movq $10,%r10 4107L$open_avx2_init_rounds: 4108 vpaddd %ymm4,%ymm0,%ymm0 4109 vpxor %ymm0,%ymm12,%ymm12 4110 vpshufb L$rol16(%rip),%ymm12,%ymm12 4111 vpaddd %ymm12,%ymm8,%ymm8 4112 vpxor %ymm8,%ymm4,%ymm4 4113 vpsrld $20,%ymm4,%ymm3 4114 vpslld $12,%ymm4,%ymm4 4115 vpxor %ymm3,%ymm4,%ymm4 4116 vpaddd %ymm4,%ymm0,%ymm0 4117 vpxor %ymm0,%ymm12,%ymm12 4118 vpshufb L$rol8(%rip),%ymm12,%ymm12 4119 vpaddd %ymm12,%ymm8,%ymm8 4120 vpxor %ymm8,%ymm4,%ymm4 4121 vpslld $7,%ymm4,%ymm3 4122 vpsrld $25,%ymm4,%ymm4 4123 vpxor %ymm3,%ymm4,%ymm4 4124 vpalignr $12,%ymm12,%ymm12,%ymm12 4125 vpalignr $8,%ymm8,%ymm8,%ymm8 4126 vpalignr $4,%ymm4,%ymm4,%ymm4 4127 vpaddd %ymm4,%ymm0,%ymm0 4128 vpxor %ymm0,%ymm12,%ymm12 4129 vpshufb L$rol16(%rip),%ymm12,%ymm12 4130 vpaddd %ymm12,%ymm8,%ymm8 4131 vpxor %ymm8,%ymm4,%ymm4 4132 vpsrld $20,%ymm4,%ymm3 4133 vpslld $12,%ymm4,%ymm4 4134 vpxor %ymm3,%ymm4,%ymm4 4135 vpaddd %ymm4,%ymm0,%ymm0 4136 vpxor %ymm0,%ymm12,%ymm12 4137 vpshufb L$rol8(%rip),%ymm12,%ymm12 4138 vpaddd %ymm12,%ymm8,%ymm8 4139 vpxor %ymm8,%ymm4,%ymm4 4140 vpslld $7,%ymm4,%ymm3 4141 vpsrld $25,%ymm4,%ymm4 4142 vpxor %ymm3,%ymm4,%ymm4 4143 vpalignr $4,%ymm12,%ymm12,%ymm12 4144 vpalignr $8,%ymm8,%ymm8,%ymm8 4145 vpalignr $12,%ymm4,%ymm4,%ymm4 4146 4147 decq %r10 4148 jne L$open_avx2_init_rounds 4149 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 4150 vpaddd 0+64(%rbp),%ymm4,%ymm4 4151 vpaddd 0+96(%rbp),%ymm8,%ymm8 4152 vpaddd 0+160(%rbp),%ymm12,%ymm12 4153 4154 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 4155 4156 vpand L$clamp(%rip),%ymm3,%ymm3 4157 vmovdqa %ymm3,0+0(%rbp) 4158 
4159 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 4160 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 4161 4162 movq %r8,%r8 4163 call poly_hash_ad_internal 4164 4165 xorq %rcx,%rcx 4166L$open_avx2_init_hash: 4167 addq 0+0(%rsi,%rcx,1),%r10 4168 adcq 8+0(%rsi,%rcx,1),%r11 4169 adcq $1,%r12 4170 movq 0+0+0(%rbp),%rax 4171 movq %rax,%r15 4172 mulq %r10 4173 movq %rax,%r13 4174 movq %rdx,%r14 4175 movq 0+0+0(%rbp),%rax 4176 mulq %r11 4177 imulq %r12,%r15 4178 addq %rax,%r14 4179 adcq %rdx,%r15 4180 movq 8+0+0(%rbp),%rax 4181 movq %rax,%r9 4182 mulq %r10 4183 addq %rax,%r14 4184 adcq $0,%rdx 4185 movq %rdx,%r10 4186 movq 8+0+0(%rbp),%rax 4187 mulq %r11 4188 addq %rax,%r15 4189 adcq $0,%rdx 4190 imulq %r12,%r9 4191 addq %r10,%r15 4192 adcq %rdx,%r9 4193 movq %r13,%r10 4194 movq %r14,%r11 4195 movq %r15,%r12 4196 andq $3,%r12 4197 movq %r15,%r13 4198 andq $-4,%r13 4199 movq %r9,%r14 4200 shrdq $2,%r9,%r15 4201 shrq $2,%r9 4202 addq %r13,%r15 4203 adcq %r14,%r9 4204 addq %r15,%r10 4205 adcq %r9,%r11 4206 adcq $0,%r12 4207 4208 addq $16,%rcx 4209 cmpq $64,%rcx 4210 jne L$open_avx2_init_hash 4211 4212 vpxor 0(%rsi),%ymm0,%ymm0 4213 vpxor 32(%rsi),%ymm4,%ymm4 4214 4215 vmovdqu %ymm0,0(%rdi) 4216 vmovdqu %ymm4,32(%rdi) 4217 leaq 64(%rsi),%rsi 4218 leaq 64(%rdi),%rdi 4219 subq $64,%rbx 4220L$open_avx2_main_loop: 4221 4222 cmpq $512,%rbx 4223 jb L$open_avx2_main_loop_done 4224 vmovdqa L$chacha20_consts(%rip),%ymm0 4225 vmovdqa 0+64(%rbp),%ymm4 4226 vmovdqa 0+96(%rbp),%ymm8 4227 vmovdqa %ymm0,%ymm1 4228 vmovdqa %ymm4,%ymm5 4229 vmovdqa %ymm8,%ymm9 4230 vmovdqa %ymm0,%ymm2 4231 vmovdqa %ymm4,%ymm6 4232 vmovdqa %ymm8,%ymm10 4233 vmovdqa %ymm0,%ymm3 4234 vmovdqa %ymm4,%ymm7 4235 vmovdqa %ymm8,%ymm11 4236 vmovdqa L$avx2_inc(%rip),%ymm12 4237 vpaddd 0+160(%rbp),%ymm12,%ymm15 4238 vpaddd %ymm15,%ymm12,%ymm14 4239 vpaddd %ymm14,%ymm12,%ymm13 4240 vpaddd %ymm13,%ymm12,%ymm12 4241 vmovdqa %ymm15,0+256(%rbp) 4242 vmovdqa %ymm14,0+224(%rbp) 4243 vmovdqa %ymm13,0+192(%rbp) 4244 vmovdqa %ymm12,0+160(%rbp) 4245 4246 xorq %rcx,%rcx 4247L$open_avx2_main_loop_rounds: 4248 addq 0+0(%rsi,%rcx,1),%r10 4249 adcq 8+0(%rsi,%rcx,1),%r11 4250 adcq $1,%r12 4251 vmovdqa %ymm8,0+128(%rbp) 4252 vmovdqa L$rol16(%rip),%ymm8 4253 vpaddd %ymm7,%ymm3,%ymm3 4254 vpaddd %ymm6,%ymm2,%ymm2 4255 vpaddd %ymm5,%ymm1,%ymm1 4256 vpaddd %ymm4,%ymm0,%ymm0 4257 vpxor %ymm3,%ymm15,%ymm15 4258 vpxor %ymm2,%ymm14,%ymm14 4259 vpxor %ymm1,%ymm13,%ymm13 4260 vpxor %ymm0,%ymm12,%ymm12 4261 movq 0+0+0(%rbp),%rdx 4262 movq %rdx,%r15 4263 mulxq %r10,%r13,%r14 4264 mulxq %r11,%rax,%rdx 4265 imulq %r12,%r15 4266 addq %rax,%r14 4267 adcq %rdx,%r15 4268 vpshufb %ymm8,%ymm15,%ymm15 4269 vpshufb %ymm8,%ymm14,%ymm14 4270 vpshufb %ymm8,%ymm13,%ymm13 4271 vpshufb %ymm8,%ymm12,%ymm12 4272 vpaddd %ymm15,%ymm11,%ymm11 4273 vpaddd %ymm14,%ymm10,%ymm10 4274 vpaddd %ymm13,%ymm9,%ymm9 4275 vpaddd 0+128(%rbp),%ymm12,%ymm8 4276 vpxor %ymm11,%ymm7,%ymm7 4277 movq 8+0+0(%rbp),%rdx 4278 mulxq %r10,%r10,%rax 4279 addq %r10,%r14 4280 mulxq %r11,%r11,%r9 4281 adcq %r11,%r15 4282 adcq $0,%r9 4283 imulq %r12,%rdx 4284 vpxor %ymm10,%ymm6,%ymm6 4285 vpxor %ymm9,%ymm5,%ymm5 4286 vpxor %ymm8,%ymm4,%ymm4 4287 vmovdqa %ymm8,0+128(%rbp) 4288 vpsrld $20,%ymm7,%ymm8 4289 vpslld $32-20,%ymm7,%ymm7 4290 vpxor %ymm8,%ymm7,%ymm7 4291 vpsrld $20,%ymm6,%ymm8 4292 vpslld $32-20,%ymm6,%ymm6 4293 vpxor %ymm8,%ymm6,%ymm6 4294 vpsrld $20,%ymm5,%ymm8 4295 vpslld $32-20,%ymm5,%ymm5 4296 addq %rax,%r15 4297 adcq %rdx,%r9 4298 vpxor %ymm8,%ymm5,%ymm5 4299 vpsrld $20,%ymm4,%ymm8 4300 vpslld $32-20,%ymm4,%ymm4 4301 vpxor 
%ymm8,%ymm4,%ymm4 4302 vmovdqa L$rol8(%rip),%ymm8 4303 vpaddd %ymm7,%ymm3,%ymm3 4304 vpaddd %ymm6,%ymm2,%ymm2 4305 vpaddd %ymm5,%ymm1,%ymm1 4306 vpaddd %ymm4,%ymm0,%ymm0 4307 vpxor %ymm3,%ymm15,%ymm15 4308 movq %r13,%r10 4309 movq %r14,%r11 4310 movq %r15,%r12 4311 andq $3,%r12 4312 movq %r15,%r13 4313 andq $-4,%r13 4314 movq %r9,%r14 4315 shrdq $2,%r9,%r15 4316 shrq $2,%r9 4317 addq %r13,%r15 4318 adcq %r14,%r9 4319 addq %r15,%r10 4320 adcq %r9,%r11 4321 adcq $0,%r12 4322 vpxor %ymm2,%ymm14,%ymm14 4323 vpxor %ymm1,%ymm13,%ymm13 4324 vpxor %ymm0,%ymm12,%ymm12 4325 vpshufb %ymm8,%ymm15,%ymm15 4326 vpshufb %ymm8,%ymm14,%ymm14 4327 vpshufb %ymm8,%ymm13,%ymm13 4328 vpshufb %ymm8,%ymm12,%ymm12 4329 vpaddd %ymm15,%ymm11,%ymm11 4330 vpaddd %ymm14,%ymm10,%ymm10 4331 addq 0+16(%rsi,%rcx,1),%r10 4332 adcq 8+16(%rsi,%rcx,1),%r11 4333 adcq $1,%r12 4334 vpaddd %ymm13,%ymm9,%ymm9 4335 vpaddd 0+128(%rbp),%ymm12,%ymm8 4336 vpxor %ymm11,%ymm7,%ymm7 4337 vpxor %ymm10,%ymm6,%ymm6 4338 vpxor %ymm9,%ymm5,%ymm5 4339 vpxor %ymm8,%ymm4,%ymm4 4340 vmovdqa %ymm8,0+128(%rbp) 4341 vpsrld $25,%ymm7,%ymm8 4342 movq 0+0+0(%rbp),%rdx 4343 movq %rdx,%r15 4344 mulxq %r10,%r13,%r14 4345 mulxq %r11,%rax,%rdx 4346 imulq %r12,%r15 4347 addq %rax,%r14 4348 adcq %rdx,%r15 4349 vpslld $32-25,%ymm7,%ymm7 4350 vpxor %ymm8,%ymm7,%ymm7 4351 vpsrld $25,%ymm6,%ymm8 4352 vpslld $32-25,%ymm6,%ymm6 4353 vpxor %ymm8,%ymm6,%ymm6 4354 vpsrld $25,%ymm5,%ymm8 4355 vpslld $32-25,%ymm5,%ymm5 4356 vpxor %ymm8,%ymm5,%ymm5 4357 vpsrld $25,%ymm4,%ymm8 4358 vpslld $32-25,%ymm4,%ymm4 4359 vpxor %ymm8,%ymm4,%ymm4 4360 vmovdqa 0+128(%rbp),%ymm8 4361 vpalignr $4,%ymm7,%ymm7,%ymm7 4362 vpalignr $8,%ymm11,%ymm11,%ymm11 4363 vpalignr $12,%ymm15,%ymm15,%ymm15 4364 vpalignr $4,%ymm6,%ymm6,%ymm6 4365 vpalignr $8,%ymm10,%ymm10,%ymm10 4366 vpalignr $12,%ymm14,%ymm14,%ymm14 4367 movq 8+0+0(%rbp),%rdx 4368 mulxq %r10,%r10,%rax 4369 addq %r10,%r14 4370 mulxq %r11,%r11,%r9 4371 adcq %r11,%r15 4372 adcq $0,%r9 4373 imulq %r12,%rdx 4374 vpalignr $4,%ymm5,%ymm5,%ymm5 4375 vpalignr $8,%ymm9,%ymm9,%ymm9 4376 vpalignr $12,%ymm13,%ymm13,%ymm13 4377 vpalignr $4,%ymm4,%ymm4,%ymm4 4378 vpalignr $8,%ymm8,%ymm8,%ymm8 4379 vpalignr $12,%ymm12,%ymm12,%ymm12 4380 vmovdqa %ymm8,0+128(%rbp) 4381 vmovdqa L$rol16(%rip),%ymm8 4382 vpaddd %ymm7,%ymm3,%ymm3 4383 vpaddd %ymm6,%ymm2,%ymm2 4384 vpaddd %ymm5,%ymm1,%ymm1 4385 vpaddd %ymm4,%ymm0,%ymm0 4386 vpxor %ymm3,%ymm15,%ymm15 4387 vpxor %ymm2,%ymm14,%ymm14 4388 vpxor %ymm1,%ymm13,%ymm13 4389 vpxor %ymm0,%ymm12,%ymm12 4390 vpshufb %ymm8,%ymm15,%ymm15 4391 vpshufb %ymm8,%ymm14,%ymm14 4392 addq %rax,%r15 4393 adcq %rdx,%r9 4394 vpshufb %ymm8,%ymm13,%ymm13 4395 vpshufb %ymm8,%ymm12,%ymm12 4396 vpaddd %ymm15,%ymm11,%ymm11 4397 vpaddd %ymm14,%ymm10,%ymm10 4398 vpaddd %ymm13,%ymm9,%ymm9 4399 vpaddd 0+128(%rbp),%ymm12,%ymm8 4400 vpxor %ymm11,%ymm7,%ymm7 4401 vpxor %ymm10,%ymm6,%ymm6 4402 vpxor %ymm9,%ymm5,%ymm5 4403 movq %r13,%r10 4404 movq %r14,%r11 4405 movq %r15,%r12 4406 andq $3,%r12 4407 movq %r15,%r13 4408 andq $-4,%r13 4409 movq %r9,%r14 4410 shrdq $2,%r9,%r15 4411 shrq $2,%r9 4412 addq %r13,%r15 4413 adcq %r14,%r9 4414 addq %r15,%r10 4415 adcq %r9,%r11 4416 adcq $0,%r12 4417 vpxor %ymm8,%ymm4,%ymm4 4418 vmovdqa %ymm8,0+128(%rbp) 4419 vpsrld $20,%ymm7,%ymm8 4420 vpslld $32-20,%ymm7,%ymm7 4421 vpxor %ymm8,%ymm7,%ymm7 4422 vpsrld $20,%ymm6,%ymm8 4423 vpslld $32-20,%ymm6,%ymm6 4424 vpxor %ymm8,%ymm6,%ymm6 4425 addq 0+32(%rsi,%rcx,1),%r10 4426 adcq 8+32(%rsi,%rcx,1),%r11 4427 adcq $1,%r12 4428 4429 leaq 48(%rcx),%rcx 4430 vpsrld 
$20,%ymm5,%ymm8 4431 vpslld $32-20,%ymm5,%ymm5 4432 vpxor %ymm8,%ymm5,%ymm5 4433 vpsrld $20,%ymm4,%ymm8 4434 vpslld $32-20,%ymm4,%ymm4 4435 vpxor %ymm8,%ymm4,%ymm4 4436 vmovdqa L$rol8(%rip),%ymm8 4437 vpaddd %ymm7,%ymm3,%ymm3 4438 vpaddd %ymm6,%ymm2,%ymm2 4439 vpaddd %ymm5,%ymm1,%ymm1 4440 vpaddd %ymm4,%ymm0,%ymm0 4441 vpxor %ymm3,%ymm15,%ymm15 4442 vpxor %ymm2,%ymm14,%ymm14 4443 vpxor %ymm1,%ymm13,%ymm13 4444 vpxor %ymm0,%ymm12,%ymm12 4445 vpshufb %ymm8,%ymm15,%ymm15 4446 vpshufb %ymm8,%ymm14,%ymm14 4447 vpshufb %ymm8,%ymm13,%ymm13 4448 movq 0+0+0(%rbp),%rdx 4449 movq %rdx,%r15 4450 mulxq %r10,%r13,%r14 4451 mulxq %r11,%rax,%rdx 4452 imulq %r12,%r15 4453 addq %rax,%r14 4454 adcq %rdx,%r15 4455 vpshufb %ymm8,%ymm12,%ymm12 4456 vpaddd %ymm15,%ymm11,%ymm11 4457 vpaddd %ymm14,%ymm10,%ymm10 4458 vpaddd %ymm13,%ymm9,%ymm9 4459 vpaddd 0+128(%rbp),%ymm12,%ymm8 4460 vpxor %ymm11,%ymm7,%ymm7 4461 vpxor %ymm10,%ymm6,%ymm6 4462 vpxor %ymm9,%ymm5,%ymm5 4463 movq 8+0+0(%rbp),%rdx 4464 mulxq %r10,%r10,%rax 4465 addq %r10,%r14 4466 mulxq %r11,%r11,%r9 4467 adcq %r11,%r15 4468 adcq $0,%r9 4469 imulq %r12,%rdx 4470 vpxor %ymm8,%ymm4,%ymm4 4471 vmovdqa %ymm8,0+128(%rbp) 4472 vpsrld $25,%ymm7,%ymm8 4473 vpslld $32-25,%ymm7,%ymm7 4474 vpxor %ymm8,%ymm7,%ymm7 4475 vpsrld $25,%ymm6,%ymm8 4476 vpslld $32-25,%ymm6,%ymm6 4477 vpxor %ymm8,%ymm6,%ymm6 4478 addq %rax,%r15 4479 adcq %rdx,%r9 4480 vpsrld $25,%ymm5,%ymm8 4481 vpslld $32-25,%ymm5,%ymm5 4482 vpxor %ymm8,%ymm5,%ymm5 4483 vpsrld $25,%ymm4,%ymm8 4484 vpslld $32-25,%ymm4,%ymm4 4485 vpxor %ymm8,%ymm4,%ymm4 4486 vmovdqa 0+128(%rbp),%ymm8 4487 vpalignr $12,%ymm7,%ymm7,%ymm7 4488 vpalignr $8,%ymm11,%ymm11,%ymm11 4489 vpalignr $4,%ymm15,%ymm15,%ymm15 4490 vpalignr $12,%ymm6,%ymm6,%ymm6 4491 vpalignr $8,%ymm10,%ymm10,%ymm10 4492 vpalignr $4,%ymm14,%ymm14,%ymm14 4493 vpalignr $12,%ymm5,%ymm5,%ymm5 4494 vpalignr $8,%ymm9,%ymm9,%ymm9 4495 vpalignr $4,%ymm13,%ymm13,%ymm13 4496 vpalignr $12,%ymm4,%ymm4,%ymm4 4497 vpalignr $8,%ymm8,%ymm8,%ymm8 4498 movq %r13,%r10 4499 movq %r14,%r11 4500 movq %r15,%r12 4501 andq $3,%r12 4502 movq %r15,%r13 4503 andq $-4,%r13 4504 movq %r9,%r14 4505 shrdq $2,%r9,%r15 4506 shrq $2,%r9 4507 addq %r13,%r15 4508 adcq %r14,%r9 4509 addq %r15,%r10 4510 adcq %r9,%r11 4511 adcq $0,%r12 4512 vpalignr $4,%ymm12,%ymm12,%ymm12 4513 4514 cmpq $60*8,%rcx 4515 jne L$open_avx2_main_loop_rounds 4516 vpaddd L$chacha20_consts(%rip),%ymm3,%ymm3 4517 vpaddd 0+64(%rbp),%ymm7,%ymm7 4518 vpaddd 0+96(%rbp),%ymm11,%ymm11 4519 vpaddd 0+256(%rbp),%ymm15,%ymm15 4520 vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 4521 vpaddd 0+64(%rbp),%ymm6,%ymm6 4522 vpaddd 0+96(%rbp),%ymm10,%ymm10 4523 vpaddd 0+224(%rbp),%ymm14,%ymm14 4524 vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 4525 vpaddd 0+64(%rbp),%ymm5,%ymm5 4526 vpaddd 0+96(%rbp),%ymm9,%ymm9 4527 vpaddd 0+192(%rbp),%ymm13,%ymm13 4528 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 4529 vpaddd 0+64(%rbp),%ymm4,%ymm4 4530 vpaddd 0+96(%rbp),%ymm8,%ymm8 4531 vpaddd 0+160(%rbp),%ymm12,%ymm12 4532 4533 vmovdqa %ymm0,0+128(%rbp) 4534 addq 0+60*8(%rsi),%r10 4535 adcq 8+60*8(%rsi),%r11 4536 adcq $1,%r12 4537 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 4538 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 4539 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 4540 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 4541 vpxor 0+0(%rsi),%ymm0,%ymm0 4542 vpxor 32+0(%rsi),%ymm3,%ymm3 4543 vpxor 64+0(%rsi),%ymm7,%ymm7 4544 vpxor 96+0(%rsi),%ymm11,%ymm11 4545 vmovdqu %ymm0,0+0(%rdi) 4546 vmovdqu %ymm3,32+0(%rdi) 4547 vmovdqu %ymm7,64+0(%rdi) 4548 vmovdqu %ymm11,96+0(%rdi) 4549 4550 vmovdqa 
0+128(%rbp),%ymm0 4551 movq 0+0+0(%rbp),%rax 4552 movq %rax,%r15 4553 mulq %r10 4554 movq %rax,%r13 4555 movq %rdx,%r14 4556 movq 0+0+0(%rbp),%rax 4557 mulq %r11 4558 imulq %r12,%r15 4559 addq %rax,%r14 4560 adcq %rdx,%r15 4561 movq 8+0+0(%rbp),%rax 4562 movq %rax,%r9 4563 mulq %r10 4564 addq %rax,%r14 4565 adcq $0,%rdx 4566 movq %rdx,%r10 4567 movq 8+0+0(%rbp),%rax 4568 mulq %r11 4569 addq %rax,%r15 4570 adcq $0,%rdx 4571 imulq %r12,%r9 4572 addq %r10,%r15 4573 adcq %rdx,%r9 4574 movq %r13,%r10 4575 movq %r14,%r11 4576 movq %r15,%r12 4577 andq $3,%r12 4578 movq %r15,%r13 4579 andq $-4,%r13 4580 movq %r9,%r14 4581 shrdq $2,%r9,%r15 4582 shrq $2,%r9 4583 addq %r13,%r15 4584 adcq %r14,%r9 4585 addq %r15,%r10 4586 adcq %r9,%r11 4587 adcq $0,%r12 4588 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 4589 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 4590 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 4591 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 4592 vpxor 0+128(%rsi),%ymm3,%ymm3 4593 vpxor 32+128(%rsi),%ymm2,%ymm2 4594 vpxor 64+128(%rsi),%ymm6,%ymm6 4595 vpxor 96+128(%rsi),%ymm10,%ymm10 4596 vmovdqu %ymm3,0+128(%rdi) 4597 vmovdqu %ymm2,32+128(%rdi) 4598 vmovdqu %ymm6,64+128(%rdi) 4599 vmovdqu %ymm10,96+128(%rdi) 4600 addq 0+60*8+16(%rsi),%r10 4601 adcq 8+60*8+16(%rsi),%r11 4602 adcq $1,%r12 4603 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 4604 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 4605 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 4606 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 4607 vpxor 0+256(%rsi),%ymm3,%ymm3 4608 vpxor 32+256(%rsi),%ymm1,%ymm1 4609 vpxor 64+256(%rsi),%ymm5,%ymm5 4610 vpxor 96+256(%rsi),%ymm9,%ymm9 4611 vmovdqu %ymm3,0+256(%rdi) 4612 vmovdqu %ymm1,32+256(%rdi) 4613 vmovdqu %ymm5,64+256(%rdi) 4614 vmovdqu %ymm9,96+256(%rdi) 4615 movq 0+0+0(%rbp),%rax 4616 movq %rax,%r15 4617 mulq %r10 4618 movq %rax,%r13 4619 movq %rdx,%r14 4620 movq 0+0+0(%rbp),%rax 4621 mulq %r11 4622 imulq %r12,%r15 4623 addq %rax,%r14 4624 adcq %rdx,%r15 4625 movq 8+0+0(%rbp),%rax 4626 movq %rax,%r9 4627 mulq %r10 4628 addq %rax,%r14 4629 adcq $0,%rdx 4630 movq %rdx,%r10 4631 movq 8+0+0(%rbp),%rax 4632 mulq %r11 4633 addq %rax,%r15 4634 adcq $0,%rdx 4635 imulq %r12,%r9 4636 addq %r10,%r15 4637 adcq %rdx,%r9 4638 movq %r13,%r10 4639 movq %r14,%r11 4640 movq %r15,%r12 4641 andq $3,%r12 4642 movq %r15,%r13 4643 andq $-4,%r13 4644 movq %r9,%r14 4645 shrdq $2,%r9,%r15 4646 shrq $2,%r9 4647 addq %r13,%r15 4648 adcq %r14,%r9 4649 addq %r15,%r10 4650 adcq %r9,%r11 4651 adcq $0,%r12 4652 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 4653 vperm2i128 $0x13,%ymm0,%ymm4,%ymm4 4654 vperm2i128 $0x02,%ymm8,%ymm12,%ymm0 4655 vperm2i128 $0x13,%ymm8,%ymm12,%ymm8 4656 vpxor 0+384(%rsi),%ymm3,%ymm3 4657 vpxor 32+384(%rsi),%ymm0,%ymm0 4658 vpxor 64+384(%rsi),%ymm4,%ymm4 4659 vpxor 96+384(%rsi),%ymm8,%ymm8 4660 vmovdqu %ymm3,0+384(%rdi) 4661 vmovdqu %ymm0,32+384(%rdi) 4662 vmovdqu %ymm4,64+384(%rdi) 4663 vmovdqu %ymm8,96+384(%rdi) 4664 4665 leaq 512(%rsi),%rsi 4666 leaq 512(%rdi),%rdi 4667 subq $512,%rbx 4668 jmp L$open_avx2_main_loop 4669L$open_avx2_main_loop_done: 4670 testq %rbx,%rbx 4671 vzeroupper 4672 je L$open_sse_finalize 4673 4674 cmpq $384,%rbx 4675 ja L$open_avx2_tail_512 4676 cmpq $256,%rbx 4677 ja L$open_avx2_tail_384 4678 cmpq $128,%rbx 4679 ja L$open_avx2_tail_256 4680 vmovdqa L$chacha20_consts(%rip),%ymm0 4681 vmovdqa 0+64(%rbp),%ymm4 4682 vmovdqa 0+96(%rbp),%ymm8 4683 vmovdqa L$avx2_inc(%rip),%ymm12 4684 vpaddd 0+160(%rbp),%ymm12,%ymm12 4685 vmovdqa %ymm12,0+160(%rbp) 4686 4687 xorq %r8,%r8 4688 movq %rbx,%rcx 4689 andq $-16,%rcx 4690 testq %rcx,%rcx 4691 je L$open_avx2_tail_128_rounds 
4692L$open_avx2_tail_128_rounds_and_x1hash: 4693 addq 0+0(%rsi,%r8,1),%r10 4694 adcq 8+0(%rsi,%r8,1),%r11 4695 adcq $1,%r12 4696 movq 0+0+0(%rbp),%rax 4697 movq %rax,%r15 4698 mulq %r10 4699 movq %rax,%r13 4700 movq %rdx,%r14 4701 movq 0+0+0(%rbp),%rax 4702 mulq %r11 4703 imulq %r12,%r15 4704 addq %rax,%r14 4705 adcq %rdx,%r15 4706 movq 8+0+0(%rbp),%rax 4707 movq %rax,%r9 4708 mulq %r10 4709 addq %rax,%r14 4710 adcq $0,%rdx 4711 movq %rdx,%r10 4712 movq 8+0+0(%rbp),%rax 4713 mulq %r11 4714 addq %rax,%r15 4715 adcq $0,%rdx 4716 imulq %r12,%r9 4717 addq %r10,%r15 4718 adcq %rdx,%r9 4719 movq %r13,%r10 4720 movq %r14,%r11 4721 movq %r15,%r12 4722 andq $3,%r12 4723 movq %r15,%r13 4724 andq $-4,%r13 4725 movq %r9,%r14 4726 shrdq $2,%r9,%r15 4727 shrq $2,%r9 4728 addq %r13,%r15 4729 adcq %r14,%r9 4730 addq %r15,%r10 4731 adcq %r9,%r11 4732 adcq $0,%r12 4733 4734L$open_avx2_tail_128_rounds: 4735 addq $16,%r8 4736 vpaddd %ymm4,%ymm0,%ymm0 4737 vpxor %ymm0,%ymm12,%ymm12 4738 vpshufb L$rol16(%rip),%ymm12,%ymm12 4739 vpaddd %ymm12,%ymm8,%ymm8 4740 vpxor %ymm8,%ymm4,%ymm4 4741 vpsrld $20,%ymm4,%ymm3 4742 vpslld $12,%ymm4,%ymm4 4743 vpxor %ymm3,%ymm4,%ymm4 4744 vpaddd %ymm4,%ymm0,%ymm0 4745 vpxor %ymm0,%ymm12,%ymm12 4746 vpshufb L$rol8(%rip),%ymm12,%ymm12 4747 vpaddd %ymm12,%ymm8,%ymm8 4748 vpxor %ymm8,%ymm4,%ymm4 4749 vpslld $7,%ymm4,%ymm3 4750 vpsrld $25,%ymm4,%ymm4 4751 vpxor %ymm3,%ymm4,%ymm4 4752 vpalignr $12,%ymm12,%ymm12,%ymm12 4753 vpalignr $8,%ymm8,%ymm8,%ymm8 4754 vpalignr $4,%ymm4,%ymm4,%ymm4 4755 vpaddd %ymm4,%ymm0,%ymm0 4756 vpxor %ymm0,%ymm12,%ymm12 4757 vpshufb L$rol16(%rip),%ymm12,%ymm12 4758 vpaddd %ymm12,%ymm8,%ymm8 4759 vpxor %ymm8,%ymm4,%ymm4 4760 vpsrld $20,%ymm4,%ymm3 4761 vpslld $12,%ymm4,%ymm4 4762 vpxor %ymm3,%ymm4,%ymm4 4763 vpaddd %ymm4,%ymm0,%ymm0 4764 vpxor %ymm0,%ymm12,%ymm12 4765 vpshufb L$rol8(%rip),%ymm12,%ymm12 4766 vpaddd %ymm12,%ymm8,%ymm8 4767 vpxor %ymm8,%ymm4,%ymm4 4768 vpslld $7,%ymm4,%ymm3 4769 vpsrld $25,%ymm4,%ymm4 4770 vpxor %ymm3,%ymm4,%ymm4 4771 vpalignr $4,%ymm12,%ymm12,%ymm12 4772 vpalignr $8,%ymm8,%ymm8,%ymm8 4773 vpalignr $12,%ymm4,%ymm4,%ymm4 4774 4775 cmpq %rcx,%r8 4776 jb L$open_avx2_tail_128_rounds_and_x1hash 4777 cmpq $160,%r8 4778 jne L$open_avx2_tail_128_rounds 4779 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 4780 vpaddd 0+64(%rbp),%ymm4,%ymm4 4781 vpaddd 0+96(%rbp),%ymm8,%ymm8 4782 vpaddd 0+160(%rbp),%ymm12,%ymm12 4783 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 4784 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 4785 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 4786 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 4787 vmovdqa %ymm3,%ymm8 4788 4789 jmp L$open_avx2_tail_128_xor 4790 4791L$open_avx2_tail_256: 4792 vmovdqa L$chacha20_consts(%rip),%ymm0 4793 vmovdqa 0+64(%rbp),%ymm4 4794 vmovdqa 0+96(%rbp),%ymm8 4795 vmovdqa %ymm0,%ymm1 4796 vmovdqa %ymm4,%ymm5 4797 vmovdqa %ymm8,%ymm9 4798 vmovdqa L$avx2_inc(%rip),%ymm12 4799 vpaddd 0+160(%rbp),%ymm12,%ymm13 4800 vpaddd %ymm13,%ymm12,%ymm12 4801 vmovdqa %ymm12,0+160(%rbp) 4802 vmovdqa %ymm13,0+192(%rbp) 4803 4804 movq %rbx,0+128(%rbp) 4805 movq %rbx,%rcx 4806 subq $128,%rcx 4807 shrq $4,%rcx 4808 movq $10,%r8 4809 cmpq $10,%rcx 4810 cmovgq %r8,%rcx 4811 movq %rsi,%rbx 4812 xorq %r8,%r8 4813L$open_avx2_tail_256_rounds_and_x1hash: 4814 addq 0+0(%rbx),%r10 4815 adcq 8+0(%rbx),%r11 4816 adcq $1,%r12 4817 movq 0+0+0(%rbp),%rdx 4818 movq %rdx,%r15 4819 mulxq %r10,%r13,%r14 4820 mulxq %r11,%rax,%rdx 4821 imulq %r12,%r15 4822 addq %rax,%r14 4823 adcq %rdx,%r15 4824 movq 8+0+0(%rbp),%rdx 4825 mulxq %r10,%r10,%rax 4826 addq %r10,%r14 4827 mulxq 
%r11,%r11,%r9 4828 adcq %r11,%r15 4829 adcq $0,%r9 4830 imulq %r12,%rdx 4831 addq %rax,%r15 4832 adcq %rdx,%r9 4833 movq %r13,%r10 4834 movq %r14,%r11 4835 movq %r15,%r12 4836 andq $3,%r12 4837 movq %r15,%r13 4838 andq $-4,%r13 4839 movq %r9,%r14 4840 shrdq $2,%r9,%r15 4841 shrq $2,%r9 4842 addq %r13,%r15 4843 adcq %r14,%r9 4844 addq %r15,%r10 4845 adcq %r9,%r11 4846 adcq $0,%r12 4847 4848 leaq 16(%rbx),%rbx 4849L$open_avx2_tail_256_rounds: 4850 vpaddd %ymm4,%ymm0,%ymm0 4851 vpxor %ymm0,%ymm12,%ymm12 4852 vpshufb L$rol16(%rip),%ymm12,%ymm12 4853 vpaddd %ymm12,%ymm8,%ymm8 4854 vpxor %ymm8,%ymm4,%ymm4 4855 vpsrld $20,%ymm4,%ymm3 4856 vpslld $12,%ymm4,%ymm4 4857 vpxor %ymm3,%ymm4,%ymm4 4858 vpaddd %ymm4,%ymm0,%ymm0 4859 vpxor %ymm0,%ymm12,%ymm12 4860 vpshufb L$rol8(%rip),%ymm12,%ymm12 4861 vpaddd %ymm12,%ymm8,%ymm8 4862 vpxor %ymm8,%ymm4,%ymm4 4863 vpslld $7,%ymm4,%ymm3 4864 vpsrld $25,%ymm4,%ymm4 4865 vpxor %ymm3,%ymm4,%ymm4 4866 vpalignr $12,%ymm12,%ymm12,%ymm12 4867 vpalignr $8,%ymm8,%ymm8,%ymm8 4868 vpalignr $4,%ymm4,%ymm4,%ymm4 4869 vpaddd %ymm5,%ymm1,%ymm1 4870 vpxor %ymm1,%ymm13,%ymm13 4871 vpshufb L$rol16(%rip),%ymm13,%ymm13 4872 vpaddd %ymm13,%ymm9,%ymm9 4873 vpxor %ymm9,%ymm5,%ymm5 4874 vpsrld $20,%ymm5,%ymm3 4875 vpslld $12,%ymm5,%ymm5 4876 vpxor %ymm3,%ymm5,%ymm5 4877 vpaddd %ymm5,%ymm1,%ymm1 4878 vpxor %ymm1,%ymm13,%ymm13 4879 vpshufb L$rol8(%rip),%ymm13,%ymm13 4880 vpaddd %ymm13,%ymm9,%ymm9 4881 vpxor %ymm9,%ymm5,%ymm5 4882 vpslld $7,%ymm5,%ymm3 4883 vpsrld $25,%ymm5,%ymm5 4884 vpxor %ymm3,%ymm5,%ymm5 4885 vpalignr $12,%ymm13,%ymm13,%ymm13 4886 vpalignr $8,%ymm9,%ymm9,%ymm9 4887 vpalignr $4,%ymm5,%ymm5,%ymm5 4888 4889 incq %r8 4890 vpaddd %ymm4,%ymm0,%ymm0 4891 vpxor %ymm0,%ymm12,%ymm12 4892 vpshufb L$rol16(%rip),%ymm12,%ymm12 4893 vpaddd %ymm12,%ymm8,%ymm8 4894 vpxor %ymm8,%ymm4,%ymm4 4895 vpsrld $20,%ymm4,%ymm3 4896 vpslld $12,%ymm4,%ymm4 4897 vpxor %ymm3,%ymm4,%ymm4 4898 vpaddd %ymm4,%ymm0,%ymm0 4899 vpxor %ymm0,%ymm12,%ymm12 4900 vpshufb L$rol8(%rip),%ymm12,%ymm12 4901 vpaddd %ymm12,%ymm8,%ymm8 4902 vpxor %ymm8,%ymm4,%ymm4 4903 vpslld $7,%ymm4,%ymm3 4904 vpsrld $25,%ymm4,%ymm4 4905 vpxor %ymm3,%ymm4,%ymm4 4906 vpalignr $4,%ymm12,%ymm12,%ymm12 4907 vpalignr $8,%ymm8,%ymm8,%ymm8 4908 vpalignr $12,%ymm4,%ymm4,%ymm4 4909 vpaddd %ymm5,%ymm1,%ymm1 4910 vpxor %ymm1,%ymm13,%ymm13 4911 vpshufb L$rol16(%rip),%ymm13,%ymm13 4912 vpaddd %ymm13,%ymm9,%ymm9 4913 vpxor %ymm9,%ymm5,%ymm5 4914 vpsrld $20,%ymm5,%ymm3 4915 vpslld $12,%ymm5,%ymm5 4916 vpxor %ymm3,%ymm5,%ymm5 4917 vpaddd %ymm5,%ymm1,%ymm1 4918 vpxor %ymm1,%ymm13,%ymm13 4919 vpshufb L$rol8(%rip),%ymm13,%ymm13 4920 vpaddd %ymm13,%ymm9,%ymm9 4921 vpxor %ymm9,%ymm5,%ymm5 4922 vpslld $7,%ymm5,%ymm3 4923 vpsrld $25,%ymm5,%ymm5 4924 vpxor %ymm3,%ymm5,%ymm5 4925 vpalignr $4,%ymm13,%ymm13,%ymm13 4926 vpalignr $8,%ymm9,%ymm9,%ymm9 4927 vpalignr $12,%ymm5,%ymm5,%ymm5 4928 vpaddd %ymm6,%ymm2,%ymm2 4929 vpxor %ymm2,%ymm14,%ymm14 4930 vpshufb L$rol16(%rip),%ymm14,%ymm14 4931 vpaddd %ymm14,%ymm10,%ymm10 4932 vpxor %ymm10,%ymm6,%ymm6 4933 vpsrld $20,%ymm6,%ymm3 4934 vpslld $12,%ymm6,%ymm6 4935 vpxor %ymm3,%ymm6,%ymm6 4936 vpaddd %ymm6,%ymm2,%ymm2 4937 vpxor %ymm2,%ymm14,%ymm14 4938 vpshufb L$rol8(%rip),%ymm14,%ymm14 4939 vpaddd %ymm14,%ymm10,%ymm10 4940 vpxor %ymm10,%ymm6,%ymm6 4941 vpslld $7,%ymm6,%ymm3 4942 vpsrld $25,%ymm6,%ymm6 4943 vpxor %ymm3,%ymm6,%ymm6 4944 vpalignr $4,%ymm14,%ymm14,%ymm14 4945 vpalignr $8,%ymm10,%ymm10,%ymm10 4946 vpalignr $12,%ymm6,%ymm6,%ymm6 4947 4948 cmpq %rcx,%r8 4949 jb L$open_avx2_tail_256_rounds_and_x1hash 4950 
cmpq $10,%r8 4951 jne L$open_avx2_tail_256_rounds 4952 movq %rbx,%r8 4953 subq %rsi,%rbx 4954 movq %rbx,%rcx 4955 movq 0+128(%rbp),%rbx 4956L$open_avx2_tail_256_hash: 4957 addq $16,%rcx 4958 cmpq %rbx,%rcx 4959 jg L$open_avx2_tail_256_done 4960 addq 0+0(%r8),%r10 4961 adcq 8+0(%r8),%r11 4962 adcq $1,%r12 4963 movq 0+0+0(%rbp),%rdx 4964 movq %rdx,%r15 4965 mulxq %r10,%r13,%r14 4966 mulxq %r11,%rax,%rdx 4967 imulq %r12,%r15 4968 addq %rax,%r14 4969 adcq %rdx,%r15 4970 movq 8+0+0(%rbp),%rdx 4971 mulxq %r10,%r10,%rax 4972 addq %r10,%r14 4973 mulxq %r11,%r11,%r9 4974 adcq %r11,%r15 4975 adcq $0,%r9 4976 imulq %r12,%rdx 4977 addq %rax,%r15 4978 adcq %rdx,%r9 4979 movq %r13,%r10 4980 movq %r14,%r11 4981 movq %r15,%r12 4982 andq $3,%r12 4983 movq %r15,%r13 4984 andq $-4,%r13 4985 movq %r9,%r14 4986 shrdq $2,%r9,%r15 4987 shrq $2,%r9 4988 addq %r13,%r15 4989 adcq %r14,%r9 4990 addq %r15,%r10 4991 adcq %r9,%r11 4992 adcq $0,%r12 4993 4994 leaq 16(%r8),%r8 4995 jmp L$open_avx2_tail_256_hash 4996L$open_avx2_tail_256_done: 4997 vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 4998 vpaddd 0+64(%rbp),%ymm5,%ymm5 4999 vpaddd 0+96(%rbp),%ymm9,%ymm9 5000 vpaddd 0+192(%rbp),%ymm13,%ymm13 5001 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 5002 vpaddd 0+64(%rbp),%ymm4,%ymm4 5003 vpaddd 0+96(%rbp),%ymm8,%ymm8 5004 vpaddd 0+160(%rbp),%ymm12,%ymm12 5005 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 5006 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 5007 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 5008 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 5009 vpxor 0+0(%rsi),%ymm3,%ymm3 5010 vpxor 32+0(%rsi),%ymm1,%ymm1 5011 vpxor 64+0(%rsi),%ymm5,%ymm5 5012 vpxor 96+0(%rsi),%ymm9,%ymm9 5013 vmovdqu %ymm3,0+0(%rdi) 5014 vmovdqu %ymm1,32+0(%rdi) 5015 vmovdqu %ymm5,64+0(%rdi) 5016 vmovdqu %ymm9,96+0(%rdi) 5017 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 5018 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 5019 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 5020 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 5021 vmovdqa %ymm3,%ymm8 5022 5023 leaq 128(%rsi),%rsi 5024 leaq 128(%rdi),%rdi 5025 subq $128,%rbx 5026 jmp L$open_avx2_tail_128_xor 5027 5028L$open_avx2_tail_384: 5029 vmovdqa L$chacha20_consts(%rip),%ymm0 5030 vmovdqa 0+64(%rbp),%ymm4 5031 vmovdqa 0+96(%rbp),%ymm8 5032 vmovdqa %ymm0,%ymm1 5033 vmovdqa %ymm4,%ymm5 5034 vmovdqa %ymm8,%ymm9 5035 vmovdqa %ymm0,%ymm2 5036 vmovdqa %ymm4,%ymm6 5037 vmovdqa %ymm8,%ymm10 5038 vmovdqa L$avx2_inc(%rip),%ymm12 5039 vpaddd 0+160(%rbp),%ymm12,%ymm14 5040 vpaddd %ymm14,%ymm12,%ymm13 5041 vpaddd %ymm13,%ymm12,%ymm12 5042 vmovdqa %ymm12,0+160(%rbp) 5043 vmovdqa %ymm13,0+192(%rbp) 5044 vmovdqa %ymm14,0+224(%rbp) 5045 5046 movq %rbx,0+128(%rbp) 5047 movq %rbx,%rcx 5048 subq $256,%rcx 5049 shrq $4,%rcx 5050 addq $6,%rcx 5051 movq $10,%r8 5052 cmpq $10,%rcx 5053 cmovgq %r8,%rcx 5054 movq %rsi,%rbx 5055 xorq %r8,%r8 5056L$open_avx2_tail_384_rounds_and_x2hash: 5057 addq 0+0(%rbx),%r10 5058 adcq 8+0(%rbx),%r11 5059 adcq $1,%r12 5060 movq 0+0+0(%rbp),%rdx 5061 movq %rdx,%r15 5062 mulxq %r10,%r13,%r14 5063 mulxq %r11,%rax,%rdx 5064 imulq %r12,%r15 5065 addq %rax,%r14 5066 adcq %rdx,%r15 5067 movq 8+0+0(%rbp),%rdx 5068 mulxq %r10,%r10,%rax 5069 addq %r10,%r14 5070 mulxq %r11,%r11,%r9 5071 adcq %r11,%r15 5072 adcq $0,%r9 5073 imulq %r12,%rdx 5074 addq %rax,%r15 5075 adcq %rdx,%r9 5076 movq %r13,%r10 5077 movq %r14,%r11 5078 movq %r15,%r12 5079 andq $3,%r12 5080 movq %r15,%r13 5081 andq $-4,%r13 5082 movq %r9,%r14 5083 shrdq $2,%r9,%r15 5084 shrq $2,%r9 5085 addq %r13,%r15 5086 adcq %r14,%r9 5087 addq %r15,%r10 5088 adcq %r9,%r11 5089 adcq $0,%r12 5090 5091 leaq 16(%rbx),%rbx 
5092L$open_avx2_tail_384_rounds_and_x1hash: 5093 vpaddd %ymm6,%ymm2,%ymm2 5094 vpxor %ymm2,%ymm14,%ymm14 5095 vpshufb L$rol16(%rip),%ymm14,%ymm14 5096 vpaddd %ymm14,%ymm10,%ymm10 5097 vpxor %ymm10,%ymm6,%ymm6 5098 vpsrld $20,%ymm6,%ymm3 5099 vpslld $12,%ymm6,%ymm6 5100 vpxor %ymm3,%ymm6,%ymm6 5101 vpaddd %ymm6,%ymm2,%ymm2 5102 vpxor %ymm2,%ymm14,%ymm14 5103 vpshufb L$rol8(%rip),%ymm14,%ymm14 5104 vpaddd %ymm14,%ymm10,%ymm10 5105 vpxor %ymm10,%ymm6,%ymm6 5106 vpslld $7,%ymm6,%ymm3 5107 vpsrld $25,%ymm6,%ymm6 5108 vpxor %ymm3,%ymm6,%ymm6 5109 vpalignr $12,%ymm14,%ymm14,%ymm14 5110 vpalignr $8,%ymm10,%ymm10,%ymm10 5111 vpalignr $4,%ymm6,%ymm6,%ymm6 5112 vpaddd %ymm5,%ymm1,%ymm1 5113 vpxor %ymm1,%ymm13,%ymm13 5114 vpshufb L$rol16(%rip),%ymm13,%ymm13 5115 vpaddd %ymm13,%ymm9,%ymm9 5116 vpxor %ymm9,%ymm5,%ymm5 5117 vpsrld $20,%ymm5,%ymm3 5118 vpslld $12,%ymm5,%ymm5 5119 vpxor %ymm3,%ymm5,%ymm5 5120 vpaddd %ymm5,%ymm1,%ymm1 5121 vpxor %ymm1,%ymm13,%ymm13 5122 vpshufb L$rol8(%rip),%ymm13,%ymm13 5123 vpaddd %ymm13,%ymm9,%ymm9 5124 vpxor %ymm9,%ymm5,%ymm5 5125 vpslld $7,%ymm5,%ymm3 5126 vpsrld $25,%ymm5,%ymm5 5127 vpxor %ymm3,%ymm5,%ymm5 5128 vpalignr $12,%ymm13,%ymm13,%ymm13 5129 vpalignr $8,%ymm9,%ymm9,%ymm9 5130 vpalignr $4,%ymm5,%ymm5,%ymm5 5131 vpaddd %ymm4,%ymm0,%ymm0 5132 vpxor %ymm0,%ymm12,%ymm12 5133 vpshufb L$rol16(%rip),%ymm12,%ymm12 5134 vpaddd %ymm12,%ymm8,%ymm8 5135 vpxor %ymm8,%ymm4,%ymm4 5136 vpsrld $20,%ymm4,%ymm3 5137 vpslld $12,%ymm4,%ymm4 5138 vpxor %ymm3,%ymm4,%ymm4 5139 vpaddd %ymm4,%ymm0,%ymm0 5140 vpxor %ymm0,%ymm12,%ymm12 5141 vpshufb L$rol8(%rip),%ymm12,%ymm12 5142 vpaddd %ymm12,%ymm8,%ymm8 5143 vpxor %ymm8,%ymm4,%ymm4 5144 vpslld $7,%ymm4,%ymm3 5145 vpsrld $25,%ymm4,%ymm4 5146 vpxor %ymm3,%ymm4,%ymm4 5147 vpalignr $12,%ymm12,%ymm12,%ymm12 5148 vpalignr $8,%ymm8,%ymm8,%ymm8 5149 vpalignr $4,%ymm4,%ymm4,%ymm4 5150 addq 0+0(%rbx),%r10 5151 adcq 8+0(%rbx),%r11 5152 adcq $1,%r12 5153 movq 0+0+0(%rbp),%rax 5154 movq %rax,%r15 5155 mulq %r10 5156 movq %rax,%r13 5157 movq %rdx,%r14 5158 movq 0+0+0(%rbp),%rax 5159 mulq %r11 5160 imulq %r12,%r15 5161 addq %rax,%r14 5162 adcq %rdx,%r15 5163 movq 8+0+0(%rbp),%rax 5164 movq %rax,%r9 5165 mulq %r10 5166 addq %rax,%r14 5167 adcq $0,%rdx 5168 movq %rdx,%r10 5169 movq 8+0+0(%rbp),%rax 5170 mulq %r11 5171 addq %rax,%r15 5172 adcq $0,%rdx 5173 imulq %r12,%r9 5174 addq %r10,%r15 5175 adcq %rdx,%r9 5176 movq %r13,%r10 5177 movq %r14,%r11 5178 movq %r15,%r12 5179 andq $3,%r12 5180 movq %r15,%r13 5181 andq $-4,%r13 5182 movq %r9,%r14 5183 shrdq $2,%r9,%r15 5184 shrq $2,%r9 5185 addq %r13,%r15 5186 adcq %r14,%r9 5187 addq %r15,%r10 5188 adcq %r9,%r11 5189 adcq $0,%r12 5190 5191 leaq 16(%rbx),%rbx 5192 incq %r8 5193 vpaddd %ymm6,%ymm2,%ymm2 5194 vpxor %ymm2,%ymm14,%ymm14 5195 vpshufb L$rol16(%rip),%ymm14,%ymm14 5196 vpaddd %ymm14,%ymm10,%ymm10 5197 vpxor %ymm10,%ymm6,%ymm6 5198 vpsrld $20,%ymm6,%ymm3 5199 vpslld $12,%ymm6,%ymm6 5200 vpxor %ymm3,%ymm6,%ymm6 5201 vpaddd %ymm6,%ymm2,%ymm2 5202 vpxor %ymm2,%ymm14,%ymm14 5203 vpshufb L$rol8(%rip),%ymm14,%ymm14 5204 vpaddd %ymm14,%ymm10,%ymm10 5205 vpxor %ymm10,%ymm6,%ymm6 5206 vpslld $7,%ymm6,%ymm3 5207 vpsrld $25,%ymm6,%ymm6 5208 vpxor %ymm3,%ymm6,%ymm6 5209 vpalignr $4,%ymm14,%ymm14,%ymm14 5210 vpalignr $8,%ymm10,%ymm10,%ymm10 5211 vpalignr $12,%ymm6,%ymm6,%ymm6 5212 vpaddd %ymm5,%ymm1,%ymm1 5213 vpxor %ymm1,%ymm13,%ymm13 5214 vpshufb L$rol16(%rip),%ymm13,%ymm13 5215 vpaddd %ymm13,%ymm9,%ymm9 5216 vpxor %ymm9,%ymm5,%ymm5 5217 vpsrld $20,%ymm5,%ymm3 5218 vpslld $12,%ymm5,%ymm5 5219 vpxor 
%ymm3,%ymm5,%ymm5 5220 vpaddd %ymm5,%ymm1,%ymm1 5221 vpxor %ymm1,%ymm13,%ymm13 5222 vpshufb L$rol8(%rip),%ymm13,%ymm13 5223 vpaddd %ymm13,%ymm9,%ymm9 5224 vpxor %ymm9,%ymm5,%ymm5 5225 vpslld $7,%ymm5,%ymm3 5226 vpsrld $25,%ymm5,%ymm5 5227 vpxor %ymm3,%ymm5,%ymm5 5228 vpalignr $4,%ymm13,%ymm13,%ymm13 5229 vpalignr $8,%ymm9,%ymm9,%ymm9 5230 vpalignr $12,%ymm5,%ymm5,%ymm5 5231 vpaddd %ymm4,%ymm0,%ymm0 5232 vpxor %ymm0,%ymm12,%ymm12 5233 vpshufb L$rol16(%rip),%ymm12,%ymm12 5234 vpaddd %ymm12,%ymm8,%ymm8 5235 vpxor %ymm8,%ymm4,%ymm4 5236 vpsrld $20,%ymm4,%ymm3 5237 vpslld $12,%ymm4,%ymm4 5238 vpxor %ymm3,%ymm4,%ymm4 5239 vpaddd %ymm4,%ymm0,%ymm0 5240 vpxor %ymm0,%ymm12,%ymm12 5241 vpshufb L$rol8(%rip),%ymm12,%ymm12 5242 vpaddd %ymm12,%ymm8,%ymm8 5243 vpxor %ymm8,%ymm4,%ymm4 5244 vpslld $7,%ymm4,%ymm3 5245 vpsrld $25,%ymm4,%ymm4 5246 vpxor %ymm3,%ymm4,%ymm4 5247 vpalignr $4,%ymm12,%ymm12,%ymm12 5248 vpalignr $8,%ymm8,%ymm8,%ymm8 5249 vpalignr $12,%ymm4,%ymm4,%ymm4 5250 5251 cmpq %rcx,%r8 5252 jb L$open_avx2_tail_384_rounds_and_x2hash 5253 cmpq $10,%r8 5254 jne L$open_avx2_tail_384_rounds_and_x1hash 5255 movq %rbx,%r8 5256 subq %rsi,%rbx 5257 movq %rbx,%rcx 5258 movq 0+128(%rbp),%rbx 5259L$open_avx2_384_tail_hash: 5260 addq $16,%rcx 5261 cmpq %rbx,%rcx 5262 jg L$open_avx2_384_tail_done 5263 addq 0+0(%r8),%r10 5264 adcq 8+0(%r8),%r11 5265 adcq $1,%r12 5266 movq 0+0+0(%rbp),%rdx 5267 movq %rdx,%r15 5268 mulxq %r10,%r13,%r14 5269 mulxq %r11,%rax,%rdx 5270 imulq %r12,%r15 5271 addq %rax,%r14 5272 adcq %rdx,%r15 5273 movq 8+0+0(%rbp),%rdx 5274 mulxq %r10,%r10,%rax 5275 addq %r10,%r14 5276 mulxq %r11,%r11,%r9 5277 adcq %r11,%r15 5278 adcq $0,%r9 5279 imulq %r12,%rdx 5280 addq %rax,%r15 5281 adcq %rdx,%r9 5282 movq %r13,%r10 5283 movq %r14,%r11 5284 movq %r15,%r12 5285 andq $3,%r12 5286 movq %r15,%r13 5287 andq $-4,%r13 5288 movq %r9,%r14 5289 shrdq $2,%r9,%r15 5290 shrq $2,%r9 5291 addq %r13,%r15 5292 adcq %r14,%r9 5293 addq %r15,%r10 5294 adcq %r9,%r11 5295 adcq $0,%r12 5296 5297 leaq 16(%r8),%r8 5298 jmp L$open_avx2_384_tail_hash 5299L$open_avx2_384_tail_done: 5300 vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 5301 vpaddd 0+64(%rbp),%ymm6,%ymm6 5302 vpaddd 0+96(%rbp),%ymm10,%ymm10 5303 vpaddd 0+224(%rbp),%ymm14,%ymm14 5304 vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 5305 vpaddd 0+64(%rbp),%ymm5,%ymm5 5306 vpaddd 0+96(%rbp),%ymm9,%ymm9 5307 vpaddd 0+192(%rbp),%ymm13,%ymm13 5308 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 5309 vpaddd 0+64(%rbp),%ymm4,%ymm4 5310 vpaddd 0+96(%rbp),%ymm8,%ymm8 5311 vpaddd 0+160(%rbp),%ymm12,%ymm12 5312 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 5313 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 5314 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 5315 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 5316 vpxor 0+0(%rsi),%ymm3,%ymm3 5317 vpxor 32+0(%rsi),%ymm2,%ymm2 5318 vpxor 64+0(%rsi),%ymm6,%ymm6 5319 vpxor 96+0(%rsi),%ymm10,%ymm10 5320 vmovdqu %ymm3,0+0(%rdi) 5321 vmovdqu %ymm2,32+0(%rdi) 5322 vmovdqu %ymm6,64+0(%rdi) 5323 vmovdqu %ymm10,96+0(%rdi) 5324 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 5325 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 5326 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 5327 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 5328 vpxor 0+128(%rsi),%ymm3,%ymm3 5329 vpxor 32+128(%rsi),%ymm1,%ymm1 5330 vpxor 64+128(%rsi),%ymm5,%ymm5 5331 vpxor 96+128(%rsi),%ymm9,%ymm9 5332 vmovdqu %ymm3,0+128(%rdi) 5333 vmovdqu %ymm1,32+128(%rdi) 5334 vmovdqu %ymm5,64+128(%rdi) 5335 vmovdqu %ymm9,96+128(%rdi) 5336 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 5337 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 5338 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 5339 vperm2i128 
$0x13,%ymm8,%ymm12,%ymm12 5340 vmovdqa %ymm3,%ymm8 5341 5342 leaq 256(%rsi),%rsi 5343 leaq 256(%rdi),%rdi 5344 subq $256,%rbx 5345 jmp L$open_avx2_tail_128_xor 5346 5347L$open_avx2_tail_512: 5348 vmovdqa L$chacha20_consts(%rip),%ymm0 5349 vmovdqa 0+64(%rbp),%ymm4 5350 vmovdqa 0+96(%rbp),%ymm8 5351 vmovdqa %ymm0,%ymm1 5352 vmovdqa %ymm4,%ymm5 5353 vmovdqa %ymm8,%ymm9 5354 vmovdqa %ymm0,%ymm2 5355 vmovdqa %ymm4,%ymm6 5356 vmovdqa %ymm8,%ymm10 5357 vmovdqa %ymm0,%ymm3 5358 vmovdqa %ymm4,%ymm7 5359 vmovdqa %ymm8,%ymm11 5360 vmovdqa L$avx2_inc(%rip),%ymm12 5361 vpaddd 0+160(%rbp),%ymm12,%ymm15 5362 vpaddd %ymm15,%ymm12,%ymm14 5363 vpaddd %ymm14,%ymm12,%ymm13 5364 vpaddd %ymm13,%ymm12,%ymm12 5365 vmovdqa %ymm15,0+256(%rbp) 5366 vmovdqa %ymm14,0+224(%rbp) 5367 vmovdqa %ymm13,0+192(%rbp) 5368 vmovdqa %ymm12,0+160(%rbp) 5369 5370 xorq %rcx,%rcx 5371 movq %rsi,%r8 5372L$open_avx2_tail_512_rounds_and_x2hash: 5373 addq 0+0(%r8),%r10 5374 adcq 8+0(%r8),%r11 5375 adcq $1,%r12 5376 movq 0+0+0(%rbp),%rax 5377 movq %rax,%r15 5378 mulq %r10 5379 movq %rax,%r13 5380 movq %rdx,%r14 5381 movq 0+0+0(%rbp),%rax 5382 mulq %r11 5383 imulq %r12,%r15 5384 addq %rax,%r14 5385 adcq %rdx,%r15 5386 movq 8+0+0(%rbp),%rax 5387 movq %rax,%r9 5388 mulq %r10 5389 addq %rax,%r14 5390 adcq $0,%rdx 5391 movq %rdx,%r10 5392 movq 8+0+0(%rbp),%rax 5393 mulq %r11 5394 addq %rax,%r15 5395 adcq $0,%rdx 5396 imulq %r12,%r9 5397 addq %r10,%r15 5398 adcq %rdx,%r9 5399 movq %r13,%r10 5400 movq %r14,%r11 5401 movq %r15,%r12 5402 andq $3,%r12 5403 movq %r15,%r13 5404 andq $-4,%r13 5405 movq %r9,%r14 5406 shrdq $2,%r9,%r15 5407 shrq $2,%r9 5408 addq %r13,%r15 5409 adcq %r14,%r9 5410 addq %r15,%r10 5411 adcq %r9,%r11 5412 adcq $0,%r12 5413 5414 leaq 16(%r8),%r8 5415L$open_avx2_tail_512_rounds_and_x1hash: 5416 vmovdqa %ymm8,0+128(%rbp) 5417 vmovdqa L$rol16(%rip),%ymm8 5418 vpaddd %ymm7,%ymm3,%ymm3 5419 vpaddd %ymm6,%ymm2,%ymm2 5420 vpaddd %ymm5,%ymm1,%ymm1 5421 vpaddd %ymm4,%ymm0,%ymm0 5422 vpxor %ymm3,%ymm15,%ymm15 5423 vpxor %ymm2,%ymm14,%ymm14 5424 vpxor %ymm1,%ymm13,%ymm13 5425 vpxor %ymm0,%ymm12,%ymm12 5426 vpshufb %ymm8,%ymm15,%ymm15 5427 vpshufb %ymm8,%ymm14,%ymm14 5428 vpshufb %ymm8,%ymm13,%ymm13 5429 vpshufb %ymm8,%ymm12,%ymm12 5430 vpaddd %ymm15,%ymm11,%ymm11 5431 vpaddd %ymm14,%ymm10,%ymm10 5432 vpaddd %ymm13,%ymm9,%ymm9 5433 vpaddd 0+128(%rbp),%ymm12,%ymm8 5434 vpxor %ymm11,%ymm7,%ymm7 5435 vpxor %ymm10,%ymm6,%ymm6 5436 vpxor %ymm9,%ymm5,%ymm5 5437 vpxor %ymm8,%ymm4,%ymm4 5438 vmovdqa %ymm8,0+128(%rbp) 5439 vpsrld $20,%ymm7,%ymm8 5440 vpslld $32-20,%ymm7,%ymm7 5441 vpxor %ymm8,%ymm7,%ymm7 5442 vpsrld $20,%ymm6,%ymm8 5443 vpslld $32-20,%ymm6,%ymm6 5444 vpxor %ymm8,%ymm6,%ymm6 5445 vpsrld $20,%ymm5,%ymm8 5446 vpslld $32-20,%ymm5,%ymm5 5447 vpxor %ymm8,%ymm5,%ymm5 5448 vpsrld $20,%ymm4,%ymm8 5449 vpslld $32-20,%ymm4,%ymm4 5450 vpxor %ymm8,%ymm4,%ymm4 5451 vmovdqa L$rol8(%rip),%ymm8 5452 vpaddd %ymm7,%ymm3,%ymm3 5453 addq 0+0(%r8),%r10 5454 adcq 8+0(%r8),%r11 5455 adcq $1,%r12 5456 movq 0+0+0(%rbp),%rdx 5457 movq %rdx,%r15 5458 mulxq %r10,%r13,%r14 5459 mulxq %r11,%rax,%rdx 5460 imulq %r12,%r15 5461 addq %rax,%r14 5462 adcq %rdx,%r15 5463 movq 8+0+0(%rbp),%rdx 5464 mulxq %r10,%r10,%rax 5465 addq %r10,%r14 5466 mulxq %r11,%r11,%r9 5467 adcq %r11,%r15 5468 adcq $0,%r9 5469 imulq %r12,%rdx 5470 addq %rax,%r15 5471 adcq %rdx,%r9 5472 movq %r13,%r10 5473 movq %r14,%r11 5474 movq %r15,%r12 5475 andq $3,%r12 5476 movq %r15,%r13 5477 andq $-4,%r13 5478 movq %r9,%r14 5479 shrdq $2,%r9,%r15 5480 shrq $2,%r9 5481 addq %r13,%r15 5482 adcq 
%r14,%r9 5483 addq %r15,%r10 5484 adcq %r9,%r11 5485 adcq $0,%r12 5486 vpaddd %ymm6,%ymm2,%ymm2 5487 vpaddd %ymm5,%ymm1,%ymm1 5488 vpaddd %ymm4,%ymm0,%ymm0 5489 vpxor %ymm3,%ymm15,%ymm15 5490 vpxor %ymm2,%ymm14,%ymm14 5491 vpxor %ymm1,%ymm13,%ymm13 5492 vpxor %ymm0,%ymm12,%ymm12 5493 vpshufb %ymm8,%ymm15,%ymm15 5494 vpshufb %ymm8,%ymm14,%ymm14 5495 vpshufb %ymm8,%ymm13,%ymm13 5496 vpshufb %ymm8,%ymm12,%ymm12 5497 vpaddd %ymm15,%ymm11,%ymm11 5498 vpaddd %ymm14,%ymm10,%ymm10 5499 vpaddd %ymm13,%ymm9,%ymm9 5500 vpaddd 0+128(%rbp),%ymm12,%ymm8 5501 vpxor %ymm11,%ymm7,%ymm7 5502 vpxor %ymm10,%ymm6,%ymm6 5503 vpxor %ymm9,%ymm5,%ymm5 5504 vpxor %ymm8,%ymm4,%ymm4 5505 vmovdqa %ymm8,0+128(%rbp) 5506 vpsrld $25,%ymm7,%ymm8 5507 vpslld $32-25,%ymm7,%ymm7 5508 vpxor %ymm8,%ymm7,%ymm7 5509 vpsrld $25,%ymm6,%ymm8 5510 vpslld $32-25,%ymm6,%ymm6 5511 vpxor %ymm8,%ymm6,%ymm6 5512 vpsrld $25,%ymm5,%ymm8 5513 vpslld $32-25,%ymm5,%ymm5 5514 vpxor %ymm8,%ymm5,%ymm5 5515 vpsrld $25,%ymm4,%ymm8 5516 vpslld $32-25,%ymm4,%ymm4 5517 vpxor %ymm8,%ymm4,%ymm4 5518 vmovdqa 0+128(%rbp),%ymm8 5519 vpalignr $4,%ymm7,%ymm7,%ymm7 5520 vpalignr $8,%ymm11,%ymm11,%ymm11 5521 vpalignr $12,%ymm15,%ymm15,%ymm15 5522 vpalignr $4,%ymm6,%ymm6,%ymm6 5523 vpalignr $8,%ymm10,%ymm10,%ymm10 5524 vpalignr $12,%ymm14,%ymm14,%ymm14 5525 vpalignr $4,%ymm5,%ymm5,%ymm5 5526 vpalignr $8,%ymm9,%ymm9,%ymm9 5527 vpalignr $12,%ymm13,%ymm13,%ymm13 5528 vpalignr $4,%ymm4,%ymm4,%ymm4 5529 vpalignr $8,%ymm8,%ymm8,%ymm8 5530 vpalignr $12,%ymm12,%ymm12,%ymm12 5531 vmovdqa %ymm8,0+128(%rbp) 5532 vmovdqa L$rol16(%rip),%ymm8 5533 vpaddd %ymm7,%ymm3,%ymm3 5534 addq 0+16(%r8),%r10 5535 adcq 8+16(%r8),%r11 5536 adcq $1,%r12 5537 movq 0+0+0(%rbp),%rdx 5538 movq %rdx,%r15 5539 mulxq %r10,%r13,%r14 5540 mulxq %r11,%rax,%rdx 5541 imulq %r12,%r15 5542 addq %rax,%r14 5543 adcq %rdx,%r15 5544 movq 8+0+0(%rbp),%rdx 5545 mulxq %r10,%r10,%rax 5546 addq %r10,%r14 5547 mulxq %r11,%r11,%r9 5548 adcq %r11,%r15 5549 adcq $0,%r9 5550 imulq %r12,%rdx 5551 addq %rax,%r15 5552 adcq %rdx,%r9 5553 movq %r13,%r10 5554 movq %r14,%r11 5555 movq %r15,%r12 5556 andq $3,%r12 5557 movq %r15,%r13 5558 andq $-4,%r13 5559 movq %r9,%r14 5560 shrdq $2,%r9,%r15 5561 shrq $2,%r9 5562 addq %r13,%r15 5563 adcq %r14,%r9 5564 addq %r15,%r10 5565 adcq %r9,%r11 5566 adcq $0,%r12 5567 5568 leaq 32(%r8),%r8 5569 vpaddd %ymm6,%ymm2,%ymm2 5570 vpaddd %ymm5,%ymm1,%ymm1 5571 vpaddd %ymm4,%ymm0,%ymm0 5572 vpxor %ymm3,%ymm15,%ymm15 5573 vpxor %ymm2,%ymm14,%ymm14 5574 vpxor %ymm1,%ymm13,%ymm13 5575 vpxor %ymm0,%ymm12,%ymm12 5576 vpshufb %ymm8,%ymm15,%ymm15 5577 vpshufb %ymm8,%ymm14,%ymm14 5578 vpshufb %ymm8,%ymm13,%ymm13 5579 vpshufb %ymm8,%ymm12,%ymm12 5580 vpaddd %ymm15,%ymm11,%ymm11 5581 vpaddd %ymm14,%ymm10,%ymm10 5582 vpaddd %ymm13,%ymm9,%ymm9 5583 vpaddd 0+128(%rbp),%ymm12,%ymm8 5584 vpxor %ymm11,%ymm7,%ymm7 5585 vpxor %ymm10,%ymm6,%ymm6 5586 vpxor %ymm9,%ymm5,%ymm5 5587 vpxor %ymm8,%ymm4,%ymm4 5588 vmovdqa %ymm8,0+128(%rbp) 5589 vpsrld $20,%ymm7,%ymm8 5590 vpslld $32-20,%ymm7,%ymm7 5591 vpxor %ymm8,%ymm7,%ymm7 5592 vpsrld $20,%ymm6,%ymm8 5593 vpslld $32-20,%ymm6,%ymm6 5594 vpxor %ymm8,%ymm6,%ymm6 5595 vpsrld $20,%ymm5,%ymm8 5596 vpslld $32-20,%ymm5,%ymm5 5597 vpxor %ymm8,%ymm5,%ymm5 5598 vpsrld $20,%ymm4,%ymm8 5599 vpslld $32-20,%ymm4,%ymm4 5600 vpxor %ymm8,%ymm4,%ymm4 5601 vmovdqa L$rol8(%rip),%ymm8 5602 vpaddd %ymm7,%ymm3,%ymm3 5603 vpaddd %ymm6,%ymm2,%ymm2 5604 vpaddd %ymm5,%ymm1,%ymm1 5605 vpaddd %ymm4,%ymm0,%ymm0 5606 vpxor %ymm3,%ymm15,%ymm15 5607 vpxor %ymm2,%ymm14,%ymm14 5608 vpxor 
%ymm1,%ymm13,%ymm13 5609 vpxor %ymm0,%ymm12,%ymm12 5610 vpshufb %ymm8,%ymm15,%ymm15 5611 vpshufb %ymm8,%ymm14,%ymm14 5612 vpshufb %ymm8,%ymm13,%ymm13 5613 vpshufb %ymm8,%ymm12,%ymm12 5614 vpaddd %ymm15,%ymm11,%ymm11 5615 vpaddd %ymm14,%ymm10,%ymm10 5616 vpaddd %ymm13,%ymm9,%ymm9 5617 vpaddd 0+128(%rbp),%ymm12,%ymm8 5618 vpxor %ymm11,%ymm7,%ymm7 5619 vpxor %ymm10,%ymm6,%ymm6 5620 vpxor %ymm9,%ymm5,%ymm5 5621 vpxor %ymm8,%ymm4,%ymm4 5622 vmovdqa %ymm8,0+128(%rbp) 5623 vpsrld $25,%ymm7,%ymm8 5624 vpslld $32-25,%ymm7,%ymm7 5625 vpxor %ymm8,%ymm7,%ymm7 5626 vpsrld $25,%ymm6,%ymm8 5627 vpslld $32-25,%ymm6,%ymm6 5628 vpxor %ymm8,%ymm6,%ymm6 5629 vpsrld $25,%ymm5,%ymm8 5630 vpslld $32-25,%ymm5,%ymm5 5631 vpxor %ymm8,%ymm5,%ymm5 5632 vpsrld $25,%ymm4,%ymm8 5633 vpslld $32-25,%ymm4,%ymm4 5634 vpxor %ymm8,%ymm4,%ymm4 5635 vmovdqa 0+128(%rbp),%ymm8 5636 vpalignr $12,%ymm7,%ymm7,%ymm7 5637 vpalignr $8,%ymm11,%ymm11,%ymm11 5638 vpalignr $4,%ymm15,%ymm15,%ymm15 5639 vpalignr $12,%ymm6,%ymm6,%ymm6 5640 vpalignr $8,%ymm10,%ymm10,%ymm10 5641 vpalignr $4,%ymm14,%ymm14,%ymm14 5642 vpalignr $12,%ymm5,%ymm5,%ymm5 5643 vpalignr $8,%ymm9,%ymm9,%ymm9 5644 vpalignr $4,%ymm13,%ymm13,%ymm13 5645 vpalignr $12,%ymm4,%ymm4,%ymm4 5646 vpalignr $8,%ymm8,%ymm8,%ymm8 5647 vpalignr $4,%ymm12,%ymm12,%ymm12 5648 5649 incq %rcx 5650 cmpq $4,%rcx 5651 jl L$open_avx2_tail_512_rounds_and_x2hash 5652 cmpq $10,%rcx 5653 jne L$open_avx2_tail_512_rounds_and_x1hash 5654 movq %rbx,%rcx 5655 subq $384,%rcx 5656 andq $-16,%rcx 5657L$open_avx2_tail_512_hash: 5658 testq %rcx,%rcx 5659 je L$open_avx2_tail_512_done 5660 addq 0+0(%r8),%r10 5661 adcq 8+0(%r8),%r11 5662 adcq $1,%r12 5663 movq 0+0+0(%rbp),%rdx 5664 movq %rdx,%r15 5665 mulxq %r10,%r13,%r14 5666 mulxq %r11,%rax,%rdx 5667 imulq %r12,%r15 5668 addq %rax,%r14 5669 adcq %rdx,%r15 5670 movq 8+0+0(%rbp),%rdx 5671 mulxq %r10,%r10,%rax 5672 addq %r10,%r14 5673 mulxq %r11,%r11,%r9 5674 adcq %r11,%r15 5675 adcq $0,%r9 5676 imulq %r12,%rdx 5677 addq %rax,%r15 5678 adcq %rdx,%r9 5679 movq %r13,%r10 5680 movq %r14,%r11 5681 movq %r15,%r12 5682 andq $3,%r12 5683 movq %r15,%r13 5684 andq $-4,%r13 5685 movq %r9,%r14 5686 shrdq $2,%r9,%r15 5687 shrq $2,%r9 5688 addq %r13,%r15 5689 adcq %r14,%r9 5690 addq %r15,%r10 5691 adcq %r9,%r11 5692 adcq $0,%r12 5693 5694 leaq 16(%r8),%r8 5695 subq $16,%rcx 5696 jmp L$open_avx2_tail_512_hash 5697L$open_avx2_tail_512_done: 5698 vpaddd L$chacha20_consts(%rip),%ymm3,%ymm3 5699 vpaddd 0+64(%rbp),%ymm7,%ymm7 5700 vpaddd 0+96(%rbp),%ymm11,%ymm11 5701 vpaddd 0+256(%rbp),%ymm15,%ymm15 5702 vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 5703 vpaddd 0+64(%rbp),%ymm6,%ymm6 5704 vpaddd 0+96(%rbp),%ymm10,%ymm10 5705 vpaddd 0+224(%rbp),%ymm14,%ymm14 5706 vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 5707 vpaddd 0+64(%rbp),%ymm5,%ymm5 5708 vpaddd 0+96(%rbp),%ymm9,%ymm9 5709 vpaddd 0+192(%rbp),%ymm13,%ymm13 5710 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 5711 vpaddd 0+64(%rbp),%ymm4,%ymm4 5712 vpaddd 0+96(%rbp),%ymm8,%ymm8 5713 vpaddd 0+160(%rbp),%ymm12,%ymm12 5714 5715 vmovdqa %ymm0,0+128(%rbp) 5716 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 5717 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 5718 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 5719 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 5720 vpxor 0+0(%rsi),%ymm0,%ymm0 5721 vpxor 32+0(%rsi),%ymm3,%ymm3 5722 vpxor 64+0(%rsi),%ymm7,%ymm7 5723 vpxor 96+0(%rsi),%ymm11,%ymm11 5724 vmovdqu %ymm0,0+0(%rdi) 5725 vmovdqu %ymm3,32+0(%rdi) 5726 vmovdqu %ymm7,64+0(%rdi) 5727 vmovdqu %ymm11,96+0(%rdi) 5728 5729 vmovdqa 0+128(%rbp),%ymm0 5730 vperm2i128 
$0x02,%ymm2,%ymm6,%ymm3 5731 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 5732 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 5733 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 5734 vpxor 0+128(%rsi),%ymm3,%ymm3 5735 vpxor 32+128(%rsi),%ymm2,%ymm2 5736 vpxor 64+128(%rsi),%ymm6,%ymm6 5737 vpxor 96+128(%rsi),%ymm10,%ymm10 5738 vmovdqu %ymm3,0+128(%rdi) 5739 vmovdqu %ymm2,32+128(%rdi) 5740 vmovdqu %ymm6,64+128(%rdi) 5741 vmovdqu %ymm10,96+128(%rdi) 5742 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 5743 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 5744 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 5745 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 5746 vpxor 0+256(%rsi),%ymm3,%ymm3 5747 vpxor 32+256(%rsi),%ymm1,%ymm1 5748 vpxor 64+256(%rsi),%ymm5,%ymm5 5749 vpxor 96+256(%rsi),%ymm9,%ymm9 5750 vmovdqu %ymm3,0+256(%rdi) 5751 vmovdqu %ymm1,32+256(%rdi) 5752 vmovdqu %ymm5,64+256(%rdi) 5753 vmovdqu %ymm9,96+256(%rdi) 5754 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 5755 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 5756 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 5757 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 5758 vmovdqa %ymm3,%ymm8 5759 5760 leaq 384(%rsi),%rsi 5761 leaq 384(%rdi),%rdi 5762 subq $384,%rbx 5763L$open_avx2_tail_128_xor: 5764 cmpq $32,%rbx 5765 jb L$open_avx2_tail_32_xor 5766 subq $32,%rbx 5767 vpxor (%rsi),%ymm0,%ymm0 5768 vmovdqu %ymm0,(%rdi) 5769 leaq 32(%rsi),%rsi 5770 leaq 32(%rdi),%rdi 5771 vmovdqa %ymm4,%ymm0 5772 vmovdqa %ymm8,%ymm4 5773 vmovdqa %ymm12,%ymm8 5774 jmp L$open_avx2_tail_128_xor 5775L$open_avx2_tail_32_xor: 5776 cmpq $16,%rbx 5777 vmovdqa %xmm0,%xmm1 5778 jb L$open_avx2_exit 5779 subq $16,%rbx 5780 5781 vpxor (%rsi),%xmm0,%xmm1 5782 vmovdqu %xmm1,(%rdi) 5783 leaq 16(%rsi),%rsi 5784 leaq 16(%rdi),%rdi 5785 vperm2i128 $0x11,%ymm0,%ymm0,%ymm0 5786 vmovdqa %xmm0,%xmm1 5787L$open_avx2_exit: 5788 vzeroupper 5789 jmp L$open_sse_tail_16 5790 5791L$open_avx2_192: 5792 vmovdqa %ymm0,%ymm1 5793 vmovdqa %ymm0,%ymm2 5794 vmovdqa %ymm4,%ymm5 5795 vmovdqa %ymm4,%ymm6 5796 vmovdqa %ymm8,%ymm9 5797 vmovdqa %ymm8,%ymm10 5798 vpaddd L$avx2_inc(%rip),%ymm12,%ymm13 5799 vmovdqa %ymm12,%ymm11 5800 vmovdqa %ymm13,%ymm15 5801 movq $10,%r10 5802L$open_avx2_192_rounds: 5803 vpaddd %ymm4,%ymm0,%ymm0 5804 vpxor %ymm0,%ymm12,%ymm12 5805 vpshufb L$rol16(%rip),%ymm12,%ymm12 5806 vpaddd %ymm12,%ymm8,%ymm8 5807 vpxor %ymm8,%ymm4,%ymm4 5808 vpsrld $20,%ymm4,%ymm3 5809 vpslld $12,%ymm4,%ymm4 5810 vpxor %ymm3,%ymm4,%ymm4 5811 vpaddd %ymm4,%ymm0,%ymm0 5812 vpxor %ymm0,%ymm12,%ymm12 5813 vpshufb L$rol8(%rip),%ymm12,%ymm12 5814 vpaddd %ymm12,%ymm8,%ymm8 5815 vpxor %ymm8,%ymm4,%ymm4 5816 vpslld $7,%ymm4,%ymm3 5817 vpsrld $25,%ymm4,%ymm4 5818 vpxor %ymm3,%ymm4,%ymm4 5819 vpalignr $12,%ymm12,%ymm12,%ymm12 5820 vpalignr $8,%ymm8,%ymm8,%ymm8 5821 vpalignr $4,%ymm4,%ymm4,%ymm4 5822 vpaddd %ymm5,%ymm1,%ymm1 5823 vpxor %ymm1,%ymm13,%ymm13 5824 vpshufb L$rol16(%rip),%ymm13,%ymm13 5825 vpaddd %ymm13,%ymm9,%ymm9 5826 vpxor %ymm9,%ymm5,%ymm5 5827 vpsrld $20,%ymm5,%ymm3 5828 vpslld $12,%ymm5,%ymm5 5829 vpxor %ymm3,%ymm5,%ymm5 5830 vpaddd %ymm5,%ymm1,%ymm1 5831 vpxor %ymm1,%ymm13,%ymm13 5832 vpshufb L$rol8(%rip),%ymm13,%ymm13 5833 vpaddd %ymm13,%ymm9,%ymm9 5834 vpxor %ymm9,%ymm5,%ymm5 5835 vpslld $7,%ymm5,%ymm3 5836 vpsrld $25,%ymm5,%ymm5 5837 vpxor %ymm3,%ymm5,%ymm5 5838 vpalignr $12,%ymm13,%ymm13,%ymm13 5839 vpalignr $8,%ymm9,%ymm9,%ymm9 5840 vpalignr $4,%ymm5,%ymm5,%ymm5 5841 vpaddd %ymm4,%ymm0,%ymm0 5842 vpxor %ymm0,%ymm12,%ymm12 5843 vpshufb L$rol16(%rip),%ymm12,%ymm12 5844 vpaddd %ymm12,%ymm8,%ymm8 5845 vpxor %ymm8,%ymm4,%ymm4 5846 vpsrld $20,%ymm4,%ymm3 5847 vpslld $12,%ymm4,%ymm4 5848 vpxor 
%ymm3,%ymm4,%ymm4 5849 vpaddd %ymm4,%ymm0,%ymm0 5850 vpxor %ymm0,%ymm12,%ymm12 5851 vpshufb L$rol8(%rip),%ymm12,%ymm12 5852 vpaddd %ymm12,%ymm8,%ymm8 5853 vpxor %ymm8,%ymm4,%ymm4 5854 vpslld $7,%ymm4,%ymm3 5855 vpsrld $25,%ymm4,%ymm4 5856 vpxor %ymm3,%ymm4,%ymm4 5857 vpalignr $4,%ymm12,%ymm12,%ymm12 5858 vpalignr $8,%ymm8,%ymm8,%ymm8 5859 vpalignr $12,%ymm4,%ymm4,%ymm4 5860 vpaddd %ymm5,%ymm1,%ymm1 5861 vpxor %ymm1,%ymm13,%ymm13 5862 vpshufb L$rol16(%rip),%ymm13,%ymm13 5863 vpaddd %ymm13,%ymm9,%ymm9 5864 vpxor %ymm9,%ymm5,%ymm5 5865 vpsrld $20,%ymm5,%ymm3 5866 vpslld $12,%ymm5,%ymm5 5867 vpxor %ymm3,%ymm5,%ymm5 5868 vpaddd %ymm5,%ymm1,%ymm1 5869 vpxor %ymm1,%ymm13,%ymm13 5870 vpshufb L$rol8(%rip),%ymm13,%ymm13 5871 vpaddd %ymm13,%ymm9,%ymm9 5872 vpxor %ymm9,%ymm5,%ymm5 5873 vpslld $7,%ymm5,%ymm3 5874 vpsrld $25,%ymm5,%ymm5 5875 vpxor %ymm3,%ymm5,%ymm5 5876 vpalignr $4,%ymm13,%ymm13,%ymm13 5877 vpalignr $8,%ymm9,%ymm9,%ymm9 5878 vpalignr $12,%ymm5,%ymm5,%ymm5 5879 5880 decq %r10 5881 jne L$open_avx2_192_rounds 5882 vpaddd %ymm2,%ymm0,%ymm0 5883 vpaddd %ymm2,%ymm1,%ymm1 5884 vpaddd %ymm6,%ymm4,%ymm4 5885 vpaddd %ymm6,%ymm5,%ymm5 5886 vpaddd %ymm10,%ymm8,%ymm8 5887 vpaddd %ymm10,%ymm9,%ymm9 5888 vpaddd %ymm11,%ymm12,%ymm12 5889 vpaddd %ymm15,%ymm13,%ymm13 5890 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 5891 5892 vpand L$clamp(%rip),%ymm3,%ymm3 5893 vmovdqa %ymm3,0+0(%rbp) 5894 5895 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 5896 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 5897 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 5898 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 5899 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 5900 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 5901L$open_avx2_short: 5902 movq %r8,%r8 5903 call poly_hash_ad_internal 5904L$open_avx2_short_hash_and_xor_loop: 5905 cmpq $32,%rbx 5906 jb L$open_avx2_short_tail_32 5907 subq $32,%rbx 5908 addq 0+0(%rsi),%r10 5909 adcq 8+0(%rsi),%r11 5910 adcq $1,%r12 5911 movq 0+0+0(%rbp),%rax 5912 movq %rax,%r15 5913 mulq %r10 5914 movq %rax,%r13 5915 movq %rdx,%r14 5916 movq 0+0+0(%rbp),%rax 5917 mulq %r11 5918 imulq %r12,%r15 5919 addq %rax,%r14 5920 adcq %rdx,%r15 5921 movq 8+0+0(%rbp),%rax 5922 movq %rax,%r9 5923 mulq %r10 5924 addq %rax,%r14 5925 adcq $0,%rdx 5926 movq %rdx,%r10 5927 movq 8+0+0(%rbp),%rax 5928 mulq %r11 5929 addq %rax,%r15 5930 adcq $0,%rdx 5931 imulq %r12,%r9 5932 addq %r10,%r15 5933 adcq %rdx,%r9 5934 movq %r13,%r10 5935 movq %r14,%r11 5936 movq %r15,%r12 5937 andq $3,%r12 5938 movq %r15,%r13 5939 andq $-4,%r13 5940 movq %r9,%r14 5941 shrdq $2,%r9,%r15 5942 shrq $2,%r9 5943 addq %r13,%r15 5944 adcq %r14,%r9 5945 addq %r15,%r10 5946 adcq %r9,%r11 5947 adcq $0,%r12 5948 addq 0+16(%rsi),%r10 5949 adcq 8+16(%rsi),%r11 5950 adcq $1,%r12 5951 movq 0+0+0(%rbp),%rax 5952 movq %rax,%r15 5953 mulq %r10 5954 movq %rax,%r13 5955 movq %rdx,%r14 5956 movq 0+0+0(%rbp),%rax 5957 mulq %r11 5958 imulq %r12,%r15 5959 addq %rax,%r14 5960 adcq %rdx,%r15 5961 movq 8+0+0(%rbp),%rax 5962 movq %rax,%r9 5963 mulq %r10 5964 addq %rax,%r14 5965 adcq $0,%rdx 5966 movq %rdx,%r10 5967 movq 8+0+0(%rbp),%rax 5968 mulq %r11 5969 addq %rax,%r15 5970 adcq $0,%rdx 5971 imulq %r12,%r9 5972 addq %r10,%r15 5973 adcq %rdx,%r9 5974 movq %r13,%r10 5975 movq %r14,%r11 5976 movq %r15,%r12 5977 andq $3,%r12 5978 movq %r15,%r13 5979 andq $-4,%r13 5980 movq %r9,%r14 5981 shrdq $2,%r9,%r15 5982 shrq $2,%r9 5983 addq %r13,%r15 5984 adcq %r14,%r9 5985 addq %r15,%r10 5986 adcq %r9,%r11 5987 adcq $0,%r12 5988 5989 5990 vpxor (%rsi),%ymm0,%ymm0 5991 vmovdqu %ymm0,(%rdi) 5992 leaq 32(%rsi),%rsi 5993 leaq 32(%rdi),%rdi 5994 5995 
vmovdqa %ymm4,%ymm0 5996 vmovdqa %ymm8,%ymm4 5997 vmovdqa %ymm12,%ymm8 5998 vmovdqa %ymm1,%ymm12 5999 vmovdqa %ymm5,%ymm1 6000 vmovdqa %ymm9,%ymm5 6001 vmovdqa %ymm13,%ymm9 6002 vmovdqa %ymm2,%ymm13 6003 vmovdqa %ymm6,%ymm2 6004 jmp L$open_avx2_short_hash_and_xor_loop 6005L$open_avx2_short_tail_32: 6006 cmpq $16,%rbx 6007 vmovdqa %xmm0,%xmm1 6008 jb L$open_avx2_short_tail_32_exit 6009 subq $16,%rbx 6010 addq 0+0(%rsi),%r10 6011 adcq 8+0(%rsi),%r11 6012 adcq $1,%r12 6013 movq 0+0+0(%rbp),%rax 6014 movq %rax,%r15 6015 mulq %r10 6016 movq %rax,%r13 6017 movq %rdx,%r14 6018 movq 0+0+0(%rbp),%rax 6019 mulq %r11 6020 imulq %r12,%r15 6021 addq %rax,%r14 6022 adcq %rdx,%r15 6023 movq 8+0+0(%rbp),%rax 6024 movq %rax,%r9 6025 mulq %r10 6026 addq %rax,%r14 6027 adcq $0,%rdx 6028 movq %rdx,%r10 6029 movq 8+0+0(%rbp),%rax 6030 mulq %r11 6031 addq %rax,%r15 6032 adcq $0,%rdx 6033 imulq %r12,%r9 6034 addq %r10,%r15 6035 adcq %rdx,%r9 6036 movq %r13,%r10 6037 movq %r14,%r11 6038 movq %r15,%r12 6039 andq $3,%r12 6040 movq %r15,%r13 6041 andq $-4,%r13 6042 movq %r9,%r14 6043 shrdq $2,%r9,%r15 6044 shrq $2,%r9 6045 addq %r13,%r15 6046 adcq %r14,%r9 6047 addq %r15,%r10 6048 adcq %r9,%r11 6049 adcq $0,%r12 6050 6051 vpxor (%rsi),%xmm0,%xmm3 6052 vmovdqu %xmm3,(%rdi) 6053 leaq 16(%rsi),%rsi 6054 leaq 16(%rdi),%rdi 6055 vextracti128 $1,%ymm0,%xmm1 6056L$open_avx2_short_tail_32_exit: 6057 vzeroupper 6058 jmp L$open_sse_tail_16 6059 6060L$open_avx2_320: 6061 vmovdqa %ymm0,%ymm1 6062 vmovdqa %ymm0,%ymm2 6063 vmovdqa %ymm4,%ymm5 6064 vmovdqa %ymm4,%ymm6 6065 vmovdqa %ymm8,%ymm9 6066 vmovdqa %ymm8,%ymm10 6067 vpaddd L$avx2_inc(%rip),%ymm12,%ymm13 6068 vpaddd L$avx2_inc(%rip),%ymm13,%ymm14 6069 vmovdqa %ymm4,%ymm7 6070 vmovdqa %ymm8,%ymm11 6071 vmovdqa %ymm12,0+160(%rbp) 6072 vmovdqa %ymm13,0+192(%rbp) 6073 vmovdqa %ymm14,0+224(%rbp) 6074 movq $10,%r10 6075L$open_avx2_320_rounds: 6076 vpaddd %ymm4,%ymm0,%ymm0 6077 vpxor %ymm0,%ymm12,%ymm12 6078 vpshufb L$rol16(%rip),%ymm12,%ymm12 6079 vpaddd %ymm12,%ymm8,%ymm8 6080 vpxor %ymm8,%ymm4,%ymm4 6081 vpsrld $20,%ymm4,%ymm3 6082 vpslld $12,%ymm4,%ymm4 6083 vpxor %ymm3,%ymm4,%ymm4 6084 vpaddd %ymm4,%ymm0,%ymm0 6085 vpxor %ymm0,%ymm12,%ymm12 6086 vpshufb L$rol8(%rip),%ymm12,%ymm12 6087 vpaddd %ymm12,%ymm8,%ymm8 6088 vpxor %ymm8,%ymm4,%ymm4 6089 vpslld $7,%ymm4,%ymm3 6090 vpsrld $25,%ymm4,%ymm4 6091 vpxor %ymm3,%ymm4,%ymm4 6092 vpalignr $12,%ymm12,%ymm12,%ymm12 6093 vpalignr $8,%ymm8,%ymm8,%ymm8 6094 vpalignr $4,%ymm4,%ymm4,%ymm4 6095 vpaddd %ymm5,%ymm1,%ymm1 6096 vpxor %ymm1,%ymm13,%ymm13 6097 vpshufb L$rol16(%rip),%ymm13,%ymm13 6098 vpaddd %ymm13,%ymm9,%ymm9 6099 vpxor %ymm9,%ymm5,%ymm5 6100 vpsrld $20,%ymm5,%ymm3 6101 vpslld $12,%ymm5,%ymm5 6102 vpxor %ymm3,%ymm5,%ymm5 6103 vpaddd %ymm5,%ymm1,%ymm1 6104 vpxor %ymm1,%ymm13,%ymm13 6105 vpshufb L$rol8(%rip),%ymm13,%ymm13 6106 vpaddd %ymm13,%ymm9,%ymm9 6107 vpxor %ymm9,%ymm5,%ymm5 6108 vpslld $7,%ymm5,%ymm3 6109 vpsrld $25,%ymm5,%ymm5 6110 vpxor %ymm3,%ymm5,%ymm5 6111 vpalignr $12,%ymm13,%ymm13,%ymm13 6112 vpalignr $8,%ymm9,%ymm9,%ymm9 6113 vpalignr $4,%ymm5,%ymm5,%ymm5 6114 vpaddd %ymm6,%ymm2,%ymm2 6115 vpxor %ymm2,%ymm14,%ymm14 6116 vpshufb L$rol16(%rip),%ymm14,%ymm14 6117 vpaddd %ymm14,%ymm10,%ymm10 6118 vpxor %ymm10,%ymm6,%ymm6 6119 vpsrld $20,%ymm6,%ymm3 6120 vpslld $12,%ymm6,%ymm6 6121 vpxor %ymm3,%ymm6,%ymm6 6122 vpaddd %ymm6,%ymm2,%ymm2 6123 vpxor %ymm2,%ymm14,%ymm14 6124 vpshufb L$rol8(%rip),%ymm14,%ymm14 6125 vpaddd %ymm14,%ymm10,%ymm10 6126 vpxor %ymm10,%ymm6,%ymm6 6127 vpslld $7,%ymm6,%ymm3 6128 vpsrld $25,%ymm6,%ymm6 
	vpxor %ymm3,%ymm6,%ymm6
	vpalignr $12,%ymm14,%ymm14,%ymm14
	vpalignr $8,%ymm10,%ymm10,%ymm10
	vpalignr $4,%ymm6,%ymm6,%ymm6
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb L$rol16(%rip),%ymm12,%ymm12
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm8,%ymm4,%ymm4
	vpsrld $20,%ymm4,%ymm3
	vpslld $12,%ymm4,%ymm4
	vpxor %ymm3,%ymm4,%ymm4
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb L$rol8(%rip),%ymm12,%ymm12
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm8,%ymm4,%ymm4
	vpslld $7,%ymm4,%ymm3
	vpsrld $25,%ymm4,%ymm4
	vpxor %ymm3,%ymm4,%ymm4
	vpalignr $4,%ymm12,%ymm12,%ymm12
	vpalignr $8,%ymm8,%ymm8,%ymm8
	vpalignr $12,%ymm4,%ymm4,%ymm4
	vpaddd %ymm5,%ymm1,%ymm1
	vpxor %ymm1,%ymm13,%ymm13
	vpshufb L$rol16(%rip),%ymm13,%ymm13
	vpaddd %ymm13,%ymm9,%ymm9
	vpxor %ymm9,%ymm5,%ymm5
	vpsrld $20,%ymm5,%ymm3
	vpslld $12,%ymm5,%ymm5
	vpxor %ymm3,%ymm5,%ymm5
	vpaddd %ymm5,%ymm1,%ymm1
	vpxor %ymm1,%ymm13,%ymm13
	vpshufb L$rol8(%rip),%ymm13,%ymm13
	vpaddd %ymm13,%ymm9,%ymm9
	vpxor %ymm9,%ymm5,%ymm5
	vpslld $7,%ymm5,%ymm3
	vpsrld $25,%ymm5,%ymm5
	vpxor %ymm3,%ymm5,%ymm5
	vpalignr $4,%ymm13,%ymm13,%ymm13
	vpalignr $8,%ymm9,%ymm9,%ymm9
	vpalignr $12,%ymm5,%ymm5,%ymm5
	vpaddd %ymm6,%ymm2,%ymm2
	vpxor %ymm2,%ymm14,%ymm14
	vpshufb L$rol16(%rip),%ymm14,%ymm14
	vpaddd %ymm14,%ymm10,%ymm10
	vpxor %ymm10,%ymm6,%ymm6
	vpsrld $20,%ymm6,%ymm3
	vpslld $12,%ymm6,%ymm6
	vpxor %ymm3,%ymm6,%ymm6
	vpaddd %ymm6,%ymm2,%ymm2
	vpxor %ymm2,%ymm14,%ymm14
	vpshufb L$rol8(%rip),%ymm14,%ymm14
	vpaddd %ymm14,%ymm10,%ymm10
	vpxor %ymm10,%ymm6,%ymm6
	vpslld $7,%ymm6,%ymm3
	vpsrld $25,%ymm6,%ymm6
	vpxor %ymm3,%ymm6,%ymm6
	vpalignr $4,%ymm14,%ymm14,%ymm14
	vpalignr $8,%ymm10,%ymm10,%ymm10
	vpalignr $12,%ymm6,%ymm6,%ymm6

	decq %r10
	jne L$open_avx2_320_rounds
	vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0
	vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1
	vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2
	vpaddd %ymm7,%ymm4,%ymm4
	vpaddd %ymm7,%ymm5,%ymm5
	vpaddd %ymm7,%ymm6,%ymm6
	vpaddd %ymm11,%ymm8,%ymm8
	vpaddd %ymm11,%ymm9,%ymm9
	vpaddd %ymm11,%ymm10,%ymm10
	vpaddd 0+160(%rbp),%ymm12,%ymm12
	vpaddd 0+192(%rbp),%ymm13,%ymm13
	vpaddd 0+224(%rbp),%ymm14,%ymm14
	vperm2i128 $0x02,%ymm0,%ymm4,%ymm3

	vpand L$clamp(%rip),%ymm3,%ymm3
	vmovdqa %ymm3,0+0(%rbp)

	vperm2i128 $0x13,%ymm0,%ymm4,%ymm0
	vperm2i128 $0x13,%ymm8,%ymm12,%ymm4
	vperm2i128 $0x02,%ymm1,%ymm5,%ymm8
	vperm2i128 $0x02,%ymm9,%ymm13,%ymm12
	vperm2i128 $0x13,%ymm1,%ymm5,%ymm1
	vperm2i128 $0x13,%ymm9,%ymm13,%ymm5
	vperm2i128 $0x02,%ymm2,%ymm6,%ymm9
	vperm2i128 $0x02,%ymm10,%ymm14,%ymm13
	vperm2i128 $0x13,%ymm2,%ymm6,%ymm2
	vperm2i128 $0x13,%ymm10,%ymm14,%ymm6
	jmp L$open_avx2_short




.p2align 6
chacha20_poly1305_seal_avx2:












	vzeroupper
	vmovdqa L$chacha20_consts(%rip),%ymm0
	vbroadcasti128 0(%r9),%ymm4
	vbroadcasti128 16(%r9),%ymm8
	vbroadcasti128 32(%r9),%ymm12
	vpaddd L$avx2_init(%rip),%ymm12,%ymm12
	cmpq $192,%rbx
	jbe L$seal_avx2_192
	cmpq $320,%rbx
	jbe L$seal_avx2_320
	vmovdqa %ymm0,%ymm1
	vmovdqa %ymm0,%ymm2
	vmovdqa %ymm0,%ymm3
	vmovdqa %ymm4,%ymm5
	vmovdqa %ymm4,%ymm6
6255 vmovdqa %ymm4,%ymm7 6256 vmovdqa %ymm4,0+64(%rbp) 6257 vmovdqa %ymm8,%ymm9 6258 vmovdqa %ymm8,%ymm10 6259 vmovdqa %ymm8,%ymm11 6260 vmovdqa %ymm8,0+96(%rbp) 6261 vmovdqa %ymm12,%ymm15 6262 vpaddd L$avx2_inc(%rip),%ymm15,%ymm14 6263 vpaddd L$avx2_inc(%rip),%ymm14,%ymm13 6264 vpaddd L$avx2_inc(%rip),%ymm13,%ymm12 6265 vmovdqa %ymm12,0+160(%rbp) 6266 vmovdqa %ymm13,0+192(%rbp) 6267 vmovdqa %ymm14,0+224(%rbp) 6268 vmovdqa %ymm15,0+256(%rbp) 6269 movq $10,%r10 6270L$seal_avx2_init_rounds: 6271 vmovdqa %ymm8,0+128(%rbp) 6272 vmovdqa L$rol16(%rip),%ymm8 6273 vpaddd %ymm7,%ymm3,%ymm3 6274 vpaddd %ymm6,%ymm2,%ymm2 6275 vpaddd %ymm5,%ymm1,%ymm1 6276 vpaddd %ymm4,%ymm0,%ymm0 6277 vpxor %ymm3,%ymm15,%ymm15 6278 vpxor %ymm2,%ymm14,%ymm14 6279 vpxor %ymm1,%ymm13,%ymm13 6280 vpxor %ymm0,%ymm12,%ymm12 6281 vpshufb %ymm8,%ymm15,%ymm15 6282 vpshufb %ymm8,%ymm14,%ymm14 6283 vpshufb %ymm8,%ymm13,%ymm13 6284 vpshufb %ymm8,%ymm12,%ymm12 6285 vpaddd %ymm15,%ymm11,%ymm11 6286 vpaddd %ymm14,%ymm10,%ymm10 6287 vpaddd %ymm13,%ymm9,%ymm9 6288 vpaddd 0+128(%rbp),%ymm12,%ymm8 6289 vpxor %ymm11,%ymm7,%ymm7 6290 vpxor %ymm10,%ymm6,%ymm6 6291 vpxor %ymm9,%ymm5,%ymm5 6292 vpxor %ymm8,%ymm4,%ymm4 6293 vmovdqa %ymm8,0+128(%rbp) 6294 vpsrld $20,%ymm7,%ymm8 6295 vpslld $32-20,%ymm7,%ymm7 6296 vpxor %ymm8,%ymm7,%ymm7 6297 vpsrld $20,%ymm6,%ymm8 6298 vpslld $32-20,%ymm6,%ymm6 6299 vpxor %ymm8,%ymm6,%ymm6 6300 vpsrld $20,%ymm5,%ymm8 6301 vpslld $32-20,%ymm5,%ymm5 6302 vpxor %ymm8,%ymm5,%ymm5 6303 vpsrld $20,%ymm4,%ymm8 6304 vpslld $32-20,%ymm4,%ymm4 6305 vpxor %ymm8,%ymm4,%ymm4 6306 vmovdqa L$rol8(%rip),%ymm8 6307 vpaddd %ymm7,%ymm3,%ymm3 6308 vpaddd %ymm6,%ymm2,%ymm2 6309 vpaddd %ymm5,%ymm1,%ymm1 6310 vpaddd %ymm4,%ymm0,%ymm0 6311 vpxor %ymm3,%ymm15,%ymm15 6312 vpxor %ymm2,%ymm14,%ymm14 6313 vpxor %ymm1,%ymm13,%ymm13 6314 vpxor %ymm0,%ymm12,%ymm12 6315 vpshufb %ymm8,%ymm15,%ymm15 6316 vpshufb %ymm8,%ymm14,%ymm14 6317 vpshufb %ymm8,%ymm13,%ymm13 6318 vpshufb %ymm8,%ymm12,%ymm12 6319 vpaddd %ymm15,%ymm11,%ymm11 6320 vpaddd %ymm14,%ymm10,%ymm10 6321 vpaddd %ymm13,%ymm9,%ymm9 6322 vpaddd 0+128(%rbp),%ymm12,%ymm8 6323 vpxor %ymm11,%ymm7,%ymm7 6324 vpxor %ymm10,%ymm6,%ymm6 6325 vpxor %ymm9,%ymm5,%ymm5 6326 vpxor %ymm8,%ymm4,%ymm4 6327 vmovdqa %ymm8,0+128(%rbp) 6328 vpsrld $25,%ymm7,%ymm8 6329 vpslld $32-25,%ymm7,%ymm7 6330 vpxor %ymm8,%ymm7,%ymm7 6331 vpsrld $25,%ymm6,%ymm8 6332 vpslld $32-25,%ymm6,%ymm6 6333 vpxor %ymm8,%ymm6,%ymm6 6334 vpsrld $25,%ymm5,%ymm8 6335 vpslld $32-25,%ymm5,%ymm5 6336 vpxor %ymm8,%ymm5,%ymm5 6337 vpsrld $25,%ymm4,%ymm8 6338 vpslld $32-25,%ymm4,%ymm4 6339 vpxor %ymm8,%ymm4,%ymm4 6340 vmovdqa 0+128(%rbp),%ymm8 6341 vpalignr $4,%ymm7,%ymm7,%ymm7 6342 vpalignr $8,%ymm11,%ymm11,%ymm11 6343 vpalignr $12,%ymm15,%ymm15,%ymm15 6344 vpalignr $4,%ymm6,%ymm6,%ymm6 6345 vpalignr $8,%ymm10,%ymm10,%ymm10 6346 vpalignr $12,%ymm14,%ymm14,%ymm14 6347 vpalignr $4,%ymm5,%ymm5,%ymm5 6348 vpalignr $8,%ymm9,%ymm9,%ymm9 6349 vpalignr $12,%ymm13,%ymm13,%ymm13 6350 vpalignr $4,%ymm4,%ymm4,%ymm4 6351 vpalignr $8,%ymm8,%ymm8,%ymm8 6352 vpalignr $12,%ymm12,%ymm12,%ymm12 6353 vmovdqa %ymm8,0+128(%rbp) 6354 vmovdqa L$rol16(%rip),%ymm8 6355 vpaddd %ymm7,%ymm3,%ymm3 6356 vpaddd %ymm6,%ymm2,%ymm2 6357 vpaddd %ymm5,%ymm1,%ymm1 6358 vpaddd %ymm4,%ymm0,%ymm0 6359 vpxor %ymm3,%ymm15,%ymm15 6360 vpxor %ymm2,%ymm14,%ymm14 6361 vpxor %ymm1,%ymm13,%ymm13 6362 vpxor %ymm0,%ymm12,%ymm12 6363 vpshufb %ymm8,%ymm15,%ymm15 6364 vpshufb %ymm8,%ymm14,%ymm14 6365 vpshufb %ymm8,%ymm13,%ymm13 6366 vpshufb %ymm8,%ymm12,%ymm12 6367 vpaddd 
%ymm15,%ymm11,%ymm11 6368 vpaddd %ymm14,%ymm10,%ymm10 6369 vpaddd %ymm13,%ymm9,%ymm9 6370 vpaddd 0+128(%rbp),%ymm12,%ymm8 6371 vpxor %ymm11,%ymm7,%ymm7 6372 vpxor %ymm10,%ymm6,%ymm6 6373 vpxor %ymm9,%ymm5,%ymm5 6374 vpxor %ymm8,%ymm4,%ymm4 6375 vmovdqa %ymm8,0+128(%rbp) 6376 vpsrld $20,%ymm7,%ymm8 6377 vpslld $32-20,%ymm7,%ymm7 6378 vpxor %ymm8,%ymm7,%ymm7 6379 vpsrld $20,%ymm6,%ymm8 6380 vpslld $32-20,%ymm6,%ymm6 6381 vpxor %ymm8,%ymm6,%ymm6 6382 vpsrld $20,%ymm5,%ymm8 6383 vpslld $32-20,%ymm5,%ymm5 6384 vpxor %ymm8,%ymm5,%ymm5 6385 vpsrld $20,%ymm4,%ymm8 6386 vpslld $32-20,%ymm4,%ymm4 6387 vpxor %ymm8,%ymm4,%ymm4 6388 vmovdqa L$rol8(%rip),%ymm8 6389 vpaddd %ymm7,%ymm3,%ymm3 6390 vpaddd %ymm6,%ymm2,%ymm2 6391 vpaddd %ymm5,%ymm1,%ymm1 6392 vpaddd %ymm4,%ymm0,%ymm0 6393 vpxor %ymm3,%ymm15,%ymm15 6394 vpxor %ymm2,%ymm14,%ymm14 6395 vpxor %ymm1,%ymm13,%ymm13 6396 vpxor %ymm0,%ymm12,%ymm12 6397 vpshufb %ymm8,%ymm15,%ymm15 6398 vpshufb %ymm8,%ymm14,%ymm14 6399 vpshufb %ymm8,%ymm13,%ymm13 6400 vpshufb %ymm8,%ymm12,%ymm12 6401 vpaddd %ymm15,%ymm11,%ymm11 6402 vpaddd %ymm14,%ymm10,%ymm10 6403 vpaddd %ymm13,%ymm9,%ymm9 6404 vpaddd 0+128(%rbp),%ymm12,%ymm8 6405 vpxor %ymm11,%ymm7,%ymm7 6406 vpxor %ymm10,%ymm6,%ymm6 6407 vpxor %ymm9,%ymm5,%ymm5 6408 vpxor %ymm8,%ymm4,%ymm4 6409 vmovdqa %ymm8,0+128(%rbp) 6410 vpsrld $25,%ymm7,%ymm8 6411 vpslld $32-25,%ymm7,%ymm7 6412 vpxor %ymm8,%ymm7,%ymm7 6413 vpsrld $25,%ymm6,%ymm8 6414 vpslld $32-25,%ymm6,%ymm6 6415 vpxor %ymm8,%ymm6,%ymm6 6416 vpsrld $25,%ymm5,%ymm8 6417 vpslld $32-25,%ymm5,%ymm5 6418 vpxor %ymm8,%ymm5,%ymm5 6419 vpsrld $25,%ymm4,%ymm8 6420 vpslld $32-25,%ymm4,%ymm4 6421 vpxor %ymm8,%ymm4,%ymm4 6422 vmovdqa 0+128(%rbp),%ymm8 6423 vpalignr $12,%ymm7,%ymm7,%ymm7 6424 vpalignr $8,%ymm11,%ymm11,%ymm11 6425 vpalignr $4,%ymm15,%ymm15,%ymm15 6426 vpalignr $12,%ymm6,%ymm6,%ymm6 6427 vpalignr $8,%ymm10,%ymm10,%ymm10 6428 vpalignr $4,%ymm14,%ymm14,%ymm14 6429 vpalignr $12,%ymm5,%ymm5,%ymm5 6430 vpalignr $8,%ymm9,%ymm9,%ymm9 6431 vpalignr $4,%ymm13,%ymm13,%ymm13 6432 vpalignr $12,%ymm4,%ymm4,%ymm4 6433 vpalignr $8,%ymm8,%ymm8,%ymm8 6434 vpalignr $4,%ymm12,%ymm12,%ymm12 6435 6436 decq %r10 6437 jnz L$seal_avx2_init_rounds 6438 vpaddd L$chacha20_consts(%rip),%ymm3,%ymm3 6439 vpaddd 0+64(%rbp),%ymm7,%ymm7 6440 vpaddd 0+96(%rbp),%ymm11,%ymm11 6441 vpaddd 0+256(%rbp),%ymm15,%ymm15 6442 vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 6443 vpaddd 0+64(%rbp),%ymm6,%ymm6 6444 vpaddd 0+96(%rbp),%ymm10,%ymm10 6445 vpaddd 0+224(%rbp),%ymm14,%ymm14 6446 vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 6447 vpaddd 0+64(%rbp),%ymm5,%ymm5 6448 vpaddd 0+96(%rbp),%ymm9,%ymm9 6449 vpaddd 0+192(%rbp),%ymm13,%ymm13 6450 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 6451 vpaddd 0+64(%rbp),%ymm4,%ymm4 6452 vpaddd 0+96(%rbp),%ymm8,%ymm8 6453 vpaddd 0+160(%rbp),%ymm12,%ymm12 6454 6455 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 6456 vperm2i128 $0x02,%ymm3,%ymm7,%ymm15 6457 vperm2i128 $0x13,%ymm3,%ymm7,%ymm3 6458 vpand L$clamp(%rip),%ymm15,%ymm15 6459 vmovdqa %ymm15,0+0(%rbp) 6460 movq %r8,%r8 6461 call poly_hash_ad_internal 6462 6463 vpxor 0(%rsi),%ymm3,%ymm3 6464 vpxor 32(%rsi),%ymm11,%ymm11 6465 vmovdqu %ymm3,0(%rdi) 6466 vmovdqu %ymm11,32(%rdi) 6467 vperm2i128 $0x02,%ymm2,%ymm6,%ymm15 6468 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 6469 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 6470 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 6471 vpxor 0+64(%rsi),%ymm15,%ymm15 6472 vpxor 32+64(%rsi),%ymm2,%ymm2 6473 vpxor 64+64(%rsi),%ymm6,%ymm6 6474 vpxor 96+64(%rsi),%ymm10,%ymm10 6475 vmovdqu %ymm15,0+64(%rdi) 6476 vmovdqu 
%ymm2,32+64(%rdi) 6477 vmovdqu %ymm6,64+64(%rdi) 6478 vmovdqu %ymm10,96+64(%rdi) 6479 vperm2i128 $0x02,%ymm1,%ymm5,%ymm15 6480 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 6481 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 6482 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 6483 vpxor 0+192(%rsi),%ymm15,%ymm15 6484 vpxor 32+192(%rsi),%ymm1,%ymm1 6485 vpxor 64+192(%rsi),%ymm5,%ymm5 6486 vpxor 96+192(%rsi),%ymm9,%ymm9 6487 vmovdqu %ymm15,0+192(%rdi) 6488 vmovdqu %ymm1,32+192(%rdi) 6489 vmovdqu %ymm5,64+192(%rdi) 6490 vmovdqu %ymm9,96+192(%rdi) 6491 vperm2i128 $0x13,%ymm0,%ymm4,%ymm15 6492 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 6493 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 6494 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 6495 vmovdqa %ymm15,%ymm8 6496 6497 leaq 320(%rsi),%rsi 6498 subq $320,%rbx 6499 movq $320,%rcx 6500 cmpq $128,%rbx 6501 jbe L$seal_avx2_short_hash_remainder 6502 vpxor 0(%rsi),%ymm0,%ymm0 6503 vpxor 32(%rsi),%ymm4,%ymm4 6504 vpxor 64(%rsi),%ymm8,%ymm8 6505 vpxor 96(%rsi),%ymm12,%ymm12 6506 vmovdqu %ymm0,320(%rdi) 6507 vmovdqu %ymm4,352(%rdi) 6508 vmovdqu %ymm8,384(%rdi) 6509 vmovdqu %ymm12,416(%rdi) 6510 leaq 128(%rsi),%rsi 6511 subq $128,%rbx 6512 movq $8,%rcx 6513 movq $2,%r8 6514 cmpq $128,%rbx 6515 jbe L$seal_avx2_tail_128 6516 cmpq $256,%rbx 6517 jbe L$seal_avx2_tail_256 6518 cmpq $384,%rbx 6519 jbe L$seal_avx2_tail_384 6520 cmpq $512,%rbx 6521 jbe L$seal_avx2_tail_512 6522 vmovdqa L$chacha20_consts(%rip),%ymm0 6523 vmovdqa 0+64(%rbp),%ymm4 6524 vmovdqa 0+96(%rbp),%ymm8 6525 vmovdqa %ymm0,%ymm1 6526 vmovdqa %ymm4,%ymm5 6527 vmovdqa %ymm8,%ymm9 6528 vmovdqa %ymm0,%ymm2 6529 vmovdqa %ymm4,%ymm6 6530 vmovdqa %ymm8,%ymm10 6531 vmovdqa %ymm0,%ymm3 6532 vmovdqa %ymm4,%ymm7 6533 vmovdqa %ymm8,%ymm11 6534 vmovdqa L$avx2_inc(%rip),%ymm12 6535 vpaddd 0+160(%rbp),%ymm12,%ymm15 6536 vpaddd %ymm15,%ymm12,%ymm14 6537 vpaddd %ymm14,%ymm12,%ymm13 6538 vpaddd %ymm13,%ymm12,%ymm12 6539 vmovdqa %ymm15,0+256(%rbp) 6540 vmovdqa %ymm14,0+224(%rbp) 6541 vmovdqa %ymm13,0+192(%rbp) 6542 vmovdqa %ymm12,0+160(%rbp) 6543 vmovdqa %ymm8,0+128(%rbp) 6544 vmovdqa L$rol16(%rip),%ymm8 6545 vpaddd %ymm7,%ymm3,%ymm3 6546 vpaddd %ymm6,%ymm2,%ymm2 6547 vpaddd %ymm5,%ymm1,%ymm1 6548 vpaddd %ymm4,%ymm0,%ymm0 6549 vpxor %ymm3,%ymm15,%ymm15 6550 vpxor %ymm2,%ymm14,%ymm14 6551 vpxor %ymm1,%ymm13,%ymm13 6552 vpxor %ymm0,%ymm12,%ymm12 6553 vpshufb %ymm8,%ymm15,%ymm15 6554 vpshufb %ymm8,%ymm14,%ymm14 6555 vpshufb %ymm8,%ymm13,%ymm13 6556 vpshufb %ymm8,%ymm12,%ymm12 6557 vpaddd %ymm15,%ymm11,%ymm11 6558 vpaddd %ymm14,%ymm10,%ymm10 6559 vpaddd %ymm13,%ymm9,%ymm9 6560 vpaddd 0+128(%rbp),%ymm12,%ymm8 6561 vpxor %ymm11,%ymm7,%ymm7 6562 vpxor %ymm10,%ymm6,%ymm6 6563 vpxor %ymm9,%ymm5,%ymm5 6564 vpxor %ymm8,%ymm4,%ymm4 6565 vmovdqa %ymm8,0+128(%rbp) 6566 vpsrld $20,%ymm7,%ymm8 6567 vpslld $32-20,%ymm7,%ymm7 6568 vpxor %ymm8,%ymm7,%ymm7 6569 vpsrld $20,%ymm6,%ymm8 6570 vpslld $32-20,%ymm6,%ymm6 6571 vpxor %ymm8,%ymm6,%ymm6 6572 vpsrld $20,%ymm5,%ymm8 6573 vpslld $32-20,%ymm5,%ymm5 6574 vpxor %ymm8,%ymm5,%ymm5 6575 vpsrld $20,%ymm4,%ymm8 6576 vpslld $32-20,%ymm4,%ymm4 6577 vpxor %ymm8,%ymm4,%ymm4 6578 vmovdqa L$rol8(%rip),%ymm8 6579 vpaddd %ymm7,%ymm3,%ymm3 6580 vpaddd %ymm6,%ymm2,%ymm2 6581 vpaddd %ymm5,%ymm1,%ymm1 6582 vpaddd %ymm4,%ymm0,%ymm0 6583 vpxor %ymm3,%ymm15,%ymm15 6584 vpxor %ymm2,%ymm14,%ymm14 6585 vpxor %ymm1,%ymm13,%ymm13 6586 vpxor %ymm0,%ymm12,%ymm12 6587 vpshufb %ymm8,%ymm15,%ymm15 6588 vpshufb %ymm8,%ymm14,%ymm14 6589 vpshufb %ymm8,%ymm13,%ymm13 6590 vpshufb %ymm8,%ymm12,%ymm12 6591 vpaddd %ymm15,%ymm11,%ymm11 6592 vpaddd %ymm14,%ymm10,%ymm10 6593 
vpaddd %ymm13,%ymm9,%ymm9 6594 vpaddd 0+128(%rbp),%ymm12,%ymm8 6595 vpxor %ymm11,%ymm7,%ymm7 6596 vpxor %ymm10,%ymm6,%ymm6 6597 vpxor %ymm9,%ymm5,%ymm5 6598 vpxor %ymm8,%ymm4,%ymm4 6599 vmovdqa %ymm8,0+128(%rbp) 6600 vpsrld $25,%ymm7,%ymm8 6601 vpslld $32-25,%ymm7,%ymm7 6602 vpxor %ymm8,%ymm7,%ymm7 6603 vpsrld $25,%ymm6,%ymm8 6604 vpslld $32-25,%ymm6,%ymm6 6605 vpxor %ymm8,%ymm6,%ymm6 6606 vpsrld $25,%ymm5,%ymm8 6607 vpslld $32-25,%ymm5,%ymm5 6608 vpxor %ymm8,%ymm5,%ymm5 6609 vpsrld $25,%ymm4,%ymm8 6610 vpslld $32-25,%ymm4,%ymm4 6611 vpxor %ymm8,%ymm4,%ymm4 6612 vmovdqa 0+128(%rbp),%ymm8 6613 vpalignr $4,%ymm7,%ymm7,%ymm7 6614 vpalignr $8,%ymm11,%ymm11,%ymm11 6615 vpalignr $12,%ymm15,%ymm15,%ymm15 6616 vpalignr $4,%ymm6,%ymm6,%ymm6 6617 vpalignr $8,%ymm10,%ymm10,%ymm10 6618 vpalignr $12,%ymm14,%ymm14,%ymm14 6619 vpalignr $4,%ymm5,%ymm5,%ymm5 6620 vpalignr $8,%ymm9,%ymm9,%ymm9 6621 vpalignr $12,%ymm13,%ymm13,%ymm13 6622 vpalignr $4,%ymm4,%ymm4,%ymm4 6623 vpalignr $8,%ymm8,%ymm8,%ymm8 6624 vpalignr $12,%ymm12,%ymm12,%ymm12 6625 vmovdqa %ymm8,0+128(%rbp) 6626 vmovdqa L$rol16(%rip),%ymm8 6627 vpaddd %ymm7,%ymm3,%ymm3 6628 vpaddd %ymm6,%ymm2,%ymm2 6629 vpaddd %ymm5,%ymm1,%ymm1 6630 vpaddd %ymm4,%ymm0,%ymm0 6631 vpxor %ymm3,%ymm15,%ymm15 6632 vpxor %ymm2,%ymm14,%ymm14 6633 vpxor %ymm1,%ymm13,%ymm13 6634 vpxor %ymm0,%ymm12,%ymm12 6635 vpshufb %ymm8,%ymm15,%ymm15 6636 vpshufb %ymm8,%ymm14,%ymm14 6637 vpshufb %ymm8,%ymm13,%ymm13 6638 vpshufb %ymm8,%ymm12,%ymm12 6639 vpaddd %ymm15,%ymm11,%ymm11 6640 vpaddd %ymm14,%ymm10,%ymm10 6641 vpaddd %ymm13,%ymm9,%ymm9 6642 vpaddd 0+128(%rbp),%ymm12,%ymm8 6643 vpxor %ymm11,%ymm7,%ymm7 6644 vpxor %ymm10,%ymm6,%ymm6 6645 vpxor %ymm9,%ymm5,%ymm5 6646 vpxor %ymm8,%ymm4,%ymm4 6647 vmovdqa %ymm8,0+128(%rbp) 6648 vpsrld $20,%ymm7,%ymm8 6649 vpslld $32-20,%ymm7,%ymm7 6650 vpxor %ymm8,%ymm7,%ymm7 6651 vpsrld $20,%ymm6,%ymm8 6652 vpslld $32-20,%ymm6,%ymm6 6653 vpxor %ymm8,%ymm6,%ymm6 6654 vpsrld $20,%ymm5,%ymm8 6655 vpslld $32-20,%ymm5,%ymm5 6656 vpxor %ymm8,%ymm5,%ymm5 6657 vpsrld $20,%ymm4,%ymm8 6658 vpslld $32-20,%ymm4,%ymm4 6659 vpxor %ymm8,%ymm4,%ymm4 6660 vmovdqa L$rol8(%rip),%ymm8 6661 vpaddd %ymm7,%ymm3,%ymm3 6662 vpaddd %ymm6,%ymm2,%ymm2 6663 vpaddd %ymm5,%ymm1,%ymm1 6664 vpaddd %ymm4,%ymm0,%ymm0 6665 vpxor %ymm3,%ymm15,%ymm15 6666 vpxor %ymm2,%ymm14,%ymm14 6667 vpxor %ymm1,%ymm13,%ymm13 6668 vpxor %ymm0,%ymm12,%ymm12 6669 vpshufb %ymm8,%ymm15,%ymm15 6670 vpshufb %ymm8,%ymm14,%ymm14 6671 vpshufb %ymm8,%ymm13,%ymm13 6672 vpshufb %ymm8,%ymm12,%ymm12 6673 vpaddd %ymm15,%ymm11,%ymm11 6674 vpaddd %ymm14,%ymm10,%ymm10 6675 vpaddd %ymm13,%ymm9,%ymm9 6676 vpaddd 0+128(%rbp),%ymm12,%ymm8 6677 vpxor %ymm11,%ymm7,%ymm7 6678 vpxor %ymm10,%ymm6,%ymm6 6679 vpxor %ymm9,%ymm5,%ymm5 6680 vpxor %ymm8,%ymm4,%ymm4 6681 vmovdqa %ymm8,0+128(%rbp) 6682 vpsrld $25,%ymm7,%ymm8 6683 vpslld $32-25,%ymm7,%ymm7 6684 vpxor %ymm8,%ymm7,%ymm7 6685 vpsrld $25,%ymm6,%ymm8 6686 vpslld $32-25,%ymm6,%ymm6 6687 vpxor %ymm8,%ymm6,%ymm6 6688 vpsrld $25,%ymm5,%ymm8 6689 vpslld $32-25,%ymm5,%ymm5 6690 vpxor %ymm8,%ymm5,%ymm5 6691 vpsrld $25,%ymm4,%ymm8 6692 vpslld $32-25,%ymm4,%ymm4 6693 vpxor %ymm8,%ymm4,%ymm4 6694 vmovdqa 0+128(%rbp),%ymm8 6695 vpalignr $12,%ymm7,%ymm7,%ymm7 6696 vpalignr $8,%ymm11,%ymm11,%ymm11 6697 vpalignr $4,%ymm15,%ymm15,%ymm15 6698 vpalignr $12,%ymm6,%ymm6,%ymm6 6699 vpalignr $8,%ymm10,%ymm10,%ymm10 6700 vpalignr $4,%ymm14,%ymm14,%ymm14 6701 vpalignr $12,%ymm5,%ymm5,%ymm5 6702 vpalignr $8,%ymm9,%ymm9,%ymm9 6703 vpalignr $4,%ymm13,%ymm13,%ymm13 6704 vpalignr $12,%ymm4,%ymm4,%ymm4 
6705 vpalignr $8,%ymm8,%ymm8,%ymm8 6706 vpalignr $4,%ymm12,%ymm12,%ymm12 6707 vmovdqa %ymm8,0+128(%rbp) 6708 vmovdqa L$rol16(%rip),%ymm8 6709 vpaddd %ymm7,%ymm3,%ymm3 6710 vpaddd %ymm6,%ymm2,%ymm2 6711 vpaddd %ymm5,%ymm1,%ymm1 6712 vpaddd %ymm4,%ymm0,%ymm0 6713 vpxor %ymm3,%ymm15,%ymm15 6714 vpxor %ymm2,%ymm14,%ymm14 6715 vpxor %ymm1,%ymm13,%ymm13 6716 vpxor %ymm0,%ymm12,%ymm12 6717 vpshufb %ymm8,%ymm15,%ymm15 6718 vpshufb %ymm8,%ymm14,%ymm14 6719 vpshufb %ymm8,%ymm13,%ymm13 6720 vpshufb %ymm8,%ymm12,%ymm12 6721 vpaddd %ymm15,%ymm11,%ymm11 6722 vpaddd %ymm14,%ymm10,%ymm10 6723 vpaddd %ymm13,%ymm9,%ymm9 6724 vpaddd 0+128(%rbp),%ymm12,%ymm8 6725 vpxor %ymm11,%ymm7,%ymm7 6726 vpxor %ymm10,%ymm6,%ymm6 6727 vpxor %ymm9,%ymm5,%ymm5 6728 vpxor %ymm8,%ymm4,%ymm4 6729 vmovdqa %ymm8,0+128(%rbp) 6730 vpsrld $20,%ymm7,%ymm8 6731 vpslld $32-20,%ymm7,%ymm7 6732 vpxor %ymm8,%ymm7,%ymm7 6733 vpsrld $20,%ymm6,%ymm8 6734 vpslld $32-20,%ymm6,%ymm6 6735 vpxor %ymm8,%ymm6,%ymm6 6736 vpsrld $20,%ymm5,%ymm8 6737 vpslld $32-20,%ymm5,%ymm5 6738 vpxor %ymm8,%ymm5,%ymm5 6739 vpsrld $20,%ymm4,%ymm8 6740 vpslld $32-20,%ymm4,%ymm4 6741 vpxor %ymm8,%ymm4,%ymm4 6742 vmovdqa L$rol8(%rip),%ymm8 6743 vpaddd %ymm7,%ymm3,%ymm3 6744 vpaddd %ymm6,%ymm2,%ymm2 6745 vpaddd %ymm5,%ymm1,%ymm1 6746 vpaddd %ymm4,%ymm0,%ymm0 6747 vpxor %ymm3,%ymm15,%ymm15 6748 6749 subq $16,%rdi 6750 movq $9,%rcx 6751 jmp L$seal_avx2_main_loop_rounds_entry 6752.p2align 5 6753L$seal_avx2_main_loop: 6754 vmovdqa L$chacha20_consts(%rip),%ymm0 6755 vmovdqa 0+64(%rbp),%ymm4 6756 vmovdqa 0+96(%rbp),%ymm8 6757 vmovdqa %ymm0,%ymm1 6758 vmovdqa %ymm4,%ymm5 6759 vmovdqa %ymm8,%ymm9 6760 vmovdqa %ymm0,%ymm2 6761 vmovdqa %ymm4,%ymm6 6762 vmovdqa %ymm8,%ymm10 6763 vmovdqa %ymm0,%ymm3 6764 vmovdqa %ymm4,%ymm7 6765 vmovdqa %ymm8,%ymm11 6766 vmovdqa L$avx2_inc(%rip),%ymm12 6767 vpaddd 0+160(%rbp),%ymm12,%ymm15 6768 vpaddd %ymm15,%ymm12,%ymm14 6769 vpaddd %ymm14,%ymm12,%ymm13 6770 vpaddd %ymm13,%ymm12,%ymm12 6771 vmovdqa %ymm15,0+256(%rbp) 6772 vmovdqa %ymm14,0+224(%rbp) 6773 vmovdqa %ymm13,0+192(%rbp) 6774 vmovdqa %ymm12,0+160(%rbp) 6775 6776 movq $10,%rcx 6777.p2align 5 6778L$seal_avx2_main_loop_rounds: 6779 addq 0+0(%rdi),%r10 6780 adcq 8+0(%rdi),%r11 6781 adcq $1,%r12 6782 vmovdqa %ymm8,0+128(%rbp) 6783 vmovdqa L$rol16(%rip),%ymm8 6784 vpaddd %ymm7,%ymm3,%ymm3 6785 vpaddd %ymm6,%ymm2,%ymm2 6786 vpaddd %ymm5,%ymm1,%ymm1 6787 vpaddd %ymm4,%ymm0,%ymm0 6788 vpxor %ymm3,%ymm15,%ymm15 6789 vpxor %ymm2,%ymm14,%ymm14 6790 vpxor %ymm1,%ymm13,%ymm13 6791 vpxor %ymm0,%ymm12,%ymm12 6792 movq 0+0+0(%rbp),%rdx 6793 movq %rdx,%r15 6794 mulxq %r10,%r13,%r14 6795 mulxq %r11,%rax,%rdx 6796 imulq %r12,%r15 6797 addq %rax,%r14 6798 adcq %rdx,%r15 6799 vpshufb %ymm8,%ymm15,%ymm15 6800 vpshufb %ymm8,%ymm14,%ymm14 6801 vpshufb %ymm8,%ymm13,%ymm13 6802 vpshufb %ymm8,%ymm12,%ymm12 6803 vpaddd %ymm15,%ymm11,%ymm11 6804 vpaddd %ymm14,%ymm10,%ymm10 6805 vpaddd %ymm13,%ymm9,%ymm9 6806 vpaddd 0+128(%rbp),%ymm12,%ymm8 6807 vpxor %ymm11,%ymm7,%ymm7 6808 movq 8+0+0(%rbp),%rdx 6809 mulxq %r10,%r10,%rax 6810 addq %r10,%r14 6811 mulxq %r11,%r11,%r9 6812 adcq %r11,%r15 6813 adcq $0,%r9 6814 imulq %r12,%rdx 6815 vpxor %ymm10,%ymm6,%ymm6 6816 vpxor %ymm9,%ymm5,%ymm5 6817 vpxor %ymm8,%ymm4,%ymm4 6818 vmovdqa %ymm8,0+128(%rbp) 6819 vpsrld $20,%ymm7,%ymm8 6820 vpslld $32-20,%ymm7,%ymm7 6821 vpxor %ymm8,%ymm7,%ymm7 6822 vpsrld $20,%ymm6,%ymm8 6823 vpslld $32-20,%ymm6,%ymm6 6824 vpxor %ymm8,%ymm6,%ymm6 6825 vpsrld $20,%ymm5,%ymm8 6826 vpslld $32-20,%ymm5,%ymm5 6827 addq %rax,%r15 6828 adcq %rdx,%r9 6829 
vpxor %ymm8,%ymm5,%ymm5 6830 vpsrld $20,%ymm4,%ymm8 6831 vpslld $32-20,%ymm4,%ymm4 6832 vpxor %ymm8,%ymm4,%ymm4 6833 vmovdqa L$rol8(%rip),%ymm8 6834 vpaddd %ymm7,%ymm3,%ymm3 6835 vpaddd %ymm6,%ymm2,%ymm2 6836 vpaddd %ymm5,%ymm1,%ymm1 6837 vpaddd %ymm4,%ymm0,%ymm0 6838 vpxor %ymm3,%ymm15,%ymm15 6839 movq %r13,%r10 6840 movq %r14,%r11 6841 movq %r15,%r12 6842 andq $3,%r12 6843 movq %r15,%r13 6844 andq $-4,%r13 6845 movq %r9,%r14 6846 shrdq $2,%r9,%r15 6847 shrq $2,%r9 6848 addq %r13,%r15 6849 adcq %r14,%r9 6850 addq %r15,%r10 6851 adcq %r9,%r11 6852 adcq $0,%r12 6853 6854L$seal_avx2_main_loop_rounds_entry: 6855 vpxor %ymm2,%ymm14,%ymm14 6856 vpxor %ymm1,%ymm13,%ymm13 6857 vpxor %ymm0,%ymm12,%ymm12 6858 vpshufb %ymm8,%ymm15,%ymm15 6859 vpshufb %ymm8,%ymm14,%ymm14 6860 vpshufb %ymm8,%ymm13,%ymm13 6861 vpshufb %ymm8,%ymm12,%ymm12 6862 vpaddd %ymm15,%ymm11,%ymm11 6863 vpaddd %ymm14,%ymm10,%ymm10 6864 addq 0+16(%rdi),%r10 6865 adcq 8+16(%rdi),%r11 6866 adcq $1,%r12 6867 vpaddd %ymm13,%ymm9,%ymm9 6868 vpaddd 0+128(%rbp),%ymm12,%ymm8 6869 vpxor %ymm11,%ymm7,%ymm7 6870 vpxor %ymm10,%ymm6,%ymm6 6871 vpxor %ymm9,%ymm5,%ymm5 6872 vpxor %ymm8,%ymm4,%ymm4 6873 vmovdqa %ymm8,0+128(%rbp) 6874 vpsrld $25,%ymm7,%ymm8 6875 movq 0+0+0(%rbp),%rdx 6876 movq %rdx,%r15 6877 mulxq %r10,%r13,%r14 6878 mulxq %r11,%rax,%rdx 6879 imulq %r12,%r15 6880 addq %rax,%r14 6881 adcq %rdx,%r15 6882 vpslld $32-25,%ymm7,%ymm7 6883 vpxor %ymm8,%ymm7,%ymm7 6884 vpsrld $25,%ymm6,%ymm8 6885 vpslld $32-25,%ymm6,%ymm6 6886 vpxor %ymm8,%ymm6,%ymm6 6887 vpsrld $25,%ymm5,%ymm8 6888 vpslld $32-25,%ymm5,%ymm5 6889 vpxor %ymm8,%ymm5,%ymm5 6890 vpsrld $25,%ymm4,%ymm8 6891 vpslld $32-25,%ymm4,%ymm4 6892 vpxor %ymm8,%ymm4,%ymm4 6893 vmovdqa 0+128(%rbp),%ymm8 6894 vpalignr $4,%ymm7,%ymm7,%ymm7 6895 vpalignr $8,%ymm11,%ymm11,%ymm11 6896 vpalignr $12,%ymm15,%ymm15,%ymm15 6897 vpalignr $4,%ymm6,%ymm6,%ymm6 6898 vpalignr $8,%ymm10,%ymm10,%ymm10 6899 vpalignr $12,%ymm14,%ymm14,%ymm14 6900 movq 8+0+0(%rbp),%rdx 6901 mulxq %r10,%r10,%rax 6902 addq %r10,%r14 6903 mulxq %r11,%r11,%r9 6904 adcq %r11,%r15 6905 adcq $0,%r9 6906 imulq %r12,%rdx 6907 vpalignr $4,%ymm5,%ymm5,%ymm5 6908 vpalignr $8,%ymm9,%ymm9,%ymm9 6909 vpalignr $12,%ymm13,%ymm13,%ymm13 6910 vpalignr $4,%ymm4,%ymm4,%ymm4 6911 vpalignr $8,%ymm8,%ymm8,%ymm8 6912 vpalignr $12,%ymm12,%ymm12,%ymm12 6913 vmovdqa %ymm8,0+128(%rbp) 6914 vmovdqa L$rol16(%rip),%ymm8 6915 vpaddd %ymm7,%ymm3,%ymm3 6916 vpaddd %ymm6,%ymm2,%ymm2 6917 vpaddd %ymm5,%ymm1,%ymm1 6918 vpaddd %ymm4,%ymm0,%ymm0 6919 vpxor %ymm3,%ymm15,%ymm15 6920 vpxor %ymm2,%ymm14,%ymm14 6921 vpxor %ymm1,%ymm13,%ymm13 6922 vpxor %ymm0,%ymm12,%ymm12 6923 vpshufb %ymm8,%ymm15,%ymm15 6924 vpshufb %ymm8,%ymm14,%ymm14 6925 addq %rax,%r15 6926 adcq %rdx,%r9 6927 vpshufb %ymm8,%ymm13,%ymm13 6928 vpshufb %ymm8,%ymm12,%ymm12 6929 vpaddd %ymm15,%ymm11,%ymm11 6930 vpaddd %ymm14,%ymm10,%ymm10 6931 vpaddd %ymm13,%ymm9,%ymm9 6932 vpaddd 0+128(%rbp),%ymm12,%ymm8 6933 vpxor %ymm11,%ymm7,%ymm7 6934 vpxor %ymm10,%ymm6,%ymm6 6935 vpxor %ymm9,%ymm5,%ymm5 6936 movq %r13,%r10 6937 movq %r14,%r11 6938 movq %r15,%r12 6939 andq $3,%r12 6940 movq %r15,%r13 6941 andq $-4,%r13 6942 movq %r9,%r14 6943 shrdq $2,%r9,%r15 6944 shrq $2,%r9 6945 addq %r13,%r15 6946 adcq %r14,%r9 6947 addq %r15,%r10 6948 adcq %r9,%r11 6949 adcq $0,%r12 6950 vpxor %ymm8,%ymm4,%ymm4 6951 vmovdqa %ymm8,0+128(%rbp) 6952 vpsrld $20,%ymm7,%ymm8 6953 vpslld $32-20,%ymm7,%ymm7 6954 vpxor %ymm8,%ymm7,%ymm7 6955 vpsrld $20,%ymm6,%ymm8 6956 vpslld $32-20,%ymm6,%ymm6 6957 vpxor %ymm8,%ymm6,%ymm6 6958 addq 
0+32(%rdi),%r10 6959 adcq 8+32(%rdi),%r11 6960 adcq $1,%r12 6961 6962 leaq 48(%rdi),%rdi 6963 vpsrld $20,%ymm5,%ymm8 6964 vpslld $32-20,%ymm5,%ymm5 6965 vpxor %ymm8,%ymm5,%ymm5 6966 vpsrld $20,%ymm4,%ymm8 6967 vpslld $32-20,%ymm4,%ymm4 6968 vpxor %ymm8,%ymm4,%ymm4 6969 vmovdqa L$rol8(%rip),%ymm8 6970 vpaddd %ymm7,%ymm3,%ymm3 6971 vpaddd %ymm6,%ymm2,%ymm2 6972 vpaddd %ymm5,%ymm1,%ymm1 6973 vpaddd %ymm4,%ymm0,%ymm0 6974 vpxor %ymm3,%ymm15,%ymm15 6975 vpxor %ymm2,%ymm14,%ymm14 6976 vpxor %ymm1,%ymm13,%ymm13 6977 vpxor %ymm0,%ymm12,%ymm12 6978 vpshufb %ymm8,%ymm15,%ymm15 6979 vpshufb %ymm8,%ymm14,%ymm14 6980 vpshufb %ymm8,%ymm13,%ymm13 6981 movq 0+0+0(%rbp),%rdx 6982 movq %rdx,%r15 6983 mulxq %r10,%r13,%r14 6984 mulxq %r11,%rax,%rdx 6985 imulq %r12,%r15 6986 addq %rax,%r14 6987 adcq %rdx,%r15 6988 vpshufb %ymm8,%ymm12,%ymm12 6989 vpaddd %ymm15,%ymm11,%ymm11 6990 vpaddd %ymm14,%ymm10,%ymm10 6991 vpaddd %ymm13,%ymm9,%ymm9 6992 vpaddd 0+128(%rbp),%ymm12,%ymm8 6993 vpxor %ymm11,%ymm7,%ymm7 6994 vpxor %ymm10,%ymm6,%ymm6 6995 vpxor %ymm9,%ymm5,%ymm5 6996 movq 8+0+0(%rbp),%rdx 6997 mulxq %r10,%r10,%rax 6998 addq %r10,%r14 6999 mulxq %r11,%r11,%r9 7000 adcq %r11,%r15 7001 adcq $0,%r9 7002 imulq %r12,%rdx 7003 vpxor %ymm8,%ymm4,%ymm4 7004 vmovdqa %ymm8,0+128(%rbp) 7005 vpsrld $25,%ymm7,%ymm8 7006 vpslld $32-25,%ymm7,%ymm7 7007 vpxor %ymm8,%ymm7,%ymm7 7008 vpsrld $25,%ymm6,%ymm8 7009 vpslld $32-25,%ymm6,%ymm6 7010 vpxor %ymm8,%ymm6,%ymm6 7011 addq %rax,%r15 7012 adcq %rdx,%r9 7013 vpsrld $25,%ymm5,%ymm8 7014 vpslld $32-25,%ymm5,%ymm5 7015 vpxor %ymm8,%ymm5,%ymm5 7016 vpsrld $25,%ymm4,%ymm8 7017 vpslld $32-25,%ymm4,%ymm4 7018 vpxor %ymm8,%ymm4,%ymm4 7019 vmovdqa 0+128(%rbp),%ymm8 7020 vpalignr $12,%ymm7,%ymm7,%ymm7 7021 vpalignr $8,%ymm11,%ymm11,%ymm11 7022 vpalignr $4,%ymm15,%ymm15,%ymm15 7023 vpalignr $12,%ymm6,%ymm6,%ymm6 7024 vpalignr $8,%ymm10,%ymm10,%ymm10 7025 vpalignr $4,%ymm14,%ymm14,%ymm14 7026 vpalignr $12,%ymm5,%ymm5,%ymm5 7027 vpalignr $8,%ymm9,%ymm9,%ymm9 7028 vpalignr $4,%ymm13,%ymm13,%ymm13 7029 vpalignr $12,%ymm4,%ymm4,%ymm4 7030 vpalignr $8,%ymm8,%ymm8,%ymm8 7031 movq %r13,%r10 7032 movq %r14,%r11 7033 movq %r15,%r12 7034 andq $3,%r12 7035 movq %r15,%r13 7036 andq $-4,%r13 7037 movq %r9,%r14 7038 shrdq $2,%r9,%r15 7039 shrq $2,%r9 7040 addq %r13,%r15 7041 adcq %r14,%r9 7042 addq %r15,%r10 7043 adcq %r9,%r11 7044 adcq $0,%r12 7045 vpalignr $4,%ymm12,%ymm12,%ymm12 7046 7047 decq %rcx 7048 jne L$seal_avx2_main_loop_rounds 7049 vpaddd L$chacha20_consts(%rip),%ymm3,%ymm3 7050 vpaddd 0+64(%rbp),%ymm7,%ymm7 7051 vpaddd 0+96(%rbp),%ymm11,%ymm11 7052 vpaddd 0+256(%rbp),%ymm15,%ymm15 7053 vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 7054 vpaddd 0+64(%rbp),%ymm6,%ymm6 7055 vpaddd 0+96(%rbp),%ymm10,%ymm10 7056 vpaddd 0+224(%rbp),%ymm14,%ymm14 7057 vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 7058 vpaddd 0+64(%rbp),%ymm5,%ymm5 7059 vpaddd 0+96(%rbp),%ymm9,%ymm9 7060 vpaddd 0+192(%rbp),%ymm13,%ymm13 7061 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 7062 vpaddd 0+64(%rbp),%ymm4,%ymm4 7063 vpaddd 0+96(%rbp),%ymm8,%ymm8 7064 vpaddd 0+160(%rbp),%ymm12,%ymm12 7065 7066 vmovdqa %ymm0,0+128(%rbp) 7067 addq 0+0(%rdi),%r10 7068 adcq 8+0(%rdi),%r11 7069 adcq $1,%r12 7070 movq 0+0+0(%rbp),%rdx 7071 movq %rdx,%r15 7072 mulxq %r10,%r13,%r14 7073 mulxq %r11,%rax,%rdx 7074 imulq %r12,%r15 7075 addq %rax,%r14 7076 adcq %rdx,%r15 7077 movq 8+0+0(%rbp),%rdx 7078 mulxq %r10,%r10,%rax 7079 addq %r10,%r14 7080 mulxq %r11,%r11,%r9 7081 adcq %r11,%r15 7082 adcq $0,%r9 7083 imulq %r12,%rdx 7084 addq %rax,%r15 7085 adcq %rdx,%r9 
7086 movq %r13,%r10 7087 movq %r14,%r11 7088 movq %r15,%r12 7089 andq $3,%r12 7090 movq %r15,%r13 7091 andq $-4,%r13 7092 movq %r9,%r14 7093 shrdq $2,%r9,%r15 7094 shrq $2,%r9 7095 addq %r13,%r15 7096 adcq %r14,%r9 7097 addq %r15,%r10 7098 adcq %r9,%r11 7099 adcq $0,%r12 7100 addq 0+16(%rdi),%r10 7101 adcq 8+16(%rdi),%r11 7102 adcq $1,%r12 7103 movq 0+0+0(%rbp),%rdx 7104 movq %rdx,%r15 7105 mulxq %r10,%r13,%r14 7106 mulxq %r11,%rax,%rdx 7107 imulq %r12,%r15 7108 addq %rax,%r14 7109 adcq %rdx,%r15 7110 movq 8+0+0(%rbp),%rdx 7111 mulxq %r10,%r10,%rax 7112 addq %r10,%r14 7113 mulxq %r11,%r11,%r9 7114 adcq %r11,%r15 7115 adcq $0,%r9 7116 imulq %r12,%rdx 7117 addq %rax,%r15 7118 adcq %rdx,%r9 7119 movq %r13,%r10 7120 movq %r14,%r11 7121 movq %r15,%r12 7122 andq $3,%r12 7123 movq %r15,%r13 7124 andq $-4,%r13 7125 movq %r9,%r14 7126 shrdq $2,%r9,%r15 7127 shrq $2,%r9 7128 addq %r13,%r15 7129 adcq %r14,%r9 7130 addq %r15,%r10 7131 adcq %r9,%r11 7132 adcq $0,%r12 7133 7134 leaq 32(%rdi),%rdi 7135 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 7136 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 7137 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 7138 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 7139 vpxor 0+0(%rsi),%ymm0,%ymm0 7140 vpxor 32+0(%rsi),%ymm3,%ymm3 7141 vpxor 64+0(%rsi),%ymm7,%ymm7 7142 vpxor 96+0(%rsi),%ymm11,%ymm11 7143 vmovdqu %ymm0,0+0(%rdi) 7144 vmovdqu %ymm3,32+0(%rdi) 7145 vmovdqu %ymm7,64+0(%rdi) 7146 vmovdqu %ymm11,96+0(%rdi) 7147 7148 vmovdqa 0+128(%rbp),%ymm0 7149 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 7150 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 7151 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 7152 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 7153 vpxor 0+128(%rsi),%ymm3,%ymm3 7154 vpxor 32+128(%rsi),%ymm2,%ymm2 7155 vpxor 64+128(%rsi),%ymm6,%ymm6 7156 vpxor 96+128(%rsi),%ymm10,%ymm10 7157 vmovdqu %ymm3,0+128(%rdi) 7158 vmovdqu %ymm2,32+128(%rdi) 7159 vmovdqu %ymm6,64+128(%rdi) 7160 vmovdqu %ymm10,96+128(%rdi) 7161 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 7162 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 7163 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 7164 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 7165 vpxor 0+256(%rsi),%ymm3,%ymm3 7166 vpxor 32+256(%rsi),%ymm1,%ymm1 7167 vpxor 64+256(%rsi),%ymm5,%ymm5 7168 vpxor 96+256(%rsi),%ymm9,%ymm9 7169 vmovdqu %ymm3,0+256(%rdi) 7170 vmovdqu %ymm1,32+256(%rdi) 7171 vmovdqu %ymm5,64+256(%rdi) 7172 vmovdqu %ymm9,96+256(%rdi) 7173 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 7174 vperm2i128 $0x13,%ymm0,%ymm4,%ymm4 7175 vperm2i128 $0x02,%ymm8,%ymm12,%ymm0 7176 vperm2i128 $0x13,%ymm8,%ymm12,%ymm8 7177 vpxor 0+384(%rsi),%ymm3,%ymm3 7178 vpxor 32+384(%rsi),%ymm0,%ymm0 7179 vpxor 64+384(%rsi),%ymm4,%ymm4 7180 vpxor 96+384(%rsi),%ymm8,%ymm8 7181 vmovdqu %ymm3,0+384(%rdi) 7182 vmovdqu %ymm0,32+384(%rdi) 7183 vmovdqu %ymm4,64+384(%rdi) 7184 vmovdqu %ymm8,96+384(%rdi) 7185 7186 leaq 512(%rsi),%rsi 7187 subq $512,%rbx 7188 cmpq $512,%rbx 7189 jg L$seal_avx2_main_loop 7190 7191 addq 0+0(%rdi),%r10 7192 adcq 8+0(%rdi),%r11 7193 adcq $1,%r12 7194 movq 0+0+0(%rbp),%rdx 7195 movq %rdx,%r15 7196 mulxq %r10,%r13,%r14 7197 mulxq %r11,%rax,%rdx 7198 imulq %r12,%r15 7199 addq %rax,%r14 7200 adcq %rdx,%r15 7201 movq 8+0+0(%rbp),%rdx 7202 mulxq %r10,%r10,%rax 7203 addq %r10,%r14 7204 mulxq %r11,%r11,%r9 7205 adcq %r11,%r15 7206 adcq $0,%r9 7207 imulq %r12,%rdx 7208 addq %rax,%r15 7209 adcq %rdx,%r9 7210 movq %r13,%r10 7211 movq %r14,%r11 7212 movq %r15,%r12 7213 andq $3,%r12 7214 movq %r15,%r13 7215 andq $-4,%r13 7216 movq %r9,%r14 7217 shrdq $2,%r9,%r15 7218 shrq $2,%r9 7219 addq %r13,%r15 7220 adcq %r14,%r9 7221 addq %r15,%r10 7222 adcq %r9,%r11 7223 adcq 
	addq 0+16(%rdi),%r10
	adcq 8+16(%rdi),%r11
	adcq $1,%r12
	movq 0+0+0(%rbp),%rdx
	movq %rdx,%r15
	mulxq %r10,%r13,%r14
	mulxq %r11,%rax,%rdx
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0+0(%rbp),%rdx
	mulxq %r10,%r10,%rax
	addq %r10,%r14
	mulxq %r11,%r11,%r9
	adcq %r11,%r15
	adcq $0,%r9
	imulq %r12,%rdx
	addq %rax,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r15
	adcq %r14,%r9
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	leaq 32(%rdi),%rdi
	movq $10,%rcx
	xorq %r8,%r8

	cmpq $384,%rbx
	ja L$seal_avx2_tail_512
	cmpq $256,%rbx
	ja L$seal_avx2_tail_384
	cmpq $128,%rbx
	ja L$seal_avx2_tail_256

L$seal_avx2_tail_128:
	vmovdqa L$chacha20_consts(%rip),%ymm0
	vmovdqa 0+64(%rbp),%ymm4
	vmovdqa 0+96(%rbp),%ymm8
	vmovdqa L$avx2_inc(%rip),%ymm12
	vpaddd 0+160(%rbp),%ymm12,%ymm12
	vmovdqa %ymm12,0+160(%rbp)

L$seal_avx2_tail_128_rounds_and_3xhash:
	addq 0+0(%rdi),%r10
	adcq 8+0(%rdi),%r11
	adcq $1,%r12
	movq 0+0+0(%rbp),%rdx
	movq %rdx,%r15
	mulxq %r10,%r13,%r14
	mulxq %r11,%rax,%rdx
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0+0(%rbp),%rdx
	mulxq %r10,%r10,%rax
	addq %r10,%r14
	mulxq %r11,%r11,%r9
	adcq %r11,%r15
	adcq $0,%r9
	imulq %r12,%rdx
	addq %rax,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r15
	adcq %r14,%r9
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	leaq 16(%rdi),%rdi
L$seal_avx2_tail_128_rounds_and_2xhash:
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb L$rol16(%rip),%ymm12,%ymm12
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm8,%ymm4,%ymm4
	vpsrld $20,%ymm4,%ymm3
	vpslld $12,%ymm4,%ymm4
	vpxor %ymm3,%ymm4,%ymm4
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb L$rol8(%rip),%ymm12,%ymm12
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm8,%ymm4,%ymm4
	vpslld $7,%ymm4,%ymm3
	vpsrld $25,%ymm4,%ymm4
	vpxor %ymm3,%ymm4,%ymm4
	vpalignr $12,%ymm12,%ymm12,%ymm12
	vpalignr $8,%ymm8,%ymm8,%ymm8
	vpalignr $4,%ymm4,%ymm4,%ymm4
	addq 0+0(%rdi),%r10
	adcq 8+0(%rdi),%r11
	adcq $1,%r12
	movq 0+0+0(%rbp),%rdx
	movq %rdx,%r15
	mulxq %r10,%r13,%r14
	mulxq %r11,%rax,%rdx
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0+0(%rbp),%rdx
	mulxq %r10,%r10,%rax
	addq %r10,%r14
	mulxq %r11,%r11,%r9
	adcq %r11,%r15
	adcq $0,%r9
	imulq %r12,%rdx
	addq %rax,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r15
	adcq %r14,%r9
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb L$rol16(%rip),%ymm12,%ymm12
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm8,%ymm4,%ymm4
	vpsrld $20,%ymm4,%ymm3
	vpslld $12,%ymm4,%ymm4
	vpxor %ymm3,%ymm4,%ymm4
	vpaddd %ymm4,%ymm0,%ymm0
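/* Annotation added by the editor; not part of the generated perlasm output.
   The rol8/rol7 half of this ChaCha20 quarter-round continues below,
   interleaved with Poly1305 hashing of ciphertext that was already written. */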
7375 vpxor %ymm0,%ymm12,%ymm12 7376 vpshufb L$rol8(%rip),%ymm12,%ymm12 7377 vpaddd %ymm12,%ymm8,%ymm8 7378 vpxor %ymm8,%ymm4,%ymm4 7379 vpslld $7,%ymm4,%ymm3 7380 vpsrld $25,%ymm4,%ymm4 7381 vpxor %ymm3,%ymm4,%ymm4 7382 vpalignr $4,%ymm12,%ymm12,%ymm12 7383 vpalignr $8,%ymm8,%ymm8,%ymm8 7384 vpalignr $12,%ymm4,%ymm4,%ymm4 7385 addq 0+16(%rdi),%r10 7386 adcq 8+16(%rdi),%r11 7387 adcq $1,%r12 7388 movq 0+0+0(%rbp),%rdx 7389 movq %rdx,%r15 7390 mulxq %r10,%r13,%r14 7391 mulxq %r11,%rax,%rdx 7392 imulq %r12,%r15 7393 addq %rax,%r14 7394 adcq %rdx,%r15 7395 movq 8+0+0(%rbp),%rdx 7396 mulxq %r10,%r10,%rax 7397 addq %r10,%r14 7398 mulxq %r11,%r11,%r9 7399 adcq %r11,%r15 7400 adcq $0,%r9 7401 imulq %r12,%rdx 7402 addq %rax,%r15 7403 adcq %rdx,%r9 7404 movq %r13,%r10 7405 movq %r14,%r11 7406 movq %r15,%r12 7407 andq $3,%r12 7408 movq %r15,%r13 7409 andq $-4,%r13 7410 movq %r9,%r14 7411 shrdq $2,%r9,%r15 7412 shrq $2,%r9 7413 addq %r13,%r15 7414 adcq %r14,%r9 7415 addq %r15,%r10 7416 adcq %r9,%r11 7417 adcq $0,%r12 7418 7419 leaq 32(%rdi),%rdi 7420 decq %rcx 7421 jg L$seal_avx2_tail_128_rounds_and_3xhash 7422 decq %r8 7423 jge L$seal_avx2_tail_128_rounds_and_2xhash 7424 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 7425 vpaddd 0+64(%rbp),%ymm4,%ymm4 7426 vpaddd 0+96(%rbp),%ymm8,%ymm8 7427 vpaddd 0+160(%rbp),%ymm12,%ymm12 7428 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 7429 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 7430 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 7431 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 7432 vmovdqa %ymm3,%ymm8 7433 7434 jmp L$seal_avx2_short_loop 7435 7436L$seal_avx2_tail_256: 7437 vmovdqa L$chacha20_consts(%rip),%ymm0 7438 vmovdqa 0+64(%rbp),%ymm4 7439 vmovdqa 0+96(%rbp),%ymm8 7440 vmovdqa %ymm0,%ymm1 7441 vmovdqa %ymm4,%ymm5 7442 vmovdqa %ymm8,%ymm9 7443 vmovdqa L$avx2_inc(%rip),%ymm12 7444 vpaddd 0+160(%rbp),%ymm12,%ymm13 7445 vpaddd %ymm13,%ymm12,%ymm12 7446 vmovdqa %ymm12,0+160(%rbp) 7447 vmovdqa %ymm13,0+192(%rbp) 7448 7449L$seal_avx2_tail_256_rounds_and_3xhash: 7450 addq 0+0(%rdi),%r10 7451 adcq 8+0(%rdi),%r11 7452 adcq $1,%r12 7453 movq 0+0+0(%rbp),%rax 7454 movq %rax,%r15 7455 mulq %r10 7456 movq %rax,%r13 7457 movq %rdx,%r14 7458 movq 0+0+0(%rbp),%rax 7459 mulq %r11 7460 imulq %r12,%r15 7461 addq %rax,%r14 7462 adcq %rdx,%r15 7463 movq 8+0+0(%rbp),%rax 7464 movq %rax,%r9 7465 mulq %r10 7466 addq %rax,%r14 7467 adcq $0,%rdx 7468 movq %rdx,%r10 7469 movq 8+0+0(%rbp),%rax 7470 mulq %r11 7471 addq %rax,%r15 7472 adcq $0,%rdx 7473 imulq %r12,%r9 7474 addq %r10,%r15 7475 adcq %rdx,%r9 7476 movq %r13,%r10 7477 movq %r14,%r11 7478 movq %r15,%r12 7479 andq $3,%r12 7480 movq %r15,%r13 7481 andq $-4,%r13 7482 movq %r9,%r14 7483 shrdq $2,%r9,%r15 7484 shrq $2,%r9 7485 addq %r13,%r15 7486 adcq %r14,%r9 7487 addq %r15,%r10 7488 adcq %r9,%r11 7489 adcq $0,%r12 7490 7491 leaq 16(%rdi),%rdi 7492L$seal_avx2_tail_256_rounds_and_2xhash: 7493 vpaddd %ymm4,%ymm0,%ymm0 7494 vpxor %ymm0,%ymm12,%ymm12 7495 vpshufb L$rol16(%rip),%ymm12,%ymm12 7496 vpaddd %ymm12,%ymm8,%ymm8 7497 vpxor %ymm8,%ymm4,%ymm4 7498 vpsrld $20,%ymm4,%ymm3 7499 vpslld $12,%ymm4,%ymm4 7500 vpxor %ymm3,%ymm4,%ymm4 7501 vpaddd %ymm4,%ymm0,%ymm0 7502 vpxor %ymm0,%ymm12,%ymm12 7503 vpshufb L$rol8(%rip),%ymm12,%ymm12 7504 vpaddd %ymm12,%ymm8,%ymm8 7505 vpxor %ymm8,%ymm4,%ymm4 7506 vpslld $7,%ymm4,%ymm3 7507 vpsrld $25,%ymm4,%ymm4 7508 vpxor %ymm3,%ymm4,%ymm4 7509 vpalignr $12,%ymm12,%ymm12,%ymm12 7510 vpalignr $8,%ymm8,%ymm8,%ymm8 7511 vpalignr $4,%ymm4,%ymm4,%ymm4 7512 vpaddd %ymm5,%ymm1,%ymm1 7513 vpxor %ymm1,%ymm13,%ymm13 7514 vpshufb 
L$rol16(%rip),%ymm13,%ymm13 7515 vpaddd %ymm13,%ymm9,%ymm9 7516 vpxor %ymm9,%ymm5,%ymm5 7517 vpsrld $20,%ymm5,%ymm3 7518 vpslld $12,%ymm5,%ymm5 7519 vpxor %ymm3,%ymm5,%ymm5 7520 vpaddd %ymm5,%ymm1,%ymm1 7521 vpxor %ymm1,%ymm13,%ymm13 7522 vpshufb L$rol8(%rip),%ymm13,%ymm13 7523 vpaddd %ymm13,%ymm9,%ymm9 7524 vpxor %ymm9,%ymm5,%ymm5 7525 vpslld $7,%ymm5,%ymm3 7526 vpsrld $25,%ymm5,%ymm5 7527 vpxor %ymm3,%ymm5,%ymm5 7528 vpalignr $12,%ymm13,%ymm13,%ymm13 7529 vpalignr $8,%ymm9,%ymm9,%ymm9 7530 vpalignr $4,%ymm5,%ymm5,%ymm5 7531 addq 0+0(%rdi),%r10 7532 adcq 8+0(%rdi),%r11 7533 adcq $1,%r12 7534 movq 0+0+0(%rbp),%rax 7535 movq %rax,%r15 7536 mulq %r10 7537 movq %rax,%r13 7538 movq %rdx,%r14 7539 movq 0+0+0(%rbp),%rax 7540 mulq %r11 7541 imulq %r12,%r15 7542 addq %rax,%r14 7543 adcq %rdx,%r15 7544 movq 8+0+0(%rbp),%rax 7545 movq %rax,%r9 7546 mulq %r10 7547 addq %rax,%r14 7548 adcq $0,%rdx 7549 movq %rdx,%r10 7550 movq 8+0+0(%rbp),%rax 7551 mulq %r11 7552 addq %rax,%r15 7553 adcq $0,%rdx 7554 imulq %r12,%r9 7555 addq %r10,%r15 7556 adcq %rdx,%r9 7557 movq %r13,%r10 7558 movq %r14,%r11 7559 movq %r15,%r12 7560 andq $3,%r12 7561 movq %r15,%r13 7562 andq $-4,%r13 7563 movq %r9,%r14 7564 shrdq $2,%r9,%r15 7565 shrq $2,%r9 7566 addq %r13,%r15 7567 adcq %r14,%r9 7568 addq %r15,%r10 7569 adcq %r9,%r11 7570 adcq $0,%r12 7571 vpaddd %ymm4,%ymm0,%ymm0 7572 vpxor %ymm0,%ymm12,%ymm12 7573 vpshufb L$rol16(%rip),%ymm12,%ymm12 7574 vpaddd %ymm12,%ymm8,%ymm8 7575 vpxor %ymm8,%ymm4,%ymm4 7576 vpsrld $20,%ymm4,%ymm3 7577 vpslld $12,%ymm4,%ymm4 7578 vpxor %ymm3,%ymm4,%ymm4 7579 vpaddd %ymm4,%ymm0,%ymm0 7580 vpxor %ymm0,%ymm12,%ymm12 7581 vpshufb L$rol8(%rip),%ymm12,%ymm12 7582 vpaddd %ymm12,%ymm8,%ymm8 7583 vpxor %ymm8,%ymm4,%ymm4 7584 vpslld $7,%ymm4,%ymm3 7585 vpsrld $25,%ymm4,%ymm4 7586 vpxor %ymm3,%ymm4,%ymm4 7587 vpalignr $4,%ymm12,%ymm12,%ymm12 7588 vpalignr $8,%ymm8,%ymm8,%ymm8 7589 vpalignr $12,%ymm4,%ymm4,%ymm4 7590 vpaddd %ymm5,%ymm1,%ymm1 7591 vpxor %ymm1,%ymm13,%ymm13 7592 vpshufb L$rol16(%rip),%ymm13,%ymm13 7593 vpaddd %ymm13,%ymm9,%ymm9 7594 vpxor %ymm9,%ymm5,%ymm5 7595 vpsrld $20,%ymm5,%ymm3 7596 vpslld $12,%ymm5,%ymm5 7597 vpxor %ymm3,%ymm5,%ymm5 7598 vpaddd %ymm5,%ymm1,%ymm1 7599 vpxor %ymm1,%ymm13,%ymm13 7600 vpshufb L$rol8(%rip),%ymm13,%ymm13 7601 vpaddd %ymm13,%ymm9,%ymm9 7602 vpxor %ymm9,%ymm5,%ymm5 7603 vpslld $7,%ymm5,%ymm3 7604 vpsrld $25,%ymm5,%ymm5 7605 vpxor %ymm3,%ymm5,%ymm5 7606 vpalignr $4,%ymm13,%ymm13,%ymm13 7607 vpalignr $8,%ymm9,%ymm9,%ymm9 7608 vpalignr $12,%ymm5,%ymm5,%ymm5 7609 addq 0+16(%rdi),%r10 7610 adcq 8+16(%rdi),%r11 7611 adcq $1,%r12 7612 movq 0+0+0(%rbp),%rax 7613 movq %rax,%r15 7614 mulq %r10 7615 movq %rax,%r13 7616 movq %rdx,%r14 7617 movq 0+0+0(%rbp),%rax 7618 mulq %r11 7619 imulq %r12,%r15 7620 addq %rax,%r14 7621 adcq %rdx,%r15 7622 movq 8+0+0(%rbp),%rax 7623 movq %rax,%r9 7624 mulq %r10 7625 addq %rax,%r14 7626 adcq $0,%rdx 7627 movq %rdx,%r10 7628 movq 8+0+0(%rbp),%rax 7629 mulq %r11 7630 addq %rax,%r15 7631 adcq $0,%rdx 7632 imulq %r12,%r9 7633 addq %r10,%r15 7634 adcq %rdx,%r9 7635 movq %r13,%r10 7636 movq %r14,%r11 7637 movq %r15,%r12 7638 andq $3,%r12 7639 movq %r15,%r13 7640 andq $-4,%r13 7641 movq %r9,%r14 7642 shrdq $2,%r9,%r15 7643 shrq $2,%r9 7644 addq %r13,%r15 7645 adcq %r14,%r9 7646 addq %r15,%r10 7647 adcq %r9,%r11 7648 adcq $0,%r12 7649 7650 leaq 32(%rdi),%rdi 7651 decq %rcx 7652 jg L$seal_avx2_tail_256_rounds_and_3xhash 7653 decq %r8 7654 jge L$seal_avx2_tail_256_rounds_and_2xhash 7655 vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 7656 vpaddd 
0+64(%rbp),%ymm5,%ymm5 7657 vpaddd 0+96(%rbp),%ymm9,%ymm9 7658 vpaddd 0+192(%rbp),%ymm13,%ymm13 7659 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 7660 vpaddd 0+64(%rbp),%ymm4,%ymm4 7661 vpaddd 0+96(%rbp),%ymm8,%ymm8 7662 vpaddd 0+160(%rbp),%ymm12,%ymm12 7663 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 7664 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 7665 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 7666 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 7667 vpxor 0+0(%rsi),%ymm3,%ymm3 7668 vpxor 32+0(%rsi),%ymm1,%ymm1 7669 vpxor 64+0(%rsi),%ymm5,%ymm5 7670 vpxor 96+0(%rsi),%ymm9,%ymm9 7671 vmovdqu %ymm3,0+0(%rdi) 7672 vmovdqu %ymm1,32+0(%rdi) 7673 vmovdqu %ymm5,64+0(%rdi) 7674 vmovdqu %ymm9,96+0(%rdi) 7675 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 7676 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 7677 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 7678 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 7679 vmovdqa %ymm3,%ymm8 7680 7681 movq $128,%rcx 7682 leaq 128(%rsi),%rsi 7683 subq $128,%rbx 7684 jmp L$seal_avx2_short_hash_remainder 7685 7686L$seal_avx2_tail_384: 7687 vmovdqa L$chacha20_consts(%rip),%ymm0 7688 vmovdqa 0+64(%rbp),%ymm4 7689 vmovdqa 0+96(%rbp),%ymm8 7690 vmovdqa %ymm0,%ymm1 7691 vmovdqa %ymm4,%ymm5 7692 vmovdqa %ymm8,%ymm9 7693 vmovdqa %ymm0,%ymm2 7694 vmovdqa %ymm4,%ymm6 7695 vmovdqa %ymm8,%ymm10 7696 vmovdqa L$avx2_inc(%rip),%ymm12 7697 vpaddd 0+160(%rbp),%ymm12,%ymm14 7698 vpaddd %ymm14,%ymm12,%ymm13 7699 vpaddd %ymm13,%ymm12,%ymm12 7700 vmovdqa %ymm12,0+160(%rbp) 7701 vmovdqa %ymm13,0+192(%rbp) 7702 vmovdqa %ymm14,0+224(%rbp) 7703 7704L$seal_avx2_tail_384_rounds_and_3xhash: 7705 addq 0+0(%rdi),%r10 7706 adcq 8+0(%rdi),%r11 7707 adcq $1,%r12 7708 movq 0+0+0(%rbp),%rax 7709 movq %rax,%r15 7710 mulq %r10 7711 movq %rax,%r13 7712 movq %rdx,%r14 7713 movq 0+0+0(%rbp),%rax 7714 mulq %r11 7715 imulq %r12,%r15 7716 addq %rax,%r14 7717 adcq %rdx,%r15 7718 movq 8+0+0(%rbp),%rax 7719 movq %rax,%r9 7720 mulq %r10 7721 addq %rax,%r14 7722 adcq $0,%rdx 7723 movq %rdx,%r10 7724 movq 8+0+0(%rbp),%rax 7725 mulq %r11 7726 addq %rax,%r15 7727 adcq $0,%rdx 7728 imulq %r12,%r9 7729 addq %r10,%r15 7730 adcq %rdx,%r9 7731 movq %r13,%r10 7732 movq %r14,%r11 7733 movq %r15,%r12 7734 andq $3,%r12 7735 movq %r15,%r13 7736 andq $-4,%r13 7737 movq %r9,%r14 7738 shrdq $2,%r9,%r15 7739 shrq $2,%r9 7740 addq %r13,%r15 7741 adcq %r14,%r9 7742 addq %r15,%r10 7743 adcq %r9,%r11 7744 adcq $0,%r12 7745 7746 leaq 16(%rdi),%rdi 7747L$seal_avx2_tail_384_rounds_and_2xhash: 7748 vpaddd %ymm4,%ymm0,%ymm0 7749 vpxor %ymm0,%ymm12,%ymm12 7750 vpshufb L$rol16(%rip),%ymm12,%ymm12 7751 vpaddd %ymm12,%ymm8,%ymm8 7752 vpxor %ymm8,%ymm4,%ymm4 7753 vpsrld $20,%ymm4,%ymm3 7754 vpslld $12,%ymm4,%ymm4 7755 vpxor %ymm3,%ymm4,%ymm4 7756 vpaddd %ymm4,%ymm0,%ymm0 7757 vpxor %ymm0,%ymm12,%ymm12 7758 vpshufb L$rol8(%rip),%ymm12,%ymm12 7759 vpaddd %ymm12,%ymm8,%ymm8 7760 vpxor %ymm8,%ymm4,%ymm4 7761 vpslld $7,%ymm4,%ymm3 7762 vpsrld $25,%ymm4,%ymm4 7763 vpxor %ymm3,%ymm4,%ymm4 7764 vpalignr $12,%ymm12,%ymm12,%ymm12 7765 vpalignr $8,%ymm8,%ymm8,%ymm8 7766 vpalignr $4,%ymm4,%ymm4,%ymm4 7767 vpaddd %ymm5,%ymm1,%ymm1 7768 vpxor %ymm1,%ymm13,%ymm13 7769 vpshufb L$rol16(%rip),%ymm13,%ymm13 7770 vpaddd %ymm13,%ymm9,%ymm9 7771 vpxor %ymm9,%ymm5,%ymm5 7772 vpsrld $20,%ymm5,%ymm3 7773 vpslld $12,%ymm5,%ymm5 7774 vpxor %ymm3,%ymm5,%ymm5 7775 vpaddd %ymm5,%ymm1,%ymm1 7776 vpxor %ymm1,%ymm13,%ymm13 7777 vpshufb L$rol8(%rip),%ymm13,%ymm13 7778 vpaddd %ymm13,%ymm9,%ymm9 7779 vpxor %ymm9,%ymm5,%ymm5 7780 vpslld $7,%ymm5,%ymm3 7781 vpsrld $25,%ymm5,%ymm5 7782 vpxor %ymm3,%ymm5,%ymm5 7783 vpalignr $12,%ymm13,%ymm13,%ymm13 7784 
vpalignr $8,%ymm9,%ymm9,%ymm9 7785 vpalignr $4,%ymm5,%ymm5,%ymm5 7786 addq 0+0(%rdi),%r10 7787 adcq 8+0(%rdi),%r11 7788 adcq $1,%r12 7789 movq 0+0+0(%rbp),%rax 7790 movq %rax,%r15 7791 mulq %r10 7792 movq %rax,%r13 7793 movq %rdx,%r14 7794 movq 0+0+0(%rbp),%rax 7795 mulq %r11 7796 imulq %r12,%r15 7797 addq %rax,%r14 7798 adcq %rdx,%r15 7799 movq 8+0+0(%rbp),%rax 7800 movq %rax,%r9 7801 mulq %r10 7802 addq %rax,%r14 7803 adcq $0,%rdx 7804 movq %rdx,%r10 7805 movq 8+0+0(%rbp),%rax 7806 mulq %r11 7807 addq %rax,%r15 7808 adcq $0,%rdx 7809 imulq %r12,%r9 7810 addq %r10,%r15 7811 adcq %rdx,%r9 7812 movq %r13,%r10 7813 movq %r14,%r11 7814 movq %r15,%r12 7815 andq $3,%r12 7816 movq %r15,%r13 7817 andq $-4,%r13 7818 movq %r9,%r14 7819 shrdq $2,%r9,%r15 7820 shrq $2,%r9 7821 addq %r13,%r15 7822 adcq %r14,%r9 7823 addq %r15,%r10 7824 adcq %r9,%r11 7825 adcq $0,%r12 7826 vpaddd %ymm6,%ymm2,%ymm2 7827 vpxor %ymm2,%ymm14,%ymm14 7828 vpshufb L$rol16(%rip),%ymm14,%ymm14 7829 vpaddd %ymm14,%ymm10,%ymm10 7830 vpxor %ymm10,%ymm6,%ymm6 7831 vpsrld $20,%ymm6,%ymm3 7832 vpslld $12,%ymm6,%ymm6 7833 vpxor %ymm3,%ymm6,%ymm6 7834 vpaddd %ymm6,%ymm2,%ymm2 7835 vpxor %ymm2,%ymm14,%ymm14 7836 vpshufb L$rol8(%rip),%ymm14,%ymm14 7837 vpaddd %ymm14,%ymm10,%ymm10 7838 vpxor %ymm10,%ymm6,%ymm6 7839 vpslld $7,%ymm6,%ymm3 7840 vpsrld $25,%ymm6,%ymm6 7841 vpxor %ymm3,%ymm6,%ymm6 7842 vpalignr $12,%ymm14,%ymm14,%ymm14 7843 vpalignr $8,%ymm10,%ymm10,%ymm10 7844 vpalignr $4,%ymm6,%ymm6,%ymm6 7845 vpaddd %ymm4,%ymm0,%ymm0 7846 vpxor %ymm0,%ymm12,%ymm12 7847 vpshufb L$rol16(%rip),%ymm12,%ymm12 7848 vpaddd %ymm12,%ymm8,%ymm8 7849 vpxor %ymm8,%ymm4,%ymm4 7850 vpsrld $20,%ymm4,%ymm3 7851 vpslld $12,%ymm4,%ymm4 7852 vpxor %ymm3,%ymm4,%ymm4 7853 vpaddd %ymm4,%ymm0,%ymm0 7854 vpxor %ymm0,%ymm12,%ymm12 7855 vpshufb L$rol8(%rip),%ymm12,%ymm12 7856 vpaddd %ymm12,%ymm8,%ymm8 7857 vpxor %ymm8,%ymm4,%ymm4 7858 vpslld $7,%ymm4,%ymm3 7859 vpsrld $25,%ymm4,%ymm4 7860 vpxor %ymm3,%ymm4,%ymm4 7861 vpalignr $4,%ymm12,%ymm12,%ymm12 7862 vpalignr $8,%ymm8,%ymm8,%ymm8 7863 vpalignr $12,%ymm4,%ymm4,%ymm4 7864 addq 0+16(%rdi),%r10 7865 adcq 8+16(%rdi),%r11 7866 adcq $1,%r12 7867 movq 0+0+0(%rbp),%rax 7868 movq %rax,%r15 7869 mulq %r10 7870 movq %rax,%r13 7871 movq %rdx,%r14 7872 movq 0+0+0(%rbp),%rax 7873 mulq %r11 7874 imulq %r12,%r15 7875 addq %rax,%r14 7876 adcq %rdx,%r15 7877 movq 8+0+0(%rbp),%rax 7878 movq %rax,%r9 7879 mulq %r10 7880 addq %rax,%r14 7881 adcq $0,%rdx 7882 movq %rdx,%r10 7883 movq 8+0+0(%rbp),%rax 7884 mulq %r11 7885 addq %rax,%r15 7886 adcq $0,%rdx 7887 imulq %r12,%r9 7888 addq %r10,%r15 7889 adcq %rdx,%r9 7890 movq %r13,%r10 7891 movq %r14,%r11 7892 movq %r15,%r12 7893 andq $3,%r12 7894 movq %r15,%r13 7895 andq $-4,%r13 7896 movq %r9,%r14 7897 shrdq $2,%r9,%r15 7898 shrq $2,%r9 7899 addq %r13,%r15 7900 adcq %r14,%r9 7901 addq %r15,%r10 7902 adcq %r9,%r11 7903 adcq $0,%r12 7904 vpaddd %ymm5,%ymm1,%ymm1 7905 vpxor %ymm1,%ymm13,%ymm13 7906 vpshufb L$rol16(%rip),%ymm13,%ymm13 7907 vpaddd %ymm13,%ymm9,%ymm9 7908 vpxor %ymm9,%ymm5,%ymm5 7909 vpsrld $20,%ymm5,%ymm3 7910 vpslld $12,%ymm5,%ymm5 7911 vpxor %ymm3,%ymm5,%ymm5 7912 vpaddd %ymm5,%ymm1,%ymm1 7913 vpxor %ymm1,%ymm13,%ymm13 7914 vpshufb L$rol8(%rip),%ymm13,%ymm13 7915 vpaddd %ymm13,%ymm9,%ymm9 7916 vpxor %ymm9,%ymm5,%ymm5 7917 vpslld $7,%ymm5,%ymm3 7918 vpsrld $25,%ymm5,%ymm5 7919 vpxor %ymm3,%ymm5,%ymm5 7920 vpalignr $4,%ymm13,%ymm13,%ymm13 7921 vpalignr $8,%ymm9,%ymm9,%ymm9 7922 vpalignr $12,%ymm5,%ymm5,%ymm5 7923 vpaddd %ymm6,%ymm2,%ymm2 7924 vpxor %ymm2,%ymm14,%ymm14 7925 
vpshufb L$rol16(%rip),%ymm14,%ymm14 7926 vpaddd %ymm14,%ymm10,%ymm10 7927 vpxor %ymm10,%ymm6,%ymm6 7928 vpsrld $20,%ymm6,%ymm3 7929 vpslld $12,%ymm6,%ymm6 7930 vpxor %ymm3,%ymm6,%ymm6 7931 vpaddd %ymm6,%ymm2,%ymm2 7932 vpxor %ymm2,%ymm14,%ymm14 7933 vpshufb L$rol8(%rip),%ymm14,%ymm14 7934 vpaddd %ymm14,%ymm10,%ymm10 7935 vpxor %ymm10,%ymm6,%ymm6 7936 vpslld $7,%ymm6,%ymm3 7937 vpsrld $25,%ymm6,%ymm6 7938 vpxor %ymm3,%ymm6,%ymm6 7939 vpalignr $4,%ymm14,%ymm14,%ymm14 7940 vpalignr $8,%ymm10,%ymm10,%ymm10 7941 vpalignr $12,%ymm6,%ymm6,%ymm6 7942 7943 leaq 32(%rdi),%rdi 7944 decq %rcx 7945 jg L$seal_avx2_tail_384_rounds_and_3xhash 7946 decq %r8 7947 jge L$seal_avx2_tail_384_rounds_and_2xhash 7948 vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 7949 vpaddd 0+64(%rbp),%ymm6,%ymm6 7950 vpaddd 0+96(%rbp),%ymm10,%ymm10 7951 vpaddd 0+224(%rbp),%ymm14,%ymm14 7952 vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 7953 vpaddd 0+64(%rbp),%ymm5,%ymm5 7954 vpaddd 0+96(%rbp),%ymm9,%ymm9 7955 vpaddd 0+192(%rbp),%ymm13,%ymm13 7956 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 7957 vpaddd 0+64(%rbp),%ymm4,%ymm4 7958 vpaddd 0+96(%rbp),%ymm8,%ymm8 7959 vpaddd 0+160(%rbp),%ymm12,%ymm12 7960 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 7961 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 7962 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 7963 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 7964 vpxor 0+0(%rsi),%ymm3,%ymm3 7965 vpxor 32+0(%rsi),%ymm2,%ymm2 7966 vpxor 64+0(%rsi),%ymm6,%ymm6 7967 vpxor 96+0(%rsi),%ymm10,%ymm10 7968 vmovdqu %ymm3,0+0(%rdi) 7969 vmovdqu %ymm2,32+0(%rdi) 7970 vmovdqu %ymm6,64+0(%rdi) 7971 vmovdqu %ymm10,96+0(%rdi) 7972 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 7973 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 7974 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 7975 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 7976 vpxor 0+128(%rsi),%ymm3,%ymm3 7977 vpxor 32+128(%rsi),%ymm1,%ymm1 7978 vpxor 64+128(%rsi),%ymm5,%ymm5 7979 vpxor 96+128(%rsi),%ymm9,%ymm9 7980 vmovdqu %ymm3,0+128(%rdi) 7981 vmovdqu %ymm1,32+128(%rdi) 7982 vmovdqu %ymm5,64+128(%rdi) 7983 vmovdqu %ymm9,96+128(%rdi) 7984 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 7985 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 7986 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 7987 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 7988 vmovdqa %ymm3,%ymm8 7989 7990 movq $256,%rcx 7991 leaq 256(%rsi),%rsi 7992 subq $256,%rbx 7993 jmp L$seal_avx2_short_hash_remainder 7994 7995L$seal_avx2_tail_512: 7996 vmovdqa L$chacha20_consts(%rip),%ymm0 7997 vmovdqa 0+64(%rbp),%ymm4 7998 vmovdqa 0+96(%rbp),%ymm8 7999 vmovdqa %ymm0,%ymm1 8000 vmovdqa %ymm4,%ymm5 8001 vmovdqa %ymm8,%ymm9 8002 vmovdqa %ymm0,%ymm2 8003 vmovdqa %ymm4,%ymm6 8004 vmovdqa %ymm8,%ymm10 8005 vmovdqa %ymm0,%ymm3 8006 vmovdqa %ymm4,%ymm7 8007 vmovdqa %ymm8,%ymm11 8008 vmovdqa L$avx2_inc(%rip),%ymm12 8009 vpaddd 0+160(%rbp),%ymm12,%ymm15 8010 vpaddd %ymm15,%ymm12,%ymm14 8011 vpaddd %ymm14,%ymm12,%ymm13 8012 vpaddd %ymm13,%ymm12,%ymm12 8013 vmovdqa %ymm15,0+256(%rbp) 8014 vmovdqa %ymm14,0+224(%rbp) 8015 vmovdqa %ymm13,0+192(%rbp) 8016 vmovdqa %ymm12,0+160(%rbp) 8017 8018L$seal_avx2_tail_512_rounds_and_3xhash: 8019 addq 0+0(%rdi),%r10 8020 adcq 8+0(%rdi),%r11 8021 adcq $1,%r12 8022 movq 0+0+0(%rbp),%rdx 8023 movq %rdx,%r15 8024 mulxq %r10,%r13,%r14 8025 mulxq %r11,%rax,%rdx 8026 imulq %r12,%r15 8027 addq %rax,%r14 8028 adcq %rdx,%r15 8029 movq 8+0+0(%rbp),%rdx 8030 mulxq %r10,%r10,%rax 8031 addq %r10,%r14 8032 mulxq %r11,%r11,%r9 8033 adcq %r11,%r15 8034 adcq $0,%r9 8035 imulq %r12,%rdx 8036 addq %rax,%r15 8037 adcq %rdx,%r9 8038 movq %r13,%r10 8039 movq %r14,%r11 8040 movq %r15,%r12 8041 andq $3,%r12 8042 movq %r15,%r13 
8043 andq $-4,%r13 8044 movq %r9,%r14 8045 shrdq $2,%r9,%r15 8046 shrq $2,%r9 8047 addq %r13,%r15 8048 adcq %r14,%r9 8049 addq %r15,%r10 8050 adcq %r9,%r11 8051 adcq $0,%r12 8052 8053 leaq 16(%rdi),%rdi 8054L$seal_avx2_tail_512_rounds_and_2xhash: 8055 vmovdqa %ymm8,0+128(%rbp) 8056 vmovdqa L$rol16(%rip),%ymm8 8057 vpaddd %ymm7,%ymm3,%ymm3 8058 vpaddd %ymm6,%ymm2,%ymm2 8059 vpaddd %ymm5,%ymm1,%ymm1 8060 vpaddd %ymm4,%ymm0,%ymm0 8061 vpxor %ymm3,%ymm15,%ymm15 8062 vpxor %ymm2,%ymm14,%ymm14 8063 vpxor %ymm1,%ymm13,%ymm13 8064 vpxor %ymm0,%ymm12,%ymm12 8065 vpshufb %ymm8,%ymm15,%ymm15 8066 vpshufb %ymm8,%ymm14,%ymm14 8067 vpshufb %ymm8,%ymm13,%ymm13 8068 vpshufb %ymm8,%ymm12,%ymm12 8069 vpaddd %ymm15,%ymm11,%ymm11 8070 vpaddd %ymm14,%ymm10,%ymm10 8071 vpaddd %ymm13,%ymm9,%ymm9 8072 vpaddd 0+128(%rbp),%ymm12,%ymm8 8073 vpxor %ymm11,%ymm7,%ymm7 8074 vpxor %ymm10,%ymm6,%ymm6 8075 addq 0+0(%rdi),%r10 8076 adcq 8+0(%rdi),%r11 8077 adcq $1,%r12 8078 vpxor %ymm9,%ymm5,%ymm5 8079 vpxor %ymm8,%ymm4,%ymm4 8080 vmovdqa %ymm8,0+128(%rbp) 8081 vpsrld $20,%ymm7,%ymm8 8082 vpslld $32-20,%ymm7,%ymm7 8083 vpxor %ymm8,%ymm7,%ymm7 8084 vpsrld $20,%ymm6,%ymm8 8085 vpslld $32-20,%ymm6,%ymm6 8086 vpxor %ymm8,%ymm6,%ymm6 8087 vpsrld $20,%ymm5,%ymm8 8088 vpslld $32-20,%ymm5,%ymm5 8089 vpxor %ymm8,%ymm5,%ymm5 8090 vpsrld $20,%ymm4,%ymm8 8091 vpslld $32-20,%ymm4,%ymm4 8092 vpxor %ymm8,%ymm4,%ymm4 8093 vmovdqa L$rol8(%rip),%ymm8 8094 vpaddd %ymm7,%ymm3,%ymm3 8095 vpaddd %ymm6,%ymm2,%ymm2 8096 vpaddd %ymm5,%ymm1,%ymm1 8097 vpaddd %ymm4,%ymm0,%ymm0 8098 movq 0+0+0(%rbp),%rdx 8099 movq %rdx,%r15 8100 mulxq %r10,%r13,%r14 8101 mulxq %r11,%rax,%rdx 8102 imulq %r12,%r15 8103 addq %rax,%r14 8104 adcq %rdx,%r15 8105 vpxor %ymm3,%ymm15,%ymm15 8106 vpxor %ymm2,%ymm14,%ymm14 8107 vpxor %ymm1,%ymm13,%ymm13 8108 vpxor %ymm0,%ymm12,%ymm12 8109 vpshufb %ymm8,%ymm15,%ymm15 8110 vpshufb %ymm8,%ymm14,%ymm14 8111 vpshufb %ymm8,%ymm13,%ymm13 8112 vpshufb %ymm8,%ymm12,%ymm12 8113 vpaddd %ymm15,%ymm11,%ymm11 8114 vpaddd %ymm14,%ymm10,%ymm10 8115 vpaddd %ymm13,%ymm9,%ymm9 8116 vpaddd 0+128(%rbp),%ymm12,%ymm8 8117 vpxor %ymm11,%ymm7,%ymm7 8118 vpxor %ymm10,%ymm6,%ymm6 8119 vpxor %ymm9,%ymm5,%ymm5 8120 vpxor %ymm8,%ymm4,%ymm4 8121 vmovdqa %ymm8,0+128(%rbp) 8122 vpsrld $25,%ymm7,%ymm8 8123 vpslld $32-25,%ymm7,%ymm7 8124 vpxor %ymm8,%ymm7,%ymm7 8125 movq 8+0+0(%rbp),%rdx 8126 mulxq %r10,%r10,%rax 8127 addq %r10,%r14 8128 mulxq %r11,%r11,%r9 8129 adcq %r11,%r15 8130 adcq $0,%r9 8131 imulq %r12,%rdx 8132 vpsrld $25,%ymm6,%ymm8 8133 vpslld $32-25,%ymm6,%ymm6 8134 vpxor %ymm8,%ymm6,%ymm6 8135 vpsrld $25,%ymm5,%ymm8 8136 vpslld $32-25,%ymm5,%ymm5 8137 vpxor %ymm8,%ymm5,%ymm5 8138 vpsrld $25,%ymm4,%ymm8 8139 vpslld $32-25,%ymm4,%ymm4 8140 vpxor %ymm8,%ymm4,%ymm4 8141 vmovdqa 0+128(%rbp),%ymm8 8142 vpalignr $4,%ymm7,%ymm7,%ymm7 8143 vpalignr $8,%ymm11,%ymm11,%ymm11 8144 vpalignr $12,%ymm15,%ymm15,%ymm15 8145 vpalignr $4,%ymm6,%ymm6,%ymm6 8146 vpalignr $8,%ymm10,%ymm10,%ymm10 8147 vpalignr $12,%ymm14,%ymm14,%ymm14 8148 vpalignr $4,%ymm5,%ymm5,%ymm5 8149 vpalignr $8,%ymm9,%ymm9,%ymm9 8150 vpalignr $12,%ymm13,%ymm13,%ymm13 8151 vpalignr $4,%ymm4,%ymm4,%ymm4 8152 addq %rax,%r15 8153 adcq %rdx,%r9 8154 vpalignr $8,%ymm8,%ymm8,%ymm8 8155 vpalignr $12,%ymm12,%ymm12,%ymm12 8156 vmovdqa %ymm8,0+128(%rbp) 8157 vmovdqa L$rol16(%rip),%ymm8 8158 vpaddd %ymm7,%ymm3,%ymm3 8159 vpaddd %ymm6,%ymm2,%ymm2 8160 vpaddd %ymm5,%ymm1,%ymm1 8161 vpaddd %ymm4,%ymm0,%ymm0 8162 vpxor %ymm3,%ymm15,%ymm15 8163 vpxor %ymm2,%ymm14,%ymm14 8164 vpxor %ymm1,%ymm13,%ymm13 8165 vpxor 
%ymm0,%ymm12,%ymm12 8166 vpshufb %ymm8,%ymm15,%ymm15 8167 vpshufb %ymm8,%ymm14,%ymm14 8168 vpshufb %ymm8,%ymm13,%ymm13 8169 vpshufb %ymm8,%ymm12,%ymm12 8170 vpaddd %ymm15,%ymm11,%ymm11 8171 vpaddd %ymm14,%ymm10,%ymm10 8172 vpaddd %ymm13,%ymm9,%ymm9 8173 vpaddd 0+128(%rbp),%ymm12,%ymm8 8174 movq %r13,%r10 8175 movq %r14,%r11 8176 movq %r15,%r12 8177 andq $3,%r12 8178 movq %r15,%r13 8179 andq $-4,%r13 8180 movq %r9,%r14 8181 shrdq $2,%r9,%r15 8182 shrq $2,%r9 8183 addq %r13,%r15 8184 adcq %r14,%r9 8185 addq %r15,%r10 8186 adcq %r9,%r11 8187 adcq $0,%r12 8188 vpxor %ymm11,%ymm7,%ymm7 8189 vpxor %ymm10,%ymm6,%ymm6 8190 vpxor %ymm9,%ymm5,%ymm5 8191 vpxor %ymm8,%ymm4,%ymm4 8192 vmovdqa %ymm8,0+128(%rbp) 8193 vpsrld $20,%ymm7,%ymm8 8194 vpslld $32-20,%ymm7,%ymm7 8195 vpxor %ymm8,%ymm7,%ymm7 8196 vpsrld $20,%ymm6,%ymm8 8197 vpslld $32-20,%ymm6,%ymm6 8198 vpxor %ymm8,%ymm6,%ymm6 8199 vpsrld $20,%ymm5,%ymm8 8200 vpslld $32-20,%ymm5,%ymm5 8201 vpxor %ymm8,%ymm5,%ymm5 8202 vpsrld $20,%ymm4,%ymm8 8203 vpslld $32-20,%ymm4,%ymm4 8204 vpxor %ymm8,%ymm4,%ymm4 8205 vmovdqa L$rol8(%rip),%ymm8 8206 vpaddd %ymm7,%ymm3,%ymm3 8207 vpaddd %ymm6,%ymm2,%ymm2 8208 addq 0+16(%rdi),%r10 8209 adcq 8+16(%rdi),%r11 8210 adcq $1,%r12 8211 vpaddd %ymm5,%ymm1,%ymm1 8212 vpaddd %ymm4,%ymm0,%ymm0 8213 vpxor %ymm3,%ymm15,%ymm15 8214 vpxor %ymm2,%ymm14,%ymm14 8215 vpxor %ymm1,%ymm13,%ymm13 8216 vpxor %ymm0,%ymm12,%ymm12 8217 vpshufb %ymm8,%ymm15,%ymm15 8218 vpshufb %ymm8,%ymm14,%ymm14 8219 vpshufb %ymm8,%ymm13,%ymm13 8220 vpshufb %ymm8,%ymm12,%ymm12 8221 vpaddd %ymm15,%ymm11,%ymm11 8222 vpaddd %ymm14,%ymm10,%ymm10 8223 vpaddd %ymm13,%ymm9,%ymm9 8224 vpaddd 0+128(%rbp),%ymm12,%ymm8 8225 vpxor %ymm11,%ymm7,%ymm7 8226 vpxor %ymm10,%ymm6,%ymm6 8227 vpxor %ymm9,%ymm5,%ymm5 8228 vpxor %ymm8,%ymm4,%ymm4 8229 vmovdqa %ymm8,0+128(%rbp) 8230 vpsrld $25,%ymm7,%ymm8 8231 movq 0+0+0(%rbp),%rdx 8232 movq %rdx,%r15 8233 mulxq %r10,%r13,%r14 8234 mulxq %r11,%rax,%rdx 8235 imulq %r12,%r15 8236 addq %rax,%r14 8237 adcq %rdx,%r15 8238 vpslld $32-25,%ymm7,%ymm7 8239 vpxor %ymm8,%ymm7,%ymm7 8240 vpsrld $25,%ymm6,%ymm8 8241 vpslld $32-25,%ymm6,%ymm6 8242 vpxor %ymm8,%ymm6,%ymm6 8243 vpsrld $25,%ymm5,%ymm8 8244 vpslld $32-25,%ymm5,%ymm5 8245 vpxor %ymm8,%ymm5,%ymm5 8246 vpsrld $25,%ymm4,%ymm8 8247 vpslld $32-25,%ymm4,%ymm4 8248 vpxor %ymm8,%ymm4,%ymm4 8249 vmovdqa 0+128(%rbp),%ymm8 8250 vpalignr $12,%ymm7,%ymm7,%ymm7 8251 vpalignr $8,%ymm11,%ymm11,%ymm11 8252 vpalignr $4,%ymm15,%ymm15,%ymm15 8253 vpalignr $12,%ymm6,%ymm6,%ymm6 8254 vpalignr $8,%ymm10,%ymm10,%ymm10 8255 vpalignr $4,%ymm14,%ymm14,%ymm14 8256 vpalignr $12,%ymm5,%ymm5,%ymm5 8257 vpalignr $8,%ymm9,%ymm9,%ymm9 8258 movq 8+0+0(%rbp),%rdx 8259 mulxq %r10,%r10,%rax 8260 addq %r10,%r14 8261 mulxq %r11,%r11,%r9 8262 adcq %r11,%r15 8263 adcq $0,%r9 8264 imulq %r12,%rdx 8265 vpalignr $4,%ymm13,%ymm13,%ymm13 8266 vpalignr $12,%ymm4,%ymm4,%ymm4 8267 vpalignr $8,%ymm8,%ymm8,%ymm8 8268 vpalignr $4,%ymm12,%ymm12,%ymm12 8269 8270 8271 8272 8273 8274 8275 8276 8277 8278 8279 8280 8281 8282 8283 8284 8285 addq %rax,%r15 8286 adcq %rdx,%r9 8287 8288 8289 8290 8291 8292 8293 8294 8295 8296 8297 8298 8299 8300 8301 8302 8303 8304 8305 8306 8307 movq %r13,%r10 8308 movq %r14,%r11 8309 movq %r15,%r12 8310 andq $3,%r12 8311 movq %r15,%r13 8312 andq $-4,%r13 8313 movq %r9,%r14 8314 shrdq $2,%r9,%r15 8315 shrq $2,%r9 8316 addq %r13,%r15 8317 adcq %r14,%r9 8318 addq %r15,%r10 8319 adcq %r9,%r11 8320 adcq $0,%r12 8321 8322 leaq 32(%rdi),%rdi 8323 decq %rcx 8324 jg L$seal_avx2_tail_512_rounds_and_3xhash 8325 decq 
%r8 8326 jge L$seal_avx2_tail_512_rounds_and_2xhash 8327 vpaddd L$chacha20_consts(%rip),%ymm3,%ymm3 8328 vpaddd 0+64(%rbp),%ymm7,%ymm7 8329 vpaddd 0+96(%rbp),%ymm11,%ymm11 8330 vpaddd 0+256(%rbp),%ymm15,%ymm15 8331 vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 8332 vpaddd 0+64(%rbp),%ymm6,%ymm6 8333 vpaddd 0+96(%rbp),%ymm10,%ymm10 8334 vpaddd 0+224(%rbp),%ymm14,%ymm14 8335 vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 8336 vpaddd 0+64(%rbp),%ymm5,%ymm5 8337 vpaddd 0+96(%rbp),%ymm9,%ymm9 8338 vpaddd 0+192(%rbp),%ymm13,%ymm13 8339 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 8340 vpaddd 0+64(%rbp),%ymm4,%ymm4 8341 vpaddd 0+96(%rbp),%ymm8,%ymm8 8342 vpaddd 0+160(%rbp),%ymm12,%ymm12 8343 8344 vmovdqa %ymm0,0+128(%rbp) 8345 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 8346 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 8347 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 8348 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 8349 vpxor 0+0(%rsi),%ymm0,%ymm0 8350 vpxor 32+0(%rsi),%ymm3,%ymm3 8351 vpxor 64+0(%rsi),%ymm7,%ymm7 8352 vpxor 96+0(%rsi),%ymm11,%ymm11 8353 vmovdqu %ymm0,0+0(%rdi) 8354 vmovdqu %ymm3,32+0(%rdi) 8355 vmovdqu %ymm7,64+0(%rdi) 8356 vmovdqu %ymm11,96+0(%rdi) 8357 8358 vmovdqa 0+128(%rbp),%ymm0 8359 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 8360 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 8361 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 8362 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 8363 vpxor 0+128(%rsi),%ymm3,%ymm3 8364 vpxor 32+128(%rsi),%ymm2,%ymm2 8365 vpxor 64+128(%rsi),%ymm6,%ymm6 8366 vpxor 96+128(%rsi),%ymm10,%ymm10 8367 vmovdqu %ymm3,0+128(%rdi) 8368 vmovdqu %ymm2,32+128(%rdi) 8369 vmovdqu %ymm6,64+128(%rdi) 8370 vmovdqu %ymm10,96+128(%rdi) 8371 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 8372 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 8373 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 8374 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 8375 vpxor 0+256(%rsi),%ymm3,%ymm3 8376 vpxor 32+256(%rsi),%ymm1,%ymm1 8377 vpxor 64+256(%rsi),%ymm5,%ymm5 8378 vpxor 96+256(%rsi),%ymm9,%ymm9 8379 vmovdqu %ymm3,0+256(%rdi) 8380 vmovdqu %ymm1,32+256(%rdi) 8381 vmovdqu %ymm5,64+256(%rdi) 8382 vmovdqu %ymm9,96+256(%rdi) 8383 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 8384 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 8385 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 8386 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 8387 vmovdqa %ymm3,%ymm8 8388 8389 movq $384,%rcx 8390 leaq 384(%rsi),%rsi 8391 subq $384,%rbx 8392 jmp L$seal_avx2_short_hash_remainder 8393 8394L$seal_avx2_320: 8395 vmovdqa %ymm0,%ymm1 8396 vmovdqa %ymm0,%ymm2 8397 vmovdqa %ymm4,%ymm5 8398 vmovdqa %ymm4,%ymm6 8399 vmovdqa %ymm8,%ymm9 8400 vmovdqa %ymm8,%ymm10 8401 vpaddd L$avx2_inc(%rip),%ymm12,%ymm13 8402 vpaddd L$avx2_inc(%rip),%ymm13,%ymm14 8403 vmovdqa %ymm4,%ymm7 8404 vmovdqa %ymm8,%ymm11 8405 vmovdqa %ymm12,0+160(%rbp) 8406 vmovdqa %ymm13,0+192(%rbp) 8407 vmovdqa %ymm14,0+224(%rbp) 8408 movq $10,%r10 8409L$seal_avx2_320_rounds: 8410 vpaddd %ymm4,%ymm0,%ymm0 8411 vpxor %ymm0,%ymm12,%ymm12 8412 vpshufb L$rol16(%rip),%ymm12,%ymm12 8413 vpaddd %ymm12,%ymm8,%ymm8 8414 vpxor %ymm8,%ymm4,%ymm4 8415 vpsrld $20,%ymm4,%ymm3 8416 vpslld $12,%ymm4,%ymm4 8417 vpxor %ymm3,%ymm4,%ymm4 8418 vpaddd %ymm4,%ymm0,%ymm0 8419 vpxor %ymm0,%ymm12,%ymm12 8420 vpshufb L$rol8(%rip),%ymm12,%ymm12 8421 vpaddd %ymm12,%ymm8,%ymm8 8422 vpxor %ymm8,%ymm4,%ymm4 8423 vpslld $7,%ymm4,%ymm3 8424 vpsrld $25,%ymm4,%ymm4 8425 vpxor %ymm3,%ymm4,%ymm4 8426 vpalignr $12,%ymm12,%ymm12,%ymm12 8427 vpalignr $8,%ymm8,%ymm8,%ymm8 8428 vpalignr $4,%ymm4,%ymm4,%ymm4 8429 vpaddd %ymm5,%ymm1,%ymm1 8430 vpxor %ymm1,%ymm13,%ymm13 8431 vpshufb L$rol16(%rip),%ymm13,%ymm13 8432 vpaddd %ymm13,%ymm9,%ymm9 8433 vpxor 
%ymm9,%ymm5,%ymm5 8434 vpsrld $20,%ymm5,%ymm3 8435 vpslld $12,%ymm5,%ymm5 8436 vpxor %ymm3,%ymm5,%ymm5 8437 vpaddd %ymm5,%ymm1,%ymm1 8438 vpxor %ymm1,%ymm13,%ymm13 8439 vpshufb L$rol8(%rip),%ymm13,%ymm13 8440 vpaddd %ymm13,%ymm9,%ymm9 8441 vpxor %ymm9,%ymm5,%ymm5 8442 vpslld $7,%ymm5,%ymm3 8443 vpsrld $25,%ymm5,%ymm5 8444 vpxor %ymm3,%ymm5,%ymm5 8445 vpalignr $12,%ymm13,%ymm13,%ymm13 8446 vpalignr $8,%ymm9,%ymm9,%ymm9 8447 vpalignr $4,%ymm5,%ymm5,%ymm5 8448 vpaddd %ymm6,%ymm2,%ymm2 8449 vpxor %ymm2,%ymm14,%ymm14 8450 vpshufb L$rol16(%rip),%ymm14,%ymm14 8451 vpaddd %ymm14,%ymm10,%ymm10 8452 vpxor %ymm10,%ymm6,%ymm6 8453 vpsrld $20,%ymm6,%ymm3 8454 vpslld $12,%ymm6,%ymm6 8455 vpxor %ymm3,%ymm6,%ymm6 8456 vpaddd %ymm6,%ymm2,%ymm2 8457 vpxor %ymm2,%ymm14,%ymm14 8458 vpshufb L$rol8(%rip),%ymm14,%ymm14 8459 vpaddd %ymm14,%ymm10,%ymm10 8460 vpxor %ymm10,%ymm6,%ymm6 8461 vpslld $7,%ymm6,%ymm3 8462 vpsrld $25,%ymm6,%ymm6 8463 vpxor %ymm3,%ymm6,%ymm6 8464 vpalignr $12,%ymm14,%ymm14,%ymm14 8465 vpalignr $8,%ymm10,%ymm10,%ymm10 8466 vpalignr $4,%ymm6,%ymm6,%ymm6 8467 vpaddd %ymm4,%ymm0,%ymm0 8468 vpxor %ymm0,%ymm12,%ymm12 8469 vpshufb L$rol16(%rip),%ymm12,%ymm12 8470 vpaddd %ymm12,%ymm8,%ymm8 8471 vpxor %ymm8,%ymm4,%ymm4 8472 vpsrld $20,%ymm4,%ymm3 8473 vpslld $12,%ymm4,%ymm4 8474 vpxor %ymm3,%ymm4,%ymm4 8475 vpaddd %ymm4,%ymm0,%ymm0 8476 vpxor %ymm0,%ymm12,%ymm12 8477 vpshufb L$rol8(%rip),%ymm12,%ymm12 8478 vpaddd %ymm12,%ymm8,%ymm8 8479 vpxor %ymm8,%ymm4,%ymm4 8480 vpslld $7,%ymm4,%ymm3 8481 vpsrld $25,%ymm4,%ymm4 8482 vpxor %ymm3,%ymm4,%ymm4 8483 vpalignr $4,%ymm12,%ymm12,%ymm12 8484 vpalignr $8,%ymm8,%ymm8,%ymm8 8485 vpalignr $12,%ymm4,%ymm4,%ymm4 8486 vpaddd %ymm5,%ymm1,%ymm1 8487 vpxor %ymm1,%ymm13,%ymm13 8488 vpshufb L$rol16(%rip),%ymm13,%ymm13 8489 vpaddd %ymm13,%ymm9,%ymm9 8490 vpxor %ymm9,%ymm5,%ymm5 8491 vpsrld $20,%ymm5,%ymm3 8492 vpslld $12,%ymm5,%ymm5 8493 vpxor %ymm3,%ymm5,%ymm5 8494 vpaddd %ymm5,%ymm1,%ymm1 8495 vpxor %ymm1,%ymm13,%ymm13 8496 vpshufb L$rol8(%rip),%ymm13,%ymm13 8497 vpaddd %ymm13,%ymm9,%ymm9 8498 vpxor %ymm9,%ymm5,%ymm5 8499 vpslld $7,%ymm5,%ymm3 8500 vpsrld $25,%ymm5,%ymm5 8501 vpxor %ymm3,%ymm5,%ymm5 8502 vpalignr $4,%ymm13,%ymm13,%ymm13 8503 vpalignr $8,%ymm9,%ymm9,%ymm9 8504 vpalignr $12,%ymm5,%ymm5,%ymm5 8505 vpaddd %ymm6,%ymm2,%ymm2 8506 vpxor %ymm2,%ymm14,%ymm14 8507 vpshufb L$rol16(%rip),%ymm14,%ymm14 8508 vpaddd %ymm14,%ymm10,%ymm10 8509 vpxor %ymm10,%ymm6,%ymm6 8510 vpsrld $20,%ymm6,%ymm3 8511 vpslld $12,%ymm6,%ymm6 8512 vpxor %ymm3,%ymm6,%ymm6 8513 vpaddd %ymm6,%ymm2,%ymm2 8514 vpxor %ymm2,%ymm14,%ymm14 8515 vpshufb L$rol8(%rip),%ymm14,%ymm14 8516 vpaddd %ymm14,%ymm10,%ymm10 8517 vpxor %ymm10,%ymm6,%ymm6 8518 vpslld $7,%ymm6,%ymm3 8519 vpsrld $25,%ymm6,%ymm6 8520 vpxor %ymm3,%ymm6,%ymm6 8521 vpalignr $4,%ymm14,%ymm14,%ymm14 8522 vpalignr $8,%ymm10,%ymm10,%ymm10 8523 vpalignr $12,%ymm6,%ymm6,%ymm6 8524 8525 decq %r10 8526 jne L$seal_avx2_320_rounds 8527 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 8528 vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 8529 vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 8530 vpaddd %ymm7,%ymm4,%ymm4 8531 vpaddd %ymm7,%ymm5,%ymm5 8532 vpaddd %ymm7,%ymm6,%ymm6 8533 vpaddd %ymm11,%ymm8,%ymm8 8534 vpaddd %ymm11,%ymm9,%ymm9 8535 vpaddd %ymm11,%ymm10,%ymm10 8536 vpaddd 0+160(%rbp),%ymm12,%ymm12 8537 vpaddd 0+192(%rbp),%ymm13,%ymm13 8538 vpaddd 0+224(%rbp),%ymm14,%ymm14 8539 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 8540 8541 vpand L$clamp(%rip),%ymm3,%ymm3 8542 vmovdqa %ymm3,0+0(%rbp) 8543 8544 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 8545 vperm2i128 
$0x13,%ymm8,%ymm12,%ymm4 8546 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 8547 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 8548 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 8549 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 8550 vperm2i128 $0x02,%ymm2,%ymm6,%ymm9 8551 vperm2i128 $0x02,%ymm10,%ymm14,%ymm13 8552 vperm2i128 $0x13,%ymm2,%ymm6,%ymm2 8553 vperm2i128 $0x13,%ymm10,%ymm14,%ymm6 8554 jmp L$seal_avx2_short 8555 8556L$seal_avx2_192: 8557 vmovdqa %ymm0,%ymm1 8558 vmovdqa %ymm0,%ymm2 8559 vmovdqa %ymm4,%ymm5 8560 vmovdqa %ymm4,%ymm6 8561 vmovdqa %ymm8,%ymm9 8562 vmovdqa %ymm8,%ymm10 8563 vpaddd L$avx2_inc(%rip),%ymm12,%ymm13 8564 vmovdqa %ymm12,%ymm11 8565 vmovdqa %ymm13,%ymm15 8566 movq $10,%r10 8567L$seal_avx2_192_rounds: 8568 vpaddd %ymm4,%ymm0,%ymm0 8569 vpxor %ymm0,%ymm12,%ymm12 8570 vpshufb L$rol16(%rip),%ymm12,%ymm12 8571 vpaddd %ymm12,%ymm8,%ymm8 8572 vpxor %ymm8,%ymm4,%ymm4 8573 vpsrld $20,%ymm4,%ymm3 8574 vpslld $12,%ymm4,%ymm4 8575 vpxor %ymm3,%ymm4,%ymm4 8576 vpaddd %ymm4,%ymm0,%ymm0 8577 vpxor %ymm0,%ymm12,%ymm12 8578 vpshufb L$rol8(%rip),%ymm12,%ymm12 8579 vpaddd %ymm12,%ymm8,%ymm8 8580 vpxor %ymm8,%ymm4,%ymm4 8581 vpslld $7,%ymm4,%ymm3 8582 vpsrld $25,%ymm4,%ymm4 8583 vpxor %ymm3,%ymm4,%ymm4 8584 vpalignr $12,%ymm12,%ymm12,%ymm12 8585 vpalignr $8,%ymm8,%ymm8,%ymm8 8586 vpalignr $4,%ymm4,%ymm4,%ymm4 8587 vpaddd %ymm5,%ymm1,%ymm1 8588 vpxor %ymm1,%ymm13,%ymm13 8589 vpshufb L$rol16(%rip),%ymm13,%ymm13 8590 vpaddd %ymm13,%ymm9,%ymm9 8591 vpxor %ymm9,%ymm5,%ymm5 8592 vpsrld $20,%ymm5,%ymm3 8593 vpslld $12,%ymm5,%ymm5 8594 vpxor %ymm3,%ymm5,%ymm5 8595 vpaddd %ymm5,%ymm1,%ymm1 8596 vpxor %ymm1,%ymm13,%ymm13 8597 vpshufb L$rol8(%rip),%ymm13,%ymm13 8598 vpaddd %ymm13,%ymm9,%ymm9 8599 vpxor %ymm9,%ymm5,%ymm5 8600 vpslld $7,%ymm5,%ymm3 8601 vpsrld $25,%ymm5,%ymm5 8602 vpxor %ymm3,%ymm5,%ymm5 8603 vpalignr $12,%ymm13,%ymm13,%ymm13 8604 vpalignr $8,%ymm9,%ymm9,%ymm9 8605 vpalignr $4,%ymm5,%ymm5,%ymm5 8606 vpaddd %ymm4,%ymm0,%ymm0 8607 vpxor %ymm0,%ymm12,%ymm12 8608 vpshufb L$rol16(%rip),%ymm12,%ymm12 8609 vpaddd %ymm12,%ymm8,%ymm8 8610 vpxor %ymm8,%ymm4,%ymm4 8611 vpsrld $20,%ymm4,%ymm3 8612 vpslld $12,%ymm4,%ymm4 8613 vpxor %ymm3,%ymm4,%ymm4 8614 vpaddd %ymm4,%ymm0,%ymm0 8615 vpxor %ymm0,%ymm12,%ymm12 8616 vpshufb L$rol8(%rip),%ymm12,%ymm12 8617 vpaddd %ymm12,%ymm8,%ymm8 8618 vpxor %ymm8,%ymm4,%ymm4 8619 vpslld $7,%ymm4,%ymm3 8620 vpsrld $25,%ymm4,%ymm4 8621 vpxor %ymm3,%ymm4,%ymm4 8622 vpalignr $4,%ymm12,%ymm12,%ymm12 8623 vpalignr $8,%ymm8,%ymm8,%ymm8 8624 vpalignr $12,%ymm4,%ymm4,%ymm4 8625 vpaddd %ymm5,%ymm1,%ymm1 8626 vpxor %ymm1,%ymm13,%ymm13 8627 vpshufb L$rol16(%rip),%ymm13,%ymm13 8628 vpaddd %ymm13,%ymm9,%ymm9 8629 vpxor %ymm9,%ymm5,%ymm5 8630 vpsrld $20,%ymm5,%ymm3 8631 vpslld $12,%ymm5,%ymm5 8632 vpxor %ymm3,%ymm5,%ymm5 8633 vpaddd %ymm5,%ymm1,%ymm1 8634 vpxor %ymm1,%ymm13,%ymm13 8635 vpshufb L$rol8(%rip),%ymm13,%ymm13 8636 vpaddd %ymm13,%ymm9,%ymm9 8637 vpxor %ymm9,%ymm5,%ymm5 8638 vpslld $7,%ymm5,%ymm3 8639 vpsrld $25,%ymm5,%ymm5 8640 vpxor %ymm3,%ymm5,%ymm5 8641 vpalignr $4,%ymm13,%ymm13,%ymm13 8642 vpalignr $8,%ymm9,%ymm9,%ymm9 8643 vpalignr $12,%ymm5,%ymm5,%ymm5 8644 8645 decq %r10 8646 jne L$seal_avx2_192_rounds 8647 vpaddd %ymm2,%ymm0,%ymm0 8648 vpaddd %ymm2,%ymm1,%ymm1 8649 vpaddd %ymm6,%ymm4,%ymm4 8650 vpaddd %ymm6,%ymm5,%ymm5 8651 vpaddd %ymm10,%ymm8,%ymm8 8652 vpaddd %ymm10,%ymm9,%ymm9 8653 vpaddd %ymm11,%ymm12,%ymm12 8654 vpaddd %ymm15,%ymm13,%ymm13 8655 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 8656 8657 vpand L$clamp(%rip),%ymm3,%ymm3 8658 vmovdqa %ymm3,0+0(%rbp) 8659 8660 vperm2i128 
$0x13,%ymm0,%ymm4,%ymm0
	vperm2i128 $0x13,%ymm8,%ymm12,%ymm4
	vperm2i128 $0x02,%ymm1,%ymm5,%ymm8
	vperm2i128 $0x02,%ymm9,%ymm13,%ymm12
	vperm2i128 $0x13,%ymm1,%ymm5,%ymm1
	vperm2i128 $0x13,%ymm9,%ymm13,%ymm5
L$seal_avx2_short:
	movq %r8,%r8
	call poly_hash_ad_internal
	xorq %rcx,%rcx
L$seal_avx2_short_hash_remainder:
	cmpq $16,%rcx
	jb L$seal_avx2_short_loop
	addq 0+0(%rdi),%r10
	adcq 8+0(%rdi),%r11
	adcq $1,%r12
	movq 0+0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r15
	adcq %r14,%r9
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	subq $16,%rcx
	addq $16,%rdi
	jmp L$seal_avx2_short_hash_remainder
L$seal_avx2_short_loop:
	cmpq $32,%rbx
	jb L$seal_avx2_short_tail
	subq $32,%rbx

	vpxor (%rsi),%ymm0,%ymm0
	vmovdqu %ymm0,(%rdi)
	leaq 32(%rsi),%rsi

	addq 0+0(%rdi),%r10
	adcq 8+0(%rdi),%r11
	adcq $1,%r12
	movq 0+0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r15
	adcq %r14,%r9
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12
	addq 0+16(%rdi),%r10
	adcq 8+16(%rdi),%r11
	adcq $1,%r12
	movq 0+0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r15
	adcq %r14,%r9
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	leaq 32(%rdi),%rdi

	vmovdqa %ymm4,%ymm0
	vmovdqa %ymm8,%ymm4
	vmovdqa %ymm12,%ymm8
	vmovdqa %ymm1,%ymm12
	vmovdqa %ymm5,%ymm1
	vmovdqa %ymm9,%ymm5
	vmovdqa %ymm13,%ymm9
	vmovdqa %ymm2,%ymm13
	vmovdqa %ymm6,%ymm2
	jmp L$seal_avx2_short_loop
L$seal_avx2_short_tail:
	cmpq $16,%rbx
	jb L$seal_avx2_exit
	subq $16,%rbx
	vpxor (%rsi),%xmm0,%xmm3
	vmovdqu %xmm3,(%rdi)
	leaq 16(%rsi),%rsi
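/* Annotation added by the editor; not part of the generated perlasm output.
   Hash the 16-byte ciphertext block that was just written before advancing
   the output pointer. */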
	addq 0+0(%rdi),%r10
	adcq 8+0(%rdi),%r11
	adcq $1,%r12
	movq 0+0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r15
	adcq %r14,%r9
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	leaq 16(%rdi),%rdi
	vextracti128 $1,%ymm0,%xmm0
L$seal_avx2_exit:
	vzeroupper
	jmp L$seal_sse_tail_16


#endif