# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#include "ring_core_generated/prefix_symbols_asm.h"
.text


chacha20_poly1305_constants:

.p2align 6
L$chacha20_consts:
.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
L$rol8:
.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
L$rol16:
.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
L$avx2_init:
.long 0,0,0,0
L$sse_inc:
.long 1,0,0,0
L$avx2_inc:
.long 2,0,0,0,2,0,0,0
L$clamp:
.quad 0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC
.quad 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
.p2align 4
L$and_masks:
.byte 0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff


.p2align 6
poly_hash_ad_internal:


xorq %r10,%r10
xorq %r11,%r11
xorq %r12,%r12
cmpq $13,%r8
jne L$hash_ad_loop
L$poly_fast_tls_ad:

movq (%rcx),%r10
movq 5(%rcx),%r11
shrq $24,%r11
movq $1,%r12
movq 0+0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12

.byte 0xf3,0xc3
L$hash_ad_loop:

cmpq $16,%r8
jb L$hash_ad_tail
addq 0+0(%rcx),%r10
adcq 8+0(%rcx),%r11
adcq $1,%r12
movq 0+0+0(%rbp),%rax
movq %rax,%r15
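/* The multiply-and-reduce sequence below processes one Poly1305 block:
   the 130-bit accumulator lives in %r10:%r11:%r12, the clamped key half r
   sits at 0(%rbp)/8(%rbp), and the shrdq/andq $-4 steps fold the product
   back down modulo 2^130 - 5. */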
mulq %r10 120 movq %rax,%r13 121 movq %rdx,%r14 122 movq 0+0+0(%rbp),%rax 123 mulq %r11 124 imulq %r12,%r15 125 addq %rax,%r14 126 adcq %rdx,%r15 127 movq 8+0+0(%rbp),%rax 128 movq %rax,%r9 129 mulq %r10 130 addq %rax,%r14 131 adcq $0,%rdx 132 movq %rdx,%r10 133 movq 8+0+0(%rbp),%rax 134 mulq %r11 135 addq %rax,%r15 136 adcq $0,%rdx 137 imulq %r12,%r9 138 addq %r10,%r15 139 adcq %rdx,%r9 140 movq %r13,%r10 141 movq %r14,%r11 142 movq %r15,%r12 143 andq $3,%r12 144 movq %r15,%r13 145 andq $-4,%r13 146 movq %r9,%r14 147 shrdq $2,%r9,%r15 148 shrq $2,%r9 149 addq %r13,%r15 150 adcq %r14,%r9 151 addq %r15,%r10 152 adcq %r9,%r11 153 adcq $0,%r12 154 155 leaq 16(%rcx),%rcx 156 subq $16,%r8 157 jmp L$hash_ad_loop 158L$hash_ad_tail: 159 cmpq $0,%r8 160 je L$hash_ad_done 161 162 xorq %r13,%r13 163 xorq %r14,%r14 164 xorq %r15,%r15 165 addq %r8,%rcx 166L$hash_ad_tail_loop: 167 shldq $8,%r13,%r14 168 shlq $8,%r13 169 movzbq -1(%rcx),%r15 170 xorq %r15,%r13 171 decq %rcx 172 decq %r8 173 jne L$hash_ad_tail_loop 174 175 addq %r13,%r10 176 adcq %r14,%r11 177 adcq $1,%r12 178 movq 0+0+0(%rbp),%rax 179 movq %rax,%r15 180 mulq %r10 181 movq %rax,%r13 182 movq %rdx,%r14 183 movq 0+0+0(%rbp),%rax 184 mulq %r11 185 imulq %r12,%r15 186 addq %rax,%r14 187 adcq %rdx,%r15 188 movq 8+0+0(%rbp),%rax 189 movq %rax,%r9 190 mulq %r10 191 addq %rax,%r14 192 adcq $0,%rdx 193 movq %rdx,%r10 194 movq 8+0+0(%rbp),%rax 195 mulq %r11 196 addq %rax,%r15 197 adcq $0,%rdx 198 imulq %r12,%r9 199 addq %r10,%r15 200 adcq %rdx,%r9 201 movq %r13,%r10 202 movq %r14,%r11 203 movq %r15,%r12 204 andq $3,%r12 205 movq %r15,%r13 206 andq $-4,%r13 207 movq %r9,%r14 208 shrdq $2,%r9,%r15 209 shrq $2,%r9 210 addq %r13,%r15 211 adcq %r14,%r9 212 addq %r15,%r10 213 adcq %r9,%r11 214 adcq $0,%r12 215 216 217L$hash_ad_done: 218 .byte 0xf3,0xc3 219 220 221 222.globl _chacha20_poly1305_open 223.private_extern _chacha20_poly1305_open 224 225.p2align 6 226_chacha20_poly1305_open: 227 228 pushq %rbp 229 230 pushq %rbx 231 232 pushq %r12 233 234 pushq %r13 235 236 pushq %r14 237 238 pushq %r15 239 240 241 242 pushq %r9 243 244 subq $288 + 0 + 32,%rsp 245 246 247 leaq 32(%rsp),%rbp 248 andq $-32,%rbp 249 250 movq %rdx,%rbx 251 movq %r8,0+0+32(%rbp) 252 movq %rbx,8+0+32(%rbp) 253 254 movl _OPENSSL_ia32cap_P+8(%rip),%eax 255 andl $288,%eax 256 xorl $288,%eax 257 jz chacha20_poly1305_open_avx2 258 259 cmpq $128,%rbx 260 jbe L$open_sse_128 261 262 movdqa L$chacha20_consts(%rip),%xmm0 263 movdqu 0(%r9),%xmm4 264 movdqu 16(%r9),%xmm8 265 movdqu 32(%r9),%xmm12 266 267 movdqa %xmm12,%xmm7 268 269 movdqa %xmm4,0+48(%rbp) 270 movdqa %xmm8,0+64(%rbp) 271 movdqa %xmm12,0+96(%rbp) 272 movq $10,%r10 273L$open_sse_init_rounds: 274 paddd %xmm4,%xmm0 275 pxor %xmm0,%xmm12 276 pshufb L$rol16(%rip),%xmm12 277 paddd %xmm12,%xmm8 278 pxor %xmm8,%xmm4 279 movdqa %xmm4,%xmm3 280 pslld $12,%xmm3 281 psrld $20,%xmm4 282 pxor %xmm3,%xmm4 283 paddd %xmm4,%xmm0 284 pxor %xmm0,%xmm12 285 pshufb L$rol8(%rip),%xmm12 286 paddd %xmm12,%xmm8 287 pxor %xmm8,%xmm4 288 movdqa %xmm4,%xmm3 289 pslld $7,%xmm3 290 psrld $25,%xmm4 291 pxor %xmm3,%xmm4 292.byte 102,15,58,15,228,4 293.byte 102,69,15,58,15,192,8 294.byte 102,69,15,58,15,228,12 295 paddd %xmm4,%xmm0 296 pxor %xmm0,%xmm12 297 pshufb L$rol16(%rip),%xmm12 298 paddd %xmm12,%xmm8 299 pxor %xmm8,%xmm4 300 movdqa %xmm4,%xmm3 301 pslld $12,%xmm3 302 psrld $20,%xmm4 303 pxor %xmm3,%xmm4 304 paddd %xmm4,%xmm0 305 pxor %xmm0,%xmm12 306 pshufb L$rol8(%rip),%xmm12 307 paddd %xmm12,%xmm8 308 pxor %xmm8,%xmm4 309 movdqa %xmm4,%xmm3 310 pslld 
$7,%xmm3 311 psrld $25,%xmm4 312 pxor %xmm3,%xmm4 313.byte 102,15,58,15,228,12 314.byte 102,69,15,58,15,192,8 315.byte 102,69,15,58,15,228,4 316 317 decq %r10 318 jne L$open_sse_init_rounds 319 320 paddd L$chacha20_consts(%rip),%xmm0 321 paddd 0+48(%rbp),%xmm4 322 323 pand L$clamp(%rip),%xmm0 324 movdqa %xmm0,0+0(%rbp) 325 movdqa %xmm4,0+16(%rbp) 326 327 movq %r8,%r8 328 call poly_hash_ad_internal 329L$open_sse_main_loop: 330 cmpq $256,%rbx 331 jb L$open_sse_tail 332 333 movdqa L$chacha20_consts(%rip),%xmm0 334 movdqa 0+48(%rbp),%xmm4 335 movdqa 0+64(%rbp),%xmm8 336 movdqa %xmm0,%xmm1 337 movdqa %xmm4,%xmm5 338 movdqa %xmm8,%xmm9 339 movdqa %xmm0,%xmm2 340 movdqa %xmm4,%xmm6 341 movdqa %xmm8,%xmm10 342 movdqa %xmm0,%xmm3 343 movdqa %xmm4,%xmm7 344 movdqa %xmm8,%xmm11 345 movdqa 0+96(%rbp),%xmm15 346 paddd L$sse_inc(%rip),%xmm15 347 movdqa %xmm15,%xmm14 348 paddd L$sse_inc(%rip),%xmm14 349 movdqa %xmm14,%xmm13 350 paddd L$sse_inc(%rip),%xmm13 351 movdqa %xmm13,%xmm12 352 paddd L$sse_inc(%rip),%xmm12 353 movdqa %xmm12,0+96(%rbp) 354 movdqa %xmm13,0+112(%rbp) 355 movdqa %xmm14,0+128(%rbp) 356 movdqa %xmm15,0+144(%rbp) 357 358 359 360 movq $4,%rcx 361 movq %rsi,%r8 362L$open_sse_main_loop_rounds: 363 movdqa %xmm8,0+80(%rbp) 364 movdqa L$rol16(%rip),%xmm8 365 paddd %xmm7,%xmm3 366 paddd %xmm6,%xmm2 367 paddd %xmm5,%xmm1 368 paddd %xmm4,%xmm0 369 pxor %xmm3,%xmm15 370 pxor %xmm2,%xmm14 371 pxor %xmm1,%xmm13 372 pxor %xmm0,%xmm12 373.byte 102,69,15,56,0,248 374.byte 102,69,15,56,0,240 375.byte 102,69,15,56,0,232 376.byte 102,69,15,56,0,224 377 movdqa 0+80(%rbp),%xmm8 378 paddd %xmm15,%xmm11 379 paddd %xmm14,%xmm10 380 paddd %xmm13,%xmm9 381 paddd %xmm12,%xmm8 382 pxor %xmm11,%xmm7 383 addq 0+0(%r8),%r10 384 adcq 8+0(%r8),%r11 385 adcq $1,%r12 386 387 leaq 16(%r8),%r8 388 pxor %xmm10,%xmm6 389 pxor %xmm9,%xmm5 390 pxor %xmm8,%xmm4 391 movdqa %xmm8,0+80(%rbp) 392 movdqa %xmm7,%xmm8 393 psrld $20,%xmm8 394 pslld $32-20,%xmm7 395 pxor %xmm8,%xmm7 396 movdqa %xmm6,%xmm8 397 psrld $20,%xmm8 398 pslld $32-20,%xmm6 399 pxor %xmm8,%xmm6 400 movdqa %xmm5,%xmm8 401 psrld $20,%xmm8 402 pslld $32-20,%xmm5 403 pxor %xmm8,%xmm5 404 movdqa %xmm4,%xmm8 405 psrld $20,%xmm8 406 pslld $32-20,%xmm4 407 pxor %xmm8,%xmm4 408 movq 0+0+0(%rbp),%rax 409 movq %rax,%r15 410 mulq %r10 411 movq %rax,%r13 412 movq %rdx,%r14 413 movq 0+0+0(%rbp),%rax 414 mulq %r11 415 imulq %r12,%r15 416 addq %rax,%r14 417 adcq %rdx,%r15 418 movdqa L$rol8(%rip),%xmm8 419 paddd %xmm7,%xmm3 420 paddd %xmm6,%xmm2 421 paddd %xmm5,%xmm1 422 paddd %xmm4,%xmm0 423 pxor %xmm3,%xmm15 424 pxor %xmm2,%xmm14 425 pxor %xmm1,%xmm13 426 pxor %xmm0,%xmm12 427.byte 102,69,15,56,0,248 428.byte 102,69,15,56,0,240 429.byte 102,69,15,56,0,232 430.byte 102,69,15,56,0,224 431 movdqa 0+80(%rbp),%xmm8 432 paddd %xmm15,%xmm11 433 paddd %xmm14,%xmm10 434 paddd %xmm13,%xmm9 435 paddd %xmm12,%xmm8 436 pxor %xmm11,%xmm7 437 pxor %xmm10,%xmm6 438 movq 8+0+0(%rbp),%rax 439 movq %rax,%r9 440 mulq %r10 441 addq %rax,%r14 442 adcq $0,%rdx 443 movq %rdx,%r10 444 movq 8+0+0(%rbp),%rax 445 mulq %r11 446 addq %rax,%r15 447 adcq $0,%rdx 448 pxor %xmm9,%xmm5 449 pxor %xmm8,%xmm4 450 movdqa %xmm8,0+80(%rbp) 451 movdqa %xmm7,%xmm8 452 psrld $25,%xmm8 453 pslld $32-25,%xmm7 454 pxor %xmm8,%xmm7 455 movdqa %xmm6,%xmm8 456 psrld $25,%xmm8 457 pslld $32-25,%xmm6 458 pxor %xmm8,%xmm6 459 movdqa %xmm5,%xmm8 460 psrld $25,%xmm8 461 pslld $32-25,%xmm5 462 pxor %xmm8,%xmm5 463 movdqa %xmm4,%xmm8 464 psrld $25,%xmm8 465 pslld $32-25,%xmm4 466 pxor %xmm8,%xmm4 467 movdqa 0+80(%rbp),%xmm8 468 imulq 
%r12,%r9 469 addq %r10,%r15 470 adcq %rdx,%r9 471.byte 102,15,58,15,255,4 472.byte 102,69,15,58,15,219,8 473.byte 102,69,15,58,15,255,12 474.byte 102,15,58,15,246,4 475.byte 102,69,15,58,15,210,8 476.byte 102,69,15,58,15,246,12 477.byte 102,15,58,15,237,4 478.byte 102,69,15,58,15,201,8 479.byte 102,69,15,58,15,237,12 480.byte 102,15,58,15,228,4 481.byte 102,69,15,58,15,192,8 482.byte 102,69,15,58,15,228,12 483 movdqa %xmm8,0+80(%rbp) 484 movdqa L$rol16(%rip),%xmm8 485 paddd %xmm7,%xmm3 486 paddd %xmm6,%xmm2 487 paddd %xmm5,%xmm1 488 paddd %xmm4,%xmm0 489 pxor %xmm3,%xmm15 490 pxor %xmm2,%xmm14 491 movq %r13,%r10 492 movq %r14,%r11 493 movq %r15,%r12 494 andq $3,%r12 495 movq %r15,%r13 496 andq $-4,%r13 497 movq %r9,%r14 498 shrdq $2,%r9,%r15 499 shrq $2,%r9 500 addq %r13,%r15 501 adcq %r14,%r9 502 addq %r15,%r10 503 adcq %r9,%r11 504 adcq $0,%r12 505 pxor %xmm1,%xmm13 506 pxor %xmm0,%xmm12 507.byte 102,69,15,56,0,248 508.byte 102,69,15,56,0,240 509.byte 102,69,15,56,0,232 510.byte 102,69,15,56,0,224 511 movdqa 0+80(%rbp),%xmm8 512 paddd %xmm15,%xmm11 513 paddd %xmm14,%xmm10 514 paddd %xmm13,%xmm9 515 paddd %xmm12,%xmm8 516 pxor %xmm11,%xmm7 517 pxor %xmm10,%xmm6 518 pxor %xmm9,%xmm5 519 pxor %xmm8,%xmm4 520 movdqa %xmm8,0+80(%rbp) 521 movdqa %xmm7,%xmm8 522 psrld $20,%xmm8 523 pslld $32-20,%xmm7 524 pxor %xmm8,%xmm7 525 movdqa %xmm6,%xmm8 526 psrld $20,%xmm8 527 pslld $32-20,%xmm6 528 pxor %xmm8,%xmm6 529 movdqa %xmm5,%xmm8 530 psrld $20,%xmm8 531 pslld $32-20,%xmm5 532 pxor %xmm8,%xmm5 533 movdqa %xmm4,%xmm8 534 psrld $20,%xmm8 535 pslld $32-20,%xmm4 536 pxor %xmm8,%xmm4 537 movdqa L$rol8(%rip),%xmm8 538 paddd %xmm7,%xmm3 539 paddd %xmm6,%xmm2 540 paddd %xmm5,%xmm1 541 paddd %xmm4,%xmm0 542 pxor %xmm3,%xmm15 543 pxor %xmm2,%xmm14 544 pxor %xmm1,%xmm13 545 pxor %xmm0,%xmm12 546.byte 102,69,15,56,0,248 547.byte 102,69,15,56,0,240 548.byte 102,69,15,56,0,232 549.byte 102,69,15,56,0,224 550 movdqa 0+80(%rbp),%xmm8 551 paddd %xmm15,%xmm11 552 paddd %xmm14,%xmm10 553 paddd %xmm13,%xmm9 554 paddd %xmm12,%xmm8 555 pxor %xmm11,%xmm7 556 pxor %xmm10,%xmm6 557 pxor %xmm9,%xmm5 558 pxor %xmm8,%xmm4 559 movdqa %xmm8,0+80(%rbp) 560 movdqa %xmm7,%xmm8 561 psrld $25,%xmm8 562 pslld $32-25,%xmm7 563 pxor %xmm8,%xmm7 564 movdqa %xmm6,%xmm8 565 psrld $25,%xmm8 566 pslld $32-25,%xmm6 567 pxor %xmm8,%xmm6 568 movdqa %xmm5,%xmm8 569 psrld $25,%xmm8 570 pslld $32-25,%xmm5 571 pxor %xmm8,%xmm5 572 movdqa %xmm4,%xmm8 573 psrld $25,%xmm8 574 pslld $32-25,%xmm4 575 pxor %xmm8,%xmm4 576 movdqa 0+80(%rbp),%xmm8 577.byte 102,15,58,15,255,12 578.byte 102,69,15,58,15,219,8 579.byte 102,69,15,58,15,255,4 580.byte 102,15,58,15,246,12 581.byte 102,69,15,58,15,210,8 582.byte 102,69,15,58,15,246,4 583.byte 102,15,58,15,237,12 584.byte 102,69,15,58,15,201,8 585.byte 102,69,15,58,15,237,4 586.byte 102,15,58,15,228,12 587.byte 102,69,15,58,15,192,8 588.byte 102,69,15,58,15,228,4 589 590 decq %rcx 591 jge L$open_sse_main_loop_rounds 592 addq 0+0(%r8),%r10 593 adcq 8+0(%r8),%r11 594 adcq $1,%r12 595 movq 0+0+0(%rbp),%rax 596 movq %rax,%r15 597 mulq %r10 598 movq %rax,%r13 599 movq %rdx,%r14 600 movq 0+0+0(%rbp),%rax 601 mulq %r11 602 imulq %r12,%r15 603 addq %rax,%r14 604 adcq %rdx,%r15 605 movq 8+0+0(%rbp),%rax 606 movq %rax,%r9 607 mulq %r10 608 addq %rax,%r14 609 adcq $0,%rdx 610 movq %rdx,%r10 611 movq 8+0+0(%rbp),%rax 612 mulq %r11 613 addq %rax,%r15 614 adcq $0,%rdx 615 imulq %r12,%r9 616 addq %r10,%r15 617 adcq %rdx,%r9 618 movq %r13,%r10 619 movq %r14,%r11 620 movq %r15,%r12 621 andq $3,%r12 622 movq %r15,%r13 623 andq 
$-4,%r13 624 movq %r9,%r14 625 shrdq $2,%r9,%r15 626 shrq $2,%r9 627 addq %r13,%r15 628 adcq %r14,%r9 629 addq %r15,%r10 630 adcq %r9,%r11 631 adcq $0,%r12 632 633 leaq 16(%r8),%r8 634 cmpq $-6,%rcx 635 jg L$open_sse_main_loop_rounds 636 paddd L$chacha20_consts(%rip),%xmm3 637 paddd 0+48(%rbp),%xmm7 638 paddd 0+64(%rbp),%xmm11 639 paddd 0+144(%rbp),%xmm15 640 paddd L$chacha20_consts(%rip),%xmm2 641 paddd 0+48(%rbp),%xmm6 642 paddd 0+64(%rbp),%xmm10 643 paddd 0+128(%rbp),%xmm14 644 paddd L$chacha20_consts(%rip),%xmm1 645 paddd 0+48(%rbp),%xmm5 646 paddd 0+64(%rbp),%xmm9 647 paddd 0+112(%rbp),%xmm13 648 paddd L$chacha20_consts(%rip),%xmm0 649 paddd 0+48(%rbp),%xmm4 650 paddd 0+64(%rbp),%xmm8 651 paddd 0+96(%rbp),%xmm12 652 movdqa %xmm12,0+80(%rbp) 653 movdqu 0 + 0(%rsi),%xmm12 654 pxor %xmm3,%xmm12 655 movdqu %xmm12,0 + 0(%rdi) 656 movdqu 16 + 0(%rsi),%xmm12 657 pxor %xmm7,%xmm12 658 movdqu %xmm12,16 + 0(%rdi) 659 movdqu 32 + 0(%rsi),%xmm12 660 pxor %xmm11,%xmm12 661 movdqu %xmm12,32 + 0(%rdi) 662 movdqu 48 + 0(%rsi),%xmm12 663 pxor %xmm15,%xmm12 664 movdqu %xmm12,48 + 0(%rdi) 665 movdqu 0 + 64(%rsi),%xmm3 666 movdqu 16 + 64(%rsi),%xmm7 667 movdqu 32 + 64(%rsi),%xmm11 668 movdqu 48 + 64(%rsi),%xmm15 669 pxor %xmm3,%xmm2 670 pxor %xmm7,%xmm6 671 pxor %xmm11,%xmm10 672 pxor %xmm14,%xmm15 673 movdqu %xmm2,0 + 64(%rdi) 674 movdqu %xmm6,16 + 64(%rdi) 675 movdqu %xmm10,32 + 64(%rdi) 676 movdqu %xmm15,48 + 64(%rdi) 677 movdqu 0 + 128(%rsi),%xmm3 678 movdqu 16 + 128(%rsi),%xmm7 679 movdqu 32 + 128(%rsi),%xmm11 680 movdqu 48 + 128(%rsi),%xmm15 681 pxor %xmm3,%xmm1 682 pxor %xmm7,%xmm5 683 pxor %xmm11,%xmm9 684 pxor %xmm13,%xmm15 685 movdqu %xmm1,0 + 128(%rdi) 686 movdqu %xmm5,16 + 128(%rdi) 687 movdqu %xmm9,32 + 128(%rdi) 688 movdqu %xmm15,48 + 128(%rdi) 689 movdqu 0 + 192(%rsi),%xmm3 690 movdqu 16 + 192(%rsi),%xmm7 691 movdqu 32 + 192(%rsi),%xmm11 692 movdqu 48 + 192(%rsi),%xmm15 693 pxor %xmm3,%xmm0 694 pxor %xmm7,%xmm4 695 pxor %xmm11,%xmm8 696 pxor 0+80(%rbp),%xmm15 697 movdqu %xmm0,0 + 192(%rdi) 698 movdqu %xmm4,16 + 192(%rdi) 699 movdqu %xmm8,32 + 192(%rdi) 700 movdqu %xmm15,48 + 192(%rdi) 701 702 leaq 256(%rsi),%rsi 703 leaq 256(%rdi),%rdi 704 subq $256,%rbx 705 jmp L$open_sse_main_loop 706L$open_sse_tail: 707 708 testq %rbx,%rbx 709 jz L$open_sse_finalize 710 cmpq $192,%rbx 711 ja L$open_sse_tail_256 712 cmpq $128,%rbx 713 ja L$open_sse_tail_192 714 cmpq $64,%rbx 715 ja L$open_sse_tail_128 716 movdqa L$chacha20_consts(%rip),%xmm0 717 movdqa 0+48(%rbp),%xmm4 718 movdqa 0+64(%rbp),%xmm8 719 movdqa 0+96(%rbp),%xmm12 720 paddd L$sse_inc(%rip),%xmm12 721 movdqa %xmm12,0+96(%rbp) 722 723 xorq %r8,%r8 724 movq %rbx,%rcx 725 cmpq $16,%rcx 726 jb L$open_sse_tail_64_rounds 727L$open_sse_tail_64_rounds_and_x1hash: 728 addq 0+0(%rsi,%r8,1),%r10 729 adcq 8+0(%rsi,%r8,1),%r11 730 adcq $1,%r12 731 movq 0+0+0(%rbp),%rax 732 movq %rax,%r15 733 mulq %r10 734 movq %rax,%r13 735 movq %rdx,%r14 736 movq 0+0+0(%rbp),%rax 737 mulq %r11 738 imulq %r12,%r15 739 addq %rax,%r14 740 adcq %rdx,%r15 741 movq 8+0+0(%rbp),%rax 742 movq %rax,%r9 743 mulq %r10 744 addq %rax,%r14 745 adcq $0,%rdx 746 movq %rdx,%r10 747 movq 8+0+0(%rbp),%rax 748 mulq %r11 749 addq %rax,%r15 750 adcq $0,%rdx 751 imulq %r12,%r9 752 addq %r10,%r15 753 adcq %rdx,%r9 754 movq %r13,%r10 755 movq %r14,%r11 756 movq %r15,%r12 757 andq $3,%r12 758 movq %r15,%r13 759 andq $-4,%r13 760 movq %r9,%r14 761 shrdq $2,%r9,%r15 762 shrq $2,%r9 763 addq %r13,%r15 764 adcq %r14,%r9 765 addq %r15,%r10 766 adcq %r9,%r11 767 adcq $0,%r12 768 769 subq $16,%rcx 
770L$open_sse_tail_64_rounds: 771 addq $16,%r8 772 paddd %xmm4,%xmm0 773 pxor %xmm0,%xmm12 774 pshufb L$rol16(%rip),%xmm12 775 paddd %xmm12,%xmm8 776 pxor %xmm8,%xmm4 777 movdqa %xmm4,%xmm3 778 pslld $12,%xmm3 779 psrld $20,%xmm4 780 pxor %xmm3,%xmm4 781 paddd %xmm4,%xmm0 782 pxor %xmm0,%xmm12 783 pshufb L$rol8(%rip),%xmm12 784 paddd %xmm12,%xmm8 785 pxor %xmm8,%xmm4 786 movdqa %xmm4,%xmm3 787 pslld $7,%xmm3 788 psrld $25,%xmm4 789 pxor %xmm3,%xmm4 790.byte 102,15,58,15,228,4 791.byte 102,69,15,58,15,192,8 792.byte 102,69,15,58,15,228,12 793 paddd %xmm4,%xmm0 794 pxor %xmm0,%xmm12 795 pshufb L$rol16(%rip),%xmm12 796 paddd %xmm12,%xmm8 797 pxor %xmm8,%xmm4 798 movdqa %xmm4,%xmm3 799 pslld $12,%xmm3 800 psrld $20,%xmm4 801 pxor %xmm3,%xmm4 802 paddd %xmm4,%xmm0 803 pxor %xmm0,%xmm12 804 pshufb L$rol8(%rip),%xmm12 805 paddd %xmm12,%xmm8 806 pxor %xmm8,%xmm4 807 movdqa %xmm4,%xmm3 808 pslld $7,%xmm3 809 psrld $25,%xmm4 810 pxor %xmm3,%xmm4 811.byte 102,15,58,15,228,12 812.byte 102,69,15,58,15,192,8 813.byte 102,69,15,58,15,228,4 814 815 cmpq $16,%rcx 816 jae L$open_sse_tail_64_rounds_and_x1hash 817 cmpq $160,%r8 818 jne L$open_sse_tail_64_rounds 819 paddd L$chacha20_consts(%rip),%xmm0 820 paddd 0+48(%rbp),%xmm4 821 paddd 0+64(%rbp),%xmm8 822 paddd 0+96(%rbp),%xmm12 823 824 jmp L$open_sse_tail_64_dec_loop 825 826L$open_sse_tail_128: 827 movdqa L$chacha20_consts(%rip),%xmm0 828 movdqa 0+48(%rbp),%xmm4 829 movdqa 0+64(%rbp),%xmm8 830 movdqa %xmm0,%xmm1 831 movdqa %xmm4,%xmm5 832 movdqa %xmm8,%xmm9 833 movdqa 0+96(%rbp),%xmm13 834 paddd L$sse_inc(%rip),%xmm13 835 movdqa %xmm13,%xmm12 836 paddd L$sse_inc(%rip),%xmm12 837 movdqa %xmm12,0+96(%rbp) 838 movdqa %xmm13,0+112(%rbp) 839 840 movq %rbx,%rcx 841 andq $-16,%rcx 842 xorq %r8,%r8 843L$open_sse_tail_128_rounds_and_x1hash: 844 addq 0+0(%rsi,%r8,1),%r10 845 adcq 8+0(%rsi,%r8,1),%r11 846 adcq $1,%r12 847 movq 0+0+0(%rbp),%rax 848 movq %rax,%r15 849 mulq %r10 850 movq %rax,%r13 851 movq %rdx,%r14 852 movq 0+0+0(%rbp),%rax 853 mulq %r11 854 imulq %r12,%r15 855 addq %rax,%r14 856 adcq %rdx,%r15 857 movq 8+0+0(%rbp),%rax 858 movq %rax,%r9 859 mulq %r10 860 addq %rax,%r14 861 adcq $0,%rdx 862 movq %rdx,%r10 863 movq 8+0+0(%rbp),%rax 864 mulq %r11 865 addq %rax,%r15 866 adcq $0,%rdx 867 imulq %r12,%r9 868 addq %r10,%r15 869 adcq %rdx,%r9 870 movq %r13,%r10 871 movq %r14,%r11 872 movq %r15,%r12 873 andq $3,%r12 874 movq %r15,%r13 875 andq $-4,%r13 876 movq %r9,%r14 877 shrdq $2,%r9,%r15 878 shrq $2,%r9 879 addq %r13,%r15 880 adcq %r14,%r9 881 addq %r15,%r10 882 adcq %r9,%r11 883 adcq $0,%r12 884 885L$open_sse_tail_128_rounds: 886 addq $16,%r8 887 paddd %xmm4,%xmm0 888 pxor %xmm0,%xmm12 889 pshufb L$rol16(%rip),%xmm12 890 paddd %xmm12,%xmm8 891 pxor %xmm8,%xmm4 892 movdqa %xmm4,%xmm3 893 pslld $12,%xmm3 894 psrld $20,%xmm4 895 pxor %xmm3,%xmm4 896 paddd %xmm4,%xmm0 897 pxor %xmm0,%xmm12 898 pshufb L$rol8(%rip),%xmm12 899 paddd %xmm12,%xmm8 900 pxor %xmm8,%xmm4 901 movdqa %xmm4,%xmm3 902 pslld $7,%xmm3 903 psrld $25,%xmm4 904 pxor %xmm3,%xmm4 905.byte 102,15,58,15,228,4 906.byte 102,69,15,58,15,192,8 907.byte 102,69,15,58,15,228,12 908 paddd %xmm5,%xmm1 909 pxor %xmm1,%xmm13 910 pshufb L$rol16(%rip),%xmm13 911 paddd %xmm13,%xmm9 912 pxor %xmm9,%xmm5 913 movdqa %xmm5,%xmm3 914 pslld $12,%xmm3 915 psrld $20,%xmm5 916 pxor %xmm3,%xmm5 917 paddd %xmm5,%xmm1 918 pxor %xmm1,%xmm13 919 pshufb L$rol8(%rip),%xmm13 920 paddd %xmm13,%xmm9 921 pxor %xmm9,%xmm5 922 movdqa %xmm5,%xmm3 923 pslld $7,%xmm3 924 psrld $25,%xmm5 925 pxor %xmm3,%xmm5 926.byte 102,15,58,15,237,4 
927.byte 102,69,15,58,15,201,8 928.byte 102,69,15,58,15,237,12 929 paddd %xmm4,%xmm0 930 pxor %xmm0,%xmm12 931 pshufb L$rol16(%rip),%xmm12 932 paddd %xmm12,%xmm8 933 pxor %xmm8,%xmm4 934 movdqa %xmm4,%xmm3 935 pslld $12,%xmm3 936 psrld $20,%xmm4 937 pxor %xmm3,%xmm4 938 paddd %xmm4,%xmm0 939 pxor %xmm0,%xmm12 940 pshufb L$rol8(%rip),%xmm12 941 paddd %xmm12,%xmm8 942 pxor %xmm8,%xmm4 943 movdqa %xmm4,%xmm3 944 pslld $7,%xmm3 945 psrld $25,%xmm4 946 pxor %xmm3,%xmm4 947.byte 102,15,58,15,228,12 948.byte 102,69,15,58,15,192,8 949.byte 102,69,15,58,15,228,4 950 paddd %xmm5,%xmm1 951 pxor %xmm1,%xmm13 952 pshufb L$rol16(%rip),%xmm13 953 paddd %xmm13,%xmm9 954 pxor %xmm9,%xmm5 955 movdqa %xmm5,%xmm3 956 pslld $12,%xmm3 957 psrld $20,%xmm5 958 pxor %xmm3,%xmm5 959 paddd %xmm5,%xmm1 960 pxor %xmm1,%xmm13 961 pshufb L$rol8(%rip),%xmm13 962 paddd %xmm13,%xmm9 963 pxor %xmm9,%xmm5 964 movdqa %xmm5,%xmm3 965 pslld $7,%xmm3 966 psrld $25,%xmm5 967 pxor %xmm3,%xmm5 968.byte 102,15,58,15,237,12 969.byte 102,69,15,58,15,201,8 970.byte 102,69,15,58,15,237,4 971 972 cmpq %rcx,%r8 973 jb L$open_sse_tail_128_rounds_and_x1hash 974 cmpq $160,%r8 975 jne L$open_sse_tail_128_rounds 976 paddd L$chacha20_consts(%rip),%xmm1 977 paddd 0+48(%rbp),%xmm5 978 paddd 0+64(%rbp),%xmm9 979 paddd 0+112(%rbp),%xmm13 980 paddd L$chacha20_consts(%rip),%xmm0 981 paddd 0+48(%rbp),%xmm4 982 paddd 0+64(%rbp),%xmm8 983 paddd 0+96(%rbp),%xmm12 984 movdqu 0 + 0(%rsi),%xmm3 985 movdqu 16 + 0(%rsi),%xmm7 986 movdqu 32 + 0(%rsi),%xmm11 987 movdqu 48 + 0(%rsi),%xmm15 988 pxor %xmm3,%xmm1 989 pxor %xmm7,%xmm5 990 pxor %xmm11,%xmm9 991 pxor %xmm13,%xmm15 992 movdqu %xmm1,0 + 0(%rdi) 993 movdqu %xmm5,16 + 0(%rdi) 994 movdqu %xmm9,32 + 0(%rdi) 995 movdqu %xmm15,48 + 0(%rdi) 996 997 subq $64,%rbx 998 leaq 64(%rsi),%rsi 999 leaq 64(%rdi),%rdi 1000 jmp L$open_sse_tail_64_dec_loop 1001 1002L$open_sse_tail_192: 1003 movdqa L$chacha20_consts(%rip),%xmm0 1004 movdqa 0+48(%rbp),%xmm4 1005 movdqa 0+64(%rbp),%xmm8 1006 movdqa %xmm0,%xmm1 1007 movdqa %xmm4,%xmm5 1008 movdqa %xmm8,%xmm9 1009 movdqa %xmm0,%xmm2 1010 movdqa %xmm4,%xmm6 1011 movdqa %xmm8,%xmm10 1012 movdqa 0+96(%rbp),%xmm14 1013 paddd L$sse_inc(%rip),%xmm14 1014 movdqa %xmm14,%xmm13 1015 paddd L$sse_inc(%rip),%xmm13 1016 movdqa %xmm13,%xmm12 1017 paddd L$sse_inc(%rip),%xmm12 1018 movdqa %xmm12,0+96(%rbp) 1019 movdqa %xmm13,0+112(%rbp) 1020 movdqa %xmm14,0+128(%rbp) 1021 1022 movq %rbx,%rcx 1023 movq $160,%r8 1024 cmpq $160,%rcx 1025 cmovgq %r8,%rcx 1026 andq $-16,%rcx 1027 xorq %r8,%r8 1028L$open_sse_tail_192_rounds_and_x1hash: 1029 addq 0+0(%rsi,%r8,1),%r10 1030 adcq 8+0(%rsi,%r8,1),%r11 1031 adcq $1,%r12 1032 movq 0+0+0(%rbp),%rax 1033 movq %rax,%r15 1034 mulq %r10 1035 movq %rax,%r13 1036 movq %rdx,%r14 1037 movq 0+0+0(%rbp),%rax 1038 mulq %r11 1039 imulq %r12,%r15 1040 addq %rax,%r14 1041 adcq %rdx,%r15 1042 movq 8+0+0(%rbp),%rax 1043 movq %rax,%r9 1044 mulq %r10 1045 addq %rax,%r14 1046 adcq $0,%rdx 1047 movq %rdx,%r10 1048 movq 8+0+0(%rbp),%rax 1049 mulq %r11 1050 addq %rax,%r15 1051 adcq $0,%rdx 1052 imulq %r12,%r9 1053 addq %r10,%r15 1054 adcq %rdx,%r9 1055 movq %r13,%r10 1056 movq %r14,%r11 1057 movq %r15,%r12 1058 andq $3,%r12 1059 movq %r15,%r13 1060 andq $-4,%r13 1061 movq %r9,%r14 1062 shrdq $2,%r9,%r15 1063 shrq $2,%r9 1064 addq %r13,%r15 1065 adcq %r14,%r9 1066 addq %r15,%r10 1067 adcq %r9,%r11 1068 adcq $0,%r12 1069 1070L$open_sse_tail_192_rounds: 1071 addq $16,%r8 1072 paddd %xmm4,%xmm0 1073 pxor %xmm0,%xmm12 1074 pshufb L$rol16(%rip),%xmm12 1075 paddd %xmm12,%xmm8 1076 pxor 
%xmm8,%xmm4 1077 movdqa %xmm4,%xmm3 1078 pslld $12,%xmm3 1079 psrld $20,%xmm4 1080 pxor %xmm3,%xmm4 1081 paddd %xmm4,%xmm0 1082 pxor %xmm0,%xmm12 1083 pshufb L$rol8(%rip),%xmm12 1084 paddd %xmm12,%xmm8 1085 pxor %xmm8,%xmm4 1086 movdqa %xmm4,%xmm3 1087 pslld $7,%xmm3 1088 psrld $25,%xmm4 1089 pxor %xmm3,%xmm4 1090.byte 102,15,58,15,228,4 1091.byte 102,69,15,58,15,192,8 1092.byte 102,69,15,58,15,228,12 1093 paddd %xmm5,%xmm1 1094 pxor %xmm1,%xmm13 1095 pshufb L$rol16(%rip),%xmm13 1096 paddd %xmm13,%xmm9 1097 pxor %xmm9,%xmm5 1098 movdqa %xmm5,%xmm3 1099 pslld $12,%xmm3 1100 psrld $20,%xmm5 1101 pxor %xmm3,%xmm5 1102 paddd %xmm5,%xmm1 1103 pxor %xmm1,%xmm13 1104 pshufb L$rol8(%rip),%xmm13 1105 paddd %xmm13,%xmm9 1106 pxor %xmm9,%xmm5 1107 movdqa %xmm5,%xmm3 1108 pslld $7,%xmm3 1109 psrld $25,%xmm5 1110 pxor %xmm3,%xmm5 1111.byte 102,15,58,15,237,4 1112.byte 102,69,15,58,15,201,8 1113.byte 102,69,15,58,15,237,12 1114 paddd %xmm6,%xmm2 1115 pxor %xmm2,%xmm14 1116 pshufb L$rol16(%rip),%xmm14 1117 paddd %xmm14,%xmm10 1118 pxor %xmm10,%xmm6 1119 movdqa %xmm6,%xmm3 1120 pslld $12,%xmm3 1121 psrld $20,%xmm6 1122 pxor %xmm3,%xmm6 1123 paddd %xmm6,%xmm2 1124 pxor %xmm2,%xmm14 1125 pshufb L$rol8(%rip),%xmm14 1126 paddd %xmm14,%xmm10 1127 pxor %xmm10,%xmm6 1128 movdqa %xmm6,%xmm3 1129 pslld $7,%xmm3 1130 psrld $25,%xmm6 1131 pxor %xmm3,%xmm6 1132.byte 102,15,58,15,246,4 1133.byte 102,69,15,58,15,210,8 1134.byte 102,69,15,58,15,246,12 1135 paddd %xmm4,%xmm0 1136 pxor %xmm0,%xmm12 1137 pshufb L$rol16(%rip),%xmm12 1138 paddd %xmm12,%xmm8 1139 pxor %xmm8,%xmm4 1140 movdqa %xmm4,%xmm3 1141 pslld $12,%xmm3 1142 psrld $20,%xmm4 1143 pxor %xmm3,%xmm4 1144 paddd %xmm4,%xmm0 1145 pxor %xmm0,%xmm12 1146 pshufb L$rol8(%rip),%xmm12 1147 paddd %xmm12,%xmm8 1148 pxor %xmm8,%xmm4 1149 movdqa %xmm4,%xmm3 1150 pslld $7,%xmm3 1151 psrld $25,%xmm4 1152 pxor %xmm3,%xmm4 1153.byte 102,15,58,15,228,12 1154.byte 102,69,15,58,15,192,8 1155.byte 102,69,15,58,15,228,4 1156 paddd %xmm5,%xmm1 1157 pxor %xmm1,%xmm13 1158 pshufb L$rol16(%rip),%xmm13 1159 paddd %xmm13,%xmm9 1160 pxor %xmm9,%xmm5 1161 movdqa %xmm5,%xmm3 1162 pslld $12,%xmm3 1163 psrld $20,%xmm5 1164 pxor %xmm3,%xmm5 1165 paddd %xmm5,%xmm1 1166 pxor %xmm1,%xmm13 1167 pshufb L$rol8(%rip),%xmm13 1168 paddd %xmm13,%xmm9 1169 pxor %xmm9,%xmm5 1170 movdqa %xmm5,%xmm3 1171 pslld $7,%xmm3 1172 psrld $25,%xmm5 1173 pxor %xmm3,%xmm5 1174.byte 102,15,58,15,237,12 1175.byte 102,69,15,58,15,201,8 1176.byte 102,69,15,58,15,237,4 1177 paddd %xmm6,%xmm2 1178 pxor %xmm2,%xmm14 1179 pshufb L$rol16(%rip),%xmm14 1180 paddd %xmm14,%xmm10 1181 pxor %xmm10,%xmm6 1182 movdqa %xmm6,%xmm3 1183 pslld $12,%xmm3 1184 psrld $20,%xmm6 1185 pxor %xmm3,%xmm6 1186 paddd %xmm6,%xmm2 1187 pxor %xmm2,%xmm14 1188 pshufb L$rol8(%rip),%xmm14 1189 paddd %xmm14,%xmm10 1190 pxor %xmm10,%xmm6 1191 movdqa %xmm6,%xmm3 1192 pslld $7,%xmm3 1193 psrld $25,%xmm6 1194 pxor %xmm3,%xmm6 1195.byte 102,15,58,15,246,12 1196.byte 102,69,15,58,15,210,8 1197.byte 102,69,15,58,15,246,4 1198 1199 cmpq %rcx,%r8 1200 jb L$open_sse_tail_192_rounds_and_x1hash 1201 cmpq $160,%r8 1202 jne L$open_sse_tail_192_rounds 1203 cmpq $176,%rbx 1204 jb L$open_sse_tail_192_finish 1205 addq 0+160(%rsi),%r10 1206 adcq 8+160(%rsi),%r11 1207 adcq $1,%r12 1208 movq 0+0+0(%rbp),%rax 1209 movq %rax,%r15 1210 mulq %r10 1211 movq %rax,%r13 1212 movq %rdx,%r14 1213 movq 0+0+0(%rbp),%rax 1214 mulq %r11 1215 imulq %r12,%r15 1216 addq %rax,%r14 1217 adcq %rdx,%r15 1218 movq 8+0+0(%rbp),%rax 1219 movq %rax,%r9 1220 mulq %r10 1221 addq %rax,%r14 1222 adcq 
$0,%rdx 1223 movq %rdx,%r10 1224 movq 8+0+0(%rbp),%rax 1225 mulq %r11 1226 addq %rax,%r15 1227 adcq $0,%rdx 1228 imulq %r12,%r9 1229 addq %r10,%r15 1230 adcq %rdx,%r9 1231 movq %r13,%r10 1232 movq %r14,%r11 1233 movq %r15,%r12 1234 andq $3,%r12 1235 movq %r15,%r13 1236 andq $-4,%r13 1237 movq %r9,%r14 1238 shrdq $2,%r9,%r15 1239 shrq $2,%r9 1240 addq %r13,%r15 1241 adcq %r14,%r9 1242 addq %r15,%r10 1243 adcq %r9,%r11 1244 adcq $0,%r12 1245 1246 cmpq $192,%rbx 1247 jb L$open_sse_tail_192_finish 1248 addq 0+176(%rsi),%r10 1249 adcq 8+176(%rsi),%r11 1250 adcq $1,%r12 1251 movq 0+0+0(%rbp),%rax 1252 movq %rax,%r15 1253 mulq %r10 1254 movq %rax,%r13 1255 movq %rdx,%r14 1256 movq 0+0+0(%rbp),%rax 1257 mulq %r11 1258 imulq %r12,%r15 1259 addq %rax,%r14 1260 adcq %rdx,%r15 1261 movq 8+0+0(%rbp),%rax 1262 movq %rax,%r9 1263 mulq %r10 1264 addq %rax,%r14 1265 adcq $0,%rdx 1266 movq %rdx,%r10 1267 movq 8+0+0(%rbp),%rax 1268 mulq %r11 1269 addq %rax,%r15 1270 adcq $0,%rdx 1271 imulq %r12,%r9 1272 addq %r10,%r15 1273 adcq %rdx,%r9 1274 movq %r13,%r10 1275 movq %r14,%r11 1276 movq %r15,%r12 1277 andq $3,%r12 1278 movq %r15,%r13 1279 andq $-4,%r13 1280 movq %r9,%r14 1281 shrdq $2,%r9,%r15 1282 shrq $2,%r9 1283 addq %r13,%r15 1284 adcq %r14,%r9 1285 addq %r15,%r10 1286 adcq %r9,%r11 1287 adcq $0,%r12 1288 1289L$open_sse_tail_192_finish: 1290 paddd L$chacha20_consts(%rip),%xmm2 1291 paddd 0+48(%rbp),%xmm6 1292 paddd 0+64(%rbp),%xmm10 1293 paddd 0+128(%rbp),%xmm14 1294 paddd L$chacha20_consts(%rip),%xmm1 1295 paddd 0+48(%rbp),%xmm5 1296 paddd 0+64(%rbp),%xmm9 1297 paddd 0+112(%rbp),%xmm13 1298 paddd L$chacha20_consts(%rip),%xmm0 1299 paddd 0+48(%rbp),%xmm4 1300 paddd 0+64(%rbp),%xmm8 1301 paddd 0+96(%rbp),%xmm12 1302 movdqu 0 + 0(%rsi),%xmm3 1303 movdqu 16 + 0(%rsi),%xmm7 1304 movdqu 32 + 0(%rsi),%xmm11 1305 movdqu 48 + 0(%rsi),%xmm15 1306 pxor %xmm3,%xmm2 1307 pxor %xmm7,%xmm6 1308 pxor %xmm11,%xmm10 1309 pxor %xmm14,%xmm15 1310 movdqu %xmm2,0 + 0(%rdi) 1311 movdqu %xmm6,16 + 0(%rdi) 1312 movdqu %xmm10,32 + 0(%rdi) 1313 movdqu %xmm15,48 + 0(%rdi) 1314 movdqu 0 + 64(%rsi),%xmm3 1315 movdqu 16 + 64(%rsi),%xmm7 1316 movdqu 32 + 64(%rsi),%xmm11 1317 movdqu 48 + 64(%rsi),%xmm15 1318 pxor %xmm3,%xmm1 1319 pxor %xmm7,%xmm5 1320 pxor %xmm11,%xmm9 1321 pxor %xmm13,%xmm15 1322 movdqu %xmm1,0 + 64(%rdi) 1323 movdqu %xmm5,16 + 64(%rdi) 1324 movdqu %xmm9,32 + 64(%rdi) 1325 movdqu %xmm15,48 + 64(%rdi) 1326 1327 subq $128,%rbx 1328 leaq 128(%rsi),%rsi 1329 leaq 128(%rdi),%rdi 1330 jmp L$open_sse_tail_64_dec_loop 1331 1332L$open_sse_tail_256: 1333 movdqa L$chacha20_consts(%rip),%xmm0 1334 movdqa 0+48(%rbp),%xmm4 1335 movdqa 0+64(%rbp),%xmm8 1336 movdqa %xmm0,%xmm1 1337 movdqa %xmm4,%xmm5 1338 movdqa %xmm8,%xmm9 1339 movdqa %xmm0,%xmm2 1340 movdqa %xmm4,%xmm6 1341 movdqa %xmm8,%xmm10 1342 movdqa %xmm0,%xmm3 1343 movdqa %xmm4,%xmm7 1344 movdqa %xmm8,%xmm11 1345 movdqa 0+96(%rbp),%xmm15 1346 paddd L$sse_inc(%rip),%xmm15 1347 movdqa %xmm15,%xmm14 1348 paddd L$sse_inc(%rip),%xmm14 1349 movdqa %xmm14,%xmm13 1350 paddd L$sse_inc(%rip),%xmm13 1351 movdqa %xmm13,%xmm12 1352 paddd L$sse_inc(%rip),%xmm12 1353 movdqa %xmm12,0+96(%rbp) 1354 movdqa %xmm13,0+112(%rbp) 1355 movdqa %xmm14,0+128(%rbp) 1356 movdqa %xmm15,0+144(%rbp) 1357 1358 xorq %r8,%r8 1359L$open_sse_tail_256_rounds_and_x1hash: 1360 addq 0+0(%rsi,%r8,1),%r10 1361 adcq 8+0(%rsi,%r8,1),%r11 1362 adcq $1,%r12 1363 movdqa %xmm11,0+80(%rbp) 1364 paddd %xmm4,%xmm0 1365 pxor %xmm0,%xmm12 1366 pshufb L$rol16(%rip),%xmm12 1367 paddd %xmm12,%xmm8 1368 pxor %xmm8,%xmm4 1369 movdqa 
%xmm4,%xmm11 1370 pslld $12,%xmm11 1371 psrld $20,%xmm4 1372 pxor %xmm11,%xmm4 1373 paddd %xmm4,%xmm0 1374 pxor %xmm0,%xmm12 1375 pshufb L$rol8(%rip),%xmm12 1376 paddd %xmm12,%xmm8 1377 pxor %xmm8,%xmm4 1378 movdqa %xmm4,%xmm11 1379 pslld $7,%xmm11 1380 psrld $25,%xmm4 1381 pxor %xmm11,%xmm4 1382.byte 102,15,58,15,228,4 1383.byte 102,69,15,58,15,192,8 1384.byte 102,69,15,58,15,228,12 1385 paddd %xmm5,%xmm1 1386 pxor %xmm1,%xmm13 1387 pshufb L$rol16(%rip),%xmm13 1388 paddd %xmm13,%xmm9 1389 pxor %xmm9,%xmm5 1390 movdqa %xmm5,%xmm11 1391 pslld $12,%xmm11 1392 psrld $20,%xmm5 1393 pxor %xmm11,%xmm5 1394 paddd %xmm5,%xmm1 1395 pxor %xmm1,%xmm13 1396 pshufb L$rol8(%rip),%xmm13 1397 paddd %xmm13,%xmm9 1398 pxor %xmm9,%xmm5 1399 movdqa %xmm5,%xmm11 1400 pslld $7,%xmm11 1401 psrld $25,%xmm5 1402 pxor %xmm11,%xmm5 1403.byte 102,15,58,15,237,4 1404.byte 102,69,15,58,15,201,8 1405.byte 102,69,15,58,15,237,12 1406 paddd %xmm6,%xmm2 1407 pxor %xmm2,%xmm14 1408 pshufb L$rol16(%rip),%xmm14 1409 paddd %xmm14,%xmm10 1410 pxor %xmm10,%xmm6 1411 movdqa %xmm6,%xmm11 1412 pslld $12,%xmm11 1413 psrld $20,%xmm6 1414 pxor %xmm11,%xmm6 1415 paddd %xmm6,%xmm2 1416 pxor %xmm2,%xmm14 1417 pshufb L$rol8(%rip),%xmm14 1418 paddd %xmm14,%xmm10 1419 pxor %xmm10,%xmm6 1420 movdqa %xmm6,%xmm11 1421 pslld $7,%xmm11 1422 psrld $25,%xmm6 1423 pxor %xmm11,%xmm6 1424.byte 102,15,58,15,246,4 1425.byte 102,69,15,58,15,210,8 1426.byte 102,69,15,58,15,246,12 1427 movdqa 0+80(%rbp),%xmm11 1428 movq 0+0+0(%rbp),%rax 1429 movq %rax,%r15 1430 mulq %r10 1431 movq %rax,%r13 1432 movq %rdx,%r14 1433 movq 0+0+0(%rbp),%rax 1434 mulq %r11 1435 imulq %r12,%r15 1436 addq %rax,%r14 1437 adcq %rdx,%r15 1438 movdqa %xmm9,0+80(%rbp) 1439 paddd %xmm7,%xmm3 1440 pxor %xmm3,%xmm15 1441 pshufb L$rol16(%rip),%xmm15 1442 paddd %xmm15,%xmm11 1443 pxor %xmm11,%xmm7 1444 movdqa %xmm7,%xmm9 1445 pslld $12,%xmm9 1446 psrld $20,%xmm7 1447 pxor %xmm9,%xmm7 1448 paddd %xmm7,%xmm3 1449 pxor %xmm3,%xmm15 1450 pshufb L$rol8(%rip),%xmm15 1451 paddd %xmm15,%xmm11 1452 pxor %xmm11,%xmm7 1453 movdqa %xmm7,%xmm9 1454 pslld $7,%xmm9 1455 psrld $25,%xmm7 1456 pxor %xmm9,%xmm7 1457.byte 102,15,58,15,255,4 1458.byte 102,69,15,58,15,219,8 1459.byte 102,69,15,58,15,255,12 1460 movdqa 0+80(%rbp),%xmm9 1461 movq 8+0+0(%rbp),%rax 1462 movq %rax,%r9 1463 mulq %r10 1464 addq %rax,%r14 1465 adcq $0,%rdx 1466 movq %rdx,%r10 1467 movq 8+0+0(%rbp),%rax 1468 mulq %r11 1469 addq %rax,%r15 1470 adcq $0,%rdx 1471 movdqa %xmm11,0+80(%rbp) 1472 paddd %xmm4,%xmm0 1473 pxor %xmm0,%xmm12 1474 pshufb L$rol16(%rip),%xmm12 1475 paddd %xmm12,%xmm8 1476 pxor %xmm8,%xmm4 1477 movdqa %xmm4,%xmm11 1478 pslld $12,%xmm11 1479 psrld $20,%xmm4 1480 pxor %xmm11,%xmm4 1481 paddd %xmm4,%xmm0 1482 pxor %xmm0,%xmm12 1483 pshufb L$rol8(%rip),%xmm12 1484 paddd %xmm12,%xmm8 1485 pxor %xmm8,%xmm4 1486 movdqa %xmm4,%xmm11 1487 pslld $7,%xmm11 1488 psrld $25,%xmm4 1489 pxor %xmm11,%xmm4 1490.byte 102,15,58,15,228,12 1491.byte 102,69,15,58,15,192,8 1492.byte 102,69,15,58,15,228,4 1493 paddd %xmm5,%xmm1 1494 pxor %xmm1,%xmm13 1495 pshufb L$rol16(%rip),%xmm13 1496 paddd %xmm13,%xmm9 1497 pxor %xmm9,%xmm5 1498 movdqa %xmm5,%xmm11 1499 pslld $12,%xmm11 1500 psrld $20,%xmm5 1501 pxor %xmm11,%xmm5 1502 paddd %xmm5,%xmm1 1503 pxor %xmm1,%xmm13 1504 pshufb L$rol8(%rip),%xmm13 1505 paddd %xmm13,%xmm9 1506 pxor %xmm9,%xmm5 1507 movdqa %xmm5,%xmm11 1508 pslld $7,%xmm11 1509 psrld $25,%xmm5 1510 pxor %xmm11,%xmm5 1511.byte 102,15,58,15,237,12 1512.byte 102,69,15,58,15,201,8 1513.byte 102,69,15,58,15,237,4 1514 imulq %r12,%r9 
addq %r10,%r15
adcq %rdx,%r9
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb L$rol16(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm11
pslld $12,%xmm11
psrld $20,%xmm6
pxor %xmm11,%xmm6
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb L$rol8(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm11
pslld $7,%xmm11
psrld $25,%xmm6
pxor %xmm11,%xmm6
.byte 102,15,58,15,246,12
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,4
movdqa 0+80(%rbp),%xmm11
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12
movdqa %xmm9,0+80(%rbp)
paddd %xmm7,%xmm3
pxor %xmm3,%xmm15
pshufb L$rol16(%rip),%xmm15
paddd %xmm15,%xmm11
pxor %xmm11,%xmm7
movdqa %xmm7,%xmm9
pslld $12,%xmm9
psrld $20,%xmm7
pxor %xmm9,%xmm7
paddd %xmm7,%xmm3
pxor %xmm3,%xmm15
pshufb L$rol8(%rip),%xmm15
paddd %xmm15,%xmm11
pxor %xmm11,%xmm7
movdqa %xmm7,%xmm9
pslld $7,%xmm9
psrld $25,%xmm7
pxor %xmm9,%xmm7
.byte 102,15,58,15,255,12
.byte 102,69,15,58,15,219,8
.byte 102,69,15,58,15,255,4
movdqa 0+80(%rbp),%xmm9

addq $16,%r8
cmpq $160,%r8
jb L$open_sse_tail_256_rounds_and_x1hash

movq %rbx,%rcx
andq $-16,%rcx
L$open_sse_tail_256_hash:
addq 0+0(%rsi,%r8,1),%r10
adcq 8+0(%rsi,%r8,1),%r11
adcq $1,%r12
movq 0+0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12

addq $16,%r8
cmpq %rcx,%r8
jb L$open_sse_tail_256_hash
paddd L$chacha20_consts(%rip),%xmm3
paddd 0+48(%rbp),%xmm7
paddd 0+64(%rbp),%xmm11
paddd 0+144(%rbp),%xmm15
paddd L$chacha20_consts(%rip),%xmm2
paddd 0+48(%rbp),%xmm6
paddd 0+64(%rbp),%xmm10
paddd 0+128(%rbp),%xmm14
paddd L$chacha20_consts(%rip),%xmm1
paddd 0+48(%rbp),%xmm5
paddd 0+64(%rbp),%xmm9
paddd 0+112(%rbp),%xmm13
paddd L$chacha20_consts(%rip),%xmm0
paddd 0+48(%rbp),%xmm4
paddd 0+64(%rbp),%xmm8
paddd 0+96(%rbp),%xmm12
movdqa %xmm12,0+80(%rbp)
movdqu 0 + 0(%rsi),%xmm12
pxor %xmm3,%xmm12
movdqu %xmm12,0 + 0(%rdi)
movdqu 16 + 0(%rsi),%xmm12
pxor %xmm7,%xmm12
movdqu %xmm12,16 + 0(%rdi)
movdqu 32 + 0(%rsi),%xmm12
pxor %xmm11,%xmm12
movdqu %xmm12,32 + 0(%rdi)
movdqu 48 + 0(%rsi),%xmm12
pxor %xmm15,%xmm12
movdqu %xmm12,48 + 0(%rdi)
movdqu 0 + 64(%rsi),%xmm3
movdqu 16 + 64(%rsi),%xmm7
movdqu 32 + 64(%rsi),%xmm11
movdqu 48 + 64(%rsi),%xmm15
pxor %xmm3,%xmm2
pxor %xmm7,%xmm6
pxor %xmm11,%xmm10
pxor %xmm14,%xmm15
movdqu %xmm2,0 + 64(%rdi)
movdqu %xmm6,16 + 64(%rdi)
movdqu %xmm10,32 + 64(%rdi)
movdqu %xmm15,48 + 64(%rdi)
movdqu 0 + 128(%rsi),%xmm3
movdqu 16 + 128(%rsi),%xmm7
movdqu 32 + 128(%rsi),%xmm11
movdqu 48 + 128(%rsi),%xmm15
pxor %xmm3,%xmm1
pxor %xmm7,%xmm5
pxor %xmm11,%xmm9
pxor %xmm13,%xmm15
movdqu %xmm1,0 + 128(%rdi)
movdqu %xmm5,16 + 128(%rdi)
movdqu %xmm9,32 + 128(%rdi)
movdqu %xmm15,48 + 128(%rdi)

movdqa 0+80(%rbp),%xmm12
subq $192,%rbx
leaq 192(%rsi),%rsi
leaq 192(%rdi),%rdi


L$open_sse_tail_64_dec_loop:
cmpq $16,%rbx
jb L$open_sse_tail_16_init
subq $16,%rbx
movdqu (%rsi),%xmm3
pxor %xmm3,%xmm0
movdqu %xmm0,(%rdi)
leaq 16(%rsi),%rsi
leaq 16(%rdi),%rdi
movdqa %xmm4,%xmm0
movdqa %xmm8,%xmm4
movdqa %xmm12,%xmm8
jmp L$open_sse_tail_64_dec_loop
L$open_sse_tail_16_init:
movdqa %xmm0,%xmm1


L$open_sse_tail_16:
testq %rbx,%rbx
jz L$open_sse_finalize



pxor %xmm3,%xmm3
leaq -1(%rsi,%rbx,1),%rsi
movq %rbx,%r8
L$open_sse_tail_16_compose:
pslldq $1,%xmm3
pinsrb $0,(%rsi),%xmm3
subq $1,%rsi
subq $1,%r8
jnz L$open_sse_tail_16_compose

.byte 102,73,15,126,221
pextrq $1,%xmm3,%r14

pxor %xmm1,%xmm3


L$open_sse_tail_16_extract:
pextrb $0,%xmm3,(%rdi)
psrldq $1,%xmm3
addq $1,%rdi
subq $1,%rbx
jne L$open_sse_tail_16_extract

addq %r13,%r10
adcq %r14,%r11
adcq $1,%r12
movq 0+0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12


L$open_sse_finalize:
addq 0+0+32(%rbp),%r10
adcq 8+0+32(%rbp),%r11
adcq $1,%r12
movq 0+0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12


movq %r10,%r13
movq %r11,%r14
movq %r12,%r15
subq $-5,%r10
sbbq $-1,%r11
sbbq $3,%r12
cmovcq %r13,%r10
cmovcq %r14,%r11
cmovcq %r15,%r12

addq 0+0+16(%rbp),%r10
adcq 8+0+16(%rbp),%r11


addq $288 + 0 + 32,%rsp
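/* Poly1305 finalization is complete here: %r10:%r11 hold the 16-byte tag
   (reduced modulo 2^130 - 5, with the pad s from 16(%rbp) added in). It is
   written out below through the data pointer that was pushed at entry and
   is popped back into %r9. */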


popq %r9

movq %r10,(%r9)
movq %r11,8(%r9)
popq %r15

popq %r14

popq %r13

popq %r12

popq %rbx

popq %rbp

.byte 0xf3,0xc3

L$open_sse_128:

movdqu L$chacha20_consts(%rip),%xmm0
movdqa %xmm0,%xmm1
movdqa %xmm0,%xmm2
movdqu 0(%r9),%xmm4
movdqa %xmm4,%xmm5
movdqa %xmm4,%xmm6
movdqu 16(%r9),%xmm8
movdqa %xmm8,%xmm9
movdqa %xmm8,%xmm10
movdqu 32(%r9),%xmm12
movdqa %xmm12,%xmm13
paddd L$sse_inc(%rip),%xmm13
movdqa %xmm13,%xmm14
paddd L$sse_inc(%rip),%xmm14
movdqa %xmm4,%xmm7
movdqa %xmm8,%xmm11
movdqa %xmm13,%xmm15
movq $10,%r10

L$open_sse_128_rounds:
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb L$rol16(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $12,%xmm3
psrld $20,%xmm5
pxor %xmm3,%xmm5
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb L$rol8(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $7,%xmm3
psrld $25,%xmm5
pxor %xmm3,%xmm5
.byte 102,15,58,15,237,4
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,12
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb L$rol16(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm3
pslld $12,%xmm3
psrld $20,%xmm6
pxor %xmm3,%xmm6
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb L$rol8(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm3
pslld $7,%xmm3
psrld $25,%xmm6
pxor %xmm3,%xmm6
.byte 102,15,58,15,246,4
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,12
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol16(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $12,%xmm3
psrld $20,%xmm4
pxor %xmm3,%xmm4
paddd %xmm4,%xmm0
pxor %xmm0,%xmm12
pshufb L$rol8(%rip),%xmm12
paddd %xmm12,%xmm8
pxor %xmm8,%xmm4
movdqa %xmm4,%xmm3
pslld $7,%xmm3
psrld $25,%xmm4
pxor %xmm3,%xmm4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb L$rol16(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $12,%xmm3
psrld $20,%xmm5
pxor %xmm3,%xmm5
paddd %xmm5,%xmm1
pxor %xmm1,%xmm13
pshufb L$rol8(%rip),%xmm13
paddd %xmm13,%xmm9
pxor %xmm9,%xmm5
movdqa %xmm5,%xmm3
pslld $7,%xmm3
psrld $25,%xmm5
pxor %xmm3,%xmm5
.byte 102,15,58,15,237,12
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,4
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb L$rol16(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm3
pslld $12,%xmm3
psrld $20,%xmm6
pxor %xmm3,%xmm6
paddd %xmm6,%xmm2
pxor %xmm2,%xmm14
pshufb L$rol8(%rip),%xmm14
paddd %xmm14,%xmm10
pxor %xmm10,%xmm6
movdqa %xmm6,%xmm3
pslld $7,%xmm3
psrld $25,%xmm6
pxor %xmm3,%xmm6
.byte 102,15,58,15,246,12
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,4

decq %r10
jnz L$open_sse_128_rounds
paddd L$chacha20_consts(%rip),%xmm0
paddd L$chacha20_consts(%rip),%xmm1
paddd L$chacha20_consts(%rip),%xmm2
paddd %xmm7,%xmm4
paddd %xmm7,%xmm5
paddd %xmm7,%xmm6
paddd %xmm11,%xmm9
paddd %xmm11,%xmm10
paddd %xmm15,%xmm13
paddd L$sse_inc(%rip),%xmm15
paddd %xmm15,%xmm14

pand L$clamp(%rip),%xmm0
movdqa %xmm0,0+0(%rbp)
movdqa %xmm4,0+16(%rbp)

movq %r8,%r8
call poly_hash_ad_internal
L$open_sse_128_xor_hash:
cmpq $16,%rbx
jb L$open_sse_tail_16
subq $16,%rbx
addq 0+0(%rsi),%r10
adcq 8+0(%rsi),%r11
adcq $1,%r12


movdqu 0(%rsi),%xmm3
pxor %xmm3,%xmm1
movdqu %xmm1,0(%rdi)
leaq 16(%rsi),%rsi
leaq 16(%rdi),%rdi
movq 0+0+0(%rbp),%rax
movq %rax,%r15
mulq %r10
movq %rax,%r13
movq %rdx,%r14
movq 0+0+0(%rbp),%rax
mulq %r11
imulq %r12,%r15
addq %rax,%r14
adcq %rdx,%r15
movq 8+0+0(%rbp),%rax
movq %rax,%r9
mulq %r10
addq %rax,%r14
adcq $0,%rdx
movq %rdx,%r10
movq 8+0+0(%rbp),%rax
mulq %r11
addq %rax,%r15
adcq $0,%rdx
imulq %r12,%r9
addq %r10,%r15
adcq %rdx,%r9
movq %r13,%r10
movq %r14,%r11
movq %r15,%r12
andq $3,%r12
movq %r15,%r13
andq $-4,%r13
movq %r9,%r14
shrdq $2,%r9,%r15
shrq $2,%r9
addq %r13,%r15
adcq %r14,%r9
addq %r15,%r10
adcq %r9,%r11
adcq $0,%r12


movdqa %xmm5,%xmm1
movdqa %xmm9,%xmm5
movdqa %xmm13,%xmm9
movdqa %xmm2,%xmm13
movdqa %xmm6,%xmm2
movdqa %xmm10,%xmm6
movdqa %xmm14,%xmm10
jmp L$open_sse_128_xor_hash









.globl _chacha20_poly1305_seal
.private_extern _chacha20_poly1305_seal

.p2align 6
_chacha20_poly1305_seal:

pushq %rbp

pushq %rbx

pushq %r12

pushq %r13

pushq %r14

pushq %r15



pushq %r9

subq $288 + 0 + 32,%rsp

leaq 32(%rsp),%rbp
andq $-32,%rbp

movq 56(%r9),%rbx
addq %rdx,%rbx
movq %r8,0+0+32(%rbp)
movq %rbx,8+0+32(%rbp)
movq %rdx,%rbx

movl _OPENSSL_ia32cap_P+8(%rip),%eax
andl $288,%eax
xorl $288,%eax
jz chacha20_poly1305_seal_avx2

cmpq $128,%rbx
jbe L$seal_sse_128

movdqa L$chacha20_consts(%rip),%xmm0
movdqu 0(%r9),%xmm4
movdqu 16(%r9),%xmm8
movdqu 32(%r9),%xmm12

movdqa %xmm0,%xmm1
movdqa %xmm0,%xmm2
movdqa %xmm0,%xmm3
movdqa %xmm4,%xmm5
movdqa %xmm4,%xmm6
movdqa %xmm4,%xmm7
movdqa %xmm8,%xmm9
movdqa %xmm8,%xmm10
movdqa %xmm8,%xmm11
movdqa %xmm12,%xmm15
paddd L$sse_inc(%rip),%xmm12
movdqa %xmm12,%xmm14
paddd L$sse_inc(%rip),%xmm12
movdqa %xmm12,%xmm13
paddd L$sse_inc(%rip),%xmm12

movdqa %xmm4,0+48(%rbp)
movdqa %xmm8,0+64(%rbp)
movdqa %xmm12,0+96(%rbp)
movdqa %xmm13,0+112(%rbp)
movdqa %xmm14,0+128(%rbp)
movdqa %xmm15,0+144(%rbp)
movq $10,%r10
L$seal_sse_init_rounds:
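/* Ten ChaCha20 double-rounds over four parallel states follow. After the
   rounds, the state built on the initial counter row is clamped with
   L$clamp and stored at 0(%rbp)/16(%rbp) as the one-time Poly1305 key;
   the other three states supply keystream for up to the first 192 bytes. */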
2163 movdqa %xmm8,0+80(%rbp) 2164 movdqa L$rol16(%rip),%xmm8 2165 paddd %xmm7,%xmm3 2166 paddd %xmm6,%xmm2 2167 paddd %xmm5,%xmm1 2168 paddd %xmm4,%xmm0 2169 pxor %xmm3,%xmm15 2170 pxor %xmm2,%xmm14 2171 pxor %xmm1,%xmm13 2172 pxor %xmm0,%xmm12 2173.byte 102,69,15,56,0,248 2174.byte 102,69,15,56,0,240 2175.byte 102,69,15,56,0,232 2176.byte 102,69,15,56,0,224 2177 movdqa 0+80(%rbp),%xmm8 2178 paddd %xmm15,%xmm11 2179 paddd %xmm14,%xmm10 2180 paddd %xmm13,%xmm9 2181 paddd %xmm12,%xmm8 2182 pxor %xmm11,%xmm7 2183 pxor %xmm10,%xmm6 2184 pxor %xmm9,%xmm5 2185 pxor %xmm8,%xmm4 2186 movdqa %xmm8,0+80(%rbp) 2187 movdqa %xmm7,%xmm8 2188 psrld $20,%xmm8 2189 pslld $32-20,%xmm7 2190 pxor %xmm8,%xmm7 2191 movdqa %xmm6,%xmm8 2192 psrld $20,%xmm8 2193 pslld $32-20,%xmm6 2194 pxor %xmm8,%xmm6 2195 movdqa %xmm5,%xmm8 2196 psrld $20,%xmm8 2197 pslld $32-20,%xmm5 2198 pxor %xmm8,%xmm5 2199 movdqa %xmm4,%xmm8 2200 psrld $20,%xmm8 2201 pslld $32-20,%xmm4 2202 pxor %xmm8,%xmm4 2203 movdqa L$rol8(%rip),%xmm8 2204 paddd %xmm7,%xmm3 2205 paddd %xmm6,%xmm2 2206 paddd %xmm5,%xmm1 2207 paddd %xmm4,%xmm0 2208 pxor %xmm3,%xmm15 2209 pxor %xmm2,%xmm14 2210 pxor %xmm1,%xmm13 2211 pxor %xmm0,%xmm12 2212.byte 102,69,15,56,0,248 2213.byte 102,69,15,56,0,240 2214.byte 102,69,15,56,0,232 2215.byte 102,69,15,56,0,224 2216 movdqa 0+80(%rbp),%xmm8 2217 paddd %xmm15,%xmm11 2218 paddd %xmm14,%xmm10 2219 paddd %xmm13,%xmm9 2220 paddd %xmm12,%xmm8 2221 pxor %xmm11,%xmm7 2222 pxor %xmm10,%xmm6 2223 pxor %xmm9,%xmm5 2224 pxor %xmm8,%xmm4 2225 movdqa %xmm8,0+80(%rbp) 2226 movdqa %xmm7,%xmm8 2227 psrld $25,%xmm8 2228 pslld $32-25,%xmm7 2229 pxor %xmm8,%xmm7 2230 movdqa %xmm6,%xmm8 2231 psrld $25,%xmm8 2232 pslld $32-25,%xmm6 2233 pxor %xmm8,%xmm6 2234 movdqa %xmm5,%xmm8 2235 psrld $25,%xmm8 2236 pslld $32-25,%xmm5 2237 pxor %xmm8,%xmm5 2238 movdqa %xmm4,%xmm8 2239 psrld $25,%xmm8 2240 pslld $32-25,%xmm4 2241 pxor %xmm8,%xmm4 2242 movdqa 0+80(%rbp),%xmm8 2243.byte 102,15,58,15,255,4 2244.byte 102,69,15,58,15,219,8 2245.byte 102,69,15,58,15,255,12 2246.byte 102,15,58,15,246,4 2247.byte 102,69,15,58,15,210,8 2248.byte 102,69,15,58,15,246,12 2249.byte 102,15,58,15,237,4 2250.byte 102,69,15,58,15,201,8 2251.byte 102,69,15,58,15,237,12 2252.byte 102,15,58,15,228,4 2253.byte 102,69,15,58,15,192,8 2254.byte 102,69,15,58,15,228,12 2255 movdqa %xmm8,0+80(%rbp) 2256 movdqa L$rol16(%rip),%xmm8 2257 paddd %xmm7,%xmm3 2258 paddd %xmm6,%xmm2 2259 paddd %xmm5,%xmm1 2260 paddd %xmm4,%xmm0 2261 pxor %xmm3,%xmm15 2262 pxor %xmm2,%xmm14 2263 pxor %xmm1,%xmm13 2264 pxor %xmm0,%xmm12 2265.byte 102,69,15,56,0,248 2266.byte 102,69,15,56,0,240 2267.byte 102,69,15,56,0,232 2268.byte 102,69,15,56,0,224 2269 movdqa 0+80(%rbp),%xmm8 2270 paddd %xmm15,%xmm11 2271 paddd %xmm14,%xmm10 2272 paddd %xmm13,%xmm9 2273 paddd %xmm12,%xmm8 2274 pxor %xmm11,%xmm7 2275 pxor %xmm10,%xmm6 2276 pxor %xmm9,%xmm5 2277 pxor %xmm8,%xmm4 2278 movdqa %xmm8,0+80(%rbp) 2279 movdqa %xmm7,%xmm8 2280 psrld $20,%xmm8 2281 pslld $32-20,%xmm7 2282 pxor %xmm8,%xmm7 2283 movdqa %xmm6,%xmm8 2284 psrld $20,%xmm8 2285 pslld $32-20,%xmm6 2286 pxor %xmm8,%xmm6 2287 movdqa %xmm5,%xmm8 2288 psrld $20,%xmm8 2289 pslld $32-20,%xmm5 2290 pxor %xmm8,%xmm5 2291 movdqa %xmm4,%xmm8 2292 psrld $20,%xmm8 2293 pslld $32-20,%xmm4 2294 pxor %xmm8,%xmm4 2295 movdqa L$rol8(%rip),%xmm8 2296 paddd %xmm7,%xmm3 2297 paddd %xmm6,%xmm2 2298 paddd %xmm5,%xmm1 2299 paddd %xmm4,%xmm0 2300 pxor %xmm3,%xmm15 2301 pxor %xmm2,%xmm14 2302 pxor %xmm1,%xmm13 2303 pxor %xmm0,%xmm12 2304.byte 102,69,15,56,0,248 2305.byte 
102,69,15,56,0,240 2306.byte 102,69,15,56,0,232 2307.byte 102,69,15,56,0,224 2308 movdqa 0+80(%rbp),%xmm8 2309 paddd %xmm15,%xmm11 2310 paddd %xmm14,%xmm10 2311 paddd %xmm13,%xmm9 2312 paddd %xmm12,%xmm8 2313 pxor %xmm11,%xmm7 2314 pxor %xmm10,%xmm6 2315 pxor %xmm9,%xmm5 2316 pxor %xmm8,%xmm4 2317 movdqa %xmm8,0+80(%rbp) 2318 movdqa %xmm7,%xmm8 2319 psrld $25,%xmm8 2320 pslld $32-25,%xmm7 2321 pxor %xmm8,%xmm7 2322 movdqa %xmm6,%xmm8 2323 psrld $25,%xmm8 2324 pslld $32-25,%xmm6 2325 pxor %xmm8,%xmm6 2326 movdqa %xmm5,%xmm8 2327 psrld $25,%xmm8 2328 pslld $32-25,%xmm5 2329 pxor %xmm8,%xmm5 2330 movdqa %xmm4,%xmm8 2331 psrld $25,%xmm8 2332 pslld $32-25,%xmm4 2333 pxor %xmm8,%xmm4 2334 movdqa 0+80(%rbp),%xmm8 2335.byte 102,15,58,15,255,12 2336.byte 102,69,15,58,15,219,8 2337.byte 102,69,15,58,15,255,4 2338.byte 102,15,58,15,246,12 2339.byte 102,69,15,58,15,210,8 2340.byte 102,69,15,58,15,246,4 2341.byte 102,15,58,15,237,12 2342.byte 102,69,15,58,15,201,8 2343.byte 102,69,15,58,15,237,4 2344.byte 102,15,58,15,228,12 2345.byte 102,69,15,58,15,192,8 2346.byte 102,69,15,58,15,228,4 2347 2348 decq %r10 2349 jnz L$seal_sse_init_rounds 2350 paddd L$chacha20_consts(%rip),%xmm3 2351 paddd 0+48(%rbp),%xmm7 2352 paddd 0+64(%rbp),%xmm11 2353 paddd 0+144(%rbp),%xmm15 2354 paddd L$chacha20_consts(%rip),%xmm2 2355 paddd 0+48(%rbp),%xmm6 2356 paddd 0+64(%rbp),%xmm10 2357 paddd 0+128(%rbp),%xmm14 2358 paddd L$chacha20_consts(%rip),%xmm1 2359 paddd 0+48(%rbp),%xmm5 2360 paddd 0+64(%rbp),%xmm9 2361 paddd 0+112(%rbp),%xmm13 2362 paddd L$chacha20_consts(%rip),%xmm0 2363 paddd 0+48(%rbp),%xmm4 2364 paddd 0+64(%rbp),%xmm8 2365 paddd 0+96(%rbp),%xmm12 2366 2367 2368 pand L$clamp(%rip),%xmm3 2369 movdqa %xmm3,0+0(%rbp) 2370 movdqa %xmm7,0+16(%rbp) 2371 2372 movq %r8,%r8 2373 call poly_hash_ad_internal 2374 movdqu 0 + 0(%rsi),%xmm3 2375 movdqu 16 + 0(%rsi),%xmm7 2376 movdqu 32 + 0(%rsi),%xmm11 2377 movdqu 48 + 0(%rsi),%xmm15 2378 pxor %xmm3,%xmm2 2379 pxor %xmm7,%xmm6 2380 pxor %xmm11,%xmm10 2381 pxor %xmm14,%xmm15 2382 movdqu %xmm2,0 + 0(%rdi) 2383 movdqu %xmm6,16 + 0(%rdi) 2384 movdqu %xmm10,32 + 0(%rdi) 2385 movdqu %xmm15,48 + 0(%rdi) 2386 movdqu 0 + 64(%rsi),%xmm3 2387 movdqu 16 + 64(%rsi),%xmm7 2388 movdqu 32 + 64(%rsi),%xmm11 2389 movdqu 48 + 64(%rsi),%xmm15 2390 pxor %xmm3,%xmm1 2391 pxor %xmm7,%xmm5 2392 pxor %xmm11,%xmm9 2393 pxor %xmm13,%xmm15 2394 movdqu %xmm1,0 + 64(%rdi) 2395 movdqu %xmm5,16 + 64(%rdi) 2396 movdqu %xmm9,32 + 64(%rdi) 2397 movdqu %xmm15,48 + 64(%rdi) 2398 2399 cmpq $192,%rbx 2400 ja L$seal_sse_main_init 2401 movq $128,%rcx 2402 subq $128,%rbx 2403 leaq 128(%rsi),%rsi 2404 jmp L$seal_sse_128_tail_hash 2405L$seal_sse_main_init: 2406 movdqu 0 + 128(%rsi),%xmm3 2407 movdqu 16 + 128(%rsi),%xmm7 2408 movdqu 32 + 128(%rsi),%xmm11 2409 movdqu 48 + 128(%rsi),%xmm15 2410 pxor %xmm3,%xmm0 2411 pxor %xmm7,%xmm4 2412 pxor %xmm11,%xmm8 2413 pxor %xmm12,%xmm15 2414 movdqu %xmm0,0 + 128(%rdi) 2415 movdqu %xmm4,16 + 128(%rdi) 2416 movdqu %xmm8,32 + 128(%rdi) 2417 movdqu %xmm15,48 + 128(%rdi) 2418 2419 movq $192,%rcx 2420 subq $192,%rbx 2421 leaq 192(%rsi),%rsi 2422 movq $2,%rcx 2423 movq $8,%r8 2424 cmpq $64,%rbx 2425 jbe L$seal_sse_tail_64 2426 cmpq $128,%rbx 2427 jbe L$seal_sse_tail_128 2428 cmpq $192,%rbx 2429 jbe L$seal_sse_tail_192 2430 2431L$seal_sse_main_loop: 2432 movdqa L$chacha20_consts(%rip),%xmm0 2433 movdqa 0+48(%rbp),%xmm4 2434 movdqa 0+64(%rbp),%xmm8 2435 movdqa %xmm0,%xmm1 2436 movdqa %xmm4,%xmm5 2437 movdqa %xmm8,%xmm9 2438 movdqa %xmm0,%xmm2 2439 movdqa %xmm4,%xmm6 2440 movdqa 
%xmm8,%xmm10 2441 movdqa %xmm0,%xmm3 2442 movdqa %xmm4,%xmm7 2443 movdqa %xmm8,%xmm11 2444 movdqa 0+96(%rbp),%xmm15 2445 paddd L$sse_inc(%rip),%xmm15 2446 movdqa %xmm15,%xmm14 2447 paddd L$sse_inc(%rip),%xmm14 2448 movdqa %xmm14,%xmm13 2449 paddd L$sse_inc(%rip),%xmm13 2450 movdqa %xmm13,%xmm12 2451 paddd L$sse_inc(%rip),%xmm12 2452 movdqa %xmm12,0+96(%rbp) 2453 movdqa %xmm13,0+112(%rbp) 2454 movdqa %xmm14,0+128(%rbp) 2455 movdqa %xmm15,0+144(%rbp) 2456 2457.p2align 5 2458L$seal_sse_main_rounds: 2459 movdqa %xmm8,0+80(%rbp) 2460 movdqa L$rol16(%rip),%xmm8 2461 paddd %xmm7,%xmm3 2462 paddd %xmm6,%xmm2 2463 paddd %xmm5,%xmm1 2464 paddd %xmm4,%xmm0 2465 pxor %xmm3,%xmm15 2466 pxor %xmm2,%xmm14 2467 pxor %xmm1,%xmm13 2468 pxor %xmm0,%xmm12 2469.byte 102,69,15,56,0,248 2470.byte 102,69,15,56,0,240 2471.byte 102,69,15,56,0,232 2472.byte 102,69,15,56,0,224 2473 movdqa 0+80(%rbp),%xmm8 2474 paddd %xmm15,%xmm11 2475 paddd %xmm14,%xmm10 2476 paddd %xmm13,%xmm9 2477 paddd %xmm12,%xmm8 2478 pxor %xmm11,%xmm7 2479 addq 0+0(%rdi),%r10 2480 adcq 8+0(%rdi),%r11 2481 adcq $1,%r12 2482 pxor %xmm10,%xmm6 2483 pxor %xmm9,%xmm5 2484 pxor %xmm8,%xmm4 2485 movdqa %xmm8,0+80(%rbp) 2486 movdqa %xmm7,%xmm8 2487 psrld $20,%xmm8 2488 pslld $32-20,%xmm7 2489 pxor %xmm8,%xmm7 2490 movdqa %xmm6,%xmm8 2491 psrld $20,%xmm8 2492 pslld $32-20,%xmm6 2493 pxor %xmm8,%xmm6 2494 movdqa %xmm5,%xmm8 2495 psrld $20,%xmm8 2496 pslld $32-20,%xmm5 2497 pxor %xmm8,%xmm5 2498 movdqa %xmm4,%xmm8 2499 psrld $20,%xmm8 2500 pslld $32-20,%xmm4 2501 pxor %xmm8,%xmm4 2502 movq 0+0+0(%rbp),%rax 2503 movq %rax,%r15 2504 mulq %r10 2505 movq %rax,%r13 2506 movq %rdx,%r14 2507 movq 0+0+0(%rbp),%rax 2508 mulq %r11 2509 imulq %r12,%r15 2510 addq %rax,%r14 2511 adcq %rdx,%r15 2512 movdqa L$rol8(%rip),%xmm8 2513 paddd %xmm7,%xmm3 2514 paddd %xmm6,%xmm2 2515 paddd %xmm5,%xmm1 2516 paddd %xmm4,%xmm0 2517 pxor %xmm3,%xmm15 2518 pxor %xmm2,%xmm14 2519 pxor %xmm1,%xmm13 2520 pxor %xmm0,%xmm12 2521.byte 102,69,15,56,0,248 2522.byte 102,69,15,56,0,240 2523.byte 102,69,15,56,0,232 2524.byte 102,69,15,56,0,224 2525 movdqa 0+80(%rbp),%xmm8 2526 paddd %xmm15,%xmm11 2527 paddd %xmm14,%xmm10 2528 paddd %xmm13,%xmm9 2529 paddd %xmm12,%xmm8 2530 pxor %xmm11,%xmm7 2531 pxor %xmm10,%xmm6 2532 movq 8+0+0(%rbp),%rax 2533 movq %rax,%r9 2534 mulq %r10 2535 addq %rax,%r14 2536 adcq $0,%rdx 2537 movq %rdx,%r10 2538 movq 8+0+0(%rbp),%rax 2539 mulq %r11 2540 addq %rax,%r15 2541 adcq $0,%rdx 2542 pxor %xmm9,%xmm5 2543 pxor %xmm8,%xmm4 2544 movdqa %xmm8,0+80(%rbp) 2545 movdqa %xmm7,%xmm8 2546 psrld $25,%xmm8 2547 pslld $32-25,%xmm7 2548 pxor %xmm8,%xmm7 2549 movdqa %xmm6,%xmm8 2550 psrld $25,%xmm8 2551 pslld $32-25,%xmm6 2552 pxor %xmm8,%xmm6 2553 movdqa %xmm5,%xmm8 2554 psrld $25,%xmm8 2555 pslld $32-25,%xmm5 2556 pxor %xmm8,%xmm5 2557 movdqa %xmm4,%xmm8 2558 psrld $25,%xmm8 2559 pslld $32-25,%xmm4 2560 pxor %xmm8,%xmm4 2561 movdqa 0+80(%rbp),%xmm8 2562 imulq %r12,%r9 2563 addq %r10,%r15 2564 adcq %rdx,%r9 2565.byte 102,15,58,15,255,4 2566.byte 102,69,15,58,15,219,8 2567.byte 102,69,15,58,15,255,12 2568.byte 102,15,58,15,246,4 2569.byte 102,69,15,58,15,210,8 2570.byte 102,69,15,58,15,246,12 2571.byte 102,15,58,15,237,4 2572.byte 102,69,15,58,15,201,8 2573.byte 102,69,15,58,15,237,12 2574.byte 102,15,58,15,228,4 2575.byte 102,69,15,58,15,192,8 2576.byte 102,69,15,58,15,228,12 2577 movdqa %xmm8,0+80(%rbp) 2578 movdqa L$rol16(%rip),%xmm8 2579 paddd %xmm7,%xmm3 2580 paddd %xmm6,%xmm2 2581 paddd %xmm5,%xmm1 2582 paddd %xmm4,%xmm0 2583 pxor %xmm3,%xmm15 2584 pxor %xmm2,%xmm14 2585 
movq %r13,%r10 2586 movq %r14,%r11 2587 movq %r15,%r12 2588 andq $3,%r12 2589 movq %r15,%r13 2590 andq $-4,%r13 2591 movq %r9,%r14 2592 shrdq $2,%r9,%r15 2593 shrq $2,%r9 2594 addq %r13,%r15 2595 adcq %r14,%r9 2596 addq %r15,%r10 2597 adcq %r9,%r11 2598 adcq $0,%r12 2599 pxor %xmm1,%xmm13 2600 pxor %xmm0,%xmm12 2601.byte 102,69,15,56,0,248 2602.byte 102,69,15,56,0,240 2603.byte 102,69,15,56,0,232 2604.byte 102,69,15,56,0,224 2605 movdqa 0+80(%rbp),%xmm8 2606 paddd %xmm15,%xmm11 2607 paddd %xmm14,%xmm10 2608 paddd %xmm13,%xmm9 2609 paddd %xmm12,%xmm8 2610 pxor %xmm11,%xmm7 2611 pxor %xmm10,%xmm6 2612 pxor %xmm9,%xmm5 2613 pxor %xmm8,%xmm4 2614 movdqa %xmm8,0+80(%rbp) 2615 movdqa %xmm7,%xmm8 2616 psrld $20,%xmm8 2617 pslld $32-20,%xmm7 2618 pxor %xmm8,%xmm7 2619 movdqa %xmm6,%xmm8 2620 psrld $20,%xmm8 2621 pslld $32-20,%xmm6 2622 pxor %xmm8,%xmm6 2623 movdqa %xmm5,%xmm8 2624 psrld $20,%xmm8 2625 pslld $32-20,%xmm5 2626 pxor %xmm8,%xmm5 2627 movdqa %xmm4,%xmm8 2628 psrld $20,%xmm8 2629 pslld $32-20,%xmm4 2630 pxor %xmm8,%xmm4 2631 movdqa L$rol8(%rip),%xmm8 2632 paddd %xmm7,%xmm3 2633 paddd %xmm6,%xmm2 2634 paddd %xmm5,%xmm1 2635 paddd %xmm4,%xmm0 2636 pxor %xmm3,%xmm15 2637 pxor %xmm2,%xmm14 2638 pxor %xmm1,%xmm13 2639 pxor %xmm0,%xmm12 2640.byte 102,69,15,56,0,248 2641.byte 102,69,15,56,0,240 2642.byte 102,69,15,56,0,232 2643.byte 102,69,15,56,0,224 2644 movdqa 0+80(%rbp),%xmm8 2645 paddd %xmm15,%xmm11 2646 paddd %xmm14,%xmm10 2647 paddd %xmm13,%xmm9 2648 paddd %xmm12,%xmm8 2649 pxor %xmm11,%xmm7 2650 pxor %xmm10,%xmm6 2651 pxor %xmm9,%xmm5 2652 pxor %xmm8,%xmm4 2653 movdqa %xmm8,0+80(%rbp) 2654 movdqa %xmm7,%xmm8 2655 psrld $25,%xmm8 2656 pslld $32-25,%xmm7 2657 pxor %xmm8,%xmm7 2658 movdqa %xmm6,%xmm8 2659 psrld $25,%xmm8 2660 pslld $32-25,%xmm6 2661 pxor %xmm8,%xmm6 2662 movdqa %xmm5,%xmm8 2663 psrld $25,%xmm8 2664 pslld $32-25,%xmm5 2665 pxor %xmm8,%xmm5 2666 movdqa %xmm4,%xmm8 2667 psrld $25,%xmm8 2668 pslld $32-25,%xmm4 2669 pxor %xmm8,%xmm4 2670 movdqa 0+80(%rbp),%xmm8 2671.byte 102,15,58,15,255,12 2672.byte 102,69,15,58,15,219,8 2673.byte 102,69,15,58,15,255,4 2674.byte 102,15,58,15,246,12 2675.byte 102,69,15,58,15,210,8 2676.byte 102,69,15,58,15,246,4 2677.byte 102,15,58,15,237,12 2678.byte 102,69,15,58,15,201,8 2679.byte 102,69,15,58,15,237,4 2680.byte 102,15,58,15,228,12 2681.byte 102,69,15,58,15,192,8 2682.byte 102,69,15,58,15,228,4 2683 2684 leaq 16(%rdi),%rdi 2685 decq %r8 2686 jge L$seal_sse_main_rounds 2687 addq 0+0(%rdi),%r10 2688 adcq 8+0(%rdi),%r11 2689 adcq $1,%r12 2690 movq 0+0+0(%rbp),%rax 2691 movq %rax,%r15 2692 mulq %r10 2693 movq %rax,%r13 2694 movq %rdx,%r14 2695 movq 0+0+0(%rbp),%rax 2696 mulq %r11 2697 imulq %r12,%r15 2698 addq %rax,%r14 2699 adcq %rdx,%r15 2700 movq 8+0+0(%rbp),%rax 2701 movq %rax,%r9 2702 mulq %r10 2703 addq %rax,%r14 2704 adcq $0,%rdx 2705 movq %rdx,%r10 2706 movq 8+0+0(%rbp),%rax 2707 mulq %r11 2708 addq %rax,%r15 2709 adcq $0,%rdx 2710 imulq %r12,%r9 2711 addq %r10,%r15 2712 adcq %rdx,%r9 2713 movq %r13,%r10 2714 movq %r14,%r11 2715 movq %r15,%r12 2716 andq $3,%r12 2717 movq %r15,%r13 2718 andq $-4,%r13 2719 movq %r9,%r14 2720 shrdq $2,%r9,%r15 2721 shrq $2,%r9 2722 addq %r13,%r15 2723 adcq %r14,%r9 2724 addq %r15,%r10 2725 adcq %r9,%r11 2726 adcq $0,%r12 2727 2728 leaq 16(%rdi),%rdi 2729 decq %rcx 2730 jg L$seal_sse_main_rounds 2731 paddd L$chacha20_consts(%rip),%xmm3 2732 paddd 0+48(%rbp),%xmm7 2733 paddd 0+64(%rbp),%xmm11 2734 paddd 0+144(%rbp),%xmm15 2735 paddd L$chacha20_consts(%rip),%xmm2 2736 paddd 0+48(%rbp),%xmm6 2737 paddd 
0+64(%rbp),%xmm10 2738 paddd 0+128(%rbp),%xmm14 2739 paddd L$chacha20_consts(%rip),%xmm1 2740 paddd 0+48(%rbp),%xmm5 2741 paddd 0+64(%rbp),%xmm9 2742 paddd 0+112(%rbp),%xmm13 2743 paddd L$chacha20_consts(%rip),%xmm0 2744 paddd 0+48(%rbp),%xmm4 2745 paddd 0+64(%rbp),%xmm8 2746 paddd 0+96(%rbp),%xmm12 2747 2748 movdqa %xmm14,0+80(%rbp) 2749 movdqa %xmm14,0+80(%rbp) 2750 movdqu 0 + 0(%rsi),%xmm14 2751 pxor %xmm3,%xmm14 2752 movdqu %xmm14,0 + 0(%rdi) 2753 movdqu 16 + 0(%rsi),%xmm14 2754 pxor %xmm7,%xmm14 2755 movdqu %xmm14,16 + 0(%rdi) 2756 movdqu 32 + 0(%rsi),%xmm14 2757 pxor %xmm11,%xmm14 2758 movdqu %xmm14,32 + 0(%rdi) 2759 movdqu 48 + 0(%rsi),%xmm14 2760 pxor %xmm15,%xmm14 2761 movdqu %xmm14,48 + 0(%rdi) 2762 2763 movdqa 0+80(%rbp),%xmm14 2764 movdqu 0 + 64(%rsi),%xmm3 2765 movdqu 16 + 64(%rsi),%xmm7 2766 movdqu 32 + 64(%rsi),%xmm11 2767 movdqu 48 + 64(%rsi),%xmm15 2768 pxor %xmm3,%xmm2 2769 pxor %xmm7,%xmm6 2770 pxor %xmm11,%xmm10 2771 pxor %xmm14,%xmm15 2772 movdqu %xmm2,0 + 64(%rdi) 2773 movdqu %xmm6,16 + 64(%rdi) 2774 movdqu %xmm10,32 + 64(%rdi) 2775 movdqu %xmm15,48 + 64(%rdi) 2776 movdqu 0 + 128(%rsi),%xmm3 2777 movdqu 16 + 128(%rsi),%xmm7 2778 movdqu 32 + 128(%rsi),%xmm11 2779 movdqu 48 + 128(%rsi),%xmm15 2780 pxor %xmm3,%xmm1 2781 pxor %xmm7,%xmm5 2782 pxor %xmm11,%xmm9 2783 pxor %xmm13,%xmm15 2784 movdqu %xmm1,0 + 128(%rdi) 2785 movdqu %xmm5,16 + 128(%rdi) 2786 movdqu %xmm9,32 + 128(%rdi) 2787 movdqu %xmm15,48 + 128(%rdi) 2788 2789 cmpq $256,%rbx 2790 ja L$seal_sse_main_loop_xor 2791 2792 movq $192,%rcx 2793 subq $192,%rbx 2794 leaq 192(%rsi),%rsi 2795 jmp L$seal_sse_128_tail_hash 2796L$seal_sse_main_loop_xor: 2797 movdqu 0 + 192(%rsi),%xmm3 2798 movdqu 16 + 192(%rsi),%xmm7 2799 movdqu 32 + 192(%rsi),%xmm11 2800 movdqu 48 + 192(%rsi),%xmm15 2801 pxor %xmm3,%xmm0 2802 pxor %xmm7,%xmm4 2803 pxor %xmm11,%xmm8 2804 pxor %xmm12,%xmm15 2805 movdqu %xmm0,0 + 192(%rdi) 2806 movdqu %xmm4,16 + 192(%rdi) 2807 movdqu %xmm8,32 + 192(%rdi) 2808 movdqu %xmm15,48 + 192(%rdi) 2809 2810 leaq 256(%rsi),%rsi 2811 subq $256,%rbx 2812 movq $6,%rcx 2813 movq $4,%r8 2814 cmpq $192,%rbx 2815 jg L$seal_sse_main_loop 2816 movq %rbx,%rcx 2817 testq %rbx,%rbx 2818 je L$seal_sse_128_tail_hash 2819 movq $6,%rcx 2820 cmpq $128,%rbx 2821 ja L$seal_sse_tail_192 2822 cmpq $64,%rbx 2823 ja L$seal_sse_tail_128 2824 2825L$seal_sse_tail_64: 2826 movdqa L$chacha20_consts(%rip),%xmm0 2827 movdqa 0+48(%rbp),%xmm4 2828 movdqa 0+64(%rbp),%xmm8 2829 movdqa 0+96(%rbp),%xmm12 2830 paddd L$sse_inc(%rip),%xmm12 2831 movdqa %xmm12,0+96(%rbp) 2832 2833L$seal_sse_tail_64_rounds_and_x2hash: 2834 addq 0+0(%rdi),%r10 2835 adcq 8+0(%rdi),%r11 2836 adcq $1,%r12 2837 movq 0+0+0(%rbp),%rax 2838 movq %rax,%r15 2839 mulq %r10 2840 movq %rax,%r13 2841 movq %rdx,%r14 2842 movq 0+0+0(%rbp),%rax 2843 mulq %r11 2844 imulq %r12,%r15 2845 addq %rax,%r14 2846 adcq %rdx,%r15 2847 movq 8+0+0(%rbp),%rax 2848 movq %rax,%r9 2849 mulq %r10 2850 addq %rax,%r14 2851 adcq $0,%rdx 2852 movq %rdx,%r10 2853 movq 8+0+0(%rbp),%rax 2854 mulq %r11 2855 addq %rax,%r15 2856 adcq $0,%rdx 2857 imulq %r12,%r9 2858 addq %r10,%r15 2859 adcq %rdx,%r9 2860 movq %r13,%r10 2861 movq %r14,%r11 2862 movq %r15,%r12 2863 andq $3,%r12 2864 movq %r15,%r13 2865 andq $-4,%r13 2866 movq %r9,%r14 2867 shrdq $2,%r9,%r15 2868 shrq $2,%r9 2869 addq %r13,%r15 2870 adcq %r14,%r9 2871 addq %r15,%r10 2872 adcq %r9,%r11 2873 adcq $0,%r12 2874 2875 leaq 16(%rdi),%rdi 2876L$seal_sse_tail_64_rounds_and_x1hash: 2877 paddd %xmm4,%xmm0 2878 pxor %xmm0,%xmm12 2879 pshufb L$rol16(%rip),%xmm12 2880 
paddd %xmm12,%xmm8 2881 pxor %xmm8,%xmm4 2882 movdqa %xmm4,%xmm3 2883 pslld $12,%xmm3 2884 psrld $20,%xmm4 2885 pxor %xmm3,%xmm4 2886 paddd %xmm4,%xmm0 2887 pxor %xmm0,%xmm12 2888 pshufb L$rol8(%rip),%xmm12 2889 paddd %xmm12,%xmm8 2890 pxor %xmm8,%xmm4 2891 movdqa %xmm4,%xmm3 2892 pslld $7,%xmm3 2893 psrld $25,%xmm4 2894 pxor %xmm3,%xmm4 2895.byte 102,15,58,15,228,4 2896.byte 102,69,15,58,15,192,8 2897.byte 102,69,15,58,15,228,12 2898 paddd %xmm4,%xmm0 2899 pxor %xmm0,%xmm12 2900 pshufb L$rol16(%rip),%xmm12 2901 paddd %xmm12,%xmm8 2902 pxor %xmm8,%xmm4 2903 movdqa %xmm4,%xmm3 2904 pslld $12,%xmm3 2905 psrld $20,%xmm4 2906 pxor %xmm3,%xmm4 2907 paddd %xmm4,%xmm0 2908 pxor %xmm0,%xmm12 2909 pshufb L$rol8(%rip),%xmm12 2910 paddd %xmm12,%xmm8 2911 pxor %xmm8,%xmm4 2912 movdqa %xmm4,%xmm3 2913 pslld $7,%xmm3 2914 psrld $25,%xmm4 2915 pxor %xmm3,%xmm4 2916.byte 102,15,58,15,228,12 2917.byte 102,69,15,58,15,192,8 2918.byte 102,69,15,58,15,228,4 2919 addq 0+0(%rdi),%r10 2920 adcq 8+0(%rdi),%r11 2921 adcq $1,%r12 2922 movq 0+0+0(%rbp),%rax 2923 movq %rax,%r15 2924 mulq %r10 2925 movq %rax,%r13 2926 movq %rdx,%r14 2927 movq 0+0+0(%rbp),%rax 2928 mulq %r11 2929 imulq %r12,%r15 2930 addq %rax,%r14 2931 adcq %rdx,%r15 2932 movq 8+0+0(%rbp),%rax 2933 movq %rax,%r9 2934 mulq %r10 2935 addq %rax,%r14 2936 adcq $0,%rdx 2937 movq %rdx,%r10 2938 movq 8+0+0(%rbp),%rax 2939 mulq %r11 2940 addq %rax,%r15 2941 adcq $0,%rdx 2942 imulq %r12,%r9 2943 addq %r10,%r15 2944 adcq %rdx,%r9 2945 movq %r13,%r10 2946 movq %r14,%r11 2947 movq %r15,%r12 2948 andq $3,%r12 2949 movq %r15,%r13 2950 andq $-4,%r13 2951 movq %r9,%r14 2952 shrdq $2,%r9,%r15 2953 shrq $2,%r9 2954 addq %r13,%r15 2955 adcq %r14,%r9 2956 addq %r15,%r10 2957 adcq %r9,%r11 2958 adcq $0,%r12 2959 2960 leaq 16(%rdi),%rdi 2961 decq %rcx 2962 jg L$seal_sse_tail_64_rounds_and_x2hash 2963 decq %r8 2964 jge L$seal_sse_tail_64_rounds_and_x1hash 2965 paddd L$chacha20_consts(%rip),%xmm0 2966 paddd 0+48(%rbp),%xmm4 2967 paddd 0+64(%rbp),%xmm8 2968 paddd 0+96(%rbp),%xmm12 2969 2970 jmp L$seal_sse_128_tail_xor 2971 2972L$seal_sse_tail_128: 2973 movdqa L$chacha20_consts(%rip),%xmm0 2974 movdqa 0+48(%rbp),%xmm4 2975 movdqa 0+64(%rbp),%xmm8 2976 movdqa %xmm0,%xmm1 2977 movdqa %xmm4,%xmm5 2978 movdqa %xmm8,%xmm9 2979 movdqa 0+96(%rbp),%xmm13 2980 paddd L$sse_inc(%rip),%xmm13 2981 movdqa %xmm13,%xmm12 2982 paddd L$sse_inc(%rip),%xmm12 2983 movdqa %xmm12,0+96(%rbp) 2984 movdqa %xmm13,0+112(%rbp) 2985 2986L$seal_sse_tail_128_rounds_and_x2hash: 2987 addq 0+0(%rdi),%r10 2988 adcq 8+0(%rdi),%r11 2989 adcq $1,%r12 2990 movq 0+0+0(%rbp),%rax 2991 movq %rax,%r15 2992 mulq %r10 2993 movq %rax,%r13 2994 movq %rdx,%r14 2995 movq 0+0+0(%rbp),%rax 2996 mulq %r11 2997 imulq %r12,%r15 2998 addq %rax,%r14 2999 adcq %rdx,%r15 3000 movq 8+0+0(%rbp),%rax 3001 movq %rax,%r9 3002 mulq %r10 3003 addq %rax,%r14 3004 adcq $0,%rdx 3005 movq %rdx,%r10 3006 movq 8+0+0(%rbp),%rax 3007 mulq %r11 3008 addq %rax,%r15 3009 adcq $0,%rdx 3010 imulq %r12,%r9 3011 addq %r10,%r15 3012 adcq %rdx,%r9 3013 movq %r13,%r10 3014 movq %r14,%r11 3015 movq %r15,%r12 3016 andq $3,%r12 3017 movq %r15,%r13 3018 andq $-4,%r13 3019 movq %r9,%r14 3020 shrdq $2,%r9,%r15 3021 shrq $2,%r9 3022 addq %r13,%r15 3023 adcq %r14,%r9 3024 addq %r15,%r10 3025 adcq %r9,%r11 3026 adcq $0,%r12 3027 3028 leaq 16(%rdi),%rdi 3029L$seal_sse_tail_128_rounds_and_x1hash: 3030 paddd %xmm4,%xmm0 3031 pxor %xmm0,%xmm12 3032 pshufb L$rol16(%rip),%xmm12 3033 paddd %xmm12,%xmm8 3034 pxor %xmm8,%xmm4 3035 movdqa %xmm4,%xmm3 3036 pslld $12,%xmm3 3037 
psrld $20,%xmm4 3038 pxor %xmm3,%xmm4 3039 paddd %xmm4,%xmm0 3040 pxor %xmm0,%xmm12 3041 pshufb L$rol8(%rip),%xmm12 3042 paddd %xmm12,%xmm8 3043 pxor %xmm8,%xmm4 3044 movdqa %xmm4,%xmm3 3045 pslld $7,%xmm3 3046 psrld $25,%xmm4 3047 pxor %xmm3,%xmm4 3048.byte 102,15,58,15,228,4 3049.byte 102,69,15,58,15,192,8 3050.byte 102,69,15,58,15,228,12 3051 paddd %xmm5,%xmm1 3052 pxor %xmm1,%xmm13 3053 pshufb L$rol16(%rip),%xmm13 3054 paddd %xmm13,%xmm9 3055 pxor %xmm9,%xmm5 3056 movdqa %xmm5,%xmm3 3057 pslld $12,%xmm3 3058 psrld $20,%xmm5 3059 pxor %xmm3,%xmm5 3060 paddd %xmm5,%xmm1 3061 pxor %xmm1,%xmm13 3062 pshufb L$rol8(%rip),%xmm13 3063 paddd %xmm13,%xmm9 3064 pxor %xmm9,%xmm5 3065 movdqa %xmm5,%xmm3 3066 pslld $7,%xmm3 3067 psrld $25,%xmm5 3068 pxor %xmm3,%xmm5 3069.byte 102,15,58,15,237,4 3070.byte 102,69,15,58,15,201,8 3071.byte 102,69,15,58,15,237,12 3072 addq 0+0(%rdi),%r10 3073 adcq 8+0(%rdi),%r11 3074 adcq $1,%r12 3075 movq 0+0+0(%rbp),%rax 3076 movq %rax,%r15 3077 mulq %r10 3078 movq %rax,%r13 3079 movq %rdx,%r14 3080 movq 0+0+0(%rbp),%rax 3081 mulq %r11 3082 imulq %r12,%r15 3083 addq %rax,%r14 3084 adcq %rdx,%r15 3085 movq 8+0+0(%rbp),%rax 3086 movq %rax,%r9 3087 mulq %r10 3088 addq %rax,%r14 3089 adcq $0,%rdx 3090 movq %rdx,%r10 3091 movq 8+0+0(%rbp),%rax 3092 mulq %r11 3093 addq %rax,%r15 3094 adcq $0,%rdx 3095 imulq %r12,%r9 3096 addq %r10,%r15 3097 adcq %rdx,%r9 3098 movq %r13,%r10 3099 movq %r14,%r11 3100 movq %r15,%r12 3101 andq $3,%r12 3102 movq %r15,%r13 3103 andq $-4,%r13 3104 movq %r9,%r14 3105 shrdq $2,%r9,%r15 3106 shrq $2,%r9 3107 addq %r13,%r15 3108 adcq %r14,%r9 3109 addq %r15,%r10 3110 adcq %r9,%r11 3111 adcq $0,%r12 3112 paddd %xmm4,%xmm0 3113 pxor %xmm0,%xmm12 3114 pshufb L$rol16(%rip),%xmm12 3115 paddd %xmm12,%xmm8 3116 pxor %xmm8,%xmm4 3117 movdqa %xmm4,%xmm3 3118 pslld $12,%xmm3 3119 psrld $20,%xmm4 3120 pxor %xmm3,%xmm4 3121 paddd %xmm4,%xmm0 3122 pxor %xmm0,%xmm12 3123 pshufb L$rol8(%rip),%xmm12 3124 paddd %xmm12,%xmm8 3125 pxor %xmm8,%xmm4 3126 movdqa %xmm4,%xmm3 3127 pslld $7,%xmm3 3128 psrld $25,%xmm4 3129 pxor %xmm3,%xmm4 3130.byte 102,15,58,15,228,12 3131.byte 102,69,15,58,15,192,8 3132.byte 102,69,15,58,15,228,4 3133 paddd %xmm5,%xmm1 3134 pxor %xmm1,%xmm13 3135 pshufb L$rol16(%rip),%xmm13 3136 paddd %xmm13,%xmm9 3137 pxor %xmm9,%xmm5 3138 movdqa %xmm5,%xmm3 3139 pslld $12,%xmm3 3140 psrld $20,%xmm5 3141 pxor %xmm3,%xmm5 3142 paddd %xmm5,%xmm1 3143 pxor %xmm1,%xmm13 3144 pshufb L$rol8(%rip),%xmm13 3145 paddd %xmm13,%xmm9 3146 pxor %xmm9,%xmm5 3147 movdqa %xmm5,%xmm3 3148 pslld $7,%xmm3 3149 psrld $25,%xmm5 3150 pxor %xmm3,%xmm5 3151.byte 102,15,58,15,237,12 3152.byte 102,69,15,58,15,201,8 3153.byte 102,69,15,58,15,237,4 3154 3155 leaq 16(%rdi),%rdi 3156 decq %rcx 3157 jg L$seal_sse_tail_128_rounds_and_x2hash 3158 decq %r8 3159 jge L$seal_sse_tail_128_rounds_and_x1hash 3160 paddd L$chacha20_consts(%rip),%xmm1 3161 paddd 0+48(%rbp),%xmm5 3162 paddd 0+64(%rbp),%xmm9 3163 paddd 0+112(%rbp),%xmm13 3164 paddd L$chacha20_consts(%rip),%xmm0 3165 paddd 0+48(%rbp),%xmm4 3166 paddd 0+64(%rbp),%xmm8 3167 paddd 0+96(%rbp),%xmm12 3168 movdqu 0 + 0(%rsi),%xmm3 3169 movdqu 16 + 0(%rsi),%xmm7 3170 movdqu 32 + 0(%rsi),%xmm11 3171 movdqu 48 + 0(%rsi),%xmm15 3172 pxor %xmm3,%xmm1 3173 pxor %xmm7,%xmm5 3174 pxor %xmm11,%xmm9 3175 pxor %xmm13,%xmm15 3176 movdqu %xmm1,0 + 0(%rdi) 3177 movdqu %xmm5,16 + 0(%rdi) 3178 movdqu %xmm9,32 + 0(%rdi) 3179 movdqu %xmm15,48 + 0(%rdi) 3180 3181 movq $64,%rcx 3182 subq $64,%rbx 3183 leaq 64(%rsi),%rsi 3184 jmp L$seal_sse_128_tail_hash 3185 
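#
# The recurring mulq/imulq/shrdq blocks above and below implement one
# Poly1305 update; in rough pseudocode, with the accumulator held in
# %r10:%r11:%r12 and the clamped key r loaded from 0(%rbp):
#
#   acc += block + 2^128                  # 16-byte block plus the pad bit
#   t    = acc * r
#   acc  = (t mod 2^130) + 5*(t >> 130)   # lazy reduction, 2^130 = 5 mod p
#
# L$seal_sse_tail_192 handles 129-192 remaining bytes: three further
# ChaCha20 block states are computed while the round loops keep absorbing
# already-written ciphertext into the Poly1305 accumulator; the first 128
# bytes are XORed out and the remainder is finished via
# L$seal_sse_128_tail_hash and L$seal_sse_128_tail_xor.
#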
3186L$seal_sse_tail_192: 3187 movdqa L$chacha20_consts(%rip),%xmm0 3188 movdqa 0+48(%rbp),%xmm4 3189 movdqa 0+64(%rbp),%xmm8 3190 movdqa %xmm0,%xmm1 3191 movdqa %xmm4,%xmm5 3192 movdqa %xmm8,%xmm9 3193 movdqa %xmm0,%xmm2 3194 movdqa %xmm4,%xmm6 3195 movdqa %xmm8,%xmm10 3196 movdqa 0+96(%rbp),%xmm14 3197 paddd L$sse_inc(%rip),%xmm14 3198 movdqa %xmm14,%xmm13 3199 paddd L$sse_inc(%rip),%xmm13 3200 movdqa %xmm13,%xmm12 3201 paddd L$sse_inc(%rip),%xmm12 3202 movdqa %xmm12,0+96(%rbp) 3203 movdqa %xmm13,0+112(%rbp) 3204 movdqa %xmm14,0+128(%rbp) 3205 3206L$seal_sse_tail_192_rounds_and_x2hash: 3207 addq 0+0(%rdi),%r10 3208 adcq 8+0(%rdi),%r11 3209 adcq $1,%r12 3210 movq 0+0+0(%rbp),%rax 3211 movq %rax,%r15 3212 mulq %r10 3213 movq %rax,%r13 3214 movq %rdx,%r14 3215 movq 0+0+0(%rbp),%rax 3216 mulq %r11 3217 imulq %r12,%r15 3218 addq %rax,%r14 3219 adcq %rdx,%r15 3220 movq 8+0+0(%rbp),%rax 3221 movq %rax,%r9 3222 mulq %r10 3223 addq %rax,%r14 3224 adcq $0,%rdx 3225 movq %rdx,%r10 3226 movq 8+0+0(%rbp),%rax 3227 mulq %r11 3228 addq %rax,%r15 3229 adcq $0,%rdx 3230 imulq %r12,%r9 3231 addq %r10,%r15 3232 adcq %rdx,%r9 3233 movq %r13,%r10 3234 movq %r14,%r11 3235 movq %r15,%r12 3236 andq $3,%r12 3237 movq %r15,%r13 3238 andq $-4,%r13 3239 movq %r9,%r14 3240 shrdq $2,%r9,%r15 3241 shrq $2,%r9 3242 addq %r13,%r15 3243 adcq %r14,%r9 3244 addq %r15,%r10 3245 adcq %r9,%r11 3246 adcq $0,%r12 3247 3248 leaq 16(%rdi),%rdi 3249L$seal_sse_tail_192_rounds_and_x1hash: 3250 paddd %xmm4,%xmm0 3251 pxor %xmm0,%xmm12 3252 pshufb L$rol16(%rip),%xmm12 3253 paddd %xmm12,%xmm8 3254 pxor %xmm8,%xmm4 3255 movdqa %xmm4,%xmm3 3256 pslld $12,%xmm3 3257 psrld $20,%xmm4 3258 pxor %xmm3,%xmm4 3259 paddd %xmm4,%xmm0 3260 pxor %xmm0,%xmm12 3261 pshufb L$rol8(%rip),%xmm12 3262 paddd %xmm12,%xmm8 3263 pxor %xmm8,%xmm4 3264 movdqa %xmm4,%xmm3 3265 pslld $7,%xmm3 3266 psrld $25,%xmm4 3267 pxor %xmm3,%xmm4 3268.byte 102,15,58,15,228,4 3269.byte 102,69,15,58,15,192,8 3270.byte 102,69,15,58,15,228,12 3271 paddd %xmm5,%xmm1 3272 pxor %xmm1,%xmm13 3273 pshufb L$rol16(%rip),%xmm13 3274 paddd %xmm13,%xmm9 3275 pxor %xmm9,%xmm5 3276 movdqa %xmm5,%xmm3 3277 pslld $12,%xmm3 3278 psrld $20,%xmm5 3279 pxor %xmm3,%xmm5 3280 paddd %xmm5,%xmm1 3281 pxor %xmm1,%xmm13 3282 pshufb L$rol8(%rip),%xmm13 3283 paddd %xmm13,%xmm9 3284 pxor %xmm9,%xmm5 3285 movdqa %xmm5,%xmm3 3286 pslld $7,%xmm3 3287 psrld $25,%xmm5 3288 pxor %xmm3,%xmm5 3289.byte 102,15,58,15,237,4 3290.byte 102,69,15,58,15,201,8 3291.byte 102,69,15,58,15,237,12 3292 paddd %xmm6,%xmm2 3293 pxor %xmm2,%xmm14 3294 pshufb L$rol16(%rip),%xmm14 3295 paddd %xmm14,%xmm10 3296 pxor %xmm10,%xmm6 3297 movdqa %xmm6,%xmm3 3298 pslld $12,%xmm3 3299 psrld $20,%xmm6 3300 pxor %xmm3,%xmm6 3301 paddd %xmm6,%xmm2 3302 pxor %xmm2,%xmm14 3303 pshufb L$rol8(%rip),%xmm14 3304 paddd %xmm14,%xmm10 3305 pxor %xmm10,%xmm6 3306 movdqa %xmm6,%xmm3 3307 pslld $7,%xmm3 3308 psrld $25,%xmm6 3309 pxor %xmm3,%xmm6 3310.byte 102,15,58,15,246,4 3311.byte 102,69,15,58,15,210,8 3312.byte 102,69,15,58,15,246,12 3313 addq 0+0(%rdi),%r10 3314 adcq 8+0(%rdi),%r11 3315 adcq $1,%r12 3316 movq 0+0+0(%rbp),%rax 3317 movq %rax,%r15 3318 mulq %r10 3319 movq %rax,%r13 3320 movq %rdx,%r14 3321 movq 0+0+0(%rbp),%rax 3322 mulq %r11 3323 imulq %r12,%r15 3324 addq %rax,%r14 3325 adcq %rdx,%r15 3326 movq 8+0+0(%rbp),%rax 3327 movq %rax,%r9 3328 mulq %r10 3329 addq %rax,%r14 3330 adcq $0,%rdx 3331 movq %rdx,%r10 3332 movq 8+0+0(%rbp),%rax 3333 mulq %r11 3334 addq %rax,%r15 3335 adcq $0,%rdx 3336 imulq %r12,%r9 3337 addq %r10,%r15 3338 adcq 
%rdx,%r9 3339 movq %r13,%r10 3340 movq %r14,%r11 3341 movq %r15,%r12 3342 andq $3,%r12 3343 movq %r15,%r13 3344 andq $-4,%r13 3345 movq %r9,%r14 3346 shrdq $2,%r9,%r15 3347 shrq $2,%r9 3348 addq %r13,%r15 3349 adcq %r14,%r9 3350 addq %r15,%r10 3351 adcq %r9,%r11 3352 adcq $0,%r12 3353 paddd %xmm4,%xmm0 3354 pxor %xmm0,%xmm12 3355 pshufb L$rol16(%rip),%xmm12 3356 paddd %xmm12,%xmm8 3357 pxor %xmm8,%xmm4 3358 movdqa %xmm4,%xmm3 3359 pslld $12,%xmm3 3360 psrld $20,%xmm4 3361 pxor %xmm3,%xmm4 3362 paddd %xmm4,%xmm0 3363 pxor %xmm0,%xmm12 3364 pshufb L$rol8(%rip),%xmm12 3365 paddd %xmm12,%xmm8 3366 pxor %xmm8,%xmm4 3367 movdqa %xmm4,%xmm3 3368 pslld $7,%xmm3 3369 psrld $25,%xmm4 3370 pxor %xmm3,%xmm4 3371.byte 102,15,58,15,228,12 3372.byte 102,69,15,58,15,192,8 3373.byte 102,69,15,58,15,228,4 3374 paddd %xmm5,%xmm1 3375 pxor %xmm1,%xmm13 3376 pshufb L$rol16(%rip),%xmm13 3377 paddd %xmm13,%xmm9 3378 pxor %xmm9,%xmm5 3379 movdqa %xmm5,%xmm3 3380 pslld $12,%xmm3 3381 psrld $20,%xmm5 3382 pxor %xmm3,%xmm5 3383 paddd %xmm5,%xmm1 3384 pxor %xmm1,%xmm13 3385 pshufb L$rol8(%rip),%xmm13 3386 paddd %xmm13,%xmm9 3387 pxor %xmm9,%xmm5 3388 movdqa %xmm5,%xmm3 3389 pslld $7,%xmm3 3390 psrld $25,%xmm5 3391 pxor %xmm3,%xmm5 3392.byte 102,15,58,15,237,12 3393.byte 102,69,15,58,15,201,8 3394.byte 102,69,15,58,15,237,4 3395 paddd %xmm6,%xmm2 3396 pxor %xmm2,%xmm14 3397 pshufb L$rol16(%rip),%xmm14 3398 paddd %xmm14,%xmm10 3399 pxor %xmm10,%xmm6 3400 movdqa %xmm6,%xmm3 3401 pslld $12,%xmm3 3402 psrld $20,%xmm6 3403 pxor %xmm3,%xmm6 3404 paddd %xmm6,%xmm2 3405 pxor %xmm2,%xmm14 3406 pshufb L$rol8(%rip),%xmm14 3407 paddd %xmm14,%xmm10 3408 pxor %xmm10,%xmm6 3409 movdqa %xmm6,%xmm3 3410 pslld $7,%xmm3 3411 psrld $25,%xmm6 3412 pxor %xmm3,%xmm6 3413.byte 102,15,58,15,246,12 3414.byte 102,69,15,58,15,210,8 3415.byte 102,69,15,58,15,246,4 3416 3417 leaq 16(%rdi),%rdi 3418 decq %rcx 3419 jg L$seal_sse_tail_192_rounds_and_x2hash 3420 decq %r8 3421 jge L$seal_sse_tail_192_rounds_and_x1hash 3422 paddd L$chacha20_consts(%rip),%xmm2 3423 paddd 0+48(%rbp),%xmm6 3424 paddd 0+64(%rbp),%xmm10 3425 paddd 0+128(%rbp),%xmm14 3426 paddd L$chacha20_consts(%rip),%xmm1 3427 paddd 0+48(%rbp),%xmm5 3428 paddd 0+64(%rbp),%xmm9 3429 paddd 0+112(%rbp),%xmm13 3430 paddd L$chacha20_consts(%rip),%xmm0 3431 paddd 0+48(%rbp),%xmm4 3432 paddd 0+64(%rbp),%xmm8 3433 paddd 0+96(%rbp),%xmm12 3434 movdqu 0 + 0(%rsi),%xmm3 3435 movdqu 16 + 0(%rsi),%xmm7 3436 movdqu 32 + 0(%rsi),%xmm11 3437 movdqu 48 + 0(%rsi),%xmm15 3438 pxor %xmm3,%xmm2 3439 pxor %xmm7,%xmm6 3440 pxor %xmm11,%xmm10 3441 pxor %xmm14,%xmm15 3442 movdqu %xmm2,0 + 0(%rdi) 3443 movdqu %xmm6,16 + 0(%rdi) 3444 movdqu %xmm10,32 + 0(%rdi) 3445 movdqu %xmm15,48 + 0(%rdi) 3446 movdqu 0 + 64(%rsi),%xmm3 3447 movdqu 16 + 64(%rsi),%xmm7 3448 movdqu 32 + 64(%rsi),%xmm11 3449 movdqu 48 + 64(%rsi),%xmm15 3450 pxor %xmm3,%xmm1 3451 pxor %xmm7,%xmm5 3452 pxor %xmm11,%xmm9 3453 pxor %xmm13,%xmm15 3454 movdqu %xmm1,0 + 64(%rdi) 3455 movdqu %xmm5,16 + 64(%rdi) 3456 movdqu %xmm9,32 + 64(%rdi) 3457 movdqu %xmm15,48 + 64(%rdi) 3458 3459 movq $128,%rcx 3460 subq $128,%rbx 3461 leaq 128(%rsi),%rsi 3462 3463L$seal_sse_128_tail_hash: 3464 cmpq $16,%rcx 3465 jb L$seal_sse_128_tail_xor 3466 addq 0+0(%rdi),%r10 3467 adcq 8+0(%rdi),%r11 3468 adcq $1,%r12 3469 movq 0+0+0(%rbp),%rax 3470 movq %rax,%r15 3471 mulq %r10 3472 movq %rax,%r13 3473 movq %rdx,%r14 3474 movq 0+0+0(%rbp),%rax 3475 mulq %r11 3476 imulq %r12,%r15 3477 addq %rax,%r14 3478 adcq %rdx,%r15 3479 movq 8+0+0(%rbp),%rax 3480 movq %rax,%r9 3481 mulq %r10 
3482 addq %rax,%r14 3483 adcq $0,%rdx 3484 movq %rdx,%r10 3485 movq 8+0+0(%rbp),%rax 3486 mulq %r11 3487 addq %rax,%r15 3488 adcq $0,%rdx 3489 imulq %r12,%r9 3490 addq %r10,%r15 3491 adcq %rdx,%r9 3492 movq %r13,%r10 3493 movq %r14,%r11 3494 movq %r15,%r12 3495 andq $3,%r12 3496 movq %r15,%r13 3497 andq $-4,%r13 3498 movq %r9,%r14 3499 shrdq $2,%r9,%r15 3500 shrq $2,%r9 3501 addq %r13,%r15 3502 adcq %r14,%r9 3503 addq %r15,%r10 3504 adcq %r9,%r11 3505 adcq $0,%r12 3506 3507 subq $16,%rcx 3508 leaq 16(%rdi),%rdi 3509 jmp L$seal_sse_128_tail_hash 3510 3511L$seal_sse_128_tail_xor: 3512 cmpq $16,%rbx 3513 jb L$seal_sse_tail_16 3514 subq $16,%rbx 3515 3516 movdqu 0(%rsi),%xmm3 3517 pxor %xmm3,%xmm0 3518 movdqu %xmm0,0(%rdi) 3519 3520 addq 0(%rdi),%r10 3521 adcq 8(%rdi),%r11 3522 adcq $1,%r12 3523 leaq 16(%rsi),%rsi 3524 leaq 16(%rdi),%rdi 3525 movq 0+0+0(%rbp),%rax 3526 movq %rax,%r15 3527 mulq %r10 3528 movq %rax,%r13 3529 movq %rdx,%r14 3530 movq 0+0+0(%rbp),%rax 3531 mulq %r11 3532 imulq %r12,%r15 3533 addq %rax,%r14 3534 adcq %rdx,%r15 3535 movq 8+0+0(%rbp),%rax 3536 movq %rax,%r9 3537 mulq %r10 3538 addq %rax,%r14 3539 adcq $0,%rdx 3540 movq %rdx,%r10 3541 movq 8+0+0(%rbp),%rax 3542 mulq %r11 3543 addq %rax,%r15 3544 adcq $0,%rdx 3545 imulq %r12,%r9 3546 addq %r10,%r15 3547 adcq %rdx,%r9 3548 movq %r13,%r10 3549 movq %r14,%r11 3550 movq %r15,%r12 3551 andq $3,%r12 3552 movq %r15,%r13 3553 andq $-4,%r13 3554 movq %r9,%r14 3555 shrdq $2,%r9,%r15 3556 shrq $2,%r9 3557 addq %r13,%r15 3558 adcq %r14,%r9 3559 addq %r15,%r10 3560 adcq %r9,%r11 3561 adcq $0,%r12 3562 3563 3564 movdqa %xmm4,%xmm0 3565 movdqa %xmm8,%xmm4 3566 movdqa %xmm12,%xmm8 3567 movdqa %xmm1,%xmm12 3568 movdqa %xmm5,%xmm1 3569 movdqa %xmm9,%xmm5 3570 movdqa %xmm13,%xmm9 3571 jmp L$seal_sse_128_tail_xor 3572 3573L$seal_sse_tail_16: 3574 testq %rbx,%rbx 3575 jz L$process_blocks_of_extra_in 3576 3577 movq %rbx,%r8 3578 movq %rbx,%rcx 3579 leaq -1(%rsi,%rbx,1),%rsi 3580 pxor %xmm15,%xmm15 3581L$seal_sse_tail_16_compose: 3582 pslldq $1,%xmm15 3583 pinsrb $0,(%rsi),%xmm15 3584 leaq -1(%rsi),%rsi 3585 decq %rcx 3586 jne L$seal_sse_tail_16_compose 3587 3588 3589 pxor %xmm0,%xmm15 3590 3591 3592 movq %rbx,%rcx 3593 movdqu %xmm15,%xmm0 3594L$seal_sse_tail_16_extract: 3595 pextrb $0,%xmm0,(%rdi) 3596 psrldq $1,%xmm0 3597 addq $1,%rdi 3598 subq $1,%rcx 3599 jnz L$seal_sse_tail_16_extract 3600 3601 3602 3603 3604 3605 3606 3607 3608 movq 288 + 0 + 32(%rsp),%r9 3609 movq 56(%r9),%r14 3610 movq 48(%r9),%r13 3611 testq %r14,%r14 3612 jz L$process_partial_block 3613 3614 movq $16,%r15 3615 subq %rbx,%r15 3616 cmpq %r15,%r14 3617 3618 jge L$load_extra_in 3619 movq %r14,%r15 3620 3621L$load_extra_in: 3622 3623 3624 leaq -1(%r13,%r15,1),%rsi 3625 3626 3627 addq %r15,%r13 3628 subq %r15,%r14 3629 movq %r13,48(%r9) 3630 movq %r14,56(%r9) 3631 3632 3633 3634 addq %r15,%r8 3635 3636 3637 pxor %xmm11,%xmm11 3638L$load_extra_load_loop: 3639 pslldq $1,%xmm11 3640 pinsrb $0,(%rsi),%xmm11 3641 leaq -1(%rsi),%rsi 3642 subq $1,%r15 3643 jnz L$load_extra_load_loop 3644 3645 3646 3647 3648 movq %rbx,%r15 3649 3650L$load_extra_shift_loop: 3651 pslldq $1,%xmm11 3652 subq $1,%r15 3653 jnz L$load_extra_shift_loop 3654 3655 3656 3657 3658 leaq L$and_masks(%rip),%r15 3659 shlq $4,%rbx 3660 pand -16(%r15,%rbx,1),%xmm15 3661 3662 3663 por %xmm11,%xmm15 3664 3665 3666 3667.byte 102,77,15,126,253 3668 pextrq $1,%xmm15,%r14 3669 addq %r13,%r10 3670 adcq %r14,%r11 3671 adcq $1,%r12 3672 movq 0+0+0(%rbp),%rax 3673 movq %rax,%r15 3674 mulq %r10 3675 movq %rax,%r13 3676 movq 
%rdx,%r14 3677 movq 0+0+0(%rbp),%rax 3678 mulq %r11 3679 imulq %r12,%r15 3680 addq %rax,%r14 3681 adcq %rdx,%r15 3682 movq 8+0+0(%rbp),%rax 3683 movq %rax,%r9 3684 mulq %r10 3685 addq %rax,%r14 3686 adcq $0,%rdx 3687 movq %rdx,%r10 3688 movq 8+0+0(%rbp),%rax 3689 mulq %r11 3690 addq %rax,%r15 3691 adcq $0,%rdx 3692 imulq %r12,%r9 3693 addq %r10,%r15 3694 adcq %rdx,%r9 3695 movq %r13,%r10 3696 movq %r14,%r11 3697 movq %r15,%r12 3698 andq $3,%r12 3699 movq %r15,%r13 3700 andq $-4,%r13 3701 movq %r9,%r14 3702 shrdq $2,%r9,%r15 3703 shrq $2,%r9 3704 addq %r13,%r15 3705 adcq %r14,%r9 3706 addq %r15,%r10 3707 adcq %r9,%r11 3708 adcq $0,%r12 3709 3710 3711L$process_blocks_of_extra_in: 3712 3713 movq 288+32+0 (%rsp),%r9 3714 movq 48(%r9),%rsi 3715 movq 56(%r9),%r8 3716 movq %r8,%rcx 3717 shrq $4,%r8 3718 3719L$process_extra_hash_loop: 3720 jz process_extra_in_trailer 3721 addq 0+0(%rsi),%r10 3722 adcq 8+0(%rsi),%r11 3723 adcq $1,%r12 3724 movq 0+0+0(%rbp),%rax 3725 movq %rax,%r15 3726 mulq %r10 3727 movq %rax,%r13 3728 movq %rdx,%r14 3729 movq 0+0+0(%rbp),%rax 3730 mulq %r11 3731 imulq %r12,%r15 3732 addq %rax,%r14 3733 adcq %rdx,%r15 3734 movq 8+0+0(%rbp),%rax 3735 movq %rax,%r9 3736 mulq %r10 3737 addq %rax,%r14 3738 adcq $0,%rdx 3739 movq %rdx,%r10 3740 movq 8+0+0(%rbp),%rax 3741 mulq %r11 3742 addq %rax,%r15 3743 adcq $0,%rdx 3744 imulq %r12,%r9 3745 addq %r10,%r15 3746 adcq %rdx,%r9 3747 movq %r13,%r10 3748 movq %r14,%r11 3749 movq %r15,%r12 3750 andq $3,%r12 3751 movq %r15,%r13 3752 andq $-4,%r13 3753 movq %r9,%r14 3754 shrdq $2,%r9,%r15 3755 shrq $2,%r9 3756 addq %r13,%r15 3757 adcq %r14,%r9 3758 addq %r15,%r10 3759 adcq %r9,%r11 3760 adcq $0,%r12 3761 3762 leaq 16(%rsi),%rsi 3763 subq $1,%r8 3764 jmp L$process_extra_hash_loop 3765process_extra_in_trailer: 3766 andq $15,%rcx 3767 movq %rcx,%rbx 3768 jz L$do_length_block 3769 leaq -1(%rsi,%rcx,1),%rsi 3770 3771L$process_extra_in_trailer_load: 3772 pslldq $1,%xmm15 3773 pinsrb $0,(%rsi),%xmm15 3774 leaq -1(%rsi),%rsi 3775 subq $1,%rcx 3776 jnz L$process_extra_in_trailer_load 3777 3778L$process_partial_block: 3779 3780 leaq L$and_masks(%rip),%r15 3781 shlq $4,%rbx 3782 pand -16(%r15,%rbx,1),%xmm15 3783.byte 102,77,15,126,253 3784 pextrq $1,%xmm15,%r14 3785 addq %r13,%r10 3786 adcq %r14,%r11 3787 adcq $1,%r12 3788 movq 0+0+0(%rbp),%rax 3789 movq %rax,%r15 3790 mulq %r10 3791 movq %rax,%r13 3792 movq %rdx,%r14 3793 movq 0+0+0(%rbp),%rax 3794 mulq %r11 3795 imulq %r12,%r15 3796 addq %rax,%r14 3797 adcq %rdx,%r15 3798 movq 8+0+0(%rbp),%rax 3799 movq %rax,%r9 3800 mulq %r10 3801 addq %rax,%r14 3802 adcq $0,%rdx 3803 movq %rdx,%r10 3804 movq 8+0+0(%rbp),%rax 3805 mulq %r11 3806 addq %rax,%r15 3807 adcq $0,%rdx 3808 imulq %r12,%r9 3809 addq %r10,%r15 3810 adcq %rdx,%r9 3811 movq %r13,%r10 3812 movq %r14,%r11 3813 movq %r15,%r12 3814 andq $3,%r12 3815 movq %r15,%r13 3816 andq $-4,%r13 3817 movq %r9,%r14 3818 shrdq $2,%r9,%r15 3819 shrq $2,%r9 3820 addq %r13,%r15 3821 adcq %r14,%r9 3822 addq %r15,%r10 3823 adcq %r9,%r11 3824 adcq $0,%r12 3825 3826 3827L$do_length_block: 3828 addq 0+0+32(%rbp),%r10 3829 adcq 8+0+32(%rbp),%r11 3830 adcq $1,%r12 3831 movq 0+0+0(%rbp),%rax 3832 movq %rax,%r15 3833 mulq %r10 3834 movq %rax,%r13 3835 movq %rdx,%r14 3836 movq 0+0+0(%rbp),%rax 3837 mulq %r11 3838 imulq %r12,%r15 3839 addq %rax,%r14 3840 adcq %rdx,%r15 3841 movq 8+0+0(%rbp),%rax 3842 movq %rax,%r9 3843 mulq %r10 3844 addq %rax,%r14 3845 adcq $0,%rdx 3846 movq %rdx,%r10 3847 movq 8+0+0(%rbp),%rax 3848 mulq %r11 3849 addq %rax,%r15 3850 adcq $0,%rdx 3851 imulq 
%r12,%r9 3852 addq %r10,%r15 3853 adcq %rdx,%r9 3854 movq %r13,%r10 3855 movq %r14,%r11 3856 movq %r15,%r12 3857 andq $3,%r12 3858 movq %r15,%r13 3859 andq $-4,%r13 3860 movq %r9,%r14 3861 shrdq $2,%r9,%r15 3862 shrq $2,%r9 3863 addq %r13,%r15 3864 adcq %r14,%r9 3865 addq %r15,%r10 3866 adcq %r9,%r11 3867 adcq $0,%r12 3868 3869 3870 movq %r10,%r13 3871 movq %r11,%r14 3872 movq %r12,%r15 3873 subq $-5,%r10 3874 sbbq $-1,%r11 3875 sbbq $3,%r12 3876 cmovcq %r13,%r10 3877 cmovcq %r14,%r11 3878 cmovcq %r15,%r12 3879 3880 addq 0+0+16(%rbp),%r10 3881 adcq 8+0+16(%rbp),%r11 3882 3883 3884 addq $288 + 0 + 32,%rsp 3885 3886 3887 popq %r9 3888 3889 movq %r10,(%r9) 3890 movq %r11,8(%r9) 3891 popq %r15 3892 3893 popq %r14 3894 3895 popq %r13 3896 3897 popq %r12 3898 3899 popq %rbx 3900 3901 popq %rbp 3902 3903 .byte 0xf3,0xc3 3904 3905L$seal_sse_128: 3906 3907 movdqu L$chacha20_consts(%rip),%xmm0 3908 movdqa %xmm0,%xmm1 3909 movdqa %xmm0,%xmm2 3910 movdqu 0(%r9),%xmm4 3911 movdqa %xmm4,%xmm5 3912 movdqa %xmm4,%xmm6 3913 movdqu 16(%r9),%xmm8 3914 movdqa %xmm8,%xmm9 3915 movdqa %xmm8,%xmm10 3916 movdqu 32(%r9),%xmm14 3917 movdqa %xmm14,%xmm12 3918 paddd L$sse_inc(%rip),%xmm12 3919 movdqa %xmm12,%xmm13 3920 paddd L$sse_inc(%rip),%xmm13 3921 movdqa %xmm4,%xmm7 3922 movdqa %xmm8,%xmm11 3923 movdqa %xmm12,%xmm15 3924 movq $10,%r10 3925 3926L$seal_sse_128_rounds: 3927 paddd %xmm4,%xmm0 3928 pxor %xmm0,%xmm12 3929 pshufb L$rol16(%rip),%xmm12 3930 paddd %xmm12,%xmm8 3931 pxor %xmm8,%xmm4 3932 movdqa %xmm4,%xmm3 3933 pslld $12,%xmm3 3934 psrld $20,%xmm4 3935 pxor %xmm3,%xmm4 3936 paddd %xmm4,%xmm0 3937 pxor %xmm0,%xmm12 3938 pshufb L$rol8(%rip),%xmm12 3939 paddd %xmm12,%xmm8 3940 pxor %xmm8,%xmm4 3941 movdqa %xmm4,%xmm3 3942 pslld $7,%xmm3 3943 psrld $25,%xmm4 3944 pxor %xmm3,%xmm4 3945.byte 102,15,58,15,228,4 3946.byte 102,69,15,58,15,192,8 3947.byte 102,69,15,58,15,228,12 3948 paddd %xmm5,%xmm1 3949 pxor %xmm1,%xmm13 3950 pshufb L$rol16(%rip),%xmm13 3951 paddd %xmm13,%xmm9 3952 pxor %xmm9,%xmm5 3953 movdqa %xmm5,%xmm3 3954 pslld $12,%xmm3 3955 psrld $20,%xmm5 3956 pxor %xmm3,%xmm5 3957 paddd %xmm5,%xmm1 3958 pxor %xmm1,%xmm13 3959 pshufb L$rol8(%rip),%xmm13 3960 paddd %xmm13,%xmm9 3961 pxor %xmm9,%xmm5 3962 movdqa %xmm5,%xmm3 3963 pslld $7,%xmm3 3964 psrld $25,%xmm5 3965 pxor %xmm3,%xmm5 3966.byte 102,15,58,15,237,4 3967.byte 102,69,15,58,15,201,8 3968.byte 102,69,15,58,15,237,12 3969 paddd %xmm6,%xmm2 3970 pxor %xmm2,%xmm14 3971 pshufb L$rol16(%rip),%xmm14 3972 paddd %xmm14,%xmm10 3973 pxor %xmm10,%xmm6 3974 movdqa %xmm6,%xmm3 3975 pslld $12,%xmm3 3976 psrld $20,%xmm6 3977 pxor %xmm3,%xmm6 3978 paddd %xmm6,%xmm2 3979 pxor %xmm2,%xmm14 3980 pshufb L$rol8(%rip),%xmm14 3981 paddd %xmm14,%xmm10 3982 pxor %xmm10,%xmm6 3983 movdqa %xmm6,%xmm3 3984 pslld $7,%xmm3 3985 psrld $25,%xmm6 3986 pxor %xmm3,%xmm6 3987.byte 102,15,58,15,246,4 3988.byte 102,69,15,58,15,210,8 3989.byte 102,69,15,58,15,246,12 3990 paddd %xmm4,%xmm0 3991 pxor %xmm0,%xmm12 3992 pshufb L$rol16(%rip),%xmm12 3993 paddd %xmm12,%xmm8 3994 pxor %xmm8,%xmm4 3995 movdqa %xmm4,%xmm3 3996 pslld $12,%xmm3 3997 psrld $20,%xmm4 3998 pxor %xmm3,%xmm4 3999 paddd %xmm4,%xmm0 4000 pxor %xmm0,%xmm12 4001 pshufb L$rol8(%rip),%xmm12 4002 paddd %xmm12,%xmm8 4003 pxor %xmm8,%xmm4 4004 movdqa %xmm4,%xmm3 4005 pslld $7,%xmm3 4006 psrld $25,%xmm4 4007 pxor %xmm3,%xmm4 4008.byte 102,15,58,15,228,12 4009.byte 102,69,15,58,15,192,8 4010.byte 102,69,15,58,15,228,4 4011 paddd %xmm5,%xmm1 4012 pxor %xmm1,%xmm13 4013 pshufb L$rol16(%rip),%xmm13 4014 paddd %xmm13,%xmm9 4015 pxor 
%xmm9,%xmm5 4016 movdqa %xmm5,%xmm3 4017 pslld $12,%xmm3 4018 psrld $20,%xmm5 4019 pxor %xmm3,%xmm5 4020 paddd %xmm5,%xmm1 4021 pxor %xmm1,%xmm13 4022 pshufb L$rol8(%rip),%xmm13 4023 paddd %xmm13,%xmm9 4024 pxor %xmm9,%xmm5 4025 movdqa %xmm5,%xmm3 4026 pslld $7,%xmm3 4027 psrld $25,%xmm5 4028 pxor %xmm3,%xmm5 4029.byte 102,15,58,15,237,12 4030.byte 102,69,15,58,15,201,8 4031.byte 102,69,15,58,15,237,4 4032 paddd %xmm6,%xmm2 4033 pxor %xmm2,%xmm14 4034 pshufb L$rol16(%rip),%xmm14 4035 paddd %xmm14,%xmm10 4036 pxor %xmm10,%xmm6 4037 movdqa %xmm6,%xmm3 4038 pslld $12,%xmm3 4039 psrld $20,%xmm6 4040 pxor %xmm3,%xmm6 4041 paddd %xmm6,%xmm2 4042 pxor %xmm2,%xmm14 4043 pshufb L$rol8(%rip),%xmm14 4044 paddd %xmm14,%xmm10 4045 pxor %xmm10,%xmm6 4046 movdqa %xmm6,%xmm3 4047 pslld $7,%xmm3 4048 psrld $25,%xmm6 4049 pxor %xmm3,%xmm6 4050.byte 102,15,58,15,246,12 4051.byte 102,69,15,58,15,210,8 4052.byte 102,69,15,58,15,246,4 4053 4054 decq %r10 4055 jnz L$seal_sse_128_rounds 4056 paddd L$chacha20_consts(%rip),%xmm0 4057 paddd L$chacha20_consts(%rip),%xmm1 4058 paddd L$chacha20_consts(%rip),%xmm2 4059 paddd %xmm7,%xmm4 4060 paddd %xmm7,%xmm5 4061 paddd %xmm7,%xmm6 4062 paddd %xmm11,%xmm8 4063 paddd %xmm11,%xmm9 4064 paddd %xmm15,%xmm12 4065 paddd L$sse_inc(%rip),%xmm15 4066 paddd %xmm15,%xmm13 4067 4068 pand L$clamp(%rip),%xmm2 4069 movdqa %xmm2,0+0(%rbp) 4070 movdqa %xmm6,0+16(%rbp) 4071 4072 movq %r8,%r8 4073 call poly_hash_ad_internal 4074 jmp L$seal_sse_128_tail_xor 4075 4076 4077 4078 4079 4080.p2align 6 4081chacha20_poly1305_open_avx2: 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 vzeroupper 4095 vmovdqa L$chacha20_consts(%rip),%ymm0 4096 vbroadcasti128 0(%r9),%ymm4 4097 vbroadcasti128 16(%r9),%ymm8 4098 vbroadcasti128 32(%r9),%ymm12 4099 vpaddd L$avx2_init(%rip),%ymm12,%ymm12 4100 cmpq $192,%rbx 4101 jbe L$open_avx2_192 4102 cmpq $320,%rbx 4103 jbe L$open_avx2_320 4104 4105 vmovdqa %ymm4,0+64(%rbp) 4106 vmovdqa %ymm8,0+96(%rbp) 4107 vmovdqa %ymm12,0+160(%rbp) 4108 movq $10,%r10 4109L$open_avx2_init_rounds: 4110 vpaddd %ymm4,%ymm0,%ymm0 4111 vpxor %ymm0,%ymm12,%ymm12 4112 vpshufb L$rol16(%rip),%ymm12,%ymm12 4113 vpaddd %ymm12,%ymm8,%ymm8 4114 vpxor %ymm8,%ymm4,%ymm4 4115 vpsrld $20,%ymm4,%ymm3 4116 vpslld $12,%ymm4,%ymm4 4117 vpxor %ymm3,%ymm4,%ymm4 4118 vpaddd %ymm4,%ymm0,%ymm0 4119 vpxor %ymm0,%ymm12,%ymm12 4120 vpshufb L$rol8(%rip),%ymm12,%ymm12 4121 vpaddd %ymm12,%ymm8,%ymm8 4122 vpxor %ymm8,%ymm4,%ymm4 4123 vpslld $7,%ymm4,%ymm3 4124 vpsrld $25,%ymm4,%ymm4 4125 vpxor %ymm3,%ymm4,%ymm4 4126 vpalignr $12,%ymm12,%ymm12,%ymm12 4127 vpalignr $8,%ymm8,%ymm8,%ymm8 4128 vpalignr $4,%ymm4,%ymm4,%ymm4 4129 vpaddd %ymm4,%ymm0,%ymm0 4130 vpxor %ymm0,%ymm12,%ymm12 4131 vpshufb L$rol16(%rip),%ymm12,%ymm12 4132 vpaddd %ymm12,%ymm8,%ymm8 4133 vpxor %ymm8,%ymm4,%ymm4 4134 vpsrld $20,%ymm4,%ymm3 4135 vpslld $12,%ymm4,%ymm4 4136 vpxor %ymm3,%ymm4,%ymm4 4137 vpaddd %ymm4,%ymm0,%ymm0 4138 vpxor %ymm0,%ymm12,%ymm12 4139 vpshufb L$rol8(%rip),%ymm12,%ymm12 4140 vpaddd %ymm12,%ymm8,%ymm8 4141 vpxor %ymm8,%ymm4,%ymm4 4142 vpslld $7,%ymm4,%ymm3 4143 vpsrld $25,%ymm4,%ymm4 4144 vpxor %ymm3,%ymm4,%ymm4 4145 vpalignr $4,%ymm12,%ymm12,%ymm12 4146 vpalignr $8,%ymm8,%ymm8,%ymm8 4147 vpalignr $12,%ymm4,%ymm4,%ymm4 4148 4149 decq %r10 4150 jne L$open_avx2_init_rounds 4151 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 4152 vpaddd 0+64(%rbp),%ymm4,%ymm4 4153 vpaddd 0+96(%rbp),%ymm8,%ymm8 4154 vpaddd 0+160(%rbp),%ymm12,%ymm12 4155 4156 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 4157 4158 vpand L$clamp(%rip),%ymm3,%ymm3 
4159 vmovdqa %ymm3,0+0(%rbp) 4160 4161 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 4162 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 4163 4164 movq %r8,%r8 4165 call poly_hash_ad_internal 4166 4167 xorq %rcx,%rcx 4168L$open_avx2_init_hash: 4169 addq 0+0(%rsi,%rcx,1),%r10 4170 adcq 8+0(%rsi,%rcx,1),%r11 4171 adcq $1,%r12 4172 movq 0+0+0(%rbp),%rax 4173 movq %rax,%r15 4174 mulq %r10 4175 movq %rax,%r13 4176 movq %rdx,%r14 4177 movq 0+0+0(%rbp),%rax 4178 mulq %r11 4179 imulq %r12,%r15 4180 addq %rax,%r14 4181 adcq %rdx,%r15 4182 movq 8+0+0(%rbp),%rax 4183 movq %rax,%r9 4184 mulq %r10 4185 addq %rax,%r14 4186 adcq $0,%rdx 4187 movq %rdx,%r10 4188 movq 8+0+0(%rbp),%rax 4189 mulq %r11 4190 addq %rax,%r15 4191 adcq $0,%rdx 4192 imulq %r12,%r9 4193 addq %r10,%r15 4194 adcq %rdx,%r9 4195 movq %r13,%r10 4196 movq %r14,%r11 4197 movq %r15,%r12 4198 andq $3,%r12 4199 movq %r15,%r13 4200 andq $-4,%r13 4201 movq %r9,%r14 4202 shrdq $2,%r9,%r15 4203 shrq $2,%r9 4204 addq %r13,%r15 4205 adcq %r14,%r9 4206 addq %r15,%r10 4207 adcq %r9,%r11 4208 adcq $0,%r12 4209 4210 addq $16,%rcx 4211 cmpq $64,%rcx 4212 jne L$open_avx2_init_hash 4213 4214 vpxor 0(%rsi),%ymm0,%ymm0 4215 vpxor 32(%rsi),%ymm4,%ymm4 4216 4217 vmovdqu %ymm0,0(%rdi) 4218 vmovdqu %ymm4,32(%rdi) 4219 leaq 64(%rsi),%rsi 4220 leaq 64(%rdi),%rdi 4221 subq $64,%rbx 4222L$open_avx2_main_loop: 4223 4224 cmpq $512,%rbx 4225 jb L$open_avx2_main_loop_done 4226 vmovdqa L$chacha20_consts(%rip),%ymm0 4227 vmovdqa 0+64(%rbp),%ymm4 4228 vmovdqa 0+96(%rbp),%ymm8 4229 vmovdqa %ymm0,%ymm1 4230 vmovdqa %ymm4,%ymm5 4231 vmovdqa %ymm8,%ymm9 4232 vmovdqa %ymm0,%ymm2 4233 vmovdqa %ymm4,%ymm6 4234 vmovdqa %ymm8,%ymm10 4235 vmovdqa %ymm0,%ymm3 4236 vmovdqa %ymm4,%ymm7 4237 vmovdqa %ymm8,%ymm11 4238 vmovdqa L$avx2_inc(%rip),%ymm12 4239 vpaddd 0+160(%rbp),%ymm12,%ymm15 4240 vpaddd %ymm15,%ymm12,%ymm14 4241 vpaddd %ymm14,%ymm12,%ymm13 4242 vpaddd %ymm13,%ymm12,%ymm12 4243 vmovdqa %ymm15,0+256(%rbp) 4244 vmovdqa %ymm14,0+224(%rbp) 4245 vmovdqa %ymm13,0+192(%rbp) 4246 vmovdqa %ymm12,0+160(%rbp) 4247 4248 xorq %rcx,%rcx 4249L$open_avx2_main_loop_rounds: 4250 addq 0+0(%rsi,%rcx,1),%r10 4251 adcq 8+0(%rsi,%rcx,1),%r11 4252 adcq $1,%r12 4253 vmovdqa %ymm8,0+128(%rbp) 4254 vmovdqa L$rol16(%rip),%ymm8 4255 vpaddd %ymm7,%ymm3,%ymm3 4256 vpaddd %ymm6,%ymm2,%ymm2 4257 vpaddd %ymm5,%ymm1,%ymm1 4258 vpaddd %ymm4,%ymm0,%ymm0 4259 vpxor %ymm3,%ymm15,%ymm15 4260 vpxor %ymm2,%ymm14,%ymm14 4261 vpxor %ymm1,%ymm13,%ymm13 4262 vpxor %ymm0,%ymm12,%ymm12 4263 movq 0+0+0(%rbp),%rdx 4264 movq %rdx,%r15 4265 mulxq %r10,%r13,%r14 4266 mulxq %r11,%rax,%rdx 4267 imulq %r12,%r15 4268 addq %rax,%r14 4269 adcq %rdx,%r15 4270 vpshufb %ymm8,%ymm15,%ymm15 4271 vpshufb %ymm8,%ymm14,%ymm14 4272 vpshufb %ymm8,%ymm13,%ymm13 4273 vpshufb %ymm8,%ymm12,%ymm12 4274 vpaddd %ymm15,%ymm11,%ymm11 4275 vpaddd %ymm14,%ymm10,%ymm10 4276 vpaddd %ymm13,%ymm9,%ymm9 4277 vpaddd 0+128(%rbp),%ymm12,%ymm8 4278 vpxor %ymm11,%ymm7,%ymm7 4279 movq 8+0+0(%rbp),%rdx 4280 mulxq %r10,%r10,%rax 4281 addq %r10,%r14 4282 mulxq %r11,%r11,%r9 4283 adcq %r11,%r15 4284 adcq $0,%r9 4285 imulq %r12,%rdx 4286 vpxor %ymm10,%ymm6,%ymm6 4287 vpxor %ymm9,%ymm5,%ymm5 4288 vpxor %ymm8,%ymm4,%ymm4 4289 vmovdqa %ymm8,0+128(%rbp) 4290 vpsrld $20,%ymm7,%ymm8 4291 vpslld $32-20,%ymm7,%ymm7 4292 vpxor %ymm8,%ymm7,%ymm7 4293 vpsrld $20,%ymm6,%ymm8 4294 vpslld $32-20,%ymm6,%ymm6 4295 vpxor %ymm8,%ymm6,%ymm6 4296 vpsrld $20,%ymm5,%ymm8 4297 vpslld $32-20,%ymm5,%ymm5 4298 addq %rax,%r15 4299 adcq %rdx,%r9 4300 vpxor %ymm8,%ymm5,%ymm5 4301 vpsrld $20,%ymm4,%ymm8 4302 vpslld 
$32-20,%ymm4,%ymm4 4303 vpxor %ymm8,%ymm4,%ymm4 4304 vmovdqa L$rol8(%rip),%ymm8 4305 vpaddd %ymm7,%ymm3,%ymm3 4306 vpaddd %ymm6,%ymm2,%ymm2 4307 vpaddd %ymm5,%ymm1,%ymm1 4308 vpaddd %ymm4,%ymm0,%ymm0 4309 vpxor %ymm3,%ymm15,%ymm15 4310 movq %r13,%r10 4311 movq %r14,%r11 4312 movq %r15,%r12 4313 andq $3,%r12 4314 movq %r15,%r13 4315 andq $-4,%r13 4316 movq %r9,%r14 4317 shrdq $2,%r9,%r15 4318 shrq $2,%r9 4319 addq %r13,%r15 4320 adcq %r14,%r9 4321 addq %r15,%r10 4322 adcq %r9,%r11 4323 adcq $0,%r12 4324 vpxor %ymm2,%ymm14,%ymm14 4325 vpxor %ymm1,%ymm13,%ymm13 4326 vpxor %ymm0,%ymm12,%ymm12 4327 vpshufb %ymm8,%ymm15,%ymm15 4328 vpshufb %ymm8,%ymm14,%ymm14 4329 vpshufb %ymm8,%ymm13,%ymm13 4330 vpshufb %ymm8,%ymm12,%ymm12 4331 vpaddd %ymm15,%ymm11,%ymm11 4332 vpaddd %ymm14,%ymm10,%ymm10 4333 addq 0+16(%rsi,%rcx,1),%r10 4334 adcq 8+16(%rsi,%rcx,1),%r11 4335 adcq $1,%r12 4336 vpaddd %ymm13,%ymm9,%ymm9 4337 vpaddd 0+128(%rbp),%ymm12,%ymm8 4338 vpxor %ymm11,%ymm7,%ymm7 4339 vpxor %ymm10,%ymm6,%ymm6 4340 vpxor %ymm9,%ymm5,%ymm5 4341 vpxor %ymm8,%ymm4,%ymm4 4342 vmovdqa %ymm8,0+128(%rbp) 4343 vpsrld $25,%ymm7,%ymm8 4344 movq 0+0+0(%rbp),%rdx 4345 movq %rdx,%r15 4346 mulxq %r10,%r13,%r14 4347 mulxq %r11,%rax,%rdx 4348 imulq %r12,%r15 4349 addq %rax,%r14 4350 adcq %rdx,%r15 4351 vpslld $32-25,%ymm7,%ymm7 4352 vpxor %ymm8,%ymm7,%ymm7 4353 vpsrld $25,%ymm6,%ymm8 4354 vpslld $32-25,%ymm6,%ymm6 4355 vpxor %ymm8,%ymm6,%ymm6 4356 vpsrld $25,%ymm5,%ymm8 4357 vpslld $32-25,%ymm5,%ymm5 4358 vpxor %ymm8,%ymm5,%ymm5 4359 vpsrld $25,%ymm4,%ymm8 4360 vpslld $32-25,%ymm4,%ymm4 4361 vpxor %ymm8,%ymm4,%ymm4 4362 vmovdqa 0+128(%rbp),%ymm8 4363 vpalignr $4,%ymm7,%ymm7,%ymm7 4364 vpalignr $8,%ymm11,%ymm11,%ymm11 4365 vpalignr $12,%ymm15,%ymm15,%ymm15 4366 vpalignr $4,%ymm6,%ymm6,%ymm6 4367 vpalignr $8,%ymm10,%ymm10,%ymm10 4368 vpalignr $12,%ymm14,%ymm14,%ymm14 4369 movq 8+0+0(%rbp),%rdx 4370 mulxq %r10,%r10,%rax 4371 addq %r10,%r14 4372 mulxq %r11,%r11,%r9 4373 adcq %r11,%r15 4374 adcq $0,%r9 4375 imulq %r12,%rdx 4376 vpalignr $4,%ymm5,%ymm5,%ymm5 4377 vpalignr $8,%ymm9,%ymm9,%ymm9 4378 vpalignr $12,%ymm13,%ymm13,%ymm13 4379 vpalignr $4,%ymm4,%ymm4,%ymm4 4380 vpalignr $8,%ymm8,%ymm8,%ymm8 4381 vpalignr $12,%ymm12,%ymm12,%ymm12 4382 vmovdqa %ymm8,0+128(%rbp) 4383 vmovdqa L$rol16(%rip),%ymm8 4384 vpaddd %ymm7,%ymm3,%ymm3 4385 vpaddd %ymm6,%ymm2,%ymm2 4386 vpaddd %ymm5,%ymm1,%ymm1 4387 vpaddd %ymm4,%ymm0,%ymm0 4388 vpxor %ymm3,%ymm15,%ymm15 4389 vpxor %ymm2,%ymm14,%ymm14 4390 vpxor %ymm1,%ymm13,%ymm13 4391 vpxor %ymm0,%ymm12,%ymm12 4392 vpshufb %ymm8,%ymm15,%ymm15 4393 vpshufb %ymm8,%ymm14,%ymm14 4394 addq %rax,%r15 4395 adcq %rdx,%r9 4396 vpshufb %ymm8,%ymm13,%ymm13 4397 vpshufb %ymm8,%ymm12,%ymm12 4398 vpaddd %ymm15,%ymm11,%ymm11 4399 vpaddd %ymm14,%ymm10,%ymm10 4400 vpaddd %ymm13,%ymm9,%ymm9 4401 vpaddd 0+128(%rbp),%ymm12,%ymm8 4402 vpxor %ymm11,%ymm7,%ymm7 4403 vpxor %ymm10,%ymm6,%ymm6 4404 vpxor %ymm9,%ymm5,%ymm5 4405 movq %r13,%r10 4406 movq %r14,%r11 4407 movq %r15,%r12 4408 andq $3,%r12 4409 movq %r15,%r13 4410 andq $-4,%r13 4411 movq %r9,%r14 4412 shrdq $2,%r9,%r15 4413 shrq $2,%r9 4414 addq %r13,%r15 4415 adcq %r14,%r9 4416 addq %r15,%r10 4417 adcq %r9,%r11 4418 adcq $0,%r12 4419 vpxor %ymm8,%ymm4,%ymm4 4420 vmovdqa %ymm8,0+128(%rbp) 4421 vpsrld $20,%ymm7,%ymm8 4422 vpslld $32-20,%ymm7,%ymm7 4423 vpxor %ymm8,%ymm7,%ymm7 4424 vpsrld $20,%ymm6,%ymm8 4425 vpslld $32-20,%ymm6,%ymm6 4426 vpxor %ymm8,%ymm6,%ymm6 4427 addq 0+32(%rsi,%rcx,1),%r10 4428 adcq 8+32(%rsi,%rcx,1),%r11 4429 adcq $1,%r12 4430 4431 leaq 
48(%rcx),%rcx 4432 vpsrld $20,%ymm5,%ymm8 4433 vpslld $32-20,%ymm5,%ymm5 4434 vpxor %ymm8,%ymm5,%ymm5 4435 vpsrld $20,%ymm4,%ymm8 4436 vpslld $32-20,%ymm4,%ymm4 4437 vpxor %ymm8,%ymm4,%ymm4 4438 vmovdqa L$rol8(%rip),%ymm8 4439 vpaddd %ymm7,%ymm3,%ymm3 4440 vpaddd %ymm6,%ymm2,%ymm2 4441 vpaddd %ymm5,%ymm1,%ymm1 4442 vpaddd %ymm4,%ymm0,%ymm0 4443 vpxor %ymm3,%ymm15,%ymm15 4444 vpxor %ymm2,%ymm14,%ymm14 4445 vpxor %ymm1,%ymm13,%ymm13 4446 vpxor %ymm0,%ymm12,%ymm12 4447 vpshufb %ymm8,%ymm15,%ymm15 4448 vpshufb %ymm8,%ymm14,%ymm14 4449 vpshufb %ymm8,%ymm13,%ymm13 4450 movq 0+0+0(%rbp),%rdx 4451 movq %rdx,%r15 4452 mulxq %r10,%r13,%r14 4453 mulxq %r11,%rax,%rdx 4454 imulq %r12,%r15 4455 addq %rax,%r14 4456 adcq %rdx,%r15 4457 vpshufb %ymm8,%ymm12,%ymm12 4458 vpaddd %ymm15,%ymm11,%ymm11 4459 vpaddd %ymm14,%ymm10,%ymm10 4460 vpaddd %ymm13,%ymm9,%ymm9 4461 vpaddd 0+128(%rbp),%ymm12,%ymm8 4462 vpxor %ymm11,%ymm7,%ymm7 4463 vpxor %ymm10,%ymm6,%ymm6 4464 vpxor %ymm9,%ymm5,%ymm5 4465 movq 8+0+0(%rbp),%rdx 4466 mulxq %r10,%r10,%rax 4467 addq %r10,%r14 4468 mulxq %r11,%r11,%r9 4469 adcq %r11,%r15 4470 adcq $0,%r9 4471 imulq %r12,%rdx 4472 vpxor %ymm8,%ymm4,%ymm4 4473 vmovdqa %ymm8,0+128(%rbp) 4474 vpsrld $25,%ymm7,%ymm8 4475 vpslld $32-25,%ymm7,%ymm7 4476 vpxor %ymm8,%ymm7,%ymm7 4477 vpsrld $25,%ymm6,%ymm8 4478 vpslld $32-25,%ymm6,%ymm6 4479 vpxor %ymm8,%ymm6,%ymm6 4480 addq %rax,%r15 4481 adcq %rdx,%r9 4482 vpsrld $25,%ymm5,%ymm8 4483 vpslld $32-25,%ymm5,%ymm5 4484 vpxor %ymm8,%ymm5,%ymm5 4485 vpsrld $25,%ymm4,%ymm8 4486 vpslld $32-25,%ymm4,%ymm4 4487 vpxor %ymm8,%ymm4,%ymm4 4488 vmovdqa 0+128(%rbp),%ymm8 4489 vpalignr $12,%ymm7,%ymm7,%ymm7 4490 vpalignr $8,%ymm11,%ymm11,%ymm11 4491 vpalignr $4,%ymm15,%ymm15,%ymm15 4492 vpalignr $12,%ymm6,%ymm6,%ymm6 4493 vpalignr $8,%ymm10,%ymm10,%ymm10 4494 vpalignr $4,%ymm14,%ymm14,%ymm14 4495 vpalignr $12,%ymm5,%ymm5,%ymm5 4496 vpalignr $8,%ymm9,%ymm9,%ymm9 4497 vpalignr $4,%ymm13,%ymm13,%ymm13 4498 vpalignr $12,%ymm4,%ymm4,%ymm4 4499 vpalignr $8,%ymm8,%ymm8,%ymm8 4500 movq %r13,%r10 4501 movq %r14,%r11 4502 movq %r15,%r12 4503 andq $3,%r12 4504 movq %r15,%r13 4505 andq $-4,%r13 4506 movq %r9,%r14 4507 shrdq $2,%r9,%r15 4508 shrq $2,%r9 4509 addq %r13,%r15 4510 adcq %r14,%r9 4511 addq %r15,%r10 4512 adcq %r9,%r11 4513 adcq $0,%r12 4514 vpalignr $4,%ymm12,%ymm12,%ymm12 4515 4516 cmpq $60*8,%rcx 4517 jne L$open_avx2_main_loop_rounds 4518 vpaddd L$chacha20_consts(%rip),%ymm3,%ymm3 4519 vpaddd 0+64(%rbp),%ymm7,%ymm7 4520 vpaddd 0+96(%rbp),%ymm11,%ymm11 4521 vpaddd 0+256(%rbp),%ymm15,%ymm15 4522 vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 4523 vpaddd 0+64(%rbp),%ymm6,%ymm6 4524 vpaddd 0+96(%rbp),%ymm10,%ymm10 4525 vpaddd 0+224(%rbp),%ymm14,%ymm14 4526 vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 4527 vpaddd 0+64(%rbp),%ymm5,%ymm5 4528 vpaddd 0+96(%rbp),%ymm9,%ymm9 4529 vpaddd 0+192(%rbp),%ymm13,%ymm13 4530 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 4531 vpaddd 0+64(%rbp),%ymm4,%ymm4 4532 vpaddd 0+96(%rbp),%ymm8,%ymm8 4533 vpaddd 0+160(%rbp),%ymm12,%ymm12 4534 4535 vmovdqa %ymm0,0+128(%rbp) 4536 addq 0+60*8(%rsi),%r10 4537 adcq 8+60*8(%rsi),%r11 4538 adcq $1,%r12 4539 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 4540 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 4541 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 4542 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 4543 vpxor 0+0(%rsi),%ymm0,%ymm0 4544 vpxor 32+0(%rsi),%ymm3,%ymm3 4545 vpxor 64+0(%rsi),%ymm7,%ymm7 4546 vpxor 96+0(%rsi),%ymm11,%ymm11 4547 vmovdqu %ymm0,0+0(%rdi) 4548 vmovdqu %ymm3,32+0(%rdi) 4549 vmovdqu %ymm7,64+0(%rdi) 4550 vmovdqu 
%ymm11,96+0(%rdi) 4551 4552 vmovdqa 0+128(%rbp),%ymm0 4553 movq 0+0+0(%rbp),%rax 4554 movq %rax,%r15 4555 mulq %r10 4556 movq %rax,%r13 4557 movq %rdx,%r14 4558 movq 0+0+0(%rbp),%rax 4559 mulq %r11 4560 imulq %r12,%r15 4561 addq %rax,%r14 4562 adcq %rdx,%r15 4563 movq 8+0+0(%rbp),%rax 4564 movq %rax,%r9 4565 mulq %r10 4566 addq %rax,%r14 4567 adcq $0,%rdx 4568 movq %rdx,%r10 4569 movq 8+0+0(%rbp),%rax 4570 mulq %r11 4571 addq %rax,%r15 4572 adcq $0,%rdx 4573 imulq %r12,%r9 4574 addq %r10,%r15 4575 adcq %rdx,%r9 4576 movq %r13,%r10 4577 movq %r14,%r11 4578 movq %r15,%r12 4579 andq $3,%r12 4580 movq %r15,%r13 4581 andq $-4,%r13 4582 movq %r9,%r14 4583 shrdq $2,%r9,%r15 4584 shrq $2,%r9 4585 addq %r13,%r15 4586 adcq %r14,%r9 4587 addq %r15,%r10 4588 adcq %r9,%r11 4589 adcq $0,%r12 4590 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 4591 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 4592 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 4593 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 4594 vpxor 0+128(%rsi),%ymm3,%ymm3 4595 vpxor 32+128(%rsi),%ymm2,%ymm2 4596 vpxor 64+128(%rsi),%ymm6,%ymm6 4597 vpxor 96+128(%rsi),%ymm10,%ymm10 4598 vmovdqu %ymm3,0+128(%rdi) 4599 vmovdqu %ymm2,32+128(%rdi) 4600 vmovdqu %ymm6,64+128(%rdi) 4601 vmovdqu %ymm10,96+128(%rdi) 4602 addq 0+60*8+16(%rsi),%r10 4603 adcq 8+60*8+16(%rsi),%r11 4604 adcq $1,%r12 4605 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 4606 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 4607 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 4608 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 4609 vpxor 0+256(%rsi),%ymm3,%ymm3 4610 vpxor 32+256(%rsi),%ymm1,%ymm1 4611 vpxor 64+256(%rsi),%ymm5,%ymm5 4612 vpxor 96+256(%rsi),%ymm9,%ymm9 4613 vmovdqu %ymm3,0+256(%rdi) 4614 vmovdqu %ymm1,32+256(%rdi) 4615 vmovdqu %ymm5,64+256(%rdi) 4616 vmovdqu %ymm9,96+256(%rdi) 4617 movq 0+0+0(%rbp),%rax 4618 movq %rax,%r15 4619 mulq %r10 4620 movq %rax,%r13 4621 movq %rdx,%r14 4622 movq 0+0+0(%rbp),%rax 4623 mulq %r11 4624 imulq %r12,%r15 4625 addq %rax,%r14 4626 adcq %rdx,%r15 4627 movq 8+0+0(%rbp),%rax 4628 movq %rax,%r9 4629 mulq %r10 4630 addq %rax,%r14 4631 adcq $0,%rdx 4632 movq %rdx,%r10 4633 movq 8+0+0(%rbp),%rax 4634 mulq %r11 4635 addq %rax,%r15 4636 adcq $0,%rdx 4637 imulq %r12,%r9 4638 addq %r10,%r15 4639 adcq %rdx,%r9 4640 movq %r13,%r10 4641 movq %r14,%r11 4642 movq %r15,%r12 4643 andq $3,%r12 4644 movq %r15,%r13 4645 andq $-4,%r13 4646 movq %r9,%r14 4647 shrdq $2,%r9,%r15 4648 shrq $2,%r9 4649 addq %r13,%r15 4650 adcq %r14,%r9 4651 addq %r15,%r10 4652 adcq %r9,%r11 4653 adcq $0,%r12 4654 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 4655 vperm2i128 $0x13,%ymm0,%ymm4,%ymm4 4656 vperm2i128 $0x02,%ymm8,%ymm12,%ymm0 4657 vperm2i128 $0x13,%ymm8,%ymm12,%ymm8 4658 vpxor 0+384(%rsi),%ymm3,%ymm3 4659 vpxor 32+384(%rsi),%ymm0,%ymm0 4660 vpxor 64+384(%rsi),%ymm4,%ymm4 4661 vpxor 96+384(%rsi),%ymm8,%ymm8 4662 vmovdqu %ymm3,0+384(%rdi) 4663 vmovdqu %ymm0,32+384(%rdi) 4664 vmovdqu %ymm4,64+384(%rdi) 4665 vmovdqu %ymm8,96+384(%rdi) 4666 4667 leaq 512(%rsi),%rsi 4668 leaq 512(%rdi),%rdi 4669 subq $512,%rbx 4670 jmp L$open_avx2_main_loop 4671L$open_avx2_main_loop_done: 4672 testq %rbx,%rbx 4673 vzeroupper 4674 je L$open_sse_finalize 4675 4676 cmpq $384,%rbx 4677 ja L$open_avx2_tail_512 4678 cmpq $256,%rbx 4679 ja L$open_avx2_tail_384 4680 cmpq $128,%rbx 4681 ja L$open_avx2_tail_256 4682 vmovdqa L$chacha20_consts(%rip),%ymm0 4683 vmovdqa 0+64(%rbp),%ymm4 4684 vmovdqa 0+96(%rbp),%ymm8 4685 vmovdqa L$avx2_inc(%rip),%ymm12 4686 vpaddd 0+160(%rbp),%ymm12,%ymm12 4687 vmovdqa %ymm12,0+160(%rbp) 4688 4689 xorq %r8,%r8 4690 movq %rbx,%rcx 4691 andq $-16,%rcx 4692 testq %rcx,%rcx 
4693 je L$open_avx2_tail_128_rounds 4694L$open_avx2_tail_128_rounds_and_x1hash: 4695 addq 0+0(%rsi,%r8,1),%r10 4696 adcq 8+0(%rsi,%r8,1),%r11 4697 adcq $1,%r12 4698 movq 0+0+0(%rbp),%rax 4699 movq %rax,%r15 4700 mulq %r10 4701 movq %rax,%r13 4702 movq %rdx,%r14 4703 movq 0+0+0(%rbp),%rax 4704 mulq %r11 4705 imulq %r12,%r15 4706 addq %rax,%r14 4707 adcq %rdx,%r15 4708 movq 8+0+0(%rbp),%rax 4709 movq %rax,%r9 4710 mulq %r10 4711 addq %rax,%r14 4712 adcq $0,%rdx 4713 movq %rdx,%r10 4714 movq 8+0+0(%rbp),%rax 4715 mulq %r11 4716 addq %rax,%r15 4717 adcq $0,%rdx 4718 imulq %r12,%r9 4719 addq %r10,%r15 4720 adcq %rdx,%r9 4721 movq %r13,%r10 4722 movq %r14,%r11 4723 movq %r15,%r12 4724 andq $3,%r12 4725 movq %r15,%r13 4726 andq $-4,%r13 4727 movq %r9,%r14 4728 shrdq $2,%r9,%r15 4729 shrq $2,%r9 4730 addq %r13,%r15 4731 adcq %r14,%r9 4732 addq %r15,%r10 4733 adcq %r9,%r11 4734 adcq $0,%r12 4735 4736L$open_avx2_tail_128_rounds: 4737 addq $16,%r8 4738 vpaddd %ymm4,%ymm0,%ymm0 4739 vpxor %ymm0,%ymm12,%ymm12 4740 vpshufb L$rol16(%rip),%ymm12,%ymm12 4741 vpaddd %ymm12,%ymm8,%ymm8 4742 vpxor %ymm8,%ymm4,%ymm4 4743 vpsrld $20,%ymm4,%ymm3 4744 vpslld $12,%ymm4,%ymm4 4745 vpxor %ymm3,%ymm4,%ymm4 4746 vpaddd %ymm4,%ymm0,%ymm0 4747 vpxor %ymm0,%ymm12,%ymm12 4748 vpshufb L$rol8(%rip),%ymm12,%ymm12 4749 vpaddd %ymm12,%ymm8,%ymm8 4750 vpxor %ymm8,%ymm4,%ymm4 4751 vpslld $7,%ymm4,%ymm3 4752 vpsrld $25,%ymm4,%ymm4 4753 vpxor %ymm3,%ymm4,%ymm4 4754 vpalignr $12,%ymm12,%ymm12,%ymm12 4755 vpalignr $8,%ymm8,%ymm8,%ymm8 4756 vpalignr $4,%ymm4,%ymm4,%ymm4 4757 vpaddd %ymm4,%ymm0,%ymm0 4758 vpxor %ymm0,%ymm12,%ymm12 4759 vpshufb L$rol16(%rip),%ymm12,%ymm12 4760 vpaddd %ymm12,%ymm8,%ymm8 4761 vpxor %ymm8,%ymm4,%ymm4 4762 vpsrld $20,%ymm4,%ymm3 4763 vpslld $12,%ymm4,%ymm4 4764 vpxor %ymm3,%ymm4,%ymm4 4765 vpaddd %ymm4,%ymm0,%ymm0 4766 vpxor %ymm0,%ymm12,%ymm12 4767 vpshufb L$rol8(%rip),%ymm12,%ymm12 4768 vpaddd %ymm12,%ymm8,%ymm8 4769 vpxor %ymm8,%ymm4,%ymm4 4770 vpslld $7,%ymm4,%ymm3 4771 vpsrld $25,%ymm4,%ymm4 4772 vpxor %ymm3,%ymm4,%ymm4 4773 vpalignr $4,%ymm12,%ymm12,%ymm12 4774 vpalignr $8,%ymm8,%ymm8,%ymm8 4775 vpalignr $12,%ymm4,%ymm4,%ymm4 4776 4777 cmpq %rcx,%r8 4778 jb L$open_avx2_tail_128_rounds_and_x1hash 4779 cmpq $160,%r8 4780 jne L$open_avx2_tail_128_rounds 4781 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 4782 vpaddd 0+64(%rbp),%ymm4,%ymm4 4783 vpaddd 0+96(%rbp),%ymm8,%ymm8 4784 vpaddd 0+160(%rbp),%ymm12,%ymm12 4785 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 4786 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 4787 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 4788 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 4789 vmovdqa %ymm3,%ymm8 4790 4791 jmp L$open_avx2_tail_128_xor 4792 4793L$open_avx2_tail_256: 4794 vmovdqa L$chacha20_consts(%rip),%ymm0 4795 vmovdqa 0+64(%rbp),%ymm4 4796 vmovdqa 0+96(%rbp),%ymm8 4797 vmovdqa %ymm0,%ymm1 4798 vmovdqa %ymm4,%ymm5 4799 vmovdqa %ymm8,%ymm9 4800 vmovdqa L$avx2_inc(%rip),%ymm12 4801 vpaddd 0+160(%rbp),%ymm12,%ymm13 4802 vpaddd %ymm13,%ymm12,%ymm12 4803 vmovdqa %ymm12,0+160(%rbp) 4804 vmovdqa %ymm13,0+192(%rbp) 4805 4806 movq %rbx,0+128(%rbp) 4807 movq %rbx,%rcx 4808 subq $128,%rcx 4809 shrq $4,%rcx 4810 movq $10,%r8 4811 cmpq $10,%rcx 4812 cmovgq %r8,%rcx 4813 movq %rsi,%rbx 4814 xorq %r8,%r8 4815L$open_avx2_tail_256_rounds_and_x1hash: 4816 addq 0+0(%rbx),%r10 4817 adcq 8+0(%rbx),%r11 4818 adcq $1,%r12 4819 movq 0+0+0(%rbp),%rdx 4820 movq %rdx,%r15 4821 mulxq %r10,%r13,%r14 4822 mulxq %r11,%rax,%rdx 4823 imulq %r12,%r15 4824 addq %rax,%r14 4825 adcq %rdx,%r15 4826 movq 8+0+0(%rbp),%rdx 4827 mulxq %r10,%r10,%rax 
4828 addq %r10,%r14 4829 mulxq %r11,%r11,%r9 4830 adcq %r11,%r15 4831 adcq $0,%r9 4832 imulq %r12,%rdx 4833 addq %rax,%r15 4834 adcq %rdx,%r9 4835 movq %r13,%r10 4836 movq %r14,%r11 4837 movq %r15,%r12 4838 andq $3,%r12 4839 movq %r15,%r13 4840 andq $-4,%r13 4841 movq %r9,%r14 4842 shrdq $2,%r9,%r15 4843 shrq $2,%r9 4844 addq %r13,%r15 4845 adcq %r14,%r9 4846 addq %r15,%r10 4847 adcq %r9,%r11 4848 adcq $0,%r12 4849 4850 leaq 16(%rbx),%rbx 4851L$open_avx2_tail_256_rounds: 4852 vpaddd %ymm4,%ymm0,%ymm0 4853 vpxor %ymm0,%ymm12,%ymm12 4854 vpshufb L$rol16(%rip),%ymm12,%ymm12 4855 vpaddd %ymm12,%ymm8,%ymm8 4856 vpxor %ymm8,%ymm4,%ymm4 4857 vpsrld $20,%ymm4,%ymm3 4858 vpslld $12,%ymm4,%ymm4 4859 vpxor %ymm3,%ymm4,%ymm4 4860 vpaddd %ymm4,%ymm0,%ymm0 4861 vpxor %ymm0,%ymm12,%ymm12 4862 vpshufb L$rol8(%rip),%ymm12,%ymm12 4863 vpaddd %ymm12,%ymm8,%ymm8 4864 vpxor %ymm8,%ymm4,%ymm4 4865 vpslld $7,%ymm4,%ymm3 4866 vpsrld $25,%ymm4,%ymm4 4867 vpxor %ymm3,%ymm4,%ymm4 4868 vpalignr $12,%ymm12,%ymm12,%ymm12 4869 vpalignr $8,%ymm8,%ymm8,%ymm8 4870 vpalignr $4,%ymm4,%ymm4,%ymm4 4871 vpaddd %ymm5,%ymm1,%ymm1 4872 vpxor %ymm1,%ymm13,%ymm13 4873 vpshufb L$rol16(%rip),%ymm13,%ymm13 4874 vpaddd %ymm13,%ymm9,%ymm9 4875 vpxor %ymm9,%ymm5,%ymm5 4876 vpsrld $20,%ymm5,%ymm3 4877 vpslld $12,%ymm5,%ymm5 4878 vpxor %ymm3,%ymm5,%ymm5 4879 vpaddd %ymm5,%ymm1,%ymm1 4880 vpxor %ymm1,%ymm13,%ymm13 4881 vpshufb L$rol8(%rip),%ymm13,%ymm13 4882 vpaddd %ymm13,%ymm9,%ymm9 4883 vpxor %ymm9,%ymm5,%ymm5 4884 vpslld $7,%ymm5,%ymm3 4885 vpsrld $25,%ymm5,%ymm5 4886 vpxor %ymm3,%ymm5,%ymm5 4887 vpalignr $12,%ymm13,%ymm13,%ymm13 4888 vpalignr $8,%ymm9,%ymm9,%ymm9 4889 vpalignr $4,%ymm5,%ymm5,%ymm5 4890 4891 incq %r8 4892 vpaddd %ymm4,%ymm0,%ymm0 4893 vpxor %ymm0,%ymm12,%ymm12 4894 vpshufb L$rol16(%rip),%ymm12,%ymm12 4895 vpaddd %ymm12,%ymm8,%ymm8 4896 vpxor %ymm8,%ymm4,%ymm4 4897 vpsrld $20,%ymm4,%ymm3 4898 vpslld $12,%ymm4,%ymm4 4899 vpxor %ymm3,%ymm4,%ymm4 4900 vpaddd %ymm4,%ymm0,%ymm0 4901 vpxor %ymm0,%ymm12,%ymm12 4902 vpshufb L$rol8(%rip),%ymm12,%ymm12 4903 vpaddd %ymm12,%ymm8,%ymm8 4904 vpxor %ymm8,%ymm4,%ymm4 4905 vpslld $7,%ymm4,%ymm3 4906 vpsrld $25,%ymm4,%ymm4 4907 vpxor %ymm3,%ymm4,%ymm4 4908 vpalignr $4,%ymm12,%ymm12,%ymm12 4909 vpalignr $8,%ymm8,%ymm8,%ymm8 4910 vpalignr $12,%ymm4,%ymm4,%ymm4 4911 vpaddd %ymm5,%ymm1,%ymm1 4912 vpxor %ymm1,%ymm13,%ymm13 4913 vpshufb L$rol16(%rip),%ymm13,%ymm13 4914 vpaddd %ymm13,%ymm9,%ymm9 4915 vpxor %ymm9,%ymm5,%ymm5 4916 vpsrld $20,%ymm5,%ymm3 4917 vpslld $12,%ymm5,%ymm5 4918 vpxor %ymm3,%ymm5,%ymm5 4919 vpaddd %ymm5,%ymm1,%ymm1 4920 vpxor %ymm1,%ymm13,%ymm13 4921 vpshufb L$rol8(%rip),%ymm13,%ymm13 4922 vpaddd %ymm13,%ymm9,%ymm9 4923 vpxor %ymm9,%ymm5,%ymm5 4924 vpslld $7,%ymm5,%ymm3 4925 vpsrld $25,%ymm5,%ymm5 4926 vpxor %ymm3,%ymm5,%ymm5 4927 vpalignr $4,%ymm13,%ymm13,%ymm13 4928 vpalignr $8,%ymm9,%ymm9,%ymm9 4929 vpalignr $12,%ymm5,%ymm5,%ymm5 4930 vpaddd %ymm6,%ymm2,%ymm2 4931 vpxor %ymm2,%ymm14,%ymm14 4932 vpshufb L$rol16(%rip),%ymm14,%ymm14 4933 vpaddd %ymm14,%ymm10,%ymm10 4934 vpxor %ymm10,%ymm6,%ymm6 4935 vpsrld $20,%ymm6,%ymm3 4936 vpslld $12,%ymm6,%ymm6 4937 vpxor %ymm3,%ymm6,%ymm6 4938 vpaddd %ymm6,%ymm2,%ymm2 4939 vpxor %ymm2,%ymm14,%ymm14 4940 vpshufb L$rol8(%rip),%ymm14,%ymm14 4941 vpaddd %ymm14,%ymm10,%ymm10 4942 vpxor %ymm10,%ymm6,%ymm6 4943 vpslld $7,%ymm6,%ymm3 4944 vpsrld $25,%ymm6,%ymm6 4945 vpxor %ymm3,%ymm6,%ymm6 4946 vpalignr $4,%ymm14,%ymm14,%ymm14 4947 vpalignr $8,%ymm10,%ymm10,%ymm10 4948 vpalignr $12,%ymm6,%ymm6,%ymm6 4949 4950 cmpq %rcx,%r8 4951 jb 
L$open_avx2_tail_256_rounds_and_x1hash 4952 cmpq $10,%r8 4953 jne L$open_avx2_tail_256_rounds 4954 movq %rbx,%r8 4955 subq %rsi,%rbx 4956 movq %rbx,%rcx 4957 movq 0+128(%rbp),%rbx 4958L$open_avx2_tail_256_hash: 4959 addq $16,%rcx 4960 cmpq %rbx,%rcx 4961 jg L$open_avx2_tail_256_done 4962 addq 0+0(%r8),%r10 4963 adcq 8+0(%r8),%r11 4964 adcq $1,%r12 4965 movq 0+0+0(%rbp),%rdx 4966 movq %rdx,%r15 4967 mulxq %r10,%r13,%r14 4968 mulxq %r11,%rax,%rdx 4969 imulq %r12,%r15 4970 addq %rax,%r14 4971 adcq %rdx,%r15 4972 movq 8+0+0(%rbp),%rdx 4973 mulxq %r10,%r10,%rax 4974 addq %r10,%r14 4975 mulxq %r11,%r11,%r9 4976 adcq %r11,%r15 4977 adcq $0,%r9 4978 imulq %r12,%rdx 4979 addq %rax,%r15 4980 adcq %rdx,%r9 4981 movq %r13,%r10 4982 movq %r14,%r11 4983 movq %r15,%r12 4984 andq $3,%r12 4985 movq %r15,%r13 4986 andq $-4,%r13 4987 movq %r9,%r14 4988 shrdq $2,%r9,%r15 4989 shrq $2,%r9 4990 addq %r13,%r15 4991 adcq %r14,%r9 4992 addq %r15,%r10 4993 adcq %r9,%r11 4994 adcq $0,%r12 4995 4996 leaq 16(%r8),%r8 4997 jmp L$open_avx2_tail_256_hash 4998L$open_avx2_tail_256_done: 4999 vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 5000 vpaddd 0+64(%rbp),%ymm5,%ymm5 5001 vpaddd 0+96(%rbp),%ymm9,%ymm9 5002 vpaddd 0+192(%rbp),%ymm13,%ymm13 5003 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 5004 vpaddd 0+64(%rbp),%ymm4,%ymm4 5005 vpaddd 0+96(%rbp),%ymm8,%ymm8 5006 vpaddd 0+160(%rbp),%ymm12,%ymm12 5007 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 5008 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 5009 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 5010 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 5011 vpxor 0+0(%rsi),%ymm3,%ymm3 5012 vpxor 32+0(%rsi),%ymm1,%ymm1 5013 vpxor 64+0(%rsi),%ymm5,%ymm5 5014 vpxor 96+0(%rsi),%ymm9,%ymm9 5015 vmovdqu %ymm3,0+0(%rdi) 5016 vmovdqu %ymm1,32+0(%rdi) 5017 vmovdqu %ymm5,64+0(%rdi) 5018 vmovdqu %ymm9,96+0(%rdi) 5019 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 5020 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 5021 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 5022 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 5023 vmovdqa %ymm3,%ymm8 5024 5025 leaq 128(%rsi),%rsi 5026 leaq 128(%rdi),%rdi 5027 subq $128,%rbx 5028 jmp L$open_avx2_tail_128_xor 5029 5030L$open_avx2_tail_384: 5031 vmovdqa L$chacha20_consts(%rip),%ymm0 5032 vmovdqa 0+64(%rbp),%ymm4 5033 vmovdqa 0+96(%rbp),%ymm8 5034 vmovdqa %ymm0,%ymm1 5035 vmovdqa %ymm4,%ymm5 5036 vmovdqa %ymm8,%ymm9 5037 vmovdqa %ymm0,%ymm2 5038 vmovdqa %ymm4,%ymm6 5039 vmovdqa %ymm8,%ymm10 5040 vmovdqa L$avx2_inc(%rip),%ymm12 5041 vpaddd 0+160(%rbp),%ymm12,%ymm14 5042 vpaddd %ymm14,%ymm12,%ymm13 5043 vpaddd %ymm13,%ymm12,%ymm12 5044 vmovdqa %ymm12,0+160(%rbp) 5045 vmovdqa %ymm13,0+192(%rbp) 5046 vmovdqa %ymm14,0+224(%rbp) 5047 5048 movq %rbx,0+128(%rbp) 5049 movq %rbx,%rcx 5050 subq $256,%rcx 5051 shrq $4,%rcx 5052 addq $6,%rcx 5053 movq $10,%r8 5054 cmpq $10,%rcx 5055 cmovgq %r8,%rcx 5056 movq %rsi,%rbx 5057 xorq %r8,%r8 5058L$open_avx2_tail_384_rounds_and_x2hash: 5059 addq 0+0(%rbx),%r10 5060 adcq 8+0(%rbx),%r11 5061 adcq $1,%r12 5062 movq 0+0+0(%rbp),%rdx 5063 movq %rdx,%r15 5064 mulxq %r10,%r13,%r14 5065 mulxq %r11,%rax,%rdx 5066 imulq %r12,%r15 5067 addq %rax,%r14 5068 adcq %rdx,%r15 5069 movq 8+0+0(%rbp),%rdx 5070 mulxq %r10,%r10,%rax 5071 addq %r10,%r14 5072 mulxq %r11,%r11,%r9 5073 adcq %r11,%r15 5074 adcq $0,%r9 5075 imulq %r12,%rdx 5076 addq %rax,%r15 5077 adcq %rdx,%r9 5078 movq %r13,%r10 5079 movq %r14,%r11 5080 movq %r15,%r12 5081 andq $3,%r12 5082 movq %r15,%r13 5083 andq $-4,%r13 5084 movq %r9,%r14 5085 shrdq $2,%r9,%r15 5086 shrq $2,%r9 5087 addq %r13,%r15 5088 adcq %r14,%r9 5089 addq %r15,%r10 5090 adcq %r9,%r11 5091 adcq 
$0,%r12 5092 5093 leaq 16(%rbx),%rbx 5094L$open_avx2_tail_384_rounds_and_x1hash: 5095 vpaddd %ymm6,%ymm2,%ymm2 5096 vpxor %ymm2,%ymm14,%ymm14 5097 vpshufb L$rol16(%rip),%ymm14,%ymm14 5098 vpaddd %ymm14,%ymm10,%ymm10 5099 vpxor %ymm10,%ymm6,%ymm6 5100 vpsrld $20,%ymm6,%ymm3 5101 vpslld $12,%ymm6,%ymm6 5102 vpxor %ymm3,%ymm6,%ymm6 5103 vpaddd %ymm6,%ymm2,%ymm2 5104 vpxor %ymm2,%ymm14,%ymm14 5105 vpshufb L$rol8(%rip),%ymm14,%ymm14 5106 vpaddd %ymm14,%ymm10,%ymm10 5107 vpxor %ymm10,%ymm6,%ymm6 5108 vpslld $7,%ymm6,%ymm3 5109 vpsrld $25,%ymm6,%ymm6 5110 vpxor %ymm3,%ymm6,%ymm6 5111 vpalignr $12,%ymm14,%ymm14,%ymm14 5112 vpalignr $8,%ymm10,%ymm10,%ymm10 5113 vpalignr $4,%ymm6,%ymm6,%ymm6 5114 vpaddd %ymm5,%ymm1,%ymm1 5115 vpxor %ymm1,%ymm13,%ymm13 5116 vpshufb L$rol16(%rip),%ymm13,%ymm13 5117 vpaddd %ymm13,%ymm9,%ymm9 5118 vpxor %ymm9,%ymm5,%ymm5 5119 vpsrld $20,%ymm5,%ymm3 5120 vpslld $12,%ymm5,%ymm5 5121 vpxor %ymm3,%ymm5,%ymm5 5122 vpaddd %ymm5,%ymm1,%ymm1 5123 vpxor %ymm1,%ymm13,%ymm13 5124 vpshufb L$rol8(%rip),%ymm13,%ymm13 5125 vpaddd %ymm13,%ymm9,%ymm9 5126 vpxor %ymm9,%ymm5,%ymm5 5127 vpslld $7,%ymm5,%ymm3 5128 vpsrld $25,%ymm5,%ymm5 5129 vpxor %ymm3,%ymm5,%ymm5 5130 vpalignr $12,%ymm13,%ymm13,%ymm13 5131 vpalignr $8,%ymm9,%ymm9,%ymm9 5132 vpalignr $4,%ymm5,%ymm5,%ymm5 5133 vpaddd %ymm4,%ymm0,%ymm0 5134 vpxor %ymm0,%ymm12,%ymm12 5135 vpshufb L$rol16(%rip),%ymm12,%ymm12 5136 vpaddd %ymm12,%ymm8,%ymm8 5137 vpxor %ymm8,%ymm4,%ymm4 5138 vpsrld $20,%ymm4,%ymm3 5139 vpslld $12,%ymm4,%ymm4 5140 vpxor %ymm3,%ymm4,%ymm4 5141 vpaddd %ymm4,%ymm0,%ymm0 5142 vpxor %ymm0,%ymm12,%ymm12 5143 vpshufb L$rol8(%rip),%ymm12,%ymm12 5144 vpaddd %ymm12,%ymm8,%ymm8 5145 vpxor %ymm8,%ymm4,%ymm4 5146 vpslld $7,%ymm4,%ymm3 5147 vpsrld $25,%ymm4,%ymm4 5148 vpxor %ymm3,%ymm4,%ymm4 5149 vpalignr $12,%ymm12,%ymm12,%ymm12 5150 vpalignr $8,%ymm8,%ymm8,%ymm8 5151 vpalignr $4,%ymm4,%ymm4,%ymm4 5152 addq 0+0(%rbx),%r10 5153 adcq 8+0(%rbx),%r11 5154 adcq $1,%r12 5155 movq 0+0+0(%rbp),%rax 5156 movq %rax,%r15 5157 mulq %r10 5158 movq %rax,%r13 5159 movq %rdx,%r14 5160 movq 0+0+0(%rbp),%rax 5161 mulq %r11 5162 imulq %r12,%r15 5163 addq %rax,%r14 5164 adcq %rdx,%r15 5165 movq 8+0+0(%rbp),%rax 5166 movq %rax,%r9 5167 mulq %r10 5168 addq %rax,%r14 5169 adcq $0,%rdx 5170 movq %rdx,%r10 5171 movq 8+0+0(%rbp),%rax 5172 mulq %r11 5173 addq %rax,%r15 5174 adcq $0,%rdx 5175 imulq %r12,%r9 5176 addq %r10,%r15 5177 adcq %rdx,%r9 5178 movq %r13,%r10 5179 movq %r14,%r11 5180 movq %r15,%r12 5181 andq $3,%r12 5182 movq %r15,%r13 5183 andq $-4,%r13 5184 movq %r9,%r14 5185 shrdq $2,%r9,%r15 5186 shrq $2,%r9 5187 addq %r13,%r15 5188 adcq %r14,%r9 5189 addq %r15,%r10 5190 adcq %r9,%r11 5191 adcq $0,%r12 5192 5193 leaq 16(%rbx),%rbx 5194 incq %r8 5195 vpaddd %ymm6,%ymm2,%ymm2 5196 vpxor %ymm2,%ymm14,%ymm14 5197 vpshufb L$rol16(%rip),%ymm14,%ymm14 5198 vpaddd %ymm14,%ymm10,%ymm10 5199 vpxor %ymm10,%ymm6,%ymm6 5200 vpsrld $20,%ymm6,%ymm3 5201 vpslld $12,%ymm6,%ymm6 5202 vpxor %ymm3,%ymm6,%ymm6 5203 vpaddd %ymm6,%ymm2,%ymm2 5204 vpxor %ymm2,%ymm14,%ymm14 5205 vpshufb L$rol8(%rip),%ymm14,%ymm14 5206 vpaddd %ymm14,%ymm10,%ymm10 5207 vpxor %ymm10,%ymm6,%ymm6 5208 vpslld $7,%ymm6,%ymm3 5209 vpsrld $25,%ymm6,%ymm6 5210 vpxor %ymm3,%ymm6,%ymm6 5211 vpalignr $4,%ymm14,%ymm14,%ymm14 5212 vpalignr $8,%ymm10,%ymm10,%ymm10 5213 vpalignr $12,%ymm6,%ymm6,%ymm6 5214 vpaddd %ymm5,%ymm1,%ymm1 5215 vpxor %ymm1,%ymm13,%ymm13 5216 vpshufb L$rol16(%rip),%ymm13,%ymm13 5217 vpaddd %ymm13,%ymm9,%ymm9 5218 vpxor %ymm9,%ymm5,%ymm5 5219 vpsrld $20,%ymm5,%ymm3 5220 vpslld 
$12,%ymm5,%ymm5 5221 vpxor %ymm3,%ymm5,%ymm5 5222 vpaddd %ymm5,%ymm1,%ymm1 5223 vpxor %ymm1,%ymm13,%ymm13 5224 vpshufb L$rol8(%rip),%ymm13,%ymm13 5225 vpaddd %ymm13,%ymm9,%ymm9 5226 vpxor %ymm9,%ymm5,%ymm5 5227 vpslld $7,%ymm5,%ymm3 5228 vpsrld $25,%ymm5,%ymm5 5229 vpxor %ymm3,%ymm5,%ymm5 5230 vpalignr $4,%ymm13,%ymm13,%ymm13 5231 vpalignr $8,%ymm9,%ymm9,%ymm9 5232 vpalignr $12,%ymm5,%ymm5,%ymm5 5233 vpaddd %ymm4,%ymm0,%ymm0 5234 vpxor %ymm0,%ymm12,%ymm12 5235 vpshufb L$rol16(%rip),%ymm12,%ymm12 5236 vpaddd %ymm12,%ymm8,%ymm8 5237 vpxor %ymm8,%ymm4,%ymm4 5238 vpsrld $20,%ymm4,%ymm3 5239 vpslld $12,%ymm4,%ymm4 5240 vpxor %ymm3,%ymm4,%ymm4 5241 vpaddd %ymm4,%ymm0,%ymm0 5242 vpxor %ymm0,%ymm12,%ymm12 5243 vpshufb L$rol8(%rip),%ymm12,%ymm12 5244 vpaddd %ymm12,%ymm8,%ymm8 5245 vpxor %ymm8,%ymm4,%ymm4 5246 vpslld $7,%ymm4,%ymm3 5247 vpsrld $25,%ymm4,%ymm4 5248 vpxor %ymm3,%ymm4,%ymm4 5249 vpalignr $4,%ymm12,%ymm12,%ymm12 5250 vpalignr $8,%ymm8,%ymm8,%ymm8 5251 vpalignr $12,%ymm4,%ymm4,%ymm4 5252 5253 cmpq %rcx,%r8 5254 jb L$open_avx2_tail_384_rounds_and_x2hash 5255 cmpq $10,%r8 5256 jne L$open_avx2_tail_384_rounds_and_x1hash 5257 movq %rbx,%r8 5258 subq %rsi,%rbx 5259 movq %rbx,%rcx 5260 movq 0+128(%rbp),%rbx 5261L$open_avx2_384_tail_hash: 5262 addq $16,%rcx 5263 cmpq %rbx,%rcx 5264 jg L$open_avx2_384_tail_done 5265 addq 0+0(%r8),%r10 5266 adcq 8+0(%r8),%r11 5267 adcq $1,%r12 5268 movq 0+0+0(%rbp),%rdx 5269 movq %rdx,%r15 5270 mulxq %r10,%r13,%r14 5271 mulxq %r11,%rax,%rdx 5272 imulq %r12,%r15 5273 addq %rax,%r14 5274 adcq %rdx,%r15 5275 movq 8+0+0(%rbp),%rdx 5276 mulxq %r10,%r10,%rax 5277 addq %r10,%r14 5278 mulxq %r11,%r11,%r9 5279 adcq %r11,%r15 5280 adcq $0,%r9 5281 imulq %r12,%rdx 5282 addq %rax,%r15 5283 adcq %rdx,%r9 5284 movq %r13,%r10 5285 movq %r14,%r11 5286 movq %r15,%r12 5287 andq $3,%r12 5288 movq %r15,%r13 5289 andq $-4,%r13 5290 movq %r9,%r14 5291 shrdq $2,%r9,%r15 5292 shrq $2,%r9 5293 addq %r13,%r15 5294 adcq %r14,%r9 5295 addq %r15,%r10 5296 adcq %r9,%r11 5297 adcq $0,%r12 5298 5299 leaq 16(%r8),%r8 5300 jmp L$open_avx2_384_tail_hash 5301L$open_avx2_384_tail_done: 5302 vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 5303 vpaddd 0+64(%rbp),%ymm6,%ymm6 5304 vpaddd 0+96(%rbp),%ymm10,%ymm10 5305 vpaddd 0+224(%rbp),%ymm14,%ymm14 5306 vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 5307 vpaddd 0+64(%rbp),%ymm5,%ymm5 5308 vpaddd 0+96(%rbp),%ymm9,%ymm9 5309 vpaddd 0+192(%rbp),%ymm13,%ymm13 5310 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 5311 vpaddd 0+64(%rbp),%ymm4,%ymm4 5312 vpaddd 0+96(%rbp),%ymm8,%ymm8 5313 vpaddd 0+160(%rbp),%ymm12,%ymm12 5314 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 5315 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 5316 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 5317 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 5318 vpxor 0+0(%rsi),%ymm3,%ymm3 5319 vpxor 32+0(%rsi),%ymm2,%ymm2 5320 vpxor 64+0(%rsi),%ymm6,%ymm6 5321 vpxor 96+0(%rsi),%ymm10,%ymm10 5322 vmovdqu %ymm3,0+0(%rdi) 5323 vmovdqu %ymm2,32+0(%rdi) 5324 vmovdqu %ymm6,64+0(%rdi) 5325 vmovdqu %ymm10,96+0(%rdi) 5326 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 5327 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 5328 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 5329 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 5330 vpxor 0+128(%rsi),%ymm3,%ymm3 5331 vpxor 32+128(%rsi),%ymm1,%ymm1 5332 vpxor 64+128(%rsi),%ymm5,%ymm5 5333 vpxor 96+128(%rsi),%ymm9,%ymm9 5334 vmovdqu %ymm3,0+128(%rdi) 5335 vmovdqu %ymm1,32+128(%rdi) 5336 vmovdqu %ymm5,64+128(%rdi) 5337 vmovdqu %ymm9,96+128(%rdi) 5338 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 5339 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 5340 vperm2i128 
$0x02,%ymm8,%ymm12,%ymm4 5341 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 5342 vmovdqa %ymm3,%ymm8 5343 5344 leaq 256(%rsi),%rsi 5345 leaq 256(%rdi),%rdi 5346 subq $256,%rbx 5347 jmp L$open_avx2_tail_128_xor 5348 5349L$open_avx2_tail_512: 5350 vmovdqa L$chacha20_consts(%rip),%ymm0 5351 vmovdqa 0+64(%rbp),%ymm4 5352 vmovdqa 0+96(%rbp),%ymm8 5353 vmovdqa %ymm0,%ymm1 5354 vmovdqa %ymm4,%ymm5 5355 vmovdqa %ymm8,%ymm9 5356 vmovdqa %ymm0,%ymm2 5357 vmovdqa %ymm4,%ymm6 5358 vmovdqa %ymm8,%ymm10 5359 vmovdqa %ymm0,%ymm3 5360 vmovdqa %ymm4,%ymm7 5361 vmovdqa %ymm8,%ymm11 5362 vmovdqa L$avx2_inc(%rip),%ymm12 5363 vpaddd 0+160(%rbp),%ymm12,%ymm15 5364 vpaddd %ymm15,%ymm12,%ymm14 5365 vpaddd %ymm14,%ymm12,%ymm13 5366 vpaddd %ymm13,%ymm12,%ymm12 5367 vmovdqa %ymm15,0+256(%rbp) 5368 vmovdqa %ymm14,0+224(%rbp) 5369 vmovdqa %ymm13,0+192(%rbp) 5370 vmovdqa %ymm12,0+160(%rbp) 5371 5372 xorq %rcx,%rcx 5373 movq %rsi,%r8 5374L$open_avx2_tail_512_rounds_and_x2hash: 5375 addq 0+0(%r8),%r10 5376 adcq 8+0(%r8),%r11 5377 adcq $1,%r12 5378 movq 0+0+0(%rbp),%rax 5379 movq %rax,%r15 5380 mulq %r10 5381 movq %rax,%r13 5382 movq %rdx,%r14 5383 movq 0+0+0(%rbp),%rax 5384 mulq %r11 5385 imulq %r12,%r15 5386 addq %rax,%r14 5387 adcq %rdx,%r15 5388 movq 8+0+0(%rbp),%rax 5389 movq %rax,%r9 5390 mulq %r10 5391 addq %rax,%r14 5392 adcq $0,%rdx 5393 movq %rdx,%r10 5394 movq 8+0+0(%rbp),%rax 5395 mulq %r11 5396 addq %rax,%r15 5397 adcq $0,%rdx 5398 imulq %r12,%r9 5399 addq %r10,%r15 5400 adcq %rdx,%r9 5401 movq %r13,%r10 5402 movq %r14,%r11 5403 movq %r15,%r12 5404 andq $3,%r12 5405 movq %r15,%r13 5406 andq $-4,%r13 5407 movq %r9,%r14 5408 shrdq $2,%r9,%r15 5409 shrq $2,%r9 5410 addq %r13,%r15 5411 adcq %r14,%r9 5412 addq %r15,%r10 5413 adcq %r9,%r11 5414 adcq $0,%r12 5415 5416 leaq 16(%r8),%r8 5417L$open_avx2_tail_512_rounds_and_x1hash: 5418 vmovdqa %ymm8,0+128(%rbp) 5419 vmovdqa L$rol16(%rip),%ymm8 5420 vpaddd %ymm7,%ymm3,%ymm3 5421 vpaddd %ymm6,%ymm2,%ymm2 5422 vpaddd %ymm5,%ymm1,%ymm1 5423 vpaddd %ymm4,%ymm0,%ymm0 5424 vpxor %ymm3,%ymm15,%ymm15 5425 vpxor %ymm2,%ymm14,%ymm14 5426 vpxor %ymm1,%ymm13,%ymm13 5427 vpxor %ymm0,%ymm12,%ymm12 5428 vpshufb %ymm8,%ymm15,%ymm15 5429 vpshufb %ymm8,%ymm14,%ymm14 5430 vpshufb %ymm8,%ymm13,%ymm13 5431 vpshufb %ymm8,%ymm12,%ymm12 5432 vpaddd %ymm15,%ymm11,%ymm11 5433 vpaddd %ymm14,%ymm10,%ymm10 5434 vpaddd %ymm13,%ymm9,%ymm9 5435 vpaddd 0+128(%rbp),%ymm12,%ymm8 5436 vpxor %ymm11,%ymm7,%ymm7 5437 vpxor %ymm10,%ymm6,%ymm6 5438 vpxor %ymm9,%ymm5,%ymm5 5439 vpxor %ymm8,%ymm4,%ymm4 5440 vmovdqa %ymm8,0+128(%rbp) 5441 vpsrld $20,%ymm7,%ymm8 5442 vpslld $32-20,%ymm7,%ymm7 5443 vpxor %ymm8,%ymm7,%ymm7 5444 vpsrld $20,%ymm6,%ymm8 5445 vpslld $32-20,%ymm6,%ymm6 5446 vpxor %ymm8,%ymm6,%ymm6 5447 vpsrld $20,%ymm5,%ymm8 5448 vpslld $32-20,%ymm5,%ymm5 5449 vpxor %ymm8,%ymm5,%ymm5 5450 vpsrld $20,%ymm4,%ymm8 5451 vpslld $32-20,%ymm4,%ymm4 5452 vpxor %ymm8,%ymm4,%ymm4 5453 vmovdqa L$rol8(%rip),%ymm8 5454 vpaddd %ymm7,%ymm3,%ymm3 5455 addq 0+0(%r8),%r10 5456 adcq 8+0(%r8),%r11 5457 adcq $1,%r12 5458 movq 0+0+0(%rbp),%rdx 5459 movq %rdx,%r15 5460 mulxq %r10,%r13,%r14 5461 mulxq %r11,%rax,%rdx 5462 imulq %r12,%r15 5463 addq %rax,%r14 5464 adcq %rdx,%r15 5465 movq 8+0+0(%rbp),%rdx 5466 mulxq %r10,%r10,%rax 5467 addq %r10,%r14 5468 mulxq %r11,%r11,%r9 5469 adcq %r11,%r15 5470 adcq $0,%r9 5471 imulq %r12,%rdx 5472 addq %rax,%r15 5473 adcq %rdx,%r9 5474 movq %r13,%r10 5475 movq %r14,%r11 5476 movq %r15,%r12 5477 andq $3,%r12 5478 movq %r15,%r13 5479 andq $-4,%r13 5480 movq %r9,%r14 5481 shrdq $2,%r9,%r15 5482 shrq 
$2,%r9 5483 addq %r13,%r15 5484 adcq %r14,%r9 5485 addq %r15,%r10 5486 adcq %r9,%r11 5487 adcq $0,%r12 5488 vpaddd %ymm6,%ymm2,%ymm2 5489 vpaddd %ymm5,%ymm1,%ymm1 5490 vpaddd %ymm4,%ymm0,%ymm0 5491 vpxor %ymm3,%ymm15,%ymm15 5492 vpxor %ymm2,%ymm14,%ymm14 5493 vpxor %ymm1,%ymm13,%ymm13 5494 vpxor %ymm0,%ymm12,%ymm12 5495 vpshufb %ymm8,%ymm15,%ymm15 5496 vpshufb %ymm8,%ymm14,%ymm14 5497 vpshufb %ymm8,%ymm13,%ymm13 5498 vpshufb %ymm8,%ymm12,%ymm12 5499 vpaddd %ymm15,%ymm11,%ymm11 5500 vpaddd %ymm14,%ymm10,%ymm10 5501 vpaddd %ymm13,%ymm9,%ymm9 5502 vpaddd 0+128(%rbp),%ymm12,%ymm8 5503 vpxor %ymm11,%ymm7,%ymm7 5504 vpxor %ymm10,%ymm6,%ymm6 5505 vpxor %ymm9,%ymm5,%ymm5 5506 vpxor %ymm8,%ymm4,%ymm4 5507 vmovdqa %ymm8,0+128(%rbp) 5508 vpsrld $25,%ymm7,%ymm8 5509 vpslld $32-25,%ymm7,%ymm7 5510 vpxor %ymm8,%ymm7,%ymm7 5511 vpsrld $25,%ymm6,%ymm8 5512 vpslld $32-25,%ymm6,%ymm6 5513 vpxor %ymm8,%ymm6,%ymm6 5514 vpsrld $25,%ymm5,%ymm8 5515 vpslld $32-25,%ymm5,%ymm5 5516 vpxor %ymm8,%ymm5,%ymm5 5517 vpsrld $25,%ymm4,%ymm8 5518 vpslld $32-25,%ymm4,%ymm4 5519 vpxor %ymm8,%ymm4,%ymm4 5520 vmovdqa 0+128(%rbp),%ymm8 5521 vpalignr $4,%ymm7,%ymm7,%ymm7 5522 vpalignr $8,%ymm11,%ymm11,%ymm11 5523 vpalignr $12,%ymm15,%ymm15,%ymm15 5524 vpalignr $4,%ymm6,%ymm6,%ymm6 5525 vpalignr $8,%ymm10,%ymm10,%ymm10 5526 vpalignr $12,%ymm14,%ymm14,%ymm14 5527 vpalignr $4,%ymm5,%ymm5,%ymm5 5528 vpalignr $8,%ymm9,%ymm9,%ymm9 5529 vpalignr $12,%ymm13,%ymm13,%ymm13 5530 vpalignr $4,%ymm4,%ymm4,%ymm4 5531 vpalignr $8,%ymm8,%ymm8,%ymm8 5532 vpalignr $12,%ymm12,%ymm12,%ymm12 5533 vmovdqa %ymm8,0+128(%rbp) 5534 vmovdqa L$rol16(%rip),%ymm8 5535 vpaddd %ymm7,%ymm3,%ymm3 5536 addq 0+16(%r8),%r10 5537 adcq 8+16(%r8),%r11 5538 adcq $1,%r12 5539 movq 0+0+0(%rbp),%rdx 5540 movq %rdx,%r15 5541 mulxq %r10,%r13,%r14 5542 mulxq %r11,%rax,%rdx 5543 imulq %r12,%r15 5544 addq %rax,%r14 5545 adcq %rdx,%r15 5546 movq 8+0+0(%rbp),%rdx 5547 mulxq %r10,%r10,%rax 5548 addq %r10,%r14 5549 mulxq %r11,%r11,%r9 5550 adcq %r11,%r15 5551 adcq $0,%r9 5552 imulq %r12,%rdx 5553 addq %rax,%r15 5554 adcq %rdx,%r9 5555 movq %r13,%r10 5556 movq %r14,%r11 5557 movq %r15,%r12 5558 andq $3,%r12 5559 movq %r15,%r13 5560 andq $-4,%r13 5561 movq %r9,%r14 5562 shrdq $2,%r9,%r15 5563 shrq $2,%r9 5564 addq %r13,%r15 5565 adcq %r14,%r9 5566 addq %r15,%r10 5567 adcq %r9,%r11 5568 adcq $0,%r12 5569 5570 leaq 32(%r8),%r8 5571 vpaddd %ymm6,%ymm2,%ymm2 5572 vpaddd %ymm5,%ymm1,%ymm1 5573 vpaddd %ymm4,%ymm0,%ymm0 5574 vpxor %ymm3,%ymm15,%ymm15 5575 vpxor %ymm2,%ymm14,%ymm14 5576 vpxor %ymm1,%ymm13,%ymm13 5577 vpxor %ymm0,%ymm12,%ymm12 5578 vpshufb %ymm8,%ymm15,%ymm15 5579 vpshufb %ymm8,%ymm14,%ymm14 5580 vpshufb %ymm8,%ymm13,%ymm13 5581 vpshufb %ymm8,%ymm12,%ymm12 5582 vpaddd %ymm15,%ymm11,%ymm11 5583 vpaddd %ymm14,%ymm10,%ymm10 5584 vpaddd %ymm13,%ymm9,%ymm9 5585 vpaddd 0+128(%rbp),%ymm12,%ymm8 5586 vpxor %ymm11,%ymm7,%ymm7 5587 vpxor %ymm10,%ymm6,%ymm6 5588 vpxor %ymm9,%ymm5,%ymm5 5589 vpxor %ymm8,%ymm4,%ymm4 5590 vmovdqa %ymm8,0+128(%rbp) 5591 vpsrld $20,%ymm7,%ymm8 5592 vpslld $32-20,%ymm7,%ymm7 5593 vpxor %ymm8,%ymm7,%ymm7 5594 vpsrld $20,%ymm6,%ymm8 5595 vpslld $32-20,%ymm6,%ymm6 5596 vpxor %ymm8,%ymm6,%ymm6 5597 vpsrld $20,%ymm5,%ymm8 5598 vpslld $32-20,%ymm5,%ymm5 5599 vpxor %ymm8,%ymm5,%ymm5 5600 vpsrld $20,%ymm4,%ymm8 5601 vpslld $32-20,%ymm4,%ymm4 5602 vpxor %ymm8,%ymm4,%ymm4 5603 vmovdqa L$rol8(%rip),%ymm8 5604 vpaddd %ymm7,%ymm3,%ymm3 5605 vpaddd %ymm6,%ymm2,%ymm2 5606 vpaddd %ymm5,%ymm1,%ymm1 5607 vpaddd %ymm4,%ymm0,%ymm0 5608 vpxor %ymm3,%ymm15,%ymm15 5609 vpxor 
%ymm2,%ymm14,%ymm14 5610 vpxor %ymm1,%ymm13,%ymm13 5611 vpxor %ymm0,%ymm12,%ymm12 5612 vpshufb %ymm8,%ymm15,%ymm15 5613 vpshufb %ymm8,%ymm14,%ymm14 5614 vpshufb %ymm8,%ymm13,%ymm13 5615 vpshufb %ymm8,%ymm12,%ymm12 5616 vpaddd %ymm15,%ymm11,%ymm11 5617 vpaddd %ymm14,%ymm10,%ymm10 5618 vpaddd %ymm13,%ymm9,%ymm9 5619 vpaddd 0+128(%rbp),%ymm12,%ymm8 5620 vpxor %ymm11,%ymm7,%ymm7 5621 vpxor %ymm10,%ymm6,%ymm6 5622 vpxor %ymm9,%ymm5,%ymm5 5623 vpxor %ymm8,%ymm4,%ymm4 5624 vmovdqa %ymm8,0+128(%rbp) 5625 vpsrld $25,%ymm7,%ymm8 5626 vpslld $32-25,%ymm7,%ymm7 5627 vpxor %ymm8,%ymm7,%ymm7 5628 vpsrld $25,%ymm6,%ymm8 5629 vpslld $32-25,%ymm6,%ymm6 5630 vpxor %ymm8,%ymm6,%ymm6 5631 vpsrld $25,%ymm5,%ymm8 5632 vpslld $32-25,%ymm5,%ymm5 5633 vpxor %ymm8,%ymm5,%ymm5 5634 vpsrld $25,%ymm4,%ymm8 5635 vpslld $32-25,%ymm4,%ymm4 5636 vpxor %ymm8,%ymm4,%ymm4 5637 vmovdqa 0+128(%rbp),%ymm8 5638 vpalignr $12,%ymm7,%ymm7,%ymm7 5639 vpalignr $8,%ymm11,%ymm11,%ymm11 5640 vpalignr $4,%ymm15,%ymm15,%ymm15 5641 vpalignr $12,%ymm6,%ymm6,%ymm6 5642 vpalignr $8,%ymm10,%ymm10,%ymm10 5643 vpalignr $4,%ymm14,%ymm14,%ymm14 5644 vpalignr $12,%ymm5,%ymm5,%ymm5 5645 vpalignr $8,%ymm9,%ymm9,%ymm9 5646 vpalignr $4,%ymm13,%ymm13,%ymm13 5647 vpalignr $12,%ymm4,%ymm4,%ymm4 5648 vpalignr $8,%ymm8,%ymm8,%ymm8 5649 vpalignr $4,%ymm12,%ymm12,%ymm12 5650 5651 incq %rcx 5652 cmpq $4,%rcx 5653 jl L$open_avx2_tail_512_rounds_and_x2hash 5654 cmpq $10,%rcx 5655 jne L$open_avx2_tail_512_rounds_and_x1hash 5656 movq %rbx,%rcx 5657 subq $384,%rcx 5658 andq $-16,%rcx 5659L$open_avx2_tail_512_hash: 5660 testq %rcx,%rcx 5661 je L$open_avx2_tail_512_done 5662 addq 0+0(%r8),%r10 5663 adcq 8+0(%r8),%r11 5664 adcq $1,%r12 5665 movq 0+0+0(%rbp),%rdx 5666 movq %rdx,%r15 5667 mulxq %r10,%r13,%r14 5668 mulxq %r11,%rax,%rdx 5669 imulq %r12,%r15 5670 addq %rax,%r14 5671 adcq %rdx,%r15 5672 movq 8+0+0(%rbp),%rdx 5673 mulxq %r10,%r10,%rax 5674 addq %r10,%r14 5675 mulxq %r11,%r11,%r9 5676 adcq %r11,%r15 5677 adcq $0,%r9 5678 imulq %r12,%rdx 5679 addq %rax,%r15 5680 adcq %rdx,%r9 5681 movq %r13,%r10 5682 movq %r14,%r11 5683 movq %r15,%r12 5684 andq $3,%r12 5685 movq %r15,%r13 5686 andq $-4,%r13 5687 movq %r9,%r14 5688 shrdq $2,%r9,%r15 5689 shrq $2,%r9 5690 addq %r13,%r15 5691 adcq %r14,%r9 5692 addq %r15,%r10 5693 adcq %r9,%r11 5694 adcq $0,%r12 5695 5696 leaq 16(%r8),%r8 5697 subq $16,%rcx 5698 jmp L$open_avx2_tail_512_hash 5699L$open_avx2_tail_512_done: 5700 vpaddd L$chacha20_consts(%rip),%ymm3,%ymm3 5701 vpaddd 0+64(%rbp),%ymm7,%ymm7 5702 vpaddd 0+96(%rbp),%ymm11,%ymm11 5703 vpaddd 0+256(%rbp),%ymm15,%ymm15 5704 vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 5705 vpaddd 0+64(%rbp),%ymm6,%ymm6 5706 vpaddd 0+96(%rbp),%ymm10,%ymm10 5707 vpaddd 0+224(%rbp),%ymm14,%ymm14 5708 vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 5709 vpaddd 0+64(%rbp),%ymm5,%ymm5 5710 vpaddd 0+96(%rbp),%ymm9,%ymm9 5711 vpaddd 0+192(%rbp),%ymm13,%ymm13 5712 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 5713 vpaddd 0+64(%rbp),%ymm4,%ymm4 5714 vpaddd 0+96(%rbp),%ymm8,%ymm8 5715 vpaddd 0+160(%rbp),%ymm12,%ymm12 5716 5717 vmovdqa %ymm0,0+128(%rbp) 5718 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 5719 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 5720 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 5721 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 5722 vpxor 0+0(%rsi),%ymm0,%ymm0 5723 vpxor 32+0(%rsi),%ymm3,%ymm3 5724 vpxor 64+0(%rsi),%ymm7,%ymm7 5725 vpxor 96+0(%rsi),%ymm11,%ymm11 5726 vmovdqu %ymm0,0+0(%rdi) 5727 vmovdqu %ymm3,32+0(%rdi) 5728 vmovdqu %ymm7,64+0(%rdi) 5729 vmovdqu %ymm11,96+0(%rdi) 5730 5731 vmovdqa 0+128(%rbp),%ymm0 5732 
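/* De-interleave the remaining key-stream lanes, XOR them with the ciphertext and store the plaintext in 128-byte chunks. */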
vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 5733 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 5734 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 5735 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 5736 vpxor 0+128(%rsi),%ymm3,%ymm3 5737 vpxor 32+128(%rsi),%ymm2,%ymm2 5738 vpxor 64+128(%rsi),%ymm6,%ymm6 5739 vpxor 96+128(%rsi),%ymm10,%ymm10 5740 vmovdqu %ymm3,0+128(%rdi) 5741 vmovdqu %ymm2,32+128(%rdi) 5742 vmovdqu %ymm6,64+128(%rdi) 5743 vmovdqu %ymm10,96+128(%rdi) 5744 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 5745 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 5746 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 5747 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 5748 vpxor 0+256(%rsi),%ymm3,%ymm3 5749 vpxor 32+256(%rsi),%ymm1,%ymm1 5750 vpxor 64+256(%rsi),%ymm5,%ymm5 5751 vpxor 96+256(%rsi),%ymm9,%ymm9 5752 vmovdqu %ymm3,0+256(%rdi) 5753 vmovdqu %ymm1,32+256(%rdi) 5754 vmovdqu %ymm5,64+256(%rdi) 5755 vmovdqu %ymm9,96+256(%rdi) 5756 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 5757 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 5758 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 5759 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 5760 vmovdqa %ymm3,%ymm8 5761 5762 leaq 384(%rsi),%rsi 5763 leaq 384(%rdi),%rdi 5764 subq $384,%rbx 5765L$open_avx2_tail_128_xor: 5766 cmpq $32,%rbx 5767 jb L$open_avx2_tail_32_xor 5768 subq $32,%rbx 5769 vpxor (%rsi),%ymm0,%ymm0 5770 vmovdqu %ymm0,(%rdi) 5771 leaq 32(%rsi),%rsi 5772 leaq 32(%rdi),%rdi 5773 vmovdqa %ymm4,%ymm0 5774 vmovdqa %ymm8,%ymm4 5775 vmovdqa %ymm12,%ymm8 5776 jmp L$open_avx2_tail_128_xor 5777L$open_avx2_tail_32_xor: 5778 cmpq $16,%rbx 5779 vmovdqa %xmm0,%xmm1 5780 jb L$open_avx2_exit 5781 subq $16,%rbx 5782 5783 vpxor (%rsi),%xmm0,%xmm1 5784 vmovdqu %xmm1,(%rdi) 5785 leaq 16(%rsi),%rsi 5786 leaq 16(%rdi),%rdi 5787 vperm2i128 $0x11,%ymm0,%ymm0,%ymm0 5788 vmovdqa %xmm0,%xmm1 5789L$open_avx2_exit: 5790 vzeroupper 5791 jmp L$open_sse_tail_16 5792 5793L$open_avx2_192: 5794 vmovdqa %ymm0,%ymm1 5795 vmovdqa %ymm0,%ymm2 5796 vmovdqa %ymm4,%ymm5 5797 vmovdqa %ymm4,%ymm6 5798 vmovdqa %ymm8,%ymm9 5799 vmovdqa %ymm8,%ymm10 5800 vpaddd L$avx2_inc(%rip),%ymm12,%ymm13 5801 vmovdqa %ymm12,%ymm11 5802 vmovdqa %ymm13,%ymm15 5803 movq $10,%r10 5804L$open_avx2_192_rounds: 5805 vpaddd %ymm4,%ymm0,%ymm0 5806 vpxor %ymm0,%ymm12,%ymm12 5807 vpshufb L$rol16(%rip),%ymm12,%ymm12 5808 vpaddd %ymm12,%ymm8,%ymm8 5809 vpxor %ymm8,%ymm4,%ymm4 5810 vpsrld $20,%ymm4,%ymm3 5811 vpslld $12,%ymm4,%ymm4 5812 vpxor %ymm3,%ymm4,%ymm4 5813 vpaddd %ymm4,%ymm0,%ymm0 5814 vpxor %ymm0,%ymm12,%ymm12 5815 vpshufb L$rol8(%rip),%ymm12,%ymm12 5816 vpaddd %ymm12,%ymm8,%ymm8 5817 vpxor %ymm8,%ymm4,%ymm4 5818 vpslld $7,%ymm4,%ymm3 5819 vpsrld $25,%ymm4,%ymm4 5820 vpxor %ymm3,%ymm4,%ymm4 5821 vpalignr $12,%ymm12,%ymm12,%ymm12 5822 vpalignr $8,%ymm8,%ymm8,%ymm8 5823 vpalignr $4,%ymm4,%ymm4,%ymm4 5824 vpaddd %ymm5,%ymm1,%ymm1 5825 vpxor %ymm1,%ymm13,%ymm13 5826 vpshufb L$rol16(%rip),%ymm13,%ymm13 5827 vpaddd %ymm13,%ymm9,%ymm9 5828 vpxor %ymm9,%ymm5,%ymm5 5829 vpsrld $20,%ymm5,%ymm3 5830 vpslld $12,%ymm5,%ymm5 5831 vpxor %ymm3,%ymm5,%ymm5 5832 vpaddd %ymm5,%ymm1,%ymm1 5833 vpxor %ymm1,%ymm13,%ymm13 5834 vpshufb L$rol8(%rip),%ymm13,%ymm13 5835 vpaddd %ymm13,%ymm9,%ymm9 5836 vpxor %ymm9,%ymm5,%ymm5 5837 vpslld $7,%ymm5,%ymm3 5838 vpsrld $25,%ymm5,%ymm5 5839 vpxor %ymm3,%ymm5,%ymm5 5840 vpalignr $12,%ymm13,%ymm13,%ymm13 5841 vpalignr $8,%ymm9,%ymm9,%ymm9 5842 vpalignr $4,%ymm5,%ymm5,%ymm5 5843 vpaddd %ymm4,%ymm0,%ymm0 5844 vpxor %ymm0,%ymm12,%ymm12 5845 vpshufb L$rol16(%rip),%ymm12,%ymm12 5846 vpaddd %ymm12,%ymm8,%ymm8 5847 vpxor %ymm8,%ymm4,%ymm4 5848 vpsrld $20,%ymm4,%ymm3 5849 vpslld $12,%ymm4,%ymm4 5850 vpxor 
%ymm3,%ymm4,%ymm4 5851 vpaddd %ymm4,%ymm0,%ymm0 5852 vpxor %ymm0,%ymm12,%ymm12 5853 vpshufb L$rol8(%rip),%ymm12,%ymm12 5854 vpaddd %ymm12,%ymm8,%ymm8 5855 vpxor %ymm8,%ymm4,%ymm4 5856 vpslld $7,%ymm4,%ymm3 5857 vpsrld $25,%ymm4,%ymm4 5858 vpxor %ymm3,%ymm4,%ymm4 5859 vpalignr $4,%ymm12,%ymm12,%ymm12 5860 vpalignr $8,%ymm8,%ymm8,%ymm8 5861 vpalignr $12,%ymm4,%ymm4,%ymm4 5862 vpaddd %ymm5,%ymm1,%ymm1 5863 vpxor %ymm1,%ymm13,%ymm13 5864 vpshufb L$rol16(%rip),%ymm13,%ymm13 5865 vpaddd %ymm13,%ymm9,%ymm9 5866 vpxor %ymm9,%ymm5,%ymm5 5867 vpsrld $20,%ymm5,%ymm3 5868 vpslld $12,%ymm5,%ymm5 5869 vpxor %ymm3,%ymm5,%ymm5 5870 vpaddd %ymm5,%ymm1,%ymm1 5871 vpxor %ymm1,%ymm13,%ymm13 5872 vpshufb L$rol8(%rip),%ymm13,%ymm13 5873 vpaddd %ymm13,%ymm9,%ymm9 5874 vpxor %ymm9,%ymm5,%ymm5 5875 vpslld $7,%ymm5,%ymm3 5876 vpsrld $25,%ymm5,%ymm5 5877 vpxor %ymm3,%ymm5,%ymm5 5878 vpalignr $4,%ymm13,%ymm13,%ymm13 5879 vpalignr $8,%ymm9,%ymm9,%ymm9 5880 vpalignr $12,%ymm5,%ymm5,%ymm5 5881 5882 decq %r10 5883 jne L$open_avx2_192_rounds 5884 vpaddd %ymm2,%ymm0,%ymm0 5885 vpaddd %ymm2,%ymm1,%ymm1 5886 vpaddd %ymm6,%ymm4,%ymm4 5887 vpaddd %ymm6,%ymm5,%ymm5 5888 vpaddd %ymm10,%ymm8,%ymm8 5889 vpaddd %ymm10,%ymm9,%ymm9 5890 vpaddd %ymm11,%ymm12,%ymm12 5891 vpaddd %ymm15,%ymm13,%ymm13 5892 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 5893 5894 vpand L$clamp(%rip),%ymm3,%ymm3 5895 vmovdqa %ymm3,0+0(%rbp) 5896 5897 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 5898 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 5899 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 5900 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 5901 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 5902 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 5903L$open_avx2_short: 5904 movq %r8,%r8 5905 call poly_hash_ad_internal 5906L$open_avx2_short_hash_and_xor_loop: 5907 cmpq $32,%rbx 5908 jb L$open_avx2_short_tail_32 5909 subq $32,%rbx 5910 addq 0+0(%rsi),%r10 5911 adcq 8+0(%rsi),%r11 5912 adcq $1,%r12 5913 movq 0+0+0(%rbp),%rax 5914 movq %rax,%r15 5915 mulq %r10 5916 movq %rax,%r13 5917 movq %rdx,%r14 5918 movq 0+0+0(%rbp),%rax 5919 mulq %r11 5920 imulq %r12,%r15 5921 addq %rax,%r14 5922 adcq %rdx,%r15 5923 movq 8+0+0(%rbp),%rax 5924 movq %rax,%r9 5925 mulq %r10 5926 addq %rax,%r14 5927 adcq $0,%rdx 5928 movq %rdx,%r10 5929 movq 8+0+0(%rbp),%rax 5930 mulq %r11 5931 addq %rax,%r15 5932 adcq $0,%rdx 5933 imulq %r12,%r9 5934 addq %r10,%r15 5935 adcq %rdx,%r9 5936 movq %r13,%r10 5937 movq %r14,%r11 5938 movq %r15,%r12 5939 andq $3,%r12 5940 movq %r15,%r13 5941 andq $-4,%r13 5942 movq %r9,%r14 5943 shrdq $2,%r9,%r15 5944 shrq $2,%r9 5945 addq %r13,%r15 5946 adcq %r14,%r9 5947 addq %r15,%r10 5948 adcq %r9,%r11 5949 adcq $0,%r12 5950 addq 0+16(%rsi),%r10 5951 adcq 8+16(%rsi),%r11 5952 adcq $1,%r12 5953 movq 0+0+0(%rbp),%rax 5954 movq %rax,%r15 5955 mulq %r10 5956 movq %rax,%r13 5957 movq %rdx,%r14 5958 movq 0+0+0(%rbp),%rax 5959 mulq %r11 5960 imulq %r12,%r15 5961 addq %rax,%r14 5962 adcq %rdx,%r15 5963 movq 8+0+0(%rbp),%rax 5964 movq %rax,%r9 5965 mulq %r10 5966 addq %rax,%r14 5967 adcq $0,%rdx 5968 movq %rdx,%r10 5969 movq 8+0+0(%rbp),%rax 5970 mulq %r11 5971 addq %rax,%r15 5972 adcq $0,%rdx 5973 imulq %r12,%r9 5974 addq %r10,%r15 5975 adcq %rdx,%r9 5976 movq %r13,%r10 5977 movq %r14,%r11 5978 movq %r15,%r12 5979 andq $3,%r12 5980 movq %r15,%r13 5981 andq $-4,%r13 5982 movq %r9,%r14 5983 shrdq $2,%r9,%r15 5984 shrq $2,%r9 5985 addq %r13,%r15 5986 adcq %r14,%r9 5987 addq %r15,%r10 5988 adcq %r9,%r11 5989 adcq $0,%r12 5990 5991 5992 vpxor (%rsi),%ymm0,%ymm0 5993 vmovdqu %ymm0,(%rdi) 5994 leaq 32(%rsi),%rsi 5995 leaq 32(%rdi),%rdi 5996 5997 
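/* Rotate the precomputed key-stream registers so the next 32-byte block ends up in %ymm0 before looping again. */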
vmovdqa %ymm4,%ymm0 5998 vmovdqa %ymm8,%ymm4 5999 vmovdqa %ymm12,%ymm8 6000 vmovdqa %ymm1,%ymm12 6001 vmovdqa %ymm5,%ymm1 6002 vmovdqa %ymm9,%ymm5 6003 vmovdqa %ymm13,%ymm9 6004 vmovdqa %ymm2,%ymm13 6005 vmovdqa %ymm6,%ymm2 6006 jmp L$open_avx2_short_hash_and_xor_loop 6007L$open_avx2_short_tail_32: 6008 cmpq $16,%rbx 6009 vmovdqa %xmm0,%xmm1 6010 jb L$open_avx2_short_tail_32_exit 6011 subq $16,%rbx 6012 addq 0+0(%rsi),%r10 6013 adcq 8+0(%rsi),%r11 6014 adcq $1,%r12 6015 movq 0+0+0(%rbp),%rax 6016 movq %rax,%r15 6017 mulq %r10 6018 movq %rax,%r13 6019 movq %rdx,%r14 6020 movq 0+0+0(%rbp),%rax 6021 mulq %r11 6022 imulq %r12,%r15 6023 addq %rax,%r14 6024 adcq %rdx,%r15 6025 movq 8+0+0(%rbp),%rax 6026 movq %rax,%r9 6027 mulq %r10 6028 addq %rax,%r14 6029 adcq $0,%rdx 6030 movq %rdx,%r10 6031 movq 8+0+0(%rbp),%rax 6032 mulq %r11 6033 addq %rax,%r15 6034 adcq $0,%rdx 6035 imulq %r12,%r9 6036 addq %r10,%r15 6037 adcq %rdx,%r9 6038 movq %r13,%r10 6039 movq %r14,%r11 6040 movq %r15,%r12 6041 andq $3,%r12 6042 movq %r15,%r13 6043 andq $-4,%r13 6044 movq %r9,%r14 6045 shrdq $2,%r9,%r15 6046 shrq $2,%r9 6047 addq %r13,%r15 6048 adcq %r14,%r9 6049 addq %r15,%r10 6050 adcq %r9,%r11 6051 adcq $0,%r12 6052 6053 vpxor (%rsi),%xmm0,%xmm3 6054 vmovdqu %xmm3,(%rdi) 6055 leaq 16(%rsi),%rsi 6056 leaq 16(%rdi),%rdi 6057 vextracti128 $1,%ymm0,%xmm1 6058L$open_avx2_short_tail_32_exit: 6059 vzeroupper 6060 jmp L$open_sse_tail_16 6061 6062L$open_avx2_320: 6063 vmovdqa %ymm0,%ymm1 6064 vmovdqa %ymm0,%ymm2 6065 vmovdqa %ymm4,%ymm5 6066 vmovdqa %ymm4,%ymm6 6067 vmovdqa %ymm8,%ymm9 6068 vmovdqa %ymm8,%ymm10 6069 vpaddd L$avx2_inc(%rip),%ymm12,%ymm13 6070 vpaddd L$avx2_inc(%rip),%ymm13,%ymm14 6071 vmovdqa %ymm4,%ymm7 6072 vmovdqa %ymm8,%ymm11 6073 vmovdqa %ymm12,0+160(%rbp) 6074 vmovdqa %ymm13,0+192(%rbp) 6075 vmovdqa %ymm14,0+224(%rbp) 6076 movq $10,%r10 6077L$open_avx2_320_rounds: 6078 vpaddd %ymm4,%ymm0,%ymm0 6079 vpxor %ymm0,%ymm12,%ymm12 6080 vpshufb L$rol16(%rip),%ymm12,%ymm12 6081 vpaddd %ymm12,%ymm8,%ymm8 6082 vpxor %ymm8,%ymm4,%ymm4 6083 vpsrld $20,%ymm4,%ymm3 6084 vpslld $12,%ymm4,%ymm4 6085 vpxor %ymm3,%ymm4,%ymm4 6086 vpaddd %ymm4,%ymm0,%ymm0 6087 vpxor %ymm0,%ymm12,%ymm12 6088 vpshufb L$rol8(%rip),%ymm12,%ymm12 6089 vpaddd %ymm12,%ymm8,%ymm8 6090 vpxor %ymm8,%ymm4,%ymm4 6091 vpslld $7,%ymm4,%ymm3 6092 vpsrld $25,%ymm4,%ymm4 6093 vpxor %ymm3,%ymm4,%ymm4 6094 vpalignr $12,%ymm12,%ymm12,%ymm12 6095 vpalignr $8,%ymm8,%ymm8,%ymm8 6096 vpalignr $4,%ymm4,%ymm4,%ymm4 6097 vpaddd %ymm5,%ymm1,%ymm1 6098 vpxor %ymm1,%ymm13,%ymm13 6099 vpshufb L$rol16(%rip),%ymm13,%ymm13 6100 vpaddd %ymm13,%ymm9,%ymm9 6101 vpxor %ymm9,%ymm5,%ymm5 6102 vpsrld $20,%ymm5,%ymm3 6103 vpslld $12,%ymm5,%ymm5 6104 vpxor %ymm3,%ymm5,%ymm5 6105 vpaddd %ymm5,%ymm1,%ymm1 6106 vpxor %ymm1,%ymm13,%ymm13 6107 vpshufb L$rol8(%rip),%ymm13,%ymm13 6108 vpaddd %ymm13,%ymm9,%ymm9 6109 vpxor %ymm9,%ymm5,%ymm5 6110 vpslld $7,%ymm5,%ymm3 6111 vpsrld $25,%ymm5,%ymm5 6112 vpxor %ymm3,%ymm5,%ymm5 6113 vpalignr $12,%ymm13,%ymm13,%ymm13 6114 vpalignr $8,%ymm9,%ymm9,%ymm9 6115 vpalignr $4,%ymm5,%ymm5,%ymm5 6116 vpaddd %ymm6,%ymm2,%ymm2 6117 vpxor %ymm2,%ymm14,%ymm14 6118 vpshufb L$rol16(%rip),%ymm14,%ymm14 6119 vpaddd %ymm14,%ymm10,%ymm10 6120 vpxor %ymm10,%ymm6,%ymm6 6121 vpsrld $20,%ymm6,%ymm3 6122 vpslld $12,%ymm6,%ymm6 6123 vpxor %ymm3,%ymm6,%ymm6 6124 vpaddd %ymm6,%ymm2,%ymm2 6125 vpxor %ymm2,%ymm14,%ymm14 6126 vpshufb L$rol8(%rip),%ymm14,%ymm14 6127 vpaddd %ymm14,%ymm10,%ymm10 6128 vpxor %ymm10,%ymm6,%ymm6 6129 vpslld $7,%ymm6,%ymm3 6130 vpsrld $25,%ymm6,%ymm6 
6131 vpxor %ymm3,%ymm6,%ymm6 6132 vpalignr $12,%ymm14,%ymm14,%ymm14 6133 vpalignr $8,%ymm10,%ymm10,%ymm10 6134 vpalignr $4,%ymm6,%ymm6,%ymm6 6135 vpaddd %ymm4,%ymm0,%ymm0 6136 vpxor %ymm0,%ymm12,%ymm12 6137 vpshufb L$rol16(%rip),%ymm12,%ymm12 6138 vpaddd %ymm12,%ymm8,%ymm8 6139 vpxor %ymm8,%ymm4,%ymm4 6140 vpsrld $20,%ymm4,%ymm3 6141 vpslld $12,%ymm4,%ymm4 6142 vpxor %ymm3,%ymm4,%ymm4 6143 vpaddd %ymm4,%ymm0,%ymm0 6144 vpxor %ymm0,%ymm12,%ymm12 6145 vpshufb L$rol8(%rip),%ymm12,%ymm12 6146 vpaddd %ymm12,%ymm8,%ymm8 6147 vpxor %ymm8,%ymm4,%ymm4 6148 vpslld $7,%ymm4,%ymm3 6149 vpsrld $25,%ymm4,%ymm4 6150 vpxor %ymm3,%ymm4,%ymm4 6151 vpalignr $4,%ymm12,%ymm12,%ymm12 6152 vpalignr $8,%ymm8,%ymm8,%ymm8 6153 vpalignr $12,%ymm4,%ymm4,%ymm4 6154 vpaddd %ymm5,%ymm1,%ymm1 6155 vpxor %ymm1,%ymm13,%ymm13 6156 vpshufb L$rol16(%rip),%ymm13,%ymm13 6157 vpaddd %ymm13,%ymm9,%ymm9 6158 vpxor %ymm9,%ymm5,%ymm5 6159 vpsrld $20,%ymm5,%ymm3 6160 vpslld $12,%ymm5,%ymm5 6161 vpxor %ymm3,%ymm5,%ymm5 6162 vpaddd %ymm5,%ymm1,%ymm1 6163 vpxor %ymm1,%ymm13,%ymm13 6164 vpshufb L$rol8(%rip),%ymm13,%ymm13 6165 vpaddd %ymm13,%ymm9,%ymm9 6166 vpxor %ymm9,%ymm5,%ymm5 6167 vpslld $7,%ymm5,%ymm3 6168 vpsrld $25,%ymm5,%ymm5 6169 vpxor %ymm3,%ymm5,%ymm5 6170 vpalignr $4,%ymm13,%ymm13,%ymm13 6171 vpalignr $8,%ymm9,%ymm9,%ymm9 6172 vpalignr $12,%ymm5,%ymm5,%ymm5 6173 vpaddd %ymm6,%ymm2,%ymm2 6174 vpxor %ymm2,%ymm14,%ymm14 6175 vpshufb L$rol16(%rip),%ymm14,%ymm14 6176 vpaddd %ymm14,%ymm10,%ymm10 6177 vpxor %ymm10,%ymm6,%ymm6 6178 vpsrld $20,%ymm6,%ymm3 6179 vpslld $12,%ymm6,%ymm6 6180 vpxor %ymm3,%ymm6,%ymm6 6181 vpaddd %ymm6,%ymm2,%ymm2 6182 vpxor %ymm2,%ymm14,%ymm14 6183 vpshufb L$rol8(%rip),%ymm14,%ymm14 6184 vpaddd %ymm14,%ymm10,%ymm10 6185 vpxor %ymm10,%ymm6,%ymm6 6186 vpslld $7,%ymm6,%ymm3 6187 vpsrld $25,%ymm6,%ymm6 6188 vpxor %ymm3,%ymm6,%ymm6 6189 vpalignr $4,%ymm14,%ymm14,%ymm14 6190 vpalignr $8,%ymm10,%ymm10,%ymm10 6191 vpalignr $12,%ymm6,%ymm6,%ymm6 6192 6193 decq %r10 6194 jne L$open_avx2_320_rounds 6195 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 6196 vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 6197 vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 6198 vpaddd %ymm7,%ymm4,%ymm4 6199 vpaddd %ymm7,%ymm5,%ymm5 6200 vpaddd %ymm7,%ymm6,%ymm6 6201 vpaddd %ymm11,%ymm8,%ymm8 6202 vpaddd %ymm11,%ymm9,%ymm9 6203 vpaddd %ymm11,%ymm10,%ymm10 6204 vpaddd 0+160(%rbp),%ymm12,%ymm12 6205 vpaddd 0+192(%rbp),%ymm13,%ymm13 6206 vpaddd 0+224(%rbp),%ymm14,%ymm14 6207 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 6208 6209 vpand L$clamp(%rip),%ymm3,%ymm3 6210 vmovdqa %ymm3,0+0(%rbp) 6211 6212 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 6213 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 6214 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 6215 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 6216 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 6217 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 6218 vperm2i128 $0x02,%ymm2,%ymm6,%ymm9 6219 vperm2i128 $0x02,%ymm10,%ymm14,%ymm13 6220 vperm2i128 $0x13,%ymm2,%ymm6,%ymm2 6221 vperm2i128 $0x13,%ymm10,%ymm14,%ymm6 6222 jmp L$open_avx2_short 6223 6224 6225 6226 6227 6228.p2align 6 6229chacha20_poly1305_seal_avx2: 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 vzeroupper 6243 vmovdqa L$chacha20_consts(%rip),%ymm0 6244 vbroadcasti128 0(%r9),%ymm4 6245 vbroadcasti128 16(%r9),%ymm8 6246 vbroadcasti128 32(%r9),%ymm12 6247 vpaddd L$avx2_init(%rip),%ymm12,%ymm12 6248 cmpq $192,%rbx 6249 jbe L$seal_avx2_192 6250 cmpq $320,%rbx 6251 jbe L$seal_avx2_320 6252 vmovdqa %ymm0,%ymm1 6253 vmovdqa %ymm0,%ymm2 6254 vmovdqa %ymm0,%ymm3 6255 vmovdqa %ymm4,%ymm5 6256 vmovdqa %ymm4,%ymm6 
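/* Replicate the key and counter rows into four parallel AVX2 ChaCha20 states for the wide seal path. */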
6257 vmovdqa %ymm4,%ymm7 6258 vmovdqa %ymm4,0+64(%rbp) 6259 vmovdqa %ymm8,%ymm9 6260 vmovdqa %ymm8,%ymm10 6261 vmovdqa %ymm8,%ymm11 6262 vmovdqa %ymm8,0+96(%rbp) 6263 vmovdqa %ymm12,%ymm15 6264 vpaddd L$avx2_inc(%rip),%ymm15,%ymm14 6265 vpaddd L$avx2_inc(%rip),%ymm14,%ymm13 6266 vpaddd L$avx2_inc(%rip),%ymm13,%ymm12 6267 vmovdqa %ymm12,0+160(%rbp) 6268 vmovdqa %ymm13,0+192(%rbp) 6269 vmovdqa %ymm14,0+224(%rbp) 6270 vmovdqa %ymm15,0+256(%rbp) 6271 movq $10,%r10 6272L$seal_avx2_init_rounds: 6273 vmovdqa %ymm8,0+128(%rbp) 6274 vmovdqa L$rol16(%rip),%ymm8 6275 vpaddd %ymm7,%ymm3,%ymm3 6276 vpaddd %ymm6,%ymm2,%ymm2 6277 vpaddd %ymm5,%ymm1,%ymm1 6278 vpaddd %ymm4,%ymm0,%ymm0 6279 vpxor %ymm3,%ymm15,%ymm15 6280 vpxor %ymm2,%ymm14,%ymm14 6281 vpxor %ymm1,%ymm13,%ymm13 6282 vpxor %ymm0,%ymm12,%ymm12 6283 vpshufb %ymm8,%ymm15,%ymm15 6284 vpshufb %ymm8,%ymm14,%ymm14 6285 vpshufb %ymm8,%ymm13,%ymm13 6286 vpshufb %ymm8,%ymm12,%ymm12 6287 vpaddd %ymm15,%ymm11,%ymm11 6288 vpaddd %ymm14,%ymm10,%ymm10 6289 vpaddd %ymm13,%ymm9,%ymm9 6290 vpaddd 0+128(%rbp),%ymm12,%ymm8 6291 vpxor %ymm11,%ymm7,%ymm7 6292 vpxor %ymm10,%ymm6,%ymm6 6293 vpxor %ymm9,%ymm5,%ymm5 6294 vpxor %ymm8,%ymm4,%ymm4 6295 vmovdqa %ymm8,0+128(%rbp) 6296 vpsrld $20,%ymm7,%ymm8 6297 vpslld $32-20,%ymm7,%ymm7 6298 vpxor %ymm8,%ymm7,%ymm7 6299 vpsrld $20,%ymm6,%ymm8 6300 vpslld $32-20,%ymm6,%ymm6 6301 vpxor %ymm8,%ymm6,%ymm6 6302 vpsrld $20,%ymm5,%ymm8 6303 vpslld $32-20,%ymm5,%ymm5 6304 vpxor %ymm8,%ymm5,%ymm5 6305 vpsrld $20,%ymm4,%ymm8 6306 vpslld $32-20,%ymm4,%ymm4 6307 vpxor %ymm8,%ymm4,%ymm4 6308 vmovdqa L$rol8(%rip),%ymm8 6309 vpaddd %ymm7,%ymm3,%ymm3 6310 vpaddd %ymm6,%ymm2,%ymm2 6311 vpaddd %ymm5,%ymm1,%ymm1 6312 vpaddd %ymm4,%ymm0,%ymm0 6313 vpxor %ymm3,%ymm15,%ymm15 6314 vpxor %ymm2,%ymm14,%ymm14 6315 vpxor %ymm1,%ymm13,%ymm13 6316 vpxor %ymm0,%ymm12,%ymm12 6317 vpshufb %ymm8,%ymm15,%ymm15 6318 vpshufb %ymm8,%ymm14,%ymm14 6319 vpshufb %ymm8,%ymm13,%ymm13 6320 vpshufb %ymm8,%ymm12,%ymm12 6321 vpaddd %ymm15,%ymm11,%ymm11 6322 vpaddd %ymm14,%ymm10,%ymm10 6323 vpaddd %ymm13,%ymm9,%ymm9 6324 vpaddd 0+128(%rbp),%ymm12,%ymm8 6325 vpxor %ymm11,%ymm7,%ymm7 6326 vpxor %ymm10,%ymm6,%ymm6 6327 vpxor %ymm9,%ymm5,%ymm5 6328 vpxor %ymm8,%ymm4,%ymm4 6329 vmovdqa %ymm8,0+128(%rbp) 6330 vpsrld $25,%ymm7,%ymm8 6331 vpslld $32-25,%ymm7,%ymm7 6332 vpxor %ymm8,%ymm7,%ymm7 6333 vpsrld $25,%ymm6,%ymm8 6334 vpslld $32-25,%ymm6,%ymm6 6335 vpxor %ymm8,%ymm6,%ymm6 6336 vpsrld $25,%ymm5,%ymm8 6337 vpslld $32-25,%ymm5,%ymm5 6338 vpxor %ymm8,%ymm5,%ymm5 6339 vpsrld $25,%ymm4,%ymm8 6340 vpslld $32-25,%ymm4,%ymm4 6341 vpxor %ymm8,%ymm4,%ymm4 6342 vmovdqa 0+128(%rbp),%ymm8 6343 vpalignr $4,%ymm7,%ymm7,%ymm7 6344 vpalignr $8,%ymm11,%ymm11,%ymm11 6345 vpalignr $12,%ymm15,%ymm15,%ymm15 6346 vpalignr $4,%ymm6,%ymm6,%ymm6 6347 vpalignr $8,%ymm10,%ymm10,%ymm10 6348 vpalignr $12,%ymm14,%ymm14,%ymm14 6349 vpalignr $4,%ymm5,%ymm5,%ymm5 6350 vpalignr $8,%ymm9,%ymm9,%ymm9 6351 vpalignr $12,%ymm13,%ymm13,%ymm13 6352 vpalignr $4,%ymm4,%ymm4,%ymm4 6353 vpalignr $8,%ymm8,%ymm8,%ymm8 6354 vpalignr $12,%ymm12,%ymm12,%ymm12 6355 vmovdqa %ymm8,0+128(%rbp) 6356 vmovdqa L$rol16(%rip),%ymm8 6357 vpaddd %ymm7,%ymm3,%ymm3 6358 vpaddd %ymm6,%ymm2,%ymm2 6359 vpaddd %ymm5,%ymm1,%ymm1 6360 vpaddd %ymm4,%ymm0,%ymm0 6361 vpxor %ymm3,%ymm15,%ymm15 6362 vpxor %ymm2,%ymm14,%ymm14 6363 vpxor %ymm1,%ymm13,%ymm13 6364 vpxor %ymm0,%ymm12,%ymm12 6365 vpshufb %ymm8,%ymm15,%ymm15 6366 vpshufb %ymm8,%ymm14,%ymm14 6367 vpshufb %ymm8,%ymm13,%ymm13 6368 vpshufb %ymm8,%ymm12,%ymm12 6369 vpaddd 
%ymm15,%ymm11,%ymm11 6370 vpaddd %ymm14,%ymm10,%ymm10 6371 vpaddd %ymm13,%ymm9,%ymm9 6372 vpaddd 0+128(%rbp),%ymm12,%ymm8 6373 vpxor %ymm11,%ymm7,%ymm7 6374 vpxor %ymm10,%ymm6,%ymm6 6375 vpxor %ymm9,%ymm5,%ymm5 6376 vpxor %ymm8,%ymm4,%ymm4 6377 vmovdqa %ymm8,0+128(%rbp) 6378 vpsrld $20,%ymm7,%ymm8 6379 vpslld $32-20,%ymm7,%ymm7 6380 vpxor %ymm8,%ymm7,%ymm7 6381 vpsrld $20,%ymm6,%ymm8 6382 vpslld $32-20,%ymm6,%ymm6 6383 vpxor %ymm8,%ymm6,%ymm6 6384 vpsrld $20,%ymm5,%ymm8 6385 vpslld $32-20,%ymm5,%ymm5 6386 vpxor %ymm8,%ymm5,%ymm5 6387 vpsrld $20,%ymm4,%ymm8 6388 vpslld $32-20,%ymm4,%ymm4 6389 vpxor %ymm8,%ymm4,%ymm4 6390 vmovdqa L$rol8(%rip),%ymm8 6391 vpaddd %ymm7,%ymm3,%ymm3 6392 vpaddd %ymm6,%ymm2,%ymm2 6393 vpaddd %ymm5,%ymm1,%ymm1 6394 vpaddd %ymm4,%ymm0,%ymm0 6395 vpxor %ymm3,%ymm15,%ymm15 6396 vpxor %ymm2,%ymm14,%ymm14 6397 vpxor %ymm1,%ymm13,%ymm13 6398 vpxor %ymm0,%ymm12,%ymm12 6399 vpshufb %ymm8,%ymm15,%ymm15 6400 vpshufb %ymm8,%ymm14,%ymm14 6401 vpshufb %ymm8,%ymm13,%ymm13 6402 vpshufb %ymm8,%ymm12,%ymm12 6403 vpaddd %ymm15,%ymm11,%ymm11 6404 vpaddd %ymm14,%ymm10,%ymm10 6405 vpaddd %ymm13,%ymm9,%ymm9 6406 vpaddd 0+128(%rbp),%ymm12,%ymm8 6407 vpxor %ymm11,%ymm7,%ymm7 6408 vpxor %ymm10,%ymm6,%ymm6 6409 vpxor %ymm9,%ymm5,%ymm5 6410 vpxor %ymm8,%ymm4,%ymm4 6411 vmovdqa %ymm8,0+128(%rbp) 6412 vpsrld $25,%ymm7,%ymm8 6413 vpslld $32-25,%ymm7,%ymm7 6414 vpxor %ymm8,%ymm7,%ymm7 6415 vpsrld $25,%ymm6,%ymm8 6416 vpslld $32-25,%ymm6,%ymm6 6417 vpxor %ymm8,%ymm6,%ymm6 6418 vpsrld $25,%ymm5,%ymm8 6419 vpslld $32-25,%ymm5,%ymm5 6420 vpxor %ymm8,%ymm5,%ymm5 6421 vpsrld $25,%ymm4,%ymm8 6422 vpslld $32-25,%ymm4,%ymm4 6423 vpxor %ymm8,%ymm4,%ymm4 6424 vmovdqa 0+128(%rbp),%ymm8 6425 vpalignr $12,%ymm7,%ymm7,%ymm7 6426 vpalignr $8,%ymm11,%ymm11,%ymm11 6427 vpalignr $4,%ymm15,%ymm15,%ymm15 6428 vpalignr $12,%ymm6,%ymm6,%ymm6 6429 vpalignr $8,%ymm10,%ymm10,%ymm10 6430 vpalignr $4,%ymm14,%ymm14,%ymm14 6431 vpalignr $12,%ymm5,%ymm5,%ymm5 6432 vpalignr $8,%ymm9,%ymm9,%ymm9 6433 vpalignr $4,%ymm13,%ymm13,%ymm13 6434 vpalignr $12,%ymm4,%ymm4,%ymm4 6435 vpalignr $8,%ymm8,%ymm8,%ymm8 6436 vpalignr $4,%ymm12,%ymm12,%ymm12 6437 6438 decq %r10 6439 jnz L$seal_avx2_init_rounds 6440 vpaddd L$chacha20_consts(%rip),%ymm3,%ymm3 6441 vpaddd 0+64(%rbp),%ymm7,%ymm7 6442 vpaddd 0+96(%rbp),%ymm11,%ymm11 6443 vpaddd 0+256(%rbp),%ymm15,%ymm15 6444 vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 6445 vpaddd 0+64(%rbp),%ymm6,%ymm6 6446 vpaddd 0+96(%rbp),%ymm10,%ymm10 6447 vpaddd 0+224(%rbp),%ymm14,%ymm14 6448 vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 6449 vpaddd 0+64(%rbp),%ymm5,%ymm5 6450 vpaddd 0+96(%rbp),%ymm9,%ymm9 6451 vpaddd 0+192(%rbp),%ymm13,%ymm13 6452 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 6453 vpaddd 0+64(%rbp),%ymm4,%ymm4 6454 vpaddd 0+96(%rbp),%ymm8,%ymm8 6455 vpaddd 0+160(%rbp),%ymm12,%ymm12 6456 6457 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 6458 vperm2i128 $0x02,%ymm3,%ymm7,%ymm15 6459 vperm2i128 $0x13,%ymm3,%ymm7,%ymm3 6460 vpand L$clamp(%rip),%ymm15,%ymm15 6461 vmovdqa %ymm15,0+0(%rbp) 6462 movq %r8,%r8 6463 call poly_hash_ad_internal 6464 6465 vpxor 0(%rsi),%ymm3,%ymm3 6466 vpxor 32(%rsi),%ymm11,%ymm11 6467 vmovdqu %ymm3,0(%rdi) 6468 vmovdqu %ymm11,32(%rdi) 6469 vperm2i128 $0x02,%ymm2,%ymm6,%ymm15 6470 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 6471 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 6472 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 6473 vpxor 0+64(%rsi),%ymm15,%ymm15 6474 vpxor 32+64(%rsi),%ymm2,%ymm2 6475 vpxor 64+64(%rsi),%ymm6,%ymm6 6476 vpxor 96+64(%rsi),%ymm10,%ymm10 6477 vmovdqu %ymm15,0+64(%rdi) 6478 vmovdqu 
%ymm2,32+64(%rdi) 6479 vmovdqu %ymm6,64+64(%rdi) 6480 vmovdqu %ymm10,96+64(%rdi) 6481 vperm2i128 $0x02,%ymm1,%ymm5,%ymm15 6482 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 6483 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 6484 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 6485 vpxor 0+192(%rsi),%ymm15,%ymm15 6486 vpxor 32+192(%rsi),%ymm1,%ymm1 6487 vpxor 64+192(%rsi),%ymm5,%ymm5 6488 vpxor 96+192(%rsi),%ymm9,%ymm9 6489 vmovdqu %ymm15,0+192(%rdi) 6490 vmovdqu %ymm1,32+192(%rdi) 6491 vmovdqu %ymm5,64+192(%rdi) 6492 vmovdqu %ymm9,96+192(%rdi) 6493 vperm2i128 $0x13,%ymm0,%ymm4,%ymm15 6494 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 6495 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 6496 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 6497 vmovdqa %ymm15,%ymm8 6498 6499 leaq 320(%rsi),%rsi 6500 subq $320,%rbx 6501 movq $320,%rcx 6502 cmpq $128,%rbx 6503 jbe L$seal_avx2_short_hash_remainder 6504 vpxor 0(%rsi),%ymm0,%ymm0 6505 vpxor 32(%rsi),%ymm4,%ymm4 6506 vpxor 64(%rsi),%ymm8,%ymm8 6507 vpxor 96(%rsi),%ymm12,%ymm12 6508 vmovdqu %ymm0,320(%rdi) 6509 vmovdqu %ymm4,352(%rdi) 6510 vmovdqu %ymm8,384(%rdi) 6511 vmovdqu %ymm12,416(%rdi) 6512 leaq 128(%rsi),%rsi 6513 subq $128,%rbx 6514 movq $8,%rcx 6515 movq $2,%r8 6516 cmpq $128,%rbx 6517 jbe L$seal_avx2_tail_128 6518 cmpq $256,%rbx 6519 jbe L$seal_avx2_tail_256 6520 cmpq $384,%rbx 6521 jbe L$seal_avx2_tail_384 6522 cmpq $512,%rbx 6523 jbe L$seal_avx2_tail_512 6524 vmovdqa L$chacha20_consts(%rip),%ymm0 6525 vmovdqa 0+64(%rbp),%ymm4 6526 vmovdqa 0+96(%rbp),%ymm8 6527 vmovdqa %ymm0,%ymm1 6528 vmovdqa %ymm4,%ymm5 6529 vmovdqa %ymm8,%ymm9 6530 vmovdqa %ymm0,%ymm2 6531 vmovdqa %ymm4,%ymm6 6532 vmovdqa %ymm8,%ymm10 6533 vmovdqa %ymm0,%ymm3 6534 vmovdqa %ymm4,%ymm7 6535 vmovdqa %ymm8,%ymm11 6536 vmovdqa L$avx2_inc(%rip),%ymm12 6537 vpaddd 0+160(%rbp),%ymm12,%ymm15 6538 vpaddd %ymm15,%ymm12,%ymm14 6539 vpaddd %ymm14,%ymm12,%ymm13 6540 vpaddd %ymm13,%ymm12,%ymm12 6541 vmovdqa %ymm15,0+256(%rbp) 6542 vmovdqa %ymm14,0+224(%rbp) 6543 vmovdqa %ymm13,0+192(%rbp) 6544 vmovdqa %ymm12,0+160(%rbp) 6545 vmovdqa %ymm8,0+128(%rbp) 6546 vmovdqa L$rol16(%rip),%ymm8 6547 vpaddd %ymm7,%ymm3,%ymm3 6548 vpaddd %ymm6,%ymm2,%ymm2 6549 vpaddd %ymm5,%ymm1,%ymm1 6550 vpaddd %ymm4,%ymm0,%ymm0 6551 vpxor %ymm3,%ymm15,%ymm15 6552 vpxor %ymm2,%ymm14,%ymm14 6553 vpxor %ymm1,%ymm13,%ymm13 6554 vpxor %ymm0,%ymm12,%ymm12 6555 vpshufb %ymm8,%ymm15,%ymm15 6556 vpshufb %ymm8,%ymm14,%ymm14 6557 vpshufb %ymm8,%ymm13,%ymm13 6558 vpshufb %ymm8,%ymm12,%ymm12 6559 vpaddd %ymm15,%ymm11,%ymm11 6560 vpaddd %ymm14,%ymm10,%ymm10 6561 vpaddd %ymm13,%ymm9,%ymm9 6562 vpaddd 0+128(%rbp),%ymm12,%ymm8 6563 vpxor %ymm11,%ymm7,%ymm7 6564 vpxor %ymm10,%ymm6,%ymm6 6565 vpxor %ymm9,%ymm5,%ymm5 6566 vpxor %ymm8,%ymm4,%ymm4 6567 vmovdqa %ymm8,0+128(%rbp) 6568 vpsrld $20,%ymm7,%ymm8 6569 vpslld $32-20,%ymm7,%ymm7 6570 vpxor %ymm8,%ymm7,%ymm7 6571 vpsrld $20,%ymm6,%ymm8 6572 vpslld $32-20,%ymm6,%ymm6 6573 vpxor %ymm8,%ymm6,%ymm6 6574 vpsrld $20,%ymm5,%ymm8 6575 vpslld $32-20,%ymm5,%ymm5 6576 vpxor %ymm8,%ymm5,%ymm5 6577 vpsrld $20,%ymm4,%ymm8 6578 vpslld $32-20,%ymm4,%ymm4 6579 vpxor %ymm8,%ymm4,%ymm4 6580 vmovdqa L$rol8(%rip),%ymm8 6581 vpaddd %ymm7,%ymm3,%ymm3 6582 vpaddd %ymm6,%ymm2,%ymm2 6583 vpaddd %ymm5,%ymm1,%ymm1 6584 vpaddd %ymm4,%ymm0,%ymm0 6585 vpxor %ymm3,%ymm15,%ymm15 6586 vpxor %ymm2,%ymm14,%ymm14 6587 vpxor %ymm1,%ymm13,%ymm13 6588 vpxor %ymm0,%ymm12,%ymm12 6589 vpshufb %ymm8,%ymm15,%ymm15 6590 vpshufb %ymm8,%ymm14,%ymm14 6591 vpshufb %ymm8,%ymm13,%ymm13 6592 vpshufb %ymm8,%ymm12,%ymm12 6593 vpaddd %ymm15,%ymm11,%ymm11 6594 vpaddd %ymm14,%ymm10,%ymm10 6595 
vpaddd %ymm13,%ymm9,%ymm9 6596 vpaddd 0+128(%rbp),%ymm12,%ymm8 6597 vpxor %ymm11,%ymm7,%ymm7 6598 vpxor %ymm10,%ymm6,%ymm6 6599 vpxor %ymm9,%ymm5,%ymm5 6600 vpxor %ymm8,%ymm4,%ymm4 6601 vmovdqa %ymm8,0+128(%rbp) 6602 vpsrld $25,%ymm7,%ymm8 6603 vpslld $32-25,%ymm7,%ymm7 6604 vpxor %ymm8,%ymm7,%ymm7 6605 vpsrld $25,%ymm6,%ymm8 6606 vpslld $32-25,%ymm6,%ymm6 6607 vpxor %ymm8,%ymm6,%ymm6 6608 vpsrld $25,%ymm5,%ymm8 6609 vpslld $32-25,%ymm5,%ymm5 6610 vpxor %ymm8,%ymm5,%ymm5 6611 vpsrld $25,%ymm4,%ymm8 6612 vpslld $32-25,%ymm4,%ymm4 6613 vpxor %ymm8,%ymm4,%ymm4 6614 vmovdqa 0+128(%rbp),%ymm8 6615 vpalignr $4,%ymm7,%ymm7,%ymm7 6616 vpalignr $8,%ymm11,%ymm11,%ymm11 6617 vpalignr $12,%ymm15,%ymm15,%ymm15 6618 vpalignr $4,%ymm6,%ymm6,%ymm6 6619 vpalignr $8,%ymm10,%ymm10,%ymm10 6620 vpalignr $12,%ymm14,%ymm14,%ymm14 6621 vpalignr $4,%ymm5,%ymm5,%ymm5 6622 vpalignr $8,%ymm9,%ymm9,%ymm9 6623 vpalignr $12,%ymm13,%ymm13,%ymm13 6624 vpalignr $4,%ymm4,%ymm4,%ymm4 6625 vpalignr $8,%ymm8,%ymm8,%ymm8 6626 vpalignr $12,%ymm12,%ymm12,%ymm12 6627 vmovdqa %ymm8,0+128(%rbp) 6628 vmovdqa L$rol16(%rip),%ymm8 6629 vpaddd %ymm7,%ymm3,%ymm3 6630 vpaddd %ymm6,%ymm2,%ymm2 6631 vpaddd %ymm5,%ymm1,%ymm1 6632 vpaddd %ymm4,%ymm0,%ymm0 6633 vpxor %ymm3,%ymm15,%ymm15 6634 vpxor %ymm2,%ymm14,%ymm14 6635 vpxor %ymm1,%ymm13,%ymm13 6636 vpxor %ymm0,%ymm12,%ymm12 6637 vpshufb %ymm8,%ymm15,%ymm15 6638 vpshufb %ymm8,%ymm14,%ymm14 6639 vpshufb %ymm8,%ymm13,%ymm13 6640 vpshufb %ymm8,%ymm12,%ymm12 6641 vpaddd %ymm15,%ymm11,%ymm11 6642 vpaddd %ymm14,%ymm10,%ymm10 6643 vpaddd %ymm13,%ymm9,%ymm9 6644 vpaddd 0+128(%rbp),%ymm12,%ymm8 6645 vpxor %ymm11,%ymm7,%ymm7 6646 vpxor %ymm10,%ymm6,%ymm6 6647 vpxor %ymm9,%ymm5,%ymm5 6648 vpxor %ymm8,%ymm4,%ymm4 6649 vmovdqa %ymm8,0+128(%rbp) 6650 vpsrld $20,%ymm7,%ymm8 6651 vpslld $32-20,%ymm7,%ymm7 6652 vpxor %ymm8,%ymm7,%ymm7 6653 vpsrld $20,%ymm6,%ymm8 6654 vpslld $32-20,%ymm6,%ymm6 6655 vpxor %ymm8,%ymm6,%ymm6 6656 vpsrld $20,%ymm5,%ymm8 6657 vpslld $32-20,%ymm5,%ymm5 6658 vpxor %ymm8,%ymm5,%ymm5 6659 vpsrld $20,%ymm4,%ymm8 6660 vpslld $32-20,%ymm4,%ymm4 6661 vpxor %ymm8,%ymm4,%ymm4 6662 vmovdqa L$rol8(%rip),%ymm8 6663 vpaddd %ymm7,%ymm3,%ymm3 6664 vpaddd %ymm6,%ymm2,%ymm2 6665 vpaddd %ymm5,%ymm1,%ymm1 6666 vpaddd %ymm4,%ymm0,%ymm0 6667 vpxor %ymm3,%ymm15,%ymm15 6668 vpxor %ymm2,%ymm14,%ymm14 6669 vpxor %ymm1,%ymm13,%ymm13 6670 vpxor %ymm0,%ymm12,%ymm12 6671 vpshufb %ymm8,%ymm15,%ymm15 6672 vpshufb %ymm8,%ymm14,%ymm14 6673 vpshufb %ymm8,%ymm13,%ymm13 6674 vpshufb %ymm8,%ymm12,%ymm12 6675 vpaddd %ymm15,%ymm11,%ymm11 6676 vpaddd %ymm14,%ymm10,%ymm10 6677 vpaddd %ymm13,%ymm9,%ymm9 6678 vpaddd 0+128(%rbp),%ymm12,%ymm8 6679 vpxor %ymm11,%ymm7,%ymm7 6680 vpxor %ymm10,%ymm6,%ymm6 6681 vpxor %ymm9,%ymm5,%ymm5 6682 vpxor %ymm8,%ymm4,%ymm4 6683 vmovdqa %ymm8,0+128(%rbp) 6684 vpsrld $25,%ymm7,%ymm8 6685 vpslld $32-25,%ymm7,%ymm7 6686 vpxor %ymm8,%ymm7,%ymm7 6687 vpsrld $25,%ymm6,%ymm8 6688 vpslld $32-25,%ymm6,%ymm6 6689 vpxor %ymm8,%ymm6,%ymm6 6690 vpsrld $25,%ymm5,%ymm8 6691 vpslld $32-25,%ymm5,%ymm5 6692 vpxor %ymm8,%ymm5,%ymm5 6693 vpsrld $25,%ymm4,%ymm8 6694 vpslld $32-25,%ymm4,%ymm4 6695 vpxor %ymm8,%ymm4,%ymm4 6696 vmovdqa 0+128(%rbp),%ymm8 6697 vpalignr $12,%ymm7,%ymm7,%ymm7 6698 vpalignr $8,%ymm11,%ymm11,%ymm11 6699 vpalignr $4,%ymm15,%ymm15,%ymm15 6700 vpalignr $12,%ymm6,%ymm6,%ymm6 6701 vpalignr $8,%ymm10,%ymm10,%ymm10 6702 vpalignr $4,%ymm14,%ymm14,%ymm14 6703 vpalignr $12,%ymm5,%ymm5,%ymm5 6704 vpalignr $8,%ymm9,%ymm9,%ymm9 6705 vpalignr $4,%ymm13,%ymm13,%ymm13 6706 vpalignr $12,%ymm4,%ymm4,%ymm4 
6707 vpalignr $8,%ymm8,%ymm8,%ymm8 6708 vpalignr $4,%ymm12,%ymm12,%ymm12 6709 vmovdqa %ymm8,0+128(%rbp) 6710 vmovdqa L$rol16(%rip),%ymm8 6711 vpaddd %ymm7,%ymm3,%ymm3 6712 vpaddd %ymm6,%ymm2,%ymm2 6713 vpaddd %ymm5,%ymm1,%ymm1 6714 vpaddd %ymm4,%ymm0,%ymm0 6715 vpxor %ymm3,%ymm15,%ymm15 6716 vpxor %ymm2,%ymm14,%ymm14 6717 vpxor %ymm1,%ymm13,%ymm13 6718 vpxor %ymm0,%ymm12,%ymm12 6719 vpshufb %ymm8,%ymm15,%ymm15 6720 vpshufb %ymm8,%ymm14,%ymm14 6721 vpshufb %ymm8,%ymm13,%ymm13 6722 vpshufb %ymm8,%ymm12,%ymm12 6723 vpaddd %ymm15,%ymm11,%ymm11 6724 vpaddd %ymm14,%ymm10,%ymm10 6725 vpaddd %ymm13,%ymm9,%ymm9 6726 vpaddd 0+128(%rbp),%ymm12,%ymm8 6727 vpxor %ymm11,%ymm7,%ymm7 6728 vpxor %ymm10,%ymm6,%ymm6 6729 vpxor %ymm9,%ymm5,%ymm5 6730 vpxor %ymm8,%ymm4,%ymm4 6731 vmovdqa %ymm8,0+128(%rbp) 6732 vpsrld $20,%ymm7,%ymm8 6733 vpslld $32-20,%ymm7,%ymm7 6734 vpxor %ymm8,%ymm7,%ymm7 6735 vpsrld $20,%ymm6,%ymm8 6736 vpslld $32-20,%ymm6,%ymm6 6737 vpxor %ymm8,%ymm6,%ymm6 6738 vpsrld $20,%ymm5,%ymm8 6739 vpslld $32-20,%ymm5,%ymm5 6740 vpxor %ymm8,%ymm5,%ymm5 6741 vpsrld $20,%ymm4,%ymm8 6742 vpslld $32-20,%ymm4,%ymm4 6743 vpxor %ymm8,%ymm4,%ymm4 6744 vmovdqa L$rol8(%rip),%ymm8 6745 vpaddd %ymm7,%ymm3,%ymm3 6746 vpaddd %ymm6,%ymm2,%ymm2 6747 vpaddd %ymm5,%ymm1,%ymm1 6748 vpaddd %ymm4,%ymm0,%ymm0 6749 vpxor %ymm3,%ymm15,%ymm15 6750 6751 subq $16,%rdi 6752 movq $9,%rcx 6753 jmp L$seal_avx2_main_loop_rounds_entry 6754.p2align 5 6755L$seal_avx2_main_loop: 6756 vmovdqa L$chacha20_consts(%rip),%ymm0 6757 vmovdqa 0+64(%rbp),%ymm4 6758 vmovdqa 0+96(%rbp),%ymm8 6759 vmovdqa %ymm0,%ymm1 6760 vmovdqa %ymm4,%ymm5 6761 vmovdqa %ymm8,%ymm9 6762 vmovdqa %ymm0,%ymm2 6763 vmovdqa %ymm4,%ymm6 6764 vmovdqa %ymm8,%ymm10 6765 vmovdqa %ymm0,%ymm3 6766 vmovdqa %ymm4,%ymm7 6767 vmovdqa %ymm8,%ymm11 6768 vmovdqa L$avx2_inc(%rip),%ymm12 6769 vpaddd 0+160(%rbp),%ymm12,%ymm15 6770 vpaddd %ymm15,%ymm12,%ymm14 6771 vpaddd %ymm14,%ymm12,%ymm13 6772 vpaddd %ymm13,%ymm12,%ymm12 6773 vmovdqa %ymm15,0+256(%rbp) 6774 vmovdqa %ymm14,0+224(%rbp) 6775 vmovdqa %ymm13,0+192(%rbp) 6776 vmovdqa %ymm12,0+160(%rbp) 6777 6778 movq $10,%rcx 6779.p2align 5 6780L$seal_avx2_main_loop_rounds: 6781 addq 0+0(%rdi),%r10 6782 adcq 8+0(%rdi),%r11 6783 adcq $1,%r12 6784 vmovdqa %ymm8,0+128(%rbp) 6785 vmovdqa L$rol16(%rip),%ymm8 6786 vpaddd %ymm7,%ymm3,%ymm3 6787 vpaddd %ymm6,%ymm2,%ymm2 6788 vpaddd %ymm5,%ymm1,%ymm1 6789 vpaddd %ymm4,%ymm0,%ymm0 6790 vpxor %ymm3,%ymm15,%ymm15 6791 vpxor %ymm2,%ymm14,%ymm14 6792 vpxor %ymm1,%ymm13,%ymm13 6793 vpxor %ymm0,%ymm12,%ymm12 6794 movq 0+0+0(%rbp),%rdx 6795 movq %rdx,%r15 6796 mulxq %r10,%r13,%r14 6797 mulxq %r11,%rax,%rdx 6798 imulq %r12,%r15 6799 addq %rax,%r14 6800 adcq %rdx,%r15 6801 vpshufb %ymm8,%ymm15,%ymm15 6802 vpshufb %ymm8,%ymm14,%ymm14 6803 vpshufb %ymm8,%ymm13,%ymm13 6804 vpshufb %ymm8,%ymm12,%ymm12 6805 vpaddd %ymm15,%ymm11,%ymm11 6806 vpaddd %ymm14,%ymm10,%ymm10 6807 vpaddd %ymm13,%ymm9,%ymm9 6808 vpaddd 0+128(%rbp),%ymm12,%ymm8 6809 vpxor %ymm11,%ymm7,%ymm7 6810 movq 8+0+0(%rbp),%rdx 6811 mulxq %r10,%r10,%rax 6812 addq %r10,%r14 6813 mulxq %r11,%r11,%r9 6814 adcq %r11,%r15 6815 adcq $0,%r9 6816 imulq %r12,%rdx 6817 vpxor %ymm10,%ymm6,%ymm6 6818 vpxor %ymm9,%ymm5,%ymm5 6819 vpxor %ymm8,%ymm4,%ymm4 6820 vmovdqa %ymm8,0+128(%rbp) 6821 vpsrld $20,%ymm7,%ymm8 6822 vpslld $32-20,%ymm7,%ymm7 6823 vpxor %ymm8,%ymm7,%ymm7 6824 vpsrld $20,%ymm6,%ymm8 6825 vpslld $32-20,%ymm6,%ymm6 6826 vpxor %ymm8,%ymm6,%ymm6 6827 vpsrld $20,%ymm5,%ymm8 6828 vpslld $32-20,%ymm5,%ymm5 6829 addq %rax,%r15 6830 adcq %rdx,%r9 6831 
vpxor %ymm8,%ymm5,%ymm5 6832 vpsrld $20,%ymm4,%ymm8 6833 vpslld $32-20,%ymm4,%ymm4 6834 vpxor %ymm8,%ymm4,%ymm4 6835 vmovdqa L$rol8(%rip),%ymm8 6836 vpaddd %ymm7,%ymm3,%ymm3 6837 vpaddd %ymm6,%ymm2,%ymm2 6838 vpaddd %ymm5,%ymm1,%ymm1 6839 vpaddd %ymm4,%ymm0,%ymm0 6840 vpxor %ymm3,%ymm15,%ymm15 6841 movq %r13,%r10 6842 movq %r14,%r11 6843 movq %r15,%r12 6844 andq $3,%r12 6845 movq %r15,%r13 6846 andq $-4,%r13 6847 movq %r9,%r14 6848 shrdq $2,%r9,%r15 6849 shrq $2,%r9 6850 addq %r13,%r15 6851 adcq %r14,%r9 6852 addq %r15,%r10 6853 adcq %r9,%r11 6854 adcq $0,%r12 6855 6856L$seal_avx2_main_loop_rounds_entry: 6857 vpxor %ymm2,%ymm14,%ymm14 6858 vpxor %ymm1,%ymm13,%ymm13 6859 vpxor %ymm0,%ymm12,%ymm12 6860 vpshufb %ymm8,%ymm15,%ymm15 6861 vpshufb %ymm8,%ymm14,%ymm14 6862 vpshufb %ymm8,%ymm13,%ymm13 6863 vpshufb %ymm8,%ymm12,%ymm12 6864 vpaddd %ymm15,%ymm11,%ymm11 6865 vpaddd %ymm14,%ymm10,%ymm10 6866 addq 0+16(%rdi),%r10 6867 adcq 8+16(%rdi),%r11 6868 adcq $1,%r12 6869 vpaddd %ymm13,%ymm9,%ymm9 6870 vpaddd 0+128(%rbp),%ymm12,%ymm8 6871 vpxor %ymm11,%ymm7,%ymm7 6872 vpxor %ymm10,%ymm6,%ymm6 6873 vpxor %ymm9,%ymm5,%ymm5 6874 vpxor %ymm8,%ymm4,%ymm4 6875 vmovdqa %ymm8,0+128(%rbp) 6876 vpsrld $25,%ymm7,%ymm8 6877 movq 0+0+0(%rbp),%rdx 6878 movq %rdx,%r15 6879 mulxq %r10,%r13,%r14 6880 mulxq %r11,%rax,%rdx 6881 imulq %r12,%r15 6882 addq %rax,%r14 6883 adcq %rdx,%r15 6884 vpslld $32-25,%ymm7,%ymm7 6885 vpxor %ymm8,%ymm7,%ymm7 6886 vpsrld $25,%ymm6,%ymm8 6887 vpslld $32-25,%ymm6,%ymm6 6888 vpxor %ymm8,%ymm6,%ymm6 6889 vpsrld $25,%ymm5,%ymm8 6890 vpslld $32-25,%ymm5,%ymm5 6891 vpxor %ymm8,%ymm5,%ymm5 6892 vpsrld $25,%ymm4,%ymm8 6893 vpslld $32-25,%ymm4,%ymm4 6894 vpxor %ymm8,%ymm4,%ymm4 6895 vmovdqa 0+128(%rbp),%ymm8 6896 vpalignr $4,%ymm7,%ymm7,%ymm7 6897 vpalignr $8,%ymm11,%ymm11,%ymm11 6898 vpalignr $12,%ymm15,%ymm15,%ymm15 6899 vpalignr $4,%ymm6,%ymm6,%ymm6 6900 vpalignr $8,%ymm10,%ymm10,%ymm10 6901 vpalignr $12,%ymm14,%ymm14,%ymm14 6902 movq 8+0+0(%rbp),%rdx 6903 mulxq %r10,%r10,%rax 6904 addq %r10,%r14 6905 mulxq %r11,%r11,%r9 6906 adcq %r11,%r15 6907 adcq $0,%r9 6908 imulq %r12,%rdx 6909 vpalignr $4,%ymm5,%ymm5,%ymm5 6910 vpalignr $8,%ymm9,%ymm9,%ymm9 6911 vpalignr $12,%ymm13,%ymm13,%ymm13 6912 vpalignr $4,%ymm4,%ymm4,%ymm4 6913 vpalignr $8,%ymm8,%ymm8,%ymm8 6914 vpalignr $12,%ymm12,%ymm12,%ymm12 6915 vmovdqa %ymm8,0+128(%rbp) 6916 vmovdqa L$rol16(%rip),%ymm8 6917 vpaddd %ymm7,%ymm3,%ymm3 6918 vpaddd %ymm6,%ymm2,%ymm2 6919 vpaddd %ymm5,%ymm1,%ymm1 6920 vpaddd %ymm4,%ymm0,%ymm0 6921 vpxor %ymm3,%ymm15,%ymm15 6922 vpxor %ymm2,%ymm14,%ymm14 6923 vpxor %ymm1,%ymm13,%ymm13 6924 vpxor %ymm0,%ymm12,%ymm12 6925 vpshufb %ymm8,%ymm15,%ymm15 6926 vpshufb %ymm8,%ymm14,%ymm14 6927 addq %rax,%r15 6928 adcq %rdx,%r9 6929 vpshufb %ymm8,%ymm13,%ymm13 6930 vpshufb %ymm8,%ymm12,%ymm12 6931 vpaddd %ymm15,%ymm11,%ymm11 6932 vpaddd %ymm14,%ymm10,%ymm10 6933 vpaddd %ymm13,%ymm9,%ymm9 6934 vpaddd 0+128(%rbp),%ymm12,%ymm8 6935 vpxor %ymm11,%ymm7,%ymm7 6936 vpxor %ymm10,%ymm6,%ymm6 6937 vpxor %ymm9,%ymm5,%ymm5 6938 movq %r13,%r10 6939 movq %r14,%r11 6940 movq %r15,%r12 6941 andq $3,%r12 6942 movq %r15,%r13 6943 andq $-4,%r13 6944 movq %r9,%r14 6945 shrdq $2,%r9,%r15 6946 shrq $2,%r9 6947 addq %r13,%r15 6948 adcq %r14,%r9 6949 addq %r15,%r10 6950 adcq %r9,%r11 6951 adcq $0,%r12 6952 vpxor %ymm8,%ymm4,%ymm4 6953 vmovdqa %ymm8,0+128(%rbp) 6954 vpsrld $20,%ymm7,%ymm8 6955 vpslld $32-20,%ymm7,%ymm7 6956 vpxor %ymm8,%ymm7,%ymm7 6957 vpsrld $20,%ymm6,%ymm8 6958 vpslld $32-20,%ymm6,%ymm6 6959 vpxor %ymm8,%ymm6,%ymm6 6960 addq 
0+32(%rdi),%r10 6961 adcq 8+32(%rdi),%r11 6962 adcq $1,%r12 6963 6964 leaq 48(%rdi),%rdi 6965 vpsrld $20,%ymm5,%ymm8 6966 vpslld $32-20,%ymm5,%ymm5 6967 vpxor %ymm8,%ymm5,%ymm5 6968 vpsrld $20,%ymm4,%ymm8 6969 vpslld $32-20,%ymm4,%ymm4 6970 vpxor %ymm8,%ymm4,%ymm4 6971 vmovdqa L$rol8(%rip),%ymm8 6972 vpaddd %ymm7,%ymm3,%ymm3 6973 vpaddd %ymm6,%ymm2,%ymm2 6974 vpaddd %ymm5,%ymm1,%ymm1 6975 vpaddd %ymm4,%ymm0,%ymm0 6976 vpxor %ymm3,%ymm15,%ymm15 6977 vpxor %ymm2,%ymm14,%ymm14 6978 vpxor %ymm1,%ymm13,%ymm13 6979 vpxor %ymm0,%ymm12,%ymm12 6980 vpshufb %ymm8,%ymm15,%ymm15 6981 vpshufb %ymm8,%ymm14,%ymm14 6982 vpshufb %ymm8,%ymm13,%ymm13 6983 movq 0+0+0(%rbp),%rdx 6984 movq %rdx,%r15 6985 mulxq %r10,%r13,%r14 6986 mulxq %r11,%rax,%rdx 6987 imulq %r12,%r15 6988 addq %rax,%r14 6989 adcq %rdx,%r15 6990 vpshufb %ymm8,%ymm12,%ymm12 6991 vpaddd %ymm15,%ymm11,%ymm11 6992 vpaddd %ymm14,%ymm10,%ymm10 6993 vpaddd %ymm13,%ymm9,%ymm9 6994 vpaddd 0+128(%rbp),%ymm12,%ymm8 6995 vpxor %ymm11,%ymm7,%ymm7 6996 vpxor %ymm10,%ymm6,%ymm6 6997 vpxor %ymm9,%ymm5,%ymm5 6998 movq 8+0+0(%rbp),%rdx 6999 mulxq %r10,%r10,%rax 7000 addq %r10,%r14 7001 mulxq %r11,%r11,%r9 7002 adcq %r11,%r15 7003 adcq $0,%r9 7004 imulq %r12,%rdx 7005 vpxor %ymm8,%ymm4,%ymm4 7006 vmovdqa %ymm8,0+128(%rbp) 7007 vpsrld $25,%ymm7,%ymm8 7008 vpslld $32-25,%ymm7,%ymm7 7009 vpxor %ymm8,%ymm7,%ymm7 7010 vpsrld $25,%ymm6,%ymm8 7011 vpslld $32-25,%ymm6,%ymm6 7012 vpxor %ymm8,%ymm6,%ymm6 7013 addq %rax,%r15 7014 adcq %rdx,%r9 7015 vpsrld $25,%ymm5,%ymm8 7016 vpslld $32-25,%ymm5,%ymm5 7017 vpxor %ymm8,%ymm5,%ymm5 7018 vpsrld $25,%ymm4,%ymm8 7019 vpslld $32-25,%ymm4,%ymm4 7020 vpxor %ymm8,%ymm4,%ymm4 7021 vmovdqa 0+128(%rbp),%ymm8 7022 vpalignr $12,%ymm7,%ymm7,%ymm7 7023 vpalignr $8,%ymm11,%ymm11,%ymm11 7024 vpalignr $4,%ymm15,%ymm15,%ymm15 7025 vpalignr $12,%ymm6,%ymm6,%ymm6 7026 vpalignr $8,%ymm10,%ymm10,%ymm10 7027 vpalignr $4,%ymm14,%ymm14,%ymm14 7028 vpalignr $12,%ymm5,%ymm5,%ymm5 7029 vpalignr $8,%ymm9,%ymm9,%ymm9 7030 vpalignr $4,%ymm13,%ymm13,%ymm13 7031 vpalignr $12,%ymm4,%ymm4,%ymm4 7032 vpalignr $8,%ymm8,%ymm8,%ymm8 7033 movq %r13,%r10 7034 movq %r14,%r11 7035 movq %r15,%r12 7036 andq $3,%r12 7037 movq %r15,%r13 7038 andq $-4,%r13 7039 movq %r9,%r14 7040 shrdq $2,%r9,%r15 7041 shrq $2,%r9 7042 addq %r13,%r15 7043 adcq %r14,%r9 7044 addq %r15,%r10 7045 adcq %r9,%r11 7046 adcq $0,%r12 7047 vpalignr $4,%ymm12,%ymm12,%ymm12 7048 7049 decq %rcx 7050 jne L$seal_avx2_main_loop_rounds 7051 vpaddd L$chacha20_consts(%rip),%ymm3,%ymm3 7052 vpaddd 0+64(%rbp),%ymm7,%ymm7 7053 vpaddd 0+96(%rbp),%ymm11,%ymm11 7054 vpaddd 0+256(%rbp),%ymm15,%ymm15 7055 vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 7056 vpaddd 0+64(%rbp),%ymm6,%ymm6 7057 vpaddd 0+96(%rbp),%ymm10,%ymm10 7058 vpaddd 0+224(%rbp),%ymm14,%ymm14 7059 vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 7060 vpaddd 0+64(%rbp),%ymm5,%ymm5 7061 vpaddd 0+96(%rbp),%ymm9,%ymm9 7062 vpaddd 0+192(%rbp),%ymm13,%ymm13 7063 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 7064 vpaddd 0+64(%rbp),%ymm4,%ymm4 7065 vpaddd 0+96(%rbp),%ymm8,%ymm8 7066 vpaddd 0+160(%rbp),%ymm12,%ymm12 7067 7068 vmovdqa %ymm0,0+128(%rbp) 7069 addq 0+0(%rdi),%r10 7070 adcq 8+0(%rdi),%r11 7071 adcq $1,%r12 7072 movq 0+0+0(%rbp),%rdx 7073 movq %rdx,%r15 7074 mulxq %r10,%r13,%r14 7075 mulxq %r11,%rax,%rdx 7076 imulq %r12,%r15 7077 addq %rax,%r14 7078 adcq %rdx,%r15 7079 movq 8+0+0(%rbp),%rdx 7080 mulxq %r10,%r10,%rax 7081 addq %r10,%r14 7082 mulxq %r11,%r11,%r9 7083 adcq %r11,%r15 7084 adcq $0,%r9 7085 imulq %r12,%rdx 7086 addq %rax,%r15 7087 adcq %rdx,%r9 
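/* Poly1305: fold the double-width product back into the 130-bit accumulator held in %r10:%r11:%r12 (reduction modulo 2^130 - 5). */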
7088 movq %r13,%r10 7089 movq %r14,%r11 7090 movq %r15,%r12 7091 andq $3,%r12 7092 movq %r15,%r13 7093 andq $-4,%r13 7094 movq %r9,%r14 7095 shrdq $2,%r9,%r15 7096 shrq $2,%r9 7097 addq %r13,%r15 7098 adcq %r14,%r9 7099 addq %r15,%r10 7100 adcq %r9,%r11 7101 adcq $0,%r12 7102 addq 0+16(%rdi),%r10 7103 adcq 8+16(%rdi),%r11 7104 adcq $1,%r12 7105 movq 0+0+0(%rbp),%rdx 7106 movq %rdx,%r15 7107 mulxq %r10,%r13,%r14 7108 mulxq %r11,%rax,%rdx 7109 imulq %r12,%r15 7110 addq %rax,%r14 7111 adcq %rdx,%r15 7112 movq 8+0+0(%rbp),%rdx 7113 mulxq %r10,%r10,%rax 7114 addq %r10,%r14 7115 mulxq %r11,%r11,%r9 7116 adcq %r11,%r15 7117 adcq $0,%r9 7118 imulq %r12,%rdx 7119 addq %rax,%r15 7120 adcq %rdx,%r9 7121 movq %r13,%r10 7122 movq %r14,%r11 7123 movq %r15,%r12 7124 andq $3,%r12 7125 movq %r15,%r13 7126 andq $-4,%r13 7127 movq %r9,%r14 7128 shrdq $2,%r9,%r15 7129 shrq $2,%r9 7130 addq %r13,%r15 7131 adcq %r14,%r9 7132 addq %r15,%r10 7133 adcq %r9,%r11 7134 adcq $0,%r12 7135 7136 leaq 32(%rdi),%rdi 7137 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 7138 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 7139 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 7140 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 7141 vpxor 0+0(%rsi),%ymm0,%ymm0 7142 vpxor 32+0(%rsi),%ymm3,%ymm3 7143 vpxor 64+0(%rsi),%ymm7,%ymm7 7144 vpxor 96+0(%rsi),%ymm11,%ymm11 7145 vmovdqu %ymm0,0+0(%rdi) 7146 vmovdqu %ymm3,32+0(%rdi) 7147 vmovdqu %ymm7,64+0(%rdi) 7148 vmovdqu %ymm11,96+0(%rdi) 7149 7150 vmovdqa 0+128(%rbp),%ymm0 7151 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 7152 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 7153 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 7154 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 7155 vpxor 0+128(%rsi),%ymm3,%ymm3 7156 vpxor 32+128(%rsi),%ymm2,%ymm2 7157 vpxor 64+128(%rsi),%ymm6,%ymm6 7158 vpxor 96+128(%rsi),%ymm10,%ymm10 7159 vmovdqu %ymm3,0+128(%rdi) 7160 vmovdqu %ymm2,32+128(%rdi) 7161 vmovdqu %ymm6,64+128(%rdi) 7162 vmovdqu %ymm10,96+128(%rdi) 7163 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 7164 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 7165 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 7166 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 7167 vpxor 0+256(%rsi),%ymm3,%ymm3 7168 vpxor 32+256(%rsi),%ymm1,%ymm1 7169 vpxor 64+256(%rsi),%ymm5,%ymm5 7170 vpxor 96+256(%rsi),%ymm9,%ymm9 7171 vmovdqu %ymm3,0+256(%rdi) 7172 vmovdqu %ymm1,32+256(%rdi) 7173 vmovdqu %ymm5,64+256(%rdi) 7174 vmovdqu %ymm9,96+256(%rdi) 7175 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 7176 vperm2i128 $0x13,%ymm0,%ymm4,%ymm4 7177 vperm2i128 $0x02,%ymm8,%ymm12,%ymm0 7178 vperm2i128 $0x13,%ymm8,%ymm12,%ymm8 7179 vpxor 0+384(%rsi),%ymm3,%ymm3 7180 vpxor 32+384(%rsi),%ymm0,%ymm0 7181 vpxor 64+384(%rsi),%ymm4,%ymm4 7182 vpxor 96+384(%rsi),%ymm8,%ymm8 7183 vmovdqu %ymm3,0+384(%rdi) 7184 vmovdqu %ymm0,32+384(%rdi) 7185 vmovdqu %ymm4,64+384(%rdi) 7186 vmovdqu %ymm8,96+384(%rdi) 7187 7188 leaq 512(%rsi),%rsi 7189 subq $512,%rbx 7190 cmpq $512,%rbx 7191 jg L$seal_avx2_main_loop 7192 7193 addq 0+0(%rdi),%r10 7194 adcq 8+0(%rdi),%r11 7195 adcq $1,%r12 7196 movq 0+0+0(%rbp),%rdx 7197 movq %rdx,%r15 7198 mulxq %r10,%r13,%r14 7199 mulxq %r11,%rax,%rdx 7200 imulq %r12,%r15 7201 addq %rax,%r14 7202 adcq %rdx,%r15 7203 movq 8+0+0(%rbp),%rdx 7204 mulxq %r10,%r10,%rax 7205 addq %r10,%r14 7206 mulxq %r11,%r11,%r9 7207 adcq %r11,%r15 7208 adcq $0,%r9 7209 imulq %r12,%rdx 7210 addq %rax,%r15 7211 adcq %rdx,%r9 7212 movq %r13,%r10 7213 movq %r14,%r11 7214 movq %r15,%r12 7215 andq $3,%r12 7216 movq %r15,%r13 7217 andq $-4,%r13 7218 movq %r9,%r14 7219 shrdq $2,%r9,%r15 7220 shrq $2,%r9 7221 addq %r13,%r15 7222 adcq %r14,%r9 7223 addq %r15,%r10 7224 adcq %r9,%r11 7225 adcq 
$0,%r12 7226 addq 0+16(%rdi),%r10 7227 adcq 8+16(%rdi),%r11 7228 adcq $1,%r12 7229 movq 0+0+0(%rbp),%rdx 7230 movq %rdx,%r15 7231 mulxq %r10,%r13,%r14 7232 mulxq %r11,%rax,%rdx 7233 imulq %r12,%r15 7234 addq %rax,%r14 7235 adcq %rdx,%r15 7236 movq 8+0+0(%rbp),%rdx 7237 mulxq %r10,%r10,%rax 7238 addq %r10,%r14 7239 mulxq %r11,%r11,%r9 7240 adcq %r11,%r15 7241 adcq $0,%r9 7242 imulq %r12,%rdx 7243 addq %rax,%r15 7244 adcq %rdx,%r9 7245 movq %r13,%r10 7246 movq %r14,%r11 7247 movq %r15,%r12 7248 andq $3,%r12 7249 movq %r15,%r13 7250 andq $-4,%r13 7251 movq %r9,%r14 7252 shrdq $2,%r9,%r15 7253 shrq $2,%r9 7254 addq %r13,%r15 7255 adcq %r14,%r9 7256 addq %r15,%r10 7257 adcq %r9,%r11 7258 adcq $0,%r12 7259 7260 leaq 32(%rdi),%rdi 7261 movq $10,%rcx 7262 xorq %r8,%r8 7263 7264 cmpq $384,%rbx 7265 ja L$seal_avx2_tail_512 7266 cmpq $256,%rbx 7267 ja L$seal_avx2_tail_384 7268 cmpq $128,%rbx 7269 ja L$seal_avx2_tail_256 7270 7271L$seal_avx2_tail_128: 7272 vmovdqa L$chacha20_consts(%rip),%ymm0 7273 vmovdqa 0+64(%rbp),%ymm4 7274 vmovdqa 0+96(%rbp),%ymm8 7275 vmovdqa L$avx2_inc(%rip),%ymm12 7276 vpaddd 0+160(%rbp),%ymm12,%ymm12 7277 vmovdqa %ymm12,0+160(%rbp) 7278 7279L$seal_avx2_tail_128_rounds_and_3xhash: 7280 addq 0+0(%rdi),%r10 7281 adcq 8+0(%rdi),%r11 7282 adcq $1,%r12 7283 movq 0+0+0(%rbp),%rdx 7284 movq %rdx,%r15 7285 mulxq %r10,%r13,%r14 7286 mulxq %r11,%rax,%rdx 7287 imulq %r12,%r15 7288 addq %rax,%r14 7289 adcq %rdx,%r15 7290 movq 8+0+0(%rbp),%rdx 7291 mulxq %r10,%r10,%rax 7292 addq %r10,%r14 7293 mulxq %r11,%r11,%r9 7294 adcq %r11,%r15 7295 adcq $0,%r9 7296 imulq %r12,%rdx 7297 addq %rax,%r15 7298 adcq %rdx,%r9 7299 movq %r13,%r10 7300 movq %r14,%r11 7301 movq %r15,%r12 7302 andq $3,%r12 7303 movq %r15,%r13 7304 andq $-4,%r13 7305 movq %r9,%r14 7306 shrdq $2,%r9,%r15 7307 shrq $2,%r9 7308 addq %r13,%r15 7309 adcq %r14,%r9 7310 addq %r15,%r10 7311 adcq %r9,%r11 7312 adcq $0,%r12 7313 7314 leaq 16(%rdi),%rdi 7315L$seal_avx2_tail_128_rounds_and_2xhash: 7316 vpaddd %ymm4,%ymm0,%ymm0 7317 vpxor %ymm0,%ymm12,%ymm12 7318 vpshufb L$rol16(%rip),%ymm12,%ymm12 7319 vpaddd %ymm12,%ymm8,%ymm8 7320 vpxor %ymm8,%ymm4,%ymm4 7321 vpsrld $20,%ymm4,%ymm3 7322 vpslld $12,%ymm4,%ymm4 7323 vpxor %ymm3,%ymm4,%ymm4 7324 vpaddd %ymm4,%ymm0,%ymm0 7325 vpxor %ymm0,%ymm12,%ymm12 7326 vpshufb L$rol8(%rip),%ymm12,%ymm12 7327 vpaddd %ymm12,%ymm8,%ymm8 7328 vpxor %ymm8,%ymm4,%ymm4 7329 vpslld $7,%ymm4,%ymm3 7330 vpsrld $25,%ymm4,%ymm4 7331 vpxor %ymm3,%ymm4,%ymm4 7332 vpalignr $12,%ymm12,%ymm12,%ymm12 7333 vpalignr $8,%ymm8,%ymm8,%ymm8 7334 vpalignr $4,%ymm4,%ymm4,%ymm4 7335 addq 0+0(%rdi),%r10 7336 adcq 8+0(%rdi),%r11 7337 adcq $1,%r12 7338 movq 0+0+0(%rbp),%rdx 7339 movq %rdx,%r15 7340 mulxq %r10,%r13,%r14 7341 mulxq %r11,%rax,%rdx 7342 imulq %r12,%r15 7343 addq %rax,%r14 7344 adcq %rdx,%r15 7345 movq 8+0+0(%rbp),%rdx 7346 mulxq %r10,%r10,%rax 7347 addq %r10,%r14 7348 mulxq %r11,%r11,%r9 7349 adcq %r11,%r15 7350 adcq $0,%r9 7351 imulq %r12,%rdx 7352 addq %rax,%r15 7353 adcq %rdx,%r9 7354 movq %r13,%r10 7355 movq %r14,%r11 7356 movq %r15,%r12 7357 andq $3,%r12 7358 movq %r15,%r13 7359 andq $-4,%r13 7360 movq %r9,%r14 7361 shrdq $2,%r9,%r15 7362 shrq $2,%r9 7363 addq %r13,%r15 7364 adcq %r14,%r9 7365 addq %r15,%r10 7366 adcq %r9,%r11 7367 adcq $0,%r12 7368 vpaddd %ymm4,%ymm0,%ymm0 7369 vpxor %ymm0,%ymm12,%ymm12 7370 vpshufb L$rol16(%rip),%ymm12,%ymm12 7371 vpaddd %ymm12,%ymm8,%ymm8 7372 vpxor %ymm8,%ymm4,%ymm4 7373 vpsrld $20,%ymm4,%ymm3 7374 vpslld $12,%ymm4,%ymm4 7375 vpxor %ymm3,%ymm4,%ymm4 7376 vpaddd %ymm4,%ymm0,%ymm0 
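# Remainder of this ChaCha20 quarter-round: the 8-bit rotate of the d row
# (%ymm12) is a byte shuffle against L$rol8; the 7-bit rotate of the b row
# (%ymm4) is done with a shift/shift/xor triple, since it is not byte-aligned.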
7377 vpxor %ymm0,%ymm12,%ymm12 7378 vpshufb L$rol8(%rip),%ymm12,%ymm12 7379 vpaddd %ymm12,%ymm8,%ymm8 7380 vpxor %ymm8,%ymm4,%ymm4 7381 vpslld $7,%ymm4,%ymm3 7382 vpsrld $25,%ymm4,%ymm4 7383 vpxor %ymm3,%ymm4,%ymm4 7384 vpalignr $4,%ymm12,%ymm12,%ymm12 7385 vpalignr $8,%ymm8,%ymm8,%ymm8 7386 vpalignr $12,%ymm4,%ymm4,%ymm4 7387 addq 0+16(%rdi),%r10 7388 adcq 8+16(%rdi),%r11 7389 adcq $1,%r12 7390 movq 0+0+0(%rbp),%rdx 7391 movq %rdx,%r15 7392 mulxq %r10,%r13,%r14 7393 mulxq %r11,%rax,%rdx 7394 imulq %r12,%r15 7395 addq %rax,%r14 7396 adcq %rdx,%r15 7397 movq 8+0+0(%rbp),%rdx 7398 mulxq %r10,%r10,%rax 7399 addq %r10,%r14 7400 mulxq %r11,%r11,%r9 7401 adcq %r11,%r15 7402 adcq $0,%r9 7403 imulq %r12,%rdx 7404 addq %rax,%r15 7405 adcq %rdx,%r9 7406 movq %r13,%r10 7407 movq %r14,%r11 7408 movq %r15,%r12 7409 andq $3,%r12 7410 movq %r15,%r13 7411 andq $-4,%r13 7412 movq %r9,%r14 7413 shrdq $2,%r9,%r15 7414 shrq $2,%r9 7415 addq %r13,%r15 7416 adcq %r14,%r9 7417 addq %r15,%r10 7418 adcq %r9,%r11 7419 adcq $0,%r12 7420 7421 leaq 32(%rdi),%rdi 7422 decq %rcx 7423 jg L$seal_avx2_tail_128_rounds_and_3xhash 7424 decq %r8 7425 jge L$seal_avx2_tail_128_rounds_and_2xhash 7426 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 7427 vpaddd 0+64(%rbp),%ymm4,%ymm4 7428 vpaddd 0+96(%rbp),%ymm8,%ymm8 7429 vpaddd 0+160(%rbp),%ymm12,%ymm12 7430 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 7431 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 7432 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 7433 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 7434 vmovdqa %ymm3,%ymm8 7435 7436 jmp L$seal_avx2_short_loop 7437 7438L$seal_avx2_tail_256: 7439 vmovdqa L$chacha20_consts(%rip),%ymm0 7440 vmovdqa 0+64(%rbp),%ymm4 7441 vmovdqa 0+96(%rbp),%ymm8 7442 vmovdqa %ymm0,%ymm1 7443 vmovdqa %ymm4,%ymm5 7444 vmovdqa %ymm8,%ymm9 7445 vmovdqa L$avx2_inc(%rip),%ymm12 7446 vpaddd 0+160(%rbp),%ymm12,%ymm13 7447 vpaddd %ymm13,%ymm12,%ymm12 7448 vmovdqa %ymm12,0+160(%rbp) 7449 vmovdqa %ymm13,0+192(%rbp) 7450 7451L$seal_avx2_tail_256_rounds_and_3xhash: 7452 addq 0+0(%rdi),%r10 7453 adcq 8+0(%rdi),%r11 7454 adcq $1,%r12 7455 movq 0+0+0(%rbp),%rax 7456 movq %rax,%r15 7457 mulq %r10 7458 movq %rax,%r13 7459 movq %rdx,%r14 7460 movq 0+0+0(%rbp),%rax 7461 mulq %r11 7462 imulq %r12,%r15 7463 addq %rax,%r14 7464 adcq %rdx,%r15 7465 movq 8+0+0(%rbp),%rax 7466 movq %rax,%r9 7467 mulq %r10 7468 addq %rax,%r14 7469 adcq $0,%rdx 7470 movq %rdx,%r10 7471 movq 8+0+0(%rbp),%rax 7472 mulq %r11 7473 addq %rax,%r15 7474 adcq $0,%rdx 7475 imulq %r12,%r9 7476 addq %r10,%r15 7477 adcq %rdx,%r9 7478 movq %r13,%r10 7479 movq %r14,%r11 7480 movq %r15,%r12 7481 andq $3,%r12 7482 movq %r15,%r13 7483 andq $-4,%r13 7484 movq %r9,%r14 7485 shrdq $2,%r9,%r15 7486 shrq $2,%r9 7487 addq %r13,%r15 7488 adcq %r14,%r9 7489 addq %r15,%r10 7490 adcq %r9,%r11 7491 adcq $0,%r12 7492 7493 leaq 16(%rdi),%rdi 7494L$seal_avx2_tail_256_rounds_and_2xhash: 7495 vpaddd %ymm4,%ymm0,%ymm0 7496 vpxor %ymm0,%ymm12,%ymm12 7497 vpshufb L$rol16(%rip),%ymm12,%ymm12 7498 vpaddd %ymm12,%ymm8,%ymm8 7499 vpxor %ymm8,%ymm4,%ymm4 7500 vpsrld $20,%ymm4,%ymm3 7501 vpslld $12,%ymm4,%ymm4 7502 vpxor %ymm3,%ymm4,%ymm4 7503 vpaddd %ymm4,%ymm0,%ymm0 7504 vpxor %ymm0,%ymm12,%ymm12 7505 vpshufb L$rol8(%rip),%ymm12,%ymm12 7506 vpaddd %ymm12,%ymm8,%ymm8 7507 vpxor %ymm8,%ymm4,%ymm4 7508 vpslld $7,%ymm4,%ymm3 7509 vpsrld $25,%ymm4,%ymm4 7510 vpxor %ymm3,%ymm4,%ymm4 7511 vpalignr $12,%ymm12,%ymm12,%ymm12 7512 vpalignr $8,%ymm8,%ymm8,%ymm8 7513 vpalignr $4,%ymm4,%ymm4,%ymm4 7514 vpaddd %ymm5,%ymm1,%ymm1 7515 vpxor %ymm1,%ymm13,%ymm13 7516 vpshufb 
L$rol16(%rip),%ymm13,%ymm13 7517 vpaddd %ymm13,%ymm9,%ymm9 7518 vpxor %ymm9,%ymm5,%ymm5 7519 vpsrld $20,%ymm5,%ymm3 7520 vpslld $12,%ymm5,%ymm5 7521 vpxor %ymm3,%ymm5,%ymm5 7522 vpaddd %ymm5,%ymm1,%ymm1 7523 vpxor %ymm1,%ymm13,%ymm13 7524 vpshufb L$rol8(%rip),%ymm13,%ymm13 7525 vpaddd %ymm13,%ymm9,%ymm9 7526 vpxor %ymm9,%ymm5,%ymm5 7527 vpslld $7,%ymm5,%ymm3 7528 vpsrld $25,%ymm5,%ymm5 7529 vpxor %ymm3,%ymm5,%ymm5 7530 vpalignr $12,%ymm13,%ymm13,%ymm13 7531 vpalignr $8,%ymm9,%ymm9,%ymm9 7532 vpalignr $4,%ymm5,%ymm5,%ymm5 7533 addq 0+0(%rdi),%r10 7534 adcq 8+0(%rdi),%r11 7535 adcq $1,%r12 7536 movq 0+0+0(%rbp),%rax 7537 movq %rax,%r15 7538 mulq %r10 7539 movq %rax,%r13 7540 movq %rdx,%r14 7541 movq 0+0+0(%rbp),%rax 7542 mulq %r11 7543 imulq %r12,%r15 7544 addq %rax,%r14 7545 adcq %rdx,%r15 7546 movq 8+0+0(%rbp),%rax 7547 movq %rax,%r9 7548 mulq %r10 7549 addq %rax,%r14 7550 adcq $0,%rdx 7551 movq %rdx,%r10 7552 movq 8+0+0(%rbp),%rax 7553 mulq %r11 7554 addq %rax,%r15 7555 adcq $0,%rdx 7556 imulq %r12,%r9 7557 addq %r10,%r15 7558 adcq %rdx,%r9 7559 movq %r13,%r10 7560 movq %r14,%r11 7561 movq %r15,%r12 7562 andq $3,%r12 7563 movq %r15,%r13 7564 andq $-4,%r13 7565 movq %r9,%r14 7566 shrdq $2,%r9,%r15 7567 shrq $2,%r9 7568 addq %r13,%r15 7569 adcq %r14,%r9 7570 addq %r15,%r10 7571 adcq %r9,%r11 7572 adcq $0,%r12 7573 vpaddd %ymm4,%ymm0,%ymm0 7574 vpxor %ymm0,%ymm12,%ymm12 7575 vpshufb L$rol16(%rip),%ymm12,%ymm12 7576 vpaddd %ymm12,%ymm8,%ymm8 7577 vpxor %ymm8,%ymm4,%ymm4 7578 vpsrld $20,%ymm4,%ymm3 7579 vpslld $12,%ymm4,%ymm4 7580 vpxor %ymm3,%ymm4,%ymm4 7581 vpaddd %ymm4,%ymm0,%ymm0 7582 vpxor %ymm0,%ymm12,%ymm12 7583 vpshufb L$rol8(%rip),%ymm12,%ymm12 7584 vpaddd %ymm12,%ymm8,%ymm8 7585 vpxor %ymm8,%ymm4,%ymm4 7586 vpslld $7,%ymm4,%ymm3 7587 vpsrld $25,%ymm4,%ymm4 7588 vpxor %ymm3,%ymm4,%ymm4 7589 vpalignr $4,%ymm12,%ymm12,%ymm12 7590 vpalignr $8,%ymm8,%ymm8,%ymm8 7591 vpalignr $12,%ymm4,%ymm4,%ymm4 7592 vpaddd %ymm5,%ymm1,%ymm1 7593 vpxor %ymm1,%ymm13,%ymm13 7594 vpshufb L$rol16(%rip),%ymm13,%ymm13 7595 vpaddd %ymm13,%ymm9,%ymm9 7596 vpxor %ymm9,%ymm5,%ymm5 7597 vpsrld $20,%ymm5,%ymm3 7598 vpslld $12,%ymm5,%ymm5 7599 vpxor %ymm3,%ymm5,%ymm5 7600 vpaddd %ymm5,%ymm1,%ymm1 7601 vpxor %ymm1,%ymm13,%ymm13 7602 vpshufb L$rol8(%rip),%ymm13,%ymm13 7603 vpaddd %ymm13,%ymm9,%ymm9 7604 vpxor %ymm9,%ymm5,%ymm5 7605 vpslld $7,%ymm5,%ymm3 7606 vpsrld $25,%ymm5,%ymm5 7607 vpxor %ymm3,%ymm5,%ymm5 7608 vpalignr $4,%ymm13,%ymm13,%ymm13 7609 vpalignr $8,%ymm9,%ymm9,%ymm9 7610 vpalignr $12,%ymm5,%ymm5,%ymm5 7611 addq 0+16(%rdi),%r10 7612 adcq 8+16(%rdi),%r11 7613 adcq $1,%r12 7614 movq 0+0+0(%rbp),%rax 7615 movq %rax,%r15 7616 mulq %r10 7617 movq %rax,%r13 7618 movq %rdx,%r14 7619 movq 0+0+0(%rbp),%rax 7620 mulq %r11 7621 imulq %r12,%r15 7622 addq %rax,%r14 7623 adcq %rdx,%r15 7624 movq 8+0+0(%rbp),%rax 7625 movq %rax,%r9 7626 mulq %r10 7627 addq %rax,%r14 7628 adcq $0,%rdx 7629 movq %rdx,%r10 7630 movq 8+0+0(%rbp),%rax 7631 mulq %r11 7632 addq %rax,%r15 7633 adcq $0,%rdx 7634 imulq %r12,%r9 7635 addq %r10,%r15 7636 adcq %rdx,%r9 7637 movq %r13,%r10 7638 movq %r14,%r11 7639 movq %r15,%r12 7640 andq $3,%r12 7641 movq %r15,%r13 7642 andq $-4,%r13 7643 movq %r9,%r14 7644 shrdq $2,%r9,%r15 7645 shrq $2,%r9 7646 addq %r13,%r15 7647 adcq %r14,%r9 7648 addq %r15,%r10 7649 adcq %r9,%r11 7650 adcq $0,%r12 7651 7652 leaq 32(%rdi),%rdi 7653 decq %rcx 7654 jg L$seal_avx2_tail_256_rounds_and_3xhash 7655 decq %r8 7656 jge L$seal_avx2_tail_256_rounds_and_2xhash 7657 vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 7658 vpaddd 
0+64(%rbp),%ymm5,%ymm5 7659 vpaddd 0+96(%rbp),%ymm9,%ymm9 7660 vpaddd 0+192(%rbp),%ymm13,%ymm13 7661 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 7662 vpaddd 0+64(%rbp),%ymm4,%ymm4 7663 vpaddd 0+96(%rbp),%ymm8,%ymm8 7664 vpaddd 0+160(%rbp),%ymm12,%ymm12 7665 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 7666 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 7667 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 7668 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 7669 vpxor 0+0(%rsi),%ymm3,%ymm3 7670 vpxor 32+0(%rsi),%ymm1,%ymm1 7671 vpxor 64+0(%rsi),%ymm5,%ymm5 7672 vpxor 96+0(%rsi),%ymm9,%ymm9 7673 vmovdqu %ymm3,0+0(%rdi) 7674 vmovdqu %ymm1,32+0(%rdi) 7675 vmovdqu %ymm5,64+0(%rdi) 7676 vmovdqu %ymm9,96+0(%rdi) 7677 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 7678 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 7679 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 7680 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 7681 vmovdqa %ymm3,%ymm8 7682 7683 movq $128,%rcx 7684 leaq 128(%rsi),%rsi 7685 subq $128,%rbx 7686 jmp L$seal_avx2_short_hash_remainder 7687 7688L$seal_avx2_tail_384: 7689 vmovdqa L$chacha20_consts(%rip),%ymm0 7690 vmovdqa 0+64(%rbp),%ymm4 7691 vmovdqa 0+96(%rbp),%ymm8 7692 vmovdqa %ymm0,%ymm1 7693 vmovdqa %ymm4,%ymm5 7694 vmovdqa %ymm8,%ymm9 7695 vmovdqa %ymm0,%ymm2 7696 vmovdqa %ymm4,%ymm6 7697 vmovdqa %ymm8,%ymm10 7698 vmovdqa L$avx2_inc(%rip),%ymm12 7699 vpaddd 0+160(%rbp),%ymm12,%ymm14 7700 vpaddd %ymm14,%ymm12,%ymm13 7701 vpaddd %ymm13,%ymm12,%ymm12 7702 vmovdqa %ymm12,0+160(%rbp) 7703 vmovdqa %ymm13,0+192(%rbp) 7704 vmovdqa %ymm14,0+224(%rbp) 7705 7706L$seal_avx2_tail_384_rounds_and_3xhash: 7707 addq 0+0(%rdi),%r10 7708 adcq 8+0(%rdi),%r11 7709 adcq $1,%r12 7710 movq 0+0+0(%rbp),%rax 7711 movq %rax,%r15 7712 mulq %r10 7713 movq %rax,%r13 7714 movq %rdx,%r14 7715 movq 0+0+0(%rbp),%rax 7716 mulq %r11 7717 imulq %r12,%r15 7718 addq %rax,%r14 7719 adcq %rdx,%r15 7720 movq 8+0+0(%rbp),%rax 7721 movq %rax,%r9 7722 mulq %r10 7723 addq %rax,%r14 7724 adcq $0,%rdx 7725 movq %rdx,%r10 7726 movq 8+0+0(%rbp),%rax 7727 mulq %r11 7728 addq %rax,%r15 7729 adcq $0,%rdx 7730 imulq %r12,%r9 7731 addq %r10,%r15 7732 adcq %rdx,%r9 7733 movq %r13,%r10 7734 movq %r14,%r11 7735 movq %r15,%r12 7736 andq $3,%r12 7737 movq %r15,%r13 7738 andq $-4,%r13 7739 movq %r9,%r14 7740 shrdq $2,%r9,%r15 7741 shrq $2,%r9 7742 addq %r13,%r15 7743 adcq %r14,%r9 7744 addq %r15,%r10 7745 adcq %r9,%r11 7746 adcq $0,%r12 7747 7748 leaq 16(%rdi),%rdi 7749L$seal_avx2_tail_384_rounds_and_2xhash: 7750 vpaddd %ymm4,%ymm0,%ymm0 7751 vpxor %ymm0,%ymm12,%ymm12 7752 vpshufb L$rol16(%rip),%ymm12,%ymm12 7753 vpaddd %ymm12,%ymm8,%ymm8 7754 vpxor %ymm8,%ymm4,%ymm4 7755 vpsrld $20,%ymm4,%ymm3 7756 vpslld $12,%ymm4,%ymm4 7757 vpxor %ymm3,%ymm4,%ymm4 7758 vpaddd %ymm4,%ymm0,%ymm0 7759 vpxor %ymm0,%ymm12,%ymm12 7760 vpshufb L$rol8(%rip),%ymm12,%ymm12 7761 vpaddd %ymm12,%ymm8,%ymm8 7762 vpxor %ymm8,%ymm4,%ymm4 7763 vpslld $7,%ymm4,%ymm3 7764 vpsrld $25,%ymm4,%ymm4 7765 vpxor %ymm3,%ymm4,%ymm4 7766 vpalignr $12,%ymm12,%ymm12,%ymm12 7767 vpalignr $8,%ymm8,%ymm8,%ymm8 7768 vpalignr $4,%ymm4,%ymm4,%ymm4 7769 vpaddd %ymm5,%ymm1,%ymm1 7770 vpxor %ymm1,%ymm13,%ymm13 7771 vpshufb L$rol16(%rip),%ymm13,%ymm13 7772 vpaddd %ymm13,%ymm9,%ymm9 7773 vpxor %ymm9,%ymm5,%ymm5 7774 vpsrld $20,%ymm5,%ymm3 7775 vpslld $12,%ymm5,%ymm5 7776 vpxor %ymm3,%ymm5,%ymm5 7777 vpaddd %ymm5,%ymm1,%ymm1 7778 vpxor %ymm1,%ymm13,%ymm13 7779 vpshufb L$rol8(%rip),%ymm13,%ymm13 7780 vpaddd %ymm13,%ymm9,%ymm9 7781 vpxor %ymm9,%ymm5,%ymm5 7782 vpslld $7,%ymm5,%ymm3 7783 vpsrld $25,%ymm5,%ymm5 7784 vpxor %ymm3,%ymm5,%ymm5 7785 vpalignr $12,%ymm13,%ymm13,%ymm13 7786 
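# vpalignr rotates this lane's rows (d %ymm13 by 12, c %ymm9 by 8, b %ymm5 by
# 4 bytes) so that the next quarter-round works on the diagonals of the state.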
vpalignr $8,%ymm9,%ymm9,%ymm9 7787 vpalignr $4,%ymm5,%ymm5,%ymm5 7788 addq 0+0(%rdi),%r10 7789 adcq 8+0(%rdi),%r11 7790 adcq $1,%r12 7791 movq 0+0+0(%rbp),%rax 7792 movq %rax,%r15 7793 mulq %r10 7794 movq %rax,%r13 7795 movq %rdx,%r14 7796 movq 0+0+0(%rbp),%rax 7797 mulq %r11 7798 imulq %r12,%r15 7799 addq %rax,%r14 7800 adcq %rdx,%r15 7801 movq 8+0+0(%rbp),%rax 7802 movq %rax,%r9 7803 mulq %r10 7804 addq %rax,%r14 7805 adcq $0,%rdx 7806 movq %rdx,%r10 7807 movq 8+0+0(%rbp),%rax 7808 mulq %r11 7809 addq %rax,%r15 7810 adcq $0,%rdx 7811 imulq %r12,%r9 7812 addq %r10,%r15 7813 adcq %rdx,%r9 7814 movq %r13,%r10 7815 movq %r14,%r11 7816 movq %r15,%r12 7817 andq $3,%r12 7818 movq %r15,%r13 7819 andq $-4,%r13 7820 movq %r9,%r14 7821 shrdq $2,%r9,%r15 7822 shrq $2,%r9 7823 addq %r13,%r15 7824 adcq %r14,%r9 7825 addq %r15,%r10 7826 adcq %r9,%r11 7827 adcq $0,%r12 7828 vpaddd %ymm6,%ymm2,%ymm2 7829 vpxor %ymm2,%ymm14,%ymm14 7830 vpshufb L$rol16(%rip),%ymm14,%ymm14 7831 vpaddd %ymm14,%ymm10,%ymm10 7832 vpxor %ymm10,%ymm6,%ymm6 7833 vpsrld $20,%ymm6,%ymm3 7834 vpslld $12,%ymm6,%ymm6 7835 vpxor %ymm3,%ymm6,%ymm6 7836 vpaddd %ymm6,%ymm2,%ymm2 7837 vpxor %ymm2,%ymm14,%ymm14 7838 vpshufb L$rol8(%rip),%ymm14,%ymm14 7839 vpaddd %ymm14,%ymm10,%ymm10 7840 vpxor %ymm10,%ymm6,%ymm6 7841 vpslld $7,%ymm6,%ymm3 7842 vpsrld $25,%ymm6,%ymm6 7843 vpxor %ymm3,%ymm6,%ymm6 7844 vpalignr $12,%ymm14,%ymm14,%ymm14 7845 vpalignr $8,%ymm10,%ymm10,%ymm10 7846 vpalignr $4,%ymm6,%ymm6,%ymm6 7847 vpaddd %ymm4,%ymm0,%ymm0 7848 vpxor %ymm0,%ymm12,%ymm12 7849 vpshufb L$rol16(%rip),%ymm12,%ymm12 7850 vpaddd %ymm12,%ymm8,%ymm8 7851 vpxor %ymm8,%ymm4,%ymm4 7852 vpsrld $20,%ymm4,%ymm3 7853 vpslld $12,%ymm4,%ymm4 7854 vpxor %ymm3,%ymm4,%ymm4 7855 vpaddd %ymm4,%ymm0,%ymm0 7856 vpxor %ymm0,%ymm12,%ymm12 7857 vpshufb L$rol8(%rip),%ymm12,%ymm12 7858 vpaddd %ymm12,%ymm8,%ymm8 7859 vpxor %ymm8,%ymm4,%ymm4 7860 vpslld $7,%ymm4,%ymm3 7861 vpsrld $25,%ymm4,%ymm4 7862 vpxor %ymm3,%ymm4,%ymm4 7863 vpalignr $4,%ymm12,%ymm12,%ymm12 7864 vpalignr $8,%ymm8,%ymm8,%ymm8 7865 vpalignr $12,%ymm4,%ymm4,%ymm4 7866 addq 0+16(%rdi),%r10 7867 adcq 8+16(%rdi),%r11 7868 adcq $1,%r12 7869 movq 0+0+0(%rbp),%rax 7870 movq %rax,%r15 7871 mulq %r10 7872 movq %rax,%r13 7873 movq %rdx,%r14 7874 movq 0+0+0(%rbp),%rax 7875 mulq %r11 7876 imulq %r12,%r15 7877 addq %rax,%r14 7878 adcq %rdx,%r15 7879 movq 8+0+0(%rbp),%rax 7880 movq %rax,%r9 7881 mulq %r10 7882 addq %rax,%r14 7883 adcq $0,%rdx 7884 movq %rdx,%r10 7885 movq 8+0+0(%rbp),%rax 7886 mulq %r11 7887 addq %rax,%r15 7888 adcq $0,%rdx 7889 imulq %r12,%r9 7890 addq %r10,%r15 7891 adcq %rdx,%r9 7892 movq %r13,%r10 7893 movq %r14,%r11 7894 movq %r15,%r12 7895 andq $3,%r12 7896 movq %r15,%r13 7897 andq $-4,%r13 7898 movq %r9,%r14 7899 shrdq $2,%r9,%r15 7900 shrq $2,%r9 7901 addq %r13,%r15 7902 adcq %r14,%r9 7903 addq %r15,%r10 7904 adcq %r9,%r11 7905 adcq $0,%r12 7906 vpaddd %ymm5,%ymm1,%ymm1 7907 vpxor %ymm1,%ymm13,%ymm13 7908 vpshufb L$rol16(%rip),%ymm13,%ymm13 7909 vpaddd %ymm13,%ymm9,%ymm9 7910 vpxor %ymm9,%ymm5,%ymm5 7911 vpsrld $20,%ymm5,%ymm3 7912 vpslld $12,%ymm5,%ymm5 7913 vpxor %ymm3,%ymm5,%ymm5 7914 vpaddd %ymm5,%ymm1,%ymm1 7915 vpxor %ymm1,%ymm13,%ymm13 7916 vpshufb L$rol8(%rip),%ymm13,%ymm13 7917 vpaddd %ymm13,%ymm9,%ymm9 7918 vpxor %ymm9,%ymm5,%ymm5 7919 vpslld $7,%ymm5,%ymm3 7920 vpsrld $25,%ymm5,%ymm5 7921 vpxor %ymm3,%ymm5,%ymm5 7922 vpalignr $4,%ymm13,%ymm13,%ymm13 7923 vpalignr $8,%ymm9,%ymm9,%ymm9 7924 vpalignr $12,%ymm5,%ymm5,%ymm5 7925 vpaddd %ymm6,%ymm2,%ymm2 7926 vpxor %ymm2,%ymm14,%ymm14 7927 
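# The 16-bit rotate of the d row (%ymm14 here) is a single vpshufb with the
# L$rol16 byte mask; only the 12-bit and 7-bit rotates need shift/shift/xor.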
	vpshufb L$rol16(%rip),%ymm14,%ymm14
	vpaddd %ymm14,%ymm10,%ymm10
	vpxor %ymm10,%ymm6,%ymm6
	vpsrld $20,%ymm6,%ymm3
	vpslld $12,%ymm6,%ymm6
	vpxor %ymm3,%ymm6,%ymm6
	vpaddd %ymm6,%ymm2,%ymm2
	vpxor %ymm2,%ymm14,%ymm14
	vpshufb L$rol8(%rip),%ymm14,%ymm14
	vpaddd %ymm14,%ymm10,%ymm10
	vpxor %ymm10,%ymm6,%ymm6
	vpslld $7,%ymm6,%ymm3
	vpsrld $25,%ymm6,%ymm6
	vpxor %ymm3,%ymm6,%ymm6
	vpalignr $4,%ymm14,%ymm14,%ymm14
	vpalignr $8,%ymm10,%ymm10,%ymm10
	vpalignr $12,%ymm6,%ymm6,%ymm6

	leaq 32(%rdi),%rdi
	decq %rcx
	jg L$seal_avx2_tail_384_rounds_and_3xhash
	decq %r8
	jge L$seal_avx2_tail_384_rounds_and_2xhash
	vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2
	vpaddd 0+64(%rbp),%ymm6,%ymm6
	vpaddd 0+96(%rbp),%ymm10,%ymm10
	vpaddd 0+224(%rbp),%ymm14,%ymm14
	vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1
	vpaddd 0+64(%rbp),%ymm5,%ymm5
	vpaddd 0+96(%rbp),%ymm9,%ymm9
	vpaddd 0+192(%rbp),%ymm13,%ymm13
	vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0
	vpaddd 0+64(%rbp),%ymm4,%ymm4
	vpaddd 0+96(%rbp),%ymm8,%ymm8
	vpaddd 0+160(%rbp),%ymm12,%ymm12
	vperm2i128 $0x02,%ymm2,%ymm6,%ymm3
	vperm2i128 $0x13,%ymm2,%ymm6,%ymm6
	vperm2i128 $0x02,%ymm10,%ymm14,%ymm2
	vperm2i128 $0x13,%ymm10,%ymm14,%ymm10
	vpxor 0+0(%rsi),%ymm3,%ymm3
	vpxor 32+0(%rsi),%ymm2,%ymm2
	vpxor 64+0(%rsi),%ymm6,%ymm6
	vpxor 96+0(%rsi),%ymm10,%ymm10
	vmovdqu %ymm3,0+0(%rdi)
	vmovdqu %ymm2,32+0(%rdi)
	vmovdqu %ymm6,64+0(%rdi)
	vmovdqu %ymm10,96+0(%rdi)
	vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
	vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
	vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
	vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
	vpxor 0+128(%rsi),%ymm3,%ymm3
	vpxor 32+128(%rsi),%ymm1,%ymm1
	vpxor 64+128(%rsi),%ymm5,%ymm5
	vpxor 96+128(%rsi),%ymm9,%ymm9
	vmovdqu %ymm3,0+128(%rdi)
	vmovdqu %ymm1,32+128(%rdi)
	vmovdqu %ymm5,64+128(%rdi)
	vmovdqu %ymm9,96+128(%rdi)
	vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
	vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
	vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
	vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
	vmovdqa %ymm3,%ymm8

	movq $256,%rcx
	leaq 256(%rsi),%rsi
	subq $256,%rbx
	jmp L$seal_avx2_short_hash_remainder

L$seal_avx2_tail_512:
	vmovdqa L$chacha20_consts(%rip),%ymm0
	vmovdqa 0+64(%rbp),%ymm4
	vmovdqa 0+96(%rbp),%ymm8
	vmovdqa %ymm0,%ymm1
	vmovdqa %ymm4,%ymm5
	vmovdqa %ymm8,%ymm9
	vmovdqa %ymm0,%ymm2
	vmovdqa %ymm4,%ymm6
	vmovdqa %ymm8,%ymm10
	vmovdqa %ymm0,%ymm3
	vmovdqa %ymm4,%ymm7
	vmovdqa %ymm8,%ymm11
	vmovdqa L$avx2_inc(%rip),%ymm12
	vpaddd 0+160(%rbp),%ymm12,%ymm15
	vpaddd %ymm15,%ymm12,%ymm14
	vpaddd %ymm14,%ymm12,%ymm13
	vpaddd %ymm13,%ymm12,%ymm12
	vmovdqa %ymm15,0+256(%rbp)
	vmovdqa %ymm14,0+224(%rbp)
	vmovdqa %ymm13,0+192(%rbp)
	vmovdqa %ymm12,0+160(%rbp)

L$seal_avx2_tail_512_rounds_and_3xhash:
	addq 0+0(%rdi),%r10
	adcq 8+0(%rdi),%r11
	adcq $1,%r12
	movq 0+0+0(%rbp),%rdx
	movq %rdx,%r15
	mulxq %r10,%r13,%r14
	mulxq %r11,%rax,%rdx
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0+0(%rbp),%rdx
	mulxq %r10,%r10,%rax
	addq %r10,%r14
	mulxq %r11,%r11,%r9
	adcq %r11,%r15
	adcq $0,%r9
	imulq %r12,%rdx
	addq %rax,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
8045 andq $-4,%r13 8046 movq %r9,%r14 8047 shrdq $2,%r9,%r15 8048 shrq $2,%r9 8049 addq %r13,%r15 8050 adcq %r14,%r9 8051 addq %r15,%r10 8052 adcq %r9,%r11 8053 adcq $0,%r12 8054 8055 leaq 16(%rdi),%rdi 8056L$seal_avx2_tail_512_rounds_and_2xhash: 8057 vmovdqa %ymm8,0+128(%rbp) 8058 vmovdqa L$rol16(%rip),%ymm8 8059 vpaddd %ymm7,%ymm3,%ymm3 8060 vpaddd %ymm6,%ymm2,%ymm2 8061 vpaddd %ymm5,%ymm1,%ymm1 8062 vpaddd %ymm4,%ymm0,%ymm0 8063 vpxor %ymm3,%ymm15,%ymm15 8064 vpxor %ymm2,%ymm14,%ymm14 8065 vpxor %ymm1,%ymm13,%ymm13 8066 vpxor %ymm0,%ymm12,%ymm12 8067 vpshufb %ymm8,%ymm15,%ymm15 8068 vpshufb %ymm8,%ymm14,%ymm14 8069 vpshufb %ymm8,%ymm13,%ymm13 8070 vpshufb %ymm8,%ymm12,%ymm12 8071 vpaddd %ymm15,%ymm11,%ymm11 8072 vpaddd %ymm14,%ymm10,%ymm10 8073 vpaddd %ymm13,%ymm9,%ymm9 8074 vpaddd 0+128(%rbp),%ymm12,%ymm8 8075 vpxor %ymm11,%ymm7,%ymm7 8076 vpxor %ymm10,%ymm6,%ymm6 8077 addq 0+0(%rdi),%r10 8078 adcq 8+0(%rdi),%r11 8079 adcq $1,%r12 8080 vpxor %ymm9,%ymm5,%ymm5 8081 vpxor %ymm8,%ymm4,%ymm4 8082 vmovdqa %ymm8,0+128(%rbp) 8083 vpsrld $20,%ymm7,%ymm8 8084 vpslld $32-20,%ymm7,%ymm7 8085 vpxor %ymm8,%ymm7,%ymm7 8086 vpsrld $20,%ymm6,%ymm8 8087 vpslld $32-20,%ymm6,%ymm6 8088 vpxor %ymm8,%ymm6,%ymm6 8089 vpsrld $20,%ymm5,%ymm8 8090 vpslld $32-20,%ymm5,%ymm5 8091 vpxor %ymm8,%ymm5,%ymm5 8092 vpsrld $20,%ymm4,%ymm8 8093 vpslld $32-20,%ymm4,%ymm4 8094 vpxor %ymm8,%ymm4,%ymm4 8095 vmovdqa L$rol8(%rip),%ymm8 8096 vpaddd %ymm7,%ymm3,%ymm3 8097 vpaddd %ymm6,%ymm2,%ymm2 8098 vpaddd %ymm5,%ymm1,%ymm1 8099 vpaddd %ymm4,%ymm0,%ymm0 8100 movq 0+0+0(%rbp),%rdx 8101 movq %rdx,%r15 8102 mulxq %r10,%r13,%r14 8103 mulxq %r11,%rax,%rdx 8104 imulq %r12,%r15 8105 addq %rax,%r14 8106 adcq %rdx,%r15 8107 vpxor %ymm3,%ymm15,%ymm15 8108 vpxor %ymm2,%ymm14,%ymm14 8109 vpxor %ymm1,%ymm13,%ymm13 8110 vpxor %ymm0,%ymm12,%ymm12 8111 vpshufb %ymm8,%ymm15,%ymm15 8112 vpshufb %ymm8,%ymm14,%ymm14 8113 vpshufb %ymm8,%ymm13,%ymm13 8114 vpshufb %ymm8,%ymm12,%ymm12 8115 vpaddd %ymm15,%ymm11,%ymm11 8116 vpaddd %ymm14,%ymm10,%ymm10 8117 vpaddd %ymm13,%ymm9,%ymm9 8118 vpaddd 0+128(%rbp),%ymm12,%ymm8 8119 vpxor %ymm11,%ymm7,%ymm7 8120 vpxor %ymm10,%ymm6,%ymm6 8121 vpxor %ymm9,%ymm5,%ymm5 8122 vpxor %ymm8,%ymm4,%ymm4 8123 vmovdqa %ymm8,0+128(%rbp) 8124 vpsrld $25,%ymm7,%ymm8 8125 vpslld $32-25,%ymm7,%ymm7 8126 vpxor %ymm8,%ymm7,%ymm7 8127 movq 8+0+0(%rbp),%rdx 8128 mulxq %r10,%r10,%rax 8129 addq %r10,%r14 8130 mulxq %r11,%r11,%r9 8131 adcq %r11,%r15 8132 adcq $0,%r9 8133 imulq %r12,%rdx 8134 vpsrld $25,%ymm6,%ymm8 8135 vpslld $32-25,%ymm6,%ymm6 8136 vpxor %ymm8,%ymm6,%ymm6 8137 vpsrld $25,%ymm5,%ymm8 8138 vpslld $32-25,%ymm5,%ymm5 8139 vpxor %ymm8,%ymm5,%ymm5 8140 vpsrld $25,%ymm4,%ymm8 8141 vpslld $32-25,%ymm4,%ymm4 8142 vpxor %ymm8,%ymm4,%ymm4 8143 vmovdqa 0+128(%rbp),%ymm8 8144 vpalignr $4,%ymm7,%ymm7,%ymm7 8145 vpalignr $8,%ymm11,%ymm11,%ymm11 8146 vpalignr $12,%ymm15,%ymm15,%ymm15 8147 vpalignr $4,%ymm6,%ymm6,%ymm6 8148 vpalignr $8,%ymm10,%ymm10,%ymm10 8149 vpalignr $12,%ymm14,%ymm14,%ymm14 8150 vpalignr $4,%ymm5,%ymm5,%ymm5 8151 vpalignr $8,%ymm9,%ymm9,%ymm9 8152 vpalignr $12,%ymm13,%ymm13,%ymm13 8153 vpalignr $4,%ymm4,%ymm4,%ymm4 8154 addq %rax,%r15 8155 adcq %rdx,%r9 8156 vpalignr $8,%ymm8,%ymm8,%ymm8 8157 vpalignr $12,%ymm12,%ymm12,%ymm12 8158 vmovdqa %ymm8,0+128(%rbp) 8159 vmovdqa L$rol16(%rip),%ymm8 8160 vpaddd %ymm7,%ymm3,%ymm3 8161 vpaddd %ymm6,%ymm2,%ymm2 8162 vpaddd %ymm5,%ymm1,%ymm1 8163 vpaddd %ymm4,%ymm0,%ymm0 8164 vpxor %ymm3,%ymm15,%ymm15 8165 vpxor %ymm2,%ymm14,%ymm14 8166 vpxor %ymm1,%ymm13,%ymm13 8167 vpxor 
%ymm0,%ymm12,%ymm12 8168 vpshufb %ymm8,%ymm15,%ymm15 8169 vpshufb %ymm8,%ymm14,%ymm14 8170 vpshufb %ymm8,%ymm13,%ymm13 8171 vpshufb %ymm8,%ymm12,%ymm12 8172 vpaddd %ymm15,%ymm11,%ymm11 8173 vpaddd %ymm14,%ymm10,%ymm10 8174 vpaddd %ymm13,%ymm9,%ymm9 8175 vpaddd 0+128(%rbp),%ymm12,%ymm8 8176 movq %r13,%r10 8177 movq %r14,%r11 8178 movq %r15,%r12 8179 andq $3,%r12 8180 movq %r15,%r13 8181 andq $-4,%r13 8182 movq %r9,%r14 8183 shrdq $2,%r9,%r15 8184 shrq $2,%r9 8185 addq %r13,%r15 8186 adcq %r14,%r9 8187 addq %r15,%r10 8188 adcq %r9,%r11 8189 adcq $0,%r12 8190 vpxor %ymm11,%ymm7,%ymm7 8191 vpxor %ymm10,%ymm6,%ymm6 8192 vpxor %ymm9,%ymm5,%ymm5 8193 vpxor %ymm8,%ymm4,%ymm4 8194 vmovdqa %ymm8,0+128(%rbp) 8195 vpsrld $20,%ymm7,%ymm8 8196 vpslld $32-20,%ymm7,%ymm7 8197 vpxor %ymm8,%ymm7,%ymm7 8198 vpsrld $20,%ymm6,%ymm8 8199 vpslld $32-20,%ymm6,%ymm6 8200 vpxor %ymm8,%ymm6,%ymm6 8201 vpsrld $20,%ymm5,%ymm8 8202 vpslld $32-20,%ymm5,%ymm5 8203 vpxor %ymm8,%ymm5,%ymm5 8204 vpsrld $20,%ymm4,%ymm8 8205 vpslld $32-20,%ymm4,%ymm4 8206 vpxor %ymm8,%ymm4,%ymm4 8207 vmovdqa L$rol8(%rip),%ymm8 8208 vpaddd %ymm7,%ymm3,%ymm3 8209 vpaddd %ymm6,%ymm2,%ymm2 8210 addq 0+16(%rdi),%r10 8211 adcq 8+16(%rdi),%r11 8212 adcq $1,%r12 8213 vpaddd %ymm5,%ymm1,%ymm1 8214 vpaddd %ymm4,%ymm0,%ymm0 8215 vpxor %ymm3,%ymm15,%ymm15 8216 vpxor %ymm2,%ymm14,%ymm14 8217 vpxor %ymm1,%ymm13,%ymm13 8218 vpxor %ymm0,%ymm12,%ymm12 8219 vpshufb %ymm8,%ymm15,%ymm15 8220 vpshufb %ymm8,%ymm14,%ymm14 8221 vpshufb %ymm8,%ymm13,%ymm13 8222 vpshufb %ymm8,%ymm12,%ymm12 8223 vpaddd %ymm15,%ymm11,%ymm11 8224 vpaddd %ymm14,%ymm10,%ymm10 8225 vpaddd %ymm13,%ymm9,%ymm9 8226 vpaddd 0+128(%rbp),%ymm12,%ymm8 8227 vpxor %ymm11,%ymm7,%ymm7 8228 vpxor %ymm10,%ymm6,%ymm6 8229 vpxor %ymm9,%ymm5,%ymm5 8230 vpxor %ymm8,%ymm4,%ymm4 8231 vmovdqa %ymm8,0+128(%rbp) 8232 vpsrld $25,%ymm7,%ymm8 8233 movq 0+0+0(%rbp),%rdx 8234 movq %rdx,%r15 8235 mulxq %r10,%r13,%r14 8236 mulxq %r11,%rax,%rdx 8237 imulq %r12,%r15 8238 addq %rax,%r14 8239 adcq %rdx,%r15 8240 vpslld $32-25,%ymm7,%ymm7 8241 vpxor %ymm8,%ymm7,%ymm7 8242 vpsrld $25,%ymm6,%ymm8 8243 vpslld $32-25,%ymm6,%ymm6 8244 vpxor %ymm8,%ymm6,%ymm6 8245 vpsrld $25,%ymm5,%ymm8 8246 vpslld $32-25,%ymm5,%ymm5 8247 vpxor %ymm8,%ymm5,%ymm5 8248 vpsrld $25,%ymm4,%ymm8 8249 vpslld $32-25,%ymm4,%ymm4 8250 vpxor %ymm8,%ymm4,%ymm4 8251 vmovdqa 0+128(%rbp),%ymm8 8252 vpalignr $12,%ymm7,%ymm7,%ymm7 8253 vpalignr $8,%ymm11,%ymm11,%ymm11 8254 vpalignr $4,%ymm15,%ymm15,%ymm15 8255 vpalignr $12,%ymm6,%ymm6,%ymm6 8256 vpalignr $8,%ymm10,%ymm10,%ymm10 8257 vpalignr $4,%ymm14,%ymm14,%ymm14 8258 vpalignr $12,%ymm5,%ymm5,%ymm5 8259 vpalignr $8,%ymm9,%ymm9,%ymm9 8260 movq 8+0+0(%rbp),%rdx 8261 mulxq %r10,%r10,%rax 8262 addq %r10,%r14 8263 mulxq %r11,%r11,%r9 8264 adcq %r11,%r15 8265 adcq $0,%r9 8266 imulq %r12,%rdx 8267 vpalignr $4,%ymm13,%ymm13,%ymm13 8268 vpalignr $12,%ymm4,%ymm4,%ymm4 8269 vpalignr $8,%ymm8,%ymm8,%ymm8 8270 vpalignr $4,%ymm12,%ymm12,%ymm12 8271 8272 8273 8274 8275 8276 8277 8278 8279 8280 8281 8282 8283 8284 8285 8286 8287 addq %rax,%r15 8288 adcq %rdx,%r9 8289 8290 8291 8292 8293 8294 8295 8296 8297 8298 8299 8300 8301 8302 8303 8304 8305 8306 8307 8308 8309 movq %r13,%r10 8310 movq %r14,%r11 8311 movq %r15,%r12 8312 andq $3,%r12 8313 movq %r15,%r13 8314 andq $-4,%r13 8315 movq %r9,%r14 8316 shrdq $2,%r9,%r15 8317 shrq $2,%r9 8318 addq %r13,%r15 8319 adcq %r14,%r9 8320 addq %r15,%r10 8321 adcq %r9,%r11 8322 adcq $0,%r12 8323 8324 leaq 32(%rdi),%rdi 8325 decq %rcx 8326 jg L$seal_avx2_tail_512_rounds_and_3xhash 8327 decq 
%r8 8328 jge L$seal_avx2_tail_512_rounds_and_2xhash 8329 vpaddd L$chacha20_consts(%rip),%ymm3,%ymm3 8330 vpaddd 0+64(%rbp),%ymm7,%ymm7 8331 vpaddd 0+96(%rbp),%ymm11,%ymm11 8332 vpaddd 0+256(%rbp),%ymm15,%ymm15 8333 vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 8334 vpaddd 0+64(%rbp),%ymm6,%ymm6 8335 vpaddd 0+96(%rbp),%ymm10,%ymm10 8336 vpaddd 0+224(%rbp),%ymm14,%ymm14 8337 vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 8338 vpaddd 0+64(%rbp),%ymm5,%ymm5 8339 vpaddd 0+96(%rbp),%ymm9,%ymm9 8340 vpaddd 0+192(%rbp),%ymm13,%ymm13 8341 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 8342 vpaddd 0+64(%rbp),%ymm4,%ymm4 8343 vpaddd 0+96(%rbp),%ymm8,%ymm8 8344 vpaddd 0+160(%rbp),%ymm12,%ymm12 8345 8346 vmovdqa %ymm0,0+128(%rbp) 8347 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 8348 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 8349 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 8350 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 8351 vpxor 0+0(%rsi),%ymm0,%ymm0 8352 vpxor 32+0(%rsi),%ymm3,%ymm3 8353 vpxor 64+0(%rsi),%ymm7,%ymm7 8354 vpxor 96+0(%rsi),%ymm11,%ymm11 8355 vmovdqu %ymm0,0+0(%rdi) 8356 vmovdqu %ymm3,32+0(%rdi) 8357 vmovdqu %ymm7,64+0(%rdi) 8358 vmovdqu %ymm11,96+0(%rdi) 8359 8360 vmovdqa 0+128(%rbp),%ymm0 8361 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 8362 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 8363 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 8364 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 8365 vpxor 0+128(%rsi),%ymm3,%ymm3 8366 vpxor 32+128(%rsi),%ymm2,%ymm2 8367 vpxor 64+128(%rsi),%ymm6,%ymm6 8368 vpxor 96+128(%rsi),%ymm10,%ymm10 8369 vmovdqu %ymm3,0+128(%rdi) 8370 vmovdqu %ymm2,32+128(%rdi) 8371 vmovdqu %ymm6,64+128(%rdi) 8372 vmovdqu %ymm10,96+128(%rdi) 8373 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 8374 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 8375 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 8376 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 8377 vpxor 0+256(%rsi),%ymm3,%ymm3 8378 vpxor 32+256(%rsi),%ymm1,%ymm1 8379 vpxor 64+256(%rsi),%ymm5,%ymm5 8380 vpxor 96+256(%rsi),%ymm9,%ymm9 8381 vmovdqu %ymm3,0+256(%rdi) 8382 vmovdqu %ymm1,32+256(%rdi) 8383 vmovdqu %ymm5,64+256(%rdi) 8384 vmovdqu %ymm9,96+256(%rdi) 8385 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 8386 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 8387 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 8388 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 8389 vmovdqa %ymm3,%ymm8 8390 8391 movq $384,%rcx 8392 leaq 384(%rsi),%rsi 8393 subq $384,%rbx 8394 jmp L$seal_avx2_short_hash_remainder 8395 8396L$seal_avx2_320: 8397 vmovdqa %ymm0,%ymm1 8398 vmovdqa %ymm0,%ymm2 8399 vmovdqa %ymm4,%ymm5 8400 vmovdqa %ymm4,%ymm6 8401 vmovdqa %ymm8,%ymm9 8402 vmovdqa %ymm8,%ymm10 8403 vpaddd L$avx2_inc(%rip),%ymm12,%ymm13 8404 vpaddd L$avx2_inc(%rip),%ymm13,%ymm14 8405 vmovdqa %ymm4,%ymm7 8406 vmovdqa %ymm8,%ymm11 8407 vmovdqa %ymm12,0+160(%rbp) 8408 vmovdqa %ymm13,0+192(%rbp) 8409 vmovdqa %ymm14,0+224(%rbp) 8410 movq $10,%r10 8411L$seal_avx2_320_rounds: 8412 vpaddd %ymm4,%ymm0,%ymm0 8413 vpxor %ymm0,%ymm12,%ymm12 8414 vpshufb L$rol16(%rip),%ymm12,%ymm12 8415 vpaddd %ymm12,%ymm8,%ymm8 8416 vpxor %ymm8,%ymm4,%ymm4 8417 vpsrld $20,%ymm4,%ymm3 8418 vpslld $12,%ymm4,%ymm4 8419 vpxor %ymm3,%ymm4,%ymm4 8420 vpaddd %ymm4,%ymm0,%ymm0 8421 vpxor %ymm0,%ymm12,%ymm12 8422 vpshufb L$rol8(%rip),%ymm12,%ymm12 8423 vpaddd %ymm12,%ymm8,%ymm8 8424 vpxor %ymm8,%ymm4,%ymm4 8425 vpslld $7,%ymm4,%ymm3 8426 vpsrld $25,%ymm4,%ymm4 8427 vpxor %ymm3,%ymm4,%ymm4 8428 vpalignr $12,%ymm12,%ymm12,%ymm12 8429 vpalignr $8,%ymm8,%ymm8,%ymm8 8430 vpalignr $4,%ymm4,%ymm4,%ymm4 8431 vpaddd %ymm5,%ymm1,%ymm1 8432 vpxor %ymm1,%ymm13,%ymm13 8433 vpshufb L$rol16(%rip),%ymm13,%ymm13 8434 vpaddd %ymm13,%ymm9,%ymm9 8435 vpxor 
%ymm9,%ymm5,%ymm5 8436 vpsrld $20,%ymm5,%ymm3 8437 vpslld $12,%ymm5,%ymm5 8438 vpxor %ymm3,%ymm5,%ymm5 8439 vpaddd %ymm5,%ymm1,%ymm1 8440 vpxor %ymm1,%ymm13,%ymm13 8441 vpshufb L$rol8(%rip),%ymm13,%ymm13 8442 vpaddd %ymm13,%ymm9,%ymm9 8443 vpxor %ymm9,%ymm5,%ymm5 8444 vpslld $7,%ymm5,%ymm3 8445 vpsrld $25,%ymm5,%ymm5 8446 vpxor %ymm3,%ymm5,%ymm5 8447 vpalignr $12,%ymm13,%ymm13,%ymm13 8448 vpalignr $8,%ymm9,%ymm9,%ymm9 8449 vpalignr $4,%ymm5,%ymm5,%ymm5 8450 vpaddd %ymm6,%ymm2,%ymm2 8451 vpxor %ymm2,%ymm14,%ymm14 8452 vpshufb L$rol16(%rip),%ymm14,%ymm14 8453 vpaddd %ymm14,%ymm10,%ymm10 8454 vpxor %ymm10,%ymm6,%ymm6 8455 vpsrld $20,%ymm6,%ymm3 8456 vpslld $12,%ymm6,%ymm6 8457 vpxor %ymm3,%ymm6,%ymm6 8458 vpaddd %ymm6,%ymm2,%ymm2 8459 vpxor %ymm2,%ymm14,%ymm14 8460 vpshufb L$rol8(%rip),%ymm14,%ymm14 8461 vpaddd %ymm14,%ymm10,%ymm10 8462 vpxor %ymm10,%ymm6,%ymm6 8463 vpslld $7,%ymm6,%ymm3 8464 vpsrld $25,%ymm6,%ymm6 8465 vpxor %ymm3,%ymm6,%ymm6 8466 vpalignr $12,%ymm14,%ymm14,%ymm14 8467 vpalignr $8,%ymm10,%ymm10,%ymm10 8468 vpalignr $4,%ymm6,%ymm6,%ymm6 8469 vpaddd %ymm4,%ymm0,%ymm0 8470 vpxor %ymm0,%ymm12,%ymm12 8471 vpshufb L$rol16(%rip),%ymm12,%ymm12 8472 vpaddd %ymm12,%ymm8,%ymm8 8473 vpxor %ymm8,%ymm4,%ymm4 8474 vpsrld $20,%ymm4,%ymm3 8475 vpslld $12,%ymm4,%ymm4 8476 vpxor %ymm3,%ymm4,%ymm4 8477 vpaddd %ymm4,%ymm0,%ymm0 8478 vpxor %ymm0,%ymm12,%ymm12 8479 vpshufb L$rol8(%rip),%ymm12,%ymm12 8480 vpaddd %ymm12,%ymm8,%ymm8 8481 vpxor %ymm8,%ymm4,%ymm4 8482 vpslld $7,%ymm4,%ymm3 8483 vpsrld $25,%ymm4,%ymm4 8484 vpxor %ymm3,%ymm4,%ymm4 8485 vpalignr $4,%ymm12,%ymm12,%ymm12 8486 vpalignr $8,%ymm8,%ymm8,%ymm8 8487 vpalignr $12,%ymm4,%ymm4,%ymm4 8488 vpaddd %ymm5,%ymm1,%ymm1 8489 vpxor %ymm1,%ymm13,%ymm13 8490 vpshufb L$rol16(%rip),%ymm13,%ymm13 8491 vpaddd %ymm13,%ymm9,%ymm9 8492 vpxor %ymm9,%ymm5,%ymm5 8493 vpsrld $20,%ymm5,%ymm3 8494 vpslld $12,%ymm5,%ymm5 8495 vpxor %ymm3,%ymm5,%ymm5 8496 vpaddd %ymm5,%ymm1,%ymm1 8497 vpxor %ymm1,%ymm13,%ymm13 8498 vpshufb L$rol8(%rip),%ymm13,%ymm13 8499 vpaddd %ymm13,%ymm9,%ymm9 8500 vpxor %ymm9,%ymm5,%ymm5 8501 vpslld $7,%ymm5,%ymm3 8502 vpsrld $25,%ymm5,%ymm5 8503 vpxor %ymm3,%ymm5,%ymm5 8504 vpalignr $4,%ymm13,%ymm13,%ymm13 8505 vpalignr $8,%ymm9,%ymm9,%ymm9 8506 vpalignr $12,%ymm5,%ymm5,%ymm5 8507 vpaddd %ymm6,%ymm2,%ymm2 8508 vpxor %ymm2,%ymm14,%ymm14 8509 vpshufb L$rol16(%rip),%ymm14,%ymm14 8510 vpaddd %ymm14,%ymm10,%ymm10 8511 vpxor %ymm10,%ymm6,%ymm6 8512 vpsrld $20,%ymm6,%ymm3 8513 vpslld $12,%ymm6,%ymm6 8514 vpxor %ymm3,%ymm6,%ymm6 8515 vpaddd %ymm6,%ymm2,%ymm2 8516 vpxor %ymm2,%ymm14,%ymm14 8517 vpshufb L$rol8(%rip),%ymm14,%ymm14 8518 vpaddd %ymm14,%ymm10,%ymm10 8519 vpxor %ymm10,%ymm6,%ymm6 8520 vpslld $7,%ymm6,%ymm3 8521 vpsrld $25,%ymm6,%ymm6 8522 vpxor %ymm3,%ymm6,%ymm6 8523 vpalignr $4,%ymm14,%ymm14,%ymm14 8524 vpalignr $8,%ymm10,%ymm10,%ymm10 8525 vpalignr $12,%ymm6,%ymm6,%ymm6 8526 8527 decq %r10 8528 jne L$seal_avx2_320_rounds 8529 vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 8530 vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 8531 vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 8532 vpaddd %ymm7,%ymm4,%ymm4 8533 vpaddd %ymm7,%ymm5,%ymm5 8534 vpaddd %ymm7,%ymm6,%ymm6 8535 vpaddd %ymm11,%ymm8,%ymm8 8536 vpaddd %ymm11,%ymm9,%ymm9 8537 vpaddd %ymm11,%ymm10,%ymm10 8538 vpaddd 0+160(%rbp),%ymm12,%ymm12 8539 vpaddd 0+192(%rbp),%ymm13,%ymm13 8540 vpaddd 0+224(%rbp),%ymm14,%ymm14 8541 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 8542 8543 vpand L$clamp(%rip),%ymm3,%ymm3 8544 vmovdqa %ymm3,0+0(%rbp) 8545 8546 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 8547 vperm2i128 
$0x13,%ymm8,%ymm12,%ymm4 8548 vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 8549 vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 8550 vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 8551 vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 8552 vperm2i128 $0x02,%ymm2,%ymm6,%ymm9 8553 vperm2i128 $0x02,%ymm10,%ymm14,%ymm13 8554 vperm2i128 $0x13,%ymm2,%ymm6,%ymm2 8555 vperm2i128 $0x13,%ymm10,%ymm14,%ymm6 8556 jmp L$seal_avx2_short 8557 8558L$seal_avx2_192: 8559 vmovdqa %ymm0,%ymm1 8560 vmovdqa %ymm0,%ymm2 8561 vmovdqa %ymm4,%ymm5 8562 vmovdqa %ymm4,%ymm6 8563 vmovdqa %ymm8,%ymm9 8564 vmovdqa %ymm8,%ymm10 8565 vpaddd L$avx2_inc(%rip),%ymm12,%ymm13 8566 vmovdqa %ymm12,%ymm11 8567 vmovdqa %ymm13,%ymm15 8568 movq $10,%r10 8569L$seal_avx2_192_rounds: 8570 vpaddd %ymm4,%ymm0,%ymm0 8571 vpxor %ymm0,%ymm12,%ymm12 8572 vpshufb L$rol16(%rip),%ymm12,%ymm12 8573 vpaddd %ymm12,%ymm8,%ymm8 8574 vpxor %ymm8,%ymm4,%ymm4 8575 vpsrld $20,%ymm4,%ymm3 8576 vpslld $12,%ymm4,%ymm4 8577 vpxor %ymm3,%ymm4,%ymm4 8578 vpaddd %ymm4,%ymm0,%ymm0 8579 vpxor %ymm0,%ymm12,%ymm12 8580 vpshufb L$rol8(%rip),%ymm12,%ymm12 8581 vpaddd %ymm12,%ymm8,%ymm8 8582 vpxor %ymm8,%ymm4,%ymm4 8583 vpslld $7,%ymm4,%ymm3 8584 vpsrld $25,%ymm4,%ymm4 8585 vpxor %ymm3,%ymm4,%ymm4 8586 vpalignr $12,%ymm12,%ymm12,%ymm12 8587 vpalignr $8,%ymm8,%ymm8,%ymm8 8588 vpalignr $4,%ymm4,%ymm4,%ymm4 8589 vpaddd %ymm5,%ymm1,%ymm1 8590 vpxor %ymm1,%ymm13,%ymm13 8591 vpshufb L$rol16(%rip),%ymm13,%ymm13 8592 vpaddd %ymm13,%ymm9,%ymm9 8593 vpxor %ymm9,%ymm5,%ymm5 8594 vpsrld $20,%ymm5,%ymm3 8595 vpslld $12,%ymm5,%ymm5 8596 vpxor %ymm3,%ymm5,%ymm5 8597 vpaddd %ymm5,%ymm1,%ymm1 8598 vpxor %ymm1,%ymm13,%ymm13 8599 vpshufb L$rol8(%rip),%ymm13,%ymm13 8600 vpaddd %ymm13,%ymm9,%ymm9 8601 vpxor %ymm9,%ymm5,%ymm5 8602 vpslld $7,%ymm5,%ymm3 8603 vpsrld $25,%ymm5,%ymm5 8604 vpxor %ymm3,%ymm5,%ymm5 8605 vpalignr $12,%ymm13,%ymm13,%ymm13 8606 vpalignr $8,%ymm9,%ymm9,%ymm9 8607 vpalignr $4,%ymm5,%ymm5,%ymm5 8608 vpaddd %ymm4,%ymm0,%ymm0 8609 vpxor %ymm0,%ymm12,%ymm12 8610 vpshufb L$rol16(%rip),%ymm12,%ymm12 8611 vpaddd %ymm12,%ymm8,%ymm8 8612 vpxor %ymm8,%ymm4,%ymm4 8613 vpsrld $20,%ymm4,%ymm3 8614 vpslld $12,%ymm4,%ymm4 8615 vpxor %ymm3,%ymm4,%ymm4 8616 vpaddd %ymm4,%ymm0,%ymm0 8617 vpxor %ymm0,%ymm12,%ymm12 8618 vpshufb L$rol8(%rip),%ymm12,%ymm12 8619 vpaddd %ymm12,%ymm8,%ymm8 8620 vpxor %ymm8,%ymm4,%ymm4 8621 vpslld $7,%ymm4,%ymm3 8622 vpsrld $25,%ymm4,%ymm4 8623 vpxor %ymm3,%ymm4,%ymm4 8624 vpalignr $4,%ymm12,%ymm12,%ymm12 8625 vpalignr $8,%ymm8,%ymm8,%ymm8 8626 vpalignr $12,%ymm4,%ymm4,%ymm4 8627 vpaddd %ymm5,%ymm1,%ymm1 8628 vpxor %ymm1,%ymm13,%ymm13 8629 vpshufb L$rol16(%rip),%ymm13,%ymm13 8630 vpaddd %ymm13,%ymm9,%ymm9 8631 vpxor %ymm9,%ymm5,%ymm5 8632 vpsrld $20,%ymm5,%ymm3 8633 vpslld $12,%ymm5,%ymm5 8634 vpxor %ymm3,%ymm5,%ymm5 8635 vpaddd %ymm5,%ymm1,%ymm1 8636 vpxor %ymm1,%ymm13,%ymm13 8637 vpshufb L$rol8(%rip),%ymm13,%ymm13 8638 vpaddd %ymm13,%ymm9,%ymm9 8639 vpxor %ymm9,%ymm5,%ymm5 8640 vpslld $7,%ymm5,%ymm3 8641 vpsrld $25,%ymm5,%ymm5 8642 vpxor %ymm3,%ymm5,%ymm5 8643 vpalignr $4,%ymm13,%ymm13,%ymm13 8644 vpalignr $8,%ymm9,%ymm9,%ymm9 8645 vpalignr $12,%ymm5,%ymm5,%ymm5 8646 8647 decq %r10 8648 jne L$seal_avx2_192_rounds 8649 vpaddd %ymm2,%ymm0,%ymm0 8650 vpaddd %ymm2,%ymm1,%ymm1 8651 vpaddd %ymm6,%ymm4,%ymm4 8652 vpaddd %ymm6,%ymm5,%ymm5 8653 vpaddd %ymm10,%ymm8,%ymm8 8654 vpaddd %ymm10,%ymm9,%ymm9 8655 vpaddd %ymm11,%ymm12,%ymm12 8656 vpaddd %ymm15,%ymm13,%ymm13 8657 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 8658 8659 vpand L$clamp(%rip),%ymm3,%ymm3 8660 vmovdqa %ymm3,0+0(%rbp) 8661 8662 vperm2i128 
$0x13,%ymm0,%ymm4,%ymm0
	vperm2i128 $0x13,%ymm8,%ymm12,%ymm4
	vperm2i128 $0x02,%ymm1,%ymm5,%ymm8
	vperm2i128 $0x02,%ymm9,%ymm13,%ymm12
	vperm2i128 $0x13,%ymm1,%ymm5,%ymm1
	vperm2i128 $0x13,%ymm9,%ymm13,%ymm5
# Short-input finishing path: the Poly1305 key has been derived and the
# keystream needed for the whole message already sits in
# %ymm0,%ymm4,%ymm8,%ymm12,%ymm1,... - hash the additional data, then
# encrypt-and-hash the plaintext 32 bytes at a time.
L$seal_avx2_short:
	movq %r8,%r8
	call poly_hash_ad_internal
	xorq %rcx,%rcx
# Absorb %rcx bytes of already-written ciphertext into the Poly1305 state,
# 16 bytes per iteration, before producing any new ciphertext.
L$seal_avx2_short_hash_remainder:
	cmpq $16,%rcx
	jb L$seal_avx2_short_loop
	addq 0+0(%rdi),%r10
	adcq 8+0(%rdi),%r11
	adcq $1,%r12
	movq 0+0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r15
	adcq %r14,%r9
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	subq $16,%rcx
	addq $16,%rdi
	jmp L$seal_avx2_short_hash_remainder
# 32 bytes at a time: XOR the plaintext with the next keystream block, store
# the ciphertext, then feed those 32 bytes to Poly1305 as two blocks.
L$seal_avx2_short_loop:
	cmpq $32,%rbx
	jb L$seal_avx2_short_tail
	subq $32,%rbx

	vpxor (%rsi),%ymm0,%ymm0
	vmovdqu %ymm0,(%rdi)
	leaq 32(%rsi),%rsi

	addq 0+0(%rdi),%r10
	adcq 8+0(%rdi),%r11
	adcq $1,%r12
	movq 0+0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r15
	adcq %r14,%r9
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12
	addq 0+16(%rdi),%r10
	adcq 8+16(%rdi),%r11
	adcq $1,%r12
	movq 0+0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r15
	adcq %r14,%r9
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	leaq 32(%rdi),%rdi

# Rotate the queue of unused keystream blocks down into %ymm0.
	vmovdqa %ymm4,%ymm0
	vmovdqa %ymm8,%ymm4
	vmovdqa %ymm12,%ymm8
	vmovdqa %ymm1,%ymm12
	vmovdqa %ymm5,%ymm1
	vmovdqa %ymm9,%ymm5
	vmovdqa %ymm13,%ymm9
	vmovdqa %ymm2,%ymm13
	vmovdqa %ymm6,%ymm2
	jmp L$seal_avx2_short_loop
# 16..31 bytes left: encrypt one final 16-byte block with the low half of the
# current keystream register.
L$seal_avx2_short_tail:
	cmpq $16,%rbx
	jb L$seal_avx2_exit
	subq $16,%rbx
	vpxor (%rsi),%xmm0,%xmm3
	vmovdqu %xmm3,(%rdi)
	leaq 16(%rsi),%rsi
	addq 0+0(%rdi),%r10
	adcq 8+0(%rdi),%r11
	adcq $1,%r12
	movq 0+0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r15
	adcq %r14,%r9
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	leaq 16(%rdi),%rdi
	vextracti128 $1,%ymm0,%xmm0
# Fewer than 16 bytes remain; finish with the SSE tail code.
L$seal_avx2_exit:
	vzeroupper
	jmp L$seal_sse_tail_16


#endif