/*
 * GHASH helpers for 32-bit x86 (GNU as, AT&T syntax; preprocessed with cpp).
 *
 * All functions read their arguments from the stack and restore every
 * register they push (%ebp, %ebx, %esi, %edi).  Static tables are located
 * position-independently with a call/pop pair followed by a label-difference
 * leal.  The symbols are global but marked .hidden.
 *
 * This part of the file holds, in order:
 *   gcm_gmult_4bit_mmx, gcm_ghash_4bit_mmx  - table-driven, MMX registers
 *   gcm_init_clmul, gcm_gmult_clmul         - carry-less-multiply based
 */
#if defined(__i386__)
.text

/*
 * gcm_gmult_4bit_mmx(arg1, arg2)
 *
 * arg1 (20(%esp) after the pushes) -> 16-byte block; read byte by byte from
 *      offset 15 down to 0 and overwritten with the 16-byte result.
 * arg2 (24(%esp)) -> table addressed as (nibble << 4), i.e. 16 entries of
 *      16 bytes; the qword at entry+0 feeds %mm1, the one at entry+8 %mm0.
 * NOTE(review): presumably these are the Xi / Htable arguments of OpenSSL's
 * GHASH interface - confirm against the C prototype.
 *
 * Register roles:
 *   %eax        address of .Lrem_4bit (8-byte entries, indexed by 4 bits)
 *   %mm1:%mm0   128-bit accumulator (%mm1 is the upper half: its low four
 *               bits are shifted into the top of %mm0)
 *   %ebx        low dword of %mm0; its low nibble selects the .Lrem_4bit
 *               entry that is folded into %mm1 after each 4-bit shift
 *   %ecx/%edx   (low nibble << 4) / (high nibble << 4) of the current byte
 *   %ebp        index of the next input byte, counting down from 14
 * Clobbers %eax, %ecx, %edx, %mm0-%mm2 and flags; executes emms before
 * returning.
 */
.globl	gcm_gmult_4bit_mmx
.hidden	gcm_gmult_4bit_mmx
.type	gcm_gmult_4bit_mmx,@function
.align	16
gcm_gmult_4bit_mmx:
.L_gcm_gmult_4bit_mmx_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	/* PIC: %eax = run-time address of .Lrem_4bit */
	call	.L000pic_point
.L000pic_point:
	popl	%eax
	leal	.Lrem_4bit-.L000pic_point(%eax),%eax
	/* Start with byte 15: low nibble seeds the accumulator. */
	movzbl	15(%edi),%ebx
	xorl	%ecx,%ecx
	movl	%ebx,%edx
	movb	%dl,%cl
	movl	$14,%ebp
	shlb	$4,%cl
	andl	$240,%edx
	movq	8(%esi,%ecx,1),%mm0
	movq	(%esi,%ecx,1),%mm1
	movd	%mm0,%ebx
	jmp	.L001mmx_loop
.align	16
.L001mmx_loop:
	/* High nibble of the current byte; also fetch the next byte. */
	psrlq	$4,%mm0
	andl	$15,%ebx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	movb	(%edi,%ebp,1),%cl
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	decl	%ebp
	movd	%mm0,%ebx
	pxor	(%esi,%edx,1),%mm1
	movl	%ecx,%edx
	pxor	%mm2,%mm0
	/* Flags are still those of decl: leave once byte 0 has been fetched. */
	js	.L002mmx_break
	/* Low nibble of the byte just fetched. */
	shlb	$4,%cl
	andl	$15,%ebx
	psrlq	$4,%mm0
	andl	$240,%edx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%ecx,1),%mm1
	pxor	%mm2,%mm0
	jmp	.L001mmx_loop
.align	16
.L002mmx_break:
	/* Byte 0: low nibble, then high nibble. */
	shlb	$4,%cl
	andl	$15,%ebx
	psrlq	$4,%mm0
	andl	$240,%edx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%ecx,1),%mm1
	pxor	%mm2,%mm0
	psrlq	$4,%mm0
	andl	$15,%ebx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%edx,1),%mm1
	pxor	%mm2,%mm0
	/* Split %mm1:%mm0 into four dwords and store them byte-reversed,  */
	/* most significant dword at offset 0.                             */
	psrlq	$32,%mm0
	movd	%mm1,%edx
	psrlq	$32,%mm1
	movd	%mm0,%ecx
	movd	%mm1,%ebp
	bswap	%ebx
	bswap	%edx
	bswap	%ecx
	bswap	%ebp
	emms
	movl	%ebx,12(%edi)
	movl	%edx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%ebp,(%edi)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin

/*
 * gcm_ghash_4bit_mmx(arg1, arg2, arg3, arg4)
 *
 * arg1 (20(%esp)) -> 16-byte running state, read at entry and written back
 *      at exit.
 * arg2 (24(%esp)) -> 256-byte table (16 entries of two qwords).
 * arg3 (28(%esp)) -> input; consumed 16 bytes per outer iteration.
 * arg4 (32(%esp))  = input length in bytes; arg3+arg4 is the end pointer.
 * The outer loop runs its body before the first bound check and exits only
 * when the input pointer equals the end pointer exactly, so the length has
 * to be a non-zero multiple of 16.
 * NOTE(review): presumably Xi, Htable, inp, len of OpenSSL's GHASH
 * interface - confirm against the C prototype.
 *
 * A 64-byte aligned scratch frame is carved out below the saved registers
 * (offsets relative to the new %esp):
 *     0.. 15  per entry: low byte of ((low dword of qword@8) << 4)
 *    16..143  per entry: qword at entry+8
 *   144..271  per entry: qword at entry+0
 *   272..399  per entry: low  qword of the 128-bit entry shifted right by 4
 *   400..527  per entry: high qword of the 128-bit entry shifted right by 4
 *   528..535  state bytes 0..7  xor input (saved %mm6)
 *   536       state bytes 8..11 xor input (saved %ebx)
 *   544       arg1
 *   548       current input pointer
 *   552       end pointer
 *   556       %esp as it was after the four pushes
 *
 * %esi holds the address of .Lrem_8bit (16-bit entries).  pinsrw and pshufw
 * are used on MMX registers, so plain MMX is not sufficient to run this.
 * Clobbers %eax, %ecx, %edx, %mm0-%mm7 and flags; executes emms before
 * returning.
 */
.globl	gcm_ghash_4bit_mmx
.hidden	gcm_ghash_4bit_mmx
.type	gcm_ghash_4bit_mmx,@function
.align	16
gcm_ghash_4bit_mmx:
.L_gcm_ghash_4bit_mmx_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%eax
	movl	24(%esp),%ebx
	movl	28(%esp),%ecx
	movl	32(%esp),%edx
	movl	%esp,%ebp
	/* PIC: %esi = run-time address of .Lrem_8bit */
	call	.L003pic_point
.L003pic_point:
	popl	%esi
	leal	.Lrem_8bit-.L003pic_point(%esi),%esi
	subl	$544,%esp
	andl	$-64,%esp
	subl	$16,%esp
	addl	%ecx,%edx
	movl	%eax,544(%esp)
	movl	%edx,552(%esp)
	movl	%ebp,556(%esp)
	/* Bias the table pointer so all 256 bytes are reachable with 8-bit */
	/* displacements (-128..+120).                                      */
	addl	$128,%ebx
	leal	144(%esp),%edi
	leal	400(%esp),%ebp
	/* Build the scratch tables described above, one table entry per   */
	/* stanza; loads for the next entry are interleaved with the       */
	/* stores of the previous one.                                     */
	movl	-120(%ebx),%edx
	movq	-120(%ebx),%mm0
	movq	-128(%ebx),%mm3
	shll	$4,%edx
	movb	%dl,(%esp)
	movl	-104(%ebx),%edx
	movq	-104(%ebx),%mm2
	movq	-112(%ebx),%mm5
	movq	%mm0,-128(%edi)
	psrlq	$4,%mm0
	movq	%mm3,(%edi)
	movq	%mm3,%mm7
	psrlq	$4,%mm3
	shll	$4,%edx
	movb	%dl,1(%esp)
	movl	-88(%ebx),%edx
	movq	-88(%ebx),%mm1
	psllq	$60,%mm7
	movq	-96(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-120(%edi)
	psrlq	$4,%mm2
	movq	%mm5,8(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-128(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,(%ebp)
	shll	$4,%edx
	movb	%dl,2(%esp)
	movl	-72(%ebx),%edx
	movq	-72(%ebx),%mm0
	psllq	$60,%mm6
	movq	-80(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-112(%edi)
	psrlq	$4,%mm1
	movq	%mm4,16(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-120(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,8(%ebp)
	shll	$4,%edx
	movb	%dl,3(%esp)
	movl	-56(%ebx),%edx
	movq	-56(%ebx),%mm2
	psllq	$60,%mm7
	movq	-64(%ebx),%mm5
	por	%mm7,%mm1
	movq	%mm0,-104(%edi)
	psrlq	$4,%mm0
	movq	%mm3,24(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-112(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,16(%ebp)
	shll	$4,%edx
	movb	%dl,4(%esp)
	movl	-40(%ebx),%edx
	movq	-40(%ebx),%mm1
	psllq	$60,%mm6
	movq	-48(%ebx),%mm4
	por	%mm6,%mm0
	movq	%mm2,-96(%edi)
	psrlq	$4,%mm2
	movq	%mm5,32(%edi)
	movq	%mm5,%mm7
	movq	%mm0,-104(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,24(%ebp)
	shll	$4,%edx
	movb	%dl,5(%esp)
	movl	-24(%ebx),%edx
	movq	-24(%ebx),%mm0
	psllq	$60,%mm7
	movq	-32(%ebx),%mm3
	por	%mm7,%mm2
	movq	%mm1,-88(%edi)
	psrlq	$4,%mm1
	movq	%mm4,40(%edi)
	movq	%mm4,%mm6
	movq	%mm2,-96(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,32(%ebp)
	shll	$4,%edx
	movb	%dl,6(%esp)
	movl	-8(%ebx),%edx
	movq	-8(%ebx),%mm2
	psllq	$60,%mm6
	movq	-16(%ebx),%mm5
	por	%mm6,%mm1
	movq	%mm0,-80(%edi)
	psrlq	$4,%mm0
	movq	%mm3,48(%edi)
	movq	%mm3,%mm7
	movq	%mm1,-88(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,40(%ebp)
	shll	$4,%edx
	movb	%dl,7(%esp)
	movl	8(%ebx),%edx
	movq	8(%ebx),%mm1
	psllq	$60,%mm7
	movq	(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-72(%edi)
	psrlq	$4,%mm2
	movq	%mm5,56(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-80(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,48(%ebp)
	shll	$4,%edx
	movb	%dl,8(%esp)
	movl	24(%ebx),%edx
	movq	24(%ebx),%mm0
	psllq	$60,%mm6
	movq	16(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-64(%edi)
	psrlq	$4,%mm1
	movq	%mm4,64(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-72(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,56(%ebp)
	shll	$4,%edx
	movb	%dl,9(%esp)
	movl	40(%ebx),%edx
	movq	40(%ebx),%mm2
	psllq	$60,%mm7
	movq	32(%ebx),%mm5
	por	%mm7,%mm1
	movq	%mm0,-56(%edi)
	psrlq	$4,%mm0
	movq	%mm3,72(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-64(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,64(%ebp)
	shll	$4,%edx
	movb	%dl,10(%esp)
	movl	56(%ebx),%edx
	movq	56(%ebx),%mm1
	psllq	$60,%mm6
	movq	48(%ebx),%mm4
	por	%mm6,%mm0
	movq	%mm2,-48(%edi)
	psrlq	$4,%mm2
	movq	%mm5,80(%edi)
	movq	%mm5,%mm7
	movq	%mm0,-56(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,72(%ebp)
	shll	$4,%edx
	movb	%dl,11(%esp)
	movl	72(%ebx),%edx
	movq	72(%ebx),%mm0
	psllq	$60,%mm7
	movq	64(%ebx),%mm3
	por	%mm7,%mm2
	movq	%mm1,-40(%edi)
	psrlq	$4,%mm1
	movq	%mm4,88(%edi)
	movq	%mm4,%mm6
	movq	%mm2,-48(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,80(%ebp)
	shll	$4,%edx
	movb	%dl,12(%esp)
	movl	88(%ebx),%edx
	movq	88(%ebx),%mm2
	psllq	$60,%mm6
	movq	80(%ebx),%mm5
	por	%mm6,%mm1
	movq	%mm0,-32(%edi)
	psrlq	$4,%mm0
	movq	%mm3,96(%edi)
	movq	%mm3,%mm7
	movq	%mm1,-40(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,88(%ebp)
	shll	$4,%edx
	movb	%dl,13(%esp)
	movl	104(%ebx),%edx
	movq	104(%ebx),%mm1
	psllq	$60,%mm7
	movq	96(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-24(%edi)
	psrlq	$4,%mm2
	movq	%mm5,104(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-32(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,96(%ebp)
	shll	$4,%edx
	movb	%dl,14(%esp)
	movl	120(%ebx),%edx
	movq	120(%ebx),%mm0
	psllq	$60,%mm6
	movq	112(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-16(%edi)
	psrlq	$4,%mm1
	movq	%mm4,112(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-24(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,104(%ebp)
	shll	$4,%edx
	movb	%dl,15(%esp)
	psllq	$60,%mm7
	por	%mm7,%mm1
	movq	%mm0,-8(%edi)
	psrlq	$4,%mm0
	movq	%mm3,120(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-16(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,112(%ebp)
	psllq	$60,%mm6
	por	%mm6,%mm0
	movq	%mm0,-8(%ebp)
	movq	%mm3,120(%ebp)
	/* Load the running state: %mm6 = bytes 0..7, %ebx = 8..11, %edx = 12..15 */
	movq	(%eax),%mm6
	movl	8(%eax),%ebx
	movl	12(%eax),%edx
.align	16
.L004outer:
	/* state ^= next 16 input bytes; advance and save the input pointer. */
	xorl	12(%ecx),%edx
	xorl	8(%ecx),%ebx
	pxor	(%ecx),%mm6
	leal	16(%ecx),%ecx
	movl	%ebx,536(%esp)
	movq	%mm6,528(%esp)
	movl	%ecx,548(%esp)
	/* The 16 bytes are consumed from offset 15 down to 0: each         */
	/* "roll $8,%edx / movb %dl,%al" pair extracts the next byte.  Its  */
	/* low nibble indexes the tables at 16/144(%esp), its high nibble   */
	/* the pre-shifted tables at 272/400(%esp) and the byte table at    */
	/* (%esp).  %mm6:%mm7 is shifted right by 8 per byte and the byte   */
	/* shifted out selects a .Lrem_8bit entry (pinsrw) that is folded   */
	/* back in through %mm0/%mm1/%mm2 in rotation.                      */
	xorl	%eax,%eax
	roll	$8,%edx
	movb	%dl,%al
	movl	%eax,%ebp
	andb	$15,%al
	shrl	$4,%ebp
	pxor	%mm0,%mm0
	roll	$8,%edx
	pxor	%mm1,%mm1
	pxor	%mm2,%mm2
	movq	16(%esp,%eax,8),%mm7
	movq	144(%esp,%eax,8),%mm6
	movb	%dl,%al
	movd	%mm7,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	shrl	$4,%edi
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	/* bytes 12..15 consumed; continue with bytes 8..11 */
	movl	536(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	/* bytes 8..11 consumed; continue with bytes 4..7 */
	movl	532(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	/* bytes 4..7 consumed; continue with bytes 0..3 */
	movl	528(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	/* All 16 bytes have been extracted.  The value loaded here is     */
	/* never read: %edx is overwritten by "movd %mm7,%edx" below.      */
	movl	524(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	pxor	144(%esp,%eax,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	movzbl	%bl,%ebx
	/* Final step shifts by 4 instead of 8 and folds in the pending    */
	/* .Lrem_8bit contributions.                                       */
	pxor	%mm2,%mm2
	psllq	$4,%mm1
	movd	%mm7,%ecx
	psrlq	$4,%mm7
	movq	%mm6,%mm3
	psrlq	$4,%mm6
	shll	$4,%ecx
	pxor	16(%esp,%edi,8),%mm7
	psllq	$60,%mm3
	movzbl	%cl,%ecx
	pxor	%mm3,%mm7
	pxor	144(%esp,%edi,8),%mm6
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	%mm1,%mm6
	movd	%mm7,%edx
	pinsrw	$3,(%esi,%ecx,2),%mm2
	psllq	$12,%mm0
	pxor	%mm0,%mm6
	psrlq	$32,%mm7
	pxor	%mm2,%mm6
	movl	548(%esp),%ecx
	movd	%mm7,%ebx
	/* Byte-reverse the result: swap bytes inside each 16-bit word of  */
	/* %mm6, then reverse its four words; bswap the two dwords.        */
	movq	%mm6,%mm3
	psllw	$8,%mm6
	psrlw	$8,%mm3
	por	%mm3,%mm6
	bswap	%edx
	pshufw	$27,%mm6,%mm6
	bswap	%ebx
	cmpl	552(%esp),%ecx
	jne	.L004outer
	/* Write the state back and drop the scratch frame. */
	movl	544(%esp),%eax
	movl	%edx,12(%eax)
	movl	%ebx,8(%eax)
	movq	%mm6,(%eax)
	movl	556(%esp),%esp
	emms
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin

/*
 * gcm_init_clmul(arg1, arg2)
 *
 * arg1 (4(%esp)) -> 48-byte output, written with unaligned stores.
 * arg2 (8(%esp)) -> 16-byte input, read with an unaligned load.
 * NOTE(review): presumably Htable and the hash key H - confirm against the
 * C prototype.
 *
 * Steps:
 *   1. Swap the two qwords of the input and shift the 128-bit value left
 *      by one bit; if its top bit was set, xor in the 16-byte constant
 *      stored at .Lbswap+16.
 *   2. Multiply that value by itself (three carry-less multiplies on the
 *      low halves, high halves and the xor of both halves) and reduce the
 *      256-bit product to 128 bits.
 *   3. Store the value at +0, its reduced square at +16, and at +32 the
 *      pair { lo^hi of the value, lo^hi of the square }.
 *
 * The SSSE3 / PCLMULQDQ instructions are emitted as .byte sequences; the
 * decoded form is given next to each one.
 * Leaf function: no registers are pushed; clobbers %eax, %ecx, %edx,
 * %xmm0-%xmm5 and flags.
 */
.globl	gcm_init_clmul
.hidden	gcm_init_clmul
.type	gcm_init_clmul,@function
.align	16
gcm_init_clmul:
.L_gcm_init_clmul_begin:
	movl	4(%esp),%edx
	movl	8(%esp),%eax
	/* PIC: %ecx = run-time address of .Lbswap */
	call	.L005pic
.L005pic:
	popl	%ecx
	leal	.Lbswap-.L005pic(%ecx),%ecx
	movdqu	(%eax),%xmm2
	pshufd	$78,%xmm2,%xmm2
	/* %xmm5 = all-ones if the top bit of the value is set, else zero */
	pshufd	$255,%xmm2,%xmm4
	movdqa	%xmm2,%xmm3
	psllq	$1,%xmm2
	pxor	%xmm5,%xmm5
	psrlq	$63,%xmm3
	pcmpgtd	%xmm4,%xmm5
	pslldq	$8,%xmm3
	por	%xmm3,%xmm2
	/* memory operand must be 16-byte aligned; .Lbswap is .align 64 */
	pand	16(%ecx),%xmm5
	pxor	%xmm5,%xmm2
	/* square: %xmm1:%xmm0 = %xmm2 * %xmm2 */
	movdqa	%xmm2,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,220,0	/* pclmulqdq $0x00,%xmm4,%xmm3 */
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	/* reduce %xmm1:%xmm0 to 128 bits, result in %xmm0 */
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	/* store value, square and the packed lo^hi halves of both */
	pshufd	$78,%xmm2,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm2,%xmm3
	movdqu	%xmm2,(%edx)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,16(%edx)
.byte	102,15,58,15,227,8	/* palignr $8,%xmm3,%xmm4 */
	movdqu	%xmm4,32(%edx)
	ret
.size	gcm_init_clmul,.-.L_gcm_init_clmul_begin

/*
 * gcm_gmult_clmul(arg1, arg2)
 *
 * arg1 (4(%esp)) -> 16-byte block, read and overwritten with the result.
 * arg2 (8(%esp)) -> table; the 16 bytes at +0 and the 16 bytes at +32 are
 *      read (the layout gcm_init_clmul above writes).
 * NOTE(review): presumably Xi and Htable - confirm against the C prototype.
 *
 * The block is byte-reversed with the shuffle mask at .Lbswap, multiplied
 * by the table value (three carry-less multiplies), reduced, byte-reversed
 * again and stored.
 * Leaf function: clobbers %eax, %ecx, %edx, %xmm0-%xmm5 and flags.
 */
.globl	gcm_gmult_clmul
.hidden	gcm_gmult_clmul
.type	gcm_gmult_clmul,@function
.align	16
gcm_gmult_clmul:
.L_gcm_gmult_clmul_begin:
	movl	4(%esp),%eax
	movl	8(%esp),%edx
	/* PIC: %ecx = run-time address of .Lbswap */
	call	.L006pic
.L006pic:
	popl	%ecx
	leal	.Lbswap-.L006pic(%ecx),%ecx
	movdqu	(%eax),%xmm0
	movdqa	(%ecx),%xmm5
	movups	(%edx),%xmm2
.byte	102,15,56,0,197		/* pshufb %xmm5,%xmm0 */
	movups	32(%edx),%xmm4
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,220,0	/* pclmulqdq $0x00,%xmm4,%xmm3 */
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	/* reduce %xmm1:%xmm0 to 128 bits, result in %xmm0 */
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,56,0,197		/* pshufb %xmm5,%xmm0 */
	movdqu	%xmm0,(%eax)
	ret
.size	gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin
/*
 * gcm_ghash_clmul(arg1, arg2, arg3, arg4)
 *
 * arg1 (20(%esp) after the pushes) -> 16-byte running state, read at entry
 *      and written back at exit.
 * arg2 (24(%esp)) -> table; 16-byte values at +0, +16 and +32 are read.
 * arg3 (28(%esp)) -> input, read with unaligned loads.
 * arg4 (32(%esp))  = input length in bytes.
 * NOTE(review): presumably Xi, Htable, inp, len of OpenSSL's GHASH
 * interface - confirm against the C prototype.
 *
 * Length handling, as coded: 16 is subtracted first and a zero result goes
 * straight to the single-block tail; otherwise two blocks are loaded
 * unconditionally and the counter then steps by 32.  The code therefore
 * relies on the length being a non-zero multiple of 16.
 *
 * Register roles:
 *   %eax state pointer, %edx table pointer, %esi input pointer,
 *   %ebx remaining-length counter, %ecx address of .Lbswap,
 *   %xmm0 running state (byte-reversed while in the register),
 *   %xmm5 alternates between the byte-reversal mask and table+32.
 *
 * The SSSE3 / PCLMULQDQ instructions are emitted as .byte sequences; the
 * decoded form is given next to each one.
 * Clobbers %eax, %ecx, %edx, %xmm0-%xmm7 and flags.  %ebp and %edi are
 * pushed and popped but not otherwise used.
 */
.globl	gcm_ghash_clmul
.hidden	gcm_ghash_clmul
.type	gcm_ghash_clmul,@function
.align	16
gcm_ghash_clmul:
.L_gcm_ghash_clmul_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%eax
	movl	24(%esp),%edx
	movl	28(%esp),%esi
	movl	32(%esp),%ebx
	/* PIC: %ecx = run-time address of .Lbswap */
	call	.L007pic
.L007pic:
	popl	%ecx
	leal	.Lbswap-.L007pic(%ecx),%ecx
	movdqu	(%eax),%xmm0
	movdqa	(%ecx),%xmm5
	movdqu	(%edx),%xmm2
.byte	102,15,56,0,197		/* pshufb %xmm5,%xmm0 */
	subl	$16,%ebx
	jz	.L008odd_tail
	/* Two or more blocks: xor the first into the state and start the  */
	/* multiplication of the second by table+0.                        */
	movdqu	(%esi),%xmm3
	movdqu	16(%esi),%xmm6
.byte	102,15,56,0,221		/* pshufb %xmm5,%xmm3 */
.byte	102,15,56,0,245		/* pshufb %xmm5,%xmm6 */
	movdqu	32(%edx),%xmm5
	pxor	%xmm3,%xmm0
	pshufd	$78,%xmm6,%xmm3
	movdqa	%xmm6,%xmm7
	pxor	%xmm6,%xmm3
	leal	32(%esi),%esi
.byte	102,15,58,68,242,0	/* pclmulqdq $0x00,%xmm2,%xmm6 */
.byte	102,15,58,68,250,17	/* pclmulqdq $0x11,%xmm2,%xmm7 */
.byte	102,15,58,68,221,0	/* pclmulqdq $0x00,%xmm5,%xmm3 */
	movups	16(%edx),%xmm2
	nop
	subl	$32,%ebx
	jbe	.L009even_tail
	jmp	.L010mod_loop
.align	32
.L010mod_loop:
	/* Multiply the state by table+16, combine with the pending        */
	/* product of the previous second block, reduce, and meanwhile     */
	/* load and pre-multiply the next two blocks.                      */
	pshufd	$78,%xmm0,%xmm4
	movdqa	%xmm0,%xmm1
	pxor	%xmm0,%xmm4
	nop
.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,229,16	/* pclmulqdq $0x10,%xmm5,%xmm4 */
	movups	(%edx),%xmm2
	xorps	%xmm6,%xmm0
	movdqa	(%ecx),%xmm5
	xorps	%xmm7,%xmm1
	movdqu	(%esi),%xmm7
	pxor	%xmm0,%xmm3
	movdqu	16(%esi),%xmm6
	pxor	%xmm1,%xmm3
.byte	102,15,56,0,253		/* pshufb %xmm5,%xmm7 */
	pxor	%xmm3,%xmm4
	movdqa	%xmm4,%xmm3
	psrldq	$8,%xmm4
	pslldq	$8,%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm3,%xmm0
.byte	102,15,56,0,245		/* pshufb %xmm5,%xmm6 */
	/* next first block is xored into the high half of the product */
	pxor	%xmm7,%xmm1
	movdqa	%xmm6,%xmm7
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
.byte	102,15,58,68,242,0	/* pclmulqdq $0x00,%xmm2,%xmm6 */
	movups	32(%edx),%xmm5
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	pshufd	$78,%xmm7,%xmm3
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm7,%xmm3
	pxor	%xmm4,%xmm1
.byte	102,15,58,68,250,17	/* pclmulqdq $0x11,%xmm2,%xmm7 */
	movups	16(%edx),%xmm2
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,58,68,221,0	/* pclmulqdq $0x00,%xmm5,%xmm3 */
	leal	32(%esi),%esi
	subl	$32,%ebx
	ja	.L010mod_loop
.L009even_tail:
	/* Finish the last pair: state * table+16, plus the pending        */
	/* product, then reduce.                                           */
	pshufd	$78,%xmm0,%xmm4
	movdqa	%xmm0,%xmm1
	pxor	%xmm0,%xmm4
.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,229,16	/* pclmulqdq $0x10,%xmm5,%xmm4 */
	movdqa	(%ecx),%xmm5
	xorps	%xmm6,%xmm0
	xorps	%xmm7,%xmm1
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3
	pxor	%xmm3,%xmm4
	movdqa	%xmm4,%xmm3
	psrldq	$8,%xmm4
	pslldq	$8,%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm3,%xmm0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	/* counter is zero here only when exactly one block is left */
	testl	%ebx,%ebx
	jnz	.L011done
	movups	(%edx),%xmm2
.L008odd_tail:
	/* Single remaining block: state = (state ^ block) * table+0. */
	movdqu	(%esi),%xmm3
.byte	102,15,56,0,221		/* pshufb %xmm5,%xmm3 */
	pxor	%xmm3,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,220,0	/* pclmulqdq $0x00,%xmm4,%xmm3 */
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.L011done:
	/* undo the byte reversal and store the state */
.byte	102,15,56,0,197		/* pshufb %xmm5,%xmm0 */
	movdqu	%xmm0,(%eax)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin

/*
 * Constant tables.  They live in .text so the functions can reach them
 * with a call/pop base plus an assemble-time label difference.
 */

/*
 * .Lbswap+0  : pshufb mask that reverses the 16 bytes of an XMM register.
 * .Lbswap+16 : 16-byte constant (first byte 1, last byte 194, zeros in
 *              between) read by gcm_init_clmul as 16(%ecx).
 * The 64-byte alignment satisfies the movdqa / pand memory operands.
 */
.align	64
.Lbswap:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194

/* 256 16-bit entries; indexed with (%esi,%reg,2) by gcm_ghash_4bit_mmx. */
.align	64
.Lrem_8bit:
.value	0,450,900,582,1800,1738,1164,1358
.value	3600,4050,3476,3158,2328,2266,2716,2910
.value	7200,7650,8100,7782,6952,6890,6316,6510
.value	4656,5106,4532,4214,5432,5370,5820,6014
.value	14400,14722,15300,14854,16200,16010,15564,15630
.value	13904,14226,13780,13334,12632,12442,13020,13086
.value	9312,9634,10212,9766,9064,8874,8428,8494
.value	10864,11186,10740,10294,11640,11450,12028,12094
.value	28800,28994,29444,29382,30600,30282,29708,30158
.value	32400,32594,32020,31958,31128,30810,31260,31710
.value	27808,28002,28452,28390,27560,27242,26668,27118
.value	25264,25458,24884,24822,26040,25722,26172,26622
.value	18624,18690,19268,19078,20424,19978,19532,19854
.value	18128,18194,17748,17558,16856,16410,16988,17310
.value	21728,21794,22372,22182,21480,21034,20588,20910
.value	23280,23346,22900,22710,24056,23610,24188,24510
.value	57600,57538,57988,58182,58888,59338,58764,58446
.value	61200,61138,60564,60758,59416,59866,60316,59998
.value	64800,64738,65188,65382,64040,64490,63916,63598
.value	62256,62194,61620,61814,62520,62970,63420,63102
.value	55616,55426,56004,56070,56904,57226,56780,56334
.value	55120,54930,54484,54550,53336,53658,54236,53790
.value	50528,50338,50916,50982,49768,50090,49644,49198
.value	52080,51890,51444,51510,52344,52666,53244,52798
.value	37248,36930,37380,37830,38536,38730,38156,38094
.value	40848,40530,39956,40406,39064,39258,39708,39646
.value	36256,35938,36388,36838,35496,35690,35116,35054
.value	33712,33394,32820,33270,33976,34170,34620,34558
.value	43456,43010,43588,43910,44744,44810,44364,44174
.value	42960,42514,42068,42390,41176,41242,41820,41630
.value	46560,46114,46692,47014,45800,45866,45420,45230
.value	48112,47666,47220,47542,48376,48442,49020,48830

/*
 * 16 entries of 8 bytes (low dword 0, high dword the value); indexed with
 * (%eax,%reg,8) by gcm_gmult_4bit_mmx.
 */
.align	64
.Lrem_4bit:
.long	0,0,0,471859200,0,943718400,0,610271232
.long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long	0,2441084928,0,2376073216,0,2847932416,0,3051356160

/* ASCII, NUL-terminated: "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>" */
.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte	82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte	112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte	0
#endif

/*
 * Declare that this object does not need an executable stack.  Without
 * this section ELF linkers may mark the whole output as requiring one.
 * Kept outside the __i386__ guard so an otherwise empty object built for
 * another target carries the marker too; %progbits is used because '@'
 * starts a comment on some targets.
 */
#if defined(__ELF__)
.section	.note.GNU-stack,"",%progbits
#endif