#if defined(__i386__)
/*
 * GHASH for x86 (see the ASCII identification string at the end of this file).
 * Syntax: GNU as, AT&T operand order, 32-bit x86, run through the C
 * preprocessor. All arguments are read from the stack (4(%esp), 8(%esp), ...
 * at function entry). Constant tables live at the end of .text and are
 * addressed position-independently with a call/pop pair.
 *
 * Entry points:
 *   gcm_gmult_4bit_mmx(arg1, arg2)              MMX, 4-bit table
 *   gcm_ghash_4bit_mmx(arg1, arg2, arg3, arg4)  MMX, 4-bit table, bulk
 *   gcm_init_clmul(arg1, arg2)                  PCLMULQDQ table setup
 *   gcm_gmult_clmul(arg1, arg2)                 PCLMULQDQ, one block
 *   gcm_ghash_clmul(arg1, arg2, arg3, arg4)     PCLMULQDQ, bulk
 *
 * NOTE(review): the instruction stream is generated and hand-scheduled;
 * do not reorder it. SSSE3/PCLMULQDQ instructions are emitted as .byte
 * sequences; each is annotated with the instruction it encodes.
 */
.file	"ghash-x86.S"
.text

/*
 * gcm_gmult_4bit_mmx(arg1, arg2)
 *   arg1 (%edi): 16-byte value, read byte 15 down to byte 0 and overwritten
 *                in place with the result (stored via bswap, 4 dwords).
 *   arg2 (%esi): table of 16-byte entries indexed by (nibble << 4), i.e.
 *                offsets 0..240, so 256 bytes are addressed.
 *                Presumably the per-key multiplication table - confirm
 *                against the C caller.
 *   %eax: address of .Lrem_4bit (8-byte entries indexed by a nibble).
 *   %ebp: byte index into arg1, counts 14 .. 0; the loop exits when decl
 *         makes it negative.
 * Saves and restores %ebp, %ebx, %esi, %edi. Uses %mm0-%mm2; executes emms
 * before returning.
 */
.globl	gcm_gmult_4bit_mmx
.hidden	gcm_gmult_4bit_mmx
.type	gcm_gmult_4bit_mmx,@function
.align	16
gcm_gmult_4bit_mmx:
.L_gcm_gmult_4bit_mmx_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	call	.L000pic_point			/* PIC: fetch own address */
.L000pic_point:
	popl	%eax
	leal	.Lrem_4bit-.L000pic_point(%eax),%eax
	movzbl	15(%edi),%ebx
	xorl	%ecx,%ecx
	movl	%ebx,%edx
	movb	%dl,%cl
	movl	$14,%ebp
	shlb	$4,%cl				/* %ecx = low nibble << 4 */
	andl	$240,%edx			/* %edx = high nibble << 4 */
	movq	8(%esi,%ecx,1),%mm0
	movq	(%esi,%ecx,1),%mm1
	movd	%mm0,%ebx
	jmp	.L001mmx_loop
.align	16
.L001mmx_loop:
	psrlq	$4,%mm0
	andl	$15,%ebx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	movb	(%edi,%ebp,1),%cl		/* next input byte */
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	decl	%ebp
	movd	%mm0,%ebx
	pxor	(%esi,%edx,1),%mm1
	movl	%ecx,%edx
	pxor	%mm2,%mm0
	js	.L002mmx_break			/* sign flag still from decl */
	shlb	$4,%cl
	andl	$15,%ebx
	psrlq	$4,%mm0
	andl	$240,%edx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%ecx,1),%mm1
	pxor	%mm2,%mm0
	jmp	.L001mmx_loop
.align	16
.L002mmx_break:
	shlb	$4,%cl
	andl	$15,%ebx
	psrlq	$4,%mm0
	andl	$240,%edx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%ecx,1),%mm1
	pxor	%mm2,%mm0
	psrlq	$4,%mm0
	andl	$15,%ebx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%edx,1),%mm1
	pxor	%mm2,%mm0
	psrlq	$32,%mm0
	movd	%mm1,%edx
	psrlq	$32,%mm1
	movd	%mm0,%ecx
	movd	%mm1,%ebp
	bswap	%ebx
	bswap	%edx
	bswap	%ecx
	bswap	%ebp
	emms
	movl	%ebx,12(%edi)
	movl	%edx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%ebp,(%edi)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin

/*
 * gcm_ghash_4bit_mmx(arg1, arg2, arg3, arg4)
 *   arg1 (%eax): 16-byte running value, read at entry and written on exit.
 *   arg2 (%ebx): table of 16 entries of 16 bytes (256 bytes are read).
 *   arg3 (%ecx): input pointer, consumed 16 bytes per outer iteration.
 *   arg4 (%edx): byte count; arg3 + arg4 is kept as the end pointer.
 *   %esi: address of .Lrem_8bit (16-bit entries, indexed by a byte).
 *
 * Precondition visible in the code: .L004outer is entered unconditionally
 * and exits only when the input pointer EQUALS the end pointer, so arg4
 * must be a non-zero multiple of 16.
 *
 * Stack frame after setup (%esp is 64-byte aligned, then lowered by 16):
 *     0..15    one byte per table entry: low byte of (dword at entry+8) << 4
 *    16..143   qword at offset 8 of each table entry
 *   144..271   qword at offset 0 of each table entry
 *   272..399   (qword at +8 >> 4) | (qword at +0 << 60), per entry
 *   400..527   (qword at +0 >> 4), per entry
 *   528..535   %mm6 spill (first 8 bytes of value xor input)
 *   536        %ebx spill (dword at +8 of value xor input)
 *   544        arg1
 *   548        current input pointer
 *   552        end pointer
 *   556        caller %esp, restored before the pops
 * Saves and restores %ebp, %ebx, %esi, %edi. Uses %mm0-%mm7; executes emms
 * before returning.
 */
.globl	gcm_ghash_4bit_mmx
.hidden	gcm_ghash_4bit_mmx
.type	gcm_ghash_4bit_mmx,@function
.align	16
gcm_ghash_4bit_mmx:
.L_gcm_ghash_4bit_mmx_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%eax
	movl	24(%esp),%ebx
	movl	28(%esp),%ecx
	movl	32(%esp),%edx
	movl	%esp,%ebp			/* remember caller %esp */
	call	.L003pic_point
.L003pic_point:
	popl	%esi
	leal	.Lrem_8bit-.L003pic_point(%esi),%esi
	subl	$544,%esp
	andl	$-64,%esp
	subl	$16,%esp
	addl	%ecx,%edx			/* %edx = end of input */
	movl	%eax,544(%esp)
	movl	%edx,552(%esp)
	movl	%ebp,556(%esp)
	addl	$128,%ebx			/* bias so all 16 entries use disp8 */
	leal	144(%esp),%edi
	leal	400(%esp),%ebp
	/* Build the on-stack tables from the 16 entries of arg2. */
	movl	-120(%ebx),%edx
	movq	-120(%ebx),%mm0
	movq	-128(%ebx),%mm3
	shll	$4,%edx
	movb	%dl,(%esp)
	movl	-104(%ebx),%edx
	movq	-104(%ebx),%mm2
	movq	-112(%ebx),%mm5
	movq	%mm0,-128(%edi)
	psrlq	$4,%mm0
	movq	%mm3,(%edi)
	movq	%mm3,%mm7
	psrlq	$4,%mm3
	shll	$4,%edx
	movb	%dl,1(%esp)
	movl	-88(%ebx),%edx
	movq	-88(%ebx),%mm1
	psllq	$60,%mm7
	movq	-96(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-120(%edi)
	psrlq	$4,%mm2
	movq	%mm5,8(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-128(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,(%ebp)
	shll	$4,%edx
	movb	%dl,2(%esp)
	movl	-72(%ebx),%edx
	movq	-72(%ebx),%mm0
	psllq	$60,%mm6
	movq	-80(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-112(%edi)
	psrlq	$4,%mm1
	movq	%mm4,16(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-120(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,8(%ebp)
	shll	$4,%edx
	movb	%dl,3(%esp)
	movl	-56(%ebx),%edx
	movq	-56(%ebx),%mm2
	psllq	$60,%mm7
	movq	-64(%ebx),%mm5
	por	%mm7,%mm1
	movq	%mm0,-104(%edi)
	psrlq	$4,%mm0
	movq	%mm3,24(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-112(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,16(%ebp)
	shll	$4,%edx
	movb	%dl,4(%esp)
	movl	-40(%ebx),%edx
	movq	-40(%ebx),%mm1
	psllq	$60,%mm6
	movq	-48(%ebx),%mm4
	por	%mm6,%mm0
	movq	%mm2,-96(%edi)
	psrlq	$4,%mm2
	movq	%mm5,32(%edi)
	movq	%mm5,%mm7
	movq	%mm0,-104(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,24(%ebp)
	shll	$4,%edx
	movb	%dl,5(%esp)
	movl	-24(%ebx),%edx
	movq	-24(%ebx),%mm0
	psllq	$60,%mm7
	movq	-32(%ebx),%mm3
	por	%mm7,%mm2
	movq	%mm1,-88(%edi)
	psrlq	$4,%mm1
	movq	%mm4,40(%edi)
	movq	%mm4,%mm6
	movq	%mm2,-96(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,32(%ebp)
	shll	$4,%edx
	movb	%dl,6(%esp)
	movl	-8(%ebx),%edx
	movq	-8(%ebx),%mm2
	psllq	$60,%mm6
	movq	-16(%ebx),%mm5
	por	%mm6,%mm1
	movq	%mm0,-80(%edi)
	psrlq	$4,%mm0
	movq	%mm3,48(%edi)
	movq	%mm3,%mm7
	movq	%mm1,-88(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,40(%ebp)
	shll	$4,%edx
	movb	%dl,7(%esp)
	movl	8(%ebx),%edx
	movq	8(%ebx),%mm1
	psllq	$60,%mm7
	movq	(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-72(%edi)
	psrlq	$4,%mm2
	movq	%mm5,56(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-80(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,48(%ebp)
	shll	$4,%edx
	movb	%dl,8(%esp)
	movl	24(%ebx),%edx
	movq	24(%ebx),%mm0
	psllq	$60,%mm6
	movq	16(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-64(%edi)
	psrlq	$4,%mm1
	movq	%mm4,64(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-72(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,56(%ebp)
	shll	$4,%edx
	movb	%dl,9(%esp)
	movl	40(%ebx),%edx
	movq	40(%ebx),%mm2
	psllq	$60,%mm7
	movq	32(%ebx),%mm5
	por	%mm7,%mm1
	movq	%mm0,-56(%edi)
	psrlq	$4,%mm0
	movq	%mm3,72(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-64(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,64(%ebp)
	shll	$4,%edx
	movb	%dl,10(%esp)
	movl	56(%ebx),%edx
	movq	56(%ebx),%mm1
	psllq	$60,%mm6
	movq	48(%ebx),%mm4
	por	%mm6,%mm0
	movq	%mm2,-48(%edi)
	psrlq	$4,%mm2
	movq	%mm5,80(%edi)
	movq	%mm5,%mm7
	movq	%mm0,-56(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,72(%ebp)
	shll	$4,%edx
	movb	%dl,11(%esp)
	movl	72(%ebx),%edx
	movq	72(%ebx),%mm0
	psllq	$60,%mm7
	movq	64(%ebx),%mm3
	por	%mm7,%mm2
	movq	%mm1,-40(%edi)
	psrlq	$4,%mm1
	movq	%mm4,88(%edi)
	movq	%mm4,%mm6
	movq	%mm2,-48(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,80(%ebp)
	shll	$4,%edx
	movb	%dl,12(%esp)
	movl	88(%ebx),%edx
	movq	88(%ebx),%mm2
	psllq	$60,%mm6
	movq	80(%ebx),%mm5
	por	%mm6,%mm1
	movq	%mm0,-32(%edi)
	psrlq	$4,%mm0
	movq	%mm3,96(%edi)
	movq	%mm3,%mm7
	movq	%mm1,-40(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,88(%ebp)
	shll	$4,%edx
	movb	%dl,13(%esp)
	movl	104(%ebx),%edx
	movq	104(%ebx),%mm1
	psllq	$60,%mm7
	movq	96(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-24(%edi)
	psrlq	$4,%mm2
	movq	%mm5,104(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-32(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,96(%ebp)
	shll	$4,%edx
	movb	%dl,14(%esp)
	movl	120(%ebx),%edx
	movq	120(%ebx),%mm0
	psllq	$60,%mm6
	movq	112(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-16(%edi)
	psrlq	$4,%mm1
	movq	%mm4,112(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-24(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,104(%ebp)
	shll	$4,%edx
	movb	%dl,15(%esp)
	psllq	$60,%mm7
	por	%mm7,%mm1
	movq	%mm0,-8(%edi)
	psrlq	$4,%mm0
	movq	%mm3,120(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-16(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,112(%ebp)
	psllq	$60,%mm6
	por	%mm6,%mm0
	movq	%mm0,-8(%ebp)
	movq	%mm3,120(%ebp)
	/* Load the running value: %mm6 = bytes 0..7, %ebx = 8..11, %edx = 12..15. */
	movq	(%eax),%mm6
	movl	8(%eax),%ebx
	movl	12(%eax),%edx
.align	16
.L004outer:
	/* xor in the next 16 input bytes and advance the input pointer */
	xorl	12(%ecx),%edx
	xorl	8(%ecx),%ebx
	pxor	(%ecx),%mm6
	leal	16(%ecx),%ecx
	movl	%ebx,536(%esp)
	movq	%mm6,528(%esp)
	movl	%ecx,548(%esp)
	xorl	%eax,%eax
	roll	$8,%edx
	movb	%dl,%al
	movl	%eax,%ebp
	andb	$15,%al
	shrl	$4,%ebp
	pxor	%mm0,%mm0
	roll	$8,%edx
	pxor	%mm1,%mm1
	pxor	%mm2,%mm2
	movq	16(%esp,%eax,8),%mm7
	movq	144(%esp,%eax,8),%mm6
	movb	%dl,%al
	movd	%mm7,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	shrl	$4,%edi
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	536(%esp),%edx			/* next dword of the spilled value */
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	532(%esp),%edx			/* next dword of the spilled value */
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	528(%esp),%edx			/* last dword of the spilled value */
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	/*
	 * NOTE(review): 524(%esp) is never written by this function and the
	 * value loaded here is not consumed: %edx is overwritten by
	 * "movd %mm7,%edx" below before any read. Kept as generated.
	 */
	movl	524(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	pxor	144(%esp,%eax,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	movzbl	%bl,%ebx
	pxor	%mm2,%mm2
	psllq	$4,%mm1
	movd	%mm7,%ecx
	psrlq	$4,%mm7
	movq	%mm6,%mm3
	psrlq	$4,%mm6
	shll	$4,%ecx
	pxor	16(%esp,%edi,8),%mm7
	psllq	$60,%mm3
	movzbl	%cl,%ecx
	pxor	%mm3,%mm7
	pxor	144(%esp,%edi,8),%mm6
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	%mm1,%mm6
	movd	%mm7,%edx
	pinsrw	$3,(%esi,%ecx,2),%mm2
	psllq	$12,%mm0
	pxor	%mm0,%mm6
	psrlq	$32,%mm7
	pxor	%mm2,%mm6
	movl	548(%esp),%ecx			/* reload input pointer */
	movd	%mm7,%ebx
	movq	%mm6,%mm3
	psllw	$8,%mm6
	psrlw	$8,%mm3
	por	%mm3,%mm6
	bswap	%edx
	pshufw	$27,%mm6,%mm6
	bswap	%ebx
	cmpl	552(%esp),%ecx			/* reached end pointer? */
	jne	.L004outer
	movl	544(%esp),%eax
	movl	%edx,12(%eax)
	movl	%ebx,8(%eax)
	movq	%mm6,(%eax)
	movl	556(%esp),%esp			/* drop the aligned frame */
	emms
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin

/*
 * gcm_init_clmul(arg1, arg2)
 *   arg1 (%edx): output, 48 bytes written as three unaligned 16-byte stores.
 *   arg2 (%eax): input, 16 bytes read with movdqu.
 *   %ecx: address of .Lbswap; the 16-byte constant at .Lbswap+16 is used.
 *
 * Steps visible in the code:
 *   - swap the two qwords of the input, shift the 128-bit value left by one
 *     bit, and xor in the constant at .Lbswap+16 when the top dword was
 *     negative (pcmpgtd mask);
 *   - multiply the result by itself with three carry-less multiplies
 *     (Karatsuba) and reduce it back to 128 bits;
 *   - store: (%edx) = adjusted value, 16(%edx) = reduced square,
 *     32(%edx) = xor-folded halves of both, packed with palignr.
 * Leaf function; touches only %eax, %ecx, %edx and %xmm0-%xmm5.
 */
.globl	gcm_init_clmul
.hidden	gcm_init_clmul
.type	gcm_init_clmul,@function
.align	16
gcm_init_clmul:
.L_gcm_init_clmul_begin:
	movl	4(%esp),%edx
	movl	8(%esp),%eax
	call	.L005pic
.L005pic:
	popl	%ecx
	leal	.Lbswap-.L005pic(%ecx),%ecx
	movdqu	(%eax),%xmm2
	pshufd	$78,%xmm2,%xmm2
	pshufd	$255,%xmm2,%xmm4
	movdqa	%xmm2,%xmm3
	psllq	$1,%xmm2
	pxor	%xmm5,%xmm5
	psrlq	$63,%xmm3
	pcmpgtd	%xmm4,%xmm5
	pslldq	$8,%xmm3
	por	%xmm3,%xmm2
	pand	16(%ecx),%xmm5
	pxor	%xmm5,%xmm2
	movdqa	%xmm2,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0		/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17		/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,220,0		/* pclmulqdq $0x00,%xmm4,%xmm3 */
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	/* reduce the 256-bit product held in %xmm1:%xmm0 */
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	pshufd	$78,%xmm2,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm2,%xmm3
	movdqu	%xmm2,(%edx)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,16(%edx)
.byte	102,15,58,15,227,8		/* palignr $8,%xmm3,%xmm4 */
	movdqu	%xmm4,32(%edx)
	ret
.size	gcm_init_clmul,.-.L_gcm_init_clmul_begin

/*
 * gcm_gmult_clmul(arg1, arg2)
 *   arg1 (%eax): 16-byte value, byte-reversed on load and store with the
 *                .Lbswap mask, updated in place.
 *   arg2 (%edx): table in the layout written by gcm_init_clmul; the entries
 *                at offsets 0 and 32 are read.
 *   %ecx: address of .Lbswap (64-byte aligned, hence the movdqa load).
 * One Karatsuba carry-less multiply followed by the same reduction
 * sequence as in gcm_init_clmul.
 * Leaf function; touches only %eax, %ecx, %edx and %xmm0-%xmm5.
 */
.globl	gcm_gmult_clmul
.hidden	gcm_gmult_clmul
.type	gcm_gmult_clmul,@function
.align	16
gcm_gmult_clmul:
.L_gcm_gmult_clmul_begin:
	movl	4(%esp),%eax
	movl	8(%esp),%edx
	call	.L006pic
.L006pic:
	popl	%ecx
	leal	.Lbswap-.L006pic(%ecx),%ecx
	movdqu	(%eax),%xmm0
	movdqa	(%ecx),%xmm5
	movups	(%edx),%xmm2
.byte	102,15,56,0,197			/* pshufb %xmm5,%xmm0 */
	movups	32(%edx),%xmm4
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0		/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17		/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,220,0		/* pclmulqdq $0x00,%xmm4,%xmm3 */
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	/* reduce the 256-bit product held in %xmm1:%xmm0 */
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,56,0,197			/* pshufb %xmm5,%xmm0 */
	movdqu	%xmm0,(%eax)
	ret
.size	gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin

/*
 * gcm_ghash_clmul(arg1, arg2, arg3, arg4)
 *   arg1 (%eax): 16-byte running value, updated in place (byte-reversed on
 *                load and store with the .Lbswap mask).
 *   arg2 (%edx): table in the layout written by gcm_init_clmul; entries at
 *                offsets 0, 16 and 32 are read.
 *   arg3 (%esi): input pointer.
 *   arg4 (%ebx): byte count.
 *
 * Control flow:
 *   - exactly 16 bytes: straight to .L008odd_tail (single block);
 *   - otherwise two blocks are consumed per pass (.L010mod_loop), finished
 *     by .L009even_tail, plus .L008odd_tail when one block is left over.
 * Precondition visible in the code: arg4 must be a non-zero multiple of 16.
 * With arg4 == 0 the counter wraps at "subl $16" and the loop is entered.
 *
 * Saves and restores %ebp, %ebx, %esi, %edi. Uses %xmm0-%xmm7.
 */
.globl	gcm_ghash_clmul
.hidden	gcm_ghash_clmul
.type	gcm_ghash_clmul,@function
.align	16
gcm_ghash_clmul:
.L_gcm_ghash_clmul_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%eax
	movl	24(%esp),%edx
	movl	28(%esp),%esi
	movl	32(%esp),%ebx
	call	.L007pic
.L007pic:
	popl	%ecx
	leal	.Lbswap-.L007pic(%ecx),%ecx
	movdqu	(%eax),%xmm0
	movdqa	(%ecx),%xmm5
	movdqu	(%edx),%xmm2
.byte	102,15,56,0,197			/* pshufb %xmm5,%xmm0 */
	subl	$16,%ebx
	jz	.L008odd_tail			/* exactly one block */
	movdqu	(%esi),%xmm3
	movdqu	16(%esi),%xmm6
.byte	102,15,56,0,221			/* pshufb %xmm5,%xmm3 */
.byte	102,15,56,0,245			/* pshufb %xmm5,%xmm6 */
	movdqu	32(%edx),%xmm5
	pxor	%xmm3,%xmm0
	pshufd	$78,%xmm6,%xmm3
	movdqa	%xmm6,%xmm7
	pxor	%xmm6,%xmm3
	leal	32(%esi),%esi
.byte	102,15,58,68,242,0		/* pclmulqdq $0x00,%xmm2,%xmm6 */
.byte	102,15,58,68,250,17		/* pclmulqdq $0x11,%xmm2,%xmm7 */
.byte	102,15,58,68,221,0		/* pclmulqdq $0x00,%xmm5,%xmm3 */
	movups	16(%edx),%xmm2
	nop
	subl	$32,%ebx
	jbe	.L009even_tail
	jmp	.L010mod_loop
.align	32
.L010mod_loop:
	pshufd	$78,%xmm0,%xmm4
	movdqa	%xmm0,%xmm1
	pxor	%xmm0,%xmm4
	nop
.byte	102,15,58,68,194,0		/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17		/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,229,16		/* pclmulqdq $0x10,%xmm5,%xmm4 */
	movups	(%edx),%xmm2
	xorps	%xmm6,%xmm0
	movdqa	(%ecx),%xmm5
	xorps	%xmm7,%xmm1
	movdqu	(%esi),%xmm7
	pxor	%xmm0,%xmm3
	movdqu	16(%esi),%xmm6
	pxor	%xmm1,%xmm3
.byte	102,15,56,0,253			/* pshufb %xmm5,%xmm7 */
	pxor	%xmm3,%xmm4
	movdqa	%xmm4,%xmm3
	psrldq	$8,%xmm4
	pslldq	$8,%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm3,%xmm0
.byte	102,15,56,0,245			/* pshufb %xmm5,%xmm6 */
	pxor	%xmm7,%xmm1
	movdqa	%xmm6,%xmm7
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
.byte	102,15,58,68,242,0		/* pclmulqdq $0x00,%xmm2,%xmm6 */
	movups	32(%edx),%xmm5
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	pshufd	$78,%xmm7,%xmm3
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm7,%xmm3
	pxor	%xmm4,%xmm1
.byte	102,15,58,68,250,17		/* pclmulqdq $0x11,%xmm2,%xmm7 */
	movups	16(%edx),%xmm2
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,58,68,221,0		/* pclmulqdq $0x00,%xmm5,%xmm3 */
	leal	32(%esi),%esi
	subl	$32,%ebx
	ja	.L010mod_loop
.L009even_tail:
	pshufd	$78,%xmm0,%xmm4
	movdqa	%xmm0,%xmm1
	pxor	%xmm0,%xmm4
.byte	102,15,58,68,194,0		/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17		/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,229,16		/* pclmulqdq $0x10,%xmm5,%xmm4 */
	movdqa	(%ecx),%xmm5
	xorps	%xmm6,%xmm0
	xorps	%xmm7,%xmm1
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3
	pxor	%xmm3,%xmm4
	movdqa	%xmm4,%xmm3
	psrldq	$8,%xmm4
	pslldq	$8,%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm3,%xmm0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	testl	%ebx,%ebx
	jnz	.L011done			/* no leftover block */
	movups	(%edx),%xmm2
.L008odd_tail:
	movdqu	(%esi),%xmm3
.byte	102,15,56,0,221			/* pshufb %xmm5,%xmm3 */
	pxor	%xmm3,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0		/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17		/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,220,0		/* pclmulqdq $0x00,%xmm4,%xmm3 */
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.L011done:
.byte	102,15,56,0,197			/* pshufb %xmm5,%xmm0 */
	movdqu	%xmm0,(%eax)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin

/*
 * Constant tables. They stay in .text because the code above reaches them
 * through assemble-time label differences (.Ltable - .Lpic_label).
 */

/*
 * .Lbswap: 16-byte pshufb mask that reverses byte order, followed at +16 by
 * the 16-byte constant used by gcm_init_clmul (low byte 1, high byte 194).
 */
.align	64
.Lbswap:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194

/* .Lrem_8bit: 256 16-bit entries, indexed by a byte in gcm_ghash_4bit_mmx. */
.align	64
.Lrem_8bit:
.value	0,450,900,582,1800,1738,1164,1358
.value	3600,4050,3476,3158,2328,2266,2716,2910
.value	7200,7650,8100,7782,6952,6890,6316,6510
.value	4656,5106,4532,4214,5432,5370,5820,6014
.value	14400,14722,15300,14854,16200,16010,15564,15630
.value	13904,14226,13780,13334,12632,12442,13020,13086
.value	9312,9634,10212,9766,9064,8874,8428,8494
.value	10864,11186,10740,10294,11640,11450,12028,12094
.value	28800,28994,29444,29382,30600,30282,29708,30158
.value	32400,32594,32020,31958,31128,30810,31260,31710
.value	27808,28002,28452,28390,27560,27242,26668,27118
.value	25264,25458,24884,24822,26040,25722,26172,26622
.value	18624,18690,19268,19078,20424,19978,19532,19854
.value	18128,18194,17748,17558,16856,16410,16988,17310
.value	21728,21794,22372,22182,21480,21034,20588,20910
.value	23280,23346,22900,22710,24056,23610,24188,24510
.value	57600,57538,57988,58182,58888,59338,58764,58446
.value	61200,61138,60564,60758,59416,59866,60316,59998
.value	64800,64738,65188,65382,64040,64490,63916,63598
.value	62256,62194,61620,61814,62520,62970,63420,63102
.value	55616,55426,56004,56070,56904,57226,56780,56334
.value	55120,54930,54484,54550,53336,53658,54236,53790
.value	50528,50338,50916,50982,49768,50090,49644,49198
.value	52080,51890,51444,51510,52344,52666,53244,52798
.value	37248,36930,37380,37830,38536,38730,38156,38094
.value	40848,40530,39956,40406,39064,39258,39708,39646
.value	36256,35938,36388,36838,35496,35690,35116,35054
.value	33712,33394,32820,33270,33976,34170,34620,34558
.value	43456,43010,43588,43910,44744,44810,44364,44174
.value	42960,42514,42068,42390,41176,41242,41820,41630
.value	46560,46114,46692,47014,45800,45866,45420,45230
.value	48112,47666,47220,47542,48376,48442,49020,48830

/* .Lrem_4bit: 16 8-byte entries, indexed by a nibble in gcm_gmult_4bit_mmx. */
.align	64
.Lrem_4bit:
.long	0,0,0,471859200,0,943718400,0,610271232
.long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long	0,2441084928,0,2376073216,0,2847932416,0,3051356160

/* NUL-terminated ASCII: "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>" */
.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte	82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte	112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte	0
#endif