/*
Copyright (c) 2010, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef L
# define L(label)	.L##label
#endif

#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)	.cfi_restore (reg)
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

#ifndef ENTRY
# define ENTRY(name) \
	.type name, @function; \
	.globl name; \
	.p2align 4; \
name: \
	cfi_startproc
#endif

#ifndef END
# define END(name) \
	cfi_endproc; \
	.size name, .-name
#endif

#define CFI_PUSH(REG) \
	cfi_adjust_cfa_offset (4); \
	cfi_rel_offset (REG, 0)

#define CFI_POP(REG) \
	cfi_adjust_cfa_offset (-4); \
	cfi_restore (REG)

#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

#ifndef USE_AS_STRNCMP
# define STR1	4
# define STR2	STR1+4
# define RETURN	ret

# define UPDATE_STRNCMP_COUNTER
#else
# define STR1	8
# define STR2	STR1+4
# define CNT	STR2+4
# define RETURN	POP (%ebp); ret; CFI_PUSH (%ebp)

# define UPDATE_STRNCMP_COUNTER \
	/* calculate left number to compare */ \
	mov	$16, %esi; \
	sub	%ecx, %esi; \
	cmp	%esi, %ebp; \
	jbe	L(more8byteseq); \
	sub	%esi, %ebp
#endif

/*
 * strcmp (or strncmp when USE_AS_STRNCMP is defined) for 32-bit x86
 * using SSSE3.  The first 8 bytes are compared one byte at a time;
 * after that the strings are compared 16 bytes per iteration, with
 * PALIGNR realigning one string whenever the two pointers have
 * different offsets within a 16-byte block (the L(ashr_N) cases).
 */
	.section .text.ssse3,"ax",@progbits
ENTRY (ssse3_strcmp_latest)
#ifdef USE_AS_STRNCMP
	PUSH	(%ebp)
#endif
	movl	STR1(%esp), %edx
	movl	STR2(%esp), %eax
#ifdef USE_AS_STRNCMP
	movl	CNT(%esp), %ebp
	cmp	$16, %ebp
	jb	L(less16bytes_sncmp)
	jmp	L(more16bytes)
#endif

	movzbl	(%eax), %ecx
	cmpb	%cl, (%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

	movzbl	1(%eax), %ecx
	cmpb	%cl, 1(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

	movzbl	2(%eax), %ecx
	cmpb	%cl, 2(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

	movzbl	3(%eax), %ecx
	cmpb	%cl, 3(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

	movzbl	4(%eax), %ecx
	cmpb	%cl, 4(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

	movzbl	5(%eax), %ecx
	cmpb	%cl, 5(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

	movzbl	6(%eax), %ecx
	cmpb	%cl, 6(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

	movzbl	7(%eax), %ecx
	cmpb	%cl, 7(%edx)
	jne	L(neq)
	cmpl	$0, %ecx
	je	L(eq)

	add	$8, %edx
	add	$8, %eax
#ifdef USE_AS_STRNCMP
	cmp	$8, %ebp
	lea	-8(%ebp), %ebp
	je	L(eq)
L(more16bytes):
#endif
	movl	%edx, %ecx
	and	$0xfff, %ecx
	cmp	$0xff0, %ecx
	ja	L(crosspage)
	mov	%eax, %ecx
	and	$0xfff, %ecx
	cmp	$0xff0, %ecx
	ja	L(crosspage)
	pxor	%xmm0, %xmm0
	movlpd	(%eax), %xmm1
	movlpd	(%edx), %xmm2
	movhpd	8(%eax), %xmm1
	movhpd	8(%edx), %xmm2
	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %ecx
	sub	$0xffff, %ecx
	jnz	L(less16bytes)
#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(eq)
#endif
	add	$16, %eax
	add	$16, %edx

L(crosspage):

	PUSH	(%ebx)
	PUSH	(%edi)
	PUSH	(%esi)

	movl	%edx, %edi
	movl	%eax, %ecx
	and	$0xf, %ecx
	and	$0xf, %edi
	xor	%ecx, %eax
	xor	%edi, %edx
	xor	%ebx, %ebx
	cmp	%edi, %ecx
	je	L(ashr_0)
	ja	L(bigger)
	or	$0x20, %ebx
	xchg	%edx, %eax
	xchg	%ecx, %edi
L(bigger):
	lea	15(%edi), %edi
	sub	%ecx, %edi
	cmp	$8, %edi
	jle	L(ashr_less_8)
	cmp	$14, %edi
	je	L(ashr_15)
	cmp	$13, %edi
	je	L(ashr_14)
	cmp	$12, %edi
	je	L(ashr_13)
	cmp	$11, %edi
	je	L(ashr_12)
	cmp	$10, %edi
	je	L(ashr_11)
	cmp	$9, %edi
	je	L(ashr_10)
L(ashr_less_8):
	je	L(ashr_9)
	cmp	$7, %edi
	je	L(ashr_8)
	cmp	$6, %edi
	je	L(ashr_7)
	cmp	$5, %edi
	je	L(ashr_6)
	cmp	$4, %edi
	je	L(ashr_5)
	cmp	$3, %edi
	je	L(ashr_4)
	cmp	$2, %edi
	je	L(ashr_3)
	cmp	$1, %edi
	je	L(ashr_2)
	cmp	$0, %edi
	je	L(ashr_1)

/*
 * The following cases will be handled by ashr_0
 *  ecx(offset of esi)  eax(offset of edi)   relative offset   corresponding case
 *        n(0~15)            n(0~15)           15(15+ n-n)        ashr_0
 */
	.p2align 4
L(ashr_0):
	mov	$0xffff, %esi
	movdqa	(%eax), %xmm1
	pxor	%xmm0, %xmm0
	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	(%edx), %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	mov	%ecx, %edi
	jne	L(less32bytes)
	UPDATE_STRNCMP_COUNTER
	mov	$0x10, %ebx
	mov	$0x10, %ecx
	pxor	%xmm0, %xmm0
	.p2align 4
L(loop_ashr_0):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)
#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	jmp	L(loop_ashr_0)

/*
 * The following cases will be handled by ashr_1
 *  ecx(offset of esi)  eax(offset of edi)   relative offset   corresponding case
 *        n(15)              n -15           0(15 +(n-15) - n)    ashr_1
 */
	.p2align 4
L(ashr_1):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$15, %xmm2
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-15(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$1, %ebx
	lea	1(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_1):
	add	$16, %edi
	jg	L(nibble_ashr_1)

L(gobble_ashr_1):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$1, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)
#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif

	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_1)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$1, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_1)

	.p2align 4
L(nibble_ashr_1):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test	$0xfffe, %esi
	jnz	L(ashr_1_exittail)

#ifdef USE_AS_STRNCMP
	cmp	$15, %ebp
	jbe	L(ashr_1_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_1)

	.p2align 4
L(ashr_1_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$1, %xmm0
	psrldq	$1, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_2
 *  ecx(offset of esi)  eax(offset of edi)   relative offset   corresponding case
 *        n(14~15)           n -14           1(15 +(n-14) - n)    ashr_2
 */
	.p2align 4
L(ashr_2):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$14, %xmm2
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-14(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$2, %ebx
	lea	2(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_2):
	add	$16, %edi
	jg	L(nibble_ashr_2)

L(gobble_ashr_2):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$2, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_2)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$2, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_2)

	.p2align 4
L(nibble_ashr_2):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test	$0xfffc, %esi
	jnz	L(ashr_2_exittail)

#ifdef USE_AS_STRNCMP
	cmp	$14, %ebp
	jbe	L(ashr_2_exittail)
#endif

	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_2)

	.p2align 4
L(ashr_2_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$2, %xmm0
	psrldq	$2, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_3
 *  ecx(offset of esi)  eax(offset of edi)   relative offset   corresponding case
 *        n(13~15)           n -13           2(15 +(n-13) - n)    ashr_3
 */
	.p2align 4
L(ashr_3):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$13, %xmm2
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-13(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$3, %ebx
	lea	3(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_3):
	add	$16, %edi
	jg	L(nibble_ashr_3)

L(gobble_ashr_3):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$3, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_3)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$3, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_3)

	.p2align 4
L(nibble_ashr_3):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test	$0xfff8, %esi
	jnz	L(ashr_3_exittail)

#ifdef USE_AS_STRNCMP
	cmp	$13, %ebp
	jbe	L(ashr_3_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_3)

	.p2align 4
L(ashr_3_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$3, %xmm0
	psrldq	$3, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_4
 *  ecx(offset of esi)  eax(offset of edi)   relative offset   corresponding case
 *        n(12~15)           n -12           3(15 +(n-12) - n)    ashr_4
 */
	.p2align 4
L(ashr_4):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$12, %xmm2
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-12(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$4, %ebx
	lea	4(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_4):
	add	$16, %edi
	jg	L(nibble_ashr_4)

L(gobble_ashr_4):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$4, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif

	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_4)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$4, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif

	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_4)

	.p2align 4
L(nibble_ashr_4):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test	$0xfff0, %esi
	jnz	L(ashr_4_exittail)

#ifdef USE_AS_STRNCMP
	cmp	$12, %ebp
	jbe	L(ashr_4_exittail)
#endif

	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_4)

	.p2align 4
L(ashr_4_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$4, %xmm0
	psrldq	$4, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_5
 *  ecx(offset of esi)  eax(offset of edi)   relative offset   corresponding case
 *        n(11~15)           n -11           4(15 +(n-11) - n)    ashr_5
 */
	.p2align 4
L(ashr_5):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$11, %xmm2
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-11(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$5, %ebx
	lea	5(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_5):
	add	$16, %edi
	jg	L(nibble_ashr_5)

L(gobble_ashr_5):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$5, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_5)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$5, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_5)

	.p2align 4
L(nibble_ashr_5):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test	$0xffe0, %esi
	jnz	L(ashr_5_exittail)

#ifdef USE_AS_STRNCMP
	cmp	$11, %ebp
	jbe	L(ashr_5_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_5)

	.p2align 4
L(ashr_5_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$5, %xmm0
	psrldq	$5, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_6
 *  ecx(offset of esi)  eax(offset of edi)   relative offset   corresponding case
 *        n(10~15)           n -10           5(15 +(n-10) - n)    ashr_6
 */

	.p2align 4
L(ashr_6):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$10, %xmm2
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-10(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$6, %ebx
	lea	6(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_6):
	add	$16, %edi
	jg	L(nibble_ashr_6)

L(gobble_ashr_6):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$6, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif

	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_6)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$6, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)
#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif

	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_6)

	.p2align 4
L(nibble_ashr_6):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test	$0xffc0, %esi
	jnz	L(ashr_6_exittail)

#ifdef USE_AS_STRNCMP
	cmp	$10, %ebp
	jbe	L(ashr_6_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_6)

	.p2align 4
L(ashr_6_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$6, %xmm0
	psrldq	$6, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_7
 *  ecx(offset of esi)  eax(offset of edi)   relative offset   corresponding case
 *        n(9~15)            n - 9           6(15 +(n-9) - n)     ashr_7
 */

	.p2align 4
L(ashr_7):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$9, %xmm2
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-9(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$7, %ebx
	lea	8(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_7):
	add	$16, %edi
	jg	L(nibble_ashr_7)

L(gobble_ashr_7):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$7, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif

	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_7)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$7, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif

	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_7)

	.p2align 4
L(nibble_ashr_7):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test	$0xff80, %esi
	jnz	L(ashr_7_exittail)

#ifdef USE_AS_STRNCMP
	cmp	$9, %ebp
	jbe	L(ashr_7_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_7)

	.p2align 4
L(ashr_7_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$7, %xmm0
	psrldq	$7, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_8
 *  ecx(offset of esi)  eax(offset of edi)   relative offset   corresponding case
 *        n(8~15)            n - 8           7(15 +(n-8) - n)     ashr_8
 */
	.p2align 4
L(ashr_8):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$8, %xmm2
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-8(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$8, %ebx
	lea	8(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_8):
	add	$16, %edi
	jg	L(nibble_ashr_8)

L(gobble_ashr_8):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$8, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_8)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$8, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_8)

	.p2align 4
L(nibble_ashr_8):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test	$0xff00, %esi
	jnz	L(ashr_8_exittail)

#ifdef USE_AS_STRNCMP
	cmp	$8, %ebp
	jbe	L(ashr_8_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_8)

	.p2align 4
L(ashr_8_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$8, %xmm0
	psrldq	$8, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_9
 *  ecx(offset of esi)  eax(offset of edi)   relative offset   corresponding case
 *        n(7~15)            n - 7           8(15 +(n-7) - n)     ashr_9
 */
	.p2align 4
L(ashr_9):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$7, %xmm2
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-7(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$9, %ebx
	lea	9(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_9):
	add	$16, %edi
	jg	L(nibble_ashr_9)

L(gobble_ashr_9):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$9, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_9)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$9, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_9)

	.p2align 4
L(nibble_ashr_9):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test	$0xfe00, %esi
	jnz	L(ashr_9_exittail)

#ifdef USE_AS_STRNCMP
	cmp	$7, %ebp
	jbe	L(ashr_9_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_9)

	.p2align 4
L(ashr_9_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$9, %xmm0
	psrldq	$9, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_10
 *  ecx(offset of esi)  eax(offset of edi)   relative offset   corresponding case
 *        n(6~15)            n - 6           9(15 +(n-6) - n)     ashr_10
 */
	.p2align 4
L(ashr_10):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$6, %xmm2
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-6(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$10, %ebx
	lea	10(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_10):
	add	$16, %edi
	jg	L(nibble_ashr_10)

L(gobble_ashr_10):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$10, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_10)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$10, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_10)

	.p2align 4
L(nibble_ashr_10):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test	$0xfc00, %esi
	jnz	L(ashr_10_exittail)

#ifdef USE_AS_STRNCMP
	cmp	$6, %ebp
	jbe	L(ashr_10_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_10)

	.p2align 4
L(ashr_10_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$10, %xmm0
	psrldq	$10, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_11
 *  ecx(offset of esi)  eax(offset of edi)   relative offset   corresponding case
 *        n(5~15)            n - 5           10(15 +(n-5) - n)    ashr_11
 */
	.p2align 4
L(ashr_11):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$5, %xmm2
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-5(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$11, %ebx
	lea	11(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_11):
	add	$16, %edi
	jg	L(nibble_ashr_11)

L(gobble_ashr_11):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$11, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_11)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$11, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_11)

	.p2align 4
L(nibble_ashr_11):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test	$0xf800, %esi
	jnz	L(ashr_11_exittail)

#ifdef USE_AS_STRNCMP
	cmp	$5, %ebp
	jbe	L(ashr_11_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_11)

	.p2align 4
L(ashr_11_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$11, %xmm0
	psrldq	$11, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_12
 *  ecx(offset of esi)  eax(offset of edi)   relative offset   corresponding case
 *        n(4~15)            n - 4           11(15 +(n-4) - n)    ashr_12
 */
	.p2align 4
L(ashr_12):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$4, %xmm2
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-4(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$12, %ebx
	lea	12(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_12):
	add	$16, %edi
	jg	L(nibble_ashr_12)

L(gobble_ashr_12):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$12, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_12)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$12, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_12)

	.p2align 4
L(nibble_ashr_12):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test	$0xf000, %esi
	jnz	L(ashr_12_exittail)

#ifdef USE_AS_STRNCMP
	cmp	$4, %ebp
	jbe	L(ashr_12_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_12)

	.p2align 4
L(ashr_12_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$12, %xmm0
	psrldq	$12, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_13
 *  ecx(offset of esi)  eax(offset of edi)   relative offset   corresponding case
 *        n(3~15)            n - 3           12(15 +(n-3) - n)    ashr_13
 */
	.p2align 4
L(ashr_13):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$3, %xmm2
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-3(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$13, %ebx
	lea	13(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_13):
	add	$16, %edi
	jg	L(nibble_ashr_13)

L(gobble_ashr_13):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$13, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_13)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$13, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_13)

	.p2align 4
L(nibble_ashr_13):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test	$0xe000, %esi
	jnz	L(ashr_13_exittail)

#ifdef USE_AS_STRNCMP
	cmp	$3, %ebp
	jbe	L(ashr_13_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_13)

	.p2align 4
L(ashr_13_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$13, %xmm0
	psrldq	$13, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_14
 *  ecx(offset of esi)  eax(offset of edi)   relative offset   corresponding case
 *        n(2~15)            n - 2           13(15 +(n-2) - n)    ashr_14
 */
	.p2align 4
L(ashr_14):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$2, %xmm2
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-2(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$14, %ebx
	lea	14(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_14):
	add	$16, %edi
	jg	L(nibble_ashr_14)

L(gobble_ashr_14):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$14, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_14)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$14, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_14)

	.p2align 4
L(nibble_ashr_14):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test	$0xc000, %esi
	jnz	L(ashr_14_exittail)

#ifdef USE_AS_STRNCMP
	cmp	$2, %ebp
	jbe	L(ashr_14_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_14)

	.p2align 4
L(ashr_14_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$14, %xmm0
	psrldq	$14, %xmm3
	jmp	L(aftertail)

/*
 * The following cases will be handled by ashr_15
 *  ecx(offset of esi)  eax(offset of edi)   relative offset   corresponding case
 *        n(1~15)            n - 1           14(15 +(n-1) - n)    ashr_15
 */

	.p2align 4
L(ashr_15):
	mov	$0xffff, %esi
	pxor	%xmm0, %xmm0
	movdqa	(%edx), %xmm2
	movdqa	(%eax), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pslldq	$1, %xmm2
	pcmpeqb	%xmm1, %xmm2
	psubb	%xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr	%cl, %esi
	shr	%cl, %edi
	sub	%edi, %esi
	lea	-1(%ecx), %edi
	jnz	L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa	(%edx), %xmm3
	pxor	%xmm0, %xmm0
	mov	$16, %ecx
	or	$15, %ebx
	lea	15(%edx), %edi
	and	$0xfff, %edi
	sub	$0x1000, %edi

	.p2align 4
L(loop_ashr_15):
	add	$16, %edi
	jg	L(nibble_ashr_15)

L(gobble_ashr_15):
	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$15, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3

	add	$16, %edi
	jg	L(nibble_ashr_15)

	movdqa	(%eax, %ecx), %xmm1
	movdqa	(%edx, %ecx), %xmm2
	movdqa	%xmm2, %xmm4

	palignr	$15, %xmm3, %xmm2

	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub	$0xffff, %esi
	jnz	L(exit)

#ifdef USE_AS_STRNCMP
	cmp	$16, %ebp
	lea	-16(%ebp), %ebp
	jbe	L(more8byteseq)
#endif
	add	$16, %ecx
	movdqa	%xmm4, %xmm3
	jmp	L(loop_ashr_15)

	.p2align 4
L(nibble_ashr_15):
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test	$0x8000, %esi
	jnz	L(ashr_15_exittail)

#ifdef USE_AS_STRNCMP
	cmp	$1, %ebp
	jbe	L(ashr_15_exittail)
#endif
	pxor	%xmm0, %xmm0
	sub	$0x1000, %edi
	jmp	L(gobble_ashr_15)

	.p2align 4
L(ashr_15_exittail):
	movdqa	(%eax, %ecx), %xmm1
	psrldq	$15, %xmm0
	psrldq	$15, %xmm3
	jmp	L(aftertail)

	.p2align 4
L(aftertail):
	pcmpeqb	%xmm3, %xmm1
	psubb	%xmm0, %xmm1
	pmovmskb %xmm1, %esi
	not	%esi
L(exit):
	mov	%ebx, %edi
	and	$0x1f, %edi
	lea	-16(%edi, %ecx), %edi
L(less32bytes):
	add	%edi, %edx
	add	%ecx, %eax
	test	$0x20, %ebx
	jz	L(ret2)
	xchg	%eax, %edx

	.p2align 4
L(ret2):
	mov	%esi, %ecx
	POP	(%esi)
	POP	(%edi)
	POP	(%ebx)
/* The first set bit of the mask in %ecx indexes the first byte that
   differs or terminates the string within the 16 bytes just compared.  */
L(less16bytes):
	test	%cl, %cl
	jz	L(2next_8_bytes)

	test	$0x01, %cl
	jnz	L(Byte0)

	test	$0x02, %cl
	jnz	L(Byte1)

	test	$0x04, %cl
	jnz	L(Byte2)

	test	$0x08, %cl
	jnz	L(Byte3)

	test	$0x10, %cl
	jnz	L(Byte4)

	test	$0x20, %cl
	jnz	L(Byte5)

	test	$0x40, %cl
	jnz	L(Byte6)
#ifdef USE_AS_STRNCMP
	cmp	$7, %ebp
	jbe	L(eq)
#endif

	movzx	7(%eax), %ecx
	movzx	7(%edx), %eax

	sub	%ecx, %eax
	RETURN

	.p2align 4
L(Byte0):
#ifdef USE_AS_STRNCMP
	cmp	$0, %ebp
	jbe	L(eq)
#endif
	movzx	(%eax), %ecx
	movzx	(%edx), %eax

	sub	%ecx, %eax
	RETURN

	.p2align 4
L(Byte1):
#ifdef USE_AS_STRNCMP
	cmp	$1, %ebp
	jbe	L(eq)
#endif
	movzx	1(%eax), %ecx
	movzx	1(%edx), %eax

	sub	%ecx, %eax
	RETURN

	.p2align 4
L(Byte2):
#ifdef USE_AS_STRNCMP
	cmp	$2, %ebp
	jbe	L(eq)
#endif
	movzx	2(%eax), %ecx
	movzx	2(%edx), %eax

	sub	%ecx, %eax
	RETURN

	.p2align 4
L(Byte3):
#ifdef USE_AS_STRNCMP
	cmp	$3, %ebp
	jbe	L(eq)
#endif
	movzx	3(%eax), %ecx
	movzx	3(%edx), %eax

	sub	%ecx, %eax
	RETURN

	.p2align 4
L(Byte4):
#ifdef USE_AS_STRNCMP
	cmp	$4, %ebp
	jbe	L(eq)
#endif
	movzx	4(%eax), %ecx
	movzx	4(%edx), %eax

	sub	%ecx, %eax
	RETURN

	.p2align 4
L(Byte5):
#ifdef USE_AS_STRNCMP
	cmp	$5, %ebp
	jbe	L(eq)
#endif
	movzx	5(%eax), %ecx
	movzx	5(%edx), %eax

	sub	%ecx, %eax
	RETURN

	.p2align 4
L(Byte6):
#ifdef USE_AS_STRNCMP
	cmp	$6, %ebp
	jbe	L(eq)
#endif
	movzx	6(%eax), %ecx
	movzx	6(%edx), %eax

	sub	%ecx, %eax
	RETURN

	.p2align 4
L(2next_8_bytes):
	add	$8, %eax
	add	$8, %edx
#ifdef USE_AS_STRNCMP
	cmp	$8, %ebp
	lea	-8(%ebp), %ebp
	jbe	L(eq)
#endif

	test	$0x01, %ch
	jnz	L(Byte0)

	test	$0x02, %ch
	jnz	L(Byte1)

	test	$0x04, %ch
	jnz	L(Byte2)

	test	$0x08, %ch
	jnz	L(Byte3)

	test	$0x10, %ch
	jnz	L(Byte4)

	test	$0x20, %ch
	jnz	L(Byte5)

	test	$0x40, %ch
	jnz	L(Byte6)

#ifdef USE_AS_STRNCMP
	cmp	$7, %ebp
	jbe	L(eq)
#endif
	movzx	7(%eax), %ecx
	movzx	7(%edx), %eax

	sub	%ecx, %eax
	RETURN

	.p2align 4
L(neq):
	mov	$1, %eax
	ja	L(neq_bigger)
	neg	%eax
L(neq_bigger):
	RETURN

#ifdef USE_AS_STRNCMP
	CFI_PUSH (%ebx)
	CFI_PUSH (%edi)
	CFI_PUSH (%esi)

	.p2align 4
L(more8byteseq):
	POP	(%esi)
	POP	(%edi)
	POP	(%ebx)
#endif

L(eq):

#ifdef USE_AS_STRNCMP
	POP	(%ebp)
#endif
	xorl	%eax, %eax
	ret

#ifdef USE_AS_STRNCMP
	CFI_PUSH (%ebp)

	.p2align 4
L(less16bytes_sncmp):
	test	%ebp, %ebp
	jz	L(eq)

	movzbl	(%eax), %ecx
	cmpb	%cl, (%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmp	$1, %ebp
	je	L(eq)

	movzbl	1(%eax), %ecx
	cmpb	%cl, 1(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmp	$2, %ebp
	je	L(eq)

	movzbl	2(%eax), %ecx
	cmpb	%cl, 2(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmp	$3, %ebp
	je	L(eq)

	movzbl	3(%eax), %ecx
	cmpb	%cl, 3(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmp	$4, %ebp
	je	L(eq)

	movzbl	4(%eax), %ecx
	cmpb	%cl, 4(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmp	$5, %ebp
	je	L(eq)

	movzbl	5(%eax), %ecx
	cmpb	%cl, 5(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmp	$6, %ebp
	je	L(eq)

	movzbl	6(%eax), %ecx
	cmpb	%cl, 6(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmp	$7, %ebp
	je	L(eq)

	movzbl	7(%eax), %ecx
	cmpb	%cl, 7(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmp	$8, %ebp
	je	L(eq)

	movzbl	8(%eax), %ecx
	cmpb	%cl, 8(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmp	$9, %ebp
	je	L(eq)

	movzbl	9(%eax), %ecx
	cmpb	%cl, 9(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmp	$10, %ebp
	je	L(eq)

	movzbl	10(%eax), %ecx
	cmpb	%cl, 10(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmp	$11, %ebp
	je	L(eq)

	movzbl	11(%eax), %ecx
	cmpb	%cl, 11(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmp	$12, %ebp
	je	L(eq)

	movzbl	12(%eax), %ecx
	cmpb	%cl, 12(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmp	$13, %ebp
	je	L(eq)

	movzbl	13(%eax), %ecx
	cmpb	%cl, 13(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmp	$14, %ebp
	je	L(eq)

	movzbl	14(%eax), %ecx
	cmpb	%cl, 14(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	cmp	$15, %ebp
	je	L(eq)

	movzbl	15(%eax), %ecx
	cmpb	%cl, 15(%edx)
	jne	L(neq)
	test	%cl, %cl
	je	L(eq)

	POP	(%ebp)
	xor	%eax, %eax
	ret
#endif

END (ssse3_strcmp_latest)