1/* 2Copyright (c) 2011, Intel Corporation 3All rights reserved. 4 5Redistribution and use in source and binary forms, with or without 6modification, are permitted provided that the following conditions are met: 7 8 * Redistributions of source code must retain the above copyright notice, 9 * this list of conditions and the following disclaimer. 10 11 * Redistributions in binary form must reproduce the above copyright notice, 12 * this list of conditions and the following disclaimer in the documentation 13 * and/or other materials provided with the distribution. 14 15 * Neither the name of Intel Corporation nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 19THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 20ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 23ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 24(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 26ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 28SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

/* Preamble: helper macros for the copy routine that follows.
   Everything from here to the matching #endif (further down in the
   file) is left out when USE_AS_STRCAT is defined.  */
#if !defined USE_AS_STRCAT

/* L(label): build a ".L"-prefixed label name from `label'.  */
# if !defined L
#  define L(label)	.L##label
# endif

/* Thin wrappers around the assembler's .cfi_* unwind directives.
   Each one is only defined when the includer has not supplied it.  */
# if !defined cfi_startproc
#  define cfi_startproc	.cfi_startproc
# endif

# if !defined cfi_endproc
#  define cfi_endproc	.cfi_endproc
# endif

# if !defined cfi_rel_offset
#  define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
# endif

# if !defined cfi_restore
#  define cfi_restore(reg)	.cfi_restore reg
# endif

# if !defined cfi_adjust_cfa_offset
#  define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
# endif

/* ENTRY(name): mark `name' as a global symbol of type @function,
   align it to 2^4 = 16 bytes, emit its label and open a CFI region.  */
# if !defined ENTRY
#  define ENTRY(name)	\
	.type name, @function;	\
	.globl name;	\
	.p2align 4;	\
name:	\
	cfi_startproc
# endif

/* END(name): close the CFI region and record the size of `name'.  */
# if !defined END
#  define END(name)	\
	cfi_endproc;	\
	.size name, .-name
# endif

/* Unwind bookkeeping for a 4-byte push: the CFA offset grows by 4 and
   REG is recorded as saved at offset 0.  Directives only, no code.  */
# define CFI_PUSH(REG)	\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

/* Unwind bookkeeping for a 4-byte pop: the CFA offset shrinks by 4 and
   REG is marked as restored.  Directives only, no code.  */
# define CFI_POP(REG)	\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* pushl/popl paired with the matching unwind bookkeeping.  */
# define PUSH(REG)	pushl REG; CFI_PUSH (REG)
# define POP(REG)	popl REG; CFI_POP (REG)

/* Name of the generated entry point unless the includer chose one.  */
# if !defined STRCPY
#  define STRCPY	strcpy_atom
# endif

/* Per-variant stack handling.
     PARMS    - %esp-relative offset used for STR1 below.
     ENTRANCE - code emitted right after ENTRY.
     RETURN   - return sequence for paths that have not pushed %edi.
     RETURN1  - return sequence that pops %edi first.
   The CFI_PUSH invocations placed after `ret' emit unwind directives
   only; they cancel the CFI effect of the preceding POPs (presumably
   so that the code following the return is still described with the
   registers pushed).
   The USE_AS_STRNCPY build pushes %ebx in ENTRANCE, which is why its
   PARMS is 4 bytes larger and its return sequences pop %ebx.  */
# if !defined USE_AS_STRNCPY
#  define PARMS		4
#  define ENTRANCE
#  define RETURN	ret
#  define RETURN1	POP (%edi); ret; CFI_PUSH (%edi)
# else
#  define PARMS		8
#  define ENTRANCE	PUSH (%ebx)
#  define RETURN	POP (%ebx); ret; CFI_PUSH (%ebx);
#  define RETURN1	POP (%edi); POP (%ebx); ret; CFI_PUSH (%ebx); CFI_PUSH (%edi)
# endif

/* Load the return value into %eax.
     USE_AS_STPCPY build: %eax = %edx + n for both macros.
     otherwise:           SAVE_RESULT copies %edi, SAVE_RESULT_TAIL
                          copies %edx; the argument n is not used.  */
# if !defined USE_AS_STPCPY
#  define SAVE_RESULT(n)	movl	%edi, %eax
#  define SAVE_RESULT_TAIL(n)	movl	%edx, %eax
# else
#  define SAVE_RESULT(n)	lea	n(%edx), %eax
#  define SAVE_RESULT_TAIL(n)	lea	n(%edx), %eax
# endif

/* %esp-relative offsets of three consecutive 4-byte stack slots.  The
   entry code loads STR1(%esp) into %edx, STR2(%esp) into %ecx and, in
   the USE_AS_STRNCPY build, LEN(%esp) into %ebx.  */
# define STR1	PARMS
# define STR2	STR1+4
# define LEN	STR2+4

/* The copy code below uses the following instructions:
	movb   - 1 byte
	movw   - 2 bytes
	movl   - 4 bytes
	movlpd - 8 bytes
	movaps - 16 bytes - requires 16-byte alignment
		 of the source and destination addresses.
118*/ 119 120.text 121ENTRY (STRCPY) 122 ENTRANCE 123 mov STR1(%esp), %edx 124 mov STR2(%esp), %ecx 125# ifdef USE_AS_STRNCPY 126 movl LEN(%esp), %ebx 127 cmp $8, %ebx 128 jbe L(StrncpyExit8Bytes) 129# endif 130 cmpb $0, (%ecx) 131 jz L(ExitTail1) 132 cmpb $0, 1(%ecx) 133 jz L(ExitTail2) 134 cmpb $0, 2(%ecx) 135 jz L(ExitTail3) 136 cmpb $0, 3(%ecx) 137 jz L(ExitTail4) 138 cmpb $0, 4(%ecx) 139 jz L(ExitTail5) 140 cmpb $0, 5(%ecx) 141 jz L(ExitTail6) 142 cmpb $0, 6(%ecx) 143 jz L(ExitTail7) 144 cmpb $0, 7(%ecx) 145 jz L(ExitTail8) 146# ifdef USE_AS_STRNCPY 147 cmp $16, %ebx 148 jb L(StrncpyExit15Bytes) 149# endif 150 cmpb $0, 8(%ecx) 151 jz L(ExitTail9) 152 cmpb $0, 9(%ecx) 153 jz L(ExitTail10) 154 cmpb $0, 10(%ecx) 155 jz L(ExitTail11) 156 cmpb $0, 11(%ecx) 157 jz L(ExitTail12) 158 cmpb $0, 12(%ecx) 159 jz L(ExitTail13) 160 cmpb $0, 13(%ecx) 161 jz L(ExitTail14) 162 cmpb $0, 14(%ecx) 163 jz L(ExitTail15) 164# if defined USE_AS_STRNCPY && !defined USE_AS_STRLCPY 165 cmp $16, %ebx 166 je L(ExitTail16) 167# endif 168 cmpb $0, 15(%ecx) 169 jz L(ExitTail16) 170 171# if defined USE_AS_STRNCPY && defined USE_AS_STRLCPY 172 cmp $16, %ebx 173 je L(StrlcpyExitTail16) 174# endif 175 176 PUSH (%edi) 177# ifndef USE_AS_STRLCPY 178 mov %edx, %edi 179# else 180 mov %ecx, %edi 181# endif 182#endif 183 PUSH (%esi) 184#ifdef USE_AS_STRNCPY 185 mov %ecx, %esi 186 sub $16, %ebx 187 and $0xf, %esi 188 189/* add 16 bytes ecx_offset to ebx */ 190 191 add %esi, %ebx 192#endif 193 lea 16(%ecx), %esi 194 and $-16, %esi 195 pxor %xmm0, %xmm0 196 movlpd (%ecx), %xmm1 197 movlpd %xmm1, (%edx) 198 199 pcmpeqb (%esi), %xmm0 200 movlpd 8(%ecx), %xmm1 201 movlpd %xmm1, 8(%edx) 202 203 pmovmskb %xmm0, %eax 204 sub %ecx, %esi 205 206#ifdef USE_AS_STRNCPY 207 sub $16, %ebx 208 jbe L(CopyFrom1To16BytesCase2OrCase3) 209#endif 210 test %eax, %eax 211 jnz L(CopyFrom1To16Bytes) 212 213 mov %edx, %eax 214 lea 16(%edx), %edx 215 and $-16, %edx 216 sub %edx, %eax 217 218#ifdef USE_AS_STRNCPY 219 add %eax, 
%esi 220 lea -1(%esi), %esi 221 and $1<<31, %esi 222 test %esi, %esi 223 jnz L(ContinueCopy) 224 lea 16(%ebx), %ebx 225 226L(ContinueCopy): 227#endif 228 sub %eax, %ecx 229 mov %ecx, %eax 230 and $0xf, %eax 231 mov $0, %esi 232 233/* case: ecx_offset == edx_offset */ 234 235 jz L(Align16Both) 236 237 cmp $8, %eax 238 jae L(ShlHigh8) 239 cmp $1, %eax 240 je L(Shl1) 241 cmp $2, %eax 242 je L(Shl2) 243 cmp $3, %eax 244 je L(Shl3) 245 cmp $4, %eax 246 je L(Shl4) 247 cmp $5, %eax 248 je L(Shl5) 249 cmp $6, %eax 250 je L(Shl6) 251 jmp L(Shl7) 252 253L(ShlHigh8): 254 je L(Shl8) 255 cmp $9, %eax 256 je L(Shl9) 257 cmp $10, %eax 258 je L(Shl10) 259 cmp $11, %eax 260 je L(Shl11) 261 cmp $12, %eax 262 je L(Shl12) 263 cmp $13, %eax 264 je L(Shl13) 265 cmp $14, %eax 266 je L(Shl14) 267 jmp L(Shl15) 268 269L(Align16Both): 270 movaps (%ecx), %xmm1 271 movaps 16(%ecx), %xmm2 272 movaps %xmm1, (%edx) 273 pcmpeqb %xmm2, %xmm0 274 pmovmskb %xmm0, %eax 275 lea 16(%esi), %esi 276#ifdef USE_AS_STRNCPY 277 sub $16, %ebx 278 jbe L(CopyFrom1To16BytesCase2OrCase3) 279#endif 280 test %eax, %eax 281 jnz L(CopyFrom1To16Bytes) 282 283 movaps 16(%ecx, %esi), %xmm3 284 movaps %xmm2, (%edx, %esi) 285 pcmpeqb %xmm3, %xmm0 286 pmovmskb %xmm0, %eax 287 lea 16(%esi), %esi 288#ifdef USE_AS_STRNCPY 289 sub $16, %ebx 290 jbe L(CopyFrom1To16BytesCase2OrCase3) 291#endif 292 test %eax, %eax 293 jnz L(CopyFrom1To16Bytes) 294 295 movaps 16(%ecx, %esi), %xmm4 296 movaps %xmm3, (%edx, %esi) 297 pcmpeqb %xmm4, %xmm0 298 pmovmskb %xmm0, %eax 299 lea 16(%esi), %esi 300#ifdef USE_AS_STRNCPY 301 sub $16, %ebx 302 jbe L(CopyFrom1To16BytesCase2OrCase3) 303#endif 304 test %eax, %eax 305 jnz L(CopyFrom1To16Bytes) 306 307 movaps 16(%ecx, %esi), %xmm1 308 movaps %xmm4, (%edx, %esi) 309 pcmpeqb %xmm1, %xmm0 310 pmovmskb %xmm0, %eax 311 lea 16(%esi), %esi 312#ifdef USE_AS_STRNCPY 313 sub $16, %ebx 314 jbe L(CopyFrom1To16BytesCase2OrCase3) 315#endif 316 test %eax, %eax 317 jnz L(CopyFrom1To16Bytes) 318 319 movaps 16(%ecx, 
%esi), %xmm2 320 movaps %xmm1, (%edx, %esi) 321 pcmpeqb %xmm2, %xmm0 322 pmovmskb %xmm0, %eax 323 lea 16(%esi), %esi 324#ifdef USE_AS_STRNCPY 325 sub $16, %ebx 326 jbe L(CopyFrom1To16BytesCase2OrCase3) 327#endif 328 test %eax, %eax 329 jnz L(CopyFrom1To16Bytes) 330 331 movaps 16(%ecx, %esi), %xmm3 332 movaps %xmm2, (%edx, %esi) 333 pcmpeqb %xmm3, %xmm0 334 pmovmskb %xmm0, %eax 335 lea 16(%esi), %esi 336#ifdef USE_AS_STRNCPY 337 sub $16, %ebx 338 jbe L(CopyFrom1To16BytesCase2OrCase3) 339#endif 340 test %eax, %eax 341 jnz L(CopyFrom1To16Bytes) 342 343 movaps %xmm3, (%edx, %esi) 344 mov %ecx, %eax 345 lea 16(%ecx, %esi), %ecx 346 and $-0x40, %ecx 347 sub %ecx, %eax 348 sub %eax, %edx 349#ifdef USE_AS_STRNCPY 350 lea 112(%ebx, %eax), %ebx 351#endif 352 mov $-0x40, %esi 353 354L(Aligned64Loop): 355 movaps (%ecx), %xmm2 356 movaps 32(%ecx), %xmm3 357 movaps %xmm2, %xmm4 358 movaps 16(%ecx), %xmm5 359 movaps %xmm3, %xmm6 360 movaps 48(%ecx), %xmm7 361 pminub %xmm5, %xmm2 362 pminub %xmm7, %xmm3 363 pminub %xmm2, %xmm3 364 lea 64(%edx), %edx 365 pcmpeqb %xmm0, %xmm3 366 lea 64(%ecx), %ecx 367 pmovmskb %xmm3, %eax 368#ifdef USE_AS_STRNCPY 369 sub $64, %ebx 370 jbe L(StrncpyLeaveCase2OrCase3) 371#endif 372 test %eax, %eax 373 jnz L(Aligned64Leave) 374 movaps %xmm4, -64(%edx) 375 movaps %xmm5, -48(%edx) 376 movaps %xmm6, -32(%edx) 377 movaps %xmm7, -16(%edx) 378 jmp L(Aligned64Loop) 379 380L(Aligned64Leave): 381#ifdef USE_AS_STRNCPY 382 lea 48(%ebx), %ebx 383#endif 384 pcmpeqb %xmm4, %xmm0 385 pmovmskb %xmm0, %eax 386 test %eax, %eax 387 jnz L(CopyFrom1To16Bytes) 388 389 pcmpeqb %xmm5, %xmm0 390#ifdef USE_AS_STRNCPY 391 lea -16(%ebx), %ebx 392#endif 393 pmovmskb %xmm0, %eax 394 movaps %xmm4, -64(%edx) 395 lea 16(%esi), %esi 396 test %eax, %eax 397 jnz L(CopyFrom1To16Bytes) 398 399 pcmpeqb %xmm6, %xmm0 400#ifdef USE_AS_STRNCPY 401 lea -16(%ebx), %ebx 402#endif 403 pmovmskb %xmm0, %eax 404 movaps %xmm5, -48(%edx) 405 lea 16(%esi), %esi 406 test %eax, %eax 407 jnz 
L(CopyFrom1To16Bytes) 408 409 movaps %xmm6, -32(%edx) 410 pcmpeqb %xmm7, %xmm0 411#ifdef USE_AS_STRNCPY 412 lea -16(%ebx), %ebx 413#endif 414 pmovmskb %xmm0, %eax 415 lea 16(%esi), %esi 416 jmp L(CopyFrom1To16Bytes) 417 418 .p2align 4 419L(Shl1): 420 movaps -1(%ecx), %xmm1 421 movaps 15(%ecx), %xmm2 422L(Shl1Start): 423 pcmpeqb %xmm2, %xmm0 424 pmovmskb %xmm0, %eax 425 movaps %xmm2, %xmm3 426#ifdef USE_AS_STRNCPY 427 sub $16, %ebx 428 jbe L(StrncpyExit1Case2OrCase3) 429#endif 430 test %eax, %eax 431 jnz L(Shl1LoopExit) 432 433 palignr $1, %xmm1, %xmm2 434 movaps %xmm3, %xmm1 435 movaps %xmm2, (%edx) 436 movaps 31(%ecx), %xmm2 437 438 pcmpeqb %xmm2, %xmm0 439 lea 16(%edx), %edx 440 pmovmskb %xmm0, %eax 441 lea 16(%ecx), %ecx 442 movaps %xmm2, %xmm3 443#ifdef USE_AS_STRNCPY 444 sub $16, %ebx 445 jbe L(StrncpyExit1Case2OrCase3) 446#endif 447 test %eax, %eax 448 jnz L(Shl1LoopExit) 449 450 palignr $1, %xmm1, %xmm2 451 movaps %xmm2, (%edx) 452 movaps 31(%ecx), %xmm2 453 movaps %xmm3, %xmm1 454 455 pcmpeqb %xmm2, %xmm0 456 lea 16(%edx), %edx 457 pmovmskb %xmm0, %eax 458 lea 16(%ecx), %ecx 459 movaps %xmm2, %xmm3 460#ifdef USE_AS_STRNCPY 461 sub $16, %ebx 462 jbe L(StrncpyExit1Case2OrCase3) 463#endif 464 test %eax, %eax 465 jnz L(Shl1LoopExit) 466 467 palignr $1, %xmm1, %xmm2 468 movaps %xmm2, (%edx) 469 movaps 31(%ecx), %xmm2 470 471 pcmpeqb %xmm2, %xmm0 472 lea 16(%edx), %edx 473 pmovmskb %xmm0, %eax 474 lea 16(%ecx), %ecx 475#ifdef USE_AS_STRNCPY 476 sub $16, %ebx 477 jbe L(StrncpyExit1Case2OrCase3) 478#endif 479 test %eax, %eax 480 jnz L(Shl1LoopExit) 481 482 palignr $1, %xmm3, %xmm2 483 movaps %xmm2, (%edx) 484 lea 31(%ecx), %ecx 485 lea 16(%edx), %edx 486 487 mov %ecx, %eax 488 and $-0x40, %ecx 489 sub %ecx, %eax 490 lea -15(%ecx), %ecx 491 sub %eax, %edx 492#ifdef USE_AS_STRNCPY 493 add %eax, %ebx 494#endif 495 movaps -1(%ecx), %xmm1 496 497L(Shl1LoopStart): 498 movaps 15(%ecx), %xmm2 499 movaps 31(%ecx), %xmm3 500 movaps %xmm3, %xmm6 501 movaps 47(%ecx), %xmm4 502 
movaps %xmm4, %xmm7 503 movaps 63(%ecx), %xmm5 504 pminub %xmm2, %xmm6 505 pminub %xmm5, %xmm7 506 pminub %xmm6, %xmm7 507 pcmpeqb %xmm0, %xmm7 508 pmovmskb %xmm7, %eax 509 movaps %xmm5, %xmm7 510 palignr $1, %xmm4, %xmm5 511 palignr $1, %xmm3, %xmm4 512 test %eax, %eax 513 jnz L(Shl1Start) 514#ifdef USE_AS_STRNCPY 515 sub $64, %ebx 516 jbe L(StrncpyLeave1) 517#endif 518 palignr $1, %xmm2, %xmm3 519 lea 64(%ecx), %ecx 520 palignr $1, %xmm1, %xmm2 521 movaps %xmm7, %xmm1 522 movaps %xmm5, 48(%edx) 523 movaps %xmm4, 32(%edx) 524 movaps %xmm3, 16(%edx) 525 movaps %xmm2, (%edx) 526 lea 64(%edx), %edx 527 jmp L(Shl1LoopStart) 528 529L(Shl1LoopExit): 530 movlpd (%ecx), %xmm0 531 movlpd %xmm0, (%edx) 532 movlpd 7(%ecx), %xmm0 533 movlpd %xmm0, 7(%edx) 534 mov $15, %esi 535 jmp L(CopyFrom1To16Bytes) 536 537 .p2align 4 538L(Shl2): 539 movaps -2(%ecx), %xmm1 540 movaps 14(%ecx), %xmm2 541L(Shl2Start): 542 pcmpeqb %xmm2, %xmm0 543 pmovmskb %xmm0, %eax 544 movaps %xmm2, %xmm3 545#ifdef USE_AS_STRNCPY 546 sub $16, %ebx 547 jbe L(StrncpyExit2Case2OrCase3) 548#endif 549 test %eax, %eax 550 jnz L(Shl2LoopExit) 551 552 palignr $2, %xmm1, %xmm2 553 movaps %xmm3, %xmm1 554 movaps %xmm2, (%edx) 555 movaps 30(%ecx), %xmm2 556 557 pcmpeqb %xmm2, %xmm0 558 lea 16(%edx), %edx 559 pmovmskb %xmm0, %eax 560 lea 16(%ecx), %ecx 561 movaps %xmm2, %xmm3 562#ifdef USE_AS_STRNCPY 563 sub $16, %ebx 564 jbe L(StrncpyExit2Case2OrCase3) 565#endif 566 test %eax, %eax 567 jnz L(Shl2LoopExit) 568 569 palignr $2, %xmm1, %xmm2 570 movaps %xmm2, (%edx) 571 movaps 30(%ecx), %xmm2 572 movaps %xmm3, %xmm1 573 574 pcmpeqb %xmm2, %xmm0 575 lea 16(%edx), %edx 576 pmovmskb %xmm0, %eax 577 lea 16(%ecx), %ecx 578 movaps %xmm2, %xmm3 579#ifdef USE_AS_STRNCPY 580 sub $16, %ebx 581 jbe L(StrncpyExit2Case2OrCase3) 582#endif 583 test %eax, %eax 584 jnz L(Shl2LoopExit) 585 586 palignr $2, %xmm1, %xmm2 587 movaps %xmm2, (%edx) 588 movaps 30(%ecx), %xmm2 589 590 pcmpeqb %xmm2, %xmm0 591 lea 16(%edx), %edx 592 pmovmskb 
%xmm0, %eax 593 lea 16(%ecx), %ecx 594#ifdef USE_AS_STRNCPY 595 sub $16, %ebx 596 jbe L(StrncpyExit2Case2OrCase3) 597#endif 598 test %eax, %eax 599 jnz L(Shl2LoopExit) 600 601 palignr $2, %xmm3, %xmm2 602 movaps %xmm2, (%edx) 603 lea 30(%ecx), %ecx 604 lea 16(%edx), %edx 605 606 mov %ecx, %eax 607 and $-0x40, %ecx 608 sub %ecx, %eax 609 lea -14(%ecx), %ecx 610 sub %eax, %edx 611#ifdef USE_AS_STRNCPY 612 add %eax, %ebx 613#endif 614 movaps -2(%ecx), %xmm1 615 616L(Shl2LoopStart): 617 movaps 14(%ecx), %xmm2 618 movaps 30(%ecx), %xmm3 619 movaps %xmm3, %xmm6 620 movaps 46(%ecx), %xmm4 621 movaps %xmm4, %xmm7 622 movaps 62(%ecx), %xmm5 623 pminub %xmm2, %xmm6 624 pminub %xmm5, %xmm7 625 pminub %xmm6, %xmm7 626 pcmpeqb %xmm0, %xmm7 627 pmovmskb %xmm7, %eax 628 movaps %xmm5, %xmm7 629 palignr $2, %xmm4, %xmm5 630 palignr $2, %xmm3, %xmm4 631 test %eax, %eax 632 jnz L(Shl2Start) 633#ifdef USE_AS_STRNCPY 634 sub $64, %ebx 635 jbe L(StrncpyLeave2) 636#endif 637 palignr $2, %xmm2, %xmm3 638 lea 64(%ecx), %ecx 639 palignr $2, %xmm1, %xmm2 640 movaps %xmm7, %xmm1 641 movaps %xmm5, 48(%edx) 642 movaps %xmm4, 32(%edx) 643 movaps %xmm3, 16(%edx) 644 movaps %xmm2, (%edx) 645 lea 64(%edx), %edx 646 jmp L(Shl2LoopStart) 647 648L(Shl2LoopExit): 649 movlpd (%ecx), %xmm0 650 movlpd 6(%ecx), %xmm1 651 movlpd %xmm0, (%edx) 652 movlpd %xmm1, 6(%edx) 653 mov $14, %esi 654 jmp L(CopyFrom1To16Bytes) 655 656 .p2align 4 657L(Shl3): 658 movaps -3(%ecx), %xmm1 659 movaps 13(%ecx), %xmm2 660L(Shl3Start): 661 pcmpeqb %xmm2, %xmm0 662 pmovmskb %xmm0, %eax 663 movaps %xmm2, %xmm3 664#ifdef USE_AS_STRNCPY 665 sub $16, %ebx 666 jbe L(StrncpyExit3Case2OrCase3) 667#endif 668 test %eax, %eax 669 jnz L(Shl3LoopExit) 670 671 palignr $3, %xmm1, %xmm2 672 movaps %xmm3, %xmm1 673 movaps %xmm2, (%edx) 674 movaps 29(%ecx), %xmm2 675 676 pcmpeqb %xmm2, %xmm0 677 lea 16(%edx), %edx 678 pmovmskb %xmm0, %eax 679 lea 16(%ecx), %ecx 680 movaps %xmm2, %xmm3 681#ifdef USE_AS_STRNCPY 682 sub $16, %ebx 683 jbe 
L(StrncpyExit3Case2OrCase3) 684#endif 685 test %eax, %eax 686 jnz L(Shl3LoopExit) 687 688 palignr $3, %xmm1, %xmm2 689 movaps %xmm2, (%edx) 690 movaps 29(%ecx), %xmm2 691 movaps %xmm3, %xmm1 692 693 pcmpeqb %xmm2, %xmm0 694 lea 16(%edx), %edx 695 pmovmskb %xmm0, %eax 696 lea 16(%ecx), %ecx 697 movaps %xmm2, %xmm3 698#ifdef USE_AS_STRNCPY 699 sub $16, %ebx 700 jbe L(StrncpyExit3Case2OrCase3) 701#endif 702 test %eax, %eax 703 jnz L(Shl3LoopExit) 704 705 palignr $3, %xmm1, %xmm2 706 movaps %xmm2, (%edx) 707 movaps 29(%ecx), %xmm2 708 709 pcmpeqb %xmm2, %xmm0 710 lea 16(%edx), %edx 711 pmovmskb %xmm0, %eax 712 lea 16(%ecx), %ecx 713#ifdef USE_AS_STRNCPY 714 sub $16, %ebx 715 jbe L(StrncpyExit3Case2OrCase3) 716#endif 717 test %eax, %eax 718 jnz L(Shl3LoopExit) 719 720 palignr $3, %xmm3, %xmm2 721 movaps %xmm2, (%edx) 722 lea 29(%ecx), %ecx 723 lea 16(%edx), %edx 724 725 mov %ecx, %eax 726 and $-0x40, %ecx 727 sub %ecx, %eax 728 lea -13(%ecx), %ecx 729 sub %eax, %edx 730#ifdef USE_AS_STRNCPY 731 add %eax, %ebx 732#endif 733 movaps -3(%ecx), %xmm1 734 735L(Shl3LoopStart): 736 movaps 13(%ecx), %xmm2 737 movaps 29(%ecx), %xmm3 738 movaps %xmm3, %xmm6 739 movaps 45(%ecx), %xmm4 740 movaps %xmm4, %xmm7 741 movaps 61(%ecx), %xmm5 742 pminub %xmm2, %xmm6 743 pminub %xmm5, %xmm7 744 pminub %xmm6, %xmm7 745 pcmpeqb %xmm0, %xmm7 746 pmovmskb %xmm7, %eax 747 movaps %xmm5, %xmm7 748 palignr $3, %xmm4, %xmm5 749 palignr $3, %xmm3, %xmm4 750 test %eax, %eax 751 jnz L(Shl3Start) 752#ifdef USE_AS_STRNCPY 753 sub $64, %ebx 754 jbe L(StrncpyLeave3) 755#endif 756 palignr $3, %xmm2, %xmm3 757 lea 64(%ecx), %ecx 758 palignr $3, %xmm1, %xmm2 759 movaps %xmm7, %xmm1 760 movaps %xmm5, 48(%edx) 761 movaps %xmm4, 32(%edx) 762 movaps %xmm3, 16(%edx) 763 movaps %xmm2, (%edx) 764 lea 64(%edx), %edx 765 jmp L(Shl3LoopStart) 766 767L(Shl3LoopExit): 768 movlpd (%ecx), %xmm0 769 movlpd 5(%ecx), %xmm1 770 movlpd %xmm0, (%edx) 771 movlpd %xmm1, 5(%edx) 772 mov $13, %esi 773 jmp L(CopyFrom1To16Bytes) 774 
775 .p2align 4 776L(Shl4): 777 movaps -4(%ecx), %xmm1 778 movaps 12(%ecx), %xmm2 779L(Shl4Start): 780 pcmpeqb %xmm2, %xmm0 781 pmovmskb %xmm0, %eax 782 movaps %xmm2, %xmm3 783#ifdef USE_AS_STRNCPY 784 sub $16, %ebx 785 jbe L(StrncpyExit4Case2OrCase3) 786#endif 787 test %eax, %eax 788 jnz L(Shl4LoopExit) 789 790 palignr $4, %xmm1, %xmm2 791 movaps %xmm3, %xmm1 792 movaps %xmm2, (%edx) 793 movaps 28(%ecx), %xmm2 794 795 pcmpeqb %xmm2, %xmm0 796 lea 16(%edx), %edx 797 pmovmskb %xmm0, %eax 798 lea 16(%ecx), %ecx 799 movaps %xmm2, %xmm3 800#ifdef USE_AS_STRNCPY 801 sub $16, %ebx 802 jbe L(StrncpyExit4Case2OrCase3) 803#endif 804 test %eax, %eax 805 jnz L(Shl4LoopExit) 806 807 palignr $4, %xmm1, %xmm2 808 movaps %xmm2, (%edx) 809 movaps 28(%ecx), %xmm2 810 movaps %xmm3, %xmm1 811 812 pcmpeqb %xmm2, %xmm0 813 lea 16(%edx), %edx 814 pmovmskb %xmm0, %eax 815 lea 16(%ecx), %ecx 816 movaps %xmm2, %xmm3 817#ifdef USE_AS_STRNCPY 818 sub $16, %ebx 819 jbe L(StrncpyExit4Case2OrCase3) 820#endif 821 test %eax, %eax 822 jnz L(Shl4LoopExit) 823 824 palignr $4, %xmm1, %xmm2 825 movaps %xmm2, (%edx) 826 movaps 28(%ecx), %xmm2 827 828 pcmpeqb %xmm2, %xmm0 829 lea 16(%edx), %edx 830 pmovmskb %xmm0, %eax 831 lea 16(%ecx), %ecx 832#ifdef USE_AS_STRNCPY 833 sub $16, %ebx 834 jbe L(StrncpyExit4Case2OrCase3) 835#endif 836 test %eax, %eax 837 jnz L(Shl4LoopExit) 838 839 palignr $4, %xmm3, %xmm2 840 movaps %xmm2, (%edx) 841 lea 28(%ecx), %ecx 842 lea 16(%edx), %edx 843 844 mov %ecx, %eax 845 and $-0x40, %ecx 846 sub %ecx, %eax 847 lea -12(%ecx), %ecx 848 sub %eax, %edx 849#ifdef USE_AS_STRNCPY 850 add %eax, %ebx 851#endif 852 movaps -4(%ecx), %xmm1 853 854L(Shl4LoopStart): 855 movaps 12(%ecx), %xmm2 856 movaps 28(%ecx), %xmm3 857 movaps %xmm3, %xmm6 858 movaps 44(%ecx), %xmm4 859 movaps %xmm4, %xmm7 860 movaps 60(%ecx), %xmm5 861 pminub %xmm2, %xmm6 862 pminub %xmm5, %xmm7 863 pminub %xmm6, %xmm7 864 pcmpeqb %xmm0, %xmm7 865 pmovmskb %xmm7, %eax 866 movaps %xmm5, %xmm7 867 palignr $4, %xmm4, 
%xmm5 868 palignr $4, %xmm3, %xmm4 869 test %eax, %eax 870 jnz L(Shl4Start) 871#ifdef USE_AS_STRNCPY 872 sub $64, %ebx 873 jbe L(StrncpyLeave4) 874#endif 875 palignr $4, %xmm2, %xmm3 876 lea 64(%ecx), %ecx 877 palignr $4, %xmm1, %xmm2 878 movaps %xmm7, %xmm1 879 movaps %xmm5, 48(%edx) 880 movaps %xmm4, 32(%edx) 881 movaps %xmm3, 16(%edx) 882 movaps %xmm2, (%edx) 883 lea 64(%edx), %edx 884 jmp L(Shl4LoopStart) 885 886L(Shl4LoopExit): 887 movlpd (%ecx), %xmm0 888 movl 8(%ecx), %esi 889 movlpd %xmm0, (%edx) 890 movl %esi, 8(%edx) 891 mov $12, %esi 892 jmp L(CopyFrom1To16Bytes) 893 894 .p2align 4 895L(Shl5): 896 movaps -5(%ecx), %xmm1 897 movaps 11(%ecx), %xmm2 898L(Shl5Start): 899 pcmpeqb %xmm2, %xmm0 900 pmovmskb %xmm0, %eax 901 movaps %xmm2, %xmm3 902#ifdef USE_AS_STRNCPY 903 sub $16, %ebx 904 jbe L(StrncpyExit5Case2OrCase3) 905#endif 906 test %eax, %eax 907 jnz L(Shl5LoopExit) 908 909 palignr $5, %xmm1, %xmm2 910 movaps %xmm3, %xmm1 911 movaps %xmm2, (%edx) 912 movaps 27(%ecx), %xmm2 913 914 pcmpeqb %xmm2, %xmm0 915 lea 16(%edx), %edx 916 pmovmskb %xmm0, %eax 917 lea 16(%ecx), %ecx 918 movaps %xmm2, %xmm3 919#ifdef USE_AS_STRNCPY 920 sub $16, %ebx 921 jbe L(StrncpyExit5Case2OrCase3) 922#endif 923 test %eax, %eax 924 jnz L(Shl5LoopExit) 925 926 palignr $5, %xmm1, %xmm2 927 movaps %xmm2, (%edx) 928 movaps 27(%ecx), %xmm2 929 movaps %xmm3, %xmm1 930 931 pcmpeqb %xmm2, %xmm0 932 lea 16(%edx), %edx 933 pmovmskb %xmm0, %eax 934 lea 16(%ecx), %ecx 935 movaps %xmm2, %xmm3 936#ifdef USE_AS_STRNCPY 937 sub $16, %ebx 938 jbe L(StrncpyExit5Case2OrCase3) 939#endif 940 test %eax, %eax 941 jnz L(Shl5LoopExit) 942 943 palignr $5, %xmm1, %xmm2 944 movaps %xmm2, (%edx) 945 movaps 27(%ecx), %xmm2 946 947 pcmpeqb %xmm2, %xmm0 948 lea 16(%edx), %edx 949 pmovmskb %xmm0, %eax 950 lea 16(%ecx), %ecx 951#ifdef USE_AS_STRNCPY 952 sub $16, %ebx 953 jbe L(StrncpyExit5Case2OrCase3) 954#endif 955 test %eax, %eax 956 jnz L(Shl5LoopExit) 957 958 palignr $5, %xmm3, %xmm2 959 movaps %xmm2, (%edx) 
960 lea 27(%ecx), %ecx 961 lea 16(%edx), %edx 962 963 mov %ecx, %eax 964 and $-0x40, %ecx 965 sub %ecx, %eax 966 lea -11(%ecx), %ecx 967 sub %eax, %edx 968#ifdef USE_AS_STRNCPY 969 add %eax, %ebx 970#endif 971 movaps -5(%ecx), %xmm1 972 973L(Shl5LoopStart): 974 movaps 11(%ecx), %xmm2 975 movaps 27(%ecx), %xmm3 976 movaps %xmm3, %xmm6 977 movaps 43(%ecx), %xmm4 978 movaps %xmm4, %xmm7 979 movaps 59(%ecx), %xmm5 980 pminub %xmm2, %xmm6 981 pminub %xmm5, %xmm7 982 pminub %xmm6, %xmm7 983 pcmpeqb %xmm0, %xmm7 984 pmovmskb %xmm7, %eax 985 movaps %xmm5, %xmm7 986 palignr $5, %xmm4, %xmm5 987 palignr $5, %xmm3, %xmm4 988 test %eax, %eax 989 jnz L(Shl5Start) 990#ifdef USE_AS_STRNCPY 991 sub $64, %ebx 992 jbe L(StrncpyLeave5) 993#endif 994 palignr $5, %xmm2, %xmm3 995 lea 64(%ecx), %ecx 996 palignr $5, %xmm1, %xmm2 997 movaps %xmm7, %xmm1 998 movaps %xmm5, 48(%edx) 999 movaps %xmm4, 32(%edx) 1000 movaps %xmm3, 16(%edx) 1001 movaps %xmm2, (%edx) 1002 lea 64(%edx), %edx 1003 jmp L(Shl5LoopStart) 1004 1005L(Shl5LoopExit): 1006 movlpd (%ecx), %xmm0 1007 movl 7(%ecx), %esi 1008 movlpd %xmm0, (%edx) 1009 movl %esi, 7(%edx) 1010 mov $11, %esi 1011 jmp L(CopyFrom1To16Bytes) 1012 1013 .p2align 4 1014L(Shl6): 1015 movaps -6(%ecx), %xmm1 1016 movaps 10(%ecx), %xmm2 1017L(Shl6Start): 1018 pcmpeqb %xmm2, %xmm0 1019 pmovmskb %xmm0, %eax 1020 movaps %xmm2, %xmm3 1021#ifdef USE_AS_STRNCPY 1022 sub $16, %ebx 1023 jbe L(StrncpyExit6Case2OrCase3) 1024#endif 1025 test %eax, %eax 1026 jnz L(Shl6LoopExit) 1027 1028 palignr $6, %xmm1, %xmm2 1029 movaps %xmm3, %xmm1 1030 movaps %xmm2, (%edx) 1031 movaps 26(%ecx), %xmm2 1032 1033 pcmpeqb %xmm2, %xmm0 1034 lea 16(%edx), %edx 1035 pmovmskb %xmm0, %eax 1036 lea 16(%ecx), %ecx 1037 movaps %xmm2, %xmm3 1038#ifdef USE_AS_STRNCPY 1039 sub $16, %ebx 1040 jbe L(StrncpyExit6Case2OrCase3) 1041#endif 1042 test %eax, %eax 1043 jnz L(Shl6LoopExit) 1044 1045 palignr $6, %xmm1, %xmm2 1046 movaps %xmm2, (%edx) 1047 movaps 26(%ecx), %xmm2 1048 movaps %xmm3, %xmm1 
1049 1050 pcmpeqb %xmm2, %xmm0 1051 lea 16(%edx), %edx 1052 pmovmskb %xmm0, %eax 1053 lea 16(%ecx), %ecx 1054 movaps %xmm2, %xmm3 1055#ifdef USE_AS_STRNCPY 1056 sub $16, %ebx 1057 jbe L(StrncpyExit6Case2OrCase3) 1058#endif 1059 test %eax, %eax 1060 jnz L(Shl6LoopExit) 1061 1062 palignr $6, %xmm1, %xmm2 1063 movaps %xmm2, (%edx) 1064 movaps 26(%ecx), %xmm2 1065 1066 pcmpeqb %xmm2, %xmm0 1067 lea 16(%edx), %edx 1068 pmovmskb %xmm0, %eax 1069 lea 16(%ecx), %ecx 1070#ifdef USE_AS_STRNCPY 1071 sub $16, %ebx 1072 jbe L(StrncpyExit6Case2OrCase3) 1073#endif 1074 test %eax, %eax 1075 jnz L(Shl6LoopExit) 1076 1077 palignr $6, %xmm3, %xmm2 1078 movaps %xmm2, (%edx) 1079 lea 26(%ecx), %ecx 1080 lea 16(%edx), %edx 1081 1082 mov %ecx, %eax 1083 and $-0x40, %ecx 1084 sub %ecx, %eax 1085 lea -10(%ecx), %ecx 1086 sub %eax, %edx 1087#ifdef USE_AS_STRNCPY 1088 add %eax, %ebx 1089#endif 1090 movaps -6(%ecx), %xmm1 1091 1092L(Shl6LoopStart): 1093 movaps 10(%ecx), %xmm2 1094 movaps 26(%ecx), %xmm3 1095 movaps %xmm3, %xmm6 1096 movaps 42(%ecx), %xmm4 1097 movaps %xmm4, %xmm7 1098 movaps 58(%ecx), %xmm5 1099 pminub %xmm2, %xmm6 1100 pminub %xmm5, %xmm7 1101 pminub %xmm6, %xmm7 1102 pcmpeqb %xmm0, %xmm7 1103 pmovmskb %xmm7, %eax 1104 movaps %xmm5, %xmm7 1105 palignr $6, %xmm4, %xmm5 1106 palignr $6, %xmm3, %xmm4 1107 test %eax, %eax 1108 jnz L(Shl6Start) 1109#ifdef USE_AS_STRNCPY 1110 sub $64, %ebx 1111 jbe L(StrncpyLeave6) 1112#endif 1113 palignr $6, %xmm2, %xmm3 1114 lea 64(%ecx), %ecx 1115 palignr $6, %xmm1, %xmm2 1116 movaps %xmm7, %xmm1 1117 movaps %xmm5, 48(%edx) 1118 movaps %xmm4, 32(%edx) 1119 movaps %xmm3, 16(%edx) 1120 movaps %xmm2, (%edx) 1121 lea 64(%edx), %edx 1122 jmp L(Shl6LoopStart) 1123 1124L(Shl6LoopExit): 1125 movlpd (%ecx), %xmm0 1126 movl 6(%ecx), %esi 1127 movlpd %xmm0, (%edx) 1128 movl %esi, 6(%edx) 1129 mov $10, %esi 1130 jmp L(CopyFrom1To16Bytes) 1131 1132 .p2align 4 1133L(Shl7): 1134 movaps -7(%ecx), %xmm1 1135 movaps 9(%ecx), %xmm2 1136L(Shl7Start): 1137 pcmpeqb 
%xmm2, %xmm0 1138 pmovmskb %xmm0, %eax 1139 movaps %xmm2, %xmm3 1140#ifdef USE_AS_STRNCPY 1141 sub $16, %ebx 1142 jbe L(StrncpyExit7Case2OrCase3) 1143#endif 1144 test %eax, %eax 1145 jnz L(Shl7LoopExit) 1146 1147 palignr $7, %xmm1, %xmm2 1148 movaps %xmm3, %xmm1 1149 movaps %xmm2, (%edx) 1150 movaps 25(%ecx), %xmm2 1151 1152 pcmpeqb %xmm2, %xmm0 1153 lea 16(%edx), %edx 1154 pmovmskb %xmm0, %eax 1155 lea 16(%ecx), %ecx 1156 movaps %xmm2, %xmm3 1157#ifdef USE_AS_STRNCPY 1158 sub $16, %ebx 1159 jbe L(StrncpyExit7Case2OrCase3) 1160#endif 1161 test %eax, %eax 1162 jnz L(Shl7LoopExit) 1163 1164 palignr $7, %xmm1, %xmm2 1165 movaps %xmm2, (%edx) 1166 movaps 25(%ecx), %xmm2 1167 movaps %xmm3, %xmm1 1168 1169 pcmpeqb %xmm2, %xmm0 1170 lea 16(%edx), %edx 1171 pmovmskb %xmm0, %eax 1172 lea 16(%ecx), %ecx 1173 movaps %xmm2, %xmm3 1174#ifdef USE_AS_STRNCPY 1175 sub $16, %ebx 1176 jbe L(StrncpyExit7Case2OrCase3) 1177#endif 1178 test %eax, %eax 1179 jnz L(Shl7LoopExit) 1180 1181 palignr $7, %xmm1, %xmm2 1182 movaps %xmm2, (%edx) 1183 movaps 25(%ecx), %xmm2 1184 1185 pcmpeqb %xmm2, %xmm0 1186 lea 16(%edx), %edx 1187 pmovmskb %xmm0, %eax 1188 lea 16(%ecx), %ecx 1189#ifdef USE_AS_STRNCPY 1190 sub $16, %ebx 1191 jbe L(StrncpyExit7Case2OrCase3) 1192#endif 1193 test %eax, %eax 1194 jnz L(Shl7LoopExit) 1195 1196 palignr $7, %xmm3, %xmm2 1197 movaps %xmm2, (%edx) 1198 lea 25(%ecx), %ecx 1199 lea 16(%edx), %edx 1200 1201 mov %ecx, %eax 1202 and $-0x40, %ecx 1203 sub %ecx, %eax 1204 lea -9(%ecx), %ecx 1205 sub %eax, %edx 1206#ifdef USE_AS_STRNCPY 1207 add %eax, %ebx 1208#endif 1209 movaps -7(%ecx), %xmm1 1210 1211L(Shl7LoopStart): 1212 movaps 9(%ecx), %xmm2 1213 movaps 25(%ecx), %xmm3 1214 movaps %xmm3, %xmm6 1215 movaps 41(%ecx), %xmm4 1216 movaps %xmm4, %xmm7 1217 movaps 57(%ecx), %xmm5 1218 pminub %xmm2, %xmm6 1219 pminub %xmm5, %xmm7 1220 pminub %xmm6, %xmm7 1221 pcmpeqb %xmm0, %xmm7 1222 pmovmskb %xmm7, %eax 1223 movaps %xmm5, %xmm7 1224 palignr $7, %xmm4, %xmm5 1225 palignr $7, 
%xmm3, %xmm4 1226 test %eax, %eax 1227 jnz L(Shl7Start) 1228#ifdef USE_AS_STRNCPY 1229 sub $64, %ebx 1230 jbe L(StrncpyLeave7) 1231#endif 1232 palignr $7, %xmm2, %xmm3 1233 lea 64(%ecx), %ecx 1234 palignr $7, %xmm1, %xmm2 1235 movaps %xmm7, %xmm1 1236 movaps %xmm5, 48(%edx) 1237 movaps %xmm4, 32(%edx) 1238 movaps %xmm3, 16(%edx) 1239 movaps %xmm2, (%edx) 1240 lea 64(%edx), %edx 1241 jmp L(Shl7LoopStart) 1242 1243L(Shl7LoopExit): 1244 movlpd (%ecx), %xmm0 1245 movl 5(%ecx), %esi 1246 movlpd %xmm0, (%edx) 1247 movl %esi, 5(%edx) 1248 mov $9, %esi 1249 jmp L(CopyFrom1To16Bytes) 1250 1251 .p2align 4 1252L(Shl8): 1253 movaps -8(%ecx), %xmm1 1254 movaps 8(%ecx), %xmm2 1255L(Shl8Start): 1256 pcmpeqb %xmm2, %xmm0 1257 pmovmskb %xmm0, %eax 1258 movaps %xmm2, %xmm3 1259#ifdef USE_AS_STRNCPY 1260 sub $16, %ebx 1261 jbe L(StrncpyExit8Case2OrCase3) 1262#endif 1263 test %eax, %eax 1264 jnz L(Shl8LoopExit) 1265 1266 palignr $8, %xmm1, %xmm2 1267 movaps %xmm3, %xmm1 1268 movaps %xmm2, (%edx) 1269 movaps 24(%ecx), %xmm2 1270 1271 pcmpeqb %xmm2, %xmm0 1272 lea 16(%edx), %edx 1273 pmovmskb %xmm0, %eax 1274 lea 16(%ecx), %ecx 1275 movaps %xmm2, %xmm3 1276#ifdef USE_AS_STRNCPY 1277 sub $16, %ebx 1278 jbe L(StrncpyExit8Case2OrCase3) 1279#endif 1280 test %eax, %eax 1281 jnz L(Shl8LoopExit) 1282 1283 palignr $8, %xmm1, %xmm2 1284 movaps %xmm2, (%edx) 1285 movaps 24(%ecx), %xmm2 1286 movaps %xmm3, %xmm1 1287 1288 pcmpeqb %xmm2, %xmm0 1289 lea 16(%edx), %edx 1290 pmovmskb %xmm0, %eax 1291 lea 16(%ecx), %ecx 1292 movaps %xmm2, %xmm3 1293#ifdef USE_AS_STRNCPY 1294 sub $16, %ebx 1295 jbe L(StrncpyExit8Case2OrCase3) 1296#endif 1297 test %eax, %eax 1298 jnz L(Shl8LoopExit) 1299 1300 palignr $8, %xmm1, %xmm2 1301 movaps %xmm2, (%edx) 1302 movaps 24(%ecx), %xmm2 1303 1304 pcmpeqb %xmm2, %xmm0 1305 lea 16(%edx), %edx 1306 pmovmskb %xmm0, %eax 1307 lea 16(%ecx), %ecx 1308#ifdef USE_AS_STRNCPY 1309 sub $16, %ebx 1310 jbe L(StrncpyExit8Case2OrCase3) 1311#endif 1312 test %eax, %eax 1313 jnz 
L(Shl8LoopExit) 1314 1315 palignr $8, %xmm3, %xmm2 1316 movaps %xmm2, (%edx) 1317 lea 24(%ecx), %ecx 1318 lea 16(%edx), %edx 1319 1320 mov %ecx, %eax 1321 and $-0x40, %ecx 1322 sub %ecx, %eax 1323 lea -8(%ecx), %ecx 1324 sub %eax, %edx 1325#ifdef USE_AS_STRNCPY 1326 add %eax, %ebx 1327#endif 1328 movaps -8(%ecx), %xmm1 1329 1330L(Shl8LoopStart): 1331 movaps 8(%ecx), %xmm2 1332 movaps 24(%ecx), %xmm3 1333 movaps %xmm3, %xmm6 1334 movaps 40(%ecx), %xmm4 1335 movaps %xmm4, %xmm7 1336 movaps 56(%ecx), %xmm5 1337 pminub %xmm2, %xmm6 1338 pminub %xmm5, %xmm7 1339 pminub %xmm6, %xmm7 1340 pcmpeqb %xmm0, %xmm7 1341 pmovmskb %xmm7, %eax 1342 movaps %xmm5, %xmm7 1343 palignr $8, %xmm4, %xmm5 1344 palignr $8, %xmm3, %xmm4 1345 test %eax, %eax 1346 jnz L(Shl8Start) 1347#ifdef USE_AS_STRNCPY 1348 sub $64, %ebx 1349 jbe L(StrncpyLeave8) 1350#endif 1351 palignr $8, %xmm2, %xmm3 1352 lea 64(%ecx), %ecx 1353 palignr $8, %xmm1, %xmm2 1354 movaps %xmm7, %xmm1 1355 movaps %xmm5, 48(%edx) 1356 movaps %xmm4, 32(%edx) 1357 movaps %xmm3, 16(%edx) 1358 movaps %xmm2, (%edx) 1359 lea 64(%edx), %edx 1360 jmp L(Shl8LoopStart) 1361 1362L(Shl8LoopExit): 1363 movlpd (%ecx), %xmm0 1364 movlpd %xmm0, (%edx) 1365 mov $8, %esi 1366 jmp L(CopyFrom1To16Bytes) 1367 1368 .p2align 4 1369L(Shl9): 1370 movaps -9(%ecx), %xmm1 1371 movaps 7(%ecx), %xmm2 1372L(Shl9Start): 1373 pcmpeqb %xmm2, %xmm0 1374 pmovmskb %xmm0, %eax 1375 movaps %xmm2, %xmm3 1376#ifdef USE_AS_STRNCPY 1377 sub $16, %ebx 1378 jbe L(StrncpyExit9Case2OrCase3) 1379#endif 1380 test %eax, %eax 1381 jnz L(Shl9LoopExit) 1382 1383 palignr $9, %xmm1, %xmm2 1384 movaps %xmm3, %xmm1 1385 movaps %xmm2, (%edx) 1386 movaps 23(%ecx), %xmm2 1387 1388 pcmpeqb %xmm2, %xmm0 1389 lea 16(%edx), %edx 1390 pmovmskb %xmm0, %eax 1391 lea 16(%ecx), %ecx 1392 movaps %xmm2, %xmm3 1393#ifdef USE_AS_STRNCPY 1394 sub $16, %ebx 1395 jbe L(StrncpyExit9Case2OrCase3) 1396#endif 1397 test %eax, %eax 1398 jnz L(Shl9LoopExit) 1399 1400 palignr $9, %xmm1, %xmm2 1401 movaps 
%xmm2, (%edx) 1402 movaps 23(%ecx), %xmm2 1403 movaps %xmm3, %xmm1 1404 1405 pcmpeqb %xmm2, %xmm0 1406 lea 16(%edx), %edx 1407 pmovmskb %xmm0, %eax 1408 lea 16(%ecx), %ecx 1409 movaps %xmm2, %xmm3 1410#ifdef USE_AS_STRNCPY 1411 sub $16, %ebx 1412 jbe L(StrncpyExit9Case2OrCase3) 1413#endif 1414 test %eax, %eax 1415 jnz L(Shl9LoopExit) 1416 1417 palignr $9, %xmm1, %xmm2 1418 movaps %xmm2, (%edx) 1419 movaps 23(%ecx), %xmm2 1420 1421 pcmpeqb %xmm2, %xmm0 1422 lea 16(%edx), %edx 1423 pmovmskb %xmm0, %eax 1424 lea 16(%ecx), %ecx 1425#ifdef USE_AS_STRNCPY 1426 sub $16, %ebx 1427 jbe L(StrncpyExit9Case2OrCase3) 1428#endif 1429 test %eax, %eax 1430 jnz L(Shl9LoopExit) 1431 1432 palignr $9, %xmm3, %xmm2 1433 movaps %xmm2, (%edx) 1434 lea 23(%ecx), %ecx 1435 lea 16(%edx), %edx 1436 1437 mov %ecx, %eax 1438 and $-0x40, %ecx 1439 sub %ecx, %eax 1440 lea -7(%ecx), %ecx 1441 sub %eax, %edx 1442#ifdef USE_AS_STRNCPY 1443 add %eax, %ebx 1444#endif 1445 movaps -9(%ecx), %xmm1 1446 1447L(Shl9LoopStart): 1448 movaps 7(%ecx), %xmm2 1449 movaps 23(%ecx), %xmm3 1450 movaps %xmm3, %xmm6 1451 movaps 39(%ecx), %xmm4 1452 movaps %xmm4, %xmm7 1453 movaps 55(%ecx), %xmm5 1454 pminub %xmm2, %xmm6 1455 pminub %xmm5, %xmm7 1456 pminub %xmm6, %xmm7 1457 pcmpeqb %xmm0, %xmm7 1458 pmovmskb %xmm7, %eax 1459 movaps %xmm5, %xmm7 1460 palignr $9, %xmm4, %xmm5 1461 palignr $9, %xmm3, %xmm4 1462 test %eax, %eax 1463 jnz L(Shl9Start) 1464#ifdef USE_AS_STRNCPY 1465 sub $64, %ebx 1466 jbe L(StrncpyLeave9) 1467#endif 1468 palignr $9, %xmm2, %xmm3 1469 lea 64(%ecx), %ecx 1470 palignr $9, %xmm1, %xmm2 1471 movaps %xmm7, %xmm1 1472 movaps %xmm5, 48(%edx) 1473 movaps %xmm4, 32(%edx) 1474 movaps %xmm3, 16(%edx) 1475 movaps %xmm2, (%edx) 1476 lea 64(%edx), %edx 1477 jmp L(Shl9LoopStart) 1478 1479L(Shl9LoopExit): 1480 movlpd -1(%ecx), %xmm0 1481 movlpd %xmm0, -1(%edx) 1482 mov $7, %esi 1483 jmp L(CopyFrom1To16Bytes) 1484 1485 .p2align 4 1486L(Shl10): 1487 movaps -10(%ecx), %xmm1 1488 movaps 6(%ecx), %xmm2 
1489L(Shl10Start): /* Compare the new vector with %xmm0 (presumably all-zero here; set up outside this view) and put the per-byte match mask in %eax; %xmm3 keeps a copy of the vector. */ 1490 pcmpeqb %xmm2, %xmm0 1491 pmovmskb %xmm0, %eax 1492 movaps %xmm2, %xmm3 1493#ifdef USE_AS_STRNCPY 1494 sub $16, %ebx 1495 jbe L(StrncpyExit10Case2OrCase3) 1496#endif 1497 test %eax, %eax 1498 jnz L(Shl10LoopExit) 1499 /* No match: shift the %xmm2:%xmm1 pair right by 10 bytes and store the 16 realigned bytes at (%edx). */ 1500 palignr $10, %xmm1, %xmm2 1501 movaps %xmm3, %xmm1 1502 movaps %xmm2, (%edx) 1503 movaps 22(%ecx), %xmm2 1504 1505 pcmpeqb %xmm2, %xmm0 1506 lea 16(%edx), %edx 1507 pmovmskb %xmm0, %eax 1508 lea 16(%ecx), %ecx 1509 movaps %xmm2, %xmm3 1510#ifdef USE_AS_STRNCPY 1511 sub $16, %ebx 1512 jbe L(StrncpyExit10Case2OrCase3) 1513#endif 1514 test %eax, %eax 1515 jnz L(Shl10LoopExit) 1516 1517 palignr $10, %xmm1, %xmm2 1518 movaps %xmm2, (%edx) 1519 movaps 22(%ecx), %xmm2 1520 movaps %xmm3, %xmm1 1521 1522 pcmpeqb %xmm2, %xmm0 1523 lea 16(%edx), %edx 1524 pmovmskb %xmm0, %eax 1525 lea 16(%ecx), %ecx 1526 movaps %xmm2, %xmm3 1527#ifdef USE_AS_STRNCPY 1528 sub $16, %ebx 1529 jbe L(StrncpyExit10Case2OrCase3) 1530#endif 1531 test %eax, %eax 1532 jnz L(Shl10LoopExit) 1533 1534 palignr $10, %xmm1, %xmm2 1535 movaps %xmm2, (%edx) 1536 movaps 22(%ecx), %xmm2 1537 1538 pcmpeqb %xmm2, %xmm0 1539 lea 16(%edx), %edx 1540 pmovmskb %xmm0, %eax 1541 lea 16(%ecx), %ecx 1542#ifdef USE_AS_STRNCPY 1543 sub $16, %ebx 1544 jbe L(StrncpyExit10Case2OrCase3) 1545#endif 1546 test %eax, %eax 1547 jnz L(Shl10LoopExit) 1548 1549 palignr $10, %xmm3, %xmm2 1550 movaps %xmm2, (%edx) 1551 lea 22(%ecx), %ecx 1552 lea 16(%edx), %edx 1553 /* Round %ecx down to a 64-byte boundary; %eax is the distance stepped back, which is also taken off %edx (and added back to %ebx in strncpy builds). */ 1554 mov %ecx, %eax 1555 and $-0x40, %ecx 1556 sub %ecx, %eax 1557 lea -6(%ecx), %ecx 1558 sub %eax, %edx 1559#ifdef USE_AS_STRNCPY 1560 add %eax, %ebx 1561#endif 1562 movaps -10(%ecx), %xmm1 1563 1564L(Shl10LoopStart): /* 64-byte loop: load four aligned vectors, fold them with pminub and compare against %xmm0 so %eax is non-zero if any of the 64 bytes matches. */ 1565 movaps 6(%ecx), %xmm2 1566 movaps 22(%ecx), %xmm3 1567 movaps %xmm3, %xmm6 1568 movaps 38(%ecx), %xmm4 1569 movaps %xmm4, %xmm7 1570 movaps 54(%ecx), %xmm5 1571 pminub %xmm2, %xmm6 1572 pminub %xmm5, %xmm7 1573 pminub %xmm6, %xmm7 1574 pcmpeqb %xmm0, %xmm7 1575 pmovmskb %xmm7, %eax 1576 movaps %xmm5, %xmm7 1577
palignr $10, %xmm4, %xmm5 1578 palignr $10, %xmm3, %xmm4 1579 test %eax, %eax 1580 jnz L(Shl10Start) 1581#ifdef USE_AS_STRNCPY 1582 sub $64, %ebx 1583 jbe L(StrncpyLeave10) 1584#endif 1585 /* No match in the 64-byte block: finish shifting, store the four realigned vectors and advance both pointers by 64. */ palignr $10, %xmm2, %xmm3 1586 lea 64(%ecx), %ecx 1587 palignr $10, %xmm1, %xmm2 1588 movaps %xmm7, %xmm1 1589 movaps %xmm5, 48(%edx) 1590 movaps %xmm4, 32(%edx) 1591 movaps %xmm3, 16(%edx) 1592 movaps %xmm2, (%edx) 1593 lea 64(%edx), %edx 1594 jmp L(Shl10LoopStart) 1595 1596L(Shl10LoopExit): /* Copy 8 bytes at offset -2, set %esi = 6 and continue in CopyFrom1To16Bytes. */ 1597 movlpd -2(%ecx), %xmm0 1598 movlpd %xmm0, -2(%edx) 1599 mov $6, %esi 1600 jmp L(CopyFrom1To16Bytes) 1601 1602 .p2align 4 1603L(Shl11): /* Shl11: load the two aligned 16-byte source vectors at -11(%ecx) and 5(%ecx) into %xmm1/%xmm2. */ 1604 movaps -11(%ecx), %xmm1 1605 movaps 5(%ecx), %xmm2 1606L(Shl11Start): /* Compare the new vector with %xmm0 (presumably all-zero here; set up outside this view) and put the per-byte match mask in %eax; %xmm3 keeps a copy of the vector. */ 1607 pcmpeqb %xmm2, %xmm0 1608 pmovmskb %xmm0, %eax 1609 movaps %xmm2, %xmm3 1610#ifdef USE_AS_STRNCPY 1611 sub $16, %ebx 1612 jbe L(StrncpyExit11Case2OrCase3) 1613#endif 1614 test %eax, %eax 1615 jnz L(Shl11LoopExit) 1616 /* No match: shift the %xmm2:%xmm1 pair right by 11 bytes and store the 16 realigned bytes at (%edx). */ 1617 palignr $11, %xmm1, %xmm2 1618 movaps %xmm3, %xmm1 1619 movaps %xmm2, (%edx) 1620 movaps 21(%ecx), %xmm2 1621 1622 pcmpeqb %xmm2, %xmm0 1623 lea 16(%edx), %edx 1624 pmovmskb %xmm0, %eax 1625 lea 16(%ecx), %ecx 1626 movaps %xmm2, %xmm3 1627#ifdef USE_AS_STRNCPY 1628 sub $16, %ebx 1629 jbe L(StrncpyExit11Case2OrCase3) 1630#endif 1631 test %eax, %eax 1632 jnz L(Shl11LoopExit) 1633 1634 palignr $11, %xmm1, %xmm2 1635 movaps %xmm2, (%edx) 1636 movaps 21(%ecx), %xmm2 1637 movaps %xmm3, %xmm1 1638 1639 pcmpeqb %xmm2, %xmm0 1640 lea 16(%edx), %edx 1641 pmovmskb %xmm0, %eax 1642 lea 16(%ecx), %ecx 1643 movaps %xmm2, %xmm3 1644#ifdef USE_AS_STRNCPY 1645 sub $16, %ebx 1646 jbe L(StrncpyExit11Case2OrCase3) 1647#endif 1648 test %eax, %eax 1649 jnz L(Shl11LoopExit) 1650 1651 palignr $11, %xmm1, %xmm2 1652 movaps %xmm2, (%edx) 1653 movaps 21(%ecx), %xmm2 1654 1655 pcmpeqb %xmm2, %xmm0 1656 lea 16(%edx), %edx 1657 pmovmskb %xmm0, %eax 1658 lea 16(%ecx), %ecx 1659#ifdef USE_AS_STRNCPY 1660 sub $16, %ebx 1661 jbe L(StrncpyExit11Case2OrCase3) 1662#endif 1663 test %eax, %eax
1664 jnz L(Shl11LoopExit) 1665 1666 palignr $11, %xmm3, %xmm2 1667 movaps %xmm2, (%edx) 1668 lea 21(%ecx), %ecx 1669 lea 16(%edx), %edx 1670 /* Round %ecx down to a 64-byte boundary; %eax is the distance stepped back, which is also taken off %edx (and added back to %ebx in strncpy builds). */ 1671 mov %ecx, %eax 1672 and $-0x40, %ecx 1673 sub %ecx, %eax 1674 lea -5(%ecx), %ecx 1675 sub %eax, %edx 1676#ifdef USE_AS_STRNCPY 1677 add %eax, %ebx 1678#endif 1679 movaps -11(%ecx), %xmm1 1680 1681L(Shl11LoopStart): /* 64-byte loop: load four aligned vectors, fold them with pminub and compare against %xmm0 (presumably all-zero here) so %eax is non-zero if any of the 64 bytes matches; a match sends control back to Shl11Start. */ 1682 movaps 5(%ecx), %xmm2 1683 movaps 21(%ecx), %xmm3 1684 movaps %xmm3, %xmm6 1685 movaps 37(%ecx), %xmm4 1686 movaps %xmm4, %xmm7 1687 movaps 53(%ecx), %xmm5 1688 pminub %xmm2, %xmm6 1689 pminub %xmm5, %xmm7 1690 pminub %xmm6, %xmm7 1691 pcmpeqb %xmm0, %xmm7 1692 pmovmskb %xmm7, %eax 1693 movaps %xmm5, %xmm7 1694 palignr $11, %xmm4, %xmm5 1695 palignr $11, %xmm3, %xmm4 1696 test %eax, %eax 1697 jnz L(Shl11Start) 1698#ifdef USE_AS_STRNCPY 1699 sub $64, %ebx 1700 jbe L(StrncpyLeave11) 1701#endif 1702 palignr $11, %xmm2, %xmm3 1703 lea 64(%ecx), %ecx 1704 palignr $11, %xmm1, %xmm2 1705 movaps %xmm7, %xmm1 1706 movaps %xmm5, 48(%edx) 1707 movaps %xmm4, 32(%edx) 1708 movaps %xmm3, 16(%edx) 1709 movaps %xmm2, (%edx) 1710 lea 64(%edx), %edx 1711 jmp L(Shl11LoopStart) 1712 1713L(Shl11LoopExit): /* Copy 8 bytes at offset -3, set %esi = 5 and continue in CopyFrom1To16Bytes. */ 1714 movlpd -3(%ecx), %xmm0 1715 movlpd %xmm0, -3(%edx) 1716 mov $5, %esi 1717 jmp L(CopyFrom1To16Bytes) 1718 1719 .p2align 4 1720L(Shl12): /* Shl12: load the two aligned 16-byte source vectors at -12(%ecx) and 4(%ecx) into %xmm1/%xmm2. */ 1721 movaps -12(%ecx), %xmm1 1722 movaps 4(%ecx), %xmm2 1723L(Shl12Start): /* Compare the new vector with %xmm0 and put the per-byte match mask in %eax; %xmm3 keeps a copy of the vector. */ 1724 pcmpeqb %xmm2, %xmm0 1725 pmovmskb %xmm0, %eax 1726 movaps %xmm2, %xmm3 1727#ifdef USE_AS_STRNCPY 1728 sub $16, %ebx 1729 jbe L(StrncpyExit12Case2OrCase3) 1730#endif 1731 test %eax, %eax 1732 jnz L(Shl12LoopExit) 1733 /* No match: shift the %xmm2:%xmm1 pair right by 12 bytes and store the 16 realigned bytes at (%edx). */ 1734 palignr $12, %xmm1, %xmm2 1735 movaps %xmm3, %xmm1 1736 movaps %xmm2, (%edx) 1737 movaps 20(%ecx), %xmm2 1738 1739 pcmpeqb %xmm2, %xmm0 1740 lea 16(%edx), %edx 1741 pmovmskb %xmm0, %eax 1742 lea 16(%ecx), %ecx 1743 movaps %xmm2, %xmm3 1744#ifdef USE_AS_STRNCPY 1745 sub $16, %ebx 1746 jbe L(StrncpyExit12Case2OrCase3) 1747#endif 1748 test %eax, %eax 1749 jnz L(Shl12LoopExit) 1750 1751 palignr
$12, %xmm1, %xmm2 1752 movaps %xmm2, (%edx) 1753 movaps 20(%ecx), %xmm2 1754 movaps %xmm3, %xmm1 1755 1756 pcmpeqb %xmm2, %xmm0 1757 lea 16(%edx), %edx 1758 pmovmskb %xmm0, %eax 1759 lea 16(%ecx), %ecx 1760 movaps %xmm2, %xmm3 1761#ifdef USE_AS_STRNCPY 1762 sub $16, %ebx 1763 jbe L(StrncpyExit12Case2OrCase3) 1764#endif 1765 test %eax, %eax 1766 jnz L(Shl12LoopExit) 1767 1768 palignr $12, %xmm1, %xmm2 1769 movaps %xmm2, (%edx) 1770 movaps 20(%ecx), %xmm2 1771 1772 pcmpeqb %xmm2, %xmm0 1773 lea 16(%edx), %edx 1774 pmovmskb %xmm0, %eax 1775 lea 16(%ecx), %ecx 1776#ifdef USE_AS_STRNCPY 1777 sub $16, %ebx 1778 jbe L(StrncpyExit12Case2OrCase3) 1779#endif 1780 test %eax, %eax 1781 jnz L(Shl12LoopExit) 1782 1783 palignr $12, %xmm3, %xmm2 1784 movaps %xmm2, (%edx) 1785 lea 20(%ecx), %ecx 1786 lea 16(%edx), %edx 1787 /* Round %ecx down to a 64-byte boundary; %eax is the distance stepped back, which is also taken off %edx (and added back to %ebx in strncpy builds). */ 1788 mov %ecx, %eax 1789 and $-0x40, %ecx 1790 sub %ecx, %eax 1791 lea -4(%ecx), %ecx 1792 sub %eax, %edx 1793#ifdef USE_AS_STRNCPY 1794 add %eax, %ebx 1795#endif 1796 movaps -12(%ecx), %xmm1 1797 1798L(Shl12LoopStart): /* 64-byte loop: load four aligned vectors, fold them with pminub and compare against %xmm0 (presumably all-zero here) so %eax is non-zero if any of the 64 bytes matches; a match sends control back to Shl12Start. */ 1799 movaps 4(%ecx), %xmm2 1800 movaps 20(%ecx), %xmm3 1801 movaps %xmm3, %xmm6 1802 movaps 36(%ecx), %xmm4 1803 movaps %xmm4, %xmm7 1804 movaps 52(%ecx), %xmm5 1805 pminub %xmm2, %xmm6 1806 pminub %xmm5, %xmm7 1807 pminub %xmm6, %xmm7 1808 pcmpeqb %xmm0, %xmm7 1809 pmovmskb %xmm7, %eax 1810 movaps %xmm5, %xmm7 1811 palignr $12, %xmm4, %xmm5 1812 palignr $12, %xmm3, %xmm4 1813 test %eax, %eax 1814 jnz L(Shl12Start) 1815#ifdef USE_AS_STRNCPY 1816 sub $64, %ebx 1817 jbe L(StrncpyLeave12) 1818#endif 1819 palignr $12, %xmm2, %xmm3 1820 lea 64(%ecx), %ecx 1821 palignr $12, %xmm1, %xmm2 1822 movaps %xmm7, %xmm1 1823 movaps %xmm5, 48(%edx) 1824 movaps %xmm4, 32(%edx) 1825 movaps %xmm3, 16(%edx) 1826 movaps %xmm2, (%edx) 1827 lea 64(%edx), %edx 1828 jmp L(Shl12LoopStart) 1829 1830L(Shl12LoopExit): /* Copy 4 bytes from (%ecx) to (%edx) through %esi, then set %esi = 4 and continue in CopyFrom1To16Bytes. */ 1831 movl (%ecx), %esi 1832 movl %esi, (%edx) 1833 mov $4, %esi 1834 jmp L(CopyFrom1To16Bytes) 1835 1836 .p2align 4 1837L(Shl13): /* Shl13: load the aligned 16-byte source vector at -13(%ecx) into %xmm1; the second load follows. */ 1838 movaps -13(%ecx), %xmm1
1839 movaps 3(%ecx), %xmm2 1840L(Shl13Start): /* Compare the new vector with %xmm0 (presumably all-zero here; set up outside this view) and put the per-byte match mask in %eax; %xmm3 keeps a copy of the vector. */ 1841 pcmpeqb %xmm2, %xmm0 1842 pmovmskb %xmm0, %eax 1843 movaps %xmm2, %xmm3 1844#ifdef USE_AS_STRNCPY 1845 sub $16, %ebx 1846 jbe L(StrncpyExit13Case2OrCase3) 1847#endif 1848 test %eax, %eax 1849 jnz L(Shl13LoopExit) 1850 /* No match: shift the %xmm2:%xmm1 pair right by 13 bytes and store the 16 realigned bytes at (%edx). */ 1851 palignr $13, %xmm1, %xmm2 1852 movaps %xmm3, %xmm1 1853 movaps %xmm2, (%edx) 1854 movaps 19(%ecx), %xmm2 1855 1856 pcmpeqb %xmm2, %xmm0 1857 lea 16(%edx), %edx 1858 pmovmskb %xmm0, %eax 1859 lea 16(%ecx), %ecx 1860 movaps %xmm2, %xmm3 1861#ifdef USE_AS_STRNCPY 1862 sub $16, %ebx 1863 jbe L(StrncpyExit13Case2OrCase3) 1864#endif 1865 test %eax, %eax 1866 jnz L(Shl13LoopExit) 1867 1868 palignr $13, %xmm1, %xmm2 1869 movaps %xmm2, (%edx) 1870 movaps 19(%ecx), %xmm2 1871 movaps %xmm3, %xmm1 1872 1873 pcmpeqb %xmm2, %xmm0 1874 lea 16(%edx), %edx 1875 pmovmskb %xmm0, %eax 1876 lea 16(%ecx), %ecx 1877 movaps %xmm2, %xmm3 1878#ifdef USE_AS_STRNCPY 1879 sub $16, %ebx 1880 jbe L(StrncpyExit13Case2OrCase3) 1881#endif 1882 test %eax, %eax 1883 jnz L(Shl13LoopExit) 1884 1885 palignr $13, %xmm1, %xmm2 1886 movaps %xmm2, (%edx) 1887 movaps 19(%ecx), %xmm2 1888 1889 pcmpeqb %xmm2, %xmm0 1890 lea 16(%edx), %edx 1891 pmovmskb %xmm0, %eax 1892 lea 16(%ecx), %ecx 1893#ifdef USE_AS_STRNCPY 1894 sub $16, %ebx 1895 jbe L(StrncpyExit13Case2OrCase3) 1896#endif 1897 test %eax, %eax 1898 jnz L(Shl13LoopExit) 1899 1900 palignr $13, %xmm3, %xmm2 1901 movaps %xmm2, (%edx) 1902 lea 19(%ecx), %ecx 1903 lea 16(%edx), %edx 1904 /* Round %ecx down to a 64-byte boundary; %eax is the distance stepped back, which is also taken off %edx (and added back to %ebx in strncpy builds). */ 1905 mov %ecx, %eax 1906 and $-0x40, %ecx 1907 sub %ecx, %eax 1908 lea -3(%ecx), %ecx 1909 sub %eax, %edx 1910#ifdef USE_AS_STRNCPY 1911 add %eax, %ebx 1912#endif 1913 movaps -13(%ecx), %xmm1 1914 1915L(Shl13LoopStart): /* 64-byte loop: load four aligned vectors, fold them with pminub and compare against %xmm0 so %eax is non-zero if any of the 64 bytes matches. */ 1916 movaps 3(%ecx), %xmm2 1917 movaps 19(%ecx), %xmm3 1918 movaps %xmm3, %xmm6 1919 movaps 35(%ecx), %xmm4 1920 movaps %xmm4, %xmm7 1921 movaps 51(%ecx), %xmm5 1922 pminub %xmm2, %xmm6 1923 pminub %xmm5, %xmm7 1924 pminub %xmm6, %xmm7 1925 pcmpeqb %xmm0, %xmm7 1926 pmovmskb %xmm7, %eax 1927
movaps %xmm5, %xmm7 1928 palignr $13, %xmm4, %xmm5 1929 palignr $13, %xmm3, %xmm4 1930 test %eax, %eax 1931 jnz L(Shl13Start) 1932#ifdef USE_AS_STRNCPY 1933 sub $64, %ebx 1934 jbe L(StrncpyLeave13) 1935#endif 1936 /* No match in the 64-byte block: finish shifting, store the four realigned vectors and advance both pointers by 64. */ palignr $13, %xmm2, %xmm3 1937 lea 64(%ecx), %ecx 1938 palignr $13, %xmm1, %xmm2 1939 movaps %xmm7, %xmm1 1940 movaps %xmm5, 48(%edx) 1941 movaps %xmm4, 32(%edx) 1942 movaps %xmm3, 16(%edx) 1943 movaps %xmm2, (%edx) 1944 lea 64(%edx), %edx 1945 jmp L(Shl13LoopStart) 1946 1947L(Shl13LoopExit): /* Copy 4 bytes at offset -1 through %esi, then set %esi = 3 and continue in CopyFrom1To16Bytes. */ 1948 movl -1(%ecx), %esi 1949 movl %esi, -1(%edx) 1950 mov $3, %esi 1951 jmp L(CopyFrom1To16Bytes) 1952 1953 .p2align 4 1954L(Shl14): /* Shl14: load the two aligned 16-byte source vectors at -14(%ecx) and 2(%ecx) into %xmm1/%xmm2. */ 1955 movaps -14(%ecx), %xmm1 1956 movaps 2(%ecx), %xmm2 1957L(Shl14Start): /* Compare the new vector with %xmm0 (presumably all-zero here; set up outside this view) and put the per-byte match mask in %eax; %xmm3 keeps a copy of the vector. */ 1958 pcmpeqb %xmm2, %xmm0 1959 pmovmskb %xmm0, %eax 1960 movaps %xmm2, %xmm3 1961#ifdef USE_AS_STRNCPY 1962 sub $16, %ebx 1963 jbe L(StrncpyExit14Case2OrCase3) 1964#endif 1965 test %eax, %eax 1966 jnz L(Shl14LoopExit) 1967 /* No match: shift the %xmm2:%xmm1 pair right by 14 bytes and store the 16 realigned bytes at (%edx). */ 1968 palignr $14, %xmm1, %xmm2 1969 movaps %xmm3, %xmm1 1970 movaps %xmm2, (%edx) 1971 movaps 18(%ecx), %xmm2 1972 1973 pcmpeqb %xmm2, %xmm0 1974 lea 16(%edx), %edx 1975 pmovmskb %xmm0, %eax 1976 lea 16(%ecx), %ecx 1977 movaps %xmm2, %xmm3 1978#ifdef USE_AS_STRNCPY 1979 sub $16, %ebx 1980 jbe L(StrncpyExit14Case2OrCase3) 1981#endif 1982 test %eax, %eax 1983 jnz L(Shl14LoopExit) 1984 1985 palignr $14, %xmm1, %xmm2 1986 movaps %xmm2, (%edx) 1987 movaps 18(%ecx), %xmm2 1988 movaps %xmm3, %xmm1 1989 1990 pcmpeqb %xmm2, %xmm0 1991 lea 16(%edx), %edx 1992 pmovmskb %xmm0, %eax 1993 lea 16(%ecx), %ecx 1994 movaps %xmm2, %xmm3 1995#ifdef USE_AS_STRNCPY 1996 sub $16, %ebx 1997 jbe L(StrncpyExit14Case2OrCase3) 1998#endif 1999 test %eax, %eax 2000 jnz L(Shl14LoopExit) 2001 2002 palignr $14, %xmm1, %xmm2 2003 movaps %xmm2, (%edx) 2004 movaps 18(%ecx), %xmm2 2005 2006 pcmpeqb %xmm2, %xmm0 2007 lea 16(%edx), %edx 2008 pmovmskb %xmm0, %eax 2009 lea 16(%ecx), %ecx 2010#ifdef USE_AS_STRNCPY 2011 sub $16, %ebx 2012 jbe L(StrncpyExit14Case2OrCase3) 2013#endif
2014 test %eax, %eax 2015 jnz L(Shl14LoopExit) 2016 2017 palignr $14, %xmm3, %xmm2 2018 movaps %xmm2, (%edx) 2019 lea 18(%ecx), %ecx 2020 lea 16(%edx), %edx 2021 /* Round %ecx down to a 64-byte boundary; %eax is the distance stepped back, which is also taken off %edx (and added back to %ebx in strncpy builds). */ 2022 mov %ecx, %eax 2023 and $-0x40, %ecx 2024 sub %ecx, %eax 2025 lea -2(%ecx), %ecx 2026 sub %eax, %edx 2027#ifdef USE_AS_STRNCPY 2028 add %eax, %ebx 2029#endif 2030 movaps -14(%ecx), %xmm1 2031 2032L(Shl14LoopStart): /* 64-byte loop: load four aligned vectors, fold them with pminub and compare against %xmm0 (presumably all-zero here) so %eax is non-zero if any of the 64 bytes matches; a match sends control back to Shl14Start. */ 2033 movaps 2(%ecx), %xmm2 2034 movaps 18(%ecx), %xmm3 2035 movaps %xmm3, %xmm6 2036 movaps 34(%ecx), %xmm4 2037 movaps %xmm4, %xmm7 2038 movaps 50(%ecx), %xmm5 2039 pminub %xmm2, %xmm6 2040 pminub %xmm5, %xmm7 2041 pminub %xmm6, %xmm7 2042 pcmpeqb %xmm0, %xmm7 2043 pmovmskb %xmm7, %eax 2044 movaps %xmm5, %xmm7 2045 palignr $14, %xmm4, %xmm5 2046 palignr $14, %xmm3, %xmm4 2047 test %eax, %eax 2048 jnz L(Shl14Start) 2049#ifdef USE_AS_STRNCPY 2050 sub $64, %ebx 2051 jbe L(StrncpyLeave14) 2052#endif 2053 palignr $14, %xmm2, %xmm3 2054 lea 64(%ecx), %ecx 2055 palignr $14, %xmm1, %xmm2 2056 movaps %xmm7, %xmm1 2057 movaps %xmm5, 48(%edx) 2058 movaps %xmm4, 32(%edx) 2059 movaps %xmm3, 16(%edx) 2060 movaps %xmm2, (%edx) 2061 lea 64(%edx), %edx 2062 jmp L(Shl14LoopStart) 2063 2064L(Shl14LoopExit): /* Copy 4 bytes at offset -2 through %esi, then set %esi = 2 and continue in CopyFrom1To16Bytes. */ 2065 movl -2(%ecx), %esi 2066 movl %esi, -2(%edx) 2067 mov $2, %esi 2068 jmp L(CopyFrom1To16Bytes) 2069 2070 .p2align 4 2071L(Shl15): /* Shl15: load the two aligned 16-byte source vectors at -15(%ecx) and 1(%ecx) into %xmm1/%xmm2. */ 2072 movaps -15(%ecx), %xmm1 2073 movaps 1(%ecx), %xmm2 2074L(Shl15Start): /* Compare the new vector with %xmm0 and put the per-byte match mask in %eax; %xmm3 keeps a copy of the vector. */ 2075 pcmpeqb %xmm2, %xmm0 2076 pmovmskb %xmm0, %eax 2077 movaps %xmm2, %xmm3 2078#ifdef USE_AS_STRNCPY 2079 sub $16, %ebx 2080 jbe L(StrncpyExit15Case2OrCase3) 2081#endif 2082 test %eax, %eax 2083 jnz L(Shl15LoopExit) 2084 /* No match: shift the %xmm2:%xmm1 pair right by 15 bytes and store the 16 realigned bytes at (%edx). */ 2085 palignr $15, %xmm1, %xmm2 2086 movaps %xmm3, %xmm1 2087 movaps %xmm2, (%edx) 2088 movaps 17(%ecx), %xmm2 2089 2090 pcmpeqb %xmm2, %xmm0 2091 lea 16(%edx), %edx 2092 pmovmskb %xmm0, %eax 2093 lea 16(%ecx), %ecx 2094 movaps %xmm2, %xmm3 2095#ifdef USE_AS_STRNCPY 2096 sub $16, %ebx 2097 jbe L(StrncpyExit15Case2OrCase3) 2098#endif 2099 test %eax, %eax 2100 jnz L(Shl15LoopExit)
2101 2102 palignr $15, %xmm1, %xmm2 2103 movaps %xmm2, (%edx) 2104 movaps 17(%ecx), %xmm2 2105 movaps %xmm3, %xmm1 2106 2107 pcmpeqb %xmm2, %xmm0 2108 lea 16(%edx), %edx 2109 pmovmskb %xmm0, %eax 2110 lea 16(%ecx), %ecx 2111 movaps %xmm2, %xmm3 2112#ifdef USE_AS_STRNCPY 2113 sub $16, %ebx 2114 jbe L(StrncpyExit15Case2OrCase3) 2115#endif 2116 test %eax, %eax 2117 jnz L(Shl15LoopExit) 2118 2119 palignr $15, %xmm1, %xmm2 2120 movaps %xmm2, (%edx) 2121 movaps 17(%ecx), %xmm2 2122 2123 pcmpeqb %xmm2, %xmm0 2124 lea 16(%edx), %edx 2125 pmovmskb %xmm0, %eax 2126 lea 16(%ecx), %ecx 2127#ifdef USE_AS_STRNCPY 2128 sub $16, %ebx 2129 jbe L(StrncpyExit15Case2OrCase3) 2130#endif 2131 test %eax, %eax 2132 jnz L(Shl15LoopExit) 2133 2134 palignr $15, %xmm3, %xmm2 2135 movaps %xmm2, (%edx) 2136 lea 17(%ecx), %ecx 2137 lea 16(%edx), %edx 2138 /* Round %ecx down to a 64-byte boundary; %eax is the distance stepped back, which is also taken off %edx (and added back to %ebx in strncpy builds). */ 2139 mov %ecx, %eax 2140 and $-0x40, %ecx 2141 sub %ecx, %eax 2142 lea -1(%ecx), %ecx 2143 sub %eax, %edx 2144#ifdef USE_AS_STRNCPY 2145 add %eax, %ebx 2146#endif 2147 movaps -15(%ecx), %xmm1 2148 2149L(Shl15LoopStart): /* 64-byte loop: load four aligned vectors, fold them with pminub and compare against %xmm0 (presumably all-zero here) so %eax is non-zero if any of the 64 bytes matches; a match sends control back to Shl15Start. */ 2150 movaps 1(%ecx), %xmm2 2151 movaps 17(%ecx), %xmm3 2152 movaps %xmm3, %xmm6 2153 movaps 33(%ecx), %xmm4 2154 movaps %xmm4, %xmm7 2155 movaps 49(%ecx), %xmm5 2156 pminub %xmm2, %xmm6 2157 pminub %xmm5, %xmm7 2158 pminub %xmm6, %xmm7 2159 pcmpeqb %xmm0, %xmm7 2160 pmovmskb %xmm7, %eax 2161 movaps %xmm5, %xmm7 2162 palignr $15, %xmm4, %xmm5 2163 palignr $15, %xmm3, %xmm4 2164 test %eax, %eax 2165 jnz L(Shl15Start) 2166#ifdef USE_AS_STRNCPY 2167 sub $64, %ebx 2168 jbe L(StrncpyLeave15) 2169#endif 2170 /* No match: finish shifting, store the four realigned vectors and advance both pointers by 64. */ palignr $15, %xmm2, %xmm3 2171 lea 64(%ecx), %ecx 2172 palignr $15, %xmm1, %xmm2 2173 movaps %xmm7, %xmm1 2174 movaps %xmm5, 48(%edx) 2175 movaps %xmm4, 32(%edx) 2176 movaps %xmm3, 16(%edx) 2177 movaps %xmm2, (%edx) 2178 lea 64(%edx), %edx 2179 jmp L(Shl15LoopStart) 2180 2181L(Shl15LoopExit): /* Copy 4 bytes at offset -3 through %esi and set %esi = 1. The jump that follows is only assembled for strcat/strlcpy builds. */ 2182 movl -3(%ecx), %esi 2183 movl %esi, -3(%edx) 2184 mov $1, %esi 2185#if defined USE_AS_STRCAT || defined USE_AS_STRLCPY 2186 jmp
L(CopyFrom1To16Bytes) 2187#endif 2188 2189 2190#if !defined USE_AS_STRCAT && !defined USE_AS_STRLCPY 2191 2192 .p2align 4 2193L(CopyFrom1To16Bytes): /* Advance %edx and %ecx by %esi, restore %esi, then dispatch on the lowest set bit of the mask in %eax to the ExitN routine that copies N bytes. strncpy builds first add 16 back to %ebx. */ 2194# ifdef USE_AS_STRNCPY 2195 add $16, %ebx 2196# endif 2197 add %esi, %edx 2198 add %esi, %ecx 2199 2200 POP (%esi) 2201 test %al, %al 2202 jz L(ExitHigh8) 2203 2204L(CopyFrom1To16BytesLess8): /* A bit is set in %al: a zero low nibble means bits 4..7, otherwise test bits 0..2 and fall through to Exit4. */ 2205 mov %al, %ah 2206 and $15, %ah 2207 jz L(ExitHigh4) 2208 2209 test $0x01, %al 2210 jnz L(Exit1) 2211 test $0x02, %al 2212 jnz L(Exit2) 2213 test $0x04, %al 2214 jnz L(Exit3) 2215 2216 .p2align 4 2217L(Exit4): /* Copy 4 bytes and set the return value via SAVE_RESULT. In strncpy builds, subtract 4 from %ebx and, if anything is left, jump to the zero-fill path with %ecx = %edx + 4; in stpncpy builds the cmpb/sbb pair adds 1 to %eax unless the byte at (%eax) is zero. The other ExitN routines follow the same shape. */ 2218 movl (%ecx), %eax 2219 movl %eax, (%edx) 2220 SAVE_RESULT (3) 2221# ifdef USE_AS_STRNCPY 2222 sub $4, %ebx 2223 lea 4(%edx), %ecx 2224 jnz L(StrncpyFillTailWithZero1) 2225# ifdef USE_AS_STPCPY 2226 cmpb $1, (%eax) 2227 sbb $-1, %eax 2228# endif 2229# endif 2230 RETURN1 2231 2232 .p2align 4 2233L(ExitHigh4): /* Lowest set bit is in bits 4..7 of %al: test bits 4..6 and fall through to Exit8. */ 2234 test $0x10, %al 2235 jnz L(Exit5) 2236 test $0x20, %al 2237 jnz L(Exit6) 2238 test $0x40, %al 2239 jnz L(Exit7) 2240 2241 .p2align 4 2242L(Exit8): 2243 movlpd (%ecx), %xmm0 2244 movlpd %xmm0, (%edx) 2245 SAVE_RESULT (7) 2246# ifdef USE_AS_STRNCPY 2247 sub $8, %ebx 2248 lea 8(%edx), %ecx 2249 jnz L(StrncpyFillTailWithZero1) 2250# ifdef USE_AS_STPCPY 2251 cmpb $1, (%eax) 2252 sbb $-1, %eax 2253# endif 2254# endif 2255 RETURN1 2256 2257 .p2align 4 2258L(ExitHigh8): /* %al was zero, so the lowest set bit is in %ah: a zero low nibble means bits 12..15, otherwise test bits 8..10 and fall through to Exit12. */ 2259 mov %ah, %al 2260 and $15, %al 2261 jz L(ExitHigh12) 2262 2263 test $0x01, %ah 2264 jnz L(Exit9) 2265 test $0x02, %ah 2266 jnz L(Exit10) 2267 test $0x04, %ah 2268 jnz L(Exit11) 2269 2270 .p2align 4 2271L(Exit12): 2272 movlpd (%ecx), %xmm0 2273 movl 8(%ecx), %eax 2274 movlpd %xmm0, (%edx) 2275 movl %eax, 8(%edx) 2276 SAVE_RESULT (11) 2277# ifdef USE_AS_STRNCPY 2278 sub $12, %ebx 2279 lea 12(%edx), %ecx 2280 jnz L(StrncpyFillTailWithZero1) 2281# ifdef USE_AS_STPCPY 2282 cmpb $1, (%eax) 2283 sbb $-1, %eax 2284# endif 2285# endif 2286 RETURN1 2287 2288 .p2align 4 2289L(ExitHigh12): 2290 test $0x10, %ah 2291 jnz L(Exit13) 2292 test $0x20, %ah
2293 jnz L(Exit14) 2294 test $0x40, %ah 2295 jnz L(Exit15) 2296 2297 .p2align 4 2298L(Exit16): /* Copy 16 bytes with unaligned moves; the strncpy/stpncpy tail handling has the same shape as the other ExitN routines. */ 2299 movdqu (%ecx), %xmm0 2300 movdqu %xmm0, (%edx) 2301 SAVE_RESULT (15) 2302# ifdef USE_AS_STRNCPY 2303 sub $16, %ebx 2304 lea 16(%edx), %ecx 2305 jnz L(StrncpyFillTailWithZero1) 2306# ifdef USE_AS_STPCPY 2307 cmpb $1, (%eax) 2308 sbb $-1, %eax 2309# endif 2310# endif 2311 RETURN1 2312 2313# ifdef USE_AS_STRNCPY 2314 2315 CFI_PUSH(%esi) 2316 2317 .p2align 4 2318L(CopyFrom1To16BytesCase2): /* strncpy only. Add 16 back to %ebx, advance both pointers by %esi and restore %esi. Each mask-bit test below is paired with a compare of %ebx, so whichever of the two is reached first selects ExitN. */ 2319 add $16, %ebx 2320 add %esi, %ecx 2321 add %esi, %edx 2322 2323 POP (%esi) 2324 2325 test %al, %al 2326 jz L(ExitHighCase2) 2327 2328 cmp $8, %ebx 2329 ja L(CopyFrom1To16BytesLess8) 2330 2331 test $0x01, %al 2332 jnz L(Exit1) 2333 cmp $1, %ebx 2334 je L(Exit1) 2335 test $0x02, %al 2336 jnz L(Exit2) 2337 cmp $2, %ebx 2338 je L(Exit2) 2339 test $0x04, %al 2340 jnz L(Exit3) 2341 cmp $3, %ebx 2342 je L(Exit3) 2343 test $0x08, %al 2344 jnz L(Exit4) 2345 cmp $4, %ebx 2346 je L(Exit4) 2347 test $0x10, %al 2348 jnz L(Exit5) 2349 cmp $5, %ebx 2350 je L(Exit5) 2351 test $0x20, %al 2352 jnz L(Exit6) 2353 cmp $6, %ebx 2354 je L(Exit6) 2355 test $0x40, %al 2356 jnz L(Exit7) 2357 cmp $7, %ebx 2358 je L(Exit7) 2359 jmp L(Exit8) 2360 2361 .p2align 4 2362L(ExitHighCase2): /* No bit set in %al: if %ebx is 8 or less the mask no longer matters and the length-only dispatcher is used, otherwise test the %ah bits against %ebx values 9..15. */ 2363 cmp $8, %ebx 2364 jbe L(CopyFrom1To16BytesLess8Case3) 2365 2366 test $0x01, %ah 2367 jnz L(Exit9) 2368 cmp $9, %ebx 2369 je L(Exit9) 2370 test $0x02, %ah 2371 jnz L(Exit10) 2372 cmp $10, %ebx 2373 je L(Exit10) 2374 test $0x04, %ah 2375 jnz L(Exit11) 2376 cmp $11, %ebx 2377 je L(Exit11) 2378 test $0x8, %ah 2379 jnz L(Exit12) 2380 cmp $12, %ebx 2381 je L(Exit12) 2382 test $0x10, %ah 2383 jnz L(Exit13) 2384 cmp $13, %ebx 2385 je L(Exit13) 2386 test $0x20, %ah 2387 jnz L(Exit14) 2388 cmp $14, %ebx 2389 je L(Exit14) 2390 test $0x40, %ah 2391 jnz L(Exit15) 2392 cmp $15, %ebx 2393 je L(Exit15) 2394 jmp L(Exit16) 2395 2396 CFI_PUSH(%esi) 2397 2398 .p2align 4 2399L(CopyFrom1To16BytesCase2OrCase3): /* Non-zero mask in %eax selects Case2; a zero mask falls through to Case3. */ 2400 test %eax, %eax 2401 jnz
L(CopyFrom1To16BytesCase2) 2402 2403 .p2align 4 2404L(CopyFrom1To16BytesCase3): /* strncpy only, mask was zero: add 16 back to %ebx, advance both pointers by %esi, restore %esi and pick the copy length from %ebx alone. */ 2405 add $16, %ebx 2406 add %esi, %edx 2407 add %esi, %ecx 2408 2409 POP (%esi) 2410 2411 cmp $8, %ebx 2412 ja L(ExitHigh8Case3) 2413 2414L(CopyFrom1To16BytesLess8Case3): /* %ebx is 8 or less: lengths 1..3 go to ExitN, otherwise 4 bytes are copied here and the function returns without a zero-fill step. */ 2415 cmp $4, %ebx 2416 ja L(ExitHigh4Case3) 2417 2418 cmp $1, %ebx 2419 je L(Exit1) 2420 cmp $2, %ebx 2421 je L(Exit2) 2422 cmp $3, %ebx 2423 je L(Exit3) 2424 movl (%ecx), %eax 2425 movl %eax, (%edx) 2426 SAVE_RESULT (4) 2427 RETURN1 2428 2429 .p2align 4 2430L(ExitHigh4Case3): 2431 cmp $5, %ebx 2432 je L(Exit5) 2433 cmp $6, %ebx 2434 je L(Exit6) 2435 cmp $7, %ebx 2436 je L(Exit7) 2437 movlpd (%ecx), %xmm0 2438 movlpd %xmm0, (%edx) 2439 SAVE_RESULT (8) 2440 RETURN1 2441 2442 .p2align 4 2443L(ExitHigh8Case3): 2444 cmp $12, %ebx 2445 ja L(ExitHigh12Case3) 2446 2447 cmp $9, %ebx 2448 je L(Exit9) 2449 cmp $10, %ebx 2450 je L(Exit10) 2451 cmp $11, %ebx 2452 je L(Exit11) 2453 movlpd (%ecx), %xmm0 2454 movl 8(%ecx), %eax 2455 movlpd %xmm0, (%edx) 2456 movl %eax, 8(%edx) 2457 SAVE_RESULT (12) 2458 RETURN1 2459 2460 .p2align 4 2461L(ExitHigh12Case3): 2462 cmp $13, %ebx 2463 je L(Exit13) 2464 cmp $14, %ebx 2465 je L(Exit14) 2466 cmp $15, %ebx 2467 je L(Exit15) 2468 movlpd (%ecx), %xmm0 2469 movlpd 8(%ecx), %xmm1 2470 movlpd %xmm0, (%edx) 2471 movlpd %xmm1, 8(%edx) 2472 SAVE_RESULT (16) 2473 RETURN1 2474 2475# endif 2476 2477 .p2align 4 2478L(Exit1): /* Copy 1 byte and set the return value via SAVE_RESULT. In strncpy builds, subtract 1 from %ebx and, if anything is left, jump to the zero-fill path with %ecx = %edx + 1; in stpncpy builds the cmpb/sbb pair adds 1 to %eax unless the byte at (%eax) is zero. */ 2479 movb (%ecx), %al 2480 movb %al, (%edx) 2481 SAVE_RESULT (0) 2482# ifdef USE_AS_STRNCPY 2483 sub $1, %ebx 2484 lea 1(%edx), %ecx 2485 jnz L(StrncpyFillTailWithZero1) 2486# ifdef USE_AS_STPCPY 2487 cmpb $1, (%eax) 2488 sbb $-1, %eax 2489# endif 2490# endif 2491 RETURN1 2492 2493 .p2align 4 2494L(Exit2): 2495 movw (%ecx), %ax 2496 movw %ax, (%edx) 2497 SAVE_RESULT (1) 2498# ifdef USE_AS_STRNCPY 2499 sub $2, %ebx 2500 lea 2(%edx), %ecx 2501 jnz L(StrncpyFillTailWithZero1) 2502# ifdef USE_AS_STPCPY 2503 cmpb $1, (%eax) 2504 sbb $-1, %eax 2505# endif 2506# endif 2507 RETURN1 2508 2509
.p2align 4 2510L(Exit3): /* Copy 3 bytes (a 2-byte move plus a 1-byte move) and set the return value via SAVE_RESULT. In strncpy builds, subtract 3 from %ebx and zero-fill from %edx + 3 if anything is left; in stpncpy builds the cmpb/sbb pair adds 1 to %eax unless the byte at (%eax) is zero. The ExitN routines that follow have the same shape. */ 2511 movw (%ecx), %ax 2512 movw %ax, (%edx) 2513 movb 2(%ecx), %al 2514 movb %al, 2(%edx) 2515 SAVE_RESULT (2) 2516# ifdef USE_AS_STRNCPY 2517 sub $3, %ebx 2518 lea 3(%edx), %ecx 2519 jnz L(StrncpyFillTailWithZero1) 2520# ifdef USE_AS_STPCPY 2521 cmpb $1, (%eax) 2522 sbb $-1, %eax 2523# endif 2524# endif 2525 RETURN1 2526 2527 .p2align 4 2528L(Exit5): 2529 movl (%ecx), %eax 2530 movl %eax, (%edx) 2531 movb 4(%ecx), %al 2532 movb %al, 4(%edx) 2533 SAVE_RESULT (4) 2534# ifdef USE_AS_STRNCPY 2535 sub $5, %ebx 2536 lea 5(%edx), %ecx 2537 jnz L(StrncpyFillTailWithZero1) 2538# ifdef USE_AS_STPCPY 2539 cmpb $1, (%eax) 2540 sbb $-1, %eax 2541# endif 2542# endif 2543 RETURN1 2544 2545 .p2align 4 2546L(Exit6): 2547 movl (%ecx), %eax 2548 movl %eax, (%edx) 2549 movw 4(%ecx), %ax 2550 movw %ax, 4(%edx) 2551 SAVE_RESULT (5) 2552# ifdef USE_AS_STRNCPY 2553 sub $6, %ebx 2554 lea 6(%edx), %ecx 2555 jnz L(StrncpyFillTailWithZero1) 2556# ifdef USE_AS_STPCPY 2557 cmpb $1, (%eax) 2558 sbb $-1, %eax 2559# endif 2560# endif 2561 RETURN1 2562 2563 .p2align 4 2564L(Exit7): /* 7 bytes are covered by two overlapping 4-byte moves at offsets 0 and 3. */ 2565 movl (%ecx), %eax 2566 movl %eax, (%edx) 2567 movl 3(%ecx), %eax 2568 movl %eax, 3(%edx) 2569 SAVE_RESULT (6) 2570# ifdef USE_AS_STRNCPY 2571 sub $7, %ebx 2572 lea 7(%edx), %ecx 2573 jnz L(StrncpyFillTailWithZero1) 2574# ifdef USE_AS_STPCPY 2575 cmpb $1, (%eax) 2576 sbb $-1, %eax 2577# endif 2578# endif 2579 RETURN1 2580 2581 .p2align 4 2582L(Exit9): 2583 movlpd (%ecx), %xmm0 2584 movb 8(%ecx), %al 2585 movlpd %xmm0, (%edx) 2586 movb %al, 8(%edx) 2587 SAVE_RESULT (8) 2588# ifdef USE_AS_STRNCPY 2589 sub $9, %ebx 2590 lea 9(%edx), %ecx 2591 jnz L(StrncpyFillTailWithZero1) 2592# ifdef USE_AS_STPCPY 2593 cmpb $1, (%eax) 2594 sbb $-1, %eax 2595# endif 2596# endif 2597 RETURN1 2598 2599 .p2align 4 2600L(Exit10): 2601 movlpd (%ecx), %xmm0 2602 movw 8(%ecx), %ax 2603 movlpd %xmm0, (%edx) 2604 movw %ax, 8(%edx) 2605 SAVE_RESULT (9) 2606# ifdef USE_AS_STRNCPY 2607 sub $10, %ebx 2608 lea 10(%edx),
%ecx 2609 jnz L(StrncpyFillTailWithZero1) 2610# ifdef USE_AS_STPCPY 2611 cmpb $1, (%eax) 2612 sbb $-1, %eax 2613# endif 2614# endif 2615 RETURN1 2616 2617 .p2align 4 2618L(Exit11): /* 11 bytes are covered by an 8-byte move at offset 0 and an overlapping 4-byte move at offset 7. */ 2619 movlpd (%ecx), %xmm0 2620 movl 7(%ecx), %eax 2621 movlpd %xmm0, (%edx) 2622 movl %eax, 7(%edx) 2623 SAVE_RESULT (10) 2624# ifdef USE_AS_STRNCPY 2625 sub $11, %ebx 2626 lea 11(%edx), %ecx 2627 jnz L(StrncpyFillTailWithZero1) 2628# ifdef USE_AS_STPCPY 2629 cmpb $1, (%eax) 2630 sbb $-1, %eax 2631# endif 2632# endif 2633 RETURN1 2634 2635 .p2align 4 2636L(Exit13): /* Exit13..Exit15 cover N bytes with two overlapping 8-byte moves, the second at offset N-8. */ 2637 movlpd (%ecx), %xmm0 2638 movlpd 5(%ecx), %xmm1 2639 movlpd %xmm0, (%edx) 2640 movlpd %xmm1, 5(%edx) 2641 SAVE_RESULT (12) 2642# ifdef USE_AS_STRNCPY 2643 sub $13, %ebx 2644 lea 13(%edx), %ecx 2645 jnz L(StrncpyFillTailWithZero1) 2646# ifdef USE_AS_STPCPY 2647 cmpb $1, (%eax) 2648 sbb $-1, %eax 2649# endif 2650# endif 2651 RETURN1 2652 2653 .p2align 4 2654L(Exit14): 2655 movlpd (%ecx), %xmm0 2656 movlpd 6(%ecx), %xmm1 2657 movlpd %xmm0, (%edx) 2658 movlpd %xmm1, 6(%edx) 2659 SAVE_RESULT (13) 2660# ifdef USE_AS_STRNCPY 2661 sub $14, %ebx 2662 lea 14(%edx), %ecx 2663 jnz L(StrncpyFillTailWithZero1) 2664# ifdef USE_AS_STPCPY 2665 cmpb $1, (%eax) 2666 sbb $-1, %eax 2667# endif 2668# endif 2669 RETURN1 2670 2671 .p2align 4 2672L(Exit15): 2673 movlpd (%ecx), %xmm0 2674 movlpd 7(%ecx), %xmm1 2675 movlpd %xmm0, (%edx) 2676 movlpd %xmm1, 7(%edx) 2677 SAVE_RESULT (14) 2678# ifdef USE_AS_STRNCPY 2679 sub $15, %ebx 2680 lea 15(%edx), %ecx 2681 jnz L(StrncpyFillTailWithZero1) 2682# ifdef USE_AS_STPCPY 2683 cmpb $1, (%eax) 2684 sbb $-1, %eax 2685# endif 2686# endif 2687 RETURN1 2688 2689CFI_POP (%edi) 2690 2691# ifdef USE_AS_STRNCPY 2692 .p2align 4 2693L(Fill0): /* FillN (strncpy only): store N bytes at (%ecx) from %edx / %xmm0 and return. On the paths visible here both registers were zeroed in StrncpyFillTailWithZero before FillFrom1To16Bytes dispatches to these labels. */ 2694 RETURN 2695 2696 .p2align 4 2697L(Fill1): 2698 movb %dl, (%ecx) 2699 RETURN 2700 2701 .p2align 4 2702L(Fill2): 2703 movw %dx, (%ecx) 2704 RETURN 2705 2706 .p2align 4 2707L(Fill3): 2708 movw %dx, (%ecx) 2709 movb %dl, 2(%ecx) 2710 RETURN 2711 2712 .p2align 4 2713L(Fill4):
2714 movl %edx, (%ecx) 2715 RETURN 2716 2717 .p2align 4 2718L(Fill5): 2719 movl %edx, (%ecx) 2720 movb %dl, 4(%ecx) 2721 RETURN 2722 2723 .p2align 4 2724L(Fill6): 2725 movl %edx, (%ecx) 2726 movw %dx, 4(%ecx) 2727 RETURN 2728 2729 .p2align 4 2730L(Fill7): /* 7 bytes via two overlapping 4-byte stores at offsets 0 and 3. */ 2731 movl %edx, (%ecx) 2732 movl %edx, 3(%ecx) 2733 RETURN 2734 2735 .p2align 4 2736L(Fill8): /* Fill8..Fill16 store 8 bytes from %xmm0 at (%ecx) plus a second store that covers the rest, overlapping where N is not 12 or 16. */ 2737 movlpd %xmm0, (%ecx) 2738 RETURN 2739 2740 .p2align 4 2741L(Fill9): 2742 movlpd %xmm0, (%ecx) 2743 movb %dl, 8(%ecx) 2744 RETURN 2745 2746 .p2align 4 2747L(Fill10): 2748 movlpd %xmm0, (%ecx) 2749 movw %dx, 8(%ecx) 2750 RETURN 2751 2752 .p2align 4 2753L(Fill11): 2754 movlpd %xmm0, (%ecx) 2755 movl %edx, 7(%ecx) 2756 RETURN 2757 2758 .p2align 4 2759L(Fill12): 2760 movlpd %xmm0, (%ecx) 2761 movl %edx, 8(%ecx) 2762 RETURN 2763 2764 .p2align 4 2765L(Fill13): 2766 movlpd %xmm0, (%ecx) 2767 movlpd %xmm0, 5(%ecx) 2768 RETURN 2769 2770 .p2align 4 2771L(Fill14): 2772 movlpd %xmm0, (%ecx) 2773 movlpd %xmm0, 6(%ecx) 2774 RETURN 2775 2776 .p2align 4 2777L(Fill15): 2778 movlpd %xmm0, (%ecx) 2779 movlpd %xmm0, 7(%ecx) 2780 RETURN 2781 2782 .p2align 4 2783L(Fill16): 2784 movlpd %xmm0, (%ecx) 2785 movlpd %xmm0, 8(%ecx) 2786 RETURN 2787 2788 .p2align 4 2789L(StrncpyFillExit1): /* Undo the 16 that the caller subtracted from %ebx before dispatching. */ 2790 lea 16(%ebx), %ebx 2791L(FillFrom1To16Bytes): /* Dispatch on %ebx to the matching FillN routine; 0, 16, 8 and 4 are tested first, the rest by a small compare tree. */ 2792 test %ebx, %ebx 2793 jz L(Fill0) 2794 cmp $16, %ebx 2795 je L(Fill16) 2796 cmp $8, %ebx 2797 je L(Fill8) 2798 jg L(FillMore8) 2799 cmp $4, %ebx 2800 je L(Fill4) 2801 jg L(FillMore4) 2802 cmp $2, %ebx 2803 jl L(Fill1) 2804 je L(Fill2) 2805 jg L(Fill3) 2806L(FillMore8): /* but less than 16 */ 2807 cmp $12, %ebx 2808 je L(Fill12) 2809 jl L(FillLess12) 2810 cmp $14, %ebx 2811 jl L(Fill13) 2812 je L(Fill14) 2813 jg L(Fill15) 2814L(FillMore4): /* but less than 8 */ 2815 cmp $6, %ebx 2816 jl L(Fill5) 2817 je L(Fill6) 2818 jg L(Fill7) 2819L(FillLess12): /* but more than 8 */ 2820 cmp $10, %ebx 2821 jl L(Fill9) 2822 je L(Fill10) 2823 jmp L(Fill11) 2824 2825 CFI_PUSH(%edi) 2826 2827 .p2align 4
2828L(StrncpyFillTailWithZero1): /* Entry used by the ExitN routines: restore %edi first, then fall into the common fill code. */ 2829 POP (%edi) 2830L(StrncpyFillTailWithZero): /* Zero %xmm0 and %edx, then clear %ebx bytes starting at (%ecx). If more than 16 remain: one unaligned 16-byte store, then %ecx is rounded down to a 16-byte boundary (the overlap is added back to %ebx) so the rest can use aligned movdqa stores. */ 2831 pxor %xmm0, %xmm0 2832 xor %edx, %edx 2833 sub $16, %ebx 2834 jbe L(StrncpyFillExit1) 2835 2836 movlpd %xmm0, (%ecx) 2837 movlpd %xmm0, 8(%ecx) 2838 2839 lea 16(%ecx), %ecx 2840 2841 mov %ecx, %edx 2842 and $0xf, %edx 2843 sub %edx, %ecx 2844 add %edx, %ebx 2845 xor %edx, %edx 2846 sub $64, %ebx 2847 jb L(StrncpyFillLess64) 2848 2849L(StrncpyFillLoopMovdqa): /* Clear 64 bytes per iteration while the count in %ebx does not borrow. */ 2850 movdqa %xmm0, (%ecx) 2851 movdqa %xmm0, 16(%ecx) 2852 movdqa %xmm0, 32(%ecx) 2853 movdqa %xmm0, 48(%ecx) 2854 lea 64(%ecx), %ecx 2855 sub $64, %ebx 2856 jae L(StrncpyFillLoopMovdqa) 2857 2858L(StrncpyFillLess64): /* Fewer than 64 bytes remain: clear 32 and/or 16 more with aligned stores, then finish in the 0..16-byte dispatcher. */ 2859 add $32, %ebx 2860 jl L(StrncpyFillLess32) 2861 movdqa %xmm0, (%ecx) 2862 movdqa %xmm0, 16(%ecx) 2863 lea 32(%ecx), %ecx 2864 sub $16, %ebx 2865 jl L(StrncpyFillExit1) 2866 movdqa %xmm0, (%ecx) 2867 lea 16(%ecx), %ecx 2868 jmp L(FillFrom1To16Bytes) 2869 2870L(StrncpyFillLess32): 2871 add $16, %ebx 2872 jl L(StrncpyFillExit1) 2873 movdqa %xmm0, (%ecx) 2874 lea 16(%ecx), %ecx 2875 jmp L(FillFrom1To16Bytes) 2876# endif 2877 2878 .p2align 4 2879L(ExitTail1): /* ExitTailN: same copy as ExitN, but the result comes from SAVE_RESULT_TAIL, the return is RETURN (no %edi restore) and the strncpy fill entry is StrncpyFillTailWithZero. */ 2880 movb (%ecx), %al 2881 movb %al, (%edx) 2882 SAVE_RESULT_TAIL (0) 2883# ifdef USE_AS_STRNCPY 2884 sub $1, %ebx 2885 lea 1(%edx), %ecx 2886 jnz L(StrncpyFillTailWithZero) 2887# ifdef USE_AS_STPCPY 2888 cmpb $1, (%eax) 2889 sbb $-1, %eax 2890# endif 2891# endif 2892 RETURN 2893 2894 .p2align 4 2895L(ExitTail2): 2896 movw (%ecx), %ax 2897 movw %ax, (%edx) 2898 SAVE_RESULT_TAIL (1) 2899# ifdef USE_AS_STRNCPY 2900 sub $2, %ebx 2901 lea 2(%edx), %ecx 2902 jnz L(StrncpyFillTailWithZero) 2903# ifdef USE_AS_STPCPY 2904 cmpb $1, (%eax) 2905 sbb $-1, %eax 2906# endif 2907# endif 2908 RETURN 2909 2910 .p2align 4 2911L(ExitTail3): 2912 movw (%ecx), %ax 2913 movw %ax, (%edx) 2914 movb 2(%ecx), %al 2915 movb %al, 2(%edx) 2916 SAVE_RESULT_TAIL (2) 2917# ifdef USE_AS_STRNCPY 2918 sub $3, %ebx 2919 lea 3(%edx), %ecx 2920 jnz L(StrncpyFillTailWithZero) 2921# ifdef
USE_AS_STPCPY 2922 cmpb $1, (%eax) 2923 sbb $-1, %eax 2924# endif 2925# endif 2926 RETURN 2927 2928 .p2align 4 2929L(ExitTail4): /* ExitTailN: copy N bytes from (%ecx) to (%edx), set the result via SAVE_RESULT_TAIL, and in strncpy builds subtract N from %ebx and zero-fill from %edx + N if anything is left. */ 2930 movl (%ecx), %eax 2931 movl %eax, (%edx) 2932 SAVE_RESULT_TAIL (3) 2933# ifdef USE_AS_STRNCPY 2934 sub $4, %ebx 2935 lea 4(%edx), %ecx 2936 jnz L(StrncpyFillTailWithZero) 2937# ifdef USE_AS_STPCPY 2938 cmpb $1, (%eax) 2939 sbb $-1, %eax 2940# endif 2941# endif 2942 RETURN 2943 2944 .p2align 4 2945L(ExitTail5): 2946 movl (%ecx), %eax 2947 movl %eax, (%edx) 2948 movb 4(%ecx), %al 2949 movb %al, 4(%edx) 2950 SAVE_RESULT_TAIL (4) 2951# ifdef USE_AS_STRNCPY 2952 sub $5, %ebx 2953 lea 5(%edx), %ecx 2954 jnz L(StrncpyFillTailWithZero) 2955# ifdef USE_AS_STPCPY 2956 cmpb $1, (%eax) 2957 sbb $-1, %eax 2958# endif 2959# endif 2960 RETURN 2961 2962 .p2align 4 2963L(ExitTail6): 2964 movl (%ecx), %eax 2965 movl %eax, (%edx) 2966 movw 4(%ecx), %ax 2967 movw %ax, 4(%edx) 2968 SAVE_RESULT_TAIL (5) 2969# ifdef USE_AS_STRNCPY 2970 sub $6, %ebx 2971 lea 6(%edx), %ecx 2972 jnz L(StrncpyFillTailWithZero) 2973# ifdef USE_AS_STPCPY 2974 cmpb $1, (%eax) 2975 sbb $-1, %eax 2976# endif 2977# endif 2978 RETURN 2979 2980 .p2align 4 2981L(ExitTail7): 2982 movl (%ecx), %eax 2983 movl %eax, (%edx) 2984 movl 3(%ecx), %eax 2985 movl %eax, 3(%edx) 2986 SAVE_RESULT_TAIL (6) 2987# ifdef USE_AS_STRNCPY 2988 sub $7, %ebx 2989 lea 7(%edx), %ecx 2990 jnz L(StrncpyFillTailWithZero) 2991# ifdef USE_AS_STPCPY 2992 cmpb $1, (%eax) 2993 sbb $-1, %eax 2994# endif 2995# endif 2996 RETURN 2997 2998 .p2align 4 2999L(ExitTail8): /* NOTE(review): unlike the neighbouring ExitTailN routines there is no stpncpy cmpb/sbb adjustment here. Presumably this label is only reached when byte 7 is the terminator, which would make the adjustment a no-op; confirm against the callers outside this view. */ 3000 movlpd (%ecx), %xmm0 3001 movlpd %xmm0, (%edx) 3002 SAVE_RESULT_TAIL (7) 3003# ifdef USE_AS_STRNCPY 3004 sub $8, %ebx 3005 lea 8(%edx), %ecx 3006 jnz L(StrncpyFillTailWithZero) 3007# endif 3008 RETURN 3009 3010 .p2align 4 3011L(ExitTail9): 3012 movlpd (%ecx), %xmm0 3013 movb 8(%ecx), %al 3014 movlpd %xmm0, (%edx) 3015 movb %al, 8(%edx) 3016 SAVE_RESULT_TAIL (8) 3017# ifdef USE_AS_STRNCPY 3018 sub $9, %ebx 3019 lea 9(%edx), %ecx 3020 jnz
L(StrncpyFillTailWithZero) 3021# ifdef USE_AS_STPCPY 3022 cmpb $1, (%eax) 3023 sbb $-1, %eax 3024# endif 3025# endif 3026 RETURN 3027 3028 .p2align 4 3029L(ExitTail10): /* ExitTailN: copy N bytes from (%ecx) to (%edx), set the result via SAVE_RESULT_TAIL, and in strncpy builds subtract N from %ebx and zero-fill from %edx + N if anything is left; in stpncpy builds the cmpb/sbb pair adds 1 to %eax unless the byte at (%eax) is zero. */ 3030 movlpd (%ecx), %xmm0 3031 movw 8(%ecx), %ax 3032 movlpd %xmm0, (%edx) 3033 movw %ax, 8(%edx) 3034 SAVE_RESULT_TAIL (9) 3035# ifdef USE_AS_STRNCPY 3036 sub $10, %ebx 3037 lea 10(%edx), %ecx 3038 jnz L(StrncpyFillTailWithZero) 3039# ifdef USE_AS_STPCPY 3040 cmpb $1, (%eax) 3041 sbb $-1, %eax 3042# endif 3043# endif 3044 RETURN 3045 3046 .p2align 4 3047L(ExitTail11): /* 11 bytes via an 8-byte move at offset 0 and an overlapping 4-byte move at offset 7. */ 3048 movlpd (%ecx), %xmm0 3049 movl 7(%ecx), %eax 3050 movlpd %xmm0, (%edx) 3051 movl %eax, 7(%edx) 3052 SAVE_RESULT_TAIL (10) 3053# ifdef USE_AS_STRNCPY 3054 sub $11, %ebx 3055 lea 11(%edx), %ecx 3056 jnz L(StrncpyFillTailWithZero) 3057# ifdef USE_AS_STPCPY 3058 cmpb $1, (%eax) 3059 sbb $-1, %eax 3060# endif 3061# endif 3062 RETURN 3063 3064 .p2align 4 3065L(ExitTail12): 3066 movlpd (%ecx), %xmm0 3067 movl 8(%ecx), %eax 3068 movlpd %xmm0, (%edx) 3069 movl %eax, 8(%edx) 3070 SAVE_RESULT_TAIL (11) 3071# ifdef USE_AS_STRNCPY 3072 sub $12, %ebx 3073 lea 12(%edx), %ecx 3074 jnz L(StrncpyFillTailWithZero) 3075# ifdef USE_AS_STPCPY 3076 cmpb $1, (%eax) 3077 sbb $-1, %eax 3078# endif 3079# endif 3080 RETURN 3081 3082 .p2align 4 3083L(ExitTail13): /* ExitTail13..ExitTail15 cover N bytes with two overlapping 8-byte moves, the second at offset N-8. */ 3084 movlpd (%ecx), %xmm0 3085 movlpd 5(%ecx), %xmm1 3086 movlpd %xmm0, (%edx) 3087 movlpd %xmm1, 5(%edx) 3088 SAVE_RESULT_TAIL (12) 3089# ifdef USE_AS_STRNCPY 3090 sub $13, %ebx 3091 lea 13(%edx), %ecx 3092 jnz L(StrncpyFillTailWithZero) 3093# ifdef USE_AS_STPCPY 3094 cmpb $1, (%eax) 3095 sbb $-1, %eax 3096# endif 3097# endif 3098 RETURN 3099 3100 .p2align 4 3101L(ExitTail14): 3102 movlpd (%ecx), %xmm0 3103 movlpd 6(%ecx), %xmm1 3104 movlpd %xmm0, (%edx) 3105 movlpd %xmm1, 6(%edx) 3106 SAVE_RESULT_TAIL (13) 3107# ifdef USE_AS_STRNCPY 3108 sub $14, %ebx 3109 lea 14(%edx), %ecx 3110 jnz L(StrncpyFillTailWithZero) 3111# ifdef USE_AS_STPCPY 3112 cmpb $1, (%eax) 3113 sbb $-1, %eax 3114# endif 3115#
endif 3116 RETURN 3117 3118 .p2align 4 3119L(ExitTail15): 3120 movlpd (%ecx), %xmm0 3121 movlpd 7(%ecx), %xmm1 3122 movlpd %xmm0, (%edx) 3123 movlpd %xmm1, 7(%edx) 3124 SAVE_RESULT_TAIL (14) 3125# ifdef USE_AS_STRNCPY 3126 sub $15, %ebx 3127 lea 15(%edx), %ecx 3128 jnz L(StrncpyFillTailWithZero) 3129# endif 3130 RETURN 3131 3132 .p2align 4 3133L(ExitTail16): 3134 movdqu (%ecx), %xmm0 3135 movdqu %xmm0, (%edx) 3136 SAVE_RESULT_TAIL (15) 3137# ifdef USE_AS_STRNCPY 3138 sub $16, %ebx 3139 lea 16(%edx), %ecx 3140 jnz L(StrncpyFillTailWithZero) 3141# ifdef USE_AS_STPCPY 3142 cmpb $1, (%eax) 3143 sbb $-1, %eax 3144# endif 3145# endif 3146 RETURN 3147#endif 3148 3149#ifdef USE_AS_STRNCPY 3150# if !defined(USE_AS_STRCAT) && !defined(USE_AS_STRLCPY) 3151 CFI_PUSH (%esi) 3152 CFI_PUSH (%edi) 3153# endif 3154 .p2align 4 3155L(StrncpyLeaveCase2OrCase3): 3156 test %eax, %eax 3157 jnz L(Aligned64LeaveCase2) 3158 3159L(Aligned64LeaveCase3): 3160 add $48, %ebx 3161 jle L(CopyFrom1To16BytesCase3) 3162 movaps %xmm4, -64(%edx) 3163 lea 16(%esi), %esi 3164 sub $16, %ebx 3165 jbe L(CopyFrom1To16BytesCase3) 3166 movaps %xmm5, -48(%edx) 3167 lea 16(%esi), %esi 3168 sub $16, %ebx 3169 jbe L(CopyFrom1To16BytesCase3) 3170 movaps %xmm6, -32(%edx) 3171 lea 16(%esi), %esi 3172 lea -16(%ebx), %ebx 3173 jmp L(CopyFrom1To16BytesCase3) 3174 3175L(Aligned64LeaveCase2): 3176 pcmpeqb %xmm4, %xmm0 3177 pmovmskb %xmm0, %eax 3178 add $48, %ebx 3179 jle L(CopyFrom1To16BytesCase2OrCase3) 3180 test %eax, %eax 3181 jnz L(CopyFrom1To16Bytes) 3182 3183 pcmpeqb %xmm5, %xmm0 3184 pmovmskb %xmm0, %eax 3185 movaps %xmm4, -64(%edx) 3186 lea 16(%esi), %esi 3187 sub $16, %ebx 3188 jbe L(CopyFrom1To16BytesCase2OrCase3) 3189 test %eax, %eax 3190 jnz L(CopyFrom1To16Bytes) 3191 3192 pcmpeqb %xmm6, %xmm0 3193 pmovmskb %xmm0, %eax 3194 movaps %xmm5, -48(%edx) 3195 lea 16(%esi), %esi 3196 sub $16, %ebx 3197 jbe L(CopyFrom1To16BytesCase2OrCase3) 3198 test %eax, %eax 3199 jnz L(CopyFrom1To16Bytes) 3200 3201 pcmpeqb %xmm7, 
%xmm0 3202 pmovmskb %xmm0, %eax 3203 movaps %xmm6, -32(%edx) 3204 lea 16(%esi), %esi 3205 lea -16(%ebx), %ebx 3206 jmp L(CopyFrom1To16BytesCase2) 3207 3208/*--------------------------------------------------*/ 3209 .p2align 4 3210L(StrncpyExit1Case2OrCase3): 3211 movlpd (%ecx), %xmm0 3212 movlpd 7(%ecx), %xmm1 3213 movlpd %xmm0, (%edx) 3214 movlpd %xmm1, 7(%edx) 3215 mov $15, %esi 3216 test %eax, %eax 3217 jnz L(CopyFrom1To16BytesCase2) 3218 jmp L(CopyFrom1To16BytesCase3) 3219 3220 .p2align 4 3221L(StrncpyExit2Case2OrCase3): 3222 movlpd (%ecx), %xmm0 3223 movlpd 6(%ecx), %xmm1 3224 movlpd %xmm0, (%edx) 3225 movlpd %xmm1, 6(%edx) 3226 mov $14, %esi 3227 test %eax, %eax 3228 jnz L(CopyFrom1To16BytesCase2) 3229 jmp L(CopyFrom1To16BytesCase3) 3230 3231 .p2align 4 3232L(StrncpyExit3Case2OrCase3): 3233 movlpd (%ecx), %xmm0 3234 movlpd 5(%ecx), %xmm1 3235 movlpd %xmm0, (%edx) 3236 movlpd %xmm1, 5(%edx) 3237 mov $13, %esi 3238 test %eax, %eax 3239 jnz L(CopyFrom1To16BytesCase2) 3240 jmp L(CopyFrom1To16BytesCase3) 3241 3242 .p2align 4 3243L(StrncpyExit4Case2OrCase3): 3244 movlpd (%ecx), %xmm0 3245 movl 8(%ecx), %esi 3246 movlpd %xmm0, (%edx) 3247 movl %esi, 8(%edx) 3248 mov $12, %esi 3249 test %eax, %eax 3250 jnz L(CopyFrom1To16BytesCase2) 3251 jmp L(CopyFrom1To16BytesCase3) 3252 3253 .p2align 4 3254L(StrncpyExit5Case2OrCase3): 3255 movlpd (%ecx), %xmm0 3256 movl 7(%ecx), %esi 3257 movlpd %xmm0, (%edx) 3258 movl %esi, 7(%edx) 3259 mov $11, %esi 3260 test %eax, %eax 3261 jnz L(CopyFrom1To16BytesCase2) 3262 jmp L(CopyFrom1To16BytesCase3) 3263 3264 .p2align 4 3265L(StrncpyExit6Case2OrCase3): 3266 movlpd (%ecx), %xmm0 3267 movl 6(%ecx), %esi 3268 movlpd %xmm0, (%edx) 3269 movl %esi, 6(%edx) 3270 mov $10, %esi 3271 test %eax, %eax 3272 jnz L(CopyFrom1To16BytesCase2) 3273 jmp L(CopyFrom1To16BytesCase3) 3274 3275 .p2align 4 3276L(StrncpyExit7Case2OrCase3): 3277 movlpd (%ecx), %xmm0 3278 movl 5(%ecx), %esi 3279 movlpd %xmm0, (%edx) 3280 movl %esi, 5(%edx) 3281 mov $9, %esi 3282 
/* Tail of L(StrncpyExit7Case2OrCase3), whose label and copy
   instructions are on the preceding physical line of the file: pick
   the continuation from %eax.  */
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

/* L(StrncpyExitNCase2OrCase3), N = 8..15: copy at least the first
   16 - N bytes from (%ecx) to (%edx), load %esi with 16 - N, then
   continue at L(CopyFrom1To16BytesCase2) if %eax is non-zero and at
   L(CopyFrom1To16BytesCase3) otherwise (both outside this chunk).
   A single fixed-width move is used; for N = 10, 11, 13, 14 and 15 it
   starts at a negative offset, so it also rewrites bytes just below
   %edx with the bytes just below %ecx, and for N = 9 it moves one
   byte more than %esi reports.
   NOTE(review): this relies on those neighbouring bytes being part of
   the same copy - confirm against the callers.  */
	.p2align 4
L(StrncpyExit8Case2OrCase3):
	movlpd	(%ecx), %xmm0		/* 8 bytes */
	movlpd	%xmm0, (%edx)
	mov	$8, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyExit9Case2OrCase3):
	movlpd	(%ecx), %xmm0		/* 8 bytes moved, %esi = 7 */
	movlpd	%xmm0, (%edx)
	mov	$7, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyExit10Case2OrCase3):
	movlpd	-1(%ecx), %xmm0		/* offsets -1..6 */
	movlpd	%xmm0, -1(%edx)
	mov	$6, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyExit11Case2OrCase3):
	movlpd	-2(%ecx), %xmm0		/* offsets -2..5 */
	movlpd	%xmm0, -2(%edx)
	mov	$5, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyExit12Case2OrCase3):
	movl	(%ecx), %esi		/* 4 bytes; %esi is scratch here */
	movl	%esi, (%edx)
	mov	$4, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyExit13Case2OrCase3):
	movl	-1(%ecx), %esi		/* offsets -1..2 */
	movl	%esi, -1(%edx)
	mov	$3, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyExit14Case2OrCase3):
	movl	-2(%ecx), %esi		/* offsets -2..1 */
	movl	%esi, -2(%edx)
	mov	$2, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyExit15Case2OrCase3):
	movl	-3(%ecx), %esi		/* offsets -3..0 */
	movl	%esi, -3(%edx)
	mov	$1, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

/* L(StrncpyLeaveN) / L(StrncpyExitN), N = 1..8.

   L(StrncpyLeaveN) re-biases %ebx by 48 and then stores up to four
   16-byte chunks at 0, 16, 32 and 48(%edx).  The first two are formed
   with palignr $N (from %xmm1/%xmm2, then from the saved copy in
   %xmm3 and a fresh aligned load from 32 - N(%ecx)); the last two
   come straight from %xmm4 and %xmm5.  Every stored chunk adds 16 to
   %esi and takes 16 off %ebx, and the walk stops at L(StrncpyExitN)
   as soon as %ebx is used up (jle: signed <= 0 after the add;
   jbe: %ebx was <= 16 before the sub).
   NOTE(review): the contents of %xmm1, %xmm2, %xmm4, %xmm5 and the
   bias of %ebx come from the loop that jumps here, which is outside
   this chunk - confirm there.

   L(StrncpyExitN) advances %edx and %ecx by %esi + 16 - N, copies the
   bytes that end at the new pointers (at least 16 - N of them; the
   16-byte moves used for N = 1..3 reach further back), clears %esi
   and continues at L(CopyFrom1To16BytesCase3).  */
L(StrncpyLeave1):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit1)
	palignr	$1, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	31(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit1)
	palignr	$1, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit1)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit1)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit1):
	lea	15(%edx, %esi), %edx
	lea	15(%ecx, %esi), %ecx
	movdqu	-16(%ecx), %xmm0
	xor	%esi, %esi
	movdqu	%xmm0, -16(%edx)
	jmp	L(CopyFrom1To16BytesCase3)

L(StrncpyLeave2):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit2)
	palignr	$2, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	30(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit2)
	palignr	$2, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit2)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit2)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit2):
	lea	14(%edx, %esi), %edx
	lea	14(%ecx, %esi), %ecx
	movdqu	-16(%ecx), %xmm0
	xor	%esi, %esi
	movdqu	%xmm0, -16(%edx)
	jmp	L(CopyFrom1To16BytesCase3)

L(StrncpyLeave3):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit3)
	palignr	$3, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	29(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit3)
	palignr	$3, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit3)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit3)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit3):
	lea	13(%edx, %esi), %edx
	lea	13(%ecx, %esi), %ecx
	movdqu	-16(%ecx), %xmm0
	xor	%esi, %esi
	movdqu	%xmm0, -16(%edx)
	jmp	L(CopyFrom1To16BytesCase3)

L(StrncpyLeave4):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit4)
	palignr	$4, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	28(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit4)
	palignr	$4, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit4)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit4)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit4):
	lea	12(%edx, %esi), %edx
	lea	12(%ecx, %esi), %ecx
	movlpd	-12(%ecx), %xmm0	/* 12 bytes: 8 at -12, 4 at -4 */
	movl	-4(%ecx), %eax
	movlpd	%xmm0, -12(%edx)
	movl	%eax, -4(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)

L(StrncpyLeave5):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit5)
	palignr	$5, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	27(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit5)
	palignr	$5, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit5)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit5)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit5):
	lea	11(%edx, %esi), %edx
	lea	11(%ecx, %esi), %ecx
	movlpd	-11(%ecx), %xmm0	/* 11 bytes: 8 at -11, 4 at -4 */
	movl	-4(%ecx), %eax
	movlpd	%xmm0, -11(%edx)
	movl	%eax, -4(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)

L(StrncpyLeave6):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit6)
	palignr	$6, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	26(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit6)
	palignr	$6, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit6)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit6)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit6):
	lea	10(%edx, %esi), %edx
	lea	10(%ecx, %esi), %ecx

	movlpd	-10(%ecx), %xmm0	/* 10 bytes: 8 at -10, 2 at -2 */
	movw	-2(%ecx), %ax
	movlpd	%xmm0, -10(%edx)
	movw	%ax, -2(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)

L(StrncpyLeave7):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit7)
	palignr	$7, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	25(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit7)
	palignr	$7, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit7)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit7)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit7):
	lea	9(%edx, %esi), %edx
	lea	9(%ecx, %esi), %ecx

	movlpd	-9(%ecx), %xmm0		/* 9 bytes: 8 at -9, 1 at -1 */
	movb	-1(%ecx), %ah
	movlpd	%xmm0, -9(%edx)
	movb	%ah, -1(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)

L(StrncpyLeave8):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit8)
	palignr	$8, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	24(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit8)
	palignr	$8, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit8)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit8)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit8):
	lea	8(%edx, %esi), %edx
	lea	8(%ecx, %esi), %ecx
	movlpd	-8(%ecx), %xmm0		/* 8 bytes at -8 */
	movlpd	%xmm0, -8(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)

/* The instructions of L(StrncpyLeave9) start on the following physical
   line of the file.  */
L(StrncpyLeave9):
/* Body of L(StrncpyLeave9); the label itself ends the preceding
   physical line of the file.

   L(StrncpyLeaveN) / L(StrncpyExitN), N = 9..15.

   L(StrncpyLeaveN) re-biases %ebx by 48 and then stores up to four
   16-byte chunks at 0, 16, 32 and 48(%edx).  The first two are formed
   with palignr $N (from %xmm1/%xmm2, then from the saved copy in
   %xmm3 and a fresh aligned load from 32 - N(%ecx)); the last two
   come straight from %xmm4 and %xmm5.  Every stored chunk adds 16 to
   %esi and takes 16 off %ebx, and the walk stops at L(StrncpyExitN)
   as soon as %ebx is used up (jle: signed <= 0 after the add;
   jbe: %ebx was <= 16 before the sub).
   NOTE(review): the contents of %xmm1, %xmm2, %xmm4, %xmm5 and the
   bias of %ebx come from the loop that jumps here, which is outside
   this chunk - confirm there.

   L(StrncpyExitN) advances %edx and %ecx by %esi + 16 - N, copies the
   bytes that end at the new pointers (at least 16 - N of them; the
   fixed-width moves used for N = 9, 10 and 13 reach further back),
   clears %esi and continues at L(CopyFrom1To16BytesCase3).  */
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit9)
	palignr	$9, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	23(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit9)
	palignr	$9, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit9)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit9)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit9):
	lea	7(%edx, %esi), %edx
	lea	7(%ecx, %esi), %ecx

	movlpd	-8(%ecx), %xmm0		/* 8 bytes at -8 (7 needed) */
	movlpd	%xmm0, -8(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)

L(StrncpyLeave10):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit10)
	palignr	$10, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	22(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit10)
	palignr	$10, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit10)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit10)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit10):
	lea	6(%edx, %esi), %edx
	lea	6(%ecx, %esi), %ecx

	movlpd	-8(%ecx), %xmm0		/* 8 bytes at -8 (6 needed) */
	movlpd	%xmm0, -8(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)

L(StrncpyLeave11):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit11)
	palignr	$11, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	21(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit11)
	palignr	$11, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit11)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit11)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit11):
	lea	5(%edx, %esi), %edx
	lea	5(%ecx, %esi), %ecx
	movl	-5(%ecx), %esi		/* 5 bytes: 4 at -5 (via %esi), 1 at -1 */
	movb	-1(%ecx), %ah
	movl	%esi, -5(%edx)
	movb	%ah, -1(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)

L(StrncpyLeave12):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit12)
	palignr	$12, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	20(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit12)
	palignr	$12, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit12)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit12)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit12):
	lea	4(%edx, %esi), %edx
	lea	4(%ecx, %esi), %ecx
	movl	-4(%ecx), %eax		/* 4 bytes at -4 */
	movl	%eax, -4(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)

L(StrncpyLeave13):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit13)
	palignr	$13, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	19(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit13)
	palignr	$13, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit13)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit13)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit13):
	lea	3(%edx, %esi), %edx
	lea	3(%ecx, %esi), %ecx

	movl	-4(%ecx), %eax		/* 4 bytes at -4 (3 needed) */
	movl	%eax, -4(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)

L(StrncpyLeave14):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit14)
	palignr	$14, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	18(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit14)
	palignr	$14, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit14)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit14)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit14):
	lea	2(%edx, %esi), %edx
	lea	2(%ecx, %esi), %ecx
	movw	-2(%ecx), %ax		/* 2 bytes at -2 */
	movw	%ax, -2(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)

L(StrncpyLeave15):
	movaps	%xmm2, %xmm3
	add	$48, %ebx
	jle	L(StrncpyExit15)
	palignr	$15, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	17(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit15)
	palignr	$15, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit15)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit15)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit15):
	lea	1(%edx, %esi), %edx
	lea	1(%ecx, %esi), %ecx
	movb	-1(%ecx), %ah		/* 1 byte at -1 */
	movb	%ah, -1(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)
#endif

#if !defined USE_AS_STRCAT && ! defined USE_AS_STRLCPY
# ifdef USE_AS_STRNCPY
/* Unwind annotations only (no instructions are emitted): the code
   below runs without %esi and %edi on the stack.  */
	CFI_POP (%esi)
	CFI_POP (%edi)

/* Nothing to copy: return %edx unchanged.  */
	.p2align 4
L(ExitTail0):
	movl	%edx, %eax
	RETURN

/* Short-count exits.  %ebx is the byte budget, %ecx the source and
   %edx the destination.  Each routine walks the candidate lengths in
   increasing order and jumps to the matching L(ExitTailK) as soon as
   either the budget equals K or source byte K - 1 is zero; if neither
   ever happens it copies the largest size of its range inline.
   NOTE(review): which budgets reach which routine, and whether the
   leading source bytes were already checked for zero, is decided by
   the callers outside this chunk - confirm there.  */

/* Budget <= 12 is delegated to L(StrncpyExit12Bytes).  Otherwise bytes
   8..11 are tested for zero unconditionally, lengths 13 and 14 are
   tested against both budget and terminator, and the fall-through
   copies 15 bytes with two overlapping 8-byte moves.  */
	.p2align 4
L(StrncpyExit15Bytes):
	cmp	$12, %ebx
	jbe	L(StrncpyExit12Bytes)
	cmpb	$0, 8(%ecx)
	jz	L(ExitTail9)
	cmpb	$0, 9(%ecx)
	jz	L(ExitTail10)
	cmpb	$0, 10(%ecx)
	jz	L(ExitTail11)
	cmpb	$0, 11(%ecx)
	jz	L(ExitTail12)
	cmp	$13, %ebx
	je	L(ExitTail13)
	cmpb	$0, 12(%ecx)
	jz	L(ExitTail13)
	cmp	$14, %ebx
	je	L(ExitTail14)
	cmpb	$0, 13(%ecx)
	jz	L(ExitTail14)
	movlpd	(%ecx), %xmm0
	movlpd	7(%ecx), %xmm1
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 7(%edx)
#  ifdef USE_AS_STPCPY
	lea	14(%edx), %eax		/* last byte written */
	cmpb	$1, (%eax)		/* carry is set only if that byte is 0 */
	sbb	$-1, %eax		/* %eax += 1 - carry */
#  else
	movl	%edx, %eax
#  endif
	RETURN

/* Lengths 9, 10 and 11 are tested against budget and terminator; the
   fall-through copies 12 bytes (8 + 4).  */
	.p2align 4
L(StrncpyExit12Bytes):
	cmp	$9, %ebx
	je	L(ExitTail9)
	cmpb	$0, 8(%ecx)
	jz	L(ExitTail9)
	cmp	$10, %ebx
	je	L(ExitTail10)
	cmpb	$0, 9(%ecx)
	jz	L(ExitTail10)
	cmp	$11, %ebx
	je	L(ExitTail11)
	cmpb	$0, 10(%ecx)
	jz	L(ExitTail11)
	movlpd	(%ecx), %xmm0
	movl	8(%ecx), %eax
	movlpd	%xmm0, (%edx)
	movl	%eax, 8(%edx)
	SAVE_RESULT_TAIL (11)
#  ifdef USE_AS_STPCPY
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#  endif
	RETURN

/* Budget <= 4 is delegated to L(StrncpyExit4Bytes).  Otherwise bytes
   0..3 are tested for zero unconditionally, lengths 5, 6 and 7 are
   tested against both budget and terminator, and the fall-through
   copies 8 bytes with one movlpd.  */
	.p2align 4
L(StrncpyExit8Bytes):
	cmp	$4, %ebx
	jbe	L(StrncpyExit4Bytes)
	cmpb	$0, (%ecx)
	jz	L(ExitTail1)
	cmpb	$0, 1(%ecx)
	jz	L(ExitTail2)
	cmpb	$0, 2(%ecx)
	jz	L(ExitTail3)
	cmpb	$0, 3(%ecx)
	jz	L(ExitTail4)

	cmp	$5, %ebx
	je	L(ExitTail5)
	cmpb	$0, 4(%ecx)
	jz	L(ExitTail5)
	cmp	$6, %ebx
	je	L(ExitTail6)
	cmpb	$0, 5(%ecx)
	jz	L(ExitTail6)
	cmp	$7, %ebx
	je	L(ExitTail7)
	cmpb	$0, 6(%ecx)
	jz	L(ExitTail7)
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
#  ifdef USE_AS_STPCPY
	lea	7(%edx), %eax		/* last byte written */
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#  else
	movl	%edx, %eax
#  endif
	RETURN

/* A zero budget goes to L(ExitTail0).  Lengths 1, 2 and 3 are tested
   against budget and terminator; the fall-through copies 4 bytes.  */
	.p2align 4
L(StrncpyExit4Bytes):
	test	%ebx, %ebx
	jz	L(ExitTail0)
	cmp	$1, %ebx
	je	L(ExitTail1)
	cmpb	$0, (%ecx)
	jz	L(ExitTail1)
	cmp	$2, %ebx
	je	L(ExitTail2)
	cmpb	$0, 1(%ecx)
	jz	L(ExitTail2)
	cmp	$3, %ebx
	je	L(ExitTail3)
	cmpb	$0, 2(%ecx)
	jz	L(ExitTail3)
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	SAVE_RESULT_TAIL (3)
#  ifdef USE_AS_STPCPY
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#  endif
	RETURN
# endif

END (STRCPY)
#endif