• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2Copyright (c) 2014, Intel Corporation
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are met:
7
8    * Redistributions of source code must retain the above copyright notice,
9    * this list of conditions and the following disclaimer.
10
11    * Redistributions in binary form must reproduce the above copyright notice,
12    * this list of conditions and the following disclaimer in the documentation
13    * and/or other materials provided with the distribution.
14
15    * Neither the name of Intel Corporation nor the names of its contributors
16    * may be used to endorse or promote products derived from this software
17    * without specific prior written permission.
18
19THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*/
30
/* Fallback helper macros.  Each one is defined only when the including
   file has not already supplied its own definition.

   L(label) pastes the `.L' prefix onto LABEL; GNU as treats `.L' names as
   assembler-local labels that are not exported.  */
31#ifndef L
32# define L(label)	.L##label
33#endif
34
/* The cfi_* wrappers below expand to the GNU as call-frame-information
   directive of the same name, forwarding their arguments unchanged.  */
35#ifndef cfi_startproc
36# define cfi_startproc	.cfi_startproc
37#endif
38
39#ifndef cfi_endproc
40# define cfi_endproc	.cfi_endproc
41#endif
42
43#ifndef cfi_rel_offset
44# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
45#endif
46
47#ifndef cfi_restore
48# define cfi_restore(reg)	.cfi_restore reg
49#endif
50
51#ifndef cfi_adjust_cfa_offset
52# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
53#endif
54
/* ENTRY(name): open a function.  Marks NAME as a function symbol, makes it
   global, aligns it to 2^4 = 16 bytes, emits the label itself and starts a
   CFI region.  Must be paired with END(name).  */
55#ifndef ENTRY
56# define ENTRY(name)             \
57	.type name, @function;   \
58	.globl name;             \
59	.p2align 4;              \
60name:                            \
61	cfi_startproc
62#endif
63
/* END(name): close the function opened by ENTRY(name).  Ends the CFI region
   and records the symbol size as the distance from NAME to this point.  */
64#ifndef END
65# define END(name)               \
66	cfi_endproc;             \
67	.size name, .-name
68#endif
69
/* CFI_PUSH(REG): unwind-info bookkeeping for a 4-byte push of REG.  Grows
   the CFA offset by 4 and records REG as saved at relative offset 0.
   Emits directives only, no instructions.  */
70#define CFI_PUSH(REG)                  \
71	cfi_adjust_cfa_offset (4);     \
72	cfi_rel_offset (REG, 0)
73
/* CFI_POP(REG): the inverse bookkeeping for a 4-byte pop of REG.  Shrinks
   the CFA offset by 4 and marks REG as restored.  */
74#define CFI_POP(REG)                   \
75	cfi_adjust_cfa_offset (-4);    \
76	cfi_restore (REG)
77
/* PUSH/POP: the real pushl/popl instruction followed by the matching CFI
   annotation, so the unwind info tracks the stack at every instruction.  */
78#define PUSH(REG) pushl REG; CFI_PUSH (REG)
79#define POP(REG) popl REG; CFI_POP (REG)
80
/* STRCPY is the symbol name passed to ENTRY for the routine assembled from
   this file.  A wrapper may define it before including the file; the
   default is strcpy_generic.  */
81#ifndef STRCPY
82# define STRCPY  strcpy_generic
83#endif
84
/* Variant selection: USE_AS_STPNCPY enables both the length-limited
   (USE_AS_STRNCPY) and the stpcpy-style (USE_AS_STPCPY) code paths.  */
85#ifdef USE_AS_STPNCPY
86# define USE_AS_STRNCPY
87# define USE_AS_STPCPY
88#endif
89
/* Stack-frame layout.

   ENTRANCE saves the registers this routine uses: %esi and %edi always,
   plus %ebx for the length-limited variants.  PARMS is the distance from
   %esp to the first argument once ENTRANCE has run: 4 bytes of return
   address plus 4 bytes per pushed register.

   RETURN pops those registers in reverse order and returns.  The CFI_PUSH
   sequence after `ret' emits no instructions.  It re-establishes, for the
   code assembled after this exit point, the unwind state that ENTRANCE set
   up, so it must name the same registers in the same order as ENTRANCE.
   The length-limited variant used to replay %edi twice and never %esi,
   which left %esi described as unsaved in the unwind info.  */
90#ifdef USE_AS_STRNCPY
91# define PARMS  16
92# define ENTRANCE PUSH(%ebx); PUSH(%esi); PUSH(%edi)
93# define RETURN  POP(%edi); POP(%esi); POP(%ebx); ret; CFI_PUSH(%ebx); CFI_PUSH(%esi); CFI_PUSH(%edi);
94#else
95# define PARMS  12
96# define ENTRANCE PUSH(%esi); PUSH(%edi)
97# define RETURN  POP(%edi); POP(%esi); ret; CFI_PUSH(%esi); CFI_PUSH(%edi);
98#endif
99
/* Argument offsets from %esp after ENTRANCE: STR1 is loaded into %edi
   (destination), STR2 into %esi (source), and LEN into %ebx (byte count,
   read only when USE_AS_STRNCPY is defined).  */
100#define STR1  PARMS
101#define STR2  STR1+4
102#define LEN  STR2+4
103
104
/* Jump-table dispatch.  In position-independent builds JMPTBL(I, B) stores
   each entry as the offset of I from B, and the branch macro rebuilds the
   absolute target at run time, clobbering %ecx.  Otherwise entries are
   absolute addresses and the branch is a single indirect jmp.  */
105#if (defined SHARED || defined __PIC__)
106# define JMPTBL(I, B)	I - B
107
108/* Load an entry in a jump table into ECX and branch to it.  TABLE is a
109   jump table with relative offsets.  INDEX is a register that contains
110   the index into the jump table.  SCALE is the scale of INDEX.  */
111
112# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)            \
113	/* We first load PC into ECX.  */                       \
114	call	__x86.get_pc_thunk.cx;                         \
115	/* Get the address of the jump table.  */               \
116	addl	$(TABLE - .), %ecx;                             \
117	/* Get the entry and convert the relative offset to the \
118	   absolute address.  */                                \
119	addl	(%ecx,INDEX,SCALE), %ecx;                       \
120	/* We loaded the jump table and adjusted ECX.  Go.  */  \
121	jmp	*%ecx
122#else
123# define JMPTBL(I, B)	I
124
125/* Branch to an entry in a jump table.  TABLE is a jump table with
126   absolute offsets.  INDEX is a register that contains the index into
127   the jump table.  SCALE is the scale of INDEX.  */
128
129# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)		\
130	jmp	*TABLE(,INDEX,SCALE)
131#endif
132
133.text
134ENTRY (STRCPY)
135	ENTRANCE
136	mov	STR1(%esp), %edi
137	mov	STR2(%esp), %esi
138#ifdef USE_AS_STRNCPY
139	movl	LEN(%esp), %ebx
140	test	%ebx, %ebx
141	jz	L(ExitZero)
142#endif
143
144	mov	%esi, %ecx
145#ifndef USE_AS_STPCPY
146	mov	%edi, %eax      /* save result */
147#endif
148	and	$15, %ecx
149	jz	L(SourceStringAlignmentZero)
150
151	and	$-16, %esi
152	pxor	%xmm0, %xmm0
153	pxor	%xmm1, %xmm1
154
155	pcmpeqb	(%esi), %xmm1
156#ifdef USE_AS_STRNCPY
157	add	%ecx, %ebx
158#endif
159	pmovmskb %xmm1, %edx
160	shr	%cl, %edx
161#ifdef USE_AS_STRNCPY
162#ifdef USE_AS_STPCPY
163	cmp	$16, %ebx
164	jbe	L(CopyFrom1To16BytesTailCase2OrCase3)
165#else
166	cmp	$17, %ebx
167	jbe	L(CopyFrom1To16BytesTailCase2OrCase3)
168#endif
169#endif
170	test	%edx, %edx
171	jnz	L(CopyFrom1To16BytesTail)
172
173	pcmpeqb	16(%esi), %xmm0
174	pmovmskb %xmm0, %edx
175#ifdef USE_AS_STRNCPY
176#ifdef USE_AS_STPCPY
177	cmp	$32, %ebx
178	jbe	L(CopyFrom1To32BytesCase2OrCase3)
179#else
180	cmp	$33, %ebx
181	jbe	L(CopyFrom1To32BytesCase2OrCase3)
182#endif
183#endif
184	test	%edx, %edx
185	jnz	L(CopyFrom1To32Bytes)
186
187	movdqu	(%esi, %ecx), %xmm1   /* copy 16 bytes */
188	movdqu	%xmm1, (%edi)
189
190	sub	%ecx, %edi
191	mov	%edi, %edx
192	mov	$16, %ecx
193	and	$15, %edx
194	jz	L(Align16Both)
195
196/* If source address alignment != destination address alignment */
197	.p2align 4
198L(Unalign16Both):
199	movdqa	(%esi, %ecx), %xmm1
200	movaps	16(%esi, %ecx), %xmm2
201	movdqu	%xmm1, (%edi, %ecx)
202	pcmpeqb	%xmm2, %xmm0
203	pmovmskb %xmm0, %edx
204	add	$16, %ecx
205#ifdef USE_AS_STRNCPY
206	sub	$48, %ebx
207	jbe	L(CopyFrom1To16BytesCase2OrCase3)
208	test	%edx, %edx
209	jnz	L(CopyFrom1To16BytesUnalignedXmm2)
210#else
211	test	%edx, %edx
212	jnz	L(CopyFrom1To16Bytes)
213#endif
214
215	movaps	16(%esi, %ecx), %xmm3
216	movdqu	%xmm2, (%edi, %ecx)
217	pcmpeqb	%xmm3, %xmm0
218	pmovmskb %xmm0, %edx
219	add	$16, %ecx
220#ifdef USE_AS_STRNCPY
221	sub	$16, %ebx
222	jbe	L(CopyFrom1To16BytesCase2OrCase3)
223	test	%edx, %edx
224	jnz	L(CopyFrom1To16BytesUnalignedXmm3)
225#else
226	test	%edx, %edx
227	jnz	L(CopyFrom1To16Bytes)
228#endif
229
230	movaps	16(%esi, %ecx), %xmm4
231	movdqu	%xmm3, (%edi, %ecx)
232	pcmpeqb	%xmm4, %xmm0
233	pmovmskb %xmm0, %edx
234	add	$16, %ecx
235#ifdef USE_AS_STRNCPY
236	sub	$16, %ebx
237	jbe	L(CopyFrom1To16BytesCase2OrCase3)
238	test	%edx, %edx
239	jnz	L(CopyFrom1To16BytesUnalignedXmm4)
240#else
241	test	%edx, %edx
242	jnz	L(CopyFrom1To16Bytes)
243#endif
244
245	movaps	16(%esi, %ecx), %xmm1
246	movdqu	%xmm4, (%edi, %ecx)
247	pcmpeqb	%xmm1, %xmm0
248	pmovmskb %xmm0, %edx
249	add	$16, %ecx
250#ifdef USE_AS_STRNCPY
251	sub	$16, %ebx
252	jbe	L(CopyFrom1To16BytesCase2OrCase3)
253	test	%edx, %edx
254	jnz	L(CopyFrom1To16BytesUnalignedXmm1)
255#else
256	test	%edx, %edx
257	jnz	L(CopyFrom1To16Bytes)
258#endif
259
260	movaps	16(%esi, %ecx), %xmm2
261	movdqu	%xmm1, (%edi, %ecx)
262	pcmpeqb	%xmm2, %xmm0
263	pmovmskb %xmm0, %edx
264	add	$16, %ecx
265#ifdef USE_AS_STRNCPY
266	sub	$16, %ebx
267	jbe	L(CopyFrom1To16BytesCase2OrCase3)
268	test	%edx, %edx
269	jnz	L(CopyFrom1To16BytesUnalignedXmm2)
270#else
271	test	%edx, %edx
272	jnz	L(CopyFrom1To16Bytes)
273#endif
274
275	movaps	16(%esi, %ecx), %xmm3
276	movdqu	%xmm2, (%edi, %ecx)
277	pcmpeqb	%xmm3, %xmm0
278	pmovmskb %xmm0, %edx
279	add	$16, %ecx
280#ifdef USE_AS_STRNCPY
281	sub	$16, %ebx
282	jbe	L(CopyFrom1To16BytesCase2OrCase3)
283	test	%edx, %edx
284	jnz	L(CopyFrom1To16BytesUnalignedXmm3)
285#else
286	test	%edx, %edx
287	jnz	L(CopyFrom1To16Bytes)
288#endif
289
290	movdqu	%xmm3, (%edi, %ecx)
291	mov	%esi, %edx
292	lea	16(%esi, %ecx), %esi
293	and	$-0x40, %esi
294	sub	%esi, %edx
295	sub	%edx, %edi
296#ifdef USE_AS_STRNCPY
297	lea	64+64(%ebx, %edx), %ebx
298#endif
299L(Unaligned64Loop):
300	movaps	(%esi), %xmm2
301	movaps	%xmm2, %xmm4
302	movaps	16(%esi), %xmm5
303	movaps	32(%esi), %xmm3
304	movaps	%xmm3, %xmm6
305	movaps	48(%esi), %xmm7
306	pminub	%xmm5, %xmm2
307	pminub	%xmm7, %xmm3
308	pminub	%xmm2, %xmm3
309	pcmpeqb	%xmm0, %xmm3
310	pmovmskb %xmm3, %edx
311#ifdef USE_AS_STRNCPY
312	sub	$64, %ebx
313	jbe	L(UnalignedLeaveCase2OrCase3)
314#endif
315	test	%edx, %edx
316	jnz	L(Unaligned64Leave)
317
318L(Unaligned64Loop_start):
319	add	$64, %edi
320	add	$64, %esi
321	movdqu	%xmm4, -64(%edi)
322	movaps	(%esi), %xmm2
323	movdqa	%xmm2, %xmm4
324	movdqu	%xmm5, -48(%edi)
325	movaps	16(%esi), %xmm5
326	pminub	%xmm5, %xmm2
327	movaps	32(%esi), %xmm3
328	movdqu	%xmm6, -32(%edi)
329	movaps	%xmm3, %xmm6
330	movdqu	%xmm7, -16(%edi)
331	movaps	48(%esi), %xmm7
332	pminub	%xmm7, %xmm3
333	pminub	%xmm2, %xmm3
334	pcmpeqb	%xmm3, %xmm0
335	pmovmskb %xmm0, %edx
336#ifdef USE_AS_STRNCPY
337	sub	$64, %ebx
338	jbe	L(UnalignedLeaveCase2OrCase3)
339#endif
340	test	%edx, %edx
341	jz	L(Unaligned64Loop_start)
342
343L(Unaligned64Leave):
344	pxor	%xmm0, %xmm0
345	pxor	%xmm1, %xmm1
346
347	pcmpeqb	%xmm4, %xmm0
348	pcmpeqb	%xmm5, %xmm1
349	pmovmskb %xmm0, %edx
350	pmovmskb %xmm1, %ecx
351	test	%edx, %edx
352	jnz	L(CopyFrom1To16BytesUnaligned_0)
353	test	%ecx, %ecx
354	jnz	L(CopyFrom1To16BytesUnaligned_16)
355
356	pcmpeqb	%xmm6, %xmm0
357	pcmpeqb	%xmm7, %xmm1
358	pmovmskb %xmm0, %edx
359	pmovmskb %xmm1, %ecx
360	test	%edx, %edx
361	jnz	L(CopyFrom1To16BytesUnaligned_32)
362
363	bsf	%ecx, %edx
364	movdqu	%xmm4, (%edi)
365	movdqu	%xmm5, 16(%edi)
366	movdqu	%xmm6, 32(%edi)
367#ifdef USE_AS_STRNCPY
368#ifdef USE_AS_STPCPY
369	lea	48(%edi, %edx), %eax
370#endif
371	movdqu	%xmm7, 48(%edi)
372	add	$15, %ebx
373	sub	%edx, %ebx
374	lea	49(%edi, %edx), %edi
375	jmp	L(StrncpyFillTailWithZero)
376#else
377	add	$48, %esi
378	add	$48, %edi
379	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
380#endif
381
382/* If source address alignment == destination address alignment */
383
384L(SourceStringAlignmentZero):
385	pxor	%xmm0, %xmm0
386	movdqa	(%esi), %xmm1
387	pcmpeqb	%xmm1, %xmm0
388	pmovmskb %xmm0, %edx
389
390#ifdef USE_AS_STRNCPY
391#ifdef USE_AS_STPCPY
392	cmp	$16, %ebx
393	jbe	L(CopyFrom1To16BytesTail1Case2OrCase3)
394#else
395	cmp	$17, %ebx
396	jbe	L(CopyFrom1To16BytesTail1Case2OrCase3)
397#endif
398#endif
399	test	%edx, %edx
400	jnz	L(CopyFrom1To16BytesTail1)
401
402	pcmpeqb	16(%esi), %xmm0
403	movdqu	%xmm1, (%edi)
404	pmovmskb %xmm0, %edx
405
406#ifdef USE_AS_STRNCPY
407#ifdef USE_AS_STPCPY
408	cmp	$32, %ebx
409	jbe	L(CopyFrom1To32Bytes1Case2OrCase3)
410#else
411	cmp	$33, %ebx
412	jbe	L(CopyFrom1To32Bytes1Case2OrCase3)
413#endif
414#endif
415	test	%edx, %edx
416	jnz	L(CopyFrom1To32Bytes1)
417
418	mov	%edi, %edx
419	mov	$16, %ecx
420	and	$15, %edx
421	jnz	L(Unalign16Both)
422
423L(Align16Both):
424	movdqa	(%esi, %ecx), %xmm1
425	movdqa	16(%esi, %ecx), %xmm2
426	movdqa	%xmm1, (%edi, %ecx)
427	pcmpeqb	%xmm2, %xmm0
428	pmovmskb %xmm0, %edx
429	add	$16, %ecx
430#ifdef USE_AS_STRNCPY
431	sub	$48, %ebx
432	jbe	L(CopyFrom1To16BytesCase2OrCase3)
433	test	%edx, %edx
434	jnz	L(CopyFrom1To16BytesXmm2)
435#else
436	test	%edx, %edx
437	jnz	L(CopyFrom1To16Bytes)
438#endif
439
440	movdqa	16(%esi, %ecx), %xmm3
441	movdqa	%xmm2, (%edi, %ecx)
442	pcmpeqb	%xmm3, %xmm0
443	pmovmskb %xmm0, %edx
444	lea	16(%ecx), %ecx
445#ifdef USE_AS_STRNCPY
446	sub	$16, %ebx
447	jbe	L(CopyFrom1To16BytesCase2OrCase3)
448	test	%edx, %edx
449	jnz	L(CopyFrom1To16BytesXmm3)
450#else
451	test	%edx, %edx
452	jnz	L(CopyFrom1To16Bytes)
453#endif
454
455	movdqa	16(%esi, %ecx), %xmm4
456	movdqa	%xmm3, (%edi, %ecx)
457	pcmpeqb	%xmm4, %xmm0
458	pmovmskb %xmm0, %edx
459	lea	16(%ecx), %ecx
460#ifdef USE_AS_STRNCPY
461	sub	$16, %ebx
462	jbe	L(CopyFrom1To16BytesCase2OrCase3)
463	test	%edx, %edx
464	jnz	L(CopyFrom1To16BytesXmm4)
465#else
466	test	%edx, %edx
467	jnz	L(CopyFrom1To16Bytes)
468#endif
469
470	movdqa	16(%esi, %ecx), %xmm1
471	movdqa	%xmm4, (%edi, %ecx)
472	pcmpeqb	%xmm1, %xmm0
473	pmovmskb %xmm0, %edx
474	lea	16(%ecx), %ecx
475#ifdef USE_AS_STRNCPY
476	sub	$16, %ebx
477	jbe	L(CopyFrom1To16BytesCase2OrCase3)
478	test	%edx, %edx
479	jnz	L(CopyFrom1To16BytesXmm1)
480#else
481	test	%edx, %edx
482	jnz	L(CopyFrom1To16Bytes)
483#endif
484
485	movdqa	16(%esi, %ecx), %xmm2
486	movdqa	%xmm1, (%edi, %ecx)
487	pcmpeqb	%xmm2, %xmm0
488	pmovmskb %xmm0, %edx
489	lea	16(%ecx), %ecx
490#ifdef USE_AS_STRNCPY
491	sub	$16, %ebx
492	jbe	L(CopyFrom1To16BytesCase2OrCase3)
493	test	%edx, %edx
494	jnz	L(CopyFrom1To16BytesXmm2)
495#else
496	test	%edx, %edx
497	jnz	L(CopyFrom1To16Bytes)
498#endif
499
500	movdqa	16(%esi, %ecx), %xmm3
501	movdqa	%xmm2, (%edi, %ecx)
502	pcmpeqb	%xmm3, %xmm0
503	pmovmskb %xmm0, %edx
504	lea	16(%ecx), %ecx
505#ifdef USE_AS_STRNCPY
506	sub	$16, %ebx
507	jbe	L(CopyFrom1To16BytesCase2OrCase3)
508	test	%edx, %edx
509	jnz	L(CopyFrom1To16BytesXmm3)
510#else
511	test	%edx, %edx
512	jnz	L(CopyFrom1To16Bytes)
513#endif
514
515	movdqa	%xmm3, (%edi, %ecx)
516	mov	%esi, %edx
517	lea	16(%esi, %ecx), %esi
518	and	$-0x40, %esi
519	sub	%esi, %edx
520	sub	%edx, %edi
521#ifdef USE_AS_STRNCPY
522	lea	64+64(%ebx, %edx), %ebx
523#endif
524L(Aligned64Loop):
525	movdqa	(%esi), %xmm2
526	movdqa	%xmm2, %xmm4
527	movaps	16(%esi), %xmm5
528	movdqa	32(%esi), %xmm3
529	movdqa	%xmm3, %xmm6
530	movaps	48(%esi), %xmm7
531	pminub	%xmm5, %xmm2
532	pminub	%xmm7, %xmm3
533	pminub	%xmm2, %xmm3
534	pcmpeqb	%xmm0, %xmm3
535	pmovmskb %xmm3, %edx
536#ifdef USE_AS_STRNCPY
537	sub	$64, %ebx
538	jbe	L(AlignedLeaveCase2OrCase3)
539#endif
540	test	%edx, %edx
541	jnz	L(Aligned64Leave)
542
543L(Aligned64Loop_start):
544	add	$64, %esi
545	add	$64, %edi
546	movaps	%xmm4, -64(%edi)
547	movdqa	(%esi), %xmm2
548	movdqa	%xmm2, %xmm4
549	movaps	%xmm5, -48(%edi)
550	movaps	16(%esi), %xmm5
551	pminub	%xmm5, %xmm2
552	movaps	32(%esi), %xmm3
553	movaps	%xmm6, -32(%edi)
554	movdqa	%xmm3, %xmm6
555	movaps	%xmm7, -16(%edi)
556	movaps	48(%esi), %xmm7
557	pminub	%xmm7, %xmm3
558	pminub	%xmm2, %xmm3
559	pcmpeqb	%xmm3, %xmm0
560	pmovmskb %xmm0, %edx
561#ifdef USE_AS_STRNCPY
562	sub	$64, %ebx
563	jbe	L(AlignedLeaveCase2OrCase3)
564#endif
565	test	%edx, %edx
566	jz	L(Aligned64Loop_start)
567
568L(Aligned64Leave):
569	pxor	%xmm0, %xmm0
570	pxor	%xmm1, %xmm1
571
572	pcmpeqb	%xmm4, %xmm0
573	pcmpeqb	%xmm5, %xmm1
574	pmovmskb %xmm0, %edx
575	pmovmskb %xmm1, %ecx
576	test	%edx, %edx
577	jnz	L(CopyFrom1To16Bytes_0)
578	test	%ecx, %ecx
579	jnz	L(CopyFrom1To16Bytes_16)
580
581	pcmpeqb	%xmm6, %xmm0
582	pcmpeqb	%xmm7, %xmm1
583	pmovmskb %xmm0, %edx
584	pmovmskb %xmm1, %ecx
585	test	%edx, %edx
586	jnz	L(CopyFrom1To16Bytes_32)
587
588	bsf	%ecx, %edx
589	movdqa	%xmm4, (%edi)
590	movdqa	%xmm5, 16(%edi)
591	movdqa	%xmm6, 32(%edi)
592#ifdef USE_AS_STRNCPY
593#ifdef USE_AS_STPCPY
594	lea	48(%edi, %edx), %eax
595#endif
596	movdqa	%xmm7, 48(%edi)
597	add	$15, %ebx
598	sub	%edx, %ebx
599	lea	49(%edi, %edx), %edi
600	jmp	L(StrncpyFillTailWithZero)
601#else
602	add	$48, %esi
603	add	$48, %edi
604	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
605#endif
606
607/*----------------------------------------------------*/
608
609/* Case1 */
610#ifndef USE_AS_STRNCPY
611	.p2align 4
612L(CopyFrom1To16Bytes):
613	add	%ecx, %edi
614	add	%ecx, %esi
615	bsf	%edx, %edx
616	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
617#endif
618	.p2align 4
619L(CopyFrom1To16BytesTail):
620#ifdef USE_AS_STRNCPY
621	sub	%ecx, %ebx
622#endif
623	add	%ecx, %esi
624	bsf	%edx, %edx
625	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
626
627	.p2align 4
628L(CopyFrom1To32Bytes1):
629	add	$16, %esi
630	add	$16, %edi
631#ifdef USE_AS_STRNCPY
632	sub	$16, %ebx
633#endif
634L(CopyFrom1To16BytesTail1):
635	bsf	%edx, %edx
636	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
637
638	.p2align 4
639L(CopyFrom1To32Bytes):
640#ifdef USE_AS_STRNCPY
641	sub	%ecx, %ebx
642#endif
643	bsf	%edx, %edx
644	add	%ecx, %esi
645	add	$16, %edx
646	sub	%ecx, %edx
647	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
648
649	.p2align 4
650L(CopyFrom1To16Bytes_0):
651	bsf	%edx, %edx
652#ifdef USE_AS_STRNCPY
653#ifdef USE_AS_STPCPY
654	lea	(%edi, %edx), %eax
655#endif
656	movdqa	%xmm4, (%edi)
657	add	$63, %ebx
658	sub	%edx, %ebx
659	lea	1(%edi, %edx), %edi
660	jmp	L(StrncpyFillTailWithZero)
661#else
662	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
663#endif
664
665	.p2align 4
666L(CopyFrom1To16Bytes_16):
667	bsf	%ecx, %edx
668	movdqa	%xmm4, (%edi)
669#ifdef USE_AS_STRNCPY
670#ifdef USE_AS_STPCPY
671	lea	16(%edi, %edx), %eax
672#endif
673	movdqa	%xmm5, 16(%edi)
674	add	$47, %ebx
675	sub	%edx, %ebx
676	lea	17(%edi, %edx), %edi
677	jmp	L(StrncpyFillTailWithZero)
678#else
679	add	$16, %esi
680	add	$16, %edi
681	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
682#endif
683
684	.p2align 4
685L(CopyFrom1To16Bytes_32):
686	bsf	%edx, %edx
687	movdqa	%xmm4, (%edi)
688	movdqa	%xmm5, 16(%edi)
689#ifdef USE_AS_STRNCPY
690#ifdef USE_AS_STPCPY
691	lea	32(%edi, %edx), %eax
692#endif
693	movdqa	%xmm6, 32(%edi)
694	add	$31, %ebx
695	sub	%edx, %ebx
696	lea	33(%edi, %edx), %edi
697	jmp	L(StrncpyFillTailWithZero)
698#else
699	add	$32, %esi
700	add	$32, %edi
701	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
702#endif
703
704	.p2align 4
705L(CopyFrom1To16BytesUnaligned_0):
706	bsf	%edx, %edx
707#ifdef USE_AS_STRNCPY
708#ifdef USE_AS_STPCPY
709	lea	(%edi, %edx), %eax
710#endif
711	movdqu	%xmm4, (%edi)
712	add	$63, %ebx
713	sub	%edx, %ebx
714	lea	1(%edi, %edx), %edi
715	jmp	L(StrncpyFillTailWithZero)
716#else
717	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
718#endif
719
720	.p2align 4
721L(CopyFrom1To16BytesUnaligned_16):
722	bsf	%ecx, %edx
723	movdqu	%xmm4, (%edi)
724#ifdef USE_AS_STRNCPY
725#ifdef USE_AS_STPCPY
726	lea	16(%edi, %edx), %eax
727#endif
728	movdqu	%xmm5, 16(%edi)
729	add	$47, %ebx
730	sub	%edx, %ebx
731	lea	17(%edi, %edx), %edi
732	jmp	L(StrncpyFillTailWithZero)
733#else
734	add	$16, %esi
735	add	$16, %edi
736	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
737#endif
738
739	.p2align 4
740L(CopyFrom1To16BytesUnaligned_32):
741	bsf	%edx, %edx
742	movdqu	%xmm4, (%edi)
743	movdqu	%xmm5, 16(%edi)
744#ifdef USE_AS_STRNCPY
745#ifdef USE_AS_STPCPY
746	lea	32(%edi, %edx), %eax
747#endif
748	movdqu	%xmm6, 32(%edi)
749	add	$31, %ebx
750	sub	%edx, %ebx
751	lea	33(%edi, %edx), %edi
752	jmp	L(StrncpyFillTailWithZero)
753#else
754	add	$32, %esi
755	add	$32, %edi
756	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
757#endif
758
759#ifdef USE_AS_STRNCPY
760	.p2align 4
761L(CopyFrom1To16BytesXmm6):
762	movdqa	%xmm6, (%edi, %ecx)
763	jmp	L(CopyFrom1To16BytesXmmExit)
764
765	.p2align 4
766L(CopyFrom1To16BytesXmm5):
767	movdqa	%xmm5, (%edi, %ecx)
768	jmp	L(CopyFrom1To16BytesXmmExit)
769
770	.p2align 4
771L(CopyFrom1To16BytesXmm4):
772	movdqa	%xmm4, (%edi, %ecx)
773	jmp	L(CopyFrom1To16BytesXmmExit)
774
775	.p2align 4
776L(CopyFrom1To16BytesXmm3):
777	movdqa	%xmm3, (%edi, %ecx)
778	jmp	L(CopyFrom1To16BytesXmmExit)
779
780	.p2align 4
781L(CopyFrom1To16BytesXmm2):
782	movdqa	%xmm2, (%edi, %ecx)
783	jmp	L(CopyFrom1To16BytesXmmExit)
784
785	.p2align 4
786L(CopyFrom1To16BytesXmm1):
787	movdqa	%xmm1, (%edi, %ecx)
788	jmp	L(CopyFrom1To16BytesXmmExit)
789
790	.p2align 4
791L(CopyFrom1To16BytesUnalignedXmm6):
792	movdqu	%xmm6, (%edi, %ecx)
793	jmp	L(CopyFrom1To16BytesXmmExit)
794
795	.p2align 4
796L(CopyFrom1To16BytesUnalignedXmm5):
797	movdqu	%xmm5, (%edi, %ecx)
798	jmp	L(CopyFrom1To16BytesXmmExit)
799
800	.p2align 4
801L(CopyFrom1To16BytesUnalignedXmm4):
802	movdqu	%xmm4, (%edi, %ecx)
803	jmp	L(CopyFrom1To16BytesXmmExit)
804
805	.p2align 4
806L(CopyFrom1To16BytesUnalignedXmm3):
807	movdqu	%xmm3, (%edi, %ecx)
808	jmp	L(CopyFrom1To16BytesXmmExit)
809
810	.p2align 4
811L(CopyFrom1To16BytesUnalignedXmm1):
812	movdqu	%xmm1, (%edi, %ecx)
813	jmp	L(CopyFrom1To16BytesXmmExit)
814
815	.p2align 4
816L(CopyFrom1To16BytesExit):
817	BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
818
819/* Case2 */
820
821	.p2align 4
822L(CopyFrom1To16BytesCase2):
823	add	$16, %ebx
824	add	%ecx, %edi
825	add	%ecx, %esi
826	bsf	%edx, %edx
827	cmp	%ebx, %edx
828	jb	L(CopyFrom1To16BytesExit)
829	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
830
831	.p2align 4
832L(CopyFrom1To32BytesCase2):
833	sub	%ecx, %ebx
834	add	%ecx, %esi
835	bsf	%edx, %edx
836	add	$16, %edx
837	sub	%ecx, %edx
838	cmp	%ebx, %edx
839	jb	L(CopyFrom1To16BytesExit)
840	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
841
842L(CopyFrom1To16BytesTailCase2):
843	sub	%ecx, %ebx
844	add	%ecx, %esi
845	bsf	%edx, %edx
846	cmp	%ebx, %edx
847	jb	L(CopyFrom1To16BytesExit)
848	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
849
850L(CopyFrom1To16BytesTail1Case2):
851	bsf	%edx, %edx
852	cmp	%ebx, %edx
853	jb	L(CopyFrom1To16BytesExit)
854	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
855
856/* Case2 or Case3,  Case3 */
857
858	.p2align 4
859L(CopyFrom1To16BytesCase2OrCase3):
860	test	%edx, %edx
861	jnz	L(CopyFrom1To16BytesCase2)
862L(CopyFrom1To16BytesCase3):
863	add	$16, %ebx
864	add	%ecx, %edi
865	add	%ecx, %esi
866	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
867
868	.p2align 4
869L(CopyFrom1To32BytesCase2OrCase3):
870	test	%edx, %edx
871	jnz	L(CopyFrom1To32BytesCase2)
872	sub	%ecx, %ebx
873	add	%ecx, %esi
874	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
875
876	.p2align 4
877L(CopyFrom1To16BytesTailCase2OrCase3):
878	test	%edx, %edx
879	jnz	L(CopyFrom1To16BytesTailCase2)
880	sub	%ecx, %ebx
881	add	%ecx, %esi
882	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
883
884	.p2align 4
885L(CopyFrom1To32Bytes1Case2OrCase3):
886	add	$16, %edi
887	add	$16, %esi
888	sub	$16, %ebx
889L(CopyFrom1To16BytesTail1Case2OrCase3):
890	test	%edx, %edx
891	jnz	L(CopyFrom1To16BytesTail1Case2)
892	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
893
894#endif
895
896/*-----------------------------------------------------------------*/
897	.p2align 4
898L(Exit0):
899#ifdef USE_AS_STPCPY
900	mov	%edi, %eax
901#endif
902	RETURN
903
904	.p2align 4
905L(Exit1):
906	movb	%dh, (%edi)
907#ifdef USE_AS_STPCPY
908	lea	(%edi), %eax
909#endif
910#ifdef USE_AS_STRNCPY
911	sub	$1, %ebx
912	lea	1(%edi), %edi
913	jnz	L(StrncpyFillTailWithZero)
914#endif
915	RETURN
916
917	.p2align 4
918L(Exit2):
919	movw	(%esi), %dx
920	movw	%dx, (%edi)
921#ifdef USE_AS_STPCPY
922	lea	1(%edi), %eax
923#endif
924#ifdef USE_AS_STRNCPY
925	sub	$2, %ebx
926	lea	2(%edi), %edi
927	jnz	L(StrncpyFillTailWithZero)
928#endif
929	RETURN
930
931	.p2align 4
932L(Exit3):
933	movw	(%esi), %cx
934	movw	%cx, (%edi)
935	movb	%dh, 2(%edi)
936#ifdef USE_AS_STPCPY
937	lea	2(%edi), %eax
938#endif
939#ifdef USE_AS_STRNCPY
940	sub	$3, %ebx
941	lea	3(%edi), %edi
942	jnz	L(StrncpyFillTailWithZero)
943#endif
944	RETURN
945
946	.p2align 4
947L(Exit4):
948	movl	(%esi), %edx
949	movl	%edx, (%edi)
950#ifdef USE_AS_STPCPY
951	lea	3(%edi), %eax
952#endif
953#ifdef USE_AS_STRNCPY
954	sub	$4, %ebx
955	lea	4(%edi), %edi
956	jnz	L(StrncpyFillTailWithZero)
957#endif
958	RETURN
959
960	.p2align 4
961L(Exit5):
962	movl	(%esi), %ecx
963	movb	%dh, 4(%edi)
964	movl	%ecx, (%edi)
965#ifdef USE_AS_STPCPY
966	lea	4(%edi), %eax
967#endif
968#ifdef USE_AS_STRNCPY
969	sub	$5, %ebx
970	lea	5(%edi), %edi
971	jnz	L(StrncpyFillTailWithZero)
972#endif
973	RETURN
974
975	.p2align 4
976L(Exit6):
977	movl	(%esi), %ecx
978	movw	4(%esi), %dx
979	movl	%ecx, (%edi)
980	movw	%dx, 4(%edi)
981#ifdef USE_AS_STPCPY
982	lea	5(%edi), %eax
983#endif
984#ifdef USE_AS_STRNCPY
985	sub	$6, %ebx
986	lea	6(%edi), %edi
987	jnz	L(StrncpyFillTailWithZero)
988#endif
989	RETURN
990
991	.p2align 4
992L(Exit7):
993	movl	(%esi), %ecx
994	movl	3(%esi), %edx
995	movl	%ecx, (%edi)
996	movl	%edx, 3(%edi)
997#ifdef USE_AS_STPCPY
998	lea	6(%edi), %eax
999#endif
1000#ifdef USE_AS_STRNCPY
1001	sub	$7, %ebx
1002	lea	7(%edi), %edi
1003	jnz	L(StrncpyFillTailWithZero)
1004#endif
1005	RETURN
1006
1007	.p2align 4
1008L(Exit8):
1009	movlpd	(%esi), %xmm0
1010	movlpd	%xmm0, (%edi)
1011#ifdef USE_AS_STPCPY
1012	lea	7(%edi), %eax
1013#endif
1014#ifdef USE_AS_STRNCPY
1015	sub	$8, %ebx
1016	lea	8(%edi), %edi
1017	jnz	L(StrncpyFillTailWithZero)
1018#endif
1019	RETURN
1020
1021	.p2align 4
1022L(Exit9):
1023	movlpd	(%esi), %xmm0
1024	movb	%dh, 8(%edi)
1025	movlpd	%xmm0, (%edi)
1026#ifdef USE_AS_STPCPY
1027	lea	8(%edi), %eax
1028#endif
1029#ifdef USE_AS_STRNCPY
1030	sub	$9, %ebx
1031	lea	9(%edi), %edi
1032	jnz	L(StrncpyFillTailWithZero)
1033#endif
1034	RETURN
1035
1036	.p2align 4
1037L(Exit10):
1038	movlpd	(%esi), %xmm0
1039	movw	8(%esi), %dx
1040	movlpd	%xmm0, (%edi)
1041	movw	%dx, 8(%edi)
1042#ifdef USE_AS_STPCPY
1043	lea	9(%edi), %eax
1044#endif
1045#ifdef USE_AS_STRNCPY
1046	sub	$10, %ebx
1047	lea	10(%edi), %edi
1048	jnz	L(StrncpyFillTailWithZero)
1049#endif
1050	RETURN
1051
1052	.p2align 4
1053L(Exit11):
1054	movlpd	(%esi), %xmm0
1055	movl	7(%esi), %edx
1056	movlpd	%xmm0, (%edi)
1057	movl	%edx, 7(%edi)
1058#ifdef USE_AS_STPCPY
1059	lea	10(%edi), %eax
1060#endif
1061#ifdef USE_AS_STRNCPY
1062	sub	$11, %ebx
1063	lea	11(%edi), %edi
1064	jnz	L(StrncpyFillTailWithZero)
1065#endif
1066	RETURN
1067
1068	.p2align 4
1069L(Exit12):
1070	movlpd	(%esi), %xmm0
1071	movl	8(%esi), %edx
1072	movlpd	%xmm0, (%edi)
1073	movl	%edx, 8(%edi)
1074#ifdef USE_AS_STPCPY
1075	lea	11(%edi), %eax
1076#endif
1077#ifdef USE_AS_STRNCPY
1078	sub	$12, %ebx
1079	lea	12(%edi), %edi
1080	jnz	L(StrncpyFillTailWithZero)
1081#endif
1082	RETURN
1083
1084	.p2align 4
1085L(Exit13):
1086	movlpd	(%esi), %xmm0
1087	movlpd	5(%esi), %xmm1
1088	movlpd	%xmm0, (%edi)
1089	movlpd	%xmm1, 5(%edi)
1090#ifdef USE_AS_STPCPY
1091	lea	12(%edi), %eax
1092#endif
1093#ifdef USE_AS_STRNCPY
1094	sub	$13, %ebx
1095	lea	13(%edi), %edi
1096	jnz	L(StrncpyFillTailWithZero)
1097#endif
1098	RETURN
1099
1100	.p2align 4
1101L(Exit14):
1102	movlpd	(%esi), %xmm0
1103	movlpd	6(%esi), %xmm1
1104	movlpd	%xmm0, (%edi)
1105	movlpd	%xmm1, 6(%edi)
1106#ifdef USE_AS_STPCPY
1107	lea	13(%edi), %eax
1108#endif
1109#ifdef USE_AS_STRNCPY
1110	sub	$14, %ebx
1111	lea	14(%edi), %edi
1112	jnz	L(StrncpyFillTailWithZero)
1113#endif
1114	RETURN
1115
1116	.p2align 4
1117L(Exit15):
1118	movlpd	(%esi), %xmm0
1119	movlpd	7(%esi), %xmm1
1120	movlpd	%xmm0, (%edi)
1121	movlpd	%xmm1, 7(%edi)
1122#ifdef USE_AS_STPCPY
1123	lea	14(%edi), %eax
1124#endif
1125#ifdef USE_AS_STRNCPY
1126	sub	$15, %ebx
1127	lea	15(%edi), %edi
1128	jnz	L(StrncpyFillTailWithZero)
1129#endif
1130	RETURN
1131
1132	.p2align 4
1133L(Exit16):
1134	movdqu	(%esi), %xmm0
1135	movdqu	%xmm0, (%edi)
1136#ifdef USE_AS_STPCPY
1137	lea	15(%edi), %eax
1138#endif
1139#ifdef USE_AS_STRNCPY
1140	sub	$16, %ebx
1141	lea	16(%edi), %edi
1142	jnz	L(StrncpyFillTailWithZero)
1143#endif
1144	RETURN
1145
1146	.p2align 4
1147L(Exit17):
1148	movdqu	(%esi), %xmm0
1149	xor	%cl, %cl
1150	movdqu	%xmm0, (%edi)
1151	movb	%cl, 16(%edi)
1152#ifdef USE_AS_STPCPY
1153	lea	16(%edi), %eax
1154#endif
1155#ifdef USE_AS_STRNCPY
1156	sub	$17, %ebx
1157	lea	17(%edi), %edi
1158	jnz	L(StrncpyFillTailWithZero)
1159#endif
1160	RETURN
1161
1162	.p2align 4
1163L(Exit18):
1164	movdqu	(%esi), %xmm0
1165	movw	16(%esi), %cx
1166	movdqu	%xmm0, (%edi)
1167	movw	%cx, 16(%edi)
1168#ifdef USE_AS_STPCPY
1169	lea	17(%edi), %eax
1170#endif
1171#ifdef USE_AS_STRNCPY
1172	sub	$18, %ebx
1173	lea	18(%edi), %edi
1174	jnz	L(StrncpyFillTailWithZero)
1175#endif
1176	RETURN
1177
1178	.p2align 4
1179L(Exit19):
1180	movdqu	(%esi), %xmm0
1181	movl	15(%esi), %ecx
1182	movdqu	%xmm0, (%edi)
1183	movl	%ecx, 15(%edi)
1184#ifdef USE_AS_STPCPY
1185	lea	18(%edi), %eax
1186#endif
1187#ifdef USE_AS_STRNCPY
1188	sub	$19, %ebx
1189	lea	19(%edi), %edi
1190	jnz	L(StrncpyFillTailWithZero)
1191#endif
1192	RETURN
1193
1194	.p2align 4
1195L(Exit20):
1196	movdqu	(%esi), %xmm0
1197	movl	16(%esi), %ecx
1198	movdqu	%xmm0, (%edi)
1199	movl	%ecx, 16(%edi)
1200#ifdef USE_AS_STPCPY
1201	lea	19(%edi), %eax
1202#endif
1203#ifdef USE_AS_STRNCPY
1204	sub	$20, %ebx
1205	lea	20(%edi), %edi
1206	jnz	L(StrncpyFillTailWithZero)
1207#endif
1208	RETURN
1209
1210	.p2align 4
1211L(Exit21):
1212	movdqu	(%esi), %xmm0
1213	movl	16(%esi), %ecx
1214	xor	%dl, %dl
1215	movdqu	%xmm0, (%edi)
1216	movl	%ecx, 16(%edi)
1217	movb	%dl, 20(%edi)
1218#ifdef USE_AS_STPCPY
1219	lea	20(%edi), %eax
1220#endif
1221#ifdef USE_AS_STRNCPY
1222	sub	$21, %ebx
1223	lea	21(%edi), %edi
1224	jnz	L(StrncpyFillTailWithZero)
1225#endif
1226	RETURN
1227
1228	.p2align 4
1229L(Exit22):
1230	movdqu	(%esi), %xmm0
1231	movlpd	14(%esi), %xmm3
1232	movdqu	%xmm0, (%edi)
1233	movlpd	%xmm3, 14(%edi)
1234#ifdef USE_AS_STPCPY
1235	lea	21(%edi), %eax
1236#endif
1237#ifdef USE_AS_STRNCPY
1238	sub	$22, %ebx
1239	lea	22(%edi), %edi
1240	jnz	L(StrncpyFillTailWithZero)
1241#endif
1242	RETURN
1243
1244	.p2align 4
1245L(Exit23):
1246	movdqu	(%esi), %xmm0
1247	movlpd	15(%esi), %xmm3
1248	movdqu	%xmm0, (%edi)
1249	movlpd	%xmm3, 15(%edi)
1250#ifdef USE_AS_STPCPY
1251	lea	22(%edi), %eax
1252#endif
1253#ifdef USE_AS_STRNCPY
1254	sub	$23, %ebx
1255	lea	23(%edi), %edi
1256	jnz	L(StrncpyFillTailWithZero)
1257#endif
1258	RETURN
1259
1260	.p2align 4
1261L(Exit24):
1262	movdqu	(%esi), %xmm0
1263	movlpd	16(%esi), %xmm2
1264	movdqu	%xmm0, (%edi)
1265	movlpd	%xmm2, 16(%edi)
1266#ifdef USE_AS_STPCPY
1267	lea	23(%edi), %eax
1268#endif
1269#ifdef USE_AS_STRNCPY
1270	sub	$24, %ebx
1271	lea	24(%edi), %edi
1272	jnz	L(StrncpyFillTailWithZero)
1273#endif
1274	RETURN
1275
1276	.p2align 4
1277L(Exit25):
1278	movdqu	(%esi), %xmm0
1279	movlpd	16(%esi), %xmm2
1280	xor	%cl, %cl
1281	movdqu	%xmm0, (%edi)
1282	movlpd	%xmm2, 16(%edi)
1283	movb	%cl, 24(%edi)
1284#ifdef USE_AS_STPCPY
1285	lea	24(%edi), %eax
1286#endif
1287#ifdef USE_AS_STRNCPY
1288	sub	$25, %ebx
1289	lea	25(%edi), %edi
1290	jnz	L(StrncpyFillTailWithZero)
1291#endif
1292	RETURN
1293
1294	.p2align 4
1295L(Exit26):
1296	movdqu	(%esi), %xmm0
1297	movlpd	16(%esi), %xmm2
1298	movw	24(%esi), %cx
1299	movdqu	%xmm0, (%edi)
1300	movlpd	%xmm2, 16(%edi)
1301	movw	%cx, 24(%edi)
1302#ifdef USE_AS_STPCPY
1303	lea	25(%edi), %eax
1304#endif
1305#ifdef USE_AS_STRNCPY
1306	sub	$26, %ebx
1307	lea	26(%edi), %edi
1308	jnz	L(StrncpyFillTailWithZero)
1309#endif
1310	RETURN
1311
1312	.p2align 4
1313L(Exit27):
1314	movdqu	(%esi), %xmm0
1315	movlpd	16(%esi), %xmm2
1316	movl	23(%esi), %ecx
1317	movdqu	%xmm0, (%edi)
1318	movlpd	%xmm2, 16(%edi)
1319	movl	%ecx, 23(%edi)
1320#ifdef USE_AS_STPCPY
1321	lea	26(%edi), %eax
1322#endif
1323#ifdef USE_AS_STRNCPY
1324	sub	$27, %ebx
1325	lea	27(%edi), %edi
1326	jnz	L(StrncpyFillTailWithZero)
1327#endif
1328	RETURN
1329
1330	.p2align 4
1331L(Exit28):
1332	movdqu	(%esi), %xmm0
1333	movlpd	16(%esi), %xmm2
1334	movl	24(%esi), %ecx
1335	movdqu	%xmm0, (%edi)
1336	movlpd	%xmm2, 16(%edi)
1337	movl	%ecx, 24(%edi)
1338#ifdef USE_AS_STPCPY
1339	lea	27(%edi), %eax
1340#endif
1341#ifdef USE_AS_STRNCPY
1342	sub	$28, %ebx
1343	lea	28(%edi), %edi
1344	jnz	L(StrncpyFillTailWithZero)
1345#endif
1346	RETURN
1347
1348	.p2align 4
1349L(Exit29):
1350	movdqu	(%esi), %xmm0
1351	movdqu	13(%esi), %xmm2
1352	movdqu	%xmm0, (%edi)
1353	movdqu	%xmm2, 13(%edi)
1354#ifdef USE_AS_STPCPY
1355	lea	28(%edi), %eax
1356#endif
1357#ifdef USE_AS_STRNCPY
1358	sub	$29, %ebx
1359	lea	29(%edi), %edi
1360	jnz	L(StrncpyFillTailWithZero)
1361#endif
1362	RETURN
1363
1364	.p2align 4
1365L(Exit30):
1366	movdqu	(%esi), %xmm0
1367	movdqu	14(%esi), %xmm2
1368	movdqu	%xmm0, (%edi)
1369	movdqu	%xmm2, 14(%edi)
1370#ifdef USE_AS_STPCPY
1371	lea	29(%edi), %eax
1372#endif
1373#ifdef USE_AS_STRNCPY
1374	sub	$30, %ebx
1375	lea	30(%edi), %edi
1376	jnz	L(StrncpyFillTailWithZero)
1377#endif
1378	RETURN
1379
1380
/* Exit31: copy exactly 31 bytes from (%esi) to (%edi) as two unaligned
   16-byte moves at offsets 0 and 15 (byte 15 is copied twice).  Both loads
   precede the stores.
   USE_AS_STPCPY: %eax = %edi + 30, the address of the last byte copied.
   USE_AS_STRNCPY: %ebx -= 31 and %edi += 31; lea leaves the flags set by
   sub untouched, so jnz enters StrncpyFillTailWithZero when %ebx != 0.
   RETURN is a macro defined outside this excerpt.  */
1381	.p2align 4
1382L(Exit31):
1383	movdqu	(%esi), %xmm0
1384	movdqu	15(%esi), %xmm2
1385	movdqu	%xmm0, (%edi)
1386	movdqu	%xmm2, 15(%edi)
1387#ifdef USE_AS_STPCPY
1388	lea	30(%edi), %eax
1389#endif
1390#ifdef USE_AS_STRNCPY
1391	sub	$31, %ebx
1392	lea	31(%edi), %edi
1393	jnz	L(StrncpyFillTailWithZero)
1394#endif
1395	RETURN
1396
/* Exit32: copy exactly 32 bytes from (%esi) to (%edi) as two unaligned
   16-byte moves at offsets 0 and 16.  Both loads precede the stores.
   USE_AS_STPCPY: %eax = %edi + 31, the address of the last byte copied.
   USE_AS_STRNCPY: %ebx -= 32 and %edi += 32; lea leaves the flags set by
   sub untouched, so jnz enters StrncpyFillTailWithZero when %ebx != 0.
   RETURN is a macro defined outside this excerpt.  */
1397	.p2align 4
1398L(Exit32):
1399	movdqu	(%esi), %xmm0
1400	movdqu	16(%esi), %xmm2
1401	movdqu	%xmm0, (%edi)
1402	movdqu	%xmm2, 16(%edi)
1403#ifdef USE_AS_STPCPY
1404	lea	31(%edi), %eax
1405#endif
1406#ifdef USE_AS_STRNCPY
1407	sub	$32, %ebx
1408	lea	32(%edi), %edi
1409	jnz	L(StrncpyFillTailWithZero)
1410#endif
1411	RETURN
1412
1413#ifdef USE_AS_STRNCPY
1414
/* StrncpyExit1 (ExitStrncpyTable entry 1): copy exactly 1 byte from (%esi)
   to (%edi) through %dl.  USE_AS_STPCPY: %eax = %edi + 1, one past the
   byte copied.  RETURN is a macro defined outside this excerpt.  */
1415	.p2align 4
1416L(StrncpyExit1):
1417	movb	(%esi), %dl
1418	movb	%dl, (%edi)
1419#ifdef USE_AS_STPCPY
1420	lea	1(%edi), %eax
1421#endif
1422	RETURN
1423
/* StrncpyExit2 (ExitStrncpyTable entry 2): copy exactly 2 bytes from
   (%esi) to (%edi) through %dx.  USE_AS_STPCPY: %eax = %edi + 2, one past
   the last byte copied.  RETURN is a macro defined outside this excerpt.  */
1424	.p2align 4
1425L(StrncpyExit2):
1426	movw	(%esi), %dx
1427	movw	%dx, (%edi)
1428#ifdef USE_AS_STPCPY
1429	lea	2(%edi), %eax
1430#endif
1431	RETURN
/* StrncpyExit3 (ExitStrncpyTable entry 3): copy exactly 3 bytes from
   (%esi) to (%edi) as a 2-byte move plus a 1-byte move at offset 2; both
   loads precede the stores.  USE_AS_STPCPY: %eax = %edi + 3, one past the
   last byte copied.  RETURN is a macro defined outside this excerpt.  */
1432	.p2align 4
1433L(StrncpyExit3):
1434	movw	(%esi), %cx
1435	movb	2(%esi), %dl
1436	movw	%cx, (%edi)
1437	movb	%dl, 2(%edi)
1438#ifdef USE_AS_STPCPY
1439	lea	3(%edi), %eax
1440#endif
1441	RETURN
1442
/* StrncpyExit4 (ExitStrncpyTable entry 4): copy exactly 4 bytes from
   (%esi) to (%edi) through %edx.  USE_AS_STPCPY: %eax = %edi + 4, one past
   the last byte copied.  RETURN is a macro defined outside this excerpt.  */
1443	.p2align 4
1444L(StrncpyExit4):
1445	movl	(%esi), %edx
1446	movl	%edx, (%edi)
1447#ifdef USE_AS_STPCPY
1448	lea	4(%edi), %eax
1449#endif
1450	RETURN
1451
/* StrncpyExit5 (ExitStrncpyTable entry 5): copy exactly 5 bytes from
   (%esi) to (%edi) as a 4-byte move plus a 1-byte move at offset 4; both
   loads precede the stores.  USE_AS_STPCPY: %eax = %edi + 5, one past the
   last byte copied.  RETURN is a macro defined outside this excerpt.  */
1452	.p2align 4
1453L(StrncpyExit5):
1454	movl	(%esi), %ecx
1455	movb	4(%esi), %dl
1456	movl	%ecx, (%edi)
1457	movb	%dl, 4(%edi)
1458#ifdef USE_AS_STPCPY
1459	lea	5(%edi), %eax
1460#endif
1461	RETURN
1462
/* StrncpyExit6 (ExitStrncpyTable entry 6): copy exactly 6 bytes from
   (%esi) to (%edi) as a 4-byte move plus a 2-byte move at offset 4; both
   loads precede the stores.  USE_AS_STPCPY: %eax = %edi + 6, one past the
   last byte copied.  RETURN is a macro defined outside this excerpt.  */
1463	.p2align 4
1464L(StrncpyExit6):
1465	movl	(%esi), %ecx
1466	movw	4(%esi), %dx
1467	movl	%ecx, (%edi)
1468	movw	%dx, 4(%edi)
1469#ifdef USE_AS_STPCPY
1470	lea	6(%edi), %eax
1471#endif
1472	RETURN
1473
/* StrncpyExit7 (ExitStrncpyTable entry 7): copy exactly 7 bytes from
   (%esi) to (%edi) as two 4-byte moves at offsets 0 and 3 (byte 3 is
   copied twice); both loads precede the stores.  USE_AS_STPCPY:
   %eax = %edi + 7, one past the last byte copied.  RETURN is a macro
   defined outside this excerpt.  */
1474	.p2align 4
1475L(StrncpyExit7):
1476	movl	(%esi), %ecx
1477	movl	3(%esi), %edx
1478	movl	%ecx, (%edi)
1479	movl	%edx, 3(%edi)
1480#ifdef USE_AS_STPCPY
1481	lea	7(%edi), %eax
1482#endif
1483	RETURN
1484
/* StrncpyExit8 (ExitStrncpyTable entry 8): copy exactly 8 bytes from
   (%esi) to (%edi) through the low half of %xmm0.  USE_AS_STPCPY:
   %eax = %edi + 8, one past the last byte copied.  RETURN is a macro
   defined outside this excerpt.  */
1485	.p2align 4
1486L(StrncpyExit8):
1487	movlpd	(%esi), %xmm0
1488	movlpd	%xmm0, (%edi)
1489#ifdef USE_AS_STPCPY
1490	lea	8(%edi), %eax
1491#endif
1492	RETURN
1493
/* StrncpyExit9 (ExitStrncpyTable entry 9): copy exactly 9 bytes from
   (%esi) to (%edi) as an 8-byte move plus a 1-byte move at offset 8; both
   loads precede the stores.  USE_AS_STPCPY: %eax = %edi + 9, one past the
   last byte copied.  RETURN is a macro defined outside this excerpt.  */
1494	.p2align 4
1495L(StrncpyExit9):
1496	movlpd	(%esi), %xmm0
1497	movb	8(%esi), %dl
1498	movlpd	%xmm0, (%edi)
1499	movb	%dl, 8(%edi)
1500#ifdef USE_AS_STPCPY
1501	lea	9(%edi), %eax
1502#endif
1503	RETURN
1504
/* StrncpyExit10 (ExitStrncpyTable entry 10): copy exactly 10 bytes from
   (%esi) to (%edi) as an 8-byte move plus a 2-byte move at offset 8; both
   loads precede the stores.  USE_AS_STPCPY: %eax = %edi + 10, one past the
   last byte copied.  RETURN is a macro defined outside this excerpt.  */
1505	.p2align 4
1506L(StrncpyExit10):
1507	movlpd	(%esi), %xmm0
1508	movw	8(%esi), %dx
1509	movlpd	%xmm0, (%edi)
1510	movw	%dx, 8(%edi)
1511#ifdef USE_AS_STPCPY
1512	lea	10(%edi), %eax
1513#endif
1514	RETURN
1515
/* StrncpyExit11 (ExitStrncpyTable entry 11): copy exactly 11 bytes from
   (%esi) to (%edi) as an 8-byte move plus a 4-byte move at offset 7 (byte
   7 is copied twice); both loads precede the stores.  USE_AS_STPCPY:
   %eax = %edi + 11, one past the last byte copied.  RETURN is a macro
   defined outside this excerpt.  */
1516	.p2align 4
1517L(StrncpyExit11):
1518	movlpd	(%esi), %xmm0
1519	movl	7(%esi), %edx
1520	movlpd	%xmm0, (%edi)
1521	movl	%edx, 7(%edi)
1522#ifdef USE_AS_STPCPY
1523	lea	11(%edi), %eax
1524#endif
1525	RETURN
1526
/* StrncpyExit12 (ExitStrncpyTable entry 12): copy exactly 12 bytes from
   (%esi) to (%edi) as an 8-byte move plus a 4-byte move at offset 8; both
   loads precede the stores.  USE_AS_STPCPY: %eax = %edi + 12, one past the
   last byte copied.  RETURN is a macro defined outside this excerpt.  */
1527	.p2align 4
1528L(StrncpyExit12):
1529	movlpd	(%esi), %xmm0
1530	movl	8(%esi), %edx
1531	movlpd	%xmm0, (%edi)
1532	movl	%edx, 8(%edi)
1533#ifdef USE_AS_STPCPY
1534	lea	12(%edi), %eax
1535#endif
1536	RETURN
1537
/* StrncpyExit13 (ExitStrncpyTable entry 13): copy exactly 13 bytes from
   (%esi) to (%edi) as two 8-byte moves at offsets 0 and 5 (bytes 5-7 are
   copied twice); both loads precede the stores.  USE_AS_STPCPY:
   %eax = %edi + 13, one past the last byte copied.  RETURN is a macro
   defined outside this excerpt.  */
1538	.p2align 4
1539L(StrncpyExit13):
1540	movlpd	(%esi), %xmm0
1541	movlpd	5(%esi), %xmm1
1542	movlpd	%xmm0, (%edi)
1543	movlpd	%xmm1, 5(%edi)
1544#ifdef USE_AS_STPCPY
1545	lea	13(%edi), %eax
1546#endif
1547	RETURN
1548
/* StrncpyExit14 (ExitStrncpyTable entry 14): copy exactly 14 bytes from
   (%esi) to (%edi) as two 8-byte moves at offsets 0 and 6 (bytes 6-7 are
   copied twice); both loads precede the stores.  USE_AS_STPCPY:
   %eax = %edi + 14, one past the last byte copied.  RETURN is a macro
   defined outside this excerpt.  */
1549	.p2align 4
1550L(StrncpyExit14):
1551	movlpd	(%esi), %xmm0
1552	movlpd	6(%esi), %xmm1
1553	movlpd	%xmm0, (%edi)
1554	movlpd	%xmm1, 6(%edi)
1555#ifdef USE_AS_STPCPY
1556	lea	14(%edi), %eax
1557#endif
1558	RETURN
1559
/* StrncpyExit15 (ExitStrncpyTable entry 15): copy exactly 15 bytes from
   (%esi) to (%edi) as two 8-byte moves at offsets 0 and 7 (byte 7 is
   copied twice); both loads precede the stores.  USE_AS_STPCPY:
   %eax = %edi + 15, one past the last byte copied.  RETURN is a macro
   defined outside this excerpt.  */
1560	.p2align 4
1561L(StrncpyExit15):
1562	movlpd	(%esi), %xmm0
1563	movlpd	7(%esi), %xmm1
1564	movlpd	%xmm0, (%edi)
1565	movlpd	%xmm1, 7(%edi)
1566#ifdef USE_AS_STPCPY
1567	lea	15(%edi), %eax
1568#endif
1569	RETURN
1570
/* StrncpyExit16 (ExitStrncpyTable entry 16): copy exactly 16 bytes from
   (%esi) to (%edi) with one unaligned 16-byte move.  USE_AS_STPCPY:
   %eax = %edi + 16, one past the last byte copied.  RETURN is a macro
   defined outside this excerpt.  */
1571	.p2align 4
1572L(StrncpyExit16):
1573	movdqu	(%esi), %xmm0
1574	movdqu	%xmm0, (%edi)
1575#ifdef USE_AS_STPCPY
1576	lea	16(%edi), %eax
1577#endif
1578	RETURN
1579
/* StrncpyExit17 (ExitStrncpyTable entry 17): copy exactly 17 bytes from
   (%esi) to (%edi) as an unaligned 16-byte move plus a 1-byte move at
   offset 16; both loads precede the stores.  USE_AS_STPCPY:
   %eax = %edi + 17, one past the last byte copied.  RETURN is a macro
   defined outside this excerpt.  */
1580	.p2align 4
1581L(StrncpyExit17):
1582	movdqu	(%esi), %xmm0
1583	movb	16(%esi), %cl
1584	movdqu	%xmm0, (%edi)
1585	movb	%cl, 16(%edi)
1586#ifdef USE_AS_STPCPY
1587	lea	17(%edi), %eax
1588#endif
1589	RETURN
1590
/* StrncpyExit18 (ExitStrncpyTable entry 18): copy exactly 18 bytes from
   (%esi) to (%edi) as an unaligned 16-byte move plus a 2-byte move at
   offset 16; both loads precede the stores.  USE_AS_STPCPY:
   %eax = %edi + 18, one past the last byte copied.  RETURN is a macro
   defined outside this excerpt.  */
1591	.p2align 4
1592L(StrncpyExit18):
1593	movdqu	(%esi), %xmm0
1594	movw	16(%esi), %cx
1595	movdqu	%xmm0, (%edi)
1596	movw	%cx, 16(%edi)
1597#ifdef USE_AS_STPCPY
1598	lea	18(%edi), %eax
1599#endif
1600	RETURN
1601
/* StrncpyExit19 (ExitStrncpyTable entry 19): copy exactly 19 bytes from
   (%esi) to (%edi) as an unaligned 16-byte move plus a 4-byte move at
   offset 15 (byte 15 is copied twice); both loads precede the stores.
   USE_AS_STPCPY: %eax = %edi + 19, one past the last byte copied.
   RETURN is a macro defined outside this excerpt.  */
1602	.p2align 4
1603L(StrncpyExit19):
1604	movdqu	(%esi), %xmm0
1605	movl	15(%esi), %ecx
1606	movdqu	%xmm0, (%edi)
1607	movl	%ecx, 15(%edi)
1608#ifdef USE_AS_STPCPY
1609	lea	19(%edi), %eax
1610#endif
1611	RETURN
1612
/* StrncpyExit20 (ExitStrncpyTable entry 20): copy exactly 20 bytes from
   (%esi) to (%edi) as an unaligned 16-byte move plus a 4-byte move at
   offset 16; both loads precede the stores.  USE_AS_STPCPY:
   %eax = %edi + 20, one past the last byte copied.  RETURN is a macro
   defined outside this excerpt.  */
1613	.p2align 4
1614L(StrncpyExit20):
1615	movdqu	(%esi), %xmm0
1616	movl	16(%esi), %ecx
1617	movdqu	%xmm0, (%edi)
1618	movl	%ecx, 16(%edi)
1619#ifdef USE_AS_STPCPY
1620	lea	20(%edi), %eax
1621#endif
1622	RETURN
1623
/* StrncpyExit21 (ExitStrncpyTable entry 21): copy exactly 21 bytes from
   (%esi) to (%edi) as an unaligned 16-byte move, a 4-byte move at offset
   16 and a 1-byte move at offset 20; all loads precede the stores.
   USE_AS_STPCPY: %eax = %edi + 21, one past the last byte copied.
   RETURN is a macro defined outside this excerpt.  */
1624	.p2align 4
1625L(StrncpyExit21):
1626	movdqu	(%esi), %xmm0
1627	movl	16(%esi), %ecx
1628	movb	20(%esi), %dl
1629	movdqu	%xmm0, (%edi)
1630	movl	%ecx, 16(%edi)
1631	movb	%dl, 20(%edi)
1632#ifdef USE_AS_STPCPY
1633	lea	21(%edi), %eax
1634#endif
1635	RETURN
1636
/* StrncpyExit22 (ExitStrncpyTable entry 22): copy exactly 22 bytes from
   (%esi) to (%edi) as an unaligned 16-byte move plus an 8-byte move at
   offset 14 (bytes 14-15 are copied twice); both loads precede the stores.
   USE_AS_STPCPY: %eax = %edi + 22, one past the last byte copied.
   RETURN is a macro defined outside this excerpt.  */
1637	.p2align 4
1638L(StrncpyExit22):
1639	movdqu	(%esi), %xmm0
1640	movlpd	14(%esi), %xmm3
1641	movdqu	%xmm0, (%edi)
1642	movlpd	%xmm3, 14(%edi)
1643#ifdef USE_AS_STPCPY
1644	lea	22(%edi), %eax
1645#endif
1646	RETURN
1647
/* StrncpyExit23 (ExitStrncpyTable entry 23): copy exactly 23 bytes from
   (%esi) to (%edi) as an unaligned 16-byte move plus an 8-byte move at
   offset 15 (byte 15 is copied twice); both loads precede the stores.
   USE_AS_STPCPY: %eax = %edi + 23, one past the last byte copied.
   RETURN is a macro defined outside this excerpt.  */
1648	.p2align 4
1649L(StrncpyExit23):
1650	movdqu	(%esi), %xmm0
1651	movlpd	15(%esi), %xmm3
1652	movdqu	%xmm0, (%edi)
1653	movlpd	%xmm3, 15(%edi)
1654#ifdef USE_AS_STPCPY
1655	lea	23(%edi), %eax
1656#endif
1657	RETURN
1658
/* StrncpyExit24 (ExitStrncpyTable entry 24): copy exactly 24 bytes from
   (%esi) to (%edi) as an unaligned 16-byte move plus an 8-byte move at
   offset 16; both loads precede the stores.  USE_AS_STPCPY:
   %eax = %edi + 24, one past the last byte copied.  RETURN is a macro
   defined outside this excerpt.  */
1659	.p2align 4
1660L(StrncpyExit24):
1661	movdqu	(%esi), %xmm0
1662	movlpd	16(%esi), %xmm2
1663	movdqu	%xmm0, (%edi)
1664	movlpd	%xmm2, 16(%edi)
1665#ifdef USE_AS_STPCPY
1666	lea	24(%edi), %eax
1667#endif
1668	RETURN
1669
/* StrncpyExit25 (ExitStrncpyTable entry 25): copy exactly 25 bytes from
   (%esi) to (%edi) as a 16-byte, an 8-byte (offset 16) and a 1-byte
   (offset 24) move; all loads precede the stores.  USE_AS_STPCPY:
   %eax = %edi + 25, one past the last byte copied.  RETURN is a macro
   defined outside this excerpt.  */
1670	.p2align 4
1671L(StrncpyExit25):
1672	movdqu	(%esi), %xmm0
1673	movlpd	16(%esi), %xmm2
1674	movb	24(%esi), %cl
1675	movdqu	%xmm0, (%edi)
1676	movlpd	%xmm2, 16(%edi)
1677	movb	%cl, 24(%edi)
1678#ifdef USE_AS_STPCPY
1679	lea	25(%edi), %eax
1680#endif
1681	RETURN
1682
/* StrncpyExit26 (ExitStrncpyTable entry 26): copy exactly 26 bytes from
   (%esi) to (%edi) as a 16-byte, an 8-byte (offset 16) and a 2-byte
   (offset 24) move; all loads precede the stores.  USE_AS_STPCPY:
   %eax = %edi + 26, one past the last byte copied.  RETURN is a macro
   defined outside this excerpt.  */
1683	.p2align 4
1684L(StrncpyExit26):
1685	movdqu	(%esi), %xmm0
1686	movlpd	16(%esi), %xmm2
1687	movw	24(%esi), %cx
1688	movdqu	%xmm0, (%edi)
1689	movlpd	%xmm2, 16(%edi)
1690	movw	%cx, 24(%edi)
1691#ifdef USE_AS_STPCPY
1692	lea	26(%edi), %eax
1693#endif
1694	RETURN
1695
/* StrncpyExit27 (ExitStrncpyTable entry 27): copy exactly 27 bytes from
   (%esi) to (%edi) as a 16-byte, an 8-byte (offset 16) and a 4-byte
   (offset 23, byte 23 copied twice) move; all loads precede the stores.
   USE_AS_STPCPY: %eax = %edi + 27, one past the last byte copied.
   RETURN is a macro defined outside this excerpt.  */
1696	.p2align 4
1697L(StrncpyExit27):
1698	movdqu	(%esi), %xmm0
1699	movlpd	16(%esi), %xmm2
1700	movl	23(%esi), %ecx
1701	movdqu	%xmm0, (%edi)
1702	movlpd	%xmm2, 16(%edi)
1703	movl	%ecx, 23(%edi)
1704#ifdef USE_AS_STPCPY
1705	lea	27(%edi), %eax
1706#endif
1707	RETURN
1708
/* StrncpyExit28 (ExitStrncpyTable entry 28): copy exactly 28 bytes from
   (%esi) to (%edi) as a 16-byte, an 8-byte (offset 16) and a 4-byte
   (offset 24) move; all loads precede the stores.  USE_AS_STPCPY:
   %eax = %edi + 28, one past the last byte copied.  RETURN is a macro
   defined outside this excerpt.  */
1709	.p2align 4
1710L(StrncpyExit28):
1711	movdqu	(%esi), %xmm0
1712	movlpd	16(%esi), %xmm2
1713	movl	24(%esi), %ecx
1714	movdqu	%xmm0, (%edi)
1715	movlpd	%xmm2, 16(%edi)
1716	movl	%ecx, 24(%edi)
1717#ifdef USE_AS_STPCPY
1718	lea	28(%edi), %eax
1719#endif
1720	RETURN
1721
/* StrncpyExit29 (ExitStrncpyTable entry 29): copy exactly 29 bytes from
   (%esi) to (%edi) as two unaligned 16-byte moves at offsets 0 and 13
   (bytes 13-15 are copied twice); both loads precede the stores.
   USE_AS_STPCPY: %eax = %edi + 29, one past the last byte copied.
   RETURN is a macro defined outside this excerpt.  */
1722	.p2align 4
1723L(StrncpyExit29):
1724	movdqu	(%esi), %xmm0
1725	movdqu	13(%esi), %xmm2
1726	movdqu	%xmm0, (%edi)
1727	movdqu	%xmm2, 13(%edi)
1728#ifdef USE_AS_STPCPY
1729	lea	29(%edi), %eax
1730#endif
1731	RETURN
1732
/* StrncpyExit30 (ExitStrncpyTable entry 30): copy exactly 30 bytes from
   (%esi) to (%edi) as two unaligned 16-byte moves at offsets 0 and 14
   (bytes 14-15 are copied twice); both loads precede the stores.
   USE_AS_STPCPY: %eax = %edi + 30, one past the last byte copied.
   RETURN is a macro defined outside this excerpt.  */
1733	.p2align 4
1734L(StrncpyExit30):
1735	movdqu	(%esi), %xmm0
1736	movdqu	14(%esi), %xmm2
1737	movdqu	%xmm0, (%edi)
1738	movdqu	%xmm2, 14(%edi)
1739#ifdef USE_AS_STPCPY
1740	lea	30(%edi), %eax
1741#endif
1742	RETURN
1743
/* StrncpyExit31 (ExitStrncpyTable entry 31): copy exactly 31 bytes from
   (%esi) to (%edi) as two unaligned 16-byte moves at offsets 0 and 15
   (byte 15 is copied twice); both loads precede the stores.
   USE_AS_STPCPY: %eax = %edi + 31, one past the last byte copied.
   RETURN is a macro defined outside this excerpt.  */
1744	.p2align 4
1745L(StrncpyExit31):
1746	movdqu	(%esi), %xmm0
1747	movdqu	15(%esi), %xmm2
1748	movdqu	%xmm0, (%edi)
1749	movdqu	%xmm2, 15(%edi)
1750#ifdef USE_AS_STPCPY
1751	lea	31(%edi), %eax
1752#endif
1753	RETURN
1754
/* StrncpyExit32 (ExitStrncpyTable entry 32): copy exactly 32 bytes from
   (%esi) to (%edi) as two unaligned 16-byte moves at offsets 0 and 16;
   both loads precede the stores.  USE_AS_STPCPY: %eax = %edi + 32, one
   past the last byte copied.  RETURN is a macro defined outside this
   excerpt.  */
1755	.p2align 4
1756L(StrncpyExit32):
1757	movdqu	(%esi), %xmm0
1758	movdqu	16(%esi), %xmm2
1759	movdqu	%xmm0, (%edi)
1760	movdqu	%xmm2, 16(%edi)
1761#ifdef USE_AS_STPCPY
1762	lea	32(%edi), %eax
1763#endif
1764	RETURN
1765
/* StrncpyExit33 (ExitStrncpyTable entry 33): copy exactly 33 bytes from
   (%esi) to (%edi) as two unaligned 16-byte moves at offsets 0 and 16 plus
   a 1-byte move at offset 32; all loads precede the stores.
   RETURN is a macro defined outside this excerpt.
   NOTE(review): unlike StrncpyExit1-32 this stub has no USE_AS_STPCPY
   block, so %eax is not updated here -- confirm that entry 33 cannot be
   reached in the stpncpy build or that %eax is set elsewhere.  */
1766	.p2align 4
1767L(StrncpyExit33):
1768	movdqu	(%esi), %xmm0
1769	movdqu	16(%esi), %xmm2
1770	movb	32(%esi), %cl
1771	movdqu	%xmm0, (%edi)
1772	movdqu	%xmm2, 16(%edi)
1773	movb	%cl, 32(%edi)
1774	RETURN
1775
/* Fill0 (FillTable entry 0): nothing left to store; just RETURN (macro
   defined outside this excerpt).  */
1776	.p2align 4
1777L(Fill0):
1778	RETURN
1779
/* Fill1 (FillTable entry 1): store 1 byte from %dl at (%edi).  On the
   dispatch paths visible in this excerpt StrncpyFillTailWithZero has
   cleared %edx, so the byte written is zero.  */
1780	.p2align 4
1781L(Fill1):
1782	movb	%dl, (%edi)
1783	RETURN
1784
/* Fill2 (FillTable entry 2): store 2 bytes from %dx at (%edi).  On the
   dispatch paths visible in this excerpt StrncpyFillTailWithZero has
   cleared %edx, so the bytes written are zeros.  */
1785	.p2align 4
1786L(Fill2):
1787	movw	%dx, (%edi)
1788	RETURN
1789
/* Fill3 (FillTable entry 3): one 4-byte store of %edx at -1(%edi), which
   covers bytes 0-2 at %edi and also rewrites the byte just before %edi.
   %edx is zero on the dispatch paths visible in this excerpt.
   NOTE(review): relies on the byte at -1(%edi) already being zero and
   inside the destination (presumably the terminator or earlier fill) --
   confirm.  */
1790	.p2align 4
1791L(Fill3):
1792	movl	%edx, -1(%edi)
1793	RETURN
1794
/* Fill4 (FillTable entry 4): store 4 bytes from %edx at (%edi).  On the
   dispatch paths visible in this excerpt StrncpyFillTailWithZero has
   cleared %edx, so the bytes written are zeros.  */
1795	.p2align 4
1796L(Fill4):
1797	movl	%edx, (%edi)
1798	RETURN
1799
/* Fill5 (FillTable entry 5): store 5 bytes at (%edi): 4 from %edx plus 1
   from %dl at offset 4.  %edx is zero on the dispatch paths visible in
   this excerpt (cleared by StrncpyFillTailWithZero).  */
1800	.p2align 4
1801L(Fill5):
1802	movl	%edx, (%edi)
1803	movb	%dl, 4(%edi)
1804	RETURN
1805
/* Fill6 (FillTable entry 6): store 6 bytes at (%edi): 4 from %edx plus 2
   from %dx at offset 4.  %edx is zero on the dispatch paths visible in
   this excerpt (cleared by StrncpyFillTailWithZero).  */
1806	.p2align 4
1807L(Fill6):
1808	movl	%edx, (%edi)
1809	movw	%dx, 4(%edi)
1810	RETURN
1811
/* Fill7 (FillTable entry 7): one 8-byte store of the low half of %xmm0 at
   -1(%edi), which covers bytes 0-6 at %edi and also rewrites the byte just
   before %edi.  %xmm0 is zero on the dispatch paths visible in this
   excerpt.
   NOTE(review): relies on the byte at -1(%edi) already being zero and
   inside the destination (presumably the terminator or earlier fill) --
   confirm.  */
1812	.p2align 4
1813L(Fill7):
1814	movlpd	%xmm0, -1(%edi)
1815	RETURN
1816
/* Fill8 (FillTable entry 8): store 8 bytes (low half of %xmm0) at (%edi).
   On the dispatch paths visible in this excerpt StrncpyFillTailWithZero
   has cleared %xmm0, so the bytes written are zeros.  */
1817	.p2align 4
1818L(Fill8):
1819	movlpd	%xmm0, (%edi)
1820	RETURN
1821
/* Fill9 (FillTable entry 9): store 9 bytes at (%edi): 8 from the low half
   of %xmm0 plus 1 from %dl at offset 8.  Both registers are zero on the
   dispatch paths visible in this excerpt (cleared by
   StrncpyFillTailWithZero).  */
1822	.p2align 4
1823L(Fill9):
1824	movlpd	%xmm0, (%edi)
1825	movb	%dl, 8(%edi)
1826	RETURN
1827
/* Fill10 (FillTable entry 10): store 10 bytes at (%edi): 8 from the low
   half of %xmm0 plus 2 from %dx at offset 8.  Both registers are zero on
   the dispatch paths visible in this excerpt (cleared by
   StrncpyFillTailWithZero).  */
1828	.p2align 4
1829L(Fill10):
1830	movlpd	%xmm0, (%edi)
1831	movw	%dx, 8(%edi)
1832	RETURN
1833
/* Fill11 (FillTable entry 11): store 11 bytes at (%edi): 8 from the low
   half of %xmm0 plus 4 from %edx at offset 7 (byte 7 is written twice).
   Both registers are zero on the dispatch paths visible in this excerpt
   (cleared by StrncpyFillTailWithZero).  */
1834	.p2align 4
1835L(Fill11):
1836	movlpd	%xmm0, (%edi)
1837	movl	%edx, 7(%edi)
1838	RETURN
1839
/* Fill12 (FillTable entry 12): store 12 bytes at (%edi): 8 from the low
   half of %xmm0 plus 4 from %edx at offset 8.  Both registers are zero on
   the dispatch paths visible in this excerpt (cleared by
   StrncpyFillTailWithZero).  */
1840	.p2align 4
1841L(Fill12):
1842	movlpd	%xmm0, (%edi)
1843	movl	%edx, 8(%edi)
1844	RETURN
1845
/* Fill13 (FillTable entry 13): store 13 bytes at (%edi) as two 8-byte
   stores of the low half of %xmm0 at offsets 0 and 5 (bytes 5-7 are
   written twice).  %xmm0 is zero on the dispatch paths visible in this
   excerpt (cleared by StrncpyFillTailWithZero).  */
1846	.p2align 4
1847L(Fill13):
1848	movlpd	%xmm0, (%edi)
1849	movlpd	%xmm0, 5(%edi)
1850	RETURN
1851
/* Fill14 (FillTable entry 14): store 14 bytes at (%edi) as two 8-byte
   stores of the low half of %xmm0 at offsets 0 and 6 (bytes 6-7 are
   written twice).  %xmm0 is zero on the dispatch paths visible in this
   excerpt (cleared by StrncpyFillTailWithZero).  */
1852	.p2align 4
1853L(Fill14):
1854	movlpd	%xmm0, (%edi)
1855	movlpd	%xmm0, 6(%edi)
1856	RETURN
1857
/* Fill15 (FillTable entry 15): one unaligned 16-byte store of %xmm0 at
   -1(%edi), which covers bytes 0-14 at %edi and also rewrites the byte
   just before %edi.  %xmm0 is zero on the dispatch paths visible in this
   excerpt.
   NOTE(review): relies on the byte at -1(%edi) already being zero and
   inside the destination (presumably the terminator or earlier fill) --
   confirm.  */
1858	.p2align 4
1859L(Fill15):
1860	movdqu	%xmm0, -1(%edi)
1861	RETURN
1862
/* Fill16 (FillTable entry 16): store 16 bytes of %xmm0 at (%edi) with one
   unaligned move.  On the dispatch paths visible in this excerpt
   StrncpyFillTailWithZero has cleared %xmm0, so the bytes written are
   zeros.  */
1863	.p2align 4
1864L(Fill16):
1865	movdqu	%xmm0, (%edi)
1866	RETURN
1867
/* CopyFrom1To16BytesUnalignedXmm2: store the 16 bytes held in %xmm2 at
   (%edi + %ecx) with an unaligned move.  There is no jump or RETURN here:
   execution continues into CopyFrom1To16BytesXmmExit, the next label in
   the file.  */
1868	.p2align 4
1869L(CopyFrom1To16BytesUnalignedXmm2):
1870	movdqu	%xmm2, (%edi, %ecx)
1871
/* CopyFrom1To16BytesXmmExit: fix up %edi/%ebx after a 16-byte store and
   continue into StrncpyFillTailWithZero (the next label in the file; this
   block has no jump or RETURN).
     %edx  <- index of the lowest set bit of %edx (bsf)
     %edi  <- %edi + %ecx + %edx + 1
     %ebx  <- %ebx + 15 - %edx
   USE_AS_STPCPY: %eax = %edi + %ecx + %edx (taken before the final +1).
   NOTE(review): %edx presumably arrives as a pmovmskb mask of NUL bytes,
   so the bit index is the terminator offset -- confirm at the branch
   sites, which are outside this excerpt.  */
1872	.p2align 4
1873L(CopyFrom1To16BytesXmmExit):
1874	bsf	%edx, %edx
1875	add	$15, %ebx
1876	add	%ecx, %edi
1877#ifdef USE_AS_STPCPY
1878	lea	(%edi, %edx), %eax
1879#endif
1880	sub	%edx, %ebx
1881	lea	1(%edi, %edx), %edi
1882
/* StrncpyFillTailWithZero: write %ebx zero bytes starting at (%edi).
   In:   %edi = first byte to clear, %ebx = number of bytes to clear.
   Uses: %xmm0 and %edx (both set to zero), %esi (scratch on the long
         path), %ebx, %edi, flags.
   Tails of 16 bytes or fewer are dispatched straight to FillTable.  Longer
   tails get one unaligned 16-byte store, then 16-byte-aligned stores (64
   per loop iteration, then 32 and 16), and the last 0-16 bytes go through
   FillTable.  BRANCH_TO_JMPTBL_ENTRY is a macro defined outside this
   excerpt; here it is given the table, the index register and 4.  */
1883	.p2align 4
1884L(StrncpyFillTailWithZero):
1885	pxor	%xmm0, %xmm0
1886	xor	%edx, %edx
1887	sub	$16, %ebx
1888	jbe	L(StrncpyFillExit)
1889
/* More than 16 bytes: clear the first 16 with an unaligned store.  */
1890	movdqu	%xmm0, (%edi)
1891	add	$16, %edi
1892
/* Round %edi down to a 16-byte boundary.  The %esi bytes stepped back over
   were just cleared; they are added back to the count and cleared again by
   the aligned stores.  */
1893	mov	%edi, %esi
1894	and	$0xf, %esi
1895	sub	%esi, %edi
1896	add	%esi, %ebx
1897	sub	$64, %ebx
1898	jb	L(StrncpyFillLess64)
1899
/* 64 bytes per iteration while the count does not borrow.  */
1900L(StrncpyFillLoopMovdqa):
1901	movdqa	%xmm0, (%edi)
1902	movdqa	%xmm0, 16(%edi)
1903	movdqa	%xmm0, 32(%edi)
1904	movdqa	%xmm0, 48(%edi)
1905	add	$64, %edi
1906	sub	$64, %ebx
1907	jae	L(StrncpyFillLoopMovdqa)
1908
/* Here %ebx = (bytes left) - 64.  Adding 32 turns that into
   (bytes left) - 32; a negative result means fewer than 32 remain.  */
1909L(StrncpyFillLess64):
1910	add	$32, %ebx
1911	jl	L(StrncpyFillLess32)
1912	movdqa	%xmm0, (%edi)
1913	movdqa	%xmm0, 16(%edi)
1914	add	$32, %edi
1915	sub	$16, %ebx
1916	jl	L(StrncpyFillExit)
1917	movdqa	%xmm0, (%edi)
1918	add	$16, %edi
1919	BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
1920
/* Fewer than 32 bytes left: one more aligned 16-byte store if at least 16
   remain, then the FillTable entry for the rest.  */
1921L(StrncpyFillLess32):
1922	add	$16, %ebx
1923	jl	L(StrncpyFillExit)
1924	movdqa	%xmm0, (%edi)
1925	add	$16, %edi
1926	BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
1927
/* %ebx is (bytes left) - 16 on every path into this label; undo the bias
   and dispatch.  */
1928L(StrncpyFillExit):
1929	add	$16, %ebx
1930	BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)
1931
/* AlignedLeaveCase2OrCase3: if %edx is non-zero go to Aligned64LeaveCase2,
   otherwise fall into Aligned64LeaveCase3.
   Aligned64LeaveCase3 stores %xmm4, %xmm5, %xmm6, %xmm7 to (%edi),
   16(%edi), 32(%edi), 48(%edi) with aligned moves.  Before the first store
   %ebx is increased by 48 and tested with jl; before each later store it
   is decreased by 16 and tested with jb.  A failed test leaves to
   CopyFrom1To16BytesCase3 (defined outside this excerpt).
   %ecx = (%ebx + 64) & -16 is computed up front and not used again in this
   block -- presumably an input of CopyFrom1To16BytesCase3 (confirm).
   After all four stores: USE_AS_STPCPY sets %eax = %edi + 64, then RETURN
   (macro defined outside this excerpt).
   NOTE(review): %edx looks like a NUL-byte mask prepared by the code that
   branches here -- confirm at the branch sites.  */
1932	.p2align 4
1933L(AlignedLeaveCase2OrCase3):
1934	test	%edx, %edx
1935	jnz	L(Aligned64LeaveCase2)
1936L(Aligned64LeaveCase3):
1937	lea	64(%ebx), %ecx
1938	and	$-16, %ecx
1939	add	$48, %ebx
1940	jl	L(CopyFrom1To16BytesCase3)
1941	movdqa	%xmm4, (%edi)
1942	sub	$16, %ebx
1943	jb	L(CopyFrom1To16BytesCase3)
1944	movdqa	%xmm5, 16(%edi)
1945	sub	$16, %ebx
1946	jb	L(CopyFrom1To16BytesCase3)
1947	movdqa	%xmm6, 32(%edi)
1948	sub	$16, %ebx
1949	jb	L(CopyFrom1To16BytesCase3)
1950	movdqa	%xmm7, 48(%edi)
1951#ifdef USE_AS_STPCPY
1952	lea	64(%edi), %eax
1953#endif
1954	RETURN
1955
/* Aligned64LeaveCase2: walk %xmm4, %xmm5, %xmm6, %xmm7 in order.  For each
   register a zero-byte mask is built in %edx (pcmpeqb against zero, then
   pmovmskb); the previous register is stored to the destination with an
   aligned move once its own mask proved empty.  %ecx counts the bytes
   stored so far (0, 16, 32).  The count in %ebx is adjusted (+48 first,
   then -16 per step); when it reaches zero or below (jle / jbe) control
   goes to CopyFrom1To16BytesCase2OrCase3, and when a mask is non-zero it
   goes to CopyFrom1To16BytesXmm4/5/6.  All of those targets, and
   BRANCH_TO_JMPTBL_ENTRY, are defined outside this excerpt.  */
1956	.p2align 4
1957L(Aligned64LeaveCase2):
1958	pxor	%xmm0, %xmm0
1959	xor	%ecx, %ecx
1960	pcmpeqb	%xmm4, %xmm0
1961	pmovmskb %xmm0, %edx
1962	add	$48, %ebx
1963	jle	L(CopyFrom1To16BytesCase2OrCase3)
1964	test	%edx, %edx
1965	jnz	L(CopyFrom1To16BytesXmm4)
1966
/* The mask for %xmm4 was empty, so %xmm0 is still all zero and serves as
   the comparison operand again.  */
1967	pcmpeqb	%xmm5, %xmm0
1968	pmovmskb %xmm0, %edx
1969	movdqa	%xmm4, (%edi)
1970	add	$16, %ecx
1971	sub	$16, %ebx
1972	jbe	L(CopyFrom1To16BytesCase2OrCase3)
1973	test	%edx, %edx
1974	jnz	L(CopyFrom1To16BytesXmm5)
1975
1976	pcmpeqb	%xmm6, %xmm0
1977	pmovmskb %xmm0, %edx
1978	movdqa	%xmm5, 16(%edi)
1979	add	$16, %ecx
1980	sub	$16, %ebx
1981	jbe	L(CopyFrom1To16BytesCase2OrCase3)
1982	test	%edx, %edx
1983	jnz	L(CopyFrom1To16BytesXmm6)
1984
/* Last 16 bytes: %ecx is 32 here, so both pointers move forward by 48.
   %edx becomes the index of the first zero byte of %xmm7; if that index is
   below %ebx (unsigned) go to CopyFrom1To16BytesExit, otherwise dispatch
   through ExitStrncpyTable indexed by %ebx.  */
1985	pcmpeqb	%xmm7, %xmm0
1986	pmovmskb %xmm0, %edx
1987	movdqa	%xmm6, 32(%edi)
1988	lea	16(%edi, %ecx), %edi
1989	lea	16(%esi, %ecx), %esi
1990	bsf	%edx, %edx
1991	cmp	%ebx, %edx
1992	jb	L(CopyFrom1To16BytesExit)
1993	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
1994
/* UnalignedLeaveCase2OrCase3: if %edx is non-zero go to
   Unaligned64LeaveCase2, otherwise fall into Unaligned64LeaveCase3.
   Unaligned64LeaveCase3 stores %xmm4, %xmm5, %xmm6, %xmm7 to (%edi),
   16(%edi), 32(%edi), 48(%edi) with unaligned moves.  Before the first
   store %ebx is increased by 48 and tested with jl; before each later
   store it is decreased by 16 and tested with jb.  A failed test leaves to
   CopyFrom1To16BytesCase3 (defined outside this excerpt).
   %ecx = (%ebx + 64) & -16 is computed up front and not used again in this
   block -- presumably an input of CopyFrom1To16BytesCase3 (confirm).
   After all four stores: USE_AS_STPCPY sets %eax = %edi + 64, then RETURN
   (macro defined outside this excerpt).
   NOTE(review): %edx looks like a NUL-byte mask prepared by the code that
   branches here -- confirm at the branch sites.  */
1995	.p2align 4
1996L(UnalignedLeaveCase2OrCase3):
1997	test	%edx, %edx
1998	jnz	L(Unaligned64LeaveCase2)
1999L(Unaligned64LeaveCase3):
2000	lea	64(%ebx), %ecx
2001	and	$-16, %ecx
2002	add	$48, %ebx
2003	jl	L(CopyFrom1To16BytesCase3)
2004	movdqu	%xmm4, (%edi)
2005	sub	$16, %ebx
2006	jb	L(CopyFrom1To16BytesCase3)
2007	movdqu	%xmm5, 16(%edi)
2008	sub	$16, %ebx
2009	jb	L(CopyFrom1To16BytesCase3)
2010	movdqu	%xmm6, 32(%edi)
2011	sub	$16, %ebx
2012	jb	L(CopyFrom1To16BytesCase3)
2013	movdqu	%xmm7, 48(%edi)
2014#ifdef USE_AS_STPCPY
2015	lea	64(%edi), %eax
2016#endif
2017	RETURN
2018
/* Unaligned64LeaveCase2: walk %xmm4, %xmm5, %xmm6, %xmm7 in order.  For
   each register a zero-byte mask is built in %edx (pcmpeqb against zero,
   then pmovmskb); the previous register is stored to the destination with
   an unaligned move once its own mask proved empty.  %ecx counts the bytes
   stored so far (0, 16, 32).  The count in %ebx is adjusted (+48 first,
   then -16 per step); when it reaches zero or below (jle / jbe) control
   goes to CopyFrom1To16BytesCase2OrCase3, and when a mask is non-zero it
   goes to CopyFrom1To16BytesUnalignedXmm4/5/6.  All of those targets, and
   BRANCH_TO_JMPTBL_ENTRY, are defined outside this excerpt.  */
2019	.p2align 4
2020L(Unaligned64LeaveCase2):
2021	pxor	%xmm0, %xmm0
2022	xor	%ecx, %ecx
2023	pcmpeqb	%xmm4, %xmm0
2024	pmovmskb %xmm0, %edx
2025	add	$48, %ebx
2026	jle	L(CopyFrom1To16BytesCase2OrCase3)
2027	test	%edx, %edx
2028	jnz	L(CopyFrom1To16BytesUnalignedXmm4)
2029
/* The mask for %xmm4 was empty, so %xmm0 is still all zero and serves as
   the comparison operand again.  */
2030	pcmpeqb	%xmm5, %xmm0
2031	pmovmskb %xmm0, %edx
2032	movdqu	%xmm4, (%edi)
2033	add	$16, %ecx
2034	sub	$16, %ebx
2035	jbe	L(CopyFrom1To16BytesCase2OrCase3)
2036	test	%edx, %edx
2037	jnz	L(CopyFrom1To16BytesUnalignedXmm5)
2038
2039	pcmpeqb	%xmm6, %xmm0
2040	pmovmskb %xmm0, %edx
2041	movdqu	%xmm5, 16(%edi)
2042	add	$16, %ecx
2043	sub	$16, %ebx
2044	jbe	L(CopyFrom1To16BytesCase2OrCase3)
2045	test	%edx, %edx
2046	jnz	L(CopyFrom1To16BytesUnalignedXmm6)
2047
/* Last 16 bytes: %ecx is 32 here, so both pointers move forward by 48.
   %edx becomes the index of the first zero byte of %xmm7; if that index is
   below %ebx (unsigned) go to CopyFrom1To16BytesExit, otherwise dispatch
   through ExitStrncpyTable indexed by %ebx.  */
2048	pcmpeqb	%xmm7, %xmm0
2049	pmovmskb %xmm0, %edx
2050	movdqu	%xmm6, 32(%edi)
2051	lea	16(%edi, %ecx), %edi
2052	lea	16(%esi, %ecx), %esi
2053	bsf	%edx, %edx
2054	cmp	%ebx, %edx
2055	jb	L(CopyFrom1To16BytesExit)
2056	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)
2057
/* ExitZero: copy nothing; place %edi in %eax and RETURN (macro defined
   outside this excerpt).
   NOTE(review): presumably the zero-length path with %edi still holding
   the destination pointer -- confirm at the branch site, which is outside
   this excerpt.  */
2058	.p2align 4
2059L(ExitZero):
2060	movl	%edi, %eax
2061	RETURN
2062#endif
2063
2064END (STRCPY)
2065
/* ExitTable: 32 jump-table entries; entry i (0-based) is the JMPTBL
   encoding of L(Exit<i+1>), i.e. Exit1 ... Exit32.  JMPTBL is a macro
   defined outside this excerpt.
   The alignment directive is placed after .section so that it pads
   .rodata and therefore aligns the table itself; .p2align acts on the
   section that is current when it is assembled, so issuing it before the
   section switch only padded the preceding section.  */
2066	.section .rodata
2067	.p2align 4
2068L(ExitTable):
2069	.int	JMPTBL(L(Exit1), L(ExitTable))
2070	.int	JMPTBL(L(Exit2), L(ExitTable))
2071	.int	JMPTBL(L(Exit3), L(ExitTable))
2072	.int	JMPTBL(L(Exit4), L(ExitTable))
2073	.int	JMPTBL(L(Exit5), L(ExitTable))
2074	.int	JMPTBL(L(Exit6), L(ExitTable))
2075	.int	JMPTBL(L(Exit7), L(ExitTable))
2076	.int	JMPTBL(L(Exit8), L(ExitTable))
2077	.int	JMPTBL(L(Exit9), L(ExitTable))
2078	.int	JMPTBL(L(Exit10), L(ExitTable))
2079	.int	JMPTBL(L(Exit11), L(ExitTable))
2080	.int	JMPTBL(L(Exit12), L(ExitTable))
2081	.int	JMPTBL(L(Exit13), L(ExitTable))
2082	.int	JMPTBL(L(Exit14), L(ExitTable))
2083	.int	JMPTBL(L(Exit15), L(ExitTable))
2084	.int	JMPTBL(L(Exit16), L(ExitTable))
2085	.int	JMPTBL(L(Exit17), L(ExitTable))
2086	.int	JMPTBL(L(Exit18), L(ExitTable))
2087	.int	JMPTBL(L(Exit19), L(ExitTable))
2088	.int	JMPTBL(L(Exit20), L(ExitTable))
2089	.int	JMPTBL(L(Exit21), L(ExitTable))
2090	.int	JMPTBL(L(Exit22), L(ExitTable))
2091	.int	JMPTBL(L(Exit23), L(ExitTable))
2092	.int	JMPTBL(L(Exit24), L(ExitTable))
2093	.int	JMPTBL(L(Exit25), L(ExitTable))
2094	.int	JMPTBL(L(Exit26), L(ExitTable))
2095	.int	JMPTBL(L(Exit27), L(ExitTable))
2096	.int	JMPTBL(L(Exit28), L(ExitTable))
2097	.int	JMPTBL(L(Exit29), L(ExitTable))
2098	.int	JMPTBL(L(Exit30), L(ExitTable))
2099	.int	JMPTBL(L(Exit31), L(ExitTable))
2100	.int	JMPTBL(L(Exit32), L(ExitTable))
2101#ifdef USE_AS_STRNCPY
/* ExitStrncpyTable: 34 jump-table entries used by the
   BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) dispatches.
   Entry 0 is L(Exit0) (defined outside this excerpt); entry N for
   N = 1..33 is L(StrncpyExitN), the stub that copies exactly N bytes.
   JMPTBL is a macro defined outside this excerpt.  */
2102L(ExitStrncpyTable):
2103	.int	JMPTBL(L(Exit0), L(ExitStrncpyTable))
2104	.int	JMPTBL(L(StrncpyExit1), L(ExitStrncpyTable))
2105	.int	JMPTBL(L(StrncpyExit2), L(ExitStrncpyTable))
2106	.int	JMPTBL(L(StrncpyExit3), L(ExitStrncpyTable))
2107	.int	JMPTBL(L(StrncpyExit4), L(ExitStrncpyTable))
2108	.int	JMPTBL(L(StrncpyExit5), L(ExitStrncpyTable))
2109	.int	JMPTBL(L(StrncpyExit6), L(ExitStrncpyTable))
2110	.int	JMPTBL(L(StrncpyExit7), L(ExitStrncpyTable))
2111	.int	JMPTBL(L(StrncpyExit8), L(ExitStrncpyTable))
2112	.int	JMPTBL(L(StrncpyExit9), L(ExitStrncpyTable))
2113	.int	JMPTBL(L(StrncpyExit10), L(ExitStrncpyTable))
2114	.int	JMPTBL(L(StrncpyExit11), L(ExitStrncpyTable))
2115	.int	JMPTBL(L(StrncpyExit12), L(ExitStrncpyTable))
2116	.int	JMPTBL(L(StrncpyExit13), L(ExitStrncpyTable))
2117	.int	JMPTBL(L(StrncpyExit14), L(ExitStrncpyTable))
2118	.int	JMPTBL(L(StrncpyExit15), L(ExitStrncpyTable))
2119	.int	JMPTBL(L(StrncpyExit16), L(ExitStrncpyTable))
2120	.int	JMPTBL(L(StrncpyExit17), L(ExitStrncpyTable))
2121	.int	JMPTBL(L(StrncpyExit18), L(ExitStrncpyTable))
2122	.int	JMPTBL(L(StrncpyExit19), L(ExitStrncpyTable))
2123	.int	JMPTBL(L(StrncpyExit20), L(ExitStrncpyTable))
2124	.int	JMPTBL(L(StrncpyExit21), L(ExitStrncpyTable))
2125	.int	JMPTBL(L(StrncpyExit22), L(ExitStrncpyTable))
2126	.int    JMPTBL(L(StrncpyExit23), L(ExitStrncpyTable))
2127	.int	JMPTBL(L(StrncpyExit24), L(ExitStrncpyTable))
2128	.int	JMPTBL(L(StrncpyExit25), L(ExitStrncpyTable))
2129	.int	JMPTBL(L(StrncpyExit26), L(ExitStrncpyTable))
2130	.int	JMPTBL(L(StrncpyExit27), L(ExitStrncpyTable))
2131	.int	JMPTBL(L(StrncpyExit28), L(ExitStrncpyTable))
2132	.int	JMPTBL(L(StrncpyExit29), L(ExitStrncpyTable))
2133	.int	JMPTBL(L(StrncpyExit30), L(ExitStrncpyTable))
2134	.int	JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable))
2135	.int	JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable))
2136	.int	JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable))
2137
/* FillTable: 17 jump-table entries, 16-byte aligned within the section
   selected above.  Entry N for N = 0..16 is L(FillN).  It is the target
   table of the BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4) dispatches
   in StrncpyFillTailWithZero.  JMPTBL is a macro defined outside this
   excerpt.  */
2138	.p2align 4
2139L(FillTable):
2140	.int	JMPTBL(L(Fill0), L(FillTable))
2141	.int	JMPTBL(L(Fill1), L(FillTable))
2142	.int	JMPTBL(L(Fill2), L(FillTable))
2143	.int	JMPTBL(L(Fill3), L(FillTable))
2144	.int	JMPTBL(L(Fill4), L(FillTable))
2145	.int	JMPTBL(L(Fill5), L(FillTable))
2146	.int	JMPTBL(L(Fill6), L(FillTable))
2147	.int	JMPTBL(L(Fill7), L(FillTable))
2148	.int	JMPTBL(L(Fill8), L(FillTable))
2149	.int	JMPTBL(L(Fill9), L(FillTable))
2150	.int	JMPTBL(L(Fill10), L(FillTable))
2151	.int	JMPTBL(L(Fill11), L(FillTable))
2152	.int	JMPTBL(L(Fill12), L(FillTable))
2153	.int	JMPTBL(L(Fill13), L(FillTable))
2154	.int	JMPTBL(L(Fill14), L(FillTable))
2155	.int	JMPTBL(L(Fill15), L(FillTable))
2156	.int	JMPTBL(L(Fill16), L(FillTable))
2157#endif
2158