1/*
2Copyright (c) 2010, Intel Corporation
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are met:
7
8    * Redistributions of source code must retain the above copyright notice,
9    * this list of conditions and the following disclaimer.
10
11    * Redistributions in binary form must reproduce the above copyright notice,
12    * this list of conditions and the following disclaimer in the documentation
13    * and/or other materials provided with the distribution.
14
15    * Neither the name of Intel Corporation nor the names of its contributors
16    * may be used to endorse or promote products derived from this software
17    * without specific prior written permission.
18
19THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*/
30
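/*
 * ssse3_strcmp_latest: SSE2/SSSE3 strcmp for 32-bit x86.  When built with
 * USE_AS_STRNCMP defined the same body implements strncmp, keeping the
 * length limit in %ebp.  Misaligned inputs are handled by the ashr_1..
 * ashr_15 cases below, which splice two aligned 16-byte loads with palignr.
 */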
31#ifndef L
32# define L(label)	.L##label
33#endif
34
35#ifndef cfi_startproc
36# define cfi_startproc			.cfi_startproc
37#endif
38
39#ifndef cfi_endproc
40# define cfi_endproc			.cfi_endproc
41#endif
42
43#ifndef cfi_rel_offset
44# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
45#endif
46
47#ifndef cfi_restore
48# define cfi_restore(reg)		.cfi_restore (reg)
49#endif
50
51#ifndef cfi_adjust_cfa_offset
52# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
53#endif
54
55#ifndef ENTRY
56# define ENTRY(name)			\
57	.type name,  @function; 	\
58	.globl name;			\
59	.p2align 4;			\
60name:					\
61	cfi_startproc
62#endif
63
64#ifndef END
65# define END(name)			\
66	cfi_endproc;			\
67	.size name, .-name
68#endif
69
70#define CFI_PUSH(REG)						\
71  cfi_adjust_cfa_offset (4);					\
72  cfi_rel_offset (REG, 0)
73
74#define CFI_POP(REG)						\
75  cfi_adjust_cfa_offset (-4);					\
76  cfi_restore (REG)
77
78#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
79#define POP(REG)	popl REG; CFI_POP (REG)
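/* PUSH/POP wrap pushl/popl together with the matching CFI annotations so
   the unwind information stays correct while callee-saved registers are
   spilled around the vector paths.  */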
80
81#ifndef USE_AS_STRNCMP
82# define STR1		4
83# define STR2		STR1+4
84# define RETURN		ret
85
86# define UPDATE_STRNCMP_COUNTER
87#else
88# define STR1		8
89# define STR2		STR1+4
90# define CNT		STR2+4
91# define RETURN		POP (%ebp); ret; CFI_PUSH (%ebp)
92
93# define UPDATE_STRNCMP_COUNTER				\
94	/* calculate left number to compare */		\
95	mov	$16, %esi;				\
96	sub	%ecx, %esi;				\
97	cmp	%esi, %ebp;				\
98	jbe	L(more8byteseq);			\
99	sub	%esi, %ebp
100#endif
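/*
 * STR1/STR2 (and CNT for strncmp) are the stack offsets of the incoming
 * arguments; the strncmp build pushes %ebp on entry, so its offsets are 4
 * bytes larger and RETURN restores %ebp before returning.
 * UPDATE_STRNCMP_COUNTER subtracts the bytes covered by the first, partly
 * aligned vector (16 minus the offset in %ecx) from the remaining count in
 * %ebp, and branches to L(more8byteseq), which returns 0, once the count
 * is exhausted.
 */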
101
102	.section .text.ssse3,"ax",@progbits
103ENTRY (ssse3_strcmp_latest)
104#ifdef USE_AS_STRNCMP
105	PUSH	(%ebp)
106#endif
107	movl	STR1(%esp), %edx
108	movl	STR2(%esp), %eax
109#ifdef USE_AS_STRNCMP
110	movl	CNT(%esp), %ebp
111	cmp	$16, %ebp
112	jb	L(less16bytes_sncmp)
113	jmp	L(more16bytes)
114#endif
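/* Plain strcmp entry: compare the first eight bytes one at a time so short
   strings never reach the vector code; stop on a mismatch or a NUL.  */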
115
116	movzbl	(%eax), %ecx
117	cmpb	%cl, (%edx)
118	jne	L(neq)
119	cmpl	$0, %ecx
120	je	L(eq)
121
122	movzbl	1(%eax), %ecx
123	cmpb	%cl, 1(%edx)
124	jne	L(neq)
125	cmpl	$0, %ecx
126	je	L(eq)
127
128	movzbl	2(%eax), %ecx
129	cmpb	%cl, 2(%edx)
130	jne	L(neq)
131	cmpl	$0, %ecx
132	je	L(eq)
133
134	movzbl	3(%eax), %ecx
135	cmpb	%cl, 3(%edx)
136	jne	L(neq)
137	cmpl	$0, %ecx
138	je	L(eq)
139
140	movzbl	4(%eax), %ecx
141	cmpb	%cl, 4(%edx)
142	jne	L(neq)
143	cmpl	$0, %ecx
144	je	L(eq)
145
146	movzbl	5(%eax), %ecx
147	cmpb	%cl, 5(%edx)
148	jne	L(neq)
149	cmpl	$0, %ecx
150	je	L(eq)
151
152	movzbl	6(%eax), %ecx
153	cmpb	%cl, 6(%edx)
154	jne	L(neq)
155	cmpl	$0, %ecx
156	je	L(eq)
157
158	movzbl	7(%eax), %ecx
159	cmpb	%cl, 7(%edx)
160	jne	L(neq)
161	cmpl	$0, %ecx
162	je	L(eq)
163
164	add	$8, %edx
165	add	$8, %eax
166#ifdef USE_AS_STRNCMP
167	cmp	$8, %ebp
168	lea	-8(%ebp), %ebp
169	je	L(eq)
170L(more16bytes):
171#endif
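/* If either pointer is within 16 bytes of the end of a 4 KiB page, take the
   crosspage path below rather than doing an unaligned 16-byte load here.  */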
172	movl	%edx, %ecx
173	and	$0xfff, %ecx
174	cmp	$0xff0, %ecx
175	ja	L(crosspage)
176	mov	%eax, %ecx
177	and	$0xfff, %ecx
178	cmp	$0xff0, %ecx
179	ja	L(crosspage)
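/* Both loads stay inside their pages: gather 16 bytes from each string and
   compare them.  After the pcmpeqb/psubb sequence a byte of %xmm1 has its
   top bit set only where the two strings match on a non-NUL byte, so
   pmovmskb yields 0xffff exactly when all 16 bytes are equal and none of
   them terminates the string.  */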
180	pxor	%xmm0, %xmm0
181	movlpd	(%eax), %xmm1
182	movlpd	(%edx), %xmm2
183	movhpd	8(%eax), %xmm1
184	movhpd	8(%edx), %xmm2
185	pcmpeqb	%xmm1, %xmm0
186	pcmpeqb	%xmm2, %xmm1
187	psubb	%xmm0, %xmm1
188	pmovmskb %xmm1, %ecx
189	sub	$0xffff, %ecx
190	jnz	L(less16bytes)
191#ifdef USE_AS_STRNCMP
192	cmp	$16, %ebp
193	lea	-16(%ebp), %ebp
194	jbe	L(eq)
195#endif
196	add	$16, %eax
197	add	$16, %edx
198
199L(crosspage):
200
201	PUSH	(%ebx)
202	PUSH	(%edi)
203	PUSH	(%esi)
204
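/*
 * Alignment dispatch: %ecx and %edi receive the 16-byte misalignment of the
 * %eax and %edx strings and both pointers are rounded down to aligned
 * addresses.  The string with the larger offset is kept in %eax (swapping
 * the operands and recording the swap in bit 5 of %ebx when necessary);
 * equal offsets go to ashr_0, and otherwise the difference of the two
 * offsets selects one of the ashr_1..ashr_15 loops below.
 */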
205	movl	%edx, %edi
206	movl	%eax, %ecx
207	and	$0xf, %ecx
208	and	$0xf, %edi
209	xor	%ecx, %eax
210	xor	%edi, %edx
211	xor	%ebx, %ebx
212	cmp	%edi, %ecx
213	je	L(ashr_0)
214	ja	L(bigger)
215	or	$0x20, %ebx
216	xchg	%edx, %eax
217	xchg	%ecx, %edi
218L(bigger):
219	lea	15(%edi), %edi
220	sub	%ecx, %edi
221	cmp	$8, %edi
222	jle	L(ashr_less_8)
223	cmp	$14, %edi
224	je	L(ashr_15)
225	cmp	$13, %edi
226	je	L(ashr_14)
227	cmp	$12, %edi
228	je	L(ashr_13)
229	cmp	$11, %edi
230	je	L(ashr_12)
231	cmp	$10, %edi
232	je	L(ashr_11)
233	cmp	$9, %edi
234	je	L(ashr_10)
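/* L(ashr_less_8) is entered via the jle above with the flags from
   "cmp $8, %edi" still live, so the leading je catches %edi == 8.  */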
235L(ashr_less_8):
236	je	L(ashr_9)
237	cmp	$7, %edi
238	je	L(ashr_8)
239	cmp	$6, %edi
240	je	L(ashr_7)
241	cmp	$5, %edi
242	je	L(ashr_6)
243	cmp	$4, %edi
244	je	L(ashr_5)
245	cmp	$3, %edi
246	je	L(ashr_4)
247	cmp	$2, %edi
248	je	L(ashr_3)
249	cmp	$1, %edi
250	je	L(ashr_2)
251	cmp	$0, %edi
252	je	L(ashr_1)
253
254/*
255 * The following cases will be handled by ashr_0
256 *  ecx(offset of esi)  eax(offset of edi)  relative offset  corresponding case
257 *        n(0~15)            n(0~15)           15(15+ n-n)         ashr_0
258 */
259	.p2align 4
260L(ashr_0):
261	mov	$0xffff, %esi
262	movdqa	(%eax), %xmm1
263	pxor	%xmm0, %xmm0
264	pcmpeqb	%xmm1, %xmm0
265	pcmpeqb	(%edx), %xmm1
266	psubb	%xmm0, %xmm1
267	pmovmskb %xmm1, %edi
268	shr	%cl, %esi
269	shr	%cl, %edi
270	sub	%edi, %esi
271	mov	%ecx, %edi
272	jne	L(less32bytes)
273	UPDATE_STRNCMP_COUNTER
274	mov	$0x10, %ebx
275	mov	$0x10, %ecx
276	pxor	%xmm0, %xmm0
277	.p2align 4
278L(loop_ashr_0):
279	movdqa	(%eax, %ecx), %xmm1
280	movdqa	(%edx, %ecx), %xmm2
281
282	pcmpeqb	%xmm1, %xmm0
283	pcmpeqb	%xmm2, %xmm1
284	psubb	%xmm0, %xmm1
285	pmovmskb %xmm1, %esi
286	sub	$0xffff, %esi
287	jnz	L(exit)
288#ifdef USE_AS_STRNCMP
289	cmp	$16, %ebp
290	lea	-16(%ebp), %ebp
291	jbe	L(more8byteseq)
292#endif
293	add	$16, %ecx
294	jmp	L(loop_ashr_0)
295
296/*
297 * The following cases will be handled by ashr_1
298 * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
299 *        n(15)            n -15            0(15 +(n-15) - n)         ashr_1
300 */
301	.p2align 4
302L(ashr_1):
303	mov	$0xffff, %esi
304	pxor	%xmm0, %xmm0
305	movdqa	(%edx), %xmm2
306	movdqa	(%eax), %xmm1
307	pcmpeqb	%xmm1, %xmm0
308	pslldq	$15, %xmm2
309	pcmpeqb	%xmm1, %xmm2
310	psubb	%xmm0, %xmm2
311	pmovmskb %xmm2, %edi
312	shr	%cl, %esi
313	shr	%cl, %edi
314	sub	%edi, %esi
315	lea	-15(%ecx), %edi
316	jnz	L(less32bytes)
317
318	UPDATE_STRNCMP_COUNTER
319
320	movdqa	(%edx), %xmm3
321	pxor	%xmm0, %xmm0
322	mov	$16, %ecx
323	or	$1, %ebx
324	lea	1(%edx), %edi
325	and	$0xfff, %edi
326	sub	$0x1000, %edi
327
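/*
 * %xmm3 caches the previous aligned 16 bytes of the %edx string so each
 * iteration can splice them with the next chunk using palignr.  The low
 * bits of %ebx record the shift amount for the offset fix-up in L(exit),
 * and %edi tracks the distance to %edx's next page boundary (biased by
 * -0x1000): when it turns positive the loop detours to L(nibble_ashr_1)
 * to look for a terminator before reading from the following page.  The
 * remaining ashr_N cases follow the same pattern with shift N.
 */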
328	.p2align 4
329L(loop_ashr_1):
330	add	$16, %edi
331	jg	L(nibble_ashr_1)
332
333L(gobble_ashr_1):
334	movdqa	(%eax, %ecx), %xmm1
335	movdqa	(%edx, %ecx), %xmm2
336	movdqa	%xmm2, %xmm4
337
338	palignr	$1, %xmm3, %xmm2
339
340	pcmpeqb	%xmm1, %xmm0
341	pcmpeqb	%xmm2, %xmm1
342	psubb	%xmm0, %xmm1
343	pmovmskb %xmm1, %esi
344	sub	$0xffff, %esi
345	jnz	L(exit)
346#ifdef USE_AS_STRNCMP
347	cmp	$16, %ebp
348	lea	-16(%ebp), %ebp
349	jbe	L(more8byteseq)
350#endif
351
352	add	$16, %ecx
353	movdqa	%xmm4, %xmm3
354
355	add	$16, %edi
356	jg	L(nibble_ashr_1)
357
358	movdqa	(%eax, %ecx), %xmm1
359	movdqa	(%edx, %ecx), %xmm2
360	movdqa	%xmm2, %xmm4
361
362	palignr	$1, %xmm3, %xmm2
363
364	pcmpeqb	%xmm1, %xmm0
365	pcmpeqb	%xmm2, %xmm1
366	psubb	%xmm0, %xmm1
367	pmovmskb %xmm1, %esi
368	sub	$0xffff, %esi
369	jnz	L(exit)
370
371#ifdef USE_AS_STRNCMP
372	cmp	$16, %ebp
373	lea	-16(%ebp), %ebp
374	jbe	L(more8byteseq)
375#endif
376	add	$16, %ecx
377	movdqa	%xmm4, %xmm3
378	jmp	L(loop_ashr_1)
379
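/*
 * About to cross a page boundary on the %edx side: check the bytes of the
 * cached vector that the next palignr still needs.  If one of them is NUL
 * (or, for strncmp, the count is nearly exhausted), finish through
 * L(ashr_1_exittail) without touching the next page; otherwise reset the
 * page counter and continue.
 */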
380	.p2align 4
381L(nibble_ashr_1):
382	pcmpeqb	%xmm3, %xmm0
383	pmovmskb %xmm0, %esi
384	test	$0xfffe, %esi
385	jnz	L(ashr_1_exittail)
386
387#ifdef USE_AS_STRNCMP
388	cmp	$15, %ebp
389	jbe	L(ashr_1_exittail)
390#endif
391	pxor	%xmm0, %xmm0
392	sub	$0x1000, %edi
393	jmp	L(gobble_ashr_1)
394
395	.p2align 4
396L(ashr_1_exittail):
397	movdqa	(%eax, %ecx), %xmm1
398	psrldq	$1, %xmm0
399	psrldq	$1, %xmm3
400	jmp	L(aftertail)
401
402/*
403 * The following cases will be handled by ashr_2
404 * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
405 *        n(14~15)            n -14            1(15 +(n-14) - n)         ashr_2
406 */
407	.p2align 4
408L(ashr_2):
409	mov	$0xffff, %esi
410	pxor	%xmm0, %xmm0
411	movdqa	(%edx), %xmm2
412	movdqa	(%eax), %xmm1
413	pcmpeqb	%xmm1, %xmm0
414	pslldq	$14, %xmm2
415	pcmpeqb	%xmm1, %xmm2
416	psubb	%xmm0, %xmm2
417	pmovmskb %xmm2, %edi
418	shr	%cl, %esi
419	shr	%cl, %edi
420	sub	%edi, %esi
421	lea	-14(%ecx), %edi
422	jnz	L(less32bytes)
423
424	UPDATE_STRNCMP_COUNTER
425
426	movdqa	(%edx), %xmm3
427	pxor	%xmm0, %xmm0
428	mov	$16, %ecx
429	or	$2, %ebx
430	lea	2(%edx), %edi
431	and	$0xfff, %edi
432	sub	$0x1000, %edi
433
434	.p2align 4
435L(loop_ashr_2):
436	add	$16, %edi
437	jg	L(nibble_ashr_2)
438
439L(gobble_ashr_2):
440	movdqa	(%eax, %ecx), %xmm1
441	movdqa	(%edx, %ecx), %xmm2
442	movdqa	%xmm2, %xmm4
443
444	palignr	$2, %xmm3, %xmm2
445
446	pcmpeqb	%xmm1, %xmm0
447	pcmpeqb	%xmm2, %xmm1
448	psubb	%xmm0, %xmm1
449	pmovmskb %xmm1, %esi
450	sub	$0xffff, %esi
451	jnz	L(exit)
452
453#ifdef USE_AS_STRNCMP
454	cmp	$16, %ebp
455	lea	-16(%ebp), %ebp
456	jbe	L(more8byteseq)
457#endif
458	add	$16, %ecx
459	movdqa	%xmm4, %xmm3
460
461	add	$16, %edi
462	jg	L(nibble_ashr_2)
463
464	movdqa	(%eax, %ecx), %xmm1
465	movdqa	(%edx, %ecx), %xmm2
466	movdqa	%xmm2, %xmm4
467
468	palignr	$2, %xmm3, %xmm2
469
470	pcmpeqb	%xmm1, %xmm0
471	pcmpeqb	%xmm2, %xmm1
472	psubb	%xmm0, %xmm1
473	pmovmskb %xmm1, %esi
474	sub	$0xffff, %esi
475	jnz	L(exit)
476
477#ifdef USE_AS_STRNCMP
478	cmp	$16, %ebp
479	lea	-16(%ebp), %ebp
480	jbe	L(more8byteseq)
481#endif
482	add	$16, %ecx
483	movdqa	%xmm4, %xmm3
484	jmp	L(loop_ashr_2)
485
486	.p2align 4
487L(nibble_ashr_2):
488	pcmpeqb	%xmm3, %xmm0
489	pmovmskb %xmm0, %esi
490	test	$0xfffc, %esi
491	jnz	L(ashr_2_exittail)
492
493#ifdef USE_AS_STRNCMP
494	cmp	$14, %ebp
495	jbe	L(ashr_2_exittail)
496#endif
497
498	pxor	%xmm0, %xmm0
499	sub	$0x1000, %edi
500	jmp	L(gobble_ashr_2)
501
502	.p2align 4
503L(ashr_2_exittail):
504	movdqa	(%eax, %ecx), %xmm1
505	psrldq	$2, %xmm0
506	psrldq	$2, %xmm3
507	jmp	L(aftertail)
508
509/*
510 * The following cases will be handled by ashr_3
511 * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
512 *        n(13~15)            n -13            2(15 +(n-13) - n)         ashr_3
513 */
514	.p2align 4
515L(ashr_3):
516	mov	$0xffff, %esi
517	pxor	%xmm0, %xmm0
518	movdqa	(%edx), %xmm2
519	movdqa	(%eax), %xmm1
520	pcmpeqb	%xmm1, %xmm0
521	pslldq	$13, %xmm2
522	pcmpeqb	%xmm1, %xmm2
523	psubb	%xmm0, %xmm2
524	pmovmskb %xmm2, %edi
525	shr	%cl, %esi
526	shr	%cl, %edi
527	sub	%edi, %esi
528	lea	-13(%ecx), %edi
529	jnz	L(less32bytes)
530
531	UPDATE_STRNCMP_COUNTER
532
533	movdqa	(%edx), %xmm3
534	pxor	%xmm0, %xmm0
535	mov	$16, %ecx
536	or	$3, %ebx
537	lea	3(%edx), %edi
538	and	$0xfff, %edi
539	sub	$0x1000, %edi
540
541	.p2align 4
542L(loop_ashr_3):
543	add	$16, %edi
544	jg	L(nibble_ashr_3)
545
546L(gobble_ashr_3):
547	movdqa	(%eax, %ecx), %xmm1
548	movdqa	(%edx, %ecx), %xmm2
549	movdqa	%xmm2, %xmm4
550
551	palignr	$3, %xmm3, %xmm2
552
553	pcmpeqb	%xmm1, %xmm0
554	pcmpeqb	%xmm2, %xmm1
555	psubb	%xmm0, %xmm1
556	pmovmskb %xmm1, %esi
557	sub	$0xffff, %esi
558	jnz	L(exit)
559
560#ifdef USE_AS_STRNCMP
561	cmp	$16, %ebp
562	lea	-16(%ebp), %ebp
563	jbe	L(more8byteseq)
564#endif
565	add	$16, %ecx
566	movdqa	%xmm4, %xmm3
567
568	add	$16, %edi
569	jg	L(nibble_ashr_3)
570
571	movdqa	(%eax, %ecx), %xmm1
572	movdqa	(%edx, %ecx), %xmm2
573	movdqa	%xmm2, %xmm4
574
575	palignr	$3, %xmm3, %xmm2
576
577	pcmpeqb	%xmm1, %xmm0
578	pcmpeqb	%xmm2, %xmm1
579	psubb	%xmm0, %xmm1
580	pmovmskb %xmm1, %esi
581	sub	$0xffff, %esi
582	jnz	L(exit)
583
584#ifdef USE_AS_STRNCMP
585	cmp	$16, %ebp
586	lea	-16(%ebp), %ebp
587	jbe	L(more8byteseq)
588#endif
589	add	$16, %ecx
590	movdqa	%xmm4, %xmm3
591	jmp	L(loop_ashr_3)
592
593	.p2align 4
594L(nibble_ashr_3):
595	pcmpeqb	%xmm3, %xmm0
596	pmovmskb %xmm0, %esi
597	test	$0xfff8, %esi
598	jnz	L(ashr_3_exittail)
599
600#ifdef USE_AS_STRNCMP
601	cmp	$13, %ebp
602	jbe	L(ashr_3_exittail)
603#endif
604	pxor	%xmm0, %xmm0
605	sub	$0x1000, %edi
606	jmp	L(gobble_ashr_3)
607
608	.p2align 4
609L(ashr_3_exittail):
610	movdqa	(%eax, %ecx), %xmm1
611	psrldq	$3, %xmm0
612	psrldq	$3, %xmm3
613	jmp	L(aftertail)
614
615/*
616 * The following cases will be handled by ashr_4
617 * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
618 *        n(12~15)            n -12            3(15 +(n-12) - n)         ashr_4
619 */
620	.p2align 4
621L(ashr_4):
622	mov	$0xffff, %esi
623	pxor	%xmm0, %xmm0
624	movdqa	(%edx), %xmm2
625	movdqa	(%eax), %xmm1
626	pcmpeqb	%xmm1, %xmm0
627	pslldq	$12, %xmm2
628	pcmpeqb	%xmm1, %xmm2
629	psubb	%xmm0, %xmm2
630	pmovmskb %xmm2, %edi
631	shr	%cl, %esi
632	shr	%cl, %edi
633	sub	%edi, %esi
634	lea	-12(%ecx), %edi
635	jnz	L(less32bytes)
636
637	UPDATE_STRNCMP_COUNTER
638
639	movdqa	(%edx), %xmm3
640	pxor	%xmm0, %xmm0
641	mov	$16, %ecx
642	or	$4, %ebx
643	lea	4(%edx), %edi
644	and	$0xfff, %edi
645	sub	$0x1000, %edi
646
647	.p2align 4
648L(loop_ashr_4):
649	add	$16, %edi
650	jg	L(nibble_ashr_4)
651
652L(gobble_ashr_4):
653	movdqa	(%eax, %ecx), %xmm1
654	movdqa	(%edx, %ecx), %xmm2
655	movdqa	%xmm2, %xmm4
656
657	palignr	$4, %xmm3, %xmm2
658
659	pcmpeqb	%xmm1, %xmm0
660	pcmpeqb	%xmm2, %xmm1
661	psubb	%xmm0, %xmm1
662	pmovmskb %xmm1, %esi
663	sub	$0xffff, %esi
664	jnz	L(exit)
665
666#ifdef USE_AS_STRNCMP
667	cmp	$16, %ebp
668	lea	-16(%ebp), %ebp
669	jbe	L(more8byteseq)
670#endif
671
672	add	$16, %ecx
673	movdqa	%xmm4, %xmm3
674
675	add	$16, %edi
676	jg	L(nibble_ashr_4)
677
678	movdqa	(%eax, %ecx), %xmm1
679	movdqa	(%edx, %ecx), %xmm2
680	movdqa	%xmm2, %xmm4
681
682	palignr	$4, %xmm3, %xmm2
683
684	pcmpeqb	%xmm1, %xmm0
685	pcmpeqb	%xmm2, %xmm1
686	psubb	%xmm0, %xmm1
687	pmovmskb %xmm1, %esi
688	sub	$0xffff, %esi
689	jnz	L(exit)
690
691#ifdef USE_AS_STRNCMP
692	cmp	$16, %ebp
693	lea	-16(%ebp), %ebp
694	jbe	L(more8byteseq)
695#endif
696
697	add	$16, %ecx
698	movdqa	%xmm4, %xmm3
699	jmp	L(loop_ashr_4)
700
701	.p2align 4
702L(nibble_ashr_4):
703	pcmpeqb	%xmm3, %xmm0
704	pmovmskb %xmm0, %esi
705	test	$0xfff0, %esi
706	jnz	L(ashr_4_exittail)
707
708#ifdef USE_AS_STRNCMP
709	cmp	$12, %ebp
710	jbe	L(ashr_4_exittail)
711#endif
712
713	pxor	%xmm0, %xmm0
714	sub	$0x1000, %edi
715	jmp	L(gobble_ashr_4)
716
717	.p2align 4
718L(ashr_4_exittail):
719	movdqa	(%eax, %ecx), %xmm1
720	psrldq	$4, %xmm0
721	psrldq	$4, %xmm3
722	jmp	L(aftertail)
723
724/*
725 * The following cases will be handled by ashr_5
726 * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
727 *        n(11~15)            n -11            4(15 +(n-11) - n)         ashr_5
728 */
729	.p2align 4
730L(ashr_5):
731	mov	$0xffff, %esi
732	pxor	%xmm0, %xmm0
733	movdqa	(%edx), %xmm2
734	movdqa	(%eax), %xmm1
735	pcmpeqb	%xmm1, %xmm0
736	pslldq	$11, %xmm2
737	pcmpeqb	%xmm1, %xmm2
738	psubb	%xmm0, %xmm2
739	pmovmskb %xmm2, %edi
740	shr	%cl, %esi
741	shr	%cl, %edi
742	sub	%edi, %esi
743	lea	-11(%ecx), %edi
744	jnz	L(less32bytes)
745
746	UPDATE_STRNCMP_COUNTER
747
748	movdqa	(%edx), %xmm3
749	pxor	%xmm0, %xmm0
750	mov	$16, %ecx
751	or	$5, %ebx
752	lea	5(%edx), %edi
753	and	$0xfff, %edi
754	sub	$0x1000, %edi
755
756	.p2align 4
757L(loop_ashr_5):
758	add	$16, %edi
759	jg	L(nibble_ashr_5)
760
761L(gobble_ashr_5):
762	movdqa	(%eax, %ecx), %xmm1
763	movdqa	(%edx, %ecx), %xmm2
764	movdqa	%xmm2, %xmm4
765
766	palignr	$5, %xmm3, %xmm2
767
768	pcmpeqb	%xmm1, %xmm0
769	pcmpeqb	%xmm2, %xmm1
770	psubb	%xmm0, %xmm1
771	pmovmskb %xmm1, %esi
772	sub	$0xffff, %esi
773	jnz	L(exit)
774
775#ifdef USE_AS_STRNCMP
776	cmp	$16, %ebp
777	lea	-16(%ebp), %ebp
778	jbe	L(more8byteseq)
779#endif
780	add	$16, %ecx
781	movdqa	%xmm4, %xmm3
782
783	add	$16, %edi
784	jg	L(nibble_ashr_5)
785
786	movdqa	(%eax, %ecx), %xmm1
787	movdqa	(%edx, %ecx), %xmm2
788	movdqa	%xmm2, %xmm4
789
790	palignr	$5, %xmm3, %xmm2
791
792	pcmpeqb	%xmm1, %xmm0
793	pcmpeqb	%xmm2, %xmm1
794	psubb	%xmm0, %xmm1
795	pmovmskb %xmm1, %esi
796	sub	$0xffff, %esi
797	jnz	L(exit)
798
799#ifdef USE_AS_STRNCMP
800	cmp	$16, %ebp
801	lea	-16(%ebp), %ebp
802	jbe	L(more8byteseq)
803#endif
804	add	$16, %ecx
805	movdqa	%xmm4, %xmm3
806	jmp	L(loop_ashr_5)
807
808	.p2align 4
809L(nibble_ashr_5):
810	pcmpeqb	%xmm3, %xmm0
811	pmovmskb %xmm0, %esi
812	test	$0xffe0, %esi
813	jnz	L(ashr_5_exittail)
814
815#ifdef USE_AS_STRNCMP
816	cmp	$11, %ebp
817	jbe	L(ashr_5_exittail)
818#endif
819	pxor	%xmm0, %xmm0
820	sub	$0x1000, %edi
821	jmp	L(gobble_ashr_5)
822
823	.p2align 4
824L(ashr_5_exittail):
825	movdqa	(%eax, %ecx), %xmm1
826	psrldq	$5, %xmm0
827	psrldq	$5, %xmm3
828	jmp	L(aftertail)
829
830/*
831 * The following cases will be handled by ashr_6
832 * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
833 *        n(10~15)            n -10            5(15 +(n-10) - n)         ashr_6
834 */
835
836	.p2align 4
837L(ashr_6):
838	mov	$0xffff, %esi
839	pxor	%xmm0, %xmm0
840	movdqa	(%edx), %xmm2
841	movdqa	(%eax), %xmm1
842	pcmpeqb	%xmm1, %xmm0
843	pslldq	$10, %xmm2
844	pcmpeqb	%xmm1, %xmm2
845	psubb	%xmm0, %xmm2
846	pmovmskb %xmm2, %edi
847	shr	%cl, %esi
848	shr	%cl, %edi
849	sub	%edi, %esi
850	lea	-10(%ecx), %edi
851	jnz	L(less32bytes)
852
853	UPDATE_STRNCMP_COUNTER
854
855	movdqa	(%edx), %xmm3
856	pxor	%xmm0, %xmm0
857	mov	$16, %ecx
858	or	$6, %ebx
859	lea	6(%edx), %edi
860	and	$0xfff, %edi
861	sub	$0x1000, %edi
862
863	.p2align 4
864L(loop_ashr_6):
865	add	$16, %edi
866	jg	L(nibble_ashr_6)
867
868L(gobble_ashr_6):
869	movdqa	(%eax, %ecx), %xmm1
870	movdqa	(%edx, %ecx), %xmm2
871	movdqa	%xmm2, %xmm4
872
873	palignr	$6, %xmm3, %xmm2
874
875	pcmpeqb	%xmm1, %xmm0
876	pcmpeqb	%xmm2, %xmm1
877	psubb	%xmm0, %xmm1
878	pmovmskb %xmm1, %esi
879	sub	$0xffff, %esi
880	jnz	L(exit)
881
882#ifdef USE_AS_STRNCMP
883	cmp	$16, %ebp
884	lea	-16(%ebp), %ebp
885	jbe	L(more8byteseq)
886#endif
887
888	add	$16, %ecx
889	movdqa	%xmm4, %xmm3
890
891	add	$16, %edi
892	jg	L(nibble_ashr_6)
893
894	movdqa	(%eax, %ecx), %xmm1
895	movdqa	(%edx, %ecx), %xmm2
896	movdqa	%xmm2, %xmm4
897
898	palignr	$6, %xmm3, %xmm2
899
900	pcmpeqb	%xmm1, %xmm0
901	pcmpeqb	%xmm2, %xmm1
902	psubb	%xmm0, %xmm1
903	pmovmskb %xmm1, %esi
904	sub	$0xffff, %esi
905	jnz	L(exit)
906#ifdef USE_AS_STRNCMP
907	cmp	$16, %ebp
908	lea	-16(%ebp), %ebp
909	jbe	L(more8byteseq)
910#endif
911
912	add	$16, %ecx
913	movdqa	%xmm4, %xmm3
914	jmp	L(loop_ashr_6)
915
916	.p2align 4
917L(nibble_ashr_6):
918	pcmpeqb	%xmm3, %xmm0
919	pmovmskb %xmm0, %esi
920	test	$0xffc0, %esi
921	jnz	L(ashr_6_exittail)
922
923#ifdef USE_AS_STRNCMP
924	cmp	$10, %ebp
925	jbe	L(ashr_6_exittail)
926#endif
927	pxor	%xmm0, %xmm0
928	sub	$0x1000, %edi
929	jmp	L(gobble_ashr_6)
930
931	.p2align 4
932L(ashr_6_exittail):
933	movdqa	(%eax, %ecx), %xmm1
934	psrldq	$6, %xmm0
935	psrldq	$6, %xmm3
936	jmp	L(aftertail)
937
938/*
939 * The following cases will be handled by ashr_7
940 * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
941 *        n(9~15)            n - 9            6(15 +(n-9) - n)         ashr_7
942 */
943
944	.p2align 4
945L(ashr_7):
946	mov	$0xffff, %esi
947	pxor	%xmm0, %xmm0
948	movdqa	(%edx), %xmm2
949	movdqa	(%eax), %xmm1
950	pcmpeqb	%xmm1, %xmm0
951	pslldq	$9, %xmm2
952	pcmpeqb	%xmm1, %xmm2
953	psubb	%xmm0, %xmm2
954	pmovmskb %xmm2, %edi
955	shr	%cl, %esi
956	shr	%cl, %edi
957	sub	%edi, %esi
958	lea	-9(%ecx), %edi
959	jnz	L(less32bytes)
960
961	UPDATE_STRNCMP_COUNTER
962
963	movdqa	(%edx), %xmm3
964	pxor	%xmm0, %xmm0
965	mov	$16, %ecx
966	or	$7, %ebx
967	lea	8(%edx), %edi
968	and	$0xfff, %edi
969	sub	$0x1000, %edi
970
971	.p2align 4
972L(loop_ashr_7):
973	add	$16, %edi
974	jg	L(nibble_ashr_7)
975
976L(gobble_ashr_7):
977	movdqa	(%eax, %ecx), %xmm1
978	movdqa	(%edx, %ecx), %xmm2
979	movdqa	%xmm2, %xmm4
980
981	palignr	$7, %xmm3, %xmm2
982
983	pcmpeqb	%xmm1, %xmm0
984	pcmpeqb	%xmm2, %xmm1
985	psubb	%xmm0, %xmm1
986	pmovmskb %xmm1, %esi
987	sub	$0xffff, %esi
988	jnz	L(exit)
989
990#ifdef USE_AS_STRNCMP
991	cmp	$16, %ebp
992	lea	-16(%ebp), %ebp
993	jbe	L(more8byteseq)
994#endif
995
996	add	$16, %ecx
997	movdqa	%xmm4, %xmm3
998
999	add	$16, %edi
1000	jg	L(nibble_ashr_7)
1001
1002	movdqa	(%eax, %ecx), %xmm1
1003	movdqa	(%edx, %ecx), %xmm2
1004	movdqa	%xmm2, %xmm4
1005
1006	palignr	$7, %xmm3, %xmm2
1007
1008	pcmpeqb	%xmm1, %xmm0
1009	pcmpeqb	%xmm2, %xmm1
1010	psubb	%xmm0, %xmm1
1011	pmovmskb %xmm1, %esi
1012	sub	$0xffff, %esi
1013	jnz	L(exit)
1014
1015#ifdef USE_AS_STRNCMP
1016	cmp	$16, %ebp
1017	lea	-16(%ebp), %ebp
1018	jbe	L(more8byteseq)
1019#endif
1020
1021	add	$16, %ecx
1022	movdqa	%xmm4, %xmm3
1023	jmp	L(loop_ashr_7)
1024
1025	.p2align 4
1026L(nibble_ashr_7):
1027	pcmpeqb	%xmm3, %xmm0
1028	pmovmskb %xmm0, %esi
1029	test	$0xff80, %esi
1030	jnz	L(ashr_7_exittail)
1031
1032#ifdef USE_AS_STRNCMP
1033	cmp	$9, %ebp
1034	jbe	L(ashr_7_exittail)
1035#endif
1036	pxor	%xmm0, %xmm0
1038	sub	$0x1000, %edi
1039	jmp	L(gobble_ashr_7)
1040
1041	.p2align 4
1042L(ashr_7_exittail):
1043	movdqa	(%eax, %ecx), %xmm1
1044	psrldq	$7, %xmm0
1045	psrldq	$7, %xmm3
1046	jmp	L(aftertail)
1047
1048/*
1049 * The following cases will be handled by ashr_8
1050 * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
1051 *        n(8~15)            n - 8            7(15 +(n-8) - n)         ashr_8
1052 */
1053	.p2align 4
1054L(ashr_8):
1055	mov	$0xffff, %esi
1056	pxor	%xmm0, %xmm0
1057	movdqa	(%edx), %xmm2
1058	movdqa	(%eax), %xmm1
1059	pcmpeqb	%xmm1, %xmm0
1060	pslldq	$8, %xmm2
1061	pcmpeqb	%xmm1, %xmm2
1062	psubb	%xmm0, %xmm2
1063	pmovmskb %xmm2, %edi
1064	shr	%cl, %esi
1065	shr	%cl, %edi
1066	sub	%edi, %esi
1067	lea	-8(%ecx), %edi
1068	jnz	L(less32bytes)
1069
1070	UPDATE_STRNCMP_COUNTER
1071
1072	movdqa	(%edx), %xmm3
1073	pxor	%xmm0, %xmm0
1074	mov	$16, %ecx
1075	or	$8, %ebx
1076	lea	8(%edx), %edi
1077	and	$0xfff, %edi
1078	sub	$0x1000, %edi
1079
1080	.p2align 4
1081L(loop_ashr_8):
1082	add	$16, %edi
1083	jg	L(nibble_ashr_8)
1084
1085L(gobble_ashr_8):
1086	movdqa	(%eax, %ecx), %xmm1
1087	movdqa	(%edx, %ecx), %xmm2
1088	movdqa	%xmm2, %xmm4
1089
1090	palignr	$8, %xmm3, %xmm2
1091
1092	pcmpeqb	%xmm1, %xmm0
1093	pcmpeqb	%xmm2, %xmm1
1094	psubb	%xmm0, %xmm1
1095	pmovmskb %xmm1, %esi
1096	sub	$0xffff, %esi
1097	jnz	L(exit)
1098
1099#ifdef USE_AS_STRNCMP
1100	cmp	$16, %ebp
1101	lea	-16(%ebp), %ebp
1102	jbe	L(more8byteseq)
1103#endif
1104	add	$16, %ecx
1105	movdqa	%xmm4, %xmm3
1106
1107	add	$16, %edi
1108	jg	L(nibble_ashr_8)
1109
1110	movdqa	(%eax, %ecx), %xmm1
1111	movdqa	(%edx, %ecx), %xmm2
1112	movdqa	%xmm2, %xmm4
1113
1114	palignr	$8, %xmm3, %xmm2
1115
1116	pcmpeqb	%xmm1, %xmm0
1117	pcmpeqb	%xmm2, %xmm1
1118	psubb	%xmm0, %xmm1
1119	pmovmskb %xmm1, %esi
1120	sub	$0xffff, %esi
1121	jnz	L(exit)
1122
1123#ifdef USE_AS_STRNCMP
1124	cmp	$16, %ebp
1125	lea	-16(%ebp), %ebp
1126	jbe	L(more8byteseq)
1127#endif
1128	add	$16, %ecx
1129	movdqa	%xmm4, %xmm3
1130	jmp	L(loop_ashr_8)
1131
1132	.p2align 4
1133L(nibble_ashr_8):
1134	pcmpeqb	%xmm3, %xmm0
1135	pmovmskb %xmm0, %esi
1136	test	$0xff00, %esi
1137	jnz	L(ashr_8_exittail)
1138
1139#ifdef USE_AS_STRNCMP
1140	cmp	$8, %ebp
1141	jbe	L(ashr_8_exittail)
1142#endif
1143	pxor	%xmm0, %xmm0
1145	sub	$0x1000, %edi
1146	jmp	L(gobble_ashr_8)
1147
1148	.p2align 4
1149L(ashr_8_exittail):
1150	movdqa	(%eax, %ecx), %xmm1
1151	psrldq	$8, %xmm0
1152	psrldq	$8, %xmm3
1153	jmp	L(aftertail)
1154
1155/*
1156 * The following cases will be handled by ashr_9
1157 * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
1158 *        n(7~15)            n - 7            8(15 +(n-7) - n)         ashr_9
1159 */
1160	.p2align 4
1161L(ashr_9):
1162	mov	$0xffff, %esi
1163	pxor	%xmm0, %xmm0
1164	movdqa	(%edx), %xmm2
1165	movdqa	(%eax), %xmm1
1166	pcmpeqb	%xmm1, %xmm0
1167	pslldq	$7, %xmm2
1168	pcmpeqb	%xmm1, %xmm2
1169	psubb	%xmm0, %xmm2
1170	pmovmskb %xmm2, %edi
1171	shr	%cl, %esi
1172	shr	%cl, %edi
1173	sub	%edi, %esi
1174	lea	-7(%ecx), %edi
1175	jnz	L(less32bytes)
1176
1177	UPDATE_STRNCMP_COUNTER
1178
1179	movdqa	(%edx), %xmm3
1180	pxor	%xmm0, %xmm0
1181	mov	$16, %ecx
1182	or	$9, %ebx
1183	lea	9(%edx), %edi
1184	and	$0xfff, %edi
1185	sub	$0x1000, %edi
1186
1187	.p2align 4
1188L(loop_ashr_9):
1189	add	$16, %edi
1190	jg	L(nibble_ashr_9)
1191
1192L(gobble_ashr_9):
1193	movdqa	(%eax, %ecx), %xmm1
1194	movdqa	(%edx, %ecx), %xmm2
1195	movdqa	%xmm2, %xmm4
1196
1197	palignr	$9, %xmm3, %xmm2
1198
1199	pcmpeqb	%xmm1, %xmm0
1200	pcmpeqb	%xmm2, %xmm1
1201	psubb	%xmm0, %xmm1
1202	pmovmskb %xmm1, %esi
1203	sub	$0xffff, %esi
1204	jnz	L(exit)
1205
1206#ifdef USE_AS_STRNCMP
1207	cmp	$16, %ebp
1208	lea	-16(%ebp), %ebp
1209	jbe	L(more8byteseq)
1210#endif
1211	add	$16, %ecx
1212	movdqa	%xmm4, %xmm3
1213
1214	add	$16, %edi
1215	jg	L(nibble_ashr_9)
1216
1217	movdqa	(%eax, %ecx), %xmm1
1218	movdqa	(%edx, %ecx), %xmm2
1219	movdqa	%xmm2, %xmm4
1220
1221	palignr	$9, %xmm3, %xmm2
1222
1223	pcmpeqb	%xmm1, %xmm0
1224	pcmpeqb	%xmm2, %xmm1
1225	psubb	%xmm0, %xmm1
1226	pmovmskb %xmm1, %esi
1227	sub	$0xffff, %esi
1228	jnz	L(exit)
1229
1230#ifdef USE_AS_STRNCMP
1231	cmp	$16, %ebp
1232	lea	-16(%ebp), %ebp
1233	jbe	L(more8byteseq)
1234#endif
1235	add	$16, %ecx
1236	movdqa	%xmm4, %xmm3
1237	jmp	L(loop_ashr_9)
1238
1239	.p2align 4
1240L(nibble_ashr_9):
1241	pcmpeqb	%xmm3, %xmm0
1242	pmovmskb %xmm0, %esi
1243	test	$0xfe00, %esi
1244	jnz	L(ashr_9_exittail)
1245
1246#ifdef USE_AS_STRNCMP
1247	cmp	$7, %ebp
1248	jbe	L(ashr_9_exittail)
1249#endif
1250	pxor	%xmm0, %xmm0
1251	sub	$0x1000, %edi
1252	jmp	L(gobble_ashr_9)
1253
1254	.p2align 4
1255L(ashr_9_exittail):
1256	movdqa	(%eax, %ecx), %xmm1
1257	psrldq	$9, %xmm0
1258	psrldq	$9, %xmm3
1259	jmp	L(aftertail)
1260
1261/*
1262 * The following cases will be handled by ashr_10
1263 * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
1264 *        n(6~15)            n - 6            9(15 +(n-6) - n)         ashr_10
1265 */
1266	.p2align 4
1267L(ashr_10):
1268	mov	$0xffff, %esi
1269	pxor	%xmm0, %xmm0
1270	movdqa	(%edx), %xmm2
1271	movdqa	(%eax), %xmm1
1272	pcmpeqb	%xmm1, %xmm0
1273	pslldq	$6, %xmm2
1274	pcmpeqb	%xmm1, %xmm2
1275	psubb	%xmm0, %xmm2
1276	pmovmskb %xmm2, %edi
1277	shr	%cl, %esi
1278	shr	%cl, %edi
1279	sub	%edi, %esi
1280	lea	-6(%ecx), %edi
1281	jnz	L(less32bytes)
1282
1283	UPDATE_STRNCMP_COUNTER
1284
1285	movdqa	(%edx), %xmm3
1286	pxor	%xmm0, %xmm0
1287	mov	$16, %ecx
1288	or	$10, %ebx
1289	lea	10(%edx), %edi
1290	and	$0xfff, %edi
1291	sub	$0x1000, %edi
1292
1293	.p2align 4
1294L(loop_ashr_10):
1295	add	$16, %edi
1296	jg	L(nibble_ashr_10)
1297
1298L(gobble_ashr_10):
1299	movdqa	(%eax, %ecx), %xmm1
1300	movdqa	(%edx, %ecx), %xmm2
1301	movdqa	%xmm2, %xmm4
1302
1303	palignr	$10, %xmm3, %xmm2
1304
1305	pcmpeqb	%xmm1, %xmm0
1306	pcmpeqb	%xmm2, %xmm1
1307	psubb	%xmm0, %xmm1
1308	pmovmskb %xmm1, %esi
1309	sub	$0xffff, %esi
1310	jnz	L(exit)
1311
1312#ifdef USE_AS_STRNCMP
1313	cmp	$16, %ebp
1314	lea	-16(%ebp), %ebp
1315	jbe	L(more8byteseq)
1316#endif
1317	add	$16, %ecx
1318	movdqa	%xmm4, %xmm3
1319
1320	add	$16, %edi
1321	jg	L(nibble_ashr_10)
1322
1323	movdqa	(%eax, %ecx), %xmm1
1324	movdqa	(%edx, %ecx), %xmm2
1325	movdqa	%xmm2, %xmm4
1326
1327	palignr	$10, %xmm3, %xmm2
1328
1329	pcmpeqb	%xmm1, %xmm0
1330	pcmpeqb	%xmm2, %xmm1
1331	psubb	%xmm0, %xmm1
1332	pmovmskb %xmm1, %esi
1333	sub	$0xffff, %esi
1334	jnz	L(exit)
1335
1336#ifdef USE_AS_STRNCMP
1337	cmp	$16, %ebp
1338	lea	-16(%ebp), %ebp
1339	jbe	L(more8byteseq)
1340#endif
1341	add	$16, %ecx
1342	movdqa	%xmm4, %xmm3
1343	jmp	L(loop_ashr_10)
1344
1345	.p2align 4
1346L(nibble_ashr_10):
1347	pcmpeqb	%xmm3, %xmm0
1348	pmovmskb %xmm0, %esi
1349	test	$0xfc00, %esi
1350	jnz	L(ashr_10_exittail)
1351
1352#ifdef USE_AS_STRNCMP
1353	cmp	$6, %ebp
1354	jbe	L(ashr_10_exittail)
1355#endif
1356	pxor	%xmm0, %xmm0
1357	sub	$0x1000, %edi
1358	jmp	L(gobble_ashr_10)
1359
1360	.p2align 4
1361L(ashr_10_exittail):
1362	movdqa	(%eax, %ecx), %xmm1
1363	psrldq	$10, %xmm0
1364	psrldq	$10, %xmm3
1365	jmp	L(aftertail)
1366
1367/*
1368 * The following cases will be handled by ashr_11
1369 * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
1370 *        n(5~15)            n - 5            10(15 +(n-5) - n)         ashr_11
1371 */
1372	.p2align 4
1373L(ashr_11):
1374	mov	$0xffff, %esi
1375	pxor	%xmm0, %xmm0
1376	movdqa	(%edx), %xmm2
1377	movdqa	(%eax), %xmm1
1378	pcmpeqb	%xmm1, %xmm0
1379	pslldq	$5, %xmm2
1380	pcmpeqb	%xmm1, %xmm2
1381	psubb	%xmm0, %xmm2
1382	pmovmskb %xmm2, %edi
1383	shr	%cl, %esi
1384	shr	%cl, %edi
1385	sub	%edi, %esi
1386	lea	-5(%ecx), %edi
1387	jnz	L(less32bytes)
1388
1389	UPDATE_STRNCMP_COUNTER
1390
1391	movdqa	(%edx), %xmm3
1392	pxor	%xmm0, %xmm0
1393	mov	$16, %ecx
1394	or	$11, %ebx
1395	lea	11(%edx), %edi
1396	and	$0xfff, %edi
1397	sub	$0x1000, %edi
1398
1399	.p2align 4
1400L(loop_ashr_11):
1401	add	$16, %edi
1402	jg	L(nibble_ashr_11)
1403
1404L(gobble_ashr_11):
1405	movdqa	(%eax, %ecx), %xmm1
1406	movdqa	(%edx, %ecx), %xmm2
1407	movdqa	%xmm2, %xmm4
1408
1409	palignr	$11, %xmm3, %xmm2
1410
1411	pcmpeqb	%xmm1, %xmm0
1412	pcmpeqb	%xmm2, %xmm1
1413	psubb	%xmm0, %xmm1
1414	pmovmskb %xmm1, %esi
1415	sub	$0xffff, %esi
1416	jnz	L(exit)
1417
1418#ifdef USE_AS_STRNCMP
1419	cmp	$16, %ebp
1420	lea	-16(%ebp), %ebp
1421	jbe	L(more8byteseq)
1422#endif
1423	add	$16, %ecx
1424	movdqa	%xmm4, %xmm3
1425
1426	add	$16, %edi
1427	jg	L(nibble_ashr_11)
1428
1429	movdqa	(%eax, %ecx), %xmm1
1430	movdqa	(%edx, %ecx), %xmm2
1431	movdqa	%xmm2, %xmm4
1432
1433	palignr	$11, %xmm3, %xmm2
1434
1435	pcmpeqb	%xmm1, %xmm0
1436	pcmpeqb	%xmm2, %xmm1
1437	psubb	%xmm0, %xmm1
1438	pmovmskb %xmm1, %esi
1439	sub	$0xffff, %esi
1440	jnz	L(exit)
1441
1442#ifdef USE_AS_STRNCMP
1443	cmp	$16, %ebp
1444	lea	-16(%ebp), %ebp
1445	jbe	L(more8byteseq)
1446#endif
1447	add	$16, %ecx
1448	movdqa	%xmm4, %xmm3
1449	jmp	L(loop_ashr_11)
1450
1451	.p2align 4
1452L(nibble_ashr_11):
1453	pcmpeqb	%xmm3, %xmm0
1454	pmovmskb %xmm0, %esi
1455	test	$0xf800, %esi
1456	jnz	L(ashr_11_exittail)
1457
1458#ifdef USE_AS_STRNCMP
1459	cmp	$5, %ebp
1460	jbe	L(ashr_11_exittail)
1461#endif
1462	pxor	%xmm0, %xmm0
1463	sub	$0x1000, %edi
1464	jmp	L(gobble_ashr_11)
1465
1466	.p2align 4
1467L(ashr_11_exittail):
1468	movdqa	(%eax, %ecx), %xmm1
1469	psrldq	$11, %xmm0
1470	psrldq	$11, %xmm3
1471	jmp	L(aftertail)
1472
1473/*
1474 * The following cases will be handled by ashr_12
1475 * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
1476 *        n(4~15)            n - 4            11(15 +(n-4) - n)         ashr_12
1477 */
1478	.p2align 4
1479L(ashr_12):
1480	mov	$0xffff, %esi
1481	pxor	%xmm0, %xmm0
1482	movdqa	(%edx), %xmm2
1483	movdqa	(%eax), %xmm1
1484	pcmpeqb	%xmm1, %xmm0
1485	pslldq	$4, %xmm2
1486	pcmpeqb	%xmm1, %xmm2
1487	psubb	%xmm0, %xmm2
1488	pmovmskb %xmm2, %edi
1489	shr	%cl, %esi
1490	shr	%cl, %edi
1491	sub	%edi, %esi
1492	lea	-4(%ecx), %edi
1493	jnz	L(less32bytes)
1494
1495	UPDATE_STRNCMP_COUNTER
1496
1497	movdqa	(%edx), %xmm3
1498	pxor	%xmm0, %xmm0
1499	mov	$16, %ecx
1500	or	$12, %ebx
1501	lea	12(%edx), %edi
1502	and	$0xfff, %edi
1503	sub	$0x1000, %edi
1504
1505	.p2align 4
1506L(loop_ashr_12):
1507	add	$16, %edi
1508	jg	L(nibble_ashr_12)
1509
1510L(gobble_ashr_12):
1511	movdqa	(%eax, %ecx), %xmm1
1512	movdqa	(%edx, %ecx), %xmm2
1513	movdqa	%xmm2, %xmm4
1514
1515	palignr	$12, %xmm3, %xmm2
1516
1517	pcmpeqb	%xmm1, %xmm0
1518	pcmpeqb	%xmm2, %xmm1
1519	psubb	%xmm0, %xmm1
1520	pmovmskb %xmm1, %esi
1521	sub	$0xffff, %esi
1522	jnz	L(exit)
1523
1524	add	$16, %ecx
1525	movdqa	%xmm4, %xmm3
1526
1527	add	$16, %edi
1528	jg	L(nibble_ashr_12)
1529
1530#ifdef USE_AS_STRNCMP
1531	cmp	$16, %ebp
1532	lea	-16(%ebp), %ebp
1533	jbe	L(more8byteseq)
1534#endif
1535	movdqa	(%eax, %ecx), %xmm1
1536	movdqa	(%edx, %ecx), %xmm2
1537	movdqa	%xmm2, %xmm4
1538
1539	palignr	$12, %xmm3, %xmm2
1540
1541	pcmpeqb	%xmm1, %xmm0
1542	pcmpeqb	%xmm2, %xmm1
1543	psubb	%xmm0, %xmm1
1544	pmovmskb %xmm1, %esi
1545	sub	$0xffff, %esi
1546	jnz	L(exit)
1547
1548#ifdef USE_AS_STRNCMP
1549	cmp	$16, %ebp
1550	lea	-16(%ebp), %ebp
1551	jbe	L(more8byteseq)
1552#endif
1553	add	$16, %ecx
1554	movdqa	%xmm4, %xmm3
1555	jmp	L(loop_ashr_12)
1556
1557	.p2align 4
1558L(nibble_ashr_12):
1559	pcmpeqb	%xmm3, %xmm0
1560	pmovmskb %xmm0, %esi
1561	test	$0xf000, %esi
1562	jnz	L(ashr_12_exittail)
1563
1564#ifdef USE_AS_STRNCMP
1565	cmp	$4, %ebp
1566	jbe	L(ashr_12_exittail)
1567#endif
1568	pxor	%xmm0, %xmm0
1569	sub	$0x1000, %edi
1570	jmp	L(gobble_ashr_12)
1571
1572	.p2align 4
1573L(ashr_12_exittail):
1574	movdqa	(%eax, %ecx), %xmm1
1575	psrldq	$12, %xmm0
1576	psrldq	$12, %xmm3
1577	jmp	L(aftertail)
1578
1579/*
1580 * The following cases will be handled by ashr_13
1581 * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
1582 *        n(3~15)            n - 3            12(15 +(n-3) - n)         ashr_13
1583 */
1584	.p2align 4
1585L(ashr_13):
1586	mov	$0xffff, %esi
1587	pxor	%xmm0, %xmm0
1588	movdqa	(%edx), %xmm2
1589	movdqa	(%eax), %xmm1
1590	pcmpeqb	%xmm1, %xmm0
1591	pslldq	$3, %xmm2
1592	pcmpeqb	%xmm1, %xmm2
1593	psubb	%xmm0, %xmm2
1594	pmovmskb %xmm2, %edi
1595	shr	%cl, %esi
1596	shr	%cl, %edi
1597	sub	%edi, %esi
1598	lea	-3(%ecx), %edi
1599	jnz	L(less32bytes)
1600
1601	UPDATE_STRNCMP_COUNTER
1602
1603	movdqa	(%edx), %xmm3
1604	pxor	%xmm0, %xmm0
1605	mov	$16, %ecx
1606	or	$13, %ebx
1607	lea	13(%edx), %edi
1608	and	$0xfff, %edi
1609	sub	$0x1000, %edi
1610
1611	.p2align 4
1612L(loop_ashr_13):
1613	add	$16, %edi
1614	jg	L(nibble_ashr_13)
1615
1616L(gobble_ashr_13):
1617	movdqa	(%eax, %ecx), %xmm1
1618	movdqa	(%edx, %ecx), %xmm2
1619	movdqa	%xmm2, %xmm4
1620
1621	palignr	$13, %xmm3, %xmm2
1622
1623	pcmpeqb	%xmm1, %xmm0
1624	pcmpeqb	%xmm2, %xmm1
1625	psubb	%xmm0, %xmm1
1626	pmovmskb %xmm1, %esi
1627	sub	$0xffff, %esi
1628	jnz	L(exit)
1629
1630#ifdef USE_AS_STRNCMP
1631	cmp	$16, %ebp
1632	lea	-16(%ebp), %ebp
1633	jbe	L(more8byteseq)
1634#endif
1635	add	$16, %ecx
1636	movdqa	%xmm4, %xmm3
1637
1638	add	$16, %edi
1639	jg	L(nibble_ashr_13)
1640
1641	movdqa	(%eax, %ecx), %xmm1
1642	movdqa	(%edx, %ecx), %xmm2
1643	movdqa	%xmm2, %xmm4
1644
1645	palignr	$13, %xmm3, %xmm2
1646
1647	pcmpeqb	%xmm1, %xmm0
1648	pcmpeqb	%xmm2, %xmm1
1649	psubb	%xmm0, %xmm1
1650	pmovmskb %xmm1, %esi
1651	sub	$0xffff, %esi
1652	jnz	L(exit)
1653
1654#ifdef USE_AS_STRNCMP
1655	cmp	$16, %ebp
1656	lea	-16(%ebp), %ebp
1657	jbe	L(more8byteseq)
1658#endif
1659	add	$16, %ecx
1660	movdqa	%xmm4, %xmm3
1661	jmp	L(loop_ashr_13)
1662
1663	.p2align 4
1664L(nibble_ashr_13):
1665	pcmpeqb	%xmm3, %xmm0
1666	pmovmskb %xmm0, %esi
1667	test	$0xe000, %esi
1668	jnz	L(ashr_13_exittail)
1669
1670#ifdef USE_AS_STRNCMP
1671	cmp	$3, %ebp
1672	jbe	L(ashr_13_exittail)
1673#endif
1674	pxor	%xmm0, %xmm0
1675	sub	$0x1000, %edi
1676	jmp	L(gobble_ashr_13)
1677
1678	.p2align 4
1679L(ashr_13_exittail):
1680	movdqa	(%eax, %ecx), %xmm1
1681	psrldq	$13, %xmm0
1682	psrldq	$13, %xmm3
1683	jmp	L(aftertail)
1684
1685/*
1686 * The following cases will be handled by ashr_14
1687 * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
1688 *        n(2~15)            n - 2            13(15 +(n-2) - n)         ashr_14
1689 */
1690	.p2align 4
1691L(ashr_14):
1692	mov	$0xffff, %esi
1693	pxor	%xmm0, %xmm0
1694	movdqa	(%edx), %xmm2
1695	movdqa	(%eax), %xmm1
1696	pcmpeqb	%xmm1, %xmm0
1697	pslldq	$2, %xmm2
1698	pcmpeqb	%xmm1, %xmm2
1699	psubb	%xmm0, %xmm2
1700	pmovmskb %xmm2, %edi
1701	shr	%cl, %esi
1702	shr	%cl, %edi
1703	sub	%edi, %esi
1704	lea	-2(%ecx), %edi
1705	jnz	L(less32bytes)
1706
1707	UPDATE_STRNCMP_COUNTER
1708
1709	movdqa	(%edx), %xmm3
1710	pxor	%xmm0, %xmm0
1711	mov	$16, %ecx
1712	or	$14, %ebx
1713	lea	14(%edx), %edi
1714	and	$0xfff, %edi
1715	sub	$0x1000, %edi
1716
1717	.p2align 4
1718L(loop_ashr_14):
1719	add	$16, %edi
1720	jg	L(nibble_ashr_14)
1721
1722L(gobble_ashr_14):
1723	movdqa	(%eax, %ecx), %xmm1
1724	movdqa	(%edx, %ecx), %xmm2
1725	movdqa	%xmm2, %xmm4
1726
1727	palignr	$14, %xmm3, %xmm2
1728
1729	pcmpeqb	%xmm1, %xmm0
1730	pcmpeqb	%xmm2, %xmm1
1731	psubb	%xmm0, %xmm1
1732	pmovmskb %xmm1, %esi
1733	sub	$0xffff, %esi
1734	jnz	L(exit)
1735
1736#ifdef USE_AS_STRNCMP
1737	cmp	$16, %ebp
1738	lea	-16(%ebp), %ebp
1739	jbe	L(more8byteseq)
1740#endif
1741	add	$16, %ecx
1742	movdqa	%xmm4, %xmm3
1743
1744	add	$16, %edi
1745	jg	L(nibble_ashr_14)
1746
1747	movdqa	(%eax, %ecx), %xmm1
1748	movdqa	(%edx, %ecx), %xmm2
1749	movdqa	%xmm2, %xmm4
1750
1751	palignr	$14, %xmm3, %xmm2
1752
1753	pcmpeqb	%xmm1, %xmm0
1754	pcmpeqb	%xmm2, %xmm1
1755	psubb	%xmm0, %xmm1
1756	pmovmskb %xmm1, %esi
1757	sub	$0xffff, %esi
1758	jnz	L(exit)
1759
1760#ifdef USE_AS_STRNCMP
1761	cmp	$16, %ebp
1762	lea	-16(%ebp), %ebp
1763	jbe	L(more8byteseq)
1764#endif
1765	add	$16, %ecx
1766	movdqa	%xmm4, %xmm3
1767	jmp	L(loop_ashr_14)
1768
1769	.p2align 4
1770L(nibble_ashr_14):
1771	pcmpeqb	%xmm3, %xmm0
1772	pmovmskb %xmm0, %esi
1773	test	$0xc000, %esi
1774	jnz	L(ashr_14_exittail)
1775
1776#ifdef USE_AS_STRNCMP
1777	cmp	$2, %ebp
1778	jbe	L(ashr_14_exittail)
1779#endif
1780	pxor	%xmm0, %xmm0
1781	sub	$0x1000, %edi
1782	jmp	L(gobble_ashr_14)
1783
1784	.p2align 4
1785L(ashr_14_exittail):
1786	movdqa	(%eax, %ecx), %xmm1
1787	psrldq	$14, %xmm0
1788	psrldq	$14, %xmm3
1789	jmp	L(aftertail)
1790
1791/*
1792 * The following cases will be handled by ashr_15
1793 * ecx(offset of esi)  eax(offset of edi)   relative offset   	corresponding case
1794 *        n(1~15)            n - 1            14(15 +(n-1) - n)         ashr_15
1795 */
1796
1797	.p2align 4
1798L(ashr_15):
1799	mov	$0xffff, %esi
1800	pxor	%xmm0, %xmm0
1801	movdqa	(%edx), %xmm2
1802	movdqa	(%eax), %xmm1
1803	pcmpeqb	%xmm1, %xmm0
1804	pslldq	$1, %xmm2
1805	pcmpeqb	%xmm1, %xmm2
1806	psubb	%xmm0, %xmm2
1807	pmovmskb %xmm2, %edi
1808	shr	%cl, %esi
1809	shr	%cl, %edi
1810	sub	%edi, %esi
1811	lea	-1(%ecx), %edi
1812	jnz	L(less32bytes)
1813
1814	UPDATE_STRNCMP_COUNTER
1815
1816	movdqa	(%edx), %xmm3
1817	pxor	%xmm0, %xmm0
1818	mov	$16, %ecx
1819	or	$15, %ebx
1820	lea	15(%edx), %edi
1821	and	$0xfff, %edi
1822	sub	$0x1000, %edi
1823
1824	.p2align 4
1825L(loop_ashr_15):
1826	add	$16, %edi
1827	jg	L(nibble_ashr_15)
1828
1829L(gobble_ashr_15):
1830	movdqa	(%eax, %ecx), %xmm1
1831	movdqa	(%edx, %ecx), %xmm2
1832	movdqa	%xmm2, %xmm4
1833
1834	palignr	$15, %xmm3, %xmm2
1835
1836	pcmpeqb	%xmm1, %xmm0
1837	pcmpeqb	%xmm2, %xmm1
1838	psubb	%xmm0, %xmm1
1839	pmovmskb %xmm1, %esi
1840	sub	$0xffff, %esi
1841	jnz	L(exit)
1842
1843#ifdef USE_AS_STRNCMP
1844	cmp	$16, %ebp
1845	lea	-16(%ebp), %ebp
1846	jbe	L(more8byteseq)
1847#endif
1848	add	$16, %ecx
1849	movdqa	%xmm4, %xmm3
1850
1851	add	$16, %edi
1852	jg	L(nibble_ashr_15)
1853
1854	movdqa	(%eax, %ecx), %xmm1
1855	movdqa	(%edx, %ecx), %xmm2
1856	movdqa	%xmm2, %xmm4
1857
1858	palignr	$15, %xmm3, %xmm2
1859
1860	pcmpeqb	%xmm1, %xmm0
1861	pcmpeqb	%xmm2, %xmm1
1862	psubb	%xmm0, %xmm1
1863	pmovmskb %xmm1, %esi
1864	sub	$0xffff, %esi
1865	jnz	L(exit)
1866
1867#ifdef USE_AS_STRNCMP
1868	cmp	$16, %ebp
1869	lea	-16(%ebp), %ebp
1870	jbe	L(more8byteseq)
1871#endif
1872	add	$16, %ecx
1873	movdqa	%xmm4, %xmm3
1874	jmp	L(loop_ashr_15)
1875
1876	.p2align 4
1877L(nibble_ashr_15):
1878	pcmpeqb	%xmm3, %xmm0
1879	pmovmskb %xmm0, %esi
1880	test	$0x8000, %esi
1881	jnz	L(ashr_15_exittail)
1882
1883#ifdef USE_AS_STRNCMP
1884	cmp	$1, %ebp
1885	jbe	L(ashr_15_exittail)
1886#endif
1887	pxor	%xmm0, %xmm0
1888	sub	$0x1000, %edi
1889	jmp	L(gobble_ashr_15)
1890
1891	.p2align 4
1892L(ashr_15_exittail):
1893	movdqa	(%eax, %ecx), %xmm1
1894	psrldq	$15, %xmm0
1895	psrldq	$15, %xmm3
1896	jmp	L(aftertail)
1897
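/*
 * Common tail of the ashr_N exit paths: %xmm3/%xmm0 hold the surviving
 * bytes of the cached %edx chunk shifted into place and %xmm1 the
 * corresponding bytes of the %eax string.  Compare them, then fall into
 * L(exit), which rebuilds the offset of the first difference from %ecx and
 * the shift stored in %ebx, undoes the operand swap if bit 5 of %ebx is
 * set, and passes the mismatch mask to L(less16bytes) in %ecx.
 */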
1898	.p2align 4
1899L(aftertail):
1900	pcmpeqb	%xmm3, %xmm1
1901	psubb	%xmm0, %xmm1
1902	pmovmskb %xmm1, %esi
1903	not	%esi
1904L(exit):
1905	mov	%ebx, %edi
1906	and	$0x1f, %edi
1907	lea	-16(%edi, %ecx), %edi
1908L(less32bytes):
1909	add	%edi, %edx
1910	add	%ecx, %eax
1911	test	$0x20, %ebx
1912	jz	L(ret2)
1913	xchg	%eax, %edx
1914
1915	.p2align 4
1916L(ret2):
1917	mov	%esi, %ecx
1918	POP	(%esi)
1919	POP	(%edi)
1920	POP	(%ebx)
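/* The low byte of the mask covers string bytes 0-7; if it is clear the
   first difference lies in bytes 8-15, so advance both pointers by eight
   and test the high byte instead (L(2next_8_bytes)).  Each L(ByteN) stub
   reloads the differing byte from both strings and returns the signed
   difference.  */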
1921L(less16bytes):
1922	test	%cl, %cl
1923	jz	L(2next_8_bytes)
1924
1925	test	$0x01, %cl
1926	jnz	L(Byte0)
1927
1928	test	$0x02, %cl
1929	jnz	L(Byte1)
1930
1931	test	$0x04, %cl
1932	jnz	L(Byte2)
1933
1934	test	$0x08, %cl
1935	jnz	L(Byte3)
1936
1937	test	$0x10, %cl
1938	jnz	L(Byte4)
1939
1940	test	$0x20, %cl
1941	jnz	L(Byte5)
1942
1943	test	$0x40, %cl
1944	jnz	L(Byte6)
1945#ifdef USE_AS_STRNCMP
1946	cmp	$7, %ebp
1947	jbe	L(eq)
1948#endif
1949
1950	movzx	7(%eax), %ecx
1951	movzx	7(%edx), %eax
1952
1953	sub	%ecx, %eax
1954	RETURN
1955
1956	.p2align 4
1957L(Byte0):
1958#ifdef USE_AS_STRNCMP
1959	cmp	$0, %ebp
1960	jbe	L(eq)
1961#endif
1962	movzx	(%eax), %ecx
1963	movzx	(%edx), %eax
1964
1965	sub	%ecx, %eax
1966	RETURN
1967
1968	.p2align 4
1969L(Byte1):
1970#ifdef USE_AS_STRNCMP
1971	cmp	$1, %ebp
1972	jbe	L(eq)
1973#endif
1974	movzx	1(%eax), %ecx
1975	movzx	1(%edx), %eax
1976
1977	sub	%ecx, %eax
1978	RETURN
1979
1980	.p2align 4
1981L(Byte2):
1982#ifdef USE_AS_STRNCMP
1983	cmp	$2, %ebp
1984	jbe	L(eq)
1985#endif
1986	movzx	2(%eax), %ecx
1987	movzx	2(%edx), %eax
1988
1989	sub	%ecx, %eax
1990	RETURN
1991
1992	.p2align 4
1993L(Byte3):
1994#ifdef USE_AS_STRNCMP
1995	cmp	$3, %ebp
1996	jbe	L(eq)
1997#endif
1998	movzx	3(%eax), %ecx
1999	movzx	3(%edx), %eax
2000
2001	sub	%ecx, %eax
2002	RETURN
2003
2004	.p2align 4
2005L(Byte4):
2006#ifdef USE_AS_STRNCMP
2007	cmp	$4, %ebp
2008	jbe	L(eq)
2009#endif
2010	movzx	4(%eax), %ecx
2011	movzx	4(%edx), %eax
2012
2013	sub	%ecx, %eax
2014	RETURN
2015
2016	.p2align 4
2017L(Byte5):
2018#ifdef USE_AS_STRNCMP
2019	cmp	$5, %ebp
2020	jbe	L(eq)
2021#endif
2022	movzx	5(%eax), %ecx
2023	movzx	5(%edx), %eax
2024
2025	sub	%ecx, %eax
2026	RETURN
2027
2028	.p2align 4
2029L(Byte6):
2030#ifdef USE_AS_STRNCMP
2031	cmp	$6, %ebp
2032	jbe	L(eq)
2033#endif
2034	movzx	6(%eax), %ecx
2035	movzx	6(%edx), %eax
2036
2037	sub	%ecx, %eax
2038	RETURN
2039
2040	.p2align 4
2041L(2next_8_bytes):
2042	add	$8, %eax
2043	add	$8, %edx
2044#ifdef USE_AS_STRNCMP
2045	cmp	$8, %ebp
2046	lea	-8(%ebp), %ebp
2047	jbe	L(eq)
2048#endif
2049
2050	test	$0x01, %ch
2051	jnz	L(Byte0)
2052
2053	test	$0x02, %ch
2054	jnz	L(Byte1)
2055
2056	test	$0x04, %ch
2057	jnz	L(Byte2)
2058
2059	test	$0x08, %ch
2060	jnz	L(Byte3)
2061
2062	test	$0x10, %ch
2063	jnz	L(Byte4)
2064
2065	test	$0x20, %ch
2066	jnz	L(Byte5)
2067
2068	test	$0x40, %ch
2069	jnz	L(Byte6)
2070
2071#ifdef USE_AS_STRNCMP
2072	cmp	$7, %ebp
2073	jbe	L(eq)
2074#endif
2075	movzx	7(%eax), %ecx
2076	movzx	7(%edx), %eax
2077
2078	sub	%ecx, %eax
2079	RETURN
2080
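/* L(neq): the flags of the byte compare that branched here are still live;
   return 1 if the %edx string's byte was the larger, otherwise -1.  */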
2081	.p2align 4
2082L(neq):
2083	mov	$1, %eax
2084	ja	L(neq_bigger)
2085	neg	%eax
2086L(neq_bigger):
2087	RETURN
2088
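/*
 * strncmp only: L(more8byteseq) is reached when the length limit runs out
 * inside one of the vector loops without a difference being found.  The
 * CFI_PUSH notes merely restate the unwind state (ebx/edi/esi are still on
 * the stack here); the code pops them and falls through to L(eq) to
 * return 0.
 */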
2089#ifdef USE_AS_STRNCMP
2090	CFI_PUSH (%ebx)
2091	CFI_PUSH (%edi)
2092	CFI_PUSH (%esi)
2093
2094	.p2align 4
2095L(more8byteseq):
2096	POP	(%esi)
2097	POP	(%edi)
2098	POP	(%ebx)
2099#endif
2100
2101L(eq):
2102
2103#ifdef USE_AS_STRNCMP
2104	POP	(%ebp)
2105#endif
2106	xorl	%eax, %eax
2107	ret
2108
2109#ifdef USE_AS_STRNCMP
2110	CFI_PUSH (%ebp)
2111
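/*
 * strncmp with n < 16: compare at most %ebp bytes one at a time, stopping
 * at the first mismatch, at a NUL, or once n bytes have compared equal.
 */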
2112	.p2align 4
2113L(less16bytes_sncmp):
2114	test	%ebp, %ebp
2115	jz	L(eq)
2116
2117	movzbl	(%eax), %ecx
2118	cmpb	%cl, (%edx)
2119	jne	L(neq)
2120	test	%cl, %cl
2121	je	L(eq)
2122
2123	cmp	$1, %ebp
2124	je	L(eq)
2125
2126	movzbl	1(%eax), %ecx
2127	cmpb	%cl, 1(%edx)
2128	jne	L(neq)
2129	test	%cl, %cl
2130	je	L(eq)
2131
2132	cmp	$2, %ebp
2133	je	L(eq)
2134
2135	movzbl	2(%eax), %ecx
2136	cmpb	%cl, 2(%edx)
2137	jne	L(neq)
2138	test	%cl, %cl
2139	je	L(eq)
2140
2141	cmp	$3, %ebp
2142	je	L(eq)
2143
2144	movzbl	3(%eax), %ecx
2145	cmpb	%cl, 3(%edx)
2146	jne	L(neq)
2147	test	%cl, %cl
2148	je	L(eq)
2149
2150	cmp	$4, %ebp
2151	je	L(eq)
2152
2153	movzbl	4(%eax), %ecx
2154	cmpb	%cl, 4(%edx)
2155	jne	L(neq)
2156	test	%cl, %cl
2157	je	L(eq)
2158
2159	cmp	$5, %ebp
2160	je	L(eq)
2161
2162	movzbl	5(%eax), %ecx
2163	cmpb	%cl, 5(%edx)
2164	jne	L(neq)
2165	test	%cl, %cl
2166	je	L(eq)
2167
2168	cmp	$6, %ebp
2169	je	L(eq)
2170
2171	movzbl	6(%eax), %ecx
2172	cmpb	%cl, 6(%edx)
2173	jne	L(neq)
2174	test	%cl, %cl
2175	je	L(eq)
2176
2177	cmp	$7, %ebp
2178	je	L(eq)
2179
2180	movzbl	7(%eax), %ecx
2181	cmpb	%cl, 7(%edx)
2182	jne	L(neq)
2183	test	%cl, %cl
2184	je	L(eq)
2185
2186
2187	cmp	$8, %ebp
2188	je	L(eq)
2189
2190	movzbl	8(%eax), %ecx
2191	cmpb	%cl, 8(%edx)
2192	jne	L(neq)
2193	test	%cl, %cl
2194	je	L(eq)
2195
2196	cmp	$9, %ebp
2197	je	L(eq)
2198
2199	movzbl	9(%eax), %ecx
2200	cmpb	%cl, 9(%edx)
2201	jne	L(neq)
2202	test	%cl, %cl
2203	je	L(eq)
2204
2205	cmp	$10, %ebp
2206	je	L(eq)
2207
2208	movzbl	10(%eax), %ecx
2209	cmpb	%cl, 10(%edx)
2210	jne	L(neq)
2211	test	%cl, %cl
2212	je	L(eq)
2213
2214	cmp	$11, %ebp
2215	je	L(eq)
2216
2217	movzbl	11(%eax), %ecx
2218	cmpb	%cl, 11(%edx)
2219	jne	L(neq)
2220	test	%cl, %cl
2221	je	L(eq)
2222
2223
2224	cmp	$12, %ebp
2225	je	L(eq)
2226
2227	movzbl	12(%eax), %ecx
2228	cmpb	%cl, 12(%edx)
2229	jne	L(neq)
2230	test	%cl, %cl
2231	je	L(eq)
2232
2233	cmp	$13, %ebp
2234	je	L(eq)
2235
2236	movzbl	13(%eax), %ecx
2237	cmpb	%cl, 13(%edx)
2238	jne	L(neq)
2239	test	%cl, %cl
2240	je	L(eq)
2241
2242	cmp	$14, %ebp
2243	je	L(eq)
2244
2245	movzbl	14(%eax), %ecx
2246	cmpb	%cl, 14(%edx)
2247	jne	L(neq)
2248	test	%cl, %cl
2249	je	L(eq)
2250
2251	cmp	$15, %ebp
2252	je	L(eq)
2253
2254	movzbl	15(%eax), %ecx
2255	cmpb	%cl, 15(%edx)
2256	jne	L(neq)
2257	test	%cl, %cl
2258	je	L(eq)
2259
2260	POP	(%ebp)
2261	xor	%eax, %eax
2262	ret
2263#endif
2264
2265END (ssse3_strcmp_latest)
2266