• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2Copyright (c) 2014, Intel Corporation
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are met:
7
8    * Redistributions of source code must retain the above copyright notice,
9    * this list of conditions and the following disclaimer.
10
11    * Redistributions in binary form must reproduce the above copyright notice,
12    * this list of conditions and the following disclaimer in the documentation
13    * and/or other materials provided with the distribution.
14
15    * Neither the name of Intel Corporation nor the names of its contributors
16    * may be used to endorse or promote products derived from this software
17    * without specific prior written permission.
18
19THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*/
30
31#include <private/bionic_asm.h>
32
33#include "cache.h"
34
/* L(name) spells the assembler-local label .Lname.  May be pre-defined by
   the includer.  */
#if !defined(L)
# define L(label)	.L##label
#endif

/* ALIGN(n) pads the location counter to a 2^n-byte boundary.  May be
   pre-defined by the includer.  */
#if !defined(ALIGN)
# define ALIGN(n)	.p2align n
#endif

/* Unwind annotations that accompany a 4-byte push or pop of REG:
   CFI_PUSH moves the CFA 4 bytes further from ESP and records REG as saved
   at the new top of stack; CFI_POP undoes both.  They emit no instructions,
   so they can also be used alone to re-describe the stack for code that is
   reached by a jump.  */
#define CFI_PUSH(REG)	.cfi_adjust_cfa_offset 4; .cfi_rel_offset REG, 0
#define CFI_POP(REG)	.cfi_adjust_cfa_offset -4; .cfi_restore REG

/* Push/pop REG and keep the unwind information in step.  */
#define PUSH(REG)	pushl REG; CFI_PUSH(REG)
#define POP(REG)	popl REG; CFI_POP(REG)
53
/* Offsets of the stack arguments from ESP once ENTRANCE has pushed EBX:
   4 bytes of return address plus 4 bytes of saved EBX precede them.  */
#define PARMS		8	/* Preserve EBX. */
#define DST		PARMS		/* destination pointer */
#define CHR		(DST+4)		/* fill value */
#define LEN		(CHR+4)		/* byte count */
#define CHK_DST_LEN	(LEN+4)		/* limit compared against LEN by __memset_chk_generic */

/* Load the return value (the destination pointer) into EAX.  */
#define SETRTNVAL	movl DST(%esp), %eax

/* ENTRANCE   - prologue: save EBX.
   RETURN_END - epilogue: restore EBX and return.
   RETURN     - epilogue followed by CFI_PUSH, so that the code placed after
		it is again described as running with EBX saved.
   JMPTBL     - jump-table entry: offset of label I relative to base B.  */
#define ENTRANCE	PUSH(%ebx);
#define RETURN_END	POP(%ebx); ret
#define RETURN		RETURN_END; CFI_PUSH(%ebx)
#define JMPTBL(I, B)	I - B
65
/* Branch through a jump table of relative offsets.

   TABLE is a table of 32-bit entries, each the offset of its target from
   TABLE itself (see JMPTBL); ECX selects the entry.  Steps:
     1. call the PC thunk, leaving in EBX the address of the instruction
	that follows the call;
     2. add (TABLE - .) so that EBX holds the address of TABLE;
     3. add the ECX-th entry, turning the relative offset into the absolute
	address of the target;
     4. advance EDX by ECX, so the targets can address the remaining bytes
	with negative displacements from EDX;
     5. jump to the target.
   EBX, EDX and the flags are overwritten.  */
#define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    call	__x86.get_pc_thunk.bx;				\
    addl	$(TABLE - .), %ebx;				\
    addl	(%ebx,%ecx,4), %ebx;				\
    addl	%ecx, %edx;					\
    jmp		*%ebx
79
/* __x86.get_pc_thunk.bx: copy the caller's return address (the word at the
   top of the stack) into EBX and return.  Used by BRANCH_TO_JMPTBL_ENTRY to
   obtain the current program counter.  Hidden, link-once symbol.  */
	.section	.gnu.linkonce.t.__x86.get_pc_thunk.bx,"ax",@progbits
	ALIGN(4)
	.hidden	__x86.get_pc_thunk.bx
	.globl	__x86.get_pc_thunk.bx
	.type	__x86.get_pc_thunk.bx,@function
__x86.get_pc_thunk.bx:
	mov	(%esp), %ebx
	ret
88
/* __memset_chk_generic: checked entry point.

   Takes the same DST/CHR/LEN stack arguments as memset_generic plus a
   fourth word, CHK_DST_LEN.  If LEN <= CHK_DST_LEN (unsigned) it continues
   inside memset_generic at L(memset_length_loaded) with ECX = LEN and EBX
   already saved; otherwise it tail-jumps to __memset_chk_fail.  */
ENTRY(__memset_chk_generic)
	ENTRANCE

	movl	LEN(%esp), %ecx
	cmpl	CHK_DST_LEN(%esp), %ecx
	jbe	L(memset_length_loaded)

	/* LEN exceeds the limit: undo ENTRANCE without returning, so the
	   failure handler sees the stack exactly as our caller left it.  */
	POP(%ebx)
	jmp	__memset_chk_fail
END(__memset_chk_generic)
99
/* memset_generic: store the low byte of CHR into LEN bytes starting at DST
   and return DST in EAX.

   Register roles from L(1byteormore) onwards:
     ECX = number of bytes still to fill
     EAX = fill byte replicated into all four bytes
     EDX = destination pointer
   EBX is saved by ENTRANCE and restored by every RETURN.  */
	.section .text.sse2,"ax",@progbits
	ALIGN(4)
ENTRY(memset_generic)
	ENTRANCE

	movl	LEN(%esp), %ecx
	/* __memset_chk_generic joins here with ECX = LEN and EBX pushed.  */
L(memset_length_loaded):
	cmpl	$0, %ecx
	ja	L(1byteormore)
	/* LEN == 0: nothing to store.  */
	SETRTNVAL
	RETURN

L(1byteormore):
	/* Build the 32-bit pattern: AL -> AX -> EAX.  */
	movzbl	CHR(%esp), %eax
	movb	%al, %ah
	movl	%eax, %edx
	shll	$16, %eax
	orl	%edx, %eax
	movl	DST(%esp), %edx
	cmpl	$1, %ecx
	je	L(1byte)
	cmpl	$16, %ecx
	jae	L(16bytesormore)

	/* 2..15 bytes: one store anchored at the start and one anchored at
	   the end; the two may overlap.  */
	cmpl	$4, %ecx
	jb	L(4bytesless)
	/* 4..15 bytes: first and last dword.  */
	movl	%eax, (%edx)
	movl	%eax, -4(%edx,%ecx)
	cmpl	$8, %ecx
	jb	L(8bytesless)
	/* 8..15 bytes: also the second and second-to-last dword.  */
	movl	%eax, 4(%edx)
	movl	%eax, -8(%edx,%ecx)
L(8bytesless):
	SETRTNVAL
	RETURN

L(4bytesless):
	/* 2..3 bytes: first and last word.  */
	movw	%ax, (%edx)
	movw	%ax, -2(%edx,%ecx)
	SETRTNVAL
	RETURN

L(1byte):
	movb	%al, (%edx)
	SETRTNVAL
	RETURN
147
/* 16 bytes or more: broadcast the 32-bit pattern in EAX to all four lanes
   of XMM0.  Fills of 16..64 bytes are finished here with unaligned 16-byte
   stores anchored at both ends of the buffer (the stores may overlap);
   anything longer continues at L(64bytesmore).  */
	ALIGN(4)
L(16bytesormore):
	movd	%eax, %xmm0
	pshufd	$0, %xmm0, %xmm0

	cmpl	$64, %ecx
	ja	L(64bytesmore)
	/* 16..64 bytes: first and last 16 bytes.  */
	movdqu	%xmm0, (%edx)
	movdqu	%xmm0, -16(%edx,%ecx)
	cmpl	$32, %ecx
	jbe	L(32bytesless)
	/* 33..64 bytes: also the second and second-to-last 16 bytes.  */
	movdqu	%xmm0, 16(%edx)
	movdqu	%xmm0, -32(%edx,%ecx)
L(32bytesless):
	SETRTNVAL
	RETURN
164
/* More than 64 bytes: make EDX 16-byte aligned, then either dispatch the
   remaining 0..127 bytes through the jump table or enter the bulk loops.  */
L(64bytesmore):
	testl	$15, %edx
	jz	L(aligned_16)
L(not_aligned_16):
	/* Cover the unaligned head with one unaligned 16-byte store, round
	   EDX up to the next 16-byte boundary and shrink ECX by the number
	   of bytes skipped (EAX = old EDX - new EDX, a negative value).  */
	movdqu	%xmm0, (%edx)
	movl	%edx, %eax
	andl	$-16, %edx
	addl	$16, %edx
	subl	%edx, %eax
	addl	%eax, %ecx
	/* EAX was used as scratch; reload the 32-bit pattern from XMM0.  */
	movd	%xmm0, %eax

	ALIGN(4)
L(aligned_16):
	cmpl	$128, %ecx
	jae	L(128bytesormore)

L(aligned_16_less128bytes):
	/* Fewer than 128 bytes left: ECX indexes the tail table directly.  */
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))
184
/* 128 bytes or more, EDX 16-byte aligned, XMM0 = pattern.

   EBX is pushed a second time here so it can serve as scratch for the cache
   size being compared against ECX:
     ECX >= SHARED_CACHE_SIZE -> L(128bytesormore_nt_start)
     ECX >= DATA_CACHE_SIZE   -> L(128bytes_L2_normal)
     otherwise                -> L(128bytesormore_normal)
   Every path pops that scratch copy again before dispatching the final
   0..127 bytes through the jump table.

   Unwind information: CFI directives apply by position, not by control
   flow.  Each mid-function "POP(%ebx); BRANCH_TO_JMPTBL_ENTRY" below is
   therefore followed by CFI_PUSH(%ebx), exactly as RETURN does, because the
   code placed after it is only reached by a jump taken while the scratch
   copy of EBX is still on the stack.  CFI_PUSH emits no instructions.  */
	ALIGN(4)
L(128bytesormore):
	PUSH(%ebx)
	mov	$SHARED_CACHE_SIZE, %ebx
	cmp	%ebx, %ecx
	jae	L(128bytesormore_nt_start)

	/* NOTE(review): this POP/PUSH pair restores EBX and immediately
	   saves it again, leaving the stack as it was; it looks removable.  */
	POP(%ebx)

	PUSH(%ebx)
	mov	$DATA_CACHE_SIZE, %ebx

	cmp	%ebx, %ecx
	jae	L(128bytes_L2_normal)
	/* Bias ECX by -128 so that the borrow from each "sub $128" in the
	   loop tells whether another full 128-byte block remains.  */
	subl	$128, %ecx
L(128bytesormore_normal):
	/* Loop unrolled twice, 128 bytes per half.  LEA advances EDX without
	   touching the flags set by SUB, which the following jump tests.  */
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jb	L(128bytesless_normal)


	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jae	L(128bytesormore_normal)

L(128bytesless_normal):
	/* Undo the last subtraction that borrowed: ECX = 0..127 bytes left.  */
	lea	128(%ecx), %ecx
	POP(%ebx)
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))
	/* L(128bytes_L2_normal) is entered with the scratch EBX pushed.  */
	CFI_PUSH(%ebx)

	ALIGN(4)
L(128bytes_L2_normal):
	/* Same 128-byte block store, with non-temporal prefetch hints issued
	   0x380 and 0x3c0 bytes ahead of EDX.  */
	prefetchnta	0x380(%edx)
	prefetchnta	0x3c0(%edx)
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	add	$128, %edx
	cmp	$128, %ecx
	jae	L(128bytes_L2_normal)

L(128bytesless_L2_normal):
	POP(%ebx)
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))
	/* The code that follows (L(128bytesormore_nt_start)) is entered with
	   the scratch EBX pushed.  */
	CFI_PUSH(%ebx)
251
/* Fills of at least SHARED_CACHE_SIZE bytes.

   On entry EBX = SHARED_CACHE_SIZE, ECX = bytes to fill, EDX is 16-byte
   aligned and a scratch copy of the caller's EBX is on the stack.
   The first EBX bytes are written with ordinary stores; whatever remains in
   ECX is written with non-temporal stores while at least 128 bytes are
   left, and the final 0..127 bytes go through the jump table.
   NOTE(review): the first loop stops when EBX < 128 without adding the
   leftover back to ECX, so this presumably relies on SHARED_CACHE_SIZE being
   a multiple of 128 -- confirm against cache.h.  */
L(128bytesormore_nt_start):
	subl	%ebx, %ecx
	ALIGN(4)
L(128bytesormore_shared_cache_loop):
	prefetchnta	0x3c0(%edx)
	prefetchnta	0x380(%edx)
	subl	$128, %ebx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 16(%edx)
	movaps	%xmm0, 32(%edx)
	movaps	%xmm0, 48(%edx)
	movaps	%xmm0, 64(%edx)
	movaps	%xmm0, 80(%edx)
	movaps	%xmm0, 96(%edx)
	movaps	%xmm0, 112(%edx)
	addl	$128, %edx
	cmpl	$128, %ebx
	jae	L(128bytesormore_shared_cache_loop)
	cmpl	$128, %ecx
	jb	L(shared_cache_loop_end)
	ALIGN(4)
L(128bytesormore_nt):
	subl	$128, %ecx
	movntdq	%xmm0, (%edx)
	movntdq	%xmm0, 16(%edx)
	movntdq	%xmm0, 32(%edx)
	movntdq	%xmm0, 48(%edx)
	movntdq	%xmm0, 64(%edx)
	movntdq	%xmm0, 80(%edx)
	movntdq	%xmm0, 96(%edx)
	movntdq	%xmm0, 112(%edx)
	addl	$128, %edx
	cmpl	$128, %ecx
	jae	L(128bytesormore_nt)
	/* Order the non-temporal stores before anything that follows.  */
	sfence
L(shared_cache_loop_end):
	/* Drop the scratch copy of EBX, then finish the last ECX bytes.  */
	POP(%ebx)
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))
290
291
/* Tail dispatch table used by BRANCH_TO_JMPTBL_ENTRY.  It has 128 entries;
   entry N is the offset of L(aligned_16_<N>bytes) from the start of the
   table, for N = 0..127.  */
	.pushsection .rodata.sse2,"a",@progbits
	ALIGN(2)
/* TAIL_OFFSET(N) is one table value; TAIL_ROW emits eight consecutive
   entries.  */
#define TAIL_OFFSET(N)	JMPTBL(L(aligned_16_##N##bytes), L(table_16_128bytes))
#define TAIL_ROW(a, b, c, d, e, f, g, h)				\
	.int	TAIL_OFFSET(a), TAIL_OFFSET(b), TAIL_OFFSET(c), TAIL_OFFSET(d), \
		TAIL_OFFSET(e), TAIL_OFFSET(f), TAIL_OFFSET(g), TAIL_OFFSET(h)
L(table_16_128bytes):
	TAIL_ROW(0, 1, 2, 3, 4, 5, 6, 7)
	TAIL_ROW(8, 9, 10, 11, 12, 13, 14, 15)
	TAIL_ROW(16, 17, 18, 19, 20, 21, 22, 23)
	TAIL_ROW(24, 25, 26, 27, 28, 29, 30, 31)
	TAIL_ROW(32, 33, 34, 35, 36, 37, 38, 39)
	TAIL_ROW(40, 41, 42, 43, 44, 45, 46, 47)
	TAIL_ROW(48, 49, 50, 51, 52, 53, 54, 55)
	TAIL_ROW(56, 57, 58, 59, 60, 61, 62, 63)
	TAIL_ROW(64, 65, 66, 67, 68, 69, 70, 71)
	TAIL_ROW(72, 73, 74, 75, 76, 77, 78, 79)
	TAIL_ROW(80, 81, 82, 83, 84, 85, 86, 87)
	TAIL_ROW(88, 89, 90, 91, 92, 93, 94, 95)
	TAIL_ROW(96, 97, 98, 99, 100, 101, 102, 103)
	TAIL_ROW(104, 105, 106, 107, 108, 109, 110, 111)
	TAIL_ROW(112, 113, 114, 115, 116, 117, 118, 119)
	TAIL_ROW(120, 121, 122, 123, 124, 125, 126, 127)
	.popsection
424
/* Tail handlers reached through L(table_16_128bytes).

   On entry EDX points one byte past the end of the region (the dispatch
   macro added ECX to the 16-byte-aligned cursor), XMM0 holds the pattern in
   all 16 bytes and EAX holds it in all 4.  L(aligned_16_<N>bytes) fills the
   N bytes that end at EDX.

   There is one ladder per residue r = N mod 16.  A jump lands on the rung
   for N and falls through the lower rungs; each rung stores 16 bytes at
   EDX-N with MOVDQA, which is aligned because EDX-N is the aligned cursor
   plus a multiple of 16.  The last r bytes are written with a fixed
   sequence of 8/4/2/1-byte stores, then the destination is returned.

   TAIL_RUNG(N) defines the label for N and its 16-byte store;
   TAIL_LADDER lists the seven rungs of one residue, highest first.  */
#define TAIL_RUNG(N)	L(aligned_16_##N##bytes): movdqa %xmm0, -N(%edx)
#define TAIL_LADDER(a, b, c, d, e, f, g)				\
	TAIL_RUNG(a); TAIL_RUNG(b); TAIL_RUNG(c); TAIL_RUNG(d);	\
	TAIL_RUNG(e); TAIL_RUNG(f); TAIL_RUNG(g)

	/* r = 0 */
	ALIGN(4)
	TAIL_LADDER(112, 96, 80, 64, 48, 32, 16)
L(aligned_16_0bytes):
	SETRTNVAL
	RETURN

	/* r = 1 */
	ALIGN(4)
	TAIL_LADDER(113, 97, 81, 65, 49, 33, 17)
L(aligned_16_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* r = 2 */
	ALIGN(4)
	TAIL_LADDER(114, 98, 82, 66, 50, 34, 18)
L(aligned_16_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* r = 3 */
	ALIGN(4)
	TAIL_LADDER(115, 99, 83, 67, 51, 35, 19)
L(aligned_16_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* r = 4 */
	ALIGN(4)
	TAIL_LADDER(116, 100, 84, 68, 52, 36, 20)
L(aligned_16_4bytes):
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	/* r = 5 */
	ALIGN(4)
	TAIL_LADDER(117, 101, 85, 69, 53, 37, 21)
L(aligned_16_5bytes):
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* r = 6 */
	ALIGN(4)
	TAIL_LADDER(118, 102, 86, 70, 54, 38, 22)
L(aligned_16_6bytes):
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* r = 7 */
	ALIGN(4)
	TAIL_LADDER(119, 103, 87, 71, 55, 39, 23)
L(aligned_16_7bytes):
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* r = 8 */
	ALIGN(4)
	TAIL_LADDER(120, 104, 88, 72, 56, 40, 24)
L(aligned_16_8bytes):
	movq	%xmm0, -8(%edx)
	SETRTNVAL
	RETURN

	/* r = 9 */
	ALIGN(4)
	TAIL_LADDER(121, 105, 89, 73, 57, 41, 25)
L(aligned_16_9bytes):
	movq	%xmm0, -9(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* r = 10 */
	ALIGN(4)
	TAIL_LADDER(122, 106, 90, 74, 58, 42, 26)
L(aligned_16_10bytes):
	movq	%xmm0, -10(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* r = 11 */
	ALIGN(4)
	TAIL_LADDER(123, 107, 91, 75, 59, 43, 27)
L(aligned_16_11bytes):
	movq	%xmm0, -11(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* r = 12 */
	ALIGN(4)
	TAIL_LADDER(124, 108, 92, 76, 60, 44, 28)
L(aligned_16_12bytes):
	movq	%xmm0, -12(%edx)
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	/* r = 13 */
	ALIGN(4)
	TAIL_LADDER(125, 109, 93, 77, 61, 45, 29)
L(aligned_16_13bytes):
	movq	%xmm0, -13(%edx)
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	/* r = 14 */
	ALIGN(4)
	TAIL_LADDER(126, 110, 94, 78, 62, 46, 30)
L(aligned_16_14bytes):
	movq	%xmm0, -14(%edx)
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	/* r = 15; last handler, so it ends with RETURN_END (no CFI_PUSH is
	   needed because no code follows).  */
	ALIGN(4)
	TAIL_LADDER(127, 111, 95, 79, 63, 47, 31)
L(aligned_16_15bytes):
	movq	%xmm0, -15(%edx)
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN_END

END(memset_generic)
762