• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
; Optional symbol renaming: when BORINGSSL_PREFIX is defined, pull in the
; include file that is expected to remap the exported symbol names.
4%ifdef BORINGSSL_PREFIX
5%include "boringssl_prefix_symbols_nasm.inc"
6%endif
; Select the code section according to the assembler's output format:
;   obj   -> 32-bit "code" segment, 64-byte aligned
;   win32 -> .text, 64-byte aligned (plus the @feat.00 handling below)
;   other -> plain .text
7%ifidn __OUTPUT_FORMAT__,obj
8section	code	use32 class=code align=64
9%elifidn __OUTPUT_FORMAT__,win32
10%ifdef __YASM_VERSION_ID__
; Refuse to build with Yasm older than 1.1.0.
11%if __YASM_VERSION_ID__ < 01010000h
12%error yasm version 1.1.0 or later needed.
13%endif
14; Yasm automatically defines @feat.00 and complains about redefining it.
15; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
16%else
; Not Yasm: define @feat.00 ourselves. NOTE(review): per the Yasm page
; linked above this presumably marks the object as SafeSEH-compatible --
; confirm against the assembler documentation.
17$@feat.00 equ 1
18%endif
19section	.text	code align=64
20%else
21section	.text	code
22%endif
;-----------------------------------------------------------------------
; _ChaCha20_ctr32 -- 32-bit x86 entry point; arguments on the stack,
; plain `ret` (the caller removes them).
;
; Stack arguments as read below (offsets valid after the four pushes):
;   [20+esp]  output pointer
;   [24+esp]  input pointer
;   [28+esp]  length in bytes (0 => return immediately)
;   [32+esp]  pointer to 8 dwords of key material
;   [36+esp]  pointer to 4 dwords of counter block
; Presumably this matches a C prototype along the lines of
;   void ChaCha20_ctr32(uint8_t *out, const uint8_t *in, size_t len,
;                       const uint32_t key[8], const uint32_t counter[4]);
; -- TODO confirm against the C header.
;
; Dispatch: if both tested bits of _OPENSSL_ia32cap_P are set, control
; jumps to L$ssse3_shortcut (inside _ChaCha20_ssse3) with the four
; registers already pushed and eax = run-time address of L$pic_point.
; Otherwise the scalar path below is used.
;
; Scalar-path frame (after `sub esp,132`):
;   [0..60+esp]     16-dword working state (several words live in regs)
;   [80..108+esp]   copy of the 8 key dwords
;   [112..124+esp]  copy of the 4 counter dwords; [112+esp] is stepped
;                   by one per 64-byte block
;   [128+esp]       spill slot for the round-loop counter
;   [152+esp]       output pointer   } the caller's argument slots,
;   [156+esp]       input pointer    } rewritten after every full block
;   [160+esp]       bytes remaining  }
;
; ebp, ebx, esi and edi are saved and restored; eax, ecx, edx are not.
;-----------------------------------------------------------------------
23global	_ChaCha20_ctr32
24align	16
25_ChaCha20_ctr32:
26L$_ChaCha20_ctr32_begin:
27	push	ebp
28	push	ebx
29	push	esi
30	push	edi
31	xor	eax,eax
32	cmp	eax,DWORD [28+esp]	; len == 0 ?
33	je	NEAR L$000no_data
; call/pop leaves the run-time address of L$pic_point in eax; the SSSE3
; code uses it to locate L$ssse3_data.
34	call	L$pic_point
35L$pic_point:
36	pop	eax
37	lea	ebp,[_OPENSSL_ia32cap_P]
; 16777216 = 1<<24 in the first capability dword, 512 = 1<<9 in the
; second. NOTE(review): presumably the FXSR and SSSE3 feature bits --
; confirm against the OPENSSL_ia32cap_P layout.
38	test	DWORD [ebp],16777216
39	jz	NEAR L$001x86
40	test	DWORD [4+ebp],512
41	jz	NEAR L$001x86
42	jmp	NEAR L$ssse3_shortcut
43L$001x86:
44	mov	esi,DWORD [32+esp]	; esi = key pointer
45	mov	edi,DWORD [36+esp]	; edi = counter-block pointer
46	sub	esp,132
; Copy key dwords 0..7 to [80..108+esp].
47	mov	eax,DWORD [esi]
48	mov	ebx,DWORD [4+esi]
49	mov	ecx,DWORD [8+esi]
50	mov	edx,DWORD [12+esi]
51	mov	DWORD [80+esp],eax
52	mov	DWORD [84+esp],ebx
53	mov	DWORD [88+esp],ecx
54	mov	DWORD [92+esp],edx
55	mov	eax,DWORD [16+esi]
56	mov	ebx,DWORD [20+esi]
57	mov	ecx,DWORD [24+esi]
58	mov	edx,DWORD [28+esi]
59	mov	DWORD [96+esp],eax
60	mov	DWORD [100+esp],ebx
61	mov	DWORD [104+esp],ecx
62	mov	DWORD [108+esp],edx
; Copy counter dwords 0..3 to [112..124+esp]. Dword 0 is stored
; pre-decremented because L$002entry adds 1 before every block.
63	mov	eax,DWORD [edi]
64	mov	ebx,DWORD [4+edi]
65	mov	ecx,DWORD [8+edi]
66	mov	edx,DWORD [12+edi]
67	sub	eax,1
68	mov	DWORD [112+esp],eax
69	mov	DWORD [116+esp],ebx
70	mov	DWORD [120+esp],ecx
71	mov	DWORD [124+esp],edx
72	jmp	NEAR L$002entry
73align	16
; Re-entered after each full 64-byte block with ebx = next input,
; eax = next output, ecx = bytes remaining; write them back to the
; argument slots.
74L$003outer_loop:
75	mov	DWORD [156+esp],ebx
76	mov	DWORD [152+esp],eax
77	mov	DWORD [160+esp],ecx
; Build the initial state for one block. The four immediates are
; 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 ("expand 32-byte k" as
; little-endian ASCII). Word 0 stays in eax; words 4, 8, 9, 12 stay in
; ebp, ecx, esi, edx; the rest are written to the working state.
78L$002entry:
79	mov	eax,1634760805
80	mov	DWORD [4+esp],857760878
81	mov	DWORD [8+esp],2036477234
82	mov	DWORD [12+esp],1797285236
83	mov	ebx,DWORD [84+esp]
84	mov	ebp,DWORD [88+esp]
85	mov	ecx,DWORD [104+esp]
86	mov	esi,DWORD [108+esp]
87	mov	edx,DWORD [116+esp]
88	mov	edi,DWORD [120+esp]
89	mov	DWORD [20+esp],ebx
90	mov	DWORD [24+esp],ebp
91	mov	DWORD [40+esp],ecx
92	mov	DWORD [44+esp],esi
93	mov	DWORD [52+esp],edx
94	mov	DWORD [56+esp],edi
95	mov	ebx,DWORD [92+esp]
96	mov	edi,DWORD [124+esp]
97	mov	edx,DWORD [112+esp]
98	mov	ebp,DWORD [80+esp]
99	mov	ecx,DWORD [96+esp]
100	mov	esi,DWORD [100+esp]
101	add	edx,1	; step the block counter for this block
102	mov	DWORD [28+esp],ebx
103	mov	DWORD [60+esp],edi
104	mov	DWORD [112+esp],edx
105	mov	ebx,10	; 10 loop iterations
106	jmp	NEAR L$004loop
107align	16
; One iteration = eight add/xor/rotate groups (rotations 16, 12, 8, 7):
; first on state words (0,4,8,12) (1,5,9,13) (2,6,10,14) (3,7,11,15),
; then on (0,5,10,15) (1,6,11,12) (2,7,8,13) (3,4,9,14). Loads and
; stores of neighbouring words are interleaved with the arithmetic.
108L$004loop:
109	add	eax,ebp
110	mov	DWORD [128+esp],ebx	; spill loop counter; ebx is needed as scratch
111	mov	ebx,ebp
112	xor	edx,eax
113	rol	edx,16
114	add	ecx,edx
115	xor	ebx,ecx
116	mov	edi,DWORD [52+esp]
117	rol	ebx,12
118	mov	ebp,DWORD [20+esp]
119	add	eax,ebx
120	xor	edx,eax
121	mov	DWORD [esp],eax
122	rol	edx,8
123	mov	eax,DWORD [4+esp]
124	add	ecx,edx
125	mov	DWORD [48+esp],edx
126	xor	ebx,ecx
127	add	eax,ebp
128	rol	ebx,7
129	xor	edi,eax
130	mov	DWORD [32+esp],ecx
131	rol	edi,16
132	mov	DWORD [16+esp],ebx
133	add	esi,edi
134	mov	ecx,DWORD [40+esp]
135	xor	ebp,esi
136	mov	edx,DWORD [56+esp]
137	rol	ebp,12
138	mov	ebx,DWORD [24+esp]
139	add	eax,ebp
140	xor	edi,eax
141	mov	DWORD [4+esp],eax
142	rol	edi,8
143	mov	eax,DWORD [8+esp]
144	add	esi,edi
145	mov	DWORD [52+esp],edi
146	xor	ebp,esi
147	add	eax,ebx
148	rol	ebp,7
149	xor	edx,eax
150	mov	DWORD [36+esp],esi
151	rol	edx,16
152	mov	DWORD [20+esp],ebp
153	add	ecx,edx
154	mov	esi,DWORD [44+esp]
155	xor	ebx,ecx
156	mov	edi,DWORD [60+esp]
157	rol	ebx,12
158	mov	ebp,DWORD [28+esp]
159	add	eax,ebx
160	xor	edx,eax
161	mov	DWORD [8+esp],eax
162	rol	edx,8
163	mov	eax,DWORD [12+esp]
164	add	ecx,edx
165	mov	DWORD [56+esp],edx
166	xor	ebx,ecx
167	add	eax,ebp
168	rol	ebx,7
169	xor	edi,eax
170	rol	edi,16
171	mov	DWORD [24+esp],ebx
172	add	esi,edi
173	xor	ebp,esi
174	rol	ebp,12
175	mov	ebx,DWORD [20+esp]
176	add	eax,ebp
177	xor	edi,eax
178	mov	DWORD [12+esp],eax
179	rol	edi,8
180	mov	eax,DWORD [esp]
181	add	esi,edi
182	mov	edx,edi
183	xor	ebp,esi
184	add	eax,ebx
185	rol	ebp,7
186	xor	edx,eax
187	rol	edx,16
188	mov	DWORD [28+esp],ebp
189	add	ecx,edx
190	xor	ebx,ecx
191	mov	edi,DWORD [48+esp]
192	rol	ebx,12
193	mov	ebp,DWORD [24+esp]
194	add	eax,ebx
195	xor	edx,eax
196	mov	DWORD [esp],eax
197	rol	edx,8
198	mov	eax,DWORD [4+esp]
199	add	ecx,edx
200	mov	DWORD [60+esp],edx
201	xor	ebx,ecx
202	add	eax,ebp
203	rol	ebx,7
204	xor	edi,eax
205	mov	DWORD [40+esp],ecx
206	rol	edi,16
207	mov	DWORD [20+esp],ebx
208	add	esi,edi
209	mov	ecx,DWORD [32+esp]
210	xor	ebp,esi
211	mov	edx,DWORD [52+esp]
212	rol	ebp,12
213	mov	ebx,DWORD [28+esp]
214	add	eax,ebp
215	xor	edi,eax
216	mov	DWORD [4+esp],eax
217	rol	edi,8
218	mov	eax,DWORD [8+esp]
219	add	esi,edi
220	mov	DWORD [48+esp],edi
221	xor	ebp,esi
222	add	eax,ebx
223	rol	ebp,7
224	xor	edx,eax
225	mov	DWORD [44+esp],esi
226	rol	edx,16
227	mov	DWORD [24+esp],ebp
228	add	ecx,edx
229	mov	esi,DWORD [36+esp]
230	xor	ebx,ecx
231	mov	edi,DWORD [56+esp]
232	rol	ebx,12
233	mov	ebp,DWORD [16+esp]
234	add	eax,ebx
235	xor	edx,eax
236	mov	DWORD [8+esp],eax
237	rol	edx,8
238	mov	eax,DWORD [12+esp]
239	add	ecx,edx
240	mov	DWORD [52+esp],edx
241	xor	ebx,ecx
242	add	eax,ebp
243	rol	ebx,7
244	xor	edi,eax
245	rol	edi,16
246	mov	DWORD [28+esp],ebx
247	add	esi,edi
248	xor	ebp,esi
249	mov	edx,DWORD [48+esp]
250	rol	ebp,12
251	mov	ebx,DWORD [128+esp]	; reload loop counter
252	add	eax,ebp
253	xor	edi,eax
254	mov	DWORD [12+esp],eax
255	rol	edi,8
256	mov	eax,DWORD [esp]
257	add	esi,edi
258	mov	DWORD [56+esp],edi
259	xor	ebp,esi
260	rol	ebp,7
261	dec	ebx
262	jnz	NEAR L$004loop
; Rounds done. Add the initial state back in. Words 0, 4, 8, 9, 12, 14
; are live in eax, ebp, ecx, esi, edx, edi; the rest are on the stack.
263	mov	ebx,DWORD [160+esp]	; ebx = bytes remaining
264	add	eax,1634760805
265	add	ebp,DWORD [80+esp]
266	add	ecx,DWORD [96+esp]
267	add	esi,DWORD [100+esp]
268	cmp	ebx,64
269	jb	NEAR L$005tail	; fewer than 64 bytes left (unsigned)
; Full block: out[i] = in[i] XOR keystream[i], one dword at a time.
270	mov	ebx,DWORD [156+esp]	; ebx = input pointer
271	add	edx,DWORD [112+esp]
272	add	edi,DWORD [120+esp]
273	xor	eax,DWORD [ebx]
274	xor	ebp,DWORD [16+ebx]
275	mov	DWORD [esp],eax	; park output word 0; eax is about to hold the output pointer
276	mov	eax,DWORD [152+esp]	; eax = output pointer
277	xor	ecx,DWORD [32+ebx]
278	xor	esi,DWORD [36+ebx]
279	xor	edx,DWORD [48+ebx]
280	xor	edi,DWORD [56+ebx]
281	mov	DWORD [16+eax],ebp
282	mov	DWORD [32+eax],ecx
283	mov	DWORD [36+eax],esi
284	mov	DWORD [48+eax],edx
285	mov	DWORD [56+eax],edi
286	mov	ebp,DWORD [4+esp]
287	mov	ecx,DWORD [8+esp]
288	mov	esi,DWORD [12+esp]
289	mov	edx,DWORD [20+esp]
290	mov	edi,DWORD [24+esp]
291	add	ebp,857760878
292	add	ecx,2036477234
293	add	esi,1797285236
294	add	edx,DWORD [84+esp]
295	add	edi,DWORD [88+esp]
296	xor	ebp,DWORD [4+ebx]
297	xor	ecx,DWORD [8+ebx]
298	xor	esi,DWORD [12+ebx]
299	xor	edx,DWORD [20+ebx]
300	xor	edi,DWORD [24+ebx]
301	mov	DWORD [4+eax],ebp
302	mov	DWORD [8+eax],ecx
303	mov	DWORD [12+eax],esi
304	mov	DWORD [20+eax],edx
305	mov	DWORD [24+eax],edi
306	mov	ebp,DWORD [28+esp]
307	mov	ecx,DWORD [40+esp]
308	mov	esi,DWORD [44+esp]
309	mov	edx,DWORD [52+esp]
310	mov	edi,DWORD [60+esp]
311	add	ebp,DWORD [92+esp]
312	add	ecx,DWORD [104+esp]
313	add	esi,DWORD [108+esp]
314	add	edx,DWORD [116+esp]
315	add	edi,DWORD [124+esp]
316	xor	ebp,DWORD [28+ebx]
317	xor	ecx,DWORD [40+ebx]
318	xor	esi,DWORD [44+ebx]
319	xor	edx,DWORD [52+ebx]
320	xor	edi,DWORD [60+ebx]
321	lea	ebx,[64+ebx]	; advance input pointer
322	mov	DWORD [28+eax],ebp
323	mov	ebp,DWORD [esp]	; parked output word 0
324	mov	DWORD [40+eax],ecx
325	mov	ecx,DWORD [160+esp]
326	mov	DWORD [44+eax],esi
327	mov	DWORD [52+eax],edx
328	mov	DWORD [60+eax],edi
329	mov	DWORD [eax],ebp
330	lea	eax,[64+eax]	; advance output pointer
331	sub	ecx,64
332	jnz	NEAR L$003outer_loop
333	jmp	NEAR L$006done
; Partial final block: finish the feed-forward, write all 16 keystream
; words to [0..60+esp], then XOR byte by byte. ebx still holds the
; number of bytes remaining (1..63 here).
334L$005tail:
335	add	edx,DWORD [112+esp]
336	add	edi,DWORD [120+esp]
337	mov	DWORD [esp],eax
338	mov	DWORD [16+esp],ebp
339	mov	DWORD [32+esp],ecx
340	mov	DWORD [36+esp],esi
341	mov	DWORD [48+esp],edx
342	mov	DWORD [56+esp],edi
343	mov	ebp,DWORD [4+esp]
344	mov	ecx,DWORD [8+esp]
345	mov	esi,DWORD [12+esp]
346	mov	edx,DWORD [20+esp]
347	mov	edi,DWORD [24+esp]
348	add	ebp,857760878
349	add	ecx,2036477234
350	add	esi,1797285236
351	add	edx,DWORD [84+esp]
352	add	edi,DWORD [88+esp]
353	mov	DWORD [4+esp],ebp
354	mov	DWORD [8+esp],ecx
355	mov	DWORD [12+esp],esi
356	mov	DWORD [20+esp],edx
357	mov	DWORD [24+esp],edi
358	mov	ebp,DWORD [28+esp]
359	mov	ecx,DWORD [40+esp]
360	mov	esi,DWORD [44+esp]
361	mov	edx,DWORD [52+esp]
362	mov	edi,DWORD [60+esp]
363	add	ebp,DWORD [92+esp]
364	add	ecx,DWORD [104+esp]
365	add	esi,DWORD [108+esp]
366	add	edx,DWORD [116+esp]
367	add	edi,DWORD [124+esp]
368	mov	DWORD [28+esp],ebp
369	mov	ebp,DWORD [156+esp]	; ebp = input pointer
370	mov	DWORD [40+esp],ecx
371	mov	ecx,DWORD [152+esp]	; ecx = output pointer
372	mov	DWORD [44+esp],esi
373	xor	esi,esi	; esi = byte index
374	mov	DWORD [52+esp],edx
375	mov	DWORD [60+esp],edi
376	xor	eax,eax
377	xor	edx,edx
378L$007tail_loop:
379	mov	al,BYTE [ebp*1+esi]	; input byte
380	mov	dl,BYTE [esi*1+esp]	; keystream byte
381	lea	esi,[1+esi]
382	xor	al,dl
383	mov	BYTE [esi*1+ecx-1],al	; -1 compensates for the increment above
384	dec	ebx
385	jnz	NEAR L$007tail_loop
386L$006done:
387	add	esp,132
388L$000no_data:
389	pop	edi
390	pop	esi
391	pop	ebx
392	pop	ebp
393	ret
;-----------------------------------------------------------------------
; _ChaCha20_ssse3 -- SSSE3 implementation. Takes the same five stack
; arguments as _ChaCha20_ctr32, read after the four pushes:
;   [20+esp] out, [24+esp] in, [28+esp] len,
;   [32+esp] key pointer (8 dwords), [36+esp] counter pointer (4 dwords)
;
; Two ways in:
;   * direct call to _ChaCha20_ssse3;
;   * `jmp L$ssse3_shortcut` from _ChaCha20_ctr32, which has already
;     pushed ebp/ebx/esi/edi, rejected len == 0 and left the run-time
;     address of L$pic_point in eax.
; The code after L$ssse3_shortcut needs eax = address of L$pic_point to
; find L$ssse3_data, and needs len != 0 (the byte tail loop is a
; dec/jnz loop). The direct entry establishes both itself before
; falling through to the shared label.
;
; Frame after `and esp,-64` (64-byte aligned):
;   [0..63+esp]      1x path: saved input state / keystream for the tail
;   [0..255+esp]     4x path: working state, addressed as ebx-128..ebx+127
;   [256..511+esp]   4x path: broadcast input state, ebp-128..ebp+127
;   [512+esp]        caller's esp (restored at L$011done)
;   [516+esp]        key pointer      } saved only on the 4x path
;   [520+esp]        counter pointer  }
;
; 4x path: lengths >= 256 are processed 256 bytes (four blocks) per
; outer iteration, one block per 32-bit lane. Whatever is left (< 256
; bytes) and all short inputs go through the 1x path, 64 bytes at a
; time, with a byte loop for the final partial block.
;
; ebp, ebx, esi and edi are saved and restored; eax, ecx, edx and
; xmm0-xmm7 are not.
;-----------------------------------------------------------------------
394global	_ChaCha20_ssse3
395align	16
396_ChaCha20_ssse3:
397L$_ChaCha20_ssse3_begin:
398	push	ebp
399	push	ebx
400	push	esi
401	push	edi
; --- direct-entry fix-ups (skipped by the jump from _ChaCha20_ctr32) ---
	cmp	DWORD [28+esp],0	; len == 0: nothing to do
	je	NEAR L$ssse3_no_data
	call	L$ssse3_pic_point
L$ssse3_pic_point:
	pop	eax	; eax = run-time address of L$ssse3_pic_point
	lea	eax,[(L$pic_point-L$ssse3_pic_point)+eax]	; rebase to L$pic_point, as the shortcut path provides
402L$ssse3_shortcut:
403	mov	edi,DWORD [20+esp]	; edi = out
404	mov	esi,DWORD [24+esp]	; esi = in
405	mov	ecx,DWORD [28+esp]	; ecx = len
406	mov	edx,DWORD [32+esp]	; edx = key pointer
407	mov	ebx,DWORD [36+esp]	; ebx = counter pointer
408	mov	ebp,esp
409	sub	esp,524
410	and	esp,-64
411	mov	DWORD [512+esp],ebp	; remember caller's esp
412	lea	eax,[(L$ssse3_data-L$pic_point)+eax]	; eax = &L$ssse3_data
413	movdqu	xmm3,[ebx]	; xmm3 = counter block
414	cmp	ecx,256
415	jb	NEAR L$0081x
; ---- 4x path set-up: broadcast every input word across four lanes ----
416	mov	DWORD [516+esp],edx
417	mov	DWORD [520+esp],ebx
418	sub	ecx,256
419	lea	ebp,[384+esp]
420	movdqu	xmm7,[edx]
421	pshufd	xmm0,xmm3,0
422	pshufd	xmm1,xmm3,85
423	pshufd	xmm2,xmm3,170
424	pshufd	xmm3,xmm3,255
425	paddd	xmm0,[48+eax]	; lane counters = ctr + {0,1,2,3}
426	pshufd	xmm4,xmm7,0
427	pshufd	xmm5,xmm7,85
428	psubd	xmm0,[64+eax]	; minus 4: the outer loop adds 4 before each batch
429	pshufd	xmm6,xmm7,170
430	pshufd	xmm7,xmm7,255
431	movdqa	[64+ebp],xmm0
432	movdqa	[80+ebp],xmm1
433	movdqa	[96+ebp],xmm2
434	movdqa	[112+ebp],xmm3
435	movdqu	xmm3,[16+edx]
436	movdqa	[ebp-64],xmm4
437	movdqa	[ebp-48],xmm5
438	movdqa	[ebp-32],xmm6
439	movdqa	[ebp-16],xmm7
440	movdqa	xmm7,[32+eax]	; the four constant words
441	lea	ebx,[128+esp]
442	pshufd	xmm0,xmm3,0
443	pshufd	xmm1,xmm3,85
444	pshufd	xmm2,xmm3,170
445	pshufd	xmm3,xmm3,255
446	pshufd	xmm4,xmm7,0
447	pshufd	xmm5,xmm7,85
448	pshufd	xmm6,xmm7,170
449	pshufd	xmm7,xmm7,255
450	movdqa	[ebp],xmm0
451	movdqa	[16+ebp],xmm1
452	movdqa	[32+ebp],xmm2
453	movdqa	[48+ebp],xmm3
454	movdqa	[ebp-128],xmm4
455	movdqa	[ebp-112],xmm5
456	movdqa	[ebp-96],xmm6
457	movdqa	[ebp-80],xmm7
458	lea	esi,[128+esi]	; bias in/out by 128; the stores below use -128..+64 offsets
459	lea	edi,[128+edi]
460	jmp	NEAR L$009outer_loop
461align	16
; Copy the input state (ebp) to the working state (ebx). Words 0, 4, 8,
; 9 and 12 are kept in xmm0, xmm3, xmm4, xmm5 and xmm6 instead of being
; stored. The lane counters are advanced by 4 and written back.
462L$009outer_loop:
463	movdqa	xmm1,[ebp-112]
464	movdqa	xmm2,[ebp-96]
465	movdqa	xmm3,[ebp-80]
466	movdqa	xmm5,[ebp-48]
467	movdqa	xmm6,[ebp-32]
468	movdqa	xmm7,[ebp-16]
469	movdqa	[ebx-112],xmm1
470	movdqa	[ebx-96],xmm2
471	movdqa	[ebx-80],xmm3
472	movdqa	[ebx-48],xmm5
473	movdqa	[ebx-32],xmm6
474	movdqa	[ebx-16],xmm7
475	movdqa	xmm2,[32+ebp]
476	movdqa	xmm3,[48+ebp]
477	movdqa	xmm4,[64+ebp]
478	movdqa	xmm5,[80+ebp]
479	movdqa	xmm6,[96+ebp]
480	movdqa	xmm7,[112+ebp]
481	paddd	xmm4,[64+eax]	; lane counters += 4
482	movdqa	[32+ebx],xmm2
483	movdqa	[48+ebx],xmm3
484	movdqa	[64+ebx],xmm4
485	movdqa	[80+ebx],xmm5
486	movdqa	[96+ebx],xmm6
487	movdqa	[112+ebx],xmm7
488	movdqa	[64+ebp],xmm4
489	movdqa	xmm0,[ebp-128]
490	movdqa	xmm6,xmm4
491	movdqa	xmm3,[ebp-64]
492	movdqa	xmm4,[ebp]
493	movdqa	xmm5,[16+ebp]
494	mov	edx,10	; 10 loop iterations
495	nop
496align	16
; Same round structure as the scalar code, four blocks at once. Rotations
; by 16 and 8 use pshufb with the masks at [eax] and [16+eax]; rotations
; by 12 and 7 use a pslld/psrld/por triple.
497L$010loop:
498	paddd	xmm0,xmm3
499	movdqa	xmm2,xmm3
500	pxor	xmm6,xmm0
501	pshufb	xmm6,[eax]
502	paddd	xmm4,xmm6
503	pxor	xmm2,xmm4
504	movdqa	xmm3,[ebx-48]
505	movdqa	xmm1,xmm2
506	pslld	xmm2,12
507	psrld	xmm1,20
508	por	xmm2,xmm1
509	movdqa	xmm1,[ebx-112]
510	paddd	xmm0,xmm2
511	movdqa	xmm7,[80+ebx]
512	pxor	xmm6,xmm0
513	movdqa	[ebx-128],xmm0
514	pshufb	xmm6,[16+eax]
515	paddd	xmm4,xmm6
516	movdqa	[64+ebx],xmm6
517	pxor	xmm2,xmm4
518	paddd	xmm1,xmm3
519	movdqa	xmm0,xmm2
520	pslld	xmm2,7
521	psrld	xmm0,25
522	pxor	xmm7,xmm1
523	por	xmm2,xmm0
524	movdqa	[ebx],xmm4
525	pshufb	xmm7,[eax]
526	movdqa	[ebx-64],xmm2
527	paddd	xmm5,xmm7
528	movdqa	xmm4,[32+ebx]
529	pxor	xmm3,xmm5
530	movdqa	xmm2,[ebx-32]
531	movdqa	xmm0,xmm3
532	pslld	xmm3,12
533	psrld	xmm0,20
534	por	xmm3,xmm0
535	movdqa	xmm0,[ebx-96]
536	paddd	xmm1,xmm3
537	movdqa	xmm6,[96+ebx]
538	pxor	xmm7,xmm1
539	movdqa	[ebx-112],xmm1
540	pshufb	xmm7,[16+eax]
541	paddd	xmm5,xmm7
542	movdqa	[80+ebx],xmm7
543	pxor	xmm3,xmm5
544	paddd	xmm0,xmm2
545	movdqa	xmm1,xmm3
546	pslld	xmm3,7
547	psrld	xmm1,25
548	pxor	xmm6,xmm0
549	por	xmm3,xmm1
550	movdqa	[16+ebx],xmm5
551	pshufb	xmm6,[eax]
552	movdqa	[ebx-48],xmm3
553	paddd	xmm4,xmm6
554	movdqa	xmm5,[48+ebx]
555	pxor	xmm2,xmm4
556	movdqa	xmm3,[ebx-16]
557	movdqa	xmm1,xmm2
558	pslld	xmm2,12
559	psrld	xmm1,20
560	por	xmm2,xmm1
561	movdqa	xmm1,[ebx-80]
562	paddd	xmm0,xmm2
563	movdqa	xmm7,[112+ebx]
564	pxor	xmm6,xmm0
565	movdqa	[ebx-96],xmm0
566	pshufb	xmm6,[16+eax]
567	paddd	xmm4,xmm6
568	movdqa	[96+ebx],xmm6
569	pxor	xmm2,xmm4
570	paddd	xmm1,xmm3
571	movdqa	xmm0,xmm2
572	pslld	xmm2,7
573	psrld	xmm0,25
574	pxor	xmm7,xmm1
575	por	xmm2,xmm0
576	pshufb	xmm7,[eax]
577	movdqa	[ebx-32],xmm2
578	paddd	xmm5,xmm7
579	pxor	xmm3,xmm5
580	movdqa	xmm2,[ebx-48]
581	movdqa	xmm0,xmm3
582	pslld	xmm3,12
583	psrld	xmm0,20
584	por	xmm3,xmm0
585	movdqa	xmm0,[ebx-128]
586	paddd	xmm1,xmm3
587	pxor	xmm7,xmm1
588	movdqa	[ebx-80],xmm1
589	pshufb	xmm7,[16+eax]
590	paddd	xmm5,xmm7
591	movdqa	xmm6,xmm7
592	pxor	xmm3,xmm5
593	paddd	xmm0,xmm2
594	movdqa	xmm1,xmm3
595	pslld	xmm3,7
596	psrld	xmm1,25
597	pxor	xmm6,xmm0
598	por	xmm3,xmm1
599	pshufb	xmm6,[eax]
600	movdqa	[ebx-16],xmm3
601	paddd	xmm4,xmm6
602	pxor	xmm2,xmm4
603	movdqa	xmm3,[ebx-32]
604	movdqa	xmm1,xmm2
605	pslld	xmm2,12
606	psrld	xmm1,20
607	por	xmm2,xmm1
608	movdqa	xmm1,[ebx-112]
609	paddd	xmm0,xmm2
610	movdqa	xmm7,[64+ebx]
611	pxor	xmm6,xmm0
612	movdqa	[ebx-128],xmm0
613	pshufb	xmm6,[16+eax]
614	paddd	xmm4,xmm6
615	movdqa	[112+ebx],xmm6
616	pxor	xmm2,xmm4
617	paddd	xmm1,xmm3
618	movdqa	xmm0,xmm2
619	pslld	xmm2,7
620	psrld	xmm0,25
621	pxor	xmm7,xmm1
622	por	xmm2,xmm0
623	movdqa	[32+ebx],xmm4
624	pshufb	xmm7,[eax]
625	movdqa	[ebx-48],xmm2
626	paddd	xmm5,xmm7
627	movdqa	xmm4,[ebx]
628	pxor	xmm3,xmm5
629	movdqa	xmm2,[ebx-16]
630	movdqa	xmm0,xmm3
631	pslld	xmm3,12
632	psrld	xmm0,20
633	por	xmm3,xmm0
634	movdqa	xmm0,[ebx-96]
635	paddd	xmm1,xmm3
636	movdqa	xmm6,[80+ebx]
637	pxor	xmm7,xmm1
638	movdqa	[ebx-112],xmm1
639	pshufb	xmm7,[16+eax]
640	paddd	xmm5,xmm7
641	movdqa	[64+ebx],xmm7
642	pxor	xmm3,xmm5
643	paddd	xmm0,xmm2
644	movdqa	xmm1,xmm3
645	pslld	xmm3,7
646	psrld	xmm1,25
647	pxor	xmm6,xmm0
648	por	xmm3,xmm1
649	movdqa	[48+ebx],xmm5
650	pshufb	xmm6,[eax]
651	movdqa	[ebx-32],xmm3
652	paddd	xmm4,xmm6
653	movdqa	xmm5,[16+ebx]
654	pxor	xmm2,xmm4
655	movdqa	xmm3,[ebx-64]
656	movdqa	xmm1,xmm2
657	pslld	xmm2,12
658	psrld	xmm1,20
659	por	xmm2,xmm1
660	movdqa	xmm1,[ebx-80]
661	paddd	xmm0,xmm2
662	movdqa	xmm7,[96+ebx]
663	pxor	xmm6,xmm0
664	movdqa	[ebx-96],xmm0
665	pshufb	xmm6,[16+eax]
666	paddd	xmm4,xmm6
667	movdqa	[80+ebx],xmm6
668	pxor	xmm2,xmm4
669	paddd	xmm1,xmm3
670	movdqa	xmm0,xmm2
671	pslld	xmm2,7
672	psrld	xmm0,25
673	pxor	xmm7,xmm1
674	por	xmm2,xmm0
675	pshufb	xmm7,[eax]
676	movdqa	[ebx-16],xmm2
677	paddd	xmm5,xmm7
678	pxor	xmm3,xmm5
679	movdqa	xmm0,xmm3
680	pslld	xmm3,12
681	psrld	xmm0,20
682	por	xmm3,xmm0
683	movdqa	xmm0,[ebx-128]
684	paddd	xmm1,xmm3
685	movdqa	xmm6,[64+ebx]
686	pxor	xmm7,xmm1
687	movdqa	[ebx-80],xmm1
688	pshufb	xmm7,[16+eax]
689	paddd	xmm5,xmm7
690	movdqa	[96+ebx],xmm7
691	pxor	xmm3,xmm5
692	movdqa	xmm1,xmm3
693	pslld	xmm3,7
694	psrld	xmm1,25
695	por	xmm3,xmm1
696	dec	edx
697	jnz	NEAR L$010loop
; Flush the words still held in registers, then handle the state four
; words at a time: add the input state, transpose the 4x4 dword tile
; with punpck* so each register holds 16 consecutive bytes of one block,
; XOR with the input and store. The four results of a group go to the
; four blocks (64 bytes apart); esi/edi step by 16 between groups.
698	movdqa	[ebx-64],xmm3
699	movdqa	[ebx],xmm4
700	movdqa	[16+ebx],xmm5
701	movdqa	[64+ebx],xmm6
702	movdqa	[96+ebx],xmm7
703	movdqa	xmm1,[ebx-112]
704	movdqa	xmm2,[ebx-96]
705	movdqa	xmm3,[ebx-80]
706	paddd	xmm0,[ebp-128]
707	paddd	xmm1,[ebp-112]
708	paddd	xmm2,[ebp-96]
709	paddd	xmm3,[ebp-80]
710	movdqa	xmm6,xmm0
711	punpckldq	xmm0,xmm1
712	movdqa	xmm7,xmm2
713	punpckldq	xmm2,xmm3
714	punpckhdq	xmm6,xmm1
715	punpckhdq	xmm7,xmm3
716	movdqa	xmm1,xmm0
717	punpcklqdq	xmm0,xmm2
718	movdqa	xmm3,xmm6
719	punpcklqdq	xmm6,xmm7
720	punpckhqdq	xmm1,xmm2
721	punpckhqdq	xmm3,xmm7
722	movdqu	xmm4,[esi-128]
723	movdqu	xmm5,[esi-64]
724	movdqu	xmm2,[esi]
725	movdqu	xmm7,[64+esi]
726	lea	esi,[16+esi]
727	pxor	xmm4,xmm0
728	movdqa	xmm0,[ebx-64]
729	pxor	xmm5,xmm1
730	movdqa	xmm1,[ebx-48]
731	pxor	xmm6,xmm2
732	movdqa	xmm2,[ebx-32]
733	pxor	xmm7,xmm3
734	movdqa	xmm3,[ebx-16]
735	movdqu	[edi-128],xmm4
736	movdqu	[edi-64],xmm5
737	movdqu	[edi],xmm6
738	movdqu	[64+edi],xmm7
739	lea	edi,[16+edi]
740	paddd	xmm0,[ebp-64]
741	paddd	xmm1,[ebp-48]
742	paddd	xmm2,[ebp-32]
743	paddd	xmm3,[ebp-16]
744	movdqa	xmm6,xmm0
745	punpckldq	xmm0,xmm1
746	movdqa	xmm7,xmm2
747	punpckldq	xmm2,xmm3
748	punpckhdq	xmm6,xmm1
749	punpckhdq	xmm7,xmm3
750	movdqa	xmm1,xmm0
751	punpcklqdq	xmm0,xmm2
752	movdqa	xmm3,xmm6
753	punpcklqdq	xmm6,xmm7
754	punpckhqdq	xmm1,xmm2
755	punpckhqdq	xmm3,xmm7
756	movdqu	xmm4,[esi-128]
757	movdqu	xmm5,[esi-64]
758	movdqu	xmm2,[esi]
759	movdqu	xmm7,[64+esi]
760	lea	esi,[16+esi]
761	pxor	xmm4,xmm0
762	movdqa	xmm0,[ebx]
763	pxor	xmm5,xmm1
764	movdqa	xmm1,[16+ebx]
765	pxor	xmm6,xmm2
766	movdqa	xmm2,[32+ebx]
767	pxor	xmm7,xmm3
768	movdqa	xmm3,[48+ebx]
769	movdqu	[edi-128],xmm4
770	movdqu	[edi-64],xmm5
771	movdqu	[edi],xmm6
772	movdqu	[64+edi],xmm7
773	lea	edi,[16+edi]
774	paddd	xmm0,[ebp]
775	paddd	xmm1,[16+ebp]
776	paddd	xmm2,[32+ebp]
777	paddd	xmm3,[48+ebp]
778	movdqa	xmm6,xmm0
779	punpckldq	xmm0,xmm1
780	movdqa	xmm7,xmm2
781	punpckldq	xmm2,xmm3
782	punpckhdq	xmm6,xmm1
783	punpckhdq	xmm7,xmm3
784	movdqa	xmm1,xmm0
785	punpcklqdq	xmm0,xmm2
786	movdqa	xmm3,xmm6
787	punpcklqdq	xmm6,xmm7
788	punpckhqdq	xmm1,xmm2
789	punpckhqdq	xmm3,xmm7
790	movdqu	xmm4,[esi-128]
791	movdqu	xmm5,[esi-64]
792	movdqu	xmm2,[esi]
793	movdqu	xmm7,[64+esi]
794	lea	esi,[16+esi]
795	pxor	xmm4,xmm0
796	movdqa	xmm0,[64+ebx]
797	pxor	xmm5,xmm1
798	movdqa	xmm1,[80+ebx]
799	pxor	xmm6,xmm2
800	movdqa	xmm2,[96+ebx]
801	pxor	xmm7,xmm3
802	movdqa	xmm3,[112+ebx]
803	movdqu	[edi-128],xmm4
804	movdqu	[edi-64],xmm5
805	movdqu	[edi],xmm6
806	movdqu	[64+edi],xmm7
807	lea	edi,[16+edi]
808	paddd	xmm0,[64+ebp]
809	paddd	xmm1,[80+ebp]
810	paddd	xmm2,[96+ebp]
811	paddd	xmm3,[112+ebp]
812	movdqa	xmm6,xmm0
813	punpckldq	xmm0,xmm1
814	movdqa	xmm7,xmm2
815	punpckldq	xmm2,xmm3
816	punpckhdq	xmm6,xmm1
817	punpckhdq	xmm7,xmm3
818	movdqa	xmm1,xmm0
819	punpcklqdq	xmm0,xmm2
820	movdqa	xmm3,xmm6
821	punpcklqdq	xmm6,xmm7
822	punpckhqdq	xmm1,xmm2
823	punpckhqdq	xmm3,xmm7
824	movdqu	xmm4,[esi-128]
825	movdqu	xmm5,[esi-64]
826	movdqu	xmm2,[esi]
827	movdqu	xmm7,[64+esi]
828	lea	esi,[208+esi]	; 3*16 + 208 = 256 bytes consumed per batch
829	pxor	xmm4,xmm0
830	pxor	xmm5,xmm1
831	pxor	xmm6,xmm2
832	pxor	xmm7,xmm3
833	movdqu	[edi-128],xmm4
834	movdqu	[edi-64],xmm5
835	movdqu	[edi],xmm6
836	movdqu	[64+edi],xmm7
837	lea	edi,[208+edi]
838	sub	ecx,256
839	jnc	NEAR L$009outer_loop	; another full 256-byte batch available
840	add	ecx,256	; ecx = bytes left (0..255)
841	jz	NEAR L$011done
; Hand the remainder to the 1x path: undo the pointer bias, reload the
; key/counter pointers, and rebuild the counter block in xmm3 as
; {lane-0 counter of the last batch + 4, original words 1..3}.
842	mov	ebx,DWORD [520+esp]
843	lea	esi,[esi-128]
844	mov	edx,DWORD [516+esp]
845	lea	edi,[edi-128]
846	movd	xmm2,DWORD [64+ebp]
847	movdqu	xmm3,[ebx]
848	paddd	xmm2,[96+eax]
849	pand	xmm3,[112+eax]	; clear word 0, keep words 1..3
850	por	xmm3,xmm2
; ---- 1x path: one block per iteration, state rows in xmm0..xmm3 ----
; xmm0 = constants, xmm1/xmm2 = key halves, xmm3 = counter block,
; xmm6/xmm7 = rotate-by-16 / rotate-by-8 shuffle masks.
851L$0081x:
852	movdqa	xmm0,[32+eax]
853	movdqu	xmm1,[edx]
854	movdqu	xmm2,[16+edx]
855	movdqa	xmm6,[eax]
856	movdqa	xmm7,[16+eax]
857	mov	DWORD [48+esp],ebp	; NOTE: dead store -- [48+esp] is overwritten by the xmm3 store below
858	movdqa	[esp],xmm0
859	movdqa	[16+esp],xmm1
860	movdqa	[32+esp],xmm2
861	movdqa	[48+esp],xmm3
862	mov	edx,10
863	jmp	NEAR L$012loop1x
864align	16
; Next block: reload the saved state and add {1,0,0,0} to the counter row.
865L$013outer1x:
866	movdqa	xmm3,[80+eax]
867	movdqa	xmm0,[esp]
868	movdqa	xmm1,[16+esp]
869	movdqa	xmm2,[32+esp]
870	paddd	xmm3,[48+esp]
871	mov	edx,10
872	movdqa	[48+esp],xmm3
873	jmp	NEAR L$012loop1x
874align	16
; Each iteration applies the add/xor/rotate sequence to all four columns
; at once, rotates rows 1..3 with pshufd so the diagonals line up as
; columns, applies the sequence again, and rotates the rows back.
875L$012loop1x:
876	paddd	xmm0,xmm1
877	pxor	xmm3,xmm0
878db	102,15,56,0,222	; pshufb xmm3,xmm6 (rotate each dword left 16)
879	paddd	xmm2,xmm3
880	pxor	xmm1,xmm2
881	movdqa	xmm4,xmm1
882	psrld	xmm1,20
883	pslld	xmm4,12
884	por	xmm1,xmm4
885	paddd	xmm0,xmm1
886	pxor	xmm3,xmm0
887db	102,15,56,0,223	; pshufb xmm3,xmm7 (rotate each dword left 8)
888	paddd	xmm2,xmm3
889	pxor	xmm1,xmm2
890	movdqa	xmm4,xmm1
891	psrld	xmm1,25
892	pslld	xmm4,7
893	por	xmm1,xmm4
894	pshufd	xmm2,xmm2,78
895	pshufd	xmm1,xmm1,57
896	pshufd	xmm3,xmm3,147
897	nop
898	paddd	xmm0,xmm1
899	pxor	xmm3,xmm0
900db	102,15,56,0,222	; pshufb xmm3,xmm6
901	paddd	xmm2,xmm3
902	pxor	xmm1,xmm2
903	movdqa	xmm4,xmm1
904	psrld	xmm1,20
905	pslld	xmm4,12
906	por	xmm1,xmm4
907	paddd	xmm0,xmm1
908	pxor	xmm3,xmm0
909db	102,15,56,0,223	; pshufb xmm3,xmm7
910	paddd	xmm2,xmm3
911	pxor	xmm1,xmm2
912	movdqa	xmm4,xmm1
913	psrld	xmm1,25
914	pslld	xmm4,7
915	por	xmm1,xmm4
916	pshufd	xmm2,xmm2,78
917	pshufd	xmm1,xmm1,147
918	pshufd	xmm3,xmm3,57
919	dec	edx
920	jnz	NEAR L$012loop1x
; Feed-forward: add the saved input state to get the keystream block.
921	paddd	xmm0,[esp]
922	paddd	xmm1,[16+esp]
923	paddd	xmm2,[32+esp]
924	paddd	xmm3,[48+esp]
925	cmp	ecx,64
926	jb	NEAR L$014tail	; fewer than 64 bytes left (unsigned)
927	movdqu	xmm4,[esi]
928	movdqu	xmm5,[16+esi]
929	pxor	xmm0,xmm4
930	movdqu	xmm4,[32+esi]
931	pxor	xmm1,xmm5
932	movdqu	xmm5,[48+esi]
933	pxor	xmm2,xmm4
934	pxor	xmm3,xmm5
935	lea	esi,[64+esi]
936	movdqu	[edi],xmm0
937	movdqu	[16+edi],xmm1
938	movdqu	[32+edi],xmm2
939	movdqu	[48+edi],xmm3
940	lea	edi,[64+edi]
941	sub	ecx,64
942	jnz	NEAR L$013outer1x
943	jmp	NEAR L$011done
; Partial final block: spill the keystream over the saved state and XOR
; ecx bytes one at a time. Requires ecx != 0 on arrival (dec/jnz loop).
944L$014tail:
945	movdqa	[esp],xmm0
946	movdqa	[16+esp],xmm1
947	movdqa	[32+esp],xmm2
948	movdqa	[48+esp],xmm3
949	xor	eax,eax
950	xor	edx,edx
951	xor	ebp,ebp	; ebp = byte index
952L$015tail_loop:
953	mov	al,BYTE [ebp*1+esp]	; keystream byte
954	mov	dl,BYTE [ebp*1+esi]	; input byte
955	lea	ebp,[1+ebp]
956	xor	al,dl
957	mov	BYTE [ebp*1+edi-1],al	; -1 compensates for the increment above
958	dec	ecx
959	jnz	NEAR L$015tail_loop
960L$011done:
961	mov	esp,DWORD [512+esp]	; drop the aligned frame
; Direct-entry early exit for len == 0: no frame was built, only the
; four pushed registers need restoring.
L$ssse3_no_data:
962	pop	edi
963	pop	esi
964	pop	ebx
965	pop	ebp
966	ret
; Constant table for the SSSE3 code, addressed as [offset+eax] after eax
; has been pointed at L$ssse3_data. 64-byte aligned, so every 16-byte row
; is usable as a movdqa / aligned memory operand.
967align	64
968L$ssse3_data:
969db	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13	; +0:   pshufb mask, rotate each dword left by 16
970db	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14	; +16:  pshufb mask, rotate each dword left by 8
971dd	1634760805,857760878,2036477234,1797285236	; +32:  state words 0..3 (same constants as the scalar code)
972dd	0,1,2,3	; +48:  per-lane counter offsets for the 4x path
973dd	4,4,4,4	; +64:  4x path counter step per 256-byte batch
974dd	1,0,0,0	; +80:  1x path counter step per 64-byte block
975dd	4,0,0,0	; +96:  counter adjustment when leaving the 4x path
976dd	0,-1,-1,-1	; +112: mask that clears word 0 of the counter block
977align	64
; NUL-terminated ASCII identification string:
; "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>"
978db	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
979db	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
980db	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
981db	114,103,62,0
; Declare _OPENSSL_ia32cap_P as a 16-byte common symbol in .bss.
; _ChaCha20_ctr32 tests bits in its first two dwords to choose between
; the scalar and SSSE3 paths. NOTE(review): the vector is presumably
; filled in by CPU-detection code elsewhere in the library -- nothing in
; this file writes it.
982segment	.bss
983common	_OPENSSL_ia32cap_P 16
984