• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; This file is generated from a similarly-named Perl script in the BoringSSL
2; source tree. Do not edit by hand.
3
; Pull in the project's include file, then pick the code section directive
; that matches NASM's output format (obj, win32, or anything else).
; NOTE(review): judging by its name the include presumably defines symbol-
; prefixing macros -- confirm against the generated header.
4%include "ring_core_generated/prefix_symbols_nasm.inc"
5%ifidn __OUTPUT_FORMAT__,obj
6section	code	use32 class=code align=64
7%elifidn __OUTPUT_FORMAT__,win32
; NOTE(review): `@feat.00` is presumably the COFF SafeSEH marker symbol -- confirm.
8$@feat.00 equ 1
9section	.text	code align=64
10%else
11section	.text	code
12%endif
;-----------------------------------------------------------------------
; _ChaCha20_ctr32 -- exported entry point (32-bit x86, NASM/Intel syntax).
;
; Stack arguments as they are read by this file (offsets are relative to
; esp after the four pushes below; the return address is at [16+esp]):
;   [20+esp]  destination pointer (XORed bytes are stored through it)
;   [24+esp]  source pointer
;   [28+esp]  byte count; zero returns immediately
;   [32+esp]  pointer to 8 key dwords
;   [36+esp]  pointer to 4 dwords; dword 0 is incremented once per block
; NOTE(review): presumably this matches the C prototype
;   void ChaCha20_ctr32(uint8_t *out, const uint8_t *in, size_t in_len,
;                       const uint32_t key[8], const uint32_t counter[4]);
; confirm against the C header.
;-----------------------------------------------------------------------
13global	_ChaCha20_ctr32
14align	16
15_ChaCha20_ctr32:
16L$_ChaCha20_ctr32_begin:
17	push	ebp		; ebp/ebx/esi/edi are popped again on every exit path
18	push	ebx
19	push	esi
20	push	edi
21	xor	eax,eax
22	cmp	eax,DWORD [28+esp]	; byte count == 0 ?
23	je	NEAR L$000no_data	; yes: just restore registers and return
; call/pop leaves the run-time address of L$pic_point in eax. The SSSE3 path
; adds (L$ssse3_data - L$pic_point) to it to locate its constant table.
24	call	L$pic_point
25L$pic_point:
26	pop	eax
27	lea	ebp,[_OPENSSL_ia32cap_P]
; Use the SSSE3 path only when bit 24 of capability dword 0 and bit 9 of
; capability dword 1 are both set; otherwise fall into the scalar path.
; NOTE(review): presumably these are the FXSR and SSSE3 feature bits of the
; OpenSSL-style capability vector -- confirm against its documentation.
28	test	DWORD [ebp],16777216	; 16777216 = 1<<24
29	jz	NEAR L$001x86
30	test	DWORD [4+ebp],512	; 512 = 1<<9
31	jz	NEAR L$001x86
32	jmp	NEAR L$ssse3_shortcut	; registers are already pushed, eax = &L$pic_point
; Scalar (integer-register) path: build the stack frame.
; After `sub esp,132` the frame is used like this:
;   [0..63+esp]      16-dword working state / finished keystream block
;   [80..108+esp]    copy of the 8 key dwords        (state dwords 4..11)
;   [112..124+esp]   copy of the 4 counter dwords    (state dwords 12..15)
;   [128+esp]        round-loop iteration counter
;   [152+esp] [156+esp] [160+esp]
;                    the caller's first three stack arguments
;                    (132 frame + 16 pushed + 4 return address = 152)
33L$001x86:
34	mov	esi,DWORD [32+esp]	; esi = key pointer (4th argument)
35	mov	edi,DWORD [36+esp]	; edi = counter-block pointer (5th argument)
36	sub	esp,132
37	mov	eax,DWORD [esi]
38	mov	ebx,DWORD [4+esi]
39	mov	ecx,DWORD [8+esi]
40	mov	edx,DWORD [12+esi]
41	mov	DWORD [80+esp],eax
42	mov	DWORD [84+esp],ebx
43	mov	DWORD [88+esp],ecx
44	mov	DWORD [92+esp],edx
45	mov	eax,DWORD [16+esi]
46	mov	ebx,DWORD [20+esi]
47	mov	ecx,DWORD [24+esi]
48	mov	edx,DWORD [28+esi]
49	mov	DWORD [96+esp],eax
50	mov	DWORD [100+esp],ebx
51	mov	DWORD [104+esp],ecx
52	mov	DWORD [108+esp],edx
53	mov	eax,DWORD [edi]
54	mov	ebx,DWORD [4+edi]
55	mov	ecx,DWORD [8+edi]
56	mov	edx,DWORD [12+edi]
57	sub	eax,1		; pre-decrement: L$002entry adds 1 before every block
58	mov	DWORD [112+esp],eax
59	mov	DWORD [116+esp],ebx
60	mov	DWORD [120+esp],ecx
61	mov	DWORD [124+esp],edx
62	jmp	NEAR L$002entry	; first block: argument slots are already valid
63align	16
; Reached after every full 64-byte block: write the advanced pointers and the
; remaining byte count back into the argument slots.
64L$003outer_loop:
65	mov	DWORD [156+esp],ebx	; source pointer
66	mov	DWORD [152+esp],eax	; destination pointer
67	mov	DWORD [160+esp],ecx	; bytes remaining
; Load the working state for one block. State dwords 0..3 are the constants
; 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 (ASCII "expand 32-byte k");
; dwords 4..15 come from the saved copy at [80..124+esp].
; On entry to L$004loop, eax/ebp/ecx/esi/edx hold state dwords 0/4/8/9/12;
; the other dwords live in the [0..63+esp] area.
68L$002entry:
69	mov	eax,1634760805
70	mov	DWORD [4+esp],857760878
71	mov	DWORD [8+esp],2036477234
72	mov	DWORD [12+esp],1797285236
73	mov	ebx,DWORD [84+esp]
74	mov	ebp,DWORD [88+esp]
75	mov	ecx,DWORD [104+esp]
76	mov	esi,DWORD [108+esp]
77	mov	edx,DWORD [116+esp]
78	mov	edi,DWORD [120+esp]
79	mov	DWORD [20+esp],ebx
80	mov	DWORD [24+esp],ebp
81	mov	DWORD [40+esp],ecx
82	mov	DWORD [44+esp],esi
83	mov	DWORD [52+esp],edx
84	mov	DWORD [56+esp],edi
85	mov	ebx,DWORD [92+esp]
86	mov	edi,DWORD [124+esp]
87	mov	edx,DWORD [112+esp]
88	mov	ebp,DWORD [80+esp]
89	mov	ecx,DWORD [96+esp]
90	mov	esi,DWORD [100+esp]
91	add	edx,1		; advance the block counter (state dword 12) ...
92	mov	DWORD [28+esp],ebx
93	mov	DWORD [60+esp],edi
94	mov	DWORD [112+esp],edx	; ... and keep the advanced value for the next block
95	mov	ebx,10		; the round loop runs 10 times
96	jmp	NEAR L$004loop
97align	16
; Scalar round loop, executed 10 times (iteration count kept at [128+esp]
; because ebx is needed as a working register).
; Every pass contains eight interleaved add / xor / rotate-left(16,12,8,7)
; sequences (ChaCha quarter-rounds, QR). The first four work on state dwords
; (0,4,8,12) (1,5,9,13) (2,6,10,14) (3,7,11,15); the last four work on
; (0,5,10,15) (1,6,11,12) (2,7,8,13) (3,4,9,14).
; Loads and stores of the next/previous quarter-round are interleaved with
; the arithmetic, so the statement order must not be changed.
98L$004loop:
99	add	eax,ebp		; QR(0,4,8,12)
100	mov	DWORD [128+esp],ebx	; park the iteration counter
101	mov	ebx,ebp
102	xor	edx,eax
103	rol	edx,16
104	add	ecx,edx
105	xor	ebx,ecx
106	mov	edi,DWORD [52+esp]
107	rol	ebx,12
108	mov	ebp,DWORD [20+esp]
109	add	eax,ebx
110	xor	edx,eax
111	mov	DWORD [esp],eax
112	rol	edx,8
113	mov	eax,DWORD [4+esp]
114	add	ecx,edx
115	mov	DWORD [48+esp],edx
116	xor	ebx,ecx
117	add	eax,ebp		; QR(1,5,9,13)
118	rol	ebx,7
119	xor	edi,eax
120	mov	DWORD [32+esp],ecx
121	rol	edi,16
122	mov	DWORD [16+esp],ebx
123	add	esi,edi
124	mov	ecx,DWORD [40+esp]
125	xor	ebp,esi
126	mov	edx,DWORD [56+esp]
127	rol	ebp,12
128	mov	ebx,DWORD [24+esp]
129	add	eax,ebp
130	xor	edi,eax
131	mov	DWORD [4+esp],eax
132	rol	edi,8
133	mov	eax,DWORD [8+esp]
134	add	esi,edi
135	mov	DWORD [52+esp],edi
136	xor	ebp,esi
137	add	eax,ebx		; QR(2,6,10,14)
138	rol	ebp,7
139	xor	edx,eax
140	mov	DWORD [36+esp],esi
141	rol	edx,16
142	mov	DWORD [20+esp],ebp
143	add	ecx,edx
144	mov	esi,DWORD [44+esp]
145	xor	ebx,ecx
146	mov	edi,DWORD [60+esp]
147	rol	ebx,12
148	mov	ebp,DWORD [28+esp]
149	add	eax,ebx
150	xor	edx,eax
151	mov	DWORD [8+esp],eax
152	rol	edx,8
153	mov	eax,DWORD [12+esp]
154	add	ecx,edx
155	mov	DWORD [56+esp],edx
156	xor	ebx,ecx
157	add	eax,ebp		; QR(3,7,11,15)
158	rol	ebx,7
159	xor	edi,eax
160	rol	edi,16
161	mov	DWORD [24+esp],ebx
162	add	esi,edi
163	xor	ebp,esi
164	rol	ebp,12
165	mov	ebx,DWORD [20+esp]
166	add	eax,ebp
167	xor	edi,eax
168	mov	DWORD [12+esp],eax
169	rol	edi,8
170	mov	eax,DWORD [esp]
171	add	esi,edi
172	mov	edx,edi		; dword 15 stays in a register for the next quarter-round
173	xor	ebp,esi
174	add	eax,ebx		; QR(0,5,10,15)
175	rol	ebp,7
176	xor	edx,eax
177	rol	edx,16
178	mov	DWORD [28+esp],ebp
179	add	ecx,edx
180	xor	ebx,ecx
181	mov	edi,DWORD [48+esp]
182	rol	ebx,12
183	mov	ebp,DWORD [24+esp]
184	add	eax,ebx
185	xor	edx,eax
186	mov	DWORD [esp],eax
187	rol	edx,8
188	mov	eax,DWORD [4+esp]
189	add	ecx,edx
190	mov	DWORD [60+esp],edx
191	xor	ebx,ecx
192	add	eax,ebp		; QR(1,6,11,12)
193	rol	ebx,7
194	xor	edi,eax
195	mov	DWORD [40+esp],ecx
196	rol	edi,16
197	mov	DWORD [20+esp],ebx
198	add	esi,edi
199	mov	ecx,DWORD [32+esp]
200	xor	ebp,esi
201	mov	edx,DWORD [52+esp]
202	rol	ebp,12
203	mov	ebx,DWORD [28+esp]
204	add	eax,ebp
205	xor	edi,eax
206	mov	DWORD [4+esp],eax
207	rol	edi,8
208	mov	eax,DWORD [8+esp]
209	add	esi,edi
210	mov	DWORD [48+esp],edi
211	xor	ebp,esi
212	add	eax,ebx		; QR(2,7,8,13)
213	rol	ebp,7
214	xor	edx,eax
215	mov	DWORD [44+esp],esi
216	rol	edx,16
217	mov	DWORD [24+esp],ebp
218	add	ecx,edx
219	mov	esi,DWORD [36+esp]
220	xor	ebx,ecx
221	mov	edi,DWORD [56+esp]
222	rol	ebx,12
223	mov	ebp,DWORD [16+esp]
224	add	eax,ebx
225	xor	edx,eax
226	mov	DWORD [8+esp],eax
227	rol	edx,8
228	mov	eax,DWORD [12+esp]
229	add	ecx,edx
230	mov	DWORD [52+esp],edx
231	xor	ebx,ecx
232	add	eax,ebp		; QR(3,4,9,14)
233	rol	ebx,7
234	xor	edi,eax
235	rol	edi,16
236	mov	DWORD [28+esp],ebx
237	add	esi,edi
238	xor	ebp,esi
239	mov	edx,DWORD [48+esp]	; reload dword 12 for the next pass / the output code
240	rol	ebp,12
241	mov	ebx,DWORD [128+esp]	; reload the iteration counter
242	add	eax,ebp
243	xor	edi,eax
244	mov	DWORD [12+esp],eax
245	rol	edi,8
246	mov	eax,DWORD [esp]
247	add	esi,edi
248	mov	DWORD [56+esp],edi
249	xor	ebp,esi
250	rol	ebp,7
251	dec	ebx
252	jnz	NEAR L$004loop
; After the rounds: add the saved input state back onto the working state
; and, when at least 64 bytes remain, XOR the 64-byte result with the source
; and store it to the destination. At this point eax/ebp/ecx/esi/edx/edi
; hold state dwords 0/4/8/9/12/14; the rest are in [0..63+esp].
253	mov	ebx,DWORD [160+esp]	; ebx = bytes remaining
254	add	eax,1634760805
255	add	ebp,DWORD [80+esp]
256	add	ecx,DWORD [96+esp]
257	add	esi,DWORD [100+esp]
258	cmp	ebx,64
259	jb	NEAR L$005tail	; fewer than 64 bytes left (unsigned compare)
260	mov	ebx,DWORD [156+esp]	; ebx = source pointer
261	add	edx,DWORD [112+esp]
262	add	edi,DWORD [120+esp]
263	xor	eax,DWORD [ebx]
264	xor	ebp,DWORD [16+ebx]
265	mov	DWORD [esp],eax	; park output dword 0: eax is needed for the destination pointer
266	mov	eax,DWORD [152+esp]	; eax = destination pointer
267	xor	ecx,DWORD [32+ebx]
268	xor	esi,DWORD [36+ebx]
269	xor	edx,DWORD [48+ebx]
270	xor	edi,DWORD [56+ebx]
271	mov	DWORD [16+eax],ebp
272	mov	DWORD [32+eax],ecx
273	mov	DWORD [36+eax],esi
274	mov	DWORD [48+eax],edx
275	mov	DWORD [56+eax],edi
; Second group: state dwords 1, 2, 3, 5, 6.
276	mov	ebp,DWORD [4+esp]
277	mov	ecx,DWORD [8+esp]
278	mov	esi,DWORD [12+esp]
279	mov	edx,DWORD [20+esp]
280	mov	edi,DWORD [24+esp]
281	add	ebp,857760878
282	add	ecx,2036477234
283	add	esi,1797285236
284	add	edx,DWORD [84+esp]
285	add	edi,DWORD [88+esp]
286	xor	ebp,DWORD [4+ebx]
287	xor	ecx,DWORD [8+ebx]
288	xor	esi,DWORD [12+ebx]
289	xor	edx,DWORD [20+ebx]
290	xor	edi,DWORD [24+ebx]
291	mov	DWORD [4+eax],ebp
292	mov	DWORD [8+eax],ecx
293	mov	DWORD [12+eax],esi
294	mov	DWORD [20+eax],edx
295	mov	DWORD [24+eax],edi
; Third group: state dwords 7, 10, 11, 13, 15.
296	mov	ebp,DWORD [28+esp]
297	mov	ecx,DWORD [40+esp]
298	mov	esi,DWORD [44+esp]
299	mov	edx,DWORD [52+esp]
300	mov	edi,DWORD [60+esp]
301	add	ebp,DWORD [92+esp]
302	add	ecx,DWORD [104+esp]
303	add	esi,DWORD [108+esp]
304	add	edx,DWORD [116+esp]
305	add	edi,DWORD [124+esp]
306	xor	ebp,DWORD [28+ebx]
307	xor	ecx,DWORD [40+ebx]
308	xor	esi,DWORD [44+ebx]
309	xor	edx,DWORD [52+ebx]
310	xor	edi,DWORD [60+ebx]
311	lea	ebx,[64+ebx]	; advance the source pointer
312	mov	DWORD [28+eax],ebp
313	mov	ebp,DWORD [esp]	; fetch the parked output dword 0
314	mov	DWORD [40+eax],ecx
315	mov	ecx,DWORD [160+esp]	; ecx = bytes remaining
316	mov	DWORD [44+eax],esi
317	mov	DWORD [52+eax],edx
318	mov	DWORD [60+eax],edi
319	mov	DWORD [eax],ebp
320	lea	eax,[64+eax]	; advance the destination pointer
321	sub	ecx,64
322	jnz	NEAR L$003outer_loop	; more input: L$003outer_loop saves ebx/eax/ecx
323	jmp	NEAR L$006done
; Final partial block (reached when fewer than 64 bytes remain; the count is
; in ebx and is not written here before the byte loop). Finish adding the
; saved input state, spill all 16 keystream dwords to [0..63+esp], then XOR
; the remaining bytes one at a time.
324L$005tail:
325	add	edx,DWORD [112+esp]
326	add	edi,DWORD [120+esp]
327	mov	DWORD [esp],eax
328	mov	DWORD [16+esp],ebp
329	mov	DWORD [32+esp],ecx
330	mov	DWORD [36+esp],esi
331	mov	DWORD [48+esp],edx
332	mov	DWORD [56+esp],edi
333	mov	ebp,DWORD [4+esp]
334	mov	ecx,DWORD [8+esp]
335	mov	esi,DWORD [12+esp]
336	mov	edx,DWORD [20+esp]
337	mov	edi,DWORD [24+esp]
338	add	ebp,857760878
339	add	ecx,2036477234
340	add	esi,1797285236
341	add	edx,DWORD [84+esp]
342	add	edi,DWORD [88+esp]
343	mov	DWORD [4+esp],ebp
344	mov	DWORD [8+esp],ecx
345	mov	DWORD [12+esp],esi
346	mov	DWORD [20+esp],edx
347	mov	DWORD [24+esp],edi
348	mov	ebp,DWORD [28+esp]
349	mov	ecx,DWORD [40+esp]
350	mov	esi,DWORD [44+esp]
351	mov	edx,DWORD [52+esp]
352	mov	edi,DWORD [60+esp]
353	add	ebp,DWORD [92+esp]
354	add	ecx,DWORD [104+esp]
355	add	esi,DWORD [108+esp]
356	add	edx,DWORD [116+esp]
357	add	edi,DWORD [124+esp]
358	mov	DWORD [28+esp],ebp
359	mov	ebp,DWORD [156+esp]	; ebp = source pointer
360	mov	DWORD [40+esp],ecx
361	mov	ecx,DWORD [152+esp]	; ecx = destination pointer
362	mov	DWORD [44+esp],esi
363	xor	esi,esi		; esi = byte index
364	mov	DWORD [52+esp],edx
365	mov	DWORD [60+esp],edi
366	xor	eax,eax
367	xor	edx,edx
; dst[i] = src[i] ^ keystream[i], for ebx bytes.
368L$007tail_loop:
369	mov	al,BYTE [ebp*1+esi]
370	mov	dl,BYTE [esi*1+esp]
371	lea	esi,[1+esi]	; lea: increment the index without touching flags
372	xor	al,dl
373	mov	BYTE [esi*1+ecx-1],al	; -1 compensates for the increment above
374	dec	ebx
375	jnz	NEAR L$007tail_loop
; Release the 132-byte frame (scalar path only).
376L$006done:
377	add	esp,132
; Common exit: restore the registers pushed at entry and return.
378L$000no_data:
379	pop	edi
380	pop	esi
381	pop	ebx
382	pop	ebp
383	ret
384align	16
;-----------------------------------------------------------------------
; __ChaCha20_ssse3 -- SSE2/SSSE3 implementation, same stack arguments as
; _ChaCha20_ctr32. No `global` directive for this label is visible in this
; file; _ChaCha20_ctr32 jumps to L$ssse3_shortcut with the four registers
; already pushed and eax = address of L$pic_point.
; NOTE(review): the push sequence at this label does not set eax, which the
; code after L$ssse3_shortcut relies on -- confirm nothing enters through
; the label itself.
;
; Registers after the loads below:
;   edi = destination, esi = source, ecx = byte count,
;   edx = key pointer, ebx = counter-block pointer, eax = &L$ssse3_data
; Frame: esp is lowered by 524 bytes and rounded down to a multiple of 64;
;   [512+esp] = caller's esp, [516+esp] = key pointer, [520+esp] = counter-block pointer.
;-----------------------------------------------------------------------
385__ChaCha20_ssse3:
386	push	ebp
387	push	ebx
388	push	esi
389	push	edi
390L$ssse3_shortcut:
391	mov	edi,DWORD [20+esp]
392	mov	esi,DWORD [24+esp]
393	mov	ecx,DWORD [28+esp]
394	mov	edx,DWORD [32+esp]
395	mov	ebx,DWORD [36+esp]
396	mov	ebp,esp		; remember the unaligned esp
397	sub	esp,524
398	and	esp,-64		; movdqa needs aligned slots
399	mov	DWORD [512+esp],ebp	; reloaded into esp at L$011done
400	lea	eax,[(L$ssse3_data-L$pic_point)+eax]	; eax = &L$ssse3_data
401	movdqu	xmm3,[ebx]	; xmm3 = the 4-dword counter block
402	cmp	ecx,256
403	jb	NEAR L$0081x	; fewer than 256 bytes: one block at a time
; ---- four-blocks-at-a-time setup ----
; Each state dword is broadcast into all four lanes of an xmm register
; (pshufd 0/85/170/255), so lane k of every vector belongs to block k.
; The broadcast input state is kept around ebp = esp+384:
;   [ebp-128..ebp-80]  constants      (state dwords 0..3)
;   [ebp-64 ..ebp-16]  key dwords 0..3 (state dwords 4..7)
;   [ebp    ..ebp+48]  key dwords 4..7 (state dwords 8..11)
;   [ebp+64 ..ebp+112] counter block   (state dwords 12..15)
; ebx = esp+128 is the base of the working copy.
404	mov	DWORD [516+esp],edx
405	mov	DWORD [520+esp],ebx
406	sub	ecx,256		; ecx is kept biased by -256 inside the 4-block loop
407	lea	ebp,[384+esp]
408	movdqu	xmm7,[edx]
409	pshufd	xmm0,xmm3,0
410	pshufd	xmm1,xmm3,85
411	pshufd	xmm2,xmm3,170
412	pshufd	xmm3,xmm3,255
413	paddd	xmm0,[48+eax]	; + {0,1,2,3}: a distinct counter per lane
414	pshufd	xmm4,xmm7,0
415	pshufd	xmm5,xmm7,85
416	psubd	xmm0,[64+eax]	; - {4,4,4,4}: L$009outer_loop adds 4 before every group
417	pshufd	xmm6,xmm7,170
418	pshufd	xmm7,xmm7,255
419	movdqa	[64+ebp],xmm0
420	movdqa	[80+ebp],xmm1
421	movdqa	[96+ebp],xmm2
422	movdqa	[112+ebp],xmm3
423	movdqu	xmm3,[16+edx]
424	movdqa	[ebp-64],xmm4
425	movdqa	[ebp-48],xmm5
426	movdqa	[ebp-32],xmm6
427	movdqa	[ebp-16],xmm7
428	movdqa	xmm7,[32+eax]	; the four state constants
429	lea	ebx,[128+esp]
430	pshufd	xmm0,xmm3,0
431	pshufd	xmm1,xmm3,85
432	pshufd	xmm2,xmm3,170
433	pshufd	xmm3,xmm3,255
434	pshufd	xmm4,xmm7,0
435	pshufd	xmm5,xmm7,85
436	pshufd	xmm6,xmm7,170
437	pshufd	xmm7,xmm7,255
438	movdqa	[ebp],xmm0
439	movdqa	[16+ebp],xmm1
440	movdqa	[32+ebp],xmm2
441	movdqa	[48+ebp],xmm3
442	movdqa	[ebp-128],xmm4
443	movdqa	[ebp-112],xmm5
444	movdqa	[ebp-96],xmm6
445	movdqa	[ebp-80],xmm7
; Bias both data pointers by +128; the output code addresses the four blocks
; as [reg-128], [reg-64], [reg] and [reg+64].
446	lea	esi,[128+esi]
447	lea	edi,[128+edi]
448	jmp	NEAR L$009outer_loop
449align	16
; Start of one 4-block (256-byte) group: copy the broadcast input state from
; the ebp-based area into the ebx-based working area and advance the four
; per-lane counters by 4. State dwords 0, 4, 8, 9 and 12 are not copied to
; memory here except dword 12; they enter the round loop in
; xmm0, xmm3, xmm4, xmm5 and xmm6 respectively.
450L$009outer_loop:
451	movdqa	xmm1,[ebp-112]
452	movdqa	xmm2,[ebp-96]
453	movdqa	xmm3,[ebp-80]
454	movdqa	xmm5,[ebp-48]
455	movdqa	xmm6,[ebp-32]
456	movdqa	xmm7,[ebp-16]
457	movdqa	[ebx-112],xmm1
458	movdqa	[ebx-96],xmm2
459	movdqa	[ebx-80],xmm3
460	movdqa	[ebx-48],xmm5
461	movdqa	[ebx-32],xmm6
462	movdqa	[ebx-16],xmm7
463	movdqa	xmm2,[32+ebp]
464	movdqa	xmm3,[48+ebp]
465	movdqa	xmm4,[64+ebp]
466	movdqa	xmm5,[80+ebp]
467	movdqa	xmm6,[96+ebp]
468	movdqa	xmm7,[112+ebp]
469	paddd	xmm4,[64+eax]	; counters += {4,4,4,4}
470	movdqa	[32+ebx],xmm2
471	movdqa	[48+ebx],xmm3
472	movdqa	[64+ebx],xmm4
473	movdqa	[80+ebx],xmm5
474	movdqa	[96+ebx],xmm6
475	movdqa	[112+ebx],xmm7
476	movdqa	[64+ebp],xmm4	; keep the advanced counters as the new input state
477	movdqa	xmm0,[ebp-128]
478	movdqa	xmm6,xmm4
479	movdqa	xmm3,[ebp-64]
480	movdqa	xmm4,[ebp]
481	movdqa	xmm5,[16+ebp]
482	mov	edx,10		; the round loop runs 10 times
483	nop
484align	16
; Four-block round loop, executed 10 times (counter in edx). Every pass
; contains eight interleaved quarter-rounds (QR) on the broadcast state:
; first (0,4,8,12) (1,5,9,13) (2,6,10,14) (3,7,11,15), then
; (0,5,10,15) (1,6,11,12) (2,7,8,13) (3,4,9,14).
; Rotations by 16 and 8 use pshufb with the byte-shuffle masks at [eax] and
; [16+eax]; rotations by 12 and 7 use a pslld/psrld/por triple.
; State dword i lives at [ebx-128+16*i] whenever it is not in a register;
; loads and stores are interleaved with the arithmetic, so the statement
; order must not be changed.
485L$010loop:
486	paddd	xmm0,xmm3	; QR(0,4,8,12)
487	movdqa	xmm2,xmm3
488	pxor	xmm6,xmm0
489	pshufb	xmm6,[eax]	; rotate left 16
490	paddd	xmm4,xmm6
491	pxor	xmm2,xmm4
492	movdqa	xmm3,[ebx-48]
493	movdqa	xmm1,xmm2
494	pslld	xmm2,12
495	psrld	xmm1,20
496	por	xmm2,xmm1	; rotate left 12
497	movdqa	xmm1,[ebx-112]
498	paddd	xmm0,xmm2
499	movdqa	xmm7,[80+ebx]
500	pxor	xmm6,xmm0
501	movdqa	[ebx-128],xmm0
502	pshufb	xmm6,[16+eax]	; rotate left 8
503	paddd	xmm4,xmm6
504	movdqa	[64+ebx],xmm6
505	pxor	xmm2,xmm4
506	paddd	xmm1,xmm3	; QR(1,5,9,13)
507	movdqa	xmm0,xmm2
508	pslld	xmm2,7
509	psrld	xmm0,25
510	pxor	xmm7,xmm1
511	por	xmm2,xmm0	; rotate left 7
512	movdqa	[ebx],xmm4
513	pshufb	xmm7,[eax]
514	movdqa	[ebx-64],xmm2
515	paddd	xmm5,xmm7
516	movdqa	xmm4,[32+ebx]
517	pxor	xmm3,xmm5
518	movdqa	xmm2,[ebx-32]
519	movdqa	xmm0,xmm3
520	pslld	xmm3,12
521	psrld	xmm0,20
522	por	xmm3,xmm0
523	movdqa	xmm0,[ebx-96]
524	paddd	xmm1,xmm3
525	movdqa	xmm6,[96+ebx]
526	pxor	xmm7,xmm1
527	movdqa	[ebx-112],xmm1
528	pshufb	xmm7,[16+eax]
529	paddd	xmm5,xmm7
530	movdqa	[80+ebx],xmm7
531	pxor	xmm3,xmm5
532	paddd	xmm0,xmm2	; QR(2,6,10,14)
533	movdqa	xmm1,xmm3
534	pslld	xmm3,7
535	psrld	xmm1,25
536	pxor	xmm6,xmm0
537	por	xmm3,xmm1
538	movdqa	[16+ebx],xmm5
539	pshufb	xmm6,[eax]
540	movdqa	[ebx-48],xmm3
541	paddd	xmm4,xmm6
542	movdqa	xmm5,[48+ebx]
543	pxor	xmm2,xmm4
544	movdqa	xmm3,[ebx-16]
545	movdqa	xmm1,xmm2
546	pslld	xmm2,12
547	psrld	xmm1,20
548	por	xmm2,xmm1
549	movdqa	xmm1,[ebx-80]
550	paddd	xmm0,xmm2
551	movdqa	xmm7,[112+ebx]
552	pxor	xmm6,xmm0
553	movdqa	[ebx-96],xmm0
554	pshufb	xmm6,[16+eax]
555	paddd	xmm4,xmm6
556	movdqa	[96+ebx],xmm6
557	pxor	xmm2,xmm4
558	paddd	xmm1,xmm3	; QR(3,7,11,15)
559	movdqa	xmm0,xmm2
560	pslld	xmm2,7
561	psrld	xmm0,25
562	pxor	xmm7,xmm1
563	por	xmm2,xmm0
564	pshufb	xmm7,[eax]
565	movdqa	[ebx-32],xmm2
566	paddd	xmm5,xmm7
567	pxor	xmm3,xmm5
568	movdqa	xmm2,[ebx-48]
569	movdqa	xmm0,xmm3
570	pslld	xmm3,12
571	psrld	xmm0,20
572	por	xmm3,xmm0
573	movdqa	xmm0,[ebx-128]
574	paddd	xmm1,xmm3
575	pxor	xmm7,xmm1
576	movdqa	[ebx-80],xmm1
577	pshufb	xmm7,[16+eax]
578	paddd	xmm5,xmm7
579	movdqa	xmm6,xmm7	; dword 15 stays in a register for the next quarter-round
580	pxor	xmm3,xmm5
581	paddd	xmm0,xmm2	; QR(0,5,10,15)
582	movdqa	xmm1,xmm3
583	pslld	xmm3,7
584	psrld	xmm1,25
585	pxor	xmm6,xmm0
586	por	xmm3,xmm1
587	pshufb	xmm6,[eax]
588	movdqa	[ebx-16],xmm3
589	paddd	xmm4,xmm6
590	pxor	xmm2,xmm4
591	movdqa	xmm3,[ebx-32]
592	movdqa	xmm1,xmm2
593	pslld	xmm2,12
594	psrld	xmm1,20
595	por	xmm2,xmm1
596	movdqa	xmm1,[ebx-112]
597	paddd	xmm0,xmm2
598	movdqa	xmm7,[64+ebx]
599	pxor	xmm6,xmm0
600	movdqa	[ebx-128],xmm0
601	pshufb	xmm6,[16+eax]
602	paddd	xmm4,xmm6
603	movdqa	[112+ebx],xmm6
604	pxor	xmm2,xmm4
605	paddd	xmm1,xmm3	; QR(1,6,11,12)
606	movdqa	xmm0,xmm2
607	pslld	xmm2,7
608	psrld	xmm0,25
609	pxor	xmm7,xmm1
610	por	xmm2,xmm0
611	movdqa	[32+ebx],xmm4
612	pshufb	xmm7,[eax]
613	movdqa	[ebx-48],xmm2
614	paddd	xmm5,xmm7
615	movdqa	xmm4,[ebx]
616	pxor	xmm3,xmm5
617	movdqa	xmm2,[ebx-16]
618	movdqa	xmm0,xmm3
619	pslld	xmm3,12
620	psrld	xmm0,20
621	por	xmm3,xmm0
622	movdqa	xmm0,[ebx-96]
623	paddd	xmm1,xmm3
624	movdqa	xmm6,[80+ebx]
625	pxor	xmm7,xmm1
626	movdqa	[ebx-112],xmm1
627	pshufb	xmm7,[16+eax]
628	paddd	xmm5,xmm7
629	movdqa	[64+ebx],xmm7
630	pxor	xmm3,xmm5
631	paddd	xmm0,xmm2	; QR(2,7,8,13)
632	movdqa	xmm1,xmm3
633	pslld	xmm3,7
634	psrld	xmm1,25
635	pxor	xmm6,xmm0
636	por	xmm3,xmm1
637	movdqa	[48+ebx],xmm5
638	pshufb	xmm6,[eax]
639	movdqa	[ebx-32],xmm3
640	paddd	xmm4,xmm6
641	movdqa	xmm5,[16+ebx]
642	pxor	xmm2,xmm4
643	movdqa	xmm3,[ebx-64]
644	movdqa	xmm1,xmm2
645	pslld	xmm2,12
646	psrld	xmm1,20
647	por	xmm2,xmm1
648	movdqa	xmm1,[ebx-80]
649	paddd	xmm0,xmm2
650	movdqa	xmm7,[96+ebx]
651	pxor	xmm6,xmm0
652	movdqa	[ebx-96],xmm0
653	pshufb	xmm6,[16+eax]
654	paddd	xmm4,xmm6
655	movdqa	[80+ebx],xmm6
656	pxor	xmm2,xmm4
657	paddd	xmm1,xmm3	; QR(3,4,9,14)
658	movdqa	xmm0,xmm2
659	pslld	xmm2,7
660	psrld	xmm0,25
661	pxor	xmm7,xmm1
662	por	xmm2,xmm0
663	pshufb	xmm7,[eax]
664	movdqa	[ebx-16],xmm2
665	paddd	xmm5,xmm7
666	pxor	xmm3,xmm5
667	movdqa	xmm0,xmm3
668	pslld	xmm3,12
669	psrld	xmm0,20
670	por	xmm3,xmm0
671	movdqa	xmm0,[ebx-128]	; reload dword 0 for the next pass
672	paddd	xmm1,xmm3
673	movdqa	xmm6,[64+ebx]	; reload dword 12 for the next pass
674	pxor	xmm7,xmm1
675	movdqa	[ebx-80],xmm1
676	pshufb	xmm7,[16+eax]
677	paddd	xmm5,xmm7
678	movdqa	[96+ebx],xmm7
679	pxor	xmm3,xmm5
680	movdqa	xmm1,xmm3
681	pslld	xmm3,7
682	psrld	xmm1,25
683	por	xmm3,xmm1
684	dec	edx
685	jnz	NEAR L$010loop
; Output of one 4-block group. First spill the state dwords still held in
; registers. Then, for each run of four state dwords (0..3, 4..7, 8..11,
; 12..15): add the broadcast input state, transpose the 4x4 dword matrix
; with punpck{l,h}dq / punpck{l,h}qdq so that xmm0, xmm1, xmm6 and xmm3 each
; hold 16 consecutive keystream bytes of blocks 0, 1, 2 and 3, XOR them with
; the source and store to the destination. The biased pointers address the
; four blocks as [-128], [-64], [0] and [+64].
686	movdqa	[ebx-64],xmm3
687	movdqa	[ebx],xmm4
688	movdqa	[16+ebx],xmm5
689	movdqa	[64+ebx],xmm6
690	movdqa	[96+ebx],xmm7
691	movdqa	xmm1,[ebx-112]
692	movdqa	xmm2,[ebx-96]
693	movdqa	xmm3,[ebx-80]
; -- state dwords 0..3 --
694	paddd	xmm0,[ebp-128]
695	paddd	xmm1,[ebp-112]
696	paddd	xmm2,[ebp-96]
697	paddd	xmm3,[ebp-80]
698	movdqa	xmm6,xmm0
699	punpckldq	xmm0,xmm1
700	movdqa	xmm7,xmm2
701	punpckldq	xmm2,xmm3
702	punpckhdq	xmm6,xmm1
703	punpckhdq	xmm7,xmm3
704	movdqa	xmm1,xmm0
705	punpcklqdq	xmm0,xmm2
706	movdqa	xmm3,xmm6
707	punpcklqdq	xmm6,xmm7
708	punpckhqdq	xmm1,xmm2
709	punpckhqdq	xmm3,xmm7
710	movdqu	xmm4,[esi-128]
711	movdqu	xmm5,[esi-64]
712	movdqu	xmm2,[esi]
713	movdqu	xmm7,[64+esi]
714	lea	esi,[16+esi]
715	pxor	xmm4,xmm0
716	movdqa	xmm0,[ebx-64]	; start loading state dwords 4..7
717	pxor	xmm5,xmm1
718	movdqa	xmm1,[ebx-48]
719	pxor	xmm6,xmm2
720	movdqa	xmm2,[ebx-32]
721	pxor	xmm7,xmm3
722	movdqa	xmm3,[ebx-16]
723	movdqu	[edi-128],xmm4
724	movdqu	[edi-64],xmm5
725	movdqu	[edi],xmm6
726	movdqu	[64+edi],xmm7
727	lea	edi,[16+edi]
; -- state dwords 4..7 --
728	paddd	xmm0,[ebp-64]
729	paddd	xmm1,[ebp-48]
730	paddd	xmm2,[ebp-32]
731	paddd	xmm3,[ebp-16]
732	movdqa	xmm6,xmm0
733	punpckldq	xmm0,xmm1
734	movdqa	xmm7,xmm2
735	punpckldq	xmm2,xmm3
736	punpckhdq	xmm6,xmm1
737	punpckhdq	xmm7,xmm3
738	movdqa	xmm1,xmm0
739	punpcklqdq	xmm0,xmm2
740	movdqa	xmm3,xmm6
741	punpcklqdq	xmm6,xmm7
742	punpckhqdq	xmm1,xmm2
743	punpckhqdq	xmm3,xmm7
744	movdqu	xmm4,[esi-128]
745	movdqu	xmm5,[esi-64]
746	movdqu	xmm2,[esi]
747	movdqu	xmm7,[64+esi]
748	lea	esi,[16+esi]
749	pxor	xmm4,xmm0
750	movdqa	xmm0,[ebx]	; start loading state dwords 8..11
751	pxor	xmm5,xmm1
752	movdqa	xmm1,[16+ebx]
753	pxor	xmm6,xmm2
754	movdqa	xmm2,[32+ebx]
755	pxor	xmm7,xmm3
756	movdqa	xmm3,[48+ebx]
757	movdqu	[edi-128],xmm4
758	movdqu	[edi-64],xmm5
759	movdqu	[edi],xmm6
760	movdqu	[64+edi],xmm7
761	lea	edi,[16+edi]
; -- state dwords 8..11 --
762	paddd	xmm0,[ebp]
763	paddd	xmm1,[16+ebp]
764	paddd	xmm2,[32+ebp]
765	paddd	xmm3,[48+ebp]
766	movdqa	xmm6,xmm0
767	punpckldq	xmm0,xmm1
768	movdqa	xmm7,xmm2
769	punpckldq	xmm2,xmm3
770	punpckhdq	xmm6,xmm1
771	punpckhdq	xmm7,xmm3
772	movdqa	xmm1,xmm0
773	punpcklqdq	xmm0,xmm2
774	movdqa	xmm3,xmm6
775	punpcklqdq	xmm6,xmm7
776	punpckhqdq	xmm1,xmm2
777	punpckhqdq	xmm3,xmm7
778	movdqu	xmm4,[esi-128]
779	movdqu	xmm5,[esi-64]
780	movdqu	xmm2,[esi]
781	movdqu	xmm7,[64+esi]
782	lea	esi,[16+esi]
783	pxor	xmm4,xmm0
784	movdqa	xmm0,[64+ebx]	; start loading state dwords 12..15
785	pxor	xmm5,xmm1
786	movdqa	xmm1,[80+ebx]
787	pxor	xmm6,xmm2
788	movdqa	xmm2,[96+ebx]
789	pxor	xmm7,xmm3
790	movdqa	xmm3,[112+ebx]
791	movdqu	[edi-128],xmm4
792	movdqu	[edi-64],xmm5
793	movdqu	[edi],xmm6
794	movdqu	[64+edi],xmm7
795	lea	edi,[16+edi]
; -- state dwords 12..15 --
796	paddd	xmm0,[64+ebp]
797	paddd	xmm1,[80+ebp]
798	paddd	xmm2,[96+ebp]
799	paddd	xmm3,[112+ebp]
800	movdqa	xmm6,xmm0
801	punpckldq	xmm0,xmm1
802	movdqa	xmm7,xmm2
803	punpckldq	xmm2,xmm3
804	punpckhdq	xmm6,xmm1
805	punpckhdq	xmm7,xmm3
806	movdqa	xmm1,xmm0
807	punpcklqdq	xmm0,xmm2
808	movdqa	xmm3,xmm6
809	punpcklqdq	xmm6,xmm7
810	punpckhqdq	xmm1,xmm2
811	punpckhqdq	xmm3,xmm7
812	movdqu	xmm4,[esi-128]
813	movdqu	xmm5,[esi-64]
814	movdqu	xmm2,[esi]
815	movdqu	xmm7,[64+esi]
816	lea	esi,[208+esi]	; 16+16+16+208 = 256 bytes per group
817	pxor	xmm4,xmm0
818	pxor	xmm5,xmm1
819	pxor	xmm6,xmm2
820	pxor	xmm7,xmm3
821	movdqu	[edi-128],xmm4
822	movdqu	[edi-64],xmm5
823	movdqu	[edi],xmm6
824	movdqu	[64+edi],xmm7
825	lea	edi,[208+edi]
; ecx is biased by -256, so no borrow means another full group is available.
826	sub	ecx,256
827	jnc	NEAR L$009outer_loop
828	add	ecx,256		; undo the bias: ecx = bytes actually remaining
829	jz	NEAR L$011done
; 1..255 bytes remain: fall into the single-block path. Undo the +128 pointer
; bias, reload the key and counter-block pointers, and build the next counter
; block in xmm3: dword 0 = lane 0 of the last group's counters + 4, dwords
; 1..3 taken unchanged from the caller's counter block.
830	mov	ebx,DWORD [520+esp]
831	lea	esi,[esi-128]
832	mov	edx,DWORD [516+esp]
833	lea	edi,[edi-128]
834	movd	xmm2,DWORD [64+ebp]
835	movdqu	xmm3,[ebx]
836	paddd	xmm2,[96+eax]	; + {4,0,0,0}
837	pand	xmm3,[112+eax]	; mask {0,-1,-1,-1} clears dword 0
838	por	xmm3,xmm2
; Single-block path setup. Expects xmm3 = counter block, edx = key pointer,
; eax = &L$ssse3_data. The block's input state is kept at [0..63+esp] as four
; 16-byte rows: constants, key dwords 0..3, key dwords 4..7, counter block.
; xmm6 / xmm7 hold the rotate-by-16 / rotate-by-8 byte-shuffle masks.
839L$0081x:
840	movdqa	xmm0,[32+eax]
841	movdqu	xmm1,[edx]
842	movdqu	xmm2,[16+edx]
843	movdqa	xmm6,[eax]
844	movdqa	xmm7,[16+eax]
; NOTE(review): this dword is overwritten by the `movdqa [48+esp],xmm3`
; four instructions below before anything reads it.
845	mov	DWORD [48+esp],ebp
846	movdqa	[esp],xmm0
847	movdqa	[16+esp],xmm1
848	movdqa	[32+esp],xmm2
849	movdqa	[48+esp],xmm3
850	mov	edx,10		; the round loop runs 10 times
851	jmp	NEAR L$012loop1x
852align	16
; Subsequent blocks: reload rows 0..2 and add {1,0,0,0} to the stored
; counter row.
853L$013outer1x:
854	movdqa	xmm3,[80+eax]
855	movdqa	xmm0,[esp]
856	movdqa	xmm1,[16+esp]
857	movdqa	xmm2,[32+esp]
858	paddd	xmm3,[48+esp]
859	mov	edx,10
860	movdqa	[48+esp],xmm3
861	jmp	NEAR L$012loop1x
862align	16
; Single-block round loop, executed 10 times (counter in edx). xmm0..xmm3
; hold the four rows of the state. Each pass applies the add / xor / rotate
; (16,12,8,7) sequence to all four columns at once, permutes the lanes of
; rows 1..3 with pshufd, applies the sequence again, and permutes the lanes
; back. The `db` lines are hand-encoded pshufb instructions:
;   102,15,56,0,222 = pshufb xmm3,xmm6 (rotate left 16)
;   102,15,56,0,223 = pshufb xmm3,xmm7 (rotate left 8)
863L$012loop1x:
864	paddd	xmm0,xmm1
865	pxor	xmm3,xmm0
866db	102,15,56,0,222
867	paddd	xmm2,xmm3
868	pxor	xmm1,xmm2
869	movdqa	xmm4,xmm1
870	psrld	xmm1,20
871	pslld	xmm4,12
872	por	xmm1,xmm4	; rotate left 12
873	paddd	xmm0,xmm1
874	pxor	xmm3,xmm0
875db	102,15,56,0,223
876	paddd	xmm2,xmm3
877	pxor	xmm1,xmm2
878	movdqa	xmm4,xmm1
879	psrld	xmm1,25
880	pslld	xmm4,7
881	por	xmm1,xmm4	; rotate left 7
882	pshufd	xmm2,xmm2,78
883	pshufd	xmm1,xmm1,57
884	pshufd	xmm3,xmm3,147
885	nop
886	paddd	xmm0,xmm1
887	pxor	xmm3,xmm0
888db	102,15,56,0,222
889	paddd	xmm2,xmm3
890	pxor	xmm1,xmm2
891	movdqa	xmm4,xmm1
892	psrld	xmm1,20
893	pslld	xmm4,12
894	por	xmm1,xmm4
895	paddd	xmm0,xmm1
896	pxor	xmm3,xmm0
897db	102,15,56,0,223
898	paddd	xmm2,xmm3
899	pxor	xmm1,xmm2
900	movdqa	xmm4,xmm1
901	psrld	xmm1,25
902	pslld	xmm4,7
903	por	xmm1,xmm4
904	pshufd	xmm2,xmm2,78
905	pshufd	xmm1,xmm1,147	; inverse of the permutation applied above
906	pshufd	xmm3,xmm3,57
907	dec	edx
908	jnz	NEAR L$012loop1x
; Add the input state back to obtain the 64-byte keystream block.
909	paddd	xmm0,[esp]
910	paddd	xmm1,[16+esp]
911	paddd	xmm2,[32+esp]
912	paddd	xmm3,[48+esp]
913	cmp	ecx,64
914	jb	NEAR L$014tail	; fewer than 64 bytes left (unsigned compare)
; Full block: dst[0..63] = src[0..63] ^ keystream.
915	movdqu	xmm4,[esi]
916	movdqu	xmm5,[16+esi]
917	pxor	xmm0,xmm4
918	movdqu	xmm4,[32+esi]
919	pxor	xmm1,xmm5
920	movdqu	xmm5,[48+esi]
921	pxor	xmm2,xmm4
922	pxor	xmm3,xmm5
923	lea	esi,[64+esi]
924	movdqu	[edi],xmm0
925	movdqu	[16+edi],xmm1
926	movdqu	[32+edi],xmm2
927	movdqu	[48+edi],xmm3
928	lea	edi,[64+edi]
929	sub	ecx,64
930	jnz	NEAR L$013outer1x	; more input: next block
931	jmp	NEAR L$011done
; Final partial block of the SSSE3 path (ecx = 1..63 bytes): spill the
; keystream in xmm0..xmm3 to [0..63+esp] and XOR byte by byte.
932L$014tail:
933	movdqa	[esp],xmm0
934	movdqa	[16+esp],xmm1
935	movdqa	[32+esp],xmm2
936	movdqa	[48+esp],xmm3
937	xor	eax,eax
938	xor	edx,edx
939	xor	ebp,ebp		; ebp = byte index
; dst[i] = keystream[i] ^ src[i], for ecx bytes.
940L$015tail_loop:
941	mov	al,BYTE [ebp*1+esp]
942	mov	dl,BYTE [ebp*1+esi]
943	lea	ebp,[1+ebp]	; lea: increment the index without touching flags
944	xor	al,dl
945	mov	BYTE [ebp*1+edi-1],al	; -1 compensates for the increment above
946	dec	ecx
947	jnz	NEAR L$015tail_loop
; Common exit of the SSSE3 path: reload the esp value saved at [512+esp],
; restore the pushed registers and return.
948L$011done:
949	mov	esp,DWORD [512+esp]
950	pop	edi
951	pop	esi
952	pop	ebx
953	pop	ebp
954	ret
955align	64
; Constant table for the SSSE3 path; the code addresses it as [offset+eax].
956L$ssse3_data:
957db	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13	; +0:   pshufb mask, rotates each dword left by 16
958db	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14	; +16:  pshufb mask, rotates each dword left by 8
959dd	1634760805,857760878,2036477234,1797285236	; +32:  state dwords 0..3 (ASCII "expand 32-byte k")
960dd	0,1,2,3	; +48:  per-lane counter offsets for the 4-block path
961dd	4,4,4,4	; +64:  counter step per 4-block group
962dd	1,0,0,0	; +80:  counter step per single block
963dd	4,0,0,0	; +96:  counter step when leaving the 4-block path
964dd	0,-1,-1,-1	; +112: mask that clears dword 0 of the counter block
965align	64
; NUL-terminated ASCII: "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>"
966db	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
967db	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
968db	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
969db	114,103,62,0
; 16-byte common symbol; _ChaCha20_ctr32 tests bits in its first two dwords
; to choose between the scalar and SSSE3 paths.
970segment	.bss
971common	_OPENSSL_ia32cap_P 16
972