• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#include <openssl/asm_base.h>
5
6#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
7.text
// ChaCha20_ctr32: 32-bit x86 entry point taking five stack arguments.
// After the four register pushes below the arguments are found at:
//   20(%esp) arg1 - pointer that the output code stores to
//   24(%esp) arg2 - pointer whose bytes are XORed with the generated block
//   28(%esp) arg3 - byte count
//   32(%esp) arg4 - pointer to 8 dwords (copied into the state)
//   36(%esp) arg5 - pointer to 4 dwords (copied into the state)
// NOTE(review): presumably (out, in, len, key, counter) - confirm against
// the C prototype, which is not visible in this file.
8.globl	ChaCha20_ctr32
9.hidden	ChaCha20_ctr32
10.type	ChaCha20_ctr32,@function
11.align	16
12ChaCha20_ctr32:
13.L_ChaCha20_ctr32_begin:
// Save the registers that are restored before returning.
14	pushl	%ebp
15	pushl	%ebx
16	pushl	%esi
17	pushl	%edi
// Return immediately when the byte count (arg3) is zero.
18	xorl	%eax,%eax
19	cmpl	28(%esp),%eax
20	je	.L000no_data
// call/pop pair: %eax = run-time address of .Lpic_point, used as the base
// for position-independent addressing.
21	call	.Lpic_point
22.Lpic_point:
23	popl	%eax
24	leal	OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp
// Take the SSSE3 path only when bit 24 of capability word 0 and bit 9 of
// capability word 1 are both set (presumably the FXSR and SSSE3 feature
// bits - confirm against the OPENSSL_ia32cap_P layout).
// %eax must still hold the address of .Lpic_point at the jump:
// .Lssse3_shortcut uses it to locate .Lssse3_data.
25	testl	$16777216,(%ebp)
26	jz	.L001x86
27	testl	$512,4(%ebp)
28	jz	.L001x86
29	jmp	.Lssse3_shortcut
// Scalar (integer-register) path. %esi = arg4, %edi = arg5.
// Reserves 132 bytes of stack, used by the code that follows as:
//   0..60(%esp)     working state, 16 dwords
//   80..108(%esp)   copy of the 8 dwords read from arg4
//   112..124(%esp)  copy of the 4 dwords read from arg5
//   128(%esp)       spill slot for the round-loop counter
// After the subl, arg1/arg2/arg3 are at 152/156/160(%esp).
30.L001x86:
31	movl	32(%esp),%esi
32	movl	36(%esp),%edi
33	subl	$132,%esp
34	movl	(%esi),%eax
35	movl	4(%esi),%ebx
36	movl	8(%esi),%ecx
37	movl	12(%esi),%edx
38	movl	%eax,80(%esp)
39	movl	%ebx,84(%esp)
40	movl	%ecx,88(%esp)
41	movl	%edx,92(%esp)
42	movl	16(%esi),%eax
43	movl	20(%esi),%ebx
44	movl	24(%esi),%ecx
45	movl	28(%esi),%edx
46	movl	%eax,96(%esp)
47	movl	%ebx,100(%esp)
48	movl	%ecx,104(%esp)
49	movl	%edx,108(%esp)
50	movl	(%edi),%eax
51	movl	4(%edi),%ebx
52	movl	8(%edi),%ecx
53	movl	12(%edi),%edx
// The first dword from arg5 is stored minus one because .L002entry adds
// one to 112(%esp) before every block, including the first.
54	subl	$1,%eax
55	movl	%eax,112(%esp)
56	movl	%ebx,116(%esp)
57	movl	%ecx,120(%esp)
58	movl	%edx,124(%esp)
59	jmp	.L002entry
// .L003outer_loop: re-entry point after a full 64-byte block was written.
// Stores the advanced source pointer (%ebx), destination pointer (%eax)
// and remaining byte count (%ecx) back into the argument slots.
60.align	16
61.L003outer_loop:
62	movl	%ebx,156(%esp)
63	movl	%eax,152(%esp)
64	movl	%ecx,160(%esp)
// .L002entry: build the working state for one block.
// Words 0..3 are the four constants below (the little-endian dwords of the
// ASCII string "expand 32-byte k"); word 0 stays in %eax, words 1..3 go to
// 4..12(%esp).
65.L002entry:
66	movl	$1634760805,%eax
67	movl	$857760878,4(%esp)
68	movl	$2036477234,8(%esp)
69	movl	$1797285236,12(%esp)
// Copy the saved input words from 80..124(%esp) into the working area at
// 16..60(%esp). Words 4, 8, 9 and 12 are not stored: they are kept in
// %ebp, %ecx, %esi and %edx for the start of the round loop.
70	movl	84(%esp),%ebx
71	movl	88(%esp),%ebp
72	movl	104(%esp),%ecx
73	movl	108(%esp),%esi
74	movl	116(%esp),%edx
75	movl	120(%esp),%edi
76	movl	%ebx,20(%esp)
77	movl	%ebp,24(%esp)
78	movl	%ecx,40(%esp)
79	movl	%esi,44(%esp)
80	movl	%edx,52(%esp)
81	movl	%edi,56(%esp)
82	movl	92(%esp),%ebx
83	movl	124(%esp),%edi
84	movl	112(%esp),%edx
85	movl	80(%esp),%ebp
86	movl	96(%esp),%ecx
87	movl	100(%esp),%esi
// Advance word 12 (the value at 112(%esp)) by one for this block and write
// it back so the next block continues from it.
88	addl	$1,%edx
89	movl	%ebx,28(%esp)
90	movl	%edi,60(%esp)
91	movl	%edx,112(%esp)
// %ebx = number of iterations of .L004loop.
92	movl	$10,%ebx
93	jmp	.L004loop
// .L004loop: scalar round loop, executed ten times (%ebx counts down from
// 10). Each iteration contains eight interleaved add/xor/rotate groups using
// left-rotate amounts 16, 12, 8 and 7 (the ChaCha quarter-round pattern).
// State words move between registers and the working area at 0..60(%esp);
// the instruction order is hand-scheduled and must not be changed.
94.align	16
95.L004loop:
96	addl	%ebp,%eax
// Park the iteration counter in 128(%esp); %ebx is used as scratch below.
97	movl	%ebx,128(%esp)
98	movl	%ebp,%ebx
99	xorl	%eax,%edx
100	roll	$16,%edx
101	addl	%edx,%ecx
102	xorl	%ecx,%ebx
103	movl	52(%esp),%edi
104	roll	$12,%ebx
105	movl	20(%esp),%ebp
106	addl	%ebx,%eax
107	xorl	%eax,%edx
108	movl	%eax,(%esp)
109	roll	$8,%edx
110	movl	4(%esp),%eax
111	addl	%edx,%ecx
112	movl	%edx,48(%esp)
113	xorl	%ecx,%ebx
114	addl	%ebp,%eax
115	roll	$7,%ebx
116	xorl	%eax,%edi
117	movl	%ecx,32(%esp)
118	roll	$16,%edi
119	movl	%ebx,16(%esp)
120	addl	%edi,%esi
121	movl	40(%esp),%ecx
122	xorl	%esi,%ebp
123	movl	56(%esp),%edx
124	roll	$12,%ebp
125	movl	24(%esp),%ebx
126	addl	%ebp,%eax
127	xorl	%eax,%edi
128	movl	%eax,4(%esp)
129	roll	$8,%edi
130	movl	8(%esp),%eax
131	addl	%edi,%esi
132	movl	%edi,52(%esp)
133	xorl	%esi,%ebp
134	addl	%ebx,%eax
135	roll	$7,%ebp
136	xorl	%eax,%edx
137	movl	%esi,36(%esp)
138	roll	$16,%edx
139	movl	%ebp,20(%esp)
140	addl	%edx,%ecx
141	movl	44(%esp),%esi
142	xorl	%ecx,%ebx
143	movl	60(%esp),%edi
144	roll	$12,%ebx
145	movl	28(%esp),%ebp
146	addl	%ebx,%eax
147	xorl	%eax,%edx
148	movl	%eax,8(%esp)
149	roll	$8,%edx
150	movl	12(%esp),%eax
151	addl	%edx,%ecx
152	movl	%edx,56(%esp)
153	xorl	%ecx,%ebx
154	addl	%ebp,%eax
155	roll	$7,%ebx
156	xorl	%eax,%edi
157	roll	$16,%edi
158	movl	%ebx,24(%esp)
159	addl	%edi,%esi
160	xorl	%esi,%ebp
161	roll	$12,%ebp
162	movl	20(%esp),%ebx
163	addl	%ebp,%eax
164	xorl	%eax,%edi
165	movl	%eax,12(%esp)
166	roll	$8,%edi
167	movl	(%esp),%eax
168	addl	%edi,%esi
169	movl	%edi,%edx
170	xorl	%esi,%ebp
171	addl	%ebx,%eax
172	roll	$7,%ebp
173	xorl	%eax,%edx
174	roll	$16,%edx
175	movl	%ebp,28(%esp)
176	addl	%edx,%ecx
177	xorl	%ecx,%ebx
178	movl	48(%esp),%edi
179	roll	$12,%ebx
180	movl	24(%esp),%ebp
181	addl	%ebx,%eax
182	xorl	%eax,%edx
183	movl	%eax,(%esp)
184	roll	$8,%edx
185	movl	4(%esp),%eax
186	addl	%edx,%ecx
187	movl	%edx,60(%esp)
188	xorl	%ecx,%ebx
189	addl	%ebp,%eax
190	roll	$7,%ebx
191	xorl	%eax,%edi
192	movl	%ecx,40(%esp)
193	roll	$16,%edi
194	movl	%ebx,20(%esp)
195	addl	%edi,%esi
196	movl	32(%esp),%ecx
197	xorl	%esi,%ebp
198	movl	52(%esp),%edx
199	roll	$12,%ebp
200	movl	28(%esp),%ebx
201	addl	%ebp,%eax
202	xorl	%eax,%edi
203	movl	%eax,4(%esp)
204	roll	$8,%edi
205	movl	8(%esp),%eax
206	addl	%edi,%esi
207	movl	%edi,48(%esp)
208	xorl	%esi,%ebp
209	addl	%ebx,%eax
210	roll	$7,%ebp
211	xorl	%eax,%edx
212	movl	%esi,44(%esp)
213	roll	$16,%edx
214	movl	%ebp,24(%esp)
215	addl	%edx,%ecx
216	movl	36(%esp),%esi
217	xorl	%ecx,%ebx
218	movl	56(%esp),%edi
219	roll	$12,%ebx
220	movl	16(%esp),%ebp
221	addl	%ebx,%eax
222	xorl	%eax,%edx
223	movl	%eax,8(%esp)
224	roll	$8,%edx
225	movl	12(%esp),%eax
226	addl	%edx,%ecx
227	movl	%edx,52(%esp)
228	xorl	%ecx,%ebx
229	addl	%ebp,%eax
230	roll	$7,%ebx
231	xorl	%eax,%edi
232	roll	$16,%edi
233	movl	%ebx,28(%esp)
234	addl	%edi,%esi
235	xorl	%esi,%ebp
236	movl	48(%esp),%edx
237	roll	$12,%ebp
// Reload the iteration counter parked at the top of the loop.
238	movl	128(%esp),%ebx
239	addl	%ebp,%eax
240	xorl	%eax,%edi
241	movl	%eax,12(%esp)
242	roll	$8,%edi
243	movl	(%esp),%eax
244	addl	%edi,%esi
245	movl	%edi,56(%esp)
246	xorl	%esi,%ebp
247	roll	$7,%ebp
248	decl	%ebx
249	jnz	.L004loop
// Rounds finished. %ebx = remaining byte count. Start adding the saved
// input words back into the words that are still in registers
// (%eax, %ebp, %ecx, %esi), then branch to the tail if fewer than 64 bytes
// remain.
250	movl	160(%esp),%ebx
251	addl	$1634760805,%eax
252	addl	80(%esp),%ebp
253	addl	96(%esp),%ecx
254	addl	100(%esp),%esi
255	cmpl	$64,%ebx
256	jb	.L005tail
// Full 64-byte block: %ebx = source pointer, %eax = destination pointer.
// Each word has its saved input word added, is XORed with the matching
// source dword and is stored to the destination.
257	movl	156(%esp),%ebx
258	addl	112(%esp),%edx
259	addl	120(%esp),%edi
260	xorl	(%ebx),%eax
261	xorl	16(%ebx),%ebp
// Park output dword 0 at (%esp): %eax is needed for the destination pointer.
262	movl	%eax,(%esp)
263	movl	152(%esp),%eax
264	xorl	32(%ebx),%ecx
265	xorl	36(%ebx),%esi
266	xorl	48(%ebx),%edx
267	xorl	56(%ebx),%edi
268	movl	%ebp,16(%eax)
269	movl	%ecx,32(%eax)
270	movl	%esi,36(%eax)
271	movl	%edx,48(%eax)
272	movl	%edi,56(%eax)
273	movl	4(%esp),%ebp
274	movl	8(%esp),%ecx
275	movl	12(%esp),%esi
276	movl	20(%esp),%edx
277	movl	24(%esp),%edi
278	addl	$857760878,%ebp
279	addl	$2036477234,%ecx
280	addl	$1797285236,%esi
281	addl	84(%esp),%edx
282	addl	88(%esp),%edi
283	xorl	4(%ebx),%ebp
284	xorl	8(%ebx),%ecx
285	xorl	12(%ebx),%esi
286	xorl	20(%ebx),%edx
287	xorl	24(%ebx),%edi
288	movl	%ebp,4(%eax)
289	movl	%ecx,8(%eax)
290	movl	%esi,12(%eax)
291	movl	%edx,20(%eax)
292	movl	%edi,24(%eax)
293	movl	28(%esp),%ebp
294	movl	40(%esp),%ecx
295	movl	44(%esp),%esi
296	movl	52(%esp),%edx
297	movl	60(%esp),%edi
298	addl	92(%esp),%ebp
299	addl	104(%esp),%ecx
300	addl	108(%esp),%esi
301	addl	116(%esp),%edx
302	addl	124(%esp),%edi
303	xorl	28(%ebx),%ebp
304	xorl	40(%ebx),%ecx
305	xorl	44(%ebx),%esi
306	xorl	52(%ebx),%edx
307	xorl	60(%ebx),%edi
// Advance the source pointer by one block.
308	leal	64(%ebx),%ebx
309	movl	%ebp,28(%eax)
// Fetch the parked output dword 0; it is stored last, at (%eax).
310	movl	(%esp),%ebp
311	movl	%ecx,40(%eax)
312	movl	160(%esp),%ecx
313	movl	%esi,44(%eax)
314	movl	%edx,52(%eax)
315	movl	%edi,60(%eax)
316	movl	%ebp,(%eax)
// Advance the destination pointer and reduce the remaining byte count;
// continue with another block while bytes remain.
317	leal	64(%eax),%eax
318	subl	$64,%ecx
319	jnz	.L003outer_loop
320	jmp	.L006done
// .L005tail: fewer than 64 bytes remain (%ebx = remaining count, 1..63).
// Finish adding the saved input words to all 16 state words and write the
// complete 64-byte block to 0..60(%esp) so it can be consumed byte by byte.
321.L005tail:
322	addl	112(%esp),%edx
323	addl	120(%esp),%edi
324	movl	%eax,(%esp)
325	movl	%ebp,16(%esp)
326	movl	%ecx,32(%esp)
327	movl	%esi,36(%esp)
328	movl	%edx,48(%esp)
329	movl	%edi,56(%esp)
330	movl	4(%esp),%ebp
331	movl	8(%esp),%ecx
332	movl	12(%esp),%esi
333	movl	20(%esp),%edx
334	movl	24(%esp),%edi
335	addl	$857760878,%ebp
336	addl	$2036477234,%ecx
337	addl	$1797285236,%esi
338	addl	84(%esp),%edx
339	addl	88(%esp),%edi
340	movl	%ebp,4(%esp)
341	movl	%ecx,8(%esp)
342	movl	%esi,12(%esp)
343	movl	%edx,20(%esp)
344	movl	%edi,24(%esp)
345	movl	28(%esp),%ebp
346	movl	40(%esp),%ecx
347	movl	44(%esp),%esi
348	movl	52(%esp),%edx
349	movl	60(%esp),%edi
350	addl	92(%esp),%ebp
351	addl	104(%esp),%ecx
352	addl	108(%esp),%esi
353	addl	116(%esp),%edx
354	addl	124(%esp),%edi
355	movl	%ebp,28(%esp)
// %ebp = source pointer, %ecx = destination pointer, %esi = byte index (0).
356	movl	156(%esp),%ebp
357	movl	%ecx,40(%esp)
358	movl	152(%esp),%ecx
359	movl	%esi,44(%esp)
360	xorl	%esi,%esi
361	movl	%edx,52(%esp)
362	movl	%edi,60(%esp)
363	xorl	%eax,%eax
364	xorl	%edx,%edx
// Byte loop: dst[i] = src[i] ^ block[i] for the remaining %ebx bytes.
// %esi is incremented before the store, hence the -1 displacement.
365.L007tail_loop:
366	movb	(%esi,%ebp,1),%al
367	movb	(%esp,%esi,1),%dl
368	leal	1(%esi),%esi
369	xorb	%dl,%al
370	movb	%al,-1(%ecx,%esi,1)
371	decl	%ebx
372	jnz	.L007tail_loop
// .L006done: release the 132-byte frame of the scalar path.
373.L006done:
374	addl	$132,%esp
// .L000no_data: common exit; also reached directly when the byte count is
// zero (no frame was allocated in that case). Restores the saved registers.
375.L000no_data:
376	popl	%edi
377	popl	%esi
378	popl	%ebx
379	popl	%ebp
380	ret
381.size	ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
// ChaCha20_ssse3: SSSE3 implementation. It reads the same five stack
// arguments as ChaCha20_ctr32 (arg1 store pointer, arg2 XOR-source pointer,
// arg3 byte count, arg4 pointer to 8 dwords, arg5 pointer to 4 dwords).
// There are two ways in:
//   * a direct call to ChaCha20_ssse3, which saves the four registers that
//     the epilogue restores and sets up %eax itself (see below);
//   * a jump to .Lssse3_shortcut from ChaCha20_ctr32, which has already
//     pushed the same four registers and left the address of .Lpic_point
//     in %eax.
382.globl	ChaCha20_ssse3
383.hidden	ChaCha20_ssse3
384.type	ChaCha20_ssse3,@function
385.align	16
386ChaCha20_ssse3:
387.L_ChaCha20_ssse3_begin:
388	pushl	%ebp
389	pushl	%ebx
390	pushl	%esi
391	pushl	%edi
// Direct-entry fix: the code after .Lssse3_shortcut locates .Lssse3_data
// relative to .Lpic_point and therefore needs %eax = address of .Lpic_point.
// Previously only the ChaCha20_ctr32 shortcut path established that, so a
// direct call used an undefined %eax. Obtain the current address with a
// call/pop pair and rebase it to .Lpic_point; both labels are in .text, so
// the difference is an assemble-time constant. The stack is balanced again
// after the popl.
	call	.Lssse3_pic_point
.Lssse3_pic_point:
	popl	%eax
	leal	.Lpic_point-.Lssse3_pic_point(%eax),%eax
// Precondition from here on: %eax = run-time address of .Lpic_point.
392.Lssse3_shortcut:
// %edi = arg1, %esi = arg2, %ecx = arg3 (byte count), %edx = arg4,
// %ebx = arg5.
393	movl	20(%esp),%edi
394	movl	24(%esp),%esi
395	movl	28(%esp),%ecx
396	movl	32(%esp),%edx
397	movl	36(%esp),%ebx
// Allocate a 64-byte aligned frame and remember the incoming %esp at
// 512(%esp); the epilogue at .L011done restores it from there.
398	movl	%esp,%ebp
399	subl	$524,%esp
400	andl	$-64,%esp
401	movl	%ebp,512(%esp)
// %eax = run-time address of the constant table .Lssse3_data.
402	leal	.Lssse3_data-.Lpic_point(%eax),%eax
// %xmm3 = the 16 bytes at arg5. Inputs shorter than 256 bytes go straight
// to the one-block-at-a-time code at .L0081x.
403	movdqu	(%ebx),%xmm3
404	cmpl	$256,%ecx
405	jb	.L0081x
// Four-block path. Save arg4/arg5 pointers for the later hand-off to the
// one-block code, and pre-subtract 256 from the byte count (the loop
// bottom tests for borrow).
406	movl	%edx,516(%esp)
407	movl	%ebx,520(%esp)
408	subl	$256,%ecx
// %ebp = base of the broadcast input state at -128..112(%ebp).
409	leal	384(%esp),%ebp
410	movdqu	(%edx),%xmm7
// Broadcast each dword of the arg5 block across a full vector.
411	pshufd	$0,%xmm3,%xmm0
412	pshufd	$85,%xmm3,%xmm1
413	pshufd	$170,%xmm3,%xmm2
414	pshufd	$255,%xmm3,%xmm3
// Lane counters: add {0,1,2,3} so each lane gets its own value of word 12,
// then subtract {4,4,4,4} because .L009outer_loop adds {4,4,4,4} before
// every pass, including the first.
415	paddd	48(%eax),%xmm0
416	pshufd	$0,%xmm7,%xmm4
417	pshufd	$85,%xmm7,%xmm5
418	psubd	64(%eax),%xmm0
419	pshufd	$170,%xmm7,%xmm6
420	pshufd	$255,%xmm7,%xmm7
421	movdqa	%xmm0,64(%ebp)
422	movdqa	%xmm1,80(%ebp)
423	movdqa	%xmm2,96(%ebp)
424	movdqa	%xmm3,112(%ebp)
// Second 16 bytes of arg4.
425	movdqu	16(%edx),%xmm3
426	movdqa	%xmm4,-64(%ebp)
427	movdqa	%xmm5,-48(%ebp)
428	movdqa	%xmm6,-32(%ebp)
429	movdqa	%xmm7,-16(%ebp)
// The four constant dwords stored at offset 32 of .Lssse3_data.
430	movdqa	32(%eax),%xmm7
// %ebx = base of the working state at -128..112(%ebx).
431	leal	128(%esp),%ebx
432	pshufd	$0,%xmm3,%xmm0
433	pshufd	$85,%xmm3,%xmm1
434	pshufd	$170,%xmm3,%xmm2
435	pshufd	$255,%xmm3,%xmm3
436	pshufd	$0,%xmm7,%xmm4
437	pshufd	$85,%xmm7,%xmm5
438	pshufd	$170,%xmm7,%xmm6
439	pshufd	$255,%xmm7,%xmm7
440	movdqa	%xmm0,(%ebp)
441	movdqa	%xmm1,16(%ebp)
442	movdqa	%xmm2,32(%ebp)
443	movdqa	%xmm3,48(%ebp)
444	movdqa	%xmm4,-128(%ebp)
445	movdqa	%xmm5,-112(%ebp)
446	movdqa	%xmm6,-96(%ebp)
447	movdqa	%xmm7,-80(%ebp)
// Bias the source and destination pointers by 128; the output code
// addresses them with displacements -128, -64, 0 and 64.
448	leal	128(%esi),%esi
449	leal	128(%edi),%edi
450	jmp	.L009outer_loop
// .L009outer_loop: start of one 256-byte (four-block) pass.
// Copies the broadcast input state from the %ebp area into the working
// area at %ebx. Rows at -128, -64, 0 and 16 are not copied: they are loaded
// directly into %xmm0, %xmm3, %xmm4 and %xmm5 for the start of the round
// loop.
451.align	16
452.L009outer_loop:
453	movdqa	-112(%ebp),%xmm1
454	movdqa	-96(%ebp),%xmm2
455	movdqa	-80(%ebp),%xmm3
456	movdqa	-48(%ebp),%xmm5
457	movdqa	-32(%ebp),%xmm6
458	movdqa	-16(%ebp),%xmm7
459	movdqa	%xmm1,-112(%ebx)
460	movdqa	%xmm2,-96(%ebx)
461	movdqa	%xmm3,-80(%ebx)
462	movdqa	%xmm5,-48(%ebx)
463	movdqa	%xmm6,-32(%ebx)
464	movdqa	%xmm7,-16(%ebx)
465	movdqa	32(%ebp),%xmm2
466	movdqa	48(%ebp),%xmm3
467	movdqa	64(%ebp),%xmm4
468	movdqa	80(%ebp),%xmm5
469	movdqa	96(%ebp),%xmm6
470	movdqa	112(%ebp),%xmm7
// Advance the per-lane counter vector by {4,4,4,4} for this pass.
471	paddd	64(%eax),%xmm4
472	movdqa	%xmm2,32(%ebx)
473	movdqa	%xmm3,48(%ebx)
474	movdqa	%xmm4,64(%ebx)
475	movdqa	%xmm5,80(%ebx)
476	movdqa	%xmm6,96(%ebx)
477	movdqa	%xmm7,112(%ebx)
// Write the advanced counter back to the input state as well, so the next
// pass continues from it.
478	movdqa	%xmm4,64(%ebp)
479	movdqa	-128(%ebp),%xmm0
// %xmm6 = counter vector, the value also stored at 64(%ebx).
480	movdqa	%xmm4,%xmm6
481	movdqa	-64(%ebp),%xmm3
482	movdqa	(%ebp),%xmm4
483	movdqa	16(%ebp),%xmm5
// %edx = number of iterations of .L010loop.
484	movl	$10,%edx
485	nop
// .L010loop: four-block round loop, executed ten times (%edx counts down
// from 10). Every vector holds one state word for each of four blocks.
// Rotations by 16 and 8 bits are done with pshufb using the byte-shuffle
// masks at (%eax) and 16(%eax); rotations by 12 and 7 bits use a
// pslld/psrld/por triple. State rows move between %xmm0..%xmm7 and the
// working area at -128..112(%ebx); the instruction order is hand-scheduled
// and must not be changed.
486.align	16
487.L010loop:
488	paddd	%xmm3,%xmm0
489	movdqa	%xmm3,%xmm2
490	pxor	%xmm0,%xmm6
491	pshufb	(%eax),%xmm6
492	paddd	%xmm6,%xmm4
493	pxor	%xmm4,%xmm2
494	movdqa	-48(%ebx),%xmm3
495	movdqa	%xmm2,%xmm1
496	pslld	$12,%xmm2
497	psrld	$20,%xmm1
498	por	%xmm1,%xmm2
499	movdqa	-112(%ebx),%xmm1
500	paddd	%xmm2,%xmm0
501	movdqa	80(%ebx),%xmm7
502	pxor	%xmm0,%xmm6
503	movdqa	%xmm0,-128(%ebx)
504	pshufb	16(%eax),%xmm6
505	paddd	%xmm6,%xmm4
506	movdqa	%xmm6,64(%ebx)
507	pxor	%xmm4,%xmm2
508	paddd	%xmm3,%xmm1
509	movdqa	%xmm2,%xmm0
510	pslld	$7,%xmm2
511	psrld	$25,%xmm0
512	pxor	%xmm1,%xmm7
513	por	%xmm0,%xmm2
514	movdqa	%xmm4,(%ebx)
515	pshufb	(%eax),%xmm7
516	movdqa	%xmm2,-64(%ebx)
517	paddd	%xmm7,%xmm5
518	movdqa	32(%ebx),%xmm4
519	pxor	%xmm5,%xmm3
520	movdqa	-32(%ebx),%xmm2
521	movdqa	%xmm3,%xmm0
522	pslld	$12,%xmm3
523	psrld	$20,%xmm0
524	por	%xmm0,%xmm3
525	movdqa	-96(%ebx),%xmm0
526	paddd	%xmm3,%xmm1
527	movdqa	96(%ebx),%xmm6
528	pxor	%xmm1,%xmm7
529	movdqa	%xmm1,-112(%ebx)
530	pshufb	16(%eax),%xmm7
531	paddd	%xmm7,%xmm5
532	movdqa	%xmm7,80(%ebx)
533	pxor	%xmm5,%xmm3
534	paddd	%xmm2,%xmm0
535	movdqa	%xmm3,%xmm1
536	pslld	$7,%xmm3
537	psrld	$25,%xmm1
538	pxor	%xmm0,%xmm6
539	por	%xmm1,%xmm3
540	movdqa	%xmm5,16(%ebx)
541	pshufb	(%eax),%xmm6
542	movdqa	%xmm3,-48(%ebx)
543	paddd	%xmm6,%xmm4
544	movdqa	48(%ebx),%xmm5
545	pxor	%xmm4,%xmm2
546	movdqa	-16(%ebx),%xmm3
547	movdqa	%xmm2,%xmm1
548	pslld	$12,%xmm2
549	psrld	$20,%xmm1
550	por	%xmm1,%xmm2
551	movdqa	-80(%ebx),%xmm1
552	paddd	%xmm2,%xmm0
553	movdqa	112(%ebx),%xmm7
554	pxor	%xmm0,%xmm6
555	movdqa	%xmm0,-96(%ebx)
556	pshufb	16(%eax),%xmm6
557	paddd	%xmm6,%xmm4
558	movdqa	%xmm6,96(%ebx)
559	pxor	%xmm4,%xmm2
560	paddd	%xmm3,%xmm1
561	movdqa	%xmm2,%xmm0
562	pslld	$7,%xmm2
563	psrld	$25,%xmm0
564	pxor	%xmm1,%xmm7
565	por	%xmm0,%xmm2
566	pshufb	(%eax),%xmm7
567	movdqa	%xmm2,-32(%ebx)
568	paddd	%xmm7,%xmm5
569	pxor	%xmm5,%xmm3
570	movdqa	-48(%ebx),%xmm2
571	movdqa	%xmm3,%xmm0
572	pslld	$12,%xmm3
573	psrld	$20,%xmm0
574	por	%xmm0,%xmm3
575	movdqa	-128(%ebx),%xmm0
576	paddd	%xmm3,%xmm1
577	pxor	%xmm1,%xmm7
578	movdqa	%xmm1,-80(%ebx)
579	pshufb	16(%eax),%xmm7
580	paddd	%xmm7,%xmm5
581	movdqa	%xmm7,%xmm6
582	pxor	%xmm5,%xmm3
583	paddd	%xmm2,%xmm0
584	movdqa	%xmm3,%xmm1
585	pslld	$7,%xmm3
586	psrld	$25,%xmm1
587	pxor	%xmm0,%xmm6
588	por	%xmm1,%xmm3
589	pshufb	(%eax),%xmm6
590	movdqa	%xmm3,-16(%ebx)
591	paddd	%xmm6,%xmm4
592	pxor	%xmm4,%xmm2
593	movdqa	-32(%ebx),%xmm3
594	movdqa	%xmm2,%xmm1
595	pslld	$12,%xmm2
596	psrld	$20,%xmm1
597	por	%xmm1,%xmm2
598	movdqa	-112(%ebx),%xmm1
599	paddd	%xmm2,%xmm0
600	movdqa	64(%ebx),%xmm7
601	pxor	%xmm0,%xmm6
602	movdqa	%xmm0,-128(%ebx)
603	pshufb	16(%eax),%xmm6
604	paddd	%xmm6,%xmm4
605	movdqa	%xmm6,112(%ebx)
606	pxor	%xmm4,%xmm2
607	paddd	%xmm3,%xmm1
608	movdqa	%xmm2,%xmm0
609	pslld	$7,%xmm2
610	psrld	$25,%xmm0
611	pxor	%xmm1,%xmm7
612	por	%xmm0,%xmm2
613	movdqa	%xmm4,32(%ebx)
614	pshufb	(%eax),%xmm7
615	movdqa	%xmm2,-48(%ebx)
616	paddd	%xmm7,%xmm5
617	movdqa	(%ebx),%xmm4
618	pxor	%xmm5,%xmm3
619	movdqa	-16(%ebx),%xmm2
620	movdqa	%xmm3,%xmm0
621	pslld	$12,%xmm3
622	psrld	$20,%xmm0
623	por	%xmm0,%xmm3
624	movdqa	-96(%ebx),%xmm0
625	paddd	%xmm3,%xmm1
626	movdqa	80(%ebx),%xmm6
627	pxor	%xmm1,%xmm7
628	movdqa	%xmm1,-112(%ebx)
629	pshufb	16(%eax),%xmm7
630	paddd	%xmm7,%xmm5
631	movdqa	%xmm7,64(%ebx)
632	pxor	%xmm5,%xmm3
633	paddd	%xmm2,%xmm0
634	movdqa	%xmm3,%xmm1
635	pslld	$7,%xmm3
636	psrld	$25,%xmm1
637	pxor	%xmm0,%xmm6
638	por	%xmm1,%xmm3
639	movdqa	%xmm5,48(%ebx)
640	pshufb	(%eax),%xmm6
641	movdqa	%xmm3,-32(%ebx)
642	paddd	%xmm6,%xmm4
643	movdqa	16(%ebx),%xmm5
644	pxor	%xmm4,%xmm2
645	movdqa	-64(%ebx),%xmm3
646	movdqa	%xmm2,%xmm1
647	pslld	$12,%xmm2
648	psrld	$20,%xmm1
649	por	%xmm1,%xmm2
650	movdqa	-80(%ebx),%xmm1
651	paddd	%xmm2,%xmm0
652	movdqa	96(%ebx),%xmm7
653	pxor	%xmm0,%xmm6
654	movdqa	%xmm0,-96(%ebx)
655	pshufb	16(%eax),%xmm6
656	paddd	%xmm6,%xmm4
657	movdqa	%xmm6,80(%ebx)
658	pxor	%xmm4,%xmm2
659	paddd	%xmm3,%xmm1
660	movdqa	%xmm2,%xmm0
661	pslld	$7,%xmm2
662	psrld	$25,%xmm0
663	pxor	%xmm1,%xmm7
664	por	%xmm0,%xmm2
665	pshufb	(%eax),%xmm7
666	movdqa	%xmm2,-16(%ebx)
667	paddd	%xmm7,%xmm5
668	pxor	%xmm5,%xmm3
669	movdqa	%xmm3,%xmm0
670	pslld	$12,%xmm3
671	psrld	$20,%xmm0
672	por	%xmm0,%xmm3
673	movdqa	-128(%ebx),%xmm0
674	paddd	%xmm3,%xmm1
675	movdqa	64(%ebx),%xmm6
676	pxor	%xmm1,%xmm7
677	movdqa	%xmm1,-80(%ebx)
678	pshufb	16(%eax),%xmm7
679	paddd	%xmm7,%xmm5
680	movdqa	%xmm7,96(%ebx)
681	pxor	%xmm5,%xmm3
682	movdqa	%xmm3,%xmm1
683	pslld	$7,%xmm3
684	psrld	$25,%xmm1
685	por	%xmm1,%xmm3
686	decl	%edx
687	jnz	.L010loop
// Rounds finished for four blocks. Flush the rows still held in registers
// to the working area so that all 16 rows can be reloaded in groups of four.
688	movdqa	%xmm3,-64(%ebx)
689	movdqa	%xmm4,(%ebx)
690	movdqa	%xmm5,16(%ebx)
691	movdqa	%xmm6,64(%ebx)
692	movdqa	%xmm7,96(%ebx)
// The same sequence is repeated four times, once per group of four rows:
//   1. add the matching input rows from the %ebp area,
//   2. interleave the four vectors with punpck{l,h}dq / punpck{l,h}qdq
//      (a 4x4 dword transpose) so each result register holds 16 consecutive
//      bytes of one block,
//   3. XOR with the source at displacements -128, -64, 0 and 64 (one
//      16-byte piece from each of the four 64-byte blocks) and store to the
//      destination at the same displacements,
//   4. advance both pointers by 16.
// Group 1: rows at -128..-80.
693	movdqa	-112(%ebx),%xmm1
694	movdqa	-96(%ebx),%xmm2
695	movdqa	-80(%ebx),%xmm3
696	paddd	-128(%ebp),%xmm0
697	paddd	-112(%ebp),%xmm1
698	paddd	-96(%ebp),%xmm2
699	paddd	-80(%ebp),%xmm3
700	movdqa	%xmm0,%xmm6
701	punpckldq	%xmm1,%xmm0
702	movdqa	%xmm2,%xmm7
703	punpckldq	%xmm3,%xmm2
704	punpckhdq	%xmm1,%xmm6
705	punpckhdq	%xmm3,%xmm7
706	movdqa	%xmm0,%xmm1
707	punpcklqdq	%xmm2,%xmm0
708	movdqa	%xmm6,%xmm3
709	punpcklqdq	%xmm7,%xmm6
710	punpckhqdq	%xmm2,%xmm1
711	punpckhqdq	%xmm7,%xmm3
712	movdqu	-128(%esi),%xmm4
713	movdqu	-64(%esi),%xmm5
714	movdqu	(%esi),%xmm2
715	movdqu	64(%esi),%xmm7
716	leal	16(%esi),%esi
717	pxor	%xmm0,%xmm4
718	movdqa	-64(%ebx),%xmm0
719	pxor	%xmm1,%xmm5
720	movdqa	-48(%ebx),%xmm1
721	pxor	%xmm2,%xmm6
722	movdqa	-32(%ebx),%xmm2
723	pxor	%xmm3,%xmm7
724	movdqa	-16(%ebx),%xmm3
725	movdqu	%xmm4,-128(%edi)
726	movdqu	%xmm5,-64(%edi)
727	movdqu	%xmm6,(%edi)
728	movdqu	%xmm7,64(%edi)
729	leal	16(%edi),%edi
// Group 2: rows at -64..-16 (already loaded above, interleaved with the
// stores of group 1).
730	paddd	-64(%ebp),%xmm0
731	paddd	-48(%ebp),%xmm1
732	paddd	-32(%ebp),%xmm2
733	paddd	-16(%ebp),%xmm3
734	movdqa	%xmm0,%xmm6
735	punpckldq	%xmm1,%xmm0
736	movdqa	%xmm2,%xmm7
737	punpckldq	%xmm3,%xmm2
738	punpckhdq	%xmm1,%xmm6
739	punpckhdq	%xmm3,%xmm7
740	movdqa	%xmm0,%xmm1
741	punpcklqdq	%xmm2,%xmm0
742	movdqa	%xmm6,%xmm3
743	punpcklqdq	%xmm7,%xmm6
744	punpckhqdq	%xmm2,%xmm1
745	punpckhqdq	%xmm7,%xmm3
746	movdqu	-128(%esi),%xmm4
747	movdqu	-64(%esi),%xmm5
748	movdqu	(%esi),%xmm2
749	movdqu	64(%esi),%xmm7
750	leal	16(%esi),%esi
751	pxor	%xmm0,%xmm4
752	movdqa	(%ebx),%xmm0
753	pxor	%xmm1,%xmm5
754	movdqa	16(%ebx),%xmm1
755	pxor	%xmm2,%xmm6
756	movdqa	32(%ebx),%xmm2
757	pxor	%xmm3,%xmm7
758	movdqa	48(%ebx),%xmm3
759	movdqu	%xmm4,-128(%edi)
760	movdqu	%xmm5,-64(%edi)
761	movdqu	%xmm6,(%edi)
762	movdqu	%xmm7,64(%edi)
763	leal	16(%edi),%edi
// Group 3: rows at 0..48.
764	paddd	(%ebp),%xmm0
765	paddd	16(%ebp),%xmm1
766	paddd	32(%ebp),%xmm2
767	paddd	48(%ebp),%xmm3
768	movdqa	%xmm0,%xmm6
769	punpckldq	%xmm1,%xmm0
770	movdqa	%xmm2,%xmm7
771	punpckldq	%xmm3,%xmm2
772	punpckhdq	%xmm1,%xmm6
773	punpckhdq	%xmm3,%xmm7
774	movdqa	%xmm0,%xmm1
775	punpcklqdq	%xmm2,%xmm0
776	movdqa	%xmm6,%xmm3
777	punpcklqdq	%xmm7,%xmm6
778	punpckhqdq	%xmm2,%xmm1
779	punpckhqdq	%xmm7,%xmm3
780	movdqu	-128(%esi),%xmm4
781	movdqu	-64(%esi),%xmm5
782	movdqu	(%esi),%xmm2
783	movdqu	64(%esi),%xmm7
784	leal	16(%esi),%esi
785	pxor	%xmm0,%xmm4
786	movdqa	64(%ebx),%xmm0
787	pxor	%xmm1,%xmm5
788	movdqa	80(%ebx),%xmm1
789	pxor	%xmm2,%xmm6
790	movdqa	96(%ebx),%xmm2
791	pxor	%xmm3,%xmm7
792	movdqa	112(%ebx),%xmm3
793	movdqu	%xmm4,-128(%edi)
794	movdqu	%xmm5,-64(%edi)
795	movdqu	%xmm6,(%edi)
796	movdqu	%xmm7,64(%edi)
797	leal	16(%edi),%edi
// Group 4: rows at 64..112.
798	paddd	64(%ebp),%xmm0
799	paddd	80(%ebp),%xmm1
800	paddd	96(%ebp),%xmm2
801	paddd	112(%ebp),%xmm3
802	movdqa	%xmm0,%xmm6
803	punpckldq	%xmm1,%xmm0
804	movdqa	%xmm2,%xmm7
805	punpckldq	%xmm3,%xmm2
806	punpckhdq	%xmm1,%xmm6
807	punpckhdq	%xmm3,%xmm7
808	movdqa	%xmm0,%xmm1
809	punpcklqdq	%xmm2,%xmm0
810	movdqa	%xmm6,%xmm3
811	punpcklqdq	%xmm7,%xmm6
812	punpckhqdq	%xmm2,%xmm1
813	punpckhqdq	%xmm7,%xmm3
814	movdqu	-128(%esi),%xmm4
815	movdqu	-64(%esi),%xmm5
816	movdqu	(%esi),%xmm2
817	movdqu	64(%esi),%xmm7
// 3 * 16 + 208 = 256: the pointers end up one full four-block pass ahead.
818	leal	208(%esi),%esi
819	pxor	%xmm0,%xmm4
820	pxor	%xmm1,%xmm5
821	pxor	%xmm2,%xmm6
822	pxor	%xmm3,%xmm7
823	movdqu	%xmm4,-128(%edi)
824	movdqu	%xmm5,-64(%edi)
825	movdqu	%xmm6,(%edi)
826	movdqu	%xmm7,64(%edi)
827	leal	208(%edi),%edi
// The byte count was pre-decremented by 256, so "no borrow" means at least
// another 256 bytes remain.
828	subl	$256,%ecx
829	jnc	.L009outer_loop
// Undo the pre-decrement; finish if nothing is left.
830	addl	$256,%ecx
831	jz	.L011done
// Hand-off to the one-block code for the remaining 1..255 bytes:
// reload the arg5/arg4 pointers, remove the 128-byte pointer bias, and
// build the next counter block in %xmm3: low dword of the lane-counter
// vector plus {4,0,0,0}, merged with the original arg5 block whose first
// dword is cleared by the {0,-1,-1,-1} mask.
832	movl	520(%esp),%ebx
833	leal	-128(%esi),%esi
834	movl	516(%esp),%edx
835	leal	-128(%edi),%edi
836	movd	64(%ebp),%xmm2
837	movdqu	(%ebx),%xmm3
838	paddd	96(%eax),%xmm2
839	pand	112(%eax),%xmm3
840	por	%xmm2,%xmm3
// .L0081x: one-block-at-a-time path. Expects %xmm3 = counter block,
// %edx = arg4 pointer, %eax = .Lssse3_data.
// %xmm0 = constants row, %xmm1/%xmm2 = the two 16-byte halves of arg4,
// %xmm6/%xmm7 = the byte-shuffle masks used by the pshufb encodings in
// .L012loop1x. The four input rows are kept at 0..48(%esp).
841.L0081x:
842	movdqa	32(%eax),%xmm0
843	movdqu	(%edx),%xmm1
844	movdqu	16(%edx),%xmm2
845	movdqa	(%eax),%xmm6
846	movdqa	16(%eax),%xmm7
// NOTE(review): this dword store is overwritten by the 16-byte store of
// %xmm3 to 48(%esp) a few lines below.
847	movl	%ebp,48(%esp)
848	movdqa	%xmm0,(%esp)
849	movdqa	%xmm1,16(%esp)
850	movdqa	%xmm2,32(%esp)
851	movdqa	%xmm3,48(%esp)
// %edx = number of iterations of .L012loop1x.
852	movl	$10,%edx
853	jmp	.L012loop1x
// .L013outer1x: next block. Reload the input rows and add {1,0,0,0} to the
// saved counter row, storing the advanced row back to 48(%esp).
854.align	16
855.L013outer1x:
856	movdqa	80(%eax),%xmm3
857	movdqa	(%esp),%xmm0
858	movdqa	16(%esp),%xmm1
859	movdqa	32(%esp),%xmm2
860	paddd	48(%esp),%xmm3
861	movl	$10,%edx
862	movdqa	%xmm3,48(%esp)
863	jmp	.L012loop1x
// .L012loop1x: single-block round loop, executed ten times (%edx counts
// down from 10). %xmm0..%xmm3 hold the four 16-byte state rows.
// The .byte sequences are hand-encoded pshufb instructions:
//   102,15,56,0,222 = pshufb %xmm6,%xmm3  (rotate each dword left by 16)
//   102,15,56,0,223 = pshufb %xmm7,%xmm3  (rotate each dword left by 8)
// Rotations by 12 and 7 use a psrld/pslld/por triple.
864.align	16
865.L012loop1x:
866	paddd	%xmm1,%xmm0
867	pxor	%xmm0,%xmm3
868.byte	102,15,56,0,222
869	paddd	%xmm3,%xmm2
870	pxor	%xmm2,%xmm1
871	movdqa	%xmm1,%xmm4
872	psrld	$20,%xmm1
873	pslld	$12,%xmm4
874	por	%xmm4,%xmm1
875	paddd	%xmm1,%xmm0
876	pxor	%xmm0,%xmm3
877.byte	102,15,56,0,223
878	paddd	%xmm3,%xmm2
879	pxor	%xmm2,%xmm1
880	movdqa	%xmm1,%xmm4
881	psrld	$25,%xmm1
882	pslld	$7,%xmm4
883	por	%xmm4,%xmm1
// Rotate the dword lanes of rows 1..3 so the second half of the iteration
// operates on a different combination of words.
884	pshufd	$78,%xmm2,%xmm2
885	pshufd	$57,%xmm1,%xmm1
886	pshufd	$147,%xmm3,%xmm3
887	nop
888	paddd	%xmm1,%xmm0
889	pxor	%xmm0,%xmm3
890.byte	102,15,56,0,222
891	paddd	%xmm3,%xmm2
892	pxor	%xmm2,%xmm1
893	movdqa	%xmm1,%xmm4
894	psrld	$20,%xmm1
895	pslld	$12,%xmm4
896	por	%xmm4,%xmm1
897	paddd	%xmm1,%xmm0
898	pxor	%xmm0,%xmm3
899.byte	102,15,56,0,223
900	paddd	%xmm3,%xmm2
901	pxor	%xmm2,%xmm1
902	movdqa	%xmm1,%xmm4
903	psrld	$25,%xmm1
904	pslld	$7,%xmm4
905	por	%xmm4,%xmm1
// Undo the lane rotation applied in the middle of the iteration.
906	pshufd	$78,%xmm2,%xmm2
907	pshufd	$147,%xmm1,%xmm1
908	pshufd	$57,%xmm3,%xmm3
909	decl	%edx
910	jnz	.L012loop1x
// Add the saved input rows to the round output to form the 64-byte block.
911	paddd	(%esp),%xmm0
912	paddd	16(%esp),%xmm1
913	paddd	32(%esp),%xmm2
914	paddd	48(%esp),%xmm3
915	cmpl	$64,%ecx
916	jb	.L014tail
// Full block: XOR 64 source bytes with the block and store them, then
// advance both pointers and continue while bytes remain.
917	movdqu	(%esi),%xmm4
918	movdqu	16(%esi),%xmm5
919	pxor	%xmm4,%xmm0
920	movdqu	32(%esi),%xmm4
921	pxor	%xmm5,%xmm1
922	movdqu	48(%esi),%xmm5
923	pxor	%xmm4,%xmm2
924	pxor	%xmm5,%xmm3
925	leal	64(%esi),%esi
926	movdqu	%xmm0,(%edi)
927	movdqu	%xmm1,16(%edi)
928	movdqu	%xmm2,32(%edi)
929	movdqu	%xmm3,48(%edi)
930	leal	64(%edi),%edi
931	subl	$64,%ecx
932	jnz	.L013outer1x
933	jmp	.L011done
// .L014tail: fewer than 64 bytes remain (%ecx = 1..63). Spill the block
// to 0..60(%esp), overwriting the saved input rows (they are not needed
// again), and process it byte by byte with %ebp as the index.
934.L014tail:
935	movdqa	%xmm0,(%esp)
936	movdqa	%xmm1,16(%esp)
937	movdqa	%xmm2,32(%esp)
938	movdqa	%xmm3,48(%esp)
939	xorl	%eax,%eax
940	xorl	%edx,%edx
941	xorl	%ebp,%ebp
// Byte loop: dst[i] = block[i] ^ src[i]. %ebp is incremented before the
// store, hence the -1 displacement.
942.L015tail_loop:
943	movb	(%esp,%ebp,1),%al
944	movb	(%esi,%ebp,1),%dl
945	leal	1(%ebp),%ebp
946	xorb	%dl,%al
947	movb	%al,-1(%edi,%ebp,1)
948	decl	%ecx
949	jnz	.L015tail_loop
// .L011done: restore the stack pointer saved at 512(%esp) by the prologue,
// then restore the saved registers and return.
950.L011done:
951	movl	512(%esp),%esp
952	popl	%edi
953	popl	%esi
954	popl	%ebx
955	popl	%ebp
956	ret
957.size	ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
// .Lssse3_data: constant table for ChaCha20_ssse3, addressed through %eax.
// Offsets as used by the code above:
//     0  pshufb mask that rotates each dword left by 16 bits
//    16  pshufb mask that rotates each dword left by 8 bits
//    32  the four constant dwords forming state words 0..3
//    48  {0,1,2,3}    per-lane counter offsets for the four-block path
//    64  {4,4,4,4}    counter step per four-block pass
//    80  {1,0,0,0}    counter step per single block
//    96  {4,0,0,0}    counter adjustment when leaving the four-block path
//   112  {0,-1,-1,-1} mask that clears the first dword of the counter block
958.align	64
959.Lssse3_data:
960.byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
961.byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
962.long	1634760805,857760878,2036477234,1797285236
963.long	0,1,2,3
964.long	4,4,4,4
965.long	1,0,0,0
966.long	4,0,0,0
967.long	0,-1,-1,-1
// NUL-terminated ASCII banner:
// "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>"
968.align	64
969.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
970.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
971.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
972.byte	114,103,62,0
973#endif  // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__)
974