• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# This file is generated from a similarly-named Perl script in the BoringSSL
2# source tree. Do not edit by hand.
3
4#if defined(__i386__)
5#include "ring_core_generated/prefix_symbols_asm.h"
6.text
# ChaCha20_ctr32 -- XOR a byte stream with ChaCha20 keystream (i386, all
# arguments passed on the stack).
#
# Arguments as they are read below; offsets are relative to %esp after the
# four register pushes:
#   20(%esp)  output pointer (stored to)
#   24(%esp)  input pointer (loaded from)
#   28(%esp)  length in bytes; zero makes the function return immediately
#   32(%esp)  pointer to 8 key words (32 bytes are copied to the stack)
#   36(%esp)  pointer to 4 counter/nonce words (16 bytes are copied to the
#             stack); the local copy of word 0 is incremented once per
#             64-byte block, the caller memory is only read
#
# When both tested bits of OPENSSL_ia32cap_P are set, control transfers to
# .Lssse3_shortcut inside _ChaCha20_ssse3; otherwise the scalar code below
# runs.  %ebp, %ebx, %esi and %edi are saved on entry and restored on exit.
7.globl	ChaCha20_ctr32
8.hidden	ChaCha20_ctr32
9.type	ChaCha20_ctr32,@function
10.align	16
11ChaCha20_ctr32:
12.L_ChaCha20_ctr32_begin:
13	pushl	%ebp
14	pushl	%ebx
15	pushl	%esi
16	pushl	%edi
# Nothing to do for a zero length.
17	xorl	%eax,%eax
18	cmpl	28(%esp),%eax
19	je	.L000no_data
# Position-independent addressing: call/pop leaves the run-time address of
# .Lpic_point in %eax.  The SSSE3 path relies on %eax still holding it.
20	call	.Lpic_point
21.Lpic_point:
22	popl	%eax
23	leal	OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp
# Bit 24 of capability word 0 (16777216 = 1<<24) and bit 9 of word 1
# (512 = 1<<9) must both be set to take the SSSE3 path.
# NOTE(review): presumably the FXSR and SSSE3 CPUID flags -- confirm against
# the definition of OPENSSL_ia32cap_P.
24	testl	$16777216,(%ebp)
25	jz	.L001x86
26	testl	$512,4(%ebp)
27	jz	.L001x86
28	jmp	.Lssse3_shortcut
29.L001x86:
# Scalar path.  %esi = key pointer, %edi = counter/nonce pointer.
30	movl	32(%esp),%esi
31	movl	36(%esp),%edi
# Frame layout after reserving 132 bytes:
#     0..60(%esp)   working state, word i at 4*i(%esp) (partly kept in
#                   registers while the round loop runs)
#    80..108(%esp)  copy of the 8 key words
#   112..124(%esp)  copy of the 4 counter/nonce words
#   128(%esp)       round-loop iteration counter
#   152/156/160(%esp) the output pointer, input pointer and length argument
#                   slots (20/24/28 + 132), updated in place per block
32	subl	$132,%esp
33	movl	(%esi),%eax
34	movl	4(%esi),%ebx
35	movl	8(%esi),%ecx
36	movl	12(%esi),%edx
37	movl	%eax,80(%esp)
38	movl	%ebx,84(%esp)
39	movl	%ecx,88(%esp)
40	movl	%edx,92(%esp)
41	movl	16(%esi),%eax
42	movl	20(%esi),%ebx
43	movl	24(%esi),%ecx
44	movl	28(%esi),%edx
45	movl	%eax,96(%esp)
46	movl	%ebx,100(%esp)
47	movl	%ecx,104(%esp)
48	movl	%edx,108(%esp)
49	movl	(%edi),%eax
50	movl	4(%edi),%ebx
51	movl	8(%edi),%ecx
52	movl	12(%edi),%edx
# Pre-decrement counter word 0: .L002entry adds 1 before every block, so the
# first block uses the value supplied by the caller.
53	subl	$1,%eax
54	movl	%eax,112(%esp)
55	movl	%ebx,116(%esp)
56	movl	%ecx,120(%esp)
57	movl	%edx,124(%esp)
58	jmp	.L002entry
59.align	16
60.L003outer_loop:
# Reached after a full 64-byte block: write the advanced input pointer
# (%ebx), output pointer (%eax) and remaining length (%ecx) back to the
# argument slots.
61	movl	%ebx,156(%esp)
62	movl	%eax,152(%esp)
63	movl	%ecx,160(%esp)
64.L002entry:
# Build the working state for one block.  Words 0..3 are the constants
# 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 (the ASCII text
# "expand 32-byte k" as little-endian words).  Words 4..11 come from the key
# copy and words 12..15 from the counter/nonce copy.
# Register-resident on loop entry: %eax = word 0, %ebp = word 4,
# %ecx = word 8, %esi = word 9, %edx = word 12 (counter, already advanced).
65	movl	$1634760805,%eax
66	movl	$857760878,4(%esp)
67	movl	$2036477234,8(%esp)
68	movl	$1797285236,12(%esp)
69	movl	84(%esp),%ebx
70	movl	88(%esp),%ebp
71	movl	104(%esp),%ecx
72	movl	108(%esp),%esi
73	movl	116(%esp),%edx
74	movl	120(%esp),%edi
75	movl	%ebx,20(%esp)
76	movl	%ebp,24(%esp)
77	movl	%ecx,40(%esp)
78	movl	%esi,44(%esp)
79	movl	%edx,52(%esp)
80	movl	%edi,56(%esp)
81	movl	92(%esp),%ebx
82	movl	124(%esp),%edi
83	movl	112(%esp),%edx
84	movl	80(%esp),%ebp
85	movl	96(%esp),%ecx
86	movl	100(%esp),%esi
# Advance the block counter and store it back for the next block.
87	addl	$1,%edx
88	movl	%ebx,28(%esp)
89	movl	%edi,60(%esp)
90	movl	%edx,112(%esp)
# Ten iterations of the loop below.
91	movl	$10,%ebx
92	jmp	.L004loop
93.align	16
94.L004loop:
# One iteration applies the add / xor / rotate-left sequence (rotations by
# 16, 12, 8 and 7) eight times: first to state words (0,4,8,12), (1,5,9,13),
# (2,6,10,14), (3,7,11,15), then to (0,5,10,15), (1,6,11,12), (2,7,8,13),
# (3,4,9,14).  Loads and spills of the next/previous group are interleaved
# with the arithmetic, so statement order matters.
# The iteration counter in %ebx is parked at 128(%esp) because %ebx is
# needed as a working register.
95	addl	%ebp,%eax
96	movl	%ebx,128(%esp)
97	movl	%ebp,%ebx
98	xorl	%eax,%edx
99	roll	$16,%edx
100	addl	%edx,%ecx
101	xorl	%ecx,%ebx
102	movl	52(%esp),%edi
103	roll	$12,%ebx
104	movl	20(%esp),%ebp
105	addl	%ebx,%eax
106	xorl	%eax,%edx
107	movl	%eax,(%esp)
108	roll	$8,%edx
109	movl	4(%esp),%eax
110	addl	%edx,%ecx
111	movl	%edx,48(%esp)
112	xorl	%ecx,%ebx
113	addl	%ebp,%eax
114	roll	$7,%ebx
115	xorl	%eax,%edi
116	movl	%ecx,32(%esp)
117	roll	$16,%edi
118	movl	%ebx,16(%esp)
119	addl	%edi,%esi
120	movl	40(%esp),%ecx
121	xorl	%esi,%ebp
122	movl	56(%esp),%edx
123	roll	$12,%ebp
124	movl	24(%esp),%ebx
125	addl	%ebp,%eax
126	xorl	%eax,%edi
127	movl	%eax,4(%esp)
128	roll	$8,%edi
129	movl	8(%esp),%eax
130	addl	%edi,%esi
131	movl	%edi,52(%esp)
132	xorl	%esi,%ebp
133	addl	%ebx,%eax
134	roll	$7,%ebp
135	xorl	%eax,%edx
136	movl	%esi,36(%esp)
137	roll	$16,%edx
138	movl	%ebp,20(%esp)
139	addl	%edx,%ecx
140	movl	44(%esp),%esi
141	xorl	%ecx,%ebx
142	movl	60(%esp),%edi
143	roll	$12,%ebx
144	movl	28(%esp),%ebp
145	addl	%ebx,%eax
146	xorl	%eax,%edx
147	movl	%eax,8(%esp)
148	roll	$8,%edx
149	movl	12(%esp),%eax
150	addl	%edx,%ecx
151	movl	%edx,56(%esp)
152	xorl	%ecx,%ebx
153	addl	%ebp,%eax
154	roll	$7,%ebx
155	xorl	%eax,%edi
156	roll	$16,%edi
157	movl	%ebx,24(%esp)
158	addl	%edi,%esi
159	xorl	%esi,%ebp
160	roll	$12,%ebp
161	movl	20(%esp),%ebx
162	addl	%ebp,%eax
163	xorl	%eax,%edi
164	movl	%eax,12(%esp)
165	roll	$8,%edi
166	movl	(%esp),%eax
167	addl	%edi,%esi
168	movl	%edi,%edx
169	xorl	%esi,%ebp
170	addl	%ebx,%eax
171	roll	$7,%ebp
172	xorl	%eax,%edx
173	roll	$16,%edx
174	movl	%ebp,28(%esp)
175	addl	%edx,%ecx
176	xorl	%ecx,%ebx
177	movl	48(%esp),%edi
178	roll	$12,%ebx
179	movl	24(%esp),%ebp
180	addl	%ebx,%eax
181	xorl	%eax,%edx
182	movl	%eax,(%esp)
183	roll	$8,%edx
184	movl	4(%esp),%eax
185	addl	%edx,%ecx
186	movl	%edx,60(%esp)
187	xorl	%ecx,%ebx
188	addl	%ebp,%eax
189	roll	$7,%ebx
190	xorl	%eax,%edi
191	movl	%ecx,40(%esp)
192	roll	$16,%edi
193	movl	%ebx,20(%esp)
194	addl	%edi,%esi
195	movl	32(%esp),%ecx
196	xorl	%esi,%ebp
197	movl	52(%esp),%edx
198	roll	$12,%ebp
199	movl	28(%esp),%ebx
200	addl	%ebp,%eax
201	xorl	%eax,%edi
202	movl	%eax,4(%esp)
203	roll	$8,%edi
204	movl	8(%esp),%eax
205	addl	%edi,%esi
206	movl	%edi,48(%esp)
207	xorl	%esi,%ebp
208	addl	%ebx,%eax
209	roll	$7,%ebp
210	xorl	%eax,%edx
211	movl	%esi,44(%esp)
212	roll	$16,%edx
213	movl	%ebp,24(%esp)
214	addl	%edx,%ecx
215	movl	36(%esp),%esi
216	xorl	%ecx,%ebx
217	movl	56(%esp),%edi
218	roll	$12,%ebx
219	movl	16(%esp),%ebp
220	addl	%ebx,%eax
221	xorl	%eax,%edx
222	movl	%eax,8(%esp)
223	roll	$8,%edx
224	movl	12(%esp),%eax
225	addl	%edx,%ecx
226	movl	%edx,52(%esp)
227	xorl	%ecx,%ebx
228	addl	%ebp,%eax
229	roll	$7,%ebx
230	xorl	%eax,%edi
231	roll	$16,%edi
232	movl	%ebx,28(%esp)
233	addl	%edi,%esi
234	xorl	%esi,%ebp
235	movl	48(%esp),%edx
236	roll	$12,%ebp
237	movl	128(%esp),%ebx
238	addl	%ebp,%eax
239	xorl	%eax,%edi
240	movl	%eax,12(%esp)
241	roll	$8,%edi
242	movl	(%esp),%eax
243	addl	%edi,%esi
244	movl	%edi,56(%esp)
245	xorl	%esi,%ebp
246	roll	$7,%ebp
247	decl	%ebx
248	jnz	.L004loop
# Rounds finished.  Registers hold words 0 (%eax), 4 (%ebp), 8 (%ecx),
# 9 (%esi), 12 (%edx) and 14 (%edi); add the initial state back to form the
# keystream block.  %ebx = remaining length; fewer than 64 bytes goes to
# the byte-wise tail.
249	movl	160(%esp),%ebx
250	addl	$1634760805,%eax
251	addl	80(%esp),%ebp
252	addl	96(%esp),%ecx
253	addl	100(%esp),%esi
254	cmpl	$64,%ebx
255	jb	.L005tail
# Full block: %ebx = input pointer, %eax = output pointer.  XOR the 16
# keystream words with the input and store them, in three register batches.
256	movl	156(%esp),%ebx
257	addl	112(%esp),%edx
258	addl	120(%esp),%edi
259	xorl	(%ebx),%eax
260	xorl	16(%ebx),%ebp
# Output word 0 is parked at (%esp) because %eax is about to become the
# output pointer; it is stored to (%eax) at the end of the block.
261	movl	%eax,(%esp)
262	movl	152(%esp),%eax
263	xorl	32(%ebx),%ecx
264	xorl	36(%ebx),%esi
265	xorl	48(%ebx),%edx
266	xorl	56(%ebx),%edi
267	movl	%ebp,16(%eax)
268	movl	%ecx,32(%eax)
269	movl	%esi,36(%eax)
270	movl	%edx,48(%eax)
271	movl	%edi,56(%eax)
272	movl	4(%esp),%ebp
273	movl	8(%esp),%ecx
274	movl	12(%esp),%esi
275	movl	20(%esp),%edx
276	movl	24(%esp),%edi
277	addl	$857760878,%ebp
278	addl	$2036477234,%ecx
279	addl	$1797285236,%esi
280	addl	84(%esp),%edx
281	addl	88(%esp),%edi
282	xorl	4(%ebx),%ebp
283	xorl	8(%ebx),%ecx
284	xorl	12(%ebx),%esi
285	xorl	20(%ebx),%edx
286	xorl	24(%ebx),%edi
287	movl	%ebp,4(%eax)
288	movl	%ecx,8(%eax)
289	movl	%esi,12(%eax)
290	movl	%edx,20(%eax)
291	movl	%edi,24(%eax)
292	movl	28(%esp),%ebp
293	movl	40(%esp),%ecx
294	movl	44(%esp),%esi
295	movl	52(%esp),%edx
296	movl	60(%esp),%edi
297	addl	92(%esp),%ebp
298	addl	104(%esp),%ecx
299	addl	108(%esp),%esi
300	addl	116(%esp),%edx
301	addl	124(%esp),%edi
302	xorl	28(%ebx),%ebp
303	xorl	40(%ebx),%ecx
304	xorl	44(%ebx),%esi
305	xorl	52(%ebx),%edx
306	xorl	60(%ebx),%edi
# Advance the input pointer, reload the remaining length into %ecx, finish
# the stores, advance the output pointer and loop while bytes remain.
307	leal	64(%ebx),%ebx
308	movl	%ebp,28(%eax)
309	movl	(%esp),%ebp
310	movl	%ecx,40(%eax)
311	movl	160(%esp),%ecx
312	movl	%esi,44(%eax)
313	movl	%edx,52(%eax)
314	movl	%edi,60(%eax)
315	movl	%ebp,(%eax)
316	leal	64(%eax),%eax
317	subl	$64,%ecx
318	jnz	.L003outer_loop
319	jmp	.L006done
320.L005tail:
# Partial final block (1..63 bytes, count in %ebx): materialise the whole
# keystream block at 0..60(%esp), then XOR byte by byte.
321	addl	112(%esp),%edx
322	addl	120(%esp),%edi
323	movl	%eax,(%esp)
324	movl	%ebp,16(%esp)
325	movl	%ecx,32(%esp)
326	movl	%esi,36(%esp)
327	movl	%edx,48(%esp)
328	movl	%edi,56(%esp)
329	movl	4(%esp),%ebp
330	movl	8(%esp),%ecx
331	movl	12(%esp),%esi
332	movl	20(%esp),%edx
333	movl	24(%esp),%edi
334	addl	$857760878,%ebp
335	addl	$2036477234,%ecx
336	addl	$1797285236,%esi
337	addl	84(%esp),%edx
338	addl	88(%esp),%edi
339	movl	%ebp,4(%esp)
340	movl	%ecx,8(%esp)
341	movl	%esi,12(%esp)
342	movl	%edx,20(%esp)
343	movl	%edi,24(%esp)
344	movl	28(%esp),%ebp
345	movl	40(%esp),%ecx
346	movl	44(%esp),%esi
347	movl	52(%esp),%edx
348	movl	60(%esp),%edi
349	addl	92(%esp),%ebp
350	addl	104(%esp),%ecx
351	addl	108(%esp),%esi
352	addl	116(%esp),%edx
353	addl	124(%esp),%edi
# From here: %ebp = input pointer, %ecx = output pointer, %esi = byte index
# (starts at 0), %ebx = bytes left.
354	movl	%ebp,28(%esp)
355	movl	156(%esp),%ebp
356	movl	%ecx,40(%esp)
357	movl	152(%esp),%ecx
358	movl	%esi,44(%esp)
359	xorl	%esi,%esi
360	movl	%edx,52(%esp)
361	movl	%edi,60(%esp)
362	xorl	%eax,%eax
363	xorl	%edx,%edx
364.L007tail_loop:
# out[i] = in[i] ^ keystream[i]; the index is bumped before the store, hence
# the -1 displacement.
365	movb	(%esi,%ebp,1),%al
366	movb	(%esp,%esi,1),%dl
367	leal	1(%esi),%esi
368	xorb	%dl,%al
369	movb	%al,-1(%ecx,%esi,1)
370	decl	%ebx
371	jnz	.L007tail_loop
372.L006done:
# Release the 132-byte frame (skipped on the zero-length exit, which never
# allocated it) and restore the saved registers.
373	addl	$132,%esp
374.L000no_data:
375	popl	%edi
376	popl	%esi
377	popl	%ebx
378	popl	%ebp
379	ret
380.size	ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
# _ChaCha20_ssse3 -- SSSE3 implementation of the same operation as
# ChaCha20_ctr32, with the same stack arguments (after four pushes):
#   20(%esp) output pointer, 24(%esp) input pointer, 28(%esp) length,
#   32(%esp) pointer to 32 key bytes, 36(%esp) pointer to the 16-byte
#   counter/nonce block.
#
# ChaCha20_ctr32 jumps to .Lssse3_shortcut after doing its own four pushes
# and with %eax = run-time address of .Lpic_point; that value is used below
# to locate .Lssse3_data.
# NOTE(review): the fall-through entry at the _ChaCha20_ssse3 label does not
# set %eax itself; within this file the function is only reached through
# the shortcut label.
#
# Lengths of 256 bytes or more are processed four 64-byte blocks at a time
# (.L009outer_loop); any remainder, and inputs shorter than 256 bytes, go
# through the one-block-at-a-time path (.L0081x).  The zero-length case is
# filtered out by ChaCha20_ctr32 before the jump.
381.hidden	_ChaCha20_ssse3
382.type	_ChaCha20_ssse3,@function
383.align	16
384_ChaCha20_ssse3:
385	pushl	%ebp
386	pushl	%ebx
387	pushl	%esi
388	pushl	%edi
389.Lssse3_shortcut:
# %edi = output, %esi = input, %ecx = length, %edx = key, %ebx = counter.
390	movl	20(%esp),%edi
391	movl	24(%esp),%esi
392	movl	28(%esp),%ecx
393	movl	32(%esp),%edx
394	movl	36(%esp),%ebx
# Carve out a 64-byte aligned frame (movdqa is used on it throughout) and
# remember the original %esp at 512(%esp) for the epilogue.
395	movl	%esp,%ebp
396	subl	$524,%esp
397	andl	$-64,%esp
398	movl	%ebp,512(%esp)
# %eax = address of the constant tables at .Lssse3_data.
399	leal	.Lssse3_data-.Lpic_point(%eax),%eax
# %xmm3 = the 16-byte counter/nonce block (unaligned load).
400	movdqu	(%ebx),%xmm3
401	cmpl	$256,%ecx
402	jb	.L0081x
# ---- Four-block path ----
# Keep the key and counter pointers for the remainder path.  %ecx is biased
# by -256 so the bottom-of-loop subtraction can branch on the carry flag.
403	movl	%edx,516(%esp)
404	movl	%ebx,520(%esp)
405	subl	$256,%ecx
# %ebp points into the middle of a 256-byte area holding the initial state
# with every 32-bit state word broadcast to all four lanes of one vector
# (pshufd $0/$85/$170/$255 select lane 0/1/2/3).  Vector i of the state
# lives at 16*i-128(%ebp).
406	leal	384(%esp),%ebp
407	movdqu	(%edx),%xmm7
408	pshufd	$0,%xmm3,%xmm0
409	pshufd	$85,%xmm3,%xmm1
410	pshufd	$170,%xmm3,%xmm2
411	pshufd	$255,%xmm3,%xmm3
# Counter lanes become ctr+0, ctr+1, ctr+2, ctr+3 (table at 48(%eax)), then
# are pre-decremented by 4 (table at 64(%eax)) because the outer loop adds 4
# at its top.
412	paddd	48(%eax),%xmm0
413	pshufd	$0,%xmm7,%xmm4
414	pshufd	$85,%xmm7,%xmm5
415	psubd	64(%eax),%xmm0
416	pshufd	$170,%xmm7,%xmm6
417	pshufd	$255,%xmm7,%xmm7
418	movdqa	%xmm0,64(%ebp)
419	movdqa	%xmm1,80(%ebp)
420	movdqa	%xmm2,96(%ebp)
421	movdqa	%xmm3,112(%ebp)
422	movdqu	16(%edx),%xmm3
423	movdqa	%xmm4,-64(%ebp)
424	movdqa	%xmm5,-48(%ebp)
425	movdqa	%xmm6,-32(%ebp)
426	movdqa	%xmm7,-16(%ebp)
# The four constant words come from the table at 32(%eax).
427	movdqa	32(%eax),%xmm7
# %ebx points into the middle of a second 256-byte area used as the working
# state during the rounds (same 16*i-128 layout).
428	leal	128(%esp),%ebx
429	pshufd	$0,%xmm3,%xmm0
430	pshufd	$85,%xmm3,%xmm1
431	pshufd	$170,%xmm3,%xmm2
432	pshufd	$255,%xmm3,%xmm3
433	pshufd	$0,%xmm7,%xmm4
434	pshufd	$85,%xmm7,%xmm5
435	pshufd	$170,%xmm7,%xmm6
436	pshufd	$255,%xmm7,%xmm7
437	movdqa	%xmm0,(%ebp)
438	movdqa	%xmm1,16(%ebp)
439	movdqa	%xmm2,32(%ebp)
440	movdqa	%xmm3,48(%ebp)
441	movdqa	%xmm4,-128(%ebp)
442	movdqa	%xmm5,-112(%ebp)
443	movdqa	%xmm6,-96(%ebp)
444	movdqa	%xmm7,-80(%ebp)
# Bias the data pointers by +128 so that the four blocks of a group are
# reached with displacements -128, -64, 0 and 64.
445	leal	128(%esi),%esi
446	leal	128(%edi),%edi
447	jmp	.L009outer_loop
448.align	16
449.L009outer_loop:
# Copy the initial state into the working area.  Vectors 0, 4, 8, 9 and 12
# are loaded straight into registers (%xmm0, %xmm3, %xmm4, %xmm5, %xmm6)
# further down instead of being copied.
450	movdqa	-112(%ebp),%xmm1
451	movdqa	-96(%ebp),%xmm2
452	movdqa	-80(%ebp),%xmm3
453	movdqa	-48(%ebp),%xmm5
454	movdqa	-32(%ebp),%xmm6
455	movdqa	-16(%ebp),%xmm7
456	movdqa	%xmm1,-112(%ebx)
457	movdqa	%xmm2,-96(%ebx)
458	movdqa	%xmm3,-80(%ebx)
459	movdqa	%xmm5,-48(%ebx)
460	movdqa	%xmm6,-32(%ebx)
461	movdqa	%xmm7,-16(%ebx)
462	movdqa	32(%ebp),%xmm2
463	movdqa	48(%ebp),%xmm3
464	movdqa	64(%ebp),%xmm4
465	movdqa	80(%ebp),%xmm5
466	movdqa	96(%ebp),%xmm6
467	movdqa	112(%ebp),%xmm7
# Advance all four counter lanes by 4 and save them back as the initial
# counter vector for this group of blocks.
468	paddd	64(%eax),%xmm4
469	movdqa	%xmm2,32(%ebx)
470	movdqa	%xmm3,48(%ebx)
471	movdqa	%xmm4,64(%ebx)
472	movdqa	%xmm5,80(%ebx)
473	movdqa	%xmm6,96(%ebx)
474	movdqa	%xmm7,112(%ebx)
475	movdqa	%xmm4,64(%ebp)
476	movdqa	-128(%ebp),%xmm0
477	movdqa	%xmm4,%xmm6
478	movdqa	-64(%ebp),%xmm3
479	movdqa	(%ebp),%xmm4
480	movdqa	16(%ebp),%xmm5
# Ten iterations of the loop below.
481	movl	$10,%edx
482	nop
483.align	16
484.L010loop:
# Same add / xor / rotate sequence as the scalar code, applied to four
# blocks at once (one block per 32-bit lane).  Rotations by 16 and by 8 are
# done with pshufb using the byte-shuffle masks at 0(%eax) and 16(%eax);
# rotations by 12 and by 7 use a pslld / psrld / por triple.  Working-state
# vectors that do not fit in the eight xmm registers are spilled to and
# reloaded from the area at %ebx, interleaved with the arithmetic.
485	paddd	%xmm3,%xmm0
486	movdqa	%xmm3,%xmm2
487	pxor	%xmm0,%xmm6
488	pshufb	(%eax),%xmm6
489	paddd	%xmm6,%xmm4
490	pxor	%xmm4,%xmm2
491	movdqa	-48(%ebx),%xmm3
492	movdqa	%xmm2,%xmm1
493	pslld	$12,%xmm2
494	psrld	$20,%xmm1
495	por	%xmm1,%xmm2
496	movdqa	-112(%ebx),%xmm1
497	paddd	%xmm2,%xmm0
498	movdqa	80(%ebx),%xmm7
499	pxor	%xmm0,%xmm6
500	movdqa	%xmm0,-128(%ebx)
501	pshufb	16(%eax),%xmm6
502	paddd	%xmm6,%xmm4
503	movdqa	%xmm6,64(%ebx)
504	pxor	%xmm4,%xmm2
505	paddd	%xmm3,%xmm1
506	movdqa	%xmm2,%xmm0
507	pslld	$7,%xmm2
508	psrld	$25,%xmm0
509	pxor	%xmm1,%xmm7
510	por	%xmm0,%xmm2
511	movdqa	%xmm4,(%ebx)
512	pshufb	(%eax),%xmm7
513	movdqa	%xmm2,-64(%ebx)
514	paddd	%xmm7,%xmm5
515	movdqa	32(%ebx),%xmm4
516	pxor	%xmm5,%xmm3
517	movdqa	-32(%ebx),%xmm2
518	movdqa	%xmm3,%xmm0
519	pslld	$12,%xmm3
520	psrld	$20,%xmm0
521	por	%xmm0,%xmm3
522	movdqa	-96(%ebx),%xmm0
523	paddd	%xmm3,%xmm1
524	movdqa	96(%ebx),%xmm6
525	pxor	%xmm1,%xmm7
526	movdqa	%xmm1,-112(%ebx)
527	pshufb	16(%eax),%xmm7
528	paddd	%xmm7,%xmm5
529	movdqa	%xmm7,80(%ebx)
530	pxor	%xmm5,%xmm3
531	paddd	%xmm2,%xmm0
532	movdqa	%xmm3,%xmm1
533	pslld	$7,%xmm3
534	psrld	$25,%xmm1
535	pxor	%xmm0,%xmm6
536	por	%xmm1,%xmm3
537	movdqa	%xmm5,16(%ebx)
538	pshufb	(%eax),%xmm6
539	movdqa	%xmm3,-48(%ebx)
540	paddd	%xmm6,%xmm4
541	movdqa	48(%ebx),%xmm5
542	pxor	%xmm4,%xmm2
543	movdqa	-16(%ebx),%xmm3
544	movdqa	%xmm2,%xmm1
545	pslld	$12,%xmm2
546	psrld	$20,%xmm1
547	por	%xmm1,%xmm2
548	movdqa	-80(%ebx),%xmm1
549	paddd	%xmm2,%xmm0
550	movdqa	112(%ebx),%xmm7
551	pxor	%xmm0,%xmm6
552	movdqa	%xmm0,-96(%ebx)
553	pshufb	16(%eax),%xmm6
554	paddd	%xmm6,%xmm4
555	movdqa	%xmm6,96(%ebx)
556	pxor	%xmm4,%xmm2
557	paddd	%xmm3,%xmm1
558	movdqa	%xmm2,%xmm0
559	pslld	$7,%xmm2
560	psrld	$25,%xmm0
561	pxor	%xmm1,%xmm7
562	por	%xmm0,%xmm2
563	pshufb	(%eax),%xmm7
564	movdqa	%xmm2,-32(%ebx)
565	paddd	%xmm7,%xmm5
566	pxor	%xmm5,%xmm3
567	movdqa	-48(%ebx),%xmm2
568	movdqa	%xmm3,%xmm0
569	pslld	$12,%xmm3
570	psrld	$20,%xmm0
571	por	%xmm0,%xmm3
572	movdqa	-128(%ebx),%xmm0
573	paddd	%xmm3,%xmm1
574	pxor	%xmm1,%xmm7
575	movdqa	%xmm1,-80(%ebx)
576	pshufb	16(%eax),%xmm7
577	paddd	%xmm7,%xmm5
578	movdqa	%xmm7,%xmm6
579	pxor	%xmm5,%xmm3
580	paddd	%xmm2,%xmm0
581	movdqa	%xmm3,%xmm1
582	pslld	$7,%xmm3
583	psrld	$25,%xmm1
584	pxor	%xmm0,%xmm6
585	por	%xmm1,%xmm3
586	pshufb	(%eax),%xmm6
587	movdqa	%xmm3,-16(%ebx)
588	paddd	%xmm6,%xmm4
589	pxor	%xmm4,%xmm2
590	movdqa	-32(%ebx),%xmm3
591	movdqa	%xmm2,%xmm1
592	pslld	$12,%xmm2
593	psrld	$20,%xmm1
594	por	%xmm1,%xmm2
595	movdqa	-112(%ebx),%xmm1
596	paddd	%xmm2,%xmm0
597	movdqa	64(%ebx),%xmm7
598	pxor	%xmm0,%xmm6
599	movdqa	%xmm0,-128(%ebx)
600	pshufb	16(%eax),%xmm6
601	paddd	%xmm6,%xmm4
602	movdqa	%xmm6,112(%ebx)
603	pxor	%xmm4,%xmm2
604	paddd	%xmm3,%xmm1
605	movdqa	%xmm2,%xmm0
606	pslld	$7,%xmm2
607	psrld	$25,%xmm0
608	pxor	%xmm1,%xmm7
609	por	%xmm0,%xmm2
610	movdqa	%xmm4,32(%ebx)
611	pshufb	(%eax),%xmm7
612	movdqa	%xmm2,-48(%ebx)
613	paddd	%xmm7,%xmm5
614	movdqa	(%ebx),%xmm4
615	pxor	%xmm5,%xmm3
616	movdqa	-16(%ebx),%xmm2
617	movdqa	%xmm3,%xmm0
618	pslld	$12,%xmm3
619	psrld	$20,%xmm0
620	por	%xmm0,%xmm3
621	movdqa	-96(%ebx),%xmm0
622	paddd	%xmm3,%xmm1
623	movdqa	80(%ebx),%xmm6
624	pxor	%xmm1,%xmm7
625	movdqa	%xmm1,-112(%ebx)
626	pshufb	16(%eax),%xmm7
627	paddd	%xmm7,%xmm5
628	movdqa	%xmm7,64(%ebx)
629	pxor	%xmm5,%xmm3
630	paddd	%xmm2,%xmm0
631	movdqa	%xmm3,%xmm1
632	pslld	$7,%xmm3
633	psrld	$25,%xmm1
634	pxor	%xmm0,%xmm6
635	por	%xmm1,%xmm3
636	movdqa	%xmm5,48(%ebx)
637	pshufb	(%eax),%xmm6
638	movdqa	%xmm3,-32(%ebx)
639	paddd	%xmm6,%xmm4
640	movdqa	16(%ebx),%xmm5
641	pxor	%xmm4,%xmm2
642	movdqa	-64(%ebx),%xmm3
643	movdqa	%xmm2,%xmm1
644	pslld	$12,%xmm2
645	psrld	$20,%xmm1
646	por	%xmm1,%xmm2
647	movdqa	-80(%ebx),%xmm1
648	paddd	%xmm2,%xmm0
649	movdqa	96(%ebx),%xmm7
650	pxor	%xmm0,%xmm6
651	movdqa	%xmm0,-96(%ebx)
652	pshufb	16(%eax),%xmm6
653	paddd	%xmm6,%xmm4
654	movdqa	%xmm6,80(%ebx)
655	pxor	%xmm4,%xmm2
656	paddd	%xmm3,%xmm1
657	movdqa	%xmm2,%xmm0
658	pslld	$7,%xmm2
659	psrld	$25,%xmm0
660	pxor	%xmm1,%xmm7
661	por	%xmm0,%xmm2
662	pshufb	(%eax),%xmm7
663	movdqa	%xmm2,-16(%ebx)
664	paddd	%xmm7,%xmm5
665	pxor	%xmm5,%xmm3
666	movdqa	%xmm3,%xmm0
667	pslld	$12,%xmm3
668	psrld	$20,%xmm0
669	por	%xmm0,%xmm3
670	movdqa	-128(%ebx),%xmm0
671	paddd	%xmm3,%xmm1
672	movdqa	64(%ebx),%xmm6
673	pxor	%xmm1,%xmm7
674	movdqa	%xmm1,-80(%ebx)
675	pshufb	16(%eax),%xmm7
676	paddd	%xmm7,%xmm5
677	movdqa	%xmm7,96(%ebx)
678	pxor	%xmm5,%xmm3
679	movdqa	%xmm3,%xmm1
680	pslld	$7,%xmm3
681	psrld	$25,%xmm1
682	por	%xmm1,%xmm3
683	decl	%edx
684	jnz	.L010loop
# Rounds finished: flush the register-resident vectors to the working area.
685	movdqa	%xmm3,-64(%ebx)
686	movdqa	%xmm4,(%ebx)
687	movdqa	%xmm5,16(%ebx)
688	movdqa	%xmm6,64(%ebx)
689	movdqa	%xmm7,96(%ebx)
# Output stage, done in four passes of four state vectors each (words 0..3,
# 4..7, 8..11, 12..15).  Every pass adds the initial state from the %ebp
# area, transposes the 4x4 dword matrix with punpck{l,h}dq / punpck{l,h}qdq
# so that each register holds 16 consecutive keystream bytes of one block,
# XORs with the input at displacements -128 / -64 / 0 / 64, and stores to
# the output at the same displacements.
690	movdqa	-112(%ebx),%xmm1
691	movdqa	-96(%ebx),%xmm2
692	movdqa	-80(%ebx),%xmm3
693	paddd	-128(%ebp),%xmm0
694	paddd	-112(%ebp),%xmm1
695	paddd	-96(%ebp),%xmm2
696	paddd	-80(%ebp),%xmm3
697	movdqa	%xmm0,%xmm6
698	punpckldq	%xmm1,%xmm0
699	movdqa	%xmm2,%xmm7
700	punpckldq	%xmm3,%xmm2
701	punpckhdq	%xmm1,%xmm6
702	punpckhdq	%xmm3,%xmm7
703	movdqa	%xmm0,%xmm1
704	punpcklqdq	%xmm2,%xmm0
705	movdqa	%xmm6,%xmm3
706	punpcklqdq	%xmm7,%xmm6
707	punpckhqdq	%xmm2,%xmm1
708	punpckhqdq	%xmm7,%xmm3
709	movdqu	-128(%esi),%xmm4
710	movdqu	-64(%esi),%xmm5
711	movdqu	(%esi),%xmm2
712	movdqu	64(%esi),%xmm7
713	leal	16(%esi),%esi
714	pxor	%xmm0,%xmm4
715	movdqa	-64(%ebx),%xmm0
716	pxor	%xmm1,%xmm5
717	movdqa	-48(%ebx),%xmm1
718	pxor	%xmm2,%xmm6
719	movdqa	-32(%ebx),%xmm2
720	pxor	%xmm3,%xmm7
721	movdqa	-16(%ebx),%xmm3
722	movdqu	%xmm4,-128(%edi)
723	movdqu	%xmm5,-64(%edi)
724	movdqu	%xmm6,(%edi)
725	movdqu	%xmm7,64(%edi)
726	leal	16(%edi),%edi
# Second pass: state words 4..7.
727	paddd	-64(%ebp),%xmm0
728	paddd	-48(%ebp),%xmm1
729	paddd	-32(%ebp),%xmm2
730	paddd	-16(%ebp),%xmm3
731	movdqa	%xmm0,%xmm6
732	punpckldq	%xmm1,%xmm0
733	movdqa	%xmm2,%xmm7
734	punpckldq	%xmm3,%xmm2
735	punpckhdq	%xmm1,%xmm6
736	punpckhdq	%xmm3,%xmm7
737	movdqa	%xmm0,%xmm1
738	punpcklqdq	%xmm2,%xmm0
739	movdqa	%xmm6,%xmm3
740	punpcklqdq	%xmm7,%xmm6
741	punpckhqdq	%xmm2,%xmm1
742	punpckhqdq	%xmm7,%xmm3
743	movdqu	-128(%esi),%xmm4
744	movdqu	-64(%esi),%xmm5
745	movdqu	(%esi),%xmm2
746	movdqu	64(%esi),%xmm7
747	leal	16(%esi),%esi
748	pxor	%xmm0,%xmm4
749	movdqa	(%ebx),%xmm0
750	pxor	%xmm1,%xmm5
751	movdqa	16(%ebx),%xmm1
752	pxor	%xmm2,%xmm6
753	movdqa	32(%ebx),%xmm2
754	pxor	%xmm3,%xmm7
755	movdqa	48(%ebx),%xmm3
756	movdqu	%xmm4,-128(%edi)
757	movdqu	%xmm5,-64(%edi)
758	movdqu	%xmm6,(%edi)
759	movdqu	%xmm7,64(%edi)
760	leal	16(%edi),%edi
# Third pass: state words 8..11.
761	paddd	(%ebp),%xmm0
762	paddd	16(%ebp),%xmm1
763	paddd	32(%ebp),%xmm2
764	paddd	48(%ebp),%xmm3
765	movdqa	%xmm0,%xmm6
766	punpckldq	%xmm1,%xmm0
767	movdqa	%xmm2,%xmm7
768	punpckldq	%xmm3,%xmm2
769	punpckhdq	%xmm1,%xmm6
770	punpckhdq	%xmm3,%xmm7
771	movdqa	%xmm0,%xmm1
772	punpcklqdq	%xmm2,%xmm0
773	movdqa	%xmm6,%xmm3
774	punpcklqdq	%xmm7,%xmm6
775	punpckhqdq	%xmm2,%xmm1
776	punpckhqdq	%xmm7,%xmm3
777	movdqu	-128(%esi),%xmm4
778	movdqu	-64(%esi),%xmm5
779	movdqu	(%esi),%xmm2
780	movdqu	64(%esi),%xmm7
781	leal	16(%esi),%esi
782	pxor	%xmm0,%xmm4
783	movdqa	64(%ebx),%xmm0
784	pxor	%xmm1,%xmm5
785	movdqa	80(%ebx),%xmm1
786	pxor	%xmm2,%xmm6
787	movdqa	96(%ebx),%xmm2
788	pxor	%xmm3,%xmm7
789	movdqa	112(%ebx),%xmm3
790	movdqu	%xmm4,-128(%edi)
791	movdqu	%xmm5,-64(%edi)
792	movdqu	%xmm6,(%edi)
793	movdqu	%xmm7,64(%edi)
794	leal	16(%edi),%edi
# Fourth pass: state words 12..15 (64(%ebp) is the per-lane counter vector).
795	paddd	64(%ebp),%xmm0
796	paddd	80(%ebp),%xmm1
797	paddd	96(%ebp),%xmm2
798	paddd	112(%ebp),%xmm3
799	movdqa	%xmm0,%xmm6
800	punpckldq	%xmm1,%xmm0
801	movdqa	%xmm2,%xmm7
802	punpckldq	%xmm3,%xmm2
803	punpckhdq	%xmm1,%xmm6
804	punpckhdq	%xmm3,%xmm7
805	movdqa	%xmm0,%xmm1
806	punpcklqdq	%xmm2,%xmm0
807	movdqa	%xmm6,%xmm3
808	punpcklqdq	%xmm7,%xmm6
809	punpckhqdq	%xmm2,%xmm1
810	punpckhqdq	%xmm7,%xmm3
811	movdqu	-128(%esi),%xmm4
812	movdqu	-64(%esi),%xmm5
813	movdqu	(%esi),%xmm2
814	movdqu	64(%esi),%xmm7
# Three earlier steps of 16 plus this step of 208 advance the pointer by 256.
815	leal	208(%esi),%esi
816	pxor	%xmm0,%xmm4
817	pxor	%xmm1,%xmm5
818	pxor	%xmm2,%xmm6
819	pxor	%xmm3,%xmm7
820	movdqu	%xmm4,-128(%edi)
821	movdqu	%xmm5,-64(%edi)
822	movdqu	%xmm6,(%edi)
823	movdqu	%xmm7,64(%edi)
824	leal	208(%edi),%edi
# %ecx was biased by -256, so no borrow means at least 256 more bytes.
825	subl	$256,%ecx
826	jnc	.L009outer_loop
# Undo the bias; zero means the input was a multiple of 256 bytes.
827	addl	$256,%ecx
828	jz	.L011done
# Hand the remainder to the one-block path: restore the counter and key
# pointers, remove the +128 pointer bias, and rebuild the counter/nonce
# block in %xmm3 as { lane 0 of the saved counter vector + 4, original
# words 1..3 } using the tables at 96(%eax) and 112(%eax).
829	movl	520(%esp),%ebx
830	leal	-128(%esi),%esi
831	movl	516(%esp),%edx
832	leal	-128(%edi),%edi
833	movd	64(%ebp),%xmm2
834	movdqu	(%ebx),%xmm3
835	paddd	96(%eax),%xmm2
836	pand	112(%eax),%xmm3
837	por	%xmm2,%xmm3
838.L0081x:
# ---- One-block path ----
# State rows: %xmm0 = constants, %xmm1 / %xmm2 = key halves, %xmm3 = counter
# block.  %xmm6 / %xmm7 hold the two pshufb masks.  The initial rows are
# kept at 0..63(%esp) for the final addition.
839	movdqa	32(%eax),%xmm0
840	movdqu	(%edx),%xmm1
841	movdqu	16(%edx),%xmm2
842	movdqa	(%eax),%xmm6
843	movdqa	16(%eax),%xmm7
# NOTE(review): this 4-byte store is overwritten by the 16-byte store of
# %xmm3 to the same address four instructions below.
844	movl	%ebp,48(%esp)
845	movdqa	%xmm0,(%esp)
846	movdqa	%xmm1,16(%esp)
847	movdqa	%xmm2,32(%esp)
848	movdqa	%xmm3,48(%esp)
849	movl	$10,%edx
850	jmp	.L012loop1x
851.align	16
852.L013outer1x:
# Next block: reload the initial rows and add (1,0,0,0) from 80(%eax) to the
# counter row, saving the new counter row back to 48(%esp).
853	movdqa	80(%eax),%xmm3
854	movdqa	(%esp),%xmm0
855	movdqa	16(%esp),%xmm1
856	movdqa	32(%esp),%xmm2
857	paddd	48(%esp),%xmm3
858	movl	$10,%edx
859	movdqa	%xmm3,48(%esp)
860	jmp	.L012loop1x
861.align	16
862.L012loop1x:
# One iteration: the add / xor / rotate sequence on the four rows, a lane
# rotation of rows 1..3 (pshufd $57 / $78 / $147), the same sequence again,
# and the inverse lane rotation (pshufd $147 / $78 / $57).
863	paddd	%xmm1,%xmm0
864	pxor	%xmm0,%xmm3
# Hand-encoded pshufb %xmm6,%xmm3 (66 0F 38 00 DE): rotate each dword by 16.
865.byte	102,15,56,0,222
866	paddd	%xmm3,%xmm2
867	pxor	%xmm2,%xmm1
868	movdqa	%xmm1,%xmm4
869	psrld	$20,%xmm1
870	pslld	$12,%xmm4
871	por	%xmm4,%xmm1
872	paddd	%xmm1,%xmm0
873	pxor	%xmm0,%xmm3
# Hand-encoded pshufb %xmm7,%xmm3 (66 0F 38 00 DF): rotate each dword by 8.
874.byte	102,15,56,0,223
875	paddd	%xmm3,%xmm2
876	pxor	%xmm2,%xmm1
877	movdqa	%xmm1,%xmm4
878	psrld	$25,%xmm1
879	pslld	$7,%xmm4
880	por	%xmm4,%xmm1
881	pshufd	$78,%xmm2,%xmm2
882	pshufd	$57,%xmm1,%xmm1
883	pshufd	$147,%xmm3,%xmm3
884	nop
885	paddd	%xmm1,%xmm0
886	pxor	%xmm0,%xmm3
887.byte	102,15,56,0,222
888	paddd	%xmm3,%xmm2
889	pxor	%xmm2,%xmm1
890	movdqa	%xmm1,%xmm4
891	psrld	$20,%xmm1
892	pslld	$12,%xmm4
893	por	%xmm4,%xmm1
894	paddd	%xmm1,%xmm0
895	pxor	%xmm0,%xmm3
896.byte	102,15,56,0,223
897	paddd	%xmm3,%xmm2
898	pxor	%xmm2,%xmm1
899	movdqa	%xmm1,%xmm4
900	psrld	$25,%xmm1
901	pslld	$7,%xmm4
902	por	%xmm4,%xmm1
903	pshufd	$78,%xmm2,%xmm2
904	pshufd	$147,%xmm1,%xmm1
905	pshufd	$57,%xmm3,%xmm3
906	decl	%edx
907	jnz	.L012loop1x
# Add the initial rows to obtain the 64-byte keystream block.
908	paddd	(%esp),%xmm0
909	paddd	16(%esp),%xmm1
910	paddd	32(%esp),%xmm2
911	paddd	48(%esp),%xmm3
912	cmpl	$64,%ecx
913	jb	.L014tail
# Full block: XOR with 64 input bytes, store, advance, loop while bytes
# remain.
914	movdqu	(%esi),%xmm4
915	movdqu	16(%esi),%xmm5
916	pxor	%xmm4,%xmm0
917	movdqu	32(%esi),%xmm4
918	pxor	%xmm5,%xmm1
919	movdqu	48(%esi),%xmm5
920	pxor	%xmm4,%xmm2
921	pxor	%xmm5,%xmm3
922	leal	64(%esi),%esi
923	movdqu	%xmm0,(%edi)
924	movdqu	%xmm1,16(%edi)
925	movdqu	%xmm2,32(%edi)
926	movdqu	%xmm3,48(%edi)
927	leal	64(%edi),%edi
928	subl	$64,%ecx
929	jnz	.L013outer1x
930	jmp	.L011done
931.L014tail:
# Partial final block (1..63 bytes in %ecx): spill the keystream to
# 0..63(%esp) and XOR byte by byte, %ebp being the byte index.
932	movdqa	%xmm0,(%esp)
933	movdqa	%xmm1,16(%esp)
934	movdqa	%xmm2,32(%esp)
935	movdqa	%xmm3,48(%esp)
936	xorl	%eax,%eax
937	xorl	%edx,%edx
938	xorl	%ebp,%ebp
939.L015tail_loop:
# out[i] = keystream[i] ^ in[i]; the index is bumped before the store, hence
# the -1 displacement.
940	movb	(%esp,%ebp,1),%al
941	movb	(%esi,%ebp,1),%dl
942	leal	1(%ebp),%ebp
943	xorb	%dl,%al
944	movb	%al,-1(%edi,%ebp,1)
945	decl	%ecx
946	jnz	.L015tail_loop
947.L011done:
# Restore the pre-alignment stack pointer saved in the prologue, then the
# four pushed registers.
948	movl	512(%esp),%esp
949	popl	%edi
950	popl	%esi
951	popl	%ebx
952	popl	%ebp
953	ret
954.size	_ChaCha20_ssse3,.-_ChaCha20_ssse3
# Constant tables for _ChaCha20_ssse3, addressed as displacement(%eax) with
# %eax = .Lssse3_data.  They are loaded with movdqa, so the 64-byte
# alignment matters.
#   offset   0: pshufb mask that rotates every dword left by 16 bits
#   offset  16: pshufb mask that rotates every dword left by 8 bits
#   offset  32: the four constant state words ("expand 32-byte k")
#   offset  48: (0,1,2,3)    per-lane counter offsets for the 4-block path
#   offset  64: (4,4,4,4)    counter step per 4-block group
#   offset  80: (1,0,0,0)    counter step for the 1-block path
#   offset  96: (4,0,0,0)    counter step when leaving the 4-block path
#   offset 112: (0,-1,-1,-1) mask that clears counter word 0
955.align	64
956.Lssse3_data:
957.byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
958.byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
959.long	1634760805,857760878,2036477234,1797285236
960.long	0,1,2,3
961.long	4,4,4,4
962.long	1,0,0,0
963.long	4,0,0,0
964.long	0,-1,-1,-1
965.align	64
# NUL-terminated ASCII attribution string:
# "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>"
966.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
967.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
968.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
969.byte	114,103,62,0
970#endif
971.section	.note.GNU-stack,"",@progbits
972