• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# This file is generated from a similarly-named Perl script in the BoringSSL
2# source tree. Do not edit by hand.
3
4#if defined(__i386__)
5#if defined(BORINGSSL_PREFIX)
6#include <boringssl_prefix_symbols_asm.h>
7#endif
8.text
# ChaCha20_ctr32 -- XOR a buffer with ChaCha20 keystream (32-bit x86, stack arguments).
#
# After the four register pushes below the stack arguments are read as:
#   arg0 at 20(%esp): output pointer
#   arg1 at 24(%esp): input pointer
#   arg2 at 28(%esp): length in bytes (zero returns immediately)
#   arg3 at 32(%esp): pointer to 8 key words
#   arg4 at 36(%esp): pointer to 4 counter/nonce words; word 0 advances by one per 64-byte block
# NOTE(review): presumably matches a C prototype of the form (out, in, len, key[8], counter[4])
# -- confirm against the C declaration.
#
# Saves and restores %ebp, %ebx, %esi and %edi. When both capability bits tested below are
# set, control is transferred to .Lssse3_shortcut inside ChaCha20_ssse3 with %eax holding
# the run-time address of .Lpic_point.
9.globl	ChaCha20_ctr32
10.hidden	ChaCha20_ctr32
11.type	ChaCha20_ctr32,@function
12.align	16
13ChaCha20_ctr32:
14.L_ChaCha20_ctr32_begin:
15	pushl	%ebp
16	pushl	%ebx
17	pushl	%esi
18	pushl	%edi
19	xorl	%eax,%eax
20	cmpl	28(%esp),%eax	# length == 0: nothing to process
21	je	.L000no_data
22	call	.Lpic_point	# call/pop yields the address of .Lpic_point for position-independent addressing
23.Lpic_point:
24	popl	%eax
25	leal	OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp
# Take the SSSE3 path only when both bits are set (presumably the FXSR and SSSE3 feature
# flags -- confirm against the OPENSSL_ia32cap_P definition).
26	testl	$16777216,(%ebp)	# bit 24 of capability word 0
27	jz	.L001x86
28	testl	$512,4(%ebp)	# bit 9 of capability word 1
29	jz	.L001x86
30	jmp	.Lssse3_shortcut	# %eax is still the address of .Lpic_point here
# Scalar path. Stack frame after the subl $132 below (offsets from %esp):
#   0..60     working state words x0..x15 (slot = 4*index); some words live in registers
#   80..108   copy of the 8 key words (input state words 4..11)
#   112..124  copy of the 4 counter/nonce words (input state words 12..15)
#   128       remaining iterations of the round loop
#   152/156/160  the caller argument slots for out/in/len, reused to hold the running values
31.L001x86:
32	movl	32(%esp),%esi	# key pointer
33	movl	36(%esp),%edi	# counter pointer
34	subl	$132,%esp
35	movl	(%esi),%eax
36	movl	4(%esi),%ebx
37	movl	8(%esi),%ecx
38	movl	12(%esi),%edx
39	movl	%eax,80(%esp)
40	movl	%ebx,84(%esp)
41	movl	%ecx,88(%esp)
42	movl	%edx,92(%esp)
43	movl	16(%esi),%eax
44	movl	20(%esi),%ebx
45	movl	24(%esi),%ecx
46	movl	28(%esi),%edx
47	movl	%eax,96(%esp)
48	movl	%ebx,100(%esp)
49	movl	%ecx,104(%esp)
50	movl	%edx,108(%esp)
51	movl	(%edi),%eax
52	movl	4(%edi),%ebx
53	movl	8(%edi),%ecx
54	movl	12(%edi),%edx
55	subl	$1,%eax	# pre-decrement: every block start below adds 1 before use
56	movl	%eax,112(%esp)
57	movl	%ebx,116(%esp)
58	movl	%ecx,120(%esp)
59	movl	%edx,124(%esp)
60	jmp	.L002entry
61.align	16
# Reached after each full 64-byte block: write the advanced input pointer, output pointer
# and remaining length back into the argument slots.
62.L003outer_loop:
63	movl	%ebx,156(%esp)
64	movl	%eax,152(%esp)
65	movl	%ecx,160(%esp)
# Build the initial state for one block. The four immediates are the little-endian words
# spelling "expand 32-byte k". On entry to the round loop:
#   %eax=x0  %ebp=x4  %ecx=x8  %esi=x9  %edx=x12  %ebx=iteration count
# and the remaining words sit in their stack slots.
66.L002entry:
67	movl	$1634760805,%eax
68	movl	$857760878,4(%esp)
69	movl	$2036477234,8(%esp)
70	movl	$1797285236,12(%esp)
71	movl	84(%esp),%ebx
72	movl	88(%esp),%ebp
73	movl	104(%esp),%ecx
74	movl	108(%esp),%esi
75	movl	116(%esp),%edx
76	movl	120(%esp),%edi
77	movl	%ebx,20(%esp)
78	movl	%ebp,24(%esp)
79	movl	%ecx,40(%esp)
80	movl	%esi,44(%esp)
81	movl	%edx,52(%esp)
82	movl	%edi,56(%esp)
83	movl	92(%esp),%ebx
84	movl	124(%esp),%edi
85	movl	112(%esp),%edx
86	movl	80(%esp),%ebp
87	movl	96(%esp),%ecx
88	movl	100(%esp),%esi
89	addl	$1,%edx	# block counter for this block; stored back so it persists
90	movl	%ebx,28(%esp)
91	movl	%edi,60(%esp)
92	movl	%edx,112(%esp)
93	movl	$10,%ebx	# 10 iterations of 8 quarter-rounds each
94	jmp	.L004loop
95.align	16
# Round loop. Each iteration runs eight quarter-rounds (add, xor, rotate by 16/12/8/7):
# four on the columns (0,4,8,12) (1,5,9,13) (2,6,10,14) (3,7,11,15), then four on the
# diagonals (0,5,10,15) (1,6,11,12) (2,7,8,13) (3,4,9,14). Loads and stores for the next
# quarter-round are interleaved with the end of the current one.
96.L004loop:
97	addl	%ebp,%eax	# begin quarter-round on words 0,4,8,12
98	movl	%ebx,128(%esp)	# spill the iteration counter; %ebx is needed as a work register
99	movl	%ebp,%ebx
100	xorl	%eax,%edx
101	roll	$16,%edx
102	addl	%edx,%ecx
103	xorl	%ecx,%ebx
104	movl	52(%esp),%edi
105	roll	$12,%ebx
106	movl	20(%esp),%ebp
107	addl	%ebx,%eax
108	xorl	%eax,%edx
109	movl	%eax,(%esp)
110	roll	$8,%edx
111	movl	4(%esp),%eax
112	addl	%edx,%ecx
113	movl	%edx,48(%esp)
114	xorl	%ecx,%ebx
115	addl	%ebp,%eax	# begin quarter-round on words 1,5,9,13
116	roll	$7,%ebx
117	xorl	%eax,%edi
118	movl	%ecx,32(%esp)
119	roll	$16,%edi
120	movl	%ebx,16(%esp)
121	addl	%edi,%esi
122	movl	40(%esp),%ecx
123	xorl	%esi,%ebp
124	movl	56(%esp),%edx
125	roll	$12,%ebp
126	movl	24(%esp),%ebx
127	addl	%ebp,%eax
128	xorl	%eax,%edi
129	movl	%eax,4(%esp)
130	roll	$8,%edi
131	movl	8(%esp),%eax
132	addl	%edi,%esi
133	movl	%edi,52(%esp)
134	xorl	%esi,%ebp
135	addl	%ebx,%eax	# begin quarter-round on words 2,6,10,14
136	roll	$7,%ebp
137	xorl	%eax,%edx
138	movl	%esi,36(%esp)
139	roll	$16,%edx
140	movl	%ebp,20(%esp)
141	addl	%edx,%ecx
142	movl	44(%esp),%esi
143	xorl	%ecx,%ebx
144	movl	60(%esp),%edi
145	roll	$12,%ebx
146	movl	28(%esp),%ebp
147	addl	%ebx,%eax
148	xorl	%eax,%edx
149	movl	%eax,8(%esp)
150	roll	$8,%edx
151	movl	12(%esp),%eax
152	addl	%edx,%ecx
153	movl	%edx,56(%esp)
154	xorl	%ecx,%ebx
155	addl	%ebp,%eax	# begin quarter-round on words 3,7,11,15
156	roll	$7,%ebx
157	xorl	%eax,%edi
158	roll	$16,%edi
159	movl	%ebx,24(%esp)
160	addl	%edi,%esi
161	xorl	%esi,%ebp
162	roll	$12,%ebp
163	movl	20(%esp),%ebx
164	addl	%ebp,%eax
165	xorl	%eax,%edi
166	movl	%eax,12(%esp)
167	roll	$8,%edi
168	movl	(%esp),%eax
169	addl	%edi,%esi
170	movl	%edi,%edx	# x15 stays in a register for the first diagonal quarter-round
171	xorl	%esi,%ebp
172	addl	%ebx,%eax	# begin quarter-round on words 0,5,10,15
173	roll	$7,%ebp
174	xorl	%eax,%edx
175	roll	$16,%edx
176	movl	%ebp,28(%esp)
177	addl	%edx,%ecx
178	xorl	%ecx,%ebx
179	movl	48(%esp),%edi
180	roll	$12,%ebx
181	movl	24(%esp),%ebp
182	addl	%ebx,%eax
183	xorl	%eax,%edx
184	movl	%eax,(%esp)
185	roll	$8,%edx
186	movl	4(%esp),%eax
187	addl	%edx,%ecx
188	movl	%edx,60(%esp)
189	xorl	%ecx,%ebx
190	addl	%ebp,%eax	# begin quarter-round on words 1,6,11,12
191	roll	$7,%ebx
192	xorl	%eax,%edi
193	movl	%ecx,40(%esp)
194	roll	$16,%edi
195	movl	%ebx,20(%esp)
196	addl	%edi,%esi
197	movl	32(%esp),%ecx
198	xorl	%esi,%ebp
199	movl	52(%esp),%edx
200	roll	$12,%ebp
201	movl	28(%esp),%ebx
202	addl	%ebp,%eax
203	xorl	%eax,%edi
204	movl	%eax,4(%esp)
205	roll	$8,%edi
206	movl	8(%esp),%eax
207	addl	%edi,%esi
208	movl	%edi,48(%esp)
209	xorl	%esi,%ebp
210	addl	%ebx,%eax	# begin quarter-round on words 2,7,8,13
211	roll	$7,%ebp
212	xorl	%eax,%edx
213	movl	%esi,44(%esp)
214	roll	$16,%edx
215	movl	%ebp,24(%esp)
216	addl	%edx,%ecx
217	movl	36(%esp),%esi
218	xorl	%ecx,%ebx
219	movl	56(%esp),%edi
220	roll	$12,%ebx
221	movl	16(%esp),%ebp
222	addl	%ebx,%eax
223	xorl	%eax,%edx
224	movl	%eax,8(%esp)
225	roll	$8,%edx
226	movl	12(%esp),%eax
227	addl	%edx,%ecx
228	movl	%edx,52(%esp)
229	xorl	%ecx,%ebx
230	addl	%ebp,%eax	# begin quarter-round on words 3,4,9,14
231	roll	$7,%ebx
232	xorl	%eax,%edi
233	roll	$16,%edi
234	movl	%ebx,28(%esp)
235	addl	%edi,%esi
236	xorl	%esi,%ebp
237	movl	48(%esp),%edx
238	roll	$12,%ebp
239	movl	128(%esp),%ebx	# reload the iteration counter
240	addl	%ebp,%eax
241	xorl	%eax,%edi
242	movl	%eax,12(%esp)
243	roll	$8,%edi
244	movl	(%esp),%eax
245	addl	%edi,%esi
246	movl	%edi,56(%esp)
247	xorl	%esi,%ebp
248	roll	$7,%ebp
249	decl	%ebx
250	jnz	.L004loop
# Rounds finished; registers hold x0 (%eax), x4 (%ebp), x8 (%ecx), x9 (%esi), x12 (%edx)
# and x14 (%edi). Add the input state back to the register-held words, then pick the
# full-block or the short-tail output path.
251	movl	160(%esp),%ebx	# remaining length
252	addl	$1634760805,%eax
253	addl	80(%esp),%ebp
254	addl	96(%esp),%ecx
255	addl	100(%esp),%esi
256	cmpl	$64,%ebx
257	jb	.L005tail
# Full block: XOR 64 input bytes with the keystream words and store them to the output.
258	movl	156(%esp),%ebx	# input pointer
259	addl	112(%esp),%edx
260	addl	120(%esp),%edi
261	xorl	(%ebx),%eax
262	xorl	16(%ebx),%ebp
263	movl	%eax,(%esp)	# park output word 0; %eax is needed for the output pointer
264	movl	152(%esp),%eax	# output pointer
265	xorl	32(%ebx),%ecx
266	xorl	36(%ebx),%esi
267	xorl	48(%ebx),%edx
268	xorl	56(%ebx),%edi
269	movl	%ebp,16(%eax)
270	movl	%ecx,32(%eax)
271	movl	%esi,36(%eax)
272	movl	%edx,48(%eax)
273	movl	%edi,56(%eax)
274	movl	4(%esp),%ebp
275	movl	8(%esp),%ecx
276	movl	12(%esp),%esi
277	movl	20(%esp),%edx
278	movl	24(%esp),%edi
279	addl	$857760878,%ebp
280	addl	$2036477234,%ecx
281	addl	$1797285236,%esi
282	addl	84(%esp),%edx
283	addl	88(%esp),%edi
284	xorl	4(%ebx),%ebp
285	xorl	8(%ebx),%ecx
286	xorl	12(%ebx),%esi
287	xorl	20(%ebx),%edx
288	xorl	24(%ebx),%edi
289	movl	%ebp,4(%eax)
290	movl	%ecx,8(%eax)
291	movl	%esi,12(%eax)
292	movl	%edx,20(%eax)
293	movl	%edi,24(%eax)
294	movl	28(%esp),%ebp
295	movl	40(%esp),%ecx
296	movl	44(%esp),%esi
297	movl	52(%esp),%edx
298	movl	60(%esp),%edi
299	addl	92(%esp),%ebp
300	addl	104(%esp),%ecx
301	addl	108(%esp),%esi
302	addl	116(%esp),%edx
303	addl	124(%esp),%edi
304	xorl	28(%ebx),%ebp
305	xorl	40(%ebx),%ecx
306	xorl	44(%ebx),%esi
307	xorl	52(%ebx),%edx
308	xorl	60(%ebx),%edi
309	leal	64(%ebx),%ebx	# input pointer += 64
310	movl	%ebp,28(%eax)
311	movl	(%esp),%ebp	# output word 0 parked earlier
312	movl	%ecx,40(%eax)
313	movl	160(%esp),%ecx	# remaining length
314	movl	%esi,44(%eax)
315	movl	%edx,52(%eax)
316	movl	%edi,60(%eax)
317	movl	%ebp,(%eax)
318	leal	64(%eax),%eax	# output pointer += 64
319	subl	$64,%ecx
320	jnz	.L003outer_loop
321	jmp	.L006done
# Fewer than 64 bytes remain (%ebx = remaining length, non-zero). Finish the keystream
# block into 0..63(%esp), then XOR it into the output one byte at a time.
322.L005tail:
323	addl	112(%esp),%edx
324	addl	120(%esp),%edi
325	movl	%eax,(%esp)
326	movl	%ebp,16(%esp)
327	movl	%ecx,32(%esp)
328	movl	%esi,36(%esp)
329	movl	%edx,48(%esp)
330	movl	%edi,56(%esp)
331	movl	4(%esp),%ebp
332	movl	8(%esp),%ecx
333	movl	12(%esp),%esi
334	movl	20(%esp),%edx
335	movl	24(%esp),%edi
336	addl	$857760878,%ebp
337	addl	$2036477234,%ecx
338	addl	$1797285236,%esi
339	addl	84(%esp),%edx
340	addl	88(%esp),%edi
341	movl	%ebp,4(%esp)
342	movl	%ecx,8(%esp)
343	movl	%esi,12(%esp)
344	movl	%edx,20(%esp)
345	movl	%edi,24(%esp)
346	movl	28(%esp),%ebp
347	movl	40(%esp),%ecx
348	movl	44(%esp),%esi
349	movl	52(%esp),%edx
350	movl	60(%esp),%edi
351	addl	92(%esp),%ebp
352	addl	104(%esp),%ecx
353	addl	108(%esp),%esi
354	addl	116(%esp),%edx
355	addl	124(%esp),%edi
356	movl	%ebp,28(%esp)
357	movl	156(%esp),%ebp	# input pointer
358	movl	%ecx,40(%esp)
359	movl	152(%esp),%ecx	# output pointer
360	movl	%esi,44(%esp)
361	xorl	%esi,%esi	# byte index = 0
362	movl	%edx,52(%esp)
363	movl	%edi,60(%esp)
364	xorl	%eax,%eax
365	xorl	%edx,%edx
# out[i] = in[i] ^ keystream[i]; %esi is incremented before the store, hence the -1 displacement.
366.L007tail_loop:
367	movb	(%esi,%ebp,1),%al
368	movb	(%esp,%esi,1),%dl
369	leal	1(%esi),%esi
370	xorb	%dl,%al
371	movb	%al,-1(%ecx,%esi,1)
372	decl	%ebx
373	jnz	.L007tail_loop
374.L006done:
375	addl	$132,%esp	# release the scalar-path frame
376.L000no_data:
377	popl	%edi
378	popl	%esi
379	popl	%ebx
380	popl	%ebp
381	ret
382.size	ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
# ChaCha20_ssse3 -- SSE2/SSSE3 implementation; same stack arguments as ChaCha20_ctr32:
#   arg0 at 20(%esp): output pointer     arg1 at 24(%esp): input pointer
#   arg2 at 28(%esp): length in bytes    arg3 at 32(%esp): pointer to 32 key bytes
#   arg4 at 36(%esp): pointer to the 16-byte counter/nonce block
# (offsets are those seen at .Lssse3_shortcut, after four register pushes).
#
# Inputs of 256 bytes or more are processed four blocks at a time, with each state word
# held as a vector of four lanes (one lane per block); whatever remains, and any input
# shorter than 256 bytes, goes through a one-block-at-a-time loop with a byte-wise tail.
#
# NOTE(review): .Lssse3_shortcut expects %eax to hold the run-time address of .Lpic_point.
# The jump from ChaCha20_ctr32 provides that, but nothing between the ChaCha20_ssse3 label
# and the leal that uses %eax sets it, so a direct call to this symbol appears to depend
# on whatever %eax holds at entry -- confirm that no caller enters here directly.
383.globl	ChaCha20_ssse3
384.hidden	ChaCha20_ssse3
385.type	ChaCha20_ssse3,@function
386.align	16
387ChaCha20_ssse3:
388.L_ChaCha20_ssse3_begin:
389	pushl	%ebp
390	pushl	%ebx
391	pushl	%esi
392	pushl	%edi
393.Lssse3_shortcut:
394	movl	20(%esp),%edi	# output pointer
395	movl	24(%esp),%esi	# input pointer
396	movl	28(%esp),%ecx	# length
397	movl	32(%esp),%edx	# key pointer
398	movl	36(%esp),%ebx	# counter pointer
# Carve out a 64-byte-aligned frame and remember the incoming stack pointer at 512(%esp).
399	movl	%esp,%ebp
400	subl	$524,%esp
401	andl	$-64,%esp
402	movl	%ebp,512(%esp)
403	leal	.Lssse3_data-.Lpic_point(%eax),%eax	# %eax = address of the constant tables
404	movdqu	(%ebx),%xmm3	# counter/nonce block
405	cmpl	$256,%ecx
406	jb	.L0081x
# Four-block setup. Every input state word is broadcast to all four lanes and kept in a
# 256-byte table addressed from %ebp = %esp+384 (word i at 16*i-128(%ebp)); the working
# copy uses the same layout from %ebx = %esp+128.
407	movl	%edx,516(%esp)	# keep the key and counter pointers for the remainder path
408	movl	%ebx,520(%esp)
409	subl	$256,%ecx	# length is kept biased by -256 while in the four-block loop
410	leal	384(%esp),%ebp
411	movdqu	(%edx),%xmm7
412	pshufd	$0,%xmm3,%xmm0
413	pshufd	$85,%xmm3,%xmm1
414	pshufd	$170,%xmm3,%xmm2
415	pshufd	$255,%xmm3,%xmm3
416	paddd	48(%eax),%xmm0	# per-lane block counters: +0,+1,+2,+3
417	pshufd	$0,%xmm7,%xmm4
418	pshufd	$85,%xmm7,%xmm5
419	psubd	64(%eax),%xmm0	# pre-subtract 4; the outer loop adds 4 before each use
420	pshufd	$170,%xmm7,%xmm6
421	pshufd	$255,%xmm7,%xmm7
422	movdqa	%xmm0,64(%ebp)
423	movdqa	%xmm1,80(%ebp)
424	movdqa	%xmm2,96(%ebp)
425	movdqa	%xmm3,112(%ebp)
426	movdqu	16(%edx),%xmm3
427	movdqa	%xmm4,-64(%ebp)
428	movdqa	%xmm5,-48(%ebp)
429	movdqa	%xmm6,-32(%ebp)
430	movdqa	%xmm7,-16(%ebp)
431	movdqa	32(%eax),%xmm7	# the four constant words from the table
432	leal	128(%esp),%ebx
433	pshufd	$0,%xmm3,%xmm0
434	pshufd	$85,%xmm3,%xmm1
435	pshufd	$170,%xmm3,%xmm2
436	pshufd	$255,%xmm3,%xmm3
437	pshufd	$0,%xmm7,%xmm4
438	pshufd	$85,%xmm7,%xmm5
439	pshufd	$170,%xmm7,%xmm6
440	pshufd	$255,%xmm7,%xmm7
441	movdqa	%xmm0,(%ebp)
442	movdqa	%xmm1,16(%ebp)
443	movdqa	%xmm2,32(%ebp)
444	movdqa	%xmm3,48(%ebp)
445	movdqa	%xmm4,-128(%ebp)
446	movdqa	%xmm5,-112(%ebp)
447	movdqa	%xmm6,-96(%ebp)
448	movdqa	%xmm7,-80(%ebp)
449	leal	128(%esi),%esi	# bias both data pointers by 128 for the -128..64 displacements used below
450	leal	128(%edi),%edi
451	jmp	.L009outer_loop
452.align	16
# One pass = four 64-byte blocks. Copy the input state into the working area, advance the
# per-lane counters by 4, and enter the round loop with
#   %xmm0=x0  %xmm3=x4  %xmm4=x8  %xmm5=x9  %xmm6=x12
# (all other words are read from the working area).
453.L009outer_loop:
454	movdqa	-112(%ebp),%xmm1
455	movdqa	-96(%ebp),%xmm2
456	movdqa	-80(%ebp),%xmm3
457	movdqa	-48(%ebp),%xmm5
458	movdqa	-32(%ebp),%xmm6
459	movdqa	-16(%ebp),%xmm7
460	movdqa	%xmm1,-112(%ebx)
461	movdqa	%xmm2,-96(%ebx)
462	movdqa	%xmm3,-80(%ebx)
463	movdqa	%xmm5,-48(%ebx)
464	movdqa	%xmm6,-32(%ebx)
465	movdqa	%xmm7,-16(%ebx)
466	movdqa	32(%ebp),%xmm2
467	movdqa	48(%ebp),%xmm3
468	movdqa	64(%ebp),%xmm4
469	movdqa	80(%ebp),%xmm5
470	movdqa	96(%ebp),%xmm6
471	movdqa	112(%ebp),%xmm7
472	paddd	64(%eax),%xmm4	# counters += 4 for this group of four blocks
473	movdqa	%xmm2,32(%ebx)
474	movdqa	%xmm3,48(%ebx)
475	movdqa	%xmm4,64(%ebx)
476	movdqa	%xmm5,80(%ebx)
477	movdqa	%xmm6,96(%ebx)
478	movdqa	%xmm7,112(%ebx)
479	movdqa	%xmm4,64(%ebp)	# persist the advanced counters in the input-state table
480	movdqa	-128(%ebp),%xmm0
481	movdqa	%xmm4,%xmm6
482	movdqa	-64(%ebp),%xmm3
483	movdqa	(%ebp),%xmm4
484	movdqa	16(%ebp),%xmm5
485	movl	$10,%edx	# 10 iterations of 8 quarter-rounds each
486	nop
487.align	16
# Round loop: eight interleaved quarter-rounds per iteration, columns first and then
# diagonals. Rotations by 16 and 8 use pshufb with the masks at (%eax) and 16(%eax);
# rotations by 12 and 7 use a shift-left/shift-right/or sequence.
488.L010loop:
489	paddd	%xmm3,%xmm0	# begin quarter-round on words 0,4,8,12
490	movdqa	%xmm3,%xmm2
491	pxor	%xmm0,%xmm6
492	pshufb	(%eax),%xmm6
493	paddd	%xmm6,%xmm4
494	pxor	%xmm4,%xmm2
495	movdqa	-48(%ebx),%xmm3
496	movdqa	%xmm2,%xmm1
497	pslld	$12,%xmm2
498	psrld	$20,%xmm1
499	por	%xmm1,%xmm2
500	movdqa	-112(%ebx),%xmm1
501	paddd	%xmm2,%xmm0
502	movdqa	80(%ebx),%xmm7
503	pxor	%xmm0,%xmm6
504	movdqa	%xmm0,-128(%ebx)
505	pshufb	16(%eax),%xmm6
506	paddd	%xmm6,%xmm4
507	movdqa	%xmm6,64(%ebx)
508	pxor	%xmm4,%xmm2
509	paddd	%xmm3,%xmm1	# begin quarter-round on words 1,5,9,13
510	movdqa	%xmm2,%xmm0
511	pslld	$7,%xmm2
512	psrld	$25,%xmm0
513	pxor	%xmm1,%xmm7
514	por	%xmm0,%xmm2
515	movdqa	%xmm4,(%ebx)
516	pshufb	(%eax),%xmm7
517	movdqa	%xmm2,-64(%ebx)
518	paddd	%xmm7,%xmm5
519	movdqa	32(%ebx),%xmm4
520	pxor	%xmm5,%xmm3
521	movdqa	-32(%ebx),%xmm2
522	movdqa	%xmm3,%xmm0
523	pslld	$12,%xmm3
524	psrld	$20,%xmm0
525	por	%xmm0,%xmm3
526	movdqa	-96(%ebx),%xmm0
527	paddd	%xmm3,%xmm1
528	movdqa	96(%ebx),%xmm6
529	pxor	%xmm1,%xmm7
530	movdqa	%xmm1,-112(%ebx)
531	pshufb	16(%eax),%xmm7
532	paddd	%xmm7,%xmm5
533	movdqa	%xmm7,80(%ebx)
534	pxor	%xmm5,%xmm3
535	paddd	%xmm2,%xmm0	# begin quarter-round on words 2,6,10,14
536	movdqa	%xmm3,%xmm1
537	pslld	$7,%xmm3
538	psrld	$25,%xmm1
539	pxor	%xmm0,%xmm6
540	por	%xmm1,%xmm3
541	movdqa	%xmm5,16(%ebx)
542	pshufb	(%eax),%xmm6
543	movdqa	%xmm3,-48(%ebx)
544	paddd	%xmm6,%xmm4
545	movdqa	48(%ebx),%xmm5
546	pxor	%xmm4,%xmm2
547	movdqa	-16(%ebx),%xmm3
548	movdqa	%xmm2,%xmm1
549	pslld	$12,%xmm2
550	psrld	$20,%xmm1
551	por	%xmm1,%xmm2
552	movdqa	-80(%ebx),%xmm1
553	paddd	%xmm2,%xmm0
554	movdqa	112(%ebx),%xmm7
555	pxor	%xmm0,%xmm6
556	movdqa	%xmm0,-96(%ebx)
557	pshufb	16(%eax),%xmm6
558	paddd	%xmm6,%xmm4
559	movdqa	%xmm6,96(%ebx)
560	pxor	%xmm4,%xmm2
561	paddd	%xmm3,%xmm1	# begin quarter-round on words 3,7,11,15
562	movdqa	%xmm2,%xmm0
563	pslld	$7,%xmm2
564	psrld	$25,%xmm0
565	pxor	%xmm1,%xmm7
566	por	%xmm0,%xmm2
567	pshufb	(%eax),%xmm7
568	movdqa	%xmm2,-32(%ebx)
569	paddd	%xmm7,%xmm5
570	pxor	%xmm5,%xmm3
571	movdqa	-48(%ebx),%xmm2
572	movdqa	%xmm3,%xmm0
573	pslld	$12,%xmm3
574	psrld	$20,%xmm0
575	por	%xmm0,%xmm3
576	movdqa	-128(%ebx),%xmm0
577	paddd	%xmm3,%xmm1
578	pxor	%xmm1,%xmm7
579	movdqa	%xmm1,-80(%ebx)
580	pshufb	16(%eax),%xmm7
581	paddd	%xmm7,%xmm5
582	movdqa	%xmm7,%xmm6	# x15 stays in a register for the first diagonal quarter-round
583	pxor	%xmm5,%xmm3
584	paddd	%xmm2,%xmm0	# begin quarter-round on words 0,5,10,15
585	movdqa	%xmm3,%xmm1
586	pslld	$7,%xmm3
587	psrld	$25,%xmm1
588	pxor	%xmm0,%xmm6
589	por	%xmm1,%xmm3
590	pshufb	(%eax),%xmm6
591	movdqa	%xmm3,-16(%ebx)
592	paddd	%xmm6,%xmm4
593	pxor	%xmm4,%xmm2
594	movdqa	-32(%ebx),%xmm3
595	movdqa	%xmm2,%xmm1
596	pslld	$12,%xmm2
597	psrld	$20,%xmm1
598	por	%xmm1,%xmm2
599	movdqa	-112(%ebx),%xmm1
600	paddd	%xmm2,%xmm0
601	movdqa	64(%ebx),%xmm7
602	pxor	%xmm0,%xmm6
603	movdqa	%xmm0,-128(%ebx)
604	pshufb	16(%eax),%xmm6
605	paddd	%xmm6,%xmm4
606	movdqa	%xmm6,112(%ebx)
607	pxor	%xmm4,%xmm2
608	paddd	%xmm3,%xmm1	# begin quarter-round on words 1,6,11,12
609	movdqa	%xmm2,%xmm0
610	pslld	$7,%xmm2
611	psrld	$25,%xmm0
612	pxor	%xmm1,%xmm7
613	por	%xmm0,%xmm2
614	movdqa	%xmm4,32(%ebx)
615	pshufb	(%eax),%xmm7
616	movdqa	%xmm2,-48(%ebx)
617	paddd	%xmm7,%xmm5
618	movdqa	(%ebx),%xmm4
619	pxor	%xmm5,%xmm3
620	movdqa	-16(%ebx),%xmm2
621	movdqa	%xmm3,%xmm0
622	pslld	$12,%xmm3
623	psrld	$20,%xmm0
624	por	%xmm0,%xmm3
625	movdqa	-96(%ebx),%xmm0
626	paddd	%xmm3,%xmm1
627	movdqa	80(%ebx),%xmm6
628	pxor	%xmm1,%xmm7
629	movdqa	%xmm1,-112(%ebx)
630	pshufb	16(%eax),%xmm7
631	paddd	%xmm7,%xmm5
632	movdqa	%xmm7,64(%ebx)
633	pxor	%xmm5,%xmm3
634	paddd	%xmm2,%xmm0	# begin quarter-round on words 2,7,8,13
635	movdqa	%xmm3,%xmm1
636	pslld	$7,%xmm3
637	psrld	$25,%xmm1
638	pxor	%xmm0,%xmm6
639	por	%xmm1,%xmm3
640	movdqa	%xmm5,48(%ebx)
641	pshufb	(%eax),%xmm6
642	movdqa	%xmm3,-32(%ebx)
643	paddd	%xmm6,%xmm4
644	movdqa	16(%ebx),%xmm5
645	pxor	%xmm4,%xmm2
646	movdqa	-64(%ebx),%xmm3
647	movdqa	%xmm2,%xmm1
648	pslld	$12,%xmm2
649	psrld	$20,%xmm1
650	por	%xmm1,%xmm2
651	movdqa	-80(%ebx),%xmm1
652	paddd	%xmm2,%xmm0
653	movdqa	96(%ebx),%xmm7
654	pxor	%xmm0,%xmm6
655	movdqa	%xmm0,-96(%ebx)
656	pshufb	16(%eax),%xmm6
657	paddd	%xmm6,%xmm4
658	movdqa	%xmm6,80(%ebx)
659	pxor	%xmm4,%xmm2
660	paddd	%xmm3,%xmm1	# begin quarter-round on words 3,4,9,14
661	movdqa	%xmm2,%xmm0
662	pslld	$7,%xmm2
663	psrld	$25,%xmm0
664	pxor	%xmm1,%xmm7
665	por	%xmm0,%xmm2
666	pshufb	(%eax),%xmm7
667	movdqa	%xmm2,-16(%ebx)
668	paddd	%xmm7,%xmm5
669	pxor	%xmm5,%xmm3
670	movdqa	%xmm3,%xmm0
671	pslld	$12,%xmm3
672	psrld	$20,%xmm0
673	por	%xmm0,%xmm3
674	movdqa	-128(%ebx),%xmm0
675	paddd	%xmm3,%xmm1
676	movdqa	64(%ebx),%xmm6
677	pxor	%xmm1,%xmm7
678	movdqa	%xmm1,-80(%ebx)
679	pshufb	16(%eax),%xmm7
680	paddd	%xmm7,%xmm5
681	movdqa	%xmm7,96(%ebx)
682	pxor	%xmm5,%xmm3
683	movdqa	%xmm3,%xmm1
684	pslld	$7,%xmm3
685	psrld	$25,%xmm1
686	por	%xmm1,%xmm3
687	decl	%edx
688	jnz	.L010loop
# Rounds finished: spill the words still held in registers, then emit the output in four
# groups of four state words. Each group adds the input state, transposes the 4x4 dwords
# so that every register holds 16 consecutive keystream bytes of one block, and XORs them
# with the input at the same position in blocks 0..3 (displacements -128, -64, 0, 64).
689	movdqa	%xmm3,-64(%ebx)
690	movdqa	%xmm4,(%ebx)
691	movdqa	%xmm5,16(%ebx)
692	movdqa	%xmm6,64(%ebx)
693	movdqa	%xmm7,96(%ebx)
694	movdqa	-112(%ebx),%xmm1
695	movdqa	-96(%ebx),%xmm2
696	movdqa	-80(%ebx),%xmm3
697	paddd	-128(%ebp),%xmm0	# state words 0..3
698	paddd	-112(%ebp),%xmm1
699	paddd	-96(%ebp),%xmm2
700	paddd	-80(%ebp),%xmm3
701	movdqa	%xmm0,%xmm6
702	punpckldq	%xmm1,%xmm0
703	movdqa	%xmm2,%xmm7
704	punpckldq	%xmm3,%xmm2
705	punpckhdq	%xmm1,%xmm6
706	punpckhdq	%xmm3,%xmm7
707	movdqa	%xmm0,%xmm1
708	punpcklqdq	%xmm2,%xmm0
709	movdqa	%xmm6,%xmm3
710	punpcklqdq	%xmm7,%xmm6
711	punpckhqdq	%xmm2,%xmm1
712	punpckhqdq	%xmm7,%xmm3
713	movdqu	-128(%esi),%xmm4
714	movdqu	-64(%esi),%xmm5
715	movdqu	(%esi),%xmm2
716	movdqu	64(%esi),%xmm7
717	leal	16(%esi),%esi
718	pxor	%xmm0,%xmm4
719	movdqa	-64(%ebx),%xmm0
720	pxor	%xmm1,%xmm5
721	movdqa	-48(%ebx),%xmm1
722	pxor	%xmm2,%xmm6
723	movdqa	-32(%ebx),%xmm2
724	pxor	%xmm3,%xmm7
725	movdqa	-16(%ebx),%xmm3
726	movdqu	%xmm4,-128(%edi)
727	movdqu	%xmm5,-64(%edi)
728	movdqu	%xmm6,(%edi)
729	movdqu	%xmm7,64(%edi)
730	leal	16(%edi),%edi
731	paddd	-64(%ebp),%xmm0	# state words 4..7
732	paddd	-48(%ebp),%xmm1
733	paddd	-32(%ebp),%xmm2
734	paddd	-16(%ebp),%xmm3
735	movdqa	%xmm0,%xmm6
736	punpckldq	%xmm1,%xmm0
737	movdqa	%xmm2,%xmm7
738	punpckldq	%xmm3,%xmm2
739	punpckhdq	%xmm1,%xmm6
740	punpckhdq	%xmm3,%xmm7
741	movdqa	%xmm0,%xmm1
742	punpcklqdq	%xmm2,%xmm0
743	movdqa	%xmm6,%xmm3
744	punpcklqdq	%xmm7,%xmm6
745	punpckhqdq	%xmm2,%xmm1
746	punpckhqdq	%xmm7,%xmm3
747	movdqu	-128(%esi),%xmm4
748	movdqu	-64(%esi),%xmm5
749	movdqu	(%esi),%xmm2
750	movdqu	64(%esi),%xmm7
751	leal	16(%esi),%esi
752	pxor	%xmm0,%xmm4
753	movdqa	(%ebx),%xmm0
754	pxor	%xmm1,%xmm5
755	movdqa	16(%ebx),%xmm1
756	pxor	%xmm2,%xmm6
757	movdqa	32(%ebx),%xmm2
758	pxor	%xmm3,%xmm7
759	movdqa	48(%ebx),%xmm3
760	movdqu	%xmm4,-128(%edi)
761	movdqu	%xmm5,-64(%edi)
762	movdqu	%xmm6,(%edi)
763	movdqu	%xmm7,64(%edi)
764	leal	16(%edi),%edi
765	paddd	(%ebp),%xmm0	# state words 8..11
766	paddd	16(%ebp),%xmm1
767	paddd	32(%ebp),%xmm2
768	paddd	48(%ebp),%xmm3
769	movdqa	%xmm0,%xmm6
770	punpckldq	%xmm1,%xmm0
771	movdqa	%xmm2,%xmm7
772	punpckldq	%xmm3,%xmm2
773	punpckhdq	%xmm1,%xmm6
774	punpckhdq	%xmm3,%xmm7
775	movdqa	%xmm0,%xmm1
776	punpcklqdq	%xmm2,%xmm0
777	movdqa	%xmm6,%xmm3
778	punpcklqdq	%xmm7,%xmm6
779	punpckhqdq	%xmm2,%xmm1
780	punpckhqdq	%xmm7,%xmm3
781	movdqu	-128(%esi),%xmm4
782	movdqu	-64(%esi),%xmm5
783	movdqu	(%esi),%xmm2
784	movdqu	64(%esi),%xmm7
785	leal	16(%esi),%esi
786	pxor	%xmm0,%xmm4
787	movdqa	64(%ebx),%xmm0
788	pxor	%xmm1,%xmm5
789	movdqa	80(%ebx),%xmm1
790	pxor	%xmm2,%xmm6
791	movdqa	96(%ebx),%xmm2
792	pxor	%xmm3,%xmm7
793	movdqa	112(%ebx),%xmm3
794	movdqu	%xmm4,-128(%edi)
795	movdqu	%xmm5,-64(%edi)
796	movdqu	%xmm6,(%edi)
797	movdqu	%xmm7,64(%edi)
798	leal	16(%edi),%edi
799	paddd	64(%ebp),%xmm0	# state words 12..15
800	paddd	80(%ebp),%xmm1
801	paddd	96(%ebp),%xmm2
802	paddd	112(%ebp),%xmm3
803	movdqa	%xmm0,%xmm6
804	punpckldq	%xmm1,%xmm0
805	movdqa	%xmm2,%xmm7
806	punpckldq	%xmm3,%xmm2
807	punpckhdq	%xmm1,%xmm6
808	punpckhdq	%xmm3,%xmm7
809	movdqa	%xmm0,%xmm1
810	punpcklqdq	%xmm2,%xmm0
811	movdqa	%xmm6,%xmm3
812	punpcklqdq	%xmm7,%xmm6
813	punpckhqdq	%xmm2,%xmm1
814	punpckhqdq	%xmm7,%xmm3
815	movdqu	-128(%esi),%xmm4
816	movdqu	-64(%esi),%xmm5
817	movdqu	(%esi),%xmm2
818	movdqu	64(%esi),%xmm7
819	leal	208(%esi),%esi	# 3*16 + 208 = 256 bytes advanced in total
820	pxor	%xmm0,%xmm4
821	pxor	%xmm1,%xmm5
822	pxor	%xmm2,%xmm6
823	pxor	%xmm3,%xmm7
824	movdqu	%xmm4,-128(%edi)
825	movdqu	%xmm5,-64(%edi)
826	movdqu	%xmm6,(%edi)
827	movdqu	%xmm7,64(%edi)
828	leal	208(%edi),%edi
829	subl	$256,%ecx
830	jnc	.L009outer_loop	# no borrow: at least 256 more bytes remain
831	addl	$256,%ecx	# undo the bias; %ecx = bytes left (0..255)
832	jz	.L011done
# Remainder after the four-block loop: undo the pointer bias, reload the key and counter
# pointers, and rebuild a single counter block whose word 0 is lane 0 of the last
# four-block counters plus 4.
833	movl	520(%esp),%ebx
834	leal	-128(%esi),%esi
835	movl	516(%esp),%edx
836	leal	-128(%edi),%edi
837	movd	64(%ebp),%xmm2
838	movdqu	(%ebx),%xmm3
839	paddd	96(%eax),%xmm2
840	pand	112(%eax),%xmm3	# clear word 0 of the caller counter block
841	por	%xmm2,%xmm3
# One-block path. The state lives in four registers as rows:
#   %xmm0 = constants, %xmm1/%xmm2 = key halves, %xmm3 = counter block,
# with the pshufb rotate masks cached in %xmm6 (rotate by 16) and %xmm7 (rotate by 8).
# The input state is kept at 0..63(%esp) for the final addition.
842.L0081x:
843	movdqa	32(%eax),%xmm0
844	movdqu	(%edx),%xmm1
845	movdqu	16(%edx),%xmm2
846	movdqa	(%eax),%xmm6
847	movdqa	16(%eax),%xmm7
848	movl	%ebp,48(%esp)	# NOTE(review): overwritten by the movdqa to 48(%esp) below
849	movdqa	%xmm0,(%esp)
850	movdqa	%xmm1,16(%esp)
851	movdqa	%xmm2,32(%esp)
852	movdqa	%xmm3,48(%esp)
853	movl	$10,%edx
854	jmp	.L012loop1x
855.align	16
# Next 64-byte block: reload the input state and add 1 to word 0 of the counter block.
856.L013outer1x:
857	movdqa	80(%eax),%xmm3
858	movdqa	(%esp),%xmm0
859	movdqa	16(%esp),%xmm1
860	movdqa	32(%esp),%xmm2
861	paddd	48(%esp),%xmm3
862	movl	$10,%edx
863	movdqa	%xmm3,48(%esp)
864	jmp	.L012loop1x
865.align	16
# Round loop for one block: a round on the columns, lane rotation of rows 1..3 with pshufd
# so the diagonals line up as columns, a second round, then the inverse lane rotation.
866.L012loop1x:
867	paddd	%xmm1,%xmm0
868	pxor	%xmm0,%xmm3
869.byte	102,15,56,0,222	# pshufb %xmm6,%xmm3 (rotate each dword left by 16)
870	paddd	%xmm3,%xmm2
871	pxor	%xmm2,%xmm1
872	movdqa	%xmm1,%xmm4
873	psrld	$20,%xmm1
874	pslld	$12,%xmm4
875	por	%xmm4,%xmm1
876	paddd	%xmm1,%xmm0
877	pxor	%xmm0,%xmm3
878.byte	102,15,56,0,223	# pshufb %xmm7,%xmm3 (rotate each dword left by 8)
879	paddd	%xmm3,%xmm2
880	pxor	%xmm2,%xmm1
881	movdqa	%xmm1,%xmm4
882	psrld	$25,%xmm1
883	pslld	$7,%xmm4
884	por	%xmm4,%xmm1
885	pshufd	$78,%xmm2,%xmm2
886	pshufd	$57,%xmm1,%xmm1
887	pshufd	$147,%xmm3,%xmm3
888	nop
889	paddd	%xmm1,%xmm0
890	pxor	%xmm0,%xmm3
891.byte	102,15,56,0,222	# pshufb %xmm6,%xmm3
892	paddd	%xmm3,%xmm2
893	pxor	%xmm2,%xmm1
894	movdqa	%xmm1,%xmm4
895	psrld	$20,%xmm1
896	pslld	$12,%xmm4
897	por	%xmm4,%xmm1
898	paddd	%xmm1,%xmm0
899	pxor	%xmm0,%xmm3
900.byte	102,15,56,0,223	# pshufb %xmm7,%xmm3
901	paddd	%xmm3,%xmm2
902	pxor	%xmm2,%xmm1
903	movdqa	%xmm1,%xmm4
904	psrld	$25,%xmm1
905	pslld	$7,%xmm4
906	por	%xmm4,%xmm1
907	pshufd	$78,%xmm2,%xmm2
908	pshufd	$147,%xmm1,%xmm1
909	pshufd	$57,%xmm3,%xmm3
910	decl	%edx
911	jnz	.L012loop1x
# Add the input state to obtain 64 keystream bytes in %xmm0..%xmm3.
912	paddd	(%esp),%xmm0
913	paddd	16(%esp),%xmm1
914	paddd	32(%esp),%xmm2
915	paddd	48(%esp),%xmm3
916	cmpl	$64,%ecx
917	jb	.L014tail
918	movdqu	(%esi),%xmm4
919	movdqu	16(%esi),%xmm5
920	pxor	%xmm4,%xmm0
921	movdqu	32(%esi),%xmm4
922	pxor	%xmm5,%xmm1
923	movdqu	48(%esi),%xmm5
924	pxor	%xmm4,%xmm2
925	pxor	%xmm5,%xmm3
926	leal	64(%esi),%esi
927	movdqu	%xmm0,(%edi)
928	movdqu	%xmm1,16(%edi)
929	movdqu	%xmm2,32(%edi)
930	movdqu	%xmm3,48(%edi)
931	leal	64(%edi),%edi
932	subl	$64,%ecx
933	jnz	.L013outer1x
934	jmp	.L011done
# Fewer than 64 bytes remain: spill the keystream block over the saved input state and
# XOR it into the output byte by byte (%ebp = byte index, %ecx = bytes left).
935.L014tail:
936	movdqa	%xmm0,(%esp)
937	movdqa	%xmm1,16(%esp)
938	movdqa	%xmm2,32(%esp)
939	movdqa	%xmm3,48(%esp)
940	xorl	%eax,%eax
941	xorl	%edx,%edx
942	xorl	%ebp,%ebp
943.L015tail_loop:
944	movb	(%esp,%ebp,1),%al
945	movb	(%esi,%ebp,1),%dl
946	leal	1(%ebp),%ebp
947	xorb	%dl,%al
948	movb	%al,-1(%edi,%ebp,1)
949	decl	%ecx
950	jnz	.L015tail_loop
951.L011done:
952	movl	512(%esp),%esp	# restore the stack pointer saved at frame setup
953	popl	%edi
954	popl	%esi
955	popl	%ebx
956	popl	%ebp
957	ret
958.size	ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
# Constant tables for ChaCha20_ssse3, addressed relative to .Lssse3_data (held in %eax there).
# Each row is 16 bytes; the offset of each row is given alongside it.
959.align	64
960.Lssse3_data:
961.byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13	# +0: pshufb mask, rotates every dword left by 16 bits
962.byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14	# +16: pshufb mask, rotates every dword left by 8 bits
963.long	1634760805,857760878,2036477234,1797285236	# +32: the words spelling "expand 32-byte k"
964.long	0,1,2,3	# +48: per-lane block counter offsets for the four-block path
965.long	4,4,4,4	# +64: counter step per four-block pass
966.long	1,0,0,0	# +80: counter step per block in the one-block path
967.long	4,0,0,0	# +96: added to lane 0 of the counter when leaving the four-block loop
968.long	0,-1,-1,-1	# +112: mask that clears word 0 of the counter block
969.align	64
# NUL-terminated ASCII text: ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>
970.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
971.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
972.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
973.byte	114,103,62,0
974#endif
975.section	.note.GNU-stack,"",@progbits
976