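# ChaCha20 for 32-bit x86, AT&T syntax.  This is evidently the perlasm
# output of OpenSSL's CRYPTOGAMS chacha-x86.pl module (see the embedded
# "ChaCha20 for x86, CRYPTOGAMS" string near the end).  All three entry
# points implement OpenSSL's prototype:
#
#	void ChaCha20_ctr32(unsigned char *out, const unsigned char *inp,
#	                    size_t len, const unsigned int key[8],
#	                    const unsigned int counter[4]);
#
# ChaCha20_ctr32 is the integer-only code path; at run time it branches
# to the SSSE3 or XOP code below when OPENSSL_ia32cap_P advertises the
# required extensions.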
.text
.globl	ChaCha20_ctr32
.type	ChaCha20_ctr32,@function
.align	16
ChaCha20_ctr32:
.L_ChaCha20_ctr32_begin:
#ifdef __CET__

.byte	243,15,30,251	# endbr32
#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	xorl	%eax,%eax
	cmpl	28(%esp),%eax
	je	.L000no_data
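	# Classic 32-bit PIC trick: call/pop yields EIP in %eax, from
	# which OPENSSL_ia32cap_P is addressed without a GOT.  Per
	# OpenSSL's ia32cap layout, bit 24 of word 0 (FXSR) and bit 9 of
	# word 1 (SSSE3) gate the SIMD fast path.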
	call	.Lpic_point
.Lpic_point:
	popl	%eax
	leal	OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp
	testl	$16777216,(%ebp)
	jz	.L001x86
	testl	$512,4(%ebp)
	jz	.L001x86
	jmp	.Lssse3_shortcut
.L001x86:
	movl	32(%esp),%esi
	movl	36(%esp),%edi
	subl	$132,%esp
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	movl	%eax,80(%esp)
	movl	%ebx,84(%esp)
	movl	%ecx,88(%esp)
	movl	%edx,92(%esp)
	movl	16(%esi),%eax
	movl	20(%esi),%ebx
	movl	24(%esi),%ecx
	movl	28(%esi),%edx
	movl	%eax,96(%esp)
	movl	%ebx,100(%esp)
	movl	%ecx,104(%esp)
	movl	%edx,108(%esp)
	movl	(%edi),%eax
	movl	4(%edi),%ebx
	movl	8(%edi),%ecx
	movl	12(%edi),%edx
	subl	$1,%eax
	movl	%eax,112(%esp)
	movl	%ebx,116(%esp)
	movl	%ecx,120(%esp)
	movl	%edx,124(%esp)
	jmp	.L002entry
.align	16
.L003outer_loop:
	movl	%ebx,156(%esp)
	movl	%eax,152(%esp)
	movl	%ecx,160(%esp)
.L002entry:
	movl	$1634760805,%eax
	movl	$857760878,4(%esp)
	movl	$2036477234,8(%esp)
	movl	$1797285236,12(%esp)
	movl	84(%esp),%ebx
	movl	88(%esp),%ebp
	movl	104(%esp),%ecx
	movl	108(%esp),%esi
	movl	116(%esp),%edx
	movl	120(%esp),%edi
	movl	%ebx,20(%esp)
	movl	%ebp,24(%esp)
	movl	%ecx,40(%esp)
	movl	%esi,44(%esp)
	movl	%edx,52(%esp)
	movl	%edi,56(%esp)
	movl	92(%esp),%ebx
	movl	124(%esp),%edi
	movl	112(%esp),%edx
	movl	80(%esp),%ebp
	movl	96(%esp),%ecx
	movl	100(%esp),%esi
	addl	$1,%edx
	movl	%ebx,28(%esp)
	movl	%edi,60(%esp)
	movl	%edx,112(%esp)
	movl	$10,%ebx
	jmp	.L004loop
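	# Ten iterations of the ChaCha double round follow: a column
	# round and a diagonal round of four quarter-rounds each.  In C
	# terms each quarter-round is (reference sketch, not part of the
	# original source):
	#	a += b; d ^= a; d = ROTL32(d,16);
	#	c += d; b ^= c; b = ROTL32(b,12);
	#	a += b; d ^= a; d = ROTL32(d, 8);
	#	c += d; b ^= c; b = ROTL32(b, 7);
	# Only part of the 4x4 state fits in the eight x86 registers, so
	# the remaining words live in the scratch area at (%esp)..60(%esp)
	# and are rotated in and out as each quarter-round needs them.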
.align	16
.L004loop:
	addl	%ebp,%eax
	movl	%ebx,128(%esp)
	movl	%ebp,%ebx
	xorl	%eax,%edx
	roll	$16,%edx
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	52(%esp),%edi
	roll	$12,%ebx
	movl	20(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,48(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,32(%esp)
	roll	$16,%edi
	movl	%ebx,16(%esp)
	addl	%edi,%esi
	movl	40(%esp),%ecx
	xorl	%esi,%ebp
	movl	56(%esp),%edx
	roll	$12,%ebp
	movl	24(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,52(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,36(%esp)
	roll	$16,%edx
	movl	%ebp,20(%esp)
	addl	%edx,%ecx
	movl	44(%esp),%esi
	xorl	%ecx,%ebx
	movl	60(%esp),%edi
	roll	$12,%ebx
	movl	28(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,56(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,24(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	roll	$12,%ebp
	movl	20(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,%edx
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	roll	$16,%edx
	movl	%ebp,28(%esp)
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	48(%esp),%edi
	roll	$12,%ebx
	movl	24(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,60(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,40(%esp)
	roll	$16,%edi
	movl	%ebx,20(%esp)
	addl	%edi,%esi
	movl	32(%esp),%ecx
	xorl	%esi,%ebp
	movl	52(%esp),%edx
	roll	$12,%ebp
	movl	28(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,48(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,44(%esp)
	roll	$16,%edx
	movl	%ebp,24(%esp)
	addl	%edx,%ecx
	movl	36(%esp),%esi
	xorl	%ecx,%ebx
	movl	56(%esp),%edi
	roll	$12,%ebx
	movl	16(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,52(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,28(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	movl	48(%esp),%edx
	roll	$12,%ebp
	movl	128(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,56(%esp)
	xorl	%esi,%ebp
	roll	$7,%ebp
	decl	%ebx
	jnz	.L004loop
	movl	160(%esp),%ebx
	addl	$1634760805,%eax
	addl	80(%esp),%ebp
	addl	96(%esp),%ecx
	addl	100(%esp),%esi
	cmpl	$64,%ebx
	jb	.L005tail
	movl	156(%esp),%ebx
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	xorl	(%ebx),%eax
	xorl	16(%ebx),%ebp
	movl	%eax,(%esp)
	movl	152(%esp),%eax
	xorl	32(%ebx),%ecx
	xorl	36(%ebx),%esi
	xorl	48(%ebx),%edx
	xorl	56(%ebx),%edi
	movl	%ebp,16(%eax)
	movl	%ecx,32(%eax)
	movl	%esi,36(%eax)
	movl	%edx,48(%eax)
	movl	%edi,56(%eax)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	xorl	4(%ebx),%ebp
	xorl	8(%ebx),%ecx
	xorl	12(%ebx),%esi
	xorl	20(%ebx),%edx
	xorl	24(%ebx),%edi
	movl	%ebp,4(%eax)
	movl	%ecx,8(%eax)
	movl	%esi,12(%eax)
	movl	%edx,20(%eax)
	movl	%edi,24(%eax)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	xorl	28(%ebx),%ebp
	xorl	40(%ebx),%ecx
	xorl	44(%ebx),%esi
	xorl	52(%ebx),%edx
	xorl	60(%ebx),%edi
	leal	64(%ebx),%ebx
	movl	%ebp,28(%eax)
	movl	(%esp),%ebp
	movl	%ecx,40(%eax)
	movl	160(%esp),%ecx
	movl	%esi,44(%eax)
	movl	%edx,52(%eax)
	movl	%edi,60(%eax)
	movl	%ebp,(%eax)
	leal	64(%eax),%eax
	subl	$64,%ecx
	jnz	.L003outer_loop
	jmp	.L006done
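	# Fewer than 64 bytes remain: the last keystream block cannot be
	# XOR'ed against the input a word at a time, so it is finalized
	# into the scratch area and .L007tail_loop XORs it into the
	# output byte by byte.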
.L005tail:
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	movl	%eax,(%esp)
	movl	%ebp,16(%esp)
	movl	%ecx,32(%esp)
	movl	%esi,36(%esp)
	movl	%edx,48(%esp)
	movl	%edi,56(%esp)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	movl	%ebp,4(%esp)
	movl	%ecx,8(%esp)
	movl	%esi,12(%esp)
	movl	%edx,20(%esp)
	movl	%edi,24(%esp)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	movl	%ebp,28(%esp)
	movl	156(%esp),%ebp
	movl	%ecx,40(%esp)
	movl	152(%esp),%ecx
	movl	%esi,44(%esp)
	xorl	%esi,%esi
	movl	%edx,52(%esp)
	movl	%edi,60(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
.L007tail_loop:
	movb	(%esi,%ebp,1),%al
	movb	(%esp,%esi,1),%dl
	leal	1(%esi),%esi
	xorb	%dl,%al
	movb	%al,-1(%ecx,%esi,1)
	decl	%ebx
	jnz	.L007tail_loop
.L006done:
	addl	$132,%esp
.L000no_data:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
.globl	ChaCha20_ssse3
.type	ChaCha20_ssse3,@function
.align	16
ChaCha20_ssse3:
.L_ChaCha20_ssse3_begin:
#ifdef __CET__

.byte	243,15,30,251	# endbr32
#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
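	# Normally entered via "jmp .Lssse3_shortcut" from ChaCha20_ctr32,
	# with %eax still holding the .Lpic_point address and %ebp pointing
	# at OPENSSL_ia32cap_P; the leal below relies on that %eax, so the
	# exported symbol appears to assume the same setup.  Word 1 bit 11
	# (XOP, per OpenSSL's ia32cap layout) redirects once more to the
	# XOP code path.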
.Lssse3_shortcut:
	testl	$2048,4(%ebp)
	jnz	.Lxop_shortcut
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	movl	%esp,%ebp
	subl	$524,%esp
	andl	$-64,%esp
	movl	%ebp,512(%esp)
	leal	.Lssse3_data-.Lpic_point(%eax),%eax
	movdqu	(%ebx),%xmm3
	cmpl	$256,%ecx
	jb	.L0081x
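	# At least 256 bytes: process four 64-byte blocks in parallel,
	# each xmm register holding one state word replicated across the
	# four blocks (pshufd broadcasts).  The counter lane gets 0,1,2,3
	# added from .Lssse3_data; the psubd of 4,4,4,4 pre-compensates
	# for the paddd performed at the top of every outer iteration.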
	movl	%edx,516(%esp)
	movl	%ebx,520(%esp)
	subl	$256,%ecx
	leal	384(%esp),%ebp
	movdqu	(%edx),%xmm7
	pshufd	$0,%xmm3,%xmm0
	pshufd	$85,%xmm3,%xmm1
	pshufd	$170,%xmm3,%xmm2
	pshufd	$255,%xmm3,%xmm3
	paddd	48(%eax),%xmm0
	pshufd	$0,%xmm7,%xmm4
	pshufd	$85,%xmm7,%xmm5
	psubd	64(%eax),%xmm0
	pshufd	$170,%xmm7,%xmm6
	pshufd	$255,%xmm7,%xmm7
	movdqa	%xmm0,64(%ebp)
	movdqa	%xmm1,80(%ebp)
	movdqa	%xmm2,96(%ebp)
	movdqa	%xmm3,112(%ebp)
	movdqu	16(%edx),%xmm3
	movdqa	%xmm4,-64(%ebp)
	movdqa	%xmm5,-48(%ebp)
	movdqa	%xmm6,-32(%ebp)
	movdqa	%xmm7,-16(%ebp)
	movdqa	32(%eax),%xmm7
	leal	128(%esp),%ebx
	pshufd	$0,%xmm3,%xmm0
	pshufd	$85,%xmm3,%xmm1
	pshufd	$170,%xmm3,%xmm2
	pshufd	$255,%xmm3,%xmm3
	pshufd	$0,%xmm7,%xmm4
	pshufd	$85,%xmm7,%xmm5
	pshufd	$170,%xmm7,%xmm6
	pshufd	$255,%xmm7,%xmm7
	movdqa	%xmm0,(%ebp)
	movdqa	%xmm1,16(%ebp)
	movdqa	%xmm2,32(%ebp)
	movdqa	%xmm3,48(%ebp)
	movdqa	%xmm4,-128(%ebp)
	movdqa	%xmm5,-112(%ebp)
	movdqa	%xmm6,-96(%ebp)
	movdqa	%xmm7,-80(%ebp)
	leal	128(%esi),%esi
	leal	128(%edi),%edi
	jmp	.L009outer_loop
.align	16
.L009outer_loop:
	movdqa	-112(%ebp),%xmm1
	movdqa	-96(%ebp),%xmm2
	movdqa	-80(%ebp),%xmm3
	movdqa	-48(%ebp),%xmm5
	movdqa	-32(%ebp),%xmm6
	movdqa	-16(%ebp),%xmm7
	movdqa	%xmm1,-112(%ebx)
	movdqa	%xmm2,-96(%ebx)
	movdqa	%xmm3,-80(%ebx)
	movdqa	%xmm5,-48(%ebx)
	movdqa	%xmm6,-32(%ebx)
	movdqa	%xmm7,-16(%ebx)
	movdqa	32(%ebp),%xmm2
	movdqa	48(%ebp),%xmm3
	movdqa	64(%ebp),%xmm4
	movdqa	80(%ebp),%xmm5
	movdqa	96(%ebp),%xmm6
	movdqa	112(%ebp),%xmm7
	paddd	64(%eax),%xmm4
	movdqa	%xmm2,32(%ebx)
	movdqa	%xmm3,48(%ebx)
	movdqa	%xmm4,64(%ebx)
	movdqa	%xmm5,80(%ebx)
	movdqa	%xmm6,96(%ebx)
	movdqa	%xmm7,112(%ebx)
	movdqa	%xmm4,64(%ebp)
	movdqa	-128(%ebp),%xmm0
	movdqa	%xmm4,%xmm6
	movdqa	-64(%ebp),%xmm3
	movdqa	(%ebp),%xmm4
	movdqa	16(%ebp),%xmm5
	movl	$10,%edx
	nop
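	# 4-way double round.  Rotates by 16 and 8 use pshufb with the
	# byte-shuffle masks at (%eax) and 16(%eax); rotates by 12 and 7
	# use the pslld/psrld/por idiom.  State vectors that do not fit
	# in xmm0-xmm7 are spilled to -128(%ebx)..112(%ebx).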
.align	16
.L010loop:
	paddd	%xmm3,%xmm0
	movdqa	%xmm3,%xmm2
	pxor	%xmm0,%xmm6
	pshufb	(%eax),%xmm6
	paddd	%xmm6,%xmm4
	pxor	%xmm4,%xmm2
	movdqa	-48(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-112(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	80(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-128(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,64(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	movdqa	%xmm4,(%ebx)
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-64(%ebx)
	paddd	%xmm7,%xmm5
	movdqa	32(%ebx),%xmm4
	pxor	%xmm5,%xmm3
	movdqa	-32(%ebx),%xmm2
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-96(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	movdqa	96(%ebx),%xmm6
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-112(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,80(%ebx)
	pxor	%xmm5,%xmm3
	paddd	%xmm2,%xmm0
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	pxor	%xmm0,%xmm6
	por	%xmm1,%xmm3
	movdqa	%xmm5,16(%ebx)
	pshufb	(%eax),%xmm6
	movdqa	%xmm3,-48(%ebx)
	paddd	%xmm6,%xmm4
	movdqa	48(%ebx),%xmm5
	pxor	%xmm4,%xmm2
	movdqa	-16(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-80(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	112(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-96(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,96(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-32(%ebx)
	paddd	%xmm7,%xmm5
	pxor	%xmm5,%xmm3
	movdqa	-48(%ebx),%xmm2
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-128(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-80(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,%xmm6
	pxor	%xmm5,%xmm3
	paddd	%xmm2,%xmm0
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	pxor	%xmm0,%xmm6
	por	%xmm1,%xmm3
	pshufb	(%eax),%xmm6
	movdqa	%xmm3,-16(%ebx)
	paddd	%xmm6,%xmm4
	pxor	%xmm4,%xmm2
	movdqa	-32(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-112(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	64(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-128(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,112(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	movdqa	%xmm4,32(%ebx)
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-48(%ebx)
	paddd	%xmm7,%xmm5
	movdqa	(%ebx),%xmm4
	pxor	%xmm5,%xmm3
	movdqa	-16(%ebx),%xmm2
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-96(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	movdqa	80(%ebx),%xmm6
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-112(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,64(%ebx)
	pxor	%xmm5,%xmm3
	paddd	%xmm2,%xmm0
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	pxor	%xmm0,%xmm6
	por	%xmm1,%xmm3
	movdqa	%xmm5,48(%ebx)
	pshufb	(%eax),%xmm6
	movdqa	%xmm3,-32(%ebx)
	paddd	%xmm6,%xmm4
	movdqa	16(%ebx),%xmm5
	pxor	%xmm4,%xmm2
	movdqa	-64(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-80(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	96(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-96(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,80(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-16(%ebx)
	paddd	%xmm7,%xmm5
	pxor	%xmm5,%xmm3
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-128(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	movdqa	64(%ebx),%xmm6
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-80(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,96(%ebx)
	pxor	%xmm5,%xmm3
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	por	%xmm1,%xmm3
	decl	%edx
	jnz	.L010loop
	movdqa	%xmm3,-64(%ebx)
	movdqa	%xmm4,(%ebx)
	movdqa	%xmm5,16(%ebx)
	movdqa	%xmm6,64(%ebx)
	movdqa	%xmm7,96(%ebx)
	movdqa	-112(%ebx),%xmm1
	movdqa	-96(%ebx),%xmm2
	movdqa	-80(%ebx),%xmm3
	paddd	-128(%ebp),%xmm0
	paddd	-112(%ebp),%xmm1
	paddd	-96(%ebp),%xmm2
	paddd	-80(%ebp),%xmm3
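	# The four blocks are word-sliced, so punpck{l,h}dq and
	# punpck{l,h}qdq transpose them back into byte order before the
	# XOR with the input.  The input rows of the four consecutive
	# blocks sit at -128, -64, 0 and 64 bytes relative to %esi,
	# which was biased by +128 up front.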
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	16(%esi),%esi
	pxor	%xmm0,%xmm4
	movdqa	-64(%ebx),%xmm0
	pxor	%xmm1,%xmm5
	movdqa	-48(%ebx),%xmm1
	pxor	%xmm2,%xmm6
	movdqa	-32(%ebx),%xmm2
	pxor	%xmm3,%xmm7
	movdqa	-16(%ebx),%xmm3
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	paddd	-64(%ebp),%xmm0
	paddd	-48(%ebp),%xmm1
	paddd	-32(%ebp),%xmm2
	paddd	-16(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	16(%esi),%esi
	pxor	%xmm0,%xmm4
	movdqa	(%ebx),%xmm0
	pxor	%xmm1,%xmm5
	movdqa	16(%ebx),%xmm1
	pxor	%xmm2,%xmm6
	movdqa	32(%ebx),%xmm2
	pxor	%xmm3,%xmm7
	movdqa	48(%ebx),%xmm3
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	paddd	(%ebp),%xmm0
	paddd	16(%ebp),%xmm1
	paddd	32(%ebp),%xmm2
	paddd	48(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	16(%esi),%esi
	pxor	%xmm0,%xmm4
	movdqa	64(%ebx),%xmm0
	pxor	%xmm1,%xmm5
	movdqa	80(%ebx),%xmm1
	pxor	%xmm2,%xmm6
	movdqa	96(%ebx),%xmm2
	pxor	%xmm3,%xmm7
	movdqa	112(%ebx),%xmm3
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	paddd	64(%ebp),%xmm0
	paddd	80(%ebp),%xmm1
	paddd	96(%ebp),%xmm2
	paddd	112(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	208(%esi),%esi
	pxor	%xmm0,%xmm4
	pxor	%xmm1,%xmm5
	pxor	%xmm2,%xmm6
	pxor	%xmm3,%xmm7
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	208(%edi),%edi
	subl	$256,%ecx
	jnc	.L009outer_loop
	addl	$256,%ecx
	jz	.L011done
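	# 1..255 bytes left: restore the key and counter pointers, undo
	# the 128-byte bias on the data pointers, and rebuild the
	# counter/nonce row for the single-block path.  Lane 0 takes the
	# saved counter lane from 64(%ebp) advanced by 4 (96(%eax) holds
	# 4,0,0,0); the 0,-1,-1,-1 mask keeps the original nonce words.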
	movl	520(%esp),%ebx
	leal	-128(%esi),%esi
	movl	516(%esp),%edx
	leal	-128(%edi),%edi
	movd	64(%ebp),%xmm2
	movdqu	(%ebx),%xmm3
	paddd	96(%eax),%xmm2
	pand	112(%eax),%xmm3
	por	%xmm2,%xmm3
.L0081x:
	movdqa	32(%eax),%xmm0
	movdqu	(%edx),%xmm1
	movdqu	16(%edx),%xmm2
	movdqa	(%eax),%xmm6
	movdqa	16(%eax),%xmm7
	movl	%ebp,48(%esp)
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movl	$10,%edx
	jmp	.L012loop1x
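	# Single-block path, also the entry point for inputs shorter than
	# 256 bytes: one block per iteration with the whole state in
	# xmm0-xmm3, using pshufd row rotations between the column and
	# diagonal halves of the double round.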
.align	16
.L013outer1x:
	movdqa	80(%eax),%xmm3
	movdqa	(%esp),%xmm0
	movdqa	16(%esp),%xmm1
	movdqa	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	movl	$10,%edx
	movdqa	%xmm3,48(%esp)
	jmp	.L012loop1x
.align	16
.L012loop1x:
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,222	# pshufb %xmm6,%xmm3 (rotate dwords left 16)
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,223	# pshufb %xmm7,%xmm3 (rotate dwords left 8)
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	pshufd	$78,%xmm2,%xmm2
	pshufd	$57,%xmm1,%xmm1
	pshufd	$147,%xmm3,%xmm3
	nop
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,222	# pshufb %xmm6,%xmm3 (rotate dwords left 16)
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,223	# pshufb %xmm7,%xmm3 (rotate dwords left 8)
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	pshufd	$78,%xmm2,%xmm2
	pshufd	$147,%xmm1,%xmm1
	pshufd	$57,%xmm3,%xmm3
	decl	%edx
	jnz	.L012loop1x
	paddd	(%esp),%xmm0
	paddd	16(%esp),%xmm1
	paddd	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	cmpl	$64,%ecx
	jb	.L014tail
	movdqu	(%esi),%xmm4
	movdqu	16(%esi),%xmm5
	pxor	%xmm4,%xmm0
	movdqu	32(%esi),%xmm4
	pxor	%xmm5,%xmm1
	movdqu	48(%esi),%xmm5
	pxor	%xmm4,%xmm2
	pxor	%xmm5,%xmm3
	leal	64(%esi),%esi
	movdqu	%xmm0,(%edi)
	movdqu	%xmm1,16(%edi)
	movdqu	%xmm2,32(%edi)
	movdqu	%xmm3,48(%edi)
	leal	64(%edi),%edi
	subl	$64,%ecx
	jnz	.L013outer1x
	jmp	.L011done
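	# Sub-64-byte tail: park the final keystream block on the stack
	# and XOR it into the output one byte at a time.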
.L014tail:
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
	xorl	%ebp,%ebp
.L015tail_loop:
	movb	(%esp,%ebp,1),%al
	movb	(%esi,%ebp,1),%dl
	leal	1(%ebp),%ebp
	xorb	%dl,%al
	movb	%al,-1(%edi,%ebp,1)
	decl	%ecx
	jnz	.L015tail_loop
.L011done:
	movl	512(%esp),%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
.align	64
.Lssse3_data:
.byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13	# pshufb mask: rotate each dword left 16
.byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14	# pshufb mask: rotate each dword left 8
.long	1634760805,857760878,2036477234,1797285236	# "expand 32-byte k" constants
.long	0,1,2,3		# per-block counter offsets
.long	4,4,4,4		# counter increment per 4-block iteration
.long	1,0,0,0		# counter increment per 1-block iteration
.long	4,0,0,0		# counter catch-up for the leftover path
.long	0,-1,-1,-1	# mask: keep nonce words, drop counter lane
.align	64
.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54	# "ChaCha20 for x86"
.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32	# ", CRYPTOGAMS by "
.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111	# "<appro@openssl.o"
.byte	114,103,62,0	# "rg>"
.globl	ChaCha20_xop
.type	ChaCha20_xop,@function
.align	16
ChaCha20_xop:
.L_ChaCha20_xop_begin:
#ifdef __CET__

.byte	243,15,30,251	# endbr32
#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
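	# XOP variant, reached from the SSSE3 path on CPUs advertising
	# AMD XOP (and, like the SSSE3 entry, apparently relying on %eax
	# still holding the .Lpic_point address).  The structure mirrors
	# ChaCha20_ssse3; the gain is vprotd, a true per-dword rotate
	# that replaces both the pshufb and shift/shift/or rotate idioms.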
.Lxop_shortcut:
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	vzeroupper
	movl	%esp,%ebp
	subl	$524,%esp
	andl	$-64,%esp
	movl	%ebp,512(%esp)
	leal	.Lssse3_data-.Lpic_point(%eax),%eax
	vmovdqu	(%ebx),%xmm3
	cmpl	$256,%ecx
	jb	.L0161x
	movl	%edx,516(%esp)
	movl	%ebx,520(%esp)
	subl	$256,%ecx
	leal	384(%esp),%ebp
	vmovdqu	(%edx),%xmm7
	vpshufd	$0,%xmm3,%xmm0
	vpshufd	$85,%xmm3,%xmm1
	vpshufd	$170,%xmm3,%xmm2
	vpshufd	$255,%xmm3,%xmm3
	vpaddd	48(%eax),%xmm0,%xmm0
	vpshufd	$0,%xmm7,%xmm4
	vpshufd	$85,%xmm7,%xmm5
	vpsubd	64(%eax),%xmm0,%xmm0
	vpshufd	$170,%xmm7,%xmm6
	vpshufd	$255,%xmm7,%xmm7
	vmovdqa	%xmm0,64(%ebp)
	vmovdqa	%xmm1,80(%ebp)
	vmovdqa	%xmm2,96(%ebp)
	vmovdqa	%xmm3,112(%ebp)
	vmovdqu	16(%edx),%xmm3
	vmovdqa	%xmm4,-64(%ebp)
	vmovdqa	%xmm5,-48(%ebp)
	vmovdqa	%xmm6,-32(%ebp)
	vmovdqa	%xmm7,-16(%ebp)
	vmovdqa	32(%eax),%xmm7
	leal	128(%esp),%ebx
	vpshufd	$0,%xmm3,%xmm0
	vpshufd	$85,%xmm3,%xmm1
	vpshufd	$170,%xmm3,%xmm2
	vpshufd	$255,%xmm3,%xmm3
	vpshufd	$0,%xmm7,%xmm4
	vpshufd	$85,%xmm7,%xmm5
	vpshufd	$170,%xmm7,%xmm6
	vpshufd	$255,%xmm7,%xmm7
	vmovdqa	%xmm0,(%ebp)
	vmovdqa	%xmm1,16(%ebp)
	vmovdqa	%xmm2,32(%ebp)
	vmovdqa	%xmm3,48(%ebp)
	vmovdqa	%xmm4,-128(%ebp)
	vmovdqa	%xmm5,-112(%ebp)
	vmovdqa	%xmm6,-96(%ebp)
	vmovdqa	%xmm7,-80(%ebp)
	leal	128(%esi),%esi
	leal	128(%edi),%edi
	jmp	.L017outer_loop
.align	32
.L017outer_loop:
	vmovdqa	-112(%ebp),%xmm1
	vmovdqa	-96(%ebp),%xmm2
	vmovdqa	-80(%ebp),%xmm3
	vmovdqa	-48(%ebp),%xmm5
	vmovdqa	-32(%ebp),%xmm6
	vmovdqa	-16(%ebp),%xmm7
	vmovdqa	%xmm1,-112(%ebx)
	vmovdqa	%xmm2,-96(%ebx)
	vmovdqa	%xmm3,-80(%ebx)
	vmovdqa	%xmm5,-48(%ebx)
	vmovdqa	%xmm6,-32(%ebx)
	vmovdqa	%xmm7,-16(%ebx)
	vmovdqa	32(%ebp),%xmm2
	vmovdqa	48(%ebp),%xmm3
	vmovdqa	64(%ebp),%xmm4
	vmovdqa	80(%ebp),%xmm5
	vmovdqa	96(%ebp),%xmm6
	vmovdqa	112(%ebp),%xmm7
	vpaddd	64(%eax),%xmm4,%xmm4
	vmovdqa	%xmm2,32(%ebx)
	vmovdqa	%xmm3,48(%ebx)
	vmovdqa	%xmm4,64(%ebx)
	vmovdqa	%xmm5,80(%ebx)
	vmovdqa	%xmm6,96(%ebx)
	vmovdqa	%xmm7,112(%ebx)
	vmovdqa	%xmm4,64(%ebp)
	vmovdqa	-128(%ebp),%xmm0
	vmovdqa	%xmm4,%xmm6
	vmovdqa	-64(%ebp),%xmm3
	vmovdqa	(%ebp),%xmm4
	vmovdqa	16(%ebp),%xmm5
	movl	$10,%edx
	nop
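	# The ".byte 143,232,120,194,modrm,imm" sequences below are XOP
	# vprotd instructions emitted by hand (0x8F XOP prefix, map 8,
	# opcode 0xC2): the modrm byte names the register (0xC9=%xmm1,
	# 0xD2=%xmm2, 0xDB=%xmm3, 0xF6=%xmm6, 0xFF=%xmm7) and the final
	# immediate is the left-rotate count (16, 12, 8 or 7).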
.align	32
.L018loop:
	vpaddd	%xmm3,%xmm0,%xmm0
	vpxor	%xmm0,%xmm6,%xmm6
.byte	143,232,120,194,246,16
	vpaddd	%xmm6,%xmm4,%xmm4
	vpxor	%xmm4,%xmm3,%xmm2
	vmovdqa	-112(%ebx),%xmm1
.byte	143,232,120,194,210,12
	vmovdqa	-48(%ebx),%xmm3
	vpaddd	%xmm2,%xmm0,%xmm0
	vmovdqa	80(%ebx),%xmm7
	vpxor	%xmm0,%xmm6,%xmm6
	vpaddd	%xmm3,%xmm1,%xmm1
.byte	143,232,120,194,246,8
	vmovdqa	%xmm0,-128(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	%xmm6,64(%ebx)
	vpxor	%xmm4,%xmm2,%xmm2
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,210,7
	vmovdqa	%xmm4,(%ebx)
.byte	143,232,120,194,255,16
	vmovdqa	%xmm2,-64(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	32(%ebx),%xmm4
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqa	-96(%ebx),%xmm0
.byte	143,232,120,194,219,12
	vmovdqa	-32(%ebx),%xmm2
	vpaddd	%xmm3,%xmm1,%xmm1
	vmovdqa	96(%ebx),%xmm6
	vpxor	%xmm1,%xmm7,%xmm7
	vpaddd	%xmm2,%xmm0,%xmm0
.byte	143,232,120,194,255,8
	vmovdqa	%xmm1,-112(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	%xmm7,80(%ebx)
	vpxor	%xmm5,%xmm3,%xmm3
	vpxor	%xmm0,%xmm6,%xmm6
.byte	143,232,120,194,219,7
	vmovdqa	%xmm5,16(%ebx)
.byte	143,232,120,194,246,16
	vmovdqa	%xmm3,-48(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	48(%ebx),%xmm5
	vpxor	%xmm4,%xmm2,%xmm2
	vmovdqa	-80(%ebx),%xmm1
.byte	143,232,120,194,210,12
	vmovdqa	-16(%ebx),%xmm3
	vpaddd	%xmm2,%xmm0,%xmm0
	vmovdqa	112(%ebx),%xmm7
	vpxor	%xmm0,%xmm6,%xmm6
	vpaddd	%xmm3,%xmm1,%xmm1
.byte	143,232,120,194,246,8
	vmovdqa	%xmm0,-96(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	%xmm6,96(%ebx)
	vpxor	%xmm4,%xmm2,%xmm2
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,210,7
.byte	143,232,120,194,255,16
	vmovdqa	%xmm2,-32(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqa	-128(%ebx),%xmm0
.byte	143,232,120,194,219,12
	vmovdqa	-48(%ebx),%xmm2
	vpaddd	%xmm3,%xmm1,%xmm1
	vpxor	%xmm1,%xmm7,%xmm7
	vpaddd	%xmm2,%xmm0,%xmm0
.byte	143,232,120,194,255,8
	vmovdqa	%xmm1,-80(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vpxor	%xmm5,%xmm3,%xmm3
	vpxor	%xmm0,%xmm7,%xmm6
.byte	143,232,120,194,219,7
.byte	143,232,120,194,246,16
	vmovdqa	%xmm3,-16(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vpxor	%xmm4,%xmm2,%xmm2
	vmovdqa	-112(%ebx),%xmm1
.byte	143,232,120,194,210,12
	vmovdqa	-32(%ebx),%xmm3
	vpaddd	%xmm2,%xmm0,%xmm0
	vmovdqa	64(%ebx),%xmm7
	vpxor	%xmm0,%xmm6,%xmm6
	vpaddd	%xmm3,%xmm1,%xmm1
.byte	143,232,120,194,246,8
	vmovdqa	%xmm0,-128(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	%xmm6,112(%ebx)
	vpxor	%xmm4,%xmm2,%xmm2
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,210,7
	vmovdqa	%xmm4,32(%ebx)
.byte	143,232,120,194,255,16
	vmovdqa	%xmm2,-48(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	(%ebx),%xmm4
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqa	-96(%ebx),%xmm0
.byte	143,232,120,194,219,12
	vmovdqa	-16(%ebx),%xmm2
	vpaddd	%xmm3,%xmm1,%xmm1
	vmovdqa	80(%ebx),%xmm6
	vpxor	%xmm1,%xmm7,%xmm7
	vpaddd	%xmm2,%xmm0,%xmm0
.byte	143,232,120,194,255,8
	vmovdqa	%xmm1,-112(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	%xmm7,64(%ebx)
	vpxor	%xmm5,%xmm3,%xmm3
	vpxor	%xmm0,%xmm6,%xmm6
.byte	143,232,120,194,219,7
	vmovdqa	%xmm5,48(%ebx)
.byte	143,232,120,194,246,16
	vmovdqa	%xmm3,-32(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	16(%ebx),%xmm5
	vpxor	%xmm4,%xmm2,%xmm2
	vmovdqa	-80(%ebx),%xmm1
.byte	143,232,120,194,210,12
	vmovdqa	-64(%ebx),%xmm3
	vpaddd	%xmm2,%xmm0,%xmm0
	vmovdqa	96(%ebx),%xmm7
	vpxor	%xmm0,%xmm6,%xmm6
	vpaddd	%xmm3,%xmm1,%xmm1
.byte	143,232,120,194,246,8
	vmovdqa	%xmm0,-96(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	%xmm6,80(%ebx)
	vpxor	%xmm4,%xmm2,%xmm2
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,210,7
.byte	143,232,120,194,255,16
	vmovdqa	%xmm2,-16(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqa	-128(%ebx),%xmm0
.byte	143,232,120,194,219,12
	vpaddd	%xmm3,%xmm1,%xmm1
	vmovdqa	64(%ebx),%xmm6
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,255,8
	vmovdqa	%xmm1,-80(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	%xmm7,96(%ebx)
	vpxor	%xmm5,%xmm3,%xmm3
.byte	143,232,120,194,219,7
	decl	%edx
	jnz	.L018loop
	vmovdqa	%xmm3,-64(%ebx)
	vmovdqa	%xmm4,(%ebx)
	vmovdqa	%xmm5,16(%ebx)
	vmovdqa	%xmm6,64(%ebx)
	vmovdqa	%xmm7,96(%ebx)
	vmovdqa	-112(%ebx),%xmm1
	vmovdqa	-96(%ebx),%xmm2
	vmovdqa	-80(%ebx),%xmm3
	vpaddd	-128(%ebp),%xmm0,%xmm0
	vpaddd	-112(%ebp),%xmm1,%xmm1
	vpaddd	-96(%ebp),%xmm2,%xmm2
	vpaddd	-80(%ebp),%xmm3,%xmm3
	vpunpckldq	%xmm1,%xmm0,%xmm6
	vpunpckldq	%xmm3,%xmm2,%xmm7
	vpunpckhdq	%xmm1,%xmm0,%xmm0
	vpunpckhdq	%xmm3,%xmm2,%xmm2
	vpunpcklqdq	%xmm7,%xmm6,%xmm1
	vpunpckhqdq	%xmm7,%xmm6,%xmm6
	vpunpcklqdq	%xmm2,%xmm0,%xmm7
	vpunpckhqdq	%xmm2,%xmm0,%xmm3
	vpxor	-128(%esi),%xmm1,%xmm4
	vpxor	-64(%esi),%xmm6,%xmm5
	vpxor	(%esi),%xmm7,%xmm6
	vpxor	64(%esi),%xmm3,%xmm7
	leal	16(%esi),%esi
	vmovdqa	-64(%ebx),%xmm0
	vmovdqa	-48(%ebx),%xmm1
	vmovdqa	-32(%ebx),%xmm2
	vmovdqa	-16(%ebx),%xmm3
	vmovdqu	%xmm4,-128(%edi)
	vmovdqu	%xmm5,-64(%edi)
	vmovdqu	%xmm6,(%edi)
	vmovdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	vpaddd	-64(%ebp),%xmm0,%xmm0
	vpaddd	-48(%ebp),%xmm1,%xmm1
	vpaddd	-32(%ebp),%xmm2,%xmm2
	vpaddd	-16(%ebp),%xmm3,%xmm3
	vpunpckldq	%xmm1,%xmm0,%xmm6
	vpunpckldq	%xmm3,%xmm2,%xmm7
	vpunpckhdq	%xmm1,%xmm0,%xmm0
	vpunpckhdq	%xmm3,%xmm2,%xmm2
	vpunpcklqdq	%xmm7,%xmm6,%xmm1
	vpunpckhqdq	%xmm7,%xmm6,%xmm6
	vpunpcklqdq	%xmm2,%xmm0,%xmm7
	vpunpckhqdq	%xmm2,%xmm0,%xmm3
	vpxor	-128(%esi),%xmm1,%xmm4
	vpxor	-64(%esi),%xmm6,%xmm5
	vpxor	(%esi),%xmm7,%xmm6
	vpxor	64(%esi),%xmm3,%xmm7
	leal	16(%esi),%esi
	vmovdqa	(%ebx),%xmm0
	vmovdqa	16(%ebx),%xmm1
	vmovdqa	32(%ebx),%xmm2
	vmovdqa	48(%ebx),%xmm3
	vmovdqu	%xmm4,-128(%edi)
	vmovdqu	%xmm5,-64(%edi)
	vmovdqu	%xmm6,(%edi)
	vmovdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	vpaddd	(%ebp),%xmm0,%xmm0
	vpaddd	16(%ebp),%xmm1,%xmm1
	vpaddd	32(%ebp),%xmm2,%xmm2
	vpaddd	48(%ebp),%xmm3,%xmm3
	vpunpckldq	%xmm1,%xmm0,%xmm6
	vpunpckldq	%xmm3,%xmm2,%xmm7
	vpunpckhdq	%xmm1,%xmm0,%xmm0
	vpunpckhdq	%xmm3,%xmm2,%xmm2
	vpunpcklqdq	%xmm7,%xmm6,%xmm1
	vpunpckhqdq	%xmm7,%xmm6,%xmm6
	vpunpcklqdq	%xmm2,%xmm0,%xmm7
	vpunpckhqdq	%xmm2,%xmm0,%xmm3
	vpxor	-128(%esi),%xmm1,%xmm4
	vpxor	-64(%esi),%xmm6,%xmm5
	vpxor	(%esi),%xmm7,%xmm6
	vpxor	64(%esi),%xmm3,%xmm7
	leal	16(%esi),%esi
	vmovdqa	64(%ebx),%xmm0
	vmovdqa	80(%ebx),%xmm1
	vmovdqa	96(%ebx),%xmm2
	vmovdqa	112(%ebx),%xmm3
	vmovdqu	%xmm4,-128(%edi)
	vmovdqu	%xmm5,-64(%edi)
	vmovdqu	%xmm6,(%edi)
	vmovdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	vpaddd	64(%ebp),%xmm0,%xmm0
	vpaddd	80(%ebp),%xmm1,%xmm1
	vpaddd	96(%ebp),%xmm2,%xmm2
	vpaddd	112(%ebp),%xmm3,%xmm3
	vpunpckldq	%xmm1,%xmm0,%xmm6
	vpunpckldq	%xmm3,%xmm2,%xmm7
	vpunpckhdq	%xmm1,%xmm0,%xmm0
	vpunpckhdq	%xmm3,%xmm2,%xmm2
	vpunpcklqdq	%xmm7,%xmm6,%xmm1
	vpunpckhqdq	%xmm7,%xmm6,%xmm6
	vpunpcklqdq	%xmm2,%xmm0,%xmm7
	vpunpckhqdq	%xmm2,%xmm0,%xmm3
	vpxor	-128(%esi),%xmm1,%xmm4
	vpxor	-64(%esi),%xmm6,%xmm5
	vpxor	(%esi),%xmm7,%xmm6
	vpxor	64(%esi),%xmm3,%xmm7
	leal	208(%esi),%esi
	vmovdqu	%xmm4,-128(%edi)
	vmovdqu	%xmm5,-64(%edi)
	vmovdqu	%xmm6,(%edi)
	vmovdqu	%xmm7,64(%edi)
	leal	208(%edi),%edi
	subl	$256,%ecx
	jnc	.L017outer_loop
	addl	$256,%ecx
	jz	.L019done
	movl	520(%esp),%ebx
	leal	-128(%esi),%esi
	movl	516(%esp),%edx
	leal	-128(%edi),%edi
	vmovd	64(%ebp),%xmm2
	vmovdqu	(%ebx),%xmm3
	vpaddd	96(%eax),%xmm2,%xmm2
	vpand	112(%eax),%xmm3,%xmm3
	vpor	%xmm2,%xmm3,%xmm3
.L0161x:
	vmovdqa	32(%eax),%xmm0
	vmovdqu	(%edx),%xmm1
	vmovdqu	16(%edx),%xmm2
	vmovdqa	(%eax),%xmm6
	vmovdqa	16(%eax),%xmm7
	movl	%ebp,48(%esp)
	vmovdqa	%xmm0,(%esp)
	vmovdqa	%xmm1,16(%esp)
	vmovdqa	%xmm2,32(%esp)
	vmovdqa	%xmm3,48(%esp)
	movl	$10,%edx
	jmp	.L020loop1x
.align	16
.L021outer1x:
	vmovdqa	80(%eax),%xmm3
	vmovdqa	(%esp),%xmm0
	vmovdqa	16(%esp),%xmm1
	vmovdqa	32(%esp),%xmm2
	vpaddd	48(%esp),%xmm3,%xmm3
	movl	$10,%edx
	vmovdqa	%xmm3,48(%esp)
	jmp	.L020loop1x
.align	16
.L020loop1x:
	vpaddd	%xmm1,%xmm0,%xmm0
	vpxor	%xmm0,%xmm3,%xmm3
.byte	143,232,120,194,219,16	# vprotd $16,%xmm3,%xmm3
	vpaddd	%xmm3,%xmm2,%xmm2
	vpxor	%xmm2,%xmm1,%xmm1
.byte	143,232,120,194,201,12	# vprotd $12,%xmm1,%xmm1
	vpaddd	%xmm1,%xmm0,%xmm0
	vpxor	%xmm0,%xmm3,%xmm3
.byte	143,232,120,194,219,8	# vprotd $8,%xmm3,%xmm3
	vpaddd	%xmm3,%xmm2,%xmm2
	vpxor	%xmm2,%xmm1,%xmm1
.byte	143,232,120,194,201,7	# vprotd $7,%xmm1,%xmm1
	vpshufd	$78,%xmm2,%xmm2
	vpshufd	$57,%xmm1,%xmm1
	vpshufd	$147,%xmm3,%xmm3
	vpaddd	%xmm1,%xmm0,%xmm0
	vpxor	%xmm0,%xmm3,%xmm3
.byte	143,232,120,194,219,16	# vprotd $16,%xmm3,%xmm3
	vpaddd	%xmm3,%xmm2,%xmm2
	vpxor	%xmm2,%xmm1,%xmm1
.byte	143,232,120,194,201,12	# vprotd $12,%xmm1,%xmm1
	vpaddd	%xmm1,%xmm0,%xmm0
	vpxor	%xmm0,%xmm3,%xmm3
.byte	143,232,120,194,219,8	# vprotd $8,%xmm3,%xmm3
	vpaddd	%xmm3,%xmm2,%xmm2
	vpxor	%xmm2,%xmm1,%xmm1
.byte	143,232,120,194,201,7	# vprotd $7,%xmm1,%xmm1
	vpshufd	$78,%xmm2,%xmm2
	vpshufd	$147,%xmm1,%xmm1
	vpshufd	$57,%xmm3,%xmm3
	decl	%edx
	jnz	.L020loop1x
	vpaddd	(%esp),%xmm0,%xmm0
	vpaddd	16(%esp),%xmm1,%xmm1
	vpaddd	32(%esp),%xmm2,%xmm2
	vpaddd	48(%esp),%xmm3,%xmm3
	cmpl	$64,%ecx
	jb	.L022tail
	vpxor	(%esi),%xmm0,%xmm0
	vpxor	16(%esi),%xmm1,%xmm1
	vpxor	32(%esi),%xmm2,%xmm2
	vpxor	48(%esi),%xmm3,%xmm3
	leal	64(%esi),%esi
	vmovdqu	%xmm0,(%edi)
	vmovdqu	%xmm1,16(%edi)
	vmovdqu	%xmm2,32(%edi)
	vmovdqu	%xmm3,48(%edi)
	leal	64(%edi),%edi
	subl	$64,%ecx
	jnz	.L021outer1x
	jmp	.L019done
.L022tail:
	vmovdqa	%xmm0,(%esp)
	vmovdqa	%xmm1,16(%esp)
	vmovdqa	%xmm2,32(%esp)
	vmovdqa	%xmm3,48(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
	xorl	%ebp,%ebp
.L023tail_loop:
	movb	(%esp,%ebp,1),%al
	movb	(%esi,%ebp,1),%dl
	leal	1(%ebp),%ebp
	xorb	%dl,%al
	movb	%al,-1(%edi,%ebp,1)
	decl	%ecx
	jnz	.L023tail_loop
.L019done:
	vzeroupper
	movl	512(%esp),%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	ChaCha20_xop,.-.L_ChaCha20_xop_begin
.comm	OPENSSL_ia32cap_P,16,4

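# GNU property note marking the object as CET-compatible:
# GNU_PROPERTY_X86_FEATURE_1_AND (0xc0000002) with value 3 (IBT|SHSTK),
# matching the endbr32 bytes at each entry point.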
	.section ".note.gnu.property", "a"
	.p2align 2
	.long 1f - 0f
	.long 4f - 1f
	.long 5
0:
	.asciz "GNU"
1:
	.p2align 2
	.long 0xc0000002
	.long 3f - 2f
2:
	.long 3
3:
	.p2align 2
4: