# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#include "ring_core_generated/prefix_symbols_asm.h"
.text
.extern	OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P

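# Constant pool for both implementations: the ChaCha20 "expand 32-byte k"
# words, pshufb masks that rotate each 32-bit lane left by 8 and by 16 bits,
# block-counter increments for the SSE and AVX2 paths, the Poly1305 clamp
# mask for the key half "r" (followed by an all-ones pair), and .Land_masks,
# sixteen prefix masks of 1..16 leading 0xff bytes used on partial blocks.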
16chacha20_poly1305_constants:
17
18.align	64
19.Lchacha20_consts:
20.byte	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
21.byte	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
22.Lrol8:
23.byte	3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
24.byte	3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
25.Lrol16:
26.byte	2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
27.byte	2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
28.Lavx2_init:
29.long	0,0,0,0
30.Lsse_inc:
31.long	1,0,0,0
32.Lavx2_inc:
33.long	2,0,0,0,2,0,0,0
34.Lclamp:
35.quad	0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC
36.quad	0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
37.align	16
38.Land_masks:
39.byte	0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
40.byte	0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
41.byte	0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
42.byte	0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
43.byte	0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
44.byte	0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
45.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
46.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
47.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00
48.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00
49.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00
50.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00
51.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00
52.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00
53.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00
54.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
55
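# poly_hash_ad_internal: absorb the additional data into the Poly1305 state.
# On entry %rcx points at the AD and %r8 holds its length; the clamped key
# half "r" sits at 0(%rbp)/8(%rbp) and the accumulator lives in
# %r10:%r11:%r12.  The ".byte 0xf3,0xc3" returns are 2-byte "rep ret"
# sequences emitted as raw bytes.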
56.type	poly_hash_ad_internal,@function
57.align	64
58poly_hash_ad_internal:
59.cfi_startproc
60.cfi_def_cfa	rsp, 8
61	xorq	%r10,%r10
62	xorq	%r11,%r11
63	xorq	%r12,%r12
64	cmpq	$13,%r8
65	jne	.Lhash_ad_loop
66.Lpoly_fast_tls_ad:
67
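# Fast path for the common 13-byte TLS AAD: load it as a single zero-padded
# 16-byte block (pad bit 2^128 via %r12 = 1) and do one Poly1305 step.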
68	movq	(%rcx),%r10
69	movq	5(%rcx),%r11
70	shrq	$24,%r11
71	movq	$1,%r12
72	movq	0+0+0(%rbp),%rax
73	movq	%rax,%r15
74	mulq	%r10
75	movq	%rax,%r13
76	movq	%rdx,%r14
77	movq	0+0+0(%rbp),%rax
78	mulq	%r11
79	imulq	%r12,%r15
80	addq	%rax,%r14
81	adcq	%rdx,%r15
82	movq	8+0+0(%rbp),%rax
83	movq	%rax,%r9
84	mulq	%r10
85	addq	%rax,%r14
86	adcq	$0,%rdx
87	movq	%rdx,%r10
88	movq	8+0+0(%rbp),%rax
89	mulq	%r11
90	addq	%rax,%r15
91	adcq	$0,%rdx
92	imulq	%r12,%r9
93	addq	%r10,%r15
94	adcq	%rdx,%r9
95	movq	%r13,%r10
96	movq	%r14,%r11
97	movq	%r15,%r12
98	andq	$3,%r12
99	movq	%r15,%r13
100	andq	$-4,%r13
101	movq	%r9,%r14
102	shrdq	$2,%r9,%r15
103	shrq	$2,%r9
104	addq	%r13,%r15
105	adcq	%r14,%r9
106	addq	%r15,%r10
107	adcq	%r9,%r11
108	adcq	$0,%r12
109
110	.byte	0xf3,0xc3
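# Generic path: absorb the AD sixteen bytes at a time, adding each block
# plus the 2^128 pad bit and multiplying by r mod 2^130 - 5.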
111.Lhash_ad_loop:
112
113	cmpq	$16,%r8
114	jb	.Lhash_ad_tail
115	addq	0+0(%rcx),%r10
116	adcq	8+0(%rcx),%r11
117	adcq	$1,%r12
118	movq	0+0+0(%rbp),%rax
119	movq	%rax,%r15
120	mulq	%r10
121	movq	%rax,%r13
122	movq	%rdx,%r14
123	movq	0+0+0(%rbp),%rax
124	mulq	%r11
125	imulq	%r12,%r15
126	addq	%rax,%r14
127	adcq	%rdx,%r15
128	movq	8+0+0(%rbp),%rax
129	movq	%rax,%r9
130	mulq	%r10
131	addq	%rax,%r14
132	adcq	$0,%rdx
133	movq	%rdx,%r10
134	movq	8+0+0(%rbp),%rax
135	mulq	%r11
136	addq	%rax,%r15
137	adcq	$0,%rdx
138	imulq	%r12,%r9
139	addq	%r10,%r15
140	adcq	%rdx,%r9
141	movq	%r13,%r10
142	movq	%r14,%r11
143	movq	%r15,%r12
144	andq	$3,%r12
145	movq	%r15,%r13
146	andq	$-4,%r13
147	movq	%r9,%r14
148	shrdq	$2,%r9,%r15
149	shrq	$2,%r9
150	addq	%r13,%r15
151	adcq	%r14,%r9
152	addq	%r15,%r10
153	adcq	%r9,%r11
154	adcq	$0,%r12
155
156	leaq	16(%rcx),%rcx
157	subq	$16,%r8
158	jmp	.Lhash_ad_loop
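# Tail: gather the remaining 1..15 AD bytes back to front into %r13:%r14,
# zero-padded, and run one final block step.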
159.Lhash_ad_tail:
160	cmpq	$0,%r8
161	je	.Lhash_ad_done
162
163	xorq	%r13,%r13
164	xorq	%r14,%r14
165	xorq	%r15,%r15
166	addq	%r8,%rcx
167.Lhash_ad_tail_loop:
168	shldq	$8,%r13,%r14
169	shlq	$8,%r13
170	movzbq	-1(%rcx),%r15
171	xorq	%r15,%r13
172	decq	%rcx
173	decq	%r8
174	jne	.Lhash_ad_tail_loop
175
176	addq	%r13,%r10
177	adcq	%r14,%r11
178	adcq	$1,%r12
179	movq	0+0+0(%rbp),%rax
180	movq	%rax,%r15
181	mulq	%r10
182	movq	%rax,%r13
183	movq	%rdx,%r14
184	movq	0+0+0(%rbp),%rax
185	mulq	%r11
186	imulq	%r12,%r15
187	addq	%rax,%r14
188	adcq	%rdx,%r15
189	movq	8+0+0(%rbp),%rax
190	movq	%rax,%r9
191	mulq	%r10
192	addq	%rax,%r14
193	adcq	$0,%rdx
194	movq	%rdx,%r10
195	movq	8+0+0(%rbp),%rax
196	mulq	%r11
197	addq	%rax,%r15
198	adcq	$0,%rdx
199	imulq	%r12,%r9
200	addq	%r10,%r15
201	adcq	%rdx,%r9
202	movq	%r13,%r10
203	movq	%r14,%r11
204	movq	%r15,%r12
205	andq	$3,%r12
206	movq	%r15,%r13
207	andq	$-4,%r13
208	movq	%r9,%r14
209	shrdq	$2,%r9,%r15
210	shrq	$2,%r9
211	addq	%r13,%r15
212	adcq	%r14,%r9
213	addq	%r15,%r10
214	adcq	%r9,%r11
215	adcq	$0,%r12
216
217
218.Lhash_ad_done:
219	.byte	0xf3,0xc3
220.cfi_endproc
221.size	poly_hash_ad_internal, .-poly_hash_ad_internal
222
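# chacha20_poly1305_open: decrypt and authenticate in one pass.  This
# appears to follow the BoringSSL convention: out in %rdi, ciphertext in
# %rsi, its length in %rdx, AD in %rcx, AD length in %r8 and a key/nonce
# block in %r9; the computed Poly1305 tag is written back through the saved
# %r9 on exit.  The SSE code below interleaves four ChaCha20 blocks with
# Poly1305; machines with AVX2 and BMI2 branch to the _avx2 variant.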
223.globl	chacha20_poly1305_open
224.hidden chacha20_poly1305_open
225.type	chacha20_poly1305_open,@function
226.align	64
227chacha20_poly1305_open:
228.cfi_startproc
229	pushq	%rbp
230.cfi_adjust_cfa_offset	8
231.cfi_offset	%rbp,-16
232	pushq	%rbx
233.cfi_adjust_cfa_offset	8
234.cfi_offset	%rbx,-24
235	pushq	%r12
236.cfi_adjust_cfa_offset	8
237.cfi_offset	%r12,-32
238	pushq	%r13
239.cfi_adjust_cfa_offset	8
240.cfi_offset	%r13,-40
241	pushq	%r14
242.cfi_adjust_cfa_offset	8
243.cfi_offset	%r14,-48
244	pushq	%r15
245.cfi_adjust_cfa_offset	8
246.cfi_offset	%r15,-56
247
248
249	pushq	%r9
250.cfi_adjust_cfa_offset	8
251.cfi_offset	%r9,-64
252	subq	$288 + 0 + 32,%rsp
253.cfi_adjust_cfa_offset	288 + 32
254
255	leaq	32(%rsp),%rbp
256	andq	$-32,%rbp
257
258	movq	%rdx,%rbx
259	movq	%r8,0+0+32(%rbp)
260	movq	%rbx,8+0+32(%rbp)
261
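# Feature dispatch: in the third word of OPENSSL_ia32cap_P, bit 5 is AVX2
# and bit 8 is BMI2; $288 == 0x120 checks that both are present.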
262	movl	OPENSSL_ia32cap_P+8(%rip),%eax
263	andl	$288,%eax
264	xorl	$288,%eax
265	jz	chacha20_poly1305_open_avx2
266
267	cmpq	$128,%rbx
268	jbe	.Lopen_sse_128
269
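# Derive the Poly1305 key: run one ChaCha20 block and clamp the first 32
# bytes of keystream into r at 0(%rbp) and s at 16(%rbp).  The ".byte" runs
# inside the round loops encode SSSE3 palignr/pshufb instructions.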
270	movdqa	.Lchacha20_consts(%rip),%xmm0
271	movdqu	0(%r9),%xmm4
272	movdqu	16(%r9),%xmm8
273	movdqu	32(%r9),%xmm12
274
275	movdqa	%xmm12,%xmm7
276
277	movdqa	%xmm4,0+48(%rbp)
278	movdqa	%xmm8,0+64(%rbp)
279	movdqa	%xmm12,0+96(%rbp)
280	movq	$10,%r10
281.Lopen_sse_init_rounds:
282	paddd	%xmm4,%xmm0
283	pxor	%xmm0,%xmm12
284	pshufb	.Lrol16(%rip),%xmm12
285	paddd	%xmm12,%xmm8
286	pxor	%xmm8,%xmm4
287	movdqa	%xmm4,%xmm3
288	pslld	$12,%xmm3
289	psrld	$20,%xmm4
290	pxor	%xmm3,%xmm4
291	paddd	%xmm4,%xmm0
292	pxor	%xmm0,%xmm12
293	pshufb	.Lrol8(%rip),%xmm12
294	paddd	%xmm12,%xmm8
295	pxor	%xmm8,%xmm4
296	movdqa	%xmm4,%xmm3
297	pslld	$7,%xmm3
298	psrld	$25,%xmm4
299	pxor	%xmm3,%xmm4
300.byte	102,15,58,15,228,4
301.byte	102,69,15,58,15,192,8
302.byte	102,69,15,58,15,228,12
303	paddd	%xmm4,%xmm0
304	pxor	%xmm0,%xmm12
305	pshufb	.Lrol16(%rip),%xmm12
306	paddd	%xmm12,%xmm8
307	pxor	%xmm8,%xmm4
308	movdqa	%xmm4,%xmm3
309	pslld	$12,%xmm3
310	psrld	$20,%xmm4
311	pxor	%xmm3,%xmm4
312	paddd	%xmm4,%xmm0
313	pxor	%xmm0,%xmm12
314	pshufb	.Lrol8(%rip),%xmm12
315	paddd	%xmm12,%xmm8
316	pxor	%xmm8,%xmm4
317	movdqa	%xmm4,%xmm3
318	pslld	$7,%xmm3
319	psrld	$25,%xmm4
320	pxor	%xmm3,%xmm4
321.byte	102,15,58,15,228,12
322.byte	102,69,15,58,15,192,8
323.byte	102,69,15,58,15,228,4
324
325	decq	%r10
326	jne	.Lopen_sse_init_rounds
327
328	paddd	.Lchacha20_consts(%rip),%xmm0
329	paddd	0+48(%rbp),%xmm4
330
331	pand	.Lclamp(%rip),%xmm0
332	movdqa	%xmm0,0+0(%rbp)
333	movdqa	%xmm4,0+16(%rbp)
334
335	movq	%r8,%r8
336	call	poly_hash_ad_internal
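# Main open loop: each iteration produces four ChaCha20 blocks (256 bytes of
# keystream) while hashing the corresponding ciphertext, then XORs it out.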
337.Lopen_sse_main_loop:
338	cmpq	$256,%rbx
339	jb	.Lopen_sse_tail
340
341	movdqa	.Lchacha20_consts(%rip),%xmm0
342	movdqa	0+48(%rbp),%xmm4
343	movdqa	0+64(%rbp),%xmm8
344	movdqa	%xmm0,%xmm1
345	movdqa	%xmm4,%xmm5
346	movdqa	%xmm8,%xmm9
347	movdqa	%xmm0,%xmm2
348	movdqa	%xmm4,%xmm6
349	movdqa	%xmm8,%xmm10
350	movdqa	%xmm0,%xmm3
351	movdqa	%xmm4,%xmm7
352	movdqa	%xmm8,%xmm11
353	movdqa	0+96(%rbp),%xmm15
354	paddd	.Lsse_inc(%rip),%xmm15
355	movdqa	%xmm15,%xmm14
356	paddd	.Lsse_inc(%rip),%xmm14
357	movdqa	%xmm14,%xmm13
358	paddd	.Lsse_inc(%rip),%xmm13
359	movdqa	%xmm13,%xmm12
360	paddd	.Lsse_inc(%rip),%xmm12
361	movdqa	%xmm12,0+96(%rbp)
362	movdqa	%xmm13,0+112(%rbp)
363	movdqa	%xmm14,0+128(%rbp)
364	movdqa	%xmm15,0+144(%rbp)
365
366
367
368	movq	$4,%rcx
369	movq	%rsi,%r8
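# One ChaCha20 double round over all four blocks, interleaved with the
# Poly1305 absorption of one 16-byte ciphertext block (%r8 walks the input).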
370.Lopen_sse_main_loop_rounds:
371	movdqa	%xmm8,0+80(%rbp)
372	movdqa	.Lrol16(%rip),%xmm8
373	paddd	%xmm7,%xmm3
374	paddd	%xmm6,%xmm2
375	paddd	%xmm5,%xmm1
376	paddd	%xmm4,%xmm0
377	pxor	%xmm3,%xmm15
378	pxor	%xmm2,%xmm14
379	pxor	%xmm1,%xmm13
380	pxor	%xmm0,%xmm12
381.byte	102,69,15,56,0,248
382.byte	102,69,15,56,0,240
383.byte	102,69,15,56,0,232
384.byte	102,69,15,56,0,224
385	movdqa	0+80(%rbp),%xmm8
386	paddd	%xmm15,%xmm11
387	paddd	%xmm14,%xmm10
388	paddd	%xmm13,%xmm9
389	paddd	%xmm12,%xmm8
390	pxor	%xmm11,%xmm7
391	addq	0+0(%r8),%r10
392	adcq	8+0(%r8),%r11
393	adcq	$1,%r12
394
395	leaq	16(%r8),%r8
396	pxor	%xmm10,%xmm6
397	pxor	%xmm9,%xmm5
398	pxor	%xmm8,%xmm4
399	movdqa	%xmm8,0+80(%rbp)
400	movdqa	%xmm7,%xmm8
401	psrld	$20,%xmm8
402	pslld	$32-20,%xmm7
403	pxor	%xmm8,%xmm7
404	movdqa	%xmm6,%xmm8
405	psrld	$20,%xmm8
406	pslld	$32-20,%xmm6
407	pxor	%xmm8,%xmm6
408	movdqa	%xmm5,%xmm8
409	psrld	$20,%xmm8
410	pslld	$32-20,%xmm5
411	pxor	%xmm8,%xmm5
412	movdqa	%xmm4,%xmm8
413	psrld	$20,%xmm8
414	pslld	$32-20,%xmm4
415	pxor	%xmm8,%xmm4
416	movq	0+0+0(%rbp),%rax
417	movq	%rax,%r15
418	mulq	%r10
419	movq	%rax,%r13
420	movq	%rdx,%r14
421	movq	0+0+0(%rbp),%rax
422	mulq	%r11
423	imulq	%r12,%r15
424	addq	%rax,%r14
425	adcq	%rdx,%r15
426	movdqa	.Lrol8(%rip),%xmm8
427	paddd	%xmm7,%xmm3
428	paddd	%xmm6,%xmm2
429	paddd	%xmm5,%xmm1
430	paddd	%xmm4,%xmm0
431	pxor	%xmm3,%xmm15
432	pxor	%xmm2,%xmm14
433	pxor	%xmm1,%xmm13
434	pxor	%xmm0,%xmm12
435.byte	102,69,15,56,0,248
436.byte	102,69,15,56,0,240
437.byte	102,69,15,56,0,232
438.byte	102,69,15,56,0,224
439	movdqa	0+80(%rbp),%xmm8
440	paddd	%xmm15,%xmm11
441	paddd	%xmm14,%xmm10
442	paddd	%xmm13,%xmm9
443	paddd	%xmm12,%xmm8
444	pxor	%xmm11,%xmm7
445	pxor	%xmm10,%xmm6
446	movq	8+0+0(%rbp),%rax
447	movq	%rax,%r9
448	mulq	%r10
449	addq	%rax,%r14
450	adcq	$0,%rdx
451	movq	%rdx,%r10
452	movq	8+0+0(%rbp),%rax
453	mulq	%r11
454	addq	%rax,%r15
455	adcq	$0,%rdx
456	pxor	%xmm9,%xmm5
457	pxor	%xmm8,%xmm4
458	movdqa	%xmm8,0+80(%rbp)
459	movdqa	%xmm7,%xmm8
460	psrld	$25,%xmm8
461	pslld	$32-25,%xmm7
462	pxor	%xmm8,%xmm7
463	movdqa	%xmm6,%xmm8
464	psrld	$25,%xmm8
465	pslld	$32-25,%xmm6
466	pxor	%xmm8,%xmm6
467	movdqa	%xmm5,%xmm8
468	psrld	$25,%xmm8
469	pslld	$32-25,%xmm5
470	pxor	%xmm8,%xmm5
471	movdqa	%xmm4,%xmm8
472	psrld	$25,%xmm8
473	pslld	$32-25,%xmm4
474	pxor	%xmm8,%xmm4
475	movdqa	0+80(%rbp),%xmm8
476	imulq	%r12,%r9
477	addq	%r10,%r15
478	adcq	%rdx,%r9
479.byte	102,15,58,15,255,4
480.byte	102,69,15,58,15,219,8
481.byte	102,69,15,58,15,255,12
482.byte	102,15,58,15,246,4
483.byte	102,69,15,58,15,210,8
484.byte	102,69,15,58,15,246,12
485.byte	102,15,58,15,237,4
486.byte	102,69,15,58,15,201,8
487.byte	102,69,15,58,15,237,12
488.byte	102,15,58,15,228,4
489.byte	102,69,15,58,15,192,8
490.byte	102,69,15,58,15,228,12
491	movdqa	%xmm8,0+80(%rbp)
492	movdqa	.Lrol16(%rip),%xmm8
493	paddd	%xmm7,%xmm3
494	paddd	%xmm6,%xmm2
495	paddd	%xmm5,%xmm1
496	paddd	%xmm4,%xmm0
497	pxor	%xmm3,%xmm15
498	pxor	%xmm2,%xmm14
499	movq	%r13,%r10
500	movq	%r14,%r11
501	movq	%r15,%r12
502	andq	$3,%r12
503	movq	%r15,%r13
504	andq	$-4,%r13
505	movq	%r9,%r14
506	shrdq	$2,%r9,%r15
507	shrq	$2,%r9
508	addq	%r13,%r15
509	adcq	%r14,%r9
510	addq	%r15,%r10
511	adcq	%r9,%r11
512	adcq	$0,%r12
513	pxor	%xmm1,%xmm13
514	pxor	%xmm0,%xmm12
515.byte	102,69,15,56,0,248
516.byte	102,69,15,56,0,240
517.byte	102,69,15,56,0,232
518.byte	102,69,15,56,0,224
519	movdqa	0+80(%rbp),%xmm8
520	paddd	%xmm15,%xmm11
521	paddd	%xmm14,%xmm10
522	paddd	%xmm13,%xmm9
523	paddd	%xmm12,%xmm8
524	pxor	%xmm11,%xmm7
525	pxor	%xmm10,%xmm6
526	pxor	%xmm9,%xmm5
527	pxor	%xmm8,%xmm4
528	movdqa	%xmm8,0+80(%rbp)
529	movdqa	%xmm7,%xmm8
530	psrld	$20,%xmm8
531	pslld	$32-20,%xmm7
532	pxor	%xmm8,%xmm7
533	movdqa	%xmm6,%xmm8
534	psrld	$20,%xmm8
535	pslld	$32-20,%xmm6
536	pxor	%xmm8,%xmm6
537	movdqa	%xmm5,%xmm8
538	psrld	$20,%xmm8
539	pslld	$32-20,%xmm5
540	pxor	%xmm8,%xmm5
541	movdqa	%xmm4,%xmm8
542	psrld	$20,%xmm8
543	pslld	$32-20,%xmm4
544	pxor	%xmm8,%xmm4
545	movdqa	.Lrol8(%rip),%xmm8
546	paddd	%xmm7,%xmm3
547	paddd	%xmm6,%xmm2
548	paddd	%xmm5,%xmm1
549	paddd	%xmm4,%xmm0
550	pxor	%xmm3,%xmm15
551	pxor	%xmm2,%xmm14
552	pxor	%xmm1,%xmm13
553	pxor	%xmm0,%xmm12
554.byte	102,69,15,56,0,248
555.byte	102,69,15,56,0,240
556.byte	102,69,15,56,0,232
557.byte	102,69,15,56,0,224
558	movdqa	0+80(%rbp),%xmm8
559	paddd	%xmm15,%xmm11
560	paddd	%xmm14,%xmm10
561	paddd	%xmm13,%xmm9
562	paddd	%xmm12,%xmm8
563	pxor	%xmm11,%xmm7
564	pxor	%xmm10,%xmm6
565	pxor	%xmm9,%xmm5
566	pxor	%xmm8,%xmm4
567	movdqa	%xmm8,0+80(%rbp)
568	movdqa	%xmm7,%xmm8
569	psrld	$25,%xmm8
570	pslld	$32-25,%xmm7
571	pxor	%xmm8,%xmm7
572	movdqa	%xmm6,%xmm8
573	psrld	$25,%xmm8
574	pslld	$32-25,%xmm6
575	pxor	%xmm8,%xmm6
576	movdqa	%xmm5,%xmm8
577	psrld	$25,%xmm8
578	pslld	$32-25,%xmm5
579	pxor	%xmm8,%xmm5
580	movdqa	%xmm4,%xmm8
581	psrld	$25,%xmm8
582	pslld	$32-25,%xmm4
583	pxor	%xmm8,%xmm4
584	movdqa	0+80(%rbp),%xmm8
585.byte	102,15,58,15,255,12
586.byte	102,69,15,58,15,219,8
587.byte	102,69,15,58,15,255,4
588.byte	102,15,58,15,246,12
589.byte	102,69,15,58,15,210,8
590.byte	102,69,15,58,15,246,4
591.byte	102,15,58,15,237,12
592.byte	102,69,15,58,15,201,8
593.byte	102,69,15,58,15,237,4
594.byte	102,15,58,15,228,12
595.byte	102,69,15,58,15,192,8
596.byte	102,69,15,58,15,228,4
597
598	decq	%rcx
599	jge	.Lopen_sse_main_loop_rounds
600	addq	0+0(%r8),%r10
601	adcq	8+0(%r8),%r11
602	adcq	$1,%r12
603	movq	0+0+0(%rbp),%rax
604	movq	%rax,%r15
605	mulq	%r10
606	movq	%rax,%r13
607	movq	%rdx,%r14
608	movq	0+0+0(%rbp),%rax
609	mulq	%r11
610	imulq	%r12,%r15
611	addq	%rax,%r14
612	adcq	%rdx,%r15
613	movq	8+0+0(%rbp),%rax
614	movq	%rax,%r9
615	mulq	%r10
616	addq	%rax,%r14
617	adcq	$0,%rdx
618	movq	%rdx,%r10
619	movq	8+0+0(%rbp),%rax
620	mulq	%r11
621	addq	%rax,%r15
622	adcq	$0,%rdx
623	imulq	%r12,%r9
624	addq	%r10,%r15
625	adcq	%rdx,%r9
626	movq	%r13,%r10
627	movq	%r14,%r11
628	movq	%r15,%r12
629	andq	$3,%r12
630	movq	%r15,%r13
631	andq	$-4,%r13
632	movq	%r9,%r14
633	shrdq	$2,%r9,%r15
634	shrq	$2,%r9
635	addq	%r13,%r15
636	adcq	%r14,%r9
637	addq	%r15,%r10
638	adcq	%r9,%r11
639	adcq	$0,%r12
640
641	leaq	16(%r8),%r8
642	cmpq	$-6,%rcx
643	jg	.Lopen_sse_main_loop_rounds
644	paddd	.Lchacha20_consts(%rip),%xmm3
645	paddd	0+48(%rbp),%xmm7
646	paddd	0+64(%rbp),%xmm11
647	paddd	0+144(%rbp),%xmm15
648	paddd	.Lchacha20_consts(%rip),%xmm2
649	paddd	0+48(%rbp),%xmm6
650	paddd	0+64(%rbp),%xmm10
651	paddd	0+128(%rbp),%xmm14
652	paddd	.Lchacha20_consts(%rip),%xmm1
653	paddd	0+48(%rbp),%xmm5
654	paddd	0+64(%rbp),%xmm9
655	paddd	0+112(%rbp),%xmm13
656	paddd	.Lchacha20_consts(%rip),%xmm0
657	paddd	0+48(%rbp),%xmm4
658	paddd	0+64(%rbp),%xmm8
659	paddd	0+96(%rbp),%xmm12
660	movdqa	%xmm12,0+80(%rbp)
661	movdqu	0 + 0(%rsi),%xmm12
662	pxor	%xmm3,%xmm12
663	movdqu	%xmm12,0 + 0(%rdi)
664	movdqu	16 + 0(%rsi),%xmm12
665	pxor	%xmm7,%xmm12
666	movdqu	%xmm12,16 + 0(%rdi)
667	movdqu	32 + 0(%rsi),%xmm12
668	pxor	%xmm11,%xmm12
669	movdqu	%xmm12,32 + 0(%rdi)
670	movdqu	48 + 0(%rsi),%xmm12
671	pxor	%xmm15,%xmm12
672	movdqu	%xmm12,48 + 0(%rdi)
673	movdqu	0 + 64(%rsi),%xmm3
674	movdqu	16 + 64(%rsi),%xmm7
675	movdqu	32 + 64(%rsi),%xmm11
676	movdqu	48 + 64(%rsi),%xmm15
677	pxor	%xmm3,%xmm2
678	pxor	%xmm7,%xmm6
679	pxor	%xmm11,%xmm10
680	pxor	%xmm14,%xmm15
681	movdqu	%xmm2,0 + 64(%rdi)
682	movdqu	%xmm6,16 + 64(%rdi)
683	movdqu	%xmm10,32 + 64(%rdi)
684	movdqu	%xmm15,48 + 64(%rdi)
685	movdqu	0 + 128(%rsi),%xmm3
686	movdqu	16 + 128(%rsi),%xmm7
687	movdqu	32 + 128(%rsi),%xmm11
688	movdqu	48 + 128(%rsi),%xmm15
689	pxor	%xmm3,%xmm1
690	pxor	%xmm7,%xmm5
691	pxor	%xmm11,%xmm9
692	pxor	%xmm13,%xmm15
693	movdqu	%xmm1,0 + 128(%rdi)
694	movdqu	%xmm5,16 + 128(%rdi)
695	movdqu	%xmm9,32 + 128(%rdi)
696	movdqu	%xmm15,48 + 128(%rdi)
697	movdqu	0 + 192(%rsi),%xmm3
698	movdqu	16 + 192(%rsi),%xmm7
699	movdqu	32 + 192(%rsi),%xmm11
700	movdqu	48 + 192(%rsi),%xmm15
701	pxor	%xmm3,%xmm0
702	pxor	%xmm7,%xmm4
703	pxor	%xmm11,%xmm8
704	pxor	0+80(%rbp),%xmm15
705	movdqu	%xmm0,0 + 192(%rdi)
706	movdqu	%xmm4,16 + 192(%rdi)
707	movdqu	%xmm8,32 + 192(%rdi)
708	movdqu	%xmm15,48 + 192(%rdi)
709
710	leaq	256(%rsi),%rsi
711	leaq	256(%rdi),%rdi
712	subq	$256,%rbx
713	jmp	.Lopen_sse_main_loop
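# Fewer than 256 bytes remain: choose a tail path sized for the remaining
# data (up to 64, 128, 192 or 256 bytes); whole 16-byte blocks are hashed
# here and any final partial block is handled byte-wise further down.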
714.Lopen_sse_tail:
715
716	testq	%rbx,%rbx
717	jz	.Lopen_sse_finalize
718	cmpq	$192,%rbx
719	ja	.Lopen_sse_tail_256
720	cmpq	$128,%rbx
721	ja	.Lopen_sse_tail_192
722	cmpq	$64,%rbx
723	ja	.Lopen_sse_tail_128
724	movdqa	.Lchacha20_consts(%rip),%xmm0
725	movdqa	0+48(%rbp),%xmm4
726	movdqa	0+64(%rbp),%xmm8
727	movdqa	0+96(%rbp),%xmm12
728	paddd	.Lsse_inc(%rip),%xmm12
729	movdqa	%xmm12,0+96(%rbp)
730
731	xorq	%r8,%r8
732	movq	%rbx,%rcx
733	cmpq	$16,%rcx
734	jb	.Lopen_sse_tail_64_rounds
735.Lopen_sse_tail_64_rounds_and_x1hash:
736	addq	0+0(%rsi,%r8,1),%r10
737	adcq	8+0(%rsi,%r8,1),%r11
738	adcq	$1,%r12
739	movq	0+0+0(%rbp),%rax
740	movq	%rax,%r15
741	mulq	%r10
742	movq	%rax,%r13
743	movq	%rdx,%r14
744	movq	0+0+0(%rbp),%rax
745	mulq	%r11
746	imulq	%r12,%r15
747	addq	%rax,%r14
748	adcq	%rdx,%r15
749	movq	8+0+0(%rbp),%rax
750	movq	%rax,%r9
751	mulq	%r10
752	addq	%rax,%r14
753	adcq	$0,%rdx
754	movq	%rdx,%r10
755	movq	8+0+0(%rbp),%rax
756	mulq	%r11
757	addq	%rax,%r15
758	adcq	$0,%rdx
759	imulq	%r12,%r9
760	addq	%r10,%r15
761	adcq	%rdx,%r9
762	movq	%r13,%r10
763	movq	%r14,%r11
764	movq	%r15,%r12
765	andq	$3,%r12
766	movq	%r15,%r13
767	andq	$-4,%r13
768	movq	%r9,%r14
769	shrdq	$2,%r9,%r15
770	shrq	$2,%r9
771	addq	%r13,%r15
772	adcq	%r14,%r9
773	addq	%r15,%r10
774	adcq	%r9,%r11
775	adcq	$0,%r12
776
777	subq	$16,%rcx
778.Lopen_sse_tail_64_rounds:
779	addq	$16,%r8
780	paddd	%xmm4,%xmm0
781	pxor	%xmm0,%xmm12
782	pshufb	.Lrol16(%rip),%xmm12
783	paddd	%xmm12,%xmm8
784	pxor	%xmm8,%xmm4
785	movdqa	%xmm4,%xmm3
786	pslld	$12,%xmm3
787	psrld	$20,%xmm4
788	pxor	%xmm3,%xmm4
789	paddd	%xmm4,%xmm0
790	pxor	%xmm0,%xmm12
791	pshufb	.Lrol8(%rip),%xmm12
792	paddd	%xmm12,%xmm8
793	pxor	%xmm8,%xmm4
794	movdqa	%xmm4,%xmm3
795	pslld	$7,%xmm3
796	psrld	$25,%xmm4
797	pxor	%xmm3,%xmm4
798.byte	102,15,58,15,228,4
799.byte	102,69,15,58,15,192,8
800.byte	102,69,15,58,15,228,12
801	paddd	%xmm4,%xmm0
802	pxor	%xmm0,%xmm12
803	pshufb	.Lrol16(%rip),%xmm12
804	paddd	%xmm12,%xmm8
805	pxor	%xmm8,%xmm4
806	movdqa	%xmm4,%xmm3
807	pslld	$12,%xmm3
808	psrld	$20,%xmm4
809	pxor	%xmm3,%xmm4
810	paddd	%xmm4,%xmm0
811	pxor	%xmm0,%xmm12
812	pshufb	.Lrol8(%rip),%xmm12
813	paddd	%xmm12,%xmm8
814	pxor	%xmm8,%xmm4
815	movdqa	%xmm4,%xmm3
816	pslld	$7,%xmm3
817	psrld	$25,%xmm4
818	pxor	%xmm3,%xmm4
819.byte	102,15,58,15,228,12
820.byte	102,69,15,58,15,192,8
821.byte	102,69,15,58,15,228,4
822
823	cmpq	$16,%rcx
824	jae	.Lopen_sse_tail_64_rounds_and_x1hash
825	cmpq	$160,%r8
826	jne	.Lopen_sse_tail_64_rounds
827	paddd	.Lchacha20_consts(%rip),%xmm0
828	paddd	0+48(%rbp),%xmm4
829	paddd	0+64(%rbp),%xmm8
830	paddd	0+96(%rbp),%xmm12
831
832	jmp	.Lopen_sse_tail_64_dec_loop
833
834.Lopen_sse_tail_128:
835	movdqa	.Lchacha20_consts(%rip),%xmm0
836	movdqa	0+48(%rbp),%xmm4
837	movdqa	0+64(%rbp),%xmm8
838	movdqa	%xmm0,%xmm1
839	movdqa	%xmm4,%xmm5
840	movdqa	%xmm8,%xmm9
841	movdqa	0+96(%rbp),%xmm13
842	paddd	.Lsse_inc(%rip),%xmm13
843	movdqa	%xmm13,%xmm12
844	paddd	.Lsse_inc(%rip),%xmm12
845	movdqa	%xmm12,0+96(%rbp)
846	movdqa	%xmm13,0+112(%rbp)
847
848	movq	%rbx,%rcx
849	andq	$-16,%rcx
850	xorq	%r8,%r8
851.Lopen_sse_tail_128_rounds_and_x1hash:
852	addq	0+0(%rsi,%r8,1),%r10
853	adcq	8+0(%rsi,%r8,1),%r11
854	adcq	$1,%r12
855	movq	0+0+0(%rbp),%rax
856	movq	%rax,%r15
857	mulq	%r10
858	movq	%rax,%r13
859	movq	%rdx,%r14
860	movq	0+0+0(%rbp),%rax
861	mulq	%r11
862	imulq	%r12,%r15
863	addq	%rax,%r14
864	adcq	%rdx,%r15
865	movq	8+0+0(%rbp),%rax
866	movq	%rax,%r9
867	mulq	%r10
868	addq	%rax,%r14
869	adcq	$0,%rdx
870	movq	%rdx,%r10
871	movq	8+0+0(%rbp),%rax
872	mulq	%r11
873	addq	%rax,%r15
874	adcq	$0,%rdx
875	imulq	%r12,%r9
876	addq	%r10,%r15
877	adcq	%rdx,%r9
878	movq	%r13,%r10
879	movq	%r14,%r11
880	movq	%r15,%r12
881	andq	$3,%r12
882	movq	%r15,%r13
883	andq	$-4,%r13
884	movq	%r9,%r14
885	shrdq	$2,%r9,%r15
886	shrq	$2,%r9
887	addq	%r13,%r15
888	adcq	%r14,%r9
889	addq	%r15,%r10
890	adcq	%r9,%r11
891	adcq	$0,%r12
892
893.Lopen_sse_tail_128_rounds:
894	addq	$16,%r8
895	paddd	%xmm4,%xmm0
896	pxor	%xmm0,%xmm12
897	pshufb	.Lrol16(%rip),%xmm12
898	paddd	%xmm12,%xmm8
899	pxor	%xmm8,%xmm4
900	movdqa	%xmm4,%xmm3
901	pslld	$12,%xmm3
902	psrld	$20,%xmm4
903	pxor	%xmm3,%xmm4
904	paddd	%xmm4,%xmm0
905	pxor	%xmm0,%xmm12
906	pshufb	.Lrol8(%rip),%xmm12
907	paddd	%xmm12,%xmm8
908	pxor	%xmm8,%xmm4
909	movdqa	%xmm4,%xmm3
910	pslld	$7,%xmm3
911	psrld	$25,%xmm4
912	pxor	%xmm3,%xmm4
913.byte	102,15,58,15,228,4
914.byte	102,69,15,58,15,192,8
915.byte	102,69,15,58,15,228,12
916	paddd	%xmm5,%xmm1
917	pxor	%xmm1,%xmm13
918	pshufb	.Lrol16(%rip),%xmm13
919	paddd	%xmm13,%xmm9
920	pxor	%xmm9,%xmm5
921	movdqa	%xmm5,%xmm3
922	pslld	$12,%xmm3
923	psrld	$20,%xmm5
924	pxor	%xmm3,%xmm5
925	paddd	%xmm5,%xmm1
926	pxor	%xmm1,%xmm13
927	pshufb	.Lrol8(%rip),%xmm13
928	paddd	%xmm13,%xmm9
929	pxor	%xmm9,%xmm5
930	movdqa	%xmm5,%xmm3
931	pslld	$7,%xmm3
932	psrld	$25,%xmm5
933	pxor	%xmm3,%xmm5
934.byte	102,15,58,15,237,4
935.byte	102,69,15,58,15,201,8
936.byte	102,69,15,58,15,237,12
937	paddd	%xmm4,%xmm0
938	pxor	%xmm0,%xmm12
939	pshufb	.Lrol16(%rip),%xmm12
940	paddd	%xmm12,%xmm8
941	pxor	%xmm8,%xmm4
942	movdqa	%xmm4,%xmm3
943	pslld	$12,%xmm3
944	psrld	$20,%xmm4
945	pxor	%xmm3,%xmm4
946	paddd	%xmm4,%xmm0
947	pxor	%xmm0,%xmm12
948	pshufb	.Lrol8(%rip),%xmm12
949	paddd	%xmm12,%xmm8
950	pxor	%xmm8,%xmm4
951	movdqa	%xmm4,%xmm3
952	pslld	$7,%xmm3
953	psrld	$25,%xmm4
954	pxor	%xmm3,%xmm4
955.byte	102,15,58,15,228,12
956.byte	102,69,15,58,15,192,8
957.byte	102,69,15,58,15,228,4
958	paddd	%xmm5,%xmm1
959	pxor	%xmm1,%xmm13
960	pshufb	.Lrol16(%rip),%xmm13
961	paddd	%xmm13,%xmm9
962	pxor	%xmm9,%xmm5
963	movdqa	%xmm5,%xmm3
964	pslld	$12,%xmm3
965	psrld	$20,%xmm5
966	pxor	%xmm3,%xmm5
967	paddd	%xmm5,%xmm1
968	pxor	%xmm1,%xmm13
969	pshufb	.Lrol8(%rip),%xmm13
970	paddd	%xmm13,%xmm9
971	pxor	%xmm9,%xmm5
972	movdqa	%xmm5,%xmm3
973	pslld	$7,%xmm3
974	psrld	$25,%xmm5
975	pxor	%xmm3,%xmm5
976.byte	102,15,58,15,237,12
977.byte	102,69,15,58,15,201,8
978.byte	102,69,15,58,15,237,4
979
980	cmpq	%rcx,%r8
981	jb	.Lopen_sse_tail_128_rounds_and_x1hash
982	cmpq	$160,%r8
983	jne	.Lopen_sse_tail_128_rounds
984	paddd	.Lchacha20_consts(%rip),%xmm1
985	paddd	0+48(%rbp),%xmm5
986	paddd	0+64(%rbp),%xmm9
987	paddd	0+112(%rbp),%xmm13
988	paddd	.Lchacha20_consts(%rip),%xmm0
989	paddd	0+48(%rbp),%xmm4
990	paddd	0+64(%rbp),%xmm8
991	paddd	0+96(%rbp),%xmm12
992	movdqu	0 + 0(%rsi),%xmm3
993	movdqu	16 + 0(%rsi),%xmm7
994	movdqu	32 + 0(%rsi),%xmm11
995	movdqu	48 + 0(%rsi),%xmm15
996	pxor	%xmm3,%xmm1
997	pxor	%xmm7,%xmm5
998	pxor	%xmm11,%xmm9
999	pxor	%xmm13,%xmm15
1000	movdqu	%xmm1,0 + 0(%rdi)
1001	movdqu	%xmm5,16 + 0(%rdi)
1002	movdqu	%xmm9,32 + 0(%rdi)
1003	movdqu	%xmm15,48 + 0(%rdi)
1004
1005	subq	$64,%rbx
1006	leaq	64(%rsi),%rsi
1007	leaq	64(%rdi),%rdi
1008	jmp	.Lopen_sse_tail_64_dec_loop
1009
1010.Lopen_sse_tail_192:
1011	movdqa	.Lchacha20_consts(%rip),%xmm0
1012	movdqa	0+48(%rbp),%xmm4
1013	movdqa	0+64(%rbp),%xmm8
1014	movdqa	%xmm0,%xmm1
1015	movdqa	%xmm4,%xmm5
1016	movdqa	%xmm8,%xmm9
1017	movdqa	%xmm0,%xmm2
1018	movdqa	%xmm4,%xmm6
1019	movdqa	%xmm8,%xmm10
1020	movdqa	0+96(%rbp),%xmm14
1021	paddd	.Lsse_inc(%rip),%xmm14
1022	movdqa	%xmm14,%xmm13
1023	paddd	.Lsse_inc(%rip),%xmm13
1024	movdqa	%xmm13,%xmm12
1025	paddd	.Lsse_inc(%rip),%xmm12
1026	movdqa	%xmm12,0+96(%rbp)
1027	movdqa	%xmm13,0+112(%rbp)
1028	movdqa	%xmm14,0+128(%rbp)
1029
1030	movq	%rbx,%rcx
1031	movq	$160,%r8
1032	cmpq	$160,%rcx
1033	cmovgq	%r8,%rcx
1034	andq	$-16,%rcx
1035	xorq	%r8,%r8
1036.Lopen_sse_tail_192_rounds_and_x1hash:
1037	addq	0+0(%rsi,%r8,1),%r10
1038	adcq	8+0(%rsi,%r8,1),%r11
1039	adcq	$1,%r12
1040	movq	0+0+0(%rbp),%rax
1041	movq	%rax,%r15
1042	mulq	%r10
1043	movq	%rax,%r13
1044	movq	%rdx,%r14
1045	movq	0+0+0(%rbp),%rax
1046	mulq	%r11
1047	imulq	%r12,%r15
1048	addq	%rax,%r14
1049	adcq	%rdx,%r15
1050	movq	8+0+0(%rbp),%rax
1051	movq	%rax,%r9
1052	mulq	%r10
1053	addq	%rax,%r14
1054	adcq	$0,%rdx
1055	movq	%rdx,%r10
1056	movq	8+0+0(%rbp),%rax
1057	mulq	%r11
1058	addq	%rax,%r15
1059	adcq	$0,%rdx
1060	imulq	%r12,%r9
1061	addq	%r10,%r15
1062	adcq	%rdx,%r9
1063	movq	%r13,%r10
1064	movq	%r14,%r11
1065	movq	%r15,%r12
1066	andq	$3,%r12
1067	movq	%r15,%r13
1068	andq	$-4,%r13
1069	movq	%r9,%r14
1070	shrdq	$2,%r9,%r15
1071	shrq	$2,%r9
1072	addq	%r13,%r15
1073	adcq	%r14,%r9
1074	addq	%r15,%r10
1075	adcq	%r9,%r11
1076	adcq	$0,%r12
1077
1078.Lopen_sse_tail_192_rounds:
1079	addq	$16,%r8
1080	paddd	%xmm4,%xmm0
1081	pxor	%xmm0,%xmm12
1082	pshufb	.Lrol16(%rip),%xmm12
1083	paddd	%xmm12,%xmm8
1084	pxor	%xmm8,%xmm4
1085	movdqa	%xmm4,%xmm3
1086	pslld	$12,%xmm3
1087	psrld	$20,%xmm4
1088	pxor	%xmm3,%xmm4
1089	paddd	%xmm4,%xmm0
1090	pxor	%xmm0,%xmm12
1091	pshufb	.Lrol8(%rip),%xmm12
1092	paddd	%xmm12,%xmm8
1093	pxor	%xmm8,%xmm4
1094	movdqa	%xmm4,%xmm3
1095	pslld	$7,%xmm3
1096	psrld	$25,%xmm4
1097	pxor	%xmm3,%xmm4
1098.byte	102,15,58,15,228,4
1099.byte	102,69,15,58,15,192,8
1100.byte	102,69,15,58,15,228,12
1101	paddd	%xmm5,%xmm1
1102	pxor	%xmm1,%xmm13
1103	pshufb	.Lrol16(%rip),%xmm13
1104	paddd	%xmm13,%xmm9
1105	pxor	%xmm9,%xmm5
1106	movdqa	%xmm5,%xmm3
1107	pslld	$12,%xmm3
1108	psrld	$20,%xmm5
1109	pxor	%xmm3,%xmm5
1110	paddd	%xmm5,%xmm1
1111	pxor	%xmm1,%xmm13
1112	pshufb	.Lrol8(%rip),%xmm13
1113	paddd	%xmm13,%xmm9
1114	pxor	%xmm9,%xmm5
1115	movdqa	%xmm5,%xmm3
1116	pslld	$7,%xmm3
1117	psrld	$25,%xmm5
1118	pxor	%xmm3,%xmm5
1119.byte	102,15,58,15,237,4
1120.byte	102,69,15,58,15,201,8
1121.byte	102,69,15,58,15,237,12
1122	paddd	%xmm6,%xmm2
1123	pxor	%xmm2,%xmm14
1124	pshufb	.Lrol16(%rip),%xmm14
1125	paddd	%xmm14,%xmm10
1126	pxor	%xmm10,%xmm6
1127	movdqa	%xmm6,%xmm3
1128	pslld	$12,%xmm3
1129	psrld	$20,%xmm6
1130	pxor	%xmm3,%xmm6
1131	paddd	%xmm6,%xmm2
1132	pxor	%xmm2,%xmm14
1133	pshufb	.Lrol8(%rip),%xmm14
1134	paddd	%xmm14,%xmm10
1135	pxor	%xmm10,%xmm6
1136	movdqa	%xmm6,%xmm3
1137	pslld	$7,%xmm3
1138	psrld	$25,%xmm6
1139	pxor	%xmm3,%xmm6
1140.byte	102,15,58,15,246,4
1141.byte	102,69,15,58,15,210,8
1142.byte	102,69,15,58,15,246,12
1143	paddd	%xmm4,%xmm0
1144	pxor	%xmm0,%xmm12
1145	pshufb	.Lrol16(%rip),%xmm12
1146	paddd	%xmm12,%xmm8
1147	pxor	%xmm8,%xmm4
1148	movdqa	%xmm4,%xmm3
1149	pslld	$12,%xmm3
1150	psrld	$20,%xmm4
1151	pxor	%xmm3,%xmm4
1152	paddd	%xmm4,%xmm0
1153	pxor	%xmm0,%xmm12
1154	pshufb	.Lrol8(%rip),%xmm12
1155	paddd	%xmm12,%xmm8
1156	pxor	%xmm8,%xmm4
1157	movdqa	%xmm4,%xmm3
1158	pslld	$7,%xmm3
1159	psrld	$25,%xmm4
1160	pxor	%xmm3,%xmm4
1161.byte	102,15,58,15,228,12
1162.byte	102,69,15,58,15,192,8
1163.byte	102,69,15,58,15,228,4
1164	paddd	%xmm5,%xmm1
1165	pxor	%xmm1,%xmm13
1166	pshufb	.Lrol16(%rip),%xmm13
1167	paddd	%xmm13,%xmm9
1168	pxor	%xmm9,%xmm5
1169	movdqa	%xmm5,%xmm3
1170	pslld	$12,%xmm3
1171	psrld	$20,%xmm5
1172	pxor	%xmm3,%xmm5
1173	paddd	%xmm5,%xmm1
1174	pxor	%xmm1,%xmm13
1175	pshufb	.Lrol8(%rip),%xmm13
1176	paddd	%xmm13,%xmm9
1177	pxor	%xmm9,%xmm5
1178	movdqa	%xmm5,%xmm3
1179	pslld	$7,%xmm3
1180	psrld	$25,%xmm5
1181	pxor	%xmm3,%xmm5
1182.byte	102,15,58,15,237,12
1183.byte	102,69,15,58,15,201,8
1184.byte	102,69,15,58,15,237,4
1185	paddd	%xmm6,%xmm2
1186	pxor	%xmm2,%xmm14
1187	pshufb	.Lrol16(%rip),%xmm14
1188	paddd	%xmm14,%xmm10
1189	pxor	%xmm10,%xmm6
1190	movdqa	%xmm6,%xmm3
1191	pslld	$12,%xmm3
1192	psrld	$20,%xmm6
1193	pxor	%xmm3,%xmm6
1194	paddd	%xmm6,%xmm2
1195	pxor	%xmm2,%xmm14
1196	pshufb	.Lrol8(%rip),%xmm14
1197	paddd	%xmm14,%xmm10
1198	pxor	%xmm10,%xmm6
1199	movdqa	%xmm6,%xmm3
1200	pslld	$7,%xmm3
1201	psrld	$25,%xmm6
1202	pxor	%xmm3,%xmm6
1203.byte	102,15,58,15,246,12
1204.byte	102,69,15,58,15,210,8
1205.byte	102,69,15,58,15,246,4
1206
1207	cmpq	%rcx,%r8
1208	jb	.Lopen_sse_tail_192_rounds_and_x1hash
1209	cmpq	$160,%r8
1210	jne	.Lopen_sse_tail_192_rounds
1211	cmpq	$176,%rbx
1212	jb	.Lopen_sse_tail_192_finish
1213	addq	0+160(%rsi),%r10
1214	adcq	8+160(%rsi),%r11
1215	adcq	$1,%r12
1216	movq	0+0+0(%rbp),%rax
1217	movq	%rax,%r15
1218	mulq	%r10
1219	movq	%rax,%r13
1220	movq	%rdx,%r14
1221	movq	0+0+0(%rbp),%rax
1222	mulq	%r11
1223	imulq	%r12,%r15
1224	addq	%rax,%r14
1225	adcq	%rdx,%r15
1226	movq	8+0+0(%rbp),%rax
1227	movq	%rax,%r9
1228	mulq	%r10
1229	addq	%rax,%r14
1230	adcq	$0,%rdx
1231	movq	%rdx,%r10
1232	movq	8+0+0(%rbp),%rax
1233	mulq	%r11
1234	addq	%rax,%r15
1235	adcq	$0,%rdx
1236	imulq	%r12,%r9
1237	addq	%r10,%r15
1238	adcq	%rdx,%r9
1239	movq	%r13,%r10
1240	movq	%r14,%r11
1241	movq	%r15,%r12
1242	andq	$3,%r12
1243	movq	%r15,%r13
1244	andq	$-4,%r13
1245	movq	%r9,%r14
1246	shrdq	$2,%r9,%r15
1247	shrq	$2,%r9
1248	addq	%r13,%r15
1249	adcq	%r14,%r9
1250	addq	%r15,%r10
1251	adcq	%r9,%r11
1252	adcq	$0,%r12
1253
1254	cmpq	$192,%rbx
1255	jb	.Lopen_sse_tail_192_finish
1256	addq	0+176(%rsi),%r10
1257	adcq	8+176(%rsi),%r11
1258	adcq	$1,%r12
1259	movq	0+0+0(%rbp),%rax
1260	movq	%rax,%r15
1261	mulq	%r10
1262	movq	%rax,%r13
1263	movq	%rdx,%r14
1264	movq	0+0+0(%rbp),%rax
1265	mulq	%r11
1266	imulq	%r12,%r15
1267	addq	%rax,%r14
1268	adcq	%rdx,%r15
1269	movq	8+0+0(%rbp),%rax
1270	movq	%rax,%r9
1271	mulq	%r10
1272	addq	%rax,%r14
1273	adcq	$0,%rdx
1274	movq	%rdx,%r10
1275	movq	8+0+0(%rbp),%rax
1276	mulq	%r11
1277	addq	%rax,%r15
1278	adcq	$0,%rdx
1279	imulq	%r12,%r9
1280	addq	%r10,%r15
1281	adcq	%rdx,%r9
1282	movq	%r13,%r10
1283	movq	%r14,%r11
1284	movq	%r15,%r12
1285	andq	$3,%r12
1286	movq	%r15,%r13
1287	andq	$-4,%r13
1288	movq	%r9,%r14
1289	shrdq	$2,%r9,%r15
1290	shrq	$2,%r9
1291	addq	%r13,%r15
1292	adcq	%r14,%r9
1293	addq	%r15,%r10
1294	adcq	%r9,%r11
1295	adcq	$0,%r12
1296
1297.Lopen_sse_tail_192_finish:
1298	paddd	.Lchacha20_consts(%rip),%xmm2
1299	paddd	0+48(%rbp),%xmm6
1300	paddd	0+64(%rbp),%xmm10
1301	paddd	0+128(%rbp),%xmm14
1302	paddd	.Lchacha20_consts(%rip),%xmm1
1303	paddd	0+48(%rbp),%xmm5
1304	paddd	0+64(%rbp),%xmm9
1305	paddd	0+112(%rbp),%xmm13
1306	paddd	.Lchacha20_consts(%rip),%xmm0
1307	paddd	0+48(%rbp),%xmm4
1308	paddd	0+64(%rbp),%xmm8
1309	paddd	0+96(%rbp),%xmm12
1310	movdqu	0 + 0(%rsi),%xmm3
1311	movdqu	16 + 0(%rsi),%xmm7
1312	movdqu	32 + 0(%rsi),%xmm11
1313	movdqu	48 + 0(%rsi),%xmm15
1314	pxor	%xmm3,%xmm2
1315	pxor	%xmm7,%xmm6
1316	pxor	%xmm11,%xmm10
1317	pxor	%xmm14,%xmm15
1318	movdqu	%xmm2,0 + 0(%rdi)
1319	movdqu	%xmm6,16 + 0(%rdi)
1320	movdqu	%xmm10,32 + 0(%rdi)
1321	movdqu	%xmm15,48 + 0(%rdi)
1322	movdqu	0 + 64(%rsi),%xmm3
1323	movdqu	16 + 64(%rsi),%xmm7
1324	movdqu	32 + 64(%rsi),%xmm11
1325	movdqu	48 + 64(%rsi),%xmm15
1326	pxor	%xmm3,%xmm1
1327	pxor	%xmm7,%xmm5
1328	pxor	%xmm11,%xmm9
1329	pxor	%xmm13,%xmm15
1330	movdqu	%xmm1,0 + 64(%rdi)
1331	movdqu	%xmm5,16 + 64(%rdi)
1332	movdqu	%xmm9,32 + 64(%rdi)
1333	movdqu	%xmm15,48 + 64(%rdi)
1334
1335	subq	$128,%rbx
1336	leaq	128(%rsi),%rsi
1337	leaq	128(%rdi),%rdi
1338	jmp	.Lopen_sse_tail_64_dec_loop
1339
1340.Lopen_sse_tail_256:
1341	movdqa	.Lchacha20_consts(%rip),%xmm0
1342	movdqa	0+48(%rbp),%xmm4
1343	movdqa	0+64(%rbp),%xmm8
1344	movdqa	%xmm0,%xmm1
1345	movdqa	%xmm4,%xmm5
1346	movdqa	%xmm8,%xmm9
1347	movdqa	%xmm0,%xmm2
1348	movdqa	%xmm4,%xmm6
1349	movdqa	%xmm8,%xmm10
1350	movdqa	%xmm0,%xmm3
1351	movdqa	%xmm4,%xmm7
1352	movdqa	%xmm8,%xmm11
1353	movdqa	0+96(%rbp),%xmm15
1354	paddd	.Lsse_inc(%rip),%xmm15
1355	movdqa	%xmm15,%xmm14
1356	paddd	.Lsse_inc(%rip),%xmm14
1357	movdqa	%xmm14,%xmm13
1358	paddd	.Lsse_inc(%rip),%xmm13
1359	movdqa	%xmm13,%xmm12
1360	paddd	.Lsse_inc(%rip),%xmm12
1361	movdqa	%xmm12,0+96(%rbp)
1362	movdqa	%xmm13,0+112(%rbp)
1363	movdqa	%xmm14,0+128(%rbp)
1364	movdqa	%xmm15,0+144(%rbp)
1365
1366	xorq	%r8,%r8
1367.Lopen_sse_tail_256_rounds_and_x1hash:
1368	addq	0+0(%rsi,%r8,1),%r10
1369	adcq	8+0(%rsi,%r8,1),%r11
1370	adcq	$1,%r12
1371	movdqa	%xmm11,0+80(%rbp)
1372	paddd	%xmm4,%xmm0
1373	pxor	%xmm0,%xmm12
1374	pshufb	.Lrol16(%rip),%xmm12
1375	paddd	%xmm12,%xmm8
1376	pxor	%xmm8,%xmm4
1377	movdqa	%xmm4,%xmm11
1378	pslld	$12,%xmm11
1379	psrld	$20,%xmm4
1380	pxor	%xmm11,%xmm4
1381	paddd	%xmm4,%xmm0
1382	pxor	%xmm0,%xmm12
1383	pshufb	.Lrol8(%rip),%xmm12
1384	paddd	%xmm12,%xmm8
1385	pxor	%xmm8,%xmm4
1386	movdqa	%xmm4,%xmm11
1387	pslld	$7,%xmm11
1388	psrld	$25,%xmm4
1389	pxor	%xmm11,%xmm4
1390.byte	102,15,58,15,228,4
1391.byte	102,69,15,58,15,192,8
1392.byte	102,69,15,58,15,228,12
1393	paddd	%xmm5,%xmm1
1394	pxor	%xmm1,%xmm13
1395	pshufb	.Lrol16(%rip),%xmm13
1396	paddd	%xmm13,%xmm9
1397	pxor	%xmm9,%xmm5
1398	movdqa	%xmm5,%xmm11
1399	pslld	$12,%xmm11
1400	psrld	$20,%xmm5
1401	pxor	%xmm11,%xmm5
1402	paddd	%xmm5,%xmm1
1403	pxor	%xmm1,%xmm13
1404	pshufb	.Lrol8(%rip),%xmm13
1405	paddd	%xmm13,%xmm9
1406	pxor	%xmm9,%xmm5
1407	movdqa	%xmm5,%xmm11
1408	pslld	$7,%xmm11
1409	psrld	$25,%xmm5
1410	pxor	%xmm11,%xmm5
1411.byte	102,15,58,15,237,4
1412.byte	102,69,15,58,15,201,8
1413.byte	102,69,15,58,15,237,12
1414	paddd	%xmm6,%xmm2
1415	pxor	%xmm2,%xmm14
1416	pshufb	.Lrol16(%rip),%xmm14
1417	paddd	%xmm14,%xmm10
1418	pxor	%xmm10,%xmm6
1419	movdqa	%xmm6,%xmm11
1420	pslld	$12,%xmm11
1421	psrld	$20,%xmm6
1422	pxor	%xmm11,%xmm6
1423	paddd	%xmm6,%xmm2
1424	pxor	%xmm2,%xmm14
1425	pshufb	.Lrol8(%rip),%xmm14
1426	paddd	%xmm14,%xmm10
1427	pxor	%xmm10,%xmm6
1428	movdqa	%xmm6,%xmm11
1429	pslld	$7,%xmm11
1430	psrld	$25,%xmm6
1431	pxor	%xmm11,%xmm6
1432.byte	102,15,58,15,246,4
1433.byte	102,69,15,58,15,210,8
1434.byte	102,69,15,58,15,246,12
1435	movdqa	0+80(%rbp),%xmm11
1436	movq	0+0+0(%rbp),%rax
1437	movq	%rax,%r15
1438	mulq	%r10
1439	movq	%rax,%r13
1440	movq	%rdx,%r14
1441	movq	0+0+0(%rbp),%rax
1442	mulq	%r11
1443	imulq	%r12,%r15
1444	addq	%rax,%r14
1445	adcq	%rdx,%r15
1446	movdqa	%xmm9,0+80(%rbp)
1447	paddd	%xmm7,%xmm3
1448	pxor	%xmm3,%xmm15
1449	pshufb	.Lrol16(%rip),%xmm15
1450	paddd	%xmm15,%xmm11
1451	pxor	%xmm11,%xmm7
1452	movdqa	%xmm7,%xmm9
1453	pslld	$12,%xmm9
1454	psrld	$20,%xmm7
1455	pxor	%xmm9,%xmm7
1456	paddd	%xmm7,%xmm3
1457	pxor	%xmm3,%xmm15
1458	pshufb	.Lrol8(%rip),%xmm15
1459	paddd	%xmm15,%xmm11
1460	pxor	%xmm11,%xmm7
1461	movdqa	%xmm7,%xmm9
1462	pslld	$7,%xmm9
1463	psrld	$25,%xmm7
1464	pxor	%xmm9,%xmm7
1465.byte	102,15,58,15,255,4
1466.byte	102,69,15,58,15,219,8
1467.byte	102,69,15,58,15,255,12
1468	movdqa	0+80(%rbp),%xmm9
1469	movq	8+0+0(%rbp),%rax
1470	movq	%rax,%r9
1471	mulq	%r10
1472	addq	%rax,%r14
1473	adcq	$0,%rdx
1474	movq	%rdx,%r10
1475	movq	8+0+0(%rbp),%rax
1476	mulq	%r11
1477	addq	%rax,%r15
1478	adcq	$0,%rdx
1479	movdqa	%xmm11,0+80(%rbp)
1480	paddd	%xmm4,%xmm0
1481	pxor	%xmm0,%xmm12
1482	pshufb	.Lrol16(%rip),%xmm12
1483	paddd	%xmm12,%xmm8
1484	pxor	%xmm8,%xmm4
1485	movdqa	%xmm4,%xmm11
1486	pslld	$12,%xmm11
1487	psrld	$20,%xmm4
1488	pxor	%xmm11,%xmm4
1489	paddd	%xmm4,%xmm0
1490	pxor	%xmm0,%xmm12
1491	pshufb	.Lrol8(%rip),%xmm12
1492	paddd	%xmm12,%xmm8
1493	pxor	%xmm8,%xmm4
1494	movdqa	%xmm4,%xmm11
1495	pslld	$7,%xmm11
1496	psrld	$25,%xmm4
1497	pxor	%xmm11,%xmm4
1498.byte	102,15,58,15,228,12
1499.byte	102,69,15,58,15,192,8
1500.byte	102,69,15,58,15,228,4
1501	paddd	%xmm5,%xmm1
1502	pxor	%xmm1,%xmm13
1503	pshufb	.Lrol16(%rip),%xmm13
1504	paddd	%xmm13,%xmm9
1505	pxor	%xmm9,%xmm5
1506	movdqa	%xmm5,%xmm11
1507	pslld	$12,%xmm11
1508	psrld	$20,%xmm5
1509	pxor	%xmm11,%xmm5
1510	paddd	%xmm5,%xmm1
1511	pxor	%xmm1,%xmm13
1512	pshufb	.Lrol8(%rip),%xmm13
1513	paddd	%xmm13,%xmm9
1514	pxor	%xmm9,%xmm5
1515	movdqa	%xmm5,%xmm11
1516	pslld	$7,%xmm11
1517	psrld	$25,%xmm5
1518	pxor	%xmm11,%xmm5
1519.byte	102,15,58,15,237,12
1520.byte	102,69,15,58,15,201,8
1521.byte	102,69,15,58,15,237,4
1522	imulq	%r12,%r9
1523	addq	%r10,%r15
1524	adcq	%rdx,%r9
1525	paddd	%xmm6,%xmm2
1526	pxor	%xmm2,%xmm14
1527	pshufb	.Lrol16(%rip),%xmm14
1528	paddd	%xmm14,%xmm10
1529	pxor	%xmm10,%xmm6
1530	movdqa	%xmm6,%xmm11
1531	pslld	$12,%xmm11
1532	psrld	$20,%xmm6
1533	pxor	%xmm11,%xmm6
1534	paddd	%xmm6,%xmm2
1535	pxor	%xmm2,%xmm14
1536	pshufb	.Lrol8(%rip),%xmm14
1537	paddd	%xmm14,%xmm10
1538	pxor	%xmm10,%xmm6
1539	movdqa	%xmm6,%xmm11
1540	pslld	$7,%xmm11
1541	psrld	$25,%xmm6
1542	pxor	%xmm11,%xmm6
1543.byte	102,15,58,15,246,12
1544.byte	102,69,15,58,15,210,8
1545.byte	102,69,15,58,15,246,4
1546	movdqa	0+80(%rbp),%xmm11
1547	movq	%r13,%r10
1548	movq	%r14,%r11
1549	movq	%r15,%r12
1550	andq	$3,%r12
1551	movq	%r15,%r13
1552	andq	$-4,%r13
1553	movq	%r9,%r14
1554	shrdq	$2,%r9,%r15
1555	shrq	$2,%r9
1556	addq	%r13,%r15
1557	adcq	%r14,%r9
1558	addq	%r15,%r10
1559	adcq	%r9,%r11
1560	adcq	$0,%r12
1561	movdqa	%xmm9,0+80(%rbp)
1562	paddd	%xmm7,%xmm3
1563	pxor	%xmm3,%xmm15
1564	pshufb	.Lrol16(%rip),%xmm15
1565	paddd	%xmm15,%xmm11
1566	pxor	%xmm11,%xmm7
1567	movdqa	%xmm7,%xmm9
1568	pslld	$12,%xmm9
1569	psrld	$20,%xmm7
1570	pxor	%xmm9,%xmm7
1571	paddd	%xmm7,%xmm3
1572	pxor	%xmm3,%xmm15
1573	pshufb	.Lrol8(%rip),%xmm15
1574	paddd	%xmm15,%xmm11
1575	pxor	%xmm11,%xmm7
1576	movdqa	%xmm7,%xmm9
1577	pslld	$7,%xmm9
1578	psrld	$25,%xmm7
1579	pxor	%xmm9,%xmm7
1580.byte	102,15,58,15,255,12
1581.byte	102,69,15,58,15,219,8
1582.byte	102,69,15,58,15,255,4
1583	movdqa	0+80(%rbp),%xmm9
1584
1585	addq	$16,%r8
1586	cmpq	$160,%r8
1587	jb	.Lopen_sse_tail_256_rounds_and_x1hash
1588
1589	movq	%rbx,%rcx
1590	andq	$-16,%rcx
1591.Lopen_sse_tail_256_hash:
1592	addq	0+0(%rsi,%r8,1),%r10
1593	adcq	8+0(%rsi,%r8,1),%r11
1594	adcq	$1,%r12
1595	movq	0+0+0(%rbp),%rax
1596	movq	%rax,%r15
1597	mulq	%r10
1598	movq	%rax,%r13
1599	movq	%rdx,%r14
1600	movq	0+0+0(%rbp),%rax
1601	mulq	%r11
1602	imulq	%r12,%r15
1603	addq	%rax,%r14
1604	adcq	%rdx,%r15
1605	movq	8+0+0(%rbp),%rax
1606	movq	%rax,%r9
1607	mulq	%r10
1608	addq	%rax,%r14
1609	adcq	$0,%rdx
1610	movq	%rdx,%r10
1611	movq	8+0+0(%rbp),%rax
1612	mulq	%r11
1613	addq	%rax,%r15
1614	adcq	$0,%rdx
1615	imulq	%r12,%r9
1616	addq	%r10,%r15
1617	adcq	%rdx,%r9
1618	movq	%r13,%r10
1619	movq	%r14,%r11
1620	movq	%r15,%r12
1621	andq	$3,%r12
1622	movq	%r15,%r13
1623	andq	$-4,%r13
1624	movq	%r9,%r14
1625	shrdq	$2,%r9,%r15
1626	shrq	$2,%r9
1627	addq	%r13,%r15
1628	adcq	%r14,%r9
1629	addq	%r15,%r10
1630	adcq	%r9,%r11
1631	adcq	$0,%r12
1632
1633	addq	$16,%r8
1634	cmpq	%rcx,%r8
1635	jb	.Lopen_sse_tail_256_hash
1636	paddd	.Lchacha20_consts(%rip),%xmm3
1637	paddd	0+48(%rbp),%xmm7
1638	paddd	0+64(%rbp),%xmm11
1639	paddd	0+144(%rbp),%xmm15
1640	paddd	.Lchacha20_consts(%rip),%xmm2
1641	paddd	0+48(%rbp),%xmm6
1642	paddd	0+64(%rbp),%xmm10
1643	paddd	0+128(%rbp),%xmm14
1644	paddd	.Lchacha20_consts(%rip),%xmm1
1645	paddd	0+48(%rbp),%xmm5
1646	paddd	0+64(%rbp),%xmm9
1647	paddd	0+112(%rbp),%xmm13
1648	paddd	.Lchacha20_consts(%rip),%xmm0
1649	paddd	0+48(%rbp),%xmm4
1650	paddd	0+64(%rbp),%xmm8
1651	paddd	0+96(%rbp),%xmm12
1652	movdqa	%xmm12,0+80(%rbp)
1653	movdqu	0 + 0(%rsi),%xmm12
1654	pxor	%xmm3,%xmm12
1655	movdqu	%xmm12,0 + 0(%rdi)
1656	movdqu	16 + 0(%rsi),%xmm12
1657	pxor	%xmm7,%xmm12
1658	movdqu	%xmm12,16 + 0(%rdi)
1659	movdqu	32 + 0(%rsi),%xmm12
1660	pxor	%xmm11,%xmm12
1661	movdqu	%xmm12,32 + 0(%rdi)
1662	movdqu	48 + 0(%rsi),%xmm12
1663	pxor	%xmm15,%xmm12
1664	movdqu	%xmm12,48 + 0(%rdi)
1665	movdqu	0 + 64(%rsi),%xmm3
1666	movdqu	16 + 64(%rsi),%xmm7
1667	movdqu	32 + 64(%rsi),%xmm11
1668	movdqu	48 + 64(%rsi),%xmm15
1669	pxor	%xmm3,%xmm2
1670	pxor	%xmm7,%xmm6
1671	pxor	%xmm11,%xmm10
1672	pxor	%xmm14,%xmm15
1673	movdqu	%xmm2,0 + 64(%rdi)
1674	movdqu	%xmm6,16 + 64(%rdi)
1675	movdqu	%xmm10,32 + 64(%rdi)
1676	movdqu	%xmm15,48 + 64(%rdi)
1677	movdqu	0 + 128(%rsi),%xmm3
1678	movdqu	16 + 128(%rsi),%xmm7
1679	movdqu	32 + 128(%rsi),%xmm11
1680	movdqu	48 + 128(%rsi),%xmm15
1681	pxor	%xmm3,%xmm1
1682	pxor	%xmm7,%xmm5
1683	pxor	%xmm11,%xmm9
1684	pxor	%xmm13,%xmm15
1685	movdqu	%xmm1,0 + 128(%rdi)
1686	movdqu	%xmm5,16 + 128(%rdi)
1687	movdqu	%xmm9,32 + 128(%rdi)
1688	movdqu	%xmm15,48 + 128(%rdi)
1689
1690	movdqa	0+80(%rbp),%xmm12
1691	subq	$192,%rbx
1692	leaq	192(%rsi),%rsi
1693	leaq	192(%rdi),%rdi
1694
1695
1696.Lopen_sse_tail_64_dec_loop:
1697	cmpq	$16,%rbx
1698	jb	.Lopen_sse_tail_16_init
1699	subq	$16,%rbx
1700	movdqu	(%rsi),%xmm3
1701	pxor	%xmm3,%xmm0
1702	movdqu	%xmm0,(%rdi)
1703	leaq	16(%rsi),%rsi
1704	leaq	16(%rdi),%rdi
1705	movdqa	%xmm4,%xmm0
1706	movdqa	%xmm8,%xmm4
1707	movdqa	%xmm12,%xmm8
1708	jmp	.Lopen_sse_tail_64_dec_loop
1709.Lopen_sse_tail_16_init:
1710	movdqa	%xmm0,%xmm1
1711
1712
1713.Lopen_sse_tail_16:
1714	testq	%rbx,%rbx
1715	jz	.Lopen_sse_finalize
1716
1717
1718
1719	pxor	%xmm3,%xmm3
1720	leaq	-1(%rsi,%rbx,1),%rsi
1721	movq	%rbx,%r8
1722.Lopen_sse_tail_16_compose:
1723	pslldq	$1,%xmm3
1724	pinsrb	$0,(%rsi),%xmm3
1725	subq	$1,%rsi
1726	subq	$1,%r8
1727	jnz	.Lopen_sse_tail_16_compose
1728
1729.byte	102,73,15,126,221
1730	pextrq	$1,%xmm3,%r14
1731
1732	pxor	%xmm1,%xmm3
1733
1734
1735.Lopen_sse_tail_16_extract:
1736	pextrb	$0,%xmm3,(%rdi)
1737	psrldq	$1,%xmm3
1738	addq	$1,%rdi
1739	subq	$1,%rbx
1740	jne	.Lopen_sse_tail_16_extract
1741
1742	addq	%r13,%r10
1743	adcq	%r14,%r11
1744	adcq	$1,%r12
1745	movq	0+0+0(%rbp),%rax
1746	movq	%rax,%r15
1747	mulq	%r10
1748	movq	%rax,%r13
1749	movq	%rdx,%r14
1750	movq	0+0+0(%rbp),%rax
1751	mulq	%r11
1752	imulq	%r12,%r15
1753	addq	%rax,%r14
1754	adcq	%rdx,%r15
1755	movq	8+0+0(%rbp),%rax
1756	movq	%rax,%r9
1757	mulq	%r10
1758	addq	%rax,%r14
1759	adcq	$0,%rdx
1760	movq	%rdx,%r10
1761	movq	8+0+0(%rbp),%rax
1762	mulq	%r11
1763	addq	%rax,%r15
1764	adcq	$0,%rdx
1765	imulq	%r12,%r9
1766	addq	%r10,%r15
1767	adcq	%rdx,%r9
1768	movq	%r13,%r10
1769	movq	%r14,%r11
1770	movq	%r15,%r12
1771	andq	$3,%r12
1772	movq	%r15,%r13
1773	andq	$-4,%r13
1774	movq	%r9,%r14
1775	shrdq	$2,%r9,%r15
1776	shrq	$2,%r9
1777	addq	%r13,%r15
1778	adcq	%r14,%r9
1779	addq	%r15,%r10
1780	adcq	%r9,%r11
1781	adcq	$0,%r12
1782
1783
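# Finalize the tag: absorb the stored ad_len||msg_len block from 32(%rbp),
# reduce the accumulator fully mod 2^130 - 5, add the key half s from
# 16(%rbp) and write the 16-byte tag through the saved %r9.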
1784.Lopen_sse_finalize:
1785	addq	0+0+32(%rbp),%r10
1786	adcq	8+0+32(%rbp),%r11
1787	adcq	$1,%r12
1788	movq	0+0+0(%rbp),%rax
1789	movq	%rax,%r15
1790	mulq	%r10
1791	movq	%rax,%r13
1792	movq	%rdx,%r14
1793	movq	0+0+0(%rbp),%rax
1794	mulq	%r11
1795	imulq	%r12,%r15
1796	addq	%rax,%r14
1797	adcq	%rdx,%r15
1798	movq	8+0+0(%rbp),%rax
1799	movq	%rax,%r9
1800	mulq	%r10
1801	addq	%rax,%r14
1802	adcq	$0,%rdx
1803	movq	%rdx,%r10
1804	movq	8+0+0(%rbp),%rax
1805	mulq	%r11
1806	addq	%rax,%r15
1807	adcq	$0,%rdx
1808	imulq	%r12,%r9
1809	addq	%r10,%r15
1810	adcq	%rdx,%r9
1811	movq	%r13,%r10
1812	movq	%r14,%r11
1813	movq	%r15,%r12
1814	andq	$3,%r12
1815	movq	%r15,%r13
1816	andq	$-4,%r13
1817	movq	%r9,%r14
1818	shrdq	$2,%r9,%r15
1819	shrq	$2,%r9
1820	addq	%r13,%r15
1821	adcq	%r14,%r9
1822	addq	%r15,%r10
1823	adcq	%r9,%r11
1824	adcq	$0,%r12
1825
1826
1827	movq	%r10,%r13
1828	movq	%r11,%r14
1829	movq	%r12,%r15
1830	subq	$-5,%r10
1831	sbbq	$-1,%r11
1832	sbbq	$3,%r12
1833	cmovcq	%r13,%r10
1834	cmovcq	%r14,%r11
1835	cmovcq	%r15,%r12
1836
1837	addq	0+0+16(%rbp),%r10
1838	adcq	8+0+16(%rbp),%r11
1839
1840.cfi_remember_state
1841	addq	$288 + 0 + 32,%rsp
1842.cfi_adjust_cfa_offset	-(288 + 32)
1843
1844	popq	%r9
1845.cfi_adjust_cfa_offset	-8
1846.cfi_restore	%r9
1847	movq	%r10,(%r9)
1848	movq	%r11,8(%r9)
1849	popq	%r15
1850.cfi_adjust_cfa_offset	-8
1851.cfi_restore	%r15
1852	popq	%r14
1853.cfi_adjust_cfa_offset	-8
1854.cfi_restore	%r14
1855	popq	%r13
1856.cfi_adjust_cfa_offset	-8
1857.cfi_restore	%r13
1858	popq	%r12
1859.cfi_adjust_cfa_offset	-8
1860.cfi_restore	%r12
1861	popq	%rbx
1862.cfi_adjust_cfa_offset	-8
1863.cfi_restore	%rbx
1864	popq	%rbp
1865.cfi_adjust_cfa_offset	-8
1866.cfi_restore	%rbp
1867	.byte	0xf3,0xc3
1868
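# Short-input path (at most 128 bytes): compute up to three ChaCha20 blocks
# entirely in registers; the first block's clamped keystream again supplies
# the Poly1305 key.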
1869.Lopen_sse_128:
1870.cfi_restore_state
1871	movdqu	.Lchacha20_consts(%rip),%xmm0
1872	movdqa	%xmm0,%xmm1
1873	movdqa	%xmm0,%xmm2
1874	movdqu	0(%r9),%xmm4
1875	movdqa	%xmm4,%xmm5
1876	movdqa	%xmm4,%xmm6
1877	movdqu	16(%r9),%xmm8
1878	movdqa	%xmm8,%xmm9
1879	movdqa	%xmm8,%xmm10
1880	movdqu	32(%r9),%xmm12
1881	movdqa	%xmm12,%xmm13
1882	paddd	.Lsse_inc(%rip),%xmm13
1883	movdqa	%xmm13,%xmm14
1884	paddd	.Lsse_inc(%rip),%xmm14
1885	movdqa	%xmm4,%xmm7
1886	movdqa	%xmm8,%xmm11
1887	movdqa	%xmm13,%xmm15
1888	movq	$10,%r10
1889
1890.Lopen_sse_128_rounds:
1891	paddd	%xmm4,%xmm0
1892	pxor	%xmm0,%xmm12
1893	pshufb	.Lrol16(%rip),%xmm12
1894	paddd	%xmm12,%xmm8
1895	pxor	%xmm8,%xmm4
1896	movdqa	%xmm4,%xmm3
1897	pslld	$12,%xmm3
1898	psrld	$20,%xmm4
1899	pxor	%xmm3,%xmm4
1900	paddd	%xmm4,%xmm0
1901	pxor	%xmm0,%xmm12
1902	pshufb	.Lrol8(%rip),%xmm12
1903	paddd	%xmm12,%xmm8
1904	pxor	%xmm8,%xmm4
1905	movdqa	%xmm4,%xmm3
1906	pslld	$7,%xmm3
1907	psrld	$25,%xmm4
1908	pxor	%xmm3,%xmm4
1909.byte	102,15,58,15,228,4
1910.byte	102,69,15,58,15,192,8
1911.byte	102,69,15,58,15,228,12
1912	paddd	%xmm5,%xmm1
1913	pxor	%xmm1,%xmm13
1914	pshufb	.Lrol16(%rip),%xmm13
1915	paddd	%xmm13,%xmm9
1916	pxor	%xmm9,%xmm5
1917	movdqa	%xmm5,%xmm3
1918	pslld	$12,%xmm3
1919	psrld	$20,%xmm5
1920	pxor	%xmm3,%xmm5
1921	paddd	%xmm5,%xmm1
1922	pxor	%xmm1,%xmm13
1923	pshufb	.Lrol8(%rip),%xmm13
1924	paddd	%xmm13,%xmm9
1925	pxor	%xmm9,%xmm5
1926	movdqa	%xmm5,%xmm3
1927	pslld	$7,%xmm3
1928	psrld	$25,%xmm5
1929	pxor	%xmm3,%xmm5
1930.byte	102,15,58,15,237,4
1931.byte	102,69,15,58,15,201,8
1932.byte	102,69,15,58,15,237,12
1933	paddd	%xmm6,%xmm2
1934	pxor	%xmm2,%xmm14
1935	pshufb	.Lrol16(%rip),%xmm14
1936	paddd	%xmm14,%xmm10
1937	pxor	%xmm10,%xmm6
1938	movdqa	%xmm6,%xmm3
1939	pslld	$12,%xmm3
1940	psrld	$20,%xmm6
1941	pxor	%xmm3,%xmm6
1942	paddd	%xmm6,%xmm2
1943	pxor	%xmm2,%xmm14
1944	pshufb	.Lrol8(%rip),%xmm14
1945	paddd	%xmm14,%xmm10
1946	pxor	%xmm10,%xmm6
1947	movdqa	%xmm6,%xmm3
1948	pslld	$7,%xmm3
1949	psrld	$25,%xmm6
1950	pxor	%xmm3,%xmm6
1951.byte	102,15,58,15,246,4
1952.byte	102,69,15,58,15,210,8
1953.byte	102,69,15,58,15,246,12
1954	paddd	%xmm4,%xmm0
1955	pxor	%xmm0,%xmm12
1956	pshufb	.Lrol16(%rip),%xmm12
1957	paddd	%xmm12,%xmm8
1958	pxor	%xmm8,%xmm4
1959	movdqa	%xmm4,%xmm3
1960	pslld	$12,%xmm3
1961	psrld	$20,%xmm4
1962	pxor	%xmm3,%xmm4
1963	paddd	%xmm4,%xmm0
1964	pxor	%xmm0,%xmm12
1965	pshufb	.Lrol8(%rip),%xmm12
1966	paddd	%xmm12,%xmm8
1967	pxor	%xmm8,%xmm4
1968	movdqa	%xmm4,%xmm3
1969	pslld	$7,%xmm3
1970	psrld	$25,%xmm4
1971	pxor	%xmm3,%xmm4
1972.byte	102,15,58,15,228,12
1973.byte	102,69,15,58,15,192,8
1974.byte	102,69,15,58,15,228,4
1975	paddd	%xmm5,%xmm1
1976	pxor	%xmm1,%xmm13
1977	pshufb	.Lrol16(%rip),%xmm13
1978	paddd	%xmm13,%xmm9
1979	pxor	%xmm9,%xmm5
1980	movdqa	%xmm5,%xmm3
1981	pslld	$12,%xmm3
1982	psrld	$20,%xmm5
1983	pxor	%xmm3,%xmm5
1984	paddd	%xmm5,%xmm1
1985	pxor	%xmm1,%xmm13
1986	pshufb	.Lrol8(%rip),%xmm13
1987	paddd	%xmm13,%xmm9
1988	pxor	%xmm9,%xmm5
1989	movdqa	%xmm5,%xmm3
1990	pslld	$7,%xmm3
1991	psrld	$25,%xmm5
1992	pxor	%xmm3,%xmm5
1993.byte	102,15,58,15,237,12
1994.byte	102,69,15,58,15,201,8
1995.byte	102,69,15,58,15,237,4
1996	paddd	%xmm6,%xmm2
1997	pxor	%xmm2,%xmm14
1998	pshufb	.Lrol16(%rip),%xmm14
1999	paddd	%xmm14,%xmm10
2000	pxor	%xmm10,%xmm6
2001	movdqa	%xmm6,%xmm3
2002	pslld	$12,%xmm3
2003	psrld	$20,%xmm6
2004	pxor	%xmm3,%xmm6
2005	paddd	%xmm6,%xmm2
2006	pxor	%xmm2,%xmm14
2007	pshufb	.Lrol8(%rip),%xmm14
2008	paddd	%xmm14,%xmm10
2009	pxor	%xmm10,%xmm6
2010	movdqa	%xmm6,%xmm3
2011	pslld	$7,%xmm3
2012	psrld	$25,%xmm6
2013	pxor	%xmm3,%xmm6
2014.byte	102,15,58,15,246,12
2015.byte	102,69,15,58,15,210,8
2016.byte	102,69,15,58,15,246,4
2017
2018	decq	%r10
2019	jnz	.Lopen_sse_128_rounds
2020	paddd	.Lchacha20_consts(%rip),%xmm0
2021	paddd	.Lchacha20_consts(%rip),%xmm1
2022	paddd	.Lchacha20_consts(%rip),%xmm2
2023	paddd	%xmm7,%xmm4
2024	paddd	%xmm7,%xmm5
2025	paddd	%xmm7,%xmm6
2026	paddd	%xmm11,%xmm9
2027	paddd	%xmm11,%xmm10
2028	paddd	%xmm15,%xmm13
2029	paddd	.Lsse_inc(%rip),%xmm15
2030	paddd	%xmm15,%xmm14
2031
2032	pand	.Lclamp(%rip),%xmm0
2033	movdqa	%xmm0,0+0(%rbp)
2034	movdqa	%xmm4,0+16(%rbp)
2035
2036	movq	%r8,%r8
2037	call	poly_hash_ad_internal
2038.Lopen_sse_128_xor_hash:
2039	cmpq	$16,%rbx
2040	jb	.Lopen_sse_tail_16
2041	subq	$16,%rbx
2042	addq	0+0(%rsi),%r10
2043	adcq	8+0(%rsi),%r11
2044	adcq	$1,%r12
2045
2046
2047	movdqu	0(%rsi),%xmm3
2048	pxor	%xmm3,%xmm1
2049	movdqu	%xmm1,0(%rdi)
2050	leaq	16(%rsi),%rsi
2051	leaq	16(%rdi),%rdi
2052	movq	0+0+0(%rbp),%rax
2053	movq	%rax,%r15
2054	mulq	%r10
2055	movq	%rax,%r13
2056	movq	%rdx,%r14
2057	movq	0+0+0(%rbp),%rax
2058	mulq	%r11
2059	imulq	%r12,%r15
2060	addq	%rax,%r14
2061	adcq	%rdx,%r15
2062	movq	8+0+0(%rbp),%rax
2063	movq	%rax,%r9
2064	mulq	%r10
2065	addq	%rax,%r14
2066	adcq	$0,%rdx
2067	movq	%rdx,%r10
2068	movq	8+0+0(%rbp),%rax
2069	mulq	%r11
2070	addq	%rax,%r15
2071	adcq	$0,%rdx
2072	imulq	%r12,%r9
2073	addq	%r10,%r15
2074	adcq	%rdx,%r9
2075	movq	%r13,%r10
2076	movq	%r14,%r11
2077	movq	%r15,%r12
2078	andq	$3,%r12
2079	movq	%r15,%r13
2080	andq	$-4,%r13
2081	movq	%r9,%r14
2082	shrdq	$2,%r9,%r15
2083	shrq	$2,%r9
2084	addq	%r13,%r15
2085	adcq	%r14,%r9
2086	addq	%r15,%r10
2087	adcq	%r9,%r11
2088	adcq	$0,%r12
2089
2090
2091	movdqa	%xmm5,%xmm1
2092	movdqa	%xmm9,%xmm5
2093	movdqa	%xmm13,%xmm9
2094	movdqa	%xmm2,%xmm13
2095	movdqa	%xmm6,%xmm2
2096	movdqa	%xmm10,%xmm6
2097	movdqa	%xmm14,%xmm10
2098	jmp	.Lopen_sse_128_xor_hash
2099.size	chacha20_poly1305_open, .-chacha20_poly1305_open
2100.cfi_endproc
2101
2102
2103
2104
2105
2106
2107
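# chacha20_poly1305_seal: encrypt and authenticate in one pass, using the
# same register convention as _open.  The length folded into the tag is the
# plaintext length plus an extra-ciphertext length apparently taken from
# 56(%r9); the seal path hashes the ciphertext it has just written.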
2108.globl	chacha20_poly1305_seal
2109.hidden chacha20_poly1305_seal
2110.type	chacha20_poly1305_seal,@function
2111.align	64
2112chacha20_poly1305_seal:
2113.cfi_startproc
2114	pushq	%rbp
2115.cfi_adjust_cfa_offset	8
2116.cfi_offset	%rbp,-16
2117	pushq	%rbx
2118.cfi_adjust_cfa_offset	8
2119.cfi_offset	%rbx,-24
2120	pushq	%r12
2121.cfi_adjust_cfa_offset	8
2122.cfi_offset	%r12,-32
2123	pushq	%r13
2124.cfi_adjust_cfa_offset	8
2125.cfi_offset	%r13,-40
2126	pushq	%r14
2127.cfi_adjust_cfa_offset	8
2128.cfi_offset	%r14,-48
2129	pushq	%r15
2130.cfi_adjust_cfa_offset	8
2131.cfi_offset	%r15,-56
2132
2133
2134	pushq	%r9
2135.cfi_adjust_cfa_offset	8
2136.cfi_offset	%r9,-64
2137	subq	$288 + 0 + 32,%rsp
2138.cfi_adjust_cfa_offset	288 + 32
2139	leaq	32(%rsp),%rbp
2140	andq	$-32,%rbp
2141
2142	movq	56(%r9),%rbx
2143	addq	%rdx,%rbx
2144	movq	%r8,0+0+32(%rbp)
2145	movq	%rbx,8+0+32(%rbp)
2146	movq	%rdx,%rbx
2147
2148	movl	OPENSSL_ia32cap_P+8(%rip),%eax
2149	andl	$288,%eax
2150	xorl	$288,%eax
2151	jz	chacha20_poly1305_seal_avx2
2152
2153	cmpq	$128,%rbx
2154	jbe	.Lseal_sse_128
2155
2156	movdqa	.Lchacha20_consts(%rip),%xmm0
2157	movdqu	0(%r9),%xmm4
2158	movdqu	16(%r9),%xmm8
2159	movdqu	32(%r9),%xmm12
2160
2161	movdqa	%xmm0,%xmm1
2162	movdqa	%xmm0,%xmm2
2163	movdqa	%xmm0,%xmm3
2164	movdqa	%xmm4,%xmm5
2165	movdqa	%xmm4,%xmm6
2166	movdqa	%xmm4,%xmm7
2167	movdqa	%xmm8,%xmm9
2168	movdqa	%xmm8,%xmm10
2169	movdqa	%xmm8,%xmm11
2170	movdqa	%xmm12,%xmm15
2171	paddd	.Lsse_inc(%rip),%xmm12
2172	movdqa	%xmm12,%xmm14
2173	paddd	.Lsse_inc(%rip),%xmm12
2174	movdqa	%xmm12,%xmm13
2175	paddd	.Lsse_inc(%rip),%xmm12
2176
2177	movdqa	%xmm4,0+48(%rbp)
2178	movdqa	%xmm8,0+64(%rbp)
2179	movdqa	%xmm12,0+96(%rbp)
2180	movdqa	%xmm13,0+112(%rbp)
2181	movdqa	%xmm14,0+128(%rbp)
2182	movdqa	%xmm15,0+144(%rbp)
2183	movq	$10,%r10
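# First pass: 10 ChaCha20 double rounds over four blocks; the counter-0
# block is clamped into the Poly1305 key and the other three encrypt the
# first 192 bytes.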
2184.Lseal_sse_init_rounds:
2185	movdqa	%xmm8,0+80(%rbp)
2186	movdqa	.Lrol16(%rip),%xmm8
2187	paddd	%xmm7,%xmm3
2188	paddd	%xmm6,%xmm2
2189	paddd	%xmm5,%xmm1
2190	paddd	%xmm4,%xmm0
2191	pxor	%xmm3,%xmm15
2192	pxor	%xmm2,%xmm14
2193	pxor	%xmm1,%xmm13
2194	pxor	%xmm0,%xmm12
2195.byte	102,69,15,56,0,248
2196.byte	102,69,15,56,0,240
2197.byte	102,69,15,56,0,232
2198.byte	102,69,15,56,0,224
2199	movdqa	0+80(%rbp),%xmm8
2200	paddd	%xmm15,%xmm11
2201	paddd	%xmm14,%xmm10
2202	paddd	%xmm13,%xmm9
2203	paddd	%xmm12,%xmm8
2204	pxor	%xmm11,%xmm7
2205	pxor	%xmm10,%xmm6
2206	pxor	%xmm9,%xmm5
2207	pxor	%xmm8,%xmm4
2208	movdqa	%xmm8,0+80(%rbp)
2209	movdqa	%xmm7,%xmm8
2210	psrld	$20,%xmm8
2211	pslld	$32-20,%xmm7
2212	pxor	%xmm8,%xmm7
2213	movdqa	%xmm6,%xmm8
2214	psrld	$20,%xmm8
2215	pslld	$32-20,%xmm6
2216	pxor	%xmm8,%xmm6
2217	movdqa	%xmm5,%xmm8
2218	psrld	$20,%xmm8
2219	pslld	$32-20,%xmm5
2220	pxor	%xmm8,%xmm5
2221	movdqa	%xmm4,%xmm8
2222	psrld	$20,%xmm8
2223	pslld	$32-20,%xmm4
2224	pxor	%xmm8,%xmm4
2225	movdqa	.Lrol8(%rip),%xmm8
2226	paddd	%xmm7,%xmm3
2227	paddd	%xmm6,%xmm2
2228	paddd	%xmm5,%xmm1
2229	paddd	%xmm4,%xmm0
2230	pxor	%xmm3,%xmm15
2231	pxor	%xmm2,%xmm14
2232	pxor	%xmm1,%xmm13
2233	pxor	%xmm0,%xmm12
2234.byte	102,69,15,56,0,248
2235.byte	102,69,15,56,0,240
2236.byte	102,69,15,56,0,232
2237.byte	102,69,15,56,0,224
2238	movdqa	0+80(%rbp),%xmm8
2239	paddd	%xmm15,%xmm11
2240	paddd	%xmm14,%xmm10
2241	paddd	%xmm13,%xmm9
2242	paddd	%xmm12,%xmm8
2243	pxor	%xmm11,%xmm7
2244	pxor	%xmm10,%xmm6
2245	pxor	%xmm9,%xmm5
2246	pxor	%xmm8,%xmm4
2247	movdqa	%xmm8,0+80(%rbp)
2248	movdqa	%xmm7,%xmm8
2249	psrld	$25,%xmm8
2250	pslld	$32-25,%xmm7
2251	pxor	%xmm8,%xmm7
2252	movdqa	%xmm6,%xmm8
2253	psrld	$25,%xmm8
2254	pslld	$32-25,%xmm6
2255	pxor	%xmm8,%xmm6
2256	movdqa	%xmm5,%xmm8
2257	psrld	$25,%xmm8
2258	pslld	$32-25,%xmm5
2259	pxor	%xmm8,%xmm5
2260	movdqa	%xmm4,%xmm8
2261	psrld	$25,%xmm8
2262	pslld	$32-25,%xmm4
2263	pxor	%xmm8,%xmm4
2264	movdqa	0+80(%rbp),%xmm8
2265.byte	102,15,58,15,255,4
2266.byte	102,69,15,58,15,219,8
2267.byte	102,69,15,58,15,255,12
2268.byte	102,15,58,15,246,4
2269.byte	102,69,15,58,15,210,8
2270.byte	102,69,15,58,15,246,12
2271.byte	102,15,58,15,237,4
2272.byte	102,69,15,58,15,201,8
2273.byte	102,69,15,58,15,237,12
2274.byte	102,15,58,15,228,4
2275.byte	102,69,15,58,15,192,8
2276.byte	102,69,15,58,15,228,12
2277	movdqa	%xmm8,0+80(%rbp)
2278	movdqa	.Lrol16(%rip),%xmm8
2279	paddd	%xmm7,%xmm3
2280	paddd	%xmm6,%xmm2
2281	paddd	%xmm5,%xmm1
2282	paddd	%xmm4,%xmm0
2283	pxor	%xmm3,%xmm15
2284	pxor	%xmm2,%xmm14
2285	pxor	%xmm1,%xmm13
2286	pxor	%xmm0,%xmm12
2287.byte	102,69,15,56,0,248
2288.byte	102,69,15,56,0,240
2289.byte	102,69,15,56,0,232
2290.byte	102,69,15,56,0,224
2291	movdqa	0+80(%rbp),%xmm8
2292	paddd	%xmm15,%xmm11
2293	paddd	%xmm14,%xmm10
2294	paddd	%xmm13,%xmm9
2295	paddd	%xmm12,%xmm8
2296	pxor	%xmm11,%xmm7
2297	pxor	%xmm10,%xmm6
2298	pxor	%xmm9,%xmm5
2299	pxor	%xmm8,%xmm4
2300	movdqa	%xmm8,0+80(%rbp)
2301	movdqa	%xmm7,%xmm8
2302	psrld	$20,%xmm8
2303	pslld	$32-20,%xmm7
2304	pxor	%xmm8,%xmm7
2305	movdqa	%xmm6,%xmm8
2306	psrld	$20,%xmm8
2307	pslld	$32-20,%xmm6
2308	pxor	%xmm8,%xmm6
2309	movdqa	%xmm5,%xmm8
2310	psrld	$20,%xmm8
2311	pslld	$32-20,%xmm5
2312	pxor	%xmm8,%xmm5
2313	movdqa	%xmm4,%xmm8
2314	psrld	$20,%xmm8
2315	pslld	$32-20,%xmm4
2316	pxor	%xmm8,%xmm4
2317	movdqa	.Lrol8(%rip),%xmm8
2318	paddd	%xmm7,%xmm3
2319	paddd	%xmm6,%xmm2
2320	paddd	%xmm5,%xmm1
2321	paddd	%xmm4,%xmm0
2322	pxor	%xmm3,%xmm15
2323	pxor	%xmm2,%xmm14
2324	pxor	%xmm1,%xmm13
2325	pxor	%xmm0,%xmm12
2326.byte	102,69,15,56,0,248
2327.byte	102,69,15,56,0,240
2328.byte	102,69,15,56,0,232
2329.byte	102,69,15,56,0,224
2330	movdqa	0+80(%rbp),%xmm8
2331	paddd	%xmm15,%xmm11
2332	paddd	%xmm14,%xmm10
2333	paddd	%xmm13,%xmm9
2334	paddd	%xmm12,%xmm8
2335	pxor	%xmm11,%xmm7
2336	pxor	%xmm10,%xmm6
2337	pxor	%xmm9,%xmm5
2338	pxor	%xmm8,%xmm4
2339	movdqa	%xmm8,0+80(%rbp)
2340	movdqa	%xmm7,%xmm8
2341	psrld	$25,%xmm8
2342	pslld	$32-25,%xmm7
2343	pxor	%xmm8,%xmm7
2344	movdqa	%xmm6,%xmm8
2345	psrld	$25,%xmm8
2346	pslld	$32-25,%xmm6
2347	pxor	%xmm8,%xmm6
2348	movdqa	%xmm5,%xmm8
2349	psrld	$25,%xmm8
2350	pslld	$32-25,%xmm5
2351	pxor	%xmm8,%xmm5
2352	movdqa	%xmm4,%xmm8
2353	psrld	$25,%xmm8
2354	pslld	$32-25,%xmm4
2355	pxor	%xmm8,%xmm4
2356	movdqa	0+80(%rbp),%xmm8
2357.byte	102,15,58,15,255,12
2358.byte	102,69,15,58,15,219,8
2359.byte	102,69,15,58,15,255,4
2360.byte	102,15,58,15,246,12
2361.byte	102,69,15,58,15,210,8
2362.byte	102,69,15,58,15,246,4
2363.byte	102,15,58,15,237,12
2364.byte	102,69,15,58,15,201,8
2365.byte	102,69,15,58,15,237,4
2366.byte	102,15,58,15,228,12
2367.byte	102,69,15,58,15,192,8
2368.byte	102,69,15,58,15,228,4
2369
2370	decq	%r10
2371	jnz	.Lseal_sse_init_rounds
2372	paddd	.Lchacha20_consts(%rip),%xmm3
2373	paddd	0+48(%rbp),%xmm7
2374	paddd	0+64(%rbp),%xmm11
2375	paddd	0+144(%rbp),%xmm15
2376	paddd	.Lchacha20_consts(%rip),%xmm2
2377	paddd	0+48(%rbp),%xmm6
2378	paddd	0+64(%rbp),%xmm10
2379	paddd	0+128(%rbp),%xmm14
2380	paddd	.Lchacha20_consts(%rip),%xmm1
2381	paddd	0+48(%rbp),%xmm5
2382	paddd	0+64(%rbp),%xmm9
2383	paddd	0+112(%rbp),%xmm13
2384	paddd	.Lchacha20_consts(%rip),%xmm0
2385	paddd	0+48(%rbp),%xmm4
2386	paddd	0+64(%rbp),%xmm8
2387	paddd	0+96(%rbp),%xmm12
2388
2389
2390	pand	.Lclamp(%rip),%xmm3
2391	movdqa	%xmm3,0+0(%rbp)
2392	movdqa	%xmm7,0+16(%rbp)
2393
2394	movq	%r8,%r8
2395	call	poly_hash_ad_internal
2396	movdqu	0 + 0(%rsi),%xmm3
2397	movdqu	16 + 0(%rsi),%xmm7
2398	movdqu	32 + 0(%rsi),%xmm11
2399	movdqu	48 + 0(%rsi),%xmm15
2400	pxor	%xmm3,%xmm2
2401	pxor	%xmm7,%xmm6
2402	pxor	%xmm11,%xmm10
2403	pxor	%xmm14,%xmm15
2404	movdqu	%xmm2,0 + 0(%rdi)
2405	movdqu	%xmm6,16 + 0(%rdi)
2406	movdqu	%xmm10,32 + 0(%rdi)
2407	movdqu	%xmm15,48 + 0(%rdi)
2408	movdqu	0 + 64(%rsi),%xmm3
2409	movdqu	16 + 64(%rsi),%xmm7
2410	movdqu	32 + 64(%rsi),%xmm11
2411	movdqu	48 + 64(%rsi),%xmm15
2412	pxor	%xmm3,%xmm1
2413	pxor	%xmm7,%xmm5
2414	pxor	%xmm11,%xmm9
2415	pxor	%xmm13,%xmm15
2416	movdqu	%xmm1,0 + 64(%rdi)
2417	movdqu	%xmm5,16 + 64(%rdi)
2418	movdqu	%xmm9,32 + 64(%rdi)
2419	movdqu	%xmm15,48 + 64(%rdi)
2420
2421	cmpq	$192,%rbx
2422	ja	.Lseal_sse_main_init
2423	movq	$128,%rcx
2424	subq	$128,%rbx
2425	leaq	128(%rsi),%rsi
2426	jmp	.Lseal_sse_128_tail_hash
2427.Lseal_sse_main_init:
2428	movdqu	0 + 128(%rsi),%xmm3
2429	movdqu	16 + 128(%rsi),%xmm7
2430	movdqu	32 + 128(%rsi),%xmm11
2431	movdqu	48 + 128(%rsi),%xmm15
2432	pxor	%xmm3,%xmm0
2433	pxor	%xmm7,%xmm4
2434	pxor	%xmm11,%xmm8
2435	pxor	%xmm12,%xmm15
2436	movdqu	%xmm0,0 + 128(%rdi)
2437	movdqu	%xmm4,16 + 128(%rdi)
2438	movdqu	%xmm8,32 + 128(%rdi)
2439	movdqu	%xmm15,48 + 128(%rdi)
2440
2441	movq	$192,%rcx
2442	subq	$192,%rbx
2443	leaq	192(%rsi),%rsi
2444	movq	$2,%rcx
2445	movq	$8,%r8
2446	cmpq	$64,%rbx
2447	jbe	.Lseal_sse_tail_64
2448	cmpq	$128,%rbx
2449	jbe	.Lseal_sse_tail_128
2450	cmpq	$192,%rbx
2451	jbe	.Lseal_sse_tail_192
2452
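# Bulk SSE seal loop: each iteration prepares four parallel 64-byte ChaCha20
# blocks (counters refreshed from 96(%rbp)) and, while computing them, absorbs
# ciphertext already written at %rdi into the Poly1305 accumulator held in
# %r10:%r11:%r12, with r at 0(%rbp) and s at 16(%rbp).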
2453.Lseal_sse_main_loop:
2454	movdqa	.Lchacha20_consts(%rip),%xmm0
2455	movdqa	0+48(%rbp),%xmm4
2456	movdqa	0+64(%rbp),%xmm8
2457	movdqa	%xmm0,%xmm1
2458	movdqa	%xmm4,%xmm5
2459	movdqa	%xmm8,%xmm9
2460	movdqa	%xmm0,%xmm2
2461	movdqa	%xmm4,%xmm6
2462	movdqa	%xmm8,%xmm10
2463	movdqa	%xmm0,%xmm3
2464	movdqa	%xmm4,%xmm7
2465	movdqa	%xmm8,%xmm11
2466	movdqa	0+96(%rbp),%xmm15
2467	paddd	.Lsse_inc(%rip),%xmm15
2468	movdqa	%xmm15,%xmm14
2469	paddd	.Lsse_inc(%rip),%xmm14
2470	movdqa	%xmm14,%xmm13
2471	paddd	.Lsse_inc(%rip),%xmm13
2472	movdqa	%xmm13,%xmm12
2473	paddd	.Lsse_inc(%rip),%xmm12
2474	movdqa	%xmm12,0+96(%rbp)
2475	movdqa	%xmm13,0+112(%rbp)
2476	movdqa	%xmm14,0+128(%rbp)
2477	movdqa	%xmm15,0+144(%rbp)
2478
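# One pass of .Lseal_sse_main_rounds performs a full ChaCha20 double round on
# all four states and multiplies one 16-byte ciphertext block into the
# Poly1305 state; %r8 and %rcx together determine how many Poly1305 blocks are
# interleaved with the ten double rounds.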
2479.align	32
2480.Lseal_sse_main_rounds:
2481	movdqa	%xmm8,0+80(%rbp)
2482	movdqa	.Lrol16(%rip),%xmm8
2483	paddd	%xmm7,%xmm3
2484	paddd	%xmm6,%xmm2
2485	paddd	%xmm5,%xmm1
2486	paddd	%xmm4,%xmm0
2487	pxor	%xmm3,%xmm15
2488	pxor	%xmm2,%xmm14
2489	pxor	%xmm1,%xmm13
2490	pxor	%xmm0,%xmm12
2491.byte	102,69,15,56,0,248
2492.byte	102,69,15,56,0,240
2493.byte	102,69,15,56,0,232
2494.byte	102,69,15,56,0,224
2495	movdqa	0+80(%rbp),%xmm8
2496	paddd	%xmm15,%xmm11
2497	paddd	%xmm14,%xmm10
2498	paddd	%xmm13,%xmm9
2499	paddd	%xmm12,%xmm8
2500	pxor	%xmm11,%xmm7
2501	addq	0+0(%rdi),%r10
2502	adcq	8+0(%rdi),%r11
2503	adcq	$1,%r12
2504	pxor	%xmm10,%xmm6
2505	pxor	%xmm9,%xmm5
2506	pxor	%xmm8,%xmm4
2507	movdqa	%xmm8,0+80(%rbp)
2508	movdqa	%xmm7,%xmm8
2509	psrld	$20,%xmm8
2510	pslld	$32-20,%xmm7
2511	pxor	%xmm8,%xmm7
2512	movdqa	%xmm6,%xmm8
2513	psrld	$20,%xmm8
2514	pslld	$32-20,%xmm6
2515	pxor	%xmm8,%xmm6
2516	movdqa	%xmm5,%xmm8
2517	psrld	$20,%xmm8
2518	pslld	$32-20,%xmm5
2519	pxor	%xmm8,%xmm5
2520	movdqa	%xmm4,%xmm8
2521	psrld	$20,%xmm8
2522	pslld	$32-20,%xmm4
2523	pxor	%xmm8,%xmm4
2524	movq	0+0+0(%rbp),%rax
2525	movq	%rax,%r15
2526	mulq	%r10
2527	movq	%rax,%r13
2528	movq	%rdx,%r14
2529	movq	0+0+0(%rbp),%rax
2530	mulq	%r11
2531	imulq	%r12,%r15
2532	addq	%rax,%r14
2533	adcq	%rdx,%r15
2534	movdqa	.Lrol8(%rip),%xmm8
2535	paddd	%xmm7,%xmm3
2536	paddd	%xmm6,%xmm2
2537	paddd	%xmm5,%xmm1
2538	paddd	%xmm4,%xmm0
2539	pxor	%xmm3,%xmm15
2540	pxor	%xmm2,%xmm14
2541	pxor	%xmm1,%xmm13
2542	pxor	%xmm0,%xmm12
2543.byte	102,69,15,56,0,248
2544.byte	102,69,15,56,0,240
2545.byte	102,69,15,56,0,232
2546.byte	102,69,15,56,0,224
2547	movdqa	0+80(%rbp),%xmm8
2548	paddd	%xmm15,%xmm11
2549	paddd	%xmm14,%xmm10
2550	paddd	%xmm13,%xmm9
2551	paddd	%xmm12,%xmm8
2552	pxor	%xmm11,%xmm7
2553	pxor	%xmm10,%xmm6
2554	movq	8+0+0(%rbp),%rax
2555	movq	%rax,%r9
2556	mulq	%r10
2557	addq	%rax,%r14
2558	adcq	$0,%rdx
2559	movq	%rdx,%r10
2560	movq	8+0+0(%rbp),%rax
2561	mulq	%r11
2562	addq	%rax,%r15
2563	adcq	$0,%rdx
2564	pxor	%xmm9,%xmm5
2565	pxor	%xmm8,%xmm4
2566	movdqa	%xmm8,0+80(%rbp)
2567	movdqa	%xmm7,%xmm8
2568	psrld	$25,%xmm8
2569	pslld	$32-25,%xmm7
2570	pxor	%xmm8,%xmm7
2571	movdqa	%xmm6,%xmm8
2572	psrld	$25,%xmm8
2573	pslld	$32-25,%xmm6
2574	pxor	%xmm8,%xmm6
2575	movdqa	%xmm5,%xmm8
2576	psrld	$25,%xmm8
2577	pslld	$32-25,%xmm5
2578	pxor	%xmm8,%xmm5
2579	movdqa	%xmm4,%xmm8
2580	psrld	$25,%xmm8
2581	pslld	$32-25,%xmm4
2582	pxor	%xmm8,%xmm4
2583	movdqa	0+80(%rbp),%xmm8
2584	imulq	%r12,%r9
2585	addq	%r10,%r15
2586	adcq	%rdx,%r9
2587.byte	102,15,58,15,255,4
2588.byte	102,69,15,58,15,219,8
2589.byte	102,69,15,58,15,255,12
2590.byte	102,15,58,15,246,4
2591.byte	102,69,15,58,15,210,8
2592.byte	102,69,15,58,15,246,12
2593.byte	102,15,58,15,237,4
2594.byte	102,69,15,58,15,201,8
2595.byte	102,69,15,58,15,237,12
2596.byte	102,15,58,15,228,4
2597.byte	102,69,15,58,15,192,8
2598.byte	102,69,15,58,15,228,12
2599	movdqa	%xmm8,0+80(%rbp)
2600	movdqa	.Lrol16(%rip),%xmm8
2601	paddd	%xmm7,%xmm3
2602	paddd	%xmm6,%xmm2
2603	paddd	%xmm5,%xmm1
2604	paddd	%xmm4,%xmm0
2605	pxor	%xmm3,%xmm15
2606	pxor	%xmm2,%xmm14
2607	movq	%r13,%r10
2608	movq	%r14,%r11
2609	movq	%r15,%r12
2610	andq	$3,%r12
2611	movq	%r15,%r13
2612	andq	$-4,%r13
2613	movq	%r9,%r14
2614	shrdq	$2,%r9,%r15
2615	shrq	$2,%r9
2616	addq	%r13,%r15
2617	adcq	%r14,%r9
2618	addq	%r15,%r10
2619	adcq	%r9,%r11
2620	adcq	$0,%r12
2621	pxor	%xmm1,%xmm13
2622	pxor	%xmm0,%xmm12
2623.byte	102,69,15,56,0,248
2624.byte	102,69,15,56,0,240
2625.byte	102,69,15,56,0,232
2626.byte	102,69,15,56,0,224
2627	movdqa	0+80(%rbp),%xmm8
2628	paddd	%xmm15,%xmm11
2629	paddd	%xmm14,%xmm10
2630	paddd	%xmm13,%xmm9
2631	paddd	%xmm12,%xmm8
2632	pxor	%xmm11,%xmm7
2633	pxor	%xmm10,%xmm6
2634	pxor	%xmm9,%xmm5
2635	pxor	%xmm8,%xmm4
2636	movdqa	%xmm8,0+80(%rbp)
2637	movdqa	%xmm7,%xmm8
2638	psrld	$20,%xmm8
2639	pslld	$32-20,%xmm7
2640	pxor	%xmm8,%xmm7
2641	movdqa	%xmm6,%xmm8
2642	psrld	$20,%xmm8
2643	pslld	$32-20,%xmm6
2644	pxor	%xmm8,%xmm6
2645	movdqa	%xmm5,%xmm8
2646	psrld	$20,%xmm8
2647	pslld	$32-20,%xmm5
2648	pxor	%xmm8,%xmm5
2649	movdqa	%xmm4,%xmm8
2650	psrld	$20,%xmm8
2651	pslld	$32-20,%xmm4
2652	pxor	%xmm8,%xmm4
2653	movdqa	.Lrol8(%rip),%xmm8
2654	paddd	%xmm7,%xmm3
2655	paddd	%xmm6,%xmm2
2656	paddd	%xmm5,%xmm1
2657	paddd	%xmm4,%xmm0
2658	pxor	%xmm3,%xmm15
2659	pxor	%xmm2,%xmm14
2660	pxor	%xmm1,%xmm13
2661	pxor	%xmm0,%xmm12
2662.byte	102,69,15,56,0,248
2663.byte	102,69,15,56,0,240
2664.byte	102,69,15,56,0,232
2665.byte	102,69,15,56,0,224
2666	movdqa	0+80(%rbp),%xmm8
2667	paddd	%xmm15,%xmm11
2668	paddd	%xmm14,%xmm10
2669	paddd	%xmm13,%xmm9
2670	paddd	%xmm12,%xmm8
2671	pxor	%xmm11,%xmm7
2672	pxor	%xmm10,%xmm6
2673	pxor	%xmm9,%xmm5
2674	pxor	%xmm8,%xmm4
2675	movdqa	%xmm8,0+80(%rbp)
2676	movdqa	%xmm7,%xmm8
2677	psrld	$25,%xmm8
2678	pslld	$32-25,%xmm7
2679	pxor	%xmm8,%xmm7
2680	movdqa	%xmm6,%xmm8
2681	psrld	$25,%xmm8
2682	pslld	$32-25,%xmm6
2683	pxor	%xmm8,%xmm6
2684	movdqa	%xmm5,%xmm8
2685	psrld	$25,%xmm8
2686	pslld	$32-25,%xmm5
2687	pxor	%xmm8,%xmm5
2688	movdqa	%xmm4,%xmm8
2689	psrld	$25,%xmm8
2690	pslld	$32-25,%xmm4
2691	pxor	%xmm8,%xmm4
2692	movdqa	0+80(%rbp),%xmm8
2693.byte	102,15,58,15,255,12
2694.byte	102,69,15,58,15,219,8
2695.byte	102,69,15,58,15,255,4
2696.byte	102,15,58,15,246,12
2697.byte	102,69,15,58,15,210,8
2698.byte	102,69,15,58,15,246,4
2699.byte	102,15,58,15,237,12
2700.byte	102,69,15,58,15,201,8
2701.byte	102,69,15,58,15,237,4
2702.byte	102,15,58,15,228,12
2703.byte	102,69,15,58,15,192,8
2704.byte	102,69,15,58,15,228,4
2705
2706	leaq	16(%rdi),%rdi
2707	decq	%r8
2708	jge	.Lseal_sse_main_rounds
2709	addq	0+0(%rdi),%r10
2710	adcq	8+0(%rdi),%r11
2711	adcq	$1,%r12
2712	movq	0+0+0(%rbp),%rax
2713	movq	%rax,%r15
2714	mulq	%r10
2715	movq	%rax,%r13
2716	movq	%rdx,%r14
2717	movq	0+0+0(%rbp),%rax
2718	mulq	%r11
2719	imulq	%r12,%r15
2720	addq	%rax,%r14
2721	adcq	%rdx,%r15
2722	movq	8+0+0(%rbp),%rax
2723	movq	%rax,%r9
2724	mulq	%r10
2725	addq	%rax,%r14
2726	adcq	$0,%rdx
2727	movq	%rdx,%r10
2728	movq	8+0+0(%rbp),%rax
2729	mulq	%r11
2730	addq	%rax,%r15
2731	adcq	$0,%rdx
2732	imulq	%r12,%r9
2733	addq	%r10,%r15
2734	adcq	%rdx,%r9
2735	movq	%r13,%r10
2736	movq	%r14,%r11
2737	movq	%r15,%r12
2738	andq	$3,%r12
2739	movq	%r15,%r13
2740	andq	$-4,%r13
2741	movq	%r9,%r14
2742	shrdq	$2,%r9,%r15
2743	shrq	$2,%r9
2744	addq	%r13,%r15
2745	adcq	%r14,%r9
2746	addq	%r15,%r10
2747	adcq	%r9,%r11
2748	adcq	$0,%r12
2749
2750	leaq	16(%rdi),%rdi
2751	decq	%rcx
2752	jg	.Lseal_sse_main_rounds
2753	paddd	.Lchacha20_consts(%rip),%xmm3
2754	paddd	0+48(%rbp),%xmm7
2755	paddd	0+64(%rbp),%xmm11
2756	paddd	0+144(%rbp),%xmm15
2757	paddd	.Lchacha20_consts(%rip),%xmm2
2758	paddd	0+48(%rbp),%xmm6
2759	paddd	0+64(%rbp),%xmm10
2760	paddd	0+128(%rbp),%xmm14
2761	paddd	.Lchacha20_consts(%rip),%xmm1
2762	paddd	0+48(%rbp),%xmm5
2763	paddd	0+64(%rbp),%xmm9
2764	paddd	0+112(%rbp),%xmm13
2765	paddd	.Lchacha20_consts(%rip),%xmm0
2766	paddd	0+48(%rbp),%xmm4
2767	paddd	0+64(%rbp),%xmm8
2768	paddd	0+96(%rbp),%xmm12
2769
2770	movdqa	%xmm14,0+80(%rbp)
2771	movdqa	%xmm14,0+80(%rbp)
2772	movdqu	0 + 0(%rsi),%xmm14
2773	pxor	%xmm3,%xmm14
2774	movdqu	%xmm14,0 + 0(%rdi)
2775	movdqu	16 + 0(%rsi),%xmm14
2776	pxor	%xmm7,%xmm14
2777	movdqu	%xmm14,16 + 0(%rdi)
2778	movdqu	32 + 0(%rsi),%xmm14
2779	pxor	%xmm11,%xmm14
2780	movdqu	%xmm14,32 + 0(%rdi)
2781	movdqu	48 + 0(%rsi),%xmm14
2782	pxor	%xmm15,%xmm14
2783	movdqu	%xmm14,48 + 0(%rdi)
2784
2785	movdqa	0+80(%rbp),%xmm14
2786	movdqu	0 + 64(%rsi),%xmm3
2787	movdqu	16 + 64(%rsi),%xmm7
2788	movdqu	32 + 64(%rsi),%xmm11
2789	movdqu	48 + 64(%rsi),%xmm15
2790	pxor	%xmm3,%xmm2
2791	pxor	%xmm7,%xmm6
2792	pxor	%xmm11,%xmm10
2793	pxor	%xmm14,%xmm15
2794	movdqu	%xmm2,0 + 64(%rdi)
2795	movdqu	%xmm6,16 + 64(%rdi)
2796	movdqu	%xmm10,32 + 64(%rdi)
2797	movdqu	%xmm15,48 + 64(%rdi)
2798	movdqu	0 + 128(%rsi),%xmm3
2799	movdqu	16 + 128(%rsi),%xmm7
2800	movdqu	32 + 128(%rsi),%xmm11
2801	movdqu	48 + 128(%rsi),%xmm15
2802	pxor	%xmm3,%xmm1
2803	pxor	%xmm7,%xmm5
2804	pxor	%xmm11,%xmm9
2805	pxor	%xmm13,%xmm15
2806	movdqu	%xmm1,0 + 128(%rdi)
2807	movdqu	%xmm5,16 + 128(%rdi)
2808	movdqu	%xmm9,32 + 128(%rdi)
2809	movdqu	%xmm15,48 + 128(%rdi)
2810
2811	cmpq	$256,%rbx
2812	ja	.Lseal_sse_main_loop_xor
2813
2814	movq	$192,%rcx
2815	subq	$192,%rbx
2816	leaq	192(%rsi),%rsi
2817	jmp	.Lseal_sse_128_tail_hash
2818.Lseal_sse_main_loop_xor:
2819	movdqu	0 + 192(%rsi),%xmm3
2820	movdqu	16 + 192(%rsi),%xmm7
2821	movdqu	32 + 192(%rsi),%xmm11
2822	movdqu	48 + 192(%rsi),%xmm15
2823	pxor	%xmm3,%xmm0
2824	pxor	%xmm7,%xmm4
2825	pxor	%xmm11,%xmm8
2826	pxor	%xmm12,%xmm15
2827	movdqu	%xmm0,0 + 192(%rdi)
2828	movdqu	%xmm4,16 + 192(%rdi)
2829	movdqu	%xmm8,32 + 192(%rdi)
2830	movdqu	%xmm15,48 + 192(%rdi)
2831
2832	leaq	256(%rsi),%rsi
2833	subq	$256,%rbx
2834	movq	$6,%rcx
2835	movq	$4,%r8
2836	cmpq	$192,%rbx
2837	jg	.Lseal_sse_main_loop
2838	movq	%rbx,%rcx
2839	testq	%rbx,%rbx
2840	je	.Lseal_sse_128_tail_hash
2841	movq	$6,%rcx
2842	cmpq	$128,%rbx
2843	ja	.Lseal_sse_tail_192
2844	cmpq	$64,%rbx
2845	ja	.Lseal_sse_tail_128
2846
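# At most 64 bytes remain: compute one final ChaCha20 block, hashing any
# not-yet-absorbed ciphertext between rounds, then jump to the common
# .Lseal_sse_128_tail_xor code.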
2847.Lseal_sse_tail_64:
2848	movdqa	.Lchacha20_consts(%rip),%xmm0
2849	movdqa	0+48(%rbp),%xmm4
2850	movdqa	0+64(%rbp),%xmm8
2851	movdqa	0+96(%rbp),%xmm12
2852	paddd	.Lsse_inc(%rip),%xmm12
2853	movdqa	%xmm12,0+96(%rbp)
2854
2855.Lseal_sse_tail_64_rounds_and_x2hash:
2856	addq	0+0(%rdi),%r10
2857	adcq	8+0(%rdi),%r11
2858	adcq	$1,%r12
2859	movq	0+0+0(%rbp),%rax
2860	movq	%rax,%r15
2861	mulq	%r10
2862	movq	%rax,%r13
2863	movq	%rdx,%r14
2864	movq	0+0+0(%rbp),%rax
2865	mulq	%r11
2866	imulq	%r12,%r15
2867	addq	%rax,%r14
2868	adcq	%rdx,%r15
2869	movq	8+0+0(%rbp),%rax
2870	movq	%rax,%r9
2871	mulq	%r10
2872	addq	%rax,%r14
2873	adcq	$0,%rdx
2874	movq	%rdx,%r10
2875	movq	8+0+0(%rbp),%rax
2876	mulq	%r11
2877	addq	%rax,%r15
2878	adcq	$0,%rdx
2879	imulq	%r12,%r9
2880	addq	%r10,%r15
2881	adcq	%rdx,%r9
2882	movq	%r13,%r10
2883	movq	%r14,%r11
2884	movq	%r15,%r12
2885	andq	$3,%r12
2886	movq	%r15,%r13
2887	andq	$-4,%r13
2888	movq	%r9,%r14
2889	shrdq	$2,%r9,%r15
2890	shrq	$2,%r9
2891	addq	%r13,%r15
2892	adcq	%r14,%r9
2893	addq	%r15,%r10
2894	adcq	%r9,%r11
2895	adcq	$0,%r12
2896
2897	leaq	16(%rdi),%rdi
2898.Lseal_sse_tail_64_rounds_and_x1hash:
2899	paddd	%xmm4,%xmm0
2900	pxor	%xmm0,%xmm12
2901	pshufb	.Lrol16(%rip),%xmm12
2902	paddd	%xmm12,%xmm8
2903	pxor	%xmm8,%xmm4
2904	movdqa	%xmm4,%xmm3
2905	pslld	$12,%xmm3
2906	psrld	$20,%xmm4
2907	pxor	%xmm3,%xmm4
2908	paddd	%xmm4,%xmm0
2909	pxor	%xmm0,%xmm12
2910	pshufb	.Lrol8(%rip),%xmm12
2911	paddd	%xmm12,%xmm8
2912	pxor	%xmm8,%xmm4
2913	movdqa	%xmm4,%xmm3
2914	pslld	$7,%xmm3
2915	psrld	$25,%xmm4
2916	pxor	%xmm3,%xmm4
2917.byte	102,15,58,15,228,4
2918.byte	102,69,15,58,15,192,8
2919.byte	102,69,15,58,15,228,12
2920	paddd	%xmm4,%xmm0
2921	pxor	%xmm0,%xmm12
2922	pshufb	.Lrol16(%rip),%xmm12
2923	paddd	%xmm12,%xmm8
2924	pxor	%xmm8,%xmm4
2925	movdqa	%xmm4,%xmm3
2926	pslld	$12,%xmm3
2927	psrld	$20,%xmm4
2928	pxor	%xmm3,%xmm4
2929	paddd	%xmm4,%xmm0
2930	pxor	%xmm0,%xmm12
2931	pshufb	.Lrol8(%rip),%xmm12
2932	paddd	%xmm12,%xmm8
2933	pxor	%xmm8,%xmm4
2934	movdqa	%xmm4,%xmm3
2935	pslld	$7,%xmm3
2936	psrld	$25,%xmm4
2937	pxor	%xmm3,%xmm4
2938.byte	102,15,58,15,228,12
2939.byte	102,69,15,58,15,192,8
2940.byte	102,69,15,58,15,228,4
2941	addq	0+0(%rdi),%r10
2942	adcq	8+0(%rdi),%r11
2943	adcq	$1,%r12
2944	movq	0+0+0(%rbp),%rax
2945	movq	%rax,%r15
2946	mulq	%r10
2947	movq	%rax,%r13
2948	movq	%rdx,%r14
2949	movq	0+0+0(%rbp),%rax
2950	mulq	%r11
2951	imulq	%r12,%r15
2952	addq	%rax,%r14
2953	adcq	%rdx,%r15
2954	movq	8+0+0(%rbp),%rax
2955	movq	%rax,%r9
2956	mulq	%r10
2957	addq	%rax,%r14
2958	adcq	$0,%rdx
2959	movq	%rdx,%r10
2960	movq	8+0+0(%rbp),%rax
2961	mulq	%r11
2962	addq	%rax,%r15
2963	adcq	$0,%rdx
2964	imulq	%r12,%r9
2965	addq	%r10,%r15
2966	adcq	%rdx,%r9
2967	movq	%r13,%r10
2968	movq	%r14,%r11
2969	movq	%r15,%r12
2970	andq	$3,%r12
2971	movq	%r15,%r13
2972	andq	$-4,%r13
2973	movq	%r9,%r14
2974	shrdq	$2,%r9,%r15
2975	shrq	$2,%r9
2976	addq	%r13,%r15
2977	adcq	%r14,%r9
2978	addq	%r15,%r10
2979	adcq	%r9,%r11
2980	adcq	$0,%r12
2981
2982	leaq	16(%rdi),%rdi
2983	decq	%rcx
2984	jg	.Lseal_sse_tail_64_rounds_and_x2hash
2985	decq	%r8
2986	jge	.Lseal_sse_tail_64_rounds_and_x1hash
2987	paddd	.Lchacha20_consts(%rip),%xmm0
2988	paddd	0+48(%rbp),%xmm4
2989	paddd	0+64(%rbp),%xmm8
2990	paddd	0+96(%rbp),%xmm12
2991
2992	jmp	.Lseal_sse_128_tail_xor
2993
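# Between 65 and 128 bytes remain: compute two ChaCha20 blocks. The first 64
# bytes are encrypted here; the second block's keystream is left in
# %xmm0/%xmm4/%xmm8/%xmm12 for .Lseal_sse_128_tail_xor.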
2994.Lseal_sse_tail_128:
2995	movdqa	.Lchacha20_consts(%rip),%xmm0
2996	movdqa	0+48(%rbp),%xmm4
2997	movdqa	0+64(%rbp),%xmm8
2998	movdqa	%xmm0,%xmm1
2999	movdqa	%xmm4,%xmm5
3000	movdqa	%xmm8,%xmm9
3001	movdqa	0+96(%rbp),%xmm13
3002	paddd	.Lsse_inc(%rip),%xmm13
3003	movdqa	%xmm13,%xmm12
3004	paddd	.Lsse_inc(%rip),%xmm12
3005	movdqa	%xmm12,0+96(%rbp)
3006	movdqa	%xmm13,0+112(%rbp)
3007
3008.Lseal_sse_tail_128_rounds_and_x2hash:
3009	addq	0+0(%rdi),%r10
3010	adcq	8+0(%rdi),%r11
3011	adcq	$1,%r12
3012	movq	0+0+0(%rbp),%rax
3013	movq	%rax,%r15
3014	mulq	%r10
3015	movq	%rax,%r13
3016	movq	%rdx,%r14
3017	movq	0+0+0(%rbp),%rax
3018	mulq	%r11
3019	imulq	%r12,%r15
3020	addq	%rax,%r14
3021	adcq	%rdx,%r15
3022	movq	8+0+0(%rbp),%rax
3023	movq	%rax,%r9
3024	mulq	%r10
3025	addq	%rax,%r14
3026	adcq	$0,%rdx
3027	movq	%rdx,%r10
3028	movq	8+0+0(%rbp),%rax
3029	mulq	%r11
3030	addq	%rax,%r15
3031	adcq	$0,%rdx
3032	imulq	%r12,%r9
3033	addq	%r10,%r15
3034	adcq	%rdx,%r9
3035	movq	%r13,%r10
3036	movq	%r14,%r11
3037	movq	%r15,%r12
3038	andq	$3,%r12
3039	movq	%r15,%r13
3040	andq	$-4,%r13
3041	movq	%r9,%r14
3042	shrdq	$2,%r9,%r15
3043	shrq	$2,%r9
3044	addq	%r13,%r15
3045	adcq	%r14,%r9
3046	addq	%r15,%r10
3047	adcq	%r9,%r11
3048	adcq	$0,%r12
3049
3050	leaq	16(%rdi),%rdi
3051.Lseal_sse_tail_128_rounds_and_x1hash:
3052	paddd	%xmm4,%xmm0
3053	pxor	%xmm0,%xmm12
3054	pshufb	.Lrol16(%rip),%xmm12
3055	paddd	%xmm12,%xmm8
3056	pxor	%xmm8,%xmm4
3057	movdqa	%xmm4,%xmm3
3058	pslld	$12,%xmm3
3059	psrld	$20,%xmm4
3060	pxor	%xmm3,%xmm4
3061	paddd	%xmm4,%xmm0
3062	pxor	%xmm0,%xmm12
3063	pshufb	.Lrol8(%rip),%xmm12
3064	paddd	%xmm12,%xmm8
3065	pxor	%xmm8,%xmm4
3066	movdqa	%xmm4,%xmm3
3067	pslld	$7,%xmm3
3068	psrld	$25,%xmm4
3069	pxor	%xmm3,%xmm4
3070.byte	102,15,58,15,228,4
3071.byte	102,69,15,58,15,192,8
3072.byte	102,69,15,58,15,228,12
3073	paddd	%xmm5,%xmm1
3074	pxor	%xmm1,%xmm13
3075	pshufb	.Lrol16(%rip),%xmm13
3076	paddd	%xmm13,%xmm9
3077	pxor	%xmm9,%xmm5
3078	movdqa	%xmm5,%xmm3
3079	pslld	$12,%xmm3
3080	psrld	$20,%xmm5
3081	pxor	%xmm3,%xmm5
3082	paddd	%xmm5,%xmm1
3083	pxor	%xmm1,%xmm13
3084	pshufb	.Lrol8(%rip),%xmm13
3085	paddd	%xmm13,%xmm9
3086	pxor	%xmm9,%xmm5
3087	movdqa	%xmm5,%xmm3
3088	pslld	$7,%xmm3
3089	psrld	$25,%xmm5
3090	pxor	%xmm3,%xmm5
3091.byte	102,15,58,15,237,4
3092.byte	102,69,15,58,15,201,8
3093.byte	102,69,15,58,15,237,12
3094	addq	0+0(%rdi),%r10
3095	adcq	8+0(%rdi),%r11
3096	adcq	$1,%r12
3097	movq	0+0+0(%rbp),%rax
3098	movq	%rax,%r15
3099	mulq	%r10
3100	movq	%rax,%r13
3101	movq	%rdx,%r14
3102	movq	0+0+0(%rbp),%rax
3103	mulq	%r11
3104	imulq	%r12,%r15
3105	addq	%rax,%r14
3106	adcq	%rdx,%r15
3107	movq	8+0+0(%rbp),%rax
3108	movq	%rax,%r9
3109	mulq	%r10
3110	addq	%rax,%r14
3111	adcq	$0,%rdx
3112	movq	%rdx,%r10
3113	movq	8+0+0(%rbp),%rax
3114	mulq	%r11
3115	addq	%rax,%r15
3116	adcq	$0,%rdx
3117	imulq	%r12,%r9
3118	addq	%r10,%r15
3119	adcq	%rdx,%r9
3120	movq	%r13,%r10
3121	movq	%r14,%r11
3122	movq	%r15,%r12
3123	andq	$3,%r12
3124	movq	%r15,%r13
3125	andq	$-4,%r13
3126	movq	%r9,%r14
3127	shrdq	$2,%r9,%r15
3128	shrq	$2,%r9
3129	addq	%r13,%r15
3130	adcq	%r14,%r9
3131	addq	%r15,%r10
3132	adcq	%r9,%r11
3133	adcq	$0,%r12
3134	paddd	%xmm4,%xmm0
3135	pxor	%xmm0,%xmm12
3136	pshufb	.Lrol16(%rip),%xmm12
3137	paddd	%xmm12,%xmm8
3138	pxor	%xmm8,%xmm4
3139	movdqa	%xmm4,%xmm3
3140	pslld	$12,%xmm3
3141	psrld	$20,%xmm4
3142	pxor	%xmm3,%xmm4
3143	paddd	%xmm4,%xmm0
3144	pxor	%xmm0,%xmm12
3145	pshufb	.Lrol8(%rip),%xmm12
3146	paddd	%xmm12,%xmm8
3147	pxor	%xmm8,%xmm4
3148	movdqa	%xmm4,%xmm3
3149	pslld	$7,%xmm3
3150	psrld	$25,%xmm4
3151	pxor	%xmm3,%xmm4
3152.byte	102,15,58,15,228,12
3153.byte	102,69,15,58,15,192,8
3154.byte	102,69,15,58,15,228,4
3155	paddd	%xmm5,%xmm1
3156	pxor	%xmm1,%xmm13
3157	pshufb	.Lrol16(%rip),%xmm13
3158	paddd	%xmm13,%xmm9
3159	pxor	%xmm9,%xmm5
3160	movdqa	%xmm5,%xmm3
3161	pslld	$12,%xmm3
3162	psrld	$20,%xmm5
3163	pxor	%xmm3,%xmm5
3164	paddd	%xmm5,%xmm1
3165	pxor	%xmm1,%xmm13
3166	pshufb	.Lrol8(%rip),%xmm13
3167	paddd	%xmm13,%xmm9
3168	pxor	%xmm9,%xmm5
3169	movdqa	%xmm5,%xmm3
3170	pslld	$7,%xmm3
3171	psrld	$25,%xmm5
3172	pxor	%xmm3,%xmm5
3173.byte	102,15,58,15,237,12
3174.byte	102,69,15,58,15,201,8
3175.byte	102,69,15,58,15,237,4
3176
3177	leaq	16(%rdi),%rdi
3178	decq	%rcx
3179	jg	.Lseal_sse_tail_128_rounds_and_x2hash
3180	decq	%r8
3181	jge	.Lseal_sse_tail_128_rounds_and_x1hash
3182	paddd	.Lchacha20_consts(%rip),%xmm1
3183	paddd	0+48(%rbp),%xmm5
3184	paddd	0+64(%rbp),%xmm9
3185	paddd	0+112(%rbp),%xmm13
3186	paddd	.Lchacha20_consts(%rip),%xmm0
3187	paddd	0+48(%rbp),%xmm4
3188	paddd	0+64(%rbp),%xmm8
3189	paddd	0+96(%rbp),%xmm12
3190	movdqu	0 + 0(%rsi),%xmm3
3191	movdqu	16 + 0(%rsi),%xmm7
3192	movdqu	32 + 0(%rsi),%xmm11
3193	movdqu	48 + 0(%rsi),%xmm15
3194	pxor	%xmm3,%xmm1
3195	pxor	%xmm7,%xmm5
3196	pxor	%xmm11,%xmm9
3197	pxor	%xmm13,%xmm15
3198	movdqu	%xmm1,0 + 0(%rdi)
3199	movdqu	%xmm5,16 + 0(%rdi)
3200	movdqu	%xmm9,32 + 0(%rdi)
3201	movdqu	%xmm15,48 + 0(%rdi)
3202
3203	movq	$64,%rcx
3204	subq	$64,%rbx
3205	leaq	64(%rsi),%rsi
3206	jmp	.Lseal_sse_128_tail_hash
3207
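# Between 129 and 192 bytes remain: compute three ChaCha20 blocks, encrypt the
# first 128 bytes here, and leave the last block's keystream for the tail XOR.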
3208.Lseal_sse_tail_192:
3209	movdqa	.Lchacha20_consts(%rip),%xmm0
3210	movdqa	0+48(%rbp),%xmm4
3211	movdqa	0+64(%rbp),%xmm8
3212	movdqa	%xmm0,%xmm1
3213	movdqa	%xmm4,%xmm5
3214	movdqa	%xmm8,%xmm9
3215	movdqa	%xmm0,%xmm2
3216	movdqa	%xmm4,%xmm6
3217	movdqa	%xmm8,%xmm10
3218	movdqa	0+96(%rbp),%xmm14
3219	paddd	.Lsse_inc(%rip),%xmm14
3220	movdqa	%xmm14,%xmm13
3221	paddd	.Lsse_inc(%rip),%xmm13
3222	movdqa	%xmm13,%xmm12
3223	paddd	.Lsse_inc(%rip),%xmm12
3224	movdqa	%xmm12,0+96(%rbp)
3225	movdqa	%xmm13,0+112(%rbp)
3226	movdqa	%xmm14,0+128(%rbp)
3227
3228.Lseal_sse_tail_192_rounds_and_x2hash:
3229	addq	0+0(%rdi),%r10
3230	adcq	8+0(%rdi),%r11
3231	adcq	$1,%r12
3232	movq	0+0+0(%rbp),%rax
3233	movq	%rax,%r15
3234	mulq	%r10
3235	movq	%rax,%r13
3236	movq	%rdx,%r14
3237	movq	0+0+0(%rbp),%rax
3238	mulq	%r11
3239	imulq	%r12,%r15
3240	addq	%rax,%r14
3241	adcq	%rdx,%r15
3242	movq	8+0+0(%rbp),%rax
3243	movq	%rax,%r9
3244	mulq	%r10
3245	addq	%rax,%r14
3246	adcq	$0,%rdx
3247	movq	%rdx,%r10
3248	movq	8+0+0(%rbp),%rax
3249	mulq	%r11
3250	addq	%rax,%r15
3251	adcq	$0,%rdx
3252	imulq	%r12,%r9
3253	addq	%r10,%r15
3254	adcq	%rdx,%r9
3255	movq	%r13,%r10
3256	movq	%r14,%r11
3257	movq	%r15,%r12
3258	andq	$3,%r12
3259	movq	%r15,%r13
3260	andq	$-4,%r13
3261	movq	%r9,%r14
3262	shrdq	$2,%r9,%r15
3263	shrq	$2,%r9
3264	addq	%r13,%r15
3265	adcq	%r14,%r9
3266	addq	%r15,%r10
3267	adcq	%r9,%r11
3268	adcq	$0,%r12
3269
3270	leaq	16(%rdi),%rdi
3271.Lseal_sse_tail_192_rounds_and_x1hash:
3272	paddd	%xmm4,%xmm0
3273	pxor	%xmm0,%xmm12
3274	pshufb	.Lrol16(%rip),%xmm12
3275	paddd	%xmm12,%xmm8
3276	pxor	%xmm8,%xmm4
3277	movdqa	%xmm4,%xmm3
3278	pslld	$12,%xmm3
3279	psrld	$20,%xmm4
3280	pxor	%xmm3,%xmm4
3281	paddd	%xmm4,%xmm0
3282	pxor	%xmm0,%xmm12
3283	pshufb	.Lrol8(%rip),%xmm12
3284	paddd	%xmm12,%xmm8
3285	pxor	%xmm8,%xmm4
3286	movdqa	%xmm4,%xmm3
3287	pslld	$7,%xmm3
3288	psrld	$25,%xmm4
3289	pxor	%xmm3,%xmm4
3290.byte	102,15,58,15,228,4
3291.byte	102,69,15,58,15,192,8
3292.byte	102,69,15,58,15,228,12
3293	paddd	%xmm5,%xmm1
3294	pxor	%xmm1,%xmm13
3295	pshufb	.Lrol16(%rip),%xmm13
3296	paddd	%xmm13,%xmm9
3297	pxor	%xmm9,%xmm5
3298	movdqa	%xmm5,%xmm3
3299	pslld	$12,%xmm3
3300	psrld	$20,%xmm5
3301	pxor	%xmm3,%xmm5
3302	paddd	%xmm5,%xmm1
3303	pxor	%xmm1,%xmm13
3304	pshufb	.Lrol8(%rip),%xmm13
3305	paddd	%xmm13,%xmm9
3306	pxor	%xmm9,%xmm5
3307	movdqa	%xmm5,%xmm3
3308	pslld	$7,%xmm3
3309	psrld	$25,%xmm5
3310	pxor	%xmm3,%xmm5
3311.byte	102,15,58,15,237,4
3312.byte	102,69,15,58,15,201,8
3313.byte	102,69,15,58,15,237,12
3314	paddd	%xmm6,%xmm2
3315	pxor	%xmm2,%xmm14
3316	pshufb	.Lrol16(%rip),%xmm14
3317	paddd	%xmm14,%xmm10
3318	pxor	%xmm10,%xmm6
3319	movdqa	%xmm6,%xmm3
3320	pslld	$12,%xmm3
3321	psrld	$20,%xmm6
3322	pxor	%xmm3,%xmm6
3323	paddd	%xmm6,%xmm2
3324	pxor	%xmm2,%xmm14
3325	pshufb	.Lrol8(%rip),%xmm14
3326	paddd	%xmm14,%xmm10
3327	pxor	%xmm10,%xmm6
3328	movdqa	%xmm6,%xmm3
3329	pslld	$7,%xmm3
3330	psrld	$25,%xmm6
3331	pxor	%xmm3,%xmm6
3332.byte	102,15,58,15,246,4
3333.byte	102,69,15,58,15,210,8
3334.byte	102,69,15,58,15,246,12
3335	addq	0+0(%rdi),%r10
3336	adcq	8+0(%rdi),%r11
3337	adcq	$1,%r12
3338	movq	0+0+0(%rbp),%rax
3339	movq	%rax,%r15
3340	mulq	%r10
3341	movq	%rax,%r13
3342	movq	%rdx,%r14
3343	movq	0+0+0(%rbp),%rax
3344	mulq	%r11
3345	imulq	%r12,%r15
3346	addq	%rax,%r14
3347	adcq	%rdx,%r15
3348	movq	8+0+0(%rbp),%rax
3349	movq	%rax,%r9
3350	mulq	%r10
3351	addq	%rax,%r14
3352	adcq	$0,%rdx
3353	movq	%rdx,%r10
3354	movq	8+0+0(%rbp),%rax
3355	mulq	%r11
3356	addq	%rax,%r15
3357	adcq	$0,%rdx
3358	imulq	%r12,%r9
3359	addq	%r10,%r15
3360	adcq	%rdx,%r9
3361	movq	%r13,%r10
3362	movq	%r14,%r11
3363	movq	%r15,%r12
3364	andq	$3,%r12
3365	movq	%r15,%r13
3366	andq	$-4,%r13
3367	movq	%r9,%r14
3368	shrdq	$2,%r9,%r15
3369	shrq	$2,%r9
3370	addq	%r13,%r15
3371	adcq	%r14,%r9
3372	addq	%r15,%r10
3373	adcq	%r9,%r11
3374	adcq	$0,%r12
3375	paddd	%xmm4,%xmm0
3376	pxor	%xmm0,%xmm12
3377	pshufb	.Lrol16(%rip),%xmm12
3378	paddd	%xmm12,%xmm8
3379	pxor	%xmm8,%xmm4
3380	movdqa	%xmm4,%xmm3
3381	pslld	$12,%xmm3
3382	psrld	$20,%xmm4
3383	pxor	%xmm3,%xmm4
3384	paddd	%xmm4,%xmm0
3385	pxor	%xmm0,%xmm12
3386	pshufb	.Lrol8(%rip),%xmm12
3387	paddd	%xmm12,%xmm8
3388	pxor	%xmm8,%xmm4
3389	movdqa	%xmm4,%xmm3
3390	pslld	$7,%xmm3
3391	psrld	$25,%xmm4
3392	pxor	%xmm3,%xmm4
3393.byte	102,15,58,15,228,12
3394.byte	102,69,15,58,15,192,8
3395.byte	102,69,15,58,15,228,4
3396	paddd	%xmm5,%xmm1
3397	pxor	%xmm1,%xmm13
3398	pshufb	.Lrol16(%rip),%xmm13
3399	paddd	%xmm13,%xmm9
3400	pxor	%xmm9,%xmm5
3401	movdqa	%xmm5,%xmm3
3402	pslld	$12,%xmm3
3403	psrld	$20,%xmm5
3404	pxor	%xmm3,%xmm5
3405	paddd	%xmm5,%xmm1
3406	pxor	%xmm1,%xmm13
3407	pshufb	.Lrol8(%rip),%xmm13
3408	paddd	%xmm13,%xmm9
3409	pxor	%xmm9,%xmm5
3410	movdqa	%xmm5,%xmm3
3411	pslld	$7,%xmm3
3412	psrld	$25,%xmm5
3413	pxor	%xmm3,%xmm5
3414.byte	102,15,58,15,237,12
3415.byte	102,69,15,58,15,201,8
3416.byte	102,69,15,58,15,237,4
3417	paddd	%xmm6,%xmm2
3418	pxor	%xmm2,%xmm14
3419	pshufb	.Lrol16(%rip),%xmm14
3420	paddd	%xmm14,%xmm10
3421	pxor	%xmm10,%xmm6
3422	movdqa	%xmm6,%xmm3
3423	pslld	$12,%xmm3
3424	psrld	$20,%xmm6
3425	pxor	%xmm3,%xmm6
3426	paddd	%xmm6,%xmm2
3427	pxor	%xmm2,%xmm14
3428	pshufb	.Lrol8(%rip),%xmm14
3429	paddd	%xmm14,%xmm10
3430	pxor	%xmm10,%xmm6
3431	movdqa	%xmm6,%xmm3
3432	pslld	$7,%xmm3
3433	psrld	$25,%xmm6
3434	pxor	%xmm3,%xmm6
3435.byte	102,15,58,15,246,12
3436.byte	102,69,15,58,15,210,8
3437.byte	102,69,15,58,15,246,4
3438
3439	leaq	16(%rdi),%rdi
3440	decq	%rcx
3441	jg	.Lseal_sse_tail_192_rounds_and_x2hash
3442	decq	%r8
3443	jge	.Lseal_sse_tail_192_rounds_and_x1hash
3444	paddd	.Lchacha20_consts(%rip),%xmm2
3445	paddd	0+48(%rbp),%xmm6
3446	paddd	0+64(%rbp),%xmm10
3447	paddd	0+128(%rbp),%xmm14
3448	paddd	.Lchacha20_consts(%rip),%xmm1
3449	paddd	0+48(%rbp),%xmm5
3450	paddd	0+64(%rbp),%xmm9
3451	paddd	0+112(%rbp),%xmm13
3452	paddd	.Lchacha20_consts(%rip),%xmm0
3453	paddd	0+48(%rbp),%xmm4
3454	paddd	0+64(%rbp),%xmm8
3455	paddd	0+96(%rbp),%xmm12
3456	movdqu	0 + 0(%rsi),%xmm3
3457	movdqu	16 + 0(%rsi),%xmm7
3458	movdqu	32 + 0(%rsi),%xmm11
3459	movdqu	48 + 0(%rsi),%xmm15
3460	pxor	%xmm3,%xmm2
3461	pxor	%xmm7,%xmm6
3462	pxor	%xmm11,%xmm10
3463	pxor	%xmm14,%xmm15
3464	movdqu	%xmm2,0 + 0(%rdi)
3465	movdqu	%xmm6,16 + 0(%rdi)
3466	movdqu	%xmm10,32 + 0(%rdi)
3467	movdqu	%xmm15,48 + 0(%rdi)
3468	movdqu	0 + 64(%rsi),%xmm3
3469	movdqu	16 + 64(%rsi),%xmm7
3470	movdqu	32 + 64(%rsi),%xmm11
3471	movdqu	48 + 64(%rsi),%xmm15
3472	pxor	%xmm3,%xmm1
3473	pxor	%xmm7,%xmm5
3474	pxor	%xmm11,%xmm9
3475	pxor	%xmm13,%xmm15
3476	movdqu	%xmm1,0 + 64(%rdi)
3477	movdqu	%xmm5,16 + 64(%rdi)
3478	movdqu	%xmm9,32 + 64(%rdi)
3479	movdqu	%xmm15,48 + 64(%rdi)
3480
3481	movq	$128,%rcx
3482	subq	$128,%rbx
3483	leaq	128(%rsi),%rsi
3484
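# Absorb any full 16-byte blocks of already-written ciphertext that have not
# been hashed yet; %rcx holds the number of bytes left to hash.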
3485.Lseal_sse_128_tail_hash:
3486	cmpq	$16,%rcx
3487	jb	.Lseal_sse_128_tail_xor
3488	addq	0+0(%rdi),%r10
3489	adcq	8+0(%rdi),%r11
3490	adcq	$1,%r12
3491	movq	0+0+0(%rbp),%rax
3492	movq	%rax,%r15
3493	mulq	%r10
3494	movq	%rax,%r13
3495	movq	%rdx,%r14
3496	movq	0+0+0(%rbp),%rax
3497	mulq	%r11
3498	imulq	%r12,%r15
3499	addq	%rax,%r14
3500	adcq	%rdx,%r15
3501	movq	8+0+0(%rbp),%rax
3502	movq	%rax,%r9
3503	mulq	%r10
3504	addq	%rax,%r14
3505	adcq	$0,%rdx
3506	movq	%rdx,%r10
3507	movq	8+0+0(%rbp),%rax
3508	mulq	%r11
3509	addq	%rax,%r15
3510	adcq	$0,%rdx
3511	imulq	%r12,%r9
3512	addq	%r10,%r15
3513	adcq	%rdx,%r9
3514	movq	%r13,%r10
3515	movq	%r14,%r11
3516	movq	%r15,%r12
3517	andq	$3,%r12
3518	movq	%r15,%r13
3519	andq	$-4,%r13
3520	movq	%r9,%r14
3521	shrdq	$2,%r9,%r15
3522	shrq	$2,%r9
3523	addq	%r13,%r15
3524	adcq	%r14,%r9
3525	addq	%r15,%r10
3526	adcq	%r9,%r11
3527	adcq	$0,%r12
3528
3529	subq	$16,%rcx
3530	leaq	16(%rdi),%rdi
3531	jmp	.Lseal_sse_128_tail_hash
3532
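# Encrypt the remaining input 16 bytes at a time: XOR with the keystream in
# %xmm0, hash the resulting ciphertext block, then rotate the next 16 bytes of
# keystream into %xmm0.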
3533.Lseal_sse_128_tail_xor:
3534	cmpq	$16,%rbx
3535	jb	.Lseal_sse_tail_16
3536	subq	$16,%rbx
3537
3538	movdqu	0(%rsi),%xmm3
3539	pxor	%xmm3,%xmm0
3540	movdqu	%xmm0,0(%rdi)
3541
3542	addq	0(%rdi),%r10
3543	adcq	8(%rdi),%r11
3544	adcq	$1,%r12
3545	leaq	16(%rsi),%rsi
3546	leaq	16(%rdi),%rdi
3547	movq	0+0+0(%rbp),%rax
3548	movq	%rax,%r15
3549	mulq	%r10
3550	movq	%rax,%r13
3551	movq	%rdx,%r14
3552	movq	0+0+0(%rbp),%rax
3553	mulq	%r11
3554	imulq	%r12,%r15
3555	addq	%rax,%r14
3556	adcq	%rdx,%r15
3557	movq	8+0+0(%rbp),%rax
3558	movq	%rax,%r9
3559	mulq	%r10
3560	addq	%rax,%r14
3561	adcq	$0,%rdx
3562	movq	%rdx,%r10
3563	movq	8+0+0(%rbp),%rax
3564	mulq	%r11
3565	addq	%rax,%r15
3566	adcq	$0,%rdx
3567	imulq	%r12,%r9
3568	addq	%r10,%r15
3569	adcq	%rdx,%r9
3570	movq	%r13,%r10
3571	movq	%r14,%r11
3572	movq	%r15,%r12
3573	andq	$3,%r12
3574	movq	%r15,%r13
3575	andq	$-4,%r13
3576	movq	%r9,%r14
3577	shrdq	$2,%r9,%r15
3578	shrq	$2,%r9
3579	addq	%r13,%r15
3580	adcq	%r14,%r9
3581	addq	%r15,%r10
3582	adcq	%r9,%r11
3583	adcq	$0,%r12
3584
3585
3586	movdqa	%xmm4,%xmm0
3587	movdqa	%xmm8,%xmm4
3588	movdqa	%xmm12,%xmm8
3589	movdqa	%xmm1,%xmm12
3590	movdqa	%xmm5,%xmm1
3591	movdqa	%xmm9,%xmm5
3592	movdqa	%xmm13,%xmm9
3593	jmp	.Lseal_sse_128_tail_xor
3594
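# Fewer than 16 bytes remain: gather them into %xmm15 one byte at a time, XOR
# with the keystream in %xmm0 and write them out byte by byte. The partial
# block is hashed later, after optionally being topped up with "extra" input.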
3595.Lseal_sse_tail_16:
3596	testq	%rbx,%rbx
3597	jz	.Lprocess_blocks_of_extra_in
3598
3599	movq	%rbx,%r8
3600	movq	%rbx,%rcx
3601	leaq	-1(%rsi,%rbx,1),%rsi
3602	pxor	%xmm15,%xmm15
3603.Lseal_sse_tail_16_compose:
3604	pslldq	$1,%xmm15
3605	pinsrb	$0,(%rsi),%xmm15
3606	leaq	-1(%rsi),%rsi
3607	decq	%rcx
3608	jne	.Lseal_sse_tail_16_compose
3609
3610
3611	pxor	%xmm0,%xmm15
3612
3613
3614	movq	%rbx,%rcx
3615	movdqu	%xmm15,%xmm0
3616.Lseal_sse_tail_16_extract:
3617	pextrb	$0,%xmm0,(%rdi)
3618	psrldq	$1,%xmm0
3619	addq	$1,%rdi
3620	subq	$1,%rcx
3621	jnz	.Lseal_sse_tail_16_extract
3622
3623
3624
3625
3626
3627
3628
3629
3630	movq	288 + 0 + 32(%rsp),%r9
3631	movq	56(%r9),%r14
3632	movq	48(%r9),%r13
3633	testq	%r14,%r14
3634	jz	.Lprocess_partial_block
3635
3636	movq	$16,%r15
3637	subq	%rbx,%r15
3638	cmpq	%r15,%r14
3639
3640	jge	.Lload_extra_in
3641	movq	%r14,%r15
3642
3643.Lload_extra_in:
3644
3645
3646	leaq	-1(%r13,%r15,1),%rsi
3647
3648
3649	addq	%r15,%r13
3650	subq	%r15,%r14
3651	movq	%r13,48(%r9)
3652	movq	%r14,56(%r9)
3653
3654
3655
3656	addq	%r15,%r8
3657
3658
3659	pxor	%xmm11,%xmm11
3660.Lload_extra_load_loop:
3661	pslldq	$1,%xmm11
3662	pinsrb	$0,(%rsi),%xmm11
3663	leaq	-1(%rsi),%rsi
3664	subq	$1,%r15
3665	jnz	.Lload_extra_load_loop
3666
3667
3668
3669
3670	movq	%rbx,%r15
3671
3672.Lload_extra_shift_loop:
3673	pslldq	$1,%xmm11
3674	subq	$1,%r15
3675	jnz	.Lload_extra_shift_loop
3676
3677
3678
3679
3680	leaq	.Land_masks(%rip),%r15
3681	shlq	$4,%rbx
3682	pand	-16(%r15,%rbx,1),%xmm15
3683
3684
3685	por	%xmm11,%xmm15
3686
3687
3688
3689.byte	102,77,15,126,253
3690	pextrq	$1,%xmm15,%r14
3691	addq	%r13,%r10
3692	adcq	%r14,%r11
3693	adcq	$1,%r12
3694	movq	0+0+0(%rbp),%rax
3695	movq	%rax,%r15
3696	mulq	%r10
3697	movq	%rax,%r13
3698	movq	%rdx,%r14
3699	movq	0+0+0(%rbp),%rax
3700	mulq	%r11
3701	imulq	%r12,%r15
3702	addq	%rax,%r14
3703	adcq	%rdx,%r15
3704	movq	8+0+0(%rbp),%rax
3705	movq	%rax,%r9
3706	mulq	%r10
3707	addq	%rax,%r14
3708	adcq	$0,%rdx
3709	movq	%rdx,%r10
3710	movq	8+0+0(%rbp),%rax
3711	mulq	%r11
3712	addq	%rax,%r15
3713	adcq	$0,%rdx
3714	imulq	%r12,%r9
3715	addq	%r10,%r15
3716	adcq	%rdx,%r9
3717	movq	%r13,%r10
3718	movq	%r14,%r11
3719	movq	%r15,%r12
3720	andq	$3,%r12
3721	movq	%r15,%r13
3722	andq	$-4,%r13
3723	movq	%r9,%r14
3724	shrdq	$2,%r9,%r15
3725	shrq	$2,%r9
3726	addq	%r13,%r15
3727	adcq	%r14,%r9
3728	addq	%r15,%r10
3729	adcq	%r9,%r11
3730	adcq	$0,%r12
3731
3732
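# Hash any remaining "extra" input (pointer at 48(%r9), length at 56(%r9),
# where %r9 is reloaded from the stack) in 16-byte blocks; a trailing partial
# block is gathered into %xmm15 and handled by .Lprocess_partial_block.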
3733.Lprocess_blocks_of_extra_in:
3734
3735	movq	288+32+0(%rsp),%r9
3736	movq	48(%r9),%rsi
3737	movq	56(%r9),%r8
3738	movq	%r8,%rcx
3739	shrq	$4,%r8
3740
3741.Lprocess_extra_hash_loop:
3742	jz	.Lprocess_extra_in_trailer
3743	addq	0+0(%rsi),%r10
3744	adcq	8+0(%rsi),%r11
3745	adcq	$1,%r12
3746	movq	0+0+0(%rbp),%rax
3747	movq	%rax,%r15
3748	mulq	%r10
3749	movq	%rax,%r13
3750	movq	%rdx,%r14
3751	movq	0+0+0(%rbp),%rax
3752	mulq	%r11
3753	imulq	%r12,%r15
3754	addq	%rax,%r14
3755	adcq	%rdx,%r15
3756	movq	8+0+0(%rbp),%rax
3757	movq	%rax,%r9
3758	mulq	%r10
3759	addq	%rax,%r14
3760	adcq	$0,%rdx
3761	movq	%rdx,%r10
3762	movq	8+0+0(%rbp),%rax
3763	mulq	%r11
3764	addq	%rax,%r15
3765	adcq	$0,%rdx
3766	imulq	%r12,%r9
3767	addq	%r10,%r15
3768	adcq	%rdx,%r9
3769	movq	%r13,%r10
3770	movq	%r14,%r11
3771	movq	%r15,%r12
3772	andq	$3,%r12
3773	movq	%r15,%r13
3774	andq	$-4,%r13
3775	movq	%r9,%r14
3776	shrdq	$2,%r9,%r15
3777	shrq	$2,%r9
3778	addq	%r13,%r15
3779	adcq	%r14,%r9
3780	addq	%r15,%r10
3781	adcq	%r9,%r11
3782	adcq	$0,%r12
3783
3784	leaq	16(%rsi),%rsi
3785	subq	$1,%r8
3786	jmp	.Lprocess_extra_hash_loop
3787.Lprocess_extra_in_trailer:
3788	andq	$15,%rcx
3789	movq	%rcx,%rbx
3790	jz	.Ldo_length_block
3791	leaq	-1(%rsi,%rcx,1),%rsi
3792
3793.Lprocess_extra_in_trailer_load:
3794	pslldq	$1,%xmm15
3795	pinsrb	$0,(%rsi),%xmm15
3796	leaq	-1(%rsi),%rsi
3797	subq	$1,%rcx
3798	jnz	.Lprocess_extra_in_trailer_load
3799
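# Mask the partial block in %xmm15 down to its true length (%rbx bytes) with
# .Land_masks and absorb it into the Poly1305 accumulator.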
3800.Lprocess_partial_block:
3801
3802	leaq	.Land_masks(%rip),%r15
3803	shlq	$4,%rbx
3804	pand	-16(%r15,%rbx,1),%xmm15
3805.byte	102,77,15,126,253
3806	pextrq	$1,%xmm15,%r14
3807	addq	%r13,%r10
3808	adcq	%r14,%r11
3809	adcq	$1,%r12
3810	movq	0+0+0(%rbp),%rax
3811	movq	%rax,%r15
3812	mulq	%r10
3813	movq	%rax,%r13
3814	movq	%rdx,%r14
3815	movq	0+0+0(%rbp),%rax
3816	mulq	%r11
3817	imulq	%r12,%r15
3818	addq	%rax,%r14
3819	adcq	%rdx,%r15
3820	movq	8+0+0(%rbp),%rax
3821	movq	%rax,%r9
3822	mulq	%r10
3823	addq	%rax,%r14
3824	adcq	$0,%rdx
3825	movq	%rdx,%r10
3826	movq	8+0+0(%rbp),%rax
3827	mulq	%r11
3828	addq	%rax,%r15
3829	adcq	$0,%rdx
3830	imulq	%r12,%r9
3831	addq	%r10,%r15
3832	adcq	%rdx,%r9
3833	movq	%r13,%r10
3834	movq	%r14,%r11
3835	movq	%r15,%r12
3836	andq	$3,%r12
3837	movq	%r15,%r13
3838	andq	$-4,%r13
3839	movq	%r9,%r14
3840	shrdq	$2,%r9,%r15
3841	shrq	$2,%r9
3842	addq	%r13,%r15
3843	adcq	%r14,%r9
3844	addq	%r15,%r10
3845	adcq	%r9,%r11
3846	adcq	$0,%r12
3847
3848
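# Absorb the final block containing the AD and ciphertext lengths (stored at
# 32(%rbp)), then reduce the accumulator modulo 2^130-5 and add the "s" half
# of the key from 16(%rbp); the 16-byte tag is written through the pointer
# popped into %r9 below.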
3849.Ldo_length_block:
3850	addq	0+0+32(%rbp),%r10
3851	adcq	8+0+32(%rbp),%r11
3852	adcq	$1,%r12
3853	movq	0+0+0(%rbp),%rax
3854	movq	%rax,%r15
3855	mulq	%r10
3856	movq	%rax,%r13
3857	movq	%rdx,%r14
3858	movq	0+0+0(%rbp),%rax
3859	mulq	%r11
3860	imulq	%r12,%r15
3861	addq	%rax,%r14
3862	adcq	%rdx,%r15
3863	movq	8+0+0(%rbp),%rax
3864	movq	%rax,%r9
3865	mulq	%r10
3866	addq	%rax,%r14
3867	adcq	$0,%rdx
3868	movq	%rdx,%r10
3869	movq	8+0+0(%rbp),%rax
3870	mulq	%r11
3871	addq	%rax,%r15
3872	adcq	$0,%rdx
3873	imulq	%r12,%r9
3874	addq	%r10,%r15
3875	adcq	%rdx,%r9
3876	movq	%r13,%r10
3877	movq	%r14,%r11
3878	movq	%r15,%r12
3879	andq	$3,%r12
3880	movq	%r15,%r13
3881	andq	$-4,%r13
3882	movq	%r9,%r14
3883	shrdq	$2,%r9,%r15
3884	shrq	$2,%r9
3885	addq	%r13,%r15
3886	adcq	%r14,%r9
3887	addq	%r15,%r10
3888	adcq	%r9,%r11
3889	adcq	$0,%r12
3890
3891
3892	movq	%r10,%r13
3893	movq	%r11,%r14
3894	movq	%r12,%r15
3895	subq	$-5,%r10
3896	sbbq	$-1,%r11
3897	sbbq	$3,%r12
3898	cmovcq	%r13,%r10
3899	cmovcq	%r14,%r11
3900	cmovcq	%r15,%r12
3901
3902	addq	0+0+16(%rbp),%r10
3903	adcq	8+0+16(%rbp),%r11
3904
3905.cfi_remember_state
3906	addq	$288 + 0 + 32,%rsp
3907.cfi_adjust_cfa_offset	-(288 + 32)
3908
3909	popq	%r9
3910.cfi_adjust_cfa_offset	-8
3911.cfi_restore	%r9
3912	movq	%r10,(%r9)
3913	movq	%r11,8(%r9)
3914	popq	%r15
3915.cfi_adjust_cfa_offset	-8
3916.cfi_restore	%r15
3917	popq	%r14
3918.cfi_adjust_cfa_offset	-8
3919.cfi_restore	%r14
3920	popq	%r13
3921.cfi_adjust_cfa_offset	-8
3922.cfi_restore	%r13
3923	popq	%r12
3924.cfi_adjust_cfa_offset	-8
3925.cfi_restore	%r12
3926	popq	%rbx
3927.cfi_adjust_cfa_offset	-8
3928.cfi_restore	%rbx
3929	popq	%rbp
3930.cfi_adjust_cfa_offset	-8
3931.cfi_restore	%rbp
3932	.byte	0xf3,0xc3
3933
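# Seal path for inputs of at most 128 bytes: three ChaCha20 blocks are kept
# entirely in registers. The block computed with the initial counter supplies
# the Poly1305 key; the other two provide keystream for .Lseal_sse_128_tail_xor.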
3934.Lseal_sse_128:
3935.cfi_restore_state
3936	movdqu	.Lchacha20_consts(%rip),%xmm0
3937	movdqa	%xmm0,%xmm1
3938	movdqa	%xmm0,%xmm2
3939	movdqu	0(%r9),%xmm4
3940	movdqa	%xmm4,%xmm5
3941	movdqa	%xmm4,%xmm6
3942	movdqu	16(%r9),%xmm8
3943	movdqa	%xmm8,%xmm9
3944	movdqa	%xmm8,%xmm10
3945	movdqu	32(%r9),%xmm14
3946	movdqa	%xmm14,%xmm12
3947	paddd	.Lsse_inc(%rip),%xmm12
3948	movdqa	%xmm12,%xmm13
3949	paddd	.Lsse_inc(%rip),%xmm13
3950	movdqa	%xmm4,%xmm7
3951	movdqa	%xmm8,%xmm11
3952	movdqa	%xmm12,%xmm15
3953	movq	$10,%r10
3954
3955.Lseal_sse_128_rounds:
3956	paddd	%xmm4,%xmm0
3957	pxor	%xmm0,%xmm12
3958	pshufb	.Lrol16(%rip),%xmm12
3959	paddd	%xmm12,%xmm8
3960	pxor	%xmm8,%xmm4
3961	movdqa	%xmm4,%xmm3
3962	pslld	$12,%xmm3
3963	psrld	$20,%xmm4
3964	pxor	%xmm3,%xmm4
3965	paddd	%xmm4,%xmm0
3966	pxor	%xmm0,%xmm12
3967	pshufb	.Lrol8(%rip),%xmm12
3968	paddd	%xmm12,%xmm8
3969	pxor	%xmm8,%xmm4
3970	movdqa	%xmm4,%xmm3
3971	pslld	$7,%xmm3
3972	psrld	$25,%xmm4
3973	pxor	%xmm3,%xmm4
3974.byte	102,15,58,15,228,4
3975.byte	102,69,15,58,15,192,8
3976.byte	102,69,15,58,15,228,12
3977	paddd	%xmm5,%xmm1
3978	pxor	%xmm1,%xmm13
3979	pshufb	.Lrol16(%rip),%xmm13
3980	paddd	%xmm13,%xmm9
3981	pxor	%xmm9,%xmm5
3982	movdqa	%xmm5,%xmm3
3983	pslld	$12,%xmm3
3984	psrld	$20,%xmm5
3985	pxor	%xmm3,%xmm5
3986	paddd	%xmm5,%xmm1
3987	pxor	%xmm1,%xmm13
3988	pshufb	.Lrol8(%rip),%xmm13
3989	paddd	%xmm13,%xmm9
3990	pxor	%xmm9,%xmm5
3991	movdqa	%xmm5,%xmm3
3992	pslld	$7,%xmm3
3993	psrld	$25,%xmm5
3994	pxor	%xmm3,%xmm5
3995.byte	102,15,58,15,237,4
3996.byte	102,69,15,58,15,201,8
3997.byte	102,69,15,58,15,237,12
3998	paddd	%xmm6,%xmm2
3999	pxor	%xmm2,%xmm14
4000	pshufb	.Lrol16(%rip),%xmm14
4001	paddd	%xmm14,%xmm10
4002	pxor	%xmm10,%xmm6
4003	movdqa	%xmm6,%xmm3
4004	pslld	$12,%xmm3
4005	psrld	$20,%xmm6
4006	pxor	%xmm3,%xmm6
4007	paddd	%xmm6,%xmm2
4008	pxor	%xmm2,%xmm14
4009	pshufb	.Lrol8(%rip),%xmm14
4010	paddd	%xmm14,%xmm10
4011	pxor	%xmm10,%xmm6
4012	movdqa	%xmm6,%xmm3
4013	pslld	$7,%xmm3
4014	psrld	$25,%xmm6
4015	pxor	%xmm3,%xmm6
4016.byte	102,15,58,15,246,4
4017.byte	102,69,15,58,15,210,8
4018.byte	102,69,15,58,15,246,12
4019	paddd	%xmm4,%xmm0
4020	pxor	%xmm0,%xmm12
4021	pshufb	.Lrol16(%rip),%xmm12
4022	paddd	%xmm12,%xmm8
4023	pxor	%xmm8,%xmm4
4024	movdqa	%xmm4,%xmm3
4025	pslld	$12,%xmm3
4026	psrld	$20,%xmm4
4027	pxor	%xmm3,%xmm4
4028	paddd	%xmm4,%xmm0
4029	pxor	%xmm0,%xmm12
4030	pshufb	.Lrol8(%rip),%xmm12
4031	paddd	%xmm12,%xmm8
4032	pxor	%xmm8,%xmm4
4033	movdqa	%xmm4,%xmm3
4034	pslld	$7,%xmm3
4035	psrld	$25,%xmm4
4036	pxor	%xmm3,%xmm4
4037.byte	102,15,58,15,228,12
4038.byte	102,69,15,58,15,192,8
4039.byte	102,69,15,58,15,228,4
4040	paddd	%xmm5,%xmm1
4041	pxor	%xmm1,%xmm13
4042	pshufb	.Lrol16(%rip),%xmm13
4043	paddd	%xmm13,%xmm9
4044	pxor	%xmm9,%xmm5
4045	movdqa	%xmm5,%xmm3
4046	pslld	$12,%xmm3
4047	psrld	$20,%xmm5
4048	pxor	%xmm3,%xmm5
4049	paddd	%xmm5,%xmm1
4050	pxor	%xmm1,%xmm13
4051	pshufb	.Lrol8(%rip),%xmm13
4052	paddd	%xmm13,%xmm9
4053	pxor	%xmm9,%xmm5
4054	movdqa	%xmm5,%xmm3
4055	pslld	$7,%xmm3
4056	psrld	$25,%xmm5
4057	pxor	%xmm3,%xmm5
4058.byte	102,15,58,15,237,12
4059.byte	102,69,15,58,15,201,8
4060.byte	102,69,15,58,15,237,4
4061	paddd	%xmm6,%xmm2
4062	pxor	%xmm2,%xmm14
4063	pshufb	.Lrol16(%rip),%xmm14
4064	paddd	%xmm14,%xmm10
4065	pxor	%xmm10,%xmm6
4066	movdqa	%xmm6,%xmm3
4067	pslld	$12,%xmm3
4068	psrld	$20,%xmm6
4069	pxor	%xmm3,%xmm6
4070	paddd	%xmm6,%xmm2
4071	pxor	%xmm2,%xmm14
4072	pshufb	.Lrol8(%rip),%xmm14
4073	paddd	%xmm14,%xmm10
4074	pxor	%xmm10,%xmm6
4075	movdqa	%xmm6,%xmm3
4076	pslld	$7,%xmm3
4077	psrld	$25,%xmm6
4078	pxor	%xmm3,%xmm6
4079.byte	102,15,58,15,246,12
4080.byte	102,69,15,58,15,210,8
4081.byte	102,69,15,58,15,246,4
4082
4083	decq	%r10
4084	jnz	.Lseal_sse_128_rounds
4085	paddd	.Lchacha20_consts(%rip),%xmm0
4086	paddd	.Lchacha20_consts(%rip),%xmm1
4087	paddd	.Lchacha20_consts(%rip),%xmm2
4088	paddd	%xmm7,%xmm4
4089	paddd	%xmm7,%xmm5
4090	paddd	%xmm7,%xmm6
4091	paddd	%xmm11,%xmm8
4092	paddd	%xmm11,%xmm9
4093	paddd	%xmm15,%xmm12
4094	paddd	.Lsse_inc(%rip),%xmm15
4095	paddd	%xmm15,%xmm13
4096
4097	pand	.Lclamp(%rip),%xmm2
4098	movdqa	%xmm2,0+0(%rbp)
4099	movdqa	%xmm6,0+16(%rbp)
4100
4101	movq	%r8,%r8
4102	call	poly_hash_ad_internal
4103	jmp	.Lseal_sse_128_tail_xor
4104.size	chacha20_poly1305_seal, .-chacha20_poly1305_seal
4105.cfi_endproc
4106
4107
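# AVX2 variant of the open (decrypt) path. It is entered with the stack frame
# already established, so the CFI directives below only re-describe that frame
# without emitting any instructions.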
4108.type	chacha20_poly1305_open_avx2,@function
4109.align	64
4110chacha20_poly1305_open_avx2:
4111.cfi_startproc
4112
4113
4114.cfi_adjust_cfa_offset	8
4115.cfi_offset	%rbp,-16
4116.cfi_adjust_cfa_offset	8
4117.cfi_offset	%rbx,-24
4118.cfi_adjust_cfa_offset	8
4119.cfi_offset	%r12,-32
4120.cfi_adjust_cfa_offset	8
4121.cfi_offset	%r13,-40
4122.cfi_adjust_cfa_offset	8
4123.cfi_offset	%r14,-48
4124.cfi_adjust_cfa_offset	8
4125.cfi_offset	%r15,-56
4126.cfi_adjust_cfa_offset	8
4127.cfi_offset	%r9,-64
4128.cfi_adjust_cfa_offset	288 + 32
4129
4130	vzeroupper
4131	vmovdqa	.Lchacha20_consts(%rip),%ymm0
4132	vbroadcasti128	0(%r9),%ymm4
4133	vbroadcasti128	16(%r9),%ymm8
4134	vbroadcasti128	32(%r9),%ymm12
4135	vpaddd	.Lavx2_init(%rip),%ymm12,%ymm12
4136	cmpq	$192,%rbx
4137	jbe	.Lopen_avx2_192
4138	cmpq	$320,%rbx
4139	jbe	.Lopen_avx2_320
4140
4141	vmovdqa	%ymm4,0+64(%rbp)
4142	vmovdqa	%ymm8,0+96(%rbp)
4143	vmovdqa	%ymm12,0+160(%rbp)
4144	movq	$10,%r10
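# First state: one set of ymm registers holds two ChaCha20 blocks. After ten
# double rounds the low lanes are clamped to form the Poly1305 key and the
# high lanes supply keystream for the first 64 bytes of data.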
4145.Lopen_avx2_init_rounds:
4146	vpaddd	%ymm4,%ymm0,%ymm0
4147	vpxor	%ymm0,%ymm12,%ymm12
4148	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
4149	vpaddd	%ymm12,%ymm8,%ymm8
4150	vpxor	%ymm8,%ymm4,%ymm4
4151	vpsrld	$20,%ymm4,%ymm3
4152	vpslld	$12,%ymm4,%ymm4
4153	vpxor	%ymm3,%ymm4,%ymm4
4154	vpaddd	%ymm4,%ymm0,%ymm0
4155	vpxor	%ymm0,%ymm12,%ymm12
4156	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
4157	vpaddd	%ymm12,%ymm8,%ymm8
4158	vpxor	%ymm8,%ymm4,%ymm4
4159	vpslld	$7,%ymm4,%ymm3
4160	vpsrld	$25,%ymm4,%ymm4
4161	vpxor	%ymm3,%ymm4,%ymm4
4162	vpalignr	$12,%ymm12,%ymm12,%ymm12
4163	vpalignr	$8,%ymm8,%ymm8,%ymm8
4164	vpalignr	$4,%ymm4,%ymm4,%ymm4
4165	vpaddd	%ymm4,%ymm0,%ymm0
4166	vpxor	%ymm0,%ymm12,%ymm12
4167	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
4168	vpaddd	%ymm12,%ymm8,%ymm8
4169	vpxor	%ymm8,%ymm4,%ymm4
4170	vpsrld	$20,%ymm4,%ymm3
4171	vpslld	$12,%ymm4,%ymm4
4172	vpxor	%ymm3,%ymm4,%ymm4
4173	vpaddd	%ymm4,%ymm0,%ymm0
4174	vpxor	%ymm0,%ymm12,%ymm12
4175	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
4176	vpaddd	%ymm12,%ymm8,%ymm8
4177	vpxor	%ymm8,%ymm4,%ymm4
4178	vpslld	$7,%ymm4,%ymm3
4179	vpsrld	$25,%ymm4,%ymm4
4180	vpxor	%ymm3,%ymm4,%ymm4
4181	vpalignr	$4,%ymm12,%ymm12,%ymm12
4182	vpalignr	$8,%ymm8,%ymm8,%ymm8
4183	vpalignr	$12,%ymm4,%ymm4,%ymm4
4184
4185	decq	%r10
4186	jne	.Lopen_avx2_init_rounds
4187	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
4188	vpaddd	0+64(%rbp),%ymm4,%ymm4
4189	vpaddd	0+96(%rbp),%ymm8,%ymm8
4190	vpaddd	0+160(%rbp),%ymm12,%ymm12
4191
4192	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
4193
4194	vpand	.Lclamp(%rip),%ymm3,%ymm3
4195	vmovdqa	%ymm3,0+0(%rbp)
4196
4197	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
4198	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
4199
4200	movq	%r8,%r8
4201	call	poly_hash_ad_internal
4202
4203	xorq	%rcx,%rcx
4204.Lopen_avx2_init_hash:
4205	addq	0+0(%rsi,%rcx,1),%r10
4206	adcq	8+0(%rsi,%rcx,1),%r11
4207	adcq	$1,%r12
4208	movq	0+0+0(%rbp),%rax
4209	movq	%rax,%r15
4210	mulq	%r10
4211	movq	%rax,%r13
4212	movq	%rdx,%r14
4213	movq	0+0+0(%rbp),%rax
4214	mulq	%r11
4215	imulq	%r12,%r15
4216	addq	%rax,%r14
4217	adcq	%rdx,%r15
4218	movq	8+0+0(%rbp),%rax
4219	movq	%rax,%r9
4220	mulq	%r10
4221	addq	%rax,%r14
4222	adcq	$0,%rdx
4223	movq	%rdx,%r10
4224	movq	8+0+0(%rbp),%rax
4225	mulq	%r11
4226	addq	%rax,%r15
4227	adcq	$0,%rdx
4228	imulq	%r12,%r9
4229	addq	%r10,%r15
4230	adcq	%rdx,%r9
4231	movq	%r13,%r10
4232	movq	%r14,%r11
4233	movq	%r15,%r12
4234	andq	$3,%r12
4235	movq	%r15,%r13
4236	andq	$-4,%r13
4237	movq	%r9,%r14
4238	shrdq	$2,%r9,%r15
4239	shrq	$2,%r9
4240	addq	%r13,%r15
4241	adcq	%r14,%r9
4242	addq	%r15,%r10
4243	adcq	%r9,%r11
4244	adcq	$0,%r12
4245
4246	addq	$16,%rcx
4247	cmpq	$64,%rcx
4248	jne	.Lopen_avx2_init_hash
4249
4250	vpxor	0(%rsi),%ymm0,%ymm0
4251	vpxor	32(%rsi),%ymm4,%ymm4
4252
4253	vmovdqu	%ymm0,0(%rdi)
4254	vmovdqu	%ymm4,32(%rdi)
4255	leaq	64(%rsi),%rsi
4256	leaq	64(%rdi),%rdi
4257	subq	$64,%rbx
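# Bulk AVX2 open loop: each iteration generates eight 64-byte ChaCha20 blocks
# (512 bytes) in four ymm-wide state groups and hashes the same 512 bytes of
# ciphertext with a mulx-based Poly1305 before XORing and storing the output.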
4258.Lopen_avx2_main_loop:
4259
4260	cmpq	$512,%rbx
4261	jb	.Lopen_avx2_main_loop_done
4262	vmovdqa	.Lchacha20_consts(%rip),%ymm0
4263	vmovdqa	0+64(%rbp),%ymm4
4264	vmovdqa	0+96(%rbp),%ymm8
4265	vmovdqa	%ymm0,%ymm1
4266	vmovdqa	%ymm4,%ymm5
4267	vmovdqa	%ymm8,%ymm9
4268	vmovdqa	%ymm0,%ymm2
4269	vmovdqa	%ymm4,%ymm6
4270	vmovdqa	%ymm8,%ymm10
4271	vmovdqa	%ymm0,%ymm3
4272	vmovdqa	%ymm4,%ymm7
4273	vmovdqa	%ymm8,%ymm11
4274	vmovdqa	.Lavx2_inc(%rip),%ymm12
4275	vpaddd	0+160(%rbp),%ymm12,%ymm15
4276	vpaddd	%ymm15,%ymm12,%ymm14
4277	vpaddd	%ymm14,%ymm12,%ymm13
4278	vpaddd	%ymm13,%ymm12,%ymm12
4279	vmovdqa	%ymm15,0+256(%rbp)
4280	vmovdqa	%ymm14,0+224(%rbp)
4281	vmovdqa	%ymm13,0+192(%rbp)
4282	vmovdqa	%ymm12,0+160(%rbp)
4283
4284	xorq	%rcx,%rcx
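# One pass: a ChaCha20 double round across all four ymm state groups,
# interleaved with three 16-byte Poly1305 blocks; %rcx advances by 48 per pass
# until 480 bytes have been hashed, and the last two blocks of the 512-byte
# chunk are absorbed after the loop.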
4285.Lopen_avx2_main_loop_rounds:
4286	addq	0+0(%rsi,%rcx,1),%r10
4287	adcq	8+0(%rsi,%rcx,1),%r11
4288	adcq	$1,%r12
4289	vmovdqa	%ymm8,0+128(%rbp)
4290	vmovdqa	.Lrol16(%rip),%ymm8
4291	vpaddd	%ymm7,%ymm3,%ymm3
4292	vpaddd	%ymm6,%ymm2,%ymm2
4293	vpaddd	%ymm5,%ymm1,%ymm1
4294	vpaddd	%ymm4,%ymm0,%ymm0
4295	vpxor	%ymm3,%ymm15,%ymm15
4296	vpxor	%ymm2,%ymm14,%ymm14
4297	vpxor	%ymm1,%ymm13,%ymm13
4298	vpxor	%ymm0,%ymm12,%ymm12
4299	movq	0+0+0(%rbp),%rdx
4300	movq	%rdx,%r15
4301	mulxq	%r10,%r13,%r14
4302	mulxq	%r11,%rax,%rdx
4303	imulq	%r12,%r15
4304	addq	%rax,%r14
4305	adcq	%rdx,%r15
4306	vpshufb	%ymm8,%ymm15,%ymm15
4307	vpshufb	%ymm8,%ymm14,%ymm14
4308	vpshufb	%ymm8,%ymm13,%ymm13
4309	vpshufb	%ymm8,%ymm12,%ymm12
4310	vpaddd	%ymm15,%ymm11,%ymm11
4311	vpaddd	%ymm14,%ymm10,%ymm10
4312	vpaddd	%ymm13,%ymm9,%ymm9
4313	vpaddd	0+128(%rbp),%ymm12,%ymm8
4314	vpxor	%ymm11,%ymm7,%ymm7
4315	movq	8+0+0(%rbp),%rdx
4316	mulxq	%r10,%r10,%rax
4317	addq	%r10,%r14
4318	mulxq	%r11,%r11,%r9
4319	adcq	%r11,%r15
4320	adcq	$0,%r9
4321	imulq	%r12,%rdx
4322	vpxor	%ymm10,%ymm6,%ymm6
4323	vpxor	%ymm9,%ymm5,%ymm5
4324	vpxor	%ymm8,%ymm4,%ymm4
4325	vmovdqa	%ymm8,0+128(%rbp)
4326	vpsrld	$20,%ymm7,%ymm8
4327	vpslld	$32-20,%ymm7,%ymm7
4328	vpxor	%ymm8,%ymm7,%ymm7
4329	vpsrld	$20,%ymm6,%ymm8
4330	vpslld	$32-20,%ymm6,%ymm6
4331	vpxor	%ymm8,%ymm6,%ymm6
4332	vpsrld	$20,%ymm5,%ymm8
4333	vpslld	$32-20,%ymm5,%ymm5
4334	addq	%rax,%r15
4335	adcq	%rdx,%r9
4336	vpxor	%ymm8,%ymm5,%ymm5
4337	vpsrld	$20,%ymm4,%ymm8
4338	vpslld	$32-20,%ymm4,%ymm4
4339	vpxor	%ymm8,%ymm4,%ymm4
4340	vmovdqa	.Lrol8(%rip),%ymm8
4341	vpaddd	%ymm7,%ymm3,%ymm3
4342	vpaddd	%ymm6,%ymm2,%ymm2
4343	vpaddd	%ymm5,%ymm1,%ymm1
4344	vpaddd	%ymm4,%ymm0,%ymm0
4345	vpxor	%ymm3,%ymm15,%ymm15
4346	movq	%r13,%r10
4347	movq	%r14,%r11
4348	movq	%r15,%r12
4349	andq	$3,%r12
4350	movq	%r15,%r13
4351	andq	$-4,%r13
4352	movq	%r9,%r14
4353	shrdq	$2,%r9,%r15
4354	shrq	$2,%r9
4355	addq	%r13,%r15
4356	adcq	%r14,%r9
4357	addq	%r15,%r10
4358	adcq	%r9,%r11
4359	adcq	$0,%r12
4360	vpxor	%ymm2,%ymm14,%ymm14
4361	vpxor	%ymm1,%ymm13,%ymm13
4362	vpxor	%ymm0,%ymm12,%ymm12
4363	vpshufb	%ymm8,%ymm15,%ymm15
4364	vpshufb	%ymm8,%ymm14,%ymm14
4365	vpshufb	%ymm8,%ymm13,%ymm13
4366	vpshufb	%ymm8,%ymm12,%ymm12
4367	vpaddd	%ymm15,%ymm11,%ymm11
4368	vpaddd	%ymm14,%ymm10,%ymm10
4369	addq	0+16(%rsi,%rcx,1),%r10
4370	adcq	8+16(%rsi,%rcx,1),%r11
4371	adcq	$1,%r12
4372	vpaddd	%ymm13,%ymm9,%ymm9
4373	vpaddd	0+128(%rbp),%ymm12,%ymm8
4374	vpxor	%ymm11,%ymm7,%ymm7
4375	vpxor	%ymm10,%ymm6,%ymm6
4376	vpxor	%ymm9,%ymm5,%ymm5
4377	vpxor	%ymm8,%ymm4,%ymm4
4378	vmovdqa	%ymm8,0+128(%rbp)
4379	vpsrld	$25,%ymm7,%ymm8
4380	movq	0+0+0(%rbp),%rdx
4381	movq	%rdx,%r15
4382	mulxq	%r10,%r13,%r14
4383	mulxq	%r11,%rax,%rdx
4384	imulq	%r12,%r15
4385	addq	%rax,%r14
4386	adcq	%rdx,%r15
4387	vpslld	$32-25,%ymm7,%ymm7
4388	vpxor	%ymm8,%ymm7,%ymm7
4389	vpsrld	$25,%ymm6,%ymm8
4390	vpslld	$32-25,%ymm6,%ymm6
4391	vpxor	%ymm8,%ymm6,%ymm6
4392	vpsrld	$25,%ymm5,%ymm8
4393	vpslld	$32-25,%ymm5,%ymm5
4394	vpxor	%ymm8,%ymm5,%ymm5
4395	vpsrld	$25,%ymm4,%ymm8
4396	vpslld	$32-25,%ymm4,%ymm4
4397	vpxor	%ymm8,%ymm4,%ymm4
4398	vmovdqa	0+128(%rbp),%ymm8
4399	vpalignr	$4,%ymm7,%ymm7,%ymm7
4400	vpalignr	$8,%ymm11,%ymm11,%ymm11
4401	vpalignr	$12,%ymm15,%ymm15,%ymm15
4402	vpalignr	$4,%ymm6,%ymm6,%ymm6
4403	vpalignr	$8,%ymm10,%ymm10,%ymm10
4404	vpalignr	$12,%ymm14,%ymm14,%ymm14
4405	movq	8+0+0(%rbp),%rdx
4406	mulxq	%r10,%r10,%rax
4407	addq	%r10,%r14
4408	mulxq	%r11,%r11,%r9
4409	adcq	%r11,%r15
4410	adcq	$0,%r9
4411	imulq	%r12,%rdx
4412	vpalignr	$4,%ymm5,%ymm5,%ymm5
4413	vpalignr	$8,%ymm9,%ymm9,%ymm9
4414	vpalignr	$12,%ymm13,%ymm13,%ymm13
4415	vpalignr	$4,%ymm4,%ymm4,%ymm4
4416	vpalignr	$8,%ymm8,%ymm8,%ymm8
4417	vpalignr	$12,%ymm12,%ymm12,%ymm12
4418	vmovdqa	%ymm8,0+128(%rbp)
4419	vmovdqa	.Lrol16(%rip),%ymm8
4420	vpaddd	%ymm7,%ymm3,%ymm3
4421	vpaddd	%ymm6,%ymm2,%ymm2
4422	vpaddd	%ymm5,%ymm1,%ymm1
4423	vpaddd	%ymm4,%ymm0,%ymm0
4424	vpxor	%ymm3,%ymm15,%ymm15
4425	vpxor	%ymm2,%ymm14,%ymm14
4426	vpxor	%ymm1,%ymm13,%ymm13
4427	vpxor	%ymm0,%ymm12,%ymm12
4428	vpshufb	%ymm8,%ymm15,%ymm15
4429	vpshufb	%ymm8,%ymm14,%ymm14
4430	addq	%rax,%r15
4431	adcq	%rdx,%r9
4432	vpshufb	%ymm8,%ymm13,%ymm13
4433	vpshufb	%ymm8,%ymm12,%ymm12
4434	vpaddd	%ymm15,%ymm11,%ymm11
4435	vpaddd	%ymm14,%ymm10,%ymm10
4436	vpaddd	%ymm13,%ymm9,%ymm9
4437	vpaddd	0+128(%rbp),%ymm12,%ymm8
4438	vpxor	%ymm11,%ymm7,%ymm7
4439	vpxor	%ymm10,%ymm6,%ymm6
4440	vpxor	%ymm9,%ymm5,%ymm5
4441	movq	%r13,%r10
4442	movq	%r14,%r11
4443	movq	%r15,%r12
4444	andq	$3,%r12
4445	movq	%r15,%r13
4446	andq	$-4,%r13
4447	movq	%r9,%r14
4448	shrdq	$2,%r9,%r15
4449	shrq	$2,%r9
4450	addq	%r13,%r15
4451	adcq	%r14,%r9
4452	addq	%r15,%r10
4453	adcq	%r9,%r11
4454	adcq	$0,%r12
4455	vpxor	%ymm8,%ymm4,%ymm4
4456	vmovdqa	%ymm8,0+128(%rbp)
4457	vpsrld	$20,%ymm7,%ymm8
4458	vpslld	$32-20,%ymm7,%ymm7
4459	vpxor	%ymm8,%ymm7,%ymm7
4460	vpsrld	$20,%ymm6,%ymm8
4461	vpslld	$32-20,%ymm6,%ymm6
4462	vpxor	%ymm8,%ymm6,%ymm6
4463	addq	0+32(%rsi,%rcx,1),%r10
4464	adcq	8+32(%rsi,%rcx,1),%r11
4465	adcq	$1,%r12
4466
4467	leaq	48(%rcx),%rcx
4468	vpsrld	$20,%ymm5,%ymm8
4469	vpslld	$32-20,%ymm5,%ymm5
4470	vpxor	%ymm8,%ymm5,%ymm5
4471	vpsrld	$20,%ymm4,%ymm8
4472	vpslld	$32-20,%ymm4,%ymm4
4473	vpxor	%ymm8,%ymm4,%ymm4
4474	vmovdqa	.Lrol8(%rip),%ymm8
4475	vpaddd	%ymm7,%ymm3,%ymm3
4476	vpaddd	%ymm6,%ymm2,%ymm2
4477	vpaddd	%ymm5,%ymm1,%ymm1
4478	vpaddd	%ymm4,%ymm0,%ymm0
4479	vpxor	%ymm3,%ymm15,%ymm15
4480	vpxor	%ymm2,%ymm14,%ymm14
4481	vpxor	%ymm1,%ymm13,%ymm13
4482	vpxor	%ymm0,%ymm12,%ymm12
4483	vpshufb	%ymm8,%ymm15,%ymm15
4484	vpshufb	%ymm8,%ymm14,%ymm14
4485	vpshufb	%ymm8,%ymm13,%ymm13
4486	movq	0+0+0(%rbp),%rdx
4487	movq	%rdx,%r15
4488	mulxq	%r10,%r13,%r14
4489	mulxq	%r11,%rax,%rdx
4490	imulq	%r12,%r15
4491	addq	%rax,%r14
4492	adcq	%rdx,%r15
4493	vpshufb	%ymm8,%ymm12,%ymm12
4494	vpaddd	%ymm15,%ymm11,%ymm11
4495	vpaddd	%ymm14,%ymm10,%ymm10
4496	vpaddd	%ymm13,%ymm9,%ymm9
4497	vpaddd	0+128(%rbp),%ymm12,%ymm8
4498	vpxor	%ymm11,%ymm7,%ymm7
4499	vpxor	%ymm10,%ymm6,%ymm6
4500	vpxor	%ymm9,%ymm5,%ymm5
4501	movq	8+0+0(%rbp),%rdx
4502	mulxq	%r10,%r10,%rax
4503	addq	%r10,%r14
4504	mulxq	%r11,%r11,%r9
4505	adcq	%r11,%r15
4506	adcq	$0,%r9
4507	imulq	%r12,%rdx
4508	vpxor	%ymm8,%ymm4,%ymm4
4509	vmovdqa	%ymm8,0+128(%rbp)
4510	vpsrld	$25,%ymm7,%ymm8
4511	vpslld	$32-25,%ymm7,%ymm7
4512	vpxor	%ymm8,%ymm7,%ymm7
4513	vpsrld	$25,%ymm6,%ymm8
4514	vpslld	$32-25,%ymm6,%ymm6
4515	vpxor	%ymm8,%ymm6,%ymm6
4516	addq	%rax,%r15
4517	adcq	%rdx,%r9
4518	vpsrld	$25,%ymm5,%ymm8
4519	vpslld	$32-25,%ymm5,%ymm5
4520	vpxor	%ymm8,%ymm5,%ymm5
4521	vpsrld	$25,%ymm4,%ymm8
4522	vpslld	$32-25,%ymm4,%ymm4
4523	vpxor	%ymm8,%ymm4,%ymm4
4524	vmovdqa	0+128(%rbp),%ymm8
4525	vpalignr	$12,%ymm7,%ymm7,%ymm7
4526	vpalignr	$8,%ymm11,%ymm11,%ymm11
4527	vpalignr	$4,%ymm15,%ymm15,%ymm15
4528	vpalignr	$12,%ymm6,%ymm6,%ymm6
4529	vpalignr	$8,%ymm10,%ymm10,%ymm10
4530	vpalignr	$4,%ymm14,%ymm14,%ymm14
4531	vpalignr	$12,%ymm5,%ymm5,%ymm5
4532	vpalignr	$8,%ymm9,%ymm9,%ymm9
4533	vpalignr	$4,%ymm13,%ymm13,%ymm13
4534	vpalignr	$12,%ymm4,%ymm4,%ymm4
4535	vpalignr	$8,%ymm8,%ymm8,%ymm8
4536	movq	%r13,%r10
4537	movq	%r14,%r11
4538	movq	%r15,%r12
4539	andq	$3,%r12
4540	movq	%r15,%r13
4541	andq	$-4,%r13
4542	movq	%r9,%r14
4543	shrdq	$2,%r9,%r15
4544	shrq	$2,%r9
4545	addq	%r13,%r15
4546	adcq	%r14,%r9
4547	addq	%r15,%r10
4548	adcq	%r9,%r11
4549	adcq	$0,%r12
4550	vpalignr	$4,%ymm12,%ymm12,%ymm12
4551
4552	cmpq	$60*8,%rcx
4553	jne	.Lopen_avx2_main_loop_rounds
4554	vpaddd	.Lchacha20_consts(%rip),%ymm3,%ymm3
4555	vpaddd	0+64(%rbp),%ymm7,%ymm7
4556	vpaddd	0+96(%rbp),%ymm11,%ymm11
4557	vpaddd	0+256(%rbp),%ymm15,%ymm15
4558	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
4559	vpaddd	0+64(%rbp),%ymm6,%ymm6
4560	vpaddd	0+96(%rbp),%ymm10,%ymm10
4561	vpaddd	0+224(%rbp),%ymm14,%ymm14
4562	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
4563	vpaddd	0+64(%rbp),%ymm5,%ymm5
4564	vpaddd	0+96(%rbp),%ymm9,%ymm9
4565	vpaddd	0+192(%rbp),%ymm13,%ymm13
4566	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
4567	vpaddd	0+64(%rbp),%ymm4,%ymm4
4568	vpaddd	0+96(%rbp),%ymm8,%ymm8
4569	vpaddd	0+160(%rbp),%ymm12,%ymm12
4570
4571	vmovdqa	%ymm0,0+128(%rbp)
4572	addq	0+60*8(%rsi),%r10
4573	adcq	8+60*8(%rsi),%r11
4574	adcq	$1,%r12
4575	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
4576	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
4577	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
4578	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
4579	vpxor	0+0(%rsi),%ymm0,%ymm0
4580	vpxor	32+0(%rsi),%ymm3,%ymm3
4581	vpxor	64+0(%rsi),%ymm7,%ymm7
4582	vpxor	96+0(%rsi),%ymm11,%ymm11
4583	vmovdqu	%ymm0,0+0(%rdi)
4584	vmovdqu	%ymm3,32+0(%rdi)
4585	vmovdqu	%ymm7,64+0(%rdi)
4586	vmovdqu	%ymm11,96+0(%rdi)
4587
4588	vmovdqa	0+128(%rbp),%ymm0
4589	movq	0+0+0(%rbp),%rax
4590	movq	%rax,%r15
4591	mulq	%r10
4592	movq	%rax,%r13
4593	movq	%rdx,%r14
4594	movq	0+0+0(%rbp),%rax
4595	mulq	%r11
4596	imulq	%r12,%r15
4597	addq	%rax,%r14
4598	adcq	%rdx,%r15
4599	movq	8+0+0(%rbp),%rax
4600	movq	%rax,%r9
4601	mulq	%r10
4602	addq	%rax,%r14
4603	adcq	$0,%rdx
4604	movq	%rdx,%r10
4605	movq	8+0+0(%rbp),%rax
4606	mulq	%r11
4607	addq	%rax,%r15
4608	adcq	$0,%rdx
4609	imulq	%r12,%r9
4610	addq	%r10,%r15
4611	adcq	%rdx,%r9
4612	movq	%r13,%r10
4613	movq	%r14,%r11
4614	movq	%r15,%r12
4615	andq	$3,%r12
4616	movq	%r15,%r13
4617	andq	$-4,%r13
4618	movq	%r9,%r14
4619	shrdq	$2,%r9,%r15
4620	shrq	$2,%r9
4621	addq	%r13,%r15
4622	adcq	%r14,%r9
4623	addq	%r15,%r10
4624	adcq	%r9,%r11
4625	adcq	$0,%r12
4626	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
4627	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
4628	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
4629	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
4630	vpxor	0+128(%rsi),%ymm3,%ymm3
4631	vpxor	32+128(%rsi),%ymm2,%ymm2
4632	vpxor	64+128(%rsi),%ymm6,%ymm6
4633	vpxor	96+128(%rsi),%ymm10,%ymm10
4634	vmovdqu	%ymm3,0+128(%rdi)
4635	vmovdqu	%ymm2,32+128(%rdi)
4636	vmovdqu	%ymm6,64+128(%rdi)
4637	vmovdqu	%ymm10,96+128(%rdi)
4638	addq	0+60*8+16(%rsi),%r10
4639	adcq	8+60*8+16(%rsi),%r11
4640	adcq	$1,%r12
4641	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
4642	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
4643	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
4644	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
4645	vpxor	0+256(%rsi),%ymm3,%ymm3
4646	vpxor	32+256(%rsi),%ymm1,%ymm1
4647	vpxor	64+256(%rsi),%ymm5,%ymm5
4648	vpxor	96+256(%rsi),%ymm9,%ymm9
4649	vmovdqu	%ymm3,0+256(%rdi)
4650	vmovdqu	%ymm1,32+256(%rdi)
4651	vmovdqu	%ymm5,64+256(%rdi)
4652	vmovdqu	%ymm9,96+256(%rdi)
4653	movq	0+0+0(%rbp),%rax
4654	movq	%rax,%r15
4655	mulq	%r10
4656	movq	%rax,%r13
4657	movq	%rdx,%r14
4658	movq	0+0+0(%rbp),%rax
4659	mulq	%r11
4660	imulq	%r12,%r15
4661	addq	%rax,%r14
4662	adcq	%rdx,%r15
4663	movq	8+0+0(%rbp),%rax
4664	movq	%rax,%r9
4665	mulq	%r10
4666	addq	%rax,%r14
4667	adcq	$0,%rdx
4668	movq	%rdx,%r10
4669	movq	8+0+0(%rbp),%rax
4670	mulq	%r11
4671	addq	%rax,%r15
4672	adcq	$0,%rdx
4673	imulq	%r12,%r9
4674	addq	%r10,%r15
4675	adcq	%rdx,%r9
4676	movq	%r13,%r10
4677	movq	%r14,%r11
4678	movq	%r15,%r12
4679	andq	$3,%r12
4680	movq	%r15,%r13
4681	andq	$-4,%r13
4682	movq	%r9,%r14
4683	shrdq	$2,%r9,%r15
4684	shrq	$2,%r9
4685	addq	%r13,%r15
4686	adcq	%r14,%r9
4687	addq	%r15,%r10
4688	adcq	%r9,%r11
4689	adcq	$0,%r12
4690	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
4691	vperm2i128	$0x13,%ymm0,%ymm4,%ymm4
4692	vperm2i128	$0x02,%ymm8,%ymm12,%ymm0
4693	vperm2i128	$0x13,%ymm8,%ymm12,%ymm8
4694	vpxor	0+384(%rsi),%ymm3,%ymm3
4695	vpxor	32+384(%rsi),%ymm0,%ymm0
4696	vpxor	64+384(%rsi),%ymm4,%ymm4
4697	vpxor	96+384(%rsi),%ymm8,%ymm8
4698	vmovdqu	%ymm3,0+384(%rdi)
4699	vmovdqu	%ymm0,32+384(%rdi)
4700	vmovdqu	%ymm4,64+384(%rdi)
4701	vmovdqu	%ymm8,96+384(%rdi)
4702
4703	leaq	512(%rsi),%rsi
4704	leaq	512(%rdi),%rdi
4705	subq	$512,%rbx
4706	jmp	.Lopen_avx2_main_loop
4707.Lopen_avx2_main_loop_done:
4708	testq	%rbx,%rbx
4709	vzeroupper
4710	je	.Lopen_sse_finalize
4711
4712	cmpq	$384,%rbx
4713	ja	.Lopen_avx2_tail_512
4714	cmpq	$256,%rbx
4715	ja	.Lopen_avx2_tail_384
4716	cmpq	$128,%rbx
4717	ja	.Lopen_avx2_tail_256
4718	vmovdqa	.Lchacha20_consts(%rip),%ymm0
4719	vmovdqa	0+64(%rbp),%ymm4
4720	vmovdqa	0+96(%rbp),%ymm8
4721	vmovdqa	.Lavx2_inc(%rip),%ymm12
4722	vpaddd	0+160(%rbp),%ymm12,%ymm12
4723	vmovdqa	%ymm12,0+160(%rbp)
4724
4725	xorq	%r8,%r8
4726	movq	%rbx,%rcx
4727	andq	$-16,%rcx
4728	testq	%rcx,%rcx
4729	je	.Lopen_avx2_tail_128_rounds
4730.Lopen_avx2_tail_128_rounds_and_x1hash:
4731	addq	0+0(%rsi,%r8,1),%r10
4732	adcq	8+0(%rsi,%r8,1),%r11
4733	adcq	$1,%r12
4734	movq	0+0+0(%rbp),%rax
4735	movq	%rax,%r15
4736	mulq	%r10
4737	movq	%rax,%r13
4738	movq	%rdx,%r14
4739	movq	0+0+0(%rbp),%rax
4740	mulq	%r11
4741	imulq	%r12,%r15
4742	addq	%rax,%r14
4743	adcq	%rdx,%r15
4744	movq	8+0+0(%rbp),%rax
4745	movq	%rax,%r9
4746	mulq	%r10
4747	addq	%rax,%r14
4748	adcq	$0,%rdx
4749	movq	%rdx,%r10
4750	movq	8+0+0(%rbp),%rax
4751	mulq	%r11
4752	addq	%rax,%r15
4753	adcq	$0,%rdx
4754	imulq	%r12,%r9
4755	addq	%r10,%r15
4756	adcq	%rdx,%r9
4757	movq	%r13,%r10
4758	movq	%r14,%r11
4759	movq	%r15,%r12
4760	andq	$3,%r12
4761	movq	%r15,%r13
4762	andq	$-4,%r13
4763	movq	%r9,%r14
4764	shrdq	$2,%r9,%r15
4765	shrq	$2,%r9
4766	addq	%r13,%r15
4767	adcq	%r14,%r9
4768	addq	%r15,%r10
4769	adcq	%r9,%r11
4770	adcq	$0,%r12
4771
4772.Lopen_avx2_tail_128_rounds:
4773	addq	$16,%r8
4774	vpaddd	%ymm4,%ymm0,%ymm0
4775	vpxor	%ymm0,%ymm12,%ymm12
4776	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
4777	vpaddd	%ymm12,%ymm8,%ymm8
4778	vpxor	%ymm8,%ymm4,%ymm4
4779	vpsrld	$20,%ymm4,%ymm3
4780	vpslld	$12,%ymm4,%ymm4
4781	vpxor	%ymm3,%ymm4,%ymm4
4782	vpaddd	%ymm4,%ymm0,%ymm0
4783	vpxor	%ymm0,%ymm12,%ymm12
4784	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
4785	vpaddd	%ymm12,%ymm8,%ymm8
4786	vpxor	%ymm8,%ymm4,%ymm4
4787	vpslld	$7,%ymm4,%ymm3
4788	vpsrld	$25,%ymm4,%ymm4
4789	vpxor	%ymm3,%ymm4,%ymm4
4790	vpalignr	$12,%ymm12,%ymm12,%ymm12
4791	vpalignr	$8,%ymm8,%ymm8,%ymm8
4792	vpalignr	$4,%ymm4,%ymm4,%ymm4
4793	vpaddd	%ymm4,%ymm0,%ymm0
4794	vpxor	%ymm0,%ymm12,%ymm12
4795	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
4796	vpaddd	%ymm12,%ymm8,%ymm8
4797	vpxor	%ymm8,%ymm4,%ymm4
4798	vpsrld	$20,%ymm4,%ymm3
4799	vpslld	$12,%ymm4,%ymm4
4800	vpxor	%ymm3,%ymm4,%ymm4
4801	vpaddd	%ymm4,%ymm0,%ymm0
4802	vpxor	%ymm0,%ymm12,%ymm12
4803	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
4804	vpaddd	%ymm12,%ymm8,%ymm8
4805	vpxor	%ymm8,%ymm4,%ymm4
4806	vpslld	$7,%ymm4,%ymm3
4807	vpsrld	$25,%ymm4,%ymm4
4808	vpxor	%ymm3,%ymm4,%ymm4
4809	vpalignr	$4,%ymm12,%ymm12,%ymm12
4810	vpalignr	$8,%ymm8,%ymm8,%ymm8
4811	vpalignr	$12,%ymm4,%ymm4,%ymm4
4812
4813	cmpq	%rcx,%r8
4814	jb	.Lopen_avx2_tail_128_rounds_and_x1hash
4815	cmpq	$160,%r8
4816	jne	.Lopen_avx2_tail_128_rounds
4817	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
4818	vpaddd	0+64(%rbp),%ymm4,%ymm4
4819	vpaddd	0+96(%rbp),%ymm8,%ymm8
4820	vpaddd	0+160(%rbp),%ymm12,%ymm12
4821	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
4822	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
4823	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
4824	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
4825	vmovdqa	%ymm3,%ymm8
4826
4827	jmp	.Lopen_avx2_tail_128_xor
4828
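/* Editorial annotation, not part of the generated file: 129..256-byte tail.
   Two ChaCha20 states are processed; the Poly1305 steps here use the
   mulx-based form of the same multiply-and-reduce. The number of hash steps
   interleaved with the rounds is capped at 10 (the cmovg below), and any
   ciphertext blocks left over are hashed afterwards in
   .Lopen_avx2_tail_256_hash. */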
4829.Lopen_avx2_tail_256:
4830	vmovdqa	.Lchacha20_consts(%rip),%ymm0
4831	vmovdqa	0+64(%rbp),%ymm4
4832	vmovdqa	0+96(%rbp),%ymm8
4833	vmovdqa	%ymm0,%ymm1
4834	vmovdqa	%ymm4,%ymm5
4835	vmovdqa	%ymm8,%ymm9
4836	vmovdqa	.Lavx2_inc(%rip),%ymm12
4837	vpaddd	0+160(%rbp),%ymm12,%ymm13
4838	vpaddd	%ymm13,%ymm12,%ymm12
4839	vmovdqa	%ymm12,0+160(%rbp)
4840	vmovdqa	%ymm13,0+192(%rbp)
4841
4842	movq	%rbx,0+128(%rbp)
4843	movq	%rbx,%rcx
4844	subq	$128,%rcx
4845	shrq	$4,%rcx
4846	movq	$10,%r8
4847	cmpq	$10,%rcx
4848	cmovgq	%r8,%rcx
4849	movq	%rsi,%rbx
4850	xorq	%r8,%r8
4851.Lopen_avx2_tail_256_rounds_and_x1hash:
4852	addq	0+0(%rbx),%r10
4853	adcq	8+0(%rbx),%r11
4854	adcq	$1,%r12
4855	movq	0+0+0(%rbp),%rdx
4856	movq	%rdx,%r15
4857	mulxq	%r10,%r13,%r14
4858	mulxq	%r11,%rax,%rdx
4859	imulq	%r12,%r15
4860	addq	%rax,%r14
4861	adcq	%rdx,%r15
4862	movq	8+0+0(%rbp),%rdx
4863	mulxq	%r10,%r10,%rax
4864	addq	%r10,%r14
4865	mulxq	%r11,%r11,%r9
4866	adcq	%r11,%r15
4867	adcq	$0,%r9
4868	imulq	%r12,%rdx
4869	addq	%rax,%r15
4870	adcq	%rdx,%r9
4871	movq	%r13,%r10
4872	movq	%r14,%r11
4873	movq	%r15,%r12
4874	andq	$3,%r12
4875	movq	%r15,%r13
4876	andq	$-4,%r13
4877	movq	%r9,%r14
4878	shrdq	$2,%r9,%r15
4879	shrq	$2,%r9
4880	addq	%r13,%r15
4881	adcq	%r14,%r9
4882	addq	%r15,%r10
4883	adcq	%r9,%r11
4884	adcq	$0,%r12
4885
4886	leaq	16(%rbx),%rbx
4887.Lopen_avx2_tail_256_rounds:
4888	vpaddd	%ymm4,%ymm0,%ymm0
4889	vpxor	%ymm0,%ymm12,%ymm12
4890	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
4891	vpaddd	%ymm12,%ymm8,%ymm8
4892	vpxor	%ymm8,%ymm4,%ymm4
4893	vpsrld	$20,%ymm4,%ymm3
4894	vpslld	$12,%ymm4,%ymm4
4895	vpxor	%ymm3,%ymm4,%ymm4
4896	vpaddd	%ymm4,%ymm0,%ymm0
4897	vpxor	%ymm0,%ymm12,%ymm12
4898	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
4899	vpaddd	%ymm12,%ymm8,%ymm8
4900	vpxor	%ymm8,%ymm4,%ymm4
4901	vpslld	$7,%ymm4,%ymm3
4902	vpsrld	$25,%ymm4,%ymm4
4903	vpxor	%ymm3,%ymm4,%ymm4
4904	vpalignr	$12,%ymm12,%ymm12,%ymm12
4905	vpalignr	$8,%ymm8,%ymm8,%ymm8
4906	vpalignr	$4,%ymm4,%ymm4,%ymm4
4907	vpaddd	%ymm5,%ymm1,%ymm1
4908	vpxor	%ymm1,%ymm13,%ymm13
4909	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
4910	vpaddd	%ymm13,%ymm9,%ymm9
4911	vpxor	%ymm9,%ymm5,%ymm5
4912	vpsrld	$20,%ymm5,%ymm3
4913	vpslld	$12,%ymm5,%ymm5
4914	vpxor	%ymm3,%ymm5,%ymm5
4915	vpaddd	%ymm5,%ymm1,%ymm1
4916	vpxor	%ymm1,%ymm13,%ymm13
4917	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
4918	vpaddd	%ymm13,%ymm9,%ymm9
4919	vpxor	%ymm9,%ymm5,%ymm5
4920	vpslld	$7,%ymm5,%ymm3
4921	vpsrld	$25,%ymm5,%ymm5
4922	vpxor	%ymm3,%ymm5,%ymm5
4923	vpalignr	$12,%ymm13,%ymm13,%ymm13
4924	vpalignr	$8,%ymm9,%ymm9,%ymm9
4925	vpalignr	$4,%ymm5,%ymm5,%ymm5
4926
4927	incq	%r8
4928	vpaddd	%ymm4,%ymm0,%ymm0
4929	vpxor	%ymm0,%ymm12,%ymm12
4930	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
4931	vpaddd	%ymm12,%ymm8,%ymm8
4932	vpxor	%ymm8,%ymm4,%ymm4
4933	vpsrld	$20,%ymm4,%ymm3
4934	vpslld	$12,%ymm4,%ymm4
4935	vpxor	%ymm3,%ymm4,%ymm4
4936	vpaddd	%ymm4,%ymm0,%ymm0
4937	vpxor	%ymm0,%ymm12,%ymm12
4938	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
4939	vpaddd	%ymm12,%ymm8,%ymm8
4940	vpxor	%ymm8,%ymm4,%ymm4
4941	vpslld	$7,%ymm4,%ymm3
4942	vpsrld	$25,%ymm4,%ymm4
4943	vpxor	%ymm3,%ymm4,%ymm4
4944	vpalignr	$4,%ymm12,%ymm12,%ymm12
4945	vpalignr	$8,%ymm8,%ymm8,%ymm8
4946	vpalignr	$12,%ymm4,%ymm4,%ymm4
4947	vpaddd	%ymm5,%ymm1,%ymm1
4948	vpxor	%ymm1,%ymm13,%ymm13
4949	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
4950	vpaddd	%ymm13,%ymm9,%ymm9
4951	vpxor	%ymm9,%ymm5,%ymm5
4952	vpsrld	$20,%ymm5,%ymm3
4953	vpslld	$12,%ymm5,%ymm5
4954	vpxor	%ymm3,%ymm5,%ymm5
4955	vpaddd	%ymm5,%ymm1,%ymm1
4956	vpxor	%ymm1,%ymm13,%ymm13
4957	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
4958	vpaddd	%ymm13,%ymm9,%ymm9
4959	vpxor	%ymm9,%ymm5,%ymm5
4960	vpslld	$7,%ymm5,%ymm3
4961	vpsrld	$25,%ymm5,%ymm5
4962	vpxor	%ymm3,%ymm5,%ymm5
4963	vpalignr	$4,%ymm13,%ymm13,%ymm13
4964	vpalignr	$8,%ymm9,%ymm9,%ymm9
4965	vpalignr	$12,%ymm5,%ymm5,%ymm5
4966	vpaddd	%ymm6,%ymm2,%ymm2
4967	vpxor	%ymm2,%ymm14,%ymm14
4968	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
4969	vpaddd	%ymm14,%ymm10,%ymm10
4970	vpxor	%ymm10,%ymm6,%ymm6
4971	vpsrld	$20,%ymm6,%ymm3
4972	vpslld	$12,%ymm6,%ymm6
4973	vpxor	%ymm3,%ymm6,%ymm6
4974	vpaddd	%ymm6,%ymm2,%ymm2
4975	vpxor	%ymm2,%ymm14,%ymm14
4976	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
4977	vpaddd	%ymm14,%ymm10,%ymm10
4978	vpxor	%ymm10,%ymm6,%ymm6
4979	vpslld	$7,%ymm6,%ymm3
4980	vpsrld	$25,%ymm6,%ymm6
4981	vpxor	%ymm3,%ymm6,%ymm6
4982	vpalignr	$4,%ymm14,%ymm14,%ymm14
4983	vpalignr	$8,%ymm10,%ymm10,%ymm10
4984	vpalignr	$12,%ymm6,%ymm6,%ymm6
4985
4986	cmpq	%rcx,%r8
4987	jb	.Lopen_avx2_tail_256_rounds_and_x1hash
4988	cmpq	$10,%r8
4989	jne	.Lopen_avx2_tail_256_rounds
4990	movq	%rbx,%r8
4991	subq	%rsi,%rbx
4992	movq	%rbx,%rcx
4993	movq	0+128(%rbp),%rbx
4994.Lopen_avx2_tail_256_hash:
4995	addq	$16,%rcx
4996	cmpq	%rbx,%rcx
4997	jg	.Lopen_avx2_tail_256_done
4998	addq	0+0(%r8),%r10
4999	adcq	8+0(%r8),%r11
5000	adcq	$1,%r12
5001	movq	0+0+0(%rbp),%rdx
5002	movq	%rdx,%r15
5003	mulxq	%r10,%r13,%r14
5004	mulxq	%r11,%rax,%rdx
5005	imulq	%r12,%r15
5006	addq	%rax,%r14
5007	adcq	%rdx,%r15
5008	movq	8+0+0(%rbp),%rdx
5009	mulxq	%r10,%r10,%rax
5010	addq	%r10,%r14
5011	mulxq	%r11,%r11,%r9
5012	adcq	%r11,%r15
5013	adcq	$0,%r9
5014	imulq	%r12,%rdx
5015	addq	%rax,%r15
5016	adcq	%rdx,%r9
5017	movq	%r13,%r10
5018	movq	%r14,%r11
5019	movq	%r15,%r12
5020	andq	$3,%r12
5021	movq	%r15,%r13
5022	andq	$-4,%r13
5023	movq	%r9,%r14
5024	shrdq	$2,%r9,%r15
5025	shrq	$2,%r9
5026	addq	%r13,%r15
5027	adcq	%r14,%r9
5028	addq	%r15,%r10
5029	adcq	%r9,%r11
5030	adcq	$0,%r12
5031
5032	leaq	16(%r8),%r8
5033	jmp	.Lopen_avx2_tail_256_hash
5034.Lopen_avx2_tail_256_done:
5035	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
5036	vpaddd	0+64(%rbp),%ymm5,%ymm5
5037	vpaddd	0+96(%rbp),%ymm9,%ymm9
5038	vpaddd	0+192(%rbp),%ymm13,%ymm13
5039	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
5040	vpaddd	0+64(%rbp),%ymm4,%ymm4
5041	vpaddd	0+96(%rbp),%ymm8,%ymm8
5042	vpaddd	0+160(%rbp),%ymm12,%ymm12
5043	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
5044	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
5045	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
5046	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
5047	vpxor	0+0(%rsi),%ymm3,%ymm3
5048	vpxor	32+0(%rsi),%ymm1,%ymm1
5049	vpxor	64+0(%rsi),%ymm5,%ymm5
5050	vpxor	96+0(%rsi),%ymm9,%ymm9
5051	vmovdqu	%ymm3,0+0(%rdi)
5052	vmovdqu	%ymm1,32+0(%rdi)
5053	vmovdqu	%ymm5,64+0(%rdi)
5054	vmovdqu	%ymm9,96+0(%rdi)
5055	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
5056	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
5057	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
5058	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
5059	vmovdqa	%ymm3,%ymm8
5060
5061	leaq	128(%rsi),%rsi
5062	leaq	128(%rdi),%rdi
5063	subq	$128,%rbx
5064	jmp	.Lopen_avx2_tail_128_xor
5065
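/* Editorial annotation, not part of the generated file: 257..384-byte tail.
   Three ChaCha20 states with consecutive counters are run; each double-round
   interleaves up to two Poly1305 steps, and the remaining ciphertext blocks
   are hashed in .Lopen_avx2_384_tail_hash before the XOR phase. */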
5066.Lopen_avx2_tail_384:
5067	vmovdqa	.Lchacha20_consts(%rip),%ymm0
5068	vmovdqa	0+64(%rbp),%ymm4
5069	vmovdqa	0+96(%rbp),%ymm8
5070	vmovdqa	%ymm0,%ymm1
5071	vmovdqa	%ymm4,%ymm5
5072	vmovdqa	%ymm8,%ymm9
5073	vmovdqa	%ymm0,%ymm2
5074	vmovdqa	%ymm4,%ymm6
5075	vmovdqa	%ymm8,%ymm10
5076	vmovdqa	.Lavx2_inc(%rip),%ymm12
5077	vpaddd	0+160(%rbp),%ymm12,%ymm14
5078	vpaddd	%ymm14,%ymm12,%ymm13
5079	vpaddd	%ymm13,%ymm12,%ymm12
5080	vmovdqa	%ymm12,0+160(%rbp)
5081	vmovdqa	%ymm13,0+192(%rbp)
5082	vmovdqa	%ymm14,0+224(%rbp)
5083
5084	movq	%rbx,0+128(%rbp)
5085	movq	%rbx,%rcx
5086	subq	$256,%rcx
5087	shrq	$4,%rcx
5088	addq	$6,%rcx
5089	movq	$10,%r8
5090	cmpq	$10,%rcx
5091	cmovgq	%r8,%rcx
5092	movq	%rsi,%rbx
5093	xorq	%r8,%r8
5094.Lopen_avx2_tail_384_rounds_and_x2hash:
5095	addq	0+0(%rbx),%r10
5096	adcq	8+0(%rbx),%r11
5097	adcq	$1,%r12
5098	movq	0+0+0(%rbp),%rdx
5099	movq	%rdx,%r15
5100	mulxq	%r10,%r13,%r14
5101	mulxq	%r11,%rax,%rdx
5102	imulq	%r12,%r15
5103	addq	%rax,%r14
5104	adcq	%rdx,%r15
5105	movq	8+0+0(%rbp),%rdx
5106	mulxq	%r10,%r10,%rax
5107	addq	%r10,%r14
5108	mulxq	%r11,%r11,%r9
5109	adcq	%r11,%r15
5110	adcq	$0,%r9
5111	imulq	%r12,%rdx
5112	addq	%rax,%r15
5113	adcq	%rdx,%r9
5114	movq	%r13,%r10
5115	movq	%r14,%r11
5116	movq	%r15,%r12
5117	andq	$3,%r12
5118	movq	%r15,%r13
5119	andq	$-4,%r13
5120	movq	%r9,%r14
5121	shrdq	$2,%r9,%r15
5122	shrq	$2,%r9
5123	addq	%r13,%r15
5124	adcq	%r14,%r9
5125	addq	%r15,%r10
5126	adcq	%r9,%r11
5127	adcq	$0,%r12
5128
5129	leaq	16(%rbx),%rbx
5130.Lopen_avx2_tail_384_rounds_and_x1hash:
5131	vpaddd	%ymm6,%ymm2,%ymm2
5132	vpxor	%ymm2,%ymm14,%ymm14
5133	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
5134	vpaddd	%ymm14,%ymm10,%ymm10
5135	vpxor	%ymm10,%ymm6,%ymm6
5136	vpsrld	$20,%ymm6,%ymm3
5137	vpslld	$12,%ymm6,%ymm6
5138	vpxor	%ymm3,%ymm6,%ymm6
5139	vpaddd	%ymm6,%ymm2,%ymm2
5140	vpxor	%ymm2,%ymm14,%ymm14
5141	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
5142	vpaddd	%ymm14,%ymm10,%ymm10
5143	vpxor	%ymm10,%ymm6,%ymm6
5144	vpslld	$7,%ymm6,%ymm3
5145	vpsrld	$25,%ymm6,%ymm6
5146	vpxor	%ymm3,%ymm6,%ymm6
5147	vpalignr	$12,%ymm14,%ymm14,%ymm14
5148	vpalignr	$8,%ymm10,%ymm10,%ymm10
5149	vpalignr	$4,%ymm6,%ymm6,%ymm6
5150	vpaddd	%ymm5,%ymm1,%ymm1
5151	vpxor	%ymm1,%ymm13,%ymm13
5152	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
5153	vpaddd	%ymm13,%ymm9,%ymm9
5154	vpxor	%ymm9,%ymm5,%ymm5
5155	vpsrld	$20,%ymm5,%ymm3
5156	vpslld	$12,%ymm5,%ymm5
5157	vpxor	%ymm3,%ymm5,%ymm5
5158	vpaddd	%ymm5,%ymm1,%ymm1
5159	vpxor	%ymm1,%ymm13,%ymm13
5160	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
5161	vpaddd	%ymm13,%ymm9,%ymm9
5162	vpxor	%ymm9,%ymm5,%ymm5
5163	vpslld	$7,%ymm5,%ymm3
5164	vpsrld	$25,%ymm5,%ymm5
5165	vpxor	%ymm3,%ymm5,%ymm5
5166	vpalignr	$12,%ymm13,%ymm13,%ymm13
5167	vpalignr	$8,%ymm9,%ymm9,%ymm9
5168	vpalignr	$4,%ymm5,%ymm5,%ymm5
5169	vpaddd	%ymm4,%ymm0,%ymm0
5170	vpxor	%ymm0,%ymm12,%ymm12
5171	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
5172	vpaddd	%ymm12,%ymm8,%ymm8
5173	vpxor	%ymm8,%ymm4,%ymm4
5174	vpsrld	$20,%ymm4,%ymm3
5175	vpslld	$12,%ymm4,%ymm4
5176	vpxor	%ymm3,%ymm4,%ymm4
5177	vpaddd	%ymm4,%ymm0,%ymm0
5178	vpxor	%ymm0,%ymm12,%ymm12
5179	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
5180	vpaddd	%ymm12,%ymm8,%ymm8
5181	vpxor	%ymm8,%ymm4,%ymm4
5182	vpslld	$7,%ymm4,%ymm3
5183	vpsrld	$25,%ymm4,%ymm4
5184	vpxor	%ymm3,%ymm4,%ymm4
5185	vpalignr	$12,%ymm12,%ymm12,%ymm12
5186	vpalignr	$8,%ymm8,%ymm8,%ymm8
5187	vpalignr	$4,%ymm4,%ymm4,%ymm4
5188	addq	0+0(%rbx),%r10
5189	adcq	8+0(%rbx),%r11
5190	adcq	$1,%r12
5191	movq	0+0+0(%rbp),%rax
5192	movq	%rax,%r15
5193	mulq	%r10
5194	movq	%rax,%r13
5195	movq	%rdx,%r14
5196	movq	0+0+0(%rbp),%rax
5197	mulq	%r11
5198	imulq	%r12,%r15
5199	addq	%rax,%r14
5200	adcq	%rdx,%r15
5201	movq	8+0+0(%rbp),%rax
5202	movq	%rax,%r9
5203	mulq	%r10
5204	addq	%rax,%r14
5205	adcq	$0,%rdx
5206	movq	%rdx,%r10
5207	movq	8+0+0(%rbp),%rax
5208	mulq	%r11
5209	addq	%rax,%r15
5210	adcq	$0,%rdx
5211	imulq	%r12,%r9
5212	addq	%r10,%r15
5213	adcq	%rdx,%r9
5214	movq	%r13,%r10
5215	movq	%r14,%r11
5216	movq	%r15,%r12
5217	andq	$3,%r12
5218	movq	%r15,%r13
5219	andq	$-4,%r13
5220	movq	%r9,%r14
5221	shrdq	$2,%r9,%r15
5222	shrq	$2,%r9
5223	addq	%r13,%r15
5224	adcq	%r14,%r9
5225	addq	%r15,%r10
5226	adcq	%r9,%r11
5227	adcq	$0,%r12
5228
5229	leaq	16(%rbx),%rbx
5230	incq	%r8
5231	vpaddd	%ymm6,%ymm2,%ymm2
5232	vpxor	%ymm2,%ymm14,%ymm14
5233	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
5234	vpaddd	%ymm14,%ymm10,%ymm10
5235	vpxor	%ymm10,%ymm6,%ymm6
5236	vpsrld	$20,%ymm6,%ymm3
5237	vpslld	$12,%ymm6,%ymm6
5238	vpxor	%ymm3,%ymm6,%ymm6
5239	vpaddd	%ymm6,%ymm2,%ymm2
5240	vpxor	%ymm2,%ymm14,%ymm14
5241	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
5242	vpaddd	%ymm14,%ymm10,%ymm10
5243	vpxor	%ymm10,%ymm6,%ymm6
5244	vpslld	$7,%ymm6,%ymm3
5245	vpsrld	$25,%ymm6,%ymm6
5246	vpxor	%ymm3,%ymm6,%ymm6
5247	vpalignr	$4,%ymm14,%ymm14,%ymm14
5248	vpalignr	$8,%ymm10,%ymm10,%ymm10
5249	vpalignr	$12,%ymm6,%ymm6,%ymm6
5250	vpaddd	%ymm5,%ymm1,%ymm1
5251	vpxor	%ymm1,%ymm13,%ymm13
5252	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
5253	vpaddd	%ymm13,%ymm9,%ymm9
5254	vpxor	%ymm9,%ymm5,%ymm5
5255	vpsrld	$20,%ymm5,%ymm3
5256	vpslld	$12,%ymm5,%ymm5
5257	vpxor	%ymm3,%ymm5,%ymm5
5258	vpaddd	%ymm5,%ymm1,%ymm1
5259	vpxor	%ymm1,%ymm13,%ymm13
5260	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
5261	vpaddd	%ymm13,%ymm9,%ymm9
5262	vpxor	%ymm9,%ymm5,%ymm5
5263	vpslld	$7,%ymm5,%ymm3
5264	vpsrld	$25,%ymm5,%ymm5
5265	vpxor	%ymm3,%ymm5,%ymm5
5266	vpalignr	$4,%ymm13,%ymm13,%ymm13
5267	vpalignr	$8,%ymm9,%ymm9,%ymm9
5268	vpalignr	$12,%ymm5,%ymm5,%ymm5
5269	vpaddd	%ymm4,%ymm0,%ymm0
5270	vpxor	%ymm0,%ymm12,%ymm12
5271	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
5272	vpaddd	%ymm12,%ymm8,%ymm8
5273	vpxor	%ymm8,%ymm4,%ymm4
5274	vpsrld	$20,%ymm4,%ymm3
5275	vpslld	$12,%ymm4,%ymm4
5276	vpxor	%ymm3,%ymm4,%ymm4
5277	vpaddd	%ymm4,%ymm0,%ymm0
5278	vpxor	%ymm0,%ymm12,%ymm12
5279	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
5280	vpaddd	%ymm12,%ymm8,%ymm8
5281	vpxor	%ymm8,%ymm4,%ymm4
5282	vpslld	$7,%ymm4,%ymm3
5283	vpsrld	$25,%ymm4,%ymm4
5284	vpxor	%ymm3,%ymm4,%ymm4
5285	vpalignr	$4,%ymm12,%ymm12,%ymm12
5286	vpalignr	$8,%ymm8,%ymm8,%ymm8
5287	vpalignr	$12,%ymm4,%ymm4,%ymm4
5288
5289	cmpq	%rcx,%r8
5290	jb	.Lopen_avx2_tail_384_rounds_and_x2hash
5291	cmpq	$10,%r8
5292	jne	.Lopen_avx2_tail_384_rounds_and_x1hash
5293	movq	%rbx,%r8
5294	subq	%rsi,%rbx
5295	movq	%rbx,%rcx
5296	movq	0+128(%rbp),%rbx
5297.Lopen_avx2_384_tail_hash:
5298	addq	$16,%rcx
5299	cmpq	%rbx,%rcx
5300	jg	.Lopen_avx2_384_tail_done
5301	addq	0+0(%r8),%r10
5302	adcq	8+0(%r8),%r11
5303	adcq	$1,%r12
5304	movq	0+0+0(%rbp),%rdx
5305	movq	%rdx,%r15
5306	mulxq	%r10,%r13,%r14
5307	mulxq	%r11,%rax,%rdx
5308	imulq	%r12,%r15
5309	addq	%rax,%r14
5310	adcq	%rdx,%r15
5311	movq	8+0+0(%rbp),%rdx
5312	mulxq	%r10,%r10,%rax
5313	addq	%r10,%r14
5314	mulxq	%r11,%r11,%r9
5315	adcq	%r11,%r15
5316	adcq	$0,%r9
5317	imulq	%r12,%rdx
5318	addq	%rax,%r15
5319	adcq	%rdx,%r9
5320	movq	%r13,%r10
5321	movq	%r14,%r11
5322	movq	%r15,%r12
5323	andq	$3,%r12
5324	movq	%r15,%r13
5325	andq	$-4,%r13
5326	movq	%r9,%r14
5327	shrdq	$2,%r9,%r15
5328	shrq	$2,%r9
5329	addq	%r13,%r15
5330	adcq	%r14,%r9
5331	addq	%r15,%r10
5332	adcq	%r9,%r11
5333	adcq	$0,%r12
5334
5335	leaq	16(%r8),%r8
5336	jmp	.Lopen_avx2_384_tail_hash
5337.Lopen_avx2_384_tail_done:
5338	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
5339	vpaddd	0+64(%rbp),%ymm6,%ymm6
5340	vpaddd	0+96(%rbp),%ymm10,%ymm10
5341	vpaddd	0+224(%rbp),%ymm14,%ymm14
5342	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
5343	vpaddd	0+64(%rbp),%ymm5,%ymm5
5344	vpaddd	0+96(%rbp),%ymm9,%ymm9
5345	vpaddd	0+192(%rbp),%ymm13,%ymm13
5346	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
5347	vpaddd	0+64(%rbp),%ymm4,%ymm4
5348	vpaddd	0+96(%rbp),%ymm8,%ymm8
5349	vpaddd	0+160(%rbp),%ymm12,%ymm12
5350	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
5351	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
5352	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
5353	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
5354	vpxor	0+0(%rsi),%ymm3,%ymm3
5355	vpxor	32+0(%rsi),%ymm2,%ymm2
5356	vpxor	64+0(%rsi),%ymm6,%ymm6
5357	vpxor	96+0(%rsi),%ymm10,%ymm10
5358	vmovdqu	%ymm3,0+0(%rdi)
5359	vmovdqu	%ymm2,32+0(%rdi)
5360	vmovdqu	%ymm6,64+0(%rdi)
5361	vmovdqu	%ymm10,96+0(%rdi)
5362	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
5363	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
5364	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
5365	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
5366	vpxor	0+128(%rsi),%ymm3,%ymm3
5367	vpxor	32+128(%rsi),%ymm1,%ymm1
5368	vpxor	64+128(%rsi),%ymm5,%ymm5
5369	vpxor	96+128(%rsi),%ymm9,%ymm9
5370	vmovdqu	%ymm3,0+128(%rdi)
5371	vmovdqu	%ymm1,32+128(%rdi)
5372	vmovdqu	%ymm5,64+128(%rdi)
5373	vmovdqu	%ymm9,96+128(%rdi)
5374	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
5375	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
5376	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
5377	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
5378	vmovdqa	%ymm3,%ymm8
5379
5380	leaq	256(%rsi),%rsi
5381	leaq	256(%rdi),%rdi
5382	subq	$256,%rbx
5383	jmp	.Lopen_avx2_tail_128_xor
5384
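/* Editorial annotation, not part of the generated file: 385..512-byte tail.
   Four ChaCha20 states are run, interleaving two Poly1305 steps per
   double-round, with a third step during the first four double-rounds
   (.Lopen_avx2_tail_512_rounds_and_x2hash); any ciphertext still unhashed up
   to the 384-byte boundary is absorbed in .Lopen_avx2_tail_512_hash. */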
5385.Lopen_avx2_tail_512:
5386	vmovdqa	.Lchacha20_consts(%rip),%ymm0
5387	vmovdqa	0+64(%rbp),%ymm4
5388	vmovdqa	0+96(%rbp),%ymm8
5389	vmovdqa	%ymm0,%ymm1
5390	vmovdqa	%ymm4,%ymm5
5391	vmovdqa	%ymm8,%ymm9
5392	vmovdqa	%ymm0,%ymm2
5393	vmovdqa	%ymm4,%ymm6
5394	vmovdqa	%ymm8,%ymm10
5395	vmovdqa	%ymm0,%ymm3
5396	vmovdqa	%ymm4,%ymm7
5397	vmovdqa	%ymm8,%ymm11
5398	vmovdqa	.Lavx2_inc(%rip),%ymm12
5399	vpaddd	0+160(%rbp),%ymm12,%ymm15
5400	vpaddd	%ymm15,%ymm12,%ymm14
5401	vpaddd	%ymm14,%ymm12,%ymm13
5402	vpaddd	%ymm13,%ymm12,%ymm12
5403	vmovdqa	%ymm15,0+256(%rbp)
5404	vmovdqa	%ymm14,0+224(%rbp)
5405	vmovdqa	%ymm13,0+192(%rbp)
5406	vmovdqa	%ymm12,0+160(%rbp)
5407
5408	xorq	%rcx,%rcx
5409	movq	%rsi,%r8
5410.Lopen_avx2_tail_512_rounds_and_x2hash:
5411	addq	0+0(%r8),%r10
5412	adcq	8+0(%r8),%r11
5413	adcq	$1,%r12
5414	movq	0+0+0(%rbp),%rax
5415	movq	%rax,%r15
5416	mulq	%r10
5417	movq	%rax,%r13
5418	movq	%rdx,%r14
5419	movq	0+0+0(%rbp),%rax
5420	mulq	%r11
5421	imulq	%r12,%r15
5422	addq	%rax,%r14
5423	adcq	%rdx,%r15
5424	movq	8+0+0(%rbp),%rax
5425	movq	%rax,%r9
5426	mulq	%r10
5427	addq	%rax,%r14
5428	adcq	$0,%rdx
5429	movq	%rdx,%r10
5430	movq	8+0+0(%rbp),%rax
5431	mulq	%r11
5432	addq	%rax,%r15
5433	adcq	$0,%rdx
5434	imulq	%r12,%r9
5435	addq	%r10,%r15
5436	adcq	%rdx,%r9
5437	movq	%r13,%r10
5438	movq	%r14,%r11
5439	movq	%r15,%r12
5440	andq	$3,%r12
5441	movq	%r15,%r13
5442	andq	$-4,%r13
5443	movq	%r9,%r14
5444	shrdq	$2,%r9,%r15
5445	shrq	$2,%r9
5446	addq	%r13,%r15
5447	adcq	%r14,%r9
5448	addq	%r15,%r10
5449	adcq	%r9,%r11
5450	adcq	$0,%r12
5451
5452	leaq	16(%r8),%r8
5453.Lopen_avx2_tail_512_rounds_and_x1hash:
5454	vmovdqa	%ymm8,0+128(%rbp)
5455	vmovdqa	.Lrol16(%rip),%ymm8
5456	vpaddd	%ymm7,%ymm3,%ymm3
5457	vpaddd	%ymm6,%ymm2,%ymm2
5458	vpaddd	%ymm5,%ymm1,%ymm1
5459	vpaddd	%ymm4,%ymm0,%ymm0
5460	vpxor	%ymm3,%ymm15,%ymm15
5461	vpxor	%ymm2,%ymm14,%ymm14
5462	vpxor	%ymm1,%ymm13,%ymm13
5463	vpxor	%ymm0,%ymm12,%ymm12
5464	vpshufb	%ymm8,%ymm15,%ymm15
5465	vpshufb	%ymm8,%ymm14,%ymm14
5466	vpshufb	%ymm8,%ymm13,%ymm13
5467	vpshufb	%ymm8,%ymm12,%ymm12
5468	vpaddd	%ymm15,%ymm11,%ymm11
5469	vpaddd	%ymm14,%ymm10,%ymm10
5470	vpaddd	%ymm13,%ymm9,%ymm9
5471	vpaddd	0+128(%rbp),%ymm12,%ymm8
5472	vpxor	%ymm11,%ymm7,%ymm7
5473	vpxor	%ymm10,%ymm6,%ymm6
5474	vpxor	%ymm9,%ymm5,%ymm5
5475	vpxor	%ymm8,%ymm4,%ymm4
5476	vmovdqa	%ymm8,0+128(%rbp)
5477	vpsrld	$20,%ymm7,%ymm8
5478	vpslld	$32-20,%ymm7,%ymm7
5479	vpxor	%ymm8,%ymm7,%ymm7
5480	vpsrld	$20,%ymm6,%ymm8
5481	vpslld	$32-20,%ymm6,%ymm6
5482	vpxor	%ymm8,%ymm6,%ymm6
5483	vpsrld	$20,%ymm5,%ymm8
5484	vpslld	$32-20,%ymm5,%ymm5
5485	vpxor	%ymm8,%ymm5,%ymm5
5486	vpsrld	$20,%ymm4,%ymm8
5487	vpslld	$32-20,%ymm4,%ymm4
5488	vpxor	%ymm8,%ymm4,%ymm4
5489	vmovdqa	.Lrol8(%rip),%ymm8
5490	vpaddd	%ymm7,%ymm3,%ymm3
5491	addq	0+0(%r8),%r10
5492	adcq	8+0(%r8),%r11
5493	adcq	$1,%r12
5494	movq	0+0+0(%rbp),%rdx
5495	movq	%rdx,%r15
5496	mulxq	%r10,%r13,%r14
5497	mulxq	%r11,%rax,%rdx
5498	imulq	%r12,%r15
5499	addq	%rax,%r14
5500	adcq	%rdx,%r15
5501	movq	8+0+0(%rbp),%rdx
5502	mulxq	%r10,%r10,%rax
5503	addq	%r10,%r14
5504	mulxq	%r11,%r11,%r9
5505	adcq	%r11,%r15
5506	adcq	$0,%r9
5507	imulq	%r12,%rdx
5508	addq	%rax,%r15
5509	adcq	%rdx,%r9
5510	movq	%r13,%r10
5511	movq	%r14,%r11
5512	movq	%r15,%r12
5513	andq	$3,%r12
5514	movq	%r15,%r13
5515	andq	$-4,%r13
5516	movq	%r9,%r14
5517	shrdq	$2,%r9,%r15
5518	shrq	$2,%r9
5519	addq	%r13,%r15
5520	adcq	%r14,%r9
5521	addq	%r15,%r10
5522	adcq	%r9,%r11
5523	adcq	$0,%r12
5524	vpaddd	%ymm6,%ymm2,%ymm2
5525	vpaddd	%ymm5,%ymm1,%ymm1
5526	vpaddd	%ymm4,%ymm0,%ymm0
5527	vpxor	%ymm3,%ymm15,%ymm15
5528	vpxor	%ymm2,%ymm14,%ymm14
5529	vpxor	%ymm1,%ymm13,%ymm13
5530	vpxor	%ymm0,%ymm12,%ymm12
5531	vpshufb	%ymm8,%ymm15,%ymm15
5532	vpshufb	%ymm8,%ymm14,%ymm14
5533	vpshufb	%ymm8,%ymm13,%ymm13
5534	vpshufb	%ymm8,%ymm12,%ymm12
5535	vpaddd	%ymm15,%ymm11,%ymm11
5536	vpaddd	%ymm14,%ymm10,%ymm10
5537	vpaddd	%ymm13,%ymm9,%ymm9
5538	vpaddd	0+128(%rbp),%ymm12,%ymm8
5539	vpxor	%ymm11,%ymm7,%ymm7
5540	vpxor	%ymm10,%ymm6,%ymm6
5541	vpxor	%ymm9,%ymm5,%ymm5
5542	vpxor	%ymm8,%ymm4,%ymm4
5543	vmovdqa	%ymm8,0+128(%rbp)
5544	vpsrld	$25,%ymm7,%ymm8
5545	vpslld	$32-25,%ymm7,%ymm7
5546	vpxor	%ymm8,%ymm7,%ymm7
5547	vpsrld	$25,%ymm6,%ymm8
5548	vpslld	$32-25,%ymm6,%ymm6
5549	vpxor	%ymm8,%ymm6,%ymm6
5550	vpsrld	$25,%ymm5,%ymm8
5551	vpslld	$32-25,%ymm5,%ymm5
5552	vpxor	%ymm8,%ymm5,%ymm5
5553	vpsrld	$25,%ymm4,%ymm8
5554	vpslld	$32-25,%ymm4,%ymm4
5555	vpxor	%ymm8,%ymm4,%ymm4
5556	vmovdqa	0+128(%rbp),%ymm8
5557	vpalignr	$4,%ymm7,%ymm7,%ymm7
5558	vpalignr	$8,%ymm11,%ymm11,%ymm11
5559	vpalignr	$12,%ymm15,%ymm15,%ymm15
5560	vpalignr	$4,%ymm6,%ymm6,%ymm6
5561	vpalignr	$8,%ymm10,%ymm10,%ymm10
5562	vpalignr	$12,%ymm14,%ymm14,%ymm14
5563	vpalignr	$4,%ymm5,%ymm5,%ymm5
5564	vpalignr	$8,%ymm9,%ymm9,%ymm9
5565	vpalignr	$12,%ymm13,%ymm13,%ymm13
5566	vpalignr	$4,%ymm4,%ymm4,%ymm4
5567	vpalignr	$8,%ymm8,%ymm8,%ymm8
5568	vpalignr	$12,%ymm12,%ymm12,%ymm12
5569	vmovdqa	%ymm8,0+128(%rbp)
5570	vmovdqa	.Lrol16(%rip),%ymm8
5571	vpaddd	%ymm7,%ymm3,%ymm3
5572	addq	0+16(%r8),%r10
5573	adcq	8+16(%r8),%r11
5574	adcq	$1,%r12
5575	movq	0+0+0(%rbp),%rdx
5576	movq	%rdx,%r15
5577	mulxq	%r10,%r13,%r14
5578	mulxq	%r11,%rax,%rdx
5579	imulq	%r12,%r15
5580	addq	%rax,%r14
5581	adcq	%rdx,%r15
5582	movq	8+0+0(%rbp),%rdx
5583	mulxq	%r10,%r10,%rax
5584	addq	%r10,%r14
5585	mulxq	%r11,%r11,%r9
5586	adcq	%r11,%r15
5587	adcq	$0,%r9
5588	imulq	%r12,%rdx
5589	addq	%rax,%r15
5590	adcq	%rdx,%r9
5591	movq	%r13,%r10
5592	movq	%r14,%r11
5593	movq	%r15,%r12
5594	andq	$3,%r12
5595	movq	%r15,%r13
5596	andq	$-4,%r13
5597	movq	%r9,%r14
5598	shrdq	$2,%r9,%r15
5599	shrq	$2,%r9
5600	addq	%r13,%r15
5601	adcq	%r14,%r9
5602	addq	%r15,%r10
5603	adcq	%r9,%r11
5604	adcq	$0,%r12
5605
5606	leaq	32(%r8),%r8
5607	vpaddd	%ymm6,%ymm2,%ymm2
5608	vpaddd	%ymm5,%ymm1,%ymm1
5609	vpaddd	%ymm4,%ymm0,%ymm0
5610	vpxor	%ymm3,%ymm15,%ymm15
5611	vpxor	%ymm2,%ymm14,%ymm14
5612	vpxor	%ymm1,%ymm13,%ymm13
5613	vpxor	%ymm0,%ymm12,%ymm12
5614	vpshufb	%ymm8,%ymm15,%ymm15
5615	vpshufb	%ymm8,%ymm14,%ymm14
5616	vpshufb	%ymm8,%ymm13,%ymm13
5617	vpshufb	%ymm8,%ymm12,%ymm12
5618	vpaddd	%ymm15,%ymm11,%ymm11
5619	vpaddd	%ymm14,%ymm10,%ymm10
5620	vpaddd	%ymm13,%ymm9,%ymm9
5621	vpaddd	0+128(%rbp),%ymm12,%ymm8
5622	vpxor	%ymm11,%ymm7,%ymm7
5623	vpxor	%ymm10,%ymm6,%ymm6
5624	vpxor	%ymm9,%ymm5,%ymm5
5625	vpxor	%ymm8,%ymm4,%ymm4
5626	vmovdqa	%ymm8,0+128(%rbp)
5627	vpsrld	$20,%ymm7,%ymm8
5628	vpslld	$32-20,%ymm7,%ymm7
5629	vpxor	%ymm8,%ymm7,%ymm7
5630	vpsrld	$20,%ymm6,%ymm8
5631	vpslld	$32-20,%ymm6,%ymm6
5632	vpxor	%ymm8,%ymm6,%ymm6
5633	vpsrld	$20,%ymm5,%ymm8
5634	vpslld	$32-20,%ymm5,%ymm5
5635	vpxor	%ymm8,%ymm5,%ymm5
5636	vpsrld	$20,%ymm4,%ymm8
5637	vpslld	$32-20,%ymm4,%ymm4
5638	vpxor	%ymm8,%ymm4,%ymm4
5639	vmovdqa	.Lrol8(%rip),%ymm8
5640	vpaddd	%ymm7,%ymm3,%ymm3
5641	vpaddd	%ymm6,%ymm2,%ymm2
5642	vpaddd	%ymm5,%ymm1,%ymm1
5643	vpaddd	%ymm4,%ymm0,%ymm0
5644	vpxor	%ymm3,%ymm15,%ymm15
5645	vpxor	%ymm2,%ymm14,%ymm14
5646	vpxor	%ymm1,%ymm13,%ymm13
5647	vpxor	%ymm0,%ymm12,%ymm12
5648	vpshufb	%ymm8,%ymm15,%ymm15
5649	vpshufb	%ymm8,%ymm14,%ymm14
5650	vpshufb	%ymm8,%ymm13,%ymm13
5651	vpshufb	%ymm8,%ymm12,%ymm12
5652	vpaddd	%ymm15,%ymm11,%ymm11
5653	vpaddd	%ymm14,%ymm10,%ymm10
5654	vpaddd	%ymm13,%ymm9,%ymm9
5655	vpaddd	0+128(%rbp),%ymm12,%ymm8
5656	vpxor	%ymm11,%ymm7,%ymm7
5657	vpxor	%ymm10,%ymm6,%ymm6
5658	vpxor	%ymm9,%ymm5,%ymm5
5659	vpxor	%ymm8,%ymm4,%ymm4
5660	vmovdqa	%ymm8,0+128(%rbp)
5661	vpsrld	$25,%ymm7,%ymm8
5662	vpslld	$32-25,%ymm7,%ymm7
5663	vpxor	%ymm8,%ymm7,%ymm7
5664	vpsrld	$25,%ymm6,%ymm8
5665	vpslld	$32-25,%ymm6,%ymm6
5666	vpxor	%ymm8,%ymm6,%ymm6
5667	vpsrld	$25,%ymm5,%ymm8
5668	vpslld	$32-25,%ymm5,%ymm5
5669	vpxor	%ymm8,%ymm5,%ymm5
5670	vpsrld	$25,%ymm4,%ymm8
5671	vpslld	$32-25,%ymm4,%ymm4
5672	vpxor	%ymm8,%ymm4,%ymm4
5673	vmovdqa	0+128(%rbp),%ymm8
5674	vpalignr	$12,%ymm7,%ymm7,%ymm7
5675	vpalignr	$8,%ymm11,%ymm11,%ymm11
5676	vpalignr	$4,%ymm15,%ymm15,%ymm15
5677	vpalignr	$12,%ymm6,%ymm6,%ymm6
5678	vpalignr	$8,%ymm10,%ymm10,%ymm10
5679	vpalignr	$4,%ymm14,%ymm14,%ymm14
5680	vpalignr	$12,%ymm5,%ymm5,%ymm5
5681	vpalignr	$8,%ymm9,%ymm9,%ymm9
5682	vpalignr	$4,%ymm13,%ymm13,%ymm13
5683	vpalignr	$12,%ymm4,%ymm4,%ymm4
5684	vpalignr	$8,%ymm8,%ymm8,%ymm8
5685	vpalignr	$4,%ymm12,%ymm12,%ymm12
5686
5687	incq	%rcx
5688	cmpq	$4,%rcx
5689	jl	.Lopen_avx2_tail_512_rounds_and_x2hash
5690	cmpq	$10,%rcx
5691	jne	.Lopen_avx2_tail_512_rounds_and_x1hash
5692	movq	%rbx,%rcx
5693	subq	$384,%rcx
5694	andq	$-16,%rcx
5695.Lopen_avx2_tail_512_hash:
5696	testq	%rcx,%rcx
5697	je	.Lopen_avx2_tail_512_done
5698	addq	0+0(%r8),%r10
5699	adcq	8+0(%r8),%r11
5700	adcq	$1,%r12
5701	movq	0+0+0(%rbp),%rdx
5702	movq	%rdx,%r15
5703	mulxq	%r10,%r13,%r14
5704	mulxq	%r11,%rax,%rdx
5705	imulq	%r12,%r15
5706	addq	%rax,%r14
5707	adcq	%rdx,%r15
5708	movq	8+0+0(%rbp),%rdx
5709	mulxq	%r10,%r10,%rax
5710	addq	%r10,%r14
5711	mulxq	%r11,%r11,%r9
5712	adcq	%r11,%r15
5713	adcq	$0,%r9
5714	imulq	%r12,%rdx
5715	addq	%rax,%r15
5716	adcq	%rdx,%r9
5717	movq	%r13,%r10
5718	movq	%r14,%r11
5719	movq	%r15,%r12
5720	andq	$3,%r12
5721	movq	%r15,%r13
5722	andq	$-4,%r13
5723	movq	%r9,%r14
5724	shrdq	$2,%r9,%r15
5725	shrq	$2,%r9
5726	addq	%r13,%r15
5727	adcq	%r14,%r9
5728	addq	%r15,%r10
5729	adcq	%r9,%r11
5730	adcq	$0,%r12
5731
5732	leaq	16(%r8),%r8
5733	subq	$16,%rcx
5734	jmp	.Lopen_avx2_tail_512_hash
5735.Lopen_avx2_tail_512_done:
5736	vpaddd	.Lchacha20_consts(%rip),%ymm3,%ymm3
5737	vpaddd	0+64(%rbp),%ymm7,%ymm7
5738	vpaddd	0+96(%rbp),%ymm11,%ymm11
5739	vpaddd	0+256(%rbp),%ymm15,%ymm15
5740	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
5741	vpaddd	0+64(%rbp),%ymm6,%ymm6
5742	vpaddd	0+96(%rbp),%ymm10,%ymm10
5743	vpaddd	0+224(%rbp),%ymm14,%ymm14
5744	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
5745	vpaddd	0+64(%rbp),%ymm5,%ymm5
5746	vpaddd	0+96(%rbp),%ymm9,%ymm9
5747	vpaddd	0+192(%rbp),%ymm13,%ymm13
5748	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
5749	vpaddd	0+64(%rbp),%ymm4,%ymm4
5750	vpaddd	0+96(%rbp),%ymm8,%ymm8
5751	vpaddd	0+160(%rbp),%ymm12,%ymm12
5752
5753	vmovdqa	%ymm0,0+128(%rbp)
5754	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
5755	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
5756	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
5757	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
5758	vpxor	0+0(%rsi),%ymm0,%ymm0
5759	vpxor	32+0(%rsi),%ymm3,%ymm3
5760	vpxor	64+0(%rsi),%ymm7,%ymm7
5761	vpxor	96+0(%rsi),%ymm11,%ymm11
5762	vmovdqu	%ymm0,0+0(%rdi)
5763	vmovdqu	%ymm3,32+0(%rdi)
5764	vmovdqu	%ymm7,64+0(%rdi)
5765	vmovdqu	%ymm11,96+0(%rdi)
5766
5767	vmovdqa	0+128(%rbp),%ymm0
5768	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
5769	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
5770	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
5771	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
5772	vpxor	0+128(%rsi),%ymm3,%ymm3
5773	vpxor	32+128(%rsi),%ymm2,%ymm2
5774	vpxor	64+128(%rsi),%ymm6,%ymm6
5775	vpxor	96+128(%rsi),%ymm10,%ymm10
5776	vmovdqu	%ymm3,0+128(%rdi)
5777	vmovdqu	%ymm2,32+128(%rdi)
5778	vmovdqu	%ymm6,64+128(%rdi)
5779	vmovdqu	%ymm10,96+128(%rdi)
5780	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
5781	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
5782	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
5783	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
5784	vpxor	0+256(%rsi),%ymm3,%ymm3
5785	vpxor	32+256(%rsi),%ymm1,%ymm1
5786	vpxor	64+256(%rsi),%ymm5,%ymm5
5787	vpxor	96+256(%rsi),%ymm9,%ymm9
5788	vmovdqu	%ymm3,0+256(%rdi)
5789	vmovdqu	%ymm1,32+256(%rdi)
5790	vmovdqu	%ymm5,64+256(%rdi)
5791	vmovdqu	%ymm9,96+256(%rdi)
5792	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
5793	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
5794	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
5795	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
5796	vmovdqa	%ymm3,%ymm8
5797
5798	leaq	384(%rsi),%rsi
5799	leaq	384(%rdi),%rdi
5800	subq	$384,%rbx
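/* Editorial annotation, not part of the generated file: common tail XOR.
   The tail paths above converge here with up to 128 bytes of keystream queued
   in %ymm0/%ymm4/%ymm8/%ymm12. 32-byte chunks are XORed against the input;
   a final 16-byte chunk and any sub-16-byte remainder are then handed to the
   SSE tail code at .Lopen_sse_tail_16. */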
5801.Lopen_avx2_tail_128_xor:
5802	cmpq	$32,%rbx
5803	jb	.Lopen_avx2_tail_32_xor
5804	subq	$32,%rbx
5805	vpxor	(%rsi),%ymm0,%ymm0
5806	vmovdqu	%ymm0,(%rdi)
5807	leaq	32(%rsi),%rsi
5808	leaq	32(%rdi),%rdi
5809	vmovdqa	%ymm4,%ymm0
5810	vmovdqa	%ymm8,%ymm4
5811	vmovdqa	%ymm12,%ymm8
5812	jmp	.Lopen_avx2_tail_128_xor
5813.Lopen_avx2_tail_32_xor:
5814	cmpq	$16,%rbx
5815	vmovdqa	%xmm0,%xmm1
5816	jb	.Lopen_avx2_exit
5817	subq	$16,%rbx
5818
5819	vpxor	(%rsi),%xmm0,%xmm1
5820	vmovdqu	%xmm1,(%rdi)
5821	leaq	16(%rsi),%rsi
5822	leaq	16(%rdi),%rdi
5823	vperm2i128	$0x11,%ymm0,%ymm0,%ymm0
5824	vmovdqa	%xmm0,%xmm1
5825.Lopen_avx2_exit:
5826	vzeroupper
5827	jmp	.Lopen_sse_tail_16
5828
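/* Editorial annotation, not part of the generated file: short-input open path
   for messages of at most 192 bytes. Two AVX2 ChaCha20 states are run for ten
   double-rounds; the first 32 bytes of keystream are clamped with .Lclamp and
   stored at 0(%rbp) as the Poly1305 key, and the remaining keystream feeds the
   hash-and-XOR loop at .Lopen_avx2_short. */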
5829.Lopen_avx2_192:
5830	vmovdqa	%ymm0,%ymm1
5831	vmovdqa	%ymm0,%ymm2
5832	vmovdqa	%ymm4,%ymm5
5833	vmovdqa	%ymm4,%ymm6
5834	vmovdqa	%ymm8,%ymm9
5835	vmovdqa	%ymm8,%ymm10
5836	vpaddd	.Lavx2_inc(%rip),%ymm12,%ymm13
5837	vmovdqa	%ymm12,%ymm11
5838	vmovdqa	%ymm13,%ymm15
5839	movq	$10,%r10
5840.Lopen_avx2_192_rounds:
5841	vpaddd	%ymm4,%ymm0,%ymm0
5842	vpxor	%ymm0,%ymm12,%ymm12
5843	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
5844	vpaddd	%ymm12,%ymm8,%ymm8
5845	vpxor	%ymm8,%ymm4,%ymm4
5846	vpsrld	$20,%ymm4,%ymm3
5847	vpslld	$12,%ymm4,%ymm4
5848	vpxor	%ymm3,%ymm4,%ymm4
5849	vpaddd	%ymm4,%ymm0,%ymm0
5850	vpxor	%ymm0,%ymm12,%ymm12
5851	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
5852	vpaddd	%ymm12,%ymm8,%ymm8
5853	vpxor	%ymm8,%ymm4,%ymm4
5854	vpslld	$7,%ymm4,%ymm3
5855	vpsrld	$25,%ymm4,%ymm4
5856	vpxor	%ymm3,%ymm4,%ymm4
5857	vpalignr	$12,%ymm12,%ymm12,%ymm12
5858	vpalignr	$8,%ymm8,%ymm8,%ymm8
5859	vpalignr	$4,%ymm4,%ymm4,%ymm4
5860	vpaddd	%ymm5,%ymm1,%ymm1
5861	vpxor	%ymm1,%ymm13,%ymm13
5862	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
5863	vpaddd	%ymm13,%ymm9,%ymm9
5864	vpxor	%ymm9,%ymm5,%ymm5
5865	vpsrld	$20,%ymm5,%ymm3
5866	vpslld	$12,%ymm5,%ymm5
5867	vpxor	%ymm3,%ymm5,%ymm5
5868	vpaddd	%ymm5,%ymm1,%ymm1
5869	vpxor	%ymm1,%ymm13,%ymm13
5870	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
5871	vpaddd	%ymm13,%ymm9,%ymm9
5872	vpxor	%ymm9,%ymm5,%ymm5
5873	vpslld	$7,%ymm5,%ymm3
5874	vpsrld	$25,%ymm5,%ymm5
5875	vpxor	%ymm3,%ymm5,%ymm5
5876	vpalignr	$12,%ymm13,%ymm13,%ymm13
5877	vpalignr	$8,%ymm9,%ymm9,%ymm9
5878	vpalignr	$4,%ymm5,%ymm5,%ymm5
5879	vpaddd	%ymm4,%ymm0,%ymm0
5880	vpxor	%ymm0,%ymm12,%ymm12
5881	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
5882	vpaddd	%ymm12,%ymm8,%ymm8
5883	vpxor	%ymm8,%ymm4,%ymm4
5884	vpsrld	$20,%ymm4,%ymm3
5885	vpslld	$12,%ymm4,%ymm4
5886	vpxor	%ymm3,%ymm4,%ymm4
5887	vpaddd	%ymm4,%ymm0,%ymm0
5888	vpxor	%ymm0,%ymm12,%ymm12
5889	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
5890	vpaddd	%ymm12,%ymm8,%ymm8
5891	vpxor	%ymm8,%ymm4,%ymm4
5892	vpslld	$7,%ymm4,%ymm3
5893	vpsrld	$25,%ymm4,%ymm4
5894	vpxor	%ymm3,%ymm4,%ymm4
5895	vpalignr	$4,%ymm12,%ymm12,%ymm12
5896	vpalignr	$8,%ymm8,%ymm8,%ymm8
5897	vpalignr	$12,%ymm4,%ymm4,%ymm4
5898	vpaddd	%ymm5,%ymm1,%ymm1
5899	vpxor	%ymm1,%ymm13,%ymm13
5900	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
5901	vpaddd	%ymm13,%ymm9,%ymm9
5902	vpxor	%ymm9,%ymm5,%ymm5
5903	vpsrld	$20,%ymm5,%ymm3
5904	vpslld	$12,%ymm5,%ymm5
5905	vpxor	%ymm3,%ymm5,%ymm5
5906	vpaddd	%ymm5,%ymm1,%ymm1
5907	vpxor	%ymm1,%ymm13,%ymm13
5908	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
5909	vpaddd	%ymm13,%ymm9,%ymm9
5910	vpxor	%ymm9,%ymm5,%ymm5
5911	vpslld	$7,%ymm5,%ymm3
5912	vpsrld	$25,%ymm5,%ymm5
5913	vpxor	%ymm3,%ymm5,%ymm5
5914	vpalignr	$4,%ymm13,%ymm13,%ymm13
5915	vpalignr	$8,%ymm9,%ymm9,%ymm9
5916	vpalignr	$12,%ymm5,%ymm5,%ymm5
5917
5918	decq	%r10
5919	jne	.Lopen_avx2_192_rounds
5920	vpaddd	%ymm2,%ymm0,%ymm0
5921	vpaddd	%ymm2,%ymm1,%ymm1
5922	vpaddd	%ymm6,%ymm4,%ymm4
5923	vpaddd	%ymm6,%ymm5,%ymm5
5924	vpaddd	%ymm10,%ymm8,%ymm8
5925	vpaddd	%ymm10,%ymm9,%ymm9
5926	vpaddd	%ymm11,%ymm12,%ymm12
5927	vpaddd	%ymm15,%ymm13,%ymm13
5928	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
5929
5930	vpand	.Lclamp(%rip),%ymm3,%ymm3
5931	vmovdqa	%ymm3,0+0(%rbp)
5932
5933	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
5934	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
5935	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
5936	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
5937	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
5938	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
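/* Editorial annotation, not part of the generated file: common short path.
   poly_hash_ad_internal absorbs the additional data (length in %r8); the loop
   below then hashes 32 bytes of ciphertext with two Poly1305 steps, XORs them
   with the keystream held in %ymm0, and rotates the queued keystream registers
   forward until fewer than 32 bytes remain. */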
5939.Lopen_avx2_short:
5940	movq	%r8,%r8
5941	call	poly_hash_ad_internal
5942.Lopen_avx2_short_hash_and_xor_loop:
5943	cmpq	$32,%rbx
5944	jb	.Lopen_avx2_short_tail_32
5945	subq	$32,%rbx
5946	addq	0+0(%rsi),%r10
5947	adcq	8+0(%rsi),%r11
5948	adcq	$1,%r12
5949	movq	0+0+0(%rbp),%rax
5950	movq	%rax,%r15
5951	mulq	%r10
5952	movq	%rax,%r13
5953	movq	%rdx,%r14
5954	movq	0+0+0(%rbp),%rax
5955	mulq	%r11
5956	imulq	%r12,%r15
5957	addq	%rax,%r14
5958	adcq	%rdx,%r15
5959	movq	8+0+0(%rbp),%rax
5960	movq	%rax,%r9
5961	mulq	%r10
5962	addq	%rax,%r14
5963	adcq	$0,%rdx
5964	movq	%rdx,%r10
5965	movq	8+0+0(%rbp),%rax
5966	mulq	%r11
5967	addq	%rax,%r15
5968	adcq	$0,%rdx
5969	imulq	%r12,%r9
5970	addq	%r10,%r15
5971	adcq	%rdx,%r9
5972	movq	%r13,%r10
5973	movq	%r14,%r11
5974	movq	%r15,%r12
5975	andq	$3,%r12
5976	movq	%r15,%r13
5977	andq	$-4,%r13
5978	movq	%r9,%r14
5979	shrdq	$2,%r9,%r15
5980	shrq	$2,%r9
5981	addq	%r13,%r15
5982	adcq	%r14,%r9
5983	addq	%r15,%r10
5984	adcq	%r9,%r11
5985	adcq	$0,%r12
5986	addq	0+16(%rsi),%r10
5987	adcq	8+16(%rsi),%r11
5988	adcq	$1,%r12
5989	movq	0+0+0(%rbp),%rax
5990	movq	%rax,%r15
5991	mulq	%r10
5992	movq	%rax,%r13
5993	movq	%rdx,%r14
5994	movq	0+0+0(%rbp),%rax
5995	mulq	%r11
5996	imulq	%r12,%r15
5997	addq	%rax,%r14
5998	adcq	%rdx,%r15
5999	movq	8+0+0(%rbp),%rax
6000	movq	%rax,%r9
6001	mulq	%r10
6002	addq	%rax,%r14
6003	adcq	$0,%rdx
6004	movq	%rdx,%r10
6005	movq	8+0+0(%rbp),%rax
6006	mulq	%r11
6007	addq	%rax,%r15
6008	adcq	$0,%rdx
6009	imulq	%r12,%r9
6010	addq	%r10,%r15
6011	adcq	%rdx,%r9
6012	movq	%r13,%r10
6013	movq	%r14,%r11
6014	movq	%r15,%r12
6015	andq	$3,%r12
6016	movq	%r15,%r13
6017	andq	$-4,%r13
6018	movq	%r9,%r14
6019	shrdq	$2,%r9,%r15
6020	shrq	$2,%r9
6021	addq	%r13,%r15
6022	adcq	%r14,%r9
6023	addq	%r15,%r10
6024	adcq	%r9,%r11
6025	adcq	$0,%r12
6026
6027
6028	vpxor	(%rsi),%ymm0,%ymm0
6029	vmovdqu	%ymm0,(%rdi)
6030	leaq	32(%rsi),%rsi
6031	leaq	32(%rdi),%rdi
6032
6033	vmovdqa	%ymm4,%ymm0
6034	vmovdqa	%ymm8,%ymm4
6035	vmovdqa	%ymm12,%ymm8
6036	vmovdqa	%ymm1,%ymm12
6037	vmovdqa	%ymm5,%ymm1
6038	vmovdqa	%ymm9,%ymm5
6039	vmovdqa	%ymm13,%ymm9
6040	vmovdqa	%ymm2,%ymm13
6041	vmovdqa	%ymm6,%ymm2
6042	jmp	.Lopen_avx2_short_hash_and_xor_loop
6043.Lopen_avx2_short_tail_32:
6044	cmpq	$16,%rbx
6045	vmovdqa	%xmm0,%xmm1
6046	jb	.Lopen_avx2_short_tail_32_exit
6047	subq	$16,%rbx
6048	addq	0+0(%rsi),%r10
6049	adcq	8+0(%rsi),%r11
6050	adcq	$1,%r12
6051	movq	0+0+0(%rbp),%rax
6052	movq	%rax,%r15
6053	mulq	%r10
6054	movq	%rax,%r13
6055	movq	%rdx,%r14
6056	movq	0+0+0(%rbp),%rax
6057	mulq	%r11
6058	imulq	%r12,%r15
6059	addq	%rax,%r14
6060	adcq	%rdx,%r15
6061	movq	8+0+0(%rbp),%rax
6062	movq	%rax,%r9
6063	mulq	%r10
6064	addq	%rax,%r14
6065	adcq	$0,%rdx
6066	movq	%rdx,%r10
6067	movq	8+0+0(%rbp),%rax
6068	mulq	%r11
6069	addq	%rax,%r15
6070	adcq	$0,%rdx
6071	imulq	%r12,%r9
6072	addq	%r10,%r15
6073	adcq	%rdx,%r9
6074	movq	%r13,%r10
6075	movq	%r14,%r11
6076	movq	%r15,%r12
6077	andq	$3,%r12
6078	movq	%r15,%r13
6079	andq	$-4,%r13
6080	movq	%r9,%r14
6081	shrdq	$2,%r9,%r15
6082	shrq	$2,%r9
6083	addq	%r13,%r15
6084	adcq	%r14,%r9
6085	addq	%r15,%r10
6086	adcq	%r9,%r11
6087	adcq	$0,%r12
6088
6089	vpxor	(%rsi),%xmm0,%xmm3
6090	vmovdqu	%xmm3,(%rdi)
6091	leaq	16(%rsi),%rsi
6092	leaq	16(%rdi),%rdi
6093	vextracti128	$1,%ymm0,%xmm1
6094.Lopen_avx2_short_tail_32_exit:
6095	vzeroupper
6096	jmp	.Lopen_sse_tail_16
6097
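/* Editorial annotation, not part of the generated file: short-input open path
   for messages of 193..320 bytes, analogous to .Lopen_avx2_192 but running
   three ChaCha20 states before joining .Lopen_avx2_short. */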
6098.Lopen_avx2_320:
6099	vmovdqa	%ymm0,%ymm1
6100	vmovdqa	%ymm0,%ymm2
6101	vmovdqa	%ymm4,%ymm5
6102	vmovdqa	%ymm4,%ymm6
6103	vmovdqa	%ymm8,%ymm9
6104	vmovdqa	%ymm8,%ymm10
6105	vpaddd	.Lavx2_inc(%rip),%ymm12,%ymm13
6106	vpaddd	.Lavx2_inc(%rip),%ymm13,%ymm14
6107	vmovdqa	%ymm4,%ymm7
6108	vmovdqa	%ymm8,%ymm11
6109	vmovdqa	%ymm12,0+160(%rbp)
6110	vmovdqa	%ymm13,0+192(%rbp)
6111	vmovdqa	%ymm14,0+224(%rbp)
6112	movq	$10,%r10
6113.Lopen_avx2_320_rounds:
6114	vpaddd	%ymm4,%ymm0,%ymm0
6115	vpxor	%ymm0,%ymm12,%ymm12
6116	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
6117	vpaddd	%ymm12,%ymm8,%ymm8
6118	vpxor	%ymm8,%ymm4,%ymm4
6119	vpsrld	$20,%ymm4,%ymm3
6120	vpslld	$12,%ymm4,%ymm4
6121	vpxor	%ymm3,%ymm4,%ymm4
6122	vpaddd	%ymm4,%ymm0,%ymm0
6123	vpxor	%ymm0,%ymm12,%ymm12
6124	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
6125	vpaddd	%ymm12,%ymm8,%ymm8
6126	vpxor	%ymm8,%ymm4,%ymm4
6127	vpslld	$7,%ymm4,%ymm3
6128	vpsrld	$25,%ymm4,%ymm4
6129	vpxor	%ymm3,%ymm4,%ymm4
6130	vpalignr	$12,%ymm12,%ymm12,%ymm12
6131	vpalignr	$8,%ymm8,%ymm8,%ymm8
6132	vpalignr	$4,%ymm4,%ymm4,%ymm4
6133	vpaddd	%ymm5,%ymm1,%ymm1
6134	vpxor	%ymm1,%ymm13,%ymm13
6135	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
6136	vpaddd	%ymm13,%ymm9,%ymm9
6137	vpxor	%ymm9,%ymm5,%ymm5
6138	vpsrld	$20,%ymm5,%ymm3
6139	vpslld	$12,%ymm5,%ymm5
6140	vpxor	%ymm3,%ymm5,%ymm5
6141	vpaddd	%ymm5,%ymm1,%ymm1
6142	vpxor	%ymm1,%ymm13,%ymm13
6143	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
6144	vpaddd	%ymm13,%ymm9,%ymm9
6145	vpxor	%ymm9,%ymm5,%ymm5
6146	vpslld	$7,%ymm5,%ymm3
6147	vpsrld	$25,%ymm5,%ymm5
6148	vpxor	%ymm3,%ymm5,%ymm5
6149	vpalignr	$12,%ymm13,%ymm13,%ymm13
6150	vpalignr	$8,%ymm9,%ymm9,%ymm9
6151	vpalignr	$4,%ymm5,%ymm5,%ymm5
6152	vpaddd	%ymm6,%ymm2,%ymm2
6153	vpxor	%ymm2,%ymm14,%ymm14
6154	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
6155	vpaddd	%ymm14,%ymm10,%ymm10
6156	vpxor	%ymm10,%ymm6,%ymm6
6157	vpsrld	$20,%ymm6,%ymm3
6158	vpslld	$12,%ymm6,%ymm6
6159	vpxor	%ymm3,%ymm6,%ymm6
6160	vpaddd	%ymm6,%ymm2,%ymm2
6161	vpxor	%ymm2,%ymm14,%ymm14
6162	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
6163	vpaddd	%ymm14,%ymm10,%ymm10
6164	vpxor	%ymm10,%ymm6,%ymm6
6165	vpslld	$7,%ymm6,%ymm3
6166	vpsrld	$25,%ymm6,%ymm6
6167	vpxor	%ymm3,%ymm6,%ymm6
6168	vpalignr	$12,%ymm14,%ymm14,%ymm14
6169	vpalignr	$8,%ymm10,%ymm10,%ymm10
6170	vpalignr	$4,%ymm6,%ymm6,%ymm6
6171	vpaddd	%ymm4,%ymm0,%ymm0
6172	vpxor	%ymm0,%ymm12,%ymm12
6173	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
6174	vpaddd	%ymm12,%ymm8,%ymm8
6175	vpxor	%ymm8,%ymm4,%ymm4
6176	vpsrld	$20,%ymm4,%ymm3
6177	vpslld	$12,%ymm4,%ymm4
6178	vpxor	%ymm3,%ymm4,%ymm4
6179	vpaddd	%ymm4,%ymm0,%ymm0
6180	vpxor	%ymm0,%ymm12,%ymm12
6181	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
6182	vpaddd	%ymm12,%ymm8,%ymm8
6183	vpxor	%ymm8,%ymm4,%ymm4
6184	vpslld	$7,%ymm4,%ymm3
6185	vpsrld	$25,%ymm4,%ymm4
6186	vpxor	%ymm3,%ymm4,%ymm4
6187	vpalignr	$4,%ymm12,%ymm12,%ymm12
6188	vpalignr	$8,%ymm8,%ymm8,%ymm8
6189	vpalignr	$12,%ymm4,%ymm4,%ymm4
6190	vpaddd	%ymm5,%ymm1,%ymm1
6191	vpxor	%ymm1,%ymm13,%ymm13
6192	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
6193	vpaddd	%ymm13,%ymm9,%ymm9
6194	vpxor	%ymm9,%ymm5,%ymm5
6195	vpsrld	$20,%ymm5,%ymm3
6196	vpslld	$12,%ymm5,%ymm5
6197	vpxor	%ymm3,%ymm5,%ymm5
6198	vpaddd	%ymm5,%ymm1,%ymm1
6199	vpxor	%ymm1,%ymm13,%ymm13
6200	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
6201	vpaddd	%ymm13,%ymm9,%ymm9
6202	vpxor	%ymm9,%ymm5,%ymm5
6203	vpslld	$7,%ymm5,%ymm3
6204	vpsrld	$25,%ymm5,%ymm5
6205	vpxor	%ymm3,%ymm5,%ymm5
6206	vpalignr	$4,%ymm13,%ymm13,%ymm13
6207	vpalignr	$8,%ymm9,%ymm9,%ymm9
6208	vpalignr	$12,%ymm5,%ymm5,%ymm5
6209	vpaddd	%ymm6,%ymm2,%ymm2
6210	vpxor	%ymm2,%ymm14,%ymm14
6211	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
6212	vpaddd	%ymm14,%ymm10,%ymm10
6213	vpxor	%ymm10,%ymm6,%ymm6
6214	vpsrld	$20,%ymm6,%ymm3
6215	vpslld	$12,%ymm6,%ymm6
6216	vpxor	%ymm3,%ymm6,%ymm6
6217	vpaddd	%ymm6,%ymm2,%ymm2
6218	vpxor	%ymm2,%ymm14,%ymm14
6219	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
6220	vpaddd	%ymm14,%ymm10,%ymm10
6221	vpxor	%ymm10,%ymm6,%ymm6
6222	vpslld	$7,%ymm6,%ymm3
6223	vpsrld	$25,%ymm6,%ymm6
6224	vpxor	%ymm3,%ymm6,%ymm6
6225	vpalignr	$4,%ymm14,%ymm14,%ymm14
6226	vpalignr	$8,%ymm10,%ymm10,%ymm10
6227	vpalignr	$12,%ymm6,%ymm6,%ymm6
6228
6229	decq	%r10
6230	jne	.Lopen_avx2_320_rounds
6231	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
6232	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
6233	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
6234	vpaddd	%ymm7,%ymm4,%ymm4
6235	vpaddd	%ymm7,%ymm5,%ymm5
6236	vpaddd	%ymm7,%ymm6,%ymm6
6237	vpaddd	%ymm11,%ymm8,%ymm8
6238	vpaddd	%ymm11,%ymm9,%ymm9
6239	vpaddd	%ymm11,%ymm10,%ymm10
6240	vpaddd	0+160(%rbp),%ymm12,%ymm12
6241	vpaddd	0+192(%rbp),%ymm13,%ymm13
6242	vpaddd	0+224(%rbp),%ymm14,%ymm14
6243	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
6244
6245	vpand	.Lclamp(%rip),%ymm3,%ymm3
6246	vmovdqa	%ymm3,0+0(%rbp)
6247
6248	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
6249	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
6250	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
6251	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
6252	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
6253	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
6254	vperm2i128	$0x02,%ymm2,%ymm6,%ymm9
6255	vperm2i128	$0x02,%ymm10,%ymm14,%ymm13
6256	vperm2i128	$0x13,%ymm2,%ymm6,%ymm2
6257	vperm2i128	$0x13,%ymm10,%ymm14,%ymm6
6258	jmp	.Lopen_avx2_short
6259.size	chacha20_poly1305_open_avx2, .-chacha20_poly1305_open_avx2
6260.cfi_endproc
6261
6262
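/* Editorial annotation, not part of the generated file: AVX2 seal (encrypt)
   entry point. It appears to be reached from the SSE seal code with the stack
   frame already established, which would explain why the prologue below
   consists only of CFI directives and no pushes. The ChaCha20 key/nonce state
   is loaded from (%r9), the Poly1305 key is derived from the first keystream
   block, and %rbx holds the plaintext length, mirroring the open path. */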
6263.type	chacha20_poly1305_seal_avx2,@function
6264.align	64
6265chacha20_poly1305_seal_avx2:
6266.cfi_startproc
6267
6268
6269.cfi_adjust_cfa_offset	8
6270.cfi_offset	%rbp,-16
6271.cfi_adjust_cfa_offset	8
6272.cfi_offset	%rbx,-24
6273.cfi_adjust_cfa_offset	8
6274.cfi_offset	%r12,-32
6275.cfi_adjust_cfa_offset	8
6276.cfi_offset	%r13,-40
6277.cfi_adjust_cfa_offset	8
6278.cfi_offset	%r14,-48
6279.cfi_adjust_cfa_offset	8
6280.cfi_offset	%r15,-56
6281.cfi_adjust_cfa_offset	8
6282.cfi_offset	%r9,-64
6283.cfi_adjust_cfa_offset	288 + 32
6284
6285	vzeroupper
6286	vmovdqa	.Lchacha20_consts(%rip),%ymm0
6287	vbroadcasti128	0(%r9),%ymm4
6288	vbroadcasti128	16(%r9),%ymm8
6289	vbroadcasti128	32(%r9),%ymm12
6290	vpaddd	.Lavx2_init(%rip),%ymm12,%ymm12
6291	cmpq	$192,%rbx
6292	jbe	.Lseal_avx2_192
6293	cmpq	$320,%rbx
6294	jbe	.Lseal_avx2_320
6295	vmovdqa	%ymm0,%ymm1
6296	vmovdqa	%ymm0,%ymm2
6297	vmovdqa	%ymm0,%ymm3
6298	vmovdqa	%ymm4,%ymm5
6299	vmovdqa	%ymm4,%ymm6
6300	vmovdqa	%ymm4,%ymm7
6301	vmovdqa	%ymm4,0+64(%rbp)
6302	vmovdqa	%ymm8,%ymm9
6303	vmovdqa	%ymm8,%ymm10
6304	vmovdqa	%ymm8,%ymm11
6305	vmovdqa	%ymm8,0+96(%rbp)
6306	vmovdqa	%ymm12,%ymm15
6307	vpaddd	.Lavx2_inc(%rip),%ymm15,%ymm14
6308	vpaddd	.Lavx2_inc(%rip),%ymm14,%ymm13
6309	vpaddd	.Lavx2_inc(%rip),%ymm13,%ymm12
6310	vmovdqa	%ymm12,0+160(%rbp)
6311	vmovdqa	%ymm13,0+192(%rbp)
6312	vmovdqa	%ymm14,0+224(%rbp)
6313	vmovdqa	%ymm15,0+256(%rbp)
6314	movq	$10,%r10
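/* Editorial annotation, not part of the generated file: first keystream batch
   of the seal path. Ten double-rounds are run over four interleaved ChaCha20
   states; the lowest-counter block is clamped into the Poly1305 key, 320 bytes
   of plaintext are encrypted from this batch, and the leftover keystream stays
   in registers for the main loop or tail handling. */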
6315.Lseal_avx2_init_rounds:
6316	vmovdqa	%ymm8,0+128(%rbp)
6317	vmovdqa	.Lrol16(%rip),%ymm8
6318	vpaddd	%ymm7,%ymm3,%ymm3
6319	vpaddd	%ymm6,%ymm2,%ymm2
6320	vpaddd	%ymm5,%ymm1,%ymm1
6321	vpaddd	%ymm4,%ymm0,%ymm0
6322	vpxor	%ymm3,%ymm15,%ymm15
6323	vpxor	%ymm2,%ymm14,%ymm14
6324	vpxor	%ymm1,%ymm13,%ymm13
6325	vpxor	%ymm0,%ymm12,%ymm12
6326	vpshufb	%ymm8,%ymm15,%ymm15
6327	vpshufb	%ymm8,%ymm14,%ymm14
6328	vpshufb	%ymm8,%ymm13,%ymm13
6329	vpshufb	%ymm8,%ymm12,%ymm12
6330	vpaddd	%ymm15,%ymm11,%ymm11
6331	vpaddd	%ymm14,%ymm10,%ymm10
6332	vpaddd	%ymm13,%ymm9,%ymm9
6333	vpaddd	0+128(%rbp),%ymm12,%ymm8
6334	vpxor	%ymm11,%ymm7,%ymm7
6335	vpxor	%ymm10,%ymm6,%ymm6
6336	vpxor	%ymm9,%ymm5,%ymm5
6337	vpxor	%ymm8,%ymm4,%ymm4
6338	vmovdqa	%ymm8,0+128(%rbp)
6339	vpsrld	$20,%ymm7,%ymm8
6340	vpslld	$32-20,%ymm7,%ymm7
6341	vpxor	%ymm8,%ymm7,%ymm7
6342	vpsrld	$20,%ymm6,%ymm8
6343	vpslld	$32-20,%ymm6,%ymm6
6344	vpxor	%ymm8,%ymm6,%ymm6
6345	vpsrld	$20,%ymm5,%ymm8
6346	vpslld	$32-20,%ymm5,%ymm5
6347	vpxor	%ymm8,%ymm5,%ymm5
6348	vpsrld	$20,%ymm4,%ymm8
6349	vpslld	$32-20,%ymm4,%ymm4
6350	vpxor	%ymm8,%ymm4,%ymm4
6351	vmovdqa	.Lrol8(%rip),%ymm8
6352	vpaddd	%ymm7,%ymm3,%ymm3
6353	vpaddd	%ymm6,%ymm2,%ymm2
6354	vpaddd	%ymm5,%ymm1,%ymm1
6355	vpaddd	%ymm4,%ymm0,%ymm0
6356	vpxor	%ymm3,%ymm15,%ymm15
6357	vpxor	%ymm2,%ymm14,%ymm14
6358	vpxor	%ymm1,%ymm13,%ymm13
6359	vpxor	%ymm0,%ymm12,%ymm12
6360	vpshufb	%ymm8,%ymm15,%ymm15
6361	vpshufb	%ymm8,%ymm14,%ymm14
6362	vpshufb	%ymm8,%ymm13,%ymm13
6363	vpshufb	%ymm8,%ymm12,%ymm12
6364	vpaddd	%ymm15,%ymm11,%ymm11
6365	vpaddd	%ymm14,%ymm10,%ymm10
6366	vpaddd	%ymm13,%ymm9,%ymm9
6367	vpaddd	0+128(%rbp),%ymm12,%ymm8
6368	vpxor	%ymm11,%ymm7,%ymm7
6369	vpxor	%ymm10,%ymm6,%ymm6
6370	vpxor	%ymm9,%ymm5,%ymm5
6371	vpxor	%ymm8,%ymm4,%ymm4
6372	vmovdqa	%ymm8,0+128(%rbp)
6373	vpsrld	$25,%ymm7,%ymm8
6374	vpslld	$32-25,%ymm7,%ymm7
6375	vpxor	%ymm8,%ymm7,%ymm7
6376	vpsrld	$25,%ymm6,%ymm8
6377	vpslld	$32-25,%ymm6,%ymm6
6378	vpxor	%ymm8,%ymm6,%ymm6
6379	vpsrld	$25,%ymm5,%ymm8
6380	vpslld	$32-25,%ymm5,%ymm5
6381	vpxor	%ymm8,%ymm5,%ymm5
6382	vpsrld	$25,%ymm4,%ymm8
6383	vpslld	$32-25,%ymm4,%ymm4
6384	vpxor	%ymm8,%ymm4,%ymm4
6385	vmovdqa	0+128(%rbp),%ymm8
6386	vpalignr	$4,%ymm7,%ymm7,%ymm7
6387	vpalignr	$8,%ymm11,%ymm11,%ymm11
6388	vpalignr	$12,%ymm15,%ymm15,%ymm15
6389	vpalignr	$4,%ymm6,%ymm6,%ymm6
6390	vpalignr	$8,%ymm10,%ymm10,%ymm10
6391	vpalignr	$12,%ymm14,%ymm14,%ymm14
6392	vpalignr	$4,%ymm5,%ymm5,%ymm5
6393	vpalignr	$8,%ymm9,%ymm9,%ymm9
6394	vpalignr	$12,%ymm13,%ymm13,%ymm13
6395	vpalignr	$4,%ymm4,%ymm4,%ymm4
6396	vpalignr	$8,%ymm8,%ymm8,%ymm8
6397	vpalignr	$12,%ymm12,%ymm12,%ymm12
6398	vmovdqa	%ymm8,0+128(%rbp)
6399	vmovdqa	.Lrol16(%rip),%ymm8
6400	vpaddd	%ymm7,%ymm3,%ymm3
6401	vpaddd	%ymm6,%ymm2,%ymm2
6402	vpaddd	%ymm5,%ymm1,%ymm1
6403	vpaddd	%ymm4,%ymm0,%ymm0
6404	vpxor	%ymm3,%ymm15,%ymm15
6405	vpxor	%ymm2,%ymm14,%ymm14
6406	vpxor	%ymm1,%ymm13,%ymm13
6407	vpxor	%ymm0,%ymm12,%ymm12
6408	vpshufb	%ymm8,%ymm15,%ymm15
6409	vpshufb	%ymm8,%ymm14,%ymm14
6410	vpshufb	%ymm8,%ymm13,%ymm13
6411	vpshufb	%ymm8,%ymm12,%ymm12
6412	vpaddd	%ymm15,%ymm11,%ymm11
6413	vpaddd	%ymm14,%ymm10,%ymm10
6414	vpaddd	%ymm13,%ymm9,%ymm9
6415	vpaddd	0+128(%rbp),%ymm12,%ymm8
6416	vpxor	%ymm11,%ymm7,%ymm7
6417	vpxor	%ymm10,%ymm6,%ymm6
6418	vpxor	%ymm9,%ymm5,%ymm5
6419	vpxor	%ymm8,%ymm4,%ymm4
6420	vmovdqa	%ymm8,0+128(%rbp)
6421	vpsrld	$20,%ymm7,%ymm8
6422	vpslld	$32-20,%ymm7,%ymm7
6423	vpxor	%ymm8,%ymm7,%ymm7
6424	vpsrld	$20,%ymm6,%ymm8
6425	vpslld	$32-20,%ymm6,%ymm6
6426	vpxor	%ymm8,%ymm6,%ymm6
6427	vpsrld	$20,%ymm5,%ymm8
6428	vpslld	$32-20,%ymm5,%ymm5
6429	vpxor	%ymm8,%ymm5,%ymm5
6430	vpsrld	$20,%ymm4,%ymm8
6431	vpslld	$32-20,%ymm4,%ymm4
6432	vpxor	%ymm8,%ymm4,%ymm4
6433	vmovdqa	.Lrol8(%rip),%ymm8
6434	vpaddd	%ymm7,%ymm3,%ymm3
6435	vpaddd	%ymm6,%ymm2,%ymm2
6436	vpaddd	%ymm5,%ymm1,%ymm1
6437	vpaddd	%ymm4,%ymm0,%ymm0
6438	vpxor	%ymm3,%ymm15,%ymm15
6439	vpxor	%ymm2,%ymm14,%ymm14
6440	vpxor	%ymm1,%ymm13,%ymm13
6441	vpxor	%ymm0,%ymm12,%ymm12
6442	vpshufb	%ymm8,%ymm15,%ymm15
6443	vpshufb	%ymm8,%ymm14,%ymm14
6444	vpshufb	%ymm8,%ymm13,%ymm13
6445	vpshufb	%ymm8,%ymm12,%ymm12
6446	vpaddd	%ymm15,%ymm11,%ymm11
6447	vpaddd	%ymm14,%ymm10,%ymm10
6448	vpaddd	%ymm13,%ymm9,%ymm9
6449	vpaddd	0+128(%rbp),%ymm12,%ymm8
6450	vpxor	%ymm11,%ymm7,%ymm7
6451	vpxor	%ymm10,%ymm6,%ymm6
6452	vpxor	%ymm9,%ymm5,%ymm5
6453	vpxor	%ymm8,%ymm4,%ymm4
6454	vmovdqa	%ymm8,0+128(%rbp)
6455	vpsrld	$25,%ymm7,%ymm8
6456	vpslld	$32-25,%ymm7,%ymm7
6457	vpxor	%ymm8,%ymm7,%ymm7
6458	vpsrld	$25,%ymm6,%ymm8
6459	vpslld	$32-25,%ymm6,%ymm6
6460	vpxor	%ymm8,%ymm6,%ymm6
6461	vpsrld	$25,%ymm5,%ymm8
6462	vpslld	$32-25,%ymm5,%ymm5
6463	vpxor	%ymm8,%ymm5,%ymm5
6464	vpsrld	$25,%ymm4,%ymm8
6465	vpslld	$32-25,%ymm4,%ymm4
6466	vpxor	%ymm8,%ymm4,%ymm4
6467	vmovdqa	0+128(%rbp),%ymm8
6468	vpalignr	$12,%ymm7,%ymm7,%ymm7
6469	vpalignr	$8,%ymm11,%ymm11,%ymm11
6470	vpalignr	$4,%ymm15,%ymm15,%ymm15
6471	vpalignr	$12,%ymm6,%ymm6,%ymm6
6472	vpalignr	$8,%ymm10,%ymm10,%ymm10
6473	vpalignr	$4,%ymm14,%ymm14,%ymm14
6474	vpalignr	$12,%ymm5,%ymm5,%ymm5
6475	vpalignr	$8,%ymm9,%ymm9,%ymm9
6476	vpalignr	$4,%ymm13,%ymm13,%ymm13
6477	vpalignr	$12,%ymm4,%ymm4,%ymm4
6478	vpalignr	$8,%ymm8,%ymm8,%ymm8
6479	vpalignr	$4,%ymm12,%ymm12,%ymm12
6480
6481	decq	%r10
6482	jnz	.Lseal_avx2_init_rounds
6483	vpaddd	.Lchacha20_consts(%rip),%ymm3,%ymm3
6484	vpaddd	0+64(%rbp),%ymm7,%ymm7
6485	vpaddd	0+96(%rbp),%ymm11,%ymm11
6486	vpaddd	0+256(%rbp),%ymm15,%ymm15
6487	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
6488	vpaddd	0+64(%rbp),%ymm6,%ymm6
6489	vpaddd	0+96(%rbp),%ymm10,%ymm10
6490	vpaddd	0+224(%rbp),%ymm14,%ymm14
6491	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
6492	vpaddd	0+64(%rbp),%ymm5,%ymm5
6493	vpaddd	0+96(%rbp),%ymm9,%ymm9
6494	vpaddd	0+192(%rbp),%ymm13,%ymm13
6495	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
6496	vpaddd	0+64(%rbp),%ymm4,%ymm4
6497	vpaddd	0+96(%rbp),%ymm8,%ymm8
6498	vpaddd	0+160(%rbp),%ymm12,%ymm12
6499
6500	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
6501	vperm2i128	$0x02,%ymm3,%ymm7,%ymm15
6502	vperm2i128	$0x13,%ymm3,%ymm7,%ymm3
6503	vpand	.Lclamp(%rip),%ymm15,%ymm15
6504	vmovdqa	%ymm15,0+0(%rbp)
6505	movq	%r8,%r8
6506	call	poly_hash_ad_internal
6507
6508	vpxor	0(%rsi),%ymm3,%ymm3
6509	vpxor	32(%rsi),%ymm11,%ymm11
6510	vmovdqu	%ymm3,0(%rdi)
6511	vmovdqu	%ymm11,32(%rdi)
6512	vperm2i128	$0x02,%ymm2,%ymm6,%ymm15
6513	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
6514	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
6515	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
6516	vpxor	0+64(%rsi),%ymm15,%ymm15
6517	vpxor	32+64(%rsi),%ymm2,%ymm2
6518	vpxor	64+64(%rsi),%ymm6,%ymm6
6519	vpxor	96+64(%rsi),%ymm10,%ymm10
6520	vmovdqu	%ymm15,0+64(%rdi)
6521	vmovdqu	%ymm2,32+64(%rdi)
6522	vmovdqu	%ymm6,64+64(%rdi)
6523	vmovdqu	%ymm10,96+64(%rdi)
6524	vperm2i128	$0x02,%ymm1,%ymm5,%ymm15
6525	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
6526	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
6527	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
6528	vpxor	0+192(%rsi),%ymm15,%ymm15
6529	vpxor	32+192(%rsi),%ymm1,%ymm1
6530	vpxor	64+192(%rsi),%ymm5,%ymm5
6531	vpxor	96+192(%rsi),%ymm9,%ymm9
6532	vmovdqu	%ymm15,0+192(%rdi)
6533	vmovdqu	%ymm1,32+192(%rdi)
6534	vmovdqu	%ymm5,64+192(%rdi)
6535	vmovdqu	%ymm9,96+192(%rdi)
6536	vperm2i128	$0x13,%ymm0,%ymm4,%ymm15
6537	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
6538	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
6539	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
6540	vmovdqa	%ymm15,%ymm8
6541
6542	leaq	320(%rsi),%rsi
6543	subq	$320,%rbx
6544	movq	$320,%rcx
6545	cmpq	$128,%rbx
6546	jbe	.Lseal_avx2_short_hash_remainder
6547	vpxor	0(%rsi),%ymm0,%ymm0
6548	vpxor	32(%rsi),%ymm4,%ymm4
6549	vpxor	64(%rsi),%ymm8,%ymm8
6550	vpxor	96(%rsi),%ymm12,%ymm12
6551	vmovdqu	%ymm0,320(%rdi)
6552	vmovdqu	%ymm4,352(%rdi)
6553	vmovdqu	%ymm8,384(%rdi)
6554	vmovdqu	%ymm12,416(%rdi)
6555	leaq	128(%rsi),%rsi
6556	subq	$128,%rbx
6557	movq	$8,%rcx
6558	movq	$2,%r8
6559	cmpq	$128,%rbx
6560	jbe	.Lseal_avx2_tail_128
6561	cmpq	$256,%rbx
6562	jbe	.Lseal_avx2_tail_256
6563	cmpq	$384,%rbx
6564	jbe	.Lseal_avx2_tail_384
6565	cmpq	$512,%rbx
6566	jbe	.Lseal_avx2_tail_512
6567	vmovdqa	.Lchacha20_consts(%rip),%ymm0
6568	vmovdqa	0+64(%rbp),%ymm4
6569	vmovdqa	0+96(%rbp),%ymm8
6570	vmovdqa	%ymm0,%ymm1
6571	vmovdqa	%ymm4,%ymm5
6572	vmovdqa	%ymm8,%ymm9
6573	vmovdqa	%ymm0,%ymm2
6574	vmovdqa	%ymm4,%ymm6
6575	vmovdqa	%ymm8,%ymm10
6576	vmovdqa	%ymm0,%ymm3
6577	vmovdqa	%ymm4,%ymm7
6578	vmovdqa	%ymm8,%ymm11
6579	vmovdqa	.Lavx2_inc(%rip),%ymm12
6580	vpaddd	0+160(%rbp),%ymm12,%ymm15
6581	vpaddd	%ymm15,%ymm12,%ymm14
6582	vpaddd	%ymm14,%ymm12,%ymm13
6583	vpaddd	%ymm13,%ymm12,%ymm12
6584	vmovdqa	%ymm15,0+256(%rbp)
6585	vmovdqa	%ymm14,0+224(%rbp)
6586	vmovdqa	%ymm13,0+192(%rbp)
6587	vmovdqa	%ymm12,0+160(%rbp)
6588	vmovdqa	%ymm8,0+128(%rbp)
6589	vmovdqa	.Lrol16(%rip),%ymm8
6590	vpaddd	%ymm7,%ymm3,%ymm3
6591	vpaddd	%ymm6,%ymm2,%ymm2
6592	vpaddd	%ymm5,%ymm1,%ymm1
6593	vpaddd	%ymm4,%ymm0,%ymm0
6594	vpxor	%ymm3,%ymm15,%ymm15
6595	vpxor	%ymm2,%ymm14,%ymm14
6596	vpxor	%ymm1,%ymm13,%ymm13
6597	vpxor	%ymm0,%ymm12,%ymm12
6598	vpshufb	%ymm8,%ymm15,%ymm15
6599	vpshufb	%ymm8,%ymm14,%ymm14
6600	vpshufb	%ymm8,%ymm13,%ymm13
6601	vpshufb	%ymm8,%ymm12,%ymm12
6602	vpaddd	%ymm15,%ymm11,%ymm11
6603	vpaddd	%ymm14,%ymm10,%ymm10
6604	vpaddd	%ymm13,%ymm9,%ymm9
6605	vpaddd	0+128(%rbp),%ymm12,%ymm8
6606	vpxor	%ymm11,%ymm7,%ymm7
6607	vpxor	%ymm10,%ymm6,%ymm6
6608	vpxor	%ymm9,%ymm5,%ymm5
6609	vpxor	%ymm8,%ymm4,%ymm4
6610	vmovdqa	%ymm8,0+128(%rbp)
6611	vpsrld	$20,%ymm7,%ymm8
6612	vpslld	$32-20,%ymm7,%ymm7
6613	vpxor	%ymm8,%ymm7,%ymm7
6614	vpsrld	$20,%ymm6,%ymm8
6615	vpslld	$32-20,%ymm6,%ymm6
6616	vpxor	%ymm8,%ymm6,%ymm6
6617	vpsrld	$20,%ymm5,%ymm8
6618	vpslld	$32-20,%ymm5,%ymm5
6619	vpxor	%ymm8,%ymm5,%ymm5
6620	vpsrld	$20,%ymm4,%ymm8
6621	vpslld	$32-20,%ymm4,%ymm4
6622	vpxor	%ymm8,%ymm4,%ymm4
6623	vmovdqa	.Lrol8(%rip),%ymm8
6624	vpaddd	%ymm7,%ymm3,%ymm3
6625	vpaddd	%ymm6,%ymm2,%ymm2
6626	vpaddd	%ymm5,%ymm1,%ymm1
6627	vpaddd	%ymm4,%ymm0,%ymm0
6628	vpxor	%ymm3,%ymm15,%ymm15
6629	vpxor	%ymm2,%ymm14,%ymm14
6630	vpxor	%ymm1,%ymm13,%ymm13
6631	vpxor	%ymm0,%ymm12,%ymm12
6632	vpshufb	%ymm8,%ymm15,%ymm15
6633	vpshufb	%ymm8,%ymm14,%ymm14
6634	vpshufb	%ymm8,%ymm13,%ymm13
6635	vpshufb	%ymm8,%ymm12,%ymm12
6636	vpaddd	%ymm15,%ymm11,%ymm11
6637	vpaddd	%ymm14,%ymm10,%ymm10
6638	vpaddd	%ymm13,%ymm9,%ymm9
6639	vpaddd	0+128(%rbp),%ymm12,%ymm8
6640	vpxor	%ymm11,%ymm7,%ymm7
6641	vpxor	%ymm10,%ymm6,%ymm6
6642	vpxor	%ymm9,%ymm5,%ymm5
6643	vpxor	%ymm8,%ymm4,%ymm4
6644	vmovdqa	%ymm8,0+128(%rbp)
6645	vpsrld	$25,%ymm7,%ymm8
6646	vpslld	$32-25,%ymm7,%ymm7
6647	vpxor	%ymm8,%ymm7,%ymm7
6648	vpsrld	$25,%ymm6,%ymm8
6649	vpslld	$32-25,%ymm6,%ymm6
6650	vpxor	%ymm8,%ymm6,%ymm6
6651	vpsrld	$25,%ymm5,%ymm8
6652	vpslld	$32-25,%ymm5,%ymm5
6653	vpxor	%ymm8,%ymm5,%ymm5
6654	vpsrld	$25,%ymm4,%ymm8
6655	vpslld	$32-25,%ymm4,%ymm4
6656	vpxor	%ymm8,%ymm4,%ymm4
6657	vmovdqa	0+128(%rbp),%ymm8
6658	vpalignr	$4,%ymm7,%ymm7,%ymm7
6659	vpalignr	$8,%ymm11,%ymm11,%ymm11
6660	vpalignr	$12,%ymm15,%ymm15,%ymm15
6661	vpalignr	$4,%ymm6,%ymm6,%ymm6
6662	vpalignr	$8,%ymm10,%ymm10,%ymm10
6663	vpalignr	$12,%ymm14,%ymm14,%ymm14
6664	vpalignr	$4,%ymm5,%ymm5,%ymm5
6665	vpalignr	$8,%ymm9,%ymm9,%ymm9
6666	vpalignr	$12,%ymm13,%ymm13,%ymm13
6667	vpalignr	$4,%ymm4,%ymm4,%ymm4
6668	vpalignr	$8,%ymm8,%ymm8,%ymm8
6669	vpalignr	$12,%ymm12,%ymm12,%ymm12
6670	vmovdqa	%ymm8,0+128(%rbp)
6671	vmovdqa	.Lrol16(%rip),%ymm8
6672	vpaddd	%ymm7,%ymm3,%ymm3
6673	vpaddd	%ymm6,%ymm2,%ymm2
6674	vpaddd	%ymm5,%ymm1,%ymm1
6675	vpaddd	%ymm4,%ymm0,%ymm0
6676	vpxor	%ymm3,%ymm15,%ymm15
6677	vpxor	%ymm2,%ymm14,%ymm14
6678	vpxor	%ymm1,%ymm13,%ymm13
6679	vpxor	%ymm0,%ymm12,%ymm12
6680	vpshufb	%ymm8,%ymm15,%ymm15
6681	vpshufb	%ymm8,%ymm14,%ymm14
6682	vpshufb	%ymm8,%ymm13,%ymm13
6683	vpshufb	%ymm8,%ymm12,%ymm12
6684	vpaddd	%ymm15,%ymm11,%ymm11
6685	vpaddd	%ymm14,%ymm10,%ymm10
6686	vpaddd	%ymm13,%ymm9,%ymm9
6687	vpaddd	0+128(%rbp),%ymm12,%ymm8
6688	vpxor	%ymm11,%ymm7,%ymm7
6689	vpxor	%ymm10,%ymm6,%ymm6
6690	vpxor	%ymm9,%ymm5,%ymm5
6691	vpxor	%ymm8,%ymm4,%ymm4
6692	vmovdqa	%ymm8,0+128(%rbp)
6693	vpsrld	$20,%ymm7,%ymm8
6694	vpslld	$32-20,%ymm7,%ymm7
6695	vpxor	%ymm8,%ymm7,%ymm7
6696	vpsrld	$20,%ymm6,%ymm8
6697	vpslld	$32-20,%ymm6,%ymm6
6698	vpxor	%ymm8,%ymm6,%ymm6
6699	vpsrld	$20,%ymm5,%ymm8
6700	vpslld	$32-20,%ymm5,%ymm5
6701	vpxor	%ymm8,%ymm5,%ymm5
6702	vpsrld	$20,%ymm4,%ymm8
6703	vpslld	$32-20,%ymm4,%ymm4
6704	vpxor	%ymm8,%ymm4,%ymm4
6705	vmovdqa	.Lrol8(%rip),%ymm8
6706	vpaddd	%ymm7,%ymm3,%ymm3
6707	vpaddd	%ymm6,%ymm2,%ymm2
6708	vpaddd	%ymm5,%ymm1,%ymm1
6709	vpaddd	%ymm4,%ymm0,%ymm0
6710	vpxor	%ymm3,%ymm15,%ymm15
6711	vpxor	%ymm2,%ymm14,%ymm14
6712	vpxor	%ymm1,%ymm13,%ymm13
6713	vpxor	%ymm0,%ymm12,%ymm12
6714	vpshufb	%ymm8,%ymm15,%ymm15
6715	vpshufb	%ymm8,%ymm14,%ymm14
6716	vpshufb	%ymm8,%ymm13,%ymm13
6717	vpshufb	%ymm8,%ymm12,%ymm12
6718	vpaddd	%ymm15,%ymm11,%ymm11
6719	vpaddd	%ymm14,%ymm10,%ymm10
6720	vpaddd	%ymm13,%ymm9,%ymm9
6721	vpaddd	0+128(%rbp),%ymm12,%ymm8
6722	vpxor	%ymm11,%ymm7,%ymm7
6723	vpxor	%ymm10,%ymm6,%ymm6
6724	vpxor	%ymm9,%ymm5,%ymm5
6725	vpxor	%ymm8,%ymm4,%ymm4
6726	vmovdqa	%ymm8,0+128(%rbp)
6727	vpsrld	$25,%ymm7,%ymm8
6728	vpslld	$32-25,%ymm7,%ymm7
6729	vpxor	%ymm8,%ymm7,%ymm7
6730	vpsrld	$25,%ymm6,%ymm8
6731	vpslld	$32-25,%ymm6,%ymm6
6732	vpxor	%ymm8,%ymm6,%ymm6
6733	vpsrld	$25,%ymm5,%ymm8
6734	vpslld	$32-25,%ymm5,%ymm5
6735	vpxor	%ymm8,%ymm5,%ymm5
6736	vpsrld	$25,%ymm4,%ymm8
6737	vpslld	$32-25,%ymm4,%ymm4
6738	vpxor	%ymm8,%ymm4,%ymm4
6739	vmovdqa	0+128(%rbp),%ymm8
6740	vpalignr	$12,%ymm7,%ymm7,%ymm7
6741	vpalignr	$8,%ymm11,%ymm11,%ymm11
6742	vpalignr	$4,%ymm15,%ymm15,%ymm15
6743	vpalignr	$12,%ymm6,%ymm6,%ymm6
6744	vpalignr	$8,%ymm10,%ymm10,%ymm10
6745	vpalignr	$4,%ymm14,%ymm14,%ymm14
6746	vpalignr	$12,%ymm5,%ymm5,%ymm5
6747	vpalignr	$8,%ymm9,%ymm9,%ymm9
6748	vpalignr	$4,%ymm13,%ymm13,%ymm13
6749	vpalignr	$12,%ymm4,%ymm4,%ymm4
6750	vpalignr	$8,%ymm8,%ymm8,%ymm8
6751	vpalignr	$4,%ymm12,%ymm12,%ymm12
6752	vmovdqa	%ymm8,0+128(%rbp)
6753	vmovdqa	.Lrol16(%rip),%ymm8
6754	vpaddd	%ymm7,%ymm3,%ymm3
6755	vpaddd	%ymm6,%ymm2,%ymm2
6756	vpaddd	%ymm5,%ymm1,%ymm1
6757	vpaddd	%ymm4,%ymm0,%ymm0
6758	vpxor	%ymm3,%ymm15,%ymm15
6759	vpxor	%ymm2,%ymm14,%ymm14
6760	vpxor	%ymm1,%ymm13,%ymm13
6761	vpxor	%ymm0,%ymm12,%ymm12
6762	vpshufb	%ymm8,%ymm15,%ymm15
6763	vpshufb	%ymm8,%ymm14,%ymm14
6764	vpshufb	%ymm8,%ymm13,%ymm13
6765	vpshufb	%ymm8,%ymm12,%ymm12
6766	vpaddd	%ymm15,%ymm11,%ymm11
6767	vpaddd	%ymm14,%ymm10,%ymm10
6768	vpaddd	%ymm13,%ymm9,%ymm9
6769	vpaddd	0+128(%rbp),%ymm12,%ymm8
6770	vpxor	%ymm11,%ymm7,%ymm7
6771	vpxor	%ymm10,%ymm6,%ymm6
6772	vpxor	%ymm9,%ymm5,%ymm5
6773	vpxor	%ymm8,%ymm4,%ymm4
6774	vmovdqa	%ymm8,0+128(%rbp)
6775	vpsrld	$20,%ymm7,%ymm8
6776	vpslld	$32-20,%ymm7,%ymm7
6777	vpxor	%ymm8,%ymm7,%ymm7
6778	vpsrld	$20,%ymm6,%ymm8
6779	vpslld	$32-20,%ymm6,%ymm6
6780	vpxor	%ymm8,%ymm6,%ymm6
6781	vpsrld	$20,%ymm5,%ymm8
6782	vpslld	$32-20,%ymm5,%ymm5
6783	vpxor	%ymm8,%ymm5,%ymm5
6784	vpsrld	$20,%ymm4,%ymm8
6785	vpslld	$32-20,%ymm4,%ymm4
6786	vpxor	%ymm8,%ymm4,%ymm4
6787	vmovdqa	.Lrol8(%rip),%ymm8
6788	vpaddd	%ymm7,%ymm3,%ymm3
6789	vpaddd	%ymm6,%ymm2,%ymm2
6790	vpaddd	%ymm5,%ymm1,%ymm1
6791	vpaddd	%ymm4,%ymm0,%ymm0
6792	vpxor	%ymm3,%ymm15,%ymm15
6793
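# First pass: the rounds above ran without Poly1305 interleaving, so back the
# hash pointer up 16 bytes and enter the interleaved loop with 9 iterations
# remaining, joining it after its first Poly1305 block.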
6794	subq	$16,%rdi
6795	movq	$9,%rcx
6796	jmp	.Lseal_avx2_main_loop_rounds_entry
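# Main seal loop: each iteration runs four parallel two-block ChaCha20 states
# (512 bytes of keystream) for 10 double rounds, interleaved with Poly1305
# over ciphertext already written at %rdi, then XORs and stores 512 bytes.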
6797.align	32
6798.Lseal_avx2_main_loop:
6799	vmovdqa	.Lchacha20_consts(%rip),%ymm0
6800	vmovdqa	0+64(%rbp),%ymm4
6801	vmovdqa	0+96(%rbp),%ymm8
6802	vmovdqa	%ymm0,%ymm1
6803	vmovdqa	%ymm4,%ymm5
6804	vmovdqa	%ymm8,%ymm9
6805	vmovdqa	%ymm0,%ymm2
6806	vmovdqa	%ymm4,%ymm6
6807	vmovdqa	%ymm8,%ymm10
6808	vmovdqa	%ymm0,%ymm3
6809	vmovdqa	%ymm4,%ymm7
6810	vmovdqa	%ymm8,%ymm11
6811	vmovdqa	.Lavx2_inc(%rip),%ymm12
6812	vpaddd	0+160(%rbp),%ymm12,%ymm15
6813	vpaddd	%ymm15,%ymm12,%ymm14
6814	vpaddd	%ymm14,%ymm12,%ymm13
6815	vpaddd	%ymm13,%ymm12,%ymm12
6816	vmovdqa	%ymm15,0+256(%rbp)
6817	vmovdqa	%ymm14,0+224(%rbp)
6818	vmovdqa	%ymm13,0+192(%rbp)
6819	vmovdqa	%ymm12,0+160(%rbp)
6820
6821	movq	$10,%rcx
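# 10 iterations: each performs one ChaCha20 double round on all four states
# plus three 16-byte Poly1305 blocks (%rdi advances by 48 per iteration).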
6822.align	32
6823.Lseal_avx2_main_loop_rounds:
6824	addq	0+0(%rdi),%r10
6825	adcq	8+0(%rdi),%r11
6826	adcq	$1,%r12
6827	vmovdqa	%ymm8,0+128(%rbp)
6828	vmovdqa	.Lrol16(%rip),%ymm8
6829	vpaddd	%ymm7,%ymm3,%ymm3
6830	vpaddd	%ymm6,%ymm2,%ymm2
6831	vpaddd	%ymm5,%ymm1,%ymm1
6832	vpaddd	%ymm4,%ymm0,%ymm0
6833	vpxor	%ymm3,%ymm15,%ymm15
6834	vpxor	%ymm2,%ymm14,%ymm14
6835	vpxor	%ymm1,%ymm13,%ymm13
6836	vpxor	%ymm0,%ymm12,%ymm12
6837	movq	0+0+0(%rbp),%rdx
6838	movq	%rdx,%r15
6839	mulxq	%r10,%r13,%r14
6840	mulxq	%r11,%rax,%rdx
6841	imulq	%r12,%r15
6842	addq	%rax,%r14
6843	adcq	%rdx,%r15
6844	vpshufb	%ymm8,%ymm15,%ymm15
6845	vpshufb	%ymm8,%ymm14,%ymm14
6846	vpshufb	%ymm8,%ymm13,%ymm13
6847	vpshufb	%ymm8,%ymm12,%ymm12
6848	vpaddd	%ymm15,%ymm11,%ymm11
6849	vpaddd	%ymm14,%ymm10,%ymm10
6850	vpaddd	%ymm13,%ymm9,%ymm9
6851	vpaddd	0+128(%rbp),%ymm12,%ymm8
6852	vpxor	%ymm11,%ymm7,%ymm7
6853	movq	8+0+0(%rbp),%rdx
6854	mulxq	%r10,%r10,%rax
6855	addq	%r10,%r14
6856	mulxq	%r11,%r11,%r9
6857	adcq	%r11,%r15
6858	adcq	$0,%r9
6859	imulq	%r12,%rdx
6860	vpxor	%ymm10,%ymm6,%ymm6
6861	vpxor	%ymm9,%ymm5,%ymm5
6862	vpxor	%ymm8,%ymm4,%ymm4
6863	vmovdqa	%ymm8,0+128(%rbp)
6864	vpsrld	$20,%ymm7,%ymm8
6865	vpslld	$32-20,%ymm7,%ymm7
6866	vpxor	%ymm8,%ymm7,%ymm7
6867	vpsrld	$20,%ymm6,%ymm8
6868	vpslld	$32-20,%ymm6,%ymm6
6869	vpxor	%ymm8,%ymm6,%ymm6
6870	vpsrld	$20,%ymm5,%ymm8
6871	vpslld	$32-20,%ymm5,%ymm5
6872	addq	%rax,%r15
6873	adcq	%rdx,%r9
6874	vpxor	%ymm8,%ymm5,%ymm5
6875	vpsrld	$20,%ymm4,%ymm8
6876	vpslld	$32-20,%ymm4,%ymm4
6877	vpxor	%ymm8,%ymm4,%ymm4
6878	vmovdqa	.Lrol8(%rip),%ymm8
6879	vpaddd	%ymm7,%ymm3,%ymm3
6880	vpaddd	%ymm6,%ymm2,%ymm2
6881	vpaddd	%ymm5,%ymm1,%ymm1
6882	vpaddd	%ymm4,%ymm0,%ymm0
6883	vpxor	%ymm3,%ymm15,%ymm15
6884	movq	%r13,%r10
6885	movq	%r14,%r11
6886	movq	%r15,%r12
6887	andq	$3,%r12
6888	movq	%r15,%r13
6889	andq	$-4,%r13
6890	movq	%r9,%r14
6891	shrdq	$2,%r9,%r15
6892	shrq	$2,%r9
6893	addq	%r13,%r15
6894	adcq	%r14,%r9
6895	addq	%r15,%r10
6896	adcq	%r9,%r11
6897	adcq	$0,%r12
6898
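# First-pass entry point: joins the loop body after its first Poly1305 block.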
6899.Lseal_avx2_main_loop_rounds_entry:
6900	vpxor	%ymm2,%ymm14,%ymm14
6901	vpxor	%ymm1,%ymm13,%ymm13
6902	vpxor	%ymm0,%ymm12,%ymm12
6903	vpshufb	%ymm8,%ymm15,%ymm15
6904	vpshufb	%ymm8,%ymm14,%ymm14
6905	vpshufb	%ymm8,%ymm13,%ymm13
6906	vpshufb	%ymm8,%ymm12,%ymm12
6907	vpaddd	%ymm15,%ymm11,%ymm11
6908	vpaddd	%ymm14,%ymm10,%ymm10
6909	addq	0+16(%rdi),%r10
6910	adcq	8+16(%rdi),%r11
6911	adcq	$1,%r12
6912	vpaddd	%ymm13,%ymm9,%ymm9
6913	vpaddd	0+128(%rbp),%ymm12,%ymm8
6914	vpxor	%ymm11,%ymm7,%ymm7
6915	vpxor	%ymm10,%ymm6,%ymm6
6916	vpxor	%ymm9,%ymm5,%ymm5
6917	vpxor	%ymm8,%ymm4,%ymm4
6918	vmovdqa	%ymm8,0+128(%rbp)
6919	vpsrld	$25,%ymm7,%ymm8
6920	movq	0+0+0(%rbp),%rdx
6921	movq	%rdx,%r15
6922	mulxq	%r10,%r13,%r14
6923	mulxq	%r11,%rax,%rdx
6924	imulq	%r12,%r15
6925	addq	%rax,%r14
6926	adcq	%rdx,%r15
6927	vpslld	$32-25,%ymm7,%ymm7
6928	vpxor	%ymm8,%ymm7,%ymm7
6929	vpsrld	$25,%ymm6,%ymm8
6930	vpslld	$32-25,%ymm6,%ymm6
6931	vpxor	%ymm8,%ymm6,%ymm6
6932	vpsrld	$25,%ymm5,%ymm8
6933	vpslld	$32-25,%ymm5,%ymm5
6934	vpxor	%ymm8,%ymm5,%ymm5
6935	vpsrld	$25,%ymm4,%ymm8
6936	vpslld	$32-25,%ymm4,%ymm4
6937	vpxor	%ymm8,%ymm4,%ymm4
6938	vmovdqa	0+128(%rbp),%ymm8
6939	vpalignr	$4,%ymm7,%ymm7,%ymm7
6940	vpalignr	$8,%ymm11,%ymm11,%ymm11
6941	vpalignr	$12,%ymm15,%ymm15,%ymm15
6942	vpalignr	$4,%ymm6,%ymm6,%ymm6
6943	vpalignr	$8,%ymm10,%ymm10,%ymm10
6944	vpalignr	$12,%ymm14,%ymm14,%ymm14
6945	movq	8+0+0(%rbp),%rdx
6946	mulxq	%r10,%r10,%rax
6947	addq	%r10,%r14
6948	mulxq	%r11,%r11,%r9
6949	adcq	%r11,%r15
6950	adcq	$0,%r9
6951	imulq	%r12,%rdx
6952	vpalignr	$4,%ymm5,%ymm5,%ymm5
6953	vpalignr	$8,%ymm9,%ymm9,%ymm9
6954	vpalignr	$12,%ymm13,%ymm13,%ymm13
6955	vpalignr	$4,%ymm4,%ymm4,%ymm4
6956	vpalignr	$8,%ymm8,%ymm8,%ymm8
6957	vpalignr	$12,%ymm12,%ymm12,%ymm12
6958	vmovdqa	%ymm8,0+128(%rbp)
6959	vmovdqa	.Lrol16(%rip),%ymm8
6960	vpaddd	%ymm7,%ymm3,%ymm3
6961	vpaddd	%ymm6,%ymm2,%ymm2
6962	vpaddd	%ymm5,%ymm1,%ymm1
6963	vpaddd	%ymm4,%ymm0,%ymm0
6964	vpxor	%ymm3,%ymm15,%ymm15
6965	vpxor	%ymm2,%ymm14,%ymm14
6966	vpxor	%ymm1,%ymm13,%ymm13
6967	vpxor	%ymm0,%ymm12,%ymm12
6968	vpshufb	%ymm8,%ymm15,%ymm15
6969	vpshufb	%ymm8,%ymm14,%ymm14
6970	addq	%rax,%r15
6971	adcq	%rdx,%r9
6972	vpshufb	%ymm8,%ymm13,%ymm13
6973	vpshufb	%ymm8,%ymm12,%ymm12
6974	vpaddd	%ymm15,%ymm11,%ymm11
6975	vpaddd	%ymm14,%ymm10,%ymm10
6976	vpaddd	%ymm13,%ymm9,%ymm9
6977	vpaddd	0+128(%rbp),%ymm12,%ymm8
6978	vpxor	%ymm11,%ymm7,%ymm7
6979	vpxor	%ymm10,%ymm6,%ymm6
6980	vpxor	%ymm9,%ymm5,%ymm5
6981	movq	%r13,%r10
6982	movq	%r14,%r11
6983	movq	%r15,%r12
6984	andq	$3,%r12
6985	movq	%r15,%r13
6986	andq	$-4,%r13
6987	movq	%r9,%r14
6988	shrdq	$2,%r9,%r15
6989	shrq	$2,%r9
6990	addq	%r13,%r15
6991	adcq	%r14,%r9
6992	addq	%r15,%r10
6993	adcq	%r9,%r11
6994	adcq	$0,%r12
6995	vpxor	%ymm8,%ymm4,%ymm4
6996	vmovdqa	%ymm8,0+128(%rbp)
6997	vpsrld	$20,%ymm7,%ymm8
6998	vpslld	$32-20,%ymm7,%ymm7
6999	vpxor	%ymm8,%ymm7,%ymm7
7000	vpsrld	$20,%ymm6,%ymm8
7001	vpslld	$32-20,%ymm6,%ymm6
7002	vpxor	%ymm8,%ymm6,%ymm6
7003	addq	0+32(%rdi),%r10
7004	adcq	8+32(%rdi),%r11
7005	adcq	$1,%r12
7006
7007	leaq	48(%rdi),%rdi
7008	vpsrld	$20,%ymm5,%ymm8
7009	vpslld	$32-20,%ymm5,%ymm5
7010	vpxor	%ymm8,%ymm5,%ymm5
7011	vpsrld	$20,%ymm4,%ymm8
7012	vpslld	$32-20,%ymm4,%ymm4
7013	vpxor	%ymm8,%ymm4,%ymm4
7014	vmovdqa	.Lrol8(%rip),%ymm8
7015	vpaddd	%ymm7,%ymm3,%ymm3
7016	vpaddd	%ymm6,%ymm2,%ymm2
7017	vpaddd	%ymm5,%ymm1,%ymm1
7018	vpaddd	%ymm4,%ymm0,%ymm0
7019	vpxor	%ymm3,%ymm15,%ymm15
7020	vpxor	%ymm2,%ymm14,%ymm14
7021	vpxor	%ymm1,%ymm13,%ymm13
7022	vpxor	%ymm0,%ymm12,%ymm12
7023	vpshufb	%ymm8,%ymm15,%ymm15
7024	vpshufb	%ymm8,%ymm14,%ymm14
7025	vpshufb	%ymm8,%ymm13,%ymm13
7026	movq	0+0+0(%rbp),%rdx
7027	movq	%rdx,%r15
7028	mulxq	%r10,%r13,%r14
7029	mulxq	%r11,%rax,%rdx
7030	imulq	%r12,%r15
7031	addq	%rax,%r14
7032	adcq	%rdx,%r15
7033	vpshufb	%ymm8,%ymm12,%ymm12
7034	vpaddd	%ymm15,%ymm11,%ymm11
7035	vpaddd	%ymm14,%ymm10,%ymm10
7036	vpaddd	%ymm13,%ymm9,%ymm9
7037	vpaddd	0+128(%rbp),%ymm12,%ymm8
7038	vpxor	%ymm11,%ymm7,%ymm7
7039	vpxor	%ymm10,%ymm6,%ymm6
7040	vpxor	%ymm9,%ymm5,%ymm5
7041	movq	8+0+0(%rbp),%rdx
7042	mulxq	%r10,%r10,%rax
7043	addq	%r10,%r14
7044	mulxq	%r11,%r11,%r9
7045	adcq	%r11,%r15
7046	adcq	$0,%r9
7047	imulq	%r12,%rdx
7048	vpxor	%ymm8,%ymm4,%ymm4
7049	vmovdqa	%ymm8,0+128(%rbp)
7050	vpsrld	$25,%ymm7,%ymm8
7051	vpslld	$32-25,%ymm7,%ymm7
7052	vpxor	%ymm8,%ymm7,%ymm7
7053	vpsrld	$25,%ymm6,%ymm8
7054	vpslld	$32-25,%ymm6,%ymm6
7055	vpxor	%ymm8,%ymm6,%ymm6
7056	addq	%rax,%r15
7057	adcq	%rdx,%r9
7058	vpsrld	$25,%ymm5,%ymm8
7059	vpslld	$32-25,%ymm5,%ymm5
7060	vpxor	%ymm8,%ymm5,%ymm5
7061	vpsrld	$25,%ymm4,%ymm8
7062	vpslld	$32-25,%ymm4,%ymm4
7063	vpxor	%ymm8,%ymm4,%ymm4
7064	vmovdqa	0+128(%rbp),%ymm8
7065	vpalignr	$12,%ymm7,%ymm7,%ymm7
7066	vpalignr	$8,%ymm11,%ymm11,%ymm11
7067	vpalignr	$4,%ymm15,%ymm15,%ymm15
7068	vpalignr	$12,%ymm6,%ymm6,%ymm6
7069	vpalignr	$8,%ymm10,%ymm10,%ymm10
7070	vpalignr	$4,%ymm14,%ymm14,%ymm14
7071	vpalignr	$12,%ymm5,%ymm5,%ymm5
7072	vpalignr	$8,%ymm9,%ymm9,%ymm9
7073	vpalignr	$4,%ymm13,%ymm13,%ymm13
7074	vpalignr	$12,%ymm4,%ymm4,%ymm4
7075	vpalignr	$8,%ymm8,%ymm8,%ymm8
7076	movq	%r13,%r10
7077	movq	%r14,%r11
7078	movq	%r15,%r12
7079	andq	$3,%r12
7080	movq	%r15,%r13
7081	andq	$-4,%r13
7082	movq	%r9,%r14
7083	shrdq	$2,%r9,%r15
7084	shrq	$2,%r9
7085	addq	%r13,%r15
7086	adcq	%r14,%r9
7087	addq	%r15,%r10
7088	adcq	%r9,%r11
7089	adcq	$0,%r12
7090	vpalignr	$4,%ymm12,%ymm12,%ymm12
7091
7092	decq	%rcx
7093	jne	.Lseal_avx2_main_loop_rounds
7094	vpaddd	.Lchacha20_consts(%rip),%ymm3,%ymm3
7095	vpaddd	0+64(%rbp),%ymm7,%ymm7
7096	vpaddd	0+96(%rbp),%ymm11,%ymm11
7097	vpaddd	0+256(%rbp),%ymm15,%ymm15
7098	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
7099	vpaddd	0+64(%rbp),%ymm6,%ymm6
7100	vpaddd	0+96(%rbp),%ymm10,%ymm10
7101	vpaddd	0+224(%rbp),%ymm14,%ymm14
7102	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
7103	vpaddd	0+64(%rbp),%ymm5,%ymm5
7104	vpaddd	0+96(%rbp),%ymm9,%ymm9
7105	vpaddd	0+192(%rbp),%ymm13,%ymm13
7106	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
7107	vpaddd	0+64(%rbp),%ymm4,%ymm4
7108	vpaddd	0+96(%rbp),%ymm8,%ymm8
7109	vpaddd	0+160(%rbp),%ymm12,%ymm12
7110
7111	vmovdqa	%ymm0,0+128(%rbp)
7112	addq	0+0(%rdi),%r10
7113	adcq	8+0(%rdi),%r11
7114	adcq	$1,%r12
7115	movq	0+0+0(%rbp),%rdx
7116	movq	%rdx,%r15
7117	mulxq	%r10,%r13,%r14
7118	mulxq	%r11,%rax,%rdx
7119	imulq	%r12,%r15
7120	addq	%rax,%r14
7121	adcq	%rdx,%r15
7122	movq	8+0+0(%rbp),%rdx
7123	mulxq	%r10,%r10,%rax
7124	addq	%r10,%r14
7125	mulxq	%r11,%r11,%r9
7126	adcq	%r11,%r15
7127	adcq	$0,%r9
7128	imulq	%r12,%rdx
7129	addq	%rax,%r15
7130	adcq	%rdx,%r9
7131	movq	%r13,%r10
7132	movq	%r14,%r11
7133	movq	%r15,%r12
7134	andq	$3,%r12
7135	movq	%r15,%r13
7136	andq	$-4,%r13
7137	movq	%r9,%r14
7138	shrdq	$2,%r9,%r15
7139	shrq	$2,%r9
7140	addq	%r13,%r15
7141	adcq	%r14,%r9
7142	addq	%r15,%r10
7143	adcq	%r9,%r11
7144	adcq	$0,%r12
7145	addq	0+16(%rdi),%r10
7146	adcq	8+16(%rdi),%r11
7147	adcq	$1,%r12
7148	movq	0+0+0(%rbp),%rdx
7149	movq	%rdx,%r15
7150	mulxq	%r10,%r13,%r14
7151	mulxq	%r11,%rax,%rdx
7152	imulq	%r12,%r15
7153	addq	%rax,%r14
7154	adcq	%rdx,%r15
7155	movq	8+0+0(%rbp),%rdx
7156	mulxq	%r10,%r10,%rax
7157	addq	%r10,%r14
7158	mulxq	%r11,%r11,%r9
7159	adcq	%r11,%r15
7160	adcq	$0,%r9
7161	imulq	%r12,%rdx
7162	addq	%rax,%r15
7163	adcq	%rdx,%r9
7164	movq	%r13,%r10
7165	movq	%r14,%r11
7166	movq	%r15,%r12
7167	andq	$3,%r12
7168	movq	%r15,%r13
7169	andq	$-4,%r13
7170	movq	%r9,%r14
7171	shrdq	$2,%r9,%r15
7172	shrq	$2,%r9
7173	addq	%r13,%r15
7174	adcq	%r14,%r9
7175	addq	%r15,%r10
7176	adcq	%r9,%r11
7177	adcq	$0,%r12
7178
7179	leaq	32(%rdi),%rdi
7180	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
7181	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
7182	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
7183	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
7184	vpxor	0+0(%rsi),%ymm0,%ymm0
7185	vpxor	32+0(%rsi),%ymm3,%ymm3
7186	vpxor	64+0(%rsi),%ymm7,%ymm7
7187	vpxor	96+0(%rsi),%ymm11,%ymm11
7188	vmovdqu	%ymm0,0+0(%rdi)
7189	vmovdqu	%ymm3,32+0(%rdi)
7190	vmovdqu	%ymm7,64+0(%rdi)
7191	vmovdqu	%ymm11,96+0(%rdi)
7192
7193	vmovdqa	0+128(%rbp),%ymm0
7194	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
7195	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
7196	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
7197	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
7198	vpxor	0+128(%rsi),%ymm3,%ymm3
7199	vpxor	32+128(%rsi),%ymm2,%ymm2
7200	vpxor	64+128(%rsi),%ymm6,%ymm6
7201	vpxor	96+128(%rsi),%ymm10,%ymm10
7202	vmovdqu	%ymm3,0+128(%rdi)
7203	vmovdqu	%ymm2,32+128(%rdi)
7204	vmovdqu	%ymm6,64+128(%rdi)
7205	vmovdqu	%ymm10,96+128(%rdi)
7206	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
7207	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
7208	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
7209	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
7210	vpxor	0+256(%rsi),%ymm3,%ymm3
7211	vpxor	32+256(%rsi),%ymm1,%ymm1
7212	vpxor	64+256(%rsi),%ymm5,%ymm5
7213	vpxor	96+256(%rsi),%ymm9,%ymm9
7214	vmovdqu	%ymm3,0+256(%rdi)
7215	vmovdqu	%ymm1,32+256(%rdi)
7216	vmovdqu	%ymm5,64+256(%rdi)
7217	vmovdqu	%ymm9,96+256(%rdi)
7218	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
7219	vperm2i128	$0x13,%ymm0,%ymm4,%ymm4
7220	vperm2i128	$0x02,%ymm8,%ymm12,%ymm0
7221	vperm2i128	$0x13,%ymm8,%ymm12,%ymm8
7222	vpxor	0+384(%rsi),%ymm3,%ymm3
7223	vpxor	32+384(%rsi),%ymm0,%ymm0
7224	vpxor	64+384(%rsi),%ymm4,%ymm4
7225	vpxor	96+384(%rsi),%ymm8,%ymm8
7226	vmovdqu	%ymm3,0+384(%rdi)
7227	vmovdqu	%ymm0,32+384(%rdi)
7228	vmovdqu	%ymm4,64+384(%rdi)
7229	vmovdqu	%ymm8,96+384(%rdi)
7230
7231	leaq	512(%rsi),%rsi
7232	subq	$512,%rbx
7233	cmpq	$512,%rbx
7234	jg	.Lseal_avx2_main_loop
7235
7236	addq	0+0(%rdi),%r10
7237	adcq	8+0(%rdi),%r11
7238	adcq	$1,%r12
7239	movq	0+0+0(%rbp),%rdx
7240	movq	%rdx,%r15
7241	mulxq	%r10,%r13,%r14
7242	mulxq	%r11,%rax,%rdx
7243	imulq	%r12,%r15
7244	addq	%rax,%r14
7245	adcq	%rdx,%r15
7246	movq	8+0+0(%rbp),%rdx
7247	mulxq	%r10,%r10,%rax
7248	addq	%r10,%r14
7249	mulxq	%r11,%r11,%r9
7250	adcq	%r11,%r15
7251	adcq	$0,%r9
7252	imulq	%r12,%rdx
7253	addq	%rax,%r15
7254	adcq	%rdx,%r9
7255	movq	%r13,%r10
7256	movq	%r14,%r11
7257	movq	%r15,%r12
7258	andq	$3,%r12
7259	movq	%r15,%r13
7260	andq	$-4,%r13
7261	movq	%r9,%r14
7262	shrdq	$2,%r9,%r15
7263	shrq	$2,%r9
7264	addq	%r13,%r15
7265	adcq	%r14,%r9
7266	addq	%r15,%r10
7267	adcq	%r9,%r11
7268	adcq	$0,%r12
7269	addq	0+16(%rdi),%r10
7270	adcq	8+16(%rdi),%r11
7271	adcq	$1,%r12
7272	movq	0+0+0(%rbp),%rdx
7273	movq	%rdx,%r15
7274	mulxq	%r10,%r13,%r14
7275	mulxq	%r11,%rax,%rdx
7276	imulq	%r12,%r15
7277	addq	%rax,%r14
7278	adcq	%rdx,%r15
7279	movq	8+0+0(%rbp),%rdx
7280	mulxq	%r10,%r10,%rax
7281	addq	%r10,%r14
7282	mulxq	%r11,%r11,%r9
7283	adcq	%r11,%r15
7284	adcq	$0,%r9
7285	imulq	%r12,%rdx
7286	addq	%rax,%r15
7287	adcq	%rdx,%r9
7288	movq	%r13,%r10
7289	movq	%r14,%r11
7290	movq	%r15,%r12
7291	andq	$3,%r12
7292	movq	%r15,%r13
7293	andq	$-4,%r13
7294	movq	%r9,%r14
7295	shrdq	$2,%r9,%r15
7296	shrq	$2,%r9
7297	addq	%r13,%r15
7298	adcq	%r14,%r9
7299	addq	%r15,%r10
7300	adcq	%r9,%r11
7301	adcq	$0,%r12
7302
7303	leaq	32(%rdi),%rdi
7304	movq	$10,%rcx
7305	xorq	%r8,%r8
7306
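# At most 512 bytes remain; pick a tail path by how much is left.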
7307	cmpq	$384,%rbx
7308	ja	.Lseal_avx2_tail_512
7309	cmpq	$256,%rbx
7310	ja	.Lseal_avx2_tail_384
7311	cmpq	$128,%rbx
7312	ja	.Lseal_avx2_tail_256
7313
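# 1..128 bytes left: one more ChaCha20 state, interleaved with Poly1305 over
# the remaining ciphertext; the short loop below does the final encrypt-and-hash.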
7314.Lseal_avx2_tail_128:
7315	vmovdqa	.Lchacha20_consts(%rip),%ymm0
7316	vmovdqa	0+64(%rbp),%ymm4
7317	vmovdqa	0+96(%rbp),%ymm8
7318	vmovdqa	.Lavx2_inc(%rip),%ymm12
7319	vpaddd	0+160(%rbp),%ymm12,%ymm12
7320	vmovdqa	%ymm12,0+160(%rbp)
7321
7322.Lseal_avx2_tail_128_rounds_and_3xhash:
7323	addq	0+0(%rdi),%r10
7324	adcq	8+0(%rdi),%r11
7325	adcq	$1,%r12
7326	movq	0+0+0(%rbp),%rdx
7327	movq	%rdx,%r15
7328	mulxq	%r10,%r13,%r14
7329	mulxq	%r11,%rax,%rdx
7330	imulq	%r12,%r15
7331	addq	%rax,%r14
7332	adcq	%rdx,%r15
7333	movq	8+0+0(%rbp),%rdx
7334	mulxq	%r10,%r10,%rax
7335	addq	%r10,%r14
7336	mulxq	%r11,%r11,%r9
7337	adcq	%r11,%r15
7338	adcq	$0,%r9
7339	imulq	%r12,%rdx
7340	addq	%rax,%r15
7341	adcq	%rdx,%r9
7342	movq	%r13,%r10
7343	movq	%r14,%r11
7344	movq	%r15,%r12
7345	andq	$3,%r12
7346	movq	%r15,%r13
7347	andq	$-4,%r13
7348	movq	%r9,%r14
7349	shrdq	$2,%r9,%r15
7350	shrq	$2,%r9
7351	addq	%r13,%r15
7352	adcq	%r14,%r9
7353	addq	%r15,%r10
7354	adcq	%r9,%r11
7355	adcq	$0,%r12
7356
7357	leaq	16(%rdi),%rdi
7358.Lseal_avx2_tail_128_rounds_and_2xhash:
7359	vpaddd	%ymm4,%ymm0,%ymm0
7360	vpxor	%ymm0,%ymm12,%ymm12
7361	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
7362	vpaddd	%ymm12,%ymm8,%ymm8
7363	vpxor	%ymm8,%ymm4,%ymm4
7364	vpsrld	$20,%ymm4,%ymm3
7365	vpslld	$12,%ymm4,%ymm4
7366	vpxor	%ymm3,%ymm4,%ymm4
7367	vpaddd	%ymm4,%ymm0,%ymm0
7368	vpxor	%ymm0,%ymm12,%ymm12
7369	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
7370	vpaddd	%ymm12,%ymm8,%ymm8
7371	vpxor	%ymm8,%ymm4,%ymm4
7372	vpslld	$7,%ymm4,%ymm3
7373	vpsrld	$25,%ymm4,%ymm4
7374	vpxor	%ymm3,%ymm4,%ymm4
7375	vpalignr	$12,%ymm12,%ymm12,%ymm12
7376	vpalignr	$8,%ymm8,%ymm8,%ymm8
7377	vpalignr	$4,%ymm4,%ymm4,%ymm4
7378	addq	0+0(%rdi),%r10
7379	adcq	8+0(%rdi),%r11
7380	adcq	$1,%r12
7381	movq	0+0+0(%rbp),%rdx
7382	movq	%rdx,%r15
7383	mulxq	%r10,%r13,%r14
7384	mulxq	%r11,%rax,%rdx
7385	imulq	%r12,%r15
7386	addq	%rax,%r14
7387	adcq	%rdx,%r15
7388	movq	8+0+0(%rbp),%rdx
7389	mulxq	%r10,%r10,%rax
7390	addq	%r10,%r14
7391	mulxq	%r11,%r11,%r9
7392	adcq	%r11,%r15
7393	adcq	$0,%r9
7394	imulq	%r12,%rdx
7395	addq	%rax,%r15
7396	adcq	%rdx,%r9
7397	movq	%r13,%r10
7398	movq	%r14,%r11
7399	movq	%r15,%r12
7400	andq	$3,%r12
7401	movq	%r15,%r13
7402	andq	$-4,%r13
7403	movq	%r9,%r14
7404	shrdq	$2,%r9,%r15
7405	shrq	$2,%r9
7406	addq	%r13,%r15
7407	adcq	%r14,%r9
7408	addq	%r15,%r10
7409	adcq	%r9,%r11
7410	adcq	$0,%r12
7411	vpaddd	%ymm4,%ymm0,%ymm0
7412	vpxor	%ymm0,%ymm12,%ymm12
7413	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
7414	vpaddd	%ymm12,%ymm8,%ymm8
7415	vpxor	%ymm8,%ymm4,%ymm4
7416	vpsrld	$20,%ymm4,%ymm3
7417	vpslld	$12,%ymm4,%ymm4
7418	vpxor	%ymm3,%ymm4,%ymm4
7419	vpaddd	%ymm4,%ymm0,%ymm0
7420	vpxor	%ymm0,%ymm12,%ymm12
7421	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
7422	vpaddd	%ymm12,%ymm8,%ymm8
7423	vpxor	%ymm8,%ymm4,%ymm4
7424	vpslld	$7,%ymm4,%ymm3
7425	vpsrld	$25,%ymm4,%ymm4
7426	vpxor	%ymm3,%ymm4,%ymm4
7427	vpalignr	$4,%ymm12,%ymm12,%ymm12
7428	vpalignr	$8,%ymm8,%ymm8,%ymm8
7429	vpalignr	$12,%ymm4,%ymm4,%ymm4
7430	addq	0+16(%rdi),%r10
7431	adcq	8+16(%rdi),%r11
7432	adcq	$1,%r12
7433	movq	0+0+0(%rbp),%rdx
7434	movq	%rdx,%r15
7435	mulxq	%r10,%r13,%r14
7436	mulxq	%r11,%rax,%rdx
7437	imulq	%r12,%r15
7438	addq	%rax,%r14
7439	adcq	%rdx,%r15
7440	movq	8+0+0(%rbp),%rdx
7441	mulxq	%r10,%r10,%rax
7442	addq	%r10,%r14
7443	mulxq	%r11,%r11,%r9
7444	adcq	%r11,%r15
7445	adcq	$0,%r9
7446	imulq	%r12,%rdx
7447	addq	%rax,%r15
7448	adcq	%rdx,%r9
7449	movq	%r13,%r10
7450	movq	%r14,%r11
7451	movq	%r15,%r12
7452	andq	$3,%r12
7453	movq	%r15,%r13
7454	andq	$-4,%r13
7455	movq	%r9,%r14
7456	shrdq	$2,%r9,%r15
7457	shrq	$2,%r9
7458	addq	%r13,%r15
7459	adcq	%r14,%r9
7460	addq	%r15,%r10
7461	adcq	%r9,%r11
7462	adcq	$0,%r12
7463
7464	leaq	32(%rdi),%rdi
7465	decq	%rcx
7466	jg	.Lseal_avx2_tail_128_rounds_and_3xhash
7467	decq	%r8
7468	jge	.Lseal_avx2_tail_128_rounds_and_2xhash
7469	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
7470	vpaddd	0+64(%rbp),%ymm4,%ymm4
7471	vpaddd	0+96(%rbp),%ymm8,%ymm8
7472	vpaddd	0+160(%rbp),%ymm12,%ymm12
7473	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
7474	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
7475	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
7476	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
7477	vmovdqa	%ymm3,%ymm8
7478
7479	jmp	.Lseal_avx2_short_loop
7480
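# 129..256 bytes left: two states; the first 128 bytes are encrypted and
# stored here, the rest is handed to the short loop.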
7481.Lseal_avx2_tail_256:
7482	vmovdqa	.Lchacha20_consts(%rip),%ymm0
7483	vmovdqa	0+64(%rbp),%ymm4
7484	vmovdqa	0+96(%rbp),%ymm8
7485	vmovdqa	%ymm0,%ymm1
7486	vmovdqa	%ymm4,%ymm5
7487	vmovdqa	%ymm8,%ymm9
7488	vmovdqa	.Lavx2_inc(%rip),%ymm12
7489	vpaddd	0+160(%rbp),%ymm12,%ymm13
7490	vpaddd	%ymm13,%ymm12,%ymm12
7491	vmovdqa	%ymm12,0+160(%rbp)
7492	vmovdqa	%ymm13,0+192(%rbp)
7493
7494.Lseal_avx2_tail_256_rounds_and_3xhash:
7495	addq	0+0(%rdi),%r10
7496	adcq	8+0(%rdi),%r11
7497	adcq	$1,%r12
7498	movq	0+0+0(%rbp),%rax
7499	movq	%rax,%r15
7500	mulq	%r10
7501	movq	%rax,%r13
7502	movq	%rdx,%r14
7503	movq	0+0+0(%rbp),%rax
7504	mulq	%r11
7505	imulq	%r12,%r15
7506	addq	%rax,%r14
7507	adcq	%rdx,%r15
7508	movq	8+0+0(%rbp),%rax
7509	movq	%rax,%r9
7510	mulq	%r10
7511	addq	%rax,%r14
7512	adcq	$0,%rdx
7513	movq	%rdx,%r10
7514	movq	8+0+0(%rbp),%rax
7515	mulq	%r11
7516	addq	%rax,%r15
7517	adcq	$0,%rdx
7518	imulq	%r12,%r9
7519	addq	%r10,%r15
7520	adcq	%rdx,%r9
7521	movq	%r13,%r10
7522	movq	%r14,%r11
7523	movq	%r15,%r12
7524	andq	$3,%r12
7525	movq	%r15,%r13
7526	andq	$-4,%r13
7527	movq	%r9,%r14
7528	shrdq	$2,%r9,%r15
7529	shrq	$2,%r9
7530	addq	%r13,%r15
7531	adcq	%r14,%r9
7532	addq	%r15,%r10
7533	adcq	%r9,%r11
7534	adcq	$0,%r12
7535
7536	leaq	16(%rdi),%rdi
7537.Lseal_avx2_tail_256_rounds_and_2xhash:
7538	vpaddd	%ymm4,%ymm0,%ymm0
7539	vpxor	%ymm0,%ymm12,%ymm12
7540	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
7541	vpaddd	%ymm12,%ymm8,%ymm8
7542	vpxor	%ymm8,%ymm4,%ymm4
7543	vpsrld	$20,%ymm4,%ymm3
7544	vpslld	$12,%ymm4,%ymm4
7545	vpxor	%ymm3,%ymm4,%ymm4
7546	vpaddd	%ymm4,%ymm0,%ymm0
7547	vpxor	%ymm0,%ymm12,%ymm12
7548	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
7549	vpaddd	%ymm12,%ymm8,%ymm8
7550	vpxor	%ymm8,%ymm4,%ymm4
7551	vpslld	$7,%ymm4,%ymm3
7552	vpsrld	$25,%ymm4,%ymm4
7553	vpxor	%ymm3,%ymm4,%ymm4
7554	vpalignr	$12,%ymm12,%ymm12,%ymm12
7555	vpalignr	$8,%ymm8,%ymm8,%ymm8
7556	vpalignr	$4,%ymm4,%ymm4,%ymm4
7557	vpaddd	%ymm5,%ymm1,%ymm1
7558	vpxor	%ymm1,%ymm13,%ymm13
7559	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
7560	vpaddd	%ymm13,%ymm9,%ymm9
7561	vpxor	%ymm9,%ymm5,%ymm5
7562	vpsrld	$20,%ymm5,%ymm3
7563	vpslld	$12,%ymm5,%ymm5
7564	vpxor	%ymm3,%ymm5,%ymm5
7565	vpaddd	%ymm5,%ymm1,%ymm1
7566	vpxor	%ymm1,%ymm13,%ymm13
7567	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
7568	vpaddd	%ymm13,%ymm9,%ymm9
7569	vpxor	%ymm9,%ymm5,%ymm5
7570	vpslld	$7,%ymm5,%ymm3
7571	vpsrld	$25,%ymm5,%ymm5
7572	vpxor	%ymm3,%ymm5,%ymm5
7573	vpalignr	$12,%ymm13,%ymm13,%ymm13
7574	vpalignr	$8,%ymm9,%ymm9,%ymm9
7575	vpalignr	$4,%ymm5,%ymm5,%ymm5
7576	addq	0+0(%rdi),%r10
7577	adcq	8+0(%rdi),%r11
7578	adcq	$1,%r12
7579	movq	0+0+0(%rbp),%rax
7580	movq	%rax,%r15
7581	mulq	%r10
7582	movq	%rax,%r13
7583	movq	%rdx,%r14
7584	movq	0+0+0(%rbp),%rax
7585	mulq	%r11
7586	imulq	%r12,%r15
7587	addq	%rax,%r14
7588	adcq	%rdx,%r15
7589	movq	8+0+0(%rbp),%rax
7590	movq	%rax,%r9
7591	mulq	%r10
7592	addq	%rax,%r14
7593	adcq	$0,%rdx
7594	movq	%rdx,%r10
7595	movq	8+0+0(%rbp),%rax
7596	mulq	%r11
7597	addq	%rax,%r15
7598	adcq	$0,%rdx
7599	imulq	%r12,%r9
7600	addq	%r10,%r15
7601	adcq	%rdx,%r9
7602	movq	%r13,%r10
7603	movq	%r14,%r11
7604	movq	%r15,%r12
7605	andq	$3,%r12
7606	movq	%r15,%r13
7607	andq	$-4,%r13
7608	movq	%r9,%r14
7609	shrdq	$2,%r9,%r15
7610	shrq	$2,%r9
7611	addq	%r13,%r15
7612	adcq	%r14,%r9
7613	addq	%r15,%r10
7614	adcq	%r9,%r11
7615	adcq	$0,%r12
7616	vpaddd	%ymm4,%ymm0,%ymm0
7617	vpxor	%ymm0,%ymm12,%ymm12
7618	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
7619	vpaddd	%ymm12,%ymm8,%ymm8
7620	vpxor	%ymm8,%ymm4,%ymm4
7621	vpsrld	$20,%ymm4,%ymm3
7622	vpslld	$12,%ymm4,%ymm4
7623	vpxor	%ymm3,%ymm4,%ymm4
7624	vpaddd	%ymm4,%ymm0,%ymm0
7625	vpxor	%ymm0,%ymm12,%ymm12
7626	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
7627	vpaddd	%ymm12,%ymm8,%ymm8
7628	vpxor	%ymm8,%ymm4,%ymm4
7629	vpslld	$7,%ymm4,%ymm3
7630	vpsrld	$25,%ymm4,%ymm4
7631	vpxor	%ymm3,%ymm4,%ymm4
7632	vpalignr	$4,%ymm12,%ymm12,%ymm12
7633	vpalignr	$8,%ymm8,%ymm8,%ymm8
7634	vpalignr	$12,%ymm4,%ymm4,%ymm4
7635	vpaddd	%ymm5,%ymm1,%ymm1
7636	vpxor	%ymm1,%ymm13,%ymm13
7637	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
7638	vpaddd	%ymm13,%ymm9,%ymm9
7639	vpxor	%ymm9,%ymm5,%ymm5
7640	vpsrld	$20,%ymm5,%ymm3
7641	vpslld	$12,%ymm5,%ymm5
7642	vpxor	%ymm3,%ymm5,%ymm5
7643	vpaddd	%ymm5,%ymm1,%ymm1
7644	vpxor	%ymm1,%ymm13,%ymm13
7645	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
7646	vpaddd	%ymm13,%ymm9,%ymm9
7647	vpxor	%ymm9,%ymm5,%ymm5
7648	vpslld	$7,%ymm5,%ymm3
7649	vpsrld	$25,%ymm5,%ymm5
7650	vpxor	%ymm3,%ymm5,%ymm5
7651	vpalignr	$4,%ymm13,%ymm13,%ymm13
7652	vpalignr	$8,%ymm9,%ymm9,%ymm9
7653	vpalignr	$12,%ymm5,%ymm5,%ymm5
7654	addq	0+16(%rdi),%r10
7655	adcq	8+16(%rdi),%r11
7656	adcq	$1,%r12
7657	movq	0+0+0(%rbp),%rax
7658	movq	%rax,%r15
7659	mulq	%r10
7660	movq	%rax,%r13
7661	movq	%rdx,%r14
7662	movq	0+0+0(%rbp),%rax
7663	mulq	%r11
7664	imulq	%r12,%r15
7665	addq	%rax,%r14
7666	adcq	%rdx,%r15
7667	movq	8+0+0(%rbp),%rax
7668	movq	%rax,%r9
7669	mulq	%r10
7670	addq	%rax,%r14
7671	adcq	$0,%rdx
7672	movq	%rdx,%r10
7673	movq	8+0+0(%rbp),%rax
7674	mulq	%r11
7675	addq	%rax,%r15
7676	adcq	$0,%rdx
7677	imulq	%r12,%r9
7678	addq	%r10,%r15
7679	adcq	%rdx,%r9
7680	movq	%r13,%r10
7681	movq	%r14,%r11
7682	movq	%r15,%r12
7683	andq	$3,%r12
7684	movq	%r15,%r13
7685	andq	$-4,%r13
7686	movq	%r9,%r14
7687	shrdq	$2,%r9,%r15
7688	shrq	$2,%r9
7689	addq	%r13,%r15
7690	adcq	%r14,%r9
7691	addq	%r15,%r10
7692	adcq	%r9,%r11
7693	adcq	$0,%r12
7694
7695	leaq	32(%rdi),%rdi
7696	decq	%rcx
7697	jg	.Lseal_avx2_tail_256_rounds_and_3xhash
7698	decq	%r8
7699	jge	.Lseal_avx2_tail_256_rounds_and_2xhash
7700	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
7701	vpaddd	0+64(%rbp),%ymm5,%ymm5
7702	vpaddd	0+96(%rbp),%ymm9,%ymm9
7703	vpaddd	0+192(%rbp),%ymm13,%ymm13
7704	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
7705	vpaddd	0+64(%rbp),%ymm4,%ymm4
7706	vpaddd	0+96(%rbp),%ymm8,%ymm8
7707	vpaddd	0+160(%rbp),%ymm12,%ymm12
7708	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
7709	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
7710	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
7711	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
7712	vpxor	0+0(%rsi),%ymm3,%ymm3
7713	vpxor	32+0(%rsi),%ymm1,%ymm1
7714	vpxor	64+0(%rsi),%ymm5,%ymm5
7715	vpxor	96+0(%rsi),%ymm9,%ymm9
7716	vmovdqu	%ymm3,0+0(%rdi)
7717	vmovdqu	%ymm1,32+0(%rdi)
7718	vmovdqu	%ymm5,64+0(%rdi)
7719	vmovdqu	%ymm9,96+0(%rdi)
7720	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
7721	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
7722	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
7723	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
7724	vmovdqa	%ymm3,%ymm8
7725
7726	movq	$128,%rcx
7727	leaq	128(%rsi),%rsi
7728	subq	$128,%rbx
7729	jmp	.Lseal_avx2_short_hash_remainder
7730
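# 257..384 bytes left: three states; the first 256 bytes are encrypted and
# stored here, the rest is handed to the short loop.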
7731.Lseal_avx2_tail_384:
7732	vmovdqa	.Lchacha20_consts(%rip),%ymm0
7733	vmovdqa	0+64(%rbp),%ymm4
7734	vmovdqa	0+96(%rbp),%ymm8
7735	vmovdqa	%ymm0,%ymm1
7736	vmovdqa	%ymm4,%ymm5
7737	vmovdqa	%ymm8,%ymm9
7738	vmovdqa	%ymm0,%ymm2
7739	vmovdqa	%ymm4,%ymm6
7740	vmovdqa	%ymm8,%ymm10
7741	vmovdqa	.Lavx2_inc(%rip),%ymm12
7742	vpaddd	0+160(%rbp),%ymm12,%ymm14
7743	vpaddd	%ymm14,%ymm12,%ymm13
7744	vpaddd	%ymm13,%ymm12,%ymm12
7745	vmovdqa	%ymm12,0+160(%rbp)
7746	vmovdqa	%ymm13,0+192(%rbp)
7747	vmovdqa	%ymm14,0+224(%rbp)
7748
7749.Lseal_avx2_tail_384_rounds_and_3xhash:
7750	addq	0+0(%rdi),%r10
7751	adcq	8+0(%rdi),%r11
7752	adcq	$1,%r12
7753	movq	0+0+0(%rbp),%rax
7754	movq	%rax,%r15
7755	mulq	%r10
7756	movq	%rax,%r13
7757	movq	%rdx,%r14
7758	movq	0+0+0(%rbp),%rax
7759	mulq	%r11
7760	imulq	%r12,%r15
7761	addq	%rax,%r14
7762	adcq	%rdx,%r15
7763	movq	8+0+0(%rbp),%rax
7764	movq	%rax,%r9
7765	mulq	%r10
7766	addq	%rax,%r14
7767	adcq	$0,%rdx
7768	movq	%rdx,%r10
7769	movq	8+0+0(%rbp),%rax
7770	mulq	%r11
7771	addq	%rax,%r15
7772	adcq	$0,%rdx
7773	imulq	%r12,%r9
7774	addq	%r10,%r15
7775	adcq	%rdx,%r9
7776	movq	%r13,%r10
7777	movq	%r14,%r11
7778	movq	%r15,%r12
7779	andq	$3,%r12
7780	movq	%r15,%r13
7781	andq	$-4,%r13
7782	movq	%r9,%r14
7783	shrdq	$2,%r9,%r15
7784	shrq	$2,%r9
7785	addq	%r13,%r15
7786	adcq	%r14,%r9
7787	addq	%r15,%r10
7788	adcq	%r9,%r11
7789	adcq	$0,%r12
7790
7791	leaq	16(%rdi),%rdi
7792.Lseal_avx2_tail_384_rounds_and_2xhash:
7793	vpaddd	%ymm4,%ymm0,%ymm0
7794	vpxor	%ymm0,%ymm12,%ymm12
7795	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
7796	vpaddd	%ymm12,%ymm8,%ymm8
7797	vpxor	%ymm8,%ymm4,%ymm4
7798	vpsrld	$20,%ymm4,%ymm3
7799	vpslld	$12,%ymm4,%ymm4
7800	vpxor	%ymm3,%ymm4,%ymm4
7801	vpaddd	%ymm4,%ymm0,%ymm0
7802	vpxor	%ymm0,%ymm12,%ymm12
7803	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
7804	vpaddd	%ymm12,%ymm8,%ymm8
7805	vpxor	%ymm8,%ymm4,%ymm4
7806	vpslld	$7,%ymm4,%ymm3
7807	vpsrld	$25,%ymm4,%ymm4
7808	vpxor	%ymm3,%ymm4,%ymm4
7809	vpalignr	$12,%ymm12,%ymm12,%ymm12
7810	vpalignr	$8,%ymm8,%ymm8,%ymm8
7811	vpalignr	$4,%ymm4,%ymm4,%ymm4
7812	vpaddd	%ymm5,%ymm1,%ymm1
7813	vpxor	%ymm1,%ymm13,%ymm13
7814	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
7815	vpaddd	%ymm13,%ymm9,%ymm9
7816	vpxor	%ymm9,%ymm5,%ymm5
7817	vpsrld	$20,%ymm5,%ymm3
7818	vpslld	$12,%ymm5,%ymm5
7819	vpxor	%ymm3,%ymm5,%ymm5
7820	vpaddd	%ymm5,%ymm1,%ymm1
7821	vpxor	%ymm1,%ymm13,%ymm13
7822	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
7823	vpaddd	%ymm13,%ymm9,%ymm9
7824	vpxor	%ymm9,%ymm5,%ymm5
7825	vpslld	$7,%ymm5,%ymm3
7826	vpsrld	$25,%ymm5,%ymm5
7827	vpxor	%ymm3,%ymm5,%ymm5
7828	vpalignr	$12,%ymm13,%ymm13,%ymm13
7829	vpalignr	$8,%ymm9,%ymm9,%ymm9
7830	vpalignr	$4,%ymm5,%ymm5,%ymm5
7831	addq	0+0(%rdi),%r10
7832	adcq	8+0(%rdi),%r11
7833	adcq	$1,%r12
7834	movq	0+0+0(%rbp),%rax
7835	movq	%rax,%r15
7836	mulq	%r10
7837	movq	%rax,%r13
7838	movq	%rdx,%r14
7839	movq	0+0+0(%rbp),%rax
7840	mulq	%r11
7841	imulq	%r12,%r15
7842	addq	%rax,%r14
7843	adcq	%rdx,%r15
7844	movq	8+0+0(%rbp),%rax
7845	movq	%rax,%r9
7846	mulq	%r10
7847	addq	%rax,%r14
7848	adcq	$0,%rdx
7849	movq	%rdx,%r10
7850	movq	8+0+0(%rbp),%rax
7851	mulq	%r11
7852	addq	%rax,%r15
7853	adcq	$0,%rdx
7854	imulq	%r12,%r9
7855	addq	%r10,%r15
7856	adcq	%rdx,%r9
7857	movq	%r13,%r10
7858	movq	%r14,%r11
7859	movq	%r15,%r12
7860	andq	$3,%r12
7861	movq	%r15,%r13
7862	andq	$-4,%r13
7863	movq	%r9,%r14
7864	shrdq	$2,%r9,%r15
7865	shrq	$2,%r9
7866	addq	%r13,%r15
7867	adcq	%r14,%r9
7868	addq	%r15,%r10
7869	adcq	%r9,%r11
7870	adcq	$0,%r12
7871	vpaddd	%ymm6,%ymm2,%ymm2
7872	vpxor	%ymm2,%ymm14,%ymm14
7873	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
7874	vpaddd	%ymm14,%ymm10,%ymm10
7875	vpxor	%ymm10,%ymm6,%ymm6
7876	vpsrld	$20,%ymm6,%ymm3
7877	vpslld	$12,%ymm6,%ymm6
7878	vpxor	%ymm3,%ymm6,%ymm6
7879	vpaddd	%ymm6,%ymm2,%ymm2
7880	vpxor	%ymm2,%ymm14,%ymm14
7881	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
7882	vpaddd	%ymm14,%ymm10,%ymm10
7883	vpxor	%ymm10,%ymm6,%ymm6
7884	vpslld	$7,%ymm6,%ymm3
7885	vpsrld	$25,%ymm6,%ymm6
7886	vpxor	%ymm3,%ymm6,%ymm6
7887	vpalignr	$12,%ymm14,%ymm14,%ymm14
7888	vpalignr	$8,%ymm10,%ymm10,%ymm10
7889	vpalignr	$4,%ymm6,%ymm6,%ymm6
7890	vpaddd	%ymm4,%ymm0,%ymm0
7891	vpxor	%ymm0,%ymm12,%ymm12
7892	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
7893	vpaddd	%ymm12,%ymm8,%ymm8
7894	vpxor	%ymm8,%ymm4,%ymm4
7895	vpsrld	$20,%ymm4,%ymm3
7896	vpslld	$12,%ymm4,%ymm4
7897	vpxor	%ymm3,%ymm4,%ymm4
7898	vpaddd	%ymm4,%ymm0,%ymm0
7899	vpxor	%ymm0,%ymm12,%ymm12
7900	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
7901	vpaddd	%ymm12,%ymm8,%ymm8
7902	vpxor	%ymm8,%ymm4,%ymm4
7903	vpslld	$7,%ymm4,%ymm3
7904	vpsrld	$25,%ymm4,%ymm4
7905	vpxor	%ymm3,%ymm4,%ymm4
7906	vpalignr	$4,%ymm12,%ymm12,%ymm12
7907	vpalignr	$8,%ymm8,%ymm8,%ymm8
7908	vpalignr	$12,%ymm4,%ymm4,%ymm4
7909	addq	0+16(%rdi),%r10
7910	adcq	8+16(%rdi),%r11
7911	adcq	$1,%r12
7912	movq	0+0+0(%rbp),%rax
7913	movq	%rax,%r15
7914	mulq	%r10
7915	movq	%rax,%r13
7916	movq	%rdx,%r14
7917	movq	0+0+0(%rbp),%rax
7918	mulq	%r11
7919	imulq	%r12,%r15
7920	addq	%rax,%r14
7921	adcq	%rdx,%r15
7922	movq	8+0+0(%rbp),%rax
7923	movq	%rax,%r9
7924	mulq	%r10
7925	addq	%rax,%r14
7926	adcq	$0,%rdx
7927	movq	%rdx,%r10
7928	movq	8+0+0(%rbp),%rax
7929	mulq	%r11
7930	addq	%rax,%r15
7931	adcq	$0,%rdx
7932	imulq	%r12,%r9
7933	addq	%r10,%r15
7934	adcq	%rdx,%r9
7935	movq	%r13,%r10
7936	movq	%r14,%r11
7937	movq	%r15,%r12
7938	andq	$3,%r12
7939	movq	%r15,%r13
7940	andq	$-4,%r13
7941	movq	%r9,%r14
7942	shrdq	$2,%r9,%r15
7943	shrq	$2,%r9
7944	addq	%r13,%r15
7945	adcq	%r14,%r9
7946	addq	%r15,%r10
7947	adcq	%r9,%r11
7948	adcq	$0,%r12
7949	vpaddd	%ymm5,%ymm1,%ymm1
7950	vpxor	%ymm1,%ymm13,%ymm13
7951	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
7952	vpaddd	%ymm13,%ymm9,%ymm9
7953	vpxor	%ymm9,%ymm5,%ymm5
7954	vpsrld	$20,%ymm5,%ymm3
7955	vpslld	$12,%ymm5,%ymm5
7956	vpxor	%ymm3,%ymm5,%ymm5
7957	vpaddd	%ymm5,%ymm1,%ymm1
7958	vpxor	%ymm1,%ymm13,%ymm13
7959	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
7960	vpaddd	%ymm13,%ymm9,%ymm9
7961	vpxor	%ymm9,%ymm5,%ymm5
7962	vpslld	$7,%ymm5,%ymm3
7963	vpsrld	$25,%ymm5,%ymm5
7964	vpxor	%ymm3,%ymm5,%ymm5
7965	vpalignr	$4,%ymm13,%ymm13,%ymm13
7966	vpalignr	$8,%ymm9,%ymm9,%ymm9
7967	vpalignr	$12,%ymm5,%ymm5,%ymm5
7968	vpaddd	%ymm6,%ymm2,%ymm2
7969	vpxor	%ymm2,%ymm14,%ymm14
7970	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
7971	vpaddd	%ymm14,%ymm10,%ymm10
7972	vpxor	%ymm10,%ymm6,%ymm6
7973	vpsrld	$20,%ymm6,%ymm3
7974	vpslld	$12,%ymm6,%ymm6
7975	vpxor	%ymm3,%ymm6,%ymm6
7976	vpaddd	%ymm6,%ymm2,%ymm2
7977	vpxor	%ymm2,%ymm14,%ymm14
7978	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
7979	vpaddd	%ymm14,%ymm10,%ymm10
7980	vpxor	%ymm10,%ymm6,%ymm6
7981	vpslld	$7,%ymm6,%ymm3
7982	vpsrld	$25,%ymm6,%ymm6
7983	vpxor	%ymm3,%ymm6,%ymm6
7984	vpalignr	$4,%ymm14,%ymm14,%ymm14
7985	vpalignr	$8,%ymm10,%ymm10,%ymm10
7986	vpalignr	$12,%ymm6,%ymm6,%ymm6
7987
7988	leaq	32(%rdi),%rdi
7989	decq	%rcx
7990	jg	.Lseal_avx2_tail_384_rounds_and_3xhash
7991	decq	%r8
7992	jge	.Lseal_avx2_tail_384_rounds_and_2xhash
7993	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
7994	vpaddd	0+64(%rbp),%ymm6,%ymm6
7995	vpaddd	0+96(%rbp),%ymm10,%ymm10
7996	vpaddd	0+224(%rbp),%ymm14,%ymm14
7997	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
7998	vpaddd	0+64(%rbp),%ymm5,%ymm5
7999	vpaddd	0+96(%rbp),%ymm9,%ymm9
8000	vpaddd	0+192(%rbp),%ymm13,%ymm13
8001	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
8002	vpaddd	0+64(%rbp),%ymm4,%ymm4
8003	vpaddd	0+96(%rbp),%ymm8,%ymm8
8004	vpaddd	0+160(%rbp),%ymm12,%ymm12
8005	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
8006	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
8007	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
8008	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
8009	vpxor	0+0(%rsi),%ymm3,%ymm3
8010	vpxor	32+0(%rsi),%ymm2,%ymm2
8011	vpxor	64+0(%rsi),%ymm6,%ymm6
8012	vpxor	96+0(%rsi),%ymm10,%ymm10
8013	vmovdqu	%ymm3,0+0(%rdi)
8014	vmovdqu	%ymm2,32+0(%rdi)
8015	vmovdqu	%ymm6,64+0(%rdi)
8016	vmovdqu	%ymm10,96+0(%rdi)
8017	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
8018	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
8019	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
8020	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
8021	vpxor	0+128(%rsi),%ymm3,%ymm3
8022	vpxor	32+128(%rsi),%ymm1,%ymm1
8023	vpxor	64+128(%rsi),%ymm5,%ymm5
8024	vpxor	96+128(%rsi),%ymm9,%ymm9
8025	vmovdqu	%ymm3,0+128(%rdi)
8026	vmovdqu	%ymm1,32+128(%rdi)
8027	vmovdqu	%ymm5,64+128(%rdi)
8028	vmovdqu	%ymm9,96+128(%rdi)
8029	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
8030	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
8031	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
8032	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
8033	vmovdqa	%ymm3,%ymm8
8034
8035	movq	$256,%rcx
8036	leaq	256(%rsi),%rsi
8037	subq	$256,%rbx
8038	jmp	.Lseal_avx2_short_hash_remainder
8039
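# 385..512 bytes left: four states; the first 384 bytes are encrypted and
# stored here, the rest is handed to the short loop.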
8040.Lseal_avx2_tail_512:
8041	vmovdqa	.Lchacha20_consts(%rip),%ymm0
8042	vmovdqa	0+64(%rbp),%ymm4
8043	vmovdqa	0+96(%rbp),%ymm8
8044	vmovdqa	%ymm0,%ymm1
8045	vmovdqa	%ymm4,%ymm5
8046	vmovdqa	%ymm8,%ymm9
8047	vmovdqa	%ymm0,%ymm2
8048	vmovdqa	%ymm4,%ymm6
8049	vmovdqa	%ymm8,%ymm10
8050	vmovdqa	%ymm0,%ymm3
8051	vmovdqa	%ymm4,%ymm7
8052	vmovdqa	%ymm8,%ymm11
8053	vmovdqa	.Lavx2_inc(%rip),%ymm12
8054	vpaddd	0+160(%rbp),%ymm12,%ymm15
8055	vpaddd	%ymm15,%ymm12,%ymm14
8056	vpaddd	%ymm14,%ymm12,%ymm13
8057	vpaddd	%ymm13,%ymm12,%ymm12
8058	vmovdqa	%ymm15,0+256(%rbp)
8059	vmovdqa	%ymm14,0+224(%rbp)
8060	vmovdqa	%ymm13,0+192(%rbp)
8061	vmovdqa	%ymm12,0+160(%rbp)
8062
8063.Lseal_avx2_tail_512_rounds_and_3xhash:
8064	addq	0+0(%rdi),%r10
8065	adcq	8+0(%rdi),%r11
8066	adcq	$1,%r12
8067	movq	0+0+0(%rbp),%rdx
8068	movq	%rdx,%r15
8069	mulxq	%r10,%r13,%r14
8070	mulxq	%r11,%rax,%rdx
8071	imulq	%r12,%r15
8072	addq	%rax,%r14
8073	adcq	%rdx,%r15
8074	movq	8+0+0(%rbp),%rdx
8075	mulxq	%r10,%r10,%rax
8076	addq	%r10,%r14
8077	mulxq	%r11,%r11,%r9
8078	adcq	%r11,%r15
8079	adcq	$0,%r9
8080	imulq	%r12,%rdx
8081	addq	%rax,%r15
8082	adcq	%rdx,%r9
8083	movq	%r13,%r10
8084	movq	%r14,%r11
8085	movq	%r15,%r12
8086	andq	$3,%r12
8087	movq	%r15,%r13
8088	andq	$-4,%r13
8089	movq	%r9,%r14
8090	shrdq	$2,%r9,%r15
8091	shrq	$2,%r9
8092	addq	%r13,%r15
8093	adcq	%r14,%r9
8094	addq	%r15,%r10
8095	adcq	%r9,%r11
8096	adcq	$0,%r12
8097
8098	leaq	16(%rdi),%rdi
8099.Lseal_avx2_tail_512_rounds_and_2xhash:
8100	vmovdqa	%ymm8,0+128(%rbp)
8101	vmovdqa	.Lrol16(%rip),%ymm8
8102	vpaddd	%ymm7,%ymm3,%ymm3
8103	vpaddd	%ymm6,%ymm2,%ymm2
8104	vpaddd	%ymm5,%ymm1,%ymm1
8105	vpaddd	%ymm4,%ymm0,%ymm0
8106	vpxor	%ymm3,%ymm15,%ymm15
8107	vpxor	%ymm2,%ymm14,%ymm14
8108	vpxor	%ymm1,%ymm13,%ymm13
8109	vpxor	%ymm0,%ymm12,%ymm12
8110	vpshufb	%ymm8,%ymm15,%ymm15
8111	vpshufb	%ymm8,%ymm14,%ymm14
8112	vpshufb	%ymm8,%ymm13,%ymm13
8113	vpshufb	%ymm8,%ymm12,%ymm12
8114	vpaddd	%ymm15,%ymm11,%ymm11
8115	vpaddd	%ymm14,%ymm10,%ymm10
8116	vpaddd	%ymm13,%ymm9,%ymm9
8117	vpaddd	0+128(%rbp),%ymm12,%ymm8
8118	vpxor	%ymm11,%ymm7,%ymm7
8119	vpxor	%ymm10,%ymm6,%ymm6
8120	addq	0+0(%rdi),%r10
8121	adcq	8+0(%rdi),%r11
8122	adcq	$1,%r12
8123	vpxor	%ymm9,%ymm5,%ymm5
8124	vpxor	%ymm8,%ymm4,%ymm4
8125	vmovdqa	%ymm8,0+128(%rbp)
8126	vpsrld	$20,%ymm7,%ymm8
8127	vpslld	$32-20,%ymm7,%ymm7
8128	vpxor	%ymm8,%ymm7,%ymm7
8129	vpsrld	$20,%ymm6,%ymm8
8130	vpslld	$32-20,%ymm6,%ymm6
8131	vpxor	%ymm8,%ymm6,%ymm6
8132	vpsrld	$20,%ymm5,%ymm8
8133	vpslld	$32-20,%ymm5,%ymm5
8134	vpxor	%ymm8,%ymm5,%ymm5
8135	vpsrld	$20,%ymm4,%ymm8
8136	vpslld	$32-20,%ymm4,%ymm4
8137	vpxor	%ymm8,%ymm4,%ymm4
8138	vmovdqa	.Lrol8(%rip),%ymm8
8139	vpaddd	%ymm7,%ymm3,%ymm3
8140	vpaddd	%ymm6,%ymm2,%ymm2
8141	vpaddd	%ymm5,%ymm1,%ymm1
8142	vpaddd	%ymm4,%ymm0,%ymm0
8143	movq	0+0+0(%rbp),%rdx
8144	movq	%rdx,%r15
8145	mulxq	%r10,%r13,%r14
8146	mulxq	%r11,%rax,%rdx
8147	imulq	%r12,%r15
8148	addq	%rax,%r14
8149	adcq	%rdx,%r15
8150	vpxor	%ymm3,%ymm15,%ymm15
8151	vpxor	%ymm2,%ymm14,%ymm14
8152	vpxor	%ymm1,%ymm13,%ymm13
8153	vpxor	%ymm0,%ymm12,%ymm12
8154	vpshufb	%ymm8,%ymm15,%ymm15
8155	vpshufb	%ymm8,%ymm14,%ymm14
8156	vpshufb	%ymm8,%ymm13,%ymm13
8157	vpshufb	%ymm8,%ymm12,%ymm12
8158	vpaddd	%ymm15,%ymm11,%ymm11
8159	vpaddd	%ymm14,%ymm10,%ymm10
8160	vpaddd	%ymm13,%ymm9,%ymm9
8161	vpaddd	0+128(%rbp),%ymm12,%ymm8
8162	vpxor	%ymm11,%ymm7,%ymm7
8163	vpxor	%ymm10,%ymm6,%ymm6
8164	vpxor	%ymm9,%ymm5,%ymm5
8165	vpxor	%ymm8,%ymm4,%ymm4
8166	vmovdqa	%ymm8,0+128(%rbp)
8167	vpsrld	$25,%ymm7,%ymm8
8168	vpslld	$32-25,%ymm7,%ymm7
8169	vpxor	%ymm8,%ymm7,%ymm7
8170	movq	8+0+0(%rbp),%rdx
8171	mulxq	%r10,%r10,%rax
8172	addq	%r10,%r14
8173	mulxq	%r11,%r11,%r9
8174	adcq	%r11,%r15
8175	adcq	$0,%r9
8176	imulq	%r12,%rdx
8177	vpsrld	$25,%ymm6,%ymm8
8178	vpslld	$32-25,%ymm6,%ymm6
8179	vpxor	%ymm8,%ymm6,%ymm6
8180	vpsrld	$25,%ymm5,%ymm8
8181	vpslld	$32-25,%ymm5,%ymm5
8182	vpxor	%ymm8,%ymm5,%ymm5
8183	vpsrld	$25,%ymm4,%ymm8
8184	vpslld	$32-25,%ymm4,%ymm4
8185	vpxor	%ymm8,%ymm4,%ymm4
8186	vmovdqa	0+128(%rbp),%ymm8
8187	vpalignr	$4,%ymm7,%ymm7,%ymm7
8188	vpalignr	$8,%ymm11,%ymm11,%ymm11
8189	vpalignr	$12,%ymm15,%ymm15,%ymm15
8190	vpalignr	$4,%ymm6,%ymm6,%ymm6
8191	vpalignr	$8,%ymm10,%ymm10,%ymm10
8192	vpalignr	$12,%ymm14,%ymm14,%ymm14
8193	vpalignr	$4,%ymm5,%ymm5,%ymm5
8194	vpalignr	$8,%ymm9,%ymm9,%ymm9
8195	vpalignr	$12,%ymm13,%ymm13,%ymm13
8196	vpalignr	$4,%ymm4,%ymm4,%ymm4
8197	addq	%rax,%r15
8198	adcq	%rdx,%r9
8199	vpalignr	$8,%ymm8,%ymm8,%ymm8
8200	vpalignr	$12,%ymm12,%ymm12,%ymm12
8201	vmovdqa	%ymm8,0+128(%rbp)
8202	vmovdqa	.Lrol16(%rip),%ymm8
8203	vpaddd	%ymm7,%ymm3,%ymm3
8204	vpaddd	%ymm6,%ymm2,%ymm2
8205	vpaddd	%ymm5,%ymm1,%ymm1
8206	vpaddd	%ymm4,%ymm0,%ymm0
8207	vpxor	%ymm3,%ymm15,%ymm15
8208	vpxor	%ymm2,%ymm14,%ymm14
8209	vpxor	%ymm1,%ymm13,%ymm13
8210	vpxor	%ymm0,%ymm12,%ymm12
8211	vpshufb	%ymm8,%ymm15,%ymm15
8212	vpshufb	%ymm8,%ymm14,%ymm14
8213	vpshufb	%ymm8,%ymm13,%ymm13
8214	vpshufb	%ymm8,%ymm12,%ymm12
8215	vpaddd	%ymm15,%ymm11,%ymm11
8216	vpaddd	%ymm14,%ymm10,%ymm10
8217	vpaddd	%ymm13,%ymm9,%ymm9
8218	vpaddd	0+128(%rbp),%ymm12,%ymm8
8219	movq	%r13,%r10
8220	movq	%r14,%r11
8221	movq	%r15,%r12
8222	andq	$3,%r12
8223	movq	%r15,%r13
8224	andq	$-4,%r13
8225	movq	%r9,%r14
8226	shrdq	$2,%r9,%r15
8227	shrq	$2,%r9
8228	addq	%r13,%r15
8229	adcq	%r14,%r9
8230	addq	%r15,%r10
8231	adcq	%r9,%r11
8232	adcq	$0,%r12
8233	vpxor	%ymm11,%ymm7,%ymm7
8234	vpxor	%ymm10,%ymm6,%ymm6
8235	vpxor	%ymm9,%ymm5,%ymm5
8236	vpxor	%ymm8,%ymm4,%ymm4
8237	vmovdqa	%ymm8,0+128(%rbp)
8238	vpsrld	$20,%ymm7,%ymm8
8239	vpslld	$32-20,%ymm7,%ymm7
8240	vpxor	%ymm8,%ymm7,%ymm7
8241	vpsrld	$20,%ymm6,%ymm8
8242	vpslld	$32-20,%ymm6,%ymm6
8243	vpxor	%ymm8,%ymm6,%ymm6
8244	vpsrld	$20,%ymm5,%ymm8
8245	vpslld	$32-20,%ymm5,%ymm5
8246	vpxor	%ymm8,%ymm5,%ymm5
8247	vpsrld	$20,%ymm4,%ymm8
8248	vpslld	$32-20,%ymm4,%ymm4
8249	vpxor	%ymm8,%ymm4,%ymm4
8250	vmovdqa	.Lrol8(%rip),%ymm8
8251	vpaddd	%ymm7,%ymm3,%ymm3
8252	vpaddd	%ymm6,%ymm2,%ymm2
8253	addq	0+16(%rdi),%r10
8254	adcq	8+16(%rdi),%r11
8255	adcq	$1,%r12
8256	vpaddd	%ymm5,%ymm1,%ymm1
8257	vpaddd	%ymm4,%ymm0,%ymm0
8258	vpxor	%ymm3,%ymm15,%ymm15
8259	vpxor	%ymm2,%ymm14,%ymm14
8260	vpxor	%ymm1,%ymm13,%ymm13
8261	vpxor	%ymm0,%ymm12,%ymm12
8262	vpshufb	%ymm8,%ymm15,%ymm15
8263	vpshufb	%ymm8,%ymm14,%ymm14
8264	vpshufb	%ymm8,%ymm13,%ymm13
8265	vpshufb	%ymm8,%ymm12,%ymm12
8266	vpaddd	%ymm15,%ymm11,%ymm11
8267	vpaddd	%ymm14,%ymm10,%ymm10
8268	vpaddd	%ymm13,%ymm9,%ymm9
8269	vpaddd	0+128(%rbp),%ymm12,%ymm8
8270	vpxor	%ymm11,%ymm7,%ymm7
8271	vpxor	%ymm10,%ymm6,%ymm6
8272	vpxor	%ymm9,%ymm5,%ymm5
8273	vpxor	%ymm8,%ymm4,%ymm4
8274	vmovdqa	%ymm8,0+128(%rbp)
8275	vpsrld	$25,%ymm7,%ymm8
8276	movq	0+0+0(%rbp),%rdx
8277	movq	%rdx,%r15
8278	mulxq	%r10,%r13,%r14
8279	mulxq	%r11,%rax,%rdx
8280	imulq	%r12,%r15
8281	addq	%rax,%r14
8282	adcq	%rdx,%r15
8283	vpslld	$32-25,%ymm7,%ymm7
8284	vpxor	%ymm8,%ymm7,%ymm7
8285	vpsrld	$25,%ymm6,%ymm8
8286	vpslld	$32-25,%ymm6,%ymm6
8287	vpxor	%ymm8,%ymm6,%ymm6
8288	vpsrld	$25,%ymm5,%ymm8
8289	vpslld	$32-25,%ymm5,%ymm5
8290	vpxor	%ymm8,%ymm5,%ymm5
8291	vpsrld	$25,%ymm4,%ymm8
8292	vpslld	$32-25,%ymm4,%ymm4
8293	vpxor	%ymm8,%ymm4,%ymm4
8294	vmovdqa	0+128(%rbp),%ymm8
8295	vpalignr	$12,%ymm7,%ymm7,%ymm7
8296	vpalignr	$8,%ymm11,%ymm11,%ymm11
8297	vpalignr	$4,%ymm15,%ymm15,%ymm15
8298	vpalignr	$12,%ymm6,%ymm6,%ymm6
8299	vpalignr	$8,%ymm10,%ymm10,%ymm10
8300	vpalignr	$4,%ymm14,%ymm14,%ymm14
8301	vpalignr	$12,%ymm5,%ymm5,%ymm5
8302	vpalignr	$8,%ymm9,%ymm9,%ymm9
8303	movq	8+0+0(%rbp),%rdx
8304	mulxq	%r10,%r10,%rax
8305	addq	%r10,%r14
8306	mulxq	%r11,%r11,%r9
8307	adcq	%r11,%r15
8308	adcq	$0,%r9
8309	imulq	%r12,%rdx
8310	vpalignr	$4,%ymm13,%ymm13,%ymm13
8311	vpalignr	$12,%ymm4,%ymm4,%ymm4
8312	vpalignr	$8,%ymm8,%ymm8,%ymm8
8313	vpalignr	$4,%ymm12,%ymm12,%ymm12
8314
8315
8316
8317
8318
8319
8320
8321
8322
8323
8324
8325
8326
8327
8328
8329
8330	addq	%rax,%r15
8331	adcq	%rdx,%r9
8332
8333
8334
8335
8336
8337
8338
8339
8340
8341
8342
8343
8344
8345
8346
8347
8348
8349
8350
8351
8352	movq	%r13,%r10
8353	movq	%r14,%r11
8354	movq	%r15,%r12
8355	andq	$3,%r12
8356	movq	%r15,%r13
8357	andq	$-4,%r13
8358	movq	%r9,%r14
8359	shrdq	$2,%r9,%r15
8360	shrq	$2,%r9
8361	addq	%r13,%r15
8362	adcq	%r14,%r9
8363	addq	%r15,%r10
8364	adcq	%r9,%r11
8365	adcq	$0,%r12
8366
8367	leaq	32(%rdi),%rdi
8368	decq	%rcx
8369	jg	.Lseal_avx2_tail_512_rounds_and_3xhash
8370	decq	%r8
8371	jge	.Lseal_avx2_tail_512_rounds_and_2xhash
8372	vpaddd	.Lchacha20_consts(%rip),%ymm3,%ymm3
8373	vpaddd	0+64(%rbp),%ymm7,%ymm7
8374	vpaddd	0+96(%rbp),%ymm11,%ymm11
8375	vpaddd	0+256(%rbp),%ymm15,%ymm15
8376	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
8377	vpaddd	0+64(%rbp),%ymm6,%ymm6
8378	vpaddd	0+96(%rbp),%ymm10,%ymm10
8379	vpaddd	0+224(%rbp),%ymm14,%ymm14
8380	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
8381	vpaddd	0+64(%rbp),%ymm5,%ymm5
8382	vpaddd	0+96(%rbp),%ymm9,%ymm9
8383	vpaddd	0+192(%rbp),%ymm13,%ymm13
8384	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
8385	vpaddd	0+64(%rbp),%ymm4,%ymm4
8386	vpaddd	0+96(%rbp),%ymm8,%ymm8
8387	vpaddd	0+160(%rbp),%ymm12,%ymm12
8388
8389	vmovdqa	%ymm0,0+128(%rbp)
8390	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
8391	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
8392	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
8393	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
8394	vpxor	0+0(%rsi),%ymm0,%ymm0
8395	vpxor	32+0(%rsi),%ymm3,%ymm3
8396	vpxor	64+0(%rsi),%ymm7,%ymm7
8397	vpxor	96+0(%rsi),%ymm11,%ymm11
8398	vmovdqu	%ymm0,0+0(%rdi)
8399	vmovdqu	%ymm3,32+0(%rdi)
8400	vmovdqu	%ymm7,64+0(%rdi)
8401	vmovdqu	%ymm11,96+0(%rdi)
8402
8403	vmovdqa	0+128(%rbp),%ymm0
8404	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
8405	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
8406	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
8407	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
8408	vpxor	0+128(%rsi),%ymm3,%ymm3
8409	vpxor	32+128(%rsi),%ymm2,%ymm2
8410	vpxor	64+128(%rsi),%ymm6,%ymm6
8411	vpxor	96+128(%rsi),%ymm10,%ymm10
8412	vmovdqu	%ymm3,0+128(%rdi)
8413	vmovdqu	%ymm2,32+128(%rdi)
8414	vmovdqu	%ymm6,64+128(%rdi)
8415	vmovdqu	%ymm10,96+128(%rdi)
8416	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
8417	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
8418	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
8419	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
8420	vpxor	0+256(%rsi),%ymm3,%ymm3
8421	vpxor	32+256(%rsi),%ymm1,%ymm1
8422	vpxor	64+256(%rsi),%ymm5,%ymm5
8423	vpxor	96+256(%rsi),%ymm9,%ymm9
8424	vmovdqu	%ymm3,0+256(%rdi)
8425	vmovdqu	%ymm1,32+256(%rdi)
8426	vmovdqu	%ymm5,64+256(%rdi)
8427	vmovdqu	%ymm9,96+256(%rdi)
8428	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
8429	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
8430	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
8431	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
8432	vmovdqa	%ymm3,%ymm8
8433
8434	movq	$384,%rcx
8435	leaq	384(%rsi),%rsi
8436	subq	$384,%rbx
8437	jmp	.Lseal_avx2_short_hash_remainder
8438
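# Short-input path: three double-block states give 320 bytes of keystream
# after the first 32 bytes are clamped off as the Poly1305 key.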
8439.Lseal_avx2_320:
8440	vmovdqa	%ymm0,%ymm1
8441	vmovdqa	%ymm0,%ymm2
8442	vmovdqa	%ymm4,%ymm5
8443	vmovdqa	%ymm4,%ymm6
8444	vmovdqa	%ymm8,%ymm9
8445	vmovdqa	%ymm8,%ymm10
8446	vpaddd	.Lavx2_inc(%rip),%ymm12,%ymm13
8447	vpaddd	.Lavx2_inc(%rip),%ymm13,%ymm14
8448	vmovdqa	%ymm4,%ymm7
8449	vmovdqa	%ymm8,%ymm11
8450	vmovdqa	%ymm12,0+160(%rbp)
8451	vmovdqa	%ymm13,0+192(%rbp)
8452	vmovdqa	%ymm14,0+224(%rbp)
8453	movq	$10,%r10
8454.Lseal_avx2_320_rounds:
8455	vpaddd	%ymm4,%ymm0,%ymm0
8456	vpxor	%ymm0,%ymm12,%ymm12
8457	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
8458	vpaddd	%ymm12,%ymm8,%ymm8
8459	vpxor	%ymm8,%ymm4,%ymm4
8460	vpsrld	$20,%ymm4,%ymm3
8461	vpslld	$12,%ymm4,%ymm4
8462	vpxor	%ymm3,%ymm4,%ymm4
8463	vpaddd	%ymm4,%ymm0,%ymm0
8464	vpxor	%ymm0,%ymm12,%ymm12
8465	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
8466	vpaddd	%ymm12,%ymm8,%ymm8
8467	vpxor	%ymm8,%ymm4,%ymm4
8468	vpslld	$7,%ymm4,%ymm3
8469	vpsrld	$25,%ymm4,%ymm4
8470	vpxor	%ymm3,%ymm4,%ymm4
8471	vpalignr	$12,%ymm12,%ymm12,%ymm12
8472	vpalignr	$8,%ymm8,%ymm8,%ymm8
8473	vpalignr	$4,%ymm4,%ymm4,%ymm4
8474	vpaddd	%ymm5,%ymm1,%ymm1
8475	vpxor	%ymm1,%ymm13,%ymm13
8476	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
8477	vpaddd	%ymm13,%ymm9,%ymm9
8478	vpxor	%ymm9,%ymm5,%ymm5
8479	vpsrld	$20,%ymm5,%ymm3
8480	vpslld	$12,%ymm5,%ymm5
8481	vpxor	%ymm3,%ymm5,%ymm5
8482	vpaddd	%ymm5,%ymm1,%ymm1
8483	vpxor	%ymm1,%ymm13,%ymm13
8484	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
8485	vpaddd	%ymm13,%ymm9,%ymm9
8486	vpxor	%ymm9,%ymm5,%ymm5
8487	vpslld	$7,%ymm5,%ymm3
8488	vpsrld	$25,%ymm5,%ymm5
8489	vpxor	%ymm3,%ymm5,%ymm5
8490	vpalignr	$12,%ymm13,%ymm13,%ymm13
8491	vpalignr	$8,%ymm9,%ymm9,%ymm9
8492	vpalignr	$4,%ymm5,%ymm5,%ymm5
8493	vpaddd	%ymm6,%ymm2,%ymm2
8494	vpxor	%ymm2,%ymm14,%ymm14
8495	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
8496	vpaddd	%ymm14,%ymm10,%ymm10
8497	vpxor	%ymm10,%ymm6,%ymm6
8498	vpsrld	$20,%ymm6,%ymm3
8499	vpslld	$12,%ymm6,%ymm6
8500	vpxor	%ymm3,%ymm6,%ymm6
8501	vpaddd	%ymm6,%ymm2,%ymm2
8502	vpxor	%ymm2,%ymm14,%ymm14
8503	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
8504	vpaddd	%ymm14,%ymm10,%ymm10
8505	vpxor	%ymm10,%ymm6,%ymm6
8506	vpslld	$7,%ymm6,%ymm3
8507	vpsrld	$25,%ymm6,%ymm6
8508	vpxor	%ymm3,%ymm6,%ymm6
8509	vpalignr	$12,%ymm14,%ymm14,%ymm14
8510	vpalignr	$8,%ymm10,%ymm10,%ymm10
8511	vpalignr	$4,%ymm6,%ymm6,%ymm6
8512	vpaddd	%ymm4,%ymm0,%ymm0
8513	vpxor	%ymm0,%ymm12,%ymm12
8514	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
8515	vpaddd	%ymm12,%ymm8,%ymm8
8516	vpxor	%ymm8,%ymm4,%ymm4
8517	vpsrld	$20,%ymm4,%ymm3
8518	vpslld	$12,%ymm4,%ymm4
8519	vpxor	%ymm3,%ymm4,%ymm4
8520	vpaddd	%ymm4,%ymm0,%ymm0
8521	vpxor	%ymm0,%ymm12,%ymm12
8522	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
8523	vpaddd	%ymm12,%ymm8,%ymm8
8524	vpxor	%ymm8,%ymm4,%ymm4
8525	vpslld	$7,%ymm4,%ymm3
8526	vpsrld	$25,%ymm4,%ymm4
8527	vpxor	%ymm3,%ymm4,%ymm4
8528	vpalignr	$4,%ymm12,%ymm12,%ymm12
8529	vpalignr	$8,%ymm8,%ymm8,%ymm8
8530	vpalignr	$12,%ymm4,%ymm4,%ymm4
8531	vpaddd	%ymm5,%ymm1,%ymm1
8532	vpxor	%ymm1,%ymm13,%ymm13
8533	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
8534	vpaddd	%ymm13,%ymm9,%ymm9
8535	vpxor	%ymm9,%ymm5,%ymm5
8536	vpsrld	$20,%ymm5,%ymm3
8537	vpslld	$12,%ymm5,%ymm5
8538	vpxor	%ymm3,%ymm5,%ymm5
8539	vpaddd	%ymm5,%ymm1,%ymm1
8540	vpxor	%ymm1,%ymm13,%ymm13
8541	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
8542	vpaddd	%ymm13,%ymm9,%ymm9
8543	vpxor	%ymm9,%ymm5,%ymm5
8544	vpslld	$7,%ymm5,%ymm3
8545	vpsrld	$25,%ymm5,%ymm5
8546	vpxor	%ymm3,%ymm5,%ymm5
8547	vpalignr	$4,%ymm13,%ymm13,%ymm13
8548	vpalignr	$8,%ymm9,%ymm9,%ymm9
8549	vpalignr	$12,%ymm5,%ymm5,%ymm5
8550	vpaddd	%ymm6,%ymm2,%ymm2
8551	vpxor	%ymm2,%ymm14,%ymm14
8552	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
8553	vpaddd	%ymm14,%ymm10,%ymm10
8554	vpxor	%ymm10,%ymm6,%ymm6
8555	vpsrld	$20,%ymm6,%ymm3
8556	vpslld	$12,%ymm6,%ymm6
8557	vpxor	%ymm3,%ymm6,%ymm6
8558	vpaddd	%ymm6,%ymm2,%ymm2
8559	vpxor	%ymm2,%ymm14,%ymm14
8560	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
8561	vpaddd	%ymm14,%ymm10,%ymm10
8562	vpxor	%ymm10,%ymm6,%ymm6
8563	vpslld	$7,%ymm6,%ymm3
8564	vpsrld	$25,%ymm6,%ymm6
8565	vpxor	%ymm3,%ymm6,%ymm6
8566	vpalignr	$4,%ymm14,%ymm14,%ymm14
8567	vpalignr	$8,%ymm10,%ymm10,%ymm10
8568	vpalignr	$12,%ymm6,%ymm6,%ymm6
8569
8570	decq	%r10
8571	jne	.Lseal_avx2_320_rounds
8572	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
8573	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
8574	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
8575	vpaddd	%ymm7,%ymm4,%ymm4
8576	vpaddd	%ymm7,%ymm5,%ymm5
8577	vpaddd	%ymm7,%ymm6,%ymm6
8578	vpaddd	%ymm11,%ymm8,%ymm8
8579	vpaddd	%ymm11,%ymm9,%ymm9
8580	vpaddd	%ymm11,%ymm10,%ymm10
8581	vpaddd	0+160(%rbp),%ymm12,%ymm12
8582	vpaddd	0+192(%rbp),%ymm13,%ymm13
8583	vpaddd	0+224(%rbp),%ymm14,%ymm14
8584	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
8585
8586	vpand	.Lclamp(%rip),%ymm3,%ymm3
8587	vmovdqa	%ymm3,0+0(%rbp)
8588
8589	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
8590	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
8591	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
8592	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
8593	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
8594	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
8595	vperm2i128	$0x02,%ymm2,%ymm6,%ymm9
8596	vperm2i128	$0x02,%ymm10,%ymm14,%ymm13
8597	vperm2i128	$0x13,%ymm2,%ymm6,%ymm2
8598	vperm2i128	$0x13,%ymm10,%ymm14,%ymm6
8599	jmp	.Lseal_avx2_short
8600
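# Shorter input: two double-block states give 192 bytes of keystream after
# the Poly1305 key is clamped off.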
8601.Lseal_avx2_192:
8602	vmovdqa	%ymm0,%ymm1
8603	vmovdqa	%ymm0,%ymm2
8604	vmovdqa	%ymm4,%ymm5
8605	vmovdqa	%ymm4,%ymm6
8606	vmovdqa	%ymm8,%ymm9
8607	vmovdqa	%ymm8,%ymm10
8608	vpaddd	.Lavx2_inc(%rip),%ymm12,%ymm13
8609	vmovdqa	%ymm12,%ymm11
8610	vmovdqa	%ymm13,%ymm15
8611	movq	$10,%r10
8612.Lseal_avx2_192_rounds:
8613	vpaddd	%ymm4,%ymm0,%ymm0
8614	vpxor	%ymm0,%ymm12,%ymm12
8615	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
8616	vpaddd	%ymm12,%ymm8,%ymm8
8617	vpxor	%ymm8,%ymm4,%ymm4
8618	vpsrld	$20,%ymm4,%ymm3
8619	vpslld	$12,%ymm4,%ymm4
8620	vpxor	%ymm3,%ymm4,%ymm4
8621	vpaddd	%ymm4,%ymm0,%ymm0
8622	vpxor	%ymm0,%ymm12,%ymm12
8623	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
8624	vpaddd	%ymm12,%ymm8,%ymm8
8625	vpxor	%ymm8,%ymm4,%ymm4
8626	vpslld	$7,%ymm4,%ymm3
8627	vpsrld	$25,%ymm4,%ymm4
8628	vpxor	%ymm3,%ymm4,%ymm4
8629	vpalignr	$12,%ymm12,%ymm12,%ymm12
8630	vpalignr	$8,%ymm8,%ymm8,%ymm8
8631	vpalignr	$4,%ymm4,%ymm4,%ymm4
8632	vpaddd	%ymm5,%ymm1,%ymm1
8633	vpxor	%ymm1,%ymm13,%ymm13
8634	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
8635	vpaddd	%ymm13,%ymm9,%ymm9
8636	vpxor	%ymm9,%ymm5,%ymm5
8637	vpsrld	$20,%ymm5,%ymm3
8638	vpslld	$12,%ymm5,%ymm5
8639	vpxor	%ymm3,%ymm5,%ymm5
8640	vpaddd	%ymm5,%ymm1,%ymm1
8641	vpxor	%ymm1,%ymm13,%ymm13
8642	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
8643	vpaddd	%ymm13,%ymm9,%ymm9
8644	vpxor	%ymm9,%ymm5,%ymm5
8645	vpslld	$7,%ymm5,%ymm3
8646	vpsrld	$25,%ymm5,%ymm5
8647	vpxor	%ymm3,%ymm5,%ymm5
8648	vpalignr	$12,%ymm13,%ymm13,%ymm13
8649	vpalignr	$8,%ymm9,%ymm9,%ymm9
8650	vpalignr	$4,%ymm5,%ymm5,%ymm5
8651	vpaddd	%ymm4,%ymm0,%ymm0
8652	vpxor	%ymm0,%ymm12,%ymm12
8653	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
8654	vpaddd	%ymm12,%ymm8,%ymm8
8655	vpxor	%ymm8,%ymm4,%ymm4
8656	vpsrld	$20,%ymm4,%ymm3
8657	vpslld	$12,%ymm4,%ymm4
8658	vpxor	%ymm3,%ymm4,%ymm4
8659	vpaddd	%ymm4,%ymm0,%ymm0
8660	vpxor	%ymm0,%ymm12,%ymm12
8661	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
8662	vpaddd	%ymm12,%ymm8,%ymm8
8663	vpxor	%ymm8,%ymm4,%ymm4
8664	vpslld	$7,%ymm4,%ymm3
8665	vpsrld	$25,%ymm4,%ymm4
8666	vpxor	%ymm3,%ymm4,%ymm4
8667	vpalignr	$4,%ymm12,%ymm12,%ymm12
8668	vpalignr	$8,%ymm8,%ymm8,%ymm8
8669	vpalignr	$12,%ymm4,%ymm4,%ymm4
8670	vpaddd	%ymm5,%ymm1,%ymm1
8671	vpxor	%ymm1,%ymm13,%ymm13
8672	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
8673	vpaddd	%ymm13,%ymm9,%ymm9
8674	vpxor	%ymm9,%ymm5,%ymm5
8675	vpsrld	$20,%ymm5,%ymm3
8676	vpslld	$12,%ymm5,%ymm5
8677	vpxor	%ymm3,%ymm5,%ymm5
8678	vpaddd	%ymm5,%ymm1,%ymm1
8679	vpxor	%ymm1,%ymm13,%ymm13
8680	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
8681	vpaddd	%ymm13,%ymm9,%ymm9
8682	vpxor	%ymm9,%ymm5,%ymm5
8683	vpslld	$7,%ymm5,%ymm3
8684	vpsrld	$25,%ymm5,%ymm5
8685	vpxor	%ymm3,%ymm5,%ymm5
8686	vpalignr	$4,%ymm13,%ymm13,%ymm13
8687	vpalignr	$8,%ymm9,%ymm9,%ymm9
8688	vpalignr	$12,%ymm5,%ymm5,%ymm5
8689
8690	decq	%r10
8691	jne	.Lseal_avx2_192_rounds
8692	vpaddd	%ymm2,%ymm0,%ymm0
8693	vpaddd	%ymm2,%ymm1,%ymm1
8694	vpaddd	%ymm6,%ymm4,%ymm4
8695	vpaddd	%ymm6,%ymm5,%ymm5
8696	vpaddd	%ymm10,%ymm8,%ymm8
8697	vpaddd	%ymm10,%ymm9,%ymm9
8698	vpaddd	%ymm11,%ymm12,%ymm12
8699	vpaddd	%ymm15,%ymm13,%ymm13
8700	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
8701
8702	vpand	.Lclamp(%rip),%ymm3,%ymm3
8703	vmovdqa	%ymm3,0+0(%rbp)
8704
8705	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
8706	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
8707	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
8708	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
8709	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
8710	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
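# Common short path: hash the AAD, then encrypt and hash the message using
# the keystream prepared in the ymm registers.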
8711.Lseal_avx2_short:
8712	movq	%r8,%r8
8713	call	poly_hash_ad_internal
8714	xorq	%rcx,%rcx
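# Hash %rcx bytes of ciphertext already written by a tail path, 16 bytes at a
# time, before encrypting what is left.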
8715.Lseal_avx2_short_hash_remainder:
8716	cmpq	$16,%rcx
8717	jb	.Lseal_avx2_short_loop
8718	addq	0+0(%rdi),%r10
8719	adcq	8+0(%rdi),%r11
8720	adcq	$1,%r12
8721	movq	0+0+0(%rbp),%rax
8722	movq	%rax,%r15
8723	mulq	%r10
8724	movq	%rax,%r13
8725	movq	%rdx,%r14
8726	movq	0+0+0(%rbp),%rax
8727	mulq	%r11
8728	imulq	%r12,%r15
8729	addq	%rax,%r14
8730	adcq	%rdx,%r15
8731	movq	8+0+0(%rbp),%rax
8732	movq	%rax,%r9
8733	mulq	%r10
8734	addq	%rax,%r14
8735	adcq	$0,%rdx
8736	movq	%rdx,%r10
8737	movq	8+0+0(%rbp),%rax
8738	mulq	%r11
8739	addq	%rax,%r15
8740	adcq	$0,%rdx
8741	imulq	%r12,%r9
8742	addq	%r10,%r15
8743	adcq	%rdx,%r9
8744	movq	%r13,%r10
8745	movq	%r14,%r11
8746	movq	%r15,%r12
8747	andq	$3,%r12
8748	movq	%r15,%r13
8749	andq	$-4,%r13
8750	movq	%r9,%r14
8751	shrdq	$2,%r9,%r15
8752	shrq	$2,%r9
8753	addq	%r13,%r15
8754	adcq	%r14,%r9
8755	addq	%r15,%r10
8756	adcq	%r9,%r11
8757	adcq	$0,%r12
8758
8759	subq	$16,%rcx
8760	addq	$16,%rdi
8761	jmp	.Lseal_avx2_short_hash_remainder
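# Encrypt 32 bytes with the next keystream register, hash the two resulting
# 16-byte blocks, then rotate the keystream registers down.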
8762.Lseal_avx2_short_loop:
8763	cmpq	$32,%rbx
8764	jb	.Lseal_avx2_short_tail
8765	subq	$32,%rbx
8766
8767	vpxor	(%rsi),%ymm0,%ymm0
8768	vmovdqu	%ymm0,(%rdi)
8769	leaq	32(%rsi),%rsi
8770
8771	addq	0+0(%rdi),%r10
8772	adcq	8+0(%rdi),%r11
8773	adcq	$1,%r12
8774	movq	0+0+0(%rbp),%rax
8775	movq	%rax,%r15
8776	mulq	%r10
8777	movq	%rax,%r13
8778	movq	%rdx,%r14
8779	movq	0+0+0(%rbp),%rax
8780	mulq	%r11
8781	imulq	%r12,%r15
8782	addq	%rax,%r14
8783	adcq	%rdx,%r15
8784	movq	8+0+0(%rbp),%rax
8785	movq	%rax,%r9
8786	mulq	%r10
8787	addq	%rax,%r14
8788	adcq	$0,%rdx
8789	movq	%rdx,%r10
8790	movq	8+0+0(%rbp),%rax
8791	mulq	%r11
8792	addq	%rax,%r15
8793	adcq	$0,%rdx
8794	imulq	%r12,%r9
8795	addq	%r10,%r15
8796	adcq	%rdx,%r9
8797	movq	%r13,%r10
8798	movq	%r14,%r11
8799	movq	%r15,%r12
8800	andq	$3,%r12
8801	movq	%r15,%r13
8802	andq	$-4,%r13
8803	movq	%r9,%r14
8804	shrdq	$2,%r9,%r15
8805	shrq	$2,%r9
8806	addq	%r13,%r15
8807	adcq	%r14,%r9
8808	addq	%r15,%r10
8809	adcq	%r9,%r11
8810	adcq	$0,%r12
8811	addq	0+16(%rdi),%r10
8812	adcq	8+16(%rdi),%r11
8813	adcq	$1,%r12
8814	movq	0+0+0(%rbp),%rax
8815	movq	%rax,%r15
8816	mulq	%r10
8817	movq	%rax,%r13
8818	movq	%rdx,%r14
8819	movq	0+0+0(%rbp),%rax
8820	mulq	%r11
8821	imulq	%r12,%r15
8822	addq	%rax,%r14
8823	adcq	%rdx,%r15
8824	movq	8+0+0(%rbp),%rax
8825	movq	%rax,%r9
8826	mulq	%r10
8827	addq	%rax,%r14
8828	adcq	$0,%rdx
8829	movq	%rdx,%r10
8830	movq	8+0+0(%rbp),%rax
8831	mulq	%r11
8832	addq	%rax,%r15
8833	adcq	$0,%rdx
8834	imulq	%r12,%r9
8835	addq	%r10,%r15
8836	adcq	%rdx,%r9
8837	movq	%r13,%r10
8838	movq	%r14,%r11
8839	movq	%r15,%r12
8840	andq	$3,%r12
8841	movq	%r15,%r13
8842	andq	$-4,%r13
8843	movq	%r9,%r14
8844	shrdq	$2,%r9,%r15
8845	shrq	$2,%r9
8846	addq	%r13,%r15
8847	adcq	%r14,%r9
8848	addq	%r15,%r10
8849	adcq	%r9,%r11
8850	adcq	$0,%r12
8851
8852	leaq	32(%rdi),%rdi
8853
8854	vmovdqa	%ymm4,%ymm0
8855	vmovdqa	%ymm8,%ymm4
8856	vmovdqa	%ymm12,%ymm8
8857	vmovdqa	%ymm1,%ymm12
8858	vmovdqa	%ymm5,%ymm1
8859	vmovdqa	%ymm9,%ymm5
8860	vmovdqa	%ymm13,%ymm9
8861	vmovdqa	%ymm2,%ymm13
8862	vmovdqa	%ymm6,%ymm2
8863	jmp	.Lseal_avx2_short_loop
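# 16..31 bytes left: encrypt 16 bytes with the low xmm lane and hash them;
# anything under 16 bytes is finished by the SSE tail (.Lseal_sse_tail_16).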
8864.Lseal_avx2_short_tail:
8865	cmpq	$16,%rbx
8866	jb	.Lseal_avx2_exit
8867	subq	$16,%rbx
8868	vpxor	(%rsi),%xmm0,%xmm3
8869	vmovdqu	%xmm3,(%rdi)
8870	leaq	16(%rsi),%rsi
8871	addq	0+0(%rdi),%r10
8872	adcq	8+0(%rdi),%r11
8873	adcq	$1,%r12
8874	movq	0+0+0(%rbp),%rax
8875	movq	%rax,%r15
8876	mulq	%r10
8877	movq	%rax,%r13
8878	movq	%rdx,%r14
8879	movq	0+0+0(%rbp),%rax
8880	mulq	%r11
8881	imulq	%r12,%r15
8882	addq	%rax,%r14
8883	adcq	%rdx,%r15
8884	movq	8+0+0(%rbp),%rax
8885	movq	%rax,%r9
8886	mulq	%r10
8887	addq	%rax,%r14
8888	adcq	$0,%rdx
8889	movq	%rdx,%r10
8890	movq	8+0+0(%rbp),%rax
8891	mulq	%r11
8892	addq	%rax,%r15
8893	adcq	$0,%rdx
8894	imulq	%r12,%r9
8895	addq	%r10,%r15
8896	adcq	%rdx,%r9
8897	movq	%r13,%r10
8898	movq	%r14,%r11
8899	movq	%r15,%r12
8900	andq	$3,%r12
8901	movq	%r15,%r13
8902	andq	$-4,%r13
8903	movq	%r9,%r14
8904	shrdq	$2,%r9,%r15
8905	shrq	$2,%r9
8906	addq	%r13,%r15
8907	adcq	%r14,%r9
8908	addq	%r15,%r10
8909	adcq	%r9,%r11
8910	adcq	$0,%r12
8911
8912	leaq	16(%rdi),%rdi
8913	vextracti128	$1,%ymm0,%xmm0
8914.Lseal_avx2_exit:
8915	vzeroupper
8916	jmp	.Lseal_sse_tail_16
8917.cfi_endproc
8918.size	chacha20_poly1305_seal_avx2, .-chacha20_poly1305_seal_avx2
8919#endif
8920.section	.note.GNU-stack,"",@progbits
8921