1# This file is generated from a similarly-named Perl script in the BoringSSL
2# source tree. Do not edit by hand.
3
4#if defined(__has_feature)
5#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
6#define OPENSSL_NO_ASM
7#endif
8#endif
9
10#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
11.text
12.extern	GFp_ia32cap_P
13.hidden GFp_ia32cap_P
14
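/* Constant pool: the ChaCha20 "expand 32-byte k" words, pshufb masks that
   rotate each 32-bit lane left by 8 and 16 bits, counter initial/increment
   vectors for the SSE (step 1) and AVX2 (step 2) paths, the Poly1305 "r"
   clamping mask, and AND masks keeping the first 1..16 bytes of a block. */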
15chacha20_poly1305_constants:
16
17.align	64
18.Lchacha20_consts:
19.byte	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
20.byte	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
21.Lrol8:
22.byte	3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
23.byte	3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
24.Lrol16:
25.byte	2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
26.byte	2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
27.Lavx2_init:
28.long	0,0,0,0
29.Lsse_inc:
30.long	1,0,0,0
31.Lavx2_inc:
32.long	2,0,0,0,2,0,0,0
33.Lclamp:
34.quad	0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC
35.quad	0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
36.align	16
37.Land_masks:
38.byte	0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
39.byte	0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
40.byte	0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
41.byte	0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
42.byte	0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
43.byte	0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
44.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
45.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
46.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00
47.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00
48.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00
49.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00
50.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00
51.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00
52.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00
53.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
54
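/* poly_hash_ad_internal: absorbs the additional authenticated data at %rcx
   (length in %r8) into the Poly1305 accumulator %r10:%r11:%r12 using the
   clamped key r stored at 0(%rbp).  A 13-byte AAD, the TLS record header
   size, is special-cased below; `.byte 0xf3,0xc3` encodes `rep ret`. */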
55.type	poly_hash_ad_internal,@function
56.align	64
57poly_hash_ad_internal:
58.cfi_startproc
59.cfi_def_cfa	rsp, 8
60	xorq	%r10,%r10
61	xorq	%r11,%r11
62	xorq	%r12,%r12
63	cmpq	$13,%r8
64	jne	.Lhash_ad_loop
65.Lpoly_fast_tls_ad:
66
67	movq	(%rcx),%r10
68	movq	5(%rcx),%r11
69	shrq	$24,%r11
70	movq	$1,%r12
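	/* Multiply the accumulator %r10:%r11:%r12 by r (at 0(%rbp)) and reduce
	   modulo 2^130 - 5; this inlined sequence repeats after every 16-byte
	   Poly1305 block throughout the file. */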
71	movq	0+0+0(%rbp),%rax
72	movq	%rax,%r15
73	mulq	%r10
74	movq	%rax,%r13
75	movq	%rdx,%r14
76	movq	0+0+0(%rbp),%rax
77	mulq	%r11
78	imulq	%r12,%r15
79	addq	%rax,%r14
80	adcq	%rdx,%r15
81	movq	8+0+0(%rbp),%rax
82	movq	%rax,%r9
83	mulq	%r10
84	addq	%rax,%r14
85	adcq	$0,%rdx
86	movq	%rdx,%r10
87	movq	8+0+0(%rbp),%rax
88	mulq	%r11
89	addq	%rax,%r15
90	adcq	$0,%rdx
91	imulq	%r12,%r9
92	addq	%r10,%r15
93	adcq	%rdx,%r9
94	movq	%r13,%r10
95	movq	%r14,%r11
96	movq	%r15,%r12
97	andq	$3,%r12
98	movq	%r15,%r13
99	andq	$-4,%r13
100	movq	%r9,%r14
101	shrdq	$2,%r9,%r15
102	shrq	$2,%r9
103	addq	%r13,%r15
104	adcq	%r14,%r9
105	addq	%r15,%r10
106	adcq	%r9,%r11
107	adcq	$0,%r12
108
109	.byte	0xf3,0xc3
110.Lhash_ad_loop:
111
112	cmpq	$16,%r8
113	jb	.Lhash_ad_tail
114	addq	0+0(%rcx),%r10
115	adcq	8+0(%rcx),%r11
116	adcq	$1,%r12
117	movq	0+0+0(%rbp),%rax
118	movq	%rax,%r15
119	mulq	%r10
120	movq	%rax,%r13
121	movq	%rdx,%r14
122	movq	0+0+0(%rbp),%rax
123	mulq	%r11
124	imulq	%r12,%r15
125	addq	%rax,%r14
126	adcq	%rdx,%r15
127	movq	8+0+0(%rbp),%rax
128	movq	%rax,%r9
129	mulq	%r10
130	addq	%rax,%r14
131	adcq	$0,%rdx
132	movq	%rdx,%r10
133	movq	8+0+0(%rbp),%rax
134	mulq	%r11
135	addq	%rax,%r15
136	adcq	$0,%rdx
137	imulq	%r12,%r9
138	addq	%r10,%r15
139	adcq	%rdx,%r9
140	movq	%r13,%r10
141	movq	%r14,%r11
142	movq	%r15,%r12
143	andq	$3,%r12
144	movq	%r15,%r13
145	andq	$-4,%r13
146	movq	%r9,%r14
147	shrdq	$2,%r9,%r15
148	shrq	$2,%r9
149	addq	%r13,%r15
150	adcq	%r14,%r9
151	addq	%r15,%r10
152	adcq	%r9,%r11
153	adcq	$0,%r12
154
155	leaq	16(%rcx),%rcx
156	subq	$16,%r8
157	jmp	.Lhash_ad_loop
158.Lhash_ad_tail:
159	cmpq	$0,%r8
160	je	.Lhash_ad_done
161
162	xorq	%r13,%r13
163	xorq	%r14,%r14
164	xorq	%r15,%r15
165	addq	%r8,%rcx
166.Lhash_ad_tail_loop:
167	shldq	$8,%r13,%r14
168	shlq	$8,%r13
169	movzbq	-1(%rcx),%r15
170	xorq	%r15,%r13
171	decq	%rcx
172	decq	%r8
173	jne	.Lhash_ad_tail_loop
174
175	addq	%r13,%r10
176	adcq	%r14,%r11
177	adcq	$1,%r12
178	movq	0+0+0(%rbp),%rax
179	movq	%rax,%r15
180	mulq	%r10
181	movq	%rax,%r13
182	movq	%rdx,%r14
183	movq	0+0+0(%rbp),%rax
184	mulq	%r11
185	imulq	%r12,%r15
186	addq	%rax,%r14
187	adcq	%rdx,%r15
188	movq	8+0+0(%rbp),%rax
189	movq	%rax,%r9
190	mulq	%r10
191	addq	%rax,%r14
192	adcq	$0,%rdx
193	movq	%rdx,%r10
194	movq	8+0+0(%rbp),%rax
195	mulq	%r11
196	addq	%rax,%r15
197	adcq	$0,%rdx
198	imulq	%r12,%r9
199	addq	%r10,%r15
200	adcq	%rdx,%r9
201	movq	%r13,%r10
202	movq	%r14,%r11
203	movq	%r15,%r12
204	andq	$3,%r12
205	movq	%r15,%r13
206	andq	$-4,%r13
207	movq	%r9,%r14
208	shrdq	$2,%r9,%r15
209	shrq	$2,%r9
210	addq	%r13,%r15
211	adcq	%r14,%r9
212	addq	%r15,%r10
213	adcq	%r9,%r11
214	adcq	$0,%r12
215
216
217.Lhash_ad_done:
218	.byte	0xf3,0xc3
219.cfi_endproc
220.size	poly_hash_ad_internal, .-poly_hash_ad_internal
221
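/* GFp_chacha20_poly1305_open: %rdi = output, %rsi = input ciphertext,
   %rdx = length, %rcx / %r8 = AAD pointer and length, %r9 = 48-byte
   ChaCha20 key and counter/nonce block.  Decrypts %rdx bytes, authenticates
   AAD || ciphertext with Poly1305, and stores the 16-byte tag at (%r9)
   before returning. */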
222.globl	GFp_chacha20_poly1305_open
223.hidden GFp_chacha20_poly1305_open
224.type	GFp_chacha20_poly1305_open,@function
225.align	64
226GFp_chacha20_poly1305_open:
227.cfi_startproc
228	pushq	%rbp
229.cfi_adjust_cfa_offset	8
230.cfi_offset	%rbp,-16
231	pushq	%rbx
232.cfi_adjust_cfa_offset	8
233.cfi_offset	%rbx,-24
234	pushq	%r12
235.cfi_adjust_cfa_offset	8
236.cfi_offset	%r12,-32
237	pushq	%r13
238.cfi_adjust_cfa_offset	8
239.cfi_offset	%r13,-40
240	pushq	%r14
241.cfi_adjust_cfa_offset	8
242.cfi_offset	%r14,-48
243	pushq	%r15
244.cfi_adjust_cfa_offset	8
245.cfi_offset	%r15,-56
246
247
248	pushq	%r9
249.cfi_adjust_cfa_offset	8
250.cfi_offset	%r9,-64
251	subq	$288 + 0 + 32,%rsp
252.cfi_adjust_cfa_offset	288 + 32
253
254	leaq	32(%rsp),%rbp
255	andq	$-32,%rbp
256
257	movq	%rdx,%rbx
258	movq	%r8,0+0+32(%rbp)
259	movq	%rbx,8+0+32(%rbp)
260
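	/* Take the AVX2 path only when the CPUID leaf-7 EBX word cached in
	   GFp_ia32cap_P reports both AVX2 (bit 5) and BMI2 (bit 8);
	   288 = 0x120 selects exactly those two bits. */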
261	movl	GFp_ia32cap_P+8(%rip),%eax
262	andl	$288,%eax
263	xorl	$288,%eax
264	jz	chacha20_poly1305_open_avx2
265
266	cmpq	$128,%rbx
267	jbe	.Lopen_sse_128
268
269	movdqa	.Lchacha20_consts(%rip),%xmm0
270	movdqu	0(%r9),%xmm4
271	movdqu	16(%r9),%xmm8
272	movdqu	32(%r9),%xmm12
273
274	movdqa	%xmm12,%xmm7
275
276	movdqa	%xmm4,0+48(%rbp)
277	movdqa	%xmm8,0+64(%rbp)
278	movdqa	%xmm12,0+96(%rbp)
279	movq	$10,%r10
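	/* 10 iterations of the ChaCha20 double round (column round, then
	   diagonal round) on a single block.  The .byte sequences are palignr
	   instructions, emitted as raw bytes, that rotate state rows 1-3 by
	   4, 8 and 12 bytes to diagonalize the state and then restore it. */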
280.Lopen_sse_init_rounds:
281	paddd	%xmm4,%xmm0
282	pxor	%xmm0,%xmm12
283	pshufb	.Lrol16(%rip),%xmm12
284	paddd	%xmm12,%xmm8
285	pxor	%xmm8,%xmm4
286	movdqa	%xmm4,%xmm3
287	pslld	$12,%xmm3
288	psrld	$20,%xmm4
289	pxor	%xmm3,%xmm4
290	paddd	%xmm4,%xmm0
291	pxor	%xmm0,%xmm12
292	pshufb	.Lrol8(%rip),%xmm12
293	paddd	%xmm12,%xmm8
294	pxor	%xmm8,%xmm4
295	movdqa	%xmm4,%xmm3
296	pslld	$7,%xmm3
297	psrld	$25,%xmm4
298	pxor	%xmm3,%xmm4
299.byte	102,15,58,15,228,4
300.byte	102,69,15,58,15,192,8
301.byte	102,69,15,58,15,228,12
302	paddd	%xmm4,%xmm0
303	pxor	%xmm0,%xmm12
304	pshufb	.Lrol16(%rip),%xmm12
305	paddd	%xmm12,%xmm8
306	pxor	%xmm8,%xmm4
307	movdqa	%xmm4,%xmm3
308	pslld	$12,%xmm3
309	psrld	$20,%xmm4
310	pxor	%xmm3,%xmm4
311	paddd	%xmm4,%xmm0
312	pxor	%xmm0,%xmm12
313	pshufb	.Lrol8(%rip),%xmm12
314	paddd	%xmm12,%xmm8
315	pxor	%xmm8,%xmm4
316	movdqa	%xmm4,%xmm3
317	pslld	$7,%xmm3
318	psrld	$25,%xmm4
319	pxor	%xmm3,%xmm4
320.byte	102,15,58,15,228,12
321.byte	102,69,15,58,15,192,8
322.byte	102,69,15,58,15,228,4
323
324	decq	%r10
325	jne	.Lopen_sse_init_rounds
326
327	paddd	.Lchacha20_consts(%rip),%xmm0
328	paddd	0+48(%rbp),%xmm4
329
330	pand	.Lclamp(%rip),%xmm0
331	movdqa	%xmm0,0+0(%rbp)
332	movdqa	%xmm4,0+16(%rbp)
333
334	movq	%r8,%r8
335	call	poly_hash_ad_internal
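	/* Main open loop: while at least 256 bytes remain, generate four
	   ChaCha20 blocks in parallel, interleaving one 16-byte Poly1305
	   update of the ciphertext (via %r8) into each round iteration,
	   then decrypt the 256 bytes and advance. */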
336.Lopen_sse_main_loop:
337	cmpq	$256,%rbx
338	jb	.Lopen_sse_tail
339
340	movdqa	.Lchacha20_consts(%rip),%xmm0
341	movdqa	0+48(%rbp),%xmm4
342	movdqa	0+64(%rbp),%xmm8
343	movdqa	%xmm0,%xmm1
344	movdqa	%xmm4,%xmm5
345	movdqa	%xmm8,%xmm9
346	movdqa	%xmm0,%xmm2
347	movdqa	%xmm4,%xmm6
348	movdqa	%xmm8,%xmm10
349	movdqa	%xmm0,%xmm3
350	movdqa	%xmm4,%xmm7
351	movdqa	%xmm8,%xmm11
352	movdqa	0+96(%rbp),%xmm15
353	paddd	.Lsse_inc(%rip),%xmm15
354	movdqa	%xmm15,%xmm14
355	paddd	.Lsse_inc(%rip),%xmm14
356	movdqa	%xmm14,%xmm13
357	paddd	.Lsse_inc(%rip),%xmm13
358	movdqa	%xmm13,%xmm12
359	paddd	.Lsse_inc(%rip),%xmm12
360	movdqa	%xmm12,0+96(%rbp)
361	movdqa	%xmm13,0+112(%rbp)
362	movdqa	%xmm14,0+128(%rbp)
363	movdqa	%xmm15,0+144(%rbp)
364
365
366
367	movq	$4,%rcx
368	movq	%rsi,%r8
369.Lopen_sse_main_loop_rounds:
370	movdqa	%xmm8,0+80(%rbp)
371	movdqa	.Lrol16(%rip),%xmm8
372	paddd	%xmm7,%xmm3
373	paddd	%xmm6,%xmm2
374	paddd	%xmm5,%xmm1
375	paddd	%xmm4,%xmm0
376	pxor	%xmm3,%xmm15
377	pxor	%xmm2,%xmm14
378	pxor	%xmm1,%xmm13
379	pxor	%xmm0,%xmm12
380.byte	102,69,15,56,0,248
381.byte	102,69,15,56,0,240
382.byte	102,69,15,56,0,232
383.byte	102,69,15,56,0,224
384	movdqa	0+80(%rbp),%xmm8
385	paddd	%xmm15,%xmm11
386	paddd	%xmm14,%xmm10
387	paddd	%xmm13,%xmm9
388	paddd	%xmm12,%xmm8
389	pxor	%xmm11,%xmm7
390	addq	0+0(%r8),%r10
391	adcq	8+0(%r8),%r11
392	adcq	$1,%r12
393
394	leaq	16(%r8),%r8
395	pxor	%xmm10,%xmm6
396	pxor	%xmm9,%xmm5
397	pxor	%xmm8,%xmm4
398	movdqa	%xmm8,0+80(%rbp)
399	movdqa	%xmm7,%xmm8
400	psrld	$20,%xmm8
401	pslld	$32-20,%xmm7
402	pxor	%xmm8,%xmm7
403	movdqa	%xmm6,%xmm8
404	psrld	$20,%xmm8
405	pslld	$32-20,%xmm6
406	pxor	%xmm8,%xmm6
407	movdqa	%xmm5,%xmm8
408	psrld	$20,%xmm8
409	pslld	$32-20,%xmm5
410	pxor	%xmm8,%xmm5
411	movdqa	%xmm4,%xmm8
412	psrld	$20,%xmm8
413	pslld	$32-20,%xmm4
414	pxor	%xmm8,%xmm4
415	movq	0+0+0(%rbp),%rax
416	movq	%rax,%r15
417	mulq	%r10
418	movq	%rax,%r13
419	movq	%rdx,%r14
420	movq	0+0+0(%rbp),%rax
421	mulq	%r11
422	imulq	%r12,%r15
423	addq	%rax,%r14
424	adcq	%rdx,%r15
425	movdqa	.Lrol8(%rip),%xmm8
426	paddd	%xmm7,%xmm3
427	paddd	%xmm6,%xmm2
428	paddd	%xmm5,%xmm1
429	paddd	%xmm4,%xmm0
430	pxor	%xmm3,%xmm15
431	pxor	%xmm2,%xmm14
432	pxor	%xmm1,%xmm13
433	pxor	%xmm0,%xmm12
434.byte	102,69,15,56,0,248
435.byte	102,69,15,56,0,240
436.byte	102,69,15,56,0,232
437.byte	102,69,15,56,0,224
438	movdqa	0+80(%rbp),%xmm8
439	paddd	%xmm15,%xmm11
440	paddd	%xmm14,%xmm10
441	paddd	%xmm13,%xmm9
442	paddd	%xmm12,%xmm8
443	pxor	%xmm11,%xmm7
444	pxor	%xmm10,%xmm6
445	movq	8+0+0(%rbp),%rax
446	movq	%rax,%r9
447	mulq	%r10
448	addq	%rax,%r14
449	adcq	$0,%rdx
450	movq	%rdx,%r10
451	movq	8+0+0(%rbp),%rax
452	mulq	%r11
453	addq	%rax,%r15
454	adcq	$0,%rdx
455	pxor	%xmm9,%xmm5
456	pxor	%xmm8,%xmm4
457	movdqa	%xmm8,0+80(%rbp)
458	movdqa	%xmm7,%xmm8
459	psrld	$25,%xmm8
460	pslld	$32-25,%xmm7
461	pxor	%xmm8,%xmm7
462	movdqa	%xmm6,%xmm8
463	psrld	$25,%xmm8
464	pslld	$32-25,%xmm6
465	pxor	%xmm8,%xmm6
466	movdqa	%xmm5,%xmm8
467	psrld	$25,%xmm8
468	pslld	$32-25,%xmm5
469	pxor	%xmm8,%xmm5
470	movdqa	%xmm4,%xmm8
471	psrld	$25,%xmm8
472	pslld	$32-25,%xmm4
473	pxor	%xmm8,%xmm4
474	movdqa	0+80(%rbp),%xmm8
475	imulq	%r12,%r9
476	addq	%r10,%r15
477	adcq	%rdx,%r9
478.byte	102,15,58,15,255,4
479.byte	102,69,15,58,15,219,8
480.byte	102,69,15,58,15,255,12
481.byte	102,15,58,15,246,4
482.byte	102,69,15,58,15,210,8
483.byte	102,69,15,58,15,246,12
484.byte	102,15,58,15,237,4
485.byte	102,69,15,58,15,201,8
486.byte	102,69,15,58,15,237,12
487.byte	102,15,58,15,228,4
488.byte	102,69,15,58,15,192,8
489.byte	102,69,15,58,15,228,12
490	movdqa	%xmm8,0+80(%rbp)
491	movdqa	.Lrol16(%rip),%xmm8
492	paddd	%xmm7,%xmm3
493	paddd	%xmm6,%xmm2
494	paddd	%xmm5,%xmm1
495	paddd	%xmm4,%xmm0
496	pxor	%xmm3,%xmm15
497	pxor	%xmm2,%xmm14
498	movq	%r13,%r10
499	movq	%r14,%r11
500	movq	%r15,%r12
501	andq	$3,%r12
502	movq	%r15,%r13
503	andq	$-4,%r13
504	movq	%r9,%r14
505	shrdq	$2,%r9,%r15
506	shrq	$2,%r9
507	addq	%r13,%r15
508	adcq	%r14,%r9
509	addq	%r15,%r10
510	adcq	%r9,%r11
511	adcq	$0,%r12
512	pxor	%xmm1,%xmm13
513	pxor	%xmm0,%xmm12
514.byte	102,69,15,56,0,248
515.byte	102,69,15,56,0,240
516.byte	102,69,15,56,0,232
517.byte	102,69,15,56,0,224
518	movdqa	0+80(%rbp),%xmm8
519	paddd	%xmm15,%xmm11
520	paddd	%xmm14,%xmm10
521	paddd	%xmm13,%xmm9
522	paddd	%xmm12,%xmm8
523	pxor	%xmm11,%xmm7
524	pxor	%xmm10,%xmm6
525	pxor	%xmm9,%xmm5
526	pxor	%xmm8,%xmm4
527	movdqa	%xmm8,0+80(%rbp)
528	movdqa	%xmm7,%xmm8
529	psrld	$20,%xmm8
530	pslld	$32-20,%xmm7
531	pxor	%xmm8,%xmm7
532	movdqa	%xmm6,%xmm8
533	psrld	$20,%xmm8
534	pslld	$32-20,%xmm6
535	pxor	%xmm8,%xmm6
536	movdqa	%xmm5,%xmm8
537	psrld	$20,%xmm8
538	pslld	$32-20,%xmm5
539	pxor	%xmm8,%xmm5
540	movdqa	%xmm4,%xmm8
541	psrld	$20,%xmm8
542	pslld	$32-20,%xmm4
543	pxor	%xmm8,%xmm4
544	movdqa	.Lrol8(%rip),%xmm8
545	paddd	%xmm7,%xmm3
546	paddd	%xmm6,%xmm2
547	paddd	%xmm5,%xmm1
548	paddd	%xmm4,%xmm0
549	pxor	%xmm3,%xmm15
550	pxor	%xmm2,%xmm14
551	pxor	%xmm1,%xmm13
552	pxor	%xmm0,%xmm12
553.byte	102,69,15,56,0,248
554.byte	102,69,15,56,0,240
555.byte	102,69,15,56,0,232
556.byte	102,69,15,56,0,224
557	movdqa	0+80(%rbp),%xmm8
558	paddd	%xmm15,%xmm11
559	paddd	%xmm14,%xmm10
560	paddd	%xmm13,%xmm9
561	paddd	%xmm12,%xmm8
562	pxor	%xmm11,%xmm7
563	pxor	%xmm10,%xmm6
564	pxor	%xmm9,%xmm5
565	pxor	%xmm8,%xmm4
566	movdqa	%xmm8,0+80(%rbp)
567	movdqa	%xmm7,%xmm8
568	psrld	$25,%xmm8
569	pslld	$32-25,%xmm7
570	pxor	%xmm8,%xmm7
571	movdqa	%xmm6,%xmm8
572	psrld	$25,%xmm8
573	pslld	$32-25,%xmm6
574	pxor	%xmm8,%xmm6
575	movdqa	%xmm5,%xmm8
576	psrld	$25,%xmm8
577	pslld	$32-25,%xmm5
578	pxor	%xmm8,%xmm5
579	movdqa	%xmm4,%xmm8
580	psrld	$25,%xmm8
581	pslld	$32-25,%xmm4
582	pxor	%xmm8,%xmm4
583	movdqa	0+80(%rbp),%xmm8
584.byte	102,15,58,15,255,12
585.byte	102,69,15,58,15,219,8
586.byte	102,69,15,58,15,255,4
587.byte	102,15,58,15,246,12
588.byte	102,69,15,58,15,210,8
589.byte	102,69,15,58,15,246,4
590.byte	102,15,58,15,237,12
591.byte	102,69,15,58,15,201,8
592.byte	102,69,15,58,15,237,4
593.byte	102,15,58,15,228,12
594.byte	102,69,15,58,15,192,8
595.byte	102,69,15,58,15,228,4
596
597	decq	%rcx
598	jge	.Lopen_sse_main_loop_rounds
599	addq	0+0(%r8),%r10
600	adcq	8+0(%r8),%r11
601	adcq	$1,%r12
602	movq	0+0+0(%rbp),%rax
603	movq	%rax,%r15
604	mulq	%r10
605	movq	%rax,%r13
606	movq	%rdx,%r14
607	movq	0+0+0(%rbp),%rax
608	mulq	%r11
609	imulq	%r12,%r15
610	addq	%rax,%r14
611	adcq	%rdx,%r15
612	movq	8+0+0(%rbp),%rax
613	movq	%rax,%r9
614	mulq	%r10
615	addq	%rax,%r14
616	adcq	$0,%rdx
617	movq	%rdx,%r10
618	movq	8+0+0(%rbp),%rax
619	mulq	%r11
620	addq	%rax,%r15
621	adcq	$0,%rdx
622	imulq	%r12,%r9
623	addq	%r10,%r15
624	adcq	%rdx,%r9
625	movq	%r13,%r10
626	movq	%r14,%r11
627	movq	%r15,%r12
628	andq	$3,%r12
629	movq	%r15,%r13
630	andq	$-4,%r13
631	movq	%r9,%r14
632	shrdq	$2,%r9,%r15
633	shrq	$2,%r9
634	addq	%r13,%r15
635	adcq	%r14,%r9
636	addq	%r15,%r10
637	adcq	%r9,%r11
638	adcq	$0,%r12
639
640	leaq	16(%r8),%r8
641	cmpq	$-6,%rcx
642	jg	.Lopen_sse_main_loop_rounds
643	paddd	.Lchacha20_consts(%rip),%xmm3
644	paddd	0+48(%rbp),%xmm7
645	paddd	0+64(%rbp),%xmm11
646	paddd	0+144(%rbp),%xmm15
647	paddd	.Lchacha20_consts(%rip),%xmm2
648	paddd	0+48(%rbp),%xmm6
649	paddd	0+64(%rbp),%xmm10
650	paddd	0+128(%rbp),%xmm14
651	paddd	.Lchacha20_consts(%rip),%xmm1
652	paddd	0+48(%rbp),%xmm5
653	paddd	0+64(%rbp),%xmm9
654	paddd	0+112(%rbp),%xmm13
655	paddd	.Lchacha20_consts(%rip),%xmm0
656	paddd	0+48(%rbp),%xmm4
657	paddd	0+64(%rbp),%xmm8
658	paddd	0+96(%rbp),%xmm12
659	movdqa	%xmm12,0+80(%rbp)
660	movdqu	0 + 0(%rsi),%xmm12
661	pxor	%xmm3,%xmm12
662	movdqu	%xmm12,0 + 0(%rdi)
663	movdqu	16 + 0(%rsi),%xmm12
664	pxor	%xmm7,%xmm12
665	movdqu	%xmm12,16 + 0(%rdi)
666	movdqu	32 + 0(%rsi),%xmm12
667	pxor	%xmm11,%xmm12
668	movdqu	%xmm12,32 + 0(%rdi)
669	movdqu	48 + 0(%rsi),%xmm12
670	pxor	%xmm15,%xmm12
671	movdqu	%xmm12,48 + 0(%rdi)
672	movdqu	0 + 64(%rsi),%xmm3
673	movdqu	16 + 64(%rsi),%xmm7
674	movdqu	32 + 64(%rsi),%xmm11
675	movdqu	48 + 64(%rsi),%xmm15
676	pxor	%xmm3,%xmm2
677	pxor	%xmm7,%xmm6
678	pxor	%xmm11,%xmm10
679	pxor	%xmm14,%xmm15
680	movdqu	%xmm2,0 + 64(%rdi)
681	movdqu	%xmm6,16 + 64(%rdi)
682	movdqu	%xmm10,32 + 64(%rdi)
683	movdqu	%xmm15,48 + 64(%rdi)
684	movdqu	0 + 128(%rsi),%xmm3
685	movdqu	16 + 128(%rsi),%xmm7
686	movdqu	32 + 128(%rsi),%xmm11
687	movdqu	48 + 128(%rsi),%xmm15
688	pxor	%xmm3,%xmm1
689	pxor	%xmm7,%xmm5
690	pxor	%xmm11,%xmm9
691	pxor	%xmm13,%xmm15
692	movdqu	%xmm1,0 + 128(%rdi)
693	movdqu	%xmm5,16 + 128(%rdi)
694	movdqu	%xmm9,32 + 128(%rdi)
695	movdqu	%xmm15,48 + 128(%rdi)
696	movdqu	0 + 192(%rsi),%xmm3
697	movdqu	16 + 192(%rsi),%xmm7
698	movdqu	32 + 192(%rsi),%xmm11
699	movdqu	48 + 192(%rsi),%xmm15
700	pxor	%xmm3,%xmm0
701	pxor	%xmm7,%xmm4
702	pxor	%xmm11,%xmm8
703	pxor	0+80(%rbp),%xmm15
704	movdqu	%xmm0,0 + 192(%rdi)
705	movdqu	%xmm4,16 + 192(%rdi)
706	movdqu	%xmm8,32 + 192(%rdi)
707	movdqu	%xmm15,48 + 192(%rdi)
708
709	leaq	256(%rsi),%rsi
710	leaq	256(%rdi),%rdi
711	subq	$256,%rbx
712	jmp	.Lopen_sse_main_loop
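/* Tail: fewer than 256 bytes remain.  Pick the 64/128/192/256-byte case,
   keep hashing full 16-byte ciphertext chunks while running the remaining
   ChaCha20 blocks, then fall through to the per-64-byte decryption loop
   and the final partial block. */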
713.Lopen_sse_tail:
714
715	testq	%rbx,%rbx
716	jz	.Lopen_sse_finalize
717	cmpq	$192,%rbx
718	ja	.Lopen_sse_tail_256
719	cmpq	$128,%rbx
720	ja	.Lopen_sse_tail_192
721	cmpq	$64,%rbx
722	ja	.Lopen_sse_tail_128
723	movdqa	.Lchacha20_consts(%rip),%xmm0
724	movdqa	0+48(%rbp),%xmm4
725	movdqa	0+64(%rbp),%xmm8
726	movdqa	0+96(%rbp),%xmm12
727	paddd	.Lsse_inc(%rip),%xmm12
728	movdqa	%xmm12,0+96(%rbp)
729
730	xorq	%r8,%r8
731	movq	%rbx,%rcx
732	cmpq	$16,%rcx
733	jb	.Lopen_sse_tail_64_rounds
734.Lopen_sse_tail_64_rounds_and_x1hash:
735	addq	0+0(%rsi,%r8,1),%r10
736	adcq	8+0(%rsi,%r8,1),%r11
737	adcq	$1,%r12
738	movq	0+0+0(%rbp),%rax
739	movq	%rax,%r15
740	mulq	%r10
741	movq	%rax,%r13
742	movq	%rdx,%r14
743	movq	0+0+0(%rbp),%rax
744	mulq	%r11
745	imulq	%r12,%r15
746	addq	%rax,%r14
747	adcq	%rdx,%r15
748	movq	8+0+0(%rbp),%rax
749	movq	%rax,%r9
750	mulq	%r10
751	addq	%rax,%r14
752	adcq	$0,%rdx
753	movq	%rdx,%r10
754	movq	8+0+0(%rbp),%rax
755	mulq	%r11
756	addq	%rax,%r15
757	adcq	$0,%rdx
758	imulq	%r12,%r9
759	addq	%r10,%r15
760	adcq	%rdx,%r9
761	movq	%r13,%r10
762	movq	%r14,%r11
763	movq	%r15,%r12
764	andq	$3,%r12
765	movq	%r15,%r13
766	andq	$-4,%r13
767	movq	%r9,%r14
768	shrdq	$2,%r9,%r15
769	shrq	$2,%r9
770	addq	%r13,%r15
771	adcq	%r14,%r9
772	addq	%r15,%r10
773	adcq	%r9,%r11
774	adcq	$0,%r12
775
776	subq	$16,%rcx
777.Lopen_sse_tail_64_rounds:
778	addq	$16,%r8
779	paddd	%xmm4,%xmm0
780	pxor	%xmm0,%xmm12
781	pshufb	.Lrol16(%rip),%xmm12
782	paddd	%xmm12,%xmm8
783	pxor	%xmm8,%xmm4
784	movdqa	%xmm4,%xmm3
785	pslld	$12,%xmm3
786	psrld	$20,%xmm4
787	pxor	%xmm3,%xmm4
788	paddd	%xmm4,%xmm0
789	pxor	%xmm0,%xmm12
790	pshufb	.Lrol8(%rip),%xmm12
791	paddd	%xmm12,%xmm8
792	pxor	%xmm8,%xmm4
793	movdqa	%xmm4,%xmm3
794	pslld	$7,%xmm3
795	psrld	$25,%xmm4
796	pxor	%xmm3,%xmm4
797.byte	102,15,58,15,228,4
798.byte	102,69,15,58,15,192,8
799.byte	102,69,15,58,15,228,12
800	paddd	%xmm4,%xmm0
801	pxor	%xmm0,%xmm12
802	pshufb	.Lrol16(%rip),%xmm12
803	paddd	%xmm12,%xmm8
804	pxor	%xmm8,%xmm4
805	movdqa	%xmm4,%xmm3
806	pslld	$12,%xmm3
807	psrld	$20,%xmm4
808	pxor	%xmm3,%xmm4
809	paddd	%xmm4,%xmm0
810	pxor	%xmm0,%xmm12
811	pshufb	.Lrol8(%rip),%xmm12
812	paddd	%xmm12,%xmm8
813	pxor	%xmm8,%xmm4
814	movdqa	%xmm4,%xmm3
815	pslld	$7,%xmm3
816	psrld	$25,%xmm4
817	pxor	%xmm3,%xmm4
818.byte	102,15,58,15,228,12
819.byte	102,69,15,58,15,192,8
820.byte	102,69,15,58,15,228,4
821
822	cmpq	$16,%rcx
823	jae	.Lopen_sse_tail_64_rounds_and_x1hash
824	cmpq	$160,%r8
825	jne	.Lopen_sse_tail_64_rounds
826	paddd	.Lchacha20_consts(%rip),%xmm0
827	paddd	0+48(%rbp),%xmm4
828	paddd	0+64(%rbp),%xmm8
829	paddd	0+96(%rbp),%xmm12
830
831	jmp	.Lopen_sse_tail_64_dec_loop
832
833.Lopen_sse_tail_128:
834	movdqa	.Lchacha20_consts(%rip),%xmm0
835	movdqa	0+48(%rbp),%xmm4
836	movdqa	0+64(%rbp),%xmm8
837	movdqa	%xmm0,%xmm1
838	movdqa	%xmm4,%xmm5
839	movdqa	%xmm8,%xmm9
840	movdqa	0+96(%rbp),%xmm13
841	paddd	.Lsse_inc(%rip),%xmm13
842	movdqa	%xmm13,%xmm12
843	paddd	.Lsse_inc(%rip),%xmm12
844	movdqa	%xmm12,0+96(%rbp)
845	movdqa	%xmm13,0+112(%rbp)
846
847	movq	%rbx,%rcx
848	andq	$-16,%rcx
849	xorq	%r8,%r8
850.Lopen_sse_tail_128_rounds_and_x1hash:
851	addq	0+0(%rsi,%r8,1),%r10
852	adcq	8+0(%rsi,%r8,1),%r11
853	adcq	$1,%r12
854	movq	0+0+0(%rbp),%rax
855	movq	%rax,%r15
856	mulq	%r10
857	movq	%rax,%r13
858	movq	%rdx,%r14
859	movq	0+0+0(%rbp),%rax
860	mulq	%r11
861	imulq	%r12,%r15
862	addq	%rax,%r14
863	adcq	%rdx,%r15
864	movq	8+0+0(%rbp),%rax
865	movq	%rax,%r9
866	mulq	%r10
867	addq	%rax,%r14
868	adcq	$0,%rdx
869	movq	%rdx,%r10
870	movq	8+0+0(%rbp),%rax
871	mulq	%r11
872	addq	%rax,%r15
873	adcq	$0,%rdx
874	imulq	%r12,%r9
875	addq	%r10,%r15
876	adcq	%rdx,%r9
877	movq	%r13,%r10
878	movq	%r14,%r11
879	movq	%r15,%r12
880	andq	$3,%r12
881	movq	%r15,%r13
882	andq	$-4,%r13
883	movq	%r9,%r14
884	shrdq	$2,%r9,%r15
885	shrq	$2,%r9
886	addq	%r13,%r15
887	adcq	%r14,%r9
888	addq	%r15,%r10
889	adcq	%r9,%r11
890	adcq	$0,%r12
891
892.Lopen_sse_tail_128_rounds:
893	addq	$16,%r8
894	paddd	%xmm4,%xmm0
895	pxor	%xmm0,%xmm12
896	pshufb	.Lrol16(%rip),%xmm12
897	paddd	%xmm12,%xmm8
898	pxor	%xmm8,%xmm4
899	movdqa	%xmm4,%xmm3
900	pslld	$12,%xmm3
901	psrld	$20,%xmm4
902	pxor	%xmm3,%xmm4
903	paddd	%xmm4,%xmm0
904	pxor	%xmm0,%xmm12
905	pshufb	.Lrol8(%rip),%xmm12
906	paddd	%xmm12,%xmm8
907	pxor	%xmm8,%xmm4
908	movdqa	%xmm4,%xmm3
909	pslld	$7,%xmm3
910	psrld	$25,%xmm4
911	pxor	%xmm3,%xmm4
912.byte	102,15,58,15,228,4
913.byte	102,69,15,58,15,192,8
914.byte	102,69,15,58,15,228,12
915	paddd	%xmm5,%xmm1
916	pxor	%xmm1,%xmm13
917	pshufb	.Lrol16(%rip),%xmm13
918	paddd	%xmm13,%xmm9
919	pxor	%xmm9,%xmm5
920	movdqa	%xmm5,%xmm3
921	pslld	$12,%xmm3
922	psrld	$20,%xmm5
923	pxor	%xmm3,%xmm5
924	paddd	%xmm5,%xmm1
925	pxor	%xmm1,%xmm13
926	pshufb	.Lrol8(%rip),%xmm13
927	paddd	%xmm13,%xmm9
928	pxor	%xmm9,%xmm5
929	movdqa	%xmm5,%xmm3
930	pslld	$7,%xmm3
931	psrld	$25,%xmm5
932	pxor	%xmm3,%xmm5
933.byte	102,15,58,15,237,4
934.byte	102,69,15,58,15,201,8
935.byte	102,69,15,58,15,237,12
936	paddd	%xmm4,%xmm0
937	pxor	%xmm0,%xmm12
938	pshufb	.Lrol16(%rip),%xmm12
939	paddd	%xmm12,%xmm8
940	pxor	%xmm8,%xmm4
941	movdqa	%xmm4,%xmm3
942	pslld	$12,%xmm3
943	psrld	$20,%xmm4
944	pxor	%xmm3,%xmm4
945	paddd	%xmm4,%xmm0
946	pxor	%xmm0,%xmm12
947	pshufb	.Lrol8(%rip),%xmm12
948	paddd	%xmm12,%xmm8
949	pxor	%xmm8,%xmm4
950	movdqa	%xmm4,%xmm3
951	pslld	$7,%xmm3
952	psrld	$25,%xmm4
953	pxor	%xmm3,%xmm4
954.byte	102,15,58,15,228,12
955.byte	102,69,15,58,15,192,8
956.byte	102,69,15,58,15,228,4
957	paddd	%xmm5,%xmm1
958	pxor	%xmm1,%xmm13
959	pshufb	.Lrol16(%rip),%xmm13
960	paddd	%xmm13,%xmm9
961	pxor	%xmm9,%xmm5
962	movdqa	%xmm5,%xmm3
963	pslld	$12,%xmm3
964	psrld	$20,%xmm5
965	pxor	%xmm3,%xmm5
966	paddd	%xmm5,%xmm1
967	pxor	%xmm1,%xmm13
968	pshufb	.Lrol8(%rip),%xmm13
969	paddd	%xmm13,%xmm9
970	pxor	%xmm9,%xmm5
971	movdqa	%xmm5,%xmm3
972	pslld	$7,%xmm3
973	psrld	$25,%xmm5
974	pxor	%xmm3,%xmm5
975.byte	102,15,58,15,237,12
976.byte	102,69,15,58,15,201,8
977.byte	102,69,15,58,15,237,4
978
979	cmpq	%rcx,%r8
980	jb	.Lopen_sse_tail_128_rounds_and_x1hash
981	cmpq	$160,%r8
982	jne	.Lopen_sse_tail_128_rounds
983	paddd	.Lchacha20_consts(%rip),%xmm1
984	paddd	0+48(%rbp),%xmm5
985	paddd	0+64(%rbp),%xmm9
986	paddd	0+112(%rbp),%xmm13
987	paddd	.Lchacha20_consts(%rip),%xmm0
988	paddd	0+48(%rbp),%xmm4
989	paddd	0+64(%rbp),%xmm8
990	paddd	0+96(%rbp),%xmm12
991	movdqu	0 + 0(%rsi),%xmm3
992	movdqu	16 + 0(%rsi),%xmm7
993	movdqu	32 + 0(%rsi),%xmm11
994	movdqu	48 + 0(%rsi),%xmm15
995	pxor	%xmm3,%xmm1
996	pxor	%xmm7,%xmm5
997	pxor	%xmm11,%xmm9
998	pxor	%xmm13,%xmm15
999	movdqu	%xmm1,0 + 0(%rdi)
1000	movdqu	%xmm5,16 + 0(%rdi)
1001	movdqu	%xmm9,32 + 0(%rdi)
1002	movdqu	%xmm15,48 + 0(%rdi)
1003
1004	subq	$64,%rbx
1005	leaq	64(%rsi),%rsi
1006	leaq	64(%rdi),%rdi
1007	jmp	.Lopen_sse_tail_64_dec_loop
1008
1009.Lopen_sse_tail_192:
1010	movdqa	.Lchacha20_consts(%rip),%xmm0
1011	movdqa	0+48(%rbp),%xmm4
1012	movdqa	0+64(%rbp),%xmm8
1013	movdqa	%xmm0,%xmm1
1014	movdqa	%xmm4,%xmm5
1015	movdqa	%xmm8,%xmm9
1016	movdqa	%xmm0,%xmm2
1017	movdqa	%xmm4,%xmm6
1018	movdqa	%xmm8,%xmm10
1019	movdqa	0+96(%rbp),%xmm14
1020	paddd	.Lsse_inc(%rip),%xmm14
1021	movdqa	%xmm14,%xmm13
1022	paddd	.Lsse_inc(%rip),%xmm13
1023	movdqa	%xmm13,%xmm12
1024	paddd	.Lsse_inc(%rip),%xmm12
1025	movdqa	%xmm12,0+96(%rbp)
1026	movdqa	%xmm13,0+112(%rbp)
1027	movdqa	%xmm14,0+128(%rbp)
1028
1029	movq	%rbx,%rcx
1030	movq	$160,%r8
1031	cmpq	$160,%rcx
1032	cmovgq	%r8,%rcx
1033	andq	$-16,%rcx
1034	xorq	%r8,%r8
1035.Lopen_sse_tail_192_rounds_and_x1hash:
1036	addq	0+0(%rsi,%r8,1),%r10
1037	adcq	8+0(%rsi,%r8,1),%r11
1038	adcq	$1,%r12
1039	movq	0+0+0(%rbp),%rax
1040	movq	%rax,%r15
1041	mulq	%r10
1042	movq	%rax,%r13
1043	movq	%rdx,%r14
1044	movq	0+0+0(%rbp),%rax
1045	mulq	%r11
1046	imulq	%r12,%r15
1047	addq	%rax,%r14
1048	adcq	%rdx,%r15
1049	movq	8+0+0(%rbp),%rax
1050	movq	%rax,%r9
1051	mulq	%r10
1052	addq	%rax,%r14
1053	adcq	$0,%rdx
1054	movq	%rdx,%r10
1055	movq	8+0+0(%rbp),%rax
1056	mulq	%r11
1057	addq	%rax,%r15
1058	adcq	$0,%rdx
1059	imulq	%r12,%r9
1060	addq	%r10,%r15
1061	adcq	%rdx,%r9
1062	movq	%r13,%r10
1063	movq	%r14,%r11
1064	movq	%r15,%r12
1065	andq	$3,%r12
1066	movq	%r15,%r13
1067	andq	$-4,%r13
1068	movq	%r9,%r14
1069	shrdq	$2,%r9,%r15
1070	shrq	$2,%r9
1071	addq	%r13,%r15
1072	adcq	%r14,%r9
1073	addq	%r15,%r10
1074	adcq	%r9,%r11
1075	adcq	$0,%r12
1076
1077.Lopen_sse_tail_192_rounds:
1078	addq	$16,%r8
1079	paddd	%xmm4,%xmm0
1080	pxor	%xmm0,%xmm12
1081	pshufb	.Lrol16(%rip),%xmm12
1082	paddd	%xmm12,%xmm8
1083	pxor	%xmm8,%xmm4
1084	movdqa	%xmm4,%xmm3
1085	pslld	$12,%xmm3
1086	psrld	$20,%xmm4
1087	pxor	%xmm3,%xmm4
1088	paddd	%xmm4,%xmm0
1089	pxor	%xmm0,%xmm12
1090	pshufb	.Lrol8(%rip),%xmm12
1091	paddd	%xmm12,%xmm8
1092	pxor	%xmm8,%xmm4
1093	movdqa	%xmm4,%xmm3
1094	pslld	$7,%xmm3
1095	psrld	$25,%xmm4
1096	pxor	%xmm3,%xmm4
1097.byte	102,15,58,15,228,4
1098.byte	102,69,15,58,15,192,8
1099.byte	102,69,15,58,15,228,12
1100	paddd	%xmm5,%xmm1
1101	pxor	%xmm1,%xmm13
1102	pshufb	.Lrol16(%rip),%xmm13
1103	paddd	%xmm13,%xmm9
1104	pxor	%xmm9,%xmm5
1105	movdqa	%xmm5,%xmm3
1106	pslld	$12,%xmm3
1107	psrld	$20,%xmm5
1108	pxor	%xmm3,%xmm5
1109	paddd	%xmm5,%xmm1
1110	pxor	%xmm1,%xmm13
1111	pshufb	.Lrol8(%rip),%xmm13
1112	paddd	%xmm13,%xmm9
1113	pxor	%xmm9,%xmm5
1114	movdqa	%xmm5,%xmm3
1115	pslld	$7,%xmm3
1116	psrld	$25,%xmm5
1117	pxor	%xmm3,%xmm5
1118.byte	102,15,58,15,237,4
1119.byte	102,69,15,58,15,201,8
1120.byte	102,69,15,58,15,237,12
1121	paddd	%xmm6,%xmm2
1122	pxor	%xmm2,%xmm14
1123	pshufb	.Lrol16(%rip),%xmm14
1124	paddd	%xmm14,%xmm10
1125	pxor	%xmm10,%xmm6
1126	movdqa	%xmm6,%xmm3
1127	pslld	$12,%xmm3
1128	psrld	$20,%xmm6
1129	pxor	%xmm3,%xmm6
1130	paddd	%xmm6,%xmm2
1131	pxor	%xmm2,%xmm14
1132	pshufb	.Lrol8(%rip),%xmm14
1133	paddd	%xmm14,%xmm10
1134	pxor	%xmm10,%xmm6
1135	movdqa	%xmm6,%xmm3
1136	pslld	$7,%xmm3
1137	psrld	$25,%xmm6
1138	pxor	%xmm3,%xmm6
1139.byte	102,15,58,15,246,4
1140.byte	102,69,15,58,15,210,8
1141.byte	102,69,15,58,15,246,12
1142	paddd	%xmm4,%xmm0
1143	pxor	%xmm0,%xmm12
1144	pshufb	.Lrol16(%rip),%xmm12
1145	paddd	%xmm12,%xmm8
1146	pxor	%xmm8,%xmm4
1147	movdqa	%xmm4,%xmm3
1148	pslld	$12,%xmm3
1149	psrld	$20,%xmm4
1150	pxor	%xmm3,%xmm4
1151	paddd	%xmm4,%xmm0
1152	pxor	%xmm0,%xmm12
1153	pshufb	.Lrol8(%rip),%xmm12
1154	paddd	%xmm12,%xmm8
1155	pxor	%xmm8,%xmm4
1156	movdqa	%xmm4,%xmm3
1157	pslld	$7,%xmm3
1158	psrld	$25,%xmm4
1159	pxor	%xmm3,%xmm4
1160.byte	102,15,58,15,228,12
1161.byte	102,69,15,58,15,192,8
1162.byte	102,69,15,58,15,228,4
1163	paddd	%xmm5,%xmm1
1164	pxor	%xmm1,%xmm13
1165	pshufb	.Lrol16(%rip),%xmm13
1166	paddd	%xmm13,%xmm9
1167	pxor	%xmm9,%xmm5
1168	movdqa	%xmm5,%xmm3
1169	pslld	$12,%xmm3
1170	psrld	$20,%xmm5
1171	pxor	%xmm3,%xmm5
1172	paddd	%xmm5,%xmm1
1173	pxor	%xmm1,%xmm13
1174	pshufb	.Lrol8(%rip),%xmm13
1175	paddd	%xmm13,%xmm9
1176	pxor	%xmm9,%xmm5
1177	movdqa	%xmm5,%xmm3
1178	pslld	$7,%xmm3
1179	psrld	$25,%xmm5
1180	pxor	%xmm3,%xmm5
1181.byte	102,15,58,15,237,12
1182.byte	102,69,15,58,15,201,8
1183.byte	102,69,15,58,15,237,4
1184	paddd	%xmm6,%xmm2
1185	pxor	%xmm2,%xmm14
1186	pshufb	.Lrol16(%rip),%xmm14
1187	paddd	%xmm14,%xmm10
1188	pxor	%xmm10,%xmm6
1189	movdqa	%xmm6,%xmm3
1190	pslld	$12,%xmm3
1191	psrld	$20,%xmm6
1192	pxor	%xmm3,%xmm6
1193	paddd	%xmm6,%xmm2
1194	pxor	%xmm2,%xmm14
1195	pshufb	.Lrol8(%rip),%xmm14
1196	paddd	%xmm14,%xmm10
1197	pxor	%xmm10,%xmm6
1198	movdqa	%xmm6,%xmm3
1199	pslld	$7,%xmm3
1200	psrld	$25,%xmm6
1201	pxor	%xmm3,%xmm6
1202.byte	102,15,58,15,246,12
1203.byte	102,69,15,58,15,210,8
1204.byte	102,69,15,58,15,246,4
1205
1206	cmpq	%rcx,%r8
1207	jb	.Lopen_sse_tail_192_rounds_and_x1hash
1208	cmpq	$160,%r8
1209	jne	.Lopen_sse_tail_192_rounds
1210	cmpq	$176,%rbx
1211	jb	.Lopen_sse_tail_192_finish
1212	addq	0+160(%rsi),%r10
1213	adcq	8+160(%rsi),%r11
1214	adcq	$1,%r12
1215	movq	0+0+0(%rbp),%rax
1216	movq	%rax,%r15
1217	mulq	%r10
1218	movq	%rax,%r13
1219	movq	%rdx,%r14
1220	movq	0+0+0(%rbp),%rax
1221	mulq	%r11
1222	imulq	%r12,%r15
1223	addq	%rax,%r14
1224	adcq	%rdx,%r15
1225	movq	8+0+0(%rbp),%rax
1226	movq	%rax,%r9
1227	mulq	%r10
1228	addq	%rax,%r14
1229	adcq	$0,%rdx
1230	movq	%rdx,%r10
1231	movq	8+0+0(%rbp),%rax
1232	mulq	%r11
1233	addq	%rax,%r15
1234	adcq	$0,%rdx
1235	imulq	%r12,%r9
1236	addq	%r10,%r15
1237	adcq	%rdx,%r9
1238	movq	%r13,%r10
1239	movq	%r14,%r11
1240	movq	%r15,%r12
1241	andq	$3,%r12
1242	movq	%r15,%r13
1243	andq	$-4,%r13
1244	movq	%r9,%r14
1245	shrdq	$2,%r9,%r15
1246	shrq	$2,%r9
1247	addq	%r13,%r15
1248	adcq	%r14,%r9
1249	addq	%r15,%r10
1250	adcq	%r9,%r11
1251	adcq	$0,%r12
1252
1253	cmpq	$192,%rbx
1254	jb	.Lopen_sse_tail_192_finish
1255	addq	0+176(%rsi),%r10
1256	adcq	8+176(%rsi),%r11
1257	adcq	$1,%r12
1258	movq	0+0+0(%rbp),%rax
1259	movq	%rax,%r15
1260	mulq	%r10
1261	movq	%rax,%r13
1262	movq	%rdx,%r14
1263	movq	0+0+0(%rbp),%rax
1264	mulq	%r11
1265	imulq	%r12,%r15
1266	addq	%rax,%r14
1267	adcq	%rdx,%r15
1268	movq	8+0+0(%rbp),%rax
1269	movq	%rax,%r9
1270	mulq	%r10
1271	addq	%rax,%r14
1272	adcq	$0,%rdx
1273	movq	%rdx,%r10
1274	movq	8+0+0(%rbp),%rax
1275	mulq	%r11
1276	addq	%rax,%r15
1277	adcq	$0,%rdx
1278	imulq	%r12,%r9
1279	addq	%r10,%r15
1280	adcq	%rdx,%r9
1281	movq	%r13,%r10
1282	movq	%r14,%r11
1283	movq	%r15,%r12
1284	andq	$3,%r12
1285	movq	%r15,%r13
1286	andq	$-4,%r13
1287	movq	%r9,%r14
1288	shrdq	$2,%r9,%r15
1289	shrq	$2,%r9
1290	addq	%r13,%r15
1291	adcq	%r14,%r9
1292	addq	%r15,%r10
1293	adcq	%r9,%r11
1294	adcq	$0,%r12
1295
1296.Lopen_sse_tail_192_finish:
1297	paddd	.Lchacha20_consts(%rip),%xmm2
1298	paddd	0+48(%rbp),%xmm6
1299	paddd	0+64(%rbp),%xmm10
1300	paddd	0+128(%rbp),%xmm14
1301	paddd	.Lchacha20_consts(%rip),%xmm1
1302	paddd	0+48(%rbp),%xmm5
1303	paddd	0+64(%rbp),%xmm9
1304	paddd	0+112(%rbp),%xmm13
1305	paddd	.Lchacha20_consts(%rip),%xmm0
1306	paddd	0+48(%rbp),%xmm4
1307	paddd	0+64(%rbp),%xmm8
1308	paddd	0+96(%rbp),%xmm12
1309	movdqu	0 + 0(%rsi),%xmm3
1310	movdqu	16 + 0(%rsi),%xmm7
1311	movdqu	32 + 0(%rsi),%xmm11
1312	movdqu	48 + 0(%rsi),%xmm15
1313	pxor	%xmm3,%xmm2
1314	pxor	%xmm7,%xmm6
1315	pxor	%xmm11,%xmm10
1316	pxor	%xmm14,%xmm15
1317	movdqu	%xmm2,0 + 0(%rdi)
1318	movdqu	%xmm6,16 + 0(%rdi)
1319	movdqu	%xmm10,32 + 0(%rdi)
1320	movdqu	%xmm15,48 + 0(%rdi)
1321	movdqu	0 + 64(%rsi),%xmm3
1322	movdqu	16 + 64(%rsi),%xmm7
1323	movdqu	32 + 64(%rsi),%xmm11
1324	movdqu	48 + 64(%rsi),%xmm15
1325	pxor	%xmm3,%xmm1
1326	pxor	%xmm7,%xmm5
1327	pxor	%xmm11,%xmm9
1328	pxor	%xmm13,%xmm15
1329	movdqu	%xmm1,0 + 64(%rdi)
1330	movdqu	%xmm5,16 + 64(%rdi)
1331	movdqu	%xmm9,32 + 64(%rdi)
1332	movdqu	%xmm15,48 + 64(%rdi)
1333
1334	subq	$128,%rbx
1335	leaq	128(%rsi),%rsi
1336	leaq	128(%rdi),%rdi
1337	jmp	.Lopen_sse_tail_64_dec_loop
1338
1339.Lopen_sse_tail_256:
1340	movdqa	.Lchacha20_consts(%rip),%xmm0
1341	movdqa	0+48(%rbp),%xmm4
1342	movdqa	0+64(%rbp),%xmm8
1343	movdqa	%xmm0,%xmm1
1344	movdqa	%xmm4,%xmm5
1345	movdqa	%xmm8,%xmm9
1346	movdqa	%xmm0,%xmm2
1347	movdqa	%xmm4,%xmm6
1348	movdqa	%xmm8,%xmm10
1349	movdqa	%xmm0,%xmm3
1350	movdqa	%xmm4,%xmm7
1351	movdqa	%xmm8,%xmm11
1352	movdqa	0+96(%rbp),%xmm15
1353	paddd	.Lsse_inc(%rip),%xmm15
1354	movdqa	%xmm15,%xmm14
1355	paddd	.Lsse_inc(%rip),%xmm14
1356	movdqa	%xmm14,%xmm13
1357	paddd	.Lsse_inc(%rip),%xmm13
1358	movdqa	%xmm13,%xmm12
1359	paddd	.Lsse_inc(%rip),%xmm12
1360	movdqa	%xmm12,0+96(%rbp)
1361	movdqa	%xmm13,0+112(%rbp)
1362	movdqa	%xmm14,0+128(%rbp)
1363	movdqa	%xmm15,0+144(%rbp)
1364
1365	xorq	%r8,%r8
1366.Lopen_sse_tail_256_rounds_and_x1hash:
1367	addq	0+0(%rsi,%r8,1),%r10
1368	adcq	8+0(%rsi,%r8,1),%r11
1369	adcq	$1,%r12
1370	movdqa	%xmm11,0+80(%rbp)
1371	paddd	%xmm4,%xmm0
1372	pxor	%xmm0,%xmm12
1373	pshufb	.Lrol16(%rip),%xmm12
1374	paddd	%xmm12,%xmm8
1375	pxor	%xmm8,%xmm4
1376	movdqa	%xmm4,%xmm11
1377	pslld	$12,%xmm11
1378	psrld	$20,%xmm4
1379	pxor	%xmm11,%xmm4
1380	paddd	%xmm4,%xmm0
1381	pxor	%xmm0,%xmm12
1382	pshufb	.Lrol8(%rip),%xmm12
1383	paddd	%xmm12,%xmm8
1384	pxor	%xmm8,%xmm4
1385	movdqa	%xmm4,%xmm11
1386	pslld	$7,%xmm11
1387	psrld	$25,%xmm4
1388	pxor	%xmm11,%xmm4
1389.byte	102,15,58,15,228,4
1390.byte	102,69,15,58,15,192,8
1391.byte	102,69,15,58,15,228,12
1392	paddd	%xmm5,%xmm1
1393	pxor	%xmm1,%xmm13
1394	pshufb	.Lrol16(%rip),%xmm13
1395	paddd	%xmm13,%xmm9
1396	pxor	%xmm9,%xmm5
1397	movdqa	%xmm5,%xmm11
1398	pslld	$12,%xmm11
1399	psrld	$20,%xmm5
1400	pxor	%xmm11,%xmm5
1401	paddd	%xmm5,%xmm1
1402	pxor	%xmm1,%xmm13
1403	pshufb	.Lrol8(%rip),%xmm13
1404	paddd	%xmm13,%xmm9
1405	pxor	%xmm9,%xmm5
1406	movdqa	%xmm5,%xmm11
1407	pslld	$7,%xmm11
1408	psrld	$25,%xmm5
1409	pxor	%xmm11,%xmm5
1410.byte	102,15,58,15,237,4
1411.byte	102,69,15,58,15,201,8
1412.byte	102,69,15,58,15,237,12
1413	paddd	%xmm6,%xmm2
1414	pxor	%xmm2,%xmm14
1415	pshufb	.Lrol16(%rip),%xmm14
1416	paddd	%xmm14,%xmm10
1417	pxor	%xmm10,%xmm6
1418	movdqa	%xmm6,%xmm11
1419	pslld	$12,%xmm11
1420	psrld	$20,%xmm6
1421	pxor	%xmm11,%xmm6
1422	paddd	%xmm6,%xmm2
1423	pxor	%xmm2,%xmm14
1424	pshufb	.Lrol8(%rip),%xmm14
1425	paddd	%xmm14,%xmm10
1426	pxor	%xmm10,%xmm6
1427	movdqa	%xmm6,%xmm11
1428	pslld	$7,%xmm11
1429	psrld	$25,%xmm6
1430	pxor	%xmm11,%xmm6
1431.byte	102,15,58,15,246,4
1432.byte	102,69,15,58,15,210,8
1433.byte	102,69,15,58,15,246,12
1434	movdqa	0+80(%rbp),%xmm11
1435	movq	0+0+0(%rbp),%rax
1436	movq	%rax,%r15
1437	mulq	%r10
1438	movq	%rax,%r13
1439	movq	%rdx,%r14
1440	movq	0+0+0(%rbp),%rax
1441	mulq	%r11
1442	imulq	%r12,%r15
1443	addq	%rax,%r14
1444	adcq	%rdx,%r15
1445	movdqa	%xmm9,0+80(%rbp)
1446	paddd	%xmm7,%xmm3
1447	pxor	%xmm3,%xmm15
1448	pshufb	.Lrol16(%rip),%xmm15
1449	paddd	%xmm15,%xmm11
1450	pxor	%xmm11,%xmm7
1451	movdqa	%xmm7,%xmm9
1452	pslld	$12,%xmm9
1453	psrld	$20,%xmm7
1454	pxor	%xmm9,%xmm7
1455	paddd	%xmm7,%xmm3
1456	pxor	%xmm3,%xmm15
1457	pshufb	.Lrol8(%rip),%xmm15
1458	paddd	%xmm15,%xmm11
1459	pxor	%xmm11,%xmm7
1460	movdqa	%xmm7,%xmm9
1461	pslld	$7,%xmm9
1462	psrld	$25,%xmm7
1463	pxor	%xmm9,%xmm7
1464.byte	102,15,58,15,255,4
1465.byte	102,69,15,58,15,219,8
1466.byte	102,69,15,58,15,255,12
1467	movdqa	0+80(%rbp),%xmm9
1468	movq	8+0+0(%rbp),%rax
1469	movq	%rax,%r9
1470	mulq	%r10
1471	addq	%rax,%r14
1472	adcq	$0,%rdx
1473	movq	%rdx,%r10
1474	movq	8+0+0(%rbp),%rax
1475	mulq	%r11
1476	addq	%rax,%r15
1477	adcq	$0,%rdx
1478	movdqa	%xmm11,0+80(%rbp)
1479	paddd	%xmm4,%xmm0
1480	pxor	%xmm0,%xmm12
1481	pshufb	.Lrol16(%rip),%xmm12
1482	paddd	%xmm12,%xmm8
1483	pxor	%xmm8,%xmm4
1484	movdqa	%xmm4,%xmm11
1485	pslld	$12,%xmm11
1486	psrld	$20,%xmm4
1487	pxor	%xmm11,%xmm4
1488	paddd	%xmm4,%xmm0
1489	pxor	%xmm0,%xmm12
1490	pshufb	.Lrol8(%rip),%xmm12
1491	paddd	%xmm12,%xmm8
1492	pxor	%xmm8,%xmm4
1493	movdqa	%xmm4,%xmm11
1494	pslld	$7,%xmm11
1495	psrld	$25,%xmm4
1496	pxor	%xmm11,%xmm4
1497.byte	102,15,58,15,228,12
1498.byte	102,69,15,58,15,192,8
1499.byte	102,69,15,58,15,228,4
1500	paddd	%xmm5,%xmm1
1501	pxor	%xmm1,%xmm13
1502	pshufb	.Lrol16(%rip),%xmm13
1503	paddd	%xmm13,%xmm9
1504	pxor	%xmm9,%xmm5
1505	movdqa	%xmm5,%xmm11
1506	pslld	$12,%xmm11
1507	psrld	$20,%xmm5
1508	pxor	%xmm11,%xmm5
1509	paddd	%xmm5,%xmm1
1510	pxor	%xmm1,%xmm13
1511	pshufb	.Lrol8(%rip),%xmm13
1512	paddd	%xmm13,%xmm9
1513	pxor	%xmm9,%xmm5
1514	movdqa	%xmm5,%xmm11
1515	pslld	$7,%xmm11
1516	psrld	$25,%xmm5
1517	pxor	%xmm11,%xmm5
1518.byte	102,15,58,15,237,12
1519.byte	102,69,15,58,15,201,8
1520.byte	102,69,15,58,15,237,4
1521	imulq	%r12,%r9
1522	addq	%r10,%r15
1523	adcq	%rdx,%r9
1524	paddd	%xmm6,%xmm2
1525	pxor	%xmm2,%xmm14
1526	pshufb	.Lrol16(%rip),%xmm14
1527	paddd	%xmm14,%xmm10
1528	pxor	%xmm10,%xmm6
1529	movdqa	%xmm6,%xmm11
1530	pslld	$12,%xmm11
1531	psrld	$20,%xmm6
1532	pxor	%xmm11,%xmm6
1533	paddd	%xmm6,%xmm2
1534	pxor	%xmm2,%xmm14
1535	pshufb	.Lrol8(%rip),%xmm14
1536	paddd	%xmm14,%xmm10
1537	pxor	%xmm10,%xmm6
1538	movdqa	%xmm6,%xmm11
1539	pslld	$7,%xmm11
1540	psrld	$25,%xmm6
1541	pxor	%xmm11,%xmm6
1542.byte	102,15,58,15,246,12
1543.byte	102,69,15,58,15,210,8
1544.byte	102,69,15,58,15,246,4
1545	movdqa	0+80(%rbp),%xmm11
1546	movq	%r13,%r10
1547	movq	%r14,%r11
1548	movq	%r15,%r12
1549	andq	$3,%r12
1550	movq	%r15,%r13
1551	andq	$-4,%r13
1552	movq	%r9,%r14
1553	shrdq	$2,%r9,%r15
1554	shrq	$2,%r9
1555	addq	%r13,%r15
1556	adcq	%r14,%r9
1557	addq	%r15,%r10
1558	adcq	%r9,%r11
1559	adcq	$0,%r12
1560	movdqa	%xmm9,0+80(%rbp)
1561	paddd	%xmm7,%xmm3
1562	pxor	%xmm3,%xmm15
1563	pshufb	.Lrol16(%rip),%xmm15
1564	paddd	%xmm15,%xmm11
1565	pxor	%xmm11,%xmm7
1566	movdqa	%xmm7,%xmm9
1567	pslld	$12,%xmm9
1568	psrld	$20,%xmm7
1569	pxor	%xmm9,%xmm7
1570	paddd	%xmm7,%xmm3
1571	pxor	%xmm3,%xmm15
1572	pshufb	.Lrol8(%rip),%xmm15
1573	paddd	%xmm15,%xmm11
1574	pxor	%xmm11,%xmm7
1575	movdqa	%xmm7,%xmm9
1576	pslld	$7,%xmm9
1577	psrld	$25,%xmm7
1578	pxor	%xmm9,%xmm7
1579.byte	102,15,58,15,255,12
1580.byte	102,69,15,58,15,219,8
1581.byte	102,69,15,58,15,255,4
1582	movdqa	0+80(%rbp),%xmm9
1583
1584	addq	$16,%r8
1585	cmpq	$160,%r8
1586	jb	.Lopen_sse_tail_256_rounds_and_x1hash
1587
1588	movq	%rbx,%rcx
1589	andq	$-16,%rcx
1590.Lopen_sse_tail_256_hash:
1591	addq	0+0(%rsi,%r8,1),%r10
1592	adcq	8+0(%rsi,%r8,1),%r11
1593	adcq	$1,%r12
1594	movq	0+0+0(%rbp),%rax
1595	movq	%rax,%r15
1596	mulq	%r10
1597	movq	%rax,%r13
1598	movq	%rdx,%r14
1599	movq	0+0+0(%rbp),%rax
1600	mulq	%r11
1601	imulq	%r12,%r15
1602	addq	%rax,%r14
1603	adcq	%rdx,%r15
1604	movq	8+0+0(%rbp),%rax
1605	movq	%rax,%r9
1606	mulq	%r10
1607	addq	%rax,%r14
1608	adcq	$0,%rdx
1609	movq	%rdx,%r10
1610	movq	8+0+0(%rbp),%rax
1611	mulq	%r11
1612	addq	%rax,%r15
1613	adcq	$0,%rdx
1614	imulq	%r12,%r9
1615	addq	%r10,%r15
1616	adcq	%rdx,%r9
1617	movq	%r13,%r10
1618	movq	%r14,%r11
1619	movq	%r15,%r12
1620	andq	$3,%r12
1621	movq	%r15,%r13
1622	andq	$-4,%r13
1623	movq	%r9,%r14
1624	shrdq	$2,%r9,%r15
1625	shrq	$2,%r9
1626	addq	%r13,%r15
1627	adcq	%r14,%r9
1628	addq	%r15,%r10
1629	adcq	%r9,%r11
1630	adcq	$0,%r12
1631
1632	addq	$16,%r8
1633	cmpq	%rcx,%r8
1634	jb	.Lopen_sse_tail_256_hash
1635	paddd	.Lchacha20_consts(%rip),%xmm3
1636	paddd	0+48(%rbp),%xmm7
1637	paddd	0+64(%rbp),%xmm11
1638	paddd	0+144(%rbp),%xmm15
1639	paddd	.Lchacha20_consts(%rip),%xmm2
1640	paddd	0+48(%rbp),%xmm6
1641	paddd	0+64(%rbp),%xmm10
1642	paddd	0+128(%rbp),%xmm14
1643	paddd	.Lchacha20_consts(%rip),%xmm1
1644	paddd	0+48(%rbp),%xmm5
1645	paddd	0+64(%rbp),%xmm9
1646	paddd	0+112(%rbp),%xmm13
1647	paddd	.Lchacha20_consts(%rip),%xmm0
1648	paddd	0+48(%rbp),%xmm4
1649	paddd	0+64(%rbp),%xmm8
1650	paddd	0+96(%rbp),%xmm12
1651	movdqa	%xmm12,0+80(%rbp)
1652	movdqu	0 + 0(%rsi),%xmm12
1653	pxor	%xmm3,%xmm12
1654	movdqu	%xmm12,0 + 0(%rdi)
1655	movdqu	16 + 0(%rsi),%xmm12
1656	pxor	%xmm7,%xmm12
1657	movdqu	%xmm12,16 + 0(%rdi)
1658	movdqu	32 + 0(%rsi),%xmm12
1659	pxor	%xmm11,%xmm12
1660	movdqu	%xmm12,32 + 0(%rdi)
1661	movdqu	48 + 0(%rsi),%xmm12
1662	pxor	%xmm15,%xmm12
1663	movdqu	%xmm12,48 + 0(%rdi)
1664	movdqu	0 + 64(%rsi),%xmm3
1665	movdqu	16 + 64(%rsi),%xmm7
1666	movdqu	32 + 64(%rsi),%xmm11
1667	movdqu	48 + 64(%rsi),%xmm15
1668	pxor	%xmm3,%xmm2
1669	pxor	%xmm7,%xmm6
1670	pxor	%xmm11,%xmm10
1671	pxor	%xmm14,%xmm15
1672	movdqu	%xmm2,0 + 64(%rdi)
1673	movdqu	%xmm6,16 + 64(%rdi)
1674	movdqu	%xmm10,32 + 64(%rdi)
1675	movdqu	%xmm15,48 + 64(%rdi)
1676	movdqu	0 + 128(%rsi),%xmm3
1677	movdqu	16 + 128(%rsi),%xmm7
1678	movdqu	32 + 128(%rsi),%xmm11
1679	movdqu	48 + 128(%rsi),%xmm15
1680	pxor	%xmm3,%xmm1
1681	pxor	%xmm7,%xmm5
1682	pxor	%xmm11,%xmm9
1683	pxor	%xmm13,%xmm15
1684	movdqu	%xmm1,0 + 128(%rdi)
1685	movdqu	%xmm5,16 + 128(%rdi)
1686	movdqu	%xmm9,32 + 128(%rdi)
1687	movdqu	%xmm15,48 + 128(%rdi)
1688
1689	movdqa	0+80(%rbp),%xmm12
1690	subq	$192,%rbx
1691	leaq	192(%rsi),%rsi
1692	leaq	192(%rdi),%rdi
1693
1694
1695.Lopen_sse_tail_64_dec_loop:
1696	cmpq	$16,%rbx
1697	jb	.Lopen_sse_tail_16_init
1698	subq	$16,%rbx
1699	movdqu	(%rsi),%xmm3
1700	pxor	%xmm3,%xmm0
1701	movdqu	%xmm0,(%rdi)
1702	leaq	16(%rsi),%rsi
1703	leaq	16(%rdi),%rdi
1704	movdqa	%xmm4,%xmm0
1705	movdqa	%xmm8,%xmm4
1706	movdqa	%xmm12,%xmm8
1707	jmp	.Lopen_sse_tail_64_dec_loop
1708.Lopen_sse_tail_16_init:
1709	movdqa	%xmm0,%xmm1
1710
1711
1712.Lopen_sse_tail_16:
1713	testq	%rbx,%rbx
1714	jz	.Lopen_sse_finalize
1715
1716
1717
1718	pxor	%xmm3,%xmm3
1719	leaq	-1(%rsi,%rbx,1),%rsi
1720	movq	%rbx,%r8
1721.Lopen_sse_tail_16_compose:
1722	pslldq	$1,%xmm3
1723	pinsrb	$0,(%rsi),%xmm3
1724	subq	$1,%rsi
1725	subq	$1,%r8
1726	jnz	.Lopen_sse_tail_16_compose
1727
1728.byte	102,73,15,126,221
1729	pextrq	$1,%xmm3,%r14
1730
1731	pxor	%xmm1,%xmm3
1732
1733
1734.Lopen_sse_tail_16_extract:
1735	pextrb	$0,%xmm3,(%rdi)
1736	psrldq	$1,%xmm3
1737	addq	$1,%rdi
1738	subq	$1,%rbx
1739	jne	.Lopen_sse_tail_16_extract
1740
1741	addq	%r13,%r10
1742	adcq	%r14,%r11
1743	adcq	$1,%r12
1744	movq	0+0+0(%rbp),%rax
1745	movq	%rax,%r15
1746	mulq	%r10
1747	movq	%rax,%r13
1748	movq	%rdx,%r14
1749	movq	0+0+0(%rbp),%rax
1750	mulq	%r11
1751	imulq	%r12,%r15
1752	addq	%rax,%r14
1753	adcq	%rdx,%r15
1754	movq	8+0+0(%rbp),%rax
1755	movq	%rax,%r9
1756	mulq	%r10
1757	addq	%rax,%r14
1758	adcq	$0,%rdx
1759	movq	%rdx,%r10
1760	movq	8+0+0(%rbp),%rax
1761	mulq	%r11
1762	addq	%rax,%r15
1763	adcq	$0,%rdx
1764	imulq	%r12,%r9
1765	addq	%r10,%r15
1766	adcq	%rdx,%r9
1767	movq	%r13,%r10
1768	movq	%r14,%r11
1769	movq	%r15,%r12
1770	andq	$3,%r12
1771	movq	%r15,%r13
1772	andq	$-4,%r13
1773	movq	%r9,%r14
1774	shrdq	$2,%r9,%r15
1775	shrq	$2,%r9
1776	addq	%r13,%r15
1777	adcq	%r14,%r9
1778	addq	%r15,%r10
1779	adcq	%r9,%r11
1780	adcq	$0,%r12
1781
1782
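/* Finalize: absorb the saved length block (ad_len || ciphertext_len) into
   Poly1305, perform the final reduction of h modulo 2^130 - 5, add the
   second key half s from 16(%rbp), and write the 16-byte tag to (%r9). */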
1783.Lopen_sse_finalize:
1784	addq	0+0+32(%rbp),%r10
1785	adcq	8+0+32(%rbp),%r11
1786	adcq	$1,%r12
1787	movq	0+0+0(%rbp),%rax
1788	movq	%rax,%r15
1789	mulq	%r10
1790	movq	%rax,%r13
1791	movq	%rdx,%r14
1792	movq	0+0+0(%rbp),%rax
1793	mulq	%r11
1794	imulq	%r12,%r15
1795	addq	%rax,%r14
1796	adcq	%rdx,%r15
1797	movq	8+0+0(%rbp),%rax
1798	movq	%rax,%r9
1799	mulq	%r10
1800	addq	%rax,%r14
1801	adcq	$0,%rdx
1802	movq	%rdx,%r10
1803	movq	8+0+0(%rbp),%rax
1804	mulq	%r11
1805	addq	%rax,%r15
1806	adcq	$0,%rdx
1807	imulq	%r12,%r9
1808	addq	%r10,%r15
1809	adcq	%rdx,%r9
1810	movq	%r13,%r10
1811	movq	%r14,%r11
1812	movq	%r15,%r12
1813	andq	$3,%r12
1814	movq	%r15,%r13
1815	andq	$-4,%r13
1816	movq	%r9,%r14
1817	shrdq	$2,%r9,%r15
1818	shrq	$2,%r9
1819	addq	%r13,%r15
1820	adcq	%r14,%r9
1821	addq	%r15,%r10
1822	adcq	%r9,%r11
1823	adcq	$0,%r12
1824
1825
1826	movq	%r10,%r13
1827	movq	%r11,%r14
1828	movq	%r12,%r15
1829	subq	$-5,%r10
1830	sbbq	$-1,%r11
1831	sbbq	$3,%r12
1832	cmovcq	%r13,%r10
1833	cmovcq	%r14,%r11
1834	cmovcq	%r15,%r12
1835
1836	addq	0+0+16(%rbp),%r10
1837	adcq	8+0+16(%rbp),%r11
1838
1839.cfi_remember_state
1840	addq	$288 + 0 + 32,%rsp
1841.cfi_adjust_cfa_offset	-(288 + 32)
1842
1843	popq	%r9
1844.cfi_adjust_cfa_offset	-8
1845.cfi_restore	%r9
1846	movq	%r10,(%r9)
1847	movq	%r11,8(%r9)
1848	popq	%r15
1849.cfi_adjust_cfa_offset	-8
1850.cfi_restore	%r15
1851	popq	%r14
1852.cfi_adjust_cfa_offset	-8
1853.cfi_restore	%r14
1854	popq	%r13
1855.cfi_adjust_cfa_offset	-8
1856.cfi_restore	%r13
1857	popq	%r12
1858.cfi_adjust_cfa_offset	-8
1859.cfi_restore	%r12
1860	popq	%rbx
1861.cfi_adjust_cfa_offset	-8
1862.cfi_restore	%rbx
1863	popq	%rbp
1864.cfi_adjust_cfa_offset	-8
1865.cfi_restore	%rbp
1866	.byte	0xf3,0xc3
1867
1868.Lopen_sse_128:
1869.cfi_restore_state
1870	movdqu	.Lchacha20_consts(%rip),%xmm0
1871	movdqa	%xmm0,%xmm1
1872	movdqa	%xmm0,%xmm2
1873	movdqu	0(%r9),%xmm4
1874	movdqa	%xmm4,%xmm5
1875	movdqa	%xmm4,%xmm6
1876	movdqu	16(%r9),%xmm8
1877	movdqa	%xmm8,%xmm9
1878	movdqa	%xmm8,%xmm10
1879	movdqu	32(%r9),%xmm12
1880	movdqa	%xmm12,%xmm13
1881	paddd	.Lsse_inc(%rip),%xmm13
1882	movdqa	%xmm13,%xmm14
1883	paddd	.Lsse_inc(%rip),%xmm14
1884	movdqa	%xmm4,%xmm7
1885	movdqa	%xmm8,%xmm11
1886	movdqa	%xmm13,%xmm15
1887	movq	$10,%r10
1888
1889.Lopen_sse_128_rounds:
1890	paddd	%xmm4,%xmm0
1891	pxor	%xmm0,%xmm12
1892	pshufb	.Lrol16(%rip),%xmm12
1893	paddd	%xmm12,%xmm8
1894	pxor	%xmm8,%xmm4
1895	movdqa	%xmm4,%xmm3
1896	pslld	$12,%xmm3
1897	psrld	$20,%xmm4
1898	pxor	%xmm3,%xmm4
1899	paddd	%xmm4,%xmm0
1900	pxor	%xmm0,%xmm12
1901	pshufb	.Lrol8(%rip),%xmm12
1902	paddd	%xmm12,%xmm8
1903	pxor	%xmm8,%xmm4
1904	movdqa	%xmm4,%xmm3
1905	pslld	$7,%xmm3
1906	psrld	$25,%xmm4
1907	pxor	%xmm3,%xmm4
1908.byte	102,15,58,15,228,4
1909.byte	102,69,15,58,15,192,8
1910.byte	102,69,15,58,15,228,12
1911	paddd	%xmm5,%xmm1
1912	pxor	%xmm1,%xmm13
1913	pshufb	.Lrol16(%rip),%xmm13
1914	paddd	%xmm13,%xmm9
1915	pxor	%xmm9,%xmm5
1916	movdqa	%xmm5,%xmm3
1917	pslld	$12,%xmm3
1918	psrld	$20,%xmm5
1919	pxor	%xmm3,%xmm5
1920	paddd	%xmm5,%xmm1
1921	pxor	%xmm1,%xmm13
1922	pshufb	.Lrol8(%rip),%xmm13
1923	paddd	%xmm13,%xmm9
1924	pxor	%xmm9,%xmm5
1925	movdqa	%xmm5,%xmm3
1926	pslld	$7,%xmm3
1927	psrld	$25,%xmm5
1928	pxor	%xmm3,%xmm5
1929.byte	102,15,58,15,237,4
1930.byte	102,69,15,58,15,201,8
1931.byte	102,69,15,58,15,237,12
1932	paddd	%xmm6,%xmm2
1933	pxor	%xmm2,%xmm14
1934	pshufb	.Lrol16(%rip),%xmm14
1935	paddd	%xmm14,%xmm10
1936	pxor	%xmm10,%xmm6
1937	movdqa	%xmm6,%xmm3
1938	pslld	$12,%xmm3
1939	psrld	$20,%xmm6
1940	pxor	%xmm3,%xmm6
1941	paddd	%xmm6,%xmm2
1942	pxor	%xmm2,%xmm14
1943	pshufb	.Lrol8(%rip),%xmm14
1944	paddd	%xmm14,%xmm10
1945	pxor	%xmm10,%xmm6
1946	movdqa	%xmm6,%xmm3
1947	pslld	$7,%xmm3
1948	psrld	$25,%xmm6
1949	pxor	%xmm3,%xmm6
1950.byte	102,15,58,15,246,4
1951.byte	102,69,15,58,15,210,8
1952.byte	102,69,15,58,15,246,12
1953	paddd	%xmm4,%xmm0
1954	pxor	%xmm0,%xmm12
1955	pshufb	.Lrol16(%rip),%xmm12
1956	paddd	%xmm12,%xmm8
1957	pxor	%xmm8,%xmm4
1958	movdqa	%xmm4,%xmm3
1959	pslld	$12,%xmm3
1960	psrld	$20,%xmm4
1961	pxor	%xmm3,%xmm4
1962	paddd	%xmm4,%xmm0
1963	pxor	%xmm0,%xmm12
1964	pshufb	.Lrol8(%rip),%xmm12
1965	paddd	%xmm12,%xmm8
1966	pxor	%xmm8,%xmm4
1967	movdqa	%xmm4,%xmm3
1968	pslld	$7,%xmm3
1969	psrld	$25,%xmm4
1970	pxor	%xmm3,%xmm4
1971.byte	102,15,58,15,228,12
1972.byte	102,69,15,58,15,192,8
1973.byte	102,69,15,58,15,228,4
1974	paddd	%xmm5,%xmm1
1975	pxor	%xmm1,%xmm13
1976	pshufb	.Lrol16(%rip),%xmm13
1977	paddd	%xmm13,%xmm9
1978	pxor	%xmm9,%xmm5
1979	movdqa	%xmm5,%xmm3
1980	pslld	$12,%xmm3
1981	psrld	$20,%xmm5
1982	pxor	%xmm3,%xmm5
1983	paddd	%xmm5,%xmm1
1984	pxor	%xmm1,%xmm13
1985	pshufb	.Lrol8(%rip),%xmm13
1986	paddd	%xmm13,%xmm9
1987	pxor	%xmm9,%xmm5
1988	movdqa	%xmm5,%xmm3
1989	pslld	$7,%xmm3
1990	psrld	$25,%xmm5
1991	pxor	%xmm3,%xmm5
1992.byte	102,15,58,15,237,12
1993.byte	102,69,15,58,15,201,8
1994.byte	102,69,15,58,15,237,4
1995	paddd	%xmm6,%xmm2
1996	pxor	%xmm2,%xmm14
1997	pshufb	.Lrol16(%rip),%xmm14
1998	paddd	%xmm14,%xmm10
1999	pxor	%xmm10,%xmm6
2000	movdqa	%xmm6,%xmm3
2001	pslld	$12,%xmm3
2002	psrld	$20,%xmm6
2003	pxor	%xmm3,%xmm6
2004	paddd	%xmm6,%xmm2
2005	pxor	%xmm2,%xmm14
2006	pshufb	.Lrol8(%rip),%xmm14
2007	paddd	%xmm14,%xmm10
2008	pxor	%xmm10,%xmm6
2009	movdqa	%xmm6,%xmm3
2010	pslld	$7,%xmm3
2011	psrld	$25,%xmm6
2012	pxor	%xmm3,%xmm6
2013.byte	102,15,58,15,246,12
2014.byte	102,69,15,58,15,210,8
2015.byte	102,69,15,58,15,246,4
2016
2017	decq	%r10
2018	jnz	.Lopen_sse_128_rounds
2019	paddd	.Lchacha20_consts(%rip),%xmm0
2020	paddd	.Lchacha20_consts(%rip),%xmm1
2021	paddd	.Lchacha20_consts(%rip),%xmm2
2022	paddd	%xmm7,%xmm4
2023	paddd	%xmm7,%xmm5
2024	paddd	%xmm7,%xmm6
2025	paddd	%xmm11,%xmm9
2026	paddd	%xmm11,%xmm10
2027	paddd	%xmm15,%xmm13
2028	paddd	.Lsse_inc(%rip),%xmm15
2029	paddd	%xmm15,%xmm14
2030
2031	pand	.Lclamp(%rip),%xmm0
2032	movdqa	%xmm0,0+0(%rbp)
2033	movdqa	%xmm4,0+16(%rbp)
2034
2035	movq	%r8,%r8
2036	call	poly_hash_ad_internal
2037.Lopen_sse_128_xor_hash:
2038	cmpq	$16,%rbx
2039	jb	.Lopen_sse_tail_16
2040	subq	$16,%rbx
2041	addq	0+0(%rsi),%r10
2042	adcq	8+0(%rsi),%r11
2043	adcq	$1,%r12
2044
2045
2046	movdqu	0(%rsi),%xmm3
2047	pxor	%xmm3,%xmm1
2048	movdqu	%xmm1,0(%rdi)
2049	leaq	16(%rsi),%rsi
2050	leaq	16(%rdi),%rdi
2051	movq	0+0+0(%rbp),%rax
2052	movq	%rax,%r15
2053	mulq	%r10
2054	movq	%rax,%r13
2055	movq	%rdx,%r14
2056	movq	0+0+0(%rbp),%rax
2057	mulq	%r11
2058	imulq	%r12,%r15
2059	addq	%rax,%r14
2060	adcq	%rdx,%r15
2061	movq	8+0+0(%rbp),%rax
2062	movq	%rax,%r9
2063	mulq	%r10
2064	addq	%rax,%r14
2065	adcq	$0,%rdx
2066	movq	%rdx,%r10
2067	movq	8+0+0(%rbp),%rax
2068	mulq	%r11
2069	addq	%rax,%r15
2070	adcq	$0,%rdx
2071	imulq	%r12,%r9
2072	addq	%r10,%r15
2073	adcq	%rdx,%r9
2074	movq	%r13,%r10
2075	movq	%r14,%r11
2076	movq	%r15,%r12
2077	andq	$3,%r12
2078	movq	%r15,%r13
2079	andq	$-4,%r13
2080	movq	%r9,%r14
2081	shrdq	$2,%r9,%r15
2082	shrq	$2,%r9
2083	addq	%r13,%r15
2084	adcq	%r14,%r9
2085	addq	%r15,%r10
2086	adcq	%r9,%r11
2087	adcq	$0,%r12
2088
2089
2090	movdqa	%xmm5,%xmm1
2091	movdqa	%xmm9,%xmm5
2092	movdqa	%xmm13,%xmm9
2093	movdqa	%xmm2,%xmm13
2094	movdqa	%xmm6,%xmm2
2095	movdqa	%xmm10,%xmm6
2096	movdqa	%xmm14,%xmm10
2097	jmp	.Lopen_sse_128_xor_hash
2098.size	GFp_chacha20_poly1305_open, .-GFp_chacha20_poly1305_open
2099.cfi_endproc
2100
2101
2102
2103
2104
2105
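/* GFp_chacha20_poly1305_seal: same register layout as _open, but encrypting:
   %rdx plaintext bytes at %rsi are encrypted into %rdi, AAD || ciphertext is
   absorbed into Poly1305, and the tag is stored at (%r9).  The extra length
   loaded from 56(%r9) is added to the ciphertext length that is hashed into
   the final length block. */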
2106.globl	GFp_chacha20_poly1305_seal
2107.hidden GFp_chacha20_poly1305_seal
2108.type	GFp_chacha20_poly1305_seal,@function
2109.align	64
2110GFp_chacha20_poly1305_seal:
2111.cfi_startproc
2112	pushq	%rbp
2113.cfi_adjust_cfa_offset	8
2114.cfi_offset	%rbp,-16
2115	pushq	%rbx
2116.cfi_adjust_cfa_offset	8
2117.cfi_offset	%rbx,-24
2118	pushq	%r12
2119.cfi_adjust_cfa_offset	8
2120.cfi_offset	%r12,-32
2121	pushq	%r13
2122.cfi_adjust_cfa_offset	8
2123.cfi_offset	%r13,-40
2124	pushq	%r14
2125.cfi_adjust_cfa_offset	8
2126.cfi_offset	%r14,-48
2127	pushq	%r15
2128.cfi_adjust_cfa_offset	8
2129.cfi_offset	%r15,-56
2130
2131
2132	pushq	%r9
2133.cfi_adjust_cfa_offset	8
2134.cfi_offset	%r9,-64
2135	subq	$288 + 0 + 32,%rsp
2136.cfi_adjust_cfa_offset	288 + 32
2137	leaq	32(%rsp),%rbp
2138	andq	$-32,%rbp
2139
2140	movq	56(%r9),%rbx
2141	addq	%rdx,%rbx
2142	movq	%r8,0+0+32(%rbp)
2143	movq	%rbx,8+0+32(%rbp)
2144	movq	%rdx,%rbx
2145
2146	movl	GFp_ia32cap_P+8(%rip),%eax
2147	andl	$288,%eax
2148	xorl	$288,%eax
2149	jz	chacha20_poly1305_seal_avx2
2150
2151	cmpq	$128,%rbx
2152	jbe	.Lseal_sse_128
2153
2154	movdqa	.Lchacha20_consts(%rip),%xmm0
2155	movdqu	0(%r9),%xmm4
2156	movdqu	16(%r9),%xmm8
2157	movdqu	32(%r9),%xmm12
2158
2159	movdqa	%xmm0,%xmm1
2160	movdqa	%xmm0,%xmm2
2161	movdqa	%xmm0,%xmm3
2162	movdqa	%xmm4,%xmm5
2163	movdqa	%xmm4,%xmm6
2164	movdqa	%xmm4,%xmm7
2165	movdqa	%xmm8,%xmm9
2166	movdqa	%xmm8,%xmm10
2167	movdqa	%xmm8,%xmm11
2168	movdqa	%xmm12,%xmm15
2169	paddd	.Lsse_inc(%rip),%xmm12
2170	movdqa	%xmm12,%xmm14
2171	paddd	.Lsse_inc(%rip),%xmm12
2172	movdqa	%xmm12,%xmm13
2173	paddd	.Lsse_inc(%rip),%xmm12
2174
2175	movdqa	%xmm4,0+48(%rbp)
2176	movdqa	%xmm8,0+64(%rbp)
2177	movdqa	%xmm12,0+96(%rbp)
2178	movdqa	%xmm13,0+112(%rbp)
2179	movdqa	%xmm14,0+128(%rbp)
2180	movdqa	%xmm15,0+144(%rbp)
2181	movq	$10,%r10
2182.Lseal_sse_init_rounds:
2183	movdqa	%xmm8,0+80(%rbp)
2184	movdqa	.Lrol16(%rip),%xmm8
2185	paddd	%xmm7,%xmm3
2186	paddd	%xmm6,%xmm2
2187	paddd	%xmm5,%xmm1
2188	paddd	%xmm4,%xmm0
2189	pxor	%xmm3,%xmm15
2190	pxor	%xmm2,%xmm14
2191	pxor	%xmm1,%xmm13
2192	pxor	%xmm0,%xmm12
2193.byte	102,69,15,56,0,248
2194.byte	102,69,15,56,0,240
2195.byte	102,69,15,56,0,232
2196.byte	102,69,15,56,0,224
2197	movdqa	0+80(%rbp),%xmm8
2198	paddd	%xmm15,%xmm11
2199	paddd	%xmm14,%xmm10
2200	paddd	%xmm13,%xmm9
2201	paddd	%xmm12,%xmm8
2202	pxor	%xmm11,%xmm7
2203	pxor	%xmm10,%xmm6
2204	pxor	%xmm9,%xmm5
2205	pxor	%xmm8,%xmm4
2206	movdqa	%xmm8,0+80(%rbp)
2207	movdqa	%xmm7,%xmm8
2208	psrld	$20,%xmm8
2209	pslld	$32-20,%xmm7
2210	pxor	%xmm8,%xmm7
2211	movdqa	%xmm6,%xmm8
2212	psrld	$20,%xmm8
2213	pslld	$32-20,%xmm6
2214	pxor	%xmm8,%xmm6
2215	movdqa	%xmm5,%xmm8
2216	psrld	$20,%xmm8
2217	pslld	$32-20,%xmm5
2218	pxor	%xmm8,%xmm5
2219	movdqa	%xmm4,%xmm8
2220	psrld	$20,%xmm8
2221	pslld	$32-20,%xmm4
2222	pxor	%xmm8,%xmm4
2223	movdqa	.Lrol8(%rip),%xmm8
2224	paddd	%xmm7,%xmm3
2225	paddd	%xmm6,%xmm2
2226	paddd	%xmm5,%xmm1
2227	paddd	%xmm4,%xmm0
2228	pxor	%xmm3,%xmm15
2229	pxor	%xmm2,%xmm14
2230	pxor	%xmm1,%xmm13
2231	pxor	%xmm0,%xmm12
2232.byte	102,69,15,56,0,248
2233.byte	102,69,15,56,0,240
2234.byte	102,69,15,56,0,232
2235.byte	102,69,15,56,0,224
2236	movdqa	0+80(%rbp),%xmm8
2237	paddd	%xmm15,%xmm11
2238	paddd	%xmm14,%xmm10
2239	paddd	%xmm13,%xmm9
2240	paddd	%xmm12,%xmm8
2241	pxor	%xmm11,%xmm7
2242	pxor	%xmm10,%xmm6
2243	pxor	%xmm9,%xmm5
2244	pxor	%xmm8,%xmm4
2245	movdqa	%xmm8,0+80(%rbp)
2246	movdqa	%xmm7,%xmm8
2247	psrld	$25,%xmm8
2248	pslld	$32-25,%xmm7
2249	pxor	%xmm8,%xmm7
2250	movdqa	%xmm6,%xmm8
2251	psrld	$25,%xmm8
2252	pslld	$32-25,%xmm6
2253	pxor	%xmm8,%xmm6
2254	movdqa	%xmm5,%xmm8
2255	psrld	$25,%xmm8
2256	pslld	$32-25,%xmm5
2257	pxor	%xmm8,%xmm5
2258	movdqa	%xmm4,%xmm8
2259	psrld	$25,%xmm8
2260	pslld	$32-25,%xmm4
2261	pxor	%xmm8,%xmm4
2262	movdqa	0+80(%rbp),%xmm8
2263.byte	102,15,58,15,255,4
2264.byte	102,69,15,58,15,219,8
2265.byte	102,69,15,58,15,255,12
2266.byte	102,15,58,15,246,4
2267.byte	102,69,15,58,15,210,8
2268.byte	102,69,15,58,15,246,12
2269.byte	102,15,58,15,237,4
2270.byte	102,69,15,58,15,201,8
2271.byte	102,69,15,58,15,237,12
2272.byte	102,15,58,15,228,4
2273.byte	102,69,15,58,15,192,8
2274.byte	102,69,15,58,15,228,12
2275	movdqa	%xmm8,0+80(%rbp)
2276	movdqa	.Lrol16(%rip),%xmm8
2277	paddd	%xmm7,%xmm3
2278	paddd	%xmm6,%xmm2
2279	paddd	%xmm5,%xmm1
2280	paddd	%xmm4,%xmm0
2281	pxor	%xmm3,%xmm15
2282	pxor	%xmm2,%xmm14
2283	pxor	%xmm1,%xmm13
2284	pxor	%xmm0,%xmm12
2285.byte	102,69,15,56,0,248
2286.byte	102,69,15,56,0,240
2287.byte	102,69,15,56,0,232
2288.byte	102,69,15,56,0,224
2289	movdqa	0+80(%rbp),%xmm8
2290	paddd	%xmm15,%xmm11
2291	paddd	%xmm14,%xmm10
2292	paddd	%xmm13,%xmm9
2293	paddd	%xmm12,%xmm8
2294	pxor	%xmm11,%xmm7
2295	pxor	%xmm10,%xmm6
2296	pxor	%xmm9,%xmm5
2297	pxor	%xmm8,%xmm4
2298	movdqa	%xmm8,0+80(%rbp)
2299	movdqa	%xmm7,%xmm8
2300	psrld	$20,%xmm8
2301	pslld	$32-20,%xmm7
2302	pxor	%xmm8,%xmm7
2303	movdqa	%xmm6,%xmm8
2304	psrld	$20,%xmm8
2305	pslld	$32-20,%xmm6
2306	pxor	%xmm8,%xmm6
2307	movdqa	%xmm5,%xmm8
2308	psrld	$20,%xmm8
2309	pslld	$32-20,%xmm5
2310	pxor	%xmm8,%xmm5
2311	movdqa	%xmm4,%xmm8
2312	psrld	$20,%xmm8
2313	pslld	$32-20,%xmm4
2314	pxor	%xmm8,%xmm4
2315	movdqa	.Lrol8(%rip),%xmm8
2316	paddd	%xmm7,%xmm3
2317	paddd	%xmm6,%xmm2
2318	paddd	%xmm5,%xmm1
2319	paddd	%xmm4,%xmm0
2320	pxor	%xmm3,%xmm15
2321	pxor	%xmm2,%xmm14
2322	pxor	%xmm1,%xmm13
2323	pxor	%xmm0,%xmm12
2324.byte	102,69,15,56,0,248
2325.byte	102,69,15,56,0,240
2326.byte	102,69,15,56,0,232
2327.byte	102,69,15,56,0,224
2328	movdqa	0+80(%rbp),%xmm8
2329	paddd	%xmm15,%xmm11
2330	paddd	%xmm14,%xmm10
2331	paddd	%xmm13,%xmm9
2332	paddd	%xmm12,%xmm8
2333	pxor	%xmm11,%xmm7
2334	pxor	%xmm10,%xmm6
2335	pxor	%xmm9,%xmm5
2336	pxor	%xmm8,%xmm4
2337	movdqa	%xmm8,0+80(%rbp)
2338	movdqa	%xmm7,%xmm8
2339	psrld	$25,%xmm8
2340	pslld	$32-25,%xmm7
2341	pxor	%xmm8,%xmm7
2342	movdqa	%xmm6,%xmm8
2343	psrld	$25,%xmm8
2344	pslld	$32-25,%xmm6
2345	pxor	%xmm8,%xmm6
2346	movdqa	%xmm5,%xmm8
2347	psrld	$25,%xmm8
2348	pslld	$32-25,%xmm5
2349	pxor	%xmm8,%xmm5
2350	movdqa	%xmm4,%xmm8
2351	psrld	$25,%xmm8
2352	pslld	$32-25,%xmm4
2353	pxor	%xmm8,%xmm4
2354	movdqa	0+80(%rbp),%xmm8
2355.byte	102,15,58,15,255,12
2356.byte	102,69,15,58,15,219,8
2357.byte	102,69,15,58,15,255,4
2358.byte	102,15,58,15,246,12
2359.byte	102,69,15,58,15,210,8
2360.byte	102,69,15,58,15,246,4
2361.byte	102,15,58,15,237,12
2362.byte	102,69,15,58,15,201,8
2363.byte	102,69,15,58,15,237,4
2364.byte	102,15,58,15,228,12
2365.byte	102,69,15,58,15,192,8
2366.byte	102,69,15,58,15,228,4
2367
2368	decq	%r10
2369	jnz	.Lseal_sse_init_rounds
2370	paddd	.Lchacha20_consts(%rip),%xmm3
2371	paddd	0+48(%rbp),%xmm7
2372	paddd	0+64(%rbp),%xmm11
2373	paddd	0+144(%rbp),%xmm15
2374	paddd	.Lchacha20_consts(%rip),%xmm2
2375	paddd	0+48(%rbp),%xmm6
2376	paddd	0+64(%rbp),%xmm10
2377	paddd	0+128(%rbp),%xmm14
2378	paddd	.Lchacha20_consts(%rip),%xmm1
2379	paddd	0+48(%rbp),%xmm5
2380	paddd	0+64(%rbp),%xmm9
2381	paddd	0+112(%rbp),%xmm13
2382	paddd	.Lchacha20_consts(%rip),%xmm0
2383	paddd	0+48(%rbp),%xmm4
2384	paddd	0+64(%rbp),%xmm8
2385	paddd	0+96(%rbp),%xmm12
2386
2387
2388	pand	.Lclamp(%rip),%xmm3
2389	movdqa	%xmm3,0+0(%rbp)
2390	movdqa	%xmm7,0+16(%rbp)
2391
2392	movq	%r8,%r8
2393	call	poly_hash_ad_internal
2394	movdqu	0 + 0(%rsi),%xmm3
2395	movdqu	16 + 0(%rsi),%xmm7
2396	movdqu	32 + 0(%rsi),%xmm11
2397	movdqu	48 + 0(%rsi),%xmm15
2398	pxor	%xmm3,%xmm2
2399	pxor	%xmm7,%xmm6
2400	pxor	%xmm11,%xmm10
2401	pxor	%xmm14,%xmm15
2402	movdqu	%xmm2,0 + 0(%rdi)
2403	movdqu	%xmm6,16 + 0(%rdi)
2404	movdqu	%xmm10,32 + 0(%rdi)
2405	movdqu	%xmm15,48 + 0(%rdi)
2406	movdqu	0 + 64(%rsi),%xmm3
2407	movdqu	16 + 64(%rsi),%xmm7
2408	movdqu	32 + 64(%rsi),%xmm11
2409	movdqu	48 + 64(%rsi),%xmm15
2410	pxor	%xmm3,%xmm1
2411	pxor	%xmm7,%xmm5
2412	pxor	%xmm11,%xmm9
2413	pxor	%xmm13,%xmm15
2414	movdqu	%xmm1,0 + 64(%rdi)
2415	movdqu	%xmm5,16 + 64(%rdi)
2416	movdqu	%xmm9,32 + 64(%rdi)
2417	movdqu	%xmm15,48 + 64(%rdi)
2418
2419	cmpq	$192,%rbx
2420	ja	.Lseal_sse_main_init
2421	movq	$128,%rcx
2422	subq	$128,%rbx
2423	leaq	128(%rsi),%rsi
2424	jmp	.Lseal_sse_128_tail_hash
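# More than 192 bytes of plaintext remain: encrypt a third 64-byte block with
# the leftover keystream, then load the hash/round counters for the main loop.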
2425.Lseal_sse_main_init:
2426	movdqu	0 + 128(%rsi),%xmm3
2427	movdqu	16 + 128(%rsi),%xmm7
2428	movdqu	32 + 128(%rsi),%xmm11
2429	movdqu	48 + 128(%rsi),%xmm15
2430	pxor	%xmm3,%xmm0
2431	pxor	%xmm7,%xmm4
2432	pxor	%xmm11,%xmm8
2433	pxor	%xmm12,%xmm15
2434	movdqu	%xmm0,0 + 128(%rdi)
2435	movdqu	%xmm4,16 + 128(%rdi)
2436	movdqu	%xmm8,32 + 128(%rdi)
2437	movdqu	%xmm15,48 + 128(%rdi)
2438
2439	movq	$192,%rcx
2440	subq	$192,%rbx
2441	leaq	192(%rsi),%rsi
2442	movq	$2,%rcx
2443	movq	$8,%r8
2444	cmpq	$64,%rbx
2445	jbe	.Lseal_sse_tail_64
2446	cmpq	$128,%rbx
2447	jbe	.Lseal_sse_tail_128
2448	cmpq	$192,%rbx
2449	jbe	.Lseal_sse_tail_192
2450
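# Main seal loop: each pass generates four more 64-byte ChaCha20 blocks
# (4-way SSE), interleaving Poly1305 hashing of previously written ciphertext
# with the rounds, and then XORs the next 256 bytes of plaintext.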
2451.Lseal_sse_main_loop:
2452	movdqa	.Lchacha20_consts(%rip),%xmm0
2453	movdqa	0+48(%rbp),%xmm4
2454	movdqa	0+64(%rbp),%xmm8
2455	movdqa	%xmm0,%xmm1
2456	movdqa	%xmm4,%xmm5
2457	movdqa	%xmm8,%xmm9
2458	movdqa	%xmm0,%xmm2
2459	movdqa	%xmm4,%xmm6
2460	movdqa	%xmm8,%xmm10
2461	movdqa	%xmm0,%xmm3
2462	movdqa	%xmm4,%xmm7
2463	movdqa	%xmm8,%xmm11
2464	movdqa	0+96(%rbp),%xmm15
2465	paddd	.Lsse_inc(%rip),%xmm15
2466	movdqa	%xmm15,%xmm14
2467	paddd	.Lsse_inc(%rip),%xmm14
2468	movdqa	%xmm14,%xmm13
2469	paddd	.Lsse_inc(%rip),%xmm13
2470	movdqa	%xmm13,%xmm12
2471	paddd	.Lsse_inc(%rip),%xmm12
2472	movdqa	%xmm12,0+96(%rbp)
2473	movdqa	%xmm13,0+112(%rbp)
2474	movdqa	%xmm14,0+128(%rbp)
2475	movdqa	%xmm15,0+144(%rbp)
2476
2477.align	32
2478.Lseal_sse_main_rounds:
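# One ChaCha20 double round across the four blocks, interleaved with Poly1305
# absorption of 16-byte ciphertext blocks read from (%rdi). xmm8 is spilled to
# 80(%rbp) so it can double as the scratch register for the rotations.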
2479	movdqa	%xmm8,0+80(%rbp)
2480	movdqa	.Lrol16(%rip),%xmm8
2481	paddd	%xmm7,%xmm3
2482	paddd	%xmm6,%xmm2
2483	paddd	%xmm5,%xmm1
2484	paddd	%xmm4,%xmm0
2485	pxor	%xmm3,%xmm15
2486	pxor	%xmm2,%xmm14
2487	pxor	%xmm1,%xmm13
2488	pxor	%xmm0,%xmm12
2489.byte	102,69,15,56,0,248
2490.byte	102,69,15,56,0,240
2491.byte	102,69,15,56,0,232
2492.byte	102,69,15,56,0,224
2493	movdqa	0+80(%rbp),%xmm8
2494	paddd	%xmm15,%xmm11
2495	paddd	%xmm14,%xmm10
2496	paddd	%xmm13,%xmm9
2497	paddd	%xmm12,%xmm8
2498	pxor	%xmm11,%xmm7
2499	addq	0+0(%rdi),%r10
2500	adcq	8+0(%rdi),%r11
2501	adcq	$1,%r12
2502	pxor	%xmm10,%xmm6
2503	pxor	%xmm9,%xmm5
2504	pxor	%xmm8,%xmm4
2505	movdqa	%xmm8,0+80(%rbp)
2506	movdqa	%xmm7,%xmm8
2507	psrld	$20,%xmm8
2508	pslld	$32-20,%xmm7
2509	pxor	%xmm8,%xmm7
2510	movdqa	%xmm6,%xmm8
2511	psrld	$20,%xmm8
2512	pslld	$32-20,%xmm6
2513	pxor	%xmm8,%xmm6
2514	movdqa	%xmm5,%xmm8
2515	psrld	$20,%xmm8
2516	pslld	$32-20,%xmm5
2517	pxor	%xmm8,%xmm5
2518	movdqa	%xmm4,%xmm8
2519	psrld	$20,%xmm8
2520	pslld	$32-20,%xmm4
2521	pxor	%xmm8,%xmm4
2522	movq	0+0+0(%rbp),%rax
2523	movq	%rax,%r15
2524	mulq	%r10
2525	movq	%rax,%r13
2526	movq	%rdx,%r14
2527	movq	0+0+0(%rbp),%rax
2528	mulq	%r11
2529	imulq	%r12,%r15
2530	addq	%rax,%r14
2531	adcq	%rdx,%r15
2532	movdqa	.Lrol8(%rip),%xmm8
2533	paddd	%xmm7,%xmm3
2534	paddd	%xmm6,%xmm2
2535	paddd	%xmm5,%xmm1
2536	paddd	%xmm4,%xmm0
2537	pxor	%xmm3,%xmm15
2538	pxor	%xmm2,%xmm14
2539	pxor	%xmm1,%xmm13
2540	pxor	%xmm0,%xmm12
2541.byte	102,69,15,56,0,248
2542.byte	102,69,15,56,0,240
2543.byte	102,69,15,56,0,232
2544.byte	102,69,15,56,0,224
2545	movdqa	0+80(%rbp),%xmm8
2546	paddd	%xmm15,%xmm11
2547	paddd	%xmm14,%xmm10
2548	paddd	%xmm13,%xmm9
2549	paddd	%xmm12,%xmm8
2550	pxor	%xmm11,%xmm7
2551	pxor	%xmm10,%xmm6
2552	movq	8+0+0(%rbp),%rax
2553	movq	%rax,%r9
2554	mulq	%r10
2555	addq	%rax,%r14
2556	adcq	$0,%rdx
2557	movq	%rdx,%r10
2558	movq	8+0+0(%rbp),%rax
2559	mulq	%r11
2560	addq	%rax,%r15
2561	adcq	$0,%rdx
2562	pxor	%xmm9,%xmm5
2563	pxor	%xmm8,%xmm4
2564	movdqa	%xmm8,0+80(%rbp)
2565	movdqa	%xmm7,%xmm8
2566	psrld	$25,%xmm8
2567	pslld	$32-25,%xmm7
2568	pxor	%xmm8,%xmm7
2569	movdqa	%xmm6,%xmm8
2570	psrld	$25,%xmm8
2571	pslld	$32-25,%xmm6
2572	pxor	%xmm8,%xmm6
2573	movdqa	%xmm5,%xmm8
2574	psrld	$25,%xmm8
2575	pslld	$32-25,%xmm5
2576	pxor	%xmm8,%xmm5
2577	movdqa	%xmm4,%xmm8
2578	psrld	$25,%xmm8
2579	pslld	$32-25,%xmm4
2580	pxor	%xmm8,%xmm4
2581	movdqa	0+80(%rbp),%xmm8
2582	imulq	%r12,%r9
2583	addq	%r10,%r15
2584	adcq	%rdx,%r9
2585.byte	102,15,58,15,255,4
2586.byte	102,69,15,58,15,219,8
2587.byte	102,69,15,58,15,255,12
2588.byte	102,15,58,15,246,4
2589.byte	102,69,15,58,15,210,8
2590.byte	102,69,15,58,15,246,12
2591.byte	102,15,58,15,237,4
2592.byte	102,69,15,58,15,201,8
2593.byte	102,69,15,58,15,237,12
2594.byte	102,15,58,15,228,4
2595.byte	102,69,15,58,15,192,8
2596.byte	102,69,15,58,15,228,12
2597	movdqa	%xmm8,0+80(%rbp)
2598	movdqa	.Lrol16(%rip),%xmm8
2599	paddd	%xmm7,%xmm3
2600	paddd	%xmm6,%xmm2
2601	paddd	%xmm5,%xmm1
2602	paddd	%xmm4,%xmm0
2603	pxor	%xmm3,%xmm15
2604	pxor	%xmm2,%xmm14
2605	movq	%r13,%r10
2606	movq	%r14,%r11
2607	movq	%r15,%r12
2608	andq	$3,%r12
2609	movq	%r15,%r13
2610	andq	$-4,%r13
2611	movq	%r9,%r14
2612	shrdq	$2,%r9,%r15
2613	shrq	$2,%r9
2614	addq	%r13,%r15
2615	adcq	%r14,%r9
2616	addq	%r15,%r10
2617	adcq	%r9,%r11
2618	adcq	$0,%r12
2619	pxor	%xmm1,%xmm13
2620	pxor	%xmm0,%xmm12
2621.byte	102,69,15,56,0,248
2622.byte	102,69,15,56,0,240
2623.byte	102,69,15,56,0,232
2624.byte	102,69,15,56,0,224
2625	movdqa	0+80(%rbp),%xmm8
2626	paddd	%xmm15,%xmm11
2627	paddd	%xmm14,%xmm10
2628	paddd	%xmm13,%xmm9
2629	paddd	%xmm12,%xmm8
2630	pxor	%xmm11,%xmm7
2631	pxor	%xmm10,%xmm6
2632	pxor	%xmm9,%xmm5
2633	pxor	%xmm8,%xmm4
2634	movdqa	%xmm8,0+80(%rbp)
2635	movdqa	%xmm7,%xmm8
2636	psrld	$20,%xmm8
2637	pslld	$32-20,%xmm7
2638	pxor	%xmm8,%xmm7
2639	movdqa	%xmm6,%xmm8
2640	psrld	$20,%xmm8
2641	pslld	$32-20,%xmm6
2642	pxor	%xmm8,%xmm6
2643	movdqa	%xmm5,%xmm8
2644	psrld	$20,%xmm8
2645	pslld	$32-20,%xmm5
2646	pxor	%xmm8,%xmm5
2647	movdqa	%xmm4,%xmm8
2648	psrld	$20,%xmm8
2649	pslld	$32-20,%xmm4
2650	pxor	%xmm8,%xmm4
2651	movdqa	.Lrol8(%rip),%xmm8
2652	paddd	%xmm7,%xmm3
2653	paddd	%xmm6,%xmm2
2654	paddd	%xmm5,%xmm1
2655	paddd	%xmm4,%xmm0
2656	pxor	%xmm3,%xmm15
2657	pxor	%xmm2,%xmm14
2658	pxor	%xmm1,%xmm13
2659	pxor	%xmm0,%xmm12
2660.byte	102,69,15,56,0,248
2661.byte	102,69,15,56,0,240
2662.byte	102,69,15,56,0,232
2663.byte	102,69,15,56,0,224
2664	movdqa	0+80(%rbp),%xmm8
2665	paddd	%xmm15,%xmm11
2666	paddd	%xmm14,%xmm10
2667	paddd	%xmm13,%xmm9
2668	paddd	%xmm12,%xmm8
2669	pxor	%xmm11,%xmm7
2670	pxor	%xmm10,%xmm6
2671	pxor	%xmm9,%xmm5
2672	pxor	%xmm8,%xmm4
2673	movdqa	%xmm8,0+80(%rbp)
2674	movdqa	%xmm7,%xmm8
2675	psrld	$25,%xmm8
2676	pslld	$32-25,%xmm7
2677	pxor	%xmm8,%xmm7
2678	movdqa	%xmm6,%xmm8
2679	psrld	$25,%xmm8
2680	pslld	$32-25,%xmm6
2681	pxor	%xmm8,%xmm6
2682	movdqa	%xmm5,%xmm8
2683	psrld	$25,%xmm8
2684	pslld	$32-25,%xmm5
2685	pxor	%xmm8,%xmm5
2686	movdqa	%xmm4,%xmm8
2687	psrld	$25,%xmm8
2688	pslld	$32-25,%xmm4
2689	pxor	%xmm8,%xmm4
2690	movdqa	0+80(%rbp),%xmm8
2691.byte	102,15,58,15,255,12
2692.byte	102,69,15,58,15,219,8
2693.byte	102,69,15,58,15,255,4
2694.byte	102,15,58,15,246,12
2695.byte	102,69,15,58,15,210,8
2696.byte	102,69,15,58,15,246,4
2697.byte	102,15,58,15,237,12
2698.byte	102,69,15,58,15,201,8
2699.byte	102,69,15,58,15,237,4
2700.byte	102,15,58,15,228,12
2701.byte	102,69,15,58,15,192,8
2702.byte	102,69,15,58,15,228,4
2703
2704	leaq	16(%rdi),%rdi
2705	decq	%r8
2706	jge	.Lseal_sse_main_rounds
2707	addq	0+0(%rdi),%r10
2708	adcq	8+0(%rdi),%r11
2709	adcq	$1,%r12
2710	movq	0+0+0(%rbp),%rax
2711	movq	%rax,%r15
2712	mulq	%r10
2713	movq	%rax,%r13
2714	movq	%rdx,%r14
2715	movq	0+0+0(%rbp),%rax
2716	mulq	%r11
2717	imulq	%r12,%r15
2718	addq	%rax,%r14
2719	adcq	%rdx,%r15
2720	movq	8+0+0(%rbp),%rax
2721	movq	%rax,%r9
2722	mulq	%r10
2723	addq	%rax,%r14
2724	adcq	$0,%rdx
2725	movq	%rdx,%r10
2726	movq	8+0+0(%rbp),%rax
2727	mulq	%r11
2728	addq	%rax,%r15
2729	adcq	$0,%rdx
2730	imulq	%r12,%r9
2731	addq	%r10,%r15
2732	adcq	%rdx,%r9
2733	movq	%r13,%r10
2734	movq	%r14,%r11
2735	movq	%r15,%r12
2736	andq	$3,%r12
2737	movq	%r15,%r13
2738	andq	$-4,%r13
2739	movq	%r9,%r14
2740	shrdq	$2,%r9,%r15
2741	shrq	$2,%r9
2742	addq	%r13,%r15
2743	adcq	%r14,%r9
2744	addq	%r15,%r10
2745	adcq	%r9,%r11
2746	adcq	$0,%r12
2747
2748	leaq	16(%rdi),%rdi
2749	decq	%rcx
2750	jg	.Lseal_sse_main_rounds
2751	paddd	.Lchacha20_consts(%rip),%xmm3
2752	paddd	0+48(%rbp),%xmm7
2753	paddd	0+64(%rbp),%xmm11
2754	paddd	0+144(%rbp),%xmm15
2755	paddd	.Lchacha20_consts(%rip),%xmm2
2756	paddd	0+48(%rbp),%xmm6
2757	paddd	0+64(%rbp),%xmm10
2758	paddd	0+128(%rbp),%xmm14
2759	paddd	.Lchacha20_consts(%rip),%xmm1
2760	paddd	0+48(%rbp),%xmm5
2761	paddd	0+64(%rbp),%xmm9
2762	paddd	0+112(%rbp),%xmm13
2763	paddd	.Lchacha20_consts(%rip),%xmm0
2764	paddd	0+48(%rbp),%xmm4
2765	paddd	0+64(%rbp),%xmm8
2766	paddd	0+96(%rbp),%xmm12
2767
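# Rounds finished and the initial state added back above; now XOR 256 bytes of
# plaintext with the keystream and write the ciphertext. xmm14 is parked at
# 80(%rbp) so it can be reused as the load register for the first 64 bytes.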
2768	movdqa	%xmm14,0+80(%rbp)
2769	movdqa	%xmm14,0+80(%rbp)
2770	movdqu	0 + 0(%rsi),%xmm14
2771	pxor	%xmm3,%xmm14
2772	movdqu	%xmm14,0 + 0(%rdi)
2773	movdqu	16 + 0(%rsi),%xmm14
2774	pxor	%xmm7,%xmm14
2775	movdqu	%xmm14,16 + 0(%rdi)
2776	movdqu	32 + 0(%rsi),%xmm14
2777	pxor	%xmm11,%xmm14
2778	movdqu	%xmm14,32 + 0(%rdi)
2779	movdqu	48 + 0(%rsi),%xmm14
2780	pxor	%xmm15,%xmm14
2781	movdqu	%xmm14,48 + 0(%rdi)
2782
2783	movdqa	0+80(%rbp),%xmm14
2784	movdqu	0 + 64(%rsi),%xmm3
2785	movdqu	16 + 64(%rsi),%xmm7
2786	movdqu	32 + 64(%rsi),%xmm11
2787	movdqu	48 + 64(%rsi),%xmm15
2788	pxor	%xmm3,%xmm2
2789	pxor	%xmm7,%xmm6
2790	pxor	%xmm11,%xmm10
2791	pxor	%xmm14,%xmm15
2792	movdqu	%xmm2,0 + 64(%rdi)
2793	movdqu	%xmm6,16 + 64(%rdi)
2794	movdqu	%xmm10,32 + 64(%rdi)
2795	movdqu	%xmm15,48 + 64(%rdi)
2796	movdqu	0 + 128(%rsi),%xmm3
2797	movdqu	16 + 128(%rsi),%xmm7
2798	movdqu	32 + 128(%rsi),%xmm11
2799	movdqu	48 + 128(%rsi),%xmm15
2800	pxor	%xmm3,%xmm1
2801	pxor	%xmm7,%xmm5
2802	pxor	%xmm11,%xmm9
2803	pxor	%xmm13,%xmm15
2804	movdqu	%xmm1,0 + 128(%rdi)
2805	movdqu	%xmm5,16 + 128(%rdi)
2806	movdqu	%xmm9,32 + 128(%rdi)
2807	movdqu	%xmm15,48 + 128(%rdi)
2808
2809	cmpq	$256,%rbx
2810	ja	.Lseal_sse_main_loop_xor
2811
2812	movq	$192,%rcx
2813	subq	$192,%rbx
2814	leaq	192(%rsi),%rsi
2815	jmp	.Lseal_sse_128_tail_hash
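# More than 256 bytes left: consume the fourth block for bytes 192..255 and go
# around the main loop again, or dispatch to a tail path sized to the
# remainder.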
2816.Lseal_sse_main_loop_xor:
2817	movdqu	0 + 192(%rsi),%xmm3
2818	movdqu	16 + 192(%rsi),%xmm7
2819	movdqu	32 + 192(%rsi),%xmm11
2820	movdqu	48 + 192(%rsi),%xmm15
2821	pxor	%xmm3,%xmm0
2822	pxor	%xmm7,%xmm4
2823	pxor	%xmm11,%xmm8
2824	pxor	%xmm12,%xmm15
2825	movdqu	%xmm0,0 + 192(%rdi)
2826	movdqu	%xmm4,16 + 192(%rdi)
2827	movdqu	%xmm8,32 + 192(%rdi)
2828	movdqu	%xmm15,48 + 192(%rdi)
2829
2830	leaq	256(%rsi),%rsi
2831	subq	$256,%rbx
2832	movq	$6,%rcx
2833	movq	$4,%r8
2834	cmpq	$192,%rbx
2835	jg	.Lseal_sse_main_loop
2836	movq	%rbx,%rcx
2837	testq	%rbx,%rbx
2838	je	.Lseal_sse_128_tail_hash
2839	movq	$6,%rcx
2840	cmpq	$128,%rbx
2841	ja	.Lseal_sse_tail_192
2842	cmpq	$64,%rbx
2843	ja	.Lseal_sse_tail_128
2844
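# Tail of at most 64 bytes: one more ChaCha20 block, with Poly1305 hashing of
# the still-pending ciphertext interleaved into the rounds.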
2845.Lseal_sse_tail_64:
2846	movdqa	.Lchacha20_consts(%rip),%xmm0
2847	movdqa	0+48(%rbp),%xmm4
2848	movdqa	0+64(%rbp),%xmm8
2849	movdqa	0+96(%rbp),%xmm12
2850	paddd	.Lsse_inc(%rip),%xmm12
2851	movdqa	%xmm12,0+96(%rbp)
2852
2853.Lseal_sse_tail_64_rounds_and_x2hash:
2854	addq	0+0(%rdi),%r10
2855	adcq	8+0(%rdi),%r11
2856	adcq	$1,%r12
2857	movq	0+0+0(%rbp),%rax
2858	movq	%rax,%r15
2859	mulq	%r10
2860	movq	%rax,%r13
2861	movq	%rdx,%r14
2862	movq	0+0+0(%rbp),%rax
2863	mulq	%r11
2864	imulq	%r12,%r15
2865	addq	%rax,%r14
2866	adcq	%rdx,%r15
2867	movq	8+0+0(%rbp),%rax
2868	movq	%rax,%r9
2869	mulq	%r10
2870	addq	%rax,%r14
2871	adcq	$0,%rdx
2872	movq	%rdx,%r10
2873	movq	8+0+0(%rbp),%rax
2874	mulq	%r11
2875	addq	%rax,%r15
2876	adcq	$0,%rdx
2877	imulq	%r12,%r9
2878	addq	%r10,%r15
2879	adcq	%rdx,%r9
2880	movq	%r13,%r10
2881	movq	%r14,%r11
2882	movq	%r15,%r12
2883	andq	$3,%r12
2884	movq	%r15,%r13
2885	andq	$-4,%r13
2886	movq	%r9,%r14
2887	shrdq	$2,%r9,%r15
2888	shrq	$2,%r9
2889	addq	%r13,%r15
2890	adcq	%r14,%r9
2891	addq	%r15,%r10
2892	adcq	%r9,%r11
2893	adcq	$0,%r12
2894
2895	leaq	16(%rdi),%rdi
2896.Lseal_sse_tail_64_rounds_and_x1hash:
2897	paddd	%xmm4,%xmm0
2898	pxor	%xmm0,%xmm12
2899	pshufb	.Lrol16(%rip),%xmm12
2900	paddd	%xmm12,%xmm8
2901	pxor	%xmm8,%xmm4
2902	movdqa	%xmm4,%xmm3
2903	pslld	$12,%xmm3
2904	psrld	$20,%xmm4
2905	pxor	%xmm3,%xmm4
2906	paddd	%xmm4,%xmm0
2907	pxor	%xmm0,%xmm12
2908	pshufb	.Lrol8(%rip),%xmm12
2909	paddd	%xmm12,%xmm8
2910	pxor	%xmm8,%xmm4
2911	movdqa	%xmm4,%xmm3
2912	pslld	$7,%xmm3
2913	psrld	$25,%xmm4
2914	pxor	%xmm3,%xmm4
2915.byte	102,15,58,15,228,4
2916.byte	102,69,15,58,15,192,8
2917.byte	102,69,15,58,15,228,12
2918	paddd	%xmm4,%xmm0
2919	pxor	%xmm0,%xmm12
2920	pshufb	.Lrol16(%rip),%xmm12
2921	paddd	%xmm12,%xmm8
2922	pxor	%xmm8,%xmm4
2923	movdqa	%xmm4,%xmm3
2924	pslld	$12,%xmm3
2925	psrld	$20,%xmm4
2926	pxor	%xmm3,%xmm4
2927	paddd	%xmm4,%xmm0
2928	pxor	%xmm0,%xmm12
2929	pshufb	.Lrol8(%rip),%xmm12
2930	paddd	%xmm12,%xmm8
2931	pxor	%xmm8,%xmm4
2932	movdqa	%xmm4,%xmm3
2933	pslld	$7,%xmm3
2934	psrld	$25,%xmm4
2935	pxor	%xmm3,%xmm4
2936.byte	102,15,58,15,228,12
2937.byte	102,69,15,58,15,192,8
2938.byte	102,69,15,58,15,228,4
2939	addq	0+0(%rdi),%r10
2940	adcq	8+0(%rdi),%r11
2941	adcq	$1,%r12
2942	movq	0+0+0(%rbp),%rax
2943	movq	%rax,%r15
2944	mulq	%r10
2945	movq	%rax,%r13
2946	movq	%rdx,%r14
2947	movq	0+0+0(%rbp),%rax
2948	mulq	%r11
2949	imulq	%r12,%r15
2950	addq	%rax,%r14
2951	adcq	%rdx,%r15
2952	movq	8+0+0(%rbp),%rax
2953	movq	%rax,%r9
2954	mulq	%r10
2955	addq	%rax,%r14
2956	adcq	$0,%rdx
2957	movq	%rdx,%r10
2958	movq	8+0+0(%rbp),%rax
2959	mulq	%r11
2960	addq	%rax,%r15
2961	adcq	$0,%rdx
2962	imulq	%r12,%r9
2963	addq	%r10,%r15
2964	adcq	%rdx,%r9
2965	movq	%r13,%r10
2966	movq	%r14,%r11
2967	movq	%r15,%r12
2968	andq	$3,%r12
2969	movq	%r15,%r13
2970	andq	$-4,%r13
2971	movq	%r9,%r14
2972	shrdq	$2,%r9,%r15
2973	shrq	$2,%r9
2974	addq	%r13,%r15
2975	adcq	%r14,%r9
2976	addq	%r15,%r10
2977	adcq	%r9,%r11
2978	adcq	$0,%r12
2979
2980	leaq	16(%rdi),%rdi
2981	decq	%rcx
2982	jg	.Lseal_sse_tail_64_rounds_and_x2hash
2983	decq	%r8
2984	jge	.Lseal_sse_tail_64_rounds_and_x1hash
2985	paddd	.Lchacha20_consts(%rip),%xmm0
2986	paddd	0+48(%rbp),%xmm4
2987	paddd	0+64(%rbp),%xmm8
2988	paddd	0+96(%rbp),%xmm12
2989
2990	jmp	.Lseal_sse_128_tail_xor
2991
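# Tail of 65..128 bytes: two ChaCha20 blocks; 64 bytes are encrypted here and
# the second block stays in registers for the 16-byte-at-a-time XOR loop.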
2992.Lseal_sse_tail_128:
2993	movdqa	.Lchacha20_consts(%rip),%xmm0
2994	movdqa	0+48(%rbp),%xmm4
2995	movdqa	0+64(%rbp),%xmm8
2996	movdqa	%xmm0,%xmm1
2997	movdqa	%xmm4,%xmm5
2998	movdqa	%xmm8,%xmm9
2999	movdqa	0+96(%rbp),%xmm13
3000	paddd	.Lsse_inc(%rip),%xmm13
3001	movdqa	%xmm13,%xmm12
3002	paddd	.Lsse_inc(%rip),%xmm12
3003	movdqa	%xmm12,0+96(%rbp)
3004	movdqa	%xmm13,0+112(%rbp)
3005
3006.Lseal_sse_tail_128_rounds_and_x2hash:
3007	addq	0+0(%rdi),%r10
3008	adcq	8+0(%rdi),%r11
3009	adcq	$1,%r12
3010	movq	0+0+0(%rbp),%rax
3011	movq	%rax,%r15
3012	mulq	%r10
3013	movq	%rax,%r13
3014	movq	%rdx,%r14
3015	movq	0+0+0(%rbp),%rax
3016	mulq	%r11
3017	imulq	%r12,%r15
3018	addq	%rax,%r14
3019	adcq	%rdx,%r15
3020	movq	8+0+0(%rbp),%rax
3021	movq	%rax,%r9
3022	mulq	%r10
3023	addq	%rax,%r14
3024	adcq	$0,%rdx
3025	movq	%rdx,%r10
3026	movq	8+0+0(%rbp),%rax
3027	mulq	%r11
3028	addq	%rax,%r15
3029	adcq	$0,%rdx
3030	imulq	%r12,%r9
3031	addq	%r10,%r15
3032	adcq	%rdx,%r9
3033	movq	%r13,%r10
3034	movq	%r14,%r11
3035	movq	%r15,%r12
3036	andq	$3,%r12
3037	movq	%r15,%r13
3038	andq	$-4,%r13
3039	movq	%r9,%r14
3040	shrdq	$2,%r9,%r15
3041	shrq	$2,%r9
3042	addq	%r13,%r15
3043	adcq	%r14,%r9
3044	addq	%r15,%r10
3045	adcq	%r9,%r11
3046	adcq	$0,%r12
3047
3048	leaq	16(%rdi),%rdi
3049.Lseal_sse_tail_128_rounds_and_x1hash:
3050	paddd	%xmm4,%xmm0
3051	pxor	%xmm0,%xmm12
3052	pshufb	.Lrol16(%rip),%xmm12
3053	paddd	%xmm12,%xmm8
3054	pxor	%xmm8,%xmm4
3055	movdqa	%xmm4,%xmm3
3056	pslld	$12,%xmm3
3057	psrld	$20,%xmm4
3058	pxor	%xmm3,%xmm4
3059	paddd	%xmm4,%xmm0
3060	pxor	%xmm0,%xmm12
3061	pshufb	.Lrol8(%rip),%xmm12
3062	paddd	%xmm12,%xmm8
3063	pxor	%xmm8,%xmm4
3064	movdqa	%xmm4,%xmm3
3065	pslld	$7,%xmm3
3066	psrld	$25,%xmm4
3067	pxor	%xmm3,%xmm4
3068.byte	102,15,58,15,228,4
3069.byte	102,69,15,58,15,192,8
3070.byte	102,69,15,58,15,228,12
3071	paddd	%xmm5,%xmm1
3072	pxor	%xmm1,%xmm13
3073	pshufb	.Lrol16(%rip),%xmm13
3074	paddd	%xmm13,%xmm9
3075	pxor	%xmm9,%xmm5
3076	movdqa	%xmm5,%xmm3
3077	pslld	$12,%xmm3
3078	psrld	$20,%xmm5
3079	pxor	%xmm3,%xmm5
3080	paddd	%xmm5,%xmm1
3081	pxor	%xmm1,%xmm13
3082	pshufb	.Lrol8(%rip),%xmm13
3083	paddd	%xmm13,%xmm9
3084	pxor	%xmm9,%xmm5
3085	movdqa	%xmm5,%xmm3
3086	pslld	$7,%xmm3
3087	psrld	$25,%xmm5
3088	pxor	%xmm3,%xmm5
3089.byte	102,15,58,15,237,4
3090.byte	102,69,15,58,15,201,8
3091.byte	102,69,15,58,15,237,12
3092	addq	0+0(%rdi),%r10
3093	adcq	8+0(%rdi),%r11
3094	adcq	$1,%r12
3095	movq	0+0+0(%rbp),%rax
3096	movq	%rax,%r15
3097	mulq	%r10
3098	movq	%rax,%r13
3099	movq	%rdx,%r14
3100	movq	0+0+0(%rbp),%rax
3101	mulq	%r11
3102	imulq	%r12,%r15
3103	addq	%rax,%r14
3104	adcq	%rdx,%r15
3105	movq	8+0+0(%rbp),%rax
3106	movq	%rax,%r9
3107	mulq	%r10
3108	addq	%rax,%r14
3109	adcq	$0,%rdx
3110	movq	%rdx,%r10
3111	movq	8+0+0(%rbp),%rax
3112	mulq	%r11
3113	addq	%rax,%r15
3114	adcq	$0,%rdx
3115	imulq	%r12,%r9
3116	addq	%r10,%r15
3117	adcq	%rdx,%r9
3118	movq	%r13,%r10
3119	movq	%r14,%r11
3120	movq	%r15,%r12
3121	andq	$3,%r12
3122	movq	%r15,%r13
3123	andq	$-4,%r13
3124	movq	%r9,%r14
3125	shrdq	$2,%r9,%r15
3126	shrq	$2,%r9
3127	addq	%r13,%r15
3128	adcq	%r14,%r9
3129	addq	%r15,%r10
3130	adcq	%r9,%r11
3131	adcq	$0,%r12
3132	paddd	%xmm4,%xmm0
3133	pxor	%xmm0,%xmm12
3134	pshufb	.Lrol16(%rip),%xmm12
3135	paddd	%xmm12,%xmm8
3136	pxor	%xmm8,%xmm4
3137	movdqa	%xmm4,%xmm3
3138	pslld	$12,%xmm3
3139	psrld	$20,%xmm4
3140	pxor	%xmm3,%xmm4
3141	paddd	%xmm4,%xmm0
3142	pxor	%xmm0,%xmm12
3143	pshufb	.Lrol8(%rip),%xmm12
3144	paddd	%xmm12,%xmm8
3145	pxor	%xmm8,%xmm4
3146	movdqa	%xmm4,%xmm3
3147	pslld	$7,%xmm3
3148	psrld	$25,%xmm4
3149	pxor	%xmm3,%xmm4
3150.byte	102,15,58,15,228,12
3151.byte	102,69,15,58,15,192,8
3152.byte	102,69,15,58,15,228,4
3153	paddd	%xmm5,%xmm1
3154	pxor	%xmm1,%xmm13
3155	pshufb	.Lrol16(%rip),%xmm13
3156	paddd	%xmm13,%xmm9
3157	pxor	%xmm9,%xmm5
3158	movdqa	%xmm5,%xmm3
3159	pslld	$12,%xmm3
3160	psrld	$20,%xmm5
3161	pxor	%xmm3,%xmm5
3162	paddd	%xmm5,%xmm1
3163	pxor	%xmm1,%xmm13
3164	pshufb	.Lrol8(%rip),%xmm13
3165	paddd	%xmm13,%xmm9
3166	pxor	%xmm9,%xmm5
3167	movdqa	%xmm5,%xmm3
3168	pslld	$7,%xmm3
3169	psrld	$25,%xmm5
3170	pxor	%xmm3,%xmm5
3171.byte	102,15,58,15,237,12
3172.byte	102,69,15,58,15,201,8
3173.byte	102,69,15,58,15,237,4
3174
3175	leaq	16(%rdi),%rdi
3176	decq	%rcx
3177	jg	.Lseal_sse_tail_128_rounds_and_x2hash
3178	decq	%r8
3179	jge	.Lseal_sse_tail_128_rounds_and_x1hash
3180	paddd	.Lchacha20_consts(%rip),%xmm1
3181	paddd	0+48(%rbp),%xmm5
3182	paddd	0+64(%rbp),%xmm9
3183	paddd	0+112(%rbp),%xmm13
3184	paddd	.Lchacha20_consts(%rip),%xmm0
3185	paddd	0+48(%rbp),%xmm4
3186	paddd	0+64(%rbp),%xmm8
3187	paddd	0+96(%rbp),%xmm12
3188	movdqu	0 + 0(%rsi),%xmm3
3189	movdqu	16 + 0(%rsi),%xmm7
3190	movdqu	32 + 0(%rsi),%xmm11
3191	movdqu	48 + 0(%rsi),%xmm15
3192	pxor	%xmm3,%xmm1
3193	pxor	%xmm7,%xmm5
3194	pxor	%xmm11,%xmm9
3195	pxor	%xmm13,%xmm15
3196	movdqu	%xmm1,0 + 0(%rdi)
3197	movdqu	%xmm5,16 + 0(%rdi)
3198	movdqu	%xmm9,32 + 0(%rdi)
3199	movdqu	%xmm15,48 + 0(%rdi)
3200
3201	movq	$64,%rcx
3202	subq	$64,%rbx
3203	leaq	64(%rsi),%rsi
3204	jmp	.Lseal_sse_128_tail_hash
3205
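# Tail of 129..192 bytes: three ChaCha20 blocks; 128 bytes are encrypted here
# and the last block is left for the final XOR loop.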
3206.Lseal_sse_tail_192:
3207	movdqa	.Lchacha20_consts(%rip),%xmm0
3208	movdqa	0+48(%rbp),%xmm4
3209	movdqa	0+64(%rbp),%xmm8
3210	movdqa	%xmm0,%xmm1
3211	movdqa	%xmm4,%xmm5
3212	movdqa	%xmm8,%xmm9
3213	movdqa	%xmm0,%xmm2
3214	movdqa	%xmm4,%xmm6
3215	movdqa	%xmm8,%xmm10
3216	movdqa	0+96(%rbp),%xmm14
3217	paddd	.Lsse_inc(%rip),%xmm14
3218	movdqa	%xmm14,%xmm13
3219	paddd	.Lsse_inc(%rip),%xmm13
3220	movdqa	%xmm13,%xmm12
3221	paddd	.Lsse_inc(%rip),%xmm12
3222	movdqa	%xmm12,0+96(%rbp)
3223	movdqa	%xmm13,0+112(%rbp)
3224	movdqa	%xmm14,0+128(%rbp)
3225
3226.Lseal_sse_tail_192_rounds_and_x2hash:
3227	addq	0+0(%rdi),%r10
3228	adcq	8+0(%rdi),%r11
3229	adcq	$1,%r12
3230	movq	0+0+0(%rbp),%rax
3231	movq	%rax,%r15
3232	mulq	%r10
3233	movq	%rax,%r13
3234	movq	%rdx,%r14
3235	movq	0+0+0(%rbp),%rax
3236	mulq	%r11
3237	imulq	%r12,%r15
3238	addq	%rax,%r14
3239	adcq	%rdx,%r15
3240	movq	8+0+0(%rbp),%rax
3241	movq	%rax,%r9
3242	mulq	%r10
3243	addq	%rax,%r14
3244	adcq	$0,%rdx
3245	movq	%rdx,%r10
3246	movq	8+0+0(%rbp),%rax
3247	mulq	%r11
3248	addq	%rax,%r15
3249	adcq	$0,%rdx
3250	imulq	%r12,%r9
3251	addq	%r10,%r15
3252	adcq	%rdx,%r9
3253	movq	%r13,%r10
3254	movq	%r14,%r11
3255	movq	%r15,%r12
3256	andq	$3,%r12
3257	movq	%r15,%r13
3258	andq	$-4,%r13
3259	movq	%r9,%r14
3260	shrdq	$2,%r9,%r15
3261	shrq	$2,%r9
3262	addq	%r13,%r15
3263	adcq	%r14,%r9
3264	addq	%r15,%r10
3265	adcq	%r9,%r11
3266	adcq	$0,%r12
3267
3268	leaq	16(%rdi),%rdi
3269.Lseal_sse_tail_192_rounds_and_x1hash:
3270	paddd	%xmm4,%xmm0
3271	pxor	%xmm0,%xmm12
3272	pshufb	.Lrol16(%rip),%xmm12
3273	paddd	%xmm12,%xmm8
3274	pxor	%xmm8,%xmm4
3275	movdqa	%xmm4,%xmm3
3276	pslld	$12,%xmm3
3277	psrld	$20,%xmm4
3278	pxor	%xmm3,%xmm4
3279	paddd	%xmm4,%xmm0
3280	pxor	%xmm0,%xmm12
3281	pshufb	.Lrol8(%rip),%xmm12
3282	paddd	%xmm12,%xmm8
3283	pxor	%xmm8,%xmm4
3284	movdqa	%xmm4,%xmm3
3285	pslld	$7,%xmm3
3286	psrld	$25,%xmm4
3287	pxor	%xmm3,%xmm4
3288.byte	102,15,58,15,228,4
3289.byte	102,69,15,58,15,192,8
3290.byte	102,69,15,58,15,228,12
3291	paddd	%xmm5,%xmm1
3292	pxor	%xmm1,%xmm13
3293	pshufb	.Lrol16(%rip),%xmm13
3294	paddd	%xmm13,%xmm9
3295	pxor	%xmm9,%xmm5
3296	movdqa	%xmm5,%xmm3
3297	pslld	$12,%xmm3
3298	psrld	$20,%xmm5
3299	pxor	%xmm3,%xmm5
3300	paddd	%xmm5,%xmm1
3301	pxor	%xmm1,%xmm13
3302	pshufb	.Lrol8(%rip),%xmm13
3303	paddd	%xmm13,%xmm9
3304	pxor	%xmm9,%xmm5
3305	movdqa	%xmm5,%xmm3
3306	pslld	$7,%xmm3
3307	psrld	$25,%xmm5
3308	pxor	%xmm3,%xmm5
3309.byte	102,15,58,15,237,4
3310.byte	102,69,15,58,15,201,8
3311.byte	102,69,15,58,15,237,12
3312	paddd	%xmm6,%xmm2
3313	pxor	%xmm2,%xmm14
3314	pshufb	.Lrol16(%rip),%xmm14
3315	paddd	%xmm14,%xmm10
3316	pxor	%xmm10,%xmm6
3317	movdqa	%xmm6,%xmm3
3318	pslld	$12,%xmm3
3319	psrld	$20,%xmm6
3320	pxor	%xmm3,%xmm6
3321	paddd	%xmm6,%xmm2
3322	pxor	%xmm2,%xmm14
3323	pshufb	.Lrol8(%rip),%xmm14
3324	paddd	%xmm14,%xmm10
3325	pxor	%xmm10,%xmm6
3326	movdqa	%xmm6,%xmm3
3327	pslld	$7,%xmm3
3328	psrld	$25,%xmm6
3329	pxor	%xmm3,%xmm6
3330.byte	102,15,58,15,246,4
3331.byte	102,69,15,58,15,210,8
3332.byte	102,69,15,58,15,246,12
3333	addq	0+0(%rdi),%r10
3334	adcq	8+0(%rdi),%r11
3335	adcq	$1,%r12
3336	movq	0+0+0(%rbp),%rax
3337	movq	%rax,%r15
3338	mulq	%r10
3339	movq	%rax,%r13
3340	movq	%rdx,%r14
3341	movq	0+0+0(%rbp),%rax
3342	mulq	%r11
3343	imulq	%r12,%r15
3344	addq	%rax,%r14
3345	adcq	%rdx,%r15
3346	movq	8+0+0(%rbp),%rax
3347	movq	%rax,%r9
3348	mulq	%r10
3349	addq	%rax,%r14
3350	adcq	$0,%rdx
3351	movq	%rdx,%r10
3352	movq	8+0+0(%rbp),%rax
3353	mulq	%r11
3354	addq	%rax,%r15
3355	adcq	$0,%rdx
3356	imulq	%r12,%r9
3357	addq	%r10,%r15
3358	adcq	%rdx,%r9
3359	movq	%r13,%r10
3360	movq	%r14,%r11
3361	movq	%r15,%r12
3362	andq	$3,%r12
3363	movq	%r15,%r13
3364	andq	$-4,%r13
3365	movq	%r9,%r14
3366	shrdq	$2,%r9,%r15
3367	shrq	$2,%r9
3368	addq	%r13,%r15
3369	adcq	%r14,%r9
3370	addq	%r15,%r10
3371	adcq	%r9,%r11
3372	adcq	$0,%r12
3373	paddd	%xmm4,%xmm0
3374	pxor	%xmm0,%xmm12
3375	pshufb	.Lrol16(%rip),%xmm12
3376	paddd	%xmm12,%xmm8
3377	pxor	%xmm8,%xmm4
3378	movdqa	%xmm4,%xmm3
3379	pslld	$12,%xmm3
3380	psrld	$20,%xmm4
3381	pxor	%xmm3,%xmm4
3382	paddd	%xmm4,%xmm0
3383	pxor	%xmm0,%xmm12
3384	pshufb	.Lrol8(%rip),%xmm12
3385	paddd	%xmm12,%xmm8
3386	pxor	%xmm8,%xmm4
3387	movdqa	%xmm4,%xmm3
3388	pslld	$7,%xmm3
3389	psrld	$25,%xmm4
3390	pxor	%xmm3,%xmm4
3391.byte	102,15,58,15,228,12
3392.byte	102,69,15,58,15,192,8
3393.byte	102,69,15,58,15,228,4
3394	paddd	%xmm5,%xmm1
3395	pxor	%xmm1,%xmm13
3396	pshufb	.Lrol16(%rip),%xmm13
3397	paddd	%xmm13,%xmm9
3398	pxor	%xmm9,%xmm5
3399	movdqa	%xmm5,%xmm3
3400	pslld	$12,%xmm3
3401	psrld	$20,%xmm5
3402	pxor	%xmm3,%xmm5
3403	paddd	%xmm5,%xmm1
3404	pxor	%xmm1,%xmm13
3405	pshufb	.Lrol8(%rip),%xmm13
3406	paddd	%xmm13,%xmm9
3407	pxor	%xmm9,%xmm5
3408	movdqa	%xmm5,%xmm3
3409	pslld	$7,%xmm3
3410	psrld	$25,%xmm5
3411	pxor	%xmm3,%xmm5
3412.byte	102,15,58,15,237,12
3413.byte	102,69,15,58,15,201,8
3414.byte	102,69,15,58,15,237,4
3415	paddd	%xmm6,%xmm2
3416	pxor	%xmm2,%xmm14
3417	pshufb	.Lrol16(%rip),%xmm14
3418	paddd	%xmm14,%xmm10
3419	pxor	%xmm10,%xmm6
3420	movdqa	%xmm6,%xmm3
3421	pslld	$12,%xmm3
3422	psrld	$20,%xmm6
3423	pxor	%xmm3,%xmm6
3424	paddd	%xmm6,%xmm2
3425	pxor	%xmm2,%xmm14
3426	pshufb	.Lrol8(%rip),%xmm14
3427	paddd	%xmm14,%xmm10
3428	pxor	%xmm10,%xmm6
3429	movdqa	%xmm6,%xmm3
3430	pslld	$7,%xmm3
3431	psrld	$25,%xmm6
3432	pxor	%xmm3,%xmm6
3433.byte	102,15,58,15,246,12
3434.byte	102,69,15,58,15,210,8
3435.byte	102,69,15,58,15,246,4
3436
3437	leaq	16(%rdi),%rdi
3438	decq	%rcx
3439	jg	.Lseal_sse_tail_192_rounds_and_x2hash
3440	decq	%r8
3441	jge	.Lseal_sse_tail_192_rounds_and_x1hash
3442	paddd	.Lchacha20_consts(%rip),%xmm2
3443	paddd	0+48(%rbp),%xmm6
3444	paddd	0+64(%rbp),%xmm10
3445	paddd	0+128(%rbp),%xmm14
3446	paddd	.Lchacha20_consts(%rip),%xmm1
3447	paddd	0+48(%rbp),%xmm5
3448	paddd	0+64(%rbp),%xmm9
3449	paddd	0+112(%rbp),%xmm13
3450	paddd	.Lchacha20_consts(%rip),%xmm0
3451	paddd	0+48(%rbp),%xmm4
3452	paddd	0+64(%rbp),%xmm8
3453	paddd	0+96(%rbp),%xmm12
3454	movdqu	0 + 0(%rsi),%xmm3
3455	movdqu	16 + 0(%rsi),%xmm7
3456	movdqu	32 + 0(%rsi),%xmm11
3457	movdqu	48 + 0(%rsi),%xmm15
3458	pxor	%xmm3,%xmm2
3459	pxor	%xmm7,%xmm6
3460	pxor	%xmm11,%xmm10
3461	pxor	%xmm14,%xmm15
3462	movdqu	%xmm2,0 + 0(%rdi)
3463	movdqu	%xmm6,16 + 0(%rdi)
3464	movdqu	%xmm10,32 + 0(%rdi)
3465	movdqu	%xmm15,48 + 0(%rdi)
3466	movdqu	0 + 64(%rsi),%xmm3
3467	movdqu	16 + 64(%rsi),%xmm7
3468	movdqu	32 + 64(%rsi),%xmm11
3469	movdqu	48 + 64(%rsi),%xmm15
3470	pxor	%xmm3,%xmm1
3471	pxor	%xmm7,%xmm5
3472	pxor	%xmm11,%xmm9
3473	pxor	%xmm13,%xmm15
3474	movdqu	%xmm1,0 + 64(%rdi)
3475	movdqu	%xmm5,16 + 64(%rdi)
3476	movdqu	%xmm9,32 + 64(%rdi)
3477	movdqu	%xmm15,48 + 64(%rdi)
3478
3479	movq	$128,%rcx
3480	subq	$128,%rbx
3481	leaq	128(%rsi),%rsi
3482
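# Hash any remaining full 16-byte blocks of ciphertext (count in %rcx) before
# the final XOR of the leftover plaintext.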
3483.Lseal_sse_128_tail_hash:
3484	cmpq	$16,%rcx
3485	jb	.Lseal_sse_128_tail_xor
3486	addq	0+0(%rdi),%r10
3487	adcq	8+0(%rdi),%r11
3488	adcq	$1,%r12
3489	movq	0+0+0(%rbp),%rax
3490	movq	%rax,%r15
3491	mulq	%r10
3492	movq	%rax,%r13
3493	movq	%rdx,%r14
3494	movq	0+0+0(%rbp),%rax
3495	mulq	%r11
3496	imulq	%r12,%r15
3497	addq	%rax,%r14
3498	adcq	%rdx,%r15
3499	movq	8+0+0(%rbp),%rax
3500	movq	%rax,%r9
3501	mulq	%r10
3502	addq	%rax,%r14
3503	adcq	$0,%rdx
3504	movq	%rdx,%r10
3505	movq	8+0+0(%rbp),%rax
3506	mulq	%r11
3507	addq	%rax,%r15
3508	adcq	$0,%rdx
3509	imulq	%r12,%r9
3510	addq	%r10,%r15
3511	adcq	%rdx,%r9
3512	movq	%r13,%r10
3513	movq	%r14,%r11
3514	movq	%r15,%r12
3515	andq	$3,%r12
3516	movq	%r15,%r13
3517	andq	$-4,%r13
3518	movq	%r9,%r14
3519	shrdq	$2,%r9,%r15
3520	shrq	$2,%r9
3521	addq	%r13,%r15
3522	adcq	%r14,%r9
3523	addq	%r15,%r10
3524	adcq	%r9,%r11
3525	adcq	$0,%r12
3526
3527	subq	$16,%rcx
3528	leaq	16(%rdi),%rdi
3529	jmp	.Lseal_sse_128_tail_hash
3530
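# XOR the remaining plaintext 16 bytes at a time against the keystream held in
# xmm0, hashing each ciphertext block as it is produced and rotating the next
# keystream block into xmm0; fewer than 16 bytes fall through to the byte-wise
# tail.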
3531.Lseal_sse_128_tail_xor:
3532	cmpq	$16,%rbx
3533	jb	.Lseal_sse_tail_16
3534	subq	$16,%rbx
3535
3536	movdqu	0(%rsi),%xmm3
3537	pxor	%xmm3,%xmm0
3538	movdqu	%xmm0,0(%rdi)
3539
3540	addq	0(%rdi),%r10
3541	adcq	8(%rdi),%r11
3542	adcq	$1,%r12
3543	leaq	16(%rsi),%rsi
3544	leaq	16(%rdi),%rdi
3545	movq	0+0+0(%rbp),%rax
3546	movq	%rax,%r15
3547	mulq	%r10
3548	movq	%rax,%r13
3549	movq	%rdx,%r14
3550	movq	0+0+0(%rbp),%rax
3551	mulq	%r11
3552	imulq	%r12,%r15
3553	addq	%rax,%r14
3554	adcq	%rdx,%r15
3555	movq	8+0+0(%rbp),%rax
3556	movq	%rax,%r9
3557	mulq	%r10
3558	addq	%rax,%r14
3559	adcq	$0,%rdx
3560	movq	%rdx,%r10
3561	movq	8+0+0(%rbp),%rax
3562	mulq	%r11
3563	addq	%rax,%r15
3564	adcq	$0,%rdx
3565	imulq	%r12,%r9
3566	addq	%r10,%r15
3567	adcq	%rdx,%r9
3568	movq	%r13,%r10
3569	movq	%r14,%r11
3570	movq	%r15,%r12
3571	andq	$3,%r12
3572	movq	%r15,%r13
3573	andq	$-4,%r13
3574	movq	%r9,%r14
3575	shrdq	$2,%r9,%r15
3576	shrq	$2,%r9
3577	addq	%r13,%r15
3578	adcq	%r14,%r9
3579	addq	%r15,%r10
3580	adcq	%r9,%r11
3581	adcq	$0,%r12
3582
3583
3584	movdqa	%xmm4,%xmm0
3585	movdqa	%xmm8,%xmm4
3586	movdqa	%xmm12,%xmm8
3587	movdqa	%xmm1,%xmm12
3588	movdqa	%xmm5,%xmm1
3589	movdqa	%xmm9,%xmm5
3590	movdqa	%xmm13,%xmm9
3591	jmp	.Lseal_sse_128_tail_xor
3592
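# Final 1..15 bytes: gather them into xmm15 back to front, XOR with the
# keystream, and write them out one byte at a time. The resulting partial
# Poly1305 block is completed below, possibly together with the "extra" input
# described by the structure whose pointer is saved at 288+32(%rsp).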
3593.Lseal_sse_tail_16:
3594	testq	%rbx,%rbx
3595	jz	.Lprocess_blocks_of_extra_in
3596
3597	movq	%rbx,%r8
3598	movq	%rbx,%rcx
3599	leaq	-1(%rsi,%rbx,1),%rsi
3600	pxor	%xmm15,%xmm15
3601.Lseal_sse_tail_16_compose:
3602	pslldq	$1,%xmm15
3603	pinsrb	$0,(%rsi),%xmm15
3604	leaq	-1(%rsi),%rsi
3605	decq	%rcx
3606	jne	.Lseal_sse_tail_16_compose
3607
3608
3609	pxor	%xmm0,%xmm15
3610
3611
3612	movq	%rbx,%rcx
3613	movdqu	%xmm15,%xmm0
3614.Lseal_sse_tail_16_extract:
3615	pextrb	$0,%xmm0,(%rdi)
3616	psrldq	$1,%xmm0
3617	addq	$1,%rdi
3618	subq	$1,%rcx
3619	jnz	.Lseal_sse_tail_16_extract
3620
3621
3622
3623
3624
3625
3626
3627
3628	movq	288 + 0 + 32(%rsp),%r9
3629	movq	56(%r9),%r14
3630	movq	48(%r9),%r13
3631	testq	%r14,%r14
3632	jz	.Lprocess_partial_block
3633
3634	movq	$16,%r15
3635	subq	%rbx,%r15
3636	cmpq	%r15,%r14
3637
3638	jge	.Lload_extra_in
3639	movq	%r14,%r15
3640
3641.Lload_extra_in:
3642
3643
3644	leaq	-1(%r13,%r15,1),%rsi
3645
3646
3647	addq	%r15,%r13
3648	subq	%r15,%r14
3649	movq	%r13,48(%r9)
3650	movq	%r14,56(%r9)
3651
3652
3653
3654	addq	%r15,%r8
3655
3656
3657	pxor	%xmm11,%xmm11
3658.Lload_extra_load_loop:
3659	pslldq	$1,%xmm11
3660	pinsrb	$0,(%rsi),%xmm11
3661	leaq	-1(%rsi),%rsi
3662	subq	$1,%r15
3663	jnz	.Lload_extra_load_loop
3664
3665
3666
3667
3668	movq	%rbx,%r15
3669
3670.Lload_extra_shift_loop:
3671	pslldq	$1,%xmm11
3672	subq	$1,%r15
3673	jnz	.Lload_extra_shift_loop
3674
3675
3676
3677
3678	leaq	.Land_masks(%rip),%r15
3679	shlq	$4,%rbx
3680	pand	-16(%r15,%rbx,1),%xmm15
3681
3682
3683	por	%xmm11,%xmm15
3684
3685
3686
3687.byte	102,77,15,126,253
3688	pextrq	$1,%xmm15,%r14
3689	addq	%r13,%r10
3690	adcq	%r14,%r11
3691	adcq	$1,%r12
3692	movq	0+0+0(%rbp),%rax
3693	movq	%rax,%r15
3694	mulq	%r10
3695	movq	%rax,%r13
3696	movq	%rdx,%r14
3697	movq	0+0+0(%rbp),%rax
3698	mulq	%r11
3699	imulq	%r12,%r15
3700	addq	%rax,%r14
3701	adcq	%rdx,%r15
3702	movq	8+0+0(%rbp),%rax
3703	movq	%rax,%r9
3704	mulq	%r10
3705	addq	%rax,%r14
3706	adcq	$0,%rdx
3707	movq	%rdx,%r10
3708	movq	8+0+0(%rbp),%rax
3709	mulq	%r11
3710	addq	%rax,%r15
3711	adcq	$0,%rdx
3712	imulq	%r12,%r9
3713	addq	%r10,%r15
3714	adcq	%rdx,%r9
3715	movq	%r13,%r10
3716	movq	%r14,%r11
3717	movq	%r15,%r12
3718	andq	$3,%r12
3719	movq	%r15,%r13
3720	andq	$-4,%r13
3721	movq	%r9,%r14
3722	shrdq	$2,%r9,%r15
3723	shrq	$2,%r9
3724	addq	%r13,%r15
3725	adcq	%r14,%r9
3726	addq	%r15,%r10
3727	adcq	%r9,%r11
3728	adcq	$0,%r12
3729
3730
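# Hash the extra input described by the data block at 288+32(%rsp) (pointer at
# offset 48, length at offset 56) into the Poly1305 state: full 16-byte blocks
# first, then its trailer bytes join the pending partial block.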
3731.Lprocess_blocks_of_extra_in:
3732
3733	movq	288+32+0(%rsp),%r9
3734	movq	48(%r9),%rsi
3735	movq	56(%r9),%r8
3736	movq	%r8,%rcx
3737	shrq	$4,%r8
3738
3739.Lprocess_extra_hash_loop:
3740	jz	process_extra_in_trailer
3741	addq	0+0(%rsi),%r10
3742	adcq	8+0(%rsi),%r11
3743	adcq	$1,%r12
3744	movq	0+0+0(%rbp),%rax
3745	movq	%rax,%r15
3746	mulq	%r10
3747	movq	%rax,%r13
3748	movq	%rdx,%r14
3749	movq	0+0+0(%rbp),%rax
3750	mulq	%r11
3751	imulq	%r12,%r15
3752	addq	%rax,%r14
3753	adcq	%rdx,%r15
3754	movq	8+0+0(%rbp),%rax
3755	movq	%rax,%r9
3756	mulq	%r10
3757	addq	%rax,%r14
3758	adcq	$0,%rdx
3759	movq	%rdx,%r10
3760	movq	8+0+0(%rbp),%rax
3761	mulq	%r11
3762	addq	%rax,%r15
3763	adcq	$0,%rdx
3764	imulq	%r12,%r9
3765	addq	%r10,%r15
3766	adcq	%rdx,%r9
3767	movq	%r13,%r10
3768	movq	%r14,%r11
3769	movq	%r15,%r12
3770	andq	$3,%r12
3771	movq	%r15,%r13
3772	andq	$-4,%r13
3773	movq	%r9,%r14
3774	shrdq	$2,%r9,%r15
3775	shrq	$2,%r9
3776	addq	%r13,%r15
3777	adcq	%r14,%r9
3778	addq	%r15,%r10
3779	adcq	%r9,%r11
3780	adcq	$0,%r12
3781
3782	leaq	16(%rsi),%rsi
3783	subq	$1,%r8
3784	jmp	.Lprocess_extra_hash_loop
3785process_extra_in_trailer:
3786	andq	$15,%rcx
3787	movq	%rcx,%rbx
3788	jz	.Ldo_length_block
3789	leaq	-1(%rsi,%rcx,1),%rsi
3790
3791.Lprocess_extra_in_trailer_load:
3792	pslldq	$1,%xmm15
3793	pinsrb	$0,(%rsi),%xmm15
3794	leaq	-1(%rsi),%rsi
3795	subq	$1,%rcx
3796	jnz	.Lprocess_extra_in_trailer_load
3797
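# Mask the assembled partial block with the .Land_masks entry selected by the
# byte count in %rbx (the AEAD zero-pads the final block to 16 bytes) and
# absorb it into the Poly1305 state.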
3798.Lprocess_partial_block:
3799
3800	leaq	.Land_masks(%rip),%r15
3801	shlq	$4,%rbx
3802	pand	-16(%r15,%rbx,1),%xmm15
3803.byte	102,77,15,126,253
3804	pextrq	$1,%xmm15,%r14
3805	addq	%r13,%r10
3806	adcq	%r14,%r11
3807	adcq	$1,%r12
3808	movq	0+0+0(%rbp),%rax
3809	movq	%rax,%r15
3810	mulq	%r10
3811	movq	%rax,%r13
3812	movq	%rdx,%r14
3813	movq	0+0+0(%rbp),%rax
3814	mulq	%r11
3815	imulq	%r12,%r15
3816	addq	%rax,%r14
3817	adcq	%rdx,%r15
3818	movq	8+0+0(%rbp),%rax
3819	movq	%rax,%r9
3820	mulq	%r10
3821	addq	%rax,%r14
3822	adcq	$0,%rdx
3823	movq	%rdx,%r10
3824	movq	8+0+0(%rbp),%rax
3825	mulq	%r11
3826	addq	%rax,%r15
3827	adcq	$0,%rdx
3828	imulq	%r12,%r9
3829	addq	%r10,%r15
3830	adcq	%rdx,%r9
3831	movq	%r13,%r10
3832	movq	%r14,%r11
3833	movq	%r15,%r12
3834	andq	$3,%r12
3835	movq	%r15,%r13
3836	andq	$-4,%r13
3837	movq	%r9,%r14
3838	shrdq	$2,%r9,%r15
3839	shrq	$2,%r9
3840	addq	%r13,%r15
3841	adcq	%r14,%r9
3842	addq	%r15,%r10
3843	adcq	%r9,%r11
3844	adcq	$0,%r12
3845
3846
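# Length block: hash the encoded lengths stored at 32(%rbp) (AD length and
# ciphertext length, per the AEAD construction), then reduce the accumulator
# mod 2^130-5 via the conditional subtract below and add s from 16(%rbp) to
# form the tag, written through the pointer restored into %r9.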
3847.Ldo_length_block:
3848	addq	0+0+32(%rbp),%r10
3849	adcq	8+0+32(%rbp),%r11
3850	adcq	$1,%r12
3851	movq	0+0+0(%rbp),%rax
3852	movq	%rax,%r15
3853	mulq	%r10
3854	movq	%rax,%r13
3855	movq	%rdx,%r14
3856	movq	0+0+0(%rbp),%rax
3857	mulq	%r11
3858	imulq	%r12,%r15
3859	addq	%rax,%r14
3860	adcq	%rdx,%r15
3861	movq	8+0+0(%rbp),%rax
3862	movq	%rax,%r9
3863	mulq	%r10
3864	addq	%rax,%r14
3865	adcq	$0,%rdx
3866	movq	%rdx,%r10
3867	movq	8+0+0(%rbp),%rax
3868	mulq	%r11
3869	addq	%rax,%r15
3870	adcq	$0,%rdx
3871	imulq	%r12,%r9
3872	addq	%r10,%r15
3873	adcq	%rdx,%r9
3874	movq	%r13,%r10
3875	movq	%r14,%r11
3876	movq	%r15,%r12
3877	andq	$3,%r12
3878	movq	%r15,%r13
3879	andq	$-4,%r13
3880	movq	%r9,%r14
3881	shrdq	$2,%r9,%r15
3882	shrq	$2,%r9
3883	addq	%r13,%r15
3884	adcq	%r14,%r9
3885	addq	%r15,%r10
3886	adcq	%r9,%r11
3887	adcq	$0,%r12
3888
3889
3890	movq	%r10,%r13
3891	movq	%r11,%r14
3892	movq	%r12,%r15
3893	subq	$-5,%r10
3894	sbbq	$-1,%r11
3895	sbbq	$3,%r12
3896	cmovcq	%r13,%r10
3897	cmovcq	%r14,%r11
3898	cmovcq	%r15,%r12
3899
3900	addq	0+0+16(%rbp),%r10
3901	adcq	8+0+16(%rbp),%r11
3902
3903.cfi_remember_state
3904	addq	$288 + 0 + 32,%rsp
3905.cfi_adjust_cfa_offset	-(288 + 32)
3906
3907	popq	%r9
3908.cfi_adjust_cfa_offset	-8
3909.cfi_restore	%r9
3910	movq	%r10,(%r9)
3911	movq	%r11,8(%r9)
3912	popq	%r15
3913.cfi_adjust_cfa_offset	-8
3914.cfi_restore	%r15
3915	popq	%r14
3916.cfi_adjust_cfa_offset	-8
3917.cfi_restore	%r14
3918	popq	%r13
3919.cfi_adjust_cfa_offset	-8
3920.cfi_restore	%r13
3921	popq	%r12
3922.cfi_adjust_cfa_offset	-8
3923.cfi_restore	%r12
3924	popq	%rbx
3925.cfi_adjust_cfa_offset	-8
3926.cfi_restore	%rbx
3927	popq	%rbp
3928.cfi_adjust_cfa_offset	-8
3929.cfi_restore	%rbp
3930	.byte	0xf3,0xc3
3931
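# Short-input seal path (the label suggests at most 128 bytes of plaintext):
# three ChaCha20 blocks are computed entirely in registers; the block carrying
# the initial counter (xmm2/xmm6 after the rounds) is clamped into the
# Poly1305 key.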
3932.Lseal_sse_128:
3933.cfi_restore_state
3934	movdqu	.Lchacha20_consts(%rip),%xmm0
3935	movdqa	%xmm0,%xmm1
3936	movdqa	%xmm0,%xmm2
3937	movdqu	0(%r9),%xmm4
3938	movdqa	%xmm4,%xmm5
3939	movdqa	%xmm4,%xmm6
3940	movdqu	16(%r9),%xmm8
3941	movdqa	%xmm8,%xmm9
3942	movdqa	%xmm8,%xmm10
3943	movdqu	32(%r9),%xmm14
3944	movdqa	%xmm14,%xmm12
3945	paddd	.Lsse_inc(%rip),%xmm12
3946	movdqa	%xmm12,%xmm13
3947	paddd	.Lsse_inc(%rip),%xmm13
3948	movdqa	%xmm4,%xmm7
3949	movdqa	%xmm8,%xmm11
3950	movdqa	%xmm12,%xmm15
3951	movq	$10,%r10
3952
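# 10 ChaCha20 double rounds over the three independent blocks, using xmm3 as
# the shared rotate scratch register.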
3953.Lseal_sse_128_rounds:
3954	paddd	%xmm4,%xmm0
3955	pxor	%xmm0,%xmm12
3956	pshufb	.Lrol16(%rip),%xmm12
3957	paddd	%xmm12,%xmm8
3958	pxor	%xmm8,%xmm4
3959	movdqa	%xmm4,%xmm3
3960	pslld	$12,%xmm3
3961	psrld	$20,%xmm4
3962	pxor	%xmm3,%xmm4
3963	paddd	%xmm4,%xmm0
3964	pxor	%xmm0,%xmm12
3965	pshufb	.Lrol8(%rip),%xmm12
3966	paddd	%xmm12,%xmm8
3967	pxor	%xmm8,%xmm4
3968	movdqa	%xmm4,%xmm3
3969	pslld	$7,%xmm3
3970	psrld	$25,%xmm4
3971	pxor	%xmm3,%xmm4
3972.byte	102,15,58,15,228,4
3973.byte	102,69,15,58,15,192,8
3974.byte	102,69,15,58,15,228,12
3975	paddd	%xmm5,%xmm1
3976	pxor	%xmm1,%xmm13
3977	pshufb	.Lrol16(%rip),%xmm13
3978	paddd	%xmm13,%xmm9
3979	pxor	%xmm9,%xmm5
3980	movdqa	%xmm5,%xmm3
3981	pslld	$12,%xmm3
3982	psrld	$20,%xmm5
3983	pxor	%xmm3,%xmm5
3984	paddd	%xmm5,%xmm1
3985	pxor	%xmm1,%xmm13
3986	pshufb	.Lrol8(%rip),%xmm13
3987	paddd	%xmm13,%xmm9
3988	pxor	%xmm9,%xmm5
3989	movdqa	%xmm5,%xmm3
3990	pslld	$7,%xmm3
3991	psrld	$25,%xmm5
3992	pxor	%xmm3,%xmm5
3993.byte	102,15,58,15,237,4
3994.byte	102,69,15,58,15,201,8
3995.byte	102,69,15,58,15,237,12
3996	paddd	%xmm6,%xmm2
3997	pxor	%xmm2,%xmm14
3998	pshufb	.Lrol16(%rip),%xmm14
3999	paddd	%xmm14,%xmm10
4000	pxor	%xmm10,%xmm6
4001	movdqa	%xmm6,%xmm3
4002	pslld	$12,%xmm3
4003	psrld	$20,%xmm6
4004	pxor	%xmm3,%xmm6
4005	paddd	%xmm6,%xmm2
4006	pxor	%xmm2,%xmm14
4007	pshufb	.Lrol8(%rip),%xmm14
4008	paddd	%xmm14,%xmm10
4009	pxor	%xmm10,%xmm6
4010	movdqa	%xmm6,%xmm3
4011	pslld	$7,%xmm3
4012	psrld	$25,%xmm6
4013	pxor	%xmm3,%xmm6
4014.byte	102,15,58,15,246,4
4015.byte	102,69,15,58,15,210,8
4016.byte	102,69,15,58,15,246,12
4017	paddd	%xmm4,%xmm0
4018	pxor	%xmm0,%xmm12
4019	pshufb	.Lrol16(%rip),%xmm12
4020	paddd	%xmm12,%xmm8
4021	pxor	%xmm8,%xmm4
4022	movdqa	%xmm4,%xmm3
4023	pslld	$12,%xmm3
4024	psrld	$20,%xmm4
4025	pxor	%xmm3,%xmm4
4026	paddd	%xmm4,%xmm0
4027	pxor	%xmm0,%xmm12
4028	pshufb	.Lrol8(%rip),%xmm12
4029	paddd	%xmm12,%xmm8
4030	pxor	%xmm8,%xmm4
4031	movdqa	%xmm4,%xmm3
4032	pslld	$7,%xmm3
4033	psrld	$25,%xmm4
4034	pxor	%xmm3,%xmm4
4035.byte	102,15,58,15,228,12
4036.byte	102,69,15,58,15,192,8
4037.byte	102,69,15,58,15,228,4
4038	paddd	%xmm5,%xmm1
4039	pxor	%xmm1,%xmm13
4040	pshufb	.Lrol16(%rip),%xmm13
4041	paddd	%xmm13,%xmm9
4042	pxor	%xmm9,%xmm5
4043	movdqa	%xmm5,%xmm3
4044	pslld	$12,%xmm3
4045	psrld	$20,%xmm5
4046	pxor	%xmm3,%xmm5
4047	paddd	%xmm5,%xmm1
4048	pxor	%xmm1,%xmm13
4049	pshufb	.Lrol8(%rip),%xmm13
4050	paddd	%xmm13,%xmm9
4051	pxor	%xmm9,%xmm5
4052	movdqa	%xmm5,%xmm3
4053	pslld	$7,%xmm3
4054	psrld	$25,%xmm5
4055	pxor	%xmm3,%xmm5
4056.byte	102,15,58,15,237,12
4057.byte	102,69,15,58,15,201,8
4058.byte	102,69,15,58,15,237,4
4059	paddd	%xmm6,%xmm2
4060	pxor	%xmm2,%xmm14
4061	pshufb	.Lrol16(%rip),%xmm14
4062	paddd	%xmm14,%xmm10
4063	pxor	%xmm10,%xmm6
4064	movdqa	%xmm6,%xmm3
4065	pslld	$12,%xmm3
4066	psrld	$20,%xmm6
4067	pxor	%xmm3,%xmm6
4068	paddd	%xmm6,%xmm2
4069	pxor	%xmm2,%xmm14
4070	pshufb	.Lrol8(%rip),%xmm14
4071	paddd	%xmm14,%xmm10
4072	pxor	%xmm10,%xmm6
4073	movdqa	%xmm6,%xmm3
4074	pslld	$7,%xmm3
4075	psrld	$25,%xmm6
4076	pxor	%xmm3,%xmm6
4077.byte	102,15,58,15,246,12
4078.byte	102,69,15,58,15,210,8
4079.byte	102,69,15,58,15,246,4
4080
4081	decq	%r10
4082	jnz	.Lseal_sse_128_rounds
4083	paddd	.Lchacha20_consts(%rip),%xmm0
4084	paddd	.Lchacha20_consts(%rip),%xmm1
4085	paddd	.Lchacha20_consts(%rip),%xmm2
4086	paddd	%xmm7,%xmm4
4087	paddd	%xmm7,%xmm5
4088	paddd	%xmm7,%xmm6
4089	paddd	%xmm11,%xmm8
4090	paddd	%xmm11,%xmm9
4091	paddd	%xmm15,%xmm12
4092	paddd	.Lsse_inc(%rip),%xmm15
4093	paddd	%xmm15,%xmm13
4094
4095	pand	.Lclamp(%rip),%xmm2
4096	movdqa	%xmm2,0+0(%rbp)
4097	movdqa	%xmm6,0+16(%rbp)
4098
4099	movq	%r8,%r8
4100	call	poly_hash_ad_internal
4101	jmp	.Lseal_sse_128_tail_xor
4102.size	GFp_chacha20_poly1305_seal, .-GFp_chacha20_poly1305_seal
4103.cfi_endproc
4104
4105
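# AVX2 variant of the open (decrypt) path. Each ymm register holds two
# ChaCha20 blocks, so the main loop below produces 512 bytes of keystream per
# iteration while hashing the ciphertext with Poly1305 in the same pass.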
4106.type	chacha20_poly1305_open_avx2,@function
4107.align	64
4108chacha20_poly1305_open_avx2:
4109.cfi_startproc
4110
4111
4112.cfi_adjust_cfa_offset	8
4113.cfi_offset	%rbp,-16
4114.cfi_adjust_cfa_offset	8
4115.cfi_offset	%rbx,-24
4116.cfi_adjust_cfa_offset	8
4117.cfi_offset	%r12,-32
4118.cfi_adjust_cfa_offset	8
4119.cfi_offset	%r13,-40
4120.cfi_adjust_cfa_offset	8
4121.cfi_offset	%r14,-48
4122.cfi_adjust_cfa_offset	8
4123.cfi_offset	%r15,-56
4124.cfi_adjust_cfa_offset	8
4125.cfi_offset	%r9,-64
4126.cfi_adjust_cfa_offset	288 + 32
4127
4128	vzeroupper
4129	vmovdqa	.Lchacha20_consts(%rip),%ymm0
4130	vbroadcasti128	0(%r9),%ymm4
4131	vbroadcasti128	16(%r9),%ymm8
4132	vbroadcasti128	32(%r9),%ymm12
4133	vpaddd	.Lavx2_init(%rip),%ymm12,%ymm12
4134	cmpq	$192,%rbx
4135	jbe	.Lopen_avx2_192
4136	cmpq	$320,%rbx
4137	jbe	.Lopen_avx2_320
4138
4139	vmovdqa	%ymm4,0+64(%rbp)
4140	vmovdqa	%ymm8,0+96(%rbp)
4141	vmovdqa	%ymm12,0+160(%rbp)
4142	movq	$10,%r10
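# 10 double rounds on a single two-block state: the low lanes (first block)
# are clamped into the Poly1305 key, and the high lanes (second block) supply
# the first 64 bytes of keystream.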
4143.Lopen_avx2_init_rounds:
4144	vpaddd	%ymm4,%ymm0,%ymm0
4145	vpxor	%ymm0,%ymm12,%ymm12
4146	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
4147	vpaddd	%ymm12,%ymm8,%ymm8
4148	vpxor	%ymm8,%ymm4,%ymm4
4149	vpsrld	$20,%ymm4,%ymm3
4150	vpslld	$12,%ymm4,%ymm4
4151	vpxor	%ymm3,%ymm4,%ymm4
4152	vpaddd	%ymm4,%ymm0,%ymm0
4153	vpxor	%ymm0,%ymm12,%ymm12
4154	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
4155	vpaddd	%ymm12,%ymm8,%ymm8
4156	vpxor	%ymm8,%ymm4,%ymm4
4157	vpslld	$7,%ymm4,%ymm3
4158	vpsrld	$25,%ymm4,%ymm4
4159	vpxor	%ymm3,%ymm4,%ymm4
4160	vpalignr	$12,%ymm12,%ymm12,%ymm12
4161	vpalignr	$8,%ymm8,%ymm8,%ymm8
4162	vpalignr	$4,%ymm4,%ymm4,%ymm4
4163	vpaddd	%ymm4,%ymm0,%ymm0
4164	vpxor	%ymm0,%ymm12,%ymm12
4165	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
4166	vpaddd	%ymm12,%ymm8,%ymm8
4167	vpxor	%ymm8,%ymm4,%ymm4
4168	vpsrld	$20,%ymm4,%ymm3
4169	vpslld	$12,%ymm4,%ymm4
4170	vpxor	%ymm3,%ymm4,%ymm4
4171	vpaddd	%ymm4,%ymm0,%ymm0
4172	vpxor	%ymm0,%ymm12,%ymm12
4173	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
4174	vpaddd	%ymm12,%ymm8,%ymm8
4175	vpxor	%ymm8,%ymm4,%ymm4
4176	vpslld	$7,%ymm4,%ymm3
4177	vpsrld	$25,%ymm4,%ymm4
4178	vpxor	%ymm3,%ymm4,%ymm4
4179	vpalignr	$4,%ymm12,%ymm12,%ymm12
4180	vpalignr	$8,%ymm8,%ymm8,%ymm8
4181	vpalignr	$12,%ymm4,%ymm4,%ymm4
4182
4183	decq	%r10
4184	jne	.Lopen_avx2_init_rounds
4185	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
4186	vpaddd	0+64(%rbp),%ymm4,%ymm4
4187	vpaddd	0+96(%rbp),%ymm8,%ymm8
4188	vpaddd	0+160(%rbp),%ymm12,%ymm12
4189
4190	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
4191
4192	vpand	.Lclamp(%rip),%ymm3,%ymm3
4193	vmovdqa	%ymm3,0+0(%rbp)
4194
4195	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
4196	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
4197
4198	movq	%r8,%r8
4199	call	poly_hash_ad_internal
4200
4201	xorq	%rcx,%rcx
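# Hash the first 64 bytes of ciphertext before decrypting them with the
# keystream prepared above (open hashes the ciphertext, then XORs).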
4202.Lopen_avx2_init_hash:
4203	addq	0+0(%rsi,%rcx,1),%r10
4204	adcq	8+0(%rsi,%rcx,1),%r11
4205	adcq	$1,%r12
4206	movq	0+0+0(%rbp),%rax
4207	movq	%rax,%r15
4208	mulq	%r10
4209	movq	%rax,%r13
4210	movq	%rdx,%r14
4211	movq	0+0+0(%rbp),%rax
4212	mulq	%r11
4213	imulq	%r12,%r15
4214	addq	%rax,%r14
4215	adcq	%rdx,%r15
4216	movq	8+0+0(%rbp),%rax
4217	movq	%rax,%r9
4218	mulq	%r10
4219	addq	%rax,%r14
4220	adcq	$0,%rdx
4221	movq	%rdx,%r10
4222	movq	8+0+0(%rbp),%rax
4223	mulq	%r11
4224	addq	%rax,%r15
4225	adcq	$0,%rdx
4226	imulq	%r12,%r9
4227	addq	%r10,%r15
4228	adcq	%rdx,%r9
4229	movq	%r13,%r10
4230	movq	%r14,%r11
4231	movq	%r15,%r12
4232	andq	$3,%r12
4233	movq	%r15,%r13
4234	andq	$-4,%r13
4235	movq	%r9,%r14
4236	shrdq	$2,%r9,%r15
4237	shrq	$2,%r9
4238	addq	%r13,%r15
4239	adcq	%r14,%r9
4240	addq	%r15,%r10
4241	adcq	%r9,%r11
4242	adcq	$0,%r12
4243
4244	addq	$16,%rcx
4245	cmpq	$64,%rcx
4246	jne	.Lopen_avx2_init_hash
4247
4248	vpxor	0(%rsi),%ymm0,%ymm0
4249	vpxor	32(%rsi),%ymm4,%ymm4
4250
4251	vmovdqu	%ymm0,0(%rdi)
4252	vmovdqu	%ymm4,32(%rdi)
4253	leaq	64(%rsi),%rsi
4254	leaq	64(%rdi),%rdi
4255	subq	$64,%rbx
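# Main AVX2 open loop: runs while at least 512 bytes remain. Four two-block
# states are generated per iteration, with their rounds interleaved against
# Poly1305 over the same 512 bytes of ciphertext.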
4256.Lopen_avx2_main_loop:
4257
4258	cmpq	$512,%rbx
4259	jb	.Lopen_avx2_main_loop_done
4260	vmovdqa	.Lchacha20_consts(%rip),%ymm0
4261	vmovdqa	0+64(%rbp),%ymm4
4262	vmovdqa	0+96(%rbp),%ymm8
4263	vmovdqa	%ymm0,%ymm1
4264	vmovdqa	%ymm4,%ymm5
4265	vmovdqa	%ymm8,%ymm9
4266	vmovdqa	%ymm0,%ymm2
4267	vmovdqa	%ymm4,%ymm6
4268	vmovdqa	%ymm8,%ymm10
4269	vmovdqa	%ymm0,%ymm3
4270	vmovdqa	%ymm4,%ymm7
4271	vmovdqa	%ymm8,%ymm11
4272	vmovdqa	.Lavx2_inc(%rip),%ymm12
4273	vpaddd	0+160(%rbp),%ymm12,%ymm15
4274	vpaddd	%ymm15,%ymm12,%ymm14
4275	vpaddd	%ymm14,%ymm12,%ymm13
4276	vpaddd	%ymm13,%ymm12,%ymm12
4277	vmovdqa	%ymm15,0+256(%rbp)
4278	vmovdqa	%ymm14,0+224(%rbp)
4279	vmovdqa	%ymm13,0+192(%rbp)
4280	vmovdqa	%ymm12,0+160(%rbp)
4281
4282	xorq	%rcx,%rcx
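# Round body: ChaCha20 quarter rounds on the eight blocks interleaved with
# Poly1305 blocks computed via the mulx (BMI2) form of the multiply; %rcx
# tracks how much of the chunk has been hashed.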
4283.Lopen_avx2_main_loop_rounds:
4284	addq	0+0(%rsi,%rcx,1),%r10
4285	adcq	8+0(%rsi,%rcx,1),%r11
4286	adcq	$1,%r12
4287	vmovdqa	%ymm8,0+128(%rbp)
4288	vmovdqa	.Lrol16(%rip),%ymm8
4289	vpaddd	%ymm7,%ymm3,%ymm3
4290	vpaddd	%ymm6,%ymm2,%ymm2
4291	vpaddd	%ymm5,%ymm1,%ymm1
4292	vpaddd	%ymm4,%ymm0,%ymm0
4293	vpxor	%ymm3,%ymm15,%ymm15
4294	vpxor	%ymm2,%ymm14,%ymm14
4295	vpxor	%ymm1,%ymm13,%ymm13
4296	vpxor	%ymm0,%ymm12,%ymm12
4297	movq	0+0+0(%rbp),%rdx
4298	movq	%rdx,%r15
4299	mulxq	%r10,%r13,%r14
4300	mulxq	%r11,%rax,%rdx
4301	imulq	%r12,%r15
4302	addq	%rax,%r14
4303	adcq	%rdx,%r15
4304	vpshufb	%ymm8,%ymm15,%ymm15
4305	vpshufb	%ymm8,%ymm14,%ymm14
4306	vpshufb	%ymm8,%ymm13,%ymm13
4307	vpshufb	%ymm8,%ymm12,%ymm12
4308	vpaddd	%ymm15,%ymm11,%ymm11
4309	vpaddd	%ymm14,%ymm10,%ymm10
4310	vpaddd	%ymm13,%ymm9,%ymm9
4311	vpaddd	0+128(%rbp),%ymm12,%ymm8
4312	vpxor	%ymm11,%ymm7,%ymm7
4313	movq	8+0+0(%rbp),%rdx
4314	mulxq	%r10,%r10,%rax
4315	addq	%r10,%r14
4316	mulxq	%r11,%r11,%r9
4317	adcq	%r11,%r15
4318	adcq	$0,%r9
4319	imulq	%r12,%rdx
4320	vpxor	%ymm10,%ymm6,%ymm6
4321	vpxor	%ymm9,%ymm5,%ymm5
4322	vpxor	%ymm8,%ymm4,%ymm4
4323	vmovdqa	%ymm8,0+128(%rbp)
4324	vpsrld	$20,%ymm7,%ymm8
4325	vpslld	$32-20,%ymm7,%ymm7
4326	vpxor	%ymm8,%ymm7,%ymm7
4327	vpsrld	$20,%ymm6,%ymm8
4328	vpslld	$32-20,%ymm6,%ymm6
4329	vpxor	%ymm8,%ymm6,%ymm6
4330	vpsrld	$20,%ymm5,%ymm8
4331	vpslld	$32-20,%ymm5,%ymm5
4332	addq	%rax,%r15
4333	adcq	%rdx,%r9
4334	vpxor	%ymm8,%ymm5,%ymm5
4335	vpsrld	$20,%ymm4,%ymm8
4336	vpslld	$32-20,%ymm4,%ymm4
4337	vpxor	%ymm8,%ymm4,%ymm4
4338	vmovdqa	.Lrol8(%rip),%ymm8
4339	vpaddd	%ymm7,%ymm3,%ymm3
4340	vpaddd	%ymm6,%ymm2,%ymm2
4341	vpaddd	%ymm5,%ymm1,%ymm1
4342	vpaddd	%ymm4,%ymm0,%ymm0
4343	vpxor	%ymm3,%ymm15,%ymm15
4344	movq	%r13,%r10
4345	movq	%r14,%r11
4346	movq	%r15,%r12
4347	andq	$3,%r12
4348	movq	%r15,%r13
4349	andq	$-4,%r13
4350	movq	%r9,%r14
4351	shrdq	$2,%r9,%r15
4352	shrq	$2,%r9
4353	addq	%r13,%r15
4354	adcq	%r14,%r9
4355	addq	%r15,%r10
4356	adcq	%r9,%r11
4357	adcq	$0,%r12
4358	vpxor	%ymm2,%ymm14,%ymm14
4359	vpxor	%ymm1,%ymm13,%ymm13
4360	vpxor	%ymm0,%ymm12,%ymm12
4361	vpshufb	%ymm8,%ymm15,%ymm15
4362	vpshufb	%ymm8,%ymm14,%ymm14
4363	vpshufb	%ymm8,%ymm13,%ymm13
4364	vpshufb	%ymm8,%ymm12,%ymm12
4365	vpaddd	%ymm15,%ymm11,%ymm11
4366	vpaddd	%ymm14,%ymm10,%ymm10
4367	addq	0+16(%rsi,%rcx,1),%r10
4368	adcq	8+16(%rsi,%rcx,1),%r11
4369	adcq	$1,%r12
4370	vpaddd	%ymm13,%ymm9,%ymm9
4371	vpaddd	0+128(%rbp),%ymm12,%ymm8
4372	vpxor	%ymm11,%ymm7,%ymm7
4373	vpxor	%ymm10,%ymm6,%ymm6
4374	vpxor	%ymm9,%ymm5,%ymm5
4375	vpxor	%ymm8,%ymm4,%ymm4
4376	vmovdqa	%ymm8,0+128(%rbp)
4377	vpsrld	$25,%ymm7,%ymm8
4378	movq	0+0+0(%rbp),%rdx
4379	movq	%rdx,%r15
4380	mulxq	%r10,%r13,%r14
4381	mulxq	%r11,%rax,%rdx
4382	imulq	%r12,%r15
4383	addq	%rax,%r14
4384	adcq	%rdx,%r15
4385	vpslld	$32-25,%ymm7,%ymm7
4386	vpxor	%ymm8,%ymm7,%ymm7
4387	vpsrld	$25,%ymm6,%ymm8
4388	vpslld	$32-25,%ymm6,%ymm6
4389	vpxor	%ymm8,%ymm6,%ymm6
4390	vpsrld	$25,%ymm5,%ymm8
4391	vpslld	$32-25,%ymm5,%ymm5
4392	vpxor	%ymm8,%ymm5,%ymm5
4393	vpsrld	$25,%ymm4,%ymm8
4394	vpslld	$32-25,%ymm4,%ymm4
4395	vpxor	%ymm8,%ymm4,%ymm4
4396	vmovdqa	0+128(%rbp),%ymm8
4397	vpalignr	$4,%ymm7,%ymm7,%ymm7
4398	vpalignr	$8,%ymm11,%ymm11,%ymm11
4399	vpalignr	$12,%ymm15,%ymm15,%ymm15
4400	vpalignr	$4,%ymm6,%ymm6,%ymm6
4401	vpalignr	$8,%ymm10,%ymm10,%ymm10
4402	vpalignr	$12,%ymm14,%ymm14,%ymm14
4403	movq	8+0+0(%rbp),%rdx
4404	mulxq	%r10,%r10,%rax
4405	addq	%r10,%r14
4406	mulxq	%r11,%r11,%r9
4407	adcq	%r11,%r15
4408	adcq	$0,%r9
4409	imulq	%r12,%rdx
4410	vpalignr	$4,%ymm5,%ymm5,%ymm5
4411	vpalignr	$8,%ymm9,%ymm9,%ymm9
4412	vpalignr	$12,%ymm13,%ymm13,%ymm13
4413	vpalignr	$4,%ymm4,%ymm4,%ymm4
4414	vpalignr	$8,%ymm8,%ymm8,%ymm8
4415	vpalignr	$12,%ymm12,%ymm12,%ymm12
4416	vmovdqa	%ymm8,0+128(%rbp)
4417	vmovdqa	.Lrol16(%rip),%ymm8
4418	vpaddd	%ymm7,%ymm3,%ymm3
4419	vpaddd	%ymm6,%ymm2,%ymm2
4420	vpaddd	%ymm5,%ymm1,%ymm1
4421	vpaddd	%ymm4,%ymm0,%ymm0
4422	vpxor	%ymm3,%ymm15,%ymm15
4423	vpxor	%ymm2,%ymm14,%ymm14
4424	vpxor	%ymm1,%ymm13,%ymm13
4425	vpxor	%ymm0,%ymm12,%ymm12
4426	vpshufb	%ymm8,%ymm15,%ymm15
4427	vpshufb	%ymm8,%ymm14,%ymm14
4428	addq	%rax,%r15
4429	adcq	%rdx,%r9
4430	vpshufb	%ymm8,%ymm13,%ymm13
4431	vpshufb	%ymm8,%ymm12,%ymm12
4432	vpaddd	%ymm15,%ymm11,%ymm11
4433	vpaddd	%ymm14,%ymm10,%ymm10
4434	vpaddd	%ymm13,%ymm9,%ymm9
4435	vpaddd	0+128(%rbp),%ymm12,%ymm8
4436	vpxor	%ymm11,%ymm7,%ymm7
4437	vpxor	%ymm10,%ymm6,%ymm6
4438	vpxor	%ymm9,%ymm5,%ymm5
4439	movq	%r13,%r10
4440	movq	%r14,%r11
4441	movq	%r15,%r12
4442	andq	$3,%r12
4443	movq	%r15,%r13
4444	andq	$-4,%r13
4445	movq	%r9,%r14
4446	shrdq	$2,%r9,%r15
4447	shrq	$2,%r9
4448	addq	%r13,%r15
4449	adcq	%r14,%r9
4450	addq	%r15,%r10
4451	adcq	%r9,%r11
4452	adcq	$0,%r12
4453	vpxor	%ymm8,%ymm4,%ymm4
4454	vmovdqa	%ymm8,0+128(%rbp)
4455	vpsrld	$20,%ymm7,%ymm8
4456	vpslld	$32-20,%ymm7,%ymm7
4457	vpxor	%ymm8,%ymm7,%ymm7
4458	vpsrld	$20,%ymm6,%ymm8
4459	vpslld	$32-20,%ymm6,%ymm6
4460	vpxor	%ymm8,%ymm6,%ymm6
4461	addq	0+32(%rsi,%rcx,1),%r10
4462	adcq	8+32(%rsi,%rcx,1),%r11
4463	adcq	$1,%r12
4464
4465	leaq	48(%rcx),%rcx
4466	vpsrld	$20,%ymm5,%ymm8
4467	vpslld	$32-20,%ymm5,%ymm5
4468	vpxor	%ymm8,%ymm5,%ymm5
4469	vpsrld	$20,%ymm4,%ymm8
4470	vpslld	$32-20,%ymm4,%ymm4
4471	vpxor	%ymm8,%ymm4,%ymm4
4472	vmovdqa	.Lrol8(%rip),%ymm8
4473	vpaddd	%ymm7,%ymm3,%ymm3
4474	vpaddd	%ymm6,%ymm2,%ymm2
4475	vpaddd	%ymm5,%ymm1,%ymm1
4476	vpaddd	%ymm4,%ymm0,%ymm0
4477	vpxor	%ymm3,%ymm15,%ymm15
4478	vpxor	%ymm2,%ymm14,%ymm14
4479	vpxor	%ymm1,%ymm13,%ymm13
4480	vpxor	%ymm0,%ymm12,%ymm12
4481	vpshufb	%ymm8,%ymm15,%ymm15
4482	vpshufb	%ymm8,%ymm14,%ymm14
4483	vpshufb	%ymm8,%ymm13,%ymm13
4484	movq	0+0+0(%rbp),%rdx
4485	movq	%rdx,%r15
4486	mulxq	%r10,%r13,%r14
4487	mulxq	%r11,%rax,%rdx
4488	imulq	%r12,%r15
4489	addq	%rax,%r14
4490	adcq	%rdx,%r15
4491	vpshufb	%ymm8,%ymm12,%ymm12
4492	vpaddd	%ymm15,%ymm11,%ymm11
4493	vpaddd	%ymm14,%ymm10,%ymm10
4494	vpaddd	%ymm13,%ymm9,%ymm9
4495	vpaddd	0+128(%rbp),%ymm12,%ymm8
4496	vpxor	%ymm11,%ymm7,%ymm7
4497	vpxor	%ymm10,%ymm6,%ymm6
4498	vpxor	%ymm9,%ymm5,%ymm5
4499	movq	8+0+0(%rbp),%rdx
4500	mulxq	%r10,%r10,%rax
4501	addq	%r10,%r14
4502	mulxq	%r11,%r11,%r9
4503	adcq	%r11,%r15
4504	adcq	$0,%r9
4505	imulq	%r12,%rdx
4506	vpxor	%ymm8,%ymm4,%ymm4
4507	vmovdqa	%ymm8,0+128(%rbp)
4508	vpsrld	$25,%ymm7,%ymm8
4509	vpslld	$32-25,%ymm7,%ymm7
4510	vpxor	%ymm8,%ymm7,%ymm7
4511	vpsrld	$25,%ymm6,%ymm8
4512	vpslld	$32-25,%ymm6,%ymm6
4513	vpxor	%ymm8,%ymm6,%ymm6
4514	addq	%rax,%r15
4515	adcq	%rdx,%r9
4516	vpsrld	$25,%ymm5,%ymm8
4517	vpslld	$32-25,%ymm5,%ymm5
4518	vpxor	%ymm8,%ymm5,%ymm5
4519	vpsrld	$25,%ymm4,%ymm8
4520	vpslld	$32-25,%ymm4,%ymm4
4521	vpxor	%ymm8,%ymm4,%ymm4
4522	vmovdqa	0+128(%rbp),%ymm8
4523	vpalignr	$12,%ymm7,%ymm7,%ymm7
4524	vpalignr	$8,%ymm11,%ymm11,%ymm11
4525	vpalignr	$4,%ymm15,%ymm15,%ymm15
4526	vpalignr	$12,%ymm6,%ymm6,%ymm6
4527	vpalignr	$8,%ymm10,%ymm10,%ymm10
4528	vpalignr	$4,%ymm14,%ymm14,%ymm14
4529	vpalignr	$12,%ymm5,%ymm5,%ymm5
4530	vpalignr	$8,%ymm9,%ymm9,%ymm9
4531	vpalignr	$4,%ymm13,%ymm13,%ymm13
4532	vpalignr	$12,%ymm4,%ymm4,%ymm4
4533	vpalignr	$8,%ymm8,%ymm8,%ymm8
4534	movq	%r13,%r10
4535	movq	%r14,%r11
4536	movq	%r15,%r12
4537	andq	$3,%r12
4538	movq	%r15,%r13
4539	andq	$-4,%r13
4540	movq	%r9,%r14
4541	shrdq	$2,%r9,%r15
4542	shrq	$2,%r9
4543	addq	%r13,%r15
4544	adcq	%r14,%r9
4545	addq	%r15,%r10
4546	adcq	%r9,%r11
4547	adcq	$0,%r12
4548	vpalignr	$4,%ymm12,%ymm12,%ymm12
4549
4550	cmpq	$60*8,%rcx
4551	jne	.Lopen_avx2_main_loop_rounds
4552	vpaddd	.Lchacha20_consts(%rip),%ymm3,%ymm3
4553	vpaddd	0+64(%rbp),%ymm7,%ymm7
4554	vpaddd	0+96(%rbp),%ymm11,%ymm11
4555	vpaddd	0+256(%rbp),%ymm15,%ymm15
4556	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
4557	vpaddd	0+64(%rbp),%ymm6,%ymm6
4558	vpaddd	0+96(%rbp),%ymm10,%ymm10
4559	vpaddd	0+224(%rbp),%ymm14,%ymm14
4560	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
4561	vpaddd	0+64(%rbp),%ymm5,%ymm5
4562	vpaddd	0+96(%rbp),%ymm9,%ymm9
4563	vpaddd	0+192(%rbp),%ymm13,%ymm13
4564	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
4565	vpaddd	0+64(%rbp),%ymm4,%ymm4
4566	vpaddd	0+96(%rbp),%ymm8,%ymm8
4567	vpaddd	0+160(%rbp),%ymm12,%ymm12
4568
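# The initial state was added back above; now de-interleave the block pairs
# with vperm2i128 and XOR 512 bytes of ciphertext, hashing the final 32 bytes
# of the chunk along the way.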
4569	vmovdqa	%ymm0,0+128(%rbp)
4570	addq	0+60*8(%rsi),%r10
4571	adcq	8+60*8(%rsi),%r11
4572	adcq	$1,%r12
4573	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
4574	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
4575	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
4576	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
4577	vpxor	0+0(%rsi),%ymm0,%ymm0
4578	vpxor	32+0(%rsi),%ymm3,%ymm3
4579	vpxor	64+0(%rsi),%ymm7,%ymm7
4580	vpxor	96+0(%rsi),%ymm11,%ymm11
4581	vmovdqu	%ymm0,0+0(%rdi)
4582	vmovdqu	%ymm3,32+0(%rdi)
4583	vmovdqu	%ymm7,64+0(%rdi)
4584	vmovdqu	%ymm11,96+0(%rdi)
4585
4586	vmovdqa	0+128(%rbp),%ymm0
4587	movq	0+0+0(%rbp),%rax
4588	movq	%rax,%r15
4589	mulq	%r10
4590	movq	%rax,%r13
4591	movq	%rdx,%r14
4592	movq	0+0+0(%rbp),%rax
4593	mulq	%r11
4594	imulq	%r12,%r15
4595	addq	%rax,%r14
4596	adcq	%rdx,%r15
4597	movq	8+0+0(%rbp),%rax
4598	movq	%rax,%r9
4599	mulq	%r10
4600	addq	%rax,%r14
4601	adcq	$0,%rdx
4602	movq	%rdx,%r10
4603	movq	8+0+0(%rbp),%rax
4604	mulq	%r11
4605	addq	%rax,%r15
4606	adcq	$0,%rdx
4607	imulq	%r12,%r9
4608	addq	%r10,%r15
4609	adcq	%rdx,%r9
4610	movq	%r13,%r10
4611	movq	%r14,%r11
4612	movq	%r15,%r12
4613	andq	$3,%r12
4614	movq	%r15,%r13
4615	andq	$-4,%r13
4616	movq	%r9,%r14
4617	shrdq	$2,%r9,%r15
4618	shrq	$2,%r9
4619	addq	%r13,%r15
4620	adcq	%r14,%r9
4621	addq	%r15,%r10
4622	adcq	%r9,%r11
4623	adcq	$0,%r12
4624	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
4625	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
4626	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
4627	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
4628	vpxor	0+128(%rsi),%ymm3,%ymm3
4629	vpxor	32+128(%rsi),%ymm2,%ymm2
4630	vpxor	64+128(%rsi),%ymm6,%ymm6
4631	vpxor	96+128(%rsi),%ymm10,%ymm10
4632	vmovdqu	%ymm3,0+128(%rdi)
4633	vmovdqu	%ymm2,32+128(%rdi)
4634	vmovdqu	%ymm6,64+128(%rdi)
4635	vmovdqu	%ymm10,96+128(%rdi)
4636	addq	0+60*8+16(%rsi),%r10
4637	adcq	8+60*8+16(%rsi),%r11
4638	adcq	$1,%r12
4639	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
4640	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
4641	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
4642	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
4643	vpxor	0+256(%rsi),%ymm3,%ymm3
4644	vpxor	32+256(%rsi),%ymm1,%ymm1
4645	vpxor	64+256(%rsi),%ymm5,%ymm5
4646	vpxor	96+256(%rsi),%ymm9,%ymm9
4647	vmovdqu	%ymm3,0+256(%rdi)
4648	vmovdqu	%ymm1,32+256(%rdi)
4649	vmovdqu	%ymm5,64+256(%rdi)
4650	vmovdqu	%ymm9,96+256(%rdi)
4651	movq	0+0+0(%rbp),%rax
4652	movq	%rax,%r15
4653	mulq	%r10
4654	movq	%rax,%r13
4655	movq	%rdx,%r14
4656	movq	0+0+0(%rbp),%rax
4657	mulq	%r11
4658	imulq	%r12,%r15
4659	addq	%rax,%r14
4660	adcq	%rdx,%r15
4661	movq	8+0+0(%rbp),%rax
4662	movq	%rax,%r9
4663	mulq	%r10
4664	addq	%rax,%r14
4665	adcq	$0,%rdx
4666	movq	%rdx,%r10
4667	movq	8+0+0(%rbp),%rax
4668	mulq	%r11
4669	addq	%rax,%r15
4670	adcq	$0,%rdx
4671	imulq	%r12,%r9
4672	addq	%r10,%r15
4673	adcq	%rdx,%r9
4674	movq	%r13,%r10
4675	movq	%r14,%r11
4676	movq	%r15,%r12
4677	andq	$3,%r12
4678	movq	%r15,%r13
4679	andq	$-4,%r13
4680	movq	%r9,%r14
4681	shrdq	$2,%r9,%r15
4682	shrq	$2,%r9
4683	addq	%r13,%r15
4684	adcq	%r14,%r9
4685	addq	%r15,%r10
4686	adcq	%r9,%r11
4687	adcq	$0,%r12
4688	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
4689	vperm2i128	$0x13,%ymm0,%ymm4,%ymm4
4690	vperm2i128	$0x02,%ymm8,%ymm12,%ymm0
4691	vperm2i128	$0x13,%ymm8,%ymm12,%ymm8
4692	vpxor	0+384(%rsi),%ymm3,%ymm3
4693	vpxor	32+384(%rsi),%ymm0,%ymm0
4694	vpxor	64+384(%rsi),%ymm4,%ymm4
4695	vpxor	96+384(%rsi),%ymm8,%ymm8
4696	vmovdqu	%ymm3,0+384(%rdi)
4697	vmovdqu	%ymm0,32+384(%rdi)
4698	vmovdqu	%ymm4,64+384(%rdi)
4699	vmovdqu	%ymm8,96+384(%rdi)
4700
4701	leaq	512(%rsi),%rsi
4702	leaq	512(%rdi),%rdi
4703	subq	$512,%rbx
4704	jmp	.Lopen_avx2_main_loop
4705.Lopen_avx2_main_loop_done:
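# Fewer than 512 bytes remain: pick a tail path sized to the remainder (up to
# 128, 256, 384, or 512 bytes), or go straight to finalization if nothing is
# left.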
4706	testq	%rbx,%rbx
4707	vzeroupper
4708	je	.Lopen_sse_finalize
4709
4710	cmpq	$384,%rbx
4711	ja	.Lopen_avx2_tail_512
4712	cmpq	$256,%rbx
4713	ja	.Lopen_avx2_tail_384
4714	cmpq	$128,%rbx
4715	ja	.Lopen_avx2_tail_256
4716	vmovdqa	.Lchacha20_consts(%rip),%ymm0
4717	vmovdqa	0+64(%rbp),%ymm4
4718	vmovdqa	0+96(%rbp),%ymm8
4719	vmovdqa	.Lavx2_inc(%rip),%ymm12
4720	vpaddd	0+160(%rbp),%ymm12,%ymm12
4721	vmovdqa	%ymm12,0+160(%rbp)
4722
4723	xorq	%r8,%r8
4724	movq	%rbx,%rcx
4725	andq	$-16,%rcx
4726	testq	%rcx,%rcx
4727	je	.Lopen_avx2_tail_128_rounds
4728.Lopen_avx2_tail_128_rounds_and_x1hash:
4729	addq	0+0(%rsi,%r8,1),%r10
4730	adcq	8+0(%rsi,%r8,1),%r11
4731	adcq	$1,%r12
4732	movq	0+0+0(%rbp),%rax
4733	movq	%rax,%r15
4734	mulq	%r10
4735	movq	%rax,%r13
4736	movq	%rdx,%r14
4737	movq	0+0+0(%rbp),%rax
4738	mulq	%r11
4739	imulq	%r12,%r15
4740	addq	%rax,%r14
4741	adcq	%rdx,%r15
4742	movq	8+0+0(%rbp),%rax
4743	movq	%rax,%r9
4744	mulq	%r10
4745	addq	%rax,%r14
4746	adcq	$0,%rdx
4747	movq	%rdx,%r10
4748	movq	8+0+0(%rbp),%rax
4749	mulq	%r11
4750	addq	%rax,%r15
4751	adcq	$0,%rdx
4752	imulq	%r12,%r9
4753	addq	%r10,%r15
4754	adcq	%rdx,%r9
4755	movq	%r13,%r10
4756	movq	%r14,%r11
4757	movq	%r15,%r12
4758	andq	$3,%r12
4759	movq	%r15,%r13
4760	andq	$-4,%r13
4761	movq	%r9,%r14
4762	shrdq	$2,%r9,%r15
4763	shrq	$2,%r9
4764	addq	%r13,%r15
4765	adcq	%r14,%r9
4766	addq	%r15,%r10
4767	adcq	%r9,%r11
4768	adcq	$0,%r12
4769
4770.Lopen_avx2_tail_128_rounds:
4771	addq	$16,%r8
4772	vpaddd	%ymm4,%ymm0,%ymm0
4773	vpxor	%ymm0,%ymm12,%ymm12
4774	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
4775	vpaddd	%ymm12,%ymm8,%ymm8
4776	vpxor	%ymm8,%ymm4,%ymm4
4777	vpsrld	$20,%ymm4,%ymm3
4778	vpslld	$12,%ymm4,%ymm4
4779	vpxor	%ymm3,%ymm4,%ymm4
4780	vpaddd	%ymm4,%ymm0,%ymm0
4781	vpxor	%ymm0,%ymm12,%ymm12
4782	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
4783	vpaddd	%ymm12,%ymm8,%ymm8
4784	vpxor	%ymm8,%ymm4,%ymm4
4785	vpslld	$7,%ymm4,%ymm3
4786	vpsrld	$25,%ymm4,%ymm4
4787	vpxor	%ymm3,%ymm4,%ymm4
4788	vpalignr	$12,%ymm12,%ymm12,%ymm12
4789	vpalignr	$8,%ymm8,%ymm8,%ymm8
4790	vpalignr	$4,%ymm4,%ymm4,%ymm4
4791	vpaddd	%ymm4,%ymm0,%ymm0
4792	vpxor	%ymm0,%ymm12,%ymm12
4793	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
4794	vpaddd	%ymm12,%ymm8,%ymm8
4795	vpxor	%ymm8,%ymm4,%ymm4
4796	vpsrld	$20,%ymm4,%ymm3
4797	vpslld	$12,%ymm4,%ymm4
4798	vpxor	%ymm3,%ymm4,%ymm4
4799	vpaddd	%ymm4,%ymm0,%ymm0
4800	vpxor	%ymm0,%ymm12,%ymm12
4801	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
4802	vpaddd	%ymm12,%ymm8,%ymm8
4803	vpxor	%ymm8,%ymm4,%ymm4
4804	vpslld	$7,%ymm4,%ymm3
4805	vpsrld	$25,%ymm4,%ymm4
4806	vpxor	%ymm3,%ymm4,%ymm4
4807	vpalignr	$4,%ymm12,%ymm12,%ymm12
4808	vpalignr	$8,%ymm8,%ymm8,%ymm8
4809	vpalignr	$12,%ymm4,%ymm4,%ymm4
4810
4811	cmpq	%rcx,%r8
4812	jb	.Lopen_avx2_tail_128_rounds_and_x1hash
4813	cmpq	$160,%r8
4814	jne	.Lopen_avx2_tail_128_rounds
4815	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
4816	vpaddd	0+64(%rbp),%ymm4,%ymm4
4817	vpaddd	0+96(%rbp),%ymm8,%ymm8
4818	vpaddd	0+160(%rbp),%ymm12,%ymm12
4819	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
4820	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
4821	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
4822	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
4823	vmovdqa	%ymm3,%ymm8
4824
4825	jmp	.Lopen_avx2_tail_128_xor
4826
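// 129..256 bytes left: two interleaved 2-block states, with Poly1305 folded into the rounds.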
4827.Lopen_avx2_tail_256:
4828	vmovdqa	.Lchacha20_consts(%rip),%ymm0
4829	vmovdqa	0+64(%rbp),%ymm4
4830	vmovdqa	0+96(%rbp),%ymm8
4831	vmovdqa	%ymm0,%ymm1
4832	vmovdqa	%ymm4,%ymm5
4833	vmovdqa	%ymm8,%ymm9
4834	vmovdqa	.Lavx2_inc(%rip),%ymm12
4835	vpaddd	0+160(%rbp),%ymm12,%ymm13
4836	vpaddd	%ymm13,%ymm12,%ymm12
4837	vmovdqa	%ymm12,0+160(%rbp)
4838	vmovdqa	%ymm13,0+192(%rbp)
4839
4840	movq	%rbx,0+128(%rbp)
4841	movq	%rbx,%rcx
4842	subq	$128,%rcx
4843	shrq	$4,%rcx
4844	movq	$10,%r8
4845	cmpq	$10,%rcx
4846	cmovgq	%r8,%rcx
4847	movq	%rsi,%rbx
4848	xorq	%r8,%r8
4849.Lopen_avx2_tail_256_rounds_and_x1hash:
4850	addq	0+0(%rbx),%r10
4851	adcq	8+0(%rbx),%r11
4852	adcq	$1,%r12
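// Same Poly1305 multiply-and-reduce step, written with BMI2 mulx.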
4853	movq	0+0+0(%rbp),%rdx
4854	movq	%rdx,%r15
4855	mulxq	%r10,%r13,%r14
4856	mulxq	%r11,%rax,%rdx
4857	imulq	%r12,%r15
4858	addq	%rax,%r14
4859	adcq	%rdx,%r15
4860	movq	8+0+0(%rbp),%rdx
4861	mulxq	%r10,%r10,%rax
4862	addq	%r10,%r14
4863	mulxq	%r11,%r11,%r9
4864	adcq	%r11,%r15
4865	adcq	$0,%r9
4866	imulq	%r12,%rdx
4867	addq	%rax,%r15
4868	adcq	%rdx,%r9
4869	movq	%r13,%r10
4870	movq	%r14,%r11
4871	movq	%r15,%r12
4872	andq	$3,%r12
4873	movq	%r15,%r13
4874	andq	$-4,%r13
4875	movq	%r9,%r14
4876	shrdq	$2,%r9,%r15
4877	shrq	$2,%r9
4878	addq	%r13,%r15
4879	adcq	%r14,%r9
4880	addq	%r15,%r10
4881	adcq	%r9,%r11
4882	adcq	$0,%r12
4883
4884	leaq	16(%rbx),%rbx
4885.Lopen_avx2_tail_256_rounds:
4886	vpaddd	%ymm4,%ymm0,%ymm0
4887	vpxor	%ymm0,%ymm12,%ymm12
4888	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
4889	vpaddd	%ymm12,%ymm8,%ymm8
4890	vpxor	%ymm8,%ymm4,%ymm4
4891	vpsrld	$20,%ymm4,%ymm3
4892	vpslld	$12,%ymm4,%ymm4
4893	vpxor	%ymm3,%ymm4,%ymm4
4894	vpaddd	%ymm4,%ymm0,%ymm0
4895	vpxor	%ymm0,%ymm12,%ymm12
4896	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
4897	vpaddd	%ymm12,%ymm8,%ymm8
4898	vpxor	%ymm8,%ymm4,%ymm4
4899	vpslld	$7,%ymm4,%ymm3
4900	vpsrld	$25,%ymm4,%ymm4
4901	vpxor	%ymm3,%ymm4,%ymm4
4902	vpalignr	$12,%ymm12,%ymm12,%ymm12
4903	vpalignr	$8,%ymm8,%ymm8,%ymm8
4904	vpalignr	$4,%ymm4,%ymm4,%ymm4
4905	vpaddd	%ymm5,%ymm1,%ymm1
4906	vpxor	%ymm1,%ymm13,%ymm13
4907	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
4908	vpaddd	%ymm13,%ymm9,%ymm9
4909	vpxor	%ymm9,%ymm5,%ymm5
4910	vpsrld	$20,%ymm5,%ymm3
4911	vpslld	$12,%ymm5,%ymm5
4912	vpxor	%ymm3,%ymm5,%ymm5
4913	vpaddd	%ymm5,%ymm1,%ymm1
4914	vpxor	%ymm1,%ymm13,%ymm13
4915	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
4916	vpaddd	%ymm13,%ymm9,%ymm9
4917	vpxor	%ymm9,%ymm5,%ymm5
4918	vpslld	$7,%ymm5,%ymm3
4919	vpsrld	$25,%ymm5,%ymm5
4920	vpxor	%ymm3,%ymm5,%ymm5
4921	vpalignr	$12,%ymm13,%ymm13,%ymm13
4922	vpalignr	$8,%ymm9,%ymm9,%ymm9
4923	vpalignr	$4,%ymm5,%ymm5,%ymm5
4924
4925	incq	%r8
4926	vpaddd	%ymm4,%ymm0,%ymm0
4927	vpxor	%ymm0,%ymm12,%ymm12
4928	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
4929	vpaddd	%ymm12,%ymm8,%ymm8
4930	vpxor	%ymm8,%ymm4,%ymm4
4931	vpsrld	$20,%ymm4,%ymm3
4932	vpslld	$12,%ymm4,%ymm4
4933	vpxor	%ymm3,%ymm4,%ymm4
4934	vpaddd	%ymm4,%ymm0,%ymm0
4935	vpxor	%ymm0,%ymm12,%ymm12
4936	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
4937	vpaddd	%ymm12,%ymm8,%ymm8
4938	vpxor	%ymm8,%ymm4,%ymm4
4939	vpslld	$7,%ymm4,%ymm3
4940	vpsrld	$25,%ymm4,%ymm4
4941	vpxor	%ymm3,%ymm4,%ymm4
4942	vpalignr	$4,%ymm12,%ymm12,%ymm12
4943	vpalignr	$8,%ymm8,%ymm8,%ymm8
4944	vpalignr	$12,%ymm4,%ymm4,%ymm4
4945	vpaddd	%ymm5,%ymm1,%ymm1
4946	vpxor	%ymm1,%ymm13,%ymm13
4947	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
4948	vpaddd	%ymm13,%ymm9,%ymm9
4949	vpxor	%ymm9,%ymm5,%ymm5
4950	vpsrld	$20,%ymm5,%ymm3
4951	vpslld	$12,%ymm5,%ymm5
4952	vpxor	%ymm3,%ymm5,%ymm5
4953	vpaddd	%ymm5,%ymm1,%ymm1
4954	vpxor	%ymm1,%ymm13,%ymm13
4955	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
4956	vpaddd	%ymm13,%ymm9,%ymm9
4957	vpxor	%ymm9,%ymm5,%ymm5
4958	vpslld	$7,%ymm5,%ymm3
4959	vpsrld	$25,%ymm5,%ymm5
4960	vpxor	%ymm3,%ymm5,%ymm5
4961	vpalignr	$4,%ymm13,%ymm13,%ymm13
4962	vpalignr	$8,%ymm9,%ymm9,%ymm9
4963	vpalignr	$12,%ymm5,%ymm5,%ymm5
4964	vpaddd	%ymm6,%ymm2,%ymm2
4965	vpxor	%ymm2,%ymm14,%ymm14
4966	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
4967	vpaddd	%ymm14,%ymm10,%ymm10
4968	vpxor	%ymm10,%ymm6,%ymm6
4969	vpsrld	$20,%ymm6,%ymm3
4970	vpslld	$12,%ymm6,%ymm6
4971	vpxor	%ymm3,%ymm6,%ymm6
4972	vpaddd	%ymm6,%ymm2,%ymm2
4973	vpxor	%ymm2,%ymm14,%ymm14
4974	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
4975	vpaddd	%ymm14,%ymm10,%ymm10
4976	vpxor	%ymm10,%ymm6,%ymm6
4977	vpslld	$7,%ymm6,%ymm3
4978	vpsrld	$25,%ymm6,%ymm6
4979	vpxor	%ymm3,%ymm6,%ymm6
4980	vpalignr	$4,%ymm14,%ymm14,%ymm14
4981	vpalignr	$8,%ymm10,%ymm10,%ymm10
4982	vpalignr	$12,%ymm6,%ymm6,%ymm6
4983
4984	cmpq	%rcx,%r8
4985	jb	.Lopen_avx2_tail_256_rounds_and_x1hash
4986	cmpq	$10,%r8
4987	jne	.Lopen_avx2_tail_256_rounds
4988	movq	%rbx,%r8
4989	subq	%rsi,%rbx
4990	movq	%rbx,%rcx
4991	movq	0+128(%rbp),%rbx
4992.Lopen_avx2_tail_256_hash:
4993	addq	$16,%rcx
4994	cmpq	%rbx,%rcx
4995	jg	.Lopen_avx2_tail_256_done
4996	addq	0+0(%r8),%r10
4997	adcq	8+0(%r8),%r11
4998	adcq	$1,%r12
4999	movq	0+0+0(%rbp),%rdx
5000	movq	%rdx,%r15
5001	mulxq	%r10,%r13,%r14
5002	mulxq	%r11,%rax,%rdx
5003	imulq	%r12,%r15
5004	addq	%rax,%r14
5005	adcq	%rdx,%r15
5006	movq	8+0+0(%rbp),%rdx
5007	mulxq	%r10,%r10,%rax
5008	addq	%r10,%r14
5009	mulxq	%r11,%r11,%r9
5010	adcq	%r11,%r15
5011	adcq	$0,%r9
5012	imulq	%r12,%rdx
5013	addq	%rax,%r15
5014	adcq	%rdx,%r9
5015	movq	%r13,%r10
5016	movq	%r14,%r11
5017	movq	%r15,%r12
5018	andq	$3,%r12
5019	movq	%r15,%r13
5020	andq	$-4,%r13
5021	movq	%r9,%r14
5022	shrdq	$2,%r9,%r15
5023	shrq	$2,%r9
5024	addq	%r13,%r15
5025	adcq	%r14,%r9
5026	addq	%r15,%r10
5027	adcq	%r9,%r11
5028	adcq	$0,%r12
5029
5030	leaq	16(%r8),%r8
5031	jmp	.Lopen_avx2_tail_256_hash
5032.Lopen_avx2_tail_256_done:
5033	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
5034	vpaddd	0+64(%rbp),%ymm5,%ymm5
5035	vpaddd	0+96(%rbp),%ymm9,%ymm9
5036	vpaddd	0+192(%rbp),%ymm13,%ymm13
5037	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
5038	vpaddd	0+64(%rbp),%ymm4,%ymm4
5039	vpaddd	0+96(%rbp),%ymm8,%ymm8
5040	vpaddd	0+160(%rbp),%ymm12,%ymm12
5041	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
5042	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
5043	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
5044	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
5045	vpxor	0+0(%rsi),%ymm3,%ymm3
5046	vpxor	32+0(%rsi),%ymm1,%ymm1
5047	vpxor	64+0(%rsi),%ymm5,%ymm5
5048	vpxor	96+0(%rsi),%ymm9,%ymm9
5049	vmovdqu	%ymm3,0+0(%rdi)
5050	vmovdqu	%ymm1,32+0(%rdi)
5051	vmovdqu	%ymm5,64+0(%rdi)
5052	vmovdqu	%ymm9,96+0(%rdi)
5053	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
5054	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
5055	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
5056	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
5057	vmovdqa	%ymm3,%ymm8
5058
5059	leaq	128(%rsi),%rsi
5060	leaq	128(%rdi),%rdi
5061	subq	$128,%rbx
5062	jmp	.Lopen_avx2_tail_128_xor
5063
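// 257..384 bytes left: three interleaved 2-block states.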
5064.Lopen_avx2_tail_384:
5065	vmovdqa	.Lchacha20_consts(%rip),%ymm0
5066	vmovdqa	0+64(%rbp),%ymm4
5067	vmovdqa	0+96(%rbp),%ymm8
5068	vmovdqa	%ymm0,%ymm1
5069	vmovdqa	%ymm4,%ymm5
5070	vmovdqa	%ymm8,%ymm9
5071	vmovdqa	%ymm0,%ymm2
5072	vmovdqa	%ymm4,%ymm6
5073	vmovdqa	%ymm8,%ymm10
5074	vmovdqa	.Lavx2_inc(%rip),%ymm12
5075	vpaddd	0+160(%rbp),%ymm12,%ymm14
5076	vpaddd	%ymm14,%ymm12,%ymm13
5077	vpaddd	%ymm13,%ymm12,%ymm12
5078	vmovdqa	%ymm12,0+160(%rbp)
5079	vmovdqa	%ymm13,0+192(%rbp)
5080	vmovdqa	%ymm14,0+224(%rbp)
5081
5082	movq	%rbx,0+128(%rbp)
5083	movq	%rbx,%rcx
5084	subq	$256,%rcx
5085	shrq	$4,%rcx
5086	addq	$6,%rcx
5087	movq	$10,%r8
5088	cmpq	$10,%rcx
5089	cmovgq	%r8,%rcx
5090	movq	%rsi,%rbx
5091	xorq	%r8,%r8
5092.Lopen_avx2_tail_384_rounds_and_x2hash:
5093	addq	0+0(%rbx),%r10
5094	adcq	8+0(%rbx),%r11
5095	adcq	$1,%r12
5096	movq	0+0+0(%rbp),%rdx
5097	movq	%rdx,%r15
5098	mulxq	%r10,%r13,%r14
5099	mulxq	%r11,%rax,%rdx
5100	imulq	%r12,%r15
5101	addq	%rax,%r14
5102	adcq	%rdx,%r15
5103	movq	8+0+0(%rbp),%rdx
5104	mulxq	%r10,%r10,%rax
5105	addq	%r10,%r14
5106	mulxq	%r11,%r11,%r9
5107	adcq	%r11,%r15
5108	adcq	$0,%r9
5109	imulq	%r12,%rdx
5110	addq	%rax,%r15
5111	adcq	%rdx,%r9
5112	movq	%r13,%r10
5113	movq	%r14,%r11
5114	movq	%r15,%r12
5115	andq	$3,%r12
5116	movq	%r15,%r13
5117	andq	$-4,%r13
5118	movq	%r9,%r14
5119	shrdq	$2,%r9,%r15
5120	shrq	$2,%r9
5121	addq	%r13,%r15
5122	adcq	%r14,%r9
5123	addq	%r15,%r10
5124	adcq	%r9,%r11
5125	adcq	$0,%r12
5126
5127	leaq	16(%rbx),%rbx
5128.Lopen_avx2_tail_384_rounds_and_x1hash:
5129	vpaddd	%ymm6,%ymm2,%ymm2
5130	vpxor	%ymm2,%ymm14,%ymm14
5131	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
5132	vpaddd	%ymm14,%ymm10,%ymm10
5133	vpxor	%ymm10,%ymm6,%ymm6
5134	vpsrld	$20,%ymm6,%ymm3
5135	vpslld	$12,%ymm6,%ymm6
5136	vpxor	%ymm3,%ymm6,%ymm6
5137	vpaddd	%ymm6,%ymm2,%ymm2
5138	vpxor	%ymm2,%ymm14,%ymm14
5139	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
5140	vpaddd	%ymm14,%ymm10,%ymm10
5141	vpxor	%ymm10,%ymm6,%ymm6
5142	vpslld	$7,%ymm6,%ymm3
5143	vpsrld	$25,%ymm6,%ymm6
5144	vpxor	%ymm3,%ymm6,%ymm6
5145	vpalignr	$12,%ymm14,%ymm14,%ymm14
5146	vpalignr	$8,%ymm10,%ymm10,%ymm10
5147	vpalignr	$4,%ymm6,%ymm6,%ymm6
5148	vpaddd	%ymm5,%ymm1,%ymm1
5149	vpxor	%ymm1,%ymm13,%ymm13
5150	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
5151	vpaddd	%ymm13,%ymm9,%ymm9
5152	vpxor	%ymm9,%ymm5,%ymm5
5153	vpsrld	$20,%ymm5,%ymm3
5154	vpslld	$12,%ymm5,%ymm5
5155	vpxor	%ymm3,%ymm5,%ymm5
5156	vpaddd	%ymm5,%ymm1,%ymm1
5157	vpxor	%ymm1,%ymm13,%ymm13
5158	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
5159	vpaddd	%ymm13,%ymm9,%ymm9
5160	vpxor	%ymm9,%ymm5,%ymm5
5161	vpslld	$7,%ymm5,%ymm3
5162	vpsrld	$25,%ymm5,%ymm5
5163	vpxor	%ymm3,%ymm5,%ymm5
5164	vpalignr	$12,%ymm13,%ymm13,%ymm13
5165	vpalignr	$8,%ymm9,%ymm9,%ymm9
5166	vpalignr	$4,%ymm5,%ymm5,%ymm5
5167	vpaddd	%ymm4,%ymm0,%ymm0
5168	vpxor	%ymm0,%ymm12,%ymm12
5169	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
5170	vpaddd	%ymm12,%ymm8,%ymm8
5171	vpxor	%ymm8,%ymm4,%ymm4
5172	vpsrld	$20,%ymm4,%ymm3
5173	vpslld	$12,%ymm4,%ymm4
5174	vpxor	%ymm3,%ymm4,%ymm4
5175	vpaddd	%ymm4,%ymm0,%ymm0
5176	vpxor	%ymm0,%ymm12,%ymm12
5177	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
5178	vpaddd	%ymm12,%ymm8,%ymm8
5179	vpxor	%ymm8,%ymm4,%ymm4
5180	vpslld	$7,%ymm4,%ymm3
5181	vpsrld	$25,%ymm4,%ymm4
5182	vpxor	%ymm3,%ymm4,%ymm4
5183	vpalignr	$12,%ymm12,%ymm12,%ymm12
5184	vpalignr	$8,%ymm8,%ymm8,%ymm8
5185	vpalignr	$4,%ymm4,%ymm4,%ymm4
5186	addq	0+0(%rbx),%r10
5187	adcq	8+0(%rbx),%r11
5188	adcq	$1,%r12
5189	movq	0+0+0(%rbp),%rax
5190	movq	%rax,%r15
5191	mulq	%r10
5192	movq	%rax,%r13
5193	movq	%rdx,%r14
5194	movq	0+0+0(%rbp),%rax
5195	mulq	%r11
5196	imulq	%r12,%r15
5197	addq	%rax,%r14
5198	adcq	%rdx,%r15
5199	movq	8+0+0(%rbp),%rax
5200	movq	%rax,%r9
5201	mulq	%r10
5202	addq	%rax,%r14
5203	adcq	$0,%rdx
5204	movq	%rdx,%r10
5205	movq	8+0+0(%rbp),%rax
5206	mulq	%r11
5207	addq	%rax,%r15
5208	adcq	$0,%rdx
5209	imulq	%r12,%r9
5210	addq	%r10,%r15
5211	adcq	%rdx,%r9
5212	movq	%r13,%r10
5213	movq	%r14,%r11
5214	movq	%r15,%r12
5215	andq	$3,%r12
5216	movq	%r15,%r13
5217	andq	$-4,%r13
5218	movq	%r9,%r14
5219	shrdq	$2,%r9,%r15
5220	shrq	$2,%r9
5221	addq	%r13,%r15
5222	adcq	%r14,%r9
5223	addq	%r15,%r10
5224	adcq	%r9,%r11
5225	adcq	$0,%r12
5226
5227	leaq	16(%rbx),%rbx
5228	incq	%r8
5229	vpaddd	%ymm6,%ymm2,%ymm2
5230	vpxor	%ymm2,%ymm14,%ymm14
5231	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
5232	vpaddd	%ymm14,%ymm10,%ymm10
5233	vpxor	%ymm10,%ymm6,%ymm6
5234	vpsrld	$20,%ymm6,%ymm3
5235	vpslld	$12,%ymm6,%ymm6
5236	vpxor	%ymm3,%ymm6,%ymm6
5237	vpaddd	%ymm6,%ymm2,%ymm2
5238	vpxor	%ymm2,%ymm14,%ymm14
5239	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
5240	vpaddd	%ymm14,%ymm10,%ymm10
5241	vpxor	%ymm10,%ymm6,%ymm6
5242	vpslld	$7,%ymm6,%ymm3
5243	vpsrld	$25,%ymm6,%ymm6
5244	vpxor	%ymm3,%ymm6,%ymm6
5245	vpalignr	$4,%ymm14,%ymm14,%ymm14
5246	vpalignr	$8,%ymm10,%ymm10,%ymm10
5247	vpalignr	$12,%ymm6,%ymm6,%ymm6
5248	vpaddd	%ymm5,%ymm1,%ymm1
5249	vpxor	%ymm1,%ymm13,%ymm13
5250	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
5251	vpaddd	%ymm13,%ymm9,%ymm9
5252	vpxor	%ymm9,%ymm5,%ymm5
5253	vpsrld	$20,%ymm5,%ymm3
5254	vpslld	$12,%ymm5,%ymm5
5255	vpxor	%ymm3,%ymm5,%ymm5
5256	vpaddd	%ymm5,%ymm1,%ymm1
5257	vpxor	%ymm1,%ymm13,%ymm13
5258	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
5259	vpaddd	%ymm13,%ymm9,%ymm9
5260	vpxor	%ymm9,%ymm5,%ymm5
5261	vpslld	$7,%ymm5,%ymm3
5262	vpsrld	$25,%ymm5,%ymm5
5263	vpxor	%ymm3,%ymm5,%ymm5
5264	vpalignr	$4,%ymm13,%ymm13,%ymm13
5265	vpalignr	$8,%ymm9,%ymm9,%ymm9
5266	vpalignr	$12,%ymm5,%ymm5,%ymm5
5267	vpaddd	%ymm4,%ymm0,%ymm0
5268	vpxor	%ymm0,%ymm12,%ymm12
5269	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
5270	vpaddd	%ymm12,%ymm8,%ymm8
5271	vpxor	%ymm8,%ymm4,%ymm4
5272	vpsrld	$20,%ymm4,%ymm3
5273	vpslld	$12,%ymm4,%ymm4
5274	vpxor	%ymm3,%ymm4,%ymm4
5275	vpaddd	%ymm4,%ymm0,%ymm0
5276	vpxor	%ymm0,%ymm12,%ymm12
5277	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
5278	vpaddd	%ymm12,%ymm8,%ymm8
5279	vpxor	%ymm8,%ymm4,%ymm4
5280	vpslld	$7,%ymm4,%ymm3
5281	vpsrld	$25,%ymm4,%ymm4
5282	vpxor	%ymm3,%ymm4,%ymm4
5283	vpalignr	$4,%ymm12,%ymm12,%ymm12
5284	vpalignr	$8,%ymm8,%ymm8,%ymm8
5285	vpalignr	$12,%ymm4,%ymm4,%ymm4
5286
5287	cmpq	%rcx,%r8
5288	jb	.Lopen_avx2_tail_384_rounds_and_x2hash
5289	cmpq	$10,%r8
5290	jne	.Lopen_avx2_tail_384_rounds_and_x1hash
5291	movq	%rbx,%r8
5292	subq	%rsi,%rbx
5293	movq	%rbx,%rcx
5294	movq	0+128(%rbp),%rbx
5295.Lopen_avx2_384_tail_hash:
5296	addq	$16,%rcx
5297	cmpq	%rbx,%rcx
5298	jg	.Lopen_avx2_384_tail_done
5299	addq	0+0(%r8),%r10
5300	adcq	8+0(%r8),%r11
5301	adcq	$1,%r12
5302	movq	0+0+0(%rbp),%rdx
5303	movq	%rdx,%r15
5304	mulxq	%r10,%r13,%r14
5305	mulxq	%r11,%rax,%rdx
5306	imulq	%r12,%r15
5307	addq	%rax,%r14
5308	adcq	%rdx,%r15
5309	movq	8+0+0(%rbp),%rdx
5310	mulxq	%r10,%r10,%rax
5311	addq	%r10,%r14
5312	mulxq	%r11,%r11,%r9
5313	adcq	%r11,%r15
5314	adcq	$0,%r9
5315	imulq	%r12,%rdx
5316	addq	%rax,%r15
5317	adcq	%rdx,%r9
5318	movq	%r13,%r10
5319	movq	%r14,%r11
5320	movq	%r15,%r12
5321	andq	$3,%r12
5322	movq	%r15,%r13
5323	andq	$-4,%r13
5324	movq	%r9,%r14
5325	shrdq	$2,%r9,%r15
5326	shrq	$2,%r9
5327	addq	%r13,%r15
5328	adcq	%r14,%r9
5329	addq	%r15,%r10
5330	adcq	%r9,%r11
5331	adcq	$0,%r12
5332
5333	leaq	16(%r8),%r8
5334	jmp	.Lopen_avx2_384_tail_hash
5335.Lopen_avx2_384_tail_done:
5336	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
5337	vpaddd	0+64(%rbp),%ymm6,%ymm6
5338	vpaddd	0+96(%rbp),%ymm10,%ymm10
5339	vpaddd	0+224(%rbp),%ymm14,%ymm14
5340	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
5341	vpaddd	0+64(%rbp),%ymm5,%ymm5
5342	vpaddd	0+96(%rbp),%ymm9,%ymm9
5343	vpaddd	0+192(%rbp),%ymm13,%ymm13
5344	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
5345	vpaddd	0+64(%rbp),%ymm4,%ymm4
5346	vpaddd	0+96(%rbp),%ymm8,%ymm8
5347	vpaddd	0+160(%rbp),%ymm12,%ymm12
5348	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
5349	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
5350	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
5351	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
5352	vpxor	0+0(%rsi),%ymm3,%ymm3
5353	vpxor	32+0(%rsi),%ymm2,%ymm2
5354	vpxor	64+0(%rsi),%ymm6,%ymm6
5355	vpxor	96+0(%rsi),%ymm10,%ymm10
5356	vmovdqu	%ymm3,0+0(%rdi)
5357	vmovdqu	%ymm2,32+0(%rdi)
5358	vmovdqu	%ymm6,64+0(%rdi)
5359	vmovdqu	%ymm10,96+0(%rdi)
5360	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
5361	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
5362	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
5363	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
5364	vpxor	0+128(%rsi),%ymm3,%ymm3
5365	vpxor	32+128(%rsi),%ymm1,%ymm1
5366	vpxor	64+128(%rsi),%ymm5,%ymm5
5367	vpxor	96+128(%rsi),%ymm9,%ymm9
5368	vmovdqu	%ymm3,0+128(%rdi)
5369	vmovdqu	%ymm1,32+128(%rdi)
5370	vmovdqu	%ymm5,64+128(%rdi)
5371	vmovdqu	%ymm9,96+128(%rdi)
5372	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
5373	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
5374	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
5375	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
5376	vmovdqa	%ymm3,%ymm8
5377
5378	leaq	256(%rsi),%rsi
5379	leaq	256(%rdi),%rdi
5380	subq	$256,%rbx
5381	jmp	.Lopen_avx2_tail_128_xor
5382
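// 385..512 bytes left: four interleaved 2-block states.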
5383.Lopen_avx2_tail_512:
5384	vmovdqa	.Lchacha20_consts(%rip),%ymm0
5385	vmovdqa	0+64(%rbp),%ymm4
5386	vmovdqa	0+96(%rbp),%ymm8
5387	vmovdqa	%ymm0,%ymm1
5388	vmovdqa	%ymm4,%ymm5
5389	vmovdqa	%ymm8,%ymm9
5390	vmovdqa	%ymm0,%ymm2
5391	vmovdqa	%ymm4,%ymm6
5392	vmovdqa	%ymm8,%ymm10
5393	vmovdqa	%ymm0,%ymm3
5394	vmovdqa	%ymm4,%ymm7
5395	vmovdqa	%ymm8,%ymm11
5396	vmovdqa	.Lavx2_inc(%rip),%ymm12
5397	vpaddd	0+160(%rbp),%ymm12,%ymm15
5398	vpaddd	%ymm15,%ymm12,%ymm14
5399	vpaddd	%ymm14,%ymm12,%ymm13
5400	vpaddd	%ymm13,%ymm12,%ymm12
5401	vmovdqa	%ymm15,0+256(%rbp)
5402	vmovdqa	%ymm14,0+224(%rbp)
5403	vmovdqa	%ymm13,0+192(%rbp)
5404	vmovdqa	%ymm12,0+160(%rbp)
5405
5406	xorq	%rcx,%rcx
5407	movq	%rsi,%r8
5408.Lopen_avx2_tail_512_rounds_and_x2hash:
5409	addq	0+0(%r8),%r10
5410	adcq	8+0(%r8),%r11
5411	adcq	$1,%r12
5412	movq	0+0+0(%rbp),%rax
5413	movq	%rax,%r15
5414	mulq	%r10
5415	movq	%rax,%r13
5416	movq	%rdx,%r14
5417	movq	0+0+0(%rbp),%rax
5418	mulq	%r11
5419	imulq	%r12,%r15
5420	addq	%rax,%r14
5421	adcq	%rdx,%r15
5422	movq	8+0+0(%rbp),%rax
5423	movq	%rax,%r9
5424	mulq	%r10
5425	addq	%rax,%r14
5426	adcq	$0,%rdx
5427	movq	%rdx,%r10
5428	movq	8+0+0(%rbp),%rax
5429	mulq	%r11
5430	addq	%rax,%r15
5431	adcq	$0,%rdx
5432	imulq	%r12,%r9
5433	addq	%r10,%r15
5434	adcq	%rdx,%r9
5435	movq	%r13,%r10
5436	movq	%r14,%r11
5437	movq	%r15,%r12
5438	andq	$3,%r12
5439	movq	%r15,%r13
5440	andq	$-4,%r13
5441	movq	%r9,%r14
5442	shrdq	$2,%r9,%r15
5443	shrq	$2,%r9
5444	addq	%r13,%r15
5445	adcq	%r14,%r9
5446	addq	%r15,%r10
5447	adcq	%r9,%r11
5448	adcq	$0,%r12
5449
5450	leaq	16(%r8),%r8
5451.Lopen_avx2_tail_512_rounds_and_x1hash:
5452	vmovdqa	%ymm8,0+128(%rbp)
5453	vmovdqa	.Lrol16(%rip),%ymm8
5454	vpaddd	%ymm7,%ymm3,%ymm3
5455	vpaddd	%ymm6,%ymm2,%ymm2
5456	vpaddd	%ymm5,%ymm1,%ymm1
5457	vpaddd	%ymm4,%ymm0,%ymm0
5458	vpxor	%ymm3,%ymm15,%ymm15
5459	vpxor	%ymm2,%ymm14,%ymm14
5460	vpxor	%ymm1,%ymm13,%ymm13
5461	vpxor	%ymm0,%ymm12,%ymm12
5462	vpshufb	%ymm8,%ymm15,%ymm15
5463	vpshufb	%ymm8,%ymm14,%ymm14
5464	vpshufb	%ymm8,%ymm13,%ymm13
5465	vpshufb	%ymm8,%ymm12,%ymm12
5466	vpaddd	%ymm15,%ymm11,%ymm11
5467	vpaddd	%ymm14,%ymm10,%ymm10
5468	vpaddd	%ymm13,%ymm9,%ymm9
5469	vpaddd	0+128(%rbp),%ymm12,%ymm8
5470	vpxor	%ymm11,%ymm7,%ymm7
5471	vpxor	%ymm10,%ymm6,%ymm6
5472	vpxor	%ymm9,%ymm5,%ymm5
5473	vpxor	%ymm8,%ymm4,%ymm4
5474	vmovdqa	%ymm8,0+128(%rbp)
5475	vpsrld	$20,%ymm7,%ymm8
5476	vpslld	$32-20,%ymm7,%ymm7
5477	vpxor	%ymm8,%ymm7,%ymm7
5478	vpsrld	$20,%ymm6,%ymm8
5479	vpslld	$32-20,%ymm6,%ymm6
5480	vpxor	%ymm8,%ymm6,%ymm6
5481	vpsrld	$20,%ymm5,%ymm8
5482	vpslld	$32-20,%ymm5,%ymm5
5483	vpxor	%ymm8,%ymm5,%ymm5
5484	vpsrld	$20,%ymm4,%ymm8
5485	vpslld	$32-20,%ymm4,%ymm4
5486	vpxor	%ymm8,%ymm4,%ymm4
5487	vmovdqa	.Lrol8(%rip),%ymm8
5488	vpaddd	%ymm7,%ymm3,%ymm3
5489	addq	0+0(%r8),%r10
5490	adcq	8+0(%r8),%r11
5491	adcq	$1,%r12
5492	movq	0+0+0(%rbp),%rdx
5493	movq	%rdx,%r15
5494	mulxq	%r10,%r13,%r14
5495	mulxq	%r11,%rax,%rdx
5496	imulq	%r12,%r15
5497	addq	%rax,%r14
5498	adcq	%rdx,%r15
5499	movq	8+0+0(%rbp),%rdx
5500	mulxq	%r10,%r10,%rax
5501	addq	%r10,%r14
5502	mulxq	%r11,%r11,%r9
5503	adcq	%r11,%r15
5504	adcq	$0,%r9
5505	imulq	%r12,%rdx
5506	addq	%rax,%r15
5507	adcq	%rdx,%r9
5508	movq	%r13,%r10
5509	movq	%r14,%r11
5510	movq	%r15,%r12
5511	andq	$3,%r12
5512	movq	%r15,%r13
5513	andq	$-4,%r13
5514	movq	%r9,%r14
5515	shrdq	$2,%r9,%r15
5516	shrq	$2,%r9
5517	addq	%r13,%r15
5518	adcq	%r14,%r9
5519	addq	%r15,%r10
5520	adcq	%r9,%r11
5521	adcq	$0,%r12
5522	vpaddd	%ymm6,%ymm2,%ymm2
5523	vpaddd	%ymm5,%ymm1,%ymm1
5524	vpaddd	%ymm4,%ymm0,%ymm0
5525	vpxor	%ymm3,%ymm15,%ymm15
5526	vpxor	%ymm2,%ymm14,%ymm14
5527	vpxor	%ymm1,%ymm13,%ymm13
5528	vpxor	%ymm0,%ymm12,%ymm12
5529	vpshufb	%ymm8,%ymm15,%ymm15
5530	vpshufb	%ymm8,%ymm14,%ymm14
5531	vpshufb	%ymm8,%ymm13,%ymm13
5532	vpshufb	%ymm8,%ymm12,%ymm12
5533	vpaddd	%ymm15,%ymm11,%ymm11
5534	vpaddd	%ymm14,%ymm10,%ymm10
5535	vpaddd	%ymm13,%ymm9,%ymm9
5536	vpaddd	0+128(%rbp),%ymm12,%ymm8
5537	vpxor	%ymm11,%ymm7,%ymm7
5538	vpxor	%ymm10,%ymm6,%ymm6
5539	vpxor	%ymm9,%ymm5,%ymm5
5540	vpxor	%ymm8,%ymm4,%ymm4
5541	vmovdqa	%ymm8,0+128(%rbp)
5542	vpsrld	$25,%ymm7,%ymm8
5543	vpslld	$32-25,%ymm7,%ymm7
5544	vpxor	%ymm8,%ymm7,%ymm7
5545	vpsrld	$25,%ymm6,%ymm8
5546	vpslld	$32-25,%ymm6,%ymm6
5547	vpxor	%ymm8,%ymm6,%ymm6
5548	vpsrld	$25,%ymm5,%ymm8
5549	vpslld	$32-25,%ymm5,%ymm5
5550	vpxor	%ymm8,%ymm5,%ymm5
5551	vpsrld	$25,%ymm4,%ymm8
5552	vpslld	$32-25,%ymm4,%ymm4
5553	vpxor	%ymm8,%ymm4,%ymm4
5554	vmovdqa	0+128(%rbp),%ymm8
5555	vpalignr	$4,%ymm7,%ymm7,%ymm7
5556	vpalignr	$8,%ymm11,%ymm11,%ymm11
5557	vpalignr	$12,%ymm15,%ymm15,%ymm15
5558	vpalignr	$4,%ymm6,%ymm6,%ymm6
5559	vpalignr	$8,%ymm10,%ymm10,%ymm10
5560	vpalignr	$12,%ymm14,%ymm14,%ymm14
5561	vpalignr	$4,%ymm5,%ymm5,%ymm5
5562	vpalignr	$8,%ymm9,%ymm9,%ymm9
5563	vpalignr	$12,%ymm13,%ymm13,%ymm13
5564	vpalignr	$4,%ymm4,%ymm4,%ymm4
5565	vpalignr	$8,%ymm8,%ymm8,%ymm8
5566	vpalignr	$12,%ymm12,%ymm12,%ymm12
5567	vmovdqa	%ymm8,0+128(%rbp)
5568	vmovdqa	.Lrol16(%rip),%ymm8
5569	vpaddd	%ymm7,%ymm3,%ymm3
5570	addq	0+16(%r8),%r10
5571	adcq	8+16(%r8),%r11
5572	adcq	$1,%r12
5573	movq	0+0+0(%rbp),%rdx
5574	movq	%rdx,%r15
5575	mulxq	%r10,%r13,%r14
5576	mulxq	%r11,%rax,%rdx
5577	imulq	%r12,%r15
5578	addq	%rax,%r14
5579	adcq	%rdx,%r15
5580	movq	8+0+0(%rbp),%rdx
5581	mulxq	%r10,%r10,%rax
5582	addq	%r10,%r14
5583	mulxq	%r11,%r11,%r9
5584	adcq	%r11,%r15
5585	adcq	$0,%r9
5586	imulq	%r12,%rdx
5587	addq	%rax,%r15
5588	adcq	%rdx,%r9
5589	movq	%r13,%r10
5590	movq	%r14,%r11
5591	movq	%r15,%r12
5592	andq	$3,%r12
5593	movq	%r15,%r13
5594	andq	$-4,%r13
5595	movq	%r9,%r14
5596	shrdq	$2,%r9,%r15
5597	shrq	$2,%r9
5598	addq	%r13,%r15
5599	adcq	%r14,%r9
5600	addq	%r15,%r10
5601	adcq	%r9,%r11
5602	adcq	$0,%r12
5603
5604	leaq	32(%r8),%r8
5605	vpaddd	%ymm6,%ymm2,%ymm2
5606	vpaddd	%ymm5,%ymm1,%ymm1
5607	vpaddd	%ymm4,%ymm0,%ymm0
5608	vpxor	%ymm3,%ymm15,%ymm15
5609	vpxor	%ymm2,%ymm14,%ymm14
5610	vpxor	%ymm1,%ymm13,%ymm13
5611	vpxor	%ymm0,%ymm12,%ymm12
5612	vpshufb	%ymm8,%ymm15,%ymm15
5613	vpshufb	%ymm8,%ymm14,%ymm14
5614	vpshufb	%ymm8,%ymm13,%ymm13
5615	vpshufb	%ymm8,%ymm12,%ymm12
5616	vpaddd	%ymm15,%ymm11,%ymm11
5617	vpaddd	%ymm14,%ymm10,%ymm10
5618	vpaddd	%ymm13,%ymm9,%ymm9
5619	vpaddd	0+128(%rbp),%ymm12,%ymm8
5620	vpxor	%ymm11,%ymm7,%ymm7
5621	vpxor	%ymm10,%ymm6,%ymm6
5622	vpxor	%ymm9,%ymm5,%ymm5
5623	vpxor	%ymm8,%ymm4,%ymm4
5624	vmovdqa	%ymm8,0+128(%rbp)
5625	vpsrld	$20,%ymm7,%ymm8
5626	vpslld	$32-20,%ymm7,%ymm7
5627	vpxor	%ymm8,%ymm7,%ymm7
5628	vpsrld	$20,%ymm6,%ymm8
5629	vpslld	$32-20,%ymm6,%ymm6
5630	vpxor	%ymm8,%ymm6,%ymm6
5631	vpsrld	$20,%ymm5,%ymm8
5632	vpslld	$32-20,%ymm5,%ymm5
5633	vpxor	%ymm8,%ymm5,%ymm5
5634	vpsrld	$20,%ymm4,%ymm8
5635	vpslld	$32-20,%ymm4,%ymm4
5636	vpxor	%ymm8,%ymm4,%ymm4
5637	vmovdqa	.Lrol8(%rip),%ymm8
5638	vpaddd	%ymm7,%ymm3,%ymm3
5639	vpaddd	%ymm6,%ymm2,%ymm2
5640	vpaddd	%ymm5,%ymm1,%ymm1
5641	vpaddd	%ymm4,%ymm0,%ymm0
5642	vpxor	%ymm3,%ymm15,%ymm15
5643	vpxor	%ymm2,%ymm14,%ymm14
5644	vpxor	%ymm1,%ymm13,%ymm13
5645	vpxor	%ymm0,%ymm12,%ymm12
5646	vpshufb	%ymm8,%ymm15,%ymm15
5647	vpshufb	%ymm8,%ymm14,%ymm14
5648	vpshufb	%ymm8,%ymm13,%ymm13
5649	vpshufb	%ymm8,%ymm12,%ymm12
5650	vpaddd	%ymm15,%ymm11,%ymm11
5651	vpaddd	%ymm14,%ymm10,%ymm10
5652	vpaddd	%ymm13,%ymm9,%ymm9
5653	vpaddd	0+128(%rbp),%ymm12,%ymm8
5654	vpxor	%ymm11,%ymm7,%ymm7
5655	vpxor	%ymm10,%ymm6,%ymm6
5656	vpxor	%ymm9,%ymm5,%ymm5
5657	vpxor	%ymm8,%ymm4,%ymm4
5658	vmovdqa	%ymm8,0+128(%rbp)
5659	vpsrld	$25,%ymm7,%ymm8
5660	vpslld	$32-25,%ymm7,%ymm7
5661	vpxor	%ymm8,%ymm7,%ymm7
5662	vpsrld	$25,%ymm6,%ymm8
5663	vpslld	$32-25,%ymm6,%ymm6
5664	vpxor	%ymm8,%ymm6,%ymm6
5665	vpsrld	$25,%ymm5,%ymm8
5666	vpslld	$32-25,%ymm5,%ymm5
5667	vpxor	%ymm8,%ymm5,%ymm5
5668	vpsrld	$25,%ymm4,%ymm8
5669	vpslld	$32-25,%ymm4,%ymm4
5670	vpxor	%ymm8,%ymm4,%ymm4
5671	vmovdqa	0+128(%rbp),%ymm8
5672	vpalignr	$12,%ymm7,%ymm7,%ymm7
5673	vpalignr	$8,%ymm11,%ymm11,%ymm11
5674	vpalignr	$4,%ymm15,%ymm15,%ymm15
5675	vpalignr	$12,%ymm6,%ymm6,%ymm6
5676	vpalignr	$8,%ymm10,%ymm10,%ymm10
5677	vpalignr	$4,%ymm14,%ymm14,%ymm14
5678	vpalignr	$12,%ymm5,%ymm5,%ymm5
5679	vpalignr	$8,%ymm9,%ymm9,%ymm9
5680	vpalignr	$4,%ymm13,%ymm13,%ymm13
5681	vpalignr	$12,%ymm4,%ymm4,%ymm4
5682	vpalignr	$8,%ymm8,%ymm8,%ymm8
5683	vpalignr	$4,%ymm12,%ymm12,%ymm12
5684
5685	incq	%rcx
5686	cmpq	$4,%rcx
5687	jl	.Lopen_avx2_tail_512_rounds_and_x2hash
5688	cmpq	$10,%rcx
5689	jne	.Lopen_avx2_tail_512_rounds_and_x1hash
5690	movq	%rbx,%rcx
5691	subq	$384,%rcx
5692	andq	$-16,%rcx
5693.Lopen_avx2_tail_512_hash:
5694	testq	%rcx,%rcx
5695	je	.Lopen_avx2_tail_512_done
5696	addq	0+0(%r8),%r10
5697	adcq	8+0(%r8),%r11
5698	adcq	$1,%r12
5699	movq	0+0+0(%rbp),%rdx
5700	movq	%rdx,%r15
5701	mulxq	%r10,%r13,%r14
5702	mulxq	%r11,%rax,%rdx
5703	imulq	%r12,%r15
5704	addq	%rax,%r14
5705	adcq	%rdx,%r15
5706	movq	8+0+0(%rbp),%rdx
5707	mulxq	%r10,%r10,%rax
5708	addq	%r10,%r14
5709	mulxq	%r11,%r11,%r9
5710	adcq	%r11,%r15
5711	adcq	$0,%r9
5712	imulq	%r12,%rdx
5713	addq	%rax,%r15
5714	adcq	%rdx,%r9
5715	movq	%r13,%r10
5716	movq	%r14,%r11
5717	movq	%r15,%r12
5718	andq	$3,%r12
5719	movq	%r15,%r13
5720	andq	$-4,%r13
5721	movq	%r9,%r14
5722	shrdq	$2,%r9,%r15
5723	shrq	$2,%r9
5724	addq	%r13,%r15
5725	adcq	%r14,%r9
5726	addq	%r15,%r10
5727	adcq	%r9,%r11
5728	adcq	$0,%r12
5729
5730	leaq	16(%r8),%r8
5731	subq	$16,%rcx
5732	jmp	.Lopen_avx2_tail_512_hash
5733.Lopen_avx2_tail_512_done:
5734	vpaddd	.Lchacha20_consts(%rip),%ymm3,%ymm3
5735	vpaddd	0+64(%rbp),%ymm7,%ymm7
5736	vpaddd	0+96(%rbp),%ymm11,%ymm11
5737	vpaddd	0+256(%rbp),%ymm15,%ymm15
5738	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
5739	vpaddd	0+64(%rbp),%ymm6,%ymm6
5740	vpaddd	0+96(%rbp),%ymm10,%ymm10
5741	vpaddd	0+224(%rbp),%ymm14,%ymm14
5742	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
5743	vpaddd	0+64(%rbp),%ymm5,%ymm5
5744	vpaddd	0+96(%rbp),%ymm9,%ymm9
5745	vpaddd	0+192(%rbp),%ymm13,%ymm13
5746	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
5747	vpaddd	0+64(%rbp),%ymm4,%ymm4
5748	vpaddd	0+96(%rbp),%ymm8,%ymm8
5749	vpaddd	0+160(%rbp),%ymm12,%ymm12
5750
5751	vmovdqa	%ymm0,0+128(%rbp)
5752	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
5753	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
5754	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
5755	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
5756	vpxor	0+0(%rsi),%ymm0,%ymm0
5757	vpxor	32+0(%rsi),%ymm3,%ymm3
5758	vpxor	64+0(%rsi),%ymm7,%ymm7
5759	vpxor	96+0(%rsi),%ymm11,%ymm11
5760	vmovdqu	%ymm0,0+0(%rdi)
5761	vmovdqu	%ymm3,32+0(%rdi)
5762	vmovdqu	%ymm7,64+0(%rdi)
5763	vmovdqu	%ymm11,96+0(%rdi)
5764
5765	vmovdqa	0+128(%rbp),%ymm0
5766	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
5767	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
5768	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
5769	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
5770	vpxor	0+128(%rsi),%ymm3,%ymm3
5771	vpxor	32+128(%rsi),%ymm2,%ymm2
5772	vpxor	64+128(%rsi),%ymm6,%ymm6
5773	vpxor	96+128(%rsi),%ymm10,%ymm10
5774	vmovdqu	%ymm3,0+128(%rdi)
5775	vmovdqu	%ymm2,32+128(%rdi)
5776	vmovdqu	%ymm6,64+128(%rdi)
5777	vmovdqu	%ymm10,96+128(%rdi)
5778	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
5779	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
5780	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
5781	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
5782	vpxor	0+256(%rsi),%ymm3,%ymm3
5783	vpxor	32+256(%rsi),%ymm1,%ymm1
5784	vpxor	64+256(%rsi),%ymm5,%ymm5
5785	vpxor	96+256(%rsi),%ymm9,%ymm9
5786	vmovdqu	%ymm3,0+256(%rdi)
5787	vmovdqu	%ymm1,32+256(%rdi)
5788	vmovdqu	%ymm5,64+256(%rdi)
5789	vmovdqu	%ymm9,96+256(%rdi)
5790	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
5791	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
5792	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
5793	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
5794	vmovdqa	%ymm3,%ymm8
5795
5796	leaq	384(%rsi),%rsi
5797	leaq	384(%rdi),%rdi
5798	subq	$384,%rbx
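// XOR the remaining keystream against the input 32 bytes at a time, then 16, leaving any
// final partial block to the shared SSE tail.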
5799.Lopen_avx2_tail_128_xor:
5800	cmpq	$32,%rbx
5801	jb	.Lopen_avx2_tail_32_xor
5802	subq	$32,%rbx
5803	vpxor	(%rsi),%ymm0,%ymm0
5804	vmovdqu	%ymm0,(%rdi)
5805	leaq	32(%rsi),%rsi
5806	leaq	32(%rdi),%rdi
5807	vmovdqa	%ymm4,%ymm0
5808	vmovdqa	%ymm8,%ymm4
5809	vmovdqa	%ymm12,%ymm8
5810	jmp	.Lopen_avx2_tail_128_xor
5811.Lopen_avx2_tail_32_xor:
5812	cmpq	$16,%rbx
5813	vmovdqa	%xmm0,%xmm1
5814	jb	.Lopen_avx2_exit
5815	subq	$16,%rbx
5816
5817	vpxor	(%rsi),%xmm0,%xmm1
5818	vmovdqu	%xmm1,(%rdi)
5819	leaq	16(%rsi),%rsi
5820	leaq	16(%rdi),%rdi
5821	vperm2i128	$0x11,%ymm0,%ymm0,%ymm0
5822	vmovdqa	%xmm0,%xmm1
5823.Lopen_avx2_exit:
5824	vzeroupper
5825	jmp	.Lopen_sse_tail_16
5826
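// Whole message fits in 192 bytes: two ChaCha20 states suffice; the first 32 bytes of
// keystream, clamped, become the Poly1305 key.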
5827.Lopen_avx2_192:
5828	vmovdqa	%ymm0,%ymm1
5829	vmovdqa	%ymm0,%ymm2
5830	vmovdqa	%ymm4,%ymm5
5831	vmovdqa	%ymm4,%ymm6
5832	vmovdqa	%ymm8,%ymm9
5833	vmovdqa	%ymm8,%ymm10
5834	vpaddd	.Lavx2_inc(%rip),%ymm12,%ymm13
5835	vmovdqa	%ymm12,%ymm11
5836	vmovdqa	%ymm13,%ymm15
5837	movq	$10,%r10
5838.Lopen_avx2_192_rounds:
5839	vpaddd	%ymm4,%ymm0,%ymm0
5840	vpxor	%ymm0,%ymm12,%ymm12
5841	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
5842	vpaddd	%ymm12,%ymm8,%ymm8
5843	vpxor	%ymm8,%ymm4,%ymm4
5844	vpsrld	$20,%ymm4,%ymm3
5845	vpslld	$12,%ymm4,%ymm4
5846	vpxor	%ymm3,%ymm4,%ymm4
5847	vpaddd	%ymm4,%ymm0,%ymm0
5848	vpxor	%ymm0,%ymm12,%ymm12
5849	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
5850	vpaddd	%ymm12,%ymm8,%ymm8
5851	vpxor	%ymm8,%ymm4,%ymm4
5852	vpslld	$7,%ymm4,%ymm3
5853	vpsrld	$25,%ymm4,%ymm4
5854	vpxor	%ymm3,%ymm4,%ymm4
5855	vpalignr	$12,%ymm12,%ymm12,%ymm12
5856	vpalignr	$8,%ymm8,%ymm8,%ymm8
5857	vpalignr	$4,%ymm4,%ymm4,%ymm4
5858	vpaddd	%ymm5,%ymm1,%ymm1
5859	vpxor	%ymm1,%ymm13,%ymm13
5860	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
5861	vpaddd	%ymm13,%ymm9,%ymm9
5862	vpxor	%ymm9,%ymm5,%ymm5
5863	vpsrld	$20,%ymm5,%ymm3
5864	vpslld	$12,%ymm5,%ymm5
5865	vpxor	%ymm3,%ymm5,%ymm5
5866	vpaddd	%ymm5,%ymm1,%ymm1
5867	vpxor	%ymm1,%ymm13,%ymm13
5868	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
5869	vpaddd	%ymm13,%ymm9,%ymm9
5870	vpxor	%ymm9,%ymm5,%ymm5
5871	vpslld	$7,%ymm5,%ymm3
5872	vpsrld	$25,%ymm5,%ymm5
5873	vpxor	%ymm3,%ymm5,%ymm5
5874	vpalignr	$12,%ymm13,%ymm13,%ymm13
5875	vpalignr	$8,%ymm9,%ymm9,%ymm9
5876	vpalignr	$4,%ymm5,%ymm5,%ymm5
5877	vpaddd	%ymm4,%ymm0,%ymm0
5878	vpxor	%ymm0,%ymm12,%ymm12
5879	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
5880	vpaddd	%ymm12,%ymm8,%ymm8
5881	vpxor	%ymm8,%ymm4,%ymm4
5882	vpsrld	$20,%ymm4,%ymm3
5883	vpslld	$12,%ymm4,%ymm4
5884	vpxor	%ymm3,%ymm4,%ymm4
5885	vpaddd	%ymm4,%ymm0,%ymm0
5886	vpxor	%ymm0,%ymm12,%ymm12
5887	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
5888	vpaddd	%ymm12,%ymm8,%ymm8
5889	vpxor	%ymm8,%ymm4,%ymm4
5890	vpslld	$7,%ymm4,%ymm3
5891	vpsrld	$25,%ymm4,%ymm4
5892	vpxor	%ymm3,%ymm4,%ymm4
5893	vpalignr	$4,%ymm12,%ymm12,%ymm12
5894	vpalignr	$8,%ymm8,%ymm8,%ymm8
5895	vpalignr	$12,%ymm4,%ymm4,%ymm4
5896	vpaddd	%ymm5,%ymm1,%ymm1
5897	vpxor	%ymm1,%ymm13,%ymm13
5898	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
5899	vpaddd	%ymm13,%ymm9,%ymm9
5900	vpxor	%ymm9,%ymm5,%ymm5
5901	vpsrld	$20,%ymm5,%ymm3
5902	vpslld	$12,%ymm5,%ymm5
5903	vpxor	%ymm3,%ymm5,%ymm5
5904	vpaddd	%ymm5,%ymm1,%ymm1
5905	vpxor	%ymm1,%ymm13,%ymm13
5906	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
5907	vpaddd	%ymm13,%ymm9,%ymm9
5908	vpxor	%ymm9,%ymm5,%ymm5
5909	vpslld	$7,%ymm5,%ymm3
5910	vpsrld	$25,%ymm5,%ymm5
5911	vpxor	%ymm3,%ymm5,%ymm5
5912	vpalignr	$4,%ymm13,%ymm13,%ymm13
5913	vpalignr	$8,%ymm9,%ymm9,%ymm9
5914	vpalignr	$12,%ymm5,%ymm5,%ymm5
5915
5916	decq	%r10
5917	jne	.Lopen_avx2_192_rounds
5918	vpaddd	%ymm2,%ymm0,%ymm0
5919	vpaddd	%ymm2,%ymm1,%ymm1
5920	vpaddd	%ymm6,%ymm4,%ymm4
5921	vpaddd	%ymm6,%ymm5,%ymm5
5922	vpaddd	%ymm10,%ymm8,%ymm8
5923	vpaddd	%ymm10,%ymm9,%ymm9
5924	vpaddd	%ymm11,%ymm12,%ymm12
5925	vpaddd	%ymm15,%ymm13,%ymm13
5926	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
5927
5928	vpand	.Lclamp(%rip),%ymm3,%ymm3
5929	vmovdqa	%ymm3,0+0(%rbp)
5930
5931	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
5932	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
5933	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
5934	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
5935	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
5936	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
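// Short path: hash the AD, then alternate Poly1305 over the ciphertext with 32-byte XORs
// of the precomputed keystream.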
5937.Lopen_avx2_short:
5938	movq	%r8,%r8
5939	call	poly_hash_ad_internal
5940.Lopen_avx2_short_hash_and_xor_loop:
5941	cmpq	$32,%rbx
5942	jb	.Lopen_avx2_short_tail_32
5943	subq	$32,%rbx
5944	addq	0+0(%rsi),%r10
5945	adcq	8+0(%rsi),%r11
5946	adcq	$1,%r12
5947	movq	0+0+0(%rbp),%rax
5948	movq	%rax,%r15
5949	mulq	%r10
5950	movq	%rax,%r13
5951	movq	%rdx,%r14
5952	movq	0+0+0(%rbp),%rax
5953	mulq	%r11
5954	imulq	%r12,%r15
5955	addq	%rax,%r14
5956	adcq	%rdx,%r15
5957	movq	8+0+0(%rbp),%rax
5958	movq	%rax,%r9
5959	mulq	%r10
5960	addq	%rax,%r14
5961	adcq	$0,%rdx
5962	movq	%rdx,%r10
5963	movq	8+0+0(%rbp),%rax
5964	mulq	%r11
5965	addq	%rax,%r15
5966	adcq	$0,%rdx
5967	imulq	%r12,%r9
5968	addq	%r10,%r15
5969	adcq	%rdx,%r9
5970	movq	%r13,%r10
5971	movq	%r14,%r11
5972	movq	%r15,%r12
5973	andq	$3,%r12
5974	movq	%r15,%r13
5975	andq	$-4,%r13
5976	movq	%r9,%r14
5977	shrdq	$2,%r9,%r15
5978	shrq	$2,%r9
5979	addq	%r13,%r15
5980	adcq	%r14,%r9
5981	addq	%r15,%r10
5982	adcq	%r9,%r11
5983	adcq	$0,%r12
5984	addq	0+16(%rsi),%r10
5985	adcq	8+16(%rsi),%r11
5986	adcq	$1,%r12
5987	movq	0+0+0(%rbp),%rax
5988	movq	%rax,%r15
5989	mulq	%r10
5990	movq	%rax,%r13
5991	movq	%rdx,%r14
5992	movq	0+0+0(%rbp),%rax
5993	mulq	%r11
5994	imulq	%r12,%r15
5995	addq	%rax,%r14
5996	adcq	%rdx,%r15
5997	movq	8+0+0(%rbp),%rax
5998	movq	%rax,%r9
5999	mulq	%r10
6000	addq	%rax,%r14
6001	adcq	$0,%rdx
6002	movq	%rdx,%r10
6003	movq	8+0+0(%rbp),%rax
6004	mulq	%r11
6005	addq	%rax,%r15
6006	adcq	$0,%rdx
6007	imulq	%r12,%r9
6008	addq	%r10,%r15
6009	adcq	%rdx,%r9
6010	movq	%r13,%r10
6011	movq	%r14,%r11
6012	movq	%r15,%r12
6013	andq	$3,%r12
6014	movq	%r15,%r13
6015	andq	$-4,%r13
6016	movq	%r9,%r14
6017	shrdq	$2,%r9,%r15
6018	shrq	$2,%r9
6019	addq	%r13,%r15
6020	adcq	%r14,%r9
6021	addq	%r15,%r10
6022	adcq	%r9,%r11
6023	adcq	$0,%r12
6024
6025
6026	vpxor	(%rsi),%ymm0,%ymm0
6027	vmovdqu	%ymm0,(%rdi)
6028	leaq	32(%rsi),%rsi
6029	leaq	32(%rdi),%rdi
6030
6031	vmovdqa	%ymm4,%ymm0
6032	vmovdqa	%ymm8,%ymm4
6033	vmovdqa	%ymm12,%ymm8
6034	vmovdqa	%ymm1,%ymm12
6035	vmovdqa	%ymm5,%ymm1
6036	vmovdqa	%ymm9,%ymm5
6037	vmovdqa	%ymm13,%ymm9
6038	vmovdqa	%ymm2,%ymm13
6039	vmovdqa	%ymm6,%ymm2
6040	jmp	.Lopen_avx2_short_hash_and_xor_loop
6041.Lopen_avx2_short_tail_32:
6042	cmpq	$16,%rbx
6043	vmovdqa	%xmm0,%xmm1
6044	jb	.Lopen_avx2_short_tail_32_exit
6045	subq	$16,%rbx
6046	addq	0+0(%rsi),%r10
6047	adcq	8+0(%rsi),%r11
6048	adcq	$1,%r12
6049	movq	0+0+0(%rbp),%rax
6050	movq	%rax,%r15
6051	mulq	%r10
6052	movq	%rax,%r13
6053	movq	%rdx,%r14
6054	movq	0+0+0(%rbp),%rax
6055	mulq	%r11
6056	imulq	%r12,%r15
6057	addq	%rax,%r14
6058	adcq	%rdx,%r15
6059	movq	8+0+0(%rbp),%rax
6060	movq	%rax,%r9
6061	mulq	%r10
6062	addq	%rax,%r14
6063	adcq	$0,%rdx
6064	movq	%rdx,%r10
6065	movq	8+0+0(%rbp),%rax
6066	mulq	%r11
6067	addq	%rax,%r15
6068	adcq	$0,%rdx
6069	imulq	%r12,%r9
6070	addq	%r10,%r15
6071	adcq	%rdx,%r9
6072	movq	%r13,%r10
6073	movq	%r14,%r11
6074	movq	%r15,%r12
6075	andq	$3,%r12
6076	movq	%r15,%r13
6077	andq	$-4,%r13
6078	movq	%r9,%r14
6079	shrdq	$2,%r9,%r15
6080	shrq	$2,%r9
6081	addq	%r13,%r15
6082	adcq	%r14,%r9
6083	addq	%r15,%r10
6084	adcq	%r9,%r11
6085	adcq	$0,%r12
6086
6087	vpxor	(%rsi),%xmm0,%xmm3
6088	vmovdqu	%xmm3,(%rdi)
6089	leaq	16(%rsi),%rsi
6090	leaq	16(%rdi),%rdi
6091	vextracti128	$1,%ymm0,%xmm1
6092.Lopen_avx2_short_tail_32_exit:
6093	vzeroupper
6094	jmp	.Lopen_sse_tail_16
6095
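// Whole message fits in 320 bytes: three ChaCha20 states; as above, the first 32 clamped
// keystream bytes become the Poly1305 key.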
6096.Lopen_avx2_320:
6097	vmovdqa	%ymm0,%ymm1
6098	vmovdqa	%ymm0,%ymm2
6099	vmovdqa	%ymm4,%ymm5
6100	vmovdqa	%ymm4,%ymm6
6101	vmovdqa	%ymm8,%ymm9
6102	vmovdqa	%ymm8,%ymm10
6103	vpaddd	.Lavx2_inc(%rip),%ymm12,%ymm13
6104	vpaddd	.Lavx2_inc(%rip),%ymm13,%ymm14
6105	vmovdqa	%ymm4,%ymm7
6106	vmovdqa	%ymm8,%ymm11
6107	vmovdqa	%ymm12,0+160(%rbp)
6108	vmovdqa	%ymm13,0+192(%rbp)
6109	vmovdqa	%ymm14,0+224(%rbp)
6110	movq	$10,%r10
6111.Lopen_avx2_320_rounds:
6112	vpaddd	%ymm4,%ymm0,%ymm0
6113	vpxor	%ymm0,%ymm12,%ymm12
6114	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
6115	vpaddd	%ymm12,%ymm8,%ymm8
6116	vpxor	%ymm8,%ymm4,%ymm4
6117	vpsrld	$20,%ymm4,%ymm3
6118	vpslld	$12,%ymm4,%ymm4
6119	vpxor	%ymm3,%ymm4,%ymm4
6120	vpaddd	%ymm4,%ymm0,%ymm0
6121	vpxor	%ymm0,%ymm12,%ymm12
6122	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
6123	vpaddd	%ymm12,%ymm8,%ymm8
6124	vpxor	%ymm8,%ymm4,%ymm4
6125	vpslld	$7,%ymm4,%ymm3
6126	vpsrld	$25,%ymm4,%ymm4
6127	vpxor	%ymm3,%ymm4,%ymm4
6128	vpalignr	$12,%ymm12,%ymm12,%ymm12
6129	vpalignr	$8,%ymm8,%ymm8,%ymm8
6130	vpalignr	$4,%ymm4,%ymm4,%ymm4
6131	vpaddd	%ymm5,%ymm1,%ymm1
6132	vpxor	%ymm1,%ymm13,%ymm13
6133	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
6134	vpaddd	%ymm13,%ymm9,%ymm9
6135	vpxor	%ymm9,%ymm5,%ymm5
6136	vpsrld	$20,%ymm5,%ymm3
6137	vpslld	$12,%ymm5,%ymm5
6138	vpxor	%ymm3,%ymm5,%ymm5
6139	vpaddd	%ymm5,%ymm1,%ymm1
6140	vpxor	%ymm1,%ymm13,%ymm13
6141	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
6142	vpaddd	%ymm13,%ymm9,%ymm9
6143	vpxor	%ymm9,%ymm5,%ymm5
6144	vpslld	$7,%ymm5,%ymm3
6145	vpsrld	$25,%ymm5,%ymm5
6146	vpxor	%ymm3,%ymm5,%ymm5
6147	vpalignr	$12,%ymm13,%ymm13,%ymm13
6148	vpalignr	$8,%ymm9,%ymm9,%ymm9
6149	vpalignr	$4,%ymm5,%ymm5,%ymm5
6150	vpaddd	%ymm6,%ymm2,%ymm2
6151	vpxor	%ymm2,%ymm14,%ymm14
6152	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
6153	vpaddd	%ymm14,%ymm10,%ymm10
6154	vpxor	%ymm10,%ymm6,%ymm6
6155	vpsrld	$20,%ymm6,%ymm3
6156	vpslld	$12,%ymm6,%ymm6
6157	vpxor	%ymm3,%ymm6,%ymm6
6158	vpaddd	%ymm6,%ymm2,%ymm2
6159	vpxor	%ymm2,%ymm14,%ymm14
6160	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
6161	vpaddd	%ymm14,%ymm10,%ymm10
6162	vpxor	%ymm10,%ymm6,%ymm6
6163	vpslld	$7,%ymm6,%ymm3
6164	vpsrld	$25,%ymm6,%ymm6
6165	vpxor	%ymm3,%ymm6,%ymm6
6166	vpalignr	$12,%ymm14,%ymm14,%ymm14
6167	vpalignr	$8,%ymm10,%ymm10,%ymm10
6168	vpalignr	$4,%ymm6,%ymm6,%ymm6
6169	vpaddd	%ymm4,%ymm0,%ymm0
6170	vpxor	%ymm0,%ymm12,%ymm12
6171	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
6172	vpaddd	%ymm12,%ymm8,%ymm8
6173	vpxor	%ymm8,%ymm4,%ymm4
6174	vpsrld	$20,%ymm4,%ymm3
6175	vpslld	$12,%ymm4,%ymm4
6176	vpxor	%ymm3,%ymm4,%ymm4
6177	vpaddd	%ymm4,%ymm0,%ymm0
6178	vpxor	%ymm0,%ymm12,%ymm12
6179	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
6180	vpaddd	%ymm12,%ymm8,%ymm8
6181	vpxor	%ymm8,%ymm4,%ymm4
6182	vpslld	$7,%ymm4,%ymm3
6183	vpsrld	$25,%ymm4,%ymm4
6184	vpxor	%ymm3,%ymm4,%ymm4
6185	vpalignr	$4,%ymm12,%ymm12,%ymm12
6186	vpalignr	$8,%ymm8,%ymm8,%ymm8
6187	vpalignr	$12,%ymm4,%ymm4,%ymm4
6188	vpaddd	%ymm5,%ymm1,%ymm1
6189	vpxor	%ymm1,%ymm13,%ymm13
6190	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
6191	vpaddd	%ymm13,%ymm9,%ymm9
6192	vpxor	%ymm9,%ymm5,%ymm5
6193	vpsrld	$20,%ymm5,%ymm3
6194	vpslld	$12,%ymm5,%ymm5
6195	vpxor	%ymm3,%ymm5,%ymm5
6196	vpaddd	%ymm5,%ymm1,%ymm1
6197	vpxor	%ymm1,%ymm13,%ymm13
6198	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
6199	vpaddd	%ymm13,%ymm9,%ymm9
6200	vpxor	%ymm9,%ymm5,%ymm5
6201	vpslld	$7,%ymm5,%ymm3
6202	vpsrld	$25,%ymm5,%ymm5
6203	vpxor	%ymm3,%ymm5,%ymm5
6204	vpalignr	$4,%ymm13,%ymm13,%ymm13
6205	vpalignr	$8,%ymm9,%ymm9,%ymm9
6206	vpalignr	$12,%ymm5,%ymm5,%ymm5
6207	vpaddd	%ymm6,%ymm2,%ymm2
6208	vpxor	%ymm2,%ymm14,%ymm14
6209	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
6210	vpaddd	%ymm14,%ymm10,%ymm10
6211	vpxor	%ymm10,%ymm6,%ymm6
6212	vpsrld	$20,%ymm6,%ymm3
6213	vpslld	$12,%ymm6,%ymm6
6214	vpxor	%ymm3,%ymm6,%ymm6
6215	vpaddd	%ymm6,%ymm2,%ymm2
6216	vpxor	%ymm2,%ymm14,%ymm14
6217	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
6218	vpaddd	%ymm14,%ymm10,%ymm10
6219	vpxor	%ymm10,%ymm6,%ymm6
6220	vpslld	$7,%ymm6,%ymm3
6221	vpsrld	$25,%ymm6,%ymm6
6222	vpxor	%ymm3,%ymm6,%ymm6
6223	vpalignr	$4,%ymm14,%ymm14,%ymm14
6224	vpalignr	$8,%ymm10,%ymm10,%ymm10
6225	vpalignr	$12,%ymm6,%ymm6,%ymm6
6226
6227	decq	%r10
6228	jne	.Lopen_avx2_320_rounds
6229	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
6230	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
6231	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
6232	vpaddd	%ymm7,%ymm4,%ymm4
6233	vpaddd	%ymm7,%ymm5,%ymm5
6234	vpaddd	%ymm7,%ymm6,%ymm6
6235	vpaddd	%ymm11,%ymm8,%ymm8
6236	vpaddd	%ymm11,%ymm9,%ymm9
6237	vpaddd	%ymm11,%ymm10,%ymm10
6238	vpaddd	0+160(%rbp),%ymm12,%ymm12
6239	vpaddd	0+192(%rbp),%ymm13,%ymm13
6240	vpaddd	0+224(%rbp),%ymm14,%ymm14
6241	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
6242
6243	vpand	.Lclamp(%rip),%ymm3,%ymm3
6244	vmovdqa	%ymm3,0+0(%rbp)
6245
6246	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
6247	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
6248	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
6249	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
6250	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
6251	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
6252	vperm2i128	$0x02,%ymm2,%ymm6,%ymm9
6253	vperm2i128	$0x02,%ymm10,%ymm14,%ymm13
6254	vperm2i128	$0x13,%ymm2,%ymm6,%ymm2
6255	vperm2i128	$0x13,%ymm10,%ymm14,%ymm6
6256	jmp	.Lopen_avx2_short
6257.size	chacha20_poly1305_open_avx2, .-chacha20_poly1305_open_avx2
6258.cfi_endproc
6259
6260
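// AVX2 seal (encrypt) entry. The CFI directives below appear to describe a frame set up by
// the dispatching entry point rather than by pushes in this function itself.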
6261.type	chacha20_poly1305_seal_avx2,@function
6262.align	64
6263chacha20_poly1305_seal_avx2:
6264.cfi_startproc
6265
6266
6267.cfi_adjust_cfa_offset	8
6268.cfi_offset	%rbp,-16
6269.cfi_adjust_cfa_offset	8
6270.cfi_offset	%rbx,-24
6271.cfi_adjust_cfa_offset	8
6272.cfi_offset	%r12,-32
6273.cfi_adjust_cfa_offset	8
6274.cfi_offset	%r13,-40
6275.cfi_adjust_cfa_offset	8
6276.cfi_offset	%r14,-48
6277.cfi_adjust_cfa_offset	8
6278.cfi_offset	%r15,-56
6279.cfi_adjust_cfa_offset	8
6280.cfi_offset	%r9,-64
6281.cfi_adjust_cfa_offset	288 + 32
6282
6283	vzeroupper
6284	vmovdqa	.Lchacha20_consts(%rip),%ymm0
6285	vbroadcasti128	0(%r9),%ymm4
6286	vbroadcasti128	16(%r9),%ymm8
6287	vbroadcasti128	32(%r9),%ymm12
6288	vpaddd	.Lavx2_init(%rip),%ymm12,%ymm12
6289	cmpq	$192,%rbx
6290	jbe	.Lseal_avx2_192
6291	cmpq	$320,%rbx
6292	jbe	.Lseal_avx2_320
6293	vmovdqa	%ymm0,%ymm1
6294	vmovdqa	%ymm0,%ymm2
6295	vmovdqa	%ymm0,%ymm3
6296	vmovdqa	%ymm4,%ymm5
6297	vmovdqa	%ymm4,%ymm6
6298	vmovdqa	%ymm4,%ymm7
6299	vmovdqa	%ymm4,0+64(%rbp)
6300	vmovdqa	%ymm8,%ymm9
6301	vmovdqa	%ymm8,%ymm10
6302	vmovdqa	%ymm8,%ymm11
6303	vmovdqa	%ymm8,0+96(%rbp)
6304	vmovdqa	%ymm12,%ymm15
6305	vpaddd	.Lavx2_inc(%rip),%ymm15,%ymm14
6306	vpaddd	.Lavx2_inc(%rip),%ymm14,%ymm13
6307	vpaddd	.Lavx2_inc(%rip),%ymm13,%ymm12
6308	vmovdqa	%ymm12,0+160(%rbp)
6309	vmovdqa	%ymm13,0+192(%rbp)
6310	vmovdqa	%ymm14,0+224(%rbp)
6311	vmovdqa	%ymm15,0+256(%rbp)
6312	movq	$10,%r10
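// 10 ChaCha20 double-rounds over four interleaved 2-block states: 512 bytes of initial keystream.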
6313.Lseal_avx2_init_rounds:
6314	vmovdqa	%ymm8,0+128(%rbp)
6315	vmovdqa	.Lrol16(%rip),%ymm8
6316	vpaddd	%ymm7,%ymm3,%ymm3
6317	vpaddd	%ymm6,%ymm2,%ymm2
6318	vpaddd	%ymm5,%ymm1,%ymm1
6319	vpaddd	%ymm4,%ymm0,%ymm0
6320	vpxor	%ymm3,%ymm15,%ymm15
6321	vpxor	%ymm2,%ymm14,%ymm14
6322	vpxor	%ymm1,%ymm13,%ymm13
6323	vpxor	%ymm0,%ymm12,%ymm12
6324	vpshufb	%ymm8,%ymm15,%ymm15
6325	vpshufb	%ymm8,%ymm14,%ymm14
6326	vpshufb	%ymm8,%ymm13,%ymm13
6327	vpshufb	%ymm8,%ymm12,%ymm12
6328	vpaddd	%ymm15,%ymm11,%ymm11
6329	vpaddd	%ymm14,%ymm10,%ymm10
6330	vpaddd	%ymm13,%ymm9,%ymm9
6331	vpaddd	0+128(%rbp),%ymm12,%ymm8
6332	vpxor	%ymm11,%ymm7,%ymm7
6333	vpxor	%ymm10,%ymm6,%ymm6
6334	vpxor	%ymm9,%ymm5,%ymm5
6335	vpxor	%ymm8,%ymm4,%ymm4
6336	vmovdqa	%ymm8,0+128(%rbp)
6337	vpsrld	$20,%ymm7,%ymm8
6338	vpslld	$32-20,%ymm7,%ymm7
6339	vpxor	%ymm8,%ymm7,%ymm7
6340	vpsrld	$20,%ymm6,%ymm8
6341	vpslld	$32-20,%ymm6,%ymm6
6342	vpxor	%ymm8,%ymm6,%ymm6
6343	vpsrld	$20,%ymm5,%ymm8
6344	vpslld	$32-20,%ymm5,%ymm5
6345	vpxor	%ymm8,%ymm5,%ymm5
6346	vpsrld	$20,%ymm4,%ymm8
6347	vpslld	$32-20,%ymm4,%ymm4
6348	vpxor	%ymm8,%ymm4,%ymm4
6349	vmovdqa	.Lrol8(%rip),%ymm8
6350	vpaddd	%ymm7,%ymm3,%ymm3
6351	vpaddd	%ymm6,%ymm2,%ymm2
6352	vpaddd	%ymm5,%ymm1,%ymm1
6353	vpaddd	%ymm4,%ymm0,%ymm0
6354	vpxor	%ymm3,%ymm15,%ymm15
6355	vpxor	%ymm2,%ymm14,%ymm14
6356	vpxor	%ymm1,%ymm13,%ymm13
6357	vpxor	%ymm0,%ymm12,%ymm12
6358	vpshufb	%ymm8,%ymm15,%ymm15
6359	vpshufb	%ymm8,%ymm14,%ymm14
6360	vpshufb	%ymm8,%ymm13,%ymm13
6361	vpshufb	%ymm8,%ymm12,%ymm12
6362	vpaddd	%ymm15,%ymm11,%ymm11
6363	vpaddd	%ymm14,%ymm10,%ymm10
6364	vpaddd	%ymm13,%ymm9,%ymm9
6365	vpaddd	0+128(%rbp),%ymm12,%ymm8
6366	vpxor	%ymm11,%ymm7,%ymm7
6367	vpxor	%ymm10,%ymm6,%ymm6
6368	vpxor	%ymm9,%ymm5,%ymm5
6369	vpxor	%ymm8,%ymm4,%ymm4
6370	vmovdqa	%ymm8,0+128(%rbp)
6371	vpsrld	$25,%ymm7,%ymm8
6372	vpslld	$32-25,%ymm7,%ymm7
6373	vpxor	%ymm8,%ymm7,%ymm7
6374	vpsrld	$25,%ymm6,%ymm8
6375	vpslld	$32-25,%ymm6,%ymm6
6376	vpxor	%ymm8,%ymm6,%ymm6
6377	vpsrld	$25,%ymm5,%ymm8
6378	vpslld	$32-25,%ymm5,%ymm5
6379	vpxor	%ymm8,%ymm5,%ymm5
6380	vpsrld	$25,%ymm4,%ymm8
6381	vpslld	$32-25,%ymm4,%ymm4
6382	vpxor	%ymm8,%ymm4,%ymm4
6383	vmovdqa	0+128(%rbp),%ymm8
6384	vpalignr	$4,%ymm7,%ymm7,%ymm7
6385	vpalignr	$8,%ymm11,%ymm11,%ymm11
6386	vpalignr	$12,%ymm15,%ymm15,%ymm15
6387	vpalignr	$4,%ymm6,%ymm6,%ymm6
6388	vpalignr	$8,%ymm10,%ymm10,%ymm10
6389	vpalignr	$12,%ymm14,%ymm14,%ymm14
6390	vpalignr	$4,%ymm5,%ymm5,%ymm5
6391	vpalignr	$8,%ymm9,%ymm9,%ymm9
6392	vpalignr	$12,%ymm13,%ymm13,%ymm13
6393	vpalignr	$4,%ymm4,%ymm4,%ymm4
6394	vpalignr	$8,%ymm8,%ymm8,%ymm8
6395	vpalignr	$12,%ymm12,%ymm12,%ymm12
6396	vmovdqa	%ymm8,0+128(%rbp)
6397	vmovdqa	.Lrol16(%rip),%ymm8
6398	vpaddd	%ymm7,%ymm3,%ymm3
6399	vpaddd	%ymm6,%ymm2,%ymm2
6400	vpaddd	%ymm5,%ymm1,%ymm1
6401	vpaddd	%ymm4,%ymm0,%ymm0
6402	vpxor	%ymm3,%ymm15,%ymm15
6403	vpxor	%ymm2,%ymm14,%ymm14
6404	vpxor	%ymm1,%ymm13,%ymm13
6405	vpxor	%ymm0,%ymm12,%ymm12
6406	vpshufb	%ymm8,%ymm15,%ymm15
6407	vpshufb	%ymm8,%ymm14,%ymm14
6408	vpshufb	%ymm8,%ymm13,%ymm13
6409	vpshufb	%ymm8,%ymm12,%ymm12
6410	vpaddd	%ymm15,%ymm11,%ymm11
6411	vpaddd	%ymm14,%ymm10,%ymm10
6412	vpaddd	%ymm13,%ymm9,%ymm9
6413	vpaddd	0+128(%rbp),%ymm12,%ymm8
6414	vpxor	%ymm11,%ymm7,%ymm7
6415	vpxor	%ymm10,%ymm6,%ymm6
6416	vpxor	%ymm9,%ymm5,%ymm5
6417	vpxor	%ymm8,%ymm4,%ymm4
6418	vmovdqa	%ymm8,0+128(%rbp)
6419	vpsrld	$20,%ymm7,%ymm8
6420	vpslld	$32-20,%ymm7,%ymm7
6421	vpxor	%ymm8,%ymm7,%ymm7
6422	vpsrld	$20,%ymm6,%ymm8
6423	vpslld	$32-20,%ymm6,%ymm6
6424	vpxor	%ymm8,%ymm6,%ymm6
6425	vpsrld	$20,%ymm5,%ymm8
6426	vpslld	$32-20,%ymm5,%ymm5
6427	vpxor	%ymm8,%ymm5,%ymm5
6428	vpsrld	$20,%ymm4,%ymm8
6429	vpslld	$32-20,%ymm4,%ymm4
6430	vpxor	%ymm8,%ymm4,%ymm4
6431	vmovdqa	.Lrol8(%rip),%ymm8
6432	vpaddd	%ymm7,%ymm3,%ymm3
6433	vpaddd	%ymm6,%ymm2,%ymm2
6434	vpaddd	%ymm5,%ymm1,%ymm1
6435	vpaddd	%ymm4,%ymm0,%ymm0
6436	vpxor	%ymm3,%ymm15,%ymm15
6437	vpxor	%ymm2,%ymm14,%ymm14
6438	vpxor	%ymm1,%ymm13,%ymm13
6439	vpxor	%ymm0,%ymm12,%ymm12
6440	vpshufb	%ymm8,%ymm15,%ymm15
6441	vpshufb	%ymm8,%ymm14,%ymm14
6442	vpshufb	%ymm8,%ymm13,%ymm13
6443	vpshufb	%ymm8,%ymm12,%ymm12
6444	vpaddd	%ymm15,%ymm11,%ymm11
6445	vpaddd	%ymm14,%ymm10,%ymm10
6446	vpaddd	%ymm13,%ymm9,%ymm9
6447	vpaddd	0+128(%rbp),%ymm12,%ymm8
6448	vpxor	%ymm11,%ymm7,%ymm7
6449	vpxor	%ymm10,%ymm6,%ymm6
6450	vpxor	%ymm9,%ymm5,%ymm5
6451	vpxor	%ymm8,%ymm4,%ymm4
6452	vmovdqa	%ymm8,0+128(%rbp)
6453	vpsrld	$25,%ymm7,%ymm8
6454	vpslld	$32-25,%ymm7,%ymm7
6455	vpxor	%ymm8,%ymm7,%ymm7
6456	vpsrld	$25,%ymm6,%ymm8
6457	vpslld	$32-25,%ymm6,%ymm6
6458	vpxor	%ymm8,%ymm6,%ymm6
6459	vpsrld	$25,%ymm5,%ymm8
6460	vpslld	$32-25,%ymm5,%ymm5
6461	vpxor	%ymm8,%ymm5,%ymm5
6462	vpsrld	$25,%ymm4,%ymm8
6463	vpslld	$32-25,%ymm4,%ymm4
6464	vpxor	%ymm8,%ymm4,%ymm4
6465	vmovdqa	0+128(%rbp),%ymm8
6466	vpalignr	$12,%ymm7,%ymm7,%ymm7
6467	vpalignr	$8,%ymm11,%ymm11,%ymm11
6468	vpalignr	$4,%ymm15,%ymm15,%ymm15
6469	vpalignr	$12,%ymm6,%ymm6,%ymm6
6470	vpalignr	$8,%ymm10,%ymm10,%ymm10
6471	vpalignr	$4,%ymm14,%ymm14,%ymm14
6472	vpalignr	$12,%ymm5,%ymm5,%ymm5
6473	vpalignr	$8,%ymm9,%ymm9,%ymm9
6474	vpalignr	$4,%ymm13,%ymm13,%ymm13
6475	vpalignr	$12,%ymm4,%ymm4,%ymm4
6476	vpalignr	$8,%ymm8,%ymm8,%ymm8
6477	vpalignr	$4,%ymm12,%ymm12,%ymm12
6478
6479	decq	%r10
6480	jnz	.Lseal_avx2_init_rounds
6481	vpaddd	.Lchacha20_consts(%rip),%ymm3,%ymm3
6482	vpaddd	0+64(%rbp),%ymm7,%ymm7
6483	vpaddd	0+96(%rbp),%ymm11,%ymm11
6484	vpaddd	0+256(%rbp),%ymm15,%ymm15
6485	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
6486	vpaddd	0+64(%rbp),%ymm6,%ymm6
6487	vpaddd	0+96(%rbp),%ymm10,%ymm10
6488	vpaddd	0+224(%rbp),%ymm14,%ymm14
6489	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
6490	vpaddd	0+64(%rbp),%ymm5,%ymm5
6491	vpaddd	0+96(%rbp),%ymm9,%ymm9
6492	vpaddd	0+192(%rbp),%ymm13,%ymm13
6493	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
6494	vpaddd	0+64(%rbp),%ymm4,%ymm4
6495	vpaddd	0+96(%rbp),%ymm8,%ymm8
6496	vpaddd	0+160(%rbp),%ymm12,%ymm12
6497
6498	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
6499	vperm2i128	$0x02,%ymm3,%ymm7,%ymm15
6500	vperm2i128	$0x13,%ymm3,%ymm7,%ymm3
6501	vpand	.Lclamp(%rip),%ymm15,%ymm15
6502	vmovdqa	%ymm15,0+0(%rbp)
6503	movq	%r8,%r8
6504	call	poly_hash_ad_internal
6505
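// The remaining keystream from the initial batch encrypts the first 320 bytes of plaintext
// before the main loop takes over.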
6506	vpxor	0(%rsi),%ymm3,%ymm3
6507	vpxor	32(%rsi),%ymm11,%ymm11
6508	vmovdqu	%ymm3,0(%rdi)
6509	vmovdqu	%ymm11,32(%rdi)
6510	vperm2i128	$0x02,%ymm2,%ymm6,%ymm15
6511	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
6512	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
6513	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
6514	vpxor	0+64(%rsi),%ymm15,%ymm15
6515	vpxor	32+64(%rsi),%ymm2,%ymm2
6516	vpxor	64+64(%rsi),%ymm6,%ymm6
6517	vpxor	96+64(%rsi),%ymm10,%ymm10
6518	vmovdqu	%ymm15,0+64(%rdi)
6519	vmovdqu	%ymm2,32+64(%rdi)
6520	vmovdqu	%ymm6,64+64(%rdi)
6521	vmovdqu	%ymm10,96+64(%rdi)
6522	vperm2i128	$0x02,%ymm1,%ymm5,%ymm15
6523	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
6524	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
6525	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
6526	vpxor	0+192(%rsi),%ymm15,%ymm15
6527	vpxor	32+192(%rsi),%ymm1,%ymm1
6528	vpxor	64+192(%rsi),%ymm5,%ymm5
6529	vpxor	96+192(%rsi),%ymm9,%ymm9
6530	vmovdqu	%ymm15,0+192(%rdi)
6531	vmovdqu	%ymm1,32+192(%rdi)
6532	vmovdqu	%ymm5,64+192(%rdi)
6533	vmovdqu	%ymm9,96+192(%rdi)
6534	vperm2i128	$0x13,%ymm0,%ymm4,%ymm15
6535	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
6536	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
6537	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
6538	vmovdqa	%ymm15,%ymm8
6539
6540	leaq	320(%rsi),%rsi
6541	subq	$320,%rbx
6542	movq	$320,%rcx
6543	cmpq	$128,%rbx
6544	jbe	.Lseal_avx2_short_hash_remainder
6545	vpxor	0(%rsi),%ymm0,%ymm0
6546	vpxor	32(%rsi),%ymm4,%ymm4
6547	vpxor	64(%rsi),%ymm8,%ymm8
6548	vpxor	96(%rsi),%ymm12,%ymm12
6549	vmovdqu	%ymm0,320(%rdi)
6550	vmovdqu	%ymm4,352(%rdi)
6551	vmovdqu	%ymm8,384(%rdi)
6552	vmovdqu	%ymm12,416(%rdi)
6553	leaq	128(%rsi),%rsi
6554	subq	$128,%rbx
6555	movq	$8,%rcx
6556	movq	$2,%r8
6557	cmpq	$128,%rbx
6558	jbe	.Lseal_avx2_tail_128
6559	cmpq	$256,%rbx
6560	jbe	.Lseal_avx2_tail_256
6561	cmpq	$384,%rbx
6562	jbe	.Lseal_avx2_tail_384
6563	cmpq	$512,%rbx
6564	jbe	.Lseal_avx2_tail_512
6565	vmovdqa	.Lchacha20_consts(%rip),%ymm0
6566	vmovdqa	0+64(%rbp),%ymm4
6567	vmovdqa	0+96(%rbp),%ymm8
6568	vmovdqa	%ymm0,%ymm1
6569	vmovdqa	%ymm4,%ymm5
6570	vmovdqa	%ymm8,%ymm9
6571	vmovdqa	%ymm0,%ymm2
6572	vmovdqa	%ymm4,%ymm6
6573	vmovdqa	%ymm8,%ymm10
6574	vmovdqa	%ymm0,%ymm3
6575	vmovdqa	%ymm4,%ymm7
6576	vmovdqa	%ymm8,%ymm11
6577	vmovdqa	.Lavx2_inc(%rip),%ymm12
6578	vpaddd	0+160(%rbp),%ymm12,%ymm15
6579	vpaddd	%ymm15,%ymm12,%ymm14
6580	vpaddd	%ymm14,%ymm12,%ymm13
6581	vpaddd	%ymm13,%ymm12,%ymm12
6582	vmovdqa	%ymm15,0+256(%rbp)
6583	vmovdqa	%ymm14,0+224(%rbp)
6584	vmovdqa	%ymm13,0+192(%rbp)
6585	vmovdqa	%ymm12,0+160(%rbp)
6586	vmovdqa	%ymm8,0+128(%rbp)
6587	vmovdqa	.Lrol16(%rip),%ymm8
6588	vpaddd	%ymm7,%ymm3,%ymm3
6589	vpaddd	%ymm6,%ymm2,%ymm2
6590	vpaddd	%ymm5,%ymm1,%ymm1
6591	vpaddd	%ymm4,%ymm0,%ymm0
6592	vpxor	%ymm3,%ymm15,%ymm15
6593	vpxor	%ymm2,%ymm14,%ymm14
6594	vpxor	%ymm1,%ymm13,%ymm13
6595	vpxor	%ymm0,%ymm12,%ymm12
6596	vpshufb	%ymm8,%ymm15,%ymm15
6597	vpshufb	%ymm8,%ymm14,%ymm14
6598	vpshufb	%ymm8,%ymm13,%ymm13
6599	vpshufb	%ymm8,%ymm12,%ymm12
6600	vpaddd	%ymm15,%ymm11,%ymm11
6601	vpaddd	%ymm14,%ymm10,%ymm10
6602	vpaddd	%ymm13,%ymm9,%ymm9
6603	vpaddd	0+128(%rbp),%ymm12,%ymm8
6604	vpxor	%ymm11,%ymm7,%ymm7
6605	vpxor	%ymm10,%ymm6,%ymm6
6606	vpxor	%ymm9,%ymm5,%ymm5
6607	vpxor	%ymm8,%ymm4,%ymm4
6608	vmovdqa	%ymm8,0+128(%rbp)
6609	vpsrld	$20,%ymm7,%ymm8
6610	vpslld	$32-20,%ymm7,%ymm7
6611	vpxor	%ymm8,%ymm7,%ymm7
6612	vpsrld	$20,%ymm6,%ymm8
6613	vpslld	$32-20,%ymm6,%ymm6
6614	vpxor	%ymm8,%ymm6,%ymm6
6615	vpsrld	$20,%ymm5,%ymm8
6616	vpslld	$32-20,%ymm5,%ymm5
6617	vpxor	%ymm8,%ymm5,%ymm5
6618	vpsrld	$20,%ymm4,%ymm8
6619	vpslld	$32-20,%ymm4,%ymm4
6620	vpxor	%ymm8,%ymm4,%ymm4
6621	vmovdqa	.Lrol8(%rip),%ymm8
6622	vpaddd	%ymm7,%ymm3,%ymm3
6623	vpaddd	%ymm6,%ymm2,%ymm2
6624	vpaddd	%ymm5,%ymm1,%ymm1
6625	vpaddd	%ymm4,%ymm0,%ymm0
6626	vpxor	%ymm3,%ymm15,%ymm15
6627	vpxor	%ymm2,%ymm14,%ymm14
6628	vpxor	%ymm1,%ymm13,%ymm13
6629	vpxor	%ymm0,%ymm12,%ymm12
6630	vpshufb	%ymm8,%ymm15,%ymm15
6631	vpshufb	%ymm8,%ymm14,%ymm14
6632	vpshufb	%ymm8,%ymm13,%ymm13
6633	vpshufb	%ymm8,%ymm12,%ymm12
6634	vpaddd	%ymm15,%ymm11,%ymm11
6635	vpaddd	%ymm14,%ymm10,%ymm10
6636	vpaddd	%ymm13,%ymm9,%ymm9
6637	vpaddd	0+128(%rbp),%ymm12,%ymm8
6638	vpxor	%ymm11,%ymm7,%ymm7
6639	vpxor	%ymm10,%ymm6,%ymm6
6640	vpxor	%ymm9,%ymm5,%ymm5
6641	vpxor	%ymm8,%ymm4,%ymm4
6642	vmovdqa	%ymm8,0+128(%rbp)
6643	vpsrld	$25,%ymm7,%ymm8
6644	vpslld	$32-25,%ymm7,%ymm7
6645	vpxor	%ymm8,%ymm7,%ymm7
6646	vpsrld	$25,%ymm6,%ymm8
6647	vpslld	$32-25,%ymm6,%ymm6
6648	vpxor	%ymm8,%ymm6,%ymm6
6649	vpsrld	$25,%ymm5,%ymm8
6650	vpslld	$32-25,%ymm5,%ymm5
6651	vpxor	%ymm8,%ymm5,%ymm5
6652	vpsrld	$25,%ymm4,%ymm8
6653	vpslld	$32-25,%ymm4,%ymm4
6654	vpxor	%ymm8,%ymm4,%ymm4
6655	vmovdqa	0+128(%rbp),%ymm8
6656	vpalignr	$4,%ymm7,%ymm7,%ymm7
6657	vpalignr	$8,%ymm11,%ymm11,%ymm11
6658	vpalignr	$12,%ymm15,%ymm15,%ymm15
6659	vpalignr	$4,%ymm6,%ymm6,%ymm6
6660	vpalignr	$8,%ymm10,%ymm10,%ymm10
6661	vpalignr	$12,%ymm14,%ymm14,%ymm14
6662	vpalignr	$4,%ymm5,%ymm5,%ymm5
6663	vpalignr	$8,%ymm9,%ymm9,%ymm9
6664	vpalignr	$12,%ymm13,%ymm13,%ymm13
6665	vpalignr	$4,%ymm4,%ymm4,%ymm4
6666	vpalignr	$8,%ymm8,%ymm8,%ymm8
6667	vpalignr	$12,%ymm12,%ymm12,%ymm12
6668	vmovdqa	%ymm8,0+128(%rbp)
6669	vmovdqa	.Lrol16(%rip),%ymm8
6670	vpaddd	%ymm7,%ymm3,%ymm3
6671	vpaddd	%ymm6,%ymm2,%ymm2
6672	vpaddd	%ymm5,%ymm1,%ymm1
6673	vpaddd	%ymm4,%ymm0,%ymm0
6674	vpxor	%ymm3,%ymm15,%ymm15
6675	vpxor	%ymm2,%ymm14,%ymm14
6676	vpxor	%ymm1,%ymm13,%ymm13
6677	vpxor	%ymm0,%ymm12,%ymm12
6678	vpshufb	%ymm8,%ymm15,%ymm15
6679	vpshufb	%ymm8,%ymm14,%ymm14
6680	vpshufb	%ymm8,%ymm13,%ymm13
6681	vpshufb	%ymm8,%ymm12,%ymm12
6682	vpaddd	%ymm15,%ymm11,%ymm11
6683	vpaddd	%ymm14,%ymm10,%ymm10
6684	vpaddd	%ymm13,%ymm9,%ymm9
6685	vpaddd	0+128(%rbp),%ymm12,%ymm8
6686	vpxor	%ymm11,%ymm7,%ymm7
6687	vpxor	%ymm10,%ymm6,%ymm6
6688	vpxor	%ymm9,%ymm5,%ymm5
6689	vpxor	%ymm8,%ymm4,%ymm4
6690	vmovdqa	%ymm8,0+128(%rbp)
6691	vpsrld	$20,%ymm7,%ymm8
6692	vpslld	$32-20,%ymm7,%ymm7
6693	vpxor	%ymm8,%ymm7,%ymm7
6694	vpsrld	$20,%ymm6,%ymm8
6695	vpslld	$32-20,%ymm6,%ymm6
6696	vpxor	%ymm8,%ymm6,%ymm6
6697	vpsrld	$20,%ymm5,%ymm8
6698	vpslld	$32-20,%ymm5,%ymm5
6699	vpxor	%ymm8,%ymm5,%ymm5
6700	vpsrld	$20,%ymm4,%ymm8
6701	vpslld	$32-20,%ymm4,%ymm4
6702	vpxor	%ymm8,%ymm4,%ymm4
6703	vmovdqa	.Lrol8(%rip),%ymm8
6704	vpaddd	%ymm7,%ymm3,%ymm3
6705	vpaddd	%ymm6,%ymm2,%ymm2
6706	vpaddd	%ymm5,%ymm1,%ymm1
6707	vpaddd	%ymm4,%ymm0,%ymm0
6708	vpxor	%ymm3,%ymm15,%ymm15
6709	vpxor	%ymm2,%ymm14,%ymm14
6710	vpxor	%ymm1,%ymm13,%ymm13
6711	vpxor	%ymm0,%ymm12,%ymm12
6712	vpshufb	%ymm8,%ymm15,%ymm15
6713	vpshufb	%ymm8,%ymm14,%ymm14
6714	vpshufb	%ymm8,%ymm13,%ymm13
6715	vpshufb	%ymm8,%ymm12,%ymm12
6716	vpaddd	%ymm15,%ymm11,%ymm11
6717	vpaddd	%ymm14,%ymm10,%ymm10
6718	vpaddd	%ymm13,%ymm9,%ymm9
6719	vpaddd	0+128(%rbp),%ymm12,%ymm8
6720	vpxor	%ymm11,%ymm7,%ymm7
6721	vpxor	%ymm10,%ymm6,%ymm6
6722	vpxor	%ymm9,%ymm5,%ymm5
6723	vpxor	%ymm8,%ymm4,%ymm4
6724	vmovdqa	%ymm8,0+128(%rbp)
6725	vpsrld	$25,%ymm7,%ymm8
6726	vpslld	$32-25,%ymm7,%ymm7
6727	vpxor	%ymm8,%ymm7,%ymm7
6728	vpsrld	$25,%ymm6,%ymm8
6729	vpslld	$32-25,%ymm6,%ymm6
6730	vpxor	%ymm8,%ymm6,%ymm6
6731	vpsrld	$25,%ymm5,%ymm8
6732	vpslld	$32-25,%ymm5,%ymm5
6733	vpxor	%ymm8,%ymm5,%ymm5
6734	vpsrld	$25,%ymm4,%ymm8
6735	vpslld	$32-25,%ymm4,%ymm4
6736	vpxor	%ymm8,%ymm4,%ymm4
6737	vmovdqa	0+128(%rbp),%ymm8
6738	vpalignr	$12,%ymm7,%ymm7,%ymm7
6739	vpalignr	$8,%ymm11,%ymm11,%ymm11
6740	vpalignr	$4,%ymm15,%ymm15,%ymm15
6741	vpalignr	$12,%ymm6,%ymm6,%ymm6
6742	vpalignr	$8,%ymm10,%ymm10,%ymm10
6743	vpalignr	$4,%ymm14,%ymm14,%ymm14
6744	vpalignr	$12,%ymm5,%ymm5,%ymm5
6745	vpalignr	$8,%ymm9,%ymm9,%ymm9
6746	vpalignr	$4,%ymm13,%ymm13,%ymm13
6747	vpalignr	$12,%ymm4,%ymm4,%ymm4
6748	vpalignr	$8,%ymm8,%ymm8,%ymm8
6749	vpalignr	$4,%ymm12,%ymm12,%ymm12
6750	vmovdqa	%ymm8,0+128(%rbp)
6751	vmovdqa	.Lrol16(%rip),%ymm8
6752	vpaddd	%ymm7,%ymm3,%ymm3
6753	vpaddd	%ymm6,%ymm2,%ymm2
6754	vpaddd	%ymm5,%ymm1,%ymm1
6755	vpaddd	%ymm4,%ymm0,%ymm0
6756	vpxor	%ymm3,%ymm15,%ymm15
6757	vpxor	%ymm2,%ymm14,%ymm14
6758	vpxor	%ymm1,%ymm13,%ymm13
6759	vpxor	%ymm0,%ymm12,%ymm12
6760	vpshufb	%ymm8,%ymm15,%ymm15
6761	vpshufb	%ymm8,%ymm14,%ymm14
6762	vpshufb	%ymm8,%ymm13,%ymm13
6763	vpshufb	%ymm8,%ymm12,%ymm12
6764	vpaddd	%ymm15,%ymm11,%ymm11
6765	vpaddd	%ymm14,%ymm10,%ymm10
6766	vpaddd	%ymm13,%ymm9,%ymm9
6767	vpaddd	0+128(%rbp),%ymm12,%ymm8
6768	vpxor	%ymm11,%ymm7,%ymm7
6769	vpxor	%ymm10,%ymm6,%ymm6
6770	vpxor	%ymm9,%ymm5,%ymm5
6771	vpxor	%ymm8,%ymm4,%ymm4
6772	vmovdqa	%ymm8,0+128(%rbp)
6773	vpsrld	$20,%ymm7,%ymm8
6774	vpslld	$32-20,%ymm7,%ymm7
6775	vpxor	%ymm8,%ymm7,%ymm7
6776	vpsrld	$20,%ymm6,%ymm8
6777	vpslld	$32-20,%ymm6,%ymm6
6778	vpxor	%ymm8,%ymm6,%ymm6
6779	vpsrld	$20,%ymm5,%ymm8
6780	vpslld	$32-20,%ymm5,%ymm5
6781	vpxor	%ymm8,%ymm5,%ymm5
6782	vpsrld	$20,%ymm4,%ymm8
6783	vpslld	$32-20,%ymm4,%ymm4
6784	vpxor	%ymm8,%ymm4,%ymm4
6785	vmovdqa	.Lrol8(%rip),%ymm8
6786	vpaddd	%ymm7,%ymm3,%ymm3
6787	vpaddd	%ymm6,%ymm2,%ymm2
6788	vpaddd	%ymm5,%ymm1,%ymm1
6789	vpaddd	%ymm4,%ymm0,%ymm0
6790	vpxor	%ymm3,%ymm15,%ymm15
6791
6792	subq	$16,%rdi
6793	movq	$9,%rcx
6794	jmp	.Lseal_avx2_main_loop_rounds_entry
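# Main AVX2 seal loop: each pass regenerates four 2-block ChaCha20 states
# (512 bytes of keystream) while the interleaved Poly1305 code absorbs the
# 512 bytes of ciphertext written on the previous pass, so authentication
# runs one 512-byte chunk behind encryption.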
6795.align	32
6796.Lseal_avx2_main_loop:
6797	vmovdqa	.Lchacha20_consts(%rip),%ymm0
6798	vmovdqa	0+64(%rbp),%ymm4
6799	vmovdqa	0+96(%rbp),%ymm8
6800	vmovdqa	%ymm0,%ymm1
6801	vmovdqa	%ymm4,%ymm5
6802	vmovdqa	%ymm8,%ymm9
6803	vmovdqa	%ymm0,%ymm2
6804	vmovdqa	%ymm4,%ymm6
6805	vmovdqa	%ymm8,%ymm10
6806	vmovdqa	%ymm0,%ymm3
6807	vmovdqa	%ymm4,%ymm7
6808	vmovdqa	%ymm8,%ymm11
6809	vmovdqa	.Lavx2_inc(%rip),%ymm12
6810	vpaddd	0+160(%rbp),%ymm12,%ymm15
6811	vpaddd	%ymm15,%ymm12,%ymm14
6812	vpaddd	%ymm14,%ymm12,%ymm13
6813	vpaddd	%ymm13,%ymm12,%ymm12
6814	vmovdqa	%ymm15,0+256(%rbp)
6815	vmovdqa	%ymm14,0+224(%rbp)
6816	vmovdqa	%ymm13,0+192(%rbp)
6817	vmovdqa	%ymm12,0+160(%rbp)
6818
6819	movq	$10,%rcx
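# %rcx = 10 double rounds (20 ChaCha20 rounds). Each iteration below does
# one column round plus one diagonal round on all four states, interleaved
# with three 16-byte Poly1305 blocks multiplied (mulx) against the key
# limbs kept at 0(%rbp) and 8(%rbp).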
6820.align	32
6821.Lseal_avx2_main_loop_rounds:
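# Absorb the next 16-byte ciphertext block: add it, plus the 2^128 pad
# bit, into the accumulator held in %r12:%r11:%r10.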
6822	addq	0+0(%rdi),%r10
6823	adcq	8+0(%rdi),%r11
6824	adcq	$1,%r12
6825	vmovdqa	%ymm8,0+128(%rbp)
6826	vmovdqa	.Lrol16(%rip),%ymm8
6827	vpaddd	%ymm7,%ymm3,%ymm3
6828	vpaddd	%ymm6,%ymm2,%ymm2
6829	vpaddd	%ymm5,%ymm1,%ymm1
6830	vpaddd	%ymm4,%ymm0,%ymm0
6831	vpxor	%ymm3,%ymm15,%ymm15
6832	vpxor	%ymm2,%ymm14,%ymm14
6833	vpxor	%ymm1,%ymm13,%ymm13
6834	vpxor	%ymm0,%ymm12,%ymm12
6835	movq	0+0+0(%rbp),%rdx
6836	movq	%rdx,%r15
6837	mulxq	%r10,%r13,%r14
6838	mulxq	%r11,%rax,%rdx
6839	imulq	%r12,%r15
6840	addq	%rax,%r14
6841	adcq	%rdx,%r15
6842	vpshufb	%ymm8,%ymm15,%ymm15
6843	vpshufb	%ymm8,%ymm14,%ymm14
6844	vpshufb	%ymm8,%ymm13,%ymm13
6845	vpshufb	%ymm8,%ymm12,%ymm12
6846	vpaddd	%ymm15,%ymm11,%ymm11
6847	vpaddd	%ymm14,%ymm10,%ymm10
6848	vpaddd	%ymm13,%ymm9,%ymm9
6849	vpaddd	0+128(%rbp),%ymm12,%ymm8
6850	vpxor	%ymm11,%ymm7,%ymm7
6851	movq	8+0+0(%rbp),%rdx
6852	mulxq	%r10,%r10,%rax
6853	addq	%r10,%r14
6854	mulxq	%r11,%r11,%r9
6855	adcq	%r11,%r15
6856	adcq	$0,%r9
6857	imulq	%r12,%rdx
6858	vpxor	%ymm10,%ymm6,%ymm6
6859	vpxor	%ymm9,%ymm5,%ymm5
6860	vpxor	%ymm8,%ymm4,%ymm4
6861	vmovdqa	%ymm8,0+128(%rbp)
6862	vpsrld	$20,%ymm7,%ymm8
6863	vpslld	$32-20,%ymm7,%ymm7
6864	vpxor	%ymm8,%ymm7,%ymm7
6865	vpsrld	$20,%ymm6,%ymm8
6866	vpslld	$32-20,%ymm6,%ymm6
6867	vpxor	%ymm8,%ymm6,%ymm6
6868	vpsrld	$20,%ymm5,%ymm8
6869	vpslld	$32-20,%ymm5,%ymm5
6870	addq	%rax,%r15
6871	adcq	%rdx,%r9
6872	vpxor	%ymm8,%ymm5,%ymm5
6873	vpsrld	$20,%ymm4,%ymm8
6874	vpslld	$32-20,%ymm4,%ymm4
6875	vpxor	%ymm8,%ymm4,%ymm4
6876	vmovdqa	.Lrol8(%rip),%ymm8
6877	vpaddd	%ymm7,%ymm3,%ymm3
6878	vpaddd	%ymm6,%ymm2,%ymm2
6879	vpaddd	%ymm5,%ymm1,%ymm1
6880	vpaddd	%ymm4,%ymm0,%ymm0
6881	vpxor	%ymm3,%ymm15,%ymm15
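# Poly1305 partial reduction: the bits at and above 2^130 are folded back
# into the accumulator as 5*t (computed as 4*t + t), leaving the result in
# %r10:%r11:%r12 with only the low two bits live in %r12.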
6882	movq	%r13,%r10
6883	movq	%r14,%r11
6884	movq	%r15,%r12
6885	andq	$3,%r12
6886	movq	%r15,%r13
6887	andq	$-4,%r13
6888	movq	%r9,%r14
6889	shrdq	$2,%r9,%r15
6890	shrq	$2,%r9
6891	addq	%r13,%r15
6892	adcq	%r14,%r9
6893	addq	%r15,%r10
6894	adcq	%r9,%r11
6895	adcq	$0,%r12
6896
6897.Lseal_avx2_main_loop_rounds_entry:
6898	vpxor	%ymm2,%ymm14,%ymm14
6899	vpxor	%ymm1,%ymm13,%ymm13
6900	vpxor	%ymm0,%ymm12,%ymm12
6901	vpshufb	%ymm8,%ymm15,%ymm15
6902	vpshufb	%ymm8,%ymm14,%ymm14
6903	vpshufb	%ymm8,%ymm13,%ymm13
6904	vpshufb	%ymm8,%ymm12,%ymm12
6905	vpaddd	%ymm15,%ymm11,%ymm11
6906	vpaddd	%ymm14,%ymm10,%ymm10
6907	addq	0+16(%rdi),%r10
6908	adcq	8+16(%rdi),%r11
6909	adcq	$1,%r12
6910	vpaddd	%ymm13,%ymm9,%ymm9
6911	vpaddd	0+128(%rbp),%ymm12,%ymm8
6912	vpxor	%ymm11,%ymm7,%ymm7
6913	vpxor	%ymm10,%ymm6,%ymm6
6914	vpxor	%ymm9,%ymm5,%ymm5
6915	vpxor	%ymm8,%ymm4,%ymm4
6916	vmovdqa	%ymm8,0+128(%rbp)
6917	vpsrld	$25,%ymm7,%ymm8
6918	movq	0+0+0(%rbp),%rdx
6919	movq	%rdx,%r15
6920	mulxq	%r10,%r13,%r14
6921	mulxq	%r11,%rax,%rdx
6922	imulq	%r12,%r15
6923	addq	%rax,%r14
6924	adcq	%rdx,%r15
6925	vpslld	$32-25,%ymm7,%ymm7
6926	vpxor	%ymm8,%ymm7,%ymm7
6927	vpsrld	$25,%ymm6,%ymm8
6928	vpslld	$32-25,%ymm6,%ymm6
6929	vpxor	%ymm8,%ymm6,%ymm6
6930	vpsrld	$25,%ymm5,%ymm8
6931	vpslld	$32-25,%ymm5,%ymm5
6932	vpxor	%ymm8,%ymm5,%ymm5
6933	vpsrld	$25,%ymm4,%ymm8
6934	vpslld	$32-25,%ymm4,%ymm4
6935	vpxor	%ymm8,%ymm4,%ymm4
6936	vmovdqa	0+128(%rbp),%ymm8
6937	vpalignr	$4,%ymm7,%ymm7,%ymm7
6938	vpalignr	$8,%ymm11,%ymm11,%ymm11
6939	vpalignr	$12,%ymm15,%ymm15,%ymm15
6940	vpalignr	$4,%ymm6,%ymm6,%ymm6
6941	vpalignr	$8,%ymm10,%ymm10,%ymm10
6942	vpalignr	$12,%ymm14,%ymm14,%ymm14
6943	movq	8+0+0(%rbp),%rdx
6944	mulxq	%r10,%r10,%rax
6945	addq	%r10,%r14
6946	mulxq	%r11,%r11,%r9
6947	adcq	%r11,%r15
6948	adcq	$0,%r9
6949	imulq	%r12,%rdx
6950	vpalignr	$4,%ymm5,%ymm5,%ymm5
6951	vpalignr	$8,%ymm9,%ymm9,%ymm9
6952	vpalignr	$12,%ymm13,%ymm13,%ymm13
6953	vpalignr	$4,%ymm4,%ymm4,%ymm4
6954	vpalignr	$8,%ymm8,%ymm8,%ymm8
6955	vpalignr	$12,%ymm12,%ymm12,%ymm12
6956	vmovdqa	%ymm8,0+128(%rbp)
6957	vmovdqa	.Lrol16(%rip),%ymm8
6958	vpaddd	%ymm7,%ymm3,%ymm3
6959	vpaddd	%ymm6,%ymm2,%ymm2
6960	vpaddd	%ymm5,%ymm1,%ymm1
6961	vpaddd	%ymm4,%ymm0,%ymm0
6962	vpxor	%ymm3,%ymm15,%ymm15
6963	vpxor	%ymm2,%ymm14,%ymm14
6964	vpxor	%ymm1,%ymm13,%ymm13
6965	vpxor	%ymm0,%ymm12,%ymm12
6966	vpshufb	%ymm8,%ymm15,%ymm15
6967	vpshufb	%ymm8,%ymm14,%ymm14
6968	addq	%rax,%r15
6969	adcq	%rdx,%r9
6970	vpshufb	%ymm8,%ymm13,%ymm13
6971	vpshufb	%ymm8,%ymm12,%ymm12
6972	vpaddd	%ymm15,%ymm11,%ymm11
6973	vpaddd	%ymm14,%ymm10,%ymm10
6974	vpaddd	%ymm13,%ymm9,%ymm9
6975	vpaddd	0+128(%rbp),%ymm12,%ymm8
6976	vpxor	%ymm11,%ymm7,%ymm7
6977	vpxor	%ymm10,%ymm6,%ymm6
6978	vpxor	%ymm9,%ymm5,%ymm5
6979	movq	%r13,%r10
6980	movq	%r14,%r11
6981	movq	%r15,%r12
6982	andq	$3,%r12
6983	movq	%r15,%r13
6984	andq	$-4,%r13
6985	movq	%r9,%r14
6986	shrdq	$2,%r9,%r15
6987	shrq	$2,%r9
6988	addq	%r13,%r15
6989	adcq	%r14,%r9
6990	addq	%r15,%r10
6991	adcq	%r9,%r11
6992	adcq	$0,%r12
6993	vpxor	%ymm8,%ymm4,%ymm4
6994	vmovdqa	%ymm8,0+128(%rbp)
6995	vpsrld	$20,%ymm7,%ymm8
6996	vpslld	$32-20,%ymm7,%ymm7
6997	vpxor	%ymm8,%ymm7,%ymm7
6998	vpsrld	$20,%ymm6,%ymm8
6999	vpslld	$32-20,%ymm6,%ymm6
7000	vpxor	%ymm8,%ymm6,%ymm6
7001	addq	0+32(%rdi),%r10
7002	adcq	8+32(%rdi),%r11
7003	adcq	$1,%r12
7004
7005	leaq	48(%rdi),%rdi
7006	vpsrld	$20,%ymm5,%ymm8
7007	vpslld	$32-20,%ymm5,%ymm5
7008	vpxor	%ymm8,%ymm5,%ymm5
7009	vpsrld	$20,%ymm4,%ymm8
7010	vpslld	$32-20,%ymm4,%ymm4
7011	vpxor	%ymm8,%ymm4,%ymm4
7012	vmovdqa	.Lrol8(%rip),%ymm8
7013	vpaddd	%ymm7,%ymm3,%ymm3
7014	vpaddd	%ymm6,%ymm2,%ymm2
7015	vpaddd	%ymm5,%ymm1,%ymm1
7016	vpaddd	%ymm4,%ymm0,%ymm0
7017	vpxor	%ymm3,%ymm15,%ymm15
7018	vpxor	%ymm2,%ymm14,%ymm14
7019	vpxor	%ymm1,%ymm13,%ymm13
7020	vpxor	%ymm0,%ymm12,%ymm12
7021	vpshufb	%ymm8,%ymm15,%ymm15
7022	vpshufb	%ymm8,%ymm14,%ymm14
7023	vpshufb	%ymm8,%ymm13,%ymm13
7024	movq	0+0+0(%rbp),%rdx
7025	movq	%rdx,%r15
7026	mulxq	%r10,%r13,%r14
7027	mulxq	%r11,%rax,%rdx
7028	imulq	%r12,%r15
7029	addq	%rax,%r14
7030	adcq	%rdx,%r15
7031	vpshufb	%ymm8,%ymm12,%ymm12
7032	vpaddd	%ymm15,%ymm11,%ymm11
7033	vpaddd	%ymm14,%ymm10,%ymm10
7034	vpaddd	%ymm13,%ymm9,%ymm9
7035	vpaddd	0+128(%rbp),%ymm12,%ymm8
7036	vpxor	%ymm11,%ymm7,%ymm7
7037	vpxor	%ymm10,%ymm6,%ymm6
7038	vpxor	%ymm9,%ymm5,%ymm5
7039	movq	8+0+0(%rbp),%rdx
7040	mulxq	%r10,%r10,%rax
7041	addq	%r10,%r14
7042	mulxq	%r11,%r11,%r9
7043	adcq	%r11,%r15
7044	adcq	$0,%r9
7045	imulq	%r12,%rdx
7046	vpxor	%ymm8,%ymm4,%ymm4
7047	vmovdqa	%ymm8,0+128(%rbp)
7048	vpsrld	$25,%ymm7,%ymm8
7049	vpslld	$32-25,%ymm7,%ymm7
7050	vpxor	%ymm8,%ymm7,%ymm7
7051	vpsrld	$25,%ymm6,%ymm8
7052	vpslld	$32-25,%ymm6,%ymm6
7053	vpxor	%ymm8,%ymm6,%ymm6
7054	addq	%rax,%r15
7055	adcq	%rdx,%r9
7056	vpsrld	$25,%ymm5,%ymm8
7057	vpslld	$32-25,%ymm5,%ymm5
7058	vpxor	%ymm8,%ymm5,%ymm5
7059	vpsrld	$25,%ymm4,%ymm8
7060	vpslld	$32-25,%ymm4,%ymm4
7061	vpxor	%ymm8,%ymm4,%ymm4
7062	vmovdqa	0+128(%rbp),%ymm8
7063	vpalignr	$12,%ymm7,%ymm7,%ymm7
7064	vpalignr	$8,%ymm11,%ymm11,%ymm11
7065	vpalignr	$4,%ymm15,%ymm15,%ymm15
7066	vpalignr	$12,%ymm6,%ymm6,%ymm6
7067	vpalignr	$8,%ymm10,%ymm10,%ymm10
7068	vpalignr	$4,%ymm14,%ymm14,%ymm14
7069	vpalignr	$12,%ymm5,%ymm5,%ymm5
7070	vpalignr	$8,%ymm9,%ymm9,%ymm9
7071	vpalignr	$4,%ymm13,%ymm13,%ymm13
7072	vpalignr	$12,%ymm4,%ymm4,%ymm4
7073	vpalignr	$8,%ymm8,%ymm8,%ymm8
7074	movq	%r13,%r10
7075	movq	%r14,%r11
7076	movq	%r15,%r12
7077	andq	$3,%r12
7078	movq	%r15,%r13
7079	andq	$-4,%r13
7080	movq	%r9,%r14
7081	shrdq	$2,%r9,%r15
7082	shrq	$2,%r9
7083	addq	%r13,%r15
7084	adcq	%r14,%r9
7085	addq	%r15,%r10
7086	adcq	%r9,%r11
7087	adcq	$0,%r12
7088	vpalignr	$4,%ymm12,%ymm12,%ymm12
7089
7090	decq	%rcx
7091	jne	.Lseal_avx2_main_loop_rounds
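# Double rounds finished: add the saved initial state back into each of
# the four working states (constants, key rows from 64/96(%rbp), counter
# rows from 160..256(%rbp)).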
7092	vpaddd	.Lchacha20_consts(%rip),%ymm3,%ymm3
7093	vpaddd	0+64(%rbp),%ymm7,%ymm7
7094	vpaddd	0+96(%rbp),%ymm11,%ymm11
7095	vpaddd	0+256(%rbp),%ymm15,%ymm15
7096	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
7097	vpaddd	0+64(%rbp),%ymm6,%ymm6
7098	vpaddd	0+96(%rbp),%ymm10,%ymm10
7099	vpaddd	0+224(%rbp),%ymm14,%ymm14
7100	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
7101	vpaddd	0+64(%rbp),%ymm5,%ymm5
7102	vpaddd	0+96(%rbp),%ymm9,%ymm9
7103	vpaddd	0+192(%rbp),%ymm13,%ymm13
7104	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
7105	vpaddd	0+64(%rbp),%ymm4,%ymm4
7106	vpaddd	0+96(%rbp),%ymm8,%ymm8
7107	vpaddd	0+160(%rbp),%ymm12,%ymm12
7108
7109	vmovdqa	%ymm0,0+128(%rbp)
7110	addq	0+0(%rdi),%r10
7111	adcq	8+0(%rdi),%r11
7112	adcq	$1,%r12
7113	movq	0+0+0(%rbp),%rdx
7114	movq	%rdx,%r15
7115	mulxq	%r10,%r13,%r14
7116	mulxq	%r11,%rax,%rdx
7117	imulq	%r12,%r15
7118	addq	%rax,%r14
7119	adcq	%rdx,%r15
7120	movq	8+0+0(%rbp),%rdx
7121	mulxq	%r10,%r10,%rax
7122	addq	%r10,%r14
7123	mulxq	%r11,%r11,%r9
7124	adcq	%r11,%r15
7125	adcq	$0,%r9
7126	imulq	%r12,%rdx
7127	addq	%rax,%r15
7128	adcq	%rdx,%r9
7129	movq	%r13,%r10
7130	movq	%r14,%r11
7131	movq	%r15,%r12
7132	andq	$3,%r12
7133	movq	%r15,%r13
7134	andq	$-4,%r13
7135	movq	%r9,%r14
7136	shrdq	$2,%r9,%r15
7137	shrq	$2,%r9
7138	addq	%r13,%r15
7139	adcq	%r14,%r9
7140	addq	%r15,%r10
7141	adcq	%r9,%r11
7142	adcq	$0,%r12
7143	addq	0+16(%rdi),%r10
7144	adcq	8+16(%rdi),%r11
7145	adcq	$1,%r12
7146	movq	0+0+0(%rbp),%rdx
7147	movq	%rdx,%r15
7148	mulxq	%r10,%r13,%r14
7149	mulxq	%r11,%rax,%rdx
7150	imulq	%r12,%r15
7151	addq	%rax,%r14
7152	adcq	%rdx,%r15
7153	movq	8+0+0(%rbp),%rdx
7154	mulxq	%r10,%r10,%rax
7155	addq	%r10,%r14
7156	mulxq	%r11,%r11,%r9
7157	adcq	%r11,%r15
7158	adcq	$0,%r9
7159	imulq	%r12,%rdx
7160	addq	%rax,%r15
7161	adcq	%rdx,%r9
7162	movq	%r13,%r10
7163	movq	%r14,%r11
7164	movq	%r15,%r12
7165	andq	$3,%r12
7166	movq	%r15,%r13
7167	andq	$-4,%r13
7168	movq	%r9,%r14
7169	shrdq	$2,%r9,%r15
7170	shrq	$2,%r9
7171	addq	%r13,%r15
7172	adcq	%r14,%r9
7173	addq	%r15,%r10
7174	adcq	%r9,%r11
7175	adcq	$0,%r12
7176
7177	leaq	32(%rdi),%rdi
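# Reassemble the 64-byte blocks from the 128-bit register halves
# (vperm2i128), XOR 512 bytes of plaintext from %rsi and store the
# ciphertext at %rdi; it is absorbed on the next pass or in the tail code.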
7178	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
7179	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
7180	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
7181	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
7182	vpxor	0+0(%rsi),%ymm0,%ymm0
7183	vpxor	32+0(%rsi),%ymm3,%ymm3
7184	vpxor	64+0(%rsi),%ymm7,%ymm7
7185	vpxor	96+0(%rsi),%ymm11,%ymm11
7186	vmovdqu	%ymm0,0+0(%rdi)
7187	vmovdqu	%ymm3,32+0(%rdi)
7188	vmovdqu	%ymm7,64+0(%rdi)
7189	vmovdqu	%ymm11,96+0(%rdi)
7190
7191	vmovdqa	0+128(%rbp),%ymm0
7192	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
7193	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
7194	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
7195	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
7196	vpxor	0+128(%rsi),%ymm3,%ymm3
7197	vpxor	32+128(%rsi),%ymm2,%ymm2
7198	vpxor	64+128(%rsi),%ymm6,%ymm6
7199	vpxor	96+128(%rsi),%ymm10,%ymm10
7200	vmovdqu	%ymm3,0+128(%rdi)
7201	vmovdqu	%ymm2,32+128(%rdi)
7202	vmovdqu	%ymm6,64+128(%rdi)
7203	vmovdqu	%ymm10,96+128(%rdi)
7204	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
7205	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
7206	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
7207	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
7208	vpxor	0+256(%rsi),%ymm3,%ymm3
7209	vpxor	32+256(%rsi),%ymm1,%ymm1
7210	vpxor	64+256(%rsi),%ymm5,%ymm5
7211	vpxor	96+256(%rsi),%ymm9,%ymm9
7212	vmovdqu	%ymm3,0+256(%rdi)
7213	vmovdqu	%ymm1,32+256(%rdi)
7214	vmovdqu	%ymm5,64+256(%rdi)
7215	vmovdqu	%ymm9,96+256(%rdi)
7216	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
7217	vperm2i128	$0x13,%ymm0,%ymm4,%ymm4
7218	vperm2i128	$0x02,%ymm8,%ymm12,%ymm0
7219	vperm2i128	$0x13,%ymm8,%ymm12,%ymm8
7220	vpxor	0+384(%rsi),%ymm3,%ymm3
7221	vpxor	32+384(%rsi),%ymm0,%ymm0
7222	vpxor	64+384(%rsi),%ymm4,%ymm4
7223	vpxor	96+384(%rsi),%ymm8,%ymm8
7224	vmovdqu	%ymm3,0+384(%rdi)
7225	vmovdqu	%ymm0,32+384(%rdi)
7226	vmovdqu	%ymm4,64+384(%rdi)
7227	vmovdqu	%ymm8,96+384(%rdi)
7228
7229	leaq	512(%rsi),%rsi
7230	subq	$512,%rbx
7231	cmpq	$512,%rbx
7232	jg	.Lseal_avx2_main_loop
7233
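# At most 512 bytes of plaintext remain. Absorb 32 more bytes of pending
# ciphertext, then choose a tail path from the remaining length in %rbx.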
7234	addq	0+0(%rdi),%r10
7235	adcq	8+0(%rdi),%r11
7236	adcq	$1,%r12
7237	movq	0+0+0(%rbp),%rdx
7238	movq	%rdx,%r15
7239	mulxq	%r10,%r13,%r14
7240	mulxq	%r11,%rax,%rdx
7241	imulq	%r12,%r15
7242	addq	%rax,%r14
7243	adcq	%rdx,%r15
7244	movq	8+0+0(%rbp),%rdx
7245	mulxq	%r10,%r10,%rax
7246	addq	%r10,%r14
7247	mulxq	%r11,%r11,%r9
7248	adcq	%r11,%r15
7249	adcq	$0,%r9
7250	imulq	%r12,%rdx
7251	addq	%rax,%r15
7252	adcq	%rdx,%r9
7253	movq	%r13,%r10
7254	movq	%r14,%r11
7255	movq	%r15,%r12
7256	andq	$3,%r12
7257	movq	%r15,%r13
7258	andq	$-4,%r13
7259	movq	%r9,%r14
7260	shrdq	$2,%r9,%r15
7261	shrq	$2,%r9
7262	addq	%r13,%r15
7263	adcq	%r14,%r9
7264	addq	%r15,%r10
7265	adcq	%r9,%r11
7266	adcq	$0,%r12
7267	addq	0+16(%rdi),%r10
7268	adcq	8+16(%rdi),%r11
7269	adcq	$1,%r12
7270	movq	0+0+0(%rbp),%rdx
7271	movq	%rdx,%r15
7272	mulxq	%r10,%r13,%r14
7273	mulxq	%r11,%rax,%rdx
7274	imulq	%r12,%r15
7275	addq	%rax,%r14
7276	adcq	%rdx,%r15
7277	movq	8+0+0(%rbp),%rdx
7278	mulxq	%r10,%r10,%rax
7279	addq	%r10,%r14
7280	mulxq	%r11,%r11,%r9
7281	adcq	%r11,%r15
7282	adcq	$0,%r9
7283	imulq	%r12,%rdx
7284	addq	%rax,%r15
7285	adcq	%rdx,%r9
7286	movq	%r13,%r10
7287	movq	%r14,%r11
7288	movq	%r15,%r12
7289	andq	$3,%r12
7290	movq	%r15,%r13
7291	andq	$-4,%r13
7292	movq	%r9,%r14
7293	shrdq	$2,%r9,%r15
7294	shrq	$2,%r9
7295	addq	%r13,%r15
7296	adcq	%r14,%r9
7297	addq	%r15,%r10
7298	adcq	%r9,%r11
7299	adcq	$0,%r12
7300
7301	leaq	32(%rdi),%rdi
7302	movq	$10,%rcx
7303	xorq	%r8,%r8
7304
7305	cmpq	$384,%rbx
7306	ja	.Lseal_avx2_tail_512
7307	cmpq	$256,%rbx
7308	ja	.Lseal_avx2_tail_384
7309	cmpq	$128,%rbx
7310	ja	.Lseal_avx2_tail_256
7311
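# Up to 128 bytes remain: one extra AVX2 state (two 64-byte blocks) gives
# enough keystream. The loop below finishes hashing the pending
# ciphertext, three 16-byte blocks per pass: one in ..._3xhash and two in
# ..._2xhash.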
7312.Lseal_avx2_tail_128:
7313	vmovdqa	.Lchacha20_consts(%rip),%ymm0
7314	vmovdqa	0+64(%rbp),%ymm4
7315	vmovdqa	0+96(%rbp),%ymm8
7316	vmovdqa	.Lavx2_inc(%rip),%ymm12
7317	vpaddd	0+160(%rbp),%ymm12,%ymm12
7318	vmovdqa	%ymm12,0+160(%rbp)
7319
7320.Lseal_avx2_tail_128_rounds_and_3xhash:
7321	addq	0+0(%rdi),%r10
7322	adcq	8+0(%rdi),%r11
7323	adcq	$1,%r12
7324	movq	0+0+0(%rbp),%rdx
7325	movq	%rdx,%r15
7326	mulxq	%r10,%r13,%r14
7327	mulxq	%r11,%rax,%rdx
7328	imulq	%r12,%r15
7329	addq	%rax,%r14
7330	adcq	%rdx,%r15
7331	movq	8+0+0(%rbp),%rdx
7332	mulxq	%r10,%r10,%rax
7333	addq	%r10,%r14
7334	mulxq	%r11,%r11,%r9
7335	adcq	%r11,%r15
7336	adcq	$0,%r9
7337	imulq	%r12,%rdx
7338	addq	%rax,%r15
7339	adcq	%rdx,%r9
7340	movq	%r13,%r10
7341	movq	%r14,%r11
7342	movq	%r15,%r12
7343	andq	$3,%r12
7344	movq	%r15,%r13
7345	andq	$-4,%r13
7346	movq	%r9,%r14
7347	shrdq	$2,%r9,%r15
7348	shrq	$2,%r9
7349	addq	%r13,%r15
7350	adcq	%r14,%r9
7351	addq	%r15,%r10
7352	adcq	%r9,%r11
7353	adcq	$0,%r12
7354
7355	leaq	16(%rdi),%rdi
7356.Lseal_avx2_tail_128_rounds_and_2xhash:
7357	vpaddd	%ymm4,%ymm0,%ymm0
7358	vpxor	%ymm0,%ymm12,%ymm12
7359	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
7360	vpaddd	%ymm12,%ymm8,%ymm8
7361	vpxor	%ymm8,%ymm4,%ymm4
7362	vpsrld	$20,%ymm4,%ymm3
7363	vpslld	$12,%ymm4,%ymm4
7364	vpxor	%ymm3,%ymm4,%ymm4
7365	vpaddd	%ymm4,%ymm0,%ymm0
7366	vpxor	%ymm0,%ymm12,%ymm12
7367	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
7368	vpaddd	%ymm12,%ymm8,%ymm8
7369	vpxor	%ymm8,%ymm4,%ymm4
7370	vpslld	$7,%ymm4,%ymm3
7371	vpsrld	$25,%ymm4,%ymm4
7372	vpxor	%ymm3,%ymm4,%ymm4
7373	vpalignr	$12,%ymm12,%ymm12,%ymm12
7374	vpalignr	$8,%ymm8,%ymm8,%ymm8
7375	vpalignr	$4,%ymm4,%ymm4,%ymm4
7376	addq	0+0(%rdi),%r10
7377	adcq	8+0(%rdi),%r11
7378	adcq	$1,%r12
7379	movq	0+0+0(%rbp),%rdx
7380	movq	%rdx,%r15
7381	mulxq	%r10,%r13,%r14
7382	mulxq	%r11,%rax,%rdx
7383	imulq	%r12,%r15
7384	addq	%rax,%r14
7385	adcq	%rdx,%r15
7386	movq	8+0+0(%rbp),%rdx
7387	mulxq	%r10,%r10,%rax
7388	addq	%r10,%r14
7389	mulxq	%r11,%r11,%r9
7390	adcq	%r11,%r15
7391	adcq	$0,%r9
7392	imulq	%r12,%rdx
7393	addq	%rax,%r15
7394	adcq	%rdx,%r9
7395	movq	%r13,%r10
7396	movq	%r14,%r11
7397	movq	%r15,%r12
7398	andq	$3,%r12
7399	movq	%r15,%r13
7400	andq	$-4,%r13
7401	movq	%r9,%r14
7402	shrdq	$2,%r9,%r15
7403	shrq	$2,%r9
7404	addq	%r13,%r15
7405	adcq	%r14,%r9
7406	addq	%r15,%r10
7407	adcq	%r9,%r11
7408	adcq	$0,%r12
7409	vpaddd	%ymm4,%ymm0,%ymm0
7410	vpxor	%ymm0,%ymm12,%ymm12
7411	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
7412	vpaddd	%ymm12,%ymm8,%ymm8
7413	vpxor	%ymm8,%ymm4,%ymm4
7414	vpsrld	$20,%ymm4,%ymm3
7415	vpslld	$12,%ymm4,%ymm4
7416	vpxor	%ymm3,%ymm4,%ymm4
7417	vpaddd	%ymm4,%ymm0,%ymm0
7418	vpxor	%ymm0,%ymm12,%ymm12
7419	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
7420	vpaddd	%ymm12,%ymm8,%ymm8
7421	vpxor	%ymm8,%ymm4,%ymm4
7422	vpslld	$7,%ymm4,%ymm3
7423	vpsrld	$25,%ymm4,%ymm4
7424	vpxor	%ymm3,%ymm4,%ymm4
7425	vpalignr	$4,%ymm12,%ymm12,%ymm12
7426	vpalignr	$8,%ymm8,%ymm8,%ymm8
7427	vpalignr	$12,%ymm4,%ymm4,%ymm4
7428	addq	0+16(%rdi),%r10
7429	adcq	8+16(%rdi),%r11
7430	adcq	$1,%r12
7431	movq	0+0+0(%rbp),%rdx
7432	movq	%rdx,%r15
7433	mulxq	%r10,%r13,%r14
7434	mulxq	%r11,%rax,%rdx
7435	imulq	%r12,%r15
7436	addq	%rax,%r14
7437	adcq	%rdx,%r15
7438	movq	8+0+0(%rbp),%rdx
7439	mulxq	%r10,%r10,%rax
7440	addq	%r10,%r14
7441	mulxq	%r11,%r11,%r9
7442	adcq	%r11,%r15
7443	adcq	$0,%r9
7444	imulq	%r12,%rdx
7445	addq	%rax,%r15
7446	adcq	%rdx,%r9
7447	movq	%r13,%r10
7448	movq	%r14,%r11
7449	movq	%r15,%r12
7450	andq	$3,%r12
7451	movq	%r15,%r13
7452	andq	$-4,%r13
7453	movq	%r9,%r14
7454	shrdq	$2,%r9,%r15
7455	shrq	$2,%r9
7456	addq	%r13,%r15
7457	adcq	%r14,%r9
7458	addq	%r15,%r10
7459	adcq	%r9,%r11
7460	adcq	$0,%r12
7461
7462	leaq	32(%rdi),%rdi
7463	decq	%rcx
7464	jg	.Lseal_avx2_tail_128_rounds_and_3xhash
7465	decq	%r8
7466	jge	.Lseal_avx2_tail_128_rounds_and_2xhash
7467	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
7468	vpaddd	0+64(%rbp),%ymm4,%ymm4
7469	vpaddd	0+96(%rbp),%ymm8,%ymm8
7470	vpaddd	0+160(%rbp),%ymm12,%ymm12
7471	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
7472	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
7473	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
7474	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
7475	vmovdqa	%ymm3,%ymm8
7476
7477	jmp	.Lseal_avx2_short_loop
7478
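# Between 129 and 256 bytes remain: two AVX2 states. This tail (and the
# 384-byte one) interleaves the mulq form of the Poly1305 step rather than
# the mulx form used in the main loop.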
7479.Lseal_avx2_tail_256:
7480	vmovdqa	.Lchacha20_consts(%rip),%ymm0
7481	vmovdqa	0+64(%rbp),%ymm4
7482	vmovdqa	0+96(%rbp),%ymm8
7483	vmovdqa	%ymm0,%ymm1
7484	vmovdqa	%ymm4,%ymm5
7485	vmovdqa	%ymm8,%ymm9
7486	vmovdqa	.Lavx2_inc(%rip),%ymm12
7487	vpaddd	0+160(%rbp),%ymm12,%ymm13
7488	vpaddd	%ymm13,%ymm12,%ymm12
7489	vmovdqa	%ymm12,0+160(%rbp)
7490	vmovdqa	%ymm13,0+192(%rbp)
7491
7492.Lseal_avx2_tail_256_rounds_and_3xhash:
7493	addq	0+0(%rdi),%r10
7494	adcq	8+0(%rdi),%r11
7495	adcq	$1,%r12
7496	movq	0+0+0(%rbp),%rax
7497	movq	%rax,%r15
7498	mulq	%r10
7499	movq	%rax,%r13
7500	movq	%rdx,%r14
7501	movq	0+0+0(%rbp),%rax
7502	mulq	%r11
7503	imulq	%r12,%r15
7504	addq	%rax,%r14
7505	adcq	%rdx,%r15
7506	movq	8+0+0(%rbp),%rax
7507	movq	%rax,%r9
7508	mulq	%r10
7509	addq	%rax,%r14
7510	adcq	$0,%rdx
7511	movq	%rdx,%r10
7512	movq	8+0+0(%rbp),%rax
7513	mulq	%r11
7514	addq	%rax,%r15
7515	adcq	$0,%rdx
7516	imulq	%r12,%r9
7517	addq	%r10,%r15
7518	adcq	%rdx,%r9
7519	movq	%r13,%r10
7520	movq	%r14,%r11
7521	movq	%r15,%r12
7522	andq	$3,%r12
7523	movq	%r15,%r13
7524	andq	$-4,%r13
7525	movq	%r9,%r14
7526	shrdq	$2,%r9,%r15
7527	shrq	$2,%r9
7528	addq	%r13,%r15
7529	adcq	%r14,%r9
7530	addq	%r15,%r10
7531	adcq	%r9,%r11
7532	adcq	$0,%r12
7533
7534	leaq	16(%rdi),%rdi
7535.Lseal_avx2_tail_256_rounds_and_2xhash:
7536	vpaddd	%ymm4,%ymm0,%ymm0
7537	vpxor	%ymm0,%ymm12,%ymm12
7538	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
7539	vpaddd	%ymm12,%ymm8,%ymm8
7540	vpxor	%ymm8,%ymm4,%ymm4
7541	vpsrld	$20,%ymm4,%ymm3
7542	vpslld	$12,%ymm4,%ymm4
7543	vpxor	%ymm3,%ymm4,%ymm4
7544	vpaddd	%ymm4,%ymm0,%ymm0
7545	vpxor	%ymm0,%ymm12,%ymm12
7546	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
7547	vpaddd	%ymm12,%ymm8,%ymm8
7548	vpxor	%ymm8,%ymm4,%ymm4
7549	vpslld	$7,%ymm4,%ymm3
7550	vpsrld	$25,%ymm4,%ymm4
7551	vpxor	%ymm3,%ymm4,%ymm4
7552	vpalignr	$12,%ymm12,%ymm12,%ymm12
7553	vpalignr	$8,%ymm8,%ymm8,%ymm8
7554	vpalignr	$4,%ymm4,%ymm4,%ymm4
7555	vpaddd	%ymm5,%ymm1,%ymm1
7556	vpxor	%ymm1,%ymm13,%ymm13
7557	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
7558	vpaddd	%ymm13,%ymm9,%ymm9
7559	vpxor	%ymm9,%ymm5,%ymm5
7560	vpsrld	$20,%ymm5,%ymm3
7561	vpslld	$12,%ymm5,%ymm5
7562	vpxor	%ymm3,%ymm5,%ymm5
7563	vpaddd	%ymm5,%ymm1,%ymm1
7564	vpxor	%ymm1,%ymm13,%ymm13
7565	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
7566	vpaddd	%ymm13,%ymm9,%ymm9
7567	vpxor	%ymm9,%ymm5,%ymm5
7568	vpslld	$7,%ymm5,%ymm3
7569	vpsrld	$25,%ymm5,%ymm5
7570	vpxor	%ymm3,%ymm5,%ymm5
7571	vpalignr	$12,%ymm13,%ymm13,%ymm13
7572	vpalignr	$8,%ymm9,%ymm9,%ymm9
7573	vpalignr	$4,%ymm5,%ymm5,%ymm5
7574	addq	0+0(%rdi),%r10
7575	adcq	8+0(%rdi),%r11
7576	adcq	$1,%r12
7577	movq	0+0+0(%rbp),%rax
7578	movq	%rax,%r15
7579	mulq	%r10
7580	movq	%rax,%r13
7581	movq	%rdx,%r14
7582	movq	0+0+0(%rbp),%rax
7583	mulq	%r11
7584	imulq	%r12,%r15
7585	addq	%rax,%r14
7586	adcq	%rdx,%r15
7587	movq	8+0+0(%rbp),%rax
7588	movq	%rax,%r9
7589	mulq	%r10
7590	addq	%rax,%r14
7591	adcq	$0,%rdx
7592	movq	%rdx,%r10
7593	movq	8+0+0(%rbp),%rax
7594	mulq	%r11
7595	addq	%rax,%r15
7596	adcq	$0,%rdx
7597	imulq	%r12,%r9
7598	addq	%r10,%r15
7599	adcq	%rdx,%r9
7600	movq	%r13,%r10
7601	movq	%r14,%r11
7602	movq	%r15,%r12
7603	andq	$3,%r12
7604	movq	%r15,%r13
7605	andq	$-4,%r13
7606	movq	%r9,%r14
7607	shrdq	$2,%r9,%r15
7608	shrq	$2,%r9
7609	addq	%r13,%r15
7610	adcq	%r14,%r9
7611	addq	%r15,%r10
7612	adcq	%r9,%r11
7613	adcq	$0,%r12
7614	vpaddd	%ymm4,%ymm0,%ymm0
7615	vpxor	%ymm0,%ymm12,%ymm12
7616	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
7617	vpaddd	%ymm12,%ymm8,%ymm8
7618	vpxor	%ymm8,%ymm4,%ymm4
7619	vpsrld	$20,%ymm4,%ymm3
7620	vpslld	$12,%ymm4,%ymm4
7621	vpxor	%ymm3,%ymm4,%ymm4
7622	vpaddd	%ymm4,%ymm0,%ymm0
7623	vpxor	%ymm0,%ymm12,%ymm12
7624	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
7625	vpaddd	%ymm12,%ymm8,%ymm8
7626	vpxor	%ymm8,%ymm4,%ymm4
7627	vpslld	$7,%ymm4,%ymm3
7628	vpsrld	$25,%ymm4,%ymm4
7629	vpxor	%ymm3,%ymm4,%ymm4
7630	vpalignr	$4,%ymm12,%ymm12,%ymm12
7631	vpalignr	$8,%ymm8,%ymm8,%ymm8
7632	vpalignr	$12,%ymm4,%ymm4,%ymm4
7633	vpaddd	%ymm5,%ymm1,%ymm1
7634	vpxor	%ymm1,%ymm13,%ymm13
7635	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
7636	vpaddd	%ymm13,%ymm9,%ymm9
7637	vpxor	%ymm9,%ymm5,%ymm5
7638	vpsrld	$20,%ymm5,%ymm3
7639	vpslld	$12,%ymm5,%ymm5
7640	vpxor	%ymm3,%ymm5,%ymm5
7641	vpaddd	%ymm5,%ymm1,%ymm1
7642	vpxor	%ymm1,%ymm13,%ymm13
7643	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
7644	vpaddd	%ymm13,%ymm9,%ymm9
7645	vpxor	%ymm9,%ymm5,%ymm5
7646	vpslld	$7,%ymm5,%ymm3
7647	vpsrld	$25,%ymm5,%ymm5
7648	vpxor	%ymm3,%ymm5,%ymm5
7649	vpalignr	$4,%ymm13,%ymm13,%ymm13
7650	vpalignr	$8,%ymm9,%ymm9,%ymm9
7651	vpalignr	$12,%ymm5,%ymm5,%ymm5
7652	addq	0+16(%rdi),%r10
7653	adcq	8+16(%rdi),%r11
7654	adcq	$1,%r12
7655	movq	0+0+0(%rbp),%rax
7656	movq	%rax,%r15
7657	mulq	%r10
7658	movq	%rax,%r13
7659	movq	%rdx,%r14
7660	movq	0+0+0(%rbp),%rax
7661	mulq	%r11
7662	imulq	%r12,%r15
7663	addq	%rax,%r14
7664	adcq	%rdx,%r15
7665	movq	8+0+0(%rbp),%rax
7666	movq	%rax,%r9
7667	mulq	%r10
7668	addq	%rax,%r14
7669	adcq	$0,%rdx
7670	movq	%rdx,%r10
7671	movq	8+0+0(%rbp),%rax
7672	mulq	%r11
7673	addq	%rax,%r15
7674	adcq	$0,%rdx
7675	imulq	%r12,%r9
7676	addq	%r10,%r15
7677	adcq	%rdx,%r9
7678	movq	%r13,%r10
7679	movq	%r14,%r11
7680	movq	%r15,%r12
7681	andq	$3,%r12
7682	movq	%r15,%r13
7683	andq	$-4,%r13
7684	movq	%r9,%r14
7685	shrdq	$2,%r9,%r15
7686	shrq	$2,%r9
7687	addq	%r13,%r15
7688	adcq	%r14,%r9
7689	addq	%r15,%r10
7690	adcq	%r9,%r11
7691	adcq	$0,%r12
7692
7693	leaq	32(%rdi),%rdi
7694	decq	%rcx
7695	jg	.Lseal_avx2_tail_256_rounds_and_3xhash
7696	decq	%r8
7697	jge	.Lseal_avx2_tail_256_rounds_and_2xhash
7698	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
7699	vpaddd	0+64(%rbp),%ymm5,%ymm5
7700	vpaddd	0+96(%rbp),%ymm9,%ymm9
7701	vpaddd	0+192(%rbp),%ymm13,%ymm13
7702	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
7703	vpaddd	0+64(%rbp),%ymm4,%ymm4
7704	vpaddd	0+96(%rbp),%ymm8,%ymm8
7705	vpaddd	0+160(%rbp),%ymm12,%ymm12
7706	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
7707	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
7708	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
7709	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
7710	vpxor	0+0(%rsi),%ymm3,%ymm3
7711	vpxor	32+0(%rsi),%ymm1,%ymm1
7712	vpxor	64+0(%rsi),%ymm5,%ymm5
7713	vpxor	96+0(%rsi),%ymm9,%ymm9
7714	vmovdqu	%ymm3,0+0(%rdi)
7715	vmovdqu	%ymm1,32+0(%rdi)
7716	vmovdqu	%ymm5,64+0(%rdi)
7717	vmovdqu	%ymm9,96+0(%rdi)
7718	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
7719	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
7720	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
7721	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
7722	vmovdqa	%ymm3,%ymm8
7723
7724	movq	$128,%rcx
7725	leaq	128(%rsi),%rsi
7726	subq	$128,%rbx
7727	jmp	.Lseal_avx2_short_hash_remainder
7728
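# Between 257 and 384 bytes remain: three AVX2 states, with counters saved
# at 160/192/224(%rbp).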
7729.Lseal_avx2_tail_384:
7730	vmovdqa	.Lchacha20_consts(%rip),%ymm0
7731	vmovdqa	0+64(%rbp),%ymm4
7732	vmovdqa	0+96(%rbp),%ymm8
7733	vmovdqa	%ymm0,%ymm1
7734	vmovdqa	%ymm4,%ymm5
7735	vmovdqa	%ymm8,%ymm9
7736	vmovdqa	%ymm0,%ymm2
7737	vmovdqa	%ymm4,%ymm6
7738	vmovdqa	%ymm8,%ymm10
7739	vmovdqa	.Lavx2_inc(%rip),%ymm12
7740	vpaddd	0+160(%rbp),%ymm12,%ymm14
7741	vpaddd	%ymm14,%ymm12,%ymm13
7742	vpaddd	%ymm13,%ymm12,%ymm12
7743	vmovdqa	%ymm12,0+160(%rbp)
7744	vmovdqa	%ymm13,0+192(%rbp)
7745	vmovdqa	%ymm14,0+224(%rbp)
7746
7747.Lseal_avx2_tail_384_rounds_and_3xhash:
7748	addq	0+0(%rdi),%r10
7749	adcq	8+0(%rdi),%r11
7750	adcq	$1,%r12
7751	movq	0+0+0(%rbp),%rax
7752	movq	%rax,%r15
7753	mulq	%r10
7754	movq	%rax,%r13
7755	movq	%rdx,%r14
7756	movq	0+0+0(%rbp),%rax
7757	mulq	%r11
7758	imulq	%r12,%r15
7759	addq	%rax,%r14
7760	adcq	%rdx,%r15
7761	movq	8+0+0(%rbp),%rax
7762	movq	%rax,%r9
7763	mulq	%r10
7764	addq	%rax,%r14
7765	adcq	$0,%rdx
7766	movq	%rdx,%r10
7767	movq	8+0+0(%rbp),%rax
7768	mulq	%r11
7769	addq	%rax,%r15
7770	adcq	$0,%rdx
7771	imulq	%r12,%r9
7772	addq	%r10,%r15
7773	adcq	%rdx,%r9
7774	movq	%r13,%r10
7775	movq	%r14,%r11
7776	movq	%r15,%r12
7777	andq	$3,%r12
7778	movq	%r15,%r13
7779	andq	$-4,%r13
7780	movq	%r9,%r14
7781	shrdq	$2,%r9,%r15
7782	shrq	$2,%r9
7783	addq	%r13,%r15
7784	adcq	%r14,%r9
7785	addq	%r15,%r10
7786	adcq	%r9,%r11
7787	adcq	$0,%r12
7788
7789	leaq	16(%rdi),%rdi
7790.Lseal_avx2_tail_384_rounds_and_2xhash:
7791	vpaddd	%ymm4,%ymm0,%ymm0
7792	vpxor	%ymm0,%ymm12,%ymm12
7793	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
7794	vpaddd	%ymm12,%ymm8,%ymm8
7795	vpxor	%ymm8,%ymm4,%ymm4
7796	vpsrld	$20,%ymm4,%ymm3
7797	vpslld	$12,%ymm4,%ymm4
7798	vpxor	%ymm3,%ymm4,%ymm4
7799	vpaddd	%ymm4,%ymm0,%ymm0
7800	vpxor	%ymm0,%ymm12,%ymm12
7801	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
7802	vpaddd	%ymm12,%ymm8,%ymm8
7803	vpxor	%ymm8,%ymm4,%ymm4
7804	vpslld	$7,%ymm4,%ymm3
7805	vpsrld	$25,%ymm4,%ymm4
7806	vpxor	%ymm3,%ymm4,%ymm4
7807	vpalignr	$12,%ymm12,%ymm12,%ymm12
7808	vpalignr	$8,%ymm8,%ymm8,%ymm8
7809	vpalignr	$4,%ymm4,%ymm4,%ymm4
7810	vpaddd	%ymm5,%ymm1,%ymm1
7811	vpxor	%ymm1,%ymm13,%ymm13
7812	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
7813	vpaddd	%ymm13,%ymm9,%ymm9
7814	vpxor	%ymm9,%ymm5,%ymm5
7815	vpsrld	$20,%ymm5,%ymm3
7816	vpslld	$12,%ymm5,%ymm5
7817	vpxor	%ymm3,%ymm5,%ymm5
7818	vpaddd	%ymm5,%ymm1,%ymm1
7819	vpxor	%ymm1,%ymm13,%ymm13
7820	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
7821	vpaddd	%ymm13,%ymm9,%ymm9
7822	vpxor	%ymm9,%ymm5,%ymm5
7823	vpslld	$7,%ymm5,%ymm3
7824	vpsrld	$25,%ymm5,%ymm5
7825	vpxor	%ymm3,%ymm5,%ymm5
7826	vpalignr	$12,%ymm13,%ymm13,%ymm13
7827	vpalignr	$8,%ymm9,%ymm9,%ymm9
7828	vpalignr	$4,%ymm5,%ymm5,%ymm5
7829	addq	0+0(%rdi),%r10
7830	adcq	8+0(%rdi),%r11
7831	adcq	$1,%r12
7832	movq	0+0+0(%rbp),%rax
7833	movq	%rax,%r15
7834	mulq	%r10
7835	movq	%rax,%r13
7836	movq	%rdx,%r14
7837	movq	0+0+0(%rbp),%rax
7838	mulq	%r11
7839	imulq	%r12,%r15
7840	addq	%rax,%r14
7841	adcq	%rdx,%r15
7842	movq	8+0+0(%rbp),%rax
7843	movq	%rax,%r9
7844	mulq	%r10
7845	addq	%rax,%r14
7846	adcq	$0,%rdx
7847	movq	%rdx,%r10
7848	movq	8+0+0(%rbp),%rax
7849	mulq	%r11
7850	addq	%rax,%r15
7851	adcq	$0,%rdx
7852	imulq	%r12,%r9
7853	addq	%r10,%r15
7854	adcq	%rdx,%r9
7855	movq	%r13,%r10
7856	movq	%r14,%r11
7857	movq	%r15,%r12
7858	andq	$3,%r12
7859	movq	%r15,%r13
7860	andq	$-4,%r13
7861	movq	%r9,%r14
7862	shrdq	$2,%r9,%r15
7863	shrq	$2,%r9
7864	addq	%r13,%r15
7865	adcq	%r14,%r9
7866	addq	%r15,%r10
7867	adcq	%r9,%r11
7868	adcq	$0,%r12
7869	vpaddd	%ymm6,%ymm2,%ymm2
7870	vpxor	%ymm2,%ymm14,%ymm14
7871	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
7872	vpaddd	%ymm14,%ymm10,%ymm10
7873	vpxor	%ymm10,%ymm6,%ymm6
7874	vpsrld	$20,%ymm6,%ymm3
7875	vpslld	$12,%ymm6,%ymm6
7876	vpxor	%ymm3,%ymm6,%ymm6
7877	vpaddd	%ymm6,%ymm2,%ymm2
7878	vpxor	%ymm2,%ymm14,%ymm14
7879	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
7880	vpaddd	%ymm14,%ymm10,%ymm10
7881	vpxor	%ymm10,%ymm6,%ymm6
7882	vpslld	$7,%ymm6,%ymm3
7883	vpsrld	$25,%ymm6,%ymm6
7884	vpxor	%ymm3,%ymm6,%ymm6
7885	vpalignr	$12,%ymm14,%ymm14,%ymm14
7886	vpalignr	$8,%ymm10,%ymm10,%ymm10
7887	vpalignr	$4,%ymm6,%ymm6,%ymm6
7888	vpaddd	%ymm4,%ymm0,%ymm0
7889	vpxor	%ymm0,%ymm12,%ymm12
7890	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
7891	vpaddd	%ymm12,%ymm8,%ymm8
7892	vpxor	%ymm8,%ymm4,%ymm4
7893	vpsrld	$20,%ymm4,%ymm3
7894	vpslld	$12,%ymm4,%ymm4
7895	vpxor	%ymm3,%ymm4,%ymm4
7896	vpaddd	%ymm4,%ymm0,%ymm0
7897	vpxor	%ymm0,%ymm12,%ymm12
7898	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
7899	vpaddd	%ymm12,%ymm8,%ymm8
7900	vpxor	%ymm8,%ymm4,%ymm4
7901	vpslld	$7,%ymm4,%ymm3
7902	vpsrld	$25,%ymm4,%ymm4
7903	vpxor	%ymm3,%ymm4,%ymm4
7904	vpalignr	$4,%ymm12,%ymm12,%ymm12
7905	vpalignr	$8,%ymm8,%ymm8,%ymm8
7906	vpalignr	$12,%ymm4,%ymm4,%ymm4
7907	addq	0+16(%rdi),%r10
7908	adcq	8+16(%rdi),%r11
7909	adcq	$1,%r12
7910	movq	0+0+0(%rbp),%rax
7911	movq	%rax,%r15
7912	mulq	%r10
7913	movq	%rax,%r13
7914	movq	%rdx,%r14
7915	movq	0+0+0(%rbp),%rax
7916	mulq	%r11
7917	imulq	%r12,%r15
7918	addq	%rax,%r14
7919	adcq	%rdx,%r15
7920	movq	8+0+0(%rbp),%rax
7921	movq	%rax,%r9
7922	mulq	%r10
7923	addq	%rax,%r14
7924	adcq	$0,%rdx
7925	movq	%rdx,%r10
7926	movq	8+0+0(%rbp),%rax
7927	mulq	%r11
7928	addq	%rax,%r15
7929	adcq	$0,%rdx
7930	imulq	%r12,%r9
7931	addq	%r10,%r15
7932	adcq	%rdx,%r9
7933	movq	%r13,%r10
7934	movq	%r14,%r11
7935	movq	%r15,%r12
7936	andq	$3,%r12
7937	movq	%r15,%r13
7938	andq	$-4,%r13
7939	movq	%r9,%r14
7940	shrdq	$2,%r9,%r15
7941	shrq	$2,%r9
7942	addq	%r13,%r15
7943	adcq	%r14,%r9
7944	addq	%r15,%r10
7945	adcq	%r9,%r11
7946	adcq	$0,%r12
7947	vpaddd	%ymm5,%ymm1,%ymm1
7948	vpxor	%ymm1,%ymm13,%ymm13
7949	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
7950	vpaddd	%ymm13,%ymm9,%ymm9
7951	vpxor	%ymm9,%ymm5,%ymm5
7952	vpsrld	$20,%ymm5,%ymm3
7953	vpslld	$12,%ymm5,%ymm5
7954	vpxor	%ymm3,%ymm5,%ymm5
7955	vpaddd	%ymm5,%ymm1,%ymm1
7956	vpxor	%ymm1,%ymm13,%ymm13
7957	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
7958	vpaddd	%ymm13,%ymm9,%ymm9
7959	vpxor	%ymm9,%ymm5,%ymm5
7960	vpslld	$7,%ymm5,%ymm3
7961	vpsrld	$25,%ymm5,%ymm5
7962	vpxor	%ymm3,%ymm5,%ymm5
7963	vpalignr	$4,%ymm13,%ymm13,%ymm13
7964	vpalignr	$8,%ymm9,%ymm9,%ymm9
7965	vpalignr	$12,%ymm5,%ymm5,%ymm5
7966	vpaddd	%ymm6,%ymm2,%ymm2
7967	vpxor	%ymm2,%ymm14,%ymm14
7968	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
7969	vpaddd	%ymm14,%ymm10,%ymm10
7970	vpxor	%ymm10,%ymm6,%ymm6
7971	vpsrld	$20,%ymm6,%ymm3
7972	vpslld	$12,%ymm6,%ymm6
7973	vpxor	%ymm3,%ymm6,%ymm6
7974	vpaddd	%ymm6,%ymm2,%ymm2
7975	vpxor	%ymm2,%ymm14,%ymm14
7976	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
7977	vpaddd	%ymm14,%ymm10,%ymm10
7978	vpxor	%ymm10,%ymm6,%ymm6
7979	vpslld	$7,%ymm6,%ymm3
7980	vpsrld	$25,%ymm6,%ymm6
7981	vpxor	%ymm3,%ymm6,%ymm6
7982	vpalignr	$4,%ymm14,%ymm14,%ymm14
7983	vpalignr	$8,%ymm10,%ymm10,%ymm10
7984	vpalignr	$12,%ymm6,%ymm6,%ymm6
7985
7986	leaq	32(%rdi),%rdi
7987	decq	%rcx
7988	jg	.Lseal_avx2_tail_384_rounds_and_3xhash
7989	decq	%r8
7990	jge	.Lseal_avx2_tail_384_rounds_and_2xhash
7991	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
7992	vpaddd	0+64(%rbp),%ymm6,%ymm6
7993	vpaddd	0+96(%rbp),%ymm10,%ymm10
7994	vpaddd	0+224(%rbp),%ymm14,%ymm14
7995	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
7996	vpaddd	0+64(%rbp),%ymm5,%ymm5
7997	vpaddd	0+96(%rbp),%ymm9,%ymm9
7998	vpaddd	0+192(%rbp),%ymm13,%ymm13
7999	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
8000	vpaddd	0+64(%rbp),%ymm4,%ymm4
8001	vpaddd	0+96(%rbp),%ymm8,%ymm8
8002	vpaddd	0+160(%rbp),%ymm12,%ymm12
8003	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
8004	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
8005	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
8006	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
8007	vpxor	0+0(%rsi),%ymm3,%ymm3
8008	vpxor	32+0(%rsi),%ymm2,%ymm2
8009	vpxor	64+0(%rsi),%ymm6,%ymm6
8010	vpxor	96+0(%rsi),%ymm10,%ymm10
8011	vmovdqu	%ymm3,0+0(%rdi)
8012	vmovdqu	%ymm2,32+0(%rdi)
8013	vmovdqu	%ymm6,64+0(%rdi)
8014	vmovdqu	%ymm10,96+0(%rdi)
8015	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
8016	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
8017	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
8018	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
8019	vpxor	0+128(%rsi),%ymm3,%ymm3
8020	vpxor	32+128(%rsi),%ymm1,%ymm1
8021	vpxor	64+128(%rsi),%ymm5,%ymm5
8022	vpxor	96+128(%rsi),%ymm9,%ymm9
8023	vmovdqu	%ymm3,0+128(%rdi)
8024	vmovdqu	%ymm1,32+128(%rdi)
8025	vmovdqu	%ymm5,64+128(%rdi)
8026	vmovdqu	%ymm9,96+128(%rdi)
8027	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
8028	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
8029	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
8030	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
8031	vmovdqa	%ymm3,%ymm8
8032
8033	movq	$256,%rcx
8034	leaq	256(%rsi),%rsi
8035	subq	$256,%rbx
8036	jmp	.Lseal_avx2_short_hash_remainder
8037
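# Between 385 and 512 bytes remain: all four states are regenerated and
# the Poly1305 interleaving returns to the mulx form.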
8038.Lseal_avx2_tail_512:
8039	vmovdqa	.Lchacha20_consts(%rip),%ymm0
8040	vmovdqa	0+64(%rbp),%ymm4
8041	vmovdqa	0+96(%rbp),%ymm8
8042	vmovdqa	%ymm0,%ymm1
8043	vmovdqa	%ymm4,%ymm5
8044	vmovdqa	%ymm8,%ymm9
8045	vmovdqa	%ymm0,%ymm2
8046	vmovdqa	%ymm4,%ymm6
8047	vmovdqa	%ymm8,%ymm10
8048	vmovdqa	%ymm0,%ymm3
8049	vmovdqa	%ymm4,%ymm7
8050	vmovdqa	%ymm8,%ymm11
8051	vmovdqa	.Lavx2_inc(%rip),%ymm12
8052	vpaddd	0+160(%rbp),%ymm12,%ymm15
8053	vpaddd	%ymm15,%ymm12,%ymm14
8054	vpaddd	%ymm14,%ymm12,%ymm13
8055	vpaddd	%ymm13,%ymm12,%ymm12
8056	vmovdqa	%ymm15,0+256(%rbp)
8057	vmovdqa	%ymm14,0+224(%rbp)
8058	vmovdqa	%ymm13,0+192(%rbp)
8059	vmovdqa	%ymm12,0+160(%rbp)
8060
8061.Lseal_avx2_tail_512_rounds_and_3xhash:
8062	addq	0+0(%rdi),%r10
8063	adcq	8+0(%rdi),%r11
8064	adcq	$1,%r12
8065	movq	0+0+0(%rbp),%rdx
8066	movq	%rdx,%r15
8067	mulxq	%r10,%r13,%r14
8068	mulxq	%r11,%rax,%rdx
8069	imulq	%r12,%r15
8070	addq	%rax,%r14
8071	adcq	%rdx,%r15
8072	movq	8+0+0(%rbp),%rdx
8073	mulxq	%r10,%r10,%rax
8074	addq	%r10,%r14
8075	mulxq	%r11,%r11,%r9
8076	adcq	%r11,%r15
8077	adcq	$0,%r9
8078	imulq	%r12,%rdx
8079	addq	%rax,%r15
8080	adcq	%rdx,%r9
8081	movq	%r13,%r10
8082	movq	%r14,%r11
8083	movq	%r15,%r12
8084	andq	$3,%r12
8085	movq	%r15,%r13
8086	andq	$-4,%r13
8087	movq	%r9,%r14
8088	shrdq	$2,%r9,%r15
8089	shrq	$2,%r9
8090	addq	%r13,%r15
8091	adcq	%r14,%r9
8092	addq	%r15,%r10
8093	adcq	%r9,%r11
8094	adcq	$0,%r12
8095
8096	leaq	16(%rdi),%rdi
8097.Lseal_avx2_tail_512_rounds_and_2xhash:
8098	vmovdqa	%ymm8,0+128(%rbp)
8099	vmovdqa	.Lrol16(%rip),%ymm8
8100	vpaddd	%ymm7,%ymm3,%ymm3
8101	vpaddd	%ymm6,%ymm2,%ymm2
8102	vpaddd	%ymm5,%ymm1,%ymm1
8103	vpaddd	%ymm4,%ymm0,%ymm0
8104	vpxor	%ymm3,%ymm15,%ymm15
8105	vpxor	%ymm2,%ymm14,%ymm14
8106	vpxor	%ymm1,%ymm13,%ymm13
8107	vpxor	%ymm0,%ymm12,%ymm12
8108	vpshufb	%ymm8,%ymm15,%ymm15
8109	vpshufb	%ymm8,%ymm14,%ymm14
8110	vpshufb	%ymm8,%ymm13,%ymm13
8111	vpshufb	%ymm8,%ymm12,%ymm12
8112	vpaddd	%ymm15,%ymm11,%ymm11
8113	vpaddd	%ymm14,%ymm10,%ymm10
8114	vpaddd	%ymm13,%ymm9,%ymm9
8115	vpaddd	0+128(%rbp),%ymm12,%ymm8
8116	vpxor	%ymm11,%ymm7,%ymm7
8117	vpxor	%ymm10,%ymm6,%ymm6
8118	addq	0+0(%rdi),%r10
8119	adcq	8+0(%rdi),%r11
8120	adcq	$1,%r12
8121	vpxor	%ymm9,%ymm5,%ymm5
8122	vpxor	%ymm8,%ymm4,%ymm4
8123	vmovdqa	%ymm8,0+128(%rbp)
8124	vpsrld	$20,%ymm7,%ymm8
8125	vpslld	$32-20,%ymm7,%ymm7
8126	vpxor	%ymm8,%ymm7,%ymm7
8127	vpsrld	$20,%ymm6,%ymm8
8128	vpslld	$32-20,%ymm6,%ymm6
8129	vpxor	%ymm8,%ymm6,%ymm6
8130	vpsrld	$20,%ymm5,%ymm8
8131	vpslld	$32-20,%ymm5,%ymm5
8132	vpxor	%ymm8,%ymm5,%ymm5
8133	vpsrld	$20,%ymm4,%ymm8
8134	vpslld	$32-20,%ymm4,%ymm4
8135	vpxor	%ymm8,%ymm4,%ymm4
8136	vmovdqa	.Lrol8(%rip),%ymm8
8137	vpaddd	%ymm7,%ymm3,%ymm3
8138	vpaddd	%ymm6,%ymm2,%ymm2
8139	vpaddd	%ymm5,%ymm1,%ymm1
8140	vpaddd	%ymm4,%ymm0,%ymm0
8141	movq	0+0+0(%rbp),%rdx
8142	movq	%rdx,%r15
8143	mulxq	%r10,%r13,%r14
8144	mulxq	%r11,%rax,%rdx
8145	imulq	%r12,%r15
8146	addq	%rax,%r14
8147	adcq	%rdx,%r15
8148	vpxor	%ymm3,%ymm15,%ymm15
8149	vpxor	%ymm2,%ymm14,%ymm14
8150	vpxor	%ymm1,%ymm13,%ymm13
8151	vpxor	%ymm0,%ymm12,%ymm12
8152	vpshufb	%ymm8,%ymm15,%ymm15
8153	vpshufb	%ymm8,%ymm14,%ymm14
8154	vpshufb	%ymm8,%ymm13,%ymm13
8155	vpshufb	%ymm8,%ymm12,%ymm12
8156	vpaddd	%ymm15,%ymm11,%ymm11
8157	vpaddd	%ymm14,%ymm10,%ymm10
8158	vpaddd	%ymm13,%ymm9,%ymm9
8159	vpaddd	0+128(%rbp),%ymm12,%ymm8
8160	vpxor	%ymm11,%ymm7,%ymm7
8161	vpxor	%ymm10,%ymm6,%ymm6
8162	vpxor	%ymm9,%ymm5,%ymm5
8163	vpxor	%ymm8,%ymm4,%ymm4
8164	vmovdqa	%ymm8,0+128(%rbp)
8165	vpsrld	$25,%ymm7,%ymm8
8166	vpslld	$32-25,%ymm7,%ymm7
8167	vpxor	%ymm8,%ymm7,%ymm7
8168	movq	8+0+0(%rbp),%rdx
8169	mulxq	%r10,%r10,%rax
8170	addq	%r10,%r14
8171	mulxq	%r11,%r11,%r9
8172	adcq	%r11,%r15
8173	adcq	$0,%r9
8174	imulq	%r12,%rdx
8175	vpsrld	$25,%ymm6,%ymm8
8176	vpslld	$32-25,%ymm6,%ymm6
8177	vpxor	%ymm8,%ymm6,%ymm6
8178	vpsrld	$25,%ymm5,%ymm8
8179	vpslld	$32-25,%ymm5,%ymm5
8180	vpxor	%ymm8,%ymm5,%ymm5
8181	vpsrld	$25,%ymm4,%ymm8
8182	vpslld	$32-25,%ymm4,%ymm4
8183	vpxor	%ymm8,%ymm4,%ymm4
8184	vmovdqa	0+128(%rbp),%ymm8
8185	vpalignr	$4,%ymm7,%ymm7,%ymm7
8186	vpalignr	$8,%ymm11,%ymm11,%ymm11
8187	vpalignr	$12,%ymm15,%ymm15,%ymm15
8188	vpalignr	$4,%ymm6,%ymm6,%ymm6
8189	vpalignr	$8,%ymm10,%ymm10,%ymm10
8190	vpalignr	$12,%ymm14,%ymm14,%ymm14
8191	vpalignr	$4,%ymm5,%ymm5,%ymm5
8192	vpalignr	$8,%ymm9,%ymm9,%ymm9
8193	vpalignr	$12,%ymm13,%ymm13,%ymm13
8194	vpalignr	$4,%ymm4,%ymm4,%ymm4
8195	addq	%rax,%r15
8196	adcq	%rdx,%r9
8197	vpalignr	$8,%ymm8,%ymm8,%ymm8
8198	vpalignr	$12,%ymm12,%ymm12,%ymm12
8199	vmovdqa	%ymm8,0+128(%rbp)
8200	vmovdqa	.Lrol16(%rip),%ymm8
8201	vpaddd	%ymm7,%ymm3,%ymm3
8202	vpaddd	%ymm6,%ymm2,%ymm2
8203	vpaddd	%ymm5,%ymm1,%ymm1
8204	vpaddd	%ymm4,%ymm0,%ymm0
8205	vpxor	%ymm3,%ymm15,%ymm15
8206	vpxor	%ymm2,%ymm14,%ymm14
8207	vpxor	%ymm1,%ymm13,%ymm13
8208	vpxor	%ymm0,%ymm12,%ymm12
8209	vpshufb	%ymm8,%ymm15,%ymm15
8210	vpshufb	%ymm8,%ymm14,%ymm14
8211	vpshufb	%ymm8,%ymm13,%ymm13
8212	vpshufb	%ymm8,%ymm12,%ymm12
8213	vpaddd	%ymm15,%ymm11,%ymm11
8214	vpaddd	%ymm14,%ymm10,%ymm10
8215	vpaddd	%ymm13,%ymm9,%ymm9
8216	vpaddd	0+128(%rbp),%ymm12,%ymm8
8217	movq	%r13,%r10
8218	movq	%r14,%r11
8219	movq	%r15,%r12
8220	andq	$3,%r12
8221	movq	%r15,%r13
8222	andq	$-4,%r13
8223	movq	%r9,%r14
8224	shrdq	$2,%r9,%r15
8225	shrq	$2,%r9
8226	addq	%r13,%r15
8227	adcq	%r14,%r9
8228	addq	%r15,%r10
8229	adcq	%r9,%r11
8230	adcq	$0,%r12
8231	vpxor	%ymm11,%ymm7,%ymm7
8232	vpxor	%ymm10,%ymm6,%ymm6
8233	vpxor	%ymm9,%ymm5,%ymm5
8234	vpxor	%ymm8,%ymm4,%ymm4
8235	vmovdqa	%ymm8,0+128(%rbp)
8236	vpsrld	$20,%ymm7,%ymm8
8237	vpslld	$32-20,%ymm7,%ymm7
8238	vpxor	%ymm8,%ymm7,%ymm7
8239	vpsrld	$20,%ymm6,%ymm8
8240	vpslld	$32-20,%ymm6,%ymm6
8241	vpxor	%ymm8,%ymm6,%ymm6
8242	vpsrld	$20,%ymm5,%ymm8
8243	vpslld	$32-20,%ymm5,%ymm5
8244	vpxor	%ymm8,%ymm5,%ymm5
8245	vpsrld	$20,%ymm4,%ymm8
8246	vpslld	$32-20,%ymm4,%ymm4
8247	vpxor	%ymm8,%ymm4,%ymm4
8248	vmovdqa	.Lrol8(%rip),%ymm8
8249	vpaddd	%ymm7,%ymm3,%ymm3
8250	vpaddd	%ymm6,%ymm2,%ymm2
8251	addq	0+16(%rdi),%r10
8252	adcq	8+16(%rdi),%r11
8253	adcq	$1,%r12
8254	vpaddd	%ymm5,%ymm1,%ymm1
8255	vpaddd	%ymm4,%ymm0,%ymm0
8256	vpxor	%ymm3,%ymm15,%ymm15
8257	vpxor	%ymm2,%ymm14,%ymm14
8258	vpxor	%ymm1,%ymm13,%ymm13
8259	vpxor	%ymm0,%ymm12,%ymm12
8260	vpshufb	%ymm8,%ymm15,%ymm15
8261	vpshufb	%ymm8,%ymm14,%ymm14
8262	vpshufb	%ymm8,%ymm13,%ymm13
8263	vpshufb	%ymm8,%ymm12,%ymm12
8264	vpaddd	%ymm15,%ymm11,%ymm11
8265	vpaddd	%ymm14,%ymm10,%ymm10
8266	vpaddd	%ymm13,%ymm9,%ymm9
8267	vpaddd	0+128(%rbp),%ymm12,%ymm8
8268	vpxor	%ymm11,%ymm7,%ymm7
8269	vpxor	%ymm10,%ymm6,%ymm6
8270	vpxor	%ymm9,%ymm5,%ymm5
8271	vpxor	%ymm8,%ymm4,%ymm4
8272	vmovdqa	%ymm8,0+128(%rbp)
8273	vpsrld	$25,%ymm7,%ymm8
8274	movq	0+0+0(%rbp),%rdx
8275	movq	%rdx,%r15
8276	mulxq	%r10,%r13,%r14
8277	mulxq	%r11,%rax,%rdx
8278	imulq	%r12,%r15
8279	addq	%rax,%r14
8280	adcq	%rdx,%r15
8281	vpslld	$32-25,%ymm7,%ymm7
8282	vpxor	%ymm8,%ymm7,%ymm7
8283	vpsrld	$25,%ymm6,%ymm8
8284	vpslld	$32-25,%ymm6,%ymm6
8285	vpxor	%ymm8,%ymm6,%ymm6
8286	vpsrld	$25,%ymm5,%ymm8
8287	vpslld	$32-25,%ymm5,%ymm5
8288	vpxor	%ymm8,%ymm5,%ymm5
8289	vpsrld	$25,%ymm4,%ymm8
8290	vpslld	$32-25,%ymm4,%ymm4
8291	vpxor	%ymm8,%ymm4,%ymm4
8292	vmovdqa	0+128(%rbp),%ymm8
8293	vpalignr	$12,%ymm7,%ymm7,%ymm7
8294	vpalignr	$8,%ymm11,%ymm11,%ymm11
8295	vpalignr	$4,%ymm15,%ymm15,%ymm15
8296	vpalignr	$12,%ymm6,%ymm6,%ymm6
8297	vpalignr	$8,%ymm10,%ymm10,%ymm10
8298	vpalignr	$4,%ymm14,%ymm14,%ymm14
8299	vpalignr	$12,%ymm5,%ymm5,%ymm5
8300	vpalignr	$8,%ymm9,%ymm9,%ymm9
8301	movq	8+0+0(%rbp),%rdx
8302	mulxq	%r10,%r10,%rax
8303	addq	%r10,%r14
8304	mulxq	%r11,%r11,%r9
8305	adcq	%r11,%r15
8306	adcq	$0,%r9
8307	imulq	%r12,%rdx
8308	vpalignr	$4,%ymm13,%ymm13,%ymm13
8309	vpalignr	$12,%ymm4,%ymm4,%ymm4
8310	vpalignr	$8,%ymm8,%ymm8,%ymm8
8311	vpalignr	$4,%ymm12,%ymm12,%ymm12
8312
8328	addq	%rax,%r15
8329	adcq	%rdx,%r9
8330
8350	movq	%r13,%r10
8351	movq	%r14,%r11
8352	movq	%r15,%r12
8353	andq	$3,%r12
8354	movq	%r15,%r13
8355	andq	$-4,%r13
8356	movq	%r9,%r14
8357	shrdq	$2,%r9,%r15
8358	shrq	$2,%r9
8359	addq	%r13,%r15
8360	adcq	%r14,%r9
8361	addq	%r15,%r10
8362	adcq	%r9,%r11
8363	adcq	$0,%r12
8364
8365	leaq	32(%rdi),%rdi
8366	decq	%rcx
8367	jg	.Lseal_avx2_tail_512_rounds_and_3xhash
8368	decq	%r8
8369	jge	.Lseal_avx2_tail_512_rounds_and_2xhash
8370	vpaddd	.Lchacha20_consts(%rip),%ymm3,%ymm3
8371	vpaddd	0+64(%rbp),%ymm7,%ymm7
8372	vpaddd	0+96(%rbp),%ymm11,%ymm11
8373	vpaddd	0+256(%rbp),%ymm15,%ymm15
8374	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
8375	vpaddd	0+64(%rbp),%ymm6,%ymm6
8376	vpaddd	0+96(%rbp),%ymm10,%ymm10
8377	vpaddd	0+224(%rbp),%ymm14,%ymm14
8378	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
8379	vpaddd	0+64(%rbp),%ymm5,%ymm5
8380	vpaddd	0+96(%rbp),%ymm9,%ymm9
8381	vpaddd	0+192(%rbp),%ymm13,%ymm13
8382	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
8383	vpaddd	0+64(%rbp),%ymm4,%ymm4
8384	vpaddd	0+96(%rbp),%ymm8,%ymm8
8385	vpaddd	0+160(%rbp),%ymm12,%ymm12
8386
8387	vmovdqa	%ymm0,0+128(%rbp)
8388	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
8389	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
8390	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
8391	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
8392	vpxor	0+0(%rsi),%ymm0,%ymm0
8393	vpxor	32+0(%rsi),%ymm3,%ymm3
8394	vpxor	64+0(%rsi),%ymm7,%ymm7
8395	vpxor	96+0(%rsi),%ymm11,%ymm11
8396	vmovdqu	%ymm0,0+0(%rdi)
8397	vmovdqu	%ymm3,32+0(%rdi)
8398	vmovdqu	%ymm7,64+0(%rdi)
8399	vmovdqu	%ymm11,96+0(%rdi)
8400
8401	vmovdqa	0+128(%rbp),%ymm0
8402	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
8403	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
8404	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
8405	vperm2i128	$0x13,%ymm10,%ymm14,%ymm10
8406	vpxor	0+128(%rsi),%ymm3,%ymm3
8407	vpxor	32+128(%rsi),%ymm2,%ymm2
8408	vpxor	64+128(%rsi),%ymm6,%ymm6
8409	vpxor	96+128(%rsi),%ymm10,%ymm10
8410	vmovdqu	%ymm3,0+128(%rdi)
8411	vmovdqu	%ymm2,32+128(%rdi)
8412	vmovdqu	%ymm6,64+128(%rdi)
8413	vmovdqu	%ymm10,96+128(%rdi)
8414	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
8415	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
8416	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
8417	vperm2i128	$0x13,%ymm9,%ymm13,%ymm9
8418	vpxor	0+256(%rsi),%ymm3,%ymm3
8419	vpxor	32+256(%rsi),%ymm1,%ymm1
8420	vpxor	64+256(%rsi),%ymm5,%ymm5
8421	vpxor	96+256(%rsi),%ymm9,%ymm9
8422	vmovdqu	%ymm3,0+256(%rdi)
8423	vmovdqu	%ymm1,32+256(%rdi)
8424	vmovdqu	%ymm5,64+256(%rdi)
8425	vmovdqu	%ymm9,96+256(%rdi)
8426	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
8427	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
8428	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
8429	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
8430	vmovdqa	%ymm3,%ymm8
8431
8432	movq	$384,%rcx
8433	leaq	384(%rsi),%rsi
8434	subq	$384,%rbx
8435	jmp	.Lseal_avx2_short_hash_remainder
8436
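# Short-message setup with three states. After the rounds, lane 0 of the
# first block is clamped with .Lclamp and stored at 0(%rbp) as the
# Poly1305 (r,s) key; the remaining keystream stays in YMM registers for
# .Lseal_avx2_short.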
8437.Lseal_avx2_320:
8438	vmovdqa	%ymm0,%ymm1
8439	vmovdqa	%ymm0,%ymm2
8440	vmovdqa	%ymm4,%ymm5
8441	vmovdqa	%ymm4,%ymm6
8442	vmovdqa	%ymm8,%ymm9
8443	vmovdqa	%ymm8,%ymm10
8444	vpaddd	.Lavx2_inc(%rip),%ymm12,%ymm13
8445	vpaddd	.Lavx2_inc(%rip),%ymm13,%ymm14
8446	vmovdqa	%ymm4,%ymm7
8447	vmovdqa	%ymm8,%ymm11
8448	vmovdqa	%ymm12,0+160(%rbp)
8449	vmovdqa	%ymm13,0+192(%rbp)
8450	vmovdqa	%ymm14,0+224(%rbp)
8451	movq	$10,%r10
8452.Lseal_avx2_320_rounds:
8453	vpaddd	%ymm4,%ymm0,%ymm0
8454	vpxor	%ymm0,%ymm12,%ymm12
8455	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
8456	vpaddd	%ymm12,%ymm8,%ymm8
8457	vpxor	%ymm8,%ymm4,%ymm4
8458	vpsrld	$20,%ymm4,%ymm3
8459	vpslld	$12,%ymm4,%ymm4
8460	vpxor	%ymm3,%ymm4,%ymm4
8461	vpaddd	%ymm4,%ymm0,%ymm0
8462	vpxor	%ymm0,%ymm12,%ymm12
8463	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
8464	vpaddd	%ymm12,%ymm8,%ymm8
8465	vpxor	%ymm8,%ymm4,%ymm4
8466	vpslld	$7,%ymm4,%ymm3
8467	vpsrld	$25,%ymm4,%ymm4
8468	vpxor	%ymm3,%ymm4,%ymm4
8469	vpalignr	$12,%ymm12,%ymm12,%ymm12
8470	vpalignr	$8,%ymm8,%ymm8,%ymm8
8471	vpalignr	$4,%ymm4,%ymm4,%ymm4
8472	vpaddd	%ymm5,%ymm1,%ymm1
8473	vpxor	%ymm1,%ymm13,%ymm13
8474	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
8475	vpaddd	%ymm13,%ymm9,%ymm9
8476	vpxor	%ymm9,%ymm5,%ymm5
8477	vpsrld	$20,%ymm5,%ymm3
8478	vpslld	$12,%ymm5,%ymm5
8479	vpxor	%ymm3,%ymm5,%ymm5
8480	vpaddd	%ymm5,%ymm1,%ymm1
8481	vpxor	%ymm1,%ymm13,%ymm13
8482	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
8483	vpaddd	%ymm13,%ymm9,%ymm9
8484	vpxor	%ymm9,%ymm5,%ymm5
8485	vpslld	$7,%ymm5,%ymm3
8486	vpsrld	$25,%ymm5,%ymm5
8487	vpxor	%ymm3,%ymm5,%ymm5
8488	vpalignr	$12,%ymm13,%ymm13,%ymm13
8489	vpalignr	$8,%ymm9,%ymm9,%ymm9
8490	vpalignr	$4,%ymm5,%ymm5,%ymm5
8491	vpaddd	%ymm6,%ymm2,%ymm2
8492	vpxor	%ymm2,%ymm14,%ymm14
8493	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
8494	vpaddd	%ymm14,%ymm10,%ymm10
8495	vpxor	%ymm10,%ymm6,%ymm6
8496	vpsrld	$20,%ymm6,%ymm3
8497	vpslld	$12,%ymm6,%ymm6
8498	vpxor	%ymm3,%ymm6,%ymm6
8499	vpaddd	%ymm6,%ymm2,%ymm2
8500	vpxor	%ymm2,%ymm14,%ymm14
8501	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
8502	vpaddd	%ymm14,%ymm10,%ymm10
8503	vpxor	%ymm10,%ymm6,%ymm6
8504	vpslld	$7,%ymm6,%ymm3
8505	vpsrld	$25,%ymm6,%ymm6
8506	vpxor	%ymm3,%ymm6,%ymm6
8507	vpalignr	$12,%ymm14,%ymm14,%ymm14
8508	vpalignr	$8,%ymm10,%ymm10,%ymm10
8509	vpalignr	$4,%ymm6,%ymm6,%ymm6
8510	vpaddd	%ymm4,%ymm0,%ymm0
8511	vpxor	%ymm0,%ymm12,%ymm12
8512	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
8513	vpaddd	%ymm12,%ymm8,%ymm8
8514	vpxor	%ymm8,%ymm4,%ymm4
8515	vpsrld	$20,%ymm4,%ymm3
8516	vpslld	$12,%ymm4,%ymm4
8517	vpxor	%ymm3,%ymm4,%ymm4
8518	vpaddd	%ymm4,%ymm0,%ymm0
8519	vpxor	%ymm0,%ymm12,%ymm12
8520	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
8521	vpaddd	%ymm12,%ymm8,%ymm8
8522	vpxor	%ymm8,%ymm4,%ymm4
8523	vpslld	$7,%ymm4,%ymm3
8524	vpsrld	$25,%ymm4,%ymm4
8525	vpxor	%ymm3,%ymm4,%ymm4
8526	vpalignr	$4,%ymm12,%ymm12,%ymm12
8527	vpalignr	$8,%ymm8,%ymm8,%ymm8
8528	vpalignr	$12,%ymm4,%ymm4,%ymm4
8529	vpaddd	%ymm5,%ymm1,%ymm1
8530	vpxor	%ymm1,%ymm13,%ymm13
8531	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
8532	vpaddd	%ymm13,%ymm9,%ymm9
8533	vpxor	%ymm9,%ymm5,%ymm5
8534	vpsrld	$20,%ymm5,%ymm3
8535	vpslld	$12,%ymm5,%ymm5
8536	vpxor	%ymm3,%ymm5,%ymm5
8537	vpaddd	%ymm5,%ymm1,%ymm1
8538	vpxor	%ymm1,%ymm13,%ymm13
8539	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
8540	vpaddd	%ymm13,%ymm9,%ymm9
8541	vpxor	%ymm9,%ymm5,%ymm5
8542	vpslld	$7,%ymm5,%ymm3
8543	vpsrld	$25,%ymm5,%ymm5
8544	vpxor	%ymm3,%ymm5,%ymm5
8545	vpalignr	$4,%ymm13,%ymm13,%ymm13
8546	vpalignr	$8,%ymm9,%ymm9,%ymm9
8547	vpalignr	$12,%ymm5,%ymm5,%ymm5
8548	vpaddd	%ymm6,%ymm2,%ymm2
8549	vpxor	%ymm2,%ymm14,%ymm14
8550	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
8551	vpaddd	%ymm14,%ymm10,%ymm10
8552	vpxor	%ymm10,%ymm6,%ymm6
8553	vpsrld	$20,%ymm6,%ymm3
8554	vpslld	$12,%ymm6,%ymm6
8555	vpxor	%ymm3,%ymm6,%ymm6
8556	vpaddd	%ymm6,%ymm2,%ymm2
8557	vpxor	%ymm2,%ymm14,%ymm14
8558	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
8559	vpaddd	%ymm14,%ymm10,%ymm10
8560	vpxor	%ymm10,%ymm6,%ymm6
8561	vpslld	$7,%ymm6,%ymm3
8562	vpsrld	$25,%ymm6,%ymm6
8563	vpxor	%ymm3,%ymm6,%ymm6
8564	vpalignr	$4,%ymm14,%ymm14,%ymm14
8565	vpalignr	$8,%ymm10,%ymm10,%ymm10
8566	vpalignr	$12,%ymm6,%ymm6,%ymm6
8567
8568	decq	%r10
8569	jne	.Lseal_avx2_320_rounds
8570	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
8571	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
8572	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
8573	vpaddd	%ymm7,%ymm4,%ymm4
8574	vpaddd	%ymm7,%ymm5,%ymm5
8575	vpaddd	%ymm7,%ymm6,%ymm6
8576	vpaddd	%ymm11,%ymm8,%ymm8
8577	vpaddd	%ymm11,%ymm9,%ymm9
8578	vpaddd	%ymm11,%ymm10,%ymm10
8579	vpaddd	0+160(%rbp),%ymm12,%ymm12
8580	vpaddd	0+192(%rbp),%ymm13,%ymm13
8581	vpaddd	0+224(%rbp),%ymm14,%ymm14
8582	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
8583
8584	vpand	.Lclamp(%rip),%ymm3,%ymm3
8585	vmovdqa	%ymm3,0+0(%rbp)
8586
8587	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
8588	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
8589	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
8590	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
8591	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
8592	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
8593	vperm2i128	$0x02,%ymm2,%ymm6,%ymm9
8594	vperm2i128	$0x02,%ymm10,%ymm14,%ymm13
8595	vperm2i128	$0x13,%ymm2,%ymm6,%ymm2
8596	vperm2i128	$0x13,%ymm10,%ymm14,%ymm6
8597	jmp	.Lseal_avx2_short
8598
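# Smallest setup: two states only. As above, the first 64-byte block is
# reserved for the Poly1305 key and the rest of that block is discarded,
# leaving 192 bytes of usable keystream.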
8599.Lseal_avx2_192:
8600	vmovdqa	%ymm0,%ymm1
8601	vmovdqa	%ymm0,%ymm2
8602	vmovdqa	%ymm4,%ymm5
8603	vmovdqa	%ymm4,%ymm6
8604	vmovdqa	%ymm8,%ymm9
8605	vmovdqa	%ymm8,%ymm10
8606	vpaddd	.Lavx2_inc(%rip),%ymm12,%ymm13
8607	vmovdqa	%ymm12,%ymm11
8608	vmovdqa	%ymm13,%ymm15
8609	movq	$10,%r10
8610.Lseal_avx2_192_rounds:
8611	vpaddd	%ymm4,%ymm0,%ymm0
8612	vpxor	%ymm0,%ymm12,%ymm12
8613	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
8614	vpaddd	%ymm12,%ymm8,%ymm8
8615	vpxor	%ymm8,%ymm4,%ymm4
8616	vpsrld	$20,%ymm4,%ymm3
8617	vpslld	$12,%ymm4,%ymm4
8618	vpxor	%ymm3,%ymm4,%ymm4
8619	vpaddd	%ymm4,%ymm0,%ymm0
8620	vpxor	%ymm0,%ymm12,%ymm12
8621	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
8622	vpaddd	%ymm12,%ymm8,%ymm8
8623	vpxor	%ymm8,%ymm4,%ymm4
8624	vpslld	$7,%ymm4,%ymm3
8625	vpsrld	$25,%ymm4,%ymm4
8626	vpxor	%ymm3,%ymm4,%ymm4
8627	vpalignr	$12,%ymm12,%ymm12,%ymm12
8628	vpalignr	$8,%ymm8,%ymm8,%ymm8
8629	vpalignr	$4,%ymm4,%ymm4,%ymm4
8630	vpaddd	%ymm5,%ymm1,%ymm1
8631	vpxor	%ymm1,%ymm13,%ymm13
8632	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
8633	vpaddd	%ymm13,%ymm9,%ymm9
8634	vpxor	%ymm9,%ymm5,%ymm5
8635	vpsrld	$20,%ymm5,%ymm3
8636	vpslld	$12,%ymm5,%ymm5
8637	vpxor	%ymm3,%ymm5,%ymm5
8638	vpaddd	%ymm5,%ymm1,%ymm1
8639	vpxor	%ymm1,%ymm13,%ymm13
8640	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
8641	vpaddd	%ymm13,%ymm9,%ymm9
8642	vpxor	%ymm9,%ymm5,%ymm5
8643	vpslld	$7,%ymm5,%ymm3
8644	vpsrld	$25,%ymm5,%ymm5
8645	vpxor	%ymm3,%ymm5,%ymm5
8646	vpalignr	$12,%ymm13,%ymm13,%ymm13
8647	vpalignr	$8,%ymm9,%ymm9,%ymm9
8648	vpalignr	$4,%ymm5,%ymm5,%ymm5
8649	vpaddd	%ymm4,%ymm0,%ymm0
8650	vpxor	%ymm0,%ymm12,%ymm12
8651	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
8652	vpaddd	%ymm12,%ymm8,%ymm8
8653	vpxor	%ymm8,%ymm4,%ymm4
8654	vpsrld	$20,%ymm4,%ymm3
8655	vpslld	$12,%ymm4,%ymm4
8656	vpxor	%ymm3,%ymm4,%ymm4
8657	vpaddd	%ymm4,%ymm0,%ymm0
8658	vpxor	%ymm0,%ymm12,%ymm12
8659	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
8660	vpaddd	%ymm12,%ymm8,%ymm8
8661	vpxor	%ymm8,%ymm4,%ymm4
8662	vpslld	$7,%ymm4,%ymm3
8663	vpsrld	$25,%ymm4,%ymm4
8664	vpxor	%ymm3,%ymm4,%ymm4
8665	vpalignr	$4,%ymm12,%ymm12,%ymm12
8666	vpalignr	$8,%ymm8,%ymm8,%ymm8
8667	vpalignr	$12,%ymm4,%ymm4,%ymm4
8668	vpaddd	%ymm5,%ymm1,%ymm1
8669	vpxor	%ymm1,%ymm13,%ymm13
8670	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
8671	vpaddd	%ymm13,%ymm9,%ymm9
8672	vpxor	%ymm9,%ymm5,%ymm5
8673	vpsrld	$20,%ymm5,%ymm3
8674	vpslld	$12,%ymm5,%ymm5
8675	vpxor	%ymm3,%ymm5,%ymm5
8676	vpaddd	%ymm5,%ymm1,%ymm1
8677	vpxor	%ymm1,%ymm13,%ymm13
8678	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
8679	vpaddd	%ymm13,%ymm9,%ymm9
8680	vpxor	%ymm9,%ymm5,%ymm5
8681	vpslld	$7,%ymm5,%ymm3
8682	vpsrld	$25,%ymm5,%ymm5
8683	vpxor	%ymm3,%ymm5,%ymm5
8684	vpalignr	$4,%ymm13,%ymm13,%ymm13
8685	vpalignr	$8,%ymm9,%ymm9,%ymm9
8686	vpalignr	$12,%ymm5,%ymm5,%ymm5
8687
8688	decq	%r10
8689	jne	.Lseal_avx2_192_rounds
8690	vpaddd	%ymm2,%ymm0,%ymm0
8691	vpaddd	%ymm2,%ymm1,%ymm1
8692	vpaddd	%ymm6,%ymm4,%ymm4
8693	vpaddd	%ymm6,%ymm5,%ymm5
8694	vpaddd	%ymm10,%ymm8,%ymm8
8695	vpaddd	%ymm10,%ymm9,%ymm9
8696	vpaddd	%ymm11,%ymm12,%ymm12
8697	vpaddd	%ymm15,%ymm13,%ymm13
8698	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
8699
8700	vpand	.Lclamp(%rip),%ymm3,%ymm3
8701	vmovdqa	%ymm3,0+0(%rbp)
8702
8703	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
8704	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
8705	vperm2i128	$0x02,%ymm1,%ymm5,%ymm8
8706	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
8707	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
8708	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
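# Common finishing path. Hash the AAD with poly_hash_ad_internal (pointer
# in %rcx, length in %r8); the bulk tails above skip this call and enter
# at .Lseal_avx2_short_hash_remainder with the AAD already absorbed.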
8709.Lseal_avx2_short:
8710	movq	%r8,%r8
8711	call	poly_hash_ad_internal
8712	xorq	%rcx,%rcx
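# %rcx holds the number of ciphertext bytes already written to %rdi but
# not yet absorbed into the tag: 0 here, 128/256/384 when entered from the
# tail paths.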
8713.Lseal_avx2_short_hash_remainder:
8714	cmpq	$16,%rcx
8715	jb	.Lseal_avx2_short_loop
8716	addq	0+0(%rdi),%r10
8717	adcq	8+0(%rdi),%r11
8718	adcq	$1,%r12
8719	movq	0+0+0(%rbp),%rax
8720	movq	%rax,%r15
8721	mulq	%r10
8722	movq	%rax,%r13
8723	movq	%rdx,%r14
8724	movq	0+0+0(%rbp),%rax
8725	mulq	%r11
8726	imulq	%r12,%r15
8727	addq	%rax,%r14
8728	adcq	%rdx,%r15
8729	movq	8+0+0(%rbp),%rax
8730	movq	%rax,%r9
8731	mulq	%r10
8732	addq	%rax,%r14
8733	adcq	$0,%rdx
8734	movq	%rdx,%r10
8735	movq	8+0+0(%rbp),%rax
8736	mulq	%r11
8737	addq	%rax,%r15
8738	adcq	$0,%rdx
8739	imulq	%r12,%r9
8740	addq	%r10,%r15
8741	adcq	%rdx,%r9
8742	movq	%r13,%r10
8743	movq	%r14,%r11
8744	movq	%r15,%r12
8745	andq	$3,%r12
8746	movq	%r15,%r13
8747	andq	$-4,%r13
8748	movq	%r9,%r14
8749	shrdq	$2,%r9,%r15
8750	shrq	$2,%r9
8751	addq	%r13,%r15
8752	adcq	%r14,%r9
8753	addq	%r15,%r10
8754	adcq	%r9,%r11
8755	adcq	$0,%r12
8756
8757	subq	$16,%rcx
8758	addq	$16,%rdi
8759	jmp	.Lseal_avx2_short_hash_remainder
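# Encrypt and authenticate 32 bytes per pass; afterwards the queued
# keystream rotates down one register (ymm4 -> ymm0, ymm8 -> ymm4, ...).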
8760.Lseal_avx2_short_loop:
8761	cmpq	$32,%rbx
8762	jb	.Lseal_avx2_short_tail
8763	subq	$32,%rbx
8764
8765	vpxor	(%rsi),%ymm0,%ymm0
8766	vmovdqu	%ymm0,(%rdi)
8767	leaq	32(%rsi),%rsi
8768
8769	addq	0+0(%rdi),%r10
8770	adcq	8+0(%rdi),%r11
8771	adcq	$1,%r12
8772	movq	0+0+0(%rbp),%rax
8773	movq	%rax,%r15
8774	mulq	%r10
8775	movq	%rax,%r13
8776	movq	%rdx,%r14
8777	movq	0+0+0(%rbp),%rax
8778	mulq	%r11
8779	imulq	%r12,%r15
8780	addq	%rax,%r14
8781	adcq	%rdx,%r15
8782	movq	8+0+0(%rbp),%rax
8783	movq	%rax,%r9
8784	mulq	%r10
8785	addq	%rax,%r14
8786	adcq	$0,%rdx
8787	movq	%rdx,%r10
8788	movq	8+0+0(%rbp),%rax
8789	mulq	%r11
8790	addq	%rax,%r15
8791	adcq	$0,%rdx
8792	imulq	%r12,%r9
8793	addq	%r10,%r15
8794	adcq	%rdx,%r9
8795	movq	%r13,%r10
8796	movq	%r14,%r11
8797	movq	%r15,%r12
8798	andq	$3,%r12
8799	movq	%r15,%r13
8800	andq	$-4,%r13
8801	movq	%r9,%r14
8802	shrdq	$2,%r9,%r15
8803	shrq	$2,%r9
8804	addq	%r13,%r15
8805	adcq	%r14,%r9
8806	addq	%r15,%r10
8807	adcq	%r9,%r11
8808	adcq	$0,%r12
8809	addq	0+16(%rdi),%r10
8810	adcq	8+16(%rdi),%r11
8811	adcq	$1,%r12
8812	movq	0+0+0(%rbp),%rax
8813	movq	%rax,%r15
8814	mulq	%r10
8815	movq	%rax,%r13
8816	movq	%rdx,%r14
8817	movq	0+0+0(%rbp),%rax
8818	mulq	%r11
8819	imulq	%r12,%r15
8820	addq	%rax,%r14
8821	adcq	%rdx,%r15
8822	movq	8+0+0(%rbp),%rax
8823	movq	%rax,%r9
8824	mulq	%r10
8825	addq	%rax,%r14
8826	adcq	$0,%rdx
8827	movq	%rdx,%r10
8828	movq	8+0+0(%rbp),%rax
8829	mulq	%r11
8830	addq	%rax,%r15
8831	adcq	$0,%rdx
8832	imulq	%r12,%r9
8833	addq	%r10,%r15
8834	adcq	%rdx,%r9
8835	movq	%r13,%r10
8836	movq	%r14,%r11
8837	movq	%r15,%r12
8838	andq	$3,%r12
8839	movq	%r15,%r13
8840	andq	$-4,%r13
8841	movq	%r9,%r14
8842	shrdq	$2,%r9,%r15
8843	shrq	$2,%r9
8844	addq	%r13,%r15
8845	adcq	%r14,%r9
8846	addq	%r15,%r10
8847	adcq	%r9,%r11
8848	adcq	$0,%r12
8849
8850	leaq	32(%rdi),%rdi
8851
8852	vmovdqa	%ymm4,%ymm0
8853	vmovdqa	%ymm8,%ymm4
8854	vmovdqa	%ymm12,%ymm8
8855	vmovdqa	%ymm1,%ymm12
8856	vmovdqa	%ymm5,%ymm1
8857	vmovdqa	%ymm9,%ymm5
8858	vmovdqa	%ymm13,%ymm9
8859	vmovdqa	%ymm2,%ymm13
8860	vmovdqa	%ymm6,%ymm2
8861	jmp	.Lseal_avx2_short_loop
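# Between 16 and 31 bytes left: handle one 16-byte block with the low half
# of ymm0, then fall through to the SSE tail for the last 0-15 bytes and
# the tag.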
8862.Lseal_avx2_short_tail:
8863	cmpq	$16,%rbx
8864	jb	.Lseal_avx2_exit
8865	subq	$16,%rbx
8866	vpxor	(%rsi),%xmm0,%xmm3
8867	vmovdqu	%xmm3,(%rdi)
8868	leaq	16(%rsi),%rsi
8869	addq	0+0(%rdi),%r10
8870	adcq	8+0(%rdi),%r11
8871	adcq	$1,%r12
8872	movq	0+0+0(%rbp),%rax
8873	movq	%rax,%r15
8874	mulq	%r10
8875	movq	%rax,%r13
8876	movq	%rdx,%r14
8877	movq	0+0+0(%rbp),%rax
8878	mulq	%r11
8879	imulq	%r12,%r15
8880	addq	%rax,%r14
8881	adcq	%rdx,%r15
8882	movq	8+0+0(%rbp),%rax
8883	movq	%rax,%r9
8884	mulq	%r10
8885	addq	%rax,%r14
8886	adcq	$0,%rdx
8887	movq	%rdx,%r10
8888	movq	8+0+0(%rbp),%rax
8889	mulq	%r11
8890	addq	%rax,%r15
8891	adcq	$0,%rdx
8892	imulq	%r12,%r9
8893	addq	%r10,%r15
8894	adcq	%rdx,%r9
8895	movq	%r13,%r10
8896	movq	%r14,%r11
8897	movq	%r15,%r12
8898	andq	$3,%r12
8899	movq	%r15,%r13
8900	andq	$-4,%r13
8901	movq	%r9,%r14
8902	shrdq	$2,%r9,%r15
8903	shrq	$2,%r9
8904	addq	%r13,%r15
8905	adcq	%r14,%r9
8906	addq	%r15,%r10
8907	adcq	%r9,%r11
8908	adcq	$0,%r12
8909
8910	leaq	16(%rdi),%rdi
8911	vextracti128	$1,%ymm0,%xmm0
8912.Lseal_avx2_exit:
8913	vzeroupper
8914	jmp	.Lseal_sse_tail_16
8915.cfi_endproc
8916.size	chacha20_poly1305_seal_avx2, .-chacha20_poly1305_seal_avx2
8917#endif
8918.section	.note.GNU-stack,"",@progbits
8919